import type { EventBus } from '@stock-bot/event-bus';
import type { Logger } from '@stock-bot/utils';
import type {
  DataLineage,
  DataAsset,
  LineageTransformation,
  ImpactAnalysis,
  LineageQuery,
  LineageDirection
} from '../types/DataCatalog';

/** A node of the graph returned by `DataLineageServiceImpl.getLineageGraph`. */
export interface LineageGraphNode {
  id: string;
  name: DataAsset['name'];
  type: DataAsset['type'];
  classification: DataAsset['classification'];
}

/**
 * A directed edge of the lineage graph. `source` is always the upstream asset
 * and `target` the downstream one; `type` records the walk that found it.
 */
export interface LineageGraphEdge {
  source: string;
  target: string;
  type: 'upstream' | 'downstream';
}

/** Plain-object graph returned by `DataLineageServiceImpl.getLineageGraph`. */
export interface LineageGraph {
  nodes: LineageGraphNode[];
  edges: LineageGraphEdge[];
}

/** Contract for storing and querying lineage relations between data assets. */
export interface DataLineageService {
  addLineage(lineage: DataLineage): Promise<void>;
  getLineage(assetId: string): Promise<DataLineage | null>;
  updateLineage(assetId: string, lineage: Partial<DataLineage>): Promise<DataLineage | null>;
  addUpstreamDependency(assetId: string, upstreamAssetId: string, transformation?: LineageTransformation): Promise<void>;
  addDownstreamDependency(assetId: string, downstreamAssetId: string, transformation?: LineageTransformation): Promise<void>;
  removeUpstreamDependency(assetId: string, upstreamAssetId: string): Promise<void>;
  removeDownstreamDependency(assetId: string, downstreamAssetId: string): Promise<void>;
  getUpstreamAssets(assetId: string, depth?: number): Promise<DataAsset[]>;
  getDownstreamAssets(assetId: string, depth?: number): Promise<DataAsset[]>;
  analyzeImpact(assetId: string): Promise<ImpactAnalysis>;
  queryLineage(query: LineageQuery): Promise<DataAsset[]>;
  // NOTE(review): the graph type is left loose on the interface so that other
  // implementers are not constrained; the class below returns `LineageGraph`.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  getLineageGraph(assetId: string, direction: LineageDirection, depth?: number): Promise<any>;
  detectCircularDependencies(): Promise<string[][]>;
}

/**
 * In-memory implementation of {@link DataLineageService}.
 *
 * Lineage records are kept in a map keyed by asset id. Asset metadata is read
 * from a separate map supplied through {@link DataLineageServiceImpl.setAssets}.
 * Every public operation logs through the injected logger; mutating operations
 * additionally emit an event on the injected event bus. Errors are logged and
 * rethrown unchanged.
 */
export class DataLineageServiceImpl implements DataLineageService {
  private lineages: Map<string, DataLineage> = new Map();
  private assets: Map<string, DataAsset> = new Map();

  constructor(
    private eventBus: EventBus,
    private logger: Logger
  ) {}

  /**
   * Stores `lineage` under its `assetId` (replacing any existing record) and
   * emits `data.lineage.added`.
   */
  async addLineage(lineage: DataLineage): Promise<void> {
    try {
      this.lineages.set(lineage.assetId, lineage);

      this.logger.info('Data lineage added', {
        assetId: lineage.assetId,
        upstreamCount: lineage.upstreamAssets.length,
        downstreamCount: lineage.downstreamAssets.length
      });

      await this.eventBus.emit('data.lineage.added', {
        assetId: lineage.assetId,
        lineage,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add data lineage', { lineage, error });
      throw error;
    }
  }

  /** Returns the lineage stored for `assetId`, or `null` when there is none. */
  async getLineage(assetId: string): Promise<DataLineage | null> {
    try {
      return this.lineages.get(assetId) ?? null;
    } catch (error) {
      this.logger.error('Failed to get data lineage', { assetId, error });
      throw error;
    }
  }

  /**
   * Shallow-merges `lineage` into the stored record, refreshes `updatedAt`
   * and emits `data.lineage.updated`.
   *
   * @returns the merged record, or `null` when no lineage exists for `assetId`.
   */
  async updateLineage(assetId: string, lineage: Partial<DataLineage>): Promise<DataLineage | null> {
    try {
      const existingLineage = this.lineages.get(assetId);
      if (!existingLineage) {
        return null;
      }

      const updatedLineage: DataLineage = {
        ...existingLineage,
        ...lineage,
        // The record stays stored under `assetId`, so a patch must not be able
        // to make the record's own `assetId` disagree with its map key.
        assetId,
        updatedAt: new Date()
      };

      this.lineages.set(assetId, updatedLineage);

      this.logger.info('Data lineage updated', { assetId, changes: lineage });

      await this.eventBus.emit('data.lineage.updated', {
        assetId,
        lineage: updatedLineage,
        changes: lineage,
        timestamp: new Date()
      });

      return updatedLineage;
    } catch (error) {
      this.logger.error('Failed to update data lineage', { assetId, lineage, error });
      throw error;
    }
  }

  /**
   * Records `upstreamAssetId` as an upstream dependency of `assetId`, creating
   * an empty lineage for either asset when missing, and mirrors the link on
   * the upstream asset's downstream list. Does nothing (no event, no log) when
   * the dependency is already recorded.
   */
  async addUpstreamDependency(
    assetId: string,
    upstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId) ?? this.createEmptyLineage(assetId);

      // Check if dependency already exists
      if (lineage.upstreamAssets.includes(upstreamAssetId)) {
        return;
      }

      lineage.upstreamAssets.push(upstreamAssetId);
      if (transformation) {
        lineage.transformations.push(transformation);
      }
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);

      // Update downstream lineage of the upstream asset
      await this.addDownstreamToUpstream(upstreamAssetId, assetId);

      this.logger.info('Upstream dependency added', { assetId, upstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.added', {
        assetId,
        upstreamAssetId,
        transformation,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add upstream dependency', { assetId, upstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Records `downstreamAssetId` as a downstream dependency of `assetId` and
   * mirrors the link (together with `transformation`, if given) on the
   * downstream asset's upstream list. Does nothing when already recorded.
   */
  async addDownstreamDependency(
    assetId: string,
    downstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId) ?? this.createEmptyLineage(assetId);

      // Check if dependency already exists
      if (lineage.downstreamAssets.includes(downstreamAssetId)) {
        return;
      }

      lineage.downstreamAssets.push(downstreamAssetId);
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);

      // Update upstream lineage of the downstream asset
      await this.addUpstreamToDownstream(downstreamAssetId, assetId, transformation);

      this.logger.info('Downstream dependency added', { assetId, downstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.added', {
        assetId,
        downstreamAssetId,
        transformation,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add downstream dependency', { assetId, downstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Removes `upstreamAssetId` from the upstream list of `assetId` and the
   * mirrored downstream entry. No-op when `assetId` has no lineage.
   */
  async removeUpstreamDependency(assetId: string, upstreamAssetId: string): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId);
      if (!lineage) {
        return;
      }

      lineage.upstreamAssets = lineage.upstreamAssets.filter(id => id !== upstreamAssetId);
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);

      // Remove from downstream lineage of upstream asset
      await this.removeDownstreamFromUpstream(upstreamAssetId, assetId);

      this.logger.info('Upstream dependency removed', { assetId, upstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.removed', {
        assetId,
        upstreamAssetId,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to remove upstream dependency', { assetId, upstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Removes `downstreamAssetId` from the downstream list of `assetId` and the
   * mirrored upstream entry. No-op when `assetId` has no lineage.
   */
  async removeDownstreamDependency(assetId: string, downstreamAssetId: string): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId);
      if (!lineage) {
        return;
      }

      lineage.downstreamAssets = lineage.downstreamAssets.filter(id => id !== downstreamAssetId);
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);

      // Remove from upstream lineage of downstream asset
      await this.removeUpstreamFromDownstream(downstreamAssetId, assetId);

      this.logger.info('Downstream dependency removed', { assetId, downstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.removed', {
        assetId,
        downstreamAssetId,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to remove downstream dependency', { assetId, downstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Returns the known assets reachable within `depth` upstream hops of
   * `assetId`. Ids without an entry in the asset map are traversed but not
   * returned.
   */
  async getUpstreamAssets(assetId: string, depth: number = 1): Promise<DataAsset[]> {
    try {
      const visited = new Set<string>();
      const result: DataAsset[] = [];
      await this.traverseUpstream(assetId, depth, visited, result);
      return result;
    } catch (error) {
      this.logger.error('Failed to get upstream assets', { assetId, depth, error });
      throw error;
    }
  }

  /**
   * Returns the known assets reachable within `depth` downstream hops of
   * `assetId`. Ids without an entry in the asset map are traversed but not
   * returned.
   */
  async getDownstreamAssets(assetId: string, depth: number = 1): Promise<DataAsset[]> {
    try {
      const visited = new Set<string>();
      const result: DataAsset[] = [];
      await this.traverseDownstream(assetId, depth, visited, result);
      return result;
    } catch (error) {
      this.logger.error('Failed to get downstream assets', { assetId, depth, error });
      throw error;
    }
  }

  /**
   * Estimates the blast radius of changing `assetId` from its downstream
   * assets up to 5 hops away: collects owners, stewards and top users of those
   * assets and grades the impact by downstream asset count
   * (>20 critical, >10 high, >5 medium, otherwise low).
   */
  async analyzeImpact(assetId: string): Promise<ImpactAnalysis> {
    try {
      const downstreamAssets = await this.getDownstreamAssets(assetId, 5); // Go deep for impact analysis
      const affectedUsers = new Set<string>();

      // Collect all users who might be affected
      for (const asset of downstreamAssets) {
        affectedUsers.add(asset.owner);
        if (asset.steward) {
          affectedUsers.add(asset.steward);
        }
        // Add users from usage analytics
        for (const user of asset.usage.topUsers) {
          affectedUsers.add(user.userId);
        }
      }

      // Calculate impact level
      let estimatedImpact: 'low' | 'medium' | 'high' | 'critical' = 'low';
      if (downstreamAssets.length > 20) {
        estimatedImpact = 'critical';
      } else if (downstreamAssets.length > 10) {
        estimatedImpact = 'high';
      } else if (downstreamAssets.length > 5) {
        estimatedImpact = 'medium';
      }

      const impact: ImpactAnalysis = {
        downstreamAssets: downstreamAssets.map(asset => asset.id),
        affectedUsers: Array.from(affectedUsers),
        estimatedImpact,
        impactDescription: this.generateImpactDescription(downstreamAssets.length, affectedUsers.size),
        recommendations: this.generateRecommendations(estimatedImpact, downstreamAssets.length)
      };

      this.logger.info('Impact analysis completed', {
        assetId,
        impactLevel: estimatedImpact,
        affectedAssets: downstreamAssets.length,
        affectedUsers: affectedUsers.size
      });

      return impact;
    } catch (error) {
      this.logger.error('Failed to analyze impact', { assetId, error });
      throw error;
    }
  }

  /**
   * Collects upstream and/or downstream assets (per `query.direction`) for
   * every id in `query.assetIds` and returns them de-duplicated by asset id,
   * keeping first-seen order. Returns an empty list when `assetIds` is unset.
   */
  async queryLineage(query: LineageQuery): Promise<DataAsset[]> {
    try {
      const results: DataAsset[] = [];

      if (query.assetIds) {
        for (const assetId of query.assetIds) {
          if (query.direction === 'upstream' || query.direction === 'both') {
            results.push(...(await this.getUpstreamAssets(assetId, query.depth)));
          }
          if (query.direction === 'downstream' || query.direction === 'both') {
            results.push(...(await this.getDownstreamAssets(assetId, query.depth)));
          }
        }
      }

      // Remove duplicates; a Map keeps the first occurrence in insertion order.
      const uniqueById = new Map<DataAsset['id'], DataAsset>();
      for (const asset of results) {
        if (!uniqueById.has(asset.id)) {
          uniqueById.set(asset.id, asset);
        }
      }
      return Array.from(uniqueById.values());
    } catch (error) {
      this.logger.error('Failed to query lineage', { query, error });
      throw error;
    }
  }

  /**
   * Builds a node/edge graph around `assetId`.
   *
   * Assets up to `depth - 1` hops away are expanded: each becomes a node (when
   * present in the asset map) and contributes edges to its neighbours in the
   * requested direction. Edge endpoints on the depth boundary are therefore
   * not necessarily present in `nodes`.
   */
  async getLineageGraph(assetId: string, direction: LineageDirection, depth: number = 3): Promise<LineageGraph> {
    try {
      const graph = {
        nodes: new Map<string, LineageGraphNode>(),
        edges: [] as LineageGraphEdge[]
      };
      const visited = new Set<string>();

      await this.buildLineageGraph(assetId, direction, depth, visited, graph);

      return {
        nodes: Array.from(graph.nodes.values()),
        edges: graph.edges
      };
    } catch (error) {
      this.logger.error('Failed to get lineage graph', { assetId, direction, depth, error });
      throw error;
    }
  }

  /**
   * Walks the downstream links of every stored lineage and returns each cycle
   * found as the list of asset ids on it. Logs a warning when any are found.
   */
  async detectCircularDependencies(): Promise<string[][]> {
    try {
      const cycles: string[][] = [];
      const visited = new Set<string>();
      const recursionStack = new Set<string>();

      for (const assetId of this.lineages.keys()) {
        if (!visited.has(assetId)) {
          const path: string[] = [];
          await this.detectCycleDFS(assetId, visited, recursionStack, path, cycles);
        }
      }

      if (cycles.length > 0) {
        this.logger.warn('Circular dependencies detected', { cycleCount: cycles.length });
      }

      return cycles;
    } catch (error) {
      this.logger.error('Failed to detect circular dependencies', { error });
      throw error;
    }
  }

  // Private helper methods

  /** Creates a lineage record for `assetId` with no links and a 'low' impact. */
  private createEmptyLineage(assetId: string): DataLineage {
    return {
      id: this.generateId(),
      assetId,
      upstreamAssets: [],
      downstreamAssets: [],
      transformations: [],
      impact: {
        downstreamAssets: [],
        affectedUsers: [],
        estimatedImpact: 'low',
        impactDescription: '',
        recommendations: []
      },
      createdAt: new Date(),
      updatedAt: new Date()
    };
  }

  /** Mirrors an upstream link: adds `downstreamAssetId` to the upstream asset's downstream list. */
  private async addDownstreamToUpstream(upstreamAssetId: string, downstreamAssetId: string): Promise<void> {
    const upstreamLineage = this.lineages.get(upstreamAssetId) ?? this.createEmptyLineage(upstreamAssetId);

    if (!upstreamLineage.downstreamAssets.includes(downstreamAssetId)) {
      upstreamLineage.downstreamAssets.push(downstreamAssetId);
      upstreamLineage.updatedAt = new Date();
      this.lineages.set(upstreamAssetId, upstreamLineage);
    }
  }

  /** Mirrors a downstream link: adds `upstreamAssetId` (and `transformation`) to the downstream asset. */
  private async addUpstreamToDownstream(
    downstreamAssetId: string,
    upstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void> {
    const downstreamLineage = this.lineages.get(downstreamAssetId) ?? this.createEmptyLineage(downstreamAssetId);

    if (!downstreamLineage.upstreamAssets.includes(upstreamAssetId)) {
      downstreamLineage.upstreamAssets.push(upstreamAssetId);
      if (transformation) {
        downstreamLineage.transformations.push(transformation);
      }
      downstreamLineage.updatedAt = new Date();
      this.lineages.set(downstreamAssetId, downstreamLineage);
    }
  }

  /** Removes `downstreamAssetId` from the downstream list of `upstreamAssetId`, if it has a lineage. */
  private async removeDownstreamFromUpstream(upstreamAssetId: string, downstreamAssetId: string): Promise<void> {
    const upstreamLineage = this.lineages.get(upstreamAssetId);
    if (upstreamLineage) {
      upstreamLineage.downstreamAssets = upstreamLineage.downstreamAssets.filter(id => id !== downstreamAssetId);
      upstreamLineage.updatedAt = new Date();
      this.lineages.set(upstreamAssetId, upstreamLineage);
    }
  }

  /** Removes `upstreamAssetId` from the upstream list of `downstreamAssetId`, if it has a lineage. */
  private async removeUpstreamFromDownstream(downstreamAssetId: string, upstreamAssetId: string): Promise<void> {
    const downstreamLineage = this.lineages.get(downstreamAssetId);
    if (downstreamLineage) {
      downstreamLineage.upstreamAssets = downstreamLineage.upstreamAssets.filter(id => id !== upstreamAssetId);
      downstreamLineage.updatedAt = new Date();
      this.lineages.set(downstreamAssetId, downstreamLineage);
    }
  }

  /** Collects into `result` the known assets within `remainingDepth` upstream hops of `assetId`. */
  private async traverseUpstream(
    assetId: string,
    remainingDepth: number,
    visited: Set<string>,
    result: DataAsset[]
  ): Promise<void> {
    this.collectReachableAssets(assetId, remainingDepth, visited, result, lineage => lineage.upstreamAssets);
  }

  /** Collects into `result` the known assets within `remainingDepth` downstream hops of `assetId`. */
  private async traverseDownstream(
    assetId: string,
    remainingDepth: number,
    visited: Set<string>,
    result: DataAsset[]
  ): Promise<void> {
    this.collectReachableAssets(assetId, remainingDepth, visited, result, lineage => lineage.downstreamAssets);
  }

  /**
   * Breadth-first, depth-limited walk shared by the upstream and downstream
   * traversals.
   *
   * Level order matters: every asset is expanded the first time it is reached,
   * i.e. at its minimal distance from `startId`, so the `expanded` set can
   * never hide neighbours that are still within `depth`. (A depth-first walk
   * with a shared visited set may first reach an asset over a long path and
   * then refuse to expand it again over a shorter one.)
   *
   * The loop stops when `remaining` hits exactly 0, so a depth of 0 yields
   * nothing and a negative depth is bounded only by the `expanded` set.
   *
   * @param neighborsOf selects which link list of a lineage to follow.
   */
  private collectReachableAssets(
    startId: string,
    depth: number,
    expanded: Set<string>,
    result: DataAsset[],
    neighborsOf: (lineage: DataLineage) => string[]
  ): void {
    const collectedIds = new Set(result.map(asset => asset.id));
    let frontier: string[] = [startId];
    let remaining = depth;

    while (frontier.length > 0 && remaining !== 0) {
      const next: string[] = [];

      for (const currentId of frontier) {
        if (expanded.has(currentId)) {
          continue;
        }
        expanded.add(currentId);

        const lineage = this.lineages.get(currentId);
        if (!lineage) {
          continue;
        }

        for (const neighborId of neighborsOf(lineage)) {
          const asset = this.assets.get(neighborId);
          if (asset && !collectedIds.has(asset.id)) {
            collectedIds.add(asset.id);
            result.push(asset);
          }
          next.push(neighborId);
        }
      }

      frontier = next;
      remaining -= 1;
    }
  }

  /**
   * Fills `graph` by expanding assets breadth-first from `assetId` for
   * `remainingDepth` levels (same depth semantics as the asset traversals).
   *
   * Each expanded asset found in the asset map becomes a node. Each link in
   * the requested direction becomes an edge oriented upstream -> downstream.
   * An edge is recorded once per (source, target) pair, keeping the first one
   * discovered: with direction 'both' the same link is otherwise seen from
   * both of its endpoints.
   */
  private async buildLineageGraph(
    assetId: string,
    direction: LineageDirection,
    remainingDepth: number,
    visited: Set<string>,
    graph: { nodes: Map<string, LineageGraphNode>; edges: LineageGraphEdge[] }
  ): Promise<void> {
    // NUL separator keeps the composite key unambiguous for ordinary ids.
    const edgeKey = (source: string, target: string): string => `${source}\u0000${target}`;
    const seenEdges = new Set(graph.edges.map(edge => edgeKey(edge.source, edge.target)));
    const addEdge = (source: string, target: string, type: LineageGraphEdge['type']): void => {
      const key = edgeKey(source, target);
      if (!seenEdges.has(key)) {
        seenEdges.add(key);
        graph.edges.push({ source, target, type });
      }
    };

    const followUpstream = direction === 'upstream' || direction === 'both';
    const followDownstream = direction === 'downstream' || direction === 'both';

    let frontier: string[] = [assetId];
    let remaining = remainingDepth;

    while (frontier.length > 0 && remaining !== 0) {
      const next: string[] = [];

      for (const currentId of frontier) {
        if (visited.has(currentId)) {
          continue;
        }
        visited.add(currentId);

        const asset = this.assets.get(currentId);
        if (asset) {
          graph.nodes.set(currentId, {
            id: currentId,
            name: asset.name,
            type: asset.type,
            classification: asset.classification
          });
        }

        const lineage = this.lineages.get(currentId);
        if (!lineage) {
          continue;
        }

        if (followUpstream) {
          for (const upstreamId of lineage.upstreamAssets) {
            addEdge(upstreamId, currentId, 'upstream');
            next.push(upstreamId);
          }
        }

        if (followDownstream) {
          for (const downstreamId of lineage.downstreamAssets) {
            addEdge(currentId, downstreamId, 'downstream');
            next.push(downstreamId);
          }
        }
      }

      frontier = next;
      remaining -= 1;
    }
  }

  /**
   * Depth-first search over downstream links. `recursionStack` holds the ids
   * on the current DFS path; meeting one of them again closes a cycle, which
   * is recorded as the slice of `path` starting at that id.
   */
  private async detectCycleDFS(
    assetId: string,
    visited: Set<string>,
    recursionStack: Set<string>,
    path: string[],
    cycles: string[][]
  ): Promise<void> {
    visited.add(assetId);
    recursionStack.add(assetId);
    path.push(assetId);

    const lineage = this.lineages.get(assetId);
    if (lineage) {
      for (const downstreamId of lineage.downstreamAssets) {
        if (!visited.has(downstreamId)) {
          await this.detectCycleDFS(downstreamId, visited, recursionStack, path, cycles);
        } else if (recursionStack.has(downstreamId)) {
          // Found a cycle
          const cycleStart = path.indexOf(downstreamId);
          cycles.push(path.slice(cycleStart));
        }
      }
    }

    path.pop();
    recursionStack.delete(assetId);
  }

  /** Human-readable summary of how many assets and users an impact touches. */
  private generateImpactDescription(assetCount: number, userCount: number): string {
    if (assetCount === 0) {
      return 'No downstream dependencies identified.';
    }

    return `Changes to this asset may affect ${assetCount} downstream asset(s) and ${userCount} user(s).`;
  }

  /**
   * Returns the fixed list of recommendations for an impact level.
   * `assetCount` is accepted but not currently used.
   */
  private generateRecommendations(impact: string, assetCount: number): string[] {
    const recommendations: string[] = [];

    if (impact === 'critical') {
      recommendations.push('Schedule maintenance window');
      recommendations.push('Notify all stakeholders in advance');
      recommendations.push('Prepare rollback plan');
      recommendations.push('Consider phased rollout');
    } else if (impact === 'high') {
      recommendations.push('Notify affected users');
      recommendations.push('Test changes thoroughly');
      recommendations.push('Monitor downstream systems');
    } else if (impact === 'medium') {
      recommendations.push('Test with subset of data');
      recommendations.push('Monitor for issues');
    } else {
      recommendations.push('Standard testing procedures apply');
    }

    return recommendations;
  }

  /** Builds an id of the form `lineage_<epoch ms>_<up to 9 base-36 chars>`. */
  private generateId(): string {
    // slice(2, 11) takes the same 9 characters as the deprecated substr(2, 9).
    return `lineage_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  // Method to inject assets (typically from DataCatalogService)
  /** Replaces the asset map used to resolve asset ids during traversals. */
  setAssets(assets: Map<string, DataAsset>): void {
    this.assets = assets;
  }
}