// stock-bot/apps/data-services/data-catalog/src/services/DataLineageService.ts
//
// 607 lines
// 19 KiB
// TypeScript

import { EventBus } from '@stock-bot/event-bus';
import { Logger } from '@stock-bot/utils';
import {
DataLineage,
DataAsset,
LineageTransformation,
ImpactAnalysis,
LineageQuery,
LineageDirection
} from '../types/DataCatalog';
/**
 * Contract for recording and querying lineage (upstream / downstream
 * relationships) between data assets.
 *
 * All operations are asynchronous. Asset ids are plain strings.
 */
export interface DataLineageService {
  // --- Lineage records ------------------------------------------------------

  /** Stores `lineage` under `lineage.assetId`. */
  addLineage(lineage: DataLineage): Promise<void>;

  /** Resolves to the lineage recorded for `assetId`, or `null` when there is none. */
  getLineage(assetId: string): Promise<DataLineage | null>;

  /**
   * Applies the fields in `lineage` to the record for `assetId`.
   *
   * @returns The updated record, or `null` when `assetId` has no record.
   */
  updateLineage(
    assetId: string,
    lineage: Partial<DataLineage>
  ): Promise<DataLineage | null>;

  // --- Dependency edges -----------------------------------------------------

  /**
   * Declares that `assetId` is derived from `upstreamAssetId`.
   *
   * @param transformation Optional description of how the data is derived.
   */
  addUpstreamDependency(
    assetId: string,
    upstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void>;

  /**
   * Declares that `downstreamAssetId` is derived from `assetId`.
   *
   * @param transformation Optional description of how the data is derived.
   */
  addDownstreamDependency(
    assetId: string,
    downstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void>;

  /** Removes the dependency of `assetId` on `upstreamAssetId`. */
  removeUpstreamDependency(
    assetId: string,
    upstreamAssetId: string
  ): Promise<void>;

  /** Removes the dependency of `downstreamAssetId` on `assetId`. */
  removeDownstreamDependency(
    assetId: string,
    downstreamAssetId: string
  ): Promise<void>;

  // --- Queries --------------------------------------------------------------

  /**
   * Lists assets that `assetId` depends on.
   *
   * @param depth Number of hops to follow; the default is chosen by the implementation.
   */
  getUpstreamAssets(assetId: string, depth?: number): Promise<DataAsset[]>;

  /**
   * Lists assets that depend on `assetId`.
   *
   * @param depth Number of hops to follow; the default is chosen by the implementation.
   */
  getDownstreamAssets(assetId: string, depth?: number): Promise<DataAsset[]>;

  /** Summarises what would be affected by a change to `assetId`. */
  analyzeImpact(assetId: string): Promise<ImpactAnalysis>;

  /** Resolves to the assets matched by `query`. */
  queryLineage(query: LineageQuery): Promise<DataAsset[]>;

  /**
   * Builds a graph representation of the lineage around `assetId`.
   *
   * The result shape is implementation-defined, hence `any`.
   *
   * @param direction Which side(s) of the lineage to include.
   * @param depth Number of hops to follow; the default is chosen by the implementation.
   */
  getLineageGraph(
    assetId: string,
    direction: LineageDirection,
    depth?: number
  ): Promise<any>;

  /** Resolves to the dependency cycles found, each as a list of asset ids. */
  detectCircularDependencies(): Promise<string[][]>;
}
/**
 * In-memory implementation of {@link DataLineageService}.
 *
 * Lineage records live in a `Map` keyed by asset id. Asset metadata is not
 * owned by this class; it is supplied through {@link DataLineageServiceImpl.setAssets}.
 * Every public operation logs a failure through the injected logger and then
 * rethrows the original error.
 */
export class DataLineageServiceImpl implements DataLineageService {
  /** Number of downstream hops followed by `analyzeImpact`. */
  private static readonly IMPACT_ANALYSIS_DEPTH = 5;

  /** Lineage records keyed by the id of the asset they describe. */
  private readonly lineages: Map<string, DataLineage> = new Map();

  /** Asset metadata keyed by asset id; replaced wholesale by `setAssets`. */
  private assets: Map<string, DataAsset> = new Map();

  constructor(
    private readonly eventBus: EventBus,
    private readonly logger: Logger
  ) {}

  /**
   * Stores `lineage` under `lineage.assetId`, replacing any existing record,
   * and emits `data.lineage.added`.
   *
   * @throws Rethrows any error raised while storing, logging or emitting.
   */
  async addLineage(lineage: DataLineage): Promise<void> {
    try {
      this.lineages.set(lineage.assetId, lineage);
      this.logger.info('Data lineage added', {
        assetId: lineage.assetId,
        upstreamCount: lineage.upstreamAssets.length,
        downstreamCount: lineage.downstreamAssets.length
      });
      await this.eventBus.emit('data.lineage.added', {
        assetId: lineage.assetId,
        lineage,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add data lineage', { lineage, error });
      throw error;
    }
  }

  /**
   * Looks up the lineage record for `assetId`.
   *
   * @returns The stored record, or `null` when none exists.
   */
  async getLineage(assetId: string): Promise<DataLineage | null> {
    try {
      return this.lineages.get(assetId) ?? null;
    } catch (error) {
      this.logger.error('Failed to get data lineage', { assetId, error });
      throw error;
    }
  }

  /**
   * Shallow-merges `lineage` into the record for `assetId`, refreshes
   * `updatedAt` and emits `data.lineage.updated`.
   *
   * The record's `assetId` always stays equal to the `assetId` argument, even
   * if the patch carries a different value, so a record can never disagree
   * with the key it is stored under.
   *
   * @returns The merged record, or `null` when `assetId` has no record.
   * @throws Rethrows any error raised while logging or emitting.
   */
  async updateLineage(assetId: string, lineage: Partial<DataLineage>): Promise<DataLineage | null> {
    try {
      const existingLineage = this.lineages.get(assetId);
      if (!existingLineage) {
        return null;
      }
      const updatedLineage: DataLineage = {
        ...existingLineage,
        ...lineage,
        // The map key is authoritative; never let a patch re-point the record.
        assetId,
        updatedAt: new Date()
      };
      this.lineages.set(assetId, updatedLineage);
      this.logger.info('Data lineage updated', { assetId, changes: lineage });
      await this.eventBus.emit('data.lineage.updated', {
        assetId,
        lineage: updatedLineage,
        changes: lineage,
        timestamp: new Date()
      });
      return updatedLineage;
    } catch (error) {
      this.logger.error('Failed to update data lineage', { assetId, lineage, error });
      throw error;
    }
  }

  /**
   * Records that `assetId` depends on `upstreamAssetId`.
   *
   * Missing lineage records are created on demand for both assets. The
   * reverse link (downstream entry on the upstream asset) is maintained as
   * well. Nothing happens, and no event is emitted, when `assetId` already
   * lists `upstreamAssetId` as upstream.
   *
   * @param transformation Stored on `assetId`'s lineage when provided.
   * @throws Rethrows any error raised while updating, logging or emitting.
   */
  async addUpstreamDependency(
    assetId: string,
    upstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId) ?? this.createEmptyLineage(assetId);
      if (lineage.upstreamAssets.includes(upstreamAssetId)) {
        return; // dependency already recorded
      }
      lineage.upstreamAssets.push(upstreamAssetId);
      if (transformation) {
        lineage.transformations.push(transformation);
      }
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);
      // Keep the upstream asset's downstream list in sync.
      this.addDownstreamToUpstream(upstreamAssetId, assetId);
      this.logger.info('Upstream dependency added', { assetId, upstreamAssetId });
      await this.eventBus.emit('data.lineage.dependency.added', {
        assetId,
        upstreamAssetId,
        transformation,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add upstream dependency', { assetId, upstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Records that `downstreamAssetId` depends on `assetId`.
   *
   * Missing lineage records are created on demand for both assets. The
   * reverse link (upstream entry on the downstream asset) is maintained as
   * well. Nothing happens, and no event is emitted, when `assetId` already
   * lists `downstreamAssetId` as downstream.
   *
   * @param transformation Stored on the downstream asset's lineage when provided.
   * @throws Rethrows any error raised while updating, logging or emitting.
   */
  async addDownstreamDependency(
    assetId: string,
    downstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId) ?? this.createEmptyLineage(assetId);
      if (lineage.downstreamAssets.includes(downstreamAssetId)) {
        return; // dependency already recorded
      }
      lineage.downstreamAssets.push(downstreamAssetId);
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);
      // Keep the downstream asset's upstream list in sync.
      this.addUpstreamToDownstream(downstreamAssetId, assetId, transformation);
      this.logger.info('Downstream dependency added', { assetId, downstreamAssetId });
      await this.eventBus.emit('data.lineage.dependency.added', {
        assetId,
        downstreamAssetId,
        transformation,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add downstream dependency', { assetId, downstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Removes `upstreamAssetId` from `assetId`'s upstream list and the matching
   * downstream entry on the upstream asset, then emits
   * `data.lineage.dependency.removed`.
   *
   * Does nothing when `assetId` has no lineage record. Stored transformations
   * are left untouched.
   *
   * @throws Rethrows any error raised while updating, logging or emitting.
   */
  async removeUpstreamDependency(assetId: string, upstreamAssetId: string): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId);
      if (!lineage) {
        return;
      }
      lineage.upstreamAssets = lineage.upstreamAssets.filter(id => id !== upstreamAssetId);
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);
      this.removeDownstreamFromUpstream(upstreamAssetId, assetId);
      this.logger.info('Upstream dependency removed', { assetId, upstreamAssetId });
      await this.eventBus.emit('data.lineage.dependency.removed', {
        assetId,
        upstreamAssetId,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to remove upstream dependency', { assetId, upstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Removes `downstreamAssetId` from `assetId`'s downstream list and the
   * matching upstream entry on the downstream asset, then emits
   * `data.lineage.dependency.removed`.
   *
   * Does nothing when `assetId` has no lineage record. Stored transformations
   * are left untouched.
   *
   * @throws Rethrows any error raised while updating, logging or emitting.
   */
  async removeDownstreamDependency(assetId: string, downstreamAssetId: string): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId);
      if (!lineage) {
        return;
      }
      lineage.downstreamAssets = lineage.downstreamAssets.filter(id => id !== downstreamAssetId);
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);
      this.removeUpstreamFromDownstream(downstreamAssetId, assetId);
      this.logger.info('Downstream dependency removed', { assetId, downstreamAssetId });
      await this.eventBus.emit('data.lineage.dependency.removed', {
        assetId,
        downstreamAssetId,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to remove downstream dependency', { assetId, downstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Lists the registered assets reachable by following upstream links from
   * `assetId`, nearest first, without duplicates.
   *
   * Ids that have no entry in the injected asset map are traversed but not
   * returned.
   *
   * @param depth Maximum number of hops (default 1). Only a remaining depth of
   *   exactly 0 stops the walk, so a negative depth walks everything reachable.
   * @throws Rethrows any error raised during traversal.
   */
  async getUpstreamAssets(assetId: string, depth: number = 1): Promise<DataAsset[]> {
    try {
      return this.collectRelatedAssets(assetId, depth, lineage => lineage.upstreamAssets);
    } catch (error) {
      this.logger.error('Failed to get upstream assets', { assetId, depth, error });
      throw error;
    }
  }

  /**
   * Lists the registered assets reachable by following downstream links from
   * `assetId`, nearest first, without duplicates.
   *
   * Ids that have no entry in the injected asset map are traversed but not
   * returned.
   *
   * @param depth Maximum number of hops (default 1). Only a remaining depth of
   *   exactly 0 stops the walk, so a negative depth walks everything reachable.
   * @throws Rethrows any error raised during traversal.
   */
  async getDownstreamAssets(assetId: string, depth: number = 1): Promise<DataAsset[]> {
    try {
      return this.collectRelatedAssets(assetId, depth, lineage => lineage.downstreamAssets);
    } catch (error) {
      this.logger.error('Failed to get downstream assets', { assetId, depth, error });
      throw error;
    }
  }

  /**
   * Estimates the blast radius of a change to `assetId`.
   *
   * Downstream assets are collected up to `IMPACT_ANALYSIS_DEPTH` hops. The
   * affected users are the owner, the steward (when set) and the top users of
   * each of those assets. The impact level depends only on the number of
   * downstream assets: more than 20 is critical, more than 10 high, more than
   * 5 medium, otherwise low.
   *
   * @throws Rethrows any error raised during the analysis.
   */
  async analyzeImpact(assetId: string): Promise<ImpactAnalysis> {
    try {
      const downstreamAssets = await this.getDownstreamAssets(
        assetId,
        DataLineageServiceImpl.IMPACT_ANALYSIS_DEPTH
      );

      const affectedUsers = new Set<string>();
      for (const asset of downstreamAssets) {
        affectedUsers.add(asset.owner);
        if (asset.steward) {
          affectedUsers.add(asset.steward);
        }
        // Defensive: assets are injected from outside this service, so usage
        // analytics may not have been populated yet.
        for (const user of asset.usage?.topUsers ?? []) {
          affectedUsers.add(user.userId);
        }
      }

      const estimatedImpact = this.estimateImpactLevel(downstreamAssets.length);
      const impact: ImpactAnalysis = {
        downstreamAssets: downstreamAssets.map(asset => asset.id),
        affectedUsers: Array.from(affectedUsers),
        estimatedImpact,
        impactDescription: this.generateImpactDescription(downstreamAssets.length, affectedUsers.size),
        recommendations: this.generateRecommendations(estimatedImpact)
      };
      this.logger.info('Impact analysis completed', {
        assetId,
        impactLevel: estimatedImpact,
        affectedAssets: downstreamAssets.length,
        affectedUsers: affectedUsers.size
      });
      return impact;
    } catch (error) {
      this.logger.error('Failed to analyze impact', { assetId, error });
      throw error;
    }
  }

  /**
   * Collects the upstream and/or downstream assets of every id in
   * `query.assetIds`, as selected by `query.direction`, and returns them
   * de-duplicated by asset id (first occurrence wins).
   *
   * Resolves to an empty array when `query.assetIds` is not set.
   *
   * @throws Rethrows any error raised while querying.
   */
  async queryLineage(query: LineageQuery): Promise<DataAsset[]> {
    try {
      const uniqueById = new Map<DataAsset['id'], DataAsset>();
      const collect = (found: DataAsset[]): void => {
        for (const asset of found) {
          if (!uniqueById.has(asset.id)) {
            uniqueById.set(asset.id, asset);
          }
        }
      };

      for (const assetId of query.assetIds ?? []) {
        if (query.direction === 'upstream' || query.direction === 'both') {
          collect(await this.getUpstreamAssets(assetId, query.depth));
        }
        if (query.direction === 'downstream' || query.direction === 'both') {
          collect(await this.getDownstreamAssets(assetId, query.depth));
        }
      }
      return Array.from(uniqueById.values());
    } catch (error) {
      this.logger.error('Failed to query lineage', { query, error });
      throw error;
    }
  }

  /**
   * Builds a node/edge view of the lineage around `assetId`.
   *
   * Starting at `assetId`, ids are expanded level by level for `depth` levels
   * (default 3). Each expanded id that has a registered asset becomes a node
   * (`id`, `name`, `type`, `classification`). Each upstream/downstream link of
   * an expanded id becomes an edge (`source`, `target`, `type`). Edges always
   * point from the upstream asset to the downstream asset; `type` records the
   * side from which the link was discovered.
   *
   * Edges may reference ids that have no node, either because the id lies just
   * beyond the depth limit or because it has no registered asset.
   *
   * @returns `{ nodes: [...], edges: [...] }`.
   * @throws Rethrows any error raised while building the graph.
   */
  async getLineageGraph(assetId: string, direction: LineageDirection, depth: number = 3): Promise<any> {
    try {
      return this.buildLineageGraph(assetId, direction, depth);
    } catch (error) {
      this.logger.error('Failed to get lineage graph', { assetId, direction, depth, error });
      throw error;
    }
  }

  /**
   * Searches the downstream links of every lineage record for cycles.
   *
   * @returns One entry per back-edge found; each entry lists the asset ids on
   *   the cycle in traversal order. Logs a warning when any cycle is found.
   * @throws Rethrows any error raised during the search.
   */
  async detectCircularDependencies(): Promise<string[][]> {
    try {
      const cycles: string[][] = [];
      const visited = new Set<string>();
      const onStack = new Set<string>();
      for (const assetId of this.lineages.keys()) {
        if (!visited.has(assetId)) {
          this.collectCycles(assetId, visited, onStack, [], cycles);
        }
      }
      if (cycles.length > 0) {
        this.logger.warn('Circular dependencies detected', { cycleCount: cycles.length });
      }
      return cycles;
    } catch (error) {
      this.logger.error('Failed to detect circular dependencies', { error });
      throw error;
    }
  }

  // Private helper methods

  /** Creates a lineage record for `assetId` with no links and a 'low' impact stub. */
  private createEmptyLineage(assetId: string): DataLineage {
    const now = new Date();
    return {
      id: this.generateId(),
      assetId,
      upstreamAssets: [],
      downstreamAssets: [],
      transformations: [],
      impact: {
        downstreamAssets: [],
        affectedUsers: [],
        estimatedImpact: 'low',
        impactDescription: '',
        recommendations: []
      },
      createdAt: now,
      updatedAt: new Date(now.getTime())
    };
  }

  /** Adds `downstreamAssetId` to the upstream asset's downstream list, creating its record if needed. */
  private addDownstreamToUpstream(upstreamAssetId: string, downstreamAssetId: string): void {
    const upstreamLineage =
      this.lineages.get(upstreamAssetId) ?? this.createEmptyLineage(upstreamAssetId);
    if (!upstreamLineage.downstreamAssets.includes(downstreamAssetId)) {
      upstreamLineage.downstreamAssets.push(downstreamAssetId);
      upstreamLineage.updatedAt = new Date();
      this.lineages.set(upstreamAssetId, upstreamLineage);
    }
  }

  /** Adds `upstreamAssetId` (and the transformation, if any) to the downstream asset's record, creating it if needed. */
  private addUpstreamToDownstream(
    downstreamAssetId: string,
    upstreamAssetId: string,
    transformation?: LineageTransformation
  ): void {
    const downstreamLineage =
      this.lineages.get(downstreamAssetId) ?? this.createEmptyLineage(downstreamAssetId);
    if (!downstreamLineage.upstreamAssets.includes(upstreamAssetId)) {
      downstreamLineage.upstreamAssets.push(upstreamAssetId);
      if (transformation) {
        downstreamLineage.transformations.push(transformation);
      }
      downstreamLineage.updatedAt = new Date();
      this.lineages.set(downstreamAssetId, downstreamLineage);
    }
  }

  /** Drops `downstreamAssetId` from the upstream asset's downstream list, if that asset has a record. */
  private removeDownstreamFromUpstream(upstreamAssetId: string, downstreamAssetId: string): void {
    const upstreamLineage = this.lineages.get(upstreamAssetId);
    if (upstreamLineage) {
      upstreamLineage.downstreamAssets = upstreamLineage.downstreamAssets.filter(
        id => id !== downstreamAssetId
      );
      upstreamLineage.updatedAt = new Date();
      this.lineages.set(upstreamAssetId, upstreamLineage);
    }
  }

  /** Drops `upstreamAssetId` from the downstream asset's upstream list, if that asset has a record. */
  private removeUpstreamFromDownstream(downstreamAssetId: string, upstreamAssetId: string): void {
    const downstreamLineage = this.lineages.get(downstreamAssetId);
    if (downstreamLineage) {
      downstreamLineage.upstreamAssets = downstreamLineage.upstreamAssets.filter(
        id => id !== upstreamAssetId
      );
      downstreamLineage.updatedAt = new Date();
      this.lineages.set(downstreamAssetId, downstreamLineage);
    }
  }

  /**
   * Level-order walk shared by the upstream and downstream queries.
   *
   * Expanding level by level means every id is expanded at its shortest
   * distance from `startId`, so an id first seen at the end of a long path
   * cannot hide assets that a shorter path would still reach within `depth`.
   *
   * @param neighborsOf Selects which link list of a lineage record to follow.
   */
  private collectRelatedAssets(
    startId: string,
    depth: number,
    neighborsOf: (lineage: DataLineage) => string[]
  ): DataAsset[] {
    const result: DataAsset[] = [];
    const collectedIds = new Set<DataAsset['id']>();
    const expanded = new Set<string>([startId]);
    let frontier: string[] = [startId];
    let remainingDepth = depth;

    // Only an exact 0 stops the walk; negative depths run until the frontier is empty.
    while (remainingDepth !== 0 && frontier.length > 0) {
      const nextFrontier: string[] = [];
      for (const currentId of frontier) {
        const lineage = this.lineages.get(currentId);
        if (!lineage) {
          continue;
        }
        for (const neighborId of neighborsOf(lineage)) {
          const asset = this.assets.get(neighborId);
          if (asset && !collectedIds.has(asset.id)) {
            collectedIds.add(asset.id);
            result.push(asset);
          }
          if (!expanded.has(neighborId)) {
            expanded.add(neighborId);
            nextFrontier.push(neighborId);
          }
        }
      }
      frontier = nextFrontier;
      remainingDepth -= 1;
    }
    return result;
  }

  /** Level-order construction of the node/edge structure returned by `getLineageGraph`. */
  private buildLineageGraph(
    assetId: string,
    direction: LineageDirection,
    depth: number
  ): {
    nodes: Array<{
      id: string;
      name: DataAsset['name'];
      type: DataAsset['type'];
      classification: DataAsset['classification'];
    }>;
    edges: Array<{ source: string; target: string; type: 'upstream' | 'downstream' }>;
  } {
    const nodes = new Map<
      string,
      {
        id: string;
        name: DataAsset['name'];
        type: DataAsset['type'];
        classification: DataAsset['classification'];
      }
    >();
    const edges: Array<{ source: string; target: string; type: 'upstream' | 'downstream' }> = [];
    const includeUpstream = direction === 'upstream' || direction === 'both';
    const includeDownstream = direction === 'downstream' || direction === 'both';

    const expanded = new Set<string>([assetId]);
    let frontier: string[] = [assetId];
    let remainingDepth = depth;

    // Only an exact 0 stops the walk; negative depths run until the frontier is empty.
    while (remainingDepth !== 0 && frontier.length > 0) {
      const nextFrontier: string[] = [];
      const enqueue = (id: string): void => {
        if (!expanded.has(id)) {
          expanded.add(id);
          nextFrontier.push(id);
        }
      };

      for (const currentId of frontier) {
        const asset = this.assets.get(currentId);
        if (asset) {
          nodes.set(currentId, {
            id: currentId,
            name: asset.name,
            type: asset.type,
            classification: asset.classification
          });
        }
        const lineage = this.lineages.get(currentId);
        if (!lineage) {
          continue;
        }
        if (includeUpstream) {
          for (const upstreamId of lineage.upstreamAssets) {
            edges.push({ source: upstreamId, target: currentId, type: 'upstream' });
            enqueue(upstreamId);
          }
        }
        if (includeDownstream) {
          for (const downstreamId of lineage.downstreamAssets) {
            edges.push({ source: currentId, target: downstreamId, type: 'downstream' });
            enqueue(downstreamId);
          }
        }
      }
      frontier = nextFrontier;
      remainingDepth -= 1;
    }

    return { nodes: Array.from(nodes.values()), edges };
  }

  /**
   * Depth-first search along downstream links. Whenever a link leads back to
   * an id that is still on the current path, the slice of the path from that
   * id onwards is recorded as a cycle.
   */
  private collectCycles(
    assetId: string,
    visited: Set<string>,
    onStack: Set<string>,
    path: string[],
    cycles: string[][]
  ): void {
    visited.add(assetId);
    onStack.add(assetId);
    path.push(assetId);

    for (const downstreamId of this.lineages.get(assetId)?.downstreamAssets ?? []) {
      if (!visited.has(downstreamId)) {
        this.collectCycles(downstreamId, visited, onStack, path, cycles);
      } else if (onStack.has(downstreamId)) {
        // Back-edge: downstreamId is an ancestor on the current path.
        cycles.push(path.slice(path.indexOf(downstreamId)));
      }
    }

    path.pop();
    onStack.delete(assetId);
  }

  /** Maps a downstream asset count onto an impact level. */
  private estimateImpactLevel(assetCount: number): 'low' | 'medium' | 'high' | 'critical' {
    if (assetCount > 20) {
      return 'critical';
    }
    if (assetCount > 10) {
      return 'high';
    }
    if (assetCount > 5) {
      return 'medium';
    }
    return 'low';
  }

  /** Produces the human-readable summary stored in `ImpactAnalysis.impactDescription`. */
  private generateImpactDescription(assetCount: number, userCount: number): string {
    if (assetCount === 0) {
      return 'No downstream dependencies identified.';
    }
    return `Changes to this asset may affect ${assetCount} downstream asset(s) and ${userCount} user(s).`;
  }

  /** Returns the canned recommendation list for an impact level. */
  private generateRecommendations(impact: string): string[] {
    switch (impact) {
      case 'critical':
        return [
          'Schedule maintenance window',
          'Notify all stakeholders in advance',
          'Prepare rollback plan',
          'Consider phased rollout'
        ];
      case 'high':
        return [
          'Notify affected users',
          'Test changes thoroughly',
          'Monitor downstream systems'
        ];
      case 'medium':
        return ['Test with subset of data', 'Monitor for issues'];
      default:
        return ['Standard testing procedures apply'];
    }
  }

  /** Builds an id of the form `lineage_<epoch ms>_<up to 9 base-36 characters>`. */
  private generateId(): string {
    return `lineage_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /**
   * Method to inject assets (typically from DataCatalogService).
   *
   * The map is stored by reference, not copied.
   */
  setAssets(assets: Map<string, DataAsset>): void {
    this.assets = assets;
  }
}