// Data lineage service — TypeScript (original listing: 607 lines, 19 KiB).
import { EventBus } from '@stock-bot/event-bus';
|
|
import { Logger } from '@stock-bot/utils';
|
|
import {
|
|
DataLineage,
|
|
DataAsset,
|
|
LineageTransformation,
|
|
ImpactAnalysis,
|
|
LineageQuery,
|
|
LineageDirection
|
|
} from '../types/DataCatalog';
|
|
|
|
/**
 * Contract for recording and querying lineage (upstream / downstream
 * relationships) between data assets.
 */
export interface DataLineageService {
  // --- Lineage records ------------------------------------------------------

  /** Stores the given lineage record for `lineage.assetId`. */
  addLineage(lineage: DataLineage): Promise<void>;

  /** Resolves to the lineage record of `assetId`, or `null` when none exists. */
  getLineage(assetId: string): Promise<DataLineage | null>;

  /**
   * Applies a partial update to the lineage record of `assetId`.
   * Resolves to the updated record, or `null` when no record exists.
   */
  updateLineage(
    assetId: string,
    lineage: Partial<DataLineage>
  ): Promise<DataLineage | null>;

  // --- Dependency edges -----------------------------------------------------

  /** Declares that `assetId` depends on `upstreamAssetId`. */
  addUpstreamDependency(
    assetId: string,
    upstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void>;

  /** Declares that `downstreamAssetId` depends on `assetId`. */
  addDownstreamDependency(
    assetId: string,
    downstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void>;

  /** Removes the dependency of `assetId` on `upstreamAssetId`. */
  removeUpstreamDependency(
    assetId: string,
    upstreamAssetId: string
  ): Promise<void>;

  /** Removes the dependency of `downstreamAssetId` on `assetId`. */
  removeDownstreamDependency(
    assetId: string,
    downstreamAssetId: string
  ): Promise<void>;

  // --- Traversal and analysis ----------------------------------------------

  /** Resolves to the assets upstream of `assetId`, following up to `depth` hops. */
  getUpstreamAssets(assetId: string, depth?: number): Promise<DataAsset[]>;

  /** Resolves to the assets downstream of `assetId`, following up to `depth` hops. */
  getDownstreamAssets(assetId: string, depth?: number): Promise<DataAsset[]>;

  /** Estimates the impact of changing `assetId` on its downstream assets and users. */
  analyzeImpact(assetId: string): Promise<ImpactAnalysis>;

  /** Resolves to the assets matching the given lineage query. */
  queryLineage(query: LineageQuery): Promise<DataAsset[]>;

  /** Builds a node/edge representation of the lineage around `assetId`. */
  getLineageGraph(
    assetId: string,
    direction: LineageDirection,
    depth?: number
  ): Promise<any>;

  /** Resolves to the dependency cycles found, each as a list of asset ids. */
  detectCircularDependencies(): Promise<string[][]>;
}
|
|
|
|
/** Number of downstream hops followed when analysing impact. */
const IMPACT_ANALYSIS_DEPTH = 5;

/** More than this many affected downstream assets is rated `critical`. */
const CRITICAL_IMPACT_THRESHOLD = 20;

/** More than this many affected downstream assets is rated `high`. */
const HIGH_IMPACT_THRESHOLD = 10;

/** More than this many affected downstream assets is rated `medium`. */
const MEDIUM_IMPACT_THRESHOLD = 5;

/** A node of the graph returned by `getLineageGraph`. */
export interface LineageGraphNode {
  id: string;
  name: DataAsset['name'];
  type: DataAsset['type'];
  classification: DataAsset['classification'];
}

/**
 * A directed edge of the graph returned by `getLineageGraph`.
 * `source` is always the upstream asset and `target` the downstream asset;
 * `type` records which dependency list the edge was discovered through.
 */
export interface LineageGraphEdge {
  source: string;
  target: string;
  type: 'upstream' | 'downstream';
}

/** Result shape of `getLineageGraph`. */
export interface LineageGraph {
  nodes: LineageGraphNode[];
  edges: LineageGraphEdge[];
}

/** Mutable accumulator filled while a lineage graph is being built. */
interface LineageGraphAccumulator {
  nodes: Map<string, LineageGraphNode>;
  edges: LineageGraphEdge[];
}

/**
 * In-memory implementation of `DataLineageService`.
 *
 * Lineage records are kept in a `Map` keyed by asset id. Asset metadata is
 * not owned by this class; it is looked up in the map supplied through
 * `setAssets`. Mutating operations log through the injected logger and emit
 * an event on the injected event bus.
 */
export class DataLineageServiceImpl implements DataLineageService {
  private readonly lineages = new Map<string, DataLineage>();
  private assets = new Map<string, DataAsset>();

  constructor(
    private readonly eventBus: EventBus,
    private readonly logger: Logger
  ) {}

  /**
   * Stores `lineage` under `lineage.assetId`, replacing any previous record,
   * then emits `data.lineage.added`.
   *
   * @throws Rethrows any error raised while logging or emitting the event.
   */
  async addLineage(lineage: DataLineage): Promise<void> {
    try {
      this.lineages.set(lineage.assetId, lineage);

      this.logger.info('Data lineage added', {
        assetId: lineage.assetId,
        upstreamCount: lineage.upstreamAssets.length,
        downstreamCount: lineage.downstreamAssets.length
      });

      await this.eventBus.emit('data.lineage.added', {
        assetId: lineage.assetId,
        lineage,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add data lineage', { lineage, error });
      throw error;
    }
  }

  /** Returns the stored lineage record for `assetId`, or `null` if there is none. */
  async getLineage(assetId: string): Promise<DataLineage | null> {
    return this.lineages.get(assetId) ?? null;
  }

  /**
   * Shallow-merges `lineage` into the existing record of `assetId`, refreshes
   * `updatedAt` and emits `data.lineage.updated`.
   *
   * The record's `assetId` is always kept equal to the key it is stored
   * under, even if the partial update carries a different `assetId`.
   *
   * @returns The updated record, or `null` when `assetId` has no record.
   * @throws Rethrows any error raised while logging or emitting the event.
   */
  async updateLineage(assetId: string, lineage: Partial<DataLineage>): Promise<DataLineage | null> {
    try {
      const existingLineage = this.lineages.get(assetId);
      if (!existingLineage) {
        return null;
      }

      const updatedLineage: DataLineage = {
        ...existingLineage,
        ...lineage,
        // Pin the key: a record must never disagree with the id it is stored under.
        assetId,
        updatedAt: new Date()
      };

      this.lineages.set(assetId, updatedLineage);

      this.logger.info('Data lineage updated', { assetId, changes: lineage });

      await this.eventBus.emit('data.lineage.updated', {
        assetId,
        lineage: updatedLineage,
        changes: lineage,
        timestamp: new Date()
      });

      return updatedLineage;
    } catch (error) {
      this.logger.error('Failed to update data lineage', { assetId, lineage, error });
      throw error;
    }
  }

  /**
   * Records that `assetId` depends on `upstreamAssetId`.
   *
   * Missing lineage records are created on demand. The reverse link
   * (`assetId` as a downstream of `upstreamAssetId`) is added as well. When
   * the dependency is already present the call is a no-op: nothing is
   * logged and no event is emitted.
   *
   * @param transformation Optional transformation appended to the
   *   transformations of `assetId` when the dependency is new.
   * @throws Rethrows any error raised while logging or emitting the event.
   */
  async addUpstreamDependency(
    assetId: string,
    upstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void> {
    try {
      const lineage = this.ensureLineage(assetId);
      if (lineage.upstreamAssets.includes(upstreamAssetId)) {
        return;
      }

      lineage.upstreamAssets.push(upstreamAssetId);
      if (transformation) {
        lineage.transformations.push(transformation);
      }
      lineage.updatedAt = new Date();

      // Keep the upstream asset's downstream list in sync.
      await this.addDownstreamToUpstream(upstreamAssetId, assetId);

      this.logger.info('Upstream dependency added', { assetId, upstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.added', {
        assetId,
        upstreamAssetId,
        transformation,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add upstream dependency', { assetId, upstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Records that `downstreamAssetId` depends on `assetId`.
   *
   * Missing lineage records are created on demand. The reverse link
   * (`assetId` as an upstream of `downstreamAssetId`) is added as well, and
   * `transformation`, if given, is attached to the downstream asset's record.
   * When the dependency is already present the call is a no-op.
   *
   * @throws Rethrows any error raised while logging or emitting the event.
   */
  async addDownstreamDependency(
    assetId: string,
    downstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void> {
    try {
      const lineage = this.ensureLineage(assetId);
      if (lineage.downstreamAssets.includes(downstreamAssetId)) {
        return;
      }

      lineage.downstreamAssets.push(downstreamAssetId);
      lineage.updatedAt = new Date();

      // Keep the downstream asset's upstream list in sync.
      await this.addUpstreamToDownstream(downstreamAssetId, assetId, transformation);

      this.logger.info('Downstream dependency added', { assetId, downstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.added', {
        assetId,
        downstreamAssetId,
        transformation,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add downstream dependency', { assetId, downstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Removes `upstreamAssetId` from the upstream list of `assetId` and the
   * matching reverse link, then emits `data.lineage.dependency.removed`.
   * Does nothing when `assetId` has no lineage record.
   *
   * NOTE(review): transformations that were attached when the dependency was
   * added are not removed here — confirm whether that is intended.
   *
   * @throws Rethrows any error raised while logging or emitting the event.
   */
  async removeUpstreamDependency(assetId: string, upstreamAssetId: string): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId);
      if (!lineage) {
        return;
      }

      lineage.upstreamAssets = lineage.upstreamAssets.filter(id => id !== upstreamAssetId);
      lineage.updatedAt = new Date();

      await this.removeDownstreamFromUpstream(upstreamAssetId, assetId);

      this.logger.info('Upstream dependency removed', { assetId, upstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.removed', {
        assetId,
        upstreamAssetId,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to remove upstream dependency', { assetId, upstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Removes `downstreamAssetId` from the downstream list of `assetId` and the
   * matching reverse link, then emits `data.lineage.dependency.removed`.
   * Does nothing when `assetId` has no lineage record.
   *
   * @throws Rethrows any error raised while logging or emitting the event.
   */
  async removeDownstreamDependency(assetId: string, downstreamAssetId: string): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId);
      if (!lineage) {
        return;
      }

      lineage.downstreamAssets = lineage.downstreamAssets.filter(id => id !== downstreamAssetId);
      lineage.updatedAt = new Date();

      await this.removeUpstreamFromDownstream(downstreamAssetId, assetId);

      this.logger.info('Downstream dependency removed', { assetId, downstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.removed', {
        assetId,
        downstreamAssetId,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to remove downstream dependency', { assetId, downstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Collects the assets reachable from `assetId` by following upstream links
   * for at most `depth` hops. Only ids present in the injected asset map
   * produce an entry; each asset appears once.
   *
   * @param depth Maximum number of hops (default 1). `0` yields an empty
   *   list; a negative value never reaches zero and so imposes no limit.
   */
  async getUpstreamAssets(assetId: string, depth: number = 1): Promise<DataAsset[]> {
    try {
      const visited = new Set<string>();
      const result: DataAsset[] = [];

      await this.traverseUpstream(assetId, depth, visited, result);

      return result;
    } catch (error) {
      this.logger.error('Failed to get upstream assets', { assetId, depth, error });
      throw error;
    }
  }

  /**
   * Collects the assets reachable from `assetId` by following downstream
   * links for at most `depth` hops. Only ids present in the injected asset
   * map produce an entry; each asset appears once.
   *
   * @param depth Maximum number of hops (default 1). `0` yields an empty
   *   list; a negative value never reaches zero and so imposes no limit.
   */
  async getDownstreamAssets(assetId: string, depth: number = 1): Promise<DataAsset[]> {
    try {
      const visited = new Set<string>();
      const result: DataAsset[] = [];

      await this.traverseDownstream(assetId, depth, visited, result);

      return result;
    } catch (error) {
      this.logger.error('Failed to get downstream assets', { assetId, depth, error });
      throw error;
    }
  }

  /**
   * Estimates the blast radius of a change to `assetId`.
   *
   * Downstream assets are gathered up to `IMPACT_ANALYSIS_DEPTH` hops. The
   * affected users are the owners, stewards and top users of those assets.
   * The impact level is derived purely from the number of downstream assets.
   *
   * @throws Rethrows any error raised during traversal or logging.
   */
  async analyzeImpact(assetId: string): Promise<ImpactAnalysis> {
    try {
      const downstreamAssets = await this.getDownstreamAssets(assetId, IMPACT_ANALYSIS_DEPTH);
      const affectedUsers = new Set<string>();

      for (const asset of downstreamAssets) {
        affectedUsers.add(asset.owner);
        if (asset.steward) {
          affectedUsers.add(asset.steward);
        }
        // Assets are injected from outside; tolerate missing usage analytics.
        for (const user of asset.usage?.topUsers ?? []) {
          affectedUsers.add(user.userId);
        }
      }

      const estimatedImpact = this.estimateImpactLevel(downstreamAssets.length);

      const impact: ImpactAnalysis = {
        downstreamAssets: downstreamAssets.map(asset => asset.id),
        affectedUsers: Array.from(affectedUsers),
        estimatedImpact,
        impactDescription: this.generateImpactDescription(downstreamAssets.length, affectedUsers.size),
        recommendations: this.generateRecommendations(estimatedImpact, downstreamAssets.length)
      };

      this.logger.info('Impact analysis completed', {
        assetId,
        impactLevel: estimatedImpact,
        affectedAssets: downstreamAssets.length,
        affectedUsers: affectedUsers.size
      });

      return impact;
    } catch (error) {
      this.logger.error('Failed to analyze impact', { assetId, error });
      throw error;
    }
  }

  /**
   * Runs an upstream and/or downstream traversal (per `query.direction`) for
   * every id in `query.assetIds` and returns the union of the results,
   * de-duplicated by asset id in first-seen order. Returns an empty list
   * when `query.assetIds` is not set.
   */
  async queryLineage(query: LineageQuery): Promise<DataAsset[]> {
    try {
      // Map keeps insertion order, so the first occurrence of each id wins.
      const uniqueById = new Map<string, DataAsset>();
      const collect = (found: DataAsset[]): void => {
        for (const asset of found) {
          if (!uniqueById.has(asset.id)) {
            uniqueById.set(asset.id, asset);
          }
        }
      };

      if (query.assetIds) {
        for (const assetId of query.assetIds) {
          if (query.direction === 'upstream' || query.direction === 'both') {
            collect(await this.getUpstreamAssets(assetId, query.depth));
          }
          if (query.direction === 'downstream' || query.direction === 'both') {
            collect(await this.getDownstreamAssets(assetId, query.depth));
          }
        }
      }

      return Array.from(uniqueById.values());
    } catch (error) {
      this.logger.error('Failed to query lineage', { query, error });
      throw error;
    }
  }

  /**
   * Builds a node/edge view of the lineage around `assetId`.
   *
   * Links are followed for at most `depth` hops (default 3) in the requested
   * direction. Every asset reached that exists in the injected asset map
   * gets a node; every link followed yields one edge, oriented from the
   * upstream asset to the downstream asset.
   */
  async getLineageGraph(assetId: string, direction: LineageDirection, depth: number = 3): Promise<LineageGraph> {
    try {
      const graph: LineageGraphAccumulator = {
        nodes: new Map<string, LineageGraphNode>(),
        edges: []
      };

      const visited = new Set<string>();
      await this.buildLineageGraph(assetId, direction, depth, visited, graph);

      return {
        nodes: Array.from(graph.nodes.values()),
        edges: graph.edges
      };
    } catch (error) {
      this.logger.error('Failed to get lineage graph', { assetId, direction, depth, error });
      throw error;
    }
  }

  /**
   * Searches the downstream links of all stored lineage records for cycles.
   *
   * @returns One entry per back edge found; each entry lists the asset ids
   *   on the cycle in traversal order. A warning is logged when any exist.
   */
  async detectCircularDependencies(): Promise<string[][]> {
    try {
      const cycles: string[][] = [];
      const visited = new Set<string>();
      const recursionStack = new Set<string>();

      for (const assetId of this.lineages.keys()) {
        if (!visited.has(assetId)) {
          const path: string[] = [];
          await this.detectCycleDFS(assetId, visited, recursionStack, path, cycles);
        }
      }

      if (cycles.length > 0) {
        this.logger.warn('Circular dependencies detected', { cycleCount: cycles.length });
      }

      return cycles;
    } catch (error) {
      this.logger.error('Failed to detect circular dependencies', { error });
      throw error;
    }
  }

  /**
   * Replaces the asset lookup used to resolve ids into `DataAsset` objects
   * (typically supplied by the data catalog service). The map is kept by
   * reference, not copied.
   */
  setAssets(assets: Map<string, DataAsset>): void {
    this.assets = assets;
  }

  // Private helper methods

  /** Builds a lineage record for `assetId` with no links and a fresh id. */
  private createEmptyLineage(assetId: string): DataLineage {
    return {
      id: this.generateId(),
      assetId,
      upstreamAssets: [],
      downstreamAssets: [],
      transformations: [],
      impact: {
        downstreamAssets: [],
        affectedUsers: [],
        estimatedImpact: 'low',
        impactDescription: '',
        recommendations: []
      },
      createdAt: new Date(),
      updatedAt: new Date()
    };
  }

  /** Returns the stored record for `assetId`, creating and storing an empty one if needed. */
  private ensureLineage(assetId: string): DataLineage {
    let lineage = this.lineages.get(assetId);
    if (!lineage) {
      lineage = this.createEmptyLineage(assetId);
      this.lineages.set(assetId, lineage);
    }
    return lineage;
  }

  /** Adds `downstreamAssetId` to the downstream list of `upstreamAssetId` if absent. */
  private async addDownstreamToUpstream(upstreamAssetId: string, downstreamAssetId: string): Promise<void> {
    const upstreamLineage = this.ensureLineage(upstreamAssetId);

    if (!upstreamLineage.downstreamAssets.includes(downstreamAssetId)) {
      upstreamLineage.downstreamAssets.push(downstreamAssetId);
      upstreamLineage.updatedAt = new Date();
    }
  }

  /**
   * Adds `upstreamAssetId` to the upstream list of `downstreamAssetId` if
   * absent, attaching `transformation` to the downstream record when given.
   */
  private async addUpstreamToDownstream(
    downstreamAssetId: string,
    upstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void> {
    const downstreamLineage = this.ensureLineage(downstreamAssetId);

    if (!downstreamLineage.upstreamAssets.includes(upstreamAssetId)) {
      downstreamLineage.upstreamAssets.push(upstreamAssetId);
      if (transformation) {
        downstreamLineage.transformations.push(transformation);
      }
      downstreamLineage.updatedAt = new Date();
    }
  }

  /** Drops `downstreamAssetId` from the downstream list of `upstreamAssetId`, if that record exists. */
  private async removeDownstreamFromUpstream(upstreamAssetId: string, downstreamAssetId: string): Promise<void> {
    const upstreamLineage = this.lineages.get(upstreamAssetId);
    if (upstreamLineage) {
      upstreamLineage.downstreamAssets = upstreamLineage.downstreamAssets.filter(id => id !== downstreamAssetId);
      upstreamLineage.updatedAt = new Date();
    }
  }

  /** Drops `upstreamAssetId` from the upstream list of `downstreamAssetId`, if that record exists. */
  private async removeUpstreamFromDownstream(downstreamAssetId: string, upstreamAssetId: string): Promise<void> {
    const downstreamLineage = this.lineages.get(downstreamAssetId);
    if (downstreamLineage) {
      downstreamLineage.upstreamAssets = downstreamLineage.upstreamAssets.filter(id => id !== upstreamAssetId);
      downstreamLineage.updatedAt = new Date();
    }
  }

  /** Appends to `result` the assets upstream of `assetId` within `remainingDepth` hops. */
  private async traverseUpstream(
    assetId: string,
    remainingDepth: number,
    visited: Set<string>,
    result: DataAsset[]
  ): Promise<void> {
    this.collectRelatedAssets(assetId, remainingDepth, visited, result, lineage => lineage.upstreamAssets);
  }

  /** Appends to `result` the assets downstream of `assetId` within `remainingDepth` hops. */
  private async traverseDownstream(
    assetId: string,
    remainingDepth: number,
    visited: Set<string>,
    result: DataAsset[]
  ): Promise<void> {
    this.collectRelatedAssets(assetId, remainingDepth, visited, result, lineage => lineage.downstreamAssets);
  }

  /**
   * Breadth-first walk shared by the upstream and downstream traversals.
   *
   * Processing the graph one hop-level at a time guarantees each asset is
   * expanded at its shortest distance from the start. A depth-limited
   * depth-first walk with a single visited set can reach an asset first via
   * a long path, mark it visited with little depth left, and then refuse to
   * expand it again via a shorter path — dropping assets that are in range.
   *
   * @param neighbours Selects which link list of a lineage record to follow.
   */
  private collectRelatedAssets(
    startId: string,
    depth: number,
    visited: Set<string>,
    result: DataAsset[],
    neighbours: (lineage: DataLineage) => string[]
  ): void {
    if (depth === 0 || visited.has(startId)) {
      return;
    }
    visited.add(startId);

    // Ids already present in `result`, for O(1) duplicate checks.
    const collectedIds = new Set(result.map(asset => asset.id));
    let frontier: string[] = [startId];
    let remaining = depth;

    // `!== 0` (not `> 0`) keeps the existing meaning of a negative depth: no limit.
    while (frontier.length > 0 && remaining !== 0) {
      const nextFrontier: string[] = [];

      for (const currentId of frontier) {
        const lineage = this.lineages.get(currentId);
        if (!lineage) {
          continue;
        }

        for (const relatedId of neighbours(lineage)) {
          const asset = this.assets.get(relatedId);
          if (asset && !collectedIds.has(asset.id)) {
            collectedIds.add(asset.id);
            result.push(asset);
          }
          if (!visited.has(relatedId)) {
            visited.add(relatedId);
            nextFrontier.push(relatedId);
          }
        }
      }

      frontier = nextFrontier;
      remaining -= 1;
    }
  }

  /**
   * Fills `graph` with the nodes and edges reachable from `assetId` within
   * `remainingDepth` hops in the given direction.
   *
   * The walk is breadth-first so each asset is expanded at its shortest
   * distance. Both endpoints of every emitted edge are registered as nodes
   * when the asset is known, so edges do not point at assets that were
   * dropped only because they sit on the depth boundary. An edge is emitted
   * once per source→target pair, even when direction `'both'` discovers the
   * same link from each end.
   */
  private async buildLineageGraph(
    assetId: string,
    direction: LineageDirection,
    remainingDepth: number,
    visited: Set<string>,
    graph: LineageGraphAccumulator
  ): Promise<void> {
    if (remainingDepth === 0 || visited.has(assetId)) {
      return;
    }

    const edgeKey = (source: string, target: string): string => JSON.stringify([source, target]);
    const seenEdges = new Set(graph.edges.map(edge => edgeKey(edge.source, edge.target)));
    const addEdge = (edge: LineageGraphEdge): void => {
      const key = edgeKey(edge.source, edge.target);
      if (!seenEdges.has(key)) {
        seenEdges.add(key);
        graph.edges.push(edge);
      }
    };

    const followUpstream = direction === 'upstream' || direction === 'both';
    const followDownstream = direction === 'downstream' || direction === 'both';

    visited.add(assetId);
    this.registerGraphNode(assetId, graph);

    let frontier: string[] = [assetId];
    let remaining = remainingDepth;

    while (frontier.length > 0 && remaining !== 0) {
      const nextFrontier: string[] = [];
      const discover = (id: string): void => {
        this.registerGraphNode(id, graph);
        if (!visited.has(id)) {
          visited.add(id);
          nextFrontier.push(id);
        }
      };

      for (const currentId of frontier) {
        const lineage = this.lineages.get(currentId);
        if (!lineage) {
          continue;
        }

        if (followUpstream) {
          for (const upstreamId of lineage.upstreamAssets) {
            addEdge({ source: upstreamId, target: currentId, type: 'upstream' });
            discover(upstreamId);
          }
        }

        if (followDownstream) {
          for (const downstreamId of lineage.downstreamAssets) {
            addEdge({ source: currentId, target: downstreamId, type: 'downstream' });
            discover(downstreamId);
          }
        }
      }

      frontier = nextFrontier;
      remaining -= 1;
    }
  }

  /** Adds a node for `assetId` to `graph` when the asset is known and not yet present. */
  private registerGraphNode(assetId: string, graph: LineageGraphAccumulator): void {
    if (graph.nodes.has(assetId)) {
      return;
    }
    const asset = this.assets.get(assetId);
    if (asset) {
      graph.nodes.set(assetId, {
        id: assetId,
        name: asset.name,
        type: asset.type,
        classification: asset.classification
      });
    }
  }

  /**
   * Depth-first search along downstream links that records a cycle whenever
   * it meets an asset that is still on the current recursion path.
   *
   * @param visited Assets whose exploration has started (never re-entered).
   * @param recursionStack Assets on the current path from the DFS root.
   * @param path The same path in order, used to slice out the cycle.
   * @param cycles Output list the detected cycles are appended to.
   */
  private async detectCycleDFS(
    assetId: string,
    visited: Set<string>,
    recursionStack: Set<string>,
    path: string[],
    cycles: string[][]
  ): Promise<void> {
    visited.add(assetId);
    recursionStack.add(assetId);
    path.push(assetId);

    const lineage = this.lineages.get(assetId);
    if (lineage) {
      for (const downstreamId of lineage.downstreamAssets) {
        if (!visited.has(downstreamId)) {
          await this.detectCycleDFS(downstreamId, visited, recursionStack, path, cycles);
        } else if (recursionStack.has(downstreamId)) {
          // Back edge: the cycle is the part of the path starting at downstreamId.
          const cycleStart = path.indexOf(downstreamId);
          cycles.push(path.slice(cycleStart));
        }
      }
    }

    // Backtrack so sibling branches do not see this asset as "on the path".
    path.pop();
    recursionStack.delete(assetId);
  }

  /** Maps a number of affected downstream assets to an impact level. */
  private estimateImpactLevel(assetCount: number): 'low' | 'medium' | 'high' | 'critical' {
    if (assetCount > CRITICAL_IMPACT_THRESHOLD) {
      return 'critical';
    }
    if (assetCount > HIGH_IMPACT_THRESHOLD) {
      return 'high';
    }
    if (assetCount > MEDIUM_IMPACT_THRESHOLD) {
      return 'medium';
    }
    return 'low';
  }

  /** Produces the human-readable summary stored in `ImpactAnalysis.impactDescription`. */
  private generateImpactDescription(assetCount: number, userCount: number): string {
    if (assetCount === 0) {
      return 'No downstream dependencies identified.';
    }

    return `Changes to this asset may affect ${assetCount} downstream asset(s) and ${userCount} user(s).`;
  }

  /**
   * Returns the fixed list of recommendations for an impact level.
   * `assetCount` is accepted but not currently used.
   */
  private generateRecommendations(impact: string, assetCount: number): string[] {
    switch (impact) {
      case 'critical':
        return [
          'Schedule maintenance window',
          'Notify all stakeholders in advance',
          'Prepare rollback plan',
          'Consider phased rollout'
        ];
      case 'high':
        return [
          'Notify affected users',
          'Test changes thoroughly',
          'Monitor downstream systems'
        ];
      case 'medium':
        return ['Test with subset of data', 'Monitor for issues'];
      default:
        return ['Standard testing procedures apply'];
    }
  }

  /** Generates a lineage id from the current time plus a random base-36 suffix. */
  private generateId(): string {
    // slice(2, 11) takes the same (up to) nine characters as the deprecated substr(2, 9).
    return `lineage_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }
}
|