adding data-services
This commit is contained in:
parent
e3bfd05b90
commit
405b818c86
139 changed files with 55943 additions and 416 deletions
|
|
@ -0,0 +1,607 @@
|
|||
import { EventBus } from '@stock-bot/event-bus';
|
||||
import { Logger } from '@stock-bot/utils';
|
||||
import {
|
||||
DataLineage,
|
||||
DataAsset,
|
||||
LineageTransformation,
|
||||
ImpactAnalysis,
|
||||
LineageQuery,
|
||||
LineageDirection
|
||||
} from '../types/DataCatalog';
|
||||
|
||||
/**
 * Contract for a service that records lineage (upstream/downstream
 * relationships) between data assets and answers questions about it.
 *
 * Every operation is asynchronous. Assets are always referred to by their
 * string id.
 */
export interface DataLineageService {
  /** Stores a complete lineage record. */
  addLineage(lineage: DataLineage): Promise<void>;

  /** Returns the lineage record of an asset, or `null` when none is known. */
  getLineage(assetId: string): Promise<DataLineage | null>;

  /**
   * Applies a partial change to the lineage record of an asset.
   *
   * @returns The resulting record, or `null` when the asset has no lineage.
   */
  updateLineage(assetId: string, lineage: Partial<DataLineage>): Promise<DataLineage | null>;

  /**
   * Declares `upstreamAssetId` as an upstream dependency of `assetId`.
   *
   * @param transformation Optional transformation to attach to the dependency.
   */
  addUpstreamDependency(
    assetId: string,
    upstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void>;

  /**
   * Declares `downstreamAssetId` as a downstream dependency of `assetId`.
   *
   * @param transformation Optional transformation to attach to the dependency.
   */
  addDownstreamDependency(
    assetId: string,
    downstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void>;

  /** Removes `upstreamAssetId` from the upstream dependencies of `assetId`. */
  removeUpstreamDependency(assetId: string, upstreamAssetId: string): Promise<void>;

  /** Removes `downstreamAssetId` from the downstream dependencies of `assetId`. */
  removeDownstreamDependency(assetId: string, downstreamAssetId: string): Promise<void>;

  /**
   * Lists the assets found upstream of `assetId`.
   *
   * @param depth Optional limit on how many dependency levels are followed.
   */
  getUpstreamAssets(assetId: string, depth?: number): Promise<DataAsset[]>;

  /**
   * Lists the assets found downstream of `assetId`.
   *
   * @param depth Optional limit on how many dependency levels are followed.
   */
  getDownstreamAssets(assetId: string, depth?: number): Promise<DataAsset[]>;

  /** Produces an impact analysis for a change to `assetId`. */
  analyzeImpact(assetId: string): Promise<ImpactAnalysis>;

  /** Resolves a lineage query to the matching assets. */
  queryLineage(query: LineageQuery): Promise<DataAsset[]>;

  /**
   * Builds a graph representation of the lineage around `assetId`.
   *
   * The result is intentionally left as `any` so existing callers keep
   * compiling; the concrete shape is defined by the implementation.
   *
   * @param direction Which dependency direction(s) to follow.
   * @param depth Optional limit on how many dependency levels are followed.
   */
  getLineageGraph(assetId: string, direction: LineageDirection, depth?: number): Promise<any>;

  /** Returns the dependency cycles found, each as a list of asset ids. */
  detectCircularDependencies(): Promise<string[][]>;
}
|
||||
|
||||
/** Name of the adjacency list on a lineage record that a traversal follows. */
type LineageEdgeKey = 'upstreamAssets' | 'downstreamAssets';

/** Node emitted by {@link DataLineageServiceImpl.getLineageGraph}. */
interface LineageGraphNode {
  id: string;
  name: DataAsset['name'];
  type: DataAsset['type'];
  classification: DataAsset['classification'];
}

/** Directed edge emitted by {@link DataLineageServiceImpl.getLineageGraph}. */
interface LineageGraphEdge {
  source: string;
  target: string;
  type: 'upstream' | 'downstream';
}

/** Mutable state shared by the recursive steps of a graph build. */
interface LineageGraphBuilder {
  nodes: Map<string, LineageGraphNode>;
  edges: LineageGraphEdge[];
  /** Serialized (source, target, type) triples already present in `edges`. */
  edgeKeys: Set<string>;
}

/**
 * In-memory implementation of {@link DataLineageService}.
 *
 * Lineage records are kept in a `Map` keyed by asset id. Asset metadata is not
 * owned by this class: it is injected through {@link setAssets} and is only
 * read. Mutating operations log through the injected logger and publish an
 * event on the injected event bus; failures are logged and rethrown.
 */
export class DataLineageServiceImpl implements DataLineageService {
  /** Number of downstream levels inspected by {@link analyzeImpact}. */
  private static readonly IMPACT_TRAVERSAL_DEPTH = 5;

  private readonly lineages = new Map<string, DataLineage>();
  private assets = new Map<string, DataAsset>();

  constructor(
    private readonly eventBus: EventBus,
    private readonly logger: Logger
  ) {}

  /**
   * Stores `lineage` under `lineage.assetId`, replacing any previous record,
   * then emits `data.lineage.added`.
   *
   * @throws Rethrows any error raised while logging or emitting.
   */
  async addLineage(lineage: DataLineage): Promise<void> {
    try {
      this.lineages.set(lineage.assetId, lineage);

      this.logger.info('Data lineage added', {
        assetId: lineage.assetId,
        upstreamCount: lineage.upstreamAssets.length,
        downstreamCount: lineage.downstreamAssets.length
      });

      await this.eventBus.emit('data.lineage.added', {
        assetId: lineage.assetId,
        lineage,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add data lineage', { lineage, error });
      throw error;
    }
  }

  /** Returns the stored lineage record for `assetId`, or `null` if there is none. */
  async getLineage(assetId: string): Promise<DataLineage | null> {
    // A Map lookup cannot throw, so no error handling is needed here.
    return this.lineages.get(assetId) ?? null;
  }

  /**
   * Merges `lineage` into the stored record for `assetId`, refreshes
   * `updatedAt` and emits `data.lineage.updated`.
   *
   * The `assetId` of the stored record is always kept equal to the key it is
   * stored under, even if the patch carries a different `assetId`.
   *
   * @returns The merged record, or `null` when `assetId` has no lineage.
   * @throws Rethrows any error raised while logging or emitting.
   */
  async updateLineage(assetId: string, lineage: Partial<DataLineage>): Promise<DataLineage | null> {
    try {
      const existingLineage = this.lineages.get(assetId);
      if (!existingLineage) {
        return null;
      }

      const updatedLineage: DataLineage = {
        ...existingLineage,
        ...lineage,
        // Pin the identity so the record can never disagree with its map key.
        assetId,
        updatedAt: new Date()
      };
      this.lineages.set(assetId, updatedLineage);

      this.logger.info('Data lineage updated', { assetId, changes: lineage });

      await this.eventBus.emit('data.lineage.updated', {
        assetId,
        lineage: updatedLineage,
        changes: lineage,
        timestamp: new Date()
      });

      return updatedLineage;
    } catch (error) {
      this.logger.error('Failed to update data lineage', { assetId, lineage, error });
      throw error;
    }
  }

  /**
   * Records `upstreamAssetId` as an upstream of `assetId` and mirrors the link
   * on the upstream asset's record. Missing records are created on demand.
   *
   * Does nothing (no log, no event) when `assetId` already lists the upstream.
   * Otherwise emits `data.lineage.dependency.added`.
   *
   * @param transformation Appended to the transformations of `assetId` when given.
   * @throws Rethrows any error raised while logging or emitting.
   */
  async addUpstreamDependency(
    assetId: string,
    upstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId) ?? this.createEmptyLineage(assetId);
      if (lineage.upstreamAssets.includes(upstreamAssetId)) {
        return;
      }

      lineage.upstreamAssets.push(upstreamAssetId);
      if (transformation) {
        lineage.transformations.push(transformation);
      }
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);

      // Keep the reverse link on the upstream asset in sync.
      this.addDownstreamToUpstream(upstreamAssetId, assetId);

      this.logger.info('Upstream dependency added', { assetId, upstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.added', {
        assetId,
        upstreamAssetId,
        transformation,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add upstream dependency', { assetId, upstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Records `downstreamAssetId` as a downstream of `assetId` and mirrors the
   * link on the downstream asset's record. Missing records are created on
   * demand.
   *
   * Does nothing (no log, no event) when `assetId` already lists the
   * downstream. Otherwise emits `data.lineage.dependency.added`.
   *
   * @param transformation Appended to the transformations of the downstream
   *   asset when given and the reverse link did not exist yet.
   * @throws Rethrows any error raised while logging or emitting.
   */
  async addDownstreamDependency(
    assetId: string,
    downstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId) ?? this.createEmptyLineage(assetId);
      if (lineage.downstreamAssets.includes(downstreamAssetId)) {
        return;
      }

      lineage.downstreamAssets.push(downstreamAssetId);
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);

      // Keep the reverse link on the downstream asset in sync.
      this.addUpstreamToDownstream(downstreamAssetId, assetId, transformation);

      this.logger.info('Downstream dependency added', { assetId, downstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.added', {
        assetId,
        downstreamAssetId,
        transformation,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add downstream dependency', { assetId, downstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Removes `upstreamAssetId` from the upstreams of `assetId` and the mirrored
   * downstream entry on the upstream asset, then emits
   * `data.lineage.dependency.removed`.
   *
   * Does nothing when `assetId` has no lineage record.
   *
   * @throws Rethrows any error raised while logging or emitting.
   */
  async removeUpstreamDependency(assetId: string, upstreamAssetId: string): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId);
      if (!lineage) {
        return;
      }

      lineage.upstreamAssets = lineage.upstreamAssets.filter(id => id !== upstreamAssetId);
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);

      this.removeDownstreamFromUpstream(upstreamAssetId, assetId);

      this.logger.info('Upstream dependency removed', { assetId, upstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.removed', {
        assetId,
        upstreamAssetId,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to remove upstream dependency', { assetId, upstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Removes `downstreamAssetId` from the downstreams of `assetId` and the
   * mirrored upstream entry on the downstream asset, then emits
   * `data.lineage.dependency.removed`.
   *
   * Does nothing when `assetId` has no lineage record.
   *
   * @throws Rethrows any error raised while logging or emitting.
   */
  async removeDownstreamDependency(assetId: string, downstreamAssetId: string): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId);
      if (!lineage) {
        return;
      }

      lineage.downstreamAssets = lineage.downstreamAssets.filter(id => id !== downstreamAssetId);
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);

      this.removeUpstreamFromDownstream(downstreamAssetId, assetId);

      this.logger.info('Downstream dependency removed', { assetId, downstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.removed', {
        assetId,
        downstreamAssetId,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to remove downstream dependency', { assetId, downstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Collects the known assets reachable from `assetId` by following upstream
   * links for at most `depth` levels. Ids without an entry in the injected
   * asset map are traversed but not returned.
   *
   * @param depth Number of levels to follow; defaults to 1 (direct upstreams).
   * @returns Each asset once, in depth-first discovery order.
   */
  async getUpstreamAssets(assetId: string, depth: number = 1): Promise<DataAsset[]> {
    try {
      return this.collectLinkedAssets(assetId, depth, 'upstreamAssets');
    } catch (error) {
      this.logger.error('Failed to get upstream assets', { assetId, depth, error });
      throw error;
    }
  }

  /**
   * Collects the known assets reachable from `assetId` by following downstream
   * links for at most `depth` levels. Ids without an entry in the injected
   * asset map are traversed but not returned.
   *
   * @param depth Number of levels to follow; defaults to 1 (direct downstreams).
   * @returns Each asset once, in depth-first discovery order.
   */
  async getDownstreamAssets(assetId: string, depth: number = 1): Promise<DataAsset[]> {
    try {
      return this.collectLinkedAssets(assetId, depth, 'downstreamAssets');
    } catch (error) {
      this.logger.error('Failed to get downstream assets', { assetId, depth, error });
      throw error;
    }
  }

  /**
   * Estimates the blast radius of a change to `assetId` from its downstream
   * assets (up to five levels deep).
   *
   * Affected users are the owners, stewards and top users of those assets.
   * The impact level is derived from the number of downstream assets:
   * more than 20 is `critical`, more than 10 `high`, more than 5 `medium`,
   * otherwise `low`.
   *
   * @throws Rethrows any error raised while traversing or logging.
   */
  async analyzeImpact(assetId: string): Promise<ImpactAnalysis> {
    try {
      const downstreamAssets = await this.getDownstreamAssets(
        assetId,
        DataLineageServiceImpl.IMPACT_TRAVERSAL_DEPTH
      );

      const affectedUsers = new Set<string>();
      for (const asset of downstreamAssets) {
        if (asset.owner) {
          affectedUsers.add(asset.owner);
        }
        if (asset.steward) {
          affectedUsers.add(asset.steward);
        }
        // Usage analytics may be absent on an asset; treat that as "no top users".
        for (const user of asset.usage?.topUsers ?? []) {
          affectedUsers.add(user.userId);
        }
      }

      const estimatedImpact = this.classifyImpact(downstreamAssets.length);

      const impact: ImpactAnalysis = {
        downstreamAssets: downstreamAssets.map(asset => asset.id),
        affectedUsers: Array.from(affectedUsers),
        estimatedImpact,
        impactDescription: this.generateImpactDescription(downstreamAssets.length, affectedUsers.size),
        recommendations: this.generateRecommendations(estimatedImpact)
      };

      this.logger.info('Impact analysis completed', {
        assetId,
        impactLevel: estimatedImpact,
        affectedAssets: downstreamAssets.length,
        affectedUsers: affectedUsers.size
      });

      return impact;
    } catch (error) {
      this.logger.error('Failed to analyze impact', { assetId, error });
      throw error;
    }
  }

  /**
   * Resolves `query` by collecting, for every id in `query.assetIds`, the
   * upstream and/or downstream assets selected by `query.direction`, limited
   * by `query.depth`.
   *
   * @returns The collected assets without duplicates (by `id`), in
   *   first-occurrence order; an empty list when `query.assetIds` is not set.
   * @throws Rethrows any error raised while traversing.
   */
  async queryLineage(query: LineageQuery): Promise<DataAsset[]> {
    try {
      const uniqueById = new Map<DataAsset['id'], DataAsset>();
      const collect = (found: DataAsset[]): void => {
        for (const asset of found) {
          if (!uniqueById.has(asset.id)) {
            uniqueById.set(asset.id, asset);
          }
        }
      };

      for (const assetId of query.assetIds ?? []) {
        if (query.direction === 'upstream' || query.direction === 'both') {
          collect(await this.getUpstreamAssets(assetId, query.depth));
        }
        if (query.direction === 'downstream' || query.direction === 'both') {
          collect(await this.getDownstreamAssets(assetId, query.depth));
        }
      }

      return Array.from(uniqueById.values());
    } catch (error) {
      this.logger.error('Failed to query lineage', { query, error });
      throw error;
    }
  }

  /**
   * Builds a node/edge view of the lineage around `assetId`.
   *
   * Nodes are created only for visited ids that exist in the injected asset
   * map. Edges always point from the upstream asset to the downstream asset
   * and are tagged with the direction in which they were discovered.
   *
   * @param direction Which links to follow from every visited asset.
   * @param depth Number of levels to expand; defaults to 3.
   * @returns `{ nodes, edges }` with both members as arrays.
   * @throws Rethrows any error raised while traversing.
   */
  async getLineageGraph(assetId: string, direction: LineageDirection, depth: number = 3): Promise<any> {
    try {
      const graph: LineageGraphBuilder = {
        nodes: new Map(),
        edges: [],
        edgeKeys: new Set()
      };

      this.buildLineageGraph(assetId, direction, depth, new Map(), graph);

      return {
        nodes: Array.from(graph.nodes.values()),
        edges: graph.edges
      };
    } catch (error) {
      this.logger.error('Failed to get lineage graph', { assetId, direction, depth, error });
      throw error;
    }
  }

  /**
   * Runs a depth-first search along downstream links over every stored
   * lineage record and reports each back-edge as a cycle.
   *
   * @returns One entry per detected back-edge: the ids on the current search
   *   path from the cycle's entry point to the asset that links back to it.
   * @throws Rethrows any error raised while traversing or logging.
   */
  async detectCircularDependencies(): Promise<string[][]> {
    try {
      const cycles: string[][] = [];
      const visited = new Set<string>();
      const recursionStack = new Set<string>();

      for (const assetId of this.lineages.keys()) {
        if (!visited.has(assetId)) {
          this.detectCycleDFS(assetId, visited, recursionStack, [], cycles);
        }
      }

      if (cycles.length > 0) {
        this.logger.warn('Circular dependencies detected', { cycleCount: cycles.length });
      }

      return cycles;
    } catch (error) {
      this.logger.error('Failed to detect circular dependencies', { error });
      throw error;
    }
  }

  /**
   * Replaces the asset map used to resolve ids to asset metadata
   * (typically supplied by the data catalog service).
   */
  setAssets(assets: Map<string, DataAsset>): void {
    this.assets = assets;
  }

  // ---------------------------------------------------------------------------
  // Private helpers
  // ---------------------------------------------------------------------------

  /** Creates a lineage record for `assetId` with no links and a `low` impact. */
  private createEmptyLineage(assetId: string): DataLineage {
    return {
      id: this.generateId(),
      assetId,
      upstreamAssets: [],
      downstreamAssets: [],
      transformations: [],
      impact: {
        downstreamAssets: [],
        affectedUsers: [],
        estimatedImpact: 'low',
        impactDescription: '',
        recommendations: []
      },
      createdAt: new Date(),
      updatedAt: new Date()
    };
  }

  /** Adds `downstreamAssetId` to the downstream list of `upstreamAssetId` if missing. */
  private addDownstreamToUpstream(upstreamAssetId: string, downstreamAssetId: string): void {
    const upstreamLineage =
      this.lineages.get(upstreamAssetId) ?? this.createEmptyLineage(upstreamAssetId);

    if (!upstreamLineage.downstreamAssets.includes(downstreamAssetId)) {
      upstreamLineage.downstreamAssets.push(downstreamAssetId);
      upstreamLineage.updatedAt = new Date();
      this.lineages.set(upstreamAssetId, upstreamLineage);
    }
  }

  /**
   * Adds `upstreamAssetId` to the upstream list of `downstreamAssetId` if
   * missing, attaching `transformation` to the downstream record when given.
   */
  private addUpstreamToDownstream(
    downstreamAssetId: string,
    upstreamAssetId: string,
    transformation?: LineageTransformation
  ): void {
    const downstreamLineage =
      this.lineages.get(downstreamAssetId) ?? this.createEmptyLineage(downstreamAssetId);

    if (!downstreamLineage.upstreamAssets.includes(upstreamAssetId)) {
      downstreamLineage.upstreamAssets.push(upstreamAssetId);
      if (transformation) {
        downstreamLineage.transformations.push(transformation);
      }
      downstreamLineage.updatedAt = new Date();
      this.lineages.set(downstreamAssetId, downstreamLineage);
    }
  }

  /** Drops `downstreamAssetId` from the downstream list of `upstreamAssetId`, if it has a record. */
  private removeDownstreamFromUpstream(upstreamAssetId: string, downstreamAssetId: string): void {
    const upstreamLineage = this.lineages.get(upstreamAssetId);
    if (upstreamLineage) {
      upstreamLineage.downstreamAssets = upstreamLineage.downstreamAssets.filter(
        id => id !== downstreamAssetId
      );
      upstreamLineage.updatedAt = new Date();
      this.lineages.set(upstreamAssetId, upstreamLineage);
    }
  }

  /** Drops `upstreamAssetId` from the upstream list of `downstreamAssetId`, if it has a record. */
  private removeUpstreamFromDownstream(downstreamAssetId: string, upstreamAssetId: string): void {
    const downstreamLineage = this.lineages.get(downstreamAssetId);
    if (downstreamLineage) {
      downstreamLineage.upstreamAssets = downstreamLineage.upstreamAssets.filter(
        id => id !== upstreamAssetId
      );
      downstreamLineage.updatedAt = new Date();
      this.lineages.set(downstreamAssetId, downstreamLineage);
    }
  }

  /** Starts a depth-limited traversal along `edgeKey` and returns the assets it finds. */
  private collectLinkedAssets(assetId: string, depth: number, edgeKey: LineageEdgeKey): DataAsset[] {
    const result: DataAsset[] = [];
    this.traverse(assetId, depth, edgeKey, new Map(), new Set(), result);
    return result;
  }

  /**
   * Depth-first step of {@link collectLinkedAssets}.
   *
   * @param bestDepth Largest remaining depth with which each id has been
   *   expanded so far. An id is expanded again only when it is reached with
   *   strictly more depth left, so an asset first met at the end of a long
   *   path is still fully explored when a shorter path reaches it later.
   * @param seenAssetIds Ids of assets already placed in `result`.
   */
  private traverse(
    assetId: string,
    remainingDepth: number,
    edgeKey: LineageEdgeKey,
    bestDepth: Map<string, number>,
    seenAssetIds: Set<DataAsset['id']>,
    result: DataAsset[]
  ): void {
    if (remainingDepth === 0) {
      return;
    }
    const expandedWith = bestDepth.get(assetId);
    // Written as !(a > b) so a NaN depth can never cause endless re-expansion.
    if (expandedWith !== undefined && !(remainingDepth > expandedWith)) {
      return;
    }
    bestDepth.set(assetId, remainingDepth);

    const lineage = this.lineages.get(assetId);
    if (!lineage) {
      return;
    }

    for (const linkedId of lineage[edgeKey]) {
      const asset = this.assets.get(linkedId);
      if (asset && !seenAssetIds.has(asset.id)) {
        seenAssetIds.add(asset.id);
        result.push(asset);
      }
      this.traverse(linkedId, remainingDepth - 1, edgeKey, bestDepth, seenAssetIds, result);
    }
  }

  /**
   * Depth-first step of {@link getLineageGraph}: registers the node for
   * `assetId` (when its asset is known), then records and follows its links
   * in the requested direction(s).
   *
   * @param bestDepth Same role as in {@link traverse}: an id is expanded again
   *   only when reached with strictly more depth left.
   */
  private buildLineageGraph(
    assetId: string,
    direction: LineageDirection,
    remainingDepth: number,
    bestDepth: Map<string, number>,
    graph: LineageGraphBuilder
  ): void {
    if (remainingDepth === 0) {
      return;
    }
    const expandedWith = bestDepth.get(assetId);
    if (expandedWith !== undefined && !(remainingDepth > expandedWith)) {
      return;
    }
    bestDepth.set(assetId, remainingDepth);

    const asset = this.assets.get(assetId);
    if (asset) {
      graph.nodes.set(assetId, {
        id: assetId,
        name: asset.name,
        type: asset.type,
        classification: asset.classification
      });
    }

    const lineage = this.lineages.get(assetId);
    if (!lineage) {
      return;
    }

    if (direction === 'upstream' || direction === 'both') {
      for (const upstreamId of lineage.upstreamAssets) {
        this.addGraphEdge(graph, { source: upstreamId, target: assetId, type: 'upstream' });
        this.buildLineageGraph(upstreamId, direction, remainingDepth - 1, bestDepth, graph);
      }
    }

    if (direction === 'downstream' || direction === 'both') {
      for (const downstreamId of lineage.downstreamAssets) {
        this.addGraphEdge(graph, { source: assetId, target: downstreamId, type: 'downstream' });
        this.buildLineageGraph(downstreamId, direction, remainingDepth - 1, bestDepth, graph);
      }
    }
  }

  /** Appends `edge` to the graph unless an identical edge is already present. */
  private addGraphEdge(graph: LineageGraphBuilder, edge: LineageGraphEdge): void {
    // JSON keeps the key unambiguous whatever characters the ids contain.
    const key = JSON.stringify([edge.source, edge.target, edge.type]);
    if (!graph.edgeKeys.has(key)) {
      graph.edgeKeys.add(key);
      graph.edges.push(edge);
    }
  }

  /**
   * Depth-first step of {@link detectCircularDependencies}.
   *
   * @param visited Ids whose exploration has started at any point.
   * @param recursionStack Ids on the current search path.
   * @param path The current search path, in order.
   * @param cycles Output list; a slice of `path` is appended per back-edge.
   */
  private detectCycleDFS(
    assetId: string,
    visited: Set<string>,
    recursionStack: Set<string>,
    path: string[],
    cycles: string[][]
  ): void {
    visited.add(assetId);
    recursionStack.add(assetId);
    path.push(assetId);

    const lineage = this.lineages.get(assetId);
    if (lineage) {
      for (const downstreamId of lineage.downstreamAssets) {
        if (!visited.has(downstreamId)) {
          this.detectCycleDFS(downstreamId, visited, recursionStack, path, cycles);
        } else if (recursionStack.has(downstreamId)) {
          // Back-edge to an id on the current path: everything from there on is a cycle.
          cycles.push(path.slice(path.indexOf(downstreamId)));
        }
      }
    }

    path.pop();
    recursionStack.delete(assetId);
  }

  /** Maps a downstream asset count to an impact level. */
  private classifyImpact(assetCount: number): 'low' | 'medium' | 'high' | 'critical' {
    if (assetCount > 20) {
      return 'critical';
    }
    if (assetCount > 10) {
      return 'high';
    }
    if (assetCount > 5) {
      return 'medium';
    }
    return 'low';
  }

  /** Builds the human-readable summary stored in an impact analysis. */
  private generateImpactDescription(assetCount: number, userCount: number): string {
    if (assetCount === 0) {
      return 'No downstream dependencies identified.';
    }
    return `Changes to this asset may affect ${assetCount} downstream asset(s) and ${userCount} user(s).`;
  }

  /** Returns the canned recommendations for an impact level. */
  private generateRecommendations(impact: string): string[] {
    switch (impact) {
      case 'critical':
        return [
          'Schedule maintenance window',
          'Notify all stakeholders in advance',
          'Prepare rollback plan',
          'Consider phased rollout'
        ];
      case 'high':
        return [
          'Notify affected users',
          'Test changes thoroughly',
          'Monitor downstream systems'
        ];
      case 'medium':
        return ['Test with subset of data', 'Monitor for issues'];
      default:
        return ['Standard testing procedures apply'];
    }
  }

  /** Generates a lineage record id from the current time and a random base-36 suffix. */
  private generateId(): string {
    // slice(2, 11) takes the same nine characters the deprecated substr(2, 9) did.
    return `lineage_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue