adding data-services

This commit is contained in:
Bojan Kucera 2025-06-03 07:42:48 -04:00
parent e3bfd05b90
commit 405b818c86
139 changed files with 55943 additions and 416 deletions

View file

@ -0,0 +1,360 @@
import { Context } from 'hono';
import { Logger } from '@stock-bot/utils';
import { DataCatalogService } from '../services/DataCatalogService';
import {
CreateDataAssetRequest,
UpdateDataAssetRequest,
DataAssetType,
DataClassification
} from '../types/DataCatalog';
/**
 * HTTP handlers for the data catalog's asset endpoints.
 *
 * Every public method is a Hono handler: it reads its input from the request
 * context, delegates to the injected `DataCatalogService`, and answers with
 * JSON. Unexpected failures are logged through the injected logger and
 * reported to the client as a generic 500 response.
 */
export class DataCatalogController {
  constructor(
    private dataCatalogService: DataCatalogService,
    private logger: Logger
  ) {}

  /**
   * Creates an asset from the JSON request body.
   *
   * Responds 400 when `name`, `type`, `description` or `owner` is missing or
   * empty, and 201 with the created asset on success.
   */
  async createAsset(c: Context) {
    try {
      const request: CreateDataAssetRequest = await c.req.json();

      // Validate required fields
      if (!request.name || !request.type || !request.description || !request.owner) {
        return c.json({ error: 'Missing required fields: name, type, description, owner' }, 400);
      }

      const asset = await this.dataCatalogService.createAsset(request);
      this.logger.info('Asset created via API', {
        assetId: asset.id,
        name: asset.name,
        type: asset.type
      });
      return c.json(asset, 201);
    } catch (error) {
      this.logger.error('Failed to create asset', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Returns the asset identified by the `id` route parameter.
   * Responds 400 without an id and 404 when the service finds no asset.
   */
  async getAsset(c: Context) {
    try {
      const assetId = c.req.param('id');
      if (!assetId) {
        return c.json({ error: 'Asset ID is required' }, 400);
      }

      const asset = await this.dataCatalogService.getAsset(assetId);
      if (!asset) {
        return c.json({ error: 'Asset not found' }, 404);
      }
      return c.json(asset);
    } catch (error) {
      this.logger.error('Failed to get asset', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Applies the JSON body as an update to the asset named by the `id` route
   * parameter. Responds 400 without an id and 404 when the service returns
   * no asset.
   */
  async updateAsset(c: Context) {
    try {
      const assetId = c.req.param('id');
      // Validate the id before touching the body so a missing id is reported
      // as 400 even when the body cannot be parsed.
      if (!assetId) {
        return c.json({ error: 'Asset ID is required' }, 400);
      }

      const updates: UpdateDataAssetRequest = await c.req.json();
      const asset = await this.dataCatalogService.updateAsset(assetId, updates);
      if (!asset) {
        return c.json({ error: 'Asset not found' }, 404);
      }

      this.logger.info('Asset updated via API', {
        assetId,
        changes: Object.keys(updates)
      });
      return c.json(asset);
    } catch (error) {
      this.logger.error('Failed to update asset', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Deletes the asset named by the `id` route parameter.
   *
   * A service error whose message contains "not found" is mapped to 404;
   * any other failure becomes a 500.
   */
  async deleteAsset(c: Context) {
    try {
      const assetId = c.req.param('id');
      if (!assetId) {
        return c.json({ error: 'Asset ID is required' }, 400);
      }

      await this.dataCatalogService.deleteAsset(assetId);
      this.logger.info('Asset deleted via API', { assetId });
      return c.json({ message: 'Asset deleted successfully' });
    } catch (error) {
      this.logger.error('Failed to delete asset', { error });
      if (error instanceof Error && error.message.includes('not found')) {
        return c.json({ error: 'Asset not found' }, 404);
      }
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Lists assets, optionally filtered by the `type`, `owner`,
   * `classification` and (repeatable) `tags` query parameters.
   * The applied filters are echoed back in the response.
   */
  async listAssets(c: Context) {
    try {
      const query = c.req.query();
      const filters: Record<string, any> = {};

      // Parse query parameters
      if (query.type) filters.type = query.type;
      if (query.owner) filters.owner = query.owner;
      if (query.classification) filters.classification = query.classification;

      const tags = this.readTags(c);
      if (tags.length > 0) filters.tags = tags;

      const assets = await this.dataCatalogService.listAssets(filters);
      return c.json({
        assets,
        total: assets.length,
        filters: filters
      });
    } catch (error) {
      this.logger.error('Failed to list assets', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Searches assets using the required `q` query parameter, optionally
   * narrowed by `type`, `owner` and `classification`.
   */
  async searchAssets(c: Context) {
    try {
      const query = c.req.query('q');
      const queryParams = c.req.query();
      if (!query) {
        return c.json({ error: 'Search query is required' }, 400);
      }

      const filters: Record<string, any> = {};
      if (queryParams.type) filters.type = queryParams.type;
      if (queryParams.owner) filters.owner = queryParams.owner;
      if (queryParams.classification) filters.classification = queryParams.classification;

      const assets = await this.dataCatalogService.searchAssets(query, filters);
      this.logger.info('Asset search performed', {
        query,
        filters,
        resultCount: assets.length
      });
      return c.json({
        assets,
        total: assets.length,
        query,
        filters
      });
    } catch (error) {
      this.logger.error('Failed to search assets', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /** Lists the assets belonging to the `owner` route parameter. */
  async getAssetsByOwner(c: Context) {
    try {
      const owner = c.req.param('owner');
      if (!owner) {
        return c.json({ error: 'Owner is required' }, 400);
      }

      const assets = await this.dataCatalogService.getAssetsByOwner(owner);
      return c.json({
        assets,
        total: assets.length,
        owner
      });
    } catch (error) {
      this.logger.error('Failed to get assets by owner', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Lists the assets of the `type` route parameter.
   * Responds 400 when the value is not a member of `DataAssetType`.
   */
  async getAssetsByType(c: Context) {
    try {
      const type = c.req.param('type') as DataAssetType;
      if (!type) {
        return c.json({ error: 'Asset type is required' }, 400);
      }
      if (!Object.values(DataAssetType).includes(type)) {
        return c.json({ error: 'Invalid asset type' }, 400);
      }

      const assets = await this.dataCatalogService.getAssetsByType(type);
      return c.json({
        assets,
        total: assets.length,
        type
      });
    } catch (error) {
      this.logger.error('Failed to get assets by type', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Lists the assets with the `classification` route parameter.
   * Responds 400 when the value is not a member of `DataClassification`.
   */
  async getAssetsByClassification(c: Context) {
    try {
      const classification = c.req.param('classification') as DataClassification;
      if (!classification) {
        return c.json({ error: 'Classification is required' }, 400);
      }
      if (!Object.values(DataClassification).includes(classification)) {
        return c.json({ error: 'Invalid classification' }, 400);
      }

      const assets = await this.dataCatalogService.getAssetsByClassification(classification);
      return c.json({
        assets,
        total: assets.length,
        classification
      });
    } catch (error) {
      this.logger.error('Failed to get assets by classification', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Lists the assets matching the `tags` query parameter, which may be
   * repeated (`?tags=a&tags=b`). Responds 400 when no tag is supplied.
   */
  async getAssetsByTags(c: Context) {
    try {
      const tags = this.readTags(c);
      if (tags.length === 0) {
        return c.json({ error: 'Tags parameter is required' }, 400);
      }

      const assets = await this.dataCatalogService.getAssetsByTags(tags);
      return c.json({
        assets,
        total: assets.length,
        tags
      });
    } catch (error) {
      this.logger.error('Failed to get assets by tags', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Returns a summary of usage, quality, governance and lineage figures for
   * the asset named by the `id` route parameter.
   */
  async getAssetMetrics(c: Context) {
    try {
      const assetId = c.req.param('id');
      if (!assetId) {
        return c.json({ error: 'Asset ID is required' }, 400);
      }

      const asset = await this.dataCatalogService.getAsset(assetId);
      if (!asset) {
        return c.json({ error: 'Asset not found' }, 404);
      }

      const metrics = {
        id: asset.id,
        name: asset.name,
        type: asset.type,
        classification: asset.classification,
        usage: {
          accessCount: asset.usage.accessCount,
          uniqueUsers: asset.usage.uniqueUsers,
          lastAccessed: asset.usage.lastAccessed,
          usageTrend: asset.usage.usageTrend
        },
        quality: {
          overallScore: asset.quality.overallScore,
          lastAssessment: asset.quality.lastAssessment,
          // Only unresolved issues count.
          issueCount: asset.quality.issues.filter(issue => !issue.resolved).length
        },
        governance: {
          policiesApplied: asset.governance.policies.length,
          // Compliant only when every recorded check passed (an empty list
          // therefore counts as compliant).
          complianceStatus: asset.governance.compliance.every(check => check.status === 'passed')
            ? 'compliant'
            : 'non-compliant',
          auditEntries: asset.governance.audit.length
        },
        lineage: {
          upstreamCount: asset.lineage.upstreamAssets.length,
          downstreamCount: asset.lineage.downstreamAssets.length
        }
      };
      return c.json(metrics);
    } catch (error) {
      this.logger.error('Failed to get asset metrics', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Returns catalog-wide statistics: totals grouped by type, classification
   * and owner, the ten most recently created assets and the ten most
   * accessed assets.
   */
  async getCatalogStatistics(c: Context) {
    try {
      const allAssets = await this.dataCatalogService.listAssets();

      // Sort copies: Array.prototype.sort works in place and the array is
      // owned by the service, so sorting it directly would reorder whatever
      // the service handed out.
      const newestFirst = [...allAssets].sort(
        (a, b) => b.createdAt.getTime() - a.createdAt.getTime()
      );
      const mostAccessedFirst = [...allAssets].sort(
        (a, b) => b.usage.accessCount - a.usage.accessCount
      );

      const statistics = {
        totalAssets: allAssets.length,
        assetsByType: this.groupByProperty(allAssets, 'type'),
        assetsByClassification: this.groupByProperty(allAssets, 'classification'),
        assetsByOwner: this.groupByProperty(allAssets, 'owner'),
        recentAssets: newestFirst.slice(0, 10).map(asset => ({
          id: asset.id,
          name: asset.name,
          type: asset.type,
          owner: asset.owner,
          createdAt: asset.createdAt
        })),
        mostAccessed: mostAccessedFirst.slice(0, 10).map(asset => ({
          id: asset.id,
          name: asset.name,
          type: asset.type,
          accessCount: asset.usage.accessCount,
          lastAccessed: asset.usage.lastAccessed
        }))
      };
      return c.json(statistics);
    } catch (error) {
      this.logger.error('Failed to get catalog statistics', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Reads every non-empty `tags` query value. `c.req.query()` only exposes
   * one value per key, so the multi-value accessor is used to keep repeated
   * parameters.
   */
  private readTags(c: Context): string[] {
    const values: string[] = c.req.queries('tags') ?? [];
    return values.filter(tag => tag.length > 0);
  }

  /**
   * Counts assets per distinct value of `property`.
   *
   * Counting happens in a Map rather than a plain object so that values such
   * as "constructor" or "toString" do not collide with inherited
   * `Object.prototype` members.
   *
   * @param assets Assets to group.
   * @param property Name of the asset property to group by.
   * @returns Object mapping each (stringified) property value to its count.
   */
  private groupByProperty(assets: any[], property: string): Record<string, number> {
    const counts = new Map<string, number>();
    for (const asset of assets) {
      const key = String(asset[property]);
      counts.set(key, (counts.get(key) ?? 0) + 1);
    }
    return Object.fromEntries(counts);
  }
}

View file

@ -0,0 +1,414 @@
import { Hono } from 'hono';
import { DataGovernanceService } from '../services/DataGovernanceService';
import {
GovernancePolicy,
ComplianceCheck,
AccessRequest,
DataSubjectRequest,
AuditLog
} from '../types/DataCatalog';
/**
 * Hono sub-application exposing the data-governance HTTP API: policies,
 * compliance, access requests, privacy operations, audit logs, retention and
 * metrics.
 *
 * Every route delegates to `DataGovernanceService` and wraps the result in a
 * `{ success, data | message | error }` envelope. Any exception thrown while
 * handling a request is logged to the console and answered with a 500 that
 * carries the error message.
 */
export class GovernanceController {
  private app: Hono;
  private governanceService: DataGovernanceService;

  /**
   * @param governanceService Service used by every route. When omitted, a
   *   new `DataGovernanceService` is created, as before.
   */
  constructor(governanceService: DataGovernanceService = new DataGovernanceService()) {
    this.app = new Hono();
    this.governanceService = governanceService;
    this.setupRoutes();
  }

  /** Logs a handler failure and builds the body sent with the 500 response. */
  private failure(logMessage: string, error: unknown): { success: boolean; error: string } {
    console.error(logMessage, error);
    return {
      success: false,
      error: error instanceof Error ? error.message : 'Unknown error'
    };
  }

  /**
   * Parses a pagination query value as a base-10 integer.
   * Missing, empty, non-numeric or negative values yield `fallback`, so the
   * service never receives NaN.
   */
  private parseCount(raw: string | undefined, fallback: number): number {
    if (raw === undefined || raw === '') {
      return fallback;
    }
    const parsed = Number.parseInt(raw, 10);
    return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
  }

  /** Parses a date query value; returns `undefined` when it is not a valid date. */
  private parseDate(raw: string): Date | undefined {
    const parsed = new Date(raw);
    return Number.isNaN(parsed.getTime()) ? undefined : parsed;
  }

  /** Registers every governance route on the internal Hono app. */
  private setupRoutes() {
    // Create governance policy
    this.app.post('/policies', async (c) => {
      try {
        const policy: Omit<GovernancePolicy, 'id' | 'createdAt' | 'updatedAt'> = await c.req.json();
        const createdPolicy = await this.governanceService.createPolicy(policy);
        return c.json({ success: true, data: createdPolicy });
      } catch (error) {
        return c.json(this.failure('Error creating governance policy:', error), 500);
      }
    });

    // Get governance policies
    this.app.get('/policies', async (c) => {
      try {
        const type = c.req.query('type');
        const category = c.req.query('category');
        const active = c.req.query('active');

        const filters: any = {};
        if (type) filters.type = type;
        if (category) filters.category = category;
        // Only filter on `active` when the caller supplied it; otherwise an
        // omitted parameter would silently restrict results to inactive
        // policies.
        if (active !== undefined) filters.active = active === 'true';

        const policies = await this.governanceService.getPolicies(filters);
        return c.json({ success: true, data: policies });
      } catch (error) {
        return c.json(this.failure('Error getting governance policies:', error), 500);
      }
    });

    // Update governance policy
    this.app.put('/policies/:policyId', async (c) => {
      try {
        const policyId = c.req.param('policyId');
        const updates: Partial<GovernancePolicy> = await c.req.json();
        const updatedPolicy = await this.governanceService.updatePolicy(policyId, updates);
        return c.json({ success: true, data: updatedPolicy });
      } catch (error) {
        return c.json(this.failure('Error updating governance policy:', error), 500);
      }
    });

    // Delete governance policy
    this.app.delete('/policies/:policyId', async (c) => {
      try {
        const policyId = c.req.param('policyId');
        await this.governanceService.deletePolicy(policyId);
        return c.json({ success: true, message: 'Governance policy deleted successfully' });
      } catch (error) {
        return c.json(this.failure('Error deleting governance policy:', error), 500);
      }
    });

    // Apply policy to asset
    this.app.post('/policies/:policyId/apply/:assetId', async (c) => {
      try {
        const policyId = c.req.param('policyId');
        const assetId = c.req.param('assetId');
        await this.governanceService.applyPolicy(policyId, assetId);
        return c.json({ success: true, message: 'Policy applied successfully' });
      } catch (error) {
        return c.json(this.failure('Error applying policy:', error), 500);
      }
    });

    // Check compliance for asset
    this.app.post('/compliance/check', async (c) => {
      try {
        const request: { assetId: string; policyIds?: string[] } = await c.req.json();
        const complianceResult = await this.governanceService.checkCompliance(
          request.assetId,
          request.policyIds
        );
        return c.json({ success: true, data: complianceResult });
      } catch (error) {
        return c.json(this.failure('Error checking compliance:', error), 500);
      }
    });

    // Get compliance violations
    this.app.get('/compliance/violations', async (c) => {
      try {
        const assetId = c.req.query('assetId');
        const severity = c.req.query('severity');
        const status = c.req.query('status');
        const limit = this.parseCount(c.req.query('limit'), 100);
        const offset = this.parseCount(c.req.query('offset'), 0);

        const filters: any = {};
        if (assetId) filters.assetId = assetId;
        if (severity) filters.severity = severity;
        if (status) filters.status = status;

        const violations = await this.governanceService.getComplianceViolations(
          filters,
          { limit, offset }
        );
        return c.json({ success: true, data: violations });
      } catch (error) {
        return c.json(this.failure('Error getting compliance violations:', error), 500);
      }
    });

    // Request access to asset
    this.app.post('/access/request', async (c) => {
      try {
        const request: Omit<AccessRequest, 'id' | 'requestedAt' | 'status'> = await c.req.json();
        const accessRequest = await this.governanceService.requestAccess(request);
        return c.json({ success: true, data: accessRequest });
      } catch (error) {
        return c.json(this.failure('Error requesting access:', error), 500);
      }
    });

    // Approve/deny access request
    this.app.patch('/access/:requestId', async (c) => {
      try {
        const requestId = c.req.param('requestId');
        const { action, reviewedBy, reviewComments } = await c.req.json();
        const updatedRequest = await this.governanceService.reviewAccessRequest(
          requestId,
          action,
          reviewedBy,
          reviewComments
        );
        return c.json({ success: true, data: updatedRequest });
      } catch (error) {
        return c.json(this.failure('Error reviewing access request:', error), 500);
      }
    });

    // Check access authorization
    this.app.post('/access/check', async (c) => {
      try {
        const { userId, assetId, action } = await c.req.json();
        const authorized = await this.governanceService.checkAccess(userId, assetId, action);
        return c.json({
          success: true,
          data: { userId, assetId, action, authorized }
        });
      } catch (error) {
        return c.json(this.failure('Error checking access:', error), 500);
      }
    });

    // Handle data subject request (GDPR)
    this.app.post('/privacy/subject-request', async (c) => {
      try {
        const request: Omit<DataSubjectRequest, 'id' | 'submittedAt' | 'status'> = await c.req.json();
        const subjectRequest = await this.governanceService.handleDataSubjectRequest(request);
        return c.json({ success: true, data: subjectRequest });
      } catch (error) {
        return c.json(this.failure('Error handling data subject request:', error), 500);
      }
    });

    // Anonymize asset data
    this.app.post('/privacy/anonymize/:assetId', async (c) => {
      try {
        const assetId = c.req.param('assetId');
        const { fields, method, requestedBy } = await c.req.json();
        const result = await this.governanceService.anonymizeData(
          assetId,
          fields,
          method,
          requestedBy
        );
        return c.json({ success: true, data: result });
      } catch (error) {
        return c.json(this.failure('Error anonymizing data:', error), 500);
      }
    });

    // Get audit logs
    this.app.get('/audit/logs', async (c) => {
      try {
        const assetId = c.req.query('assetId');
        const userId = c.req.query('userId');
        const action = c.req.query('action');
        const startDate = c.req.query('startDate');
        const endDate = c.req.query('endDate');
        const limit = this.parseCount(c.req.query('limit'), 100);
        const offset = this.parseCount(c.req.query('offset'), 0);

        const filters: any = {};
        if (assetId) filters.assetId = assetId;
        if (userId) filters.userId = userId;
        if (action) filters.action = action;

        // Reject unparseable dates instead of handing the service an
        // Invalid Date.
        if (startDate) {
          const parsedStart = this.parseDate(startDate);
          if (!parsedStart) {
            return c.json({ success: false, error: 'Invalid startDate' }, 400);
          }
          filters.startDate = parsedStart;
        }
        if (endDate) {
          const parsedEnd = this.parseDate(endDate);
          if (!parsedEnd) {
            return c.json({ success: false, error: 'Invalid endDate' }, 400);
          }
          filters.endDate = parsedEnd;
        }

        const logs = await this.governanceService.getAuditLogs(filters, { limit, offset });
        return c.json({ success: true, data: logs });
      } catch (error) {
        return c.json(this.failure('Error getting audit logs:', error), 500);
      }
    });

    // Log access event
    this.app.post('/audit/log', async (c) => {
      try {
        const logEntry: Omit<AuditLog, 'id' | 'timestamp'> = await c.req.json();
        const logged = await this.governanceService.logAccess(logEntry);
        return c.json({ success: true, data: logged });
      } catch (error) {
        return c.json(this.failure('Error logging access event:', error), 500);
      }
    });

    // Get retention policies
    this.app.get('/retention/policies', async (c) => {
      try {
        const assetType = c.req.query('assetType');
        const policies = await this.governanceService.getRetentionPolicies(assetType);
        return c.json({ success: true, data: policies });
      } catch (error) {
        return c.json(this.failure('Error getting retention policies:', error), 500);
      }
    });

    // Apply retention policy
    this.app.post('/retention/apply', async (c) => {
      try {
        const { assetId, policyId, requestedBy } = await c.req.json();
        const result = await this.governanceService.applyRetentionPolicy(
          assetId,
          policyId,
          requestedBy
        );
        return c.json({ success: true, data: result });
      } catch (error) {
        return c.json(this.failure('Error applying retention policy:', error), 500);
      }
    });

    // Get governance metrics
    this.app.get('/metrics', async (c) => {
      try {
        // Defaults to the '30d' range when none is requested.
        const timeRange = c.req.query('timeRange') || '30d';
        const metrics = await this.governanceService.getGovernanceMetrics(timeRange);
        return c.json({ success: true, data: metrics });
      } catch (error) {
        return c.json(this.failure('Error getting governance metrics:', error), 500);
      }
    });
  }

  /** Returns the Hono app carrying the governance routes, for mounting by the caller. */
  public getApp(): Hono {
    return this.app;
  }
}

View file

@ -0,0 +1,172 @@
import { Hono } from 'hono';
/**
 * Hono sub-application exposing health endpoints for the data-catalog
 * service: a basic status (`/`), a detailed status with dependency probes
 * (`/detailed`), a readiness check (`/ready`) and a liveness check (`/live`).
 *
 * The dependency probes are currently simulated with short timers; they do
 * not contact any real backend yet.
 */
export class HealthController {
  private app: Hono;

  constructor() {
    this.app = new Hono();
    this.setupRoutes();
  }

  /** Registers the health routes on the internal Hono app. */
  private setupRoutes() {
    // Basic health check
    this.app.get('/', async (c) => {
      return c.json({
        service: 'data-catalog',
        status: 'healthy',
        timestamp: new Date().toISOString(),
        version: process.env.SERVICE_VERSION || '1.0.0'
      });
    });

    // Detailed health check
    this.app.get('/detailed', async (c) => {
      try {
        // Snapshot process figures before probing, matching the order in
        // which the response fields were originally evaluated.
        const timestamp = new Date().toISOString();
        const version = process.env.SERVICE_VERSION || '1.0.0';
        const uptime = process.uptime();
        const memory = process.memoryUsage();

        // The probes are independent, so run them concurrently rather than
        // paying the sum of their latencies.
        const [database, search, eventBus] = await Promise.all([
          this.checkDatabase(),
          this.checkSearchService(),
          this.checkEventBus()
        ]);

        const healthStatus = {
          service: 'data-catalog',
          status: 'healthy',
          timestamp,
          version,
          uptime,
          memory,
          dependencies: { database, search, eventBus }
        };

        // Determine overall status based on dependencies
        const hasUnhealthyDependencies = Object.values(healthStatus.dependencies)
          .some(dep => dep.status !== 'healthy');
        if (hasUnhealthyDependencies) {
          healthStatus.status = 'degraded';
        }

        // Anything other than fully healthy is reported as 503.
        const statusCode = healthStatus.status === 'healthy' ? 200 : 503;
        return c.json(healthStatus, statusCode);
      } catch (error) {
        console.error('Health check error:', error);
        return c.json({
          service: 'data-catalog',
          status: 'unhealthy',
          timestamp: new Date().toISOString(),
          error: error instanceof Error ? error.message : 'Unknown error'
        }, 503);
      }
    });

    // Readiness check
    this.app.get('/ready', async (c) => {
      try {
        // Check if service is ready to accept requests; the event bus is
        // not part of the readiness decision.
        const readyChecks = await Promise.all([
          this.checkDatabase(),
          this.checkSearchService()
        ]);
        const isReady = readyChecks.every(check => check.status === 'healthy');

        if (isReady) {
          return c.json({
            service: 'data-catalog',
            ready: true,
            timestamp: new Date().toISOString()
          });
        }
        return c.json({
          service: 'data-catalog',
          ready: false,
          timestamp: new Date().toISOString(),
          checks: readyChecks
        }, 503);
      } catch (error) {
        console.error('Readiness check error:', error);
        return c.json({
          service: 'data-catalog',
          ready: false,
          timestamp: new Date().toISOString(),
          error: error instanceof Error ? error.message : 'Unknown error'
        }, 503);
      }
    });

    // Liveness check
    this.app.get('/live', async (c) => {
      return c.json({
        service: 'data-catalog',
        alive: true,
        timestamp: new Date().toISOString()
      });
    });
  }

  /**
   * Shared probe implementation: waits `simulatedLatencyMs` and reports the
   * dependency as healthy, or as unhealthy if the wait throws.
   *
   * @param name Dependency name reported in the result.
   * @param simulatedLatencyMs Delay standing in for the real round trip.
   * @returns The dependency name, its status and the elapsed milliseconds.
   */
  private async probe(
    name: string,
    simulatedLatencyMs: number
  ): Promise<{ name: string; status: string; responseTime?: number }> {
    const start = Date.now();
    try {
      // Placeholder for a real connectivity check.
      await new Promise(resolve => setTimeout(resolve, simulatedLatencyMs));
      return { name, status: 'healthy', responseTime: Date.now() - start };
    } catch (error) {
      return { name, status: 'unhealthy', responseTime: Date.now() - start };
    }
  }

  /**
   * Simulated database check.
   * In a real implementation this would ping the actual database.
   */
  private async checkDatabase(): Promise<{ name: string; status: string; responseTime?: number }> {
    return this.probe('database', 10);
  }

  /**
   * Simulated search service check.
   * In a real implementation this would check search index health.
   */
  private async checkSearchService(): Promise<{ name: string; status: string; responseTime?: number }> {
    return this.probe('search', 5);
  }

  /**
   * Simulated event bus check.
   * In a real implementation this would check message broker connectivity.
   */
  private async checkEventBus(): Promise<{ name: string; status: string; responseTime?: number }> {
    return this.probe('eventBus', 3);
  }

  /** Returns the Hono app carrying the health routes, for mounting by the caller. */
  public getApp(): Hono {
    return this.app;
  }
}

View file

@ -0,0 +1,211 @@
import { Hono } from 'hono';
import { DataLineageService } from '../services/DataLineageService';
import { CreateLineageRequest, LineageQuery, ImpactAnalysisQuery } from '../types/DataCatalog';
/**
 * Hono sub-application exposing the data-lineage HTTP API: creating and
 * deleting lineage relationships, walking upstream/downstream dependencies,
 * impact analysis, graph retrieval, cycle checks and statistics.
 *
 * Every route delegates to `DataLineageService` and wraps the result in a
 * `{ success, data | message | error }` envelope. Any exception thrown while
 * handling a request is logged to the console and answered with a 500 that
 * carries the error message.
 */
export class LineageController {
  private app: Hono;
  private lineageService: DataLineageService;

  /**
   * @param lineageService Service used by every route. When omitted, a new
   *   `DataLineageService` is created, as before.
   */
  constructor(lineageService: DataLineageService = new DataLineageService()) {
    this.app = new Hono();
    this.lineageService = lineageService;
    this.setupRoutes();
  }

  /** Logs a handler failure and builds the body sent with the 500 response. */
  private failure(logMessage: string, error: unknown): { success: boolean; error: string } {
    console.error(logMessage, error);
    return {
      success: false,
      error: error instanceof Error ? error.message : 'Unknown error'
    };
  }

  /**
   * Parses the `depth` query value as a base-10 integer.
   * Missing, empty, non-numeric or negative values yield `fallback`, so the
   * service never receives NaN.
   */
  private parseDepth(raw: string | undefined, fallback: number): number {
    if (raw === undefined || raw === '') {
      return fallback;
    }
    const parsed = Number.parseInt(raw, 10);
    return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
  }

  /** Registers every lineage route on the internal Hono app. */
  private setupRoutes() {
    // Create lineage relationship
    this.app.post('/', async (c) => {
      try {
        const request: CreateLineageRequest = await c.req.json();
        const lineage = await this.lineageService.createLineage(request);
        return c.json({ success: true, data: lineage });
      } catch (error) {
        return c.json(this.failure('Error creating lineage:', error), 500);
      }
    });

    // Get lineage for asset
    this.app.get('/assets/:assetId', async (c) => {
      try {
        const assetId = c.req.param('assetId');

        // Validate instead of casting: an arbitrary string must not reach
        // the service typed as a direction.
        const direction = c.req.query('direction') || 'both';
        if (direction !== 'upstream' && direction !== 'downstream' && direction !== 'both') {
          return c.json({
            success: false,
            error: 'Invalid direction: expected upstream, downstream or both'
          }, 400);
        }

        const lineage = await this.lineageService.getAssetLineage(assetId, {
          direction,
          // `|| 10` keeps the long-standing behaviour of treating depth=0
          // as "not specified" on this route.
          depth: this.parseDepth(c.req.query('depth'), 10) || 10
        });
        return c.json({ success: true, data: lineage });
      } catch (error) {
        return c.json(this.failure('Error getting asset lineage:', error), 500);
      }
    });

    // Get upstream dependencies
    this.app.get('/assets/:assetId/upstream', async (c) => {
      try {
        const assetId = c.req.param('assetId');
        const depth = this.parseDepth(c.req.query('depth'), 5);
        const upstream = await this.lineageService.getUpstreamDependencies(assetId, depth);
        return c.json({ success: true, data: upstream });
      } catch (error) {
        return c.json(this.failure('Error getting upstream dependencies:', error), 500);
      }
    });

    // Get downstream dependencies
    this.app.get('/assets/:assetId/downstream', async (c) => {
      try {
        const assetId = c.req.param('assetId');
        const depth = this.parseDepth(c.req.query('depth'), 5);
        const downstream = await this.lineageService.getDownstreamDependencies(assetId, depth);
        return c.json({ success: true, data: downstream });
      } catch (error) {
        return c.json(this.failure('Error getting downstream dependencies:', error), 500);
      }
    });

    // Perform impact analysis
    this.app.post('/impact-analysis', async (c) => {
      try {
        const query: ImpactAnalysisQuery = await c.req.json();
        const analysis = await this.lineageService.performImpactAnalysis(query);
        return c.json({ success: true, data: analysis });
      } catch (error) {
        return c.json(this.failure('Error performing impact analysis:', error), 500);
      }
    });

    // Get lineage graph
    this.app.get('/graph', async (c) => {
      try {
        // Drop blank entries so `?assetIds=` or `a,,b` cannot smuggle an
        // empty id past the "required" check below.
        const assetIds = (c.req.query('assetIds') ?? '')
          .split(',')
          .map(id => id.trim())
          .filter(id => id.length > 0);
        const depth = this.parseDepth(c.req.query('depth'), 3);

        if (assetIds.length === 0) {
          return c.json({ success: false, error: 'Asset IDs are required' }, 400);
        }

        const graph = await this.lineageService.getLineageGraph(assetIds, depth);
        return c.json({ success: true, data: graph });
      } catch (error) {
        return c.json(this.failure('Error getting lineage graph:', error), 500);
      }
    });

    // Check for circular dependencies
    this.app.get('/assets/:assetId/circular-check', async (c) => {
      try {
        const assetId = c.req.param('assetId');
        const hasCycles = await this.lineageService.hasCircularDependencies(assetId);
        return c.json({
          success: true,
          data: { assetId, hasCircularDependencies: hasCycles }
        });
      } catch (error) {
        return c.json(this.failure('Error checking circular dependencies:', error), 500);
      }
    });

    // Delete lineage relationship
    this.app.delete('/:lineageId', async (c) => {
      try {
        const lineageId = c.req.param('lineageId');
        await this.lineageService.deleteLineage(lineageId);
        return c.json({ success: true, message: 'Lineage relationship deleted successfully' });
      } catch (error) {
        return c.json(this.failure('Error deleting lineage:', error), 500);
      }
    });

    // Get lineage statistics
    this.app.get('/stats', async (c) => {
      try {
        const stats = await this.lineageService.getLineageStatistics();
        return c.json({ success: true, data: stats });
      } catch (error) {
        return c.json(this.failure('Error getting lineage statistics:', error), 500);
      }
    });
  }

  /** Returns the Hono app carrying the lineage routes, for mounting by the caller. */
  public getApp(): Hono {
    return this.app;
  }
}

View file

@ -0,0 +1,321 @@
import { Hono } from 'hono';
import { DataQualityService } from '../services/DataQualityService';
import {
QualityAssessmentRequest,
QualityRule,
QualityIssue,
QualityReportRequest
} from '../types/DataCatalog';
export class QualityController {
private app: Hono;
private qualityService: DataQualityService;
constructor() {
this.app = new Hono();
this.qualityService = new DataQualityService();
this.setupRoutes();
}
private setupRoutes() {
// Assess asset quality
this.app.post('/assess', async (c) => {
try {
const request: QualityAssessmentRequest = await c.req.json();
const assessment = await this.qualityService.assessQuality(request);
return c.json({
success: true,
data: assessment
});
} catch (error) {
console.error('Error assessing quality:', error);
return c.json({
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
}, 500);
}
});
// Get quality assessment for asset
this.app.get('/assets/:assetId', async (c) => {
try {
const assetId = c.req.param('assetId');
const assessment = await this.qualityService.getQualityAssessment(assetId);
if (!assessment) {
return c.json({
success: false,
error: 'Quality assessment not found'
}, 404);
}
return c.json({
success: true,
data: assessment
});
} catch (error) {
console.error('Error getting quality assessment:', error);
return c.json({
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
}, 500);
}
});
// Create quality rule
this.app.post('/rules', async (c) => {
try {
const rule: Omit<QualityRule, 'id' | 'createdAt' | 'updatedAt'> = await c.req.json();
const createdRule = await this.qualityService.createQualityRule(rule);
return c.json({
success: true,
data: createdRule
});
} catch (error) {
console.error('Error creating quality rule:', error);
return c.json({
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
}, 500);
}
});
// Get quality rules
this.app.get('/rules', async (c) => {
try {
const assetType = c.req.query('assetType');
const dimension = c.req.query('dimension');
const active = c.req.query('active') === 'true';
const filters: any = {};
if (assetType) filters.assetType = assetType;
if (dimension) filters.dimension = dimension;
if (active !== undefined) filters.active = active;
const rules = await this.qualityService.getQualityRules(filters);
return c.json({
success: true,
data: rules
});
} catch (error) {
console.error('Error getting quality rules:', error);
return c.json({
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
}, 500);
}
});
// Update quality rule
this.app.put('/rules/:ruleId', async (c) => {
try {
const ruleId = c.req.param('ruleId');
const updates: Partial<QualityRule> = await c.req.json();
const updatedRule = await this.qualityService.updateQualityRule(ruleId, updates);
return c.json({
success: true,
data: updatedRule
});
} catch (error) {
console.error('Error updating quality rule:', error);
return c.json({
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
}, 500);
}
});
// Delete quality rule
this.app.delete('/rules/:ruleId', async (c) => {
try {
const ruleId = c.req.param('ruleId');
await this.qualityService.deleteQualityRule(ruleId);
return c.json({
success: true,
message: 'Quality rule deleted successfully'
});
} catch (error) {
console.error('Error deleting quality rule:', error);
return c.json({
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
}, 500);
}
});
// Validate quality rules for asset
this.app.post('/validate/:assetId', async (c) => {
try {
const assetId = c.req.param('assetId');
const data = await c.req.json();
const validationResults = await this.qualityService.validateQualityRules(assetId, data);
return c.json({
success: true,
data: validationResults
});
} catch (error) {
console.error('Error validating quality rules:', error);
return c.json({
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
}, 500);
}
});
// Report quality issue
this.app.post('/issues', async (c) => {
try {
const issue: Omit<QualityIssue, 'id' | 'reportedAt' | 'updatedAt'> = await c.req.json();
const reportedIssue = await this.qualityService.reportQualityIssue(issue);
return c.json({
success: true,
data: reportedIssue
});
} catch (error) {
console.error('Error reporting quality issue:', error);
return c.json({
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
}, 500);
}
});
// Get quality issues
this.app.get('/issues', async (c) => {
try {
const assetId = c.req.query('assetId');
const severity = c.req.query('severity');
const status = c.req.query('status');
const dimension = c.req.query('dimension');
const limit = c.req.query('limit') ? parseInt(c.req.query('limit')!) : 100;
const offset = c.req.query('offset') ? parseInt(c.req.query('offset')!) : 0;
const filters: any = {};
if (assetId) filters.assetId = assetId;
if (severity) filters.severity = severity;
if (status) filters.status = status;
if (dimension) filters.dimension = dimension;
const issues = await this.qualityService.getQualityIssues(filters, { limit, offset });
return c.json({
success: true,
data: issues
});
} catch (error) {
console.error('Error getting quality issues:', error);
return c.json({
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
}, 500);
}
});
// Resolve quality issue
this.app.patch('/issues/:issueId/resolve', async (c) => {
try {
const issueId = c.req.param('issueId');
const { resolution, resolvedBy } = await c.req.json();
const resolvedIssue = await this.qualityService.resolveQualityIssue(
issueId,
resolution,
resolvedBy
);
return c.json({
success: true,
data: resolvedIssue
});
} catch (error) {
console.error('Error resolving quality issue:', error);
return c.json({
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
}, 500);
}
});
// Get quality trends
this.app.get('/trends', async (c) => {
try {
const assetId = c.req.query('assetId');
const dimension = c.req.query('dimension');
const timeRange = c.req.query('timeRange') || '30d';
const trends = await this.qualityService.getQualityTrends(
assetId,
dimension,
timeRange
);
return c.json({
success: true,
data: trends
});
} catch (error) {
console.error('Error getting quality trends:', error);
return c.json({
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
}, 500);
}
});
// Generate quality report
this.app.post('/reports', async (c) => {
try {
const request: QualityReportRequest = await c.req.json();
const report = await this.qualityService.generateQualityReport(request);
return c.json({
success: true,
data: report
});
} catch (error) {
console.error('Error generating quality report:', error);
return c.json({
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
}, 500);
}
});
// Get quality metrics summary
this.app.get('/metrics/summary', async (c) => {
try {
const assetIds = c.req.query('assetIds')?.split(',');
const timeRange = c.req.query('timeRange') || '7d';
const summary = await this.qualityService.getQualityMetricsSummary(
assetIds,
timeRange
);
return c.json({
success: true,
data: summary
});
} catch (error) {
console.error('Error getting quality metrics summary:', error);
return c.json({
success: false,
error: error instanceof Error ? error.message : 'Unknown error'
}, 500);
}
});
}
/**
 * Exposes the Hono application held in `this.app` — the instance the route
 * handlers of this controller are registered on — so it can be mounted elsewhere.
 *
 * @returns The controller's Hono application instance.
 */
public getApp(): Hono {
return this.app;
}
}

View file

@ -0,0 +1,334 @@
import { Context } from 'hono';
import { Logger } from '@stock-bot/utils';
import { SearchService } from '../services/SearchService';
import { SearchQuery, SearchFilters } from '../types/DataCatalog';
/**
 * HTTP handlers for the catalog search API.
 *
 * Each handler receives a Hono `Context`, delegates to the injected
 * `SearchService` and answers with JSON (or CSV for exports). Unexpected
 * failures are logged through the injected logger and reported to the client
 * as a generic 500 response.
 */
export class SearchController {
  constructor(
    private searchService: SearchService,
    private logger: Logger
  ) {}

  /**
   * Full-text search driven by query-string parameters.
   *
   * Recognised parameters: `q`, `offset`, `limit`, `sortBy`, `sortOrder`,
   * `userId`, the repeatable filters `types`, `classifications`, `owners`,
   * `tags`, and the date filters `createdAfter` / `createdBefore`.
   * Responds with 400 when a date filter cannot be parsed.
   */
  async search(c: Context) {
    try {
      const queryParams = c.req.query();

      const searchQuery: SearchQuery = {
        text: queryParams.q || '',
        offset: this.parseIntParam(queryParams.offset, 0),
        limit: this.parseIntParam(queryParams.limit, 20),
        sortBy: queryParams.sortBy,
        sortOrder: queryParams.sortOrder as 'asc' | 'desc',
        userId: queryParams.userId
      };

      // c.req.query() exposes a single string per key, so repeated parameters
      // (?tags=a&tags=b) have to be read through c.req.queries().
      const filters: SearchFilters = {};
      const types = this.listParam(c, 'types');
      if (types) {
        filters.types = types;
      }
      const classifications = this.listParam(c, 'classifications');
      if (classifications) {
        filters.classifications = classifications;
      }
      const owners = this.listParam(c, 'owners');
      if (owners) {
        filters.owners = owners;
      }
      const tags = this.listParam(c, 'tags');
      if (tags) {
        filters.tags = tags;
      }

      // Reject unparsable dates instead of handing an Invalid Date to the service.
      const createdAfter = this.parseDateParam(queryParams.createdAfter);
      const createdBefore = this.parseDateParam(queryParams.createdBefore);
      if (createdAfter === null || createdBefore === null) {
        return c.json({ error: 'createdAfter and createdBefore must be valid dates' }, 400);
      }
      if (createdAfter) {
        filters.createdAfter = createdAfter;
      }
      if (createdBefore) {
        filters.createdBefore = createdBefore;
      }

      if (Object.keys(filters).length > 0) {
        searchQuery.filters = filters;
      }

      const result = await this.searchService.search(searchQuery);
      this.logger.info('Search API call completed', {
        query: searchQuery.text,
        resultCount: result.total,
        searchTime: result.searchTime
      });
      return c.json(result);
    } catch (error) {
      this.logger.error('Search API call failed', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Autocomplete suggestions for the partial text in `q`.
   * Inputs shorter than two characters yield an empty list without calling the service.
   */
  async suggest(c: Context) {
    try {
      const partial = c.req.query('q');
      if (!partial || partial.length < 2) {
        return c.json({ suggestions: [] });
      }
      const suggestions = await this.searchService.suggest(partial);
      return c.json({ suggestions });
    } catch (error) {
      this.logger.error('Suggestion API call failed', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Facet-based search; the facet selection is read from the JSON request body
   * and echoed back next to the matching assets.
   */
  async searchByFacets(c: Context) {
    try {
      const facets = await c.req.json();
      if (!facets || typeof facets !== 'object') {
        return c.json({ error: 'Facets object is required' }, 400);
      }
      const assets = await this.searchService.searchByFacets(facets);
      return c.json({
        assets,
        total: assets.length,
        facets
      });
    } catch (error) {
      this.logger.error('Facet search API call failed', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /** Assets similar to the one named by the `id` path parameter (`limit` defaults to 10). */
  async searchSimilar(c: Context) {
    try {
      const assetId = c.req.param('id');
      const limit = this.parseIntParam(c.req.query('limit'), 10);
      if (!assetId) {
        return c.json({ error: 'Asset ID is required' }, 400);
      }
      const similarAssets = await this.searchService.searchSimilar(assetId, limit);
      return c.json({
        assetId,
        similarAssets,
        total: similarAssets.length
      });
    } catch (error) {
      this.logger.error('Similar search API call failed', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /** Popular searches as reported by the search service (`limit` defaults to 10). */
  async getPopularSearches(c: Context) {
    try {
      const limit = this.parseIntParam(c.req.query('limit'), 10);
      const popularSearches = await this.searchService.getPopularSearches(limit);
      return c.json({
        searches: popularSearches,
        total: popularSearches.length
      });
    } catch (error) {
      this.logger.error('Popular searches API call failed', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /** Recent searches of the user named by the `userId` path parameter (`limit` defaults to 10). */
  async getRecentSearches(c: Context) {
    try {
      const userId = c.req.param('userId');
      const limit = this.parseIntParam(c.req.query('limit'), 10);
      if (!userId) {
        return c.json({ error: 'User ID is required' }, 400);
      }
      const recentSearches = await this.searchService.getRecentSearches(userId, limit);
      return c.json({
        userId,
        searches: recentSearches,
        total: recentSearches.length
      });
    } catch (error) {
      this.logger.error('Recent searches API call failed', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /** Asks the search service to rebuild its index and reports success. */
  async reindexAssets(c: Context) {
    try {
      await this.searchService.reindexAll();
      this.logger.info('Search index rebuilt via API');
      return c.json({ message: 'Search index rebuilt successfully' });
    } catch (error) {
      this.logger.error('Reindex API call failed', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /** Raw search analytics for the requested `timeframe` (default `week`). */
  async getSearchAnalytics(c: Context) {
    try {
      const timeframe = c.req.query('timeframe') || 'week';
      const analytics = await this.searchService.getSearchAnalytics(timeframe);
      return c.json({
        timeframe,
        analytics
      });
    } catch (error) {
      this.logger.error('Search analytics API call failed', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Search described by a JSON body (`query`, `offset`, `limit`, `sortBy`,
   * `sortOrder`, `userId`, `filters`).
   *
   * When a query of more than two words finds nothing, the first word is
   * searched on its own; if that finds something, a hint is attached to the
   * (still empty) result.
   */
  async advancedSearch(c: Context) {
    try {
      const searchRequest = await c.req.json();
      if (!searchRequest) {
        return c.json({ error: 'Search request is required' }, 400);
      }

      const searchQuery: SearchQuery = {
        text: searchRequest.query || '',
        offset: searchRequest.offset || 0,
        limit: searchRequest.limit || 20,
        sortBy: searchRequest.sortBy,
        sortOrder: searchRequest.sortOrder,
        userId: searchRequest.userId,
        filters: searchRequest.filters
      };

      const result = await this.searchService.search(searchQuery);

      // If no results and query is complex, try to suggest simpler alternatives
      if (result.total === 0 && searchQuery.text && searchQuery.text.split(' ').length > 2) {
        const simpleQuery = searchQuery.text.split(' ')[0];
        const simpleResult = await this.searchService.search({
          ...searchQuery,
          text: simpleQuery
        });
        if (simpleResult.total > 0) {
          result.suggestions = [`Try searching for "${simpleQuery}"`];
        }
      }

      this.logger.info('Advanced search API call completed', {
        query: searchQuery.text,
        resultCount: result.total,
        searchTime: result.searchTime
      });
      return c.json(result);
    } catch (error) {
      this.logger.error('Advanced search API call failed', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Runs a search with a large result window and returns it as a download,
   * either JSON (default) or CSV, selected by the `format` query parameter.
   */
  async exportSearchResults(c: Context) {
    try {
      const queryParams = c.req.query();
      const format = queryParams.format || 'json';
      if (format !== 'json' && format !== 'csv') {
        return c.json({ error: 'Unsupported export format. Use json or csv' }, 400);
      }

      const searchQuery: SearchQuery = {
        text: queryParams.q || '',
        offset: 0,
        limit: 10000, // Large limit for export
        sortBy: queryParams.sortBy,
        sortOrder: queryParams.sortOrder as 'asc' | 'desc'
      };
      const result = await this.searchService.search(searchQuery);

      if (format === 'csv') {
        const csv = this.convertToCSV(result.assets);
        c.header('Content-Type', 'text/csv');
        c.header('Content-Disposition', 'attachment; filename="search-results.csv"');
        // c.body() keeps the Content-Type set above; c.text() would apply its
        // own text/plain content type to the response.
        return c.body(csv);
      }

      c.header('Content-Type', 'application/json');
      c.header('Content-Disposition', 'attachment; filename="search-results.json"');
      return c.json(result);
    } catch (error) {
      this.logger.error('Export search results API call failed', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /** Condensed statistics derived from the search analytics of the requested `timeframe`. */
  async getSearchStatistics(c: Context) {
    try {
      const timeframe = c.req.query('timeframe') || 'week';
      const analytics = await this.searchService.getSearchAnalytics(timeframe);
      const statistics = {
        searchVolume: analytics.totalSearches,
        uniqueQueries: analytics.uniqueQueries,
        averageResultsPerSearch: Math.round(analytics.averageResults),
        // Guard against division by zero when nothing was searched.
        noResultQueriesPercent: analytics.totalSearches > 0
          ? Math.round((analytics.noResultQueries / analytics.totalSearches) * 100)
          : 0,
        topSearchTerms: analytics.topQueries.slice(0, 5),
        searchTrend: analytics.searchTrend.trend,
        facetUsage: analytics.facetUsage
      };
      return c.json({
        timeframe,
        statistics
      });
    } catch (error) {
      this.logger.error('Search statistics API call failed', { error });
      return c.json({ error: 'Internal server error' }, 500);
    }
  }

  /**
   * Serialises assets to CSV: one header row followed by one row per asset.
   *
   * Name, description and tags are always quoted; the remaining text columns
   * are quoted only when they contain a comma, quote or line break. Embedded
   * quotes are doubled. Missing values become empty cells.
   *
   * @returns The CSV text, or the literal `No results found` for an empty list.
   */
  private convertToCSV(assets: any[]): string {
    if (assets.length === 0) {
      return 'No results found';
    }

    const headers = [
      'ID', 'Name', 'Type', 'Description', 'Owner', 'Classification',
      'Tags', 'Created At', 'Updated At', 'Last Accessed'
    ];

    const rows = assets.map((asset) =>
      [
        this.csvField(asset.id),
        this.csvField(asset.name, true),
        this.csvField(asset.type),
        this.csvField(asset.description, true),
        this.csvField(asset.owner),
        this.csvField(asset.classification),
        this.csvField(Array.isArray(asset.tags) ? asset.tags.join('; ') : '', true),
        this.csvDate(asset.createdAt),
        this.csvDate(asset.updatedAt),
        this.csvDate(asset.lastAccessed)
      ].join(',')
    );

    return [headers.join(','), ...rows].join('\n');
  }

  /** Renders one CSV cell, quoting when forced or when the text would otherwise break the row. */
  private csvField(value: unknown, alwaysQuote = false): string {
    const text = value === undefined || value === null ? '' : String(value);
    if (!alwaysQuote && !/[",\r\n]/.test(text)) {
      return text;
    }
    return `"${text.replace(/"/g, '""')}"`;
  }

  /** Renders a timestamp cell as ISO-8601; absent or unparsable values become an empty cell. */
  private csvDate(value: unknown): string {
    if (value === undefined || value === null || value === '') {
      return '';
    }
    const date = value instanceof Date ? value : new Date(value as string | number);
    return Number.isNaN(date.getTime()) ? '' : date.toISOString();
  }

  /** Parses a base-10 integer parameter; absent, non-numeric or negative input yields `fallback`. */
  private parseIntParam(raw: string | undefined, fallback: number): number {
    if (raw === undefined || raw === '') {
      return fallback;
    }
    const parsed = Number.parseInt(raw, 10);
    return Number.isNaN(parsed) || parsed < 0 ? fallback : parsed;
  }

  /** Parses a date parameter: `undefined` when absent, `null` when present but invalid. */
  private parseDateParam(raw: string | undefined): Date | null | undefined {
    if (!raw) {
      return undefined;
    }
    const parsed = new Date(raw);
    return Number.isNaN(parsed.getTime()) ? null : parsed;
  }

  /** Collects every non-empty value of a repeatable query parameter, or `undefined` when none was sent. */
  private listParam(c: Context, key: string): string[] | undefined {
    const values = (c.req.queries(key) ?? []).filter((value) => value !== '');
    return values.length > 0 ? values : undefined;
  }
}

View file

@ -0,0 +1,201 @@
import { Hono } from 'hono';
import { cors } from 'hono/cors';
import { logger } from 'hono/logger';
import { prettyJSON } from 'hono/pretty-json';
import { serve } from '@hono/node-server';
// Import controllers
import { DataCatalogController } from './controllers/DataCatalogController';
import { SearchController } from './controllers/SearchController';
import { LineageController } from './controllers/LineageController';
import { QualityController } from './controllers/QualityController';
import { GovernanceController } from './controllers/GovernanceController';
import { HealthController } from './controllers/HealthController';
// Application instance that every middleware and route below is attached to.
const app = new Hono();

// Cross-origin policy: only the listed localhost origins are accepted, and
// requests from them may carry credentials.
const corsPolicy = cors({
  origin: ['http://localhost:3000', 'http://localhost:4000', 'http://localhost:5173'],
  allowMethods: ['GET', 'POST', 'PUT', 'DELETE', 'PATCH', 'OPTIONS'],
  allowHeaders: ['Content-Type', 'Authorization', 'X-Requested-With'],
  credentials: true
});

// Global middleware for all paths, registered in this order: CORS, request
// logging, pretty-printed JSON.
app.use('*', corsPolicy);
app.use('*', logger());
app.use('*', prettyJSON());
// Initialize controllers
// NOTE(review): every controller is constructed without arguments and mounted through
// getApp(). In this change set SearchController and DataCatalogController declare
// constructors that take a service and a logger, and SearchController exposes handler
// methods but no getApp() — confirm these call sites against the controller classes.
const dataCatalogController = new DataCatalogController();
const searchController = new SearchController();
const lineageController = new LineageController();
const qualityController = new QualityController();
const governanceController = new GovernanceController();
const healthController = new HealthController();
// Setup routes: each controller's Hono sub-application is mounted under its own prefix.
app.route('/api/v1/assets', dataCatalogController.getApp());
app.route('/api/v1/search', searchController.getApp());
app.route('/api/v1/lineage', lineageController.getApp());
app.route('/api/v1/quality', qualityController.getApp());
app.route('/api/v1/governance', governanceController.getApp());
app.route('/health', healthController.getApp());
// Root endpoint: a static service descriptor listing the mount point of each
// API area and the location of the API documentation.
app.get('/', (c) => {
  const endpoints = {
    assets: '/api/v1/assets',
    search: '/api/v1/search',
    lineage: '/api/v1/lineage',
    quality: '/api/v1/quality',
    governance: '/api/v1/governance',
    health: '/health'
  };

  const descriptor = {
    service: 'Data Catalog Service',
    version: '1.0.0',
    description: 'Comprehensive data catalog and governance service for stock-bot data platform',
    endpoints,
    documentation: '/api/v1/docs'
  };

  return c.json(descriptor);
});
// Static, hand-maintained API overview served by the documentation endpoint.
// NOTE(review): the search section lists GET /search/facets, GET /search/trending and
// POST /search/export, while SearchController in this change set reads the facets from a
// JSON body and the export options from the query string — verify the entries against
// the actual route table.
const apiDocumentation = {
  title: 'Data Catalog Service API',
  version: '1.0.0',
  description: 'RESTful API for data catalog, lineage, quality, and governance operations',
  endpoints: {
    assets: {
      description: 'Data asset management',
      methods: {
        'GET /api/v1/assets': 'List assets with filtering and pagination',
        'POST /api/v1/assets': 'Create new data asset',
        'GET /api/v1/assets/:id': 'Get asset by ID',
        'PUT /api/v1/assets/:id': 'Update asset',
        'DELETE /api/v1/assets/:id': 'Delete asset',
        'GET /api/v1/assets/:id/schema': 'Get asset schema',
        'PUT /api/v1/assets/:id/schema': 'Update asset schema',
        'GET /api/v1/assets/:id/usage': 'Get asset usage analytics',
        'POST /api/v1/assets/:id/usage': 'Record asset usage'
      }
    },
    search: {
      description: 'Data discovery and search',
      methods: {
        'GET /api/v1/search': 'Search assets with full-text and faceted search',
        'GET /api/v1/search/suggest': 'Get search suggestions',
        'GET /api/v1/search/facets': 'Get available search facets',
        'GET /api/v1/search/similar/:id': 'Find similar assets',
        'GET /api/v1/search/trending': 'Get trending searches',
        'POST /api/v1/search/export': 'Export search results'
      }
    },
    lineage: {
      description: 'Data lineage and impact analysis',
      methods: {
        'POST /api/v1/lineage': 'Create lineage relationship',
        'GET /api/v1/lineage/assets/:assetId': 'Get asset lineage',
        'GET /api/v1/lineage/assets/:assetId/upstream': 'Get upstream dependencies',
        'GET /api/v1/lineage/assets/:assetId/downstream': 'Get downstream dependencies',
        'POST /api/v1/lineage/impact-analysis': 'Perform impact analysis',
        'GET /api/v1/lineage/graph': 'Get lineage graph visualization',
        'GET /api/v1/lineage/assets/:assetId/circular-check': 'Check for circular dependencies',
        'DELETE /api/v1/lineage/:lineageId': 'Delete lineage relationship',
        'GET /api/v1/lineage/stats': 'Get lineage statistics'
      }
    },
    quality: {
      description: 'Data quality assessment and monitoring',
      methods: {
        'POST /api/v1/quality/assess': 'Assess data quality',
        'GET /api/v1/quality/assets/:assetId': 'Get quality assessment',
        'POST /api/v1/quality/rules': 'Create quality rule',
        'GET /api/v1/quality/rules': 'Get quality rules',
        'PUT /api/v1/quality/rules/:ruleId': 'Update quality rule',
        'DELETE /api/v1/quality/rules/:ruleId': 'Delete quality rule',
        'POST /api/v1/quality/validate/:assetId': 'Validate quality rules',
        'POST /api/v1/quality/issues': 'Report quality issue',
        'GET /api/v1/quality/issues': 'Get quality issues',
        'PATCH /api/v1/quality/issues/:issueId/resolve': 'Resolve quality issue',
        'GET /api/v1/quality/trends': 'Get quality trends',
        'POST /api/v1/quality/reports': 'Generate quality report',
        'GET /api/v1/quality/metrics/summary': 'Get quality metrics summary'
      }
    },
    governance: {
      description: 'Data governance and compliance',
      methods: {
        'POST /api/v1/governance/policies': 'Create governance policy',
        'GET /api/v1/governance/policies': 'Get governance policies',
        'PUT /api/v1/governance/policies/:policyId': 'Update governance policy',
        'DELETE /api/v1/governance/policies/:policyId': 'Delete governance policy',
        'POST /api/v1/governance/policies/:policyId/apply/:assetId': 'Apply policy to asset',
        'POST /api/v1/governance/compliance/check': 'Check compliance',
        'GET /api/v1/governance/compliance/violations': 'Get compliance violations',
        'POST /api/v1/governance/access/request': 'Request data access',
        'PATCH /api/v1/governance/access/:requestId': 'Review access request',
        'POST /api/v1/governance/access/check': 'Check access authorization',
        'POST /api/v1/governance/privacy/subject-request': 'Handle data subject request',
        'POST /api/v1/governance/privacy/anonymize/:assetId': 'Anonymize asset data',
        'GET /api/v1/governance/audit/logs': 'Get audit logs',
        'POST /api/v1/governance/audit/log': 'Log access event',
        'GET /api/v1/governance/retention/policies': 'Get retention policies',
        'POST /api/v1/governance/retention/apply': 'Apply retention policy',
        'GET /api/v1/governance/metrics': 'Get governance metrics'
      }
    },
    health: {
      description: 'Service health monitoring',
      methods: {
        'GET /health': 'Basic health check',
        'GET /health/detailed': 'Detailed health check with dependencies',
        'GET /health/ready': 'Readiness check',
        'GET /health/live': 'Liveness check'
      }
    }
  }
};

// API documentation endpoint: returns the overview above as JSON.
app.get('/api/v1/docs', (c) => c.json(apiDocumentation));
// 404 handler: unmatched requests receive a JSON error together with the list
// of top-level endpoints.
app.notFound((c) => {
  const availableEndpoints = [
    '/api/v1/assets',
    '/api/v1/search',
    '/api/v1/lineage',
    '/api/v1/quality',
    '/api/v1/governance',
    '/health'
  ];

  const body = {
    success: false,
    error: 'Endpoint not found',
    availableEndpoints
  };

  return c.json(body, 404);
});
// Error handler: logs the error and answers with a generic 500 payload. The
// underlying error message is only included when NODE_ENV is "development".
app.onError((err, c) => {
  console.error('Application error:', err);

  let message = 'Something went wrong';
  if (process.env.NODE_ENV === 'development') {
    message = err.message;
  }

  return c.json({ success: false, error: 'Internal server error', message }, 500);
});
// Start server
// The port comes from the PORT environment variable. A missing, non-numeric or
// out-of-range value falls back to the default instead of handing NaN to serve().
const DEFAULT_PORT = 3003;
const requestedPort = Number.parseInt(process.env.PORT ?? '', 10);
const port =
  Number.isInteger(requestedPort) && requestedPort >= 0 && requestedPort <= 65535
    ? requestedPort
    : DEFAULT_PORT;

console.log(`🚀 Data Catalog Service starting on port ${port}`);
console.log(`📚 API Documentation available at http://localhost:${port}/api/v1/docs`);
console.log(`❤️ Health endpoint available at http://localhost:${port}/health`);

serve({
  fetch: app.fetch,
  port
});

export default app;

View file

@ -0,0 +1,312 @@
import { EventBus } from '@stock-bot/event-bus';
import { Logger } from '@stock-bot/utils';
import {
DataAsset,
CreateDataAssetRequest,
UpdateDataAssetRequest,
DataAssetType,
DataClassification
} from '../types/DataCatalog';
/**
 * Contract for the data-asset catalog: create, read, update, delete and query
 * operations over `DataAsset` records. All operations are asynchronous.
 */
export interface DataCatalogService {
/** Creates an asset from the request and resolves to the resulting record. */
createAsset(request: CreateDataAssetRequest): Promise<DataAsset>;
/** Resolves to the asset with the given id, or `null` when there is none. */
getAsset(id: string): Promise<DataAsset | null>;
/** Applies the request to the asset with the given id; resolves to `null` when there is none. */
updateAsset(id: string, request: UpdateDataAssetRequest): Promise<DataAsset | null>;
/** Removes the asset with the given id. `DataCatalogServiceImpl` rejects when the id is unknown. */
deleteAsset(id: string): Promise<void>;
/** Lists assets, optionally narrowed by key/value filters. */
listAssets(filters?: Record<string, any>): Promise<DataAsset[]>;
/** Text search over assets, optionally narrowed by key/value filters. */
searchAssets(query: string, filters?: Record<string, any>): Promise<DataAsset[]>;
/** Assets whose owner equals `owner`. */
getAssetsByOwner(owner: string): Promise<DataAsset[]>;
/** Assets of the given type. */
getAssetsByType(type: DataAssetType): Promise<DataAsset[]>;
/** Assets with the given classification. */
getAssetsByClassification(classification: DataClassification): Promise<DataAsset[]>;
/** Assets selected by tags. `DataCatalogServiceImpl` matches assets carrying at least one of them. */
getAssetsByTags(tags: string[]): Promise<DataAsset[]>;
}
/**
 * In-memory implementation of {@link DataCatalogService}.
 *
 * Assets are kept in a `Map` keyed by asset id. Creation, access, update and
 * deletion are each announced on the injected event bus; failures are logged
 * and rethrown to the caller.
 */
export class DataCatalogServiceImpl implements DataCatalogService {
  private assets: Map<string, DataAsset> = new Map();

  constructor(
    private eventBus: EventBus,
    private logger: Logger
  ) {}

  /**
   * Builds a complete asset record from the request, filling lineage, quality,
   * usage and (unless supplied) governance with empty defaults, stores it and
   * emits `data.asset.created`.
   *
   * A governance object supplied in the request is copied before the asset id
   * is written into it, so the caller's object is left untouched.
   */
  async createAsset(request: CreateDataAssetRequest): Promise<DataAsset> {
    try {
      const id = this.generateId();

      const asset: DataAsset = {
        id,
        name: request.name,
        type: request.type,
        description: request.description,
        owner: request.owner,
        steward: request.steward,
        tags: request.tags || [],
        classification: request.classification,
        schema: request.schema,
        location: request.location,
        metadata: {
          customProperties: {},
          ...request.metadata
        },
        lineage: {
          id: this.generateId(),
          assetId: id,
          upstreamAssets: [],
          downstreamAssets: [],
          transformations: [],
          impact: {
            downstreamAssets: [],
            affectedUsers: [],
            estimatedImpact: 'low',
            impactDescription: '',
            recommendations: []
          },
          createdAt: new Date(),
          updatedAt: new Date()
        },
        quality: {
          id: this.generateId(),
          assetId: id,
          overallScore: 100,
          dimensions: [],
          rules: [],
          issues: [],
          trend: {
            timeframe: 'week',
            dataPoints: [],
            trend: 'stable',
            changeRate: 0
          },
          lastAssessment: new Date()
        },
        usage: {
          id: this.generateId(),
          assetId: id,
          accessCount: 0,
          uniqueUsers: 0,
          lastAccessed: new Date(),
          topUsers: [],
          accessPatterns: [],
          popularQueries: [],
          usageTrend: {
            timeframe: 'week',
            dataPoints: [],
            trend: 'stable',
            changeRate: 0
          }
        },
        governance: request.governance
          ? { ...request.governance, assetId: id }
          : {
              id: this.generateId(),
              assetId: id,
              policies: [],
              compliance: [],
              retention: {
                retentionPeriod: 365,
                retentionReason: 'Business requirement',
                legalHold: false
              },
              access: {
                defaultAccess: 'none',
                roles: [],
                users: []
              },
              privacy: {
                containsPII: false,
                sensitiveFields: [],
                anonymizationRules: [],
                consentRequired: false,
                dataSubjectRights: []
              },
              audit: []
            },
        createdAt: new Date(),
        updatedAt: new Date()
      };

      this.assets.set(asset.id, asset);
      this.logger.info('Data asset created', { assetId: asset.id, name: asset.name });
      await this.eventBus.emit('data.asset.created', {
        assetId: asset.id,
        asset,
        timestamp: new Date()
      });
      return asset;
    } catch (error) {
      this.logger.error('Failed to create data asset', { request, error });
      throw error;
    }
  }

  /**
   * Looks an asset up by id. A hit also records the access (last-accessed
   * timestamps, access counter) and emits `data.asset.accessed`.
   *
   * @returns The asset, or `null` when the id is unknown.
   */
  async getAsset(id: string): Promise<DataAsset | null> {
    try {
      const asset = this.assets.get(id);
      if (asset) {
        // Update last accessed time
        asset.lastAccessed = new Date();
        asset.usage.lastAccessed = new Date();
        asset.usage.accessCount++;
        await this.eventBus.emit('data.asset.accessed', {
          assetId: id,
          timestamp: new Date()
        });
      }
      return asset || null;
    } catch (error) {
      this.logger.error('Failed to get data asset', { assetId: id, error });
      throw error;
    }
  }

  /**
   * Applies the fields present in the request to an existing asset (metadata
   * is merged, everything else replaced), bumps `updatedAt` and emits
   * `data.asset.updated`.
   *
   * @returns The updated asset, or `null` when the id is unknown.
   */
  async updateAsset(id: string, request: UpdateDataAssetRequest): Promise<DataAsset | null> {
    try {
      const asset = this.assets.get(id);
      if (!asset) {
        return null;
      }

      // Update only provided fields
      if (request.name !== undefined) asset.name = request.name;
      if (request.description !== undefined) asset.description = request.description;
      if (request.owner !== undefined) asset.owner = request.owner;
      if (request.steward !== undefined) asset.steward = request.steward;
      if (request.tags !== undefined) asset.tags = request.tags;
      if (request.classification !== undefined) asset.classification = request.classification;
      if (request.schema !== undefined) asset.schema = request.schema;
      if (request.metadata !== undefined) {
        asset.metadata = { ...asset.metadata, ...request.metadata };
      }
      asset.updatedAt = new Date();

      this.assets.set(id, asset);
      this.logger.info('Data asset updated', { assetId: id, changes: request });
      await this.eventBus.emit('data.asset.updated', {
        assetId: id,
        asset,
        changes: request,
        timestamp: new Date()
      });
      return asset;
    } catch (error) {
      this.logger.error('Failed to update data asset', { assetId: id, request, error });
      throw error;
    }
  }

  /**
   * Removes an asset and emits `data.asset.deleted`.
   *
   * @throws Error when no asset with the given id exists.
   */
  async deleteAsset(id: string): Promise<void> {
    try {
      const asset = this.assets.get(id);
      if (!asset) {
        throw new Error(`Asset with id ${id} not found`);
      }
      this.assets.delete(id);
      this.logger.info('Data asset deleted', { assetId: id });
      await this.eventBus.emit('data.asset.deleted', {
        assetId: id,
        asset,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to delete data asset', { assetId: id, error });
      throw error;
    }
  }

  /**
   * Lists all assets, optionally narrowed by `type`, `owner`, `classification`
   * and `tags` filters; other filter keys are ignored.
   */
  async listAssets(filters?: Record<string, any>): Promise<DataAsset[]> {
    try {
      return Array.from(this.assets.values()).filter((asset) =>
        this.matchesFilters(asset, filters)
      );
    } catch (error) {
      this.logger.error('Failed to list data assets', { filters, error });
      throw error;
    }
  }

  /**
   * Case-insensitive substring search over asset name, description and tags,
   * further narrowed by the same filters {@link listAssets} understands.
   */
  async searchAssets(query: string, filters?: Record<string, any>): Promise<DataAsset[]> {
    try {
      const searchTerm = query.toLowerCase();
      const assets = Array.from(this.assets.values()).filter(
        (asset) =>
          (asset.name.toLowerCase().includes(searchTerm) ||
            asset.description.toLowerCase().includes(searchTerm) ||
            asset.tags.some((tag) => tag.toLowerCase().includes(searchTerm))) &&
          this.matchesFilters(asset, filters)
      );

      this.logger.info('Asset search completed', {
        query,
        filters,
        resultCount: assets.length
      });
      return assets;
    } catch (error) {
      this.logger.error('Failed to search data assets', { query, filters, error });
      throw error;
    }
  }

  /** Assets whose owner equals `owner`. */
  async getAssetsByOwner(owner: string): Promise<DataAsset[]> {
    return this.listAssets({ owner });
  }

  /** Assets of the given type. */
  async getAssetsByType(type: DataAssetType): Promise<DataAsset[]> {
    return this.listAssets({ type });
  }

  /** Assets with the given classification. */
  async getAssetsByClassification(classification: DataClassification): Promise<DataAsset[]> {
    return this.listAssets({ classification });
  }

  /** Assets carrying at least one of the given tags. */
  async getAssetsByTags(tags: string[]): Promise<DataAsset[]> {
    return this.listAssets({ tags });
  }

  /**
   * Shared filter predicate for listing and searching.
   *
   * `type`, `owner` and `classification` compare by strict equality; `tags`
   * accepts one tag or an array and matches when the asset carries any of
   * them. Unknown keys never exclude an asset.
   */
  private matchesFilters(asset: DataAsset, filters?: Record<string, any>): boolean {
    if (!filters) {
      return true;
    }
    return Object.entries(filters).every(([key, value]) => {
      switch (key) {
        case 'type':
          return asset.type === value;
        case 'owner':
          return asset.owner === value;
        case 'classification':
          return asset.classification === value;
        case 'tags':
          return Array.isArray(value)
            ? value.some((tag) => asset.tags.includes(tag))
            : asset.tags.includes(value);
        default:
          return true;
      }
    });
  }

  /** Builds an id from the current timestamp and a random base-36 suffix. */
  private generateId(): string {
    return `asset_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }
}

View file

@ -0,0 +1,764 @@
import { EventBus } from '@stock-bot/event-bus';
import { Logger } from '@stock-bot/utils';
import {
DataGovernance,
GovernancePolicy,
ComplianceCheck,
RetentionPolicy,
AccessControl,
PrivacySettings,
AuditEntry,
DataAsset,
GovernancePolicyType,
ComplianceStatus,
DataClassification
} from '../types/DataCatalog';
/**
 * Contract for governance operations on data assets: policy management,
 * compliance checks, retention / access / privacy settings, auditing and
 * data-subject handling. All operations are asynchronous.
 */
export interface DataGovernanceService {
/** Creates a policy; `id`, `createdAt` and `updatedAt` are supplied by the service. */
createPolicy(policy: Omit<GovernancePolicy, 'id' | 'createdAt' | 'updatedAt'>): Promise<GovernancePolicy>;
/** Applies partial updates to a policy; resolves to `null` when the policy does not exist. */
updatePolicy(policyId: string, updates: Partial<GovernancePolicy>): Promise<GovernancePolicy | null>;
/** Deletes a policy. `DataGovernanceServiceImpl` rejects when the id is unknown. */
deletePolicy(policyId: string): Promise<void>;
/** Resolves to the policy with the given id, or `null` when there is none. */
getPolicy(policyId: string): Promise<GovernancePolicy | null>;
/** Lists policies, optionally narrowed by key/value filters. */
listPolicies(filters?: Record<string, any>): Promise<GovernancePolicy[]>;
/** Attaches an existing policy to an asset. */
applyPolicy(assetId: string, policyId: string): Promise<void>;
/** Detaches a policy from an asset. */
removePolicy(assetId: string, policyId: string): Promise<void>;
/** Runs the compliance checks for an asset and resolves to their results. */
checkCompliance(assetId: string): Promise<ComplianceCheck[]>;
/** Replaces the retention policy recorded for an asset. */
updateRetentionPolicy(assetId: string, retention: RetentionPolicy): Promise<void>;
updateAccessControl(assetId: string, access: AccessControl): Promise<void>;
updatePrivacySettings(assetId: string, privacy: PrivacySettings): Promise<void>;
auditAccess(assetId: string, userId: string, action: string, details?: any): Promise<void>;
getAuditTrail(assetId: string, filters?: Record<string, any>): Promise<AuditEntry[]>;
generateComplianceReport(assetIds: string[]): Promise<any>;
validateDataAccess(assetId: string, userId: string, action: string): Promise<boolean>;
anonymizeData(assetId: string, options?: any): Promise<void>;
handleDataSubjectRequest(assetId: string, request: any): Promise<any>;
}
export class DataGovernanceServiceImpl implements DataGovernanceService {
private policies: Map<string, GovernancePolicy> = new Map();
private governance: Map<string, DataGovernance> = new Map();
private assets: Map<string, DataAsset> = new Map();
/**
 * @param eventBus Bus on which governance events are emitted.
 * @param logger   Logger used for informational, warning and error messages.
 */
constructor(
private eventBus: EventBus,
private logger: Logger
) {
// Presumably seeds the policy map with built-in policies — confirm in initializeDefaultPolicies().
this.initializeDefaultPolicies();
}
/**
 * Stores a new governance policy under a freshly generated id, stamps its
 * creation and update times, logs the creation and emits
 * `data.governance.policy.created`.
 *
 * @param policy Policy fields without `id`, `createdAt` and `updatedAt`.
 * @returns The stored policy including the generated fields.
 * @throws Rethrows any failure after logging it.
 */
async createPolicy(policy: Omit<GovernancePolicy, 'id' | 'createdAt' | 'updatedAt'>): Promise<GovernancePolicy> {
  try {
    const id = this.generateId();
    const created: GovernancePolicy = {
      ...policy,
      id,
      createdAt: new Date(),
      updatedAt: new Date()
    };

    this.policies.set(id, created);

    const { name, type } = created;
    this.logger.info('Governance policy created', { policyId: id, name, type });

    const event = { policy: created, timestamp: new Date() };
    await this.eventBus.emit('data.governance.policy.created', event);

    return created;
  } catch (error) {
    this.logger.error('Failed to create governance policy', { policy, error });
    throw error;
  }
}
/**
 * Merges partial updates into an existing policy, refreshes `updatedAt`, logs
 * the change and emits `data.governance.policy.updated`.
 *
 * The policy's `id` and `createdAt` are never taken from `updates`, so the
 * stored record always stays consistent with the key it is stored under.
 * Governance records of assets the policy is applied to are switched to the
 * updated policy object as well; otherwise they would keep evaluating the
 * pre-update version.
 *
 * @param policyId Id of the policy to change.
 * @param updates  Fields to overwrite.
 * @returns The updated policy, or `null` when no policy has that id.
 * @throws Rethrows any failure after logging it.
 */
async updatePolicy(policyId: string, updates: Partial<GovernancePolicy>): Promise<GovernancePolicy | null> {
  try {
    const policy = this.policies.get(policyId);
    if (!policy) {
      return null;
    }

    const updatedPolicy: GovernancePolicy = {
      ...policy,
      ...updates,
      // Identity and creation time are not patchable.
      id: policy.id,
      createdAt: policy.createdAt,
      updatedAt: new Date()
    };
    this.policies.set(policyId, updatedPolicy);

    // Asset governance records hold their own reference to the applied policy
    // object; replace it so they do not keep the stale version.
    for (const governance of this.governance.values()) {
      const index = governance.policies.findIndex(p => p.id === policyId);
      if (index !== -1) {
        governance.policies[index] = updatedPolicy;
      }
    }

    this.logger.info('Governance policy updated', { policyId, changes: updates });
    await this.eventBus.emit('data.governance.policy.updated', {
      policy: updatedPolicy,
      changes: updates,
      timestamp: new Date()
    });
    return updatedPolicy;
  } catch (error) {
    this.logger.error('Failed to update governance policy', { policyId, updates, error });
    throw error;
  }
}
/**
 * Deletes a policy, detaches it from every asset governance record, logs the
 * deletion and emits `data.governance.policy.deleted`.
 *
 * @param policyId Id of the policy to delete.
 * @throws Error when no policy has that id; any failure is logged and rethrown.
 */
async deletePolicy(policyId: string): Promise<void> {
  try {
    const policy = this.policies.get(policyId);
    if (!policy) {
      throw new Error(`Policy with id ${policyId} not found`);
    }

    this.policies.delete(policyId);

    // Remove policy from all assets
    this.governance.forEach((record, assetId) => {
      record.policies = record.policies.filter(applied => applied.id !== policyId);
      this.governance.set(assetId, record);
    });

    this.logger.info('Governance policy deleted', { policyId });

    const event = { policyId, policy, timestamp: new Date() };
    await this.eventBus.emit('data.governance.policy.deleted', event);
  } catch (error) {
    this.logger.error('Failed to delete governance policy', { policyId, error });
    throw error;
  }
}
/**
 * Looks a policy up by id.
 *
 * @returns The policy, or `null` when no policy has that id.
 * @throws Rethrows any failure after logging it.
 */
async getPolicy(policyId: string): Promise<GovernancePolicy | null> {
  try {
    const found = this.policies.get(policyId);
    if (found) {
      return found;
    }
    return null;
  } catch (error) {
    this.logger.error('Failed to get governance policy', { policyId, error });
    throw error;
  }
}
/**
 * Lists policies, optionally narrowed by filters.
 *
 * Recognised filter keys: `type` and `active` (strict equality) and
 * `classification` (must be contained in the policy's
 * `applicableClassifications`). Other keys do not restrict the result.
 *
 * @throws Rethrows any failure after logging it.
 */
async listPolicies(filters?: Record<string, any>): Promise<GovernancePolicy[]> {
  try {
    const all = [...this.policies.values()];
    if (!filters) {
      return all;
    }

    return all.filter(policy =>
      Object.entries(filters).every(([key, value]) => {
        switch (key) {
          case 'type':
            return policy.type === value;
          case 'active':
            return policy.active === value;
          case 'classification':
            return policy.applicableClassifications?.includes(value);
          default:
            return true;
        }
      })
    );
  } catch (error) {
    this.logger.error('Failed to list governance policies', { filters, error });
    throw error;
  }
}
/**
 * Attaches a policy to an asset's governance record, creating an empty record
 * when the asset has none yet. A newly attached policy triggers a compliance
 * check, an info log entry and a `data.governance.policy.applied` event;
 * attaching a policy that is already present does nothing.
 *
 * @throws Error when no policy has the given id; any failure is logged and rethrown.
 */
async applyPolicy(assetId: string, policyId: string): Promise<void> {
  try {
    const policy = this.policies.get(policyId);
    if (!policy) {
      throw new Error(`Policy with id ${policyId} not found`);
    }

    const record = this.governance.get(assetId) ?? this.createEmptyGovernance(assetId);

    // Check if policy is already applied
    const alreadyApplied = record.policies.some(applied => applied.id === policyId);
    if (alreadyApplied) {
      return;
    }

    record.policies.push(policy);
    this.governance.set(assetId, record);

    // Perform compliance check after applying policy
    await this.checkCompliance(assetId);

    this.logger.info('Policy applied to asset', { assetId, policyId });
    const event = { assetId, policyId, timestamp: new Date() };
    await this.eventBus.emit('data.governance.policy.applied', event);
  } catch (error) {
    this.logger.error('Failed to apply policy to asset', { assetId, policyId, error });
    throw error;
  }
}
/**
 * Detaches a policy from an asset's governance record, logs the removal and
 * emits `data.governance.policy.removed`.
 *
 * @throws Error when the asset has no governance record; any failure is logged and rethrown.
 */
async removePolicy(assetId: string, policyId: string): Promise<void> {
  try {
    const record = this.governance.get(assetId);
    if (!record) {
      throw new Error(`Governance not found for asset ${assetId}`);
    }

    const remaining = record.policies.filter(applied => applied.id !== policyId);
    record.policies = remaining;
    this.governance.set(assetId, record);

    this.logger.info('Policy removed from asset', { assetId, policyId });

    const event = { assetId, policyId, timestamp: new Date() };
    await this.eventBus.emit('data.governance.policy.removed', event);
  } catch (error) {
    this.logger.error('Failed to remove policy from asset', { assetId, policyId, error });
    throw error;
  }
}
/**
 * Runs a compliance check for every active policy attached to an asset.
 *
 * The results replace the `compliance` list on the asset's governance record.
 * When at least one check has status `'failed'`, a warning is logged and a
 * `data.governance.compliance.violation` event carrying the failed checks is emitted.
 *
 * @param assetId - Asset to evaluate.
 * @returns One check per active policy, in policy order; an empty array when
 *          either the asset or its governance record is unknown.
 * @throws Re-throws any error after logging it.
 */
async checkCompliance(assetId: string): Promise<ComplianceCheck[]> {
  try {
    const governance = this.governance.get(assetId);
    const asset = this.assets.get(assetId);
    if (!(governance && asset)) {
      return [];
    }

    const results: ComplianceCheck[] = [];
    for (const policy of governance.policies) {
      if (policy.active) {
        // Checks are awaited one at a time, preserving policy order in the result.
        results.push(await this.performComplianceCheck(asset, policy));
      }
    }

    // Persist the latest results on the governance record.
    governance.compliance = results;
    this.governance.set(assetId, governance);

    const violations = results.filter(result => result.status === 'failed');
    if (violations.length > 0) {
      this.logger.warn('Compliance violations detected', {
        assetId,
        violationCount: violations.length
      });
      await this.eventBus.emit('data.governance.compliance.violation', {
        assetId,
        violations,
        timestamp: new Date()
      });
    }

    return results;
  } catch (error) {
    this.logger.error('Failed to check compliance', { assetId, error });
    throw error;
  }
}
/**
 * Replaces the retention policy on an asset's governance record.
 *
 * A governance record is created on demand when the asset has none. Emits
 * `data.governance.retention.updated` after the record is stored.
 *
 * @param assetId - Asset whose retention settings change.
 * @param retention - New retention policy; stored as-is.
 * @throws Re-throws any error after logging it.
 */
async updateRetentionPolicy(assetId: string, retention: RetentionPolicy): Promise<void> {
  try {
    const record = this.governance.get(assetId) ?? this.createEmptyGovernance(assetId);
    record.retention = retention;
    this.governance.set(assetId, record);

    const { retentionPeriod } = retention;
    this.logger.info('Retention policy updated', { assetId, retentionPeriod });

    await this.eventBus.emit('data.governance.retention.updated', {
      assetId,
      retention,
      timestamp: new Date()
    });
  } catch (error) {
    this.logger.error('Failed to update retention policy', { assetId, retention, error });
    throw error;
  }
}
/**
 * Replaces the access-control settings on an asset's governance record.
 *
 * A governance record is created on demand when the asset has none. Emits
 * `data.governance.access.updated` after the record is stored.
 *
 * @param assetId - Asset whose access control changes.
 * @param access - New access-control settings; stored as-is.
 * @throws Re-throws any error after logging it.
 */
async updateAccessControl(assetId: string, access: AccessControl): Promise<void> {
  try {
    const record = this.governance.get(assetId) ?? this.createEmptyGovernance(assetId);
    record.access = access;
    this.governance.set(assetId, record);

    const { defaultAccess } = access;
    this.logger.info('Access control updated', { assetId, defaultAccess });

    await this.eventBus.emit('data.governance.access.updated', {
      assetId,
      access,
      timestamp: new Date()
    });
  } catch (error) {
    this.logger.error('Failed to update access control', { assetId, access, error });
    throw error;
  }
}
/**
 * Replaces the privacy settings on an asset's governance record.
 *
 * A governance record is created on demand when the asset has none. Emits
 * `data.governance.privacy.updated` after the record is stored.
 *
 * @param assetId - Asset whose privacy settings change.
 * @param privacy - New privacy settings; stored as-is.
 * @throws Re-throws any error after logging it.
 */
async updatePrivacySettings(assetId: string, privacy: PrivacySettings): Promise<void> {
  try {
    const record = this.governance.get(assetId) ?? this.createEmptyGovernance(assetId);
    record.privacy = privacy;
    this.governance.set(assetId, record);

    const { containsPII, consentRequired } = privacy;
    this.logger.info('Privacy settings updated', { assetId, containsPII, consentRequired });

    await this.eventBus.emit('data.governance.privacy.updated', {
      assetId,
      privacy,
      timestamp: new Date()
    });
  } catch (error) {
    this.logger.error('Failed to update privacy settings', { assetId, privacy, error });
    throw error;
  }
}
/**
 * Appends an entry to an asset's audit log.
 *
 * A governance record is created on demand when the asset has none. The entry
 * copies `ipAddress` and `userAgent` from `details` (when present) and also
 * keeps the whole `details` value. Emits `data.governance.access.audited`.
 *
 * @param assetId - Asset that was accessed.
 * @param userId - User who performed the action.
 * @param action - Action name to record.
 * @param details - Optional free-form context for the entry.
 * @throws Re-throws any error after logging it.
 */
async auditAccess(assetId: string, userId: string, action: string, details?: any): Promise<void> {
  try {
    const record = this.governance.get(assetId) ?? this.createEmptyGovernance(assetId);

    const auditEntry: AuditEntry = {
      id: this.generateId(),
      userId,
      action,
      timestamp: new Date(),
      ipAddress: details?.ipAddress,
      userAgent: details?.userAgent,
      details
    };

    record.audit.push(auditEntry);
    this.governance.set(assetId, record);

    this.logger.info('Access audited', { assetId, userId, action });

    const payload = { assetId, auditEntry, timestamp: new Date() };
    await this.eventBus.emit('data.governance.access.audited', payload);
  } catch (error) {
    this.logger.error('Failed to audit access', { assetId, userId, action, error });
    throw error;
  }
}
/**
 * Returns the audit entries recorded for an asset, newest first.
 *
 * Recognised filter keys are `userId` and `action` (strict equality) and
 * `fromDate` / `toDate` (inclusive bounds; the value is passed to `new Date(...)`).
 * Any other key is ignored.
 *
 * The returned array is always a fresh copy: the audit log stored on the
 * governance record is never reordered or exposed for mutation.
 *
 * @param assetId - Asset whose audit log is read.
 * @param filters - Optional filter map.
 * @returns Matching entries sorted by descending timestamp; an empty array when
 *          the asset has no governance record.
 * @throws Re-throws any error after logging it.
 */
async getAuditTrail(assetId: string, filters?: Record<string, any>): Promise<AuditEntry[]> {
  try {
    const governance = this.governance.get(assetId);
    if (!governance) {
      return [];
    }

    // Parse the date bounds once instead of once per audit entry.
    const criteria: Array<[string, any]> = filters
      ? Object.entries(filters).map(([key, value]): [string, any] =>
          key === 'fromDate' || key === 'toDate' ? [key, new Date(value)] : [key, value]
        )
      : [];

    // filter() always yields a new array, so the in-place sort below cannot
    // reorder governance.audit (previously the unfiltered path sorted the stored log).
    const selected = governance.audit.filter(entry =>
      criteria.every(([key, value]) => {
        if (key === 'userId') return entry.userId === value;
        if (key === 'action') return entry.action === value;
        if (key === 'fromDate') return entry.timestamp >= value;
        if (key === 'toDate') return entry.timestamp <= value;
        return true;
      })
    );

    return selected.sort((a, b) => b.timestamp.getTime() - a.timestamp.getTime());
  } catch (error) {
    this.logger.error('Failed to get audit trail', { assetId, filters, error });
    throw error;
  }
}
/**
 * Builds a compliance report for a set of assets.
 *
 * Each asset that has both an asset entry and a governance record is re-checked
 * via `checkCompliance`; assets missing either are skipped but still counted in
 * `summary.totalAssets`. The report contains a summary, one `assetCompliance`
 * row per evaluated asset, one `policyViolations` row per failed check, and a
 * list of textual recommendations.
 *
 * @param assetIds - Assets to include in the report.
 * @returns The assembled report object.
 * @throws Re-throws any error after logging it.
 */
async generateComplianceReport(assetIds: string[]): Promise<any> {
  try {
    const reportData = {
      summary: {
        totalAssets: assetIds.length,
        compliantAssets: 0,
        nonCompliantAssets: 0,
        violationCount: 0,
        reportDate: new Date()
      },
      assetCompliance: [] as any[],
      policyViolations: [] as any[],
      recommendations: [] as string[]
    };
    const { summary } = reportData;

    let totalViolations = 0;

    for (const assetId of assetIds) {
      const governance = this.governance.get(assetId);
      const asset = this.assets.get(assetId);
      if (!governance || !asset) {
        // Unknown assets contribute nothing beyond the totalAssets count.
        continue;
      }

      const checks = await this.checkCompliance(assetId);
      const violations = checks.filter(check => check.status === 'failed');
      const violationCount = violations.length;
      const compliant = violationCount === 0;

      if (compliant) {
        summary.compliantAssets++;
      } else {
        summary.nonCompliantAssets++;
      }
      totalViolations += violationCount;

      reportData.assetCompliance.push({
        assetId,
        assetName: asset.name,
        classification: asset.classification,
        compliant,
        violationCount,
        policiesApplied: governance.policies.length,
        lastChecked: new Date()
      });

      // One report row per failed check.
      for (const violation of violations) {
        reportData.policyViolations.push({
          assetId,
          assetName: asset.name,
          policyName: violation.policyName,
          violation: violation.details,
          severity: violation.severity || 'medium',
          checkedAt: violation.checkedAt
        });
      }
    }

    summary.violationCount = totalViolations;

    // Recommendations are derived from the completed summary and violation rows.
    reportData.recommendations = this.generateComplianceRecommendations(reportData);

    this.logger.info('Compliance report generated', {
      totalAssets: assetIds.length,
      compliantAssets: summary.compliantAssets,
      violationCount: totalViolations
    });

    return reportData;
  } catch (error) {
    this.logger.error('Failed to generate compliance report', { assetIds, error });
    throw error;
  }
}
/**
 * Decides whether a user may perform an action on an asset.
 *
 * Evaluation order:
 * 1. Unknown asset or missing governance record: denied.
 * 2. An explicit `deny:<action>` permission on the user: denied. This is now
 *    checked first so that it also applies when `defaultAccess` is `'none'`
 *    (previously a denied user could still be granted access on that path).
 * 3. `defaultAccess === 'none'`: allowed only with an explicit user permission
 *    or a role that carries the permission.
 * 4. `'restricted'` / `'confidential'` assets: allowed only with an explicit
 *    user permission.
 * 5. Otherwise: allowed.
 *
 * Any error during evaluation is logged and treated as a denial.
 *
 * @param assetId - Asset being accessed.
 * @param userId - User requesting access.
 * @param action - Permission name being requested.
 * @returns `true` when access is granted, `false` otherwise.
 */
async validateDataAccess(assetId: string, userId: string, action: string): Promise<boolean> {
  try {
    const governance = this.governance.get(assetId);
    const asset = this.assets.get(assetId);
    if (!governance || !asset) {
      return false;
    }

    const { access } = governance;
    const userHasPermission = (permission: string): boolean =>
      access.users.some(user => user.userId === userId && user.permissions.includes(permission));

    // Explicit deny always wins, regardless of the default access mode.
    if (userHasPermission(`deny:${action}`)) {
      return false;
    }

    const hasUserAccess = userHasPermission(action);

    if (access.defaultAccess === 'none') {
      // NOTE(review): this matches ANY role holding the permission and does not
      // verify that `userId` is a member of that role, so one permissive role
      // grants the action to every user. Role membership is not available in
      // this method; scope this check to the user's roles once it is.
      const hasRoleAccess = access.roles.some(role => role.permissions.includes(action));
      return hasUserAccess || hasRoleAccess;
    }

    // Sensitive classifications require an explicit per-user grant.
    if (asset.classification === 'restricted' || asset.classification === 'confidential') {
      return hasUserAccess;
    }

    return true; // Default allow for non-sensitive data
  } catch (error) {
    // Fail closed: an evaluation error must never grant access.
    this.logger.error('Failed to validate data access', { assetId, userId, action, error });
    return false;
  }
}
/**
 * Applies every configured anonymization rule to an asset.
 *
 * Does nothing (no log, no event) when the asset has no governance record or
 * its privacy settings say it contains no PII. Rules are applied one after
 * another, then `data.governance.anonymization.completed` is emitted.
 *
 * @param assetId - Asset to anonymize.
 * @param options - Optional value forwarded to each rule and to the event payload.
 * @throws Re-throws any error after logging it.
 */
async anonymizeData(assetId: string, options?: any): Promise<void> {
  try {
    const record = this.governance.get(assetId);
    const hasPII = Boolean(record && record.privacy.containsPII);
    if (!record || !hasPII) {
      return;
    }

    const rules = record.privacy.anonymizationRules;
    for (const anonymizationRule of rules) {
      await this.applyAnonymizationRule(assetId, anonymizationRule, options);
    }

    this.logger.info('Data anonymization completed', { assetId });

    const payload = { assetId, options, timestamp: new Date() };
    await this.eventBus.emit('data.governance.anonymization.completed', payload);
  } catch (error) {
    this.logger.error('Failed to anonymize data', { assetId, options, error });
    throw error;
  }
}
/**
 * Dispatches a data-subject request to the handler for its `type`.
 *
 * Supported types are `'access'`, `'rectification'`, `'erasure'` and
 * `'portability'`. After the handler returns, the outcome is logged and a
 * `data.governance.subject.request.handled` event is emitted.
 *
 * @param assetId - Asset the request refers to.
 * @param request - Request object; only its `type` is inspected here.
 * @returns The response produced by the matching handler.
 * @throws Error when the asset or its governance record is unknown, or when the
 *         request type is unsupported; failures are logged and re-thrown.
 */
async handleDataSubjectRequest(assetId: string, request: any): Promise<any> {
  try {
    const governance = this.governance.get(assetId);
    const asset = this.assets.get(assetId);
    if (!(governance && asset)) {
      throw new Error(`Asset or governance not found for ${assetId}`);
    }

    let response: any;
    const requestType = request.type;
    if (requestType === 'access') {
      response = await this.handleAccessRequest(assetId, request);
    } else if (requestType === 'rectification') {
      response = await this.handleRectificationRequest(assetId, request);
    } else if (requestType === 'erasure') {
      response = await this.handleErasureRequest(assetId, request);
    } else if (requestType === 'portability') {
      response = await this.handlePortabilityRequest(assetId, request);
    } else {
      throw new Error(`Unsupported request type: ${request.type}`);
    }

    this.logger.info('Data subject request handled', { assetId, requestType: request.type });

    await this.eventBus.emit('data.governance.subject.request.handled', {
      assetId,
      request,
      response,
      timestamp: new Date()
    });

    return response;
  } catch (error) {
    this.logger.error('Failed to handle data subject request', { assetId, request, error });
    throw error;
  }
}
// Private helper methods
/**
 * Registers the two built-in governance policies (PII protection and financial
 * compliance) in the policy registry, keyed by their ids. An existing entry
 * with the same id is overwritten.
 */
private initializeDefaultPolicies(): void {
  const piiProtection: GovernancePolicy = {
    id: 'policy_pii_protection',
    name: 'PII Protection Policy',
    description: 'Ensures proper handling of personally identifiable information',
    type: 'privacy',
    rules: [
      'PII data must be encrypted at rest',
      'PII access must be logged',
      'PII retention must not exceed 7 years'
    ],
    applicableClassifications: ['pii'],
    active: true,
    severity: 'high',
    createdAt: new Date(),
    updatedAt: new Date()
  };

  const financialCompliance: GovernancePolicy = {
    id: 'policy_financial_compliance',
    name: 'Financial Data Compliance',
    description: 'Compliance with financial regulations',
    type: 'compliance',
    rules: [
      'Financial data must be retained for 7 years',
      'Access to financial data must be role-based',
      'All financial data access must be audited'
    ],
    applicableClassifications: ['financial'],
    active: true,
    severity: 'critical',
    createdAt: new Date(),
    updatedAt: new Date()
  };

  for (const builtIn of [piiProtection, financialCompliance]) {
    this.policies.set(builtIn.id, builtIn);
  }
}
/**
 * Builds a fresh governance record for an asset with default settings:
 * no policies, compliance results or audit entries, a 365-unit retention
 * period without legal hold, `defaultAccess` of `'none'`, and privacy settings
 * that declare no PII. The record is returned, not stored.
 *
 * @param assetId - Asset the record belongs to.
 * @returns The new governance record with a generated id.
 */
private createEmptyGovernance(assetId: string): DataGovernance {
  const id = this.generateId();

  const retention: DataGovernance['retention'] = {
    retentionPeriod: 365,
    retentionReason: 'Business requirement',
    legalHold: false
  };

  const access: DataGovernance['access'] = {
    defaultAccess: 'none',
    roles: [],
    users: []
  };

  const privacy: DataGovernance['privacy'] = {
    containsPII: false,
    sensitiveFields: [],
    anonymizationRules: [],
    consentRequired: false,
    dataSubjectRights: []
  };

  return {
    id,
    assetId,
    policies: [],
    compliance: [],
    retention,
    access,
    privacy,
    audit: []
  };
}
/**
 * Mock compliance check: the outcome is random (passes when `Math.random()`
 * exceeds 0.1) and does not inspect the asset. A real implementation would
 * validate the asset against the policy rules.
 *
 * @param asset - Asset under evaluation (unused by the mock).
 * @param policy - Policy whose id, name and severity are copied into the result.
 * @returns A check with status `'passed'` or `'failed'`; failed checks also
 *          carry a fixed list of recommendations.
 */
private async performComplianceCheck(asset: DataAsset, policy: GovernancePolicy): Promise<ComplianceCheck> {
  // 90% compliance rate for demo
  const passed = Math.random() > 0.1;

  const outcome: ComplianceCheck = {
    id: this.generateId(),
    policyId: policy.id,
    policyName: policy.name,
    status: passed ? 'passed' : 'failed',
    checkedAt: new Date(),
    details: passed ? 'All policy requirements met' : 'Policy violation detected',
    severity: policy.severity
  };

  if (passed) {
    return outcome;
  }

  outcome.recommendations = [
    'Review data handling procedures',
    'Update access controls',
    'Implement additional monitoring'
  ];
  return outcome;
}
/**
 * Mock anonymization step: only logs the rule's `type`; no data is modified.
 *
 * @param assetId - Asset the rule would be applied to.
 * @param rule - Anonymization rule; only `rule.type` is read.
 * @param options - Unused by the mock.
 */
private async applyAnonymizationRule(assetId: string, rule: any, options?: any): Promise<void> {
  const ruleType = rule.type;
  this.logger.info('Applying anonymization rule', { assetId, rule: ruleType });
}
/**
 * Stub handler for `'access'` requests: returns a fixed "completed" response
 * stamped with the current time. Neither argument is inspected.
 */
private async handleAccessRequest(assetId: string, request: any): Promise<any> {
  const completedAt = new Date();
  const response = {
    status: 'completed',
    data: 'Data access provided according to privacy policy',
    timestamp: completedAt
  };
  return response;
}
/**
 * Stub handler for `'rectification'` requests: returns a fixed "completed"
 * response stamped with the current time. Neither argument is inspected.
 */
private async handleRectificationRequest(assetId: string, request: any): Promise<any> {
  const completedAt = new Date();
  const response = {
    status: 'completed',
    changes: 'Data rectification completed',
    timestamp: completedAt
  };
  return response;
}
/**
 * Stub handler for `'erasure'` requests: returns a fixed "completed" response
 * stamped with the current time. Neither argument is inspected and nothing is erased.
 */
private async handleErasureRequest(assetId: string, request: any): Promise<any> {
  const completedAt = new Date();
  const response = {
    status: 'completed',
    erasure: 'Data erasure completed',
    timestamp: completedAt
  };
  return response;
}
/**
 * Stub handler for `'portability'` requests: returns a fixed "completed"
 * response stamped with the current time. Neither argument is inspected.
 */
private async handlePortabilityRequest(assetId: string, request: any): Promise<any> {
  const completedAt = new Date();
  const response = {
    status: 'completed',
    export: 'Data export provided',
    timestamp: completedAt
  };
  return response;
}
/**
 * Derives human-readable recommendations from a compliance report.
 *
 * Adds a message when there are non-compliant assets, when the violation count
 * exceeds 10, and when any violation row has severity `'critical'`. If none of
 * those apply, a single "all compliant" message is returned.
 *
 * @param reportData - Report with `summary` and `policyViolations` populated.
 * @returns At least one recommendation string.
 */
private generateComplianceRecommendations(reportData: any): string[] {
  const messages: string[] = [];
  const summary = reportData.summary;

  const nonCompliant = summary.nonCompliantAssets;
  if (nonCompliant > 0) {
    messages.push(`${nonCompliant} assets require compliance remediation.`);
  }

  if (summary.violationCount > 10) {
    messages.push('High number of policy violations detected. Review governance policies and implementation.');
  }

  const criticalCount = reportData.policyViolations
    .filter((row: any) => row.severity === 'critical')
    .length;
  if (criticalCount > 0) {
    messages.push(`${criticalCount} critical violations require immediate attention.`);
  }

  return messages.length > 0
    ? messages
    : ['All assets are compliant with governance policies. Continue monitoring.'];
}
/**
 * Produces an identifier of the form `governance_<epoch-ms>_<random>`, where
 * the random part is up to nine base-36 characters. Not cryptographically secure.
 */
private generateId(): string {
  const stamp = Date.now();
  // Skip the leading "0." of the base-36 fraction and keep the next nine characters.
  const suffix = Math.random().toString(36).slice(2, 11);
  return `governance_${stamp}_${suffix}`;
}
/**
 * Injects the asset map this service reads from (typically supplied by
 * DataCatalogService). The map is stored by reference, not copied, so later
 * changes made to it by the caller are visible here.
 *
 * @param assets - Asset map keyed by string, presumably the asset id (lookups
 *                 elsewhere in this class use `assetId`).
 */
setAssets(assets: Map<string, DataAsset>): void {
this.assets = assets;
}
/**
 * Injects the governance map this service reads and writes (typically supplied
 * by DataCatalogService). The map is stored by reference, not copied, so
 * records written by this service are visible to the caller.
 *
 * @param governance - Governance records keyed by string, presumably the asset
 *                     id (lookups elsewhere in this class use `assetId`).
 */
setGovernance(governance: Map<string, DataGovernance>): void {
this.governance = governance;
}
}

View file

@ -0,0 +1,607 @@
import { EventBus } from '@stock-bot/event-bus';
import { Logger } from '@stock-bot/utils';
import {
DataLineage,
DataAsset,
LineageTransformation,
ImpactAnalysis,
LineageQuery,
LineageDirection
} from '../types/DataCatalog';
/**
 * Contract for recording and querying data lineage: which assets feed into
 * (upstream of) or are derived from (downstream of) a given asset.
 * The member notes below describe the behaviour of the implementation in this file.
 */
export interface DataLineageService {
/** Stores a lineage record under `lineage.assetId`. */
addLineage(lineage: DataLineage): Promise<void>;
/** Returns the lineage record for an asset, or `null` when none is stored. */
getLineage(assetId: string): Promise<DataLineage | null>;
/** Merges `lineage` into the stored record; resolves to `null` when the asset has no record. */
updateLineage(assetId: string, lineage: Partial<DataLineage>): Promise<DataLineage | null>;
/** Records `upstreamAssetId` as an input of `assetId`, mirroring the link on the upstream asset's record. */
addUpstreamDependency(assetId: string, upstreamAssetId: string, transformation?: LineageTransformation): Promise<void>;
/** Records `downstreamAssetId` as a consumer of `assetId`, mirroring the link on the downstream asset's record. */
addDownstreamDependency(assetId: string, downstreamAssetId: string, transformation?: LineageTransformation): Promise<void>;
/** Removes the upstream link from both records; no-op when `assetId` has no lineage record. */
removeUpstreamDependency(assetId: string, upstreamAssetId: string): Promise<void>;
/** Removes the downstream link from both records; no-op when `assetId` has no lineage record. */
removeDownstreamDependency(assetId: string, downstreamAssetId: string): Promise<void>;
/** Returns known assets upstream of `assetId`; the implementation defaults `depth` to 1. */
getUpstreamAssets(assetId: string, depth?: number): Promise<DataAsset[]>;
/** Returns known assets downstream of `assetId`; the implementation defaults `depth` to 1. */
getDownstreamAssets(assetId: string, depth?: number): Promise<DataAsset[]>;
/** Summarises the downstream assets and users that a change to `assetId` may affect. */
analyzeImpact(assetId: string): Promise<ImpactAnalysis>;
/** Collects upstream and/or downstream assets for the queried asset ids, without duplicates. */
queryLineage(query: LineageQuery): Promise<DataAsset[]>;
/** Returns an object with `nodes` and `edges` arrays describing the lineage around `assetId`; the implementation defaults `depth` to 3. */
getLineageGraph(assetId: string, direction: LineageDirection, depth?: number): Promise<any>;
/** Returns detected dependency cycles, each as a list of asset ids. */
detectCircularDependencies(): Promise<string[][]>;
}
/**
 * In-memory implementation of {@link DataLineageService}.
 *
 * Lineage records live in a private map keyed by asset id. Every dependency is
 * stored on both ends (as a downstream entry on the upstream asset and as an
 * upstream entry on the downstream asset). Asset metadata is read from a map
 * injected through {@link DataLineageServiceImpl.setAssets}; ids that are not
 * present in that map are still traversed but never returned as assets.
 *
 * Public methods log failures through the injected logger and re-throw them.
 */
export class DataLineageServiceImpl implements DataLineageService {
  private readonly lineages: Map<string, DataLineage> = new Map();
  private assets: Map<string, DataAsset> = new Map();

  constructor(
    private eventBus: EventBus,
    private logger: Logger
  ) {}

  /**
   * Stores a lineage record under `lineage.assetId`, replacing any existing
   * record, and emits `data.lineage.added`.
   */
  async addLineage(lineage: DataLineage): Promise<void> {
    try {
      this.lineages.set(lineage.assetId, lineage);

      this.logger.info('Data lineage added', {
        assetId: lineage.assetId,
        upstreamCount: lineage.upstreamAssets.length,
        downstreamCount: lineage.downstreamAssets.length
      });

      await this.eventBus.emit('data.lineage.added', {
        assetId: lineage.assetId,
        lineage,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add data lineage', { lineage, error });
      throw error;
    }
  }

  /** Returns the lineage record for `assetId`, or `null` when none is stored. */
  async getLineage(assetId: string): Promise<DataLineage | null> {
    try {
      return this.lineages.get(assetId) ?? null;
    } catch (error) {
      this.logger.error('Failed to get data lineage', { assetId, error });
      throw error;
    }
  }

  /**
   * Shallow-merges `lineage` into the stored record and emits `data.lineage.updated`.
   *
   * `assetId` is pinned to the map key and `updatedAt` is refreshed, so a patch
   * cannot leave a record stored under one id while claiming another.
   *
   * @returns The merged record, or `null` when the asset has no lineage record.
   */
  async updateLineage(assetId: string, lineage: Partial<DataLineage>): Promise<DataLineage | null> {
    try {
      const existingLineage = this.lineages.get(assetId);
      if (!existingLineage) {
        return null;
      }

      const updatedLineage: DataLineage = {
        ...existingLineage,
        ...lineage,
        assetId,
        updatedAt: new Date()
      };

      this.lineages.set(assetId, updatedLineage);

      this.logger.info('Data lineage updated', { assetId, changes: lineage });

      await this.eventBus.emit('data.lineage.updated', {
        assetId,
        lineage: updatedLineage,
        changes: lineage,
        timestamp: new Date()
      });

      return updatedLineage;
    } catch (error) {
      this.logger.error('Failed to update data lineage', { assetId, lineage, error });
      throw error;
    }
  }

  /**
   * Records `upstreamAssetId` as an input of `assetId` and mirrors the link on
   * the upstream asset's record. Lineage records are created on demand. When the
   * link already exists nothing is changed, logged or emitted.
   *
   * @param transformation - Optional transformation appended to `assetId`'s record.
   */
  async addUpstreamDependency(
    assetId: string,
    upstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId) ?? this.createEmptyLineage(assetId);

      if (lineage.upstreamAssets.includes(upstreamAssetId)) {
        return;
      }

      lineage.upstreamAssets.push(upstreamAssetId);
      if (transformation) {
        lineage.transformations.push(transformation);
      }
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);

      // Keep the reverse edge in sync on the upstream asset.
      this.addDownstreamToUpstream(upstreamAssetId, assetId);

      this.logger.info('Upstream dependency added', { assetId, upstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.added', {
        assetId,
        upstreamAssetId,
        transformation,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add upstream dependency', { assetId, upstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Records `downstreamAssetId` as a consumer of `assetId` and mirrors the link
   * on the downstream asset's record. Lineage records are created on demand.
   * When the link already exists nothing is changed, logged or emitted.
   *
   * @param transformation - Optional transformation appended to the downstream asset's record.
   */
  async addDownstreamDependency(
    assetId: string,
    downstreamAssetId: string,
    transformation?: LineageTransformation
  ): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId) ?? this.createEmptyLineage(assetId);

      if (lineage.downstreamAssets.includes(downstreamAssetId)) {
        return;
      }

      lineage.downstreamAssets.push(downstreamAssetId);
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);

      // Keep the reverse edge in sync on the downstream asset.
      this.addUpstreamToDownstream(downstreamAssetId, assetId, transformation);

      this.logger.info('Downstream dependency added', { assetId, downstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.added', {
        assetId,
        downstreamAssetId,
        transformation,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add downstream dependency', { assetId, downstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Removes the upstream link from both records and emits
   * `data.lineage.dependency.removed`. No-op when `assetId` has no lineage record.
   */
  async removeUpstreamDependency(assetId: string, upstreamAssetId: string): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId);
      if (!lineage) {
        return;
      }

      lineage.upstreamAssets = lineage.upstreamAssets.filter(id => id !== upstreamAssetId);
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);

      this.removeDownstreamFromUpstream(upstreamAssetId, assetId);

      this.logger.info('Upstream dependency removed', { assetId, upstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.removed', {
        assetId,
        upstreamAssetId,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to remove upstream dependency', { assetId, upstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Removes the downstream link from both records and emits
   * `data.lineage.dependency.removed`. No-op when `assetId` has no lineage record.
   */
  async removeDownstreamDependency(assetId: string, downstreamAssetId: string): Promise<void> {
    try {
      const lineage = this.lineages.get(assetId);
      if (!lineage) {
        return;
      }

      lineage.downstreamAssets = lineage.downstreamAssets.filter(id => id !== downstreamAssetId);
      lineage.updatedAt = new Date();
      this.lineages.set(assetId, lineage);

      this.removeUpstreamFromDownstream(downstreamAssetId, assetId);

      this.logger.info('Downstream dependency removed', { assetId, downstreamAssetId });

      await this.eventBus.emit('data.lineage.dependency.removed', {
        assetId,
        downstreamAssetId,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to remove downstream dependency', { assetId, downstreamAssetId, error });
      throw error;
    }
  }

  /**
   * Returns every known asset reachable from `assetId` by following at most
   * `depth` upstream links (default 1, i.e. direct inputs only).
   */
  async getUpstreamAssets(assetId: string, depth: number = 1): Promise<DataAsset[]> {
    try {
      return this.collectReachable(assetId, depth, 'upstream');
    } catch (error) {
      this.logger.error('Failed to get upstream assets', { assetId, depth, error });
      throw error;
    }
  }

  /**
   * Returns every known asset reachable from `assetId` by following at most
   * `depth` downstream links (default 1, i.e. direct consumers only).
   */
  async getDownstreamAssets(assetId: string, depth: number = 1): Promise<DataAsset[]> {
    try {
      return this.collectReachable(assetId, depth, 'downstream');
    } catch (error) {
      this.logger.error('Failed to get downstream assets', { assetId, depth, error });
      throw error;
    }
  }

  /**
   * Estimates the blast radius of changing `assetId`.
   *
   * Looks up to five levels downstream, collects the owner, steward and top
   * users of each affected asset, and grades the impact by the number of
   * affected assets: more than 20 is `critical`, more than 10 `high`, more than
   * 5 `medium`, otherwise `low`.
   */
  async analyzeImpact(assetId: string): Promise<ImpactAnalysis> {
    try {
      const downstreamAssets = await this.getDownstreamAssets(assetId, 5); // Go deep for impact analysis
      const affectedUsers = new Set<string>();

      for (const asset of downstreamAssets) {
        affectedUsers.add(asset.owner);
        if (asset.steward) {
          affectedUsers.add(asset.steward);
        }
        // Usage analytics may be absent on an asset; treat that as "no known users".
        asset.usage?.topUsers?.forEach(user => affectedUsers.add(user.userId));
      }

      const assetCount = downstreamAssets.length;
      let estimatedImpact: 'low' | 'medium' | 'high' | 'critical' = 'low';
      if (assetCount > 20) {
        estimatedImpact = 'critical';
      } else if (assetCount > 10) {
        estimatedImpact = 'high';
      } else if (assetCount > 5) {
        estimatedImpact = 'medium';
      }

      const impact: ImpactAnalysis = {
        downstreamAssets: downstreamAssets.map(asset => asset.id),
        affectedUsers: Array.from(affectedUsers),
        estimatedImpact,
        impactDescription: this.generateImpactDescription(assetCount, affectedUsers.size),
        recommendations: this.generateRecommendations(estimatedImpact, assetCount)
      };

      this.logger.info('Impact analysis completed', {
        assetId,
        impactLevel: estimatedImpact,
        affectedAssets: assetCount,
        affectedUsers: affectedUsers.size
      });

      return impact;
    } catch (error) {
      this.logger.error('Failed to analyze impact', { assetId, error });
      throw error;
    }
  }

  /**
   * Collects the upstream and/or downstream assets (per `query.direction`) of
   * every id in `query.assetIds`, keeping the first occurrence of each asset id.
   * Returns an empty array when `query.assetIds` is not set.
   */
  async queryLineage(query: LineageQuery): Promise<DataAsset[]> {
    try {
      // Keyed by asset id: insertion order is preserved and duplicates are dropped in O(1).
      const uniqueById = new Map<string, DataAsset>();
      const remember = (found: DataAsset[]): void => {
        for (const asset of found) {
          if (!uniqueById.has(asset.id)) {
            uniqueById.set(asset.id, asset);
          }
        }
      };

      if (query.assetIds) {
        for (const assetId of query.assetIds) {
          if (query.direction === 'upstream' || query.direction === 'both') {
            remember(await this.getUpstreamAssets(assetId, query.depth));
          }
          if (query.direction === 'downstream' || query.direction === 'both') {
            remember(await this.getDownstreamAssets(assetId, query.depth));
          }
        }
      }

      return Array.from(uniqueById.values());
    } catch (error) {
      this.logger.error('Failed to query lineage', { query, error });
      throw error;
    }
  }

  /**
   * Builds a node/edge view of the lineage around `assetId`.
   *
   * Nodes are expanded breadth-first for up to `depth` hops (default 3). Every
   * edge endpoint that exists in the asset map is included as a node, so edges
   * no longer point at nodes missing from the result, and each `source -> target`
   * pair is reported once even when it is seen from both of its ends.
   * A negative `depth` means "no limit"; a `depth` of 0 yields an empty graph.
   *
   * @returns `{ nodes, edges }` where nodes carry `id`, `name`, `type` and
   *          `classification`, and edges carry `source`, `target` and `type`.
   */
  async getLineageGraph(assetId: string, direction: LineageDirection, depth: number = 3): Promise<any> {
    try {
      const nodes = new Map<string, Record<string, unknown>>();
      const edges: Array<{ source: string; target: string; type: 'upstream' | 'downstream' }> = [];

      if (depth === 0) {
        return { nodes: [], edges };
      }

      const maxLevels = depth >= 0 ? depth : Number.POSITIVE_INFINITY;
      const edgeKeys = new Set<string>();
      const expanded = new Set<string>();

      const addNode = (id: string): void => {
        if (nodes.has(id)) return;
        const asset = this.assets.get(id);
        if (asset) {
          nodes.set(id, {
            id,
            name: asset.name,
            type: asset.type,
            classification: asset.classification
          });
        }
      };

      const addEdge = (source: string, target: string, type: 'upstream' | 'downstream'): void => {
        const key = `${source}\u0000${target}`;
        if (edgeKeys.has(key)) return;
        edgeKeys.add(key);
        edges.push({ source, target, type });
      };

      const followUpstream = direction === 'upstream' || direction === 'both';
      const followDownstream = direction === 'downstream' || direction === 'both';

      addNode(assetId);
      let frontier: string[] = [assetId];
      for (let level = 0; level < maxLevels && frontier.length > 0; level++) {
        const nextFrontier: string[] = [];
        for (const currentId of frontier) {
          if (expanded.has(currentId)) continue;
          expanded.add(currentId);

          const lineage = this.lineages.get(currentId);
          if (!lineage) continue;

          if (followUpstream) {
            for (const upstreamId of lineage.upstreamAssets) {
              addNode(upstreamId);
              addEdge(upstreamId, currentId, 'upstream');
              nextFrontier.push(upstreamId);
            }
          }
          if (followDownstream) {
            for (const downstreamId of lineage.downstreamAssets) {
              addNode(downstreamId);
              addEdge(currentId, downstreamId, 'downstream');
              nextFrontier.push(downstreamId);
            }
          }
        }
        frontier = nextFrontier;
      }

      return {
        nodes: Array.from(nodes.values()),
        edges
      };
    } catch (error) {
      this.logger.error('Failed to get lineage graph', { assetId, direction, depth, error });
      throw error;
    }
  }

  /**
   * Searches the downstream links of every lineage record for cycles using a
   * depth-first walk. Each reported cycle is the list of asset ids on the walk
   * from the revisited asset to the asset that links back to it. A warning is
   * logged when at least one cycle is found.
   */
  async detectCircularDependencies(): Promise<string[][]> {
    try {
      const cycles: string[][] = [];
      const visited = new Set<string>();
      const recursionStack = new Set<string>();

      for (const assetId of this.lineages.keys()) {
        if (!visited.has(assetId)) {
          this.detectCycleDFS(assetId, visited, recursionStack, [], cycles);
        }
      }

      if (cycles.length > 0) {
        this.logger.warn('Circular dependencies detected', { cycleCount: cycles.length });
      }

      return cycles;
    } catch (error) {
      this.logger.error('Failed to detect circular dependencies', { error });
      throw error;
    }
  }

  // Private helper methods

  /** Builds an empty lineage record for `assetId`; the record is not stored. */
  private createEmptyLineage(assetId: string): DataLineage {
    return {
      id: this.generateId(),
      assetId,
      upstreamAssets: [],
      downstreamAssets: [],
      transformations: [],
      impact: {
        downstreamAssets: [],
        affectedUsers: [],
        estimatedImpact: 'low',
        impactDescription: '',
        recommendations: []
      },
      createdAt: new Date(),
      updatedAt: new Date()
    };
  }

  /** Adds `downstreamAssetId` to the upstream asset's record, creating the record if needed. */
  private addDownstreamToUpstream(upstreamAssetId: string, downstreamAssetId: string): void {
    const upstreamLineage = this.lineages.get(upstreamAssetId) ?? this.createEmptyLineage(upstreamAssetId);

    if (!upstreamLineage.downstreamAssets.includes(downstreamAssetId)) {
      upstreamLineage.downstreamAssets.push(downstreamAssetId);
      upstreamLineage.updatedAt = new Date();
      this.lineages.set(upstreamAssetId, upstreamLineage);
    }
  }

  /** Adds `upstreamAssetId` (and the optional transformation) to the downstream asset's record, creating the record if needed. */
  private addUpstreamToDownstream(
    downstreamAssetId: string,
    upstreamAssetId: string,
    transformation?: LineageTransformation
  ): void {
    const downstreamLineage = this.lineages.get(downstreamAssetId) ?? this.createEmptyLineage(downstreamAssetId);

    if (!downstreamLineage.upstreamAssets.includes(upstreamAssetId)) {
      downstreamLineage.upstreamAssets.push(upstreamAssetId);
      if (transformation) {
        downstreamLineage.transformations.push(transformation);
      }
      downstreamLineage.updatedAt = new Date();
      this.lineages.set(downstreamAssetId, downstreamLineage);
    }
  }

  /** Drops `downstreamAssetId` from the upstream asset's record, if that record exists. */
  private removeDownstreamFromUpstream(upstreamAssetId: string, downstreamAssetId: string): void {
    const upstreamLineage = this.lineages.get(upstreamAssetId);
    if (upstreamLineage) {
      upstreamLineage.downstreamAssets = upstreamLineage.downstreamAssets.filter(id => id !== downstreamAssetId);
      upstreamLineage.updatedAt = new Date();
      this.lineages.set(upstreamAssetId, upstreamLineage);
    }
  }

  /** Drops `upstreamAssetId` from the downstream asset's record, if that record exists. */
  private removeUpstreamFromDownstream(downstreamAssetId: string, upstreamAssetId: string): void {
    const downstreamLineage = this.lineages.get(downstreamAssetId);
    if (downstreamLineage) {
      downstreamLineage.upstreamAssets = downstreamLineage.upstreamAssets.filter(id => id !== upstreamAssetId);
      downstreamLineage.updatedAt = new Date();
      this.lineages.set(downstreamAssetId, downstreamLineage);
    }
  }

  /**
   * Breadth-first collection of the assets reachable from `startId` within
   * `depth` hops in the given direction.
   *
   * A breadth-first walk is required for a depth-limited search: each asset is
   * expanded the first time it is reached, which is always along a shortest
   * path, so its neighbours are never cut off by the depth limit. The previous
   * depth-first walk marked assets as visited when reached on a longer path and
   * then skipped them on the shorter one, silently dropping in-range assets.
   *
   * A negative `depth` means "no limit"; a `depth` of 0 yields an empty result.
   * Ids missing from the asset map are traversed but not returned.
   */
  private collectReachable(
    startId: string,
    depth: number,
    direction: 'upstream' | 'downstream'
  ): DataAsset[] {
    const result: DataAsset[] = [];
    if (depth === 0) {
      return result;
    }

    const maxLevels = depth >= 0 ? depth : Number.POSITIVE_INFINITY;
    const collectedIds = new Set<string>();
    const expanded = new Set<string>();

    let frontier: string[] = [startId];
    for (let level = 0; level < maxLevels && frontier.length > 0; level++) {
      const nextFrontier: string[] = [];
      for (const currentId of frontier) {
        if (expanded.has(currentId)) continue;
        expanded.add(currentId);

        const lineage = this.lineages.get(currentId);
        if (!lineage) continue;

        const neighbours = direction === 'upstream' ? lineage.upstreamAssets : lineage.downstreamAssets;
        for (const neighbourId of neighbours) {
          const asset = this.assets.get(neighbourId);
          if (asset && !collectedIds.has(asset.id)) {
            collectedIds.add(asset.id);
            result.push(asset);
          }
          nextFrontier.push(neighbourId);
        }
      }
      frontier = nextFrontier;
    }

    return result;
  }

  /**
   * Depth-first walk over downstream links. `recursionStack` holds the ids on
   * the current walk; meeting one of them again means the walk has closed a cycle.
   */
  private detectCycleDFS(
    assetId: string,
    visited: Set<string>,
    recursionStack: Set<string>,
    path: string[],
    cycles: string[][]
  ): void {
    visited.add(assetId);
    recursionStack.add(assetId);
    path.push(assetId);

    const lineage = this.lineages.get(assetId);
    if (lineage) {
      for (const downstreamId of lineage.downstreamAssets) {
        if (!visited.has(downstreamId)) {
          this.detectCycleDFS(downstreamId, visited, recursionStack, path, cycles);
        } else if (recursionStack.has(downstreamId)) {
          // Found a cycle: report the portion of the walk that forms the loop.
          const cycleStart = path.indexOf(downstreamId);
          cycles.push(path.slice(cycleStart));
        }
      }
    }

    path.pop();
    recursionStack.delete(assetId);
  }

  /** Formats the one-line impact summary used in {@link analyzeImpact}. */
  private generateImpactDescription(assetCount: number, userCount: number): string {
    if (assetCount === 0) {
      return 'No downstream dependencies identified.';
    }

    return `Changes to this asset may affect ${assetCount} downstream asset(s) and ${userCount} user(s).`;
  }

  /**
   * Returns the fixed recommendation list for an impact level.
   * `assetCount` is accepted for interface stability but not used.
   */
  private generateRecommendations(impact: string, assetCount: number): string[] {
    switch (impact) {
      case 'critical':
        return [
          'Schedule maintenance window',
          'Notify all stakeholders in advance',
          'Prepare rollback plan',
          'Consider phased rollout'
        ];
      case 'high':
        return [
          'Notify affected users',
          'Test changes thoroughly',
          'Monitor downstream systems'
        ];
      case 'medium':
        return [
          'Test with subset of data',
          'Monitor for issues'
        ];
      default:
        return ['Standard testing procedures apply'];
    }
  }

  /** Produces an id of the form `lineage_<epoch-ms>_<random base-36 suffix>`. */
  private generateId(): string {
    // slice(2, 11) keeps the nine characters after "0." (replaces the deprecated substr(2, 9)).
    return `lineage_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /**
   * Injects the asset map used to resolve ids to assets (typically supplied by
   * DataCatalogService). The map is stored by reference, not copied.
   */
  setAssets(assets: Map<string, DataAsset>): void {
    this.assets = assets;
  }
}

View file

@ -0,0 +1,734 @@
import { EventBus } from '@stock-bot/event-bus';
import { Logger } from '@stock-bot/utils';
import {
DataQuality,
QualityDimension,
QualityRule,
QualityIssue,
QualityTrend,
DataAsset,
QualityAssessmentRequest,
QualityRuleType,
QualitySeverity
} from '../types/DataCatalog';
/**
 * Contract for assessing and tracking the quality of catalogued data assets:
 * assessments, quality rules, issue tracking, trends and reporting.
 */
export interface DataQualityService {
/** Runs a quality assessment for an asset and resolves to its updated quality record. */
assessQuality(assetId: string, request: QualityAssessmentRequest): Promise<DataQuality>;
/** Returns the stored quality record for an asset, or `null` when none exists. */
getQuality(assetId: string): Promise<DataQuality | null>;
/** Applies a partial update to an asset's quality record; resolves to `null` when the asset has no record. */
updateQuality(assetId: string, quality: Partial<DataQuality>): Promise<DataQuality | null>;
addQualityRule(assetId: string, rule: QualityRule): Promise<void>;
removeQualityRule(assetId: string, ruleId: string): Promise<void>;
validateRule(assetId: string, ruleId: string): Promise<boolean>;
/** The `id` and `detectedAt` fields are omitted from the input type. */
reportIssue(assetId: string, issue: Omit<QualityIssue, 'id' | 'detectedAt'>): Promise<void>;
resolveIssue(assetId: string, issueId: string): Promise<void>;
getTrendAnalysis(assetId: string, timeframe: string): Promise<QualityTrend>;
getQualityMetrics(filters?: Record<string, any>): Promise<any>;
generateQualityReport(assetIds: string[]): Promise<any>;
}
/**
 * In-memory implementation of DataQualityService.
 *
 * Quality records live in a Map keyed by asset id; the asset map itself is
 * injected through `setAssets`. Dimension assessment and rule execution are
 * mock implementations that return randomised results.
 */
export class DataQualityServiceImpl implements DataQualityService {
  /** Quality record per asset id. */
  private qualities: Map<string, DataQuality> = new Map();
  /** Known assets per asset id (replaced wholesale by `setAssets`). */
  private assets: Map<string, DataAsset> = new Map();

  constructor(
    private eventBus: EventBus,
    private logger: Logger
  ) {}

  /**
   * Runs the checks selected in `request`, stores the resulting dimensions,
   * overall score and trend point, and emits `data.quality.assessed`.
   *
   * @throws Error when no asset with `assetId` is known
   */
  async assessQuality(assetId: string, request: QualityAssessmentRequest): Promise<DataQuality> {
    try {
      const asset = this.assets.get(assetId);
      if (!asset) {
        throw new Error(`Asset with id ${assetId} not found`);
      }

      const quality = this.qualities.get(assetId) ?? this.createEmptyQuality(assetId);

      // Perform quality assessment based on request
      const assessmentResults = await this.performQualityAssessment(asset, request);

      // Update quality metrics
      quality.dimensions = assessmentResults.dimensions;
      quality.overallScore = this.calculateOverallScore(assessmentResults.dimensions);
      quality.lastAssessment = new Date();

      // Update trend data
      this.updateQualityTrend(quality, quality.overallScore);
      this.qualities.set(assetId, quality);

      this.logger.info('Quality assessment completed', {
        assetId,
        overallScore: quality.overallScore,
        dimensionCount: quality.dimensions.length
      });
      await this.eventBus.emit('data.quality.assessed', {
        assetId,
        quality,
        request,
        timestamp: new Date()
      });
      return quality;
    } catch (error) {
      this.logger.error('Failed to assess quality', { assetId, request, error });
      throw error;
    }
  }

  /** Returns the stored quality record for the asset, or `null` when none exists. */
  async getQuality(assetId: string): Promise<DataQuality | null> {
    try {
      return this.qualities.get(assetId) ?? null;
    } catch (error) {
      this.logger.error('Failed to get quality', { assetId, error });
      throw error;
    }
  }

  /**
   * Shallow-merges `quality` into the stored record, stamps `lastAssessment`
   * with the current time and emits `data.quality.updated`.
   *
   * The record's `id` and `assetId` are never overwritten, so the record
   * always stays consistent with the key it is stored under.
   *
   * @returns the merged record, or `null` when the asset has no record
   */
  async updateQuality(assetId: string, quality: Partial<DataQuality>): Promise<DataQuality | null> {
    try {
      const existingQuality = this.qualities.get(assetId);
      if (!existingQuality) {
        return null;
      }

      const updatedQuality: DataQuality = {
        ...existingQuality,
        ...quality,
        // Identity fields are not updatable: a changed assetId would no longer
        // match the map key and would break metric filtering by asset.
        id: existingQuality.id,
        assetId,
        lastAssessment: new Date()
      };
      this.qualities.set(assetId, updatedQuality);

      this.logger.info('Quality updated', { assetId, changes: quality });
      await this.eventBus.emit('data.quality.updated', {
        assetId,
        quality: updatedQuality,
        changes: quality,
        timestamp: new Date()
      });
      return updatedQuality;
    } catch (error) {
      this.logger.error('Failed to update quality', { assetId, quality, error });
      throw error;
    }
  }

  /**
   * Appends `rule` to the asset's quality record (creating an empty record
   * if needed) and emits `data.quality.rule.added`. A rule without an id
   * gets a generated one, written onto the passed-in object.
   */
  async addQualityRule(assetId: string, rule: QualityRule): Promise<void> {
    try {
      const quality = this.qualities.get(assetId) ?? this.createEmptyQuality(assetId);

      // Ensure rule has an ID
      if (!rule.id) {
        rule.id = this.generateId();
      }
      quality.rules.push(rule);
      this.qualities.set(assetId, quality);

      this.logger.info('Quality rule added', { assetId, ruleId: rule.id, ruleType: rule.type });
      await this.eventBus.emit('data.quality.rule.added', {
        assetId,
        rule,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add quality rule', { assetId, rule, error });
      throw error;
    }
  }

  /**
   * Removes every rule with id `ruleId` from the asset's quality record and
   * emits `data.quality.rule.removed`.
   *
   * @throws Error when the asset has no quality record
   */
  async removeQualityRule(assetId: string, ruleId: string): Promise<void> {
    try {
      const quality = this.qualities.get(assetId);
      if (!quality) {
        throw new Error(`Quality not found for asset ${assetId}`);
      }
      quality.rules = quality.rules.filter(rule => rule.id !== ruleId);
      this.qualities.set(assetId, quality);

      this.logger.info('Quality rule removed', { assetId, ruleId });
      await this.eventBus.emit('data.quality.rule.removed', {
        assetId,
        ruleId,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to remove quality rule', { assetId, ruleId, error });
      throw error;
    }
  }

  /**
   * Executes one rule against the asset. On failure an unresolved issue is
   * recorded and `data.quality.issue.detected` is emitted.
   *
   * @returns whether the rule passed; `false` when the asset, its quality
   *          record or the rule is unknown
   */
  async validateRule(assetId: string, ruleId: string): Promise<boolean> {
    try {
      const quality = this.qualities.get(assetId);
      const asset = this.assets.get(assetId);
      if (!quality || !asset) {
        return false;
      }
      const rule = quality.rules.find(r => r.id === ruleId);
      if (!rule) {
        return false;
      }

      const isValid = await this.executeQualityRule(asset, rule);
      if (!isValid) {
        // Create quality issue
        const issue: QualityIssue = {
          id: this.generateId(),
          ruleId: rule.id,
          type: rule.type,
          severity: rule.severity,
          message: `Quality rule validation failed: ${rule.description}`,
          detectedAt: new Date(),
          resolved: false
        };
        quality.issues.push(issue);
        this.qualities.set(assetId, quality);
        await this.eventBus.emit('data.quality.issue.detected', {
          assetId,
          issue,
          rule,
          timestamp: new Date()
        });
      }
      return isValid;
    } catch (error) {
      this.logger.error('Failed to validate quality rule', { assetId, ruleId, error });
      throw error;
    }
  }

  /**
   * Records a new issue on the asset's quality record (creating an empty
   * record if needed), filling in `id` and `detectedAt`, and emits
   * `data.quality.issue.reported`.
   */
  async reportIssue(assetId: string, issue: Omit<QualityIssue, 'id' | 'detectedAt'>): Promise<void> {
    try {
      const quality = this.qualities.get(assetId) ?? this.createEmptyQuality(assetId);
      const fullIssue: QualityIssue = {
        ...issue,
        id: this.generateId(),
        detectedAt: new Date()
      };
      quality.issues.push(fullIssue);
      this.qualities.set(assetId, quality);

      this.logger.info('Quality issue reported', {
        assetId,
        issueId: fullIssue.id,
        severity: fullIssue.severity
      });
      await this.eventBus.emit('data.quality.issue.reported', {
        assetId,
        issue: fullIssue,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to report quality issue', { assetId, issue, error });
      throw error;
    }
  }

  /**
   * Marks an issue as resolved, stamps `resolvedAt` and emits
   * `data.quality.issue.resolved`.
   *
   * @throws Error when the asset has no quality record or the issue is unknown
   */
  async resolveIssue(assetId: string, issueId: string): Promise<void> {
    try {
      const quality = this.qualities.get(assetId);
      if (!quality) {
        throw new Error(`Quality not found for asset ${assetId}`);
      }
      const issue = quality.issues.find(i => i.id === issueId);
      if (!issue) {
        throw new Error(`Issue ${issueId} not found for asset ${assetId}`);
      }
      issue.resolved = true;
      issue.resolvedAt = new Date();
      this.qualities.set(assetId, quality);

      this.logger.info('Quality issue resolved', { assetId, issueId });
      await this.eventBus.emit('data.quality.issue.resolved', {
        assetId,
        issue,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to resolve quality issue', { assetId, issueId, error });
      throw error;
    }
  }

  /**
   * Returns the asset's trend restricted to `timeframe` ('day', 'week',
   * 'month'; any other value keeps all data points), with direction and
   * change rate recomputed over the remaining points.
   *
   * @throws Error when the asset has no quality record
   */
  async getTrendAnalysis(assetId: string, timeframe: string): Promise<QualityTrend> {
    try {
      const quality = this.qualities.get(assetId);
      if (!quality) {
        throw new Error(`Quality not found for asset ${assetId}`);
      }
      // Filter trend data by timeframe
      const filteredTrend = this.filterTrendByTimeframe(quality.trend, timeframe);
      // Calculate trend direction and change rate
      const trendAnalysis = this.analyzeTrend(filteredTrend.dataPoints);
      return {
        ...filteredTrend,
        trend: trendAnalysis.direction,
        changeRate: trendAnalysis.changeRate
      };
    } catch (error) {
      this.logger.error('Failed to get trend analysis', { assetId, timeframe, error });
      throw error;
    }
  }

  /**
   * Aggregates metrics over the stored quality records. When `filters` is
   * given, only records whose asset matches every `type`, `owner` and
   * `classification` entry are included; other filter keys are ignored.
   */
  async getQualityMetrics(filters?: Record<string, any>): Promise<any> {
    try {
      let qualities = Array.from(this.qualities.values());

      // Apply filters if provided
      if (filters) {
        const filterEntries = Object.entries(filters);
        // Collect matching ids once so each record is checked in O(1).
        const matchingAssetIds = new Set<string>();
        for (const asset of this.assets.values()) {
          const matches = filterEntries.every(([key, value]) => {
            if (key === 'type') return asset.type === value;
            if (key === 'owner') return asset.owner === value;
            if (key === 'classification') return asset.classification === value;
            return true;
          });
          if (matches) {
            matchingAssetIds.add(asset.id);
          }
        }
        qualities = qualities.filter(quality => matchingAssetIds.has(quality.assetId));
      }

      // Calculate aggregate metrics
      const metrics = {
        totalAssets: qualities.length,
        averageQualityScore: this.calculateAverageScore(qualities),
        qualityDistribution: this.calculateQualityDistribution(qualities),
        topIssues: this.getTopQualityIssues(qualities),
        trendSummary: this.getTrendSummary(qualities),
        ruleCompliance: this.calculateRuleCompliance(qualities)
      };
      this.logger.info('Quality metrics calculated', {
        totalAssets: metrics.totalAssets,
        averageScore: metrics.averageQualityScore
      });
      return metrics;
    } catch (error) {
      this.logger.error('Failed to get quality metrics', { filters, error });
      throw error;
    }
  }

  /**
   * Builds a report for the given assets. Ids without both an asset and a
   * quality record are skipped in `assetDetails`.
   *
   * `summary.totalAssets` is the number of requested ids, while
   * `summary.averageScore` is averaged over the assets that were actually
   * found (0 when none were), so unknown ids neither deflate the average nor
   * produce NaN for an empty request.
   */
  async generateQualityReport(assetIds: string[]): Promise<any> {
    try {
      const reportData = {
        summary: {
          totalAssets: assetIds.length,
          assessmentDate: new Date(),
          averageScore: 0,
          criticalIssues: 0,
          highIssues: 0
        },
        assetDetails: [] as any[],
        recommendations: [] as string[]
      };

      let totalScore = 0;
      let criticalCount = 0;
      let highCount = 0;

      for (const assetId of assetIds) {
        const quality = this.qualities.get(assetId);
        const asset = this.assets.get(assetId);
        if (!quality || !asset) {
          continue;
        }
        totalScore += quality.overallScore;

        const openIssues = quality.issues.filter(i => !i.resolved);
        const criticalIssuesCount = openIssues.filter(i => i.severity === 'critical').length;
        const highIssuesCount = openIssues.filter(i => i.severity === 'high').length;
        criticalCount += criticalIssuesCount;
        highCount += highIssuesCount;

        reportData.assetDetails.push({
          assetId,
          assetName: asset.name,
          qualityScore: quality.overallScore,
          dimensions: quality.dimensions,
          openIssues: openIssues.length,
          criticalIssues: criticalIssuesCount,
          highIssues: highIssuesCount,
          lastAssessment: quality.lastAssessment
        });
      }

      const assessedCount = reportData.assetDetails.length;
      reportData.summary.averageScore =
        assessedCount > 0 ? Math.round(totalScore / assessedCount) : 0;
      reportData.summary.criticalIssues = criticalCount;
      reportData.summary.highIssues = highCount;

      // Generate recommendations
      reportData.recommendations = this.generateQualityRecommendations(reportData);

      this.logger.info('Quality report generated', {
        assetCount: assetIds.length,
        averageScore: reportData.summary.averageScore,
        criticalIssues: criticalCount
      });
      return reportData;
    } catch (error) {
      this.logger.error('Failed to generate quality report', { assetIds, error });
      throw error;
    }
  }

  // Private helper methods

  /** Creates a fresh record with a perfect score and no dimensions, rules, issues or trend points. */
  private createEmptyQuality(assetId: string): DataQuality {
    return {
      id: this.generateId(),
      assetId,
      overallScore: 100,
      dimensions: [],
      rules: [],
      issues: [],
      trend: {
        timeframe: 'week',
        dataPoints: [],
        trend: 'stable',
        changeRate: 0
      },
      lastAssessment: new Date()
    };
  }

  /**
   * Runs each assessment whose flag is set on `request`, in the fixed order
   * completeness, accuracy, consistency, validity, timeliness, uniqueness.
   */
  private async performQualityAssessment(
    asset: DataAsset,
    request: QualityAssessmentRequest
  ): Promise<{ dimensions: QualityDimension[] }> {
    const checks: Array<[unknown, (target: DataAsset) => Promise<QualityDimension>]> = [
      [request.checkCompleteness, target => this.assessCompleteness(target)],
      [request.checkAccuracy, target => this.assessAccuracy(target)],
      [request.checkConsistency, target => this.assessConsistency(target)],
      [request.checkValidity, target => this.assessValidity(target)],
      [request.checkTimeliness, target => this.assessTimeliness(target)],
      [request.checkUniqueness, target => this.assessUniqueness(target)]
    ];

    const dimensions: QualityDimension[] = [];
    for (const [enabled, assess] of checks) {
      if (enabled) {
        dimensions.push(await assess(asset));
      }
    }
    return { dimensions };
  }

  /**
   * Builds a mock dimension whose score is a random integer in
   * [minScore, minScore + spread - 1].
   */
  private buildMockDimension(
    name: string,
    minScore: number,
    spread: number,
    description: string,
    rules: string[],
    threshold: number
  ): QualityDimension {
    return {
      name,
      score: Math.floor(Math.random() * spread) + minScore,
      description,
      rules,
      threshold,
      lastChecked: new Date()
    };
  }

  /** Mock implementation - in real scenario, this would analyze actual data. */
  private async assessCompleteness(asset: DataAsset): Promise<QualityDimension> {
    return this.buildMockDimension(
      'completeness', 80, 20,
      'Measures the degree to which data is complete',
      [`No null values in required fields`],
      95
    );
  }

  /** Mock accuracy assessment; the asset is not inspected. */
  private async assessAccuracy(asset: DataAsset): Promise<QualityDimension> {
    return this.buildMockDimension(
      'accuracy', 85, 15,
      'Measures how well data represents real-world values',
      [`Values within expected ranges`, `Format validation`],
      90
    );
  }

  /** Mock consistency assessment; the asset is not inspected. */
  private async assessConsistency(asset: DataAsset): Promise<QualityDimension> {
    return this.buildMockDimension(
      'consistency', 75, 25,
      'Measures uniformity of data across datasets',
      [`Consistent data types`, `Standardized formats`],
      85
    );
  }

  /** Mock validity assessment; the asset is not inspected. */
  private async assessValidity(asset: DataAsset): Promise<QualityDimension> {
    return this.buildMockDimension(
      'validity', 80, 20,
      'Measures conformity to defined business rules',
      [`Business rule compliance`, `Schema validation`],
      90
    );
  }

  /** Mock timeliness assessment; the asset is not inspected. */
  private async assessTimeliness(asset: DataAsset): Promise<QualityDimension> {
    return this.buildMockDimension(
      'timeliness', 70, 30,
      'Measures how up-to-date the data is',
      [`Data refreshed within SLA`, `Timestamp validation`],
      85
    );
  }

  /** Mock uniqueness assessment; the asset is not inspected. */
  private async assessUniqueness(asset: DataAsset): Promise<QualityDimension> {
    return this.buildMockDimension(
      'uniqueness', 75, 25,
      'Measures absence of duplicate records',
      [`No duplicate primary keys`, `Unique constraints enforced`],
      95
    );
  }

  /**
   * Mock implementation - in real scenario, this would execute the actual rule.
   * For demo purposes rules pass at random: 90% for critical rules, 95% otherwise.
   */
  private async executeQualityRule(asset: DataAsset, rule: QualityRule): Promise<boolean> {
    const passRate = rule.severity === 'critical' ? 0.9 : 0.95;
    return Math.random() < passRate;
  }

  /** Rounded mean of the dimension scores; 100 when there are no dimensions. */
  private calculateOverallScore(dimensions: QualityDimension[]): number {
    if (dimensions.length === 0) return 100;
    const totalScore = dimensions.reduce((sum, dim) => sum + dim.score, 0);
    return Math.round(totalScore / dimensions.length);
  }

  /** Appends a trend point for `newScore`, keeps the latest 30 points and refreshes direction and change rate. */
  private updateQualityTrend(quality: DataQuality, newScore: number): void {
    quality.trend.dataPoints.push({
      timestamp: new Date(),
      value: newScore
    });
    // Keep only last 30 data points
    if (quality.trend.dataPoints.length > 30) {
      quality.trend.dataPoints = quality.trend.dataPoints.slice(-30);
    }
    // Update trend analysis
    const trendAnalysis = this.analyzeTrend(quality.trend.dataPoints);
    quality.trend.trend = trendAnalysis.direction;
    quality.trend.changeRate = trendAnalysis.changeRate;
  }

  /** Returns a copy of `trend` holding only the points inside the timeframe window. */
  private filterTrendByTimeframe(trend: QualityTrend, timeframe: string): QualityTrend {
    const now = new Date();
    let cutoffDate: Date;
    switch (timeframe) {
      case 'day':
        cutoffDate = new Date(now.getTime() - 24 * 60 * 60 * 1000);
        break;
      case 'week':
        cutoffDate = new Date(now.getTime() - 7 * 24 * 60 * 60 * 1000);
        break;
      case 'month':
        cutoffDate = new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000);
        break;
      default:
        cutoffDate = new Date(0); // All time
    }
    const filteredDataPoints = trend.dataPoints.filter(dp => dp.timestamp >= cutoffDate);
    return {
      ...trend,
      timeframe,
      dataPoints: filteredDataPoints
    };
  }

  /**
   * Compares the first and last data point. The change rate is the relative
   * change in percent, rounded to two decimals; changes under 2% in either
   * direction count as 'stable'.
   *
   * A relative change is undefined for a zero baseline, so in that case the
   * absolute difference in score points is used instead, keeping the result
   * finite rather than Infinity or NaN.
   */
  private analyzeTrend(dataPoints: { timestamp: Date; value: number }[]): { direction: 'improving' | 'declining' | 'stable'; changeRate: number } {
    if (dataPoints.length < 2) {
      return { direction: 'stable', changeRate: 0 };
    }
    const firstValue = dataPoints[0].value;
    const lastValue = dataPoints[dataPoints.length - 1].value;
    const delta = lastValue - firstValue;
    const changeRate = firstValue === 0 ? delta : (delta / firstValue) * 100;

    let direction: 'improving' | 'declining' | 'stable';
    if (Math.abs(changeRate) < 2) {
      direction = 'stable';
    } else if (changeRate > 0) {
      direction = 'improving';
    } else {
      direction = 'declining';
    }
    return { direction, changeRate: Math.round(changeRate * 100) / 100 };
  }

  /** Rounded mean of the overall scores; 0 for an empty list. */
  private calculateAverageScore(qualities: DataQuality[]): number {
    if (qualities.length === 0) return 0;
    const totalScore = qualities.reduce((sum, quality) => sum + quality.overallScore, 0);
    return Math.round(totalScore / qualities.length);
  }

  /** Buckets records by overall score: excellent >= 90, good >= 80, fair >= 70, poor below. */
  private calculateQualityDistribution(qualities: DataQuality[]): Record<string, number> {
    const distribution = { excellent: 0, good: 0, fair: 0, poor: 0 };
    for (const quality of qualities) {
      if (quality.overallScore >= 90) distribution.excellent++;
      else if (quality.overallScore >= 80) distribution.good++;
      else if (quality.overallScore >= 70) distribution.fair++;
      else distribution.poor++;
    }
    return distribution;
  }

  /** Returns the five most frequent types among unresolved issues, most frequent first. */
  private getTopQualityIssues(qualities: DataQuality[]): Array<{ type: string; count: number }> {
    const issueTypes = new Map<string, number>();
    for (const quality of qualities) {
      for (const issue of quality.issues) {
        if (!issue.resolved) {
          issueTypes.set(issue.type, (issueTypes.get(issue.type) || 0) + 1);
        }
      }
    }
    return Array.from(issueTypes.entries())
      .map(([type, count]) => ({ type, count }))
      .sort((a, b) => b.count - a.count)
      .slice(0, 5);
  }

  /** Counts records per trend direction. */
  private getTrendSummary(qualities: DataQuality[]): Record<string, number> {
    const trends = { improving: 0, declining: 0, stable: 0 };
    for (const quality of qualities) {
      trends[quality.trend.trend]++;
    }
    return trends;
  }

  /**
   * Mock compliance calculation: each record is assumed to pass a share of
   * its rules proportional to its overall score. Returns a percentage, or
   * 100 when there are no rules at all.
   */
  private calculateRuleCompliance(qualities: DataQuality[]): number {
    let totalRules = 0;
    let passedRules = 0;
    for (const quality of qualities) {
      totalRules += quality.rules.length;
      passedRules += Math.floor(quality.rules.length * (quality.overallScore / 100));
    }
    return totalRules > 0 ? Math.round((passedRules / totalRules) * 100) : 100;
  }

  /** Derives human-readable recommendations from a report's summary and asset details. */
  private generateQualityRecommendations(reportData: any): string[] {
    const recommendations: string[] = [];
    if (reportData.summary.averageScore < 80) {
      recommendations.push('Overall data quality is below acceptable threshold. Consider implementing comprehensive data quality monitoring.');
    }
    if (reportData.summary.criticalIssues > 0) {
      recommendations.push(`${reportData.summary.criticalIssues} critical quality issues require immediate attention.`);
    }
    if (reportData.summary.highIssues > 5) {
      recommendations.push('High number of quality issues detected. Review data validation processes.');
    }
    // Asset-specific recommendations
    const lowScoreAssets = reportData.assetDetails.filter((asset: any) => asset.qualityScore < 70);
    if (lowScoreAssets.length > 0) {
      recommendations.push(`${lowScoreAssets.length} assets have quality scores below 70% and need immediate remediation.`);
    }
    if (recommendations.length === 0) {
      recommendations.push('Data quality is within acceptable ranges. Continue monitoring and maintain current practices.');
    }
    return recommendations;
  }

  /**
   * Builds an identifier of the form `quality_<epoch millis>_<random base-36 suffix>`.
   * Based on `Math.random()`, so not guaranteed unique.
   */
  private generateId(): string {
    // slice(2, 11) is the non-deprecated equivalent of substr(2, 9).
    return `quality_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /**
   * Injects the asset map (typically from DataCatalogService). The map is
   * stored by reference, not copied.
   */
  setAssets(assets: Map<string, DataAsset>): void {
    this.assets = assets;
  }
}

View file

@ -0,0 +1,801 @@
import { EventBus } from '@stock-bot/event-bus';
import { Logger } from '@stock-bot/utils';
import {
DataAsset,
SearchQuery,
SearchResult,
SearchFilters,
SearchSuggestion,
DataAssetType,
DataClassification
} from '../types/DataCatalog';
/**
 * Contract for searching and indexing catalog data assets. Member notes
 * describe the behaviour of the implementation shipped in this file
 * (SearchServiceImpl).
 */
export interface SearchService {
/** Runs a text and/or filter search and returns one page of results with facets and suggestions. */
search(query: SearchQuery): Promise<SearchResult>;
/** Returns up to ten completion suggestions for a partial input of at least two characters. */
suggest(partial: string): Promise<SearchSuggestion[]>;
/** Returns assets matching every facet; values within one facet are alternatives. */
searchByFacets(facets: Record<string, string[]>): Promise<DataAsset[]>;
/** Returns the assets most similar to the given one, best match first. */
searchSimilar(assetId: string, limit?: number): Promise<DataAsset[]>;
/** Returns the most frequently searched query strings, most frequent first. */
getPopularSearches(limit?: number): Promise<string[]>;
/** Returns the user's most recent query strings, newest first. */
getRecentSearches(userId: string, limit?: number): Promise<string[]>;
/** Adds the asset to the search index. */
indexAsset(asset: DataAsset): Promise<void>;
/** Removes the asset from the search index; unknown ids are ignored. */
removeFromIndex(assetId: string): Promise<void>;
/** Clears the index and indexes every known asset again. */
reindexAll(): Promise<void>;
/** Returns aggregate statistics about searches within the timeframe ('day', 'week', 'month'; anything else means all history). */
getSearchAnalytics(timeframe?: string): Promise<any>;
}
export class SearchServiceImpl implements SearchService {
// Assets that have been indexed, keyed by asset id (written by indexAsset / removeFromIndex).
private searchIndex: Map<string, DataAsset> = new Map();
// One entry per recorded search; read by the popular/recent-search and analytics methods.
private searchHistory: Array<{ query: string; userId?: string; timestamp: Date; resultCount: number }> = [];
// All known assets keyed by asset id; this is the map searches resolve ids against.
private assets: Map<string, DataAsset> = new Map();
// In-memory inverted indices for search: each maps a key to the ids of the assets carrying it.
// wordToAssets is keyed by tokens taken from asset names and descriptions.
private wordToAssets: Map<string, Set<string>> = new Map();
private tagToAssets: Map<string, Set<string>> = new Map();
private typeToAssets: Map<string, Set<string>> = new Map();
private classificationToAssets: Map<string, Set<string>> = new Map();
private ownerToAssets: Map<string, Set<string>> = new Map();
// eventBus receives the data.catalog.* events emitted by this service; logger receives its log output.
constructor(
private eventBus: EventBus,
private logger: Logger
) {}
/**
 * Executes a catalog search.
 *
 * With query text the candidates come from the word index; without it
 * every known asset is a candidate. Candidates are then filtered, sorted
 * and paginated (offset defaults to 0, limit to 20). Facets and
 * suggestions are computed over the full filtered result, not just the
 * returned page. The search is recorded in the history and announced via
 * `data.catalog.search.performed`.
 */
async search(query: SearchQuery): Promise<SearchResult> {
  try {
    const startedAt = Date.now();
    const text = query.text || '';

    let matches: DataAsset[] = query.text
      ? await this.performTextSearch(query.text)
      : Array.from(this.assets.values());

    if (query.filters) {
      matches = this.applyFilters(matches, query.filters);
    }
    matches = this.sortResults(matches, query.sortBy, query.sortOrder);

    // Pagination window
    const total = matches.length;
    const offset = query.offset || 0;
    const limit = query.limit || 20;
    const page = matches.slice(offset, offset + limit);

    const facets = this.calculateFacets(matches);
    const searchTime = Date.now() - startedAt;
    const suggestions = await this.generateSearchSuggestions(text, matches);

    const searchResult: SearchResult = {
      assets: page,
      total,
      offset,
      limit,
      searchTime,
      facets,
      suggestions
    };

    this.recordSearch(text, query.userId, total);
    this.logger.info('Search completed', {
      query: query.text,
      resultCount: total,
      searchTime
    });
    await this.eventBus.emit('data.catalog.search.performed', {
      query,
      resultCount: total,
      searchTime,
      timestamp: new Date()
    });

    return searchResult;
  } catch (error) {
    this.logger.error('Search failed', { query, error });
    throw error;
  }
}
/**
 * Produces up to ten suggestions for a partially typed query.
 *
 * Candidates are asset names, tags, owners and popular past searches that
 * contain the input (case-insensitive). Inputs shorter than two characters
 * after trimming yield no suggestions. Candidates that start with the
 * input rank first; ties are broken by descending count.
 */
async suggest(partial: string): Promise<SearchSuggestion[]> {
  try {
    const needle = partial.toLowerCase().trim();
    if (needle.length < 2) {
      return [];
    }
    const containsNeedle = (value: string): boolean => value.toLowerCase().includes(needle);

    // One pass over the assets, collecting each suggestion source separately
    // so the final list keeps the order: names, tags, owners, popular searches.
    const nameSuggestions: SearchSuggestion[] = [];
    const tagTally = new Map<string, number>();
    const ownerTally = new Map<string, number>();
    for (const asset of this.assets.values()) {
      if (containsNeedle(asset.name)) {
        nameSuggestions.push({
          text: asset.name,
          type: 'asset_name',
          count: 1,
          highlight: this.highlightMatch(asset.name, partial)
        });
      }
      for (const tag of asset.tags) {
        if (containsNeedle(tag)) {
          tagTally.set(tag, (tagTally.get(tag) || 0) + 1);
        }
      }
      if (containsNeedle(asset.owner)) {
        ownerTally.set(asset.owner, (ownerTally.get(asset.owner) || 0) + 1);
      }
    }

    const tagSuggestions = Array.from(tagTally, ([tag, count]): SearchSuggestion => ({
      text: tag,
      type: 'tag',
      count,
      highlight: this.highlightMatch(tag, partial)
    }));
    const ownerSuggestions = Array.from(ownerTally, ([owner, count]): SearchSuggestion => ({
      text: owner,
      type: 'owner',
      count,
      highlight: this.highlightMatch(owner, partial)
    }));
    const popularSuggestions = this.getPopularSearchTerms()
      .filter(containsNeedle)
      .slice(0, 5)
      .map((term): SearchSuggestion => ({
        text: term,
        type: 'popular_search',
        count: this.getSearchCount(term),
        highlight: this.highlightMatch(term, partial)
      }));

    const prefixRank = (s: SearchSuggestion): number =>
      s.text.toLowerCase().startsWith(needle) ? 1 : 0;

    return [...nameSuggestions, ...tagSuggestions, ...ownerSuggestions, ...popularSuggestions]
      .sort((a, b) => {
        const rankDiff = prefixRank(b) - prefixRank(a);
        return rankDiff !== 0 ? rankDiff : b.count - a.count;
      })
      .slice(0, 10);
  } catch (error) {
    this.logger.error('Suggestion generation failed', { partial, error });
    throw error;
  }
}
/**
 * Looks assets up through the facet indices.
 *
 * Values inside a single facet are unioned, and the per-facet sets are then
 * intersected. Supported facet names are `type`, `classification`, `owner`
 * and `tags`; any other name contributes an empty set and therefore
 * empties the result.
 */
async searchByFacets(facets: Record<string, string[]>): Promise<DataAsset[]> {
  try {
    const indexByFacet = new Map<string, Map<string, Set<string>>>([
      ['type', this.typeToAssets],
      ['classification', this.classificationToAssets],
      ['owner', this.ownerToAssets],
      ['tags', this.tagToAssets]
    ]);

    // Stays undefined until the first facet has been processed.
    let matchedIds: Set<string> | undefined;
    for (const [facetType, values] of Object.entries(facets)) {
      const index = indexByFacet.get(facetType);
      const facetIds = new Set<string>();
      for (const value of values) {
        const ids = index?.get(value);
        if (ids) {
          ids.forEach(id => facetIds.add(id));
        }
      }
      matchedIds = matchedIds === undefined
        ? facetIds
        : new Set([...matchedIds].filter(id => facetIds.has(id)));
    }

    const assets: DataAsset[] = [];
    for (const id of matchedIds ?? []) {
      const asset = this.assets.get(id);
      if (asset !== undefined) {
        assets.push(asset);
      }
    }

    this.logger.info('Facet search completed', {
      facets,
      resultCount: assets.length
    });
    return assets;
  } catch (error) {
    this.logger.error('Facet search failed', { facets, error });
    throw error;
  }
}
/**
 * Ranks all other assets by their similarity to the given asset and returns
 * the best `limit` of them. Assets scoring 0.1 or less are left out. An
 * unknown asset id yields an empty list.
 */
async searchSimilar(assetId: string, limit: number = 10): Promise<DataAsset[]> {
  try {
    const reference = this.assets.get(assetId);
    if (!reference) {
      return [];
    }

    const ranked = Array.from(this.assets.values())
      .filter(candidate => candidate.id !== assetId)
      .map(candidate => ({
        asset: candidate,
        score: this.calculateSimilarity(reference, candidate)
      }))
      .filter(entry => entry.score > 0.1) // Minimum similarity threshold
      .sort((left, right) => right.score - left.score);

    const results = ranked.slice(0, limit).map(entry => entry.asset);

    this.logger.info('Similar assets found', {
      assetId,
      similarCount: results.length
    });
    return results;
  } catch (error) {
    this.logger.error('Similar asset search failed', { assetId, error });
    throw error;
  }
}
/**
 * Returns the `limit` most frequent non-empty query strings from the search
 * history, most frequent first.
 */
async getPopularSearches(limit: number = 10): Promise<string[]> {
  try {
    const frequency = this.searchHistory.reduce((tally, entry) => {
      if (entry.query) {
        tally.set(entry.query, (tally.get(entry.query) || 0) + 1);
      }
      return tally;
    }, new Map<string, number>());

    const ranked = [...frequency.entries()].sort(
      ([, countA], [, countB]) => countB - countA
    );
    return ranked.slice(0, limit).map(([text]) => text);
  } catch (error) {
    this.logger.error('Failed to get popular searches', { error });
    throw error;
  }
}
/**
 * Returns the given user's `limit` most recent non-empty query strings,
 * newest first. Repeated queries are not de-duplicated.
 */
async getRecentSearches(userId: string, limit: number = 10): Promise<string[]> {
  try {
    const ownSearches = this.searchHistory.filter(
      entry => entry.userId === userId && entry.query
    );
    // filter() produced a fresh array, so sorting it leaves the history untouched.
    ownSearches.sort(
      (older, newer) => newer.timestamp.getTime() - older.timestamp.getTime()
    );
    return ownSearches.slice(0, limit).map(entry => entry.query);
  } catch (error) {
    this.logger.error('Failed to get recent searches', { userId, error });
    throw error;
  }
}
/**
 * Adds the asset to the main index and the inverted indices, then emits
 * `data.catalog.asset.indexed`.
 *
 * If an asset with the same id was indexed before, the inverted-index
 * entries of that earlier version are removed first. Without this, a
 * changed name, description, tag, type, classification or owner would
 * leave stale entries behind and the asset would keep matching its old
 * values.
 *
 * NOTE(review): this relies on removeFromInvertedIndices undoing what
 * updateInvertedIndices adds; confirm against its full body. It also cannot
 * help when the caller mutated the previously indexed object in place,
 * because the old values are then no longer available.
 *
 * @param asset the asset to index or re-index
 */
async indexAsset(asset: DataAsset): Promise<void> {
  try {
    // Drop entries belonging to the previously indexed version, if any.
    const previous = this.searchIndex.get(asset.id);
    if (previous) {
      this.removeFromInvertedIndices(previous);
    }

    // Add to main index
    this.searchIndex.set(asset.id, asset);
    this.assets.set(asset.id, asset);

    // Update inverted indices
    this.updateInvertedIndices(asset);

    this.logger.debug('Asset indexed', { assetId: asset.id, name: asset.name });
    await this.eventBus.emit('data.catalog.asset.indexed', {
      assetId: asset.id,
      timestamp: new Date()
    });
  } catch (error) {
    this.logger.error('Failed to index asset', { asset, error });
    throw error;
  }
}
/**
 * Removes an asset from the main index, the asset map and the inverted
 * indices, then emits `data.catalog.asset.unindexed`. Does nothing, and
 * emits nothing, when the id was never indexed.
 */
async removeFromIndex(assetId: string): Promise<void> {
  try {
    const indexed = this.searchIndex.get(assetId);
    if (indexed) {
      this.searchIndex.delete(assetId);
      this.assets.delete(assetId);
      this.removeFromInvertedIndices(indexed);

      this.logger.debug('Asset removed from index', { assetId });
      await this.eventBus.emit('data.catalog.asset.unindexed', {
        assetId,
        timestamp: new Date()
      });
    }
  } catch (error) {
    this.logger.error('Failed to remove asset from index', { assetId, error });
    throw error;
  }
}
/**
 * Rebuilds the search index from the asset map: empties the main index and
 * all inverted indices, indexes every known asset again one at a time, and
 * emits `data.catalog.index.rebuilt`.
 */
async reindexAll(): Promise<void> {
  try {
    const indices = [
      this.searchIndex,
      this.wordToAssets,
      this.tagToAssets,
      this.typeToAssets,
      this.classificationToAssets,
      this.ownerToAssets
    ];
    indices.forEach(index => index.clear());

    // Sequential on purpose: indexAsset emits an event per asset.
    for (const asset of this.assets.values()) {
      await this.indexAsset(asset);
    }

    const assetCount = this.assets.size;
    this.logger.info('Search index rebuilt', { assetCount });
    await this.eventBus.emit('data.catalog.index.rebuilt', {
      assetCount: this.assets.size,
      timestamp: new Date()
    });
  } catch (error) {
    this.logger.error('Failed to rebuild search index', { error });
    throw error;
  }
}
/**
 * Summarises the search history inside a time window.
 *
 * `timeframe` may be 'day', 'week' (default) or 'month' (30 days); any
 * other value covers the whole history.
 */
async getSearchAnalytics(timeframe: string = 'week'): Promise<any> {
  try {
    const DAY_MS = 24 * 60 * 60 * 1000;
    const windowLengths = new Map<string, number>([
      ['day', DAY_MS],
      ['week', 7 * DAY_MS],
      ['month', 30 * DAY_MS]
    ]);
    const windowLength = windowLengths.get(timeframe);
    const cutoffDate = windowLength === undefined
      ? new Date(0)
      : new Date(new Date().getTime() - windowLength);

    const inWindow = this.searchHistory.filter(entry => entry.timestamp >= cutoffDate);
    const searchCount = inWindow.length;

    let averageResults = 0;
    if (searchCount > 0) {
      const resultTotal = inWindow.reduce((sum, entry) => sum + entry.resultCount, 0);
      averageResults = resultTotal / searchCount;
    }

    return {
      totalSearches: searchCount,
      uniqueQueries: new Set(inWindow.map(entry => entry.query)).size,
      averageResults,
      noResultQueries: inWindow.filter(entry => entry.resultCount === 0).length,
      topQueries: this.getTopQueries(inWindow, 10),
      searchTrend: this.calculateSearchTrend(inWindow, timeframe),
      facetUsage: this.getFacetUsage(inWindow)
    };
  } catch (error) {
    this.logger.error('Failed to get search analytics', { timeframe, error });
    throw error;
  }
}
// Private helper methods
/**
 * Finds assets through the word index. An asset's relevance is the number
 * of query tokens that map to it; results are ordered by descending
 * relevance. Ids that no longer resolve to an asset are dropped.
 */
private async performTextSearch(text: string): Promise<DataAsset[]> {
  const hitCounts = new Map<string, number>();
  for (const token of this.tokenize(text)) {
    const ids = this.wordToAssets.get(token);
    if (!ids) continue;
    ids.forEach(id => hitCounts.set(id, (hitCounts.get(id) || 0) + 1));
  }

  const ranked = [...hitCounts.entries()].sort(
    ([, hitsA], [, hitsB]) => hitsB - hitsA
  );

  const found: DataAsset[] = [];
  for (const [id] of ranked) {
    const asset = this.assets.get(id);
    if (asset !== undefined) {
      found.push(asset);
    }
  }
  return found;
}
/**
 * Keeps the assets that satisfy every filter that is set. List filters
 * (types, classifications, owners, tags) are ignored when missing or empty;
 * the tag filter passes when the asset carries at least one of the listed
 * tags. The creation-date bounds are inclusive.
 */
private applyFilters(assets: DataAsset[], filters: SearchFilters): DataAsset[] {
  const { types, classifications, owners, tags, createdAfter, createdBefore } = filters;

  const passes = (asset: DataAsset): boolean => {
    const typeOk = !types?.length || types.includes(asset.type);
    const classificationOk =
      !classifications?.length || classifications.includes(asset.classification);
    const ownerOk = !owners?.length || owners.includes(asset.owner);
    const tagOk = !tags?.length || tags.some(tag => asset.tags.includes(tag));
    const notTooOld = !createdAfter || !(asset.createdAt < createdAfter);
    const notTooNew = !createdBefore || !(asset.createdAt > createdBefore);
    return typeOk && classificationOk && ownerOk && tagOk && notTooOld && notTooNew;
  };

  return assets.filter(passes);
}
/**
 * Orders assets by the requested field.
 *
 * Supported `sortBy` values are 'name', 'createdAt', 'updatedAt',
 * 'lastAccessed' (a missing value sorts as epoch 0) and 'usage' (access
 * count). Without `sortBy` the input is returned as-is, preserving
 * relevance order; an unsupported value leaves the order unchanged.
 *
 * The input array is never modified: sorting happens on a copy, so callers
 * that hold on to the original list keep its order.
 *
 * @param assets assets to order
 * @param sortBy field to order by
 * @param sortOrder 'desc' for descending; anything else sorts ascending
 * @returns the ordered assets
 */
private sortResults(assets: DataAsset[], sortBy?: string, sortOrder?: 'asc' | 'desc'): DataAsset[] {
  if (!sortBy) {
    return assets; // Return as-is (relevance order)
  }
  const order = sortOrder === 'desc' ? -1 : 1;

  // Array.prototype.sort works in place, so sort a shallow copy.
  return [...assets].sort((a, b) => {
    let comparison = 0;
    switch (sortBy) {
      case 'name':
        comparison = a.name.localeCompare(b.name);
        break;
      case 'createdAt':
        comparison = a.createdAt.getTime() - b.createdAt.getTime();
        break;
      case 'updatedAt':
        comparison = a.updatedAt.getTime() - b.updatedAt.getTime();
        break;
      case 'lastAccessed': {
        // Braces scope the declarations to this case clause.
        const aAccessed = a.lastAccessed?.getTime() ?? 0;
        const bAccessed = b.lastAccessed?.getTime() ?? 0;
        comparison = aAccessed - bAccessed;
        break;
      }
      case 'usage':
        comparison = a.usage.accessCount - b.usage.accessCount;
        break;
      default:
        comparison = 0;
    }
    return comparison * order;
  });
}
/**
 * Counts how many of the given assets carry each type, classification,
 * owner and tag. The result has the keys `types`, `classifications`,
 * `owners` and `tags`, each listing its values by descending count.
 */
private calculateFacets(assets: DataAsset[]): Record<string, Array<{ value: string; count: number }>> {
  const typeCounts = new Map<string, number>();
  const classificationCounts = new Map<string, number>();
  const ownerCounts = new Map<string, number>();
  const tagCounts = new Map<string, number>();

  const bump = (counts: Map<string, number>, key: string): void => {
    counts.set(key, (counts.get(key) || 0) + 1);
  };
  const toRankedList = (counts: Map<string, number>): Array<{ value: string; count: number }> =>
    Array.from(counts, ([value, count]) => ({ value, count }))
      .sort((left, right) => right.count - left.count);

  assets.forEach(asset => {
    bump(typeCounts, asset.type);
    bump(classificationCounts, asset.classification);
    bump(ownerCounts, asset.owner);
    asset.tags.forEach(tag => bump(tagCounts, tag));
  });

  return {
    types: toRankedList(typeCounts),
    classifications: toRankedList(classificationCounts),
    owners: toRankedList(ownerCounts),
    tags: toRankedList(tagCounts)
  };
}
/**
 * Builds follow-up query suggestions by appending the most frequent tags of
 * the leading results to the original query.
 *
 * @param query - The query text the suggestions extend.
 * @param results - Search results; only the first 10 are inspected.
 * @returns Up to three strings of the form `"<query> <tag>"`, or an empty
 *          array when the query is empty or there are no results.
 */
private async generateSearchSuggestions(query: string, results: DataAsset[]): Promise<string[]> {
  const hasInput = Boolean(query) && results.length > 0;
  if (!hasInput) {
    return [];
  }

  // Tally tag occurrences across the first ten results.
  const tagFrequency = new Map<string, number>();
  results.slice(0, 10).forEach(asset => {
    asset.tags.forEach(tag => {
      tagFrequency.set(tag, (tagFrequency.get(tag) || 0) + 1);
    });
  });

  // Most frequent tags first, keep the top three.
  const ranked = Array.from(tagFrequency.entries());
  ranked.sort((left, right) => right[1] - left[1]);
  return ranked.slice(0, 3).map(([tag]) => `${query} ${tag}`);
}
/**
 * Registers an asset's id in every inverted index: words (tokenized from name
 * and description), tags, type, classification and owner.
 *
 * @param asset - The asset to index.
 */
private updateInvertedIndices(asset: DataAsset): void {
  // Adds the asset id under `key`, creating the bucket on first use.
  const addToIndex = (index: Map<string, Set<string>>, key: string): void => {
    let bucket = index.get(key);
    if (bucket === undefined) {
      bucket = new Set();
      index.set(key, bucket);
    }
    bucket.add(asset.id);
  };

  const nameTokens = this.tokenize(asset.name);
  const descriptionTokens = this.tokenize(asset.description);

  nameTokens.concat(descriptionTokens).forEach(token => addToIndex(this.wordToAssets, token));
  asset.tags.forEach(tag => addToIndex(this.tagToAssets, tag));
  addToIndex(this.typeToAssets, asset.type);
  addToIndex(this.classificationToAssets, asset.classification);
  addToIndex(this.ownerToAssets, asset.owner);
}
/**
 * Removes an asset's id from every inverted index, dropping any index key
 * whose id set becomes empty.
 *
 * The word keys are derived by tokenizing the name and description of the
 * asset object passed in.
 *
 * @param asset - The asset to remove from the indices.
 */
private removeFromInvertedIndices(asset: DataAsset): void {
  const nameTokens = this.tokenize(asset.name);
  const descriptionTokens = this.tokenize(asset.description);

  // Word index uses the same delete-and-prune routine as the other indices.
  this.removeFromIndex(this.wordToAssets, nameTokens.concat(descriptionTokens), asset.id);
  this.removeFromIndex(this.tagToAssets, asset.tags, asset.id);
  this.removeFromIndex(this.typeToAssets, [asset.type], asset.id);
  this.removeFromIndex(this.classificationToAssets, [asset.classification], asset.id);
  this.removeFromIndex(this.ownerToAssets, [asset.owner], asset.id);
}
/**
 * Deletes `assetId` from the id set stored under each of the given keys and
 * removes keys whose set ends up empty.
 *
 * @param index - Inverted index mapping a key to a set of asset ids.
 * @param values - Keys to visit; keys missing from the index are ignored.
 * @param assetId - Id to remove.
 */
private removeFromIndex(index: Map<string, Set<string>>, values: string[], assetId: string): void {
  values.forEach(key => {
    const members = index.get(key);
    if (!members) {
      return;
    }
    members.delete(assetId);
    if (members.size === 0) {
      index.delete(key);
    }
  });
}
/**
 * Splits text into lowercase search tokens.
 *
 * Every character that is not a letter, combining mark, digit, underscore or
 * whitespace is treated as a separator, and tokens of two characters or fewer
 * are dropped.
 *
 * Letters and digits are matched with Unicode property escapes rather than
 * `\w`, which only covers ASCII and therefore split non-ASCII words apart
 * (e.g. "café" became "caf"). Output for pure-ASCII input is unchanged.
 *
 * @param text - Raw text to tokenize.
 * @returns Tokens in order of appearance; duplicates are kept.
 */
private tokenize(text: string): string[] {
  return text
    .toLowerCase()
    .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
    .split(/\s+/)
    .filter(word => word.length > 2);
}
/**
 * Scores how alike two assets are.
 *
 * Weights: same type 0.3, same classification 0.2, same owner 0.1, plus the
 * Jaccard similarity of the two tag sets (shared / combined distinct tags)
 * scaled by 0.4. The tag term is skipped when neither asset has tags.
 *
 * @param asset1 - First asset.
 * @param asset2 - Second asset.
 * @returns The summed weighted score.
 */
private calculateSimilarity(asset1: DataAsset, asset2: DataAsset): number {
  const firstTags = new Set(asset1.tags);
  const secondTags = new Set(asset2.tags);

  // Count distinct tags present on both assets.
  let sharedTagCount = 0;
  firstTags.forEach(tag => {
    if (secondTags.has(tag)) {
      sharedTagCount += 1;
    }
  });
  // |A ∪ B| = |A| + |B| - |A ∩ B|
  const combinedTagCount = firstTags.size + secondTags.size - sharedTagCount;

  let score = 0;
  if (asset1.type === asset2.type) {
    score += 0.3;
  }
  if (asset1.classification === asset2.classification) {
    score += 0.2;
  }
  if (asset1.owner === asset2.owner) {
    score += 0.1;
  }
  if (combinedTagCount > 0) {
    score += (sharedTagCount / combinedTagCount) * 0.4;
  }
  return score;
}
/**
 * Wraps every case-insensitive occurrence of `query` in `text` with
 * `<mark>…</mark>`.
 *
 * The query is matched literally: regular-expression metacharacters are
 * escaped first, so input such as `c++` or `(` neither throws nor changes the
 * match semantics. An empty query returns the text unchanged instead of
 * inserting empty `<mark></mark>` pairs at every position.
 *
 * NOTE(review): `text` is inserted into the markup as-is. If it can contain
 * untrusted HTML it presumably needs escaping before rendering; confirm at
 * the call site.
 *
 * @param text - Text to search in.
 * @param query - Literal string to highlight.
 * @returns `text` with matches wrapped in `<mark>` tags.
 */
private highlightMatch(text: string, query: string): string {
  if (!query) {
    return text;
  }
  // Escape characters that are special inside a RegExp pattern.
  const escapedQuery = query.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const regex = new RegExp(`(${escapedQuery})`, 'gi');
  return text.replace(regex, '<mark>$1</mark>');
}
/**
 * Appends a search to the in-memory history, stamped with the current time,
 * and trims the history to its most recent 1000 entries.
 *
 * @param query - The query text that was searched.
 * @param userId - Optional id of the searching user.
 * @param resultCount - Number of results returned; defaults to 0.
 */
private recordSearch(query: string, userId?: string, resultCount: number = 0): void {
  const maxHistoryEntries = 1000;
  const entry = {
    query,
    userId,
    timestamp: new Date(),
    resultCount
  };
  this.searchHistory.push(entry);

  const overflow = this.searchHistory.length - maxHistoryEntries;
  if (overflow > 0) {
    // Keep only the newest entries.
    this.searchHistory = this.searchHistory.slice(-maxHistoryEntries);
  }
}
/**
 * Lists the distinct non-empty queries in the search history, most frequently
 * searched first.
 *
 * @returns Query strings ordered by descending occurrence count.
 */
private getPopularSearchTerms(): string[] {
  const tally = new Map<string, number>();
  this.searchHistory.forEach(entry => {
    if (!entry.query) {
      return; // skip searches without query text
    }
    tally.set(entry.query, (tally.get(entry.query) || 0) + 1);
  });

  const ranked = Array.from(tally.entries());
  ranked.sort((left, right) => right[1] - left[1]);
  return ranked.map(([query]) => query);
}
/**
 * Counts how many history entries have exactly the given query text.
 *
 * @param query - Query text to look for (strict equality).
 * @returns Number of matching history entries.
 */
private getSearchCount(query: string): number {
  let matches = 0;
  for (const entry of this.searchHistory) {
    if (entry.query === query) {
      matches += 1;
    }
  }
  return matches;
}
/**
 * Ranks the query texts found in a list of search records by frequency.
 *
 * @param searches - Search records; entries with a falsy `query` are ignored.
 * @param limit - Maximum number of entries to return.
 * @returns `{ query, count }` pairs ordered by descending count, at most
 *          `limit` long.
 */
private getTopQueries(searches: any[], limit: number): Array<{ query: string; count: number }> {
  const tally = new Map<string, number>();
  searches.forEach(entry => {
    if (entry.query) {
      tally.set(entry.query, (tally.get(entry.query) || 0) + 1);
    }
  });

  const ranked = Array.from(tally.entries(), ([query, count]) => ({ query, count }));
  ranked.sort((left, right) => right.count - left.count);
  return ranked.slice(0, limit);
}
/**
 * Buckets searches per calendar day (the date part of the timestamp's ISO
 * string) and classifies the resulting series.
 *
 * @param searches - Search records; each must carry a `timestamp` exposing
 *                   `toISOString()`.
 * @param timeframe - Accepted for the caller's benefit; the body does not
 *                    read it.
 * @returns `{ dataPoints, trend }` where `dataPoints` are `{ date, count }`
 *          entries in ascending date order and `trend` is the label produced
 *          by `analyzeTrend` for the daily counts.
 */
private calculateSearchTrend(searches: any[], timeframe: string): any {
  const countsPerDay = new Map<string, number>();
  searches.forEach(entry => {
    const dayKey = entry.timestamp.toISOString().split('T')[0];
    countsPerDay.set(dayKey, (countsPerDay.get(dayKey) || 0) + 1);
  });

  const dataPoints = Array.from(countsPerDay.entries(), ([date, count]) => ({ date, count }));
  dataPoints.sort((left, right) => left.date.localeCompare(right.date));

  const dailyCounts = dataPoints.map(point => point.count);
  return {
    dataPoints,
    trend: this.analyzeTrend(dailyCounts)
  };
}
/**
 * Classifies a numeric series by comparing the average of its first half with
 * the average of its second half (the middle element of an odd-length series
 * belongs to the second half).
 *
 * A relative change of less than 10 % in either direction counts as
 * `'stable'`. The change is measured against the magnitude of the first-half
 * average, so a negative baseline no longer inverts the result. When that
 * baseline is exactly zero a percentage is undefined (the division used to
 * yield NaN, reporting an all-zero series as `'decreasing'`), so the sign of
 * the second-half average decides instead.
 *
 * @param values - Series in chronological order.
 * @returns `'stable'`, `'increasing'` or `'decreasing'`; always `'stable'`
 *          for fewer than two values.
 */
private analyzeTrend(values: number[]): string {
  if (values.length < 2) return 'stable';

  const midpoint = Math.floor(values.length / 2);
  const average = (part: number[]): number =>
    part.reduce((sum, val) => sum + val, 0) / part.length;

  const firstAvg = average(values.slice(0, midpoint));
  const secondAvg = average(values.slice(midpoint));

  if (firstAvg === 0) {
    // No meaningful percentage from a zero baseline; use the absolute change.
    if (secondAvg === 0) return 'stable';
    return secondAvg > 0 ? 'increasing' : 'decreasing';
  }

  const changePercent = ((secondAvg - firstAvg) / Math.abs(firstAvg)) * 100;
  if (Math.abs(changePercent) < 10) return 'stable';
  return changePercent > 0 ? 'increasing' : 'decreasing';
}
/**
 * Returns placeholder facet-usage figures. Nothing is actually tracked: each
 * figure is a fixed fraction of the number of searches, rounded down.
 *
 * @param searches - Search records; only the array length is used.
 * @returns Counts for `types` (30 %), `classifications` (20 %), `owners`
 *          (10 %) and `tags` (40 %).
 */
private getFacetUsage(searches: any[]): Record<string, number> {
  // Mock facet usage tracking
  const totalSearches = searches.length;
  const shareOf = (fraction: number): number => Math.floor(totalSearches * fraction);

  return {
    types: shareOf(0.3),
    classifications: shareOf(0.2),
    owners: shareOf(0.1),
    tags: shareOf(0.4)
  };
}
/**
 * Replaces the asset map used by this service and rebuilds the indices.
 *
 * The map is stored by reference (not copied), then `reindexAll()` is called.
 * Typically invoked by DataCatalogService.
 *
 * @param assets - Assets keyed by a string (presumably the asset id; confirm
 *                 against the caller).
 */
setAssets(assets: Map<string, DataAsset>): void {
  this.assets = assets;
  // The indices were derived from the previous map, so rebuild them.
  this.reindexAll();
}
}

View file

@ -0,0 +1,524 @@
// Data Asset Types
/**
 * Catalog entry for a single data asset, bundling its descriptive fields with
 * its location, metadata, lineage, quality, usage and governance records.
 */
export interface DataAsset {
id: string;
name: string;
type: DataAssetType;
description: string;
owner: string;
// Optional; presumably the person responsible for curating the asset — confirm.
steward?: string;
tags: string[];
classification: DataClassification;
// Optional structural description (fields, keys, indexes).
schema?: DataSchema;
location: DataLocation;
metadata: DataAssetMetadata;
lineage: DataLineage;
quality: DataQuality;
usage: DataUsage;
governance: DataGovernance;
createdAt: Date;
updatedAt: Date;
// Optional; note that DataUsage carries its own required lastAccessed field.
lastAccessed?: Date;
}
/** Kinds of data asset. String enum; each member's value is its lowercase snake_case name. */
export enum DataAssetType {
TABLE = 'table',
VIEW = 'view',
DATASET = 'dataset',
API = 'api',
FILE = 'file',
STREAM = 'stream',
MODEL = 'model',
FEATURE_GROUP = 'feature_group',
PIPELINE = 'pipeline',
REPORT = 'report'
}
/** Classification labels that can be attached to an asset or a field. String enum with lowercase values. */
export enum DataClassification {
PUBLIC = 'public',
INTERNAL = 'internal',
CONFIDENTIAL = 'confidential',
RESTRICTED = 'restricted',
PII = 'pii',
FINANCIAL = 'financial'
}
/** Structural description of an asset: a versioned field list plus optional key and index definitions. */
export interface DataSchema {
version: string;
fields: DataField[];
// Presumably names of entries in `fields` — confirm.
primaryKeys?: string[];
foreignKeys?: ForeignKey[];
indexes?: Index[];
}
/** One field (column) of a DataSchema, with optional constraints, tags and its own classification. */
export interface DataField {
name: string;
// Free-form string; the set of accepted type names is not defined here.
type: string;
nullable: boolean;
description?: string;
constraints?: FieldConstraint[];
tags?: string[];
classification?: DataClassification;
}
/** Foreign-key relation from local fields to fields of another asset. */
export interface ForeignKey {
fields: string[];
// Presumably the id of the referenced asset — confirm whether id or name is expected.
referencedAsset: string;
referencedFields: string[];
}
/** Index definition over one or more fields; `type` is limited to the four listed index kinds. */
export interface Index {
name: string;
fields: string[];
unique: boolean;
type: 'btree' | 'hash' | 'gin' | 'gist';
}
/** A constraint attached to a DataField. */
export interface FieldConstraint {
type: 'not_null' | 'unique' | 'check' | 'range' | 'pattern';
// Untyped payload; its expected shape per constraint type is not defined here.
value?: any;
description?: string;
}
/** Where an asset is stored or served from: the kind of store, a connection string and a path within it. */
export interface DataLocation {
type: 'database' | 'file_system' | 'cloud_storage' | 'api' | 'stream';
connection: string;
path: string;
format?: string;
compression?: string;
partitioning?: PartitionInfo;
}
/** Partitioning scheme of a DataLocation: the partition fields and the strategy applied to them. */
export interface PartitionInfo {
fields: string[];
strategy: 'range' | 'hash' | 'list';
// Presumably the number of partitions — confirm.
count?: number;
}
/** Optional physical and format metadata for an asset; every property may be omitted. */
export interface DataAssetMetadata {
// Unit is not specified here (presumably bytes — confirm).
size?: number;
rowCount?: number;
columnCount?: number;
fileFormat?: string;
encoding?: string;
delimiter?: string;
compression?: string;
// String-to-string map; presumably checksum algorithm name to digest — confirm.
checksums?: Record<string, string>;
// Open-ended extension point with untyped values.
customProperties?: Record<string, any>;
}
// Data Lineage Types
/** Lineage record of one asset: its upstream and downstream edges, the transformations involved and an impact analysis. */
export interface DataLineage {
id: string;
assetId: string;
upstreamAssets: LineageEdge[];
downstreamAssets: LineageEdge[];
transformations: DataTransformation[];
impact: ImpactAnalysis;
createdAt: Date;
updatedAt: Date;
}
/** Directed link between a source and a target asset in the lineage graph. */
export interface LineageEdge {
sourceAssetId: string;
targetAssetId: string;
relationship: LineageRelationship;
// Presumably ids of DataTransformation entries — confirm.
transformations: string[];
// Numeric confidence; the scale is not defined here (presumably 0-1 — confirm).
confidence: number;
metadata?: Record<string, any>;
}
/** Ways in which one asset can relate to another along a LineageEdge. String enum with snake_case values. */
export enum LineageRelationship {
DERIVED_FROM = 'derived_from',
AGGREGATED_FROM = 'aggregated_from',
JOINED_WITH = 'joined_with',
FILTERED_FROM = 'filtered_from',
TRANSFORMED_FROM = 'transformed_from',
COPIED_FROM = 'copied_from',
ENRICHED_WITH = 'enriched_with'
}
/** A transformation step recorded in lineage, mapping input fields to output fields. */
export interface DataTransformation {
id: string;
name: string;
type: TransformationType;
description?: string;
// Optional source text of the transformation.
code?: string;
inputFields: string[];
outputFields: string[];
// Required string; how it differs from `code` is not defined here.
logic: string;
parameters?: Record<string, any>;
}
/** Categories of DataTransformation. String enum with lowercase values. */
export enum TransformationType {
FILTER = 'filter',
AGGREGATE = 'aggregate',
JOIN = 'join',
UNION = 'union',
PIVOT = 'pivot',
UNPIVOT = 'unpivot',
SORT = 'sort',
DEDUPLICATE = 'deduplicate',
CALCULATE = 'calculate',
CAST = 'cast',
RENAME = 'rename'
}
/** Summary of what would be affected by a change to an asset, with a four-level impact rating. */
export interface ImpactAnalysis {
// Presumably asset ids — confirm.
downstreamAssets: string[];
// Presumably user ids — confirm.
affectedUsers: string[];
estimatedImpact: 'low' | 'medium' | 'high' | 'critical';
impactDescription: string;
recommendations: string[];
}
// Data Quality Types
/** Quality record of one asset: overall score, per-dimension scores, rules, detected issues and trend. */
export interface DataQuality {
id: string;
assetId: string;
// Scale is not defined here.
overallScore: number;
dimensions: QualityDimension[];
rules: QualityRule[];
issues: QualityIssue[];
trend: QualityTrend;
lastAssessment: Date;
nextAssessment?: Date;
}
/** Score for one quality dimension, with a weight and the metrics behind it. */
export interface QualityDimension {
name: QualityDimensionType;
score: number;
// Presumably the dimension's weight in the overall score — confirm.
weight: number;
description: string;
metrics: QualityMetric[];
}
/** The quality dimensions that can be scored. String enum with lowercase values. */
export enum QualityDimensionType {
COMPLETENESS = 'completeness',
ACCURACY = 'accuracy',
CONSISTENCY = 'consistency',
VALIDITY = 'validity',
UNIQUENESS = 'uniqueness',
TIMELINESS = 'timeliness',
INTEGRITY = 'integrity'
}
/** A quality check definition tied to one dimension, optionally scoped to a single field. */
export interface QualityRule {
id: string;
name: string;
description: string;
dimension: QualityDimensionType;
type: QualityRuleType;
field?: string;
// Condition expressed as a string; its syntax is not defined here.
condition: string;
threshold: number;
severity: 'low' | 'medium' | 'high' | 'critical';
enabled: boolean;
}
/** Kinds of quality rule. String enum with snake_case values. */
export enum QualityRuleType {
NULL_CHECK = 'null_check',
RANGE_CHECK = 'range_check',
PATTERN_CHECK = 'pattern_check',
REFERENCE_CHECK = 'reference_check',
DUPLICATE_CHECK = 'duplicate_check',
FRESHNESS_CHECK = 'freshness_check',
CUSTOM = 'custom'
}
/** A single named measurement with a pass/warn/fail status and an optional unit and threshold. */
export interface QualityMetric {
name: string;
value: number;
unit?: string;
threshold?: number;
status: 'pass' | 'warn' | 'fail';
}
/** A detected quality problem linked to a rule, with its severity, workflow status and optional resolution details. */
export interface QualityIssue {
id: string;
// Presumably the id of the QualityRule that raised the issue — confirm.
ruleId: string;
severity: 'low' | 'medium' | 'high' | 'critical';
description: string;
field?: string;
affectedRows?: number;
detectedAt: Date;
status: 'open' | 'acknowledged' | 'resolved' | 'false_positive';
assignee?: string;
resolution?: string;
resolvedAt?: Date;
}
/** Quality score history over a day/week/month timeframe, with a direction label and change rate. */
export interface QualityTrend {
timeframe: 'day' | 'week' | 'month';
dataPoints: QualityDataPoint[];
trend: 'improving' | 'stable' | 'degrading';
// Unit is not defined here.
changeRate: number;
}
/** One point of a QualityTrend: a timestamped overall score plus a score per dimension. */
export interface QualityDataPoint {
timestamp: Date;
score: number;
// Record over the enum, so an entry is required for every QualityDimensionType member.
dimensionScores: Record<QualityDimensionType, number>;
}
// Data Usage Types
/** Usage record of one asset: access totals, top users, access patterns, popular queries and trend. */
export interface DataUsage {
id: string;
assetId: string;
accessCount: number;
uniqueUsers: number;
lastAccessed: Date;
topUsers: UserUsage[];
accessPatterns: AccessPattern[];
popularQueries: PopularQuery[];
usageTrend: UsageTrend;
}
/** Per-user access summary for an asset. */
export interface UserUsage {
userId: string;
userName: string;
accessCount: number;
lastAccessed: Date;
accessType: 'read' | 'write' | 'query' | 'download';
}
/** Access frequency for one combination of hour of day, day of week and access type. */
export interface AccessPattern {
timeOfDay: number; // Hour 0-23
dayOfWeek: number; // 0-6
frequency: number;
accessType: 'read' | 'write' | 'query' | 'download';
}
/** A frequently run query against an asset, with its run count, average execution time and users. */
export interface PopularQuery {
query: string;
count: number;
// Unit is not defined here.
avgExecutionTime: number;
lastExecuted: Date;
// Presumably user ids — confirm.
users: string[];
}
/** Usage history over a day/week/month timeframe, with a direction label and change rate. */
export interface UsageTrend {
timeframe: 'day' | 'week' | 'month';
dataPoints: UsageDataPoint[];
trend: 'increasing' | 'stable' | 'decreasing';
// Unit is not defined here.
changeRate: number;
}
/** One point of a UsageTrend: timestamped access and unique-user counts, with an optional average response time. */
export interface UsageDataPoint {
timestamp: Date;
accessCount: number;
uniqueUsers: number;
// Unit is not defined here.
avgResponseTime?: number;
}
// Data Governance Types
/** Governance record of one asset: policies, compliance statuses, retention, access, privacy settings and audit trail. */
export interface DataGovernance {
id: string;
assetId: string;
policies: GovernancePolicy[];
compliance: ComplianceStatus[];
retention: RetentionPolicy;
access: AccessPolicy;
privacy: PrivacySettings;
audit: AuditTrail[];
}
/** A named governance policy made of rules, with an enforcement mode and a lifecycle status. */
export interface GovernancePolicy {
id: string;
name: string;
type: PolicyType;
description: string;
rules: PolicyRule[];
enforcement: 'advisory' | 'preventive' | 'detective';
status: 'active' | 'inactive' | 'draft';
}
/** Categories of GovernancePolicy. String enum with snake_case values. */
export enum PolicyType {
ACCESS_CONTROL = 'access_control',
DATA_RETENTION = 'data_retention',
DATA_PRIVACY = 'data_privacy',
DATA_QUALITY = 'data_quality',
USAGE_MONITORING = 'usage_monitoring',
COMPLIANCE = 'compliance'
}
/** One condition/action pair inside a GovernancePolicy; both are plain strings whose syntax is not defined here. */
export interface PolicyRule {
id: string;
condition: string;
action: string;
parameters?: Record<string, any>;
}
/** Compliance state of an asset with respect to one regulation, including open issues. */
export interface ComplianceStatus {
regulation: 'GDPR' | 'CCPA' | 'SOX' | 'HIPAA' | 'PCI_DSS' | 'CUSTOM';
status: 'compliant' | 'non_compliant' | 'unknown';
lastAssessment: Date;
issues: ComplianceIssue[];
}
/** A compliance finding: the requirement concerned, its severity, the remediation text and an optional due date. */
export interface ComplianceIssue {
id: string;
description: string;
severity: 'low' | 'medium' | 'high' | 'critical';
requirement: string;
remediation: string;
dueDate?: Date;
}
/** Retention settings for an asset; all durations are expressed in days. */
export interface RetentionPolicy {
retentionPeriod: number; // in days
archiveAfter?: number; // in days
deleteAfter?: number; // in days
retentionReason: string;
// NOTE(review): presumably suspends deletion while true — confirm against the enforcing code.
legalHold: boolean;
}
/** Access rules for an asset: a default access level plus role-level and user-level permission grants. */
export interface AccessPolicy {
defaultAccess: 'none' | 'read' | 'write' | 'admin';
roles: RolePermission[];
users: UserPermission[];
conditions?: AccessCondition[];
}
/** Permissions granted to a role, optionally restricted by conditions. */
export interface RolePermission {
role: string;
permissions: Permission[];
conditions?: AccessCondition[];
}
/** Permissions granted to an individual user, optionally restricted by conditions and an expiry date. */
export interface UserPermission {
userId: string;
permissions: Permission[];
conditions?: AccessCondition[];
expiresAt?: Date;
}
/** Individual permissions that can be granted on an asset. String enum with lowercase values. */
export enum Permission {
READ = 'read',
WRITE = 'write',
DELETE = 'delete',
ADMIN = 'admin',
QUERY = 'query',
EXPORT = 'export'
}
/** A restriction attached to an access grant, categorised by `type`. */
export interface AccessCondition {
type: 'time_based' | 'location_based' | 'purpose_based' | 'data_sensitivity';
condition: string;
// Untyped; the expected shape per condition type is not defined here.
value: any;
}
/** Privacy configuration of an asset: PII flag, sensitive fields, anonymization rules, consent flag and data-subject rights. */
export interface PrivacySettings {
containsPII: boolean;
// Presumably field names from the asset's schema — confirm.
sensitiveFields: string[];
anonymizationRules: AnonymizationRule[];
consentRequired: boolean;
dataSubjectRights: DataSubjectRight[];
}
/** Anonymization method to apply to one field, with optional method-specific parameters. */
export interface AnonymizationRule {
field: string;
method: 'mask' | 'hash' | 'encrypt' | 'tokenize' | 'generalize' | 'suppress';
parameters?: Record<string, any>;
}
/** Whether a given data-subject right is enabled for an asset and whether requests are answered automatically. */
export interface DataSubjectRight {
type: 'access' | 'rectification' | 'erasure' | 'portability' | 'restriction';
enabled: boolean;
automatedResponse: boolean;
}
/** One audit log entry: who performed which action on which resource, when, and with what outcome. */
export interface AuditTrail {
id: string;
timestamp: Date;
userId: string;
action: string;
resource: string;
details: Record<string, any>;
outcome: 'success' | 'failure';
ipAddress?: string;
userAgent?: string;
}
// Search and Discovery Types
/** Parameters of a catalog search: query text plus optional filters, facets, sorting and paging. */
export interface SearchRequest {
query: string;
filters?: SearchFilter[];
// Presumably names of the facets to compute — confirm.
facets?: string[];
sortBy?: string;
sortOrder?: 'asc' | 'desc';
limit?: number;
offset?: number;
}
/** A single field/operator/value predicate used to narrow a search. */
export interface SearchFilter {
field: string;
operator: 'eq' | 'ne' | 'gt' | 'gte' | 'lt' | 'lte' | 'in' | 'contains' | 'startswith' | 'endswith';
// Untyped; presumably an array for 'in' and a scalar otherwise — confirm.
value: any;
}
/** Result of a catalog search: total count, matching assets, facet breakdowns and query suggestions. */
export interface SearchResponse {
// NOTE(review): presumably the match count before paging is applied — confirm.
total: number;
assets: DataAsset[];
facets: SearchFacet[];
suggestions: string[];
}
/** Facet breakdown for one field: the list of values found with their counts. */
export interface SearchFacet {
field: string;
values: FacetValue[];
}
/** One facet value together with its count. */
export interface FacetValue {
value: string;
count: number;
}
// API Request/Response Types
/**
 * Payload for creating a data asset. `name`, `type`, `description`, `owner`,
 * `classification` and `location` are required by this type; the rest is
 * optional.
 */
export interface CreateDataAssetRequest {
name: string;
type: DataAssetType;
description: string;
owner: string;
steward?: string;
tags?: string[];
classification: DataClassification;
schema?: DataSchema;
location: DataLocation;
metadata?: Partial<DataAssetMetadata>;
governance?: Partial<DataGovernance>;
}
/**
 * Payload for updating a data asset; every property is optional. It has no
 * `type`, `location` or `governance` property, so those cannot be supplied
 * through this request type.
 */
export interface UpdateDataAssetRequest {
name?: string;
description?: string;
owner?: string;
steward?: string;
tags?: string[];
classification?: DataClassification;
schema?: DataSchema;
metadata?: Partial<DataAssetMetadata>;
}
/** Parameters for a lineage lookup starting at one asset, in a given direction. */
export interface LineageRequest {
assetId: string;
direction: 'upstream' | 'downstream' | 'both';
// Presumably the maximum number of hops to traverse — confirm.
depth?: number;
includeTransformations?: boolean;
}
/** Parameters for requesting a quality assessment of one asset. */
export interface QualityAssessmentRequest {
assetId: string;
// Presumably ids of the rules to evaluate — confirm.
rules?: string[];
// NOTE(review): presumably requests a synchronous run rather than a scheduled one — confirm.
immediate?: boolean;
}
/** Payload for creating a quality rule; it has the same properties as QualityRule except `id` and `enabled`. */
export interface CreateQualityRuleRequest {
name: string;
description: string;
dimension: QualityDimensionType;
type: QualityRuleType;
field?: string;
condition: string;
threshold: number;
severity: 'low' | 'medium' | 'high' | 'critical';
}