adding data-services

This commit is contained in:
Bojan Kucera 2025-06-03 07:42:48 -04:00
parent e3bfd05b90
commit 405b818c86
139 changed files with 55943 additions and 416 deletions

View file

@ -0,0 +1,312 @@
import { EventBus } from '@stock-bot/event-bus';
import { Logger } from '@stock-bot/utils';
import {
DataAsset,
CreateDataAssetRequest,
UpdateDataAssetRequest,
DataAssetType,
DataClassification
} from '../types/DataCatalog';
/**
 * Contract for the data catalog: create, read, update, delete and query
 * `DataAsset` records. Every operation is asynchronous.
 */
export interface DataCatalogService {
/** Builds a new asset from the request and returns the stored record. */
createAsset(request: CreateDataAssetRequest): Promise<DataAsset>;
/** Looks up one asset by id; resolves to `null` when the id is unknown. */
getAsset(id: string): Promise<DataAsset | null>;
/** Applies the provided fields to an existing asset; resolves to `null` when the id is unknown. */
updateAsset(id: string, request: UpdateDataAssetRequest): Promise<DataAsset | null>;
/** Removes an asset by id. */
deleteAsset(id: string): Promise<void>;
/** Lists assets, optionally narrowed by a key/value filter map. */
listAssets(filters?: Record<string, any>): Promise<DataAsset[]>;
/** Free-text search over assets, optionally narrowed by a key/value filter map. */
searchAssets(query: string, filters?: Record<string, any>): Promise<DataAsset[]>;
/** Lists the assets whose owner equals `owner`. */
getAssetsByOwner(owner: string): Promise<DataAsset[]>;
/** Lists the assets of the given type. */
getAssetsByType(type: DataAssetType): Promise<DataAsset[]>;
/** Lists the assets carrying the given classification. */
getAssetsByClassification(classification: DataClassification): Promise<DataAsset[]>;
/** Lists the assets matching the given tags. */
getAssetsByTags(tags: string[]): Promise<DataAsset[]>;
}
/**
 * In-memory implementation of {@link DataCatalogService}.
 *
 * Assets live in a `Map` keyed by asset id. Every mutation is logged and
 * announced on the event bus (`data.asset.created`, `data.asset.accessed`,
 * `data.asset.updated`, `data.asset.deleted`). Failures are logged and rethrown.
 */
export class DataCatalogServiceImpl implements DataCatalogService {
  private assets: Map<string, DataAsset> = new Map();

  constructor(
    private eventBus: EventBus,
    private logger: Logger
  ) {}

  /**
   * Creates an asset with freshly initialised lineage, quality and usage
   * sections, stores it and emits `data.asset.created`.
   *
   * The request is treated as read-only: its `tags` array and `governance`
   * object are copied, so linking the nested sections to the new asset id never
   * writes back into objects the caller still owns.
   *
   * @param request Fields describing the new asset.
   * @returns The stored asset.
   * @throws Rethrows any error raised while building, storing or announcing the asset.
   */
  async createAsset(request: CreateDataAssetRequest): Promise<DataAsset> {
    try {
      const asset: DataAsset = {
        id: this.generateId(),
        name: request.name,
        type: request.type,
        description: request.description,
        owner: request.owner,
        steward: request.steward,
        tags: request.tags ? [...request.tags] : [],
        classification: request.classification,
        schema: request.schema,
        location: request.location,
        metadata: {
          customProperties: {},
          ...request.metadata
        },
        lineage: {
          id: this.generateId(),
          assetId: '',
          upstreamAssets: [],
          downstreamAssets: [],
          transformations: [],
          impact: {
            downstreamAssets: [],
            affectedUsers: [],
            estimatedImpact: 'low',
            impactDescription: '',
            recommendations: []
          },
          createdAt: new Date(),
          updatedAt: new Date()
        },
        quality: {
          id: this.generateId(),
          assetId: '',
          overallScore: 100,
          dimensions: [],
          rules: [],
          issues: [],
          trend: {
            timeframe: 'week',
            dataPoints: [],
            trend: 'stable',
            changeRate: 0
          },
          lastAssessment: new Date()
        },
        usage: {
          id: this.generateId(),
          assetId: '',
          accessCount: 0,
          uniqueUsers: 0,
          lastAccessed: new Date(),
          topUsers: [],
          accessPatterns: [],
          popularQueries: [],
          usageTrend: {
            timeframe: 'week',
            dataPoints: [],
            trend: 'stable',
            changeRate: 0
          }
        },
        // Shallow-copy caller-supplied governance so the assetId assignment
        // below does not mutate the request object.
        governance: request.governance
          ? { ...request.governance }
          : {
              id: this.generateId(),
              assetId: '',
              policies: [],
              compliance: [],
              retention: {
                retentionPeriod: 365,
                retentionReason: 'Business requirement',
                legalHold: false
              },
              access: {
                defaultAccess: 'none',
                roles: [],
                users: []
              },
              privacy: {
                containsPII: false,
                sensitiveFields: [],
                anonymizationRules: [],
                consentRequired: false,
                dataSubjectRights: []
              },
              audit: []
            },
        createdAt: new Date(),
        updatedAt: new Date()
      };

      // The asset id only exists once the literal is built, so the nested
      // sections are linked to it afterwards.
      asset.lineage.assetId = asset.id;
      asset.quality.assetId = asset.id;
      asset.usage.assetId = asset.id;
      asset.governance.assetId = asset.id;

      this.assets.set(asset.id, asset);
      this.logger.info('Data asset created', { assetId: asset.id, name: asset.name });
      await this.eventBus.emit('data.asset.created', {
        assetId: asset.id,
        asset,
        timestamp: new Date()
      });
      return asset;
    } catch (error) {
      this.logger.error('Failed to create data asset', { request, error });
      throw error;
    }
  }

  /**
   * Returns the asset with the given id and records the access (timestamps,
   * access counter, `data.asset.accessed` event).
   *
   * @param id Asset id.
   * @returns The asset, or `null` when the id is unknown.
   */
  async getAsset(id: string): Promise<DataAsset | null> {
    try {
      const asset = this.assets.get(id);
      if (asset) {
        // Reading an asset counts as a use of it.
        asset.lastAccessed = new Date();
        asset.usage.lastAccessed = new Date();
        asset.usage.accessCount++;
        await this.eventBus.emit('data.asset.accessed', {
          assetId: id,
          timestamp: new Date()
        });
      }
      return asset || null;
    } catch (error) {
      this.logger.error('Failed to get data asset', { assetId: id, error });
      throw error;
    }
  }

  /**
   * Applies the fields present in `request` to an existing asset. Fields left
   * `undefined` are untouched; `metadata` is shallow-merged into the existing
   * metadata rather than replaced.
   *
   * @param id Asset id.
   * @param request Fields to change.
   * @returns The updated asset, or `null` when the id is unknown.
   */
  async updateAsset(id: string, request: UpdateDataAssetRequest): Promise<DataAsset | null> {
    try {
      const asset = this.assets.get(id);
      if (!asset) {
        return null;
      }

      if (request.name !== undefined) asset.name = request.name;
      if (request.description !== undefined) asset.description = request.description;
      if (request.owner !== undefined) asset.owner = request.owner;
      if (request.steward !== undefined) asset.steward = request.steward;
      if (request.tags !== undefined) asset.tags = request.tags;
      if (request.classification !== undefined) asset.classification = request.classification;
      if (request.schema !== undefined) asset.schema = request.schema;
      if (request.metadata !== undefined) {
        asset.metadata = { ...asset.metadata, ...request.metadata };
      }
      asset.updatedAt = new Date();

      this.logger.info('Data asset updated', { assetId: id, changes: request });
      await this.eventBus.emit('data.asset.updated', {
        assetId: id,
        asset,
        changes: request,
        timestamp: new Date()
      });
      return asset;
    } catch (error) {
      this.logger.error('Failed to update data asset', { assetId: id, request, error });
      throw error;
    }
  }

  /**
   * Removes an asset and emits `data.asset.deleted` carrying the removed record.
   *
   * @param id Asset id.
   * @throws Error when no asset with that id exists.
   */
  async deleteAsset(id: string): Promise<void> {
    try {
      const asset = this.assets.get(id);
      if (!asset) {
        throw new Error(`Asset with id ${id} not found`);
      }
      this.assets.delete(id);
      this.logger.info('Data asset deleted', { assetId: id });
      await this.eventBus.emit('data.asset.deleted', {
        assetId: id,
        asset,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to delete data asset', { assetId: id, error });
      throw error;
    }
  }

  /**
   * Lists all assets, optionally narrowed by `filters`.
   *
   * Recognised filter keys are `type`, `owner`, `classification` and `tags`
   * (a single tag, or an array of which at least one must be present).
   * Unrecognised keys are ignored.
   *
   * @param filters Optional filter map.
   * @returns The matching assets in insertion order.
   */
  async listAssets(filters?: Record<string, any>): Promise<DataAsset[]> {
    try {
      const assets = Array.from(this.assets.values());
      return filters ? assets.filter(asset => this.matchesFilters(asset, filters)) : assets;
    } catch (error) {
      this.logger.error('Failed to list data assets', { filters, error });
      throw error;
    }
  }

  /**
   * Case-insensitive substring search over asset name, description and tags,
   * followed by the same filter handling as {@link listAssets} (including `tags`).
   *
   * An asset without a description is simply not matched on description.
   *
   * @param query Text to look for.
   * @param filters Optional filter map, see {@link listAssets}.
   * @returns The matching assets in insertion order.
   */
  async searchAssets(query: string, filters?: Record<string, any>): Promise<DataAsset[]> {
    try {
      const searchTerm = query.toLowerCase();
      let assets = Array.from(this.assets.values()).filter(asset =>
        asset.name.toLowerCase().includes(searchTerm) ||
        (asset.description ?? '').toLowerCase().includes(searchTerm) ||
        asset.tags.some(tag => tag.toLowerCase().includes(searchTerm))
      );
      if (filters) {
        assets = assets.filter(asset => this.matchesFilters(asset, filters));
      }
      this.logger.info('Asset search completed', {
        query,
        filters,
        resultCount: assets.length
      });
      return assets;
    } catch (error) {
      this.logger.error('Failed to search data assets', { query, filters, error });
      throw error;
    }
  }

  /** Lists the assets whose owner equals `owner`. */
  async getAssetsByOwner(owner: string): Promise<DataAsset[]> {
    return this.listAssets({ owner });
  }

  /** Lists the assets of the given type. */
  async getAssetsByType(type: DataAssetType): Promise<DataAsset[]> {
    return this.listAssets({ type });
  }

  /** Lists the assets carrying the given classification. */
  async getAssetsByClassification(classification: DataClassification): Promise<DataAsset[]> {
    return this.listAssets({ classification });
  }

  /** Lists the assets that carry at least one of the given tags. */
  async getAssetsByTags(tags: string[]): Promise<DataAsset[]> {
    return this.listAssets({ tags });
  }

  /**
   * Single source of truth for filter semantics, shared by list and search so
   * the two cannot drift apart. Every entry of `filters` must match.
   */
  private matchesFilters(asset: DataAsset, filters: Record<string, any>): boolean {
    return Object.entries(filters).every(([key, value]) => {
      switch (key) {
        case 'type':
          return asset.type === value;
        case 'owner':
          return asset.owner === value;
        case 'classification':
          return asset.classification === value;
        case 'tags':
          return Array.isArray(value)
            ? value.some(tag => asset.tags.includes(tag))
            : asset.tags.includes(value);
        default:
          // Unknown keys do not restrict the result.
          return true;
      }
    });
  }

  /** Builds an id from the current time plus up to nine random base-36 characters. */
  private generateId(): string {
    return `asset_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }
}

View file

@ -0,0 +1,764 @@
import { EventBus } from '@stock-bot/event-bus';
import { Logger } from '@stock-bot/utils';
import {
DataGovernance,
GovernancePolicy,
ComplianceCheck,
RetentionPolicy,
AccessControl,
PrivacySettings,
AuditEntry,
DataAsset,
GovernancePolicyType,
ComplianceStatus,
DataClassification
} from '../types/DataCatalog';
/**
 * Contract for data governance: policy management, per-asset policy
 * assignment, compliance checks, retention/access/privacy settings, audit
 * logging and data-subject requests. Every operation is asynchronous.
 */
export interface DataGovernanceService {
/** Creates a policy; `id`, `createdAt` and `updatedAt` are assigned by the service. */
createPolicy(policy: Omit<GovernancePolicy, 'id' | 'createdAt' | 'updatedAt'>): Promise<GovernancePolicy>;
/** Applies partial changes to a policy; resolves to `null` when the id is unknown. */
updatePolicy(policyId: string, updates: Partial<GovernancePolicy>): Promise<GovernancePolicy | null>;
/** Deletes a policy. */
deletePolicy(policyId: string): Promise<void>;
/** Looks up one policy; resolves to `null` when the id is unknown. */
getPolicy(policyId: string): Promise<GovernancePolicy | null>;
/** Lists policies, optionally narrowed by a key/value filter map. */
listPolicies(filters?: Record<string, any>): Promise<GovernancePolicy[]>;
/** Attaches a policy to an asset. */
applyPolicy(assetId: string, policyId: string): Promise<void>;
/** Detaches a policy from an asset. */
removePolicy(assetId: string, policyId: string): Promise<void>;
/** Evaluates the policies attached to an asset and returns one check per evaluated policy. */
checkCompliance(assetId: string): Promise<ComplianceCheck[]>;
/** Replaces the retention settings of an asset. */
updateRetentionPolicy(assetId: string, retention: RetentionPolicy): Promise<void>;
/** Replaces the access-control settings of an asset. */
updateAccessControl(assetId: string, access: AccessControl): Promise<void>;
/** Replaces the privacy settings of an asset. */
updatePrivacySettings(assetId: string, privacy: PrivacySettings): Promise<void>;
/** Appends an audit entry describing `action` performed by `userId` on the asset. */
auditAccess(assetId: string, userId: string, action: string, details?: any): Promise<void>;
/** Returns the audit entries of an asset, optionally narrowed by a key/value filter map. */
getAuditTrail(assetId: string, filters?: Record<string, any>): Promise<AuditEntry[]>;
/** Builds a compliance report covering the given assets. */
generateComplianceReport(assetIds: string[]): Promise<any>;
/** Decides whether `userId` may perform `action` on the asset. */
validateDataAccess(assetId: string, userId: string, action: string): Promise<boolean>;
/** Runs the anonymization rules configured for the asset. */
anonymizeData(assetId: string, options?: any): Promise<void>;
/** Handles a data-subject request against the asset and returns the handler's response. */
handleDataSubjectRequest(assetId: string, request: any): Promise<any>;
}
/**
 * In-memory implementation of {@link DataGovernanceService}.
 *
 * Policies, per-asset governance records and the asset lookup are `Map`s keyed
 * by id. The asset and governance maps can be swapped in with
 * {@link setAssets} / {@link setGovernance}. Every mutation is logged and
 * announced on the event bus under `data.governance.*`. Failures are logged
 * and rethrown, except in {@link validateDataAccess}, which fails closed.
 *
 * Compliance checking, anonymization and the data-subject handlers are mock
 * implementations.
 */
export class DataGovernanceServiceImpl implements DataGovernanceService {
  private policies: Map<string, GovernancePolicy> = new Map();
  private governance: Map<string, DataGovernance> = new Map();
  private assets: Map<string, DataAsset> = new Map();

  constructor(
    private eventBus: EventBus,
    private logger: Logger
  ) {
    this.initializeDefaultPolicies();
  }

  /**
   * Stores a new policy with a generated id and creation timestamps, then
   * emits `data.governance.policy.created`.
   *
   * @param policy Policy fields without `id`, `createdAt` and `updatedAt`.
   * @returns The stored policy.
   */
  async createPolicy(policy: Omit<GovernancePolicy, 'id' | 'createdAt' | 'updatedAt'>): Promise<GovernancePolicy> {
    try {
      const fullPolicy: GovernancePolicy = {
        ...policy,
        id: this.generateId(),
        createdAt: new Date(),
        updatedAt: new Date()
      };
      this.policies.set(fullPolicy.id, fullPolicy);
      this.logger.info('Governance policy created', {
        policyId: fullPolicy.id,
        name: fullPolicy.name,
        type: fullPolicy.type
      });
      await this.eventBus.emit('data.governance.policy.created', {
        policy: fullPolicy,
        timestamp: new Date()
      });
      return fullPolicy;
    } catch (error) {
      this.logger.error('Failed to create governance policy', { policy, error });
      throw error;
    }
  }

  /**
   * Merges `updates` into an existing policy and emits
   * `data.governance.policy.updated`.
   *
   * `id` and `createdAt` are identity fields and cannot be changed through
   * `updates`. Assets the policy is already applied to are switched to the
   * updated policy object, so later compliance checks see the new values.
   *
   * @param policyId Id of the policy to change.
   * @param updates Fields to overwrite.
   * @returns The updated policy, or `null` when the id is unknown.
   */
  async updatePolicy(policyId: string, updates: Partial<GovernancePolicy>): Promise<GovernancePolicy | null> {
    try {
      const policy = this.policies.get(policyId);
      if (!policy) {
        return null;
      }
      const updatedPolicy: GovernancePolicy = {
        ...policy,
        ...updates,
        // Pinned after the spread so a stray id/createdAt in `updates` cannot
        // desynchronise the record from its map key.
        id: policy.id,
        createdAt: policy.createdAt,
        updatedAt: new Date()
      };
      this.policies.set(policyId, updatedPolicy);

      // Governance records hold policy objects, not ids; refresh them.
      for (const governance of this.governance.values()) {
        if (governance.policies.some(p => p.id === policyId)) {
          governance.policies = governance.policies.map(p => (p.id === policyId ? updatedPolicy : p));
        }
      }

      this.logger.info('Governance policy updated', { policyId, changes: updates });
      await this.eventBus.emit('data.governance.policy.updated', {
        policy: updatedPolicy,
        changes: updates,
        timestamp: new Date()
      });
      return updatedPolicy;
    } catch (error) {
      this.logger.error('Failed to update governance policy', { policyId, updates, error });
      throw error;
    }
  }

  /**
   * Deletes a policy, detaches it from every asset and emits
   * `data.governance.policy.deleted`.
   *
   * @param policyId Id of the policy to delete.
   * @throws Error when no policy with that id exists.
   */
  async deletePolicy(policyId: string): Promise<void> {
    try {
      const policy = this.policies.get(policyId);
      if (!policy) {
        throw new Error(`Policy with id ${policyId} not found`);
      }
      this.policies.delete(policyId);
      for (const governance of this.governance.values()) {
        governance.policies = governance.policies.filter(p => p.id !== policyId);
      }
      this.logger.info('Governance policy deleted', { policyId });
      await this.eventBus.emit('data.governance.policy.deleted', {
        policyId,
        policy,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to delete governance policy', { policyId, error });
      throw error;
    }
  }

  /**
   * Looks up one policy.
   *
   * @param policyId Policy id.
   * @returns The policy, or `null` when the id is unknown.
   */
  async getPolicy(policyId: string): Promise<GovernancePolicy | null> {
    try {
      return this.policies.get(policyId) || null;
    } catch (error) {
      this.logger.error('Failed to get governance policy', { policyId, error });
      throw error;
    }
  }

  /**
   * Lists policies, optionally narrowed by `filters`.
   *
   * Recognised keys are `type`, `active` and `classification` (the policy must
   * list that classification as applicable). Unrecognised keys are ignored.
   *
   * @param filters Optional filter map.
   * @returns The matching policies.
   */
  async listPolicies(filters?: Record<string, any>): Promise<GovernancePolicy[]> {
    try {
      let policies = Array.from(this.policies.values());
      if (filters) {
        policies = policies.filter(policy =>
          Object.entries(filters).every(([key, value]) => {
            if (key === 'type') return policy.type === value;
            if (key === 'active') return policy.active === value;
            if (key === 'classification') return policy.applicableClassifications?.includes(value);
            return true;
          })
        );
      }
      return policies;
    } catch (error) {
      this.logger.error('Failed to list governance policies', { filters, error });
      throw error;
    }
  }

  /**
   * Attaches a policy to an asset, re-runs the compliance check and emits
   * `data.governance.policy.applied`. Does nothing when the policy is already
   * attached. A governance record is created for the asset if none exists.
   *
   * @param assetId Asset id.
   * @param policyId Policy id.
   * @throws Error when no policy with that id exists.
   */
  async applyPolicy(assetId: string, policyId: string): Promise<void> {
    try {
      const policy = this.policies.get(policyId);
      if (!policy) {
        throw new Error(`Policy with id ${policyId} not found`);
      }
      const governance = this.governanceFor(assetId);
      if (governance.policies.some(p => p.id === policyId)) {
        return;
      }
      governance.policies.push(policy);
      this.governance.set(assetId, governance);
      await this.checkCompliance(assetId);
      this.logger.info('Policy applied to asset', { assetId, policyId });
      await this.eventBus.emit('data.governance.policy.applied', {
        assetId,
        policyId,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to apply policy to asset', { assetId, policyId, error });
      throw error;
    }
  }

  /**
   * Detaches a policy from an asset and emits `data.governance.policy.removed`.
   *
   * @param assetId Asset id.
   * @param policyId Policy id.
   * @throws Error when the asset has no governance record.
   */
  async removePolicy(assetId: string, policyId: string): Promise<void> {
    try {
      const governance = this.governance.get(assetId);
      if (!governance) {
        throw new Error(`Governance not found for asset ${assetId}`);
      }
      governance.policies = governance.policies.filter(p => p.id !== policyId);
      this.logger.info('Policy removed from asset', { assetId, policyId });
      await this.eventBus.emit('data.governance.policy.removed', {
        assetId,
        policyId,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to remove policy from asset', { assetId, policyId, error });
      throw error;
    }
  }

  /**
   * Evaluates every active policy attached to the asset, stores the results on
   * its governance record and, when any check failed, logs a warning and emits
   * `data.governance.compliance.violation`.
   *
   * @param assetId Asset id.
   * @returns One check per active policy; an empty array when the asset or its
   *   governance record is unknown.
   */
  async checkCompliance(assetId: string): Promise<ComplianceCheck[]> {
    try {
      const governance = this.governance.get(assetId);
      const asset = this.assets.get(assetId);
      if (!governance || !asset) {
        return [];
      }
      const complianceChecks: ComplianceCheck[] = [];
      for (const policy of governance.policies) {
        if (!policy.active) continue;
        complianceChecks.push(await this.performComplianceCheck(asset, policy));
      }
      governance.compliance = complianceChecks;

      const failedChecks = complianceChecks.filter(check => check.status === 'failed');
      if (failedChecks.length > 0) {
        this.logger.warn('Compliance violations detected', {
          assetId,
          violationCount: failedChecks.length
        });
        await this.eventBus.emit('data.governance.compliance.violation', {
          assetId,
          violations: failedChecks,
          timestamp: new Date()
        });
      }
      return complianceChecks;
    } catch (error) {
      this.logger.error('Failed to check compliance', { assetId, error });
      throw error;
    }
  }

  /**
   * Replaces the retention settings of an asset (creating its governance
   * record if needed) and emits `data.governance.retention.updated`.
   */
  async updateRetentionPolicy(assetId: string, retention: RetentionPolicy): Promise<void> {
    try {
      const governance = this.governanceFor(assetId);
      governance.retention = retention;
      this.governance.set(assetId, governance);
      this.logger.info('Retention policy updated', { assetId, retentionPeriod: retention.retentionPeriod });
      await this.eventBus.emit('data.governance.retention.updated', {
        assetId,
        retention,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to update retention policy', { assetId, retention, error });
      throw error;
    }
  }

  /**
   * Replaces the access-control settings of an asset (creating its governance
   * record if needed) and emits `data.governance.access.updated`.
   */
  async updateAccessControl(assetId: string, access: AccessControl): Promise<void> {
    try {
      const governance = this.governanceFor(assetId);
      governance.access = access;
      this.governance.set(assetId, governance);
      this.logger.info('Access control updated', { assetId, defaultAccess: access.defaultAccess });
      await this.eventBus.emit('data.governance.access.updated', {
        assetId,
        access,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to update access control', { assetId, access, error });
      throw error;
    }
  }

  /**
   * Replaces the privacy settings of an asset (creating its governance record
   * if needed) and emits `data.governance.privacy.updated`.
   */
  async updatePrivacySettings(assetId: string, privacy: PrivacySettings): Promise<void> {
    try {
      const governance = this.governanceFor(assetId);
      governance.privacy = privacy;
      this.governance.set(assetId, governance);
      this.logger.info('Privacy settings updated', {
        assetId,
        containsPII: privacy.containsPII,
        consentRequired: privacy.consentRequired
      });
      await this.eventBus.emit('data.governance.privacy.updated', {
        assetId,
        privacy,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to update privacy settings', { assetId, privacy, error });
      throw error;
    }
  }

  /**
   * Appends an audit entry to the asset's governance record (creating the
   * record if needed) and emits `data.governance.access.audited`.
   *
   * @param assetId Asset id.
   * @param userId User who performed the action.
   * @param action Action name.
   * @param details Optional context; `ipAddress` and `userAgent` are copied
   *   onto the entry and the whole object is kept as `details`.
   */
  async auditAccess(assetId: string, userId: string, action: string, details?: any): Promise<void> {
    try {
      const governance = this.governanceFor(assetId);
      const auditEntry: AuditEntry = {
        id: this.generateId(),
        userId,
        action,
        timestamp: new Date(),
        ipAddress: details?.ipAddress,
        userAgent: details?.userAgent,
        details
      };
      governance.audit.push(auditEntry);
      this.governance.set(assetId, governance);
      this.logger.info('Access audited', { assetId, userId, action });
      await this.eventBus.emit('data.governance.access.audited', {
        assetId,
        auditEntry,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to audit access', { assetId, userId, action, error });
      throw error;
    }
  }

  /**
   * Returns the audit entries of an asset, newest first.
   *
   * Recognised filter keys are `userId`, `action`, `fromDate` and `toDate`
   * (both inclusive, anything accepted by `new Date`). Unrecognised keys are
   * ignored. The stored audit log is never reordered: the result is always a
   * new array.
   *
   * @param assetId Asset id.
   * @param filters Optional filter map.
   * @returns The matching entries; an empty array when the asset has no governance record.
   */
  async getAuditTrail(assetId: string, filters?: Record<string, any>): Promise<AuditEntry[]> {
    try {
      const governance = this.governance.get(assetId);
      if (!governance) {
        return [];
      }
      let auditEntries = governance.audit;
      if (filters) {
        auditEntries = auditEntries.filter(entry =>
          Object.entries(filters).every(([key, value]) => {
            if (key === 'userId') return entry.userId === value;
            if (key === 'action') return entry.action === value;
            if (key === 'fromDate') return entry.timestamp >= new Date(value);
            if (key === 'toDate') return entry.timestamp <= new Date(value);
            return true;
          })
        );
      }
      // Array.prototype.sort works in place; copy first so the stored log
      // keeps its chronological order.
      return [...auditEntries].sort((a, b) => b.timestamp.getTime() - a.timestamp.getTime());
    } catch (error) {
      this.logger.error('Failed to get audit trail', { assetId, filters, error });
      throw error;
    }
  }

  /**
   * Runs a compliance check for each listed asset and assembles a report with
   * a summary, per-asset results, the individual violations and
   * recommendations. Ids without both an asset and a governance record count
   * towards `totalAssets` but are otherwise skipped.
   *
   * @param assetIds Assets to include.
   * @returns The report object.
   */
  async generateComplianceReport(assetIds: string[]): Promise<any> {
    try {
      const reportData = {
        summary: {
          totalAssets: assetIds.length,
          compliantAssets: 0,
          nonCompliantAssets: 0,
          violationCount: 0,
          reportDate: new Date()
        },
        assetCompliance: [] as any[],
        policyViolations: [] as any[],
        recommendations: [] as string[]
      };
      let totalViolations = 0;

      for (const assetId of assetIds) {
        const governance = this.governance.get(assetId);
        const asset = this.assets.get(assetId);
        if (!governance || !asset) {
          continue;
        }
        const complianceChecks = await this.checkCompliance(assetId);
        const violations = complianceChecks.filter(check => check.status === 'failed');
        const isCompliant = violations.length === 0;
        if (isCompliant) {
          reportData.summary.compliantAssets++;
        } else {
          reportData.summary.nonCompliantAssets++;
        }
        totalViolations += violations.length;
        reportData.assetCompliance.push({
          assetId,
          assetName: asset.name,
          classification: asset.classification,
          compliant: isCompliant,
          violationCount: violations.length,
          policiesApplied: governance.policies.length,
          lastChecked: new Date()
        });
        for (const violation of violations) {
          reportData.policyViolations.push({
            assetId,
            assetName: asset.name,
            policyName: violation.policyName,
            violation: violation.details,
            severity: violation.severity || 'medium',
            checkedAt: violation.checkedAt
          });
        }
      }

      reportData.summary.violationCount = totalViolations;
      reportData.recommendations = this.generateComplianceRecommendations(reportData);
      this.logger.info('Compliance report generated', {
        totalAssets: assetIds.length,
        compliantAssets: reportData.summary.compliantAssets,
        violationCount: totalViolations
      });
      return reportData;
    } catch (error) {
      this.logger.error('Failed to generate compliance report', { assetIds, error });
      throw error;
    }
  }

  /**
   * Decides whether `userId` may perform `action` on the asset.
   *
   * Order of evaluation:
   * 1. Unknown asset or governance record: denied.
   * 2. An explicit `deny:<action>` permission on the user: denied, regardless
   *    of the default access mode.
   * 3. Default access `'none'`: allowed only with an explicit user permission
   *    or a role carrying the permission.
   * 4. `restricted` / `confidential` assets: allowed only with an explicit
   *    user permission.
   * 5. Otherwise allowed.
   *
   * Any error is logged and treated as a denial.
   *
   * @returns `true` when access is allowed.
   */
  async validateDataAccess(assetId: string, userId: string, action: string): Promise<boolean> {
    try {
      const governance = this.governance.get(assetId);
      const asset = this.assets.get(assetId);
      if (!governance || !asset) {
        return false;
      }
      const { access } = governance;
      const userHas = (permission: string): boolean =>
        access.users.some(user => user.userId === userId && user.permissions.includes(permission));

      // An explicit deny always wins, also when the default mode is 'none'.
      if (userHas(`deny:${action}`)) {
        return false;
      }

      if (access.defaultAccess === 'none') {
        // NOTE(review): role grants are not yet tied to the requesting user, so
        // any role holding the permission allows every user. Needs the user's
        // role membership to be checked once that data is available here.
        const hasRoleAccess = access.roles.some(role => role.permissions.includes(action));
        return userHas(action) || hasRoleAccess;
      }

      if (asset.classification === 'restricted' || asset.classification === 'confidential') {
        return userHas(action);
      }
      return true;
    } catch (error) {
      this.logger.error('Failed to validate data access', { assetId, userId, action, error });
      return false;
    }
  }

  /**
   * Applies each configured anonymization rule to the asset and emits
   * `data.governance.anonymization.completed`. Does nothing when the asset has
   * no governance record or is not flagged as containing PII.
   */
  async anonymizeData(assetId: string, options?: any): Promise<void> {
    try {
      const governance = this.governance.get(assetId);
      if (!governance || !governance.privacy.containsPII) {
        return;
      }
      for (const rule of governance.privacy.anonymizationRules) {
        await this.applyAnonymizationRule(assetId, rule, options);
      }
      this.logger.info('Data anonymization completed', { assetId });
      await this.eventBus.emit('data.governance.anonymization.completed', {
        assetId,
        options,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to anonymize data', { assetId, options, error });
      throw error;
    }
  }

  /**
   * Dispatches a data-subject request by `request.type` (`access`,
   * `rectification`, `erasure`, `portability`) and emits
   * `data.governance.subject.request.handled`.
   *
   * @returns The handler's response object.
   * @throws Error when the asset or its governance record is unknown, or the
   *   request type is not supported.
   */
  async handleDataSubjectRequest(assetId: string, request: any): Promise<any> {
    try {
      const governance = this.governance.get(assetId);
      const asset = this.assets.get(assetId);
      if (!governance || !asset) {
        throw new Error(`Asset or governance not found for ${assetId}`);
      }
      let response: any = {};
      switch (request.type) {
        case 'access':
          response = await this.handleAccessRequest(assetId, request);
          break;
        case 'rectification':
          response = await this.handleRectificationRequest(assetId, request);
          break;
        case 'erasure':
          response = await this.handleErasureRequest(assetId, request);
          break;
        case 'portability':
          response = await this.handlePortabilityRequest(assetId, request);
          break;
        default:
          throw new Error(`Unsupported request type: ${request.type}`);
      }
      this.logger.info('Data subject request handled', { assetId, requestType: request.type });
      await this.eventBus.emit('data.governance.subject.request.handled', {
        assetId,
        request,
        response,
        timestamp: new Date()
      });
      return response;
    } catch (error) {
      this.logger.error('Failed to handle data subject request', { assetId, request, error });
      throw error;
    }
  }

  // Private helper methods

  /** Seeds the policy map with the two built-in policies. */
  private initializeDefaultPolicies(): void {
    const defaultPolicies: GovernancePolicy[] = [
      {
        id: 'policy_pii_protection',
        name: 'PII Protection Policy',
        description: 'Ensures proper handling of personally identifiable information',
        type: 'privacy',
        rules: [
          'PII data must be encrypted at rest',
          'PII access must be logged',
          'PII retention must not exceed 7 years'
        ],
        applicableClassifications: ['pii'],
        active: true,
        severity: 'high',
        createdAt: new Date(),
        updatedAt: new Date()
      },
      {
        id: 'policy_financial_compliance',
        name: 'Financial Data Compliance',
        description: 'Compliance with financial regulations',
        type: 'compliance',
        rules: [
          'Financial data must be retained for 7 years',
          'Access to financial data must be role-based',
          'All financial data access must be audited'
        ],
        applicableClassifications: ['financial'],
        active: true,
        severity: 'critical',
        createdAt: new Date(),
        updatedAt: new Date()
      }
    ];
    for (const policy of defaultPolicies) {
      this.policies.set(policy.id, policy);
    }
  }

  /**
   * Returns the governance record stored for the asset, or a fresh default
   * record when none exists. The fresh record is NOT stored; callers store it
   * once they have made their change.
   */
  private governanceFor(assetId: string): DataGovernance {
    return this.governance.get(assetId) ?? this.createEmptyGovernance(assetId);
  }

  /** Builds a default governance record: no policies, 365-day retention, no default access, no PII. */
  private createEmptyGovernance(assetId: string): DataGovernance {
    return {
      id: this.generateId(),
      assetId,
      policies: [],
      compliance: [],
      retention: {
        retentionPeriod: 365,
        retentionReason: 'Business requirement',
        legalHold: false
      },
      access: {
        defaultAccess: 'none',
        roles: [],
        users: []
      },
      privacy: {
        containsPII: false,
        sensitiveFields: [],
        anonymizationRules: [],
        consentRequired: false,
        dataSubjectRights: []
      },
      audit: []
    };
  }

  /**
   * Mock compliance check: the outcome is random (roughly 90% pass) and does
   * not inspect the asset. Failed checks carry canned recommendations.
   */
  private async performComplianceCheck(asset: DataAsset, policy: GovernancePolicy): Promise<ComplianceCheck> {
    const isCompliant = Math.random() > 0.1; // 90% compliance rate for demo
    const check: ComplianceCheck = {
      id: this.generateId(),
      policyId: policy.id,
      policyName: policy.name,
      status: isCompliant ? 'passed' : 'failed',
      checkedAt: new Date(),
      details: isCompliant ? 'All policy requirements met' : 'Policy violation detected',
      severity: policy.severity
    };
    if (!isCompliant) {
      check.recommendations = [
        'Review data handling procedures',
        'Update access controls',
        'Implement additional monitoring'
      ];
    }
    return check;
  }

  /** Mock anonymization: only logs the rule type. */
  private async applyAnonymizationRule(assetId: string, rule: any, options?: any): Promise<void> {
    this.logger.info('Applying anonymization rule', { assetId, rule: rule.type });
  }

  /** Mock handler for `access` requests; returns a canned response. */
  private async handleAccessRequest(assetId: string, request: any): Promise<any> {
    return {
      status: 'completed',
      data: 'Data access provided according to privacy policy',
      timestamp: new Date()
    };
  }

  /** Mock handler for `rectification` requests; returns a canned response. */
  private async handleRectificationRequest(assetId: string, request: any): Promise<any> {
    return {
      status: 'completed',
      changes: 'Data rectification completed',
      timestamp: new Date()
    };
  }

  /** Mock handler for `erasure` requests; returns a canned response. */
  private async handleErasureRequest(assetId: string, request: any): Promise<any> {
    return {
      status: 'completed',
      erasure: 'Data erasure completed',
      timestamp: new Date()
    };
  }

  /** Mock handler for `portability` requests; returns a canned response. */
  private async handlePortabilityRequest(assetId: string, request: any): Promise<any> {
    return {
      status: 'completed',
      export: 'Data export provided',
      timestamp: new Date()
    };
  }

  /**
   * Derives human-readable recommendations from a report's summary and
   * violations; falls back to an all-clear message when nothing applies.
   */
  private generateComplianceRecommendations(reportData: any): string[] {
    const recommendations: string[] = [];
    if (reportData.summary.nonCompliantAssets > 0) {
      recommendations.push(`${reportData.summary.nonCompliantAssets} assets require compliance remediation.`);
    }
    if (reportData.summary.violationCount > 10) {
      recommendations.push('High number of policy violations detected. Review governance policies and implementation.');
    }
    const criticalViolations = reportData.policyViolations.filter((v: any) => v.severity === 'critical');
    if (criticalViolations.length > 0) {
      recommendations.push(`${criticalViolations.length} critical violations require immediate attention.`);
    }
    if (recommendations.length === 0) {
      recommendations.push('All assets are compliant with governance policies. Continue monitoring.');
    }
    return recommendations;
  }

  /** Builds an id from the current time plus up to nine random base-36 characters. */
  private generateId(): string {
    return `governance_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /** Replaces the asset lookup map (typically supplied by the catalog service). */
  setAssets(assets: Map<string, DataAsset>): void {
    this.assets = assets;
  }

  /** Replaces the governance map (typically supplied by the catalog service). */
  setGovernance(governance: Map<string, DataGovernance>): void {
    this.governance = governance;
  }
}

View file

@ -0,0 +1,607 @@
import { EventBus } from '@stock-bot/event-bus';
import { Logger } from '@stock-bot/utils';
import {
DataLineage,
DataAsset,
LineageTransformation,
ImpactAnalysis,
LineageQuery,
LineageDirection
} from '../types/DataCatalog';
/**
 * Contract for data lineage: storing lineage records per asset, maintaining
 * upstream/downstream dependencies and querying the resulting graph.
 * Every operation is asynchronous.
 */
export interface DataLineageService {
/** Stores a lineage record under its `assetId`. */
addLineage(lineage: DataLineage): Promise<void>;
/** Looks up the lineage of an asset; resolves to `null` when none is stored. */
getLineage(assetId: string): Promise<DataLineage | null>;
/** Merges partial changes into an asset's lineage; resolves to `null` when none is stored. */
updateLineage(assetId: string, lineage: Partial<DataLineage>): Promise<DataLineage | null>;
/** Records that `assetId` depends on `upstreamAssetId`, optionally with the transformation between them. */
addUpstreamDependency(assetId: string, upstreamAssetId: string, transformation?: LineageTransformation): Promise<void>;
/** Records that `downstreamAssetId` depends on `assetId`, optionally with the transformation between them. */
addDownstreamDependency(assetId: string, downstreamAssetId: string, transformation?: LineageTransformation): Promise<void>;
/** Removes `upstreamAssetId` from the upstream dependencies of `assetId`. */
removeUpstreamDependency(assetId: string, upstreamAssetId: string): Promise<void>;
/** Removes `downstreamAssetId` from the downstream dependencies of `assetId`. */
removeDownstreamDependency(assetId: string, downstreamAssetId: string): Promise<void>;
/** Returns upstream assets of `assetId`; presumably `depth` bounds the traversal — TODO confirm against the implementation. */
getUpstreamAssets(assetId: string, depth?: number): Promise<DataAsset[]>;
/** Returns downstream assets of `assetId`; presumably `depth` bounds the traversal — TODO confirm against the implementation. */
getDownstreamAssets(assetId: string, depth?: number): Promise<DataAsset[]>;
/** Produces an impact analysis for the asset. */
analyzeImpact(assetId: string): Promise<ImpactAnalysis>;
/** Returns the assets selected by a lineage query. */
queryLineage(query: LineageQuery): Promise<DataAsset[]>;
/** Returns a graph representation of the lineage around `assetId` in the given direction. */
getLineageGraph(assetId: string, direction: LineageDirection, depth?: number): Promise<any>;
/** Returns the circular dependencies found; each entry is a list of ids (presumably one cycle of asset ids — TODO confirm). */
detectCircularDependencies(): Promise<string[][]>;
}
export class DataLineageServiceImpl implements DataLineageService {
// Lineage records; addLineage stores each one under its `assetId`.
private lineages: Map<string, DataLineage> = new Map();
// Asset lookup map; starts empty.
private assets: Map<string, DataAsset> = new Map();
/**
 * @param eventBus Bus on which the service emits `data.lineage.*` events.
 * @param logger Logger used for info and error records.
 */
constructor(
private eventBus: EventBus,
private logger: Logger
) {}
/**
 * Stores a lineage record under its `assetId` (replacing any record already
 * held for that asset), logs the upstream/downstream counts and emits
 * `data.lineage.added`. Errors are logged and rethrown.
 *
 * @param lineage Record to store.
 */
async addLineage(lineage: DataLineage): Promise<void> {
  try {
    const key = lineage.assetId;
    this.lineages.set(key, lineage);

    const counts = {
      assetId: key,
      upstreamCount: lineage.upstreamAssets.length,
      downstreamCount: lineage.downstreamAssets.length
    };
    this.logger.info('Data lineage added', counts);

    const payload = { assetId: key, lineage, timestamp: new Date() };
    await this.eventBus.emit('data.lineage.added', payload);
  } catch (error) {
    this.logger.error('Failed to add data lineage', { lineage, error });
    throw error;
  }
}
/**
 * Looks up the lineage stored for an asset.
 *
 * @param assetId Asset id.
 * @returns The stored record, or `null` when none exists.
 */
async getLineage(assetId: string): Promise<DataLineage | null> {
  try {
    const stored = this.lineages.get(assetId);
    if (stored) {
      return stored;
    }
    return null;
  } catch (error) {
    this.logger.error('Failed to get data lineage', { assetId, error });
    throw error;
  }
}
/**
 * Merges partial changes into the lineage stored for an asset, refreshes
 * `updatedAt` and emits `data.lineage.updated`.
 *
 * `id`, `assetId` and `createdAt` identify the record and are kept from the
 * stored lineage even if `lineage` carries different values; otherwise the
 * record could end up filed under one asset id while claiming another.
 *
 * @param assetId Asset whose lineage is updated.
 * @param lineage Fields to overwrite.
 * @returns The updated record, or `null` when the asset has no lineage.
 * @throws Rethrows any error after logging it.
 */
async updateLineage(assetId: string, lineage: Partial<DataLineage>): Promise<DataLineage | null> {
  try {
    const existingLineage = this.lineages.get(assetId);
    if (!existingLineage) {
      return null;
    }
    const updatedLineage: DataLineage = {
      ...existingLineage,
      ...lineage,
      // Pinned after the spread so the partial cannot overwrite them.
      id: existingLineage.id,
      assetId: existingLineage.assetId,
      createdAt: existingLineage.createdAt,
      updatedAt: new Date()
    };
    this.lineages.set(assetId, updatedLineage);
    this.logger.info('Data lineage updated', { assetId, changes: lineage });
    await this.eventBus.emit('data.lineage.updated', {
      assetId,
      lineage: updatedLineage,
      changes: lineage,
      timestamp: new Date()
    });
    return updatedLineage;
  } catch (error) {
    this.logger.error('Failed to update data lineage', { assetId, lineage, error });
    throw error;
  }
}
/**
 * Records that `assetId` depends on `upstreamAssetId`.
 *
 * When the link is new, it is added to the asset's lineage (starting from an
 * empty lineage if none is stored), the optional transformation is appended,
 * the reverse link is registered through `addDownstreamToUpstream`, and
 * `data.lineage.dependency.added` is emitted. An already-known link is left
 * untouched and nothing is emitted. Errors are logged and rethrown.
 *
 * @param assetId Dependent asset.
 * @param upstreamAssetId Asset it depends on.
 * @param transformation Optional transformation describing the link.
 */
async addUpstreamDependency(
  assetId: string,
  upstreamAssetId: string,
  transformation?: LineageTransformation
): Promise<void> {
  try {
    const record = this.lineages.get(assetId) || this.createEmptyLineage(assetId);
    const alreadyLinked = record.upstreamAssets.includes(upstreamAssetId);
    if (alreadyLinked) {
      return;
    }

    record.upstreamAssets.push(upstreamAssetId);
    if (transformation) {
      record.transformations.push(transformation);
    }
    record.updatedAt = new Date();
    this.lineages.set(assetId, record);

    // Keep the other side of the edge in step.
    await this.addDownstreamToUpstream(upstreamAssetId, assetId);

    this.logger.info('Upstream dependency added', { assetId, upstreamAssetId });
    const payload = { assetId, upstreamAssetId, transformation, timestamp: new Date() };
    await this.eventBus.emit('data.lineage.dependency.added', payload);
  } catch (error) {
    this.logger.error('Failed to add upstream dependency', { assetId, upstreamAssetId, error });
    throw error;
  }
}
/**
 * Records that `downstreamAssetId` depends on `assetId`.
 *
 * When the link is new, it is added to the asset's lineage (starting from an
 * empty lineage if none is stored), the reverse link and the optional
 * transformation are handed to `addUpstreamToDownstream`, and
 * `data.lineage.dependency.added` is emitted. An already-known link is left
 * untouched and nothing is emitted. Errors are logged and rethrown.
 *
 * @param assetId Asset being depended on.
 * @param downstreamAssetId Dependent asset.
 * @param transformation Optional transformation describing the link.
 */
async addDownstreamDependency(
  assetId: string,
  downstreamAssetId: string,
  transformation?: LineageTransformation
): Promise<void> {
  try {
    const record = this.lineages.get(assetId) || this.createEmptyLineage(assetId);
    const alreadyLinked = record.downstreamAssets.includes(downstreamAssetId);
    if (alreadyLinked) {
      return;
    }

    record.downstreamAssets.push(downstreamAssetId);
    record.updatedAt = new Date();
    this.lineages.set(assetId, record);

    // Keep the other side of the edge in step.
    await this.addUpstreamToDownstream(downstreamAssetId, assetId, transformation);

    this.logger.info('Downstream dependency added', { assetId, downstreamAssetId });
    const payload = { assetId, downstreamAssetId, transformation, timestamp: new Date() };
    await this.eventBus.emit('data.lineage.dependency.added', payload);
  } catch (error) {
    this.logger.error('Failed to add downstream dependency', { assetId, downstreamAssetId, error });
    throw error;
  }
}
/**
 * Removes `upstreamAssetId` from the upstream list of `assetId`.
 *
 * Does nothing when `assetId` has no lineage record. When a record exists, the
 * reverse link is removed from the upstream asset's record and a
 * `data.lineage.dependency.removed` event is emitted, whether or not the id
 * was actually present in the list.
 *
 * @throws Rethrows any error after logging it.
 */
async removeUpstreamDependency(assetId: string, upstreamAssetId: string): Promise<void> {
  try {
    const record = this.lineages.get(assetId);
    if (!record) {
      return;
    }

    record.upstreamAssets = record.upstreamAssets.filter(id => id !== upstreamAssetId);
    record.updatedAt = new Date();
    this.lineages.set(assetId, record);

    // Drop the mirrored downstream entry on the upstream asset.
    await this.removeDownstreamFromUpstream(upstreamAssetId, assetId);

    this.logger.info('Upstream dependency removed', { assetId, upstreamAssetId });
    await this.eventBus.emit('data.lineage.dependency.removed', {
      assetId,
      upstreamAssetId,
      timestamp: new Date()
    });
  } catch (error) {
    this.logger.error('Failed to remove upstream dependency', { assetId, upstreamAssetId, error });
    throw error;
  }
}
/**
 * Removes `downstreamAssetId` from the downstream list of `assetId`.
 *
 * Does nothing when `assetId` has no lineage record. When a record exists, the
 * reverse link is removed from the downstream asset's record and a
 * `data.lineage.dependency.removed` event is emitted, whether or not the id
 * was actually present in the list.
 *
 * @throws Rethrows any error after logging it.
 */
async removeDownstreamDependency(assetId: string, downstreamAssetId: string): Promise<void> {
  try {
    const record = this.lineages.get(assetId);
    if (!record) {
      return;
    }

    record.downstreamAssets = record.downstreamAssets.filter(id => id !== downstreamAssetId);
    record.updatedAt = new Date();
    this.lineages.set(assetId, record);

    // Drop the mirrored upstream entry on the downstream asset.
    await this.removeUpstreamFromDownstream(downstreamAssetId, assetId);

    this.logger.info('Downstream dependency removed', { assetId, downstreamAssetId });
    await this.eventBus.emit('data.lineage.dependency.removed', {
      assetId,
      downstreamAssetId,
      timestamp: new Date()
    });
  } catch (error) {
    this.logger.error('Failed to remove downstream dependency', { assetId, downstreamAssetId, error });
    throw error;
  }
}
/**
 * Collects the assets upstream of `assetId`, following lineage links up to
 * `depth` levels.
 *
 * @param assetId - Asset to start from.
 * @param depth - Number of levels to follow; defaults to 1 (direct upstreams).
 * @returns The upstream assets that are present in the injected asset map.
 * @throws Rethrows any error after logging it.
 */
async getUpstreamAssets(assetId: string, depth: number = 1): Promise<DataAsset[]> {
  const collected: DataAsset[] = [];
  try {
    await this.traverseUpstream(assetId, depth, new Set<string>(), collected);
    return collected;
  } catch (error) {
    this.logger.error('Failed to get upstream assets', { assetId, depth, error });
    throw error;
  }
}
/**
 * Collects the assets downstream of `assetId`, following lineage links up to
 * `depth` levels.
 *
 * @param assetId - Asset to start from.
 * @param depth - Number of levels to follow; defaults to 1 (direct dependents).
 * @returns The downstream assets that are present in the injected asset map.
 * @throws Rethrows any error after logging it.
 */
async getDownstreamAssets(assetId: string, depth: number = 1): Promise<DataAsset[]> {
  const collected: DataAsset[] = [];
  try {
    await this.traverseDownstream(assetId, depth, new Set<string>(), collected);
    return collected;
  } catch (error) {
    this.logger.error('Failed to get downstream assets', { assetId, depth, error });
    throw error;
  }
}
/**
 * Estimates the blast radius of changing `assetId`.
 *
 * Downstream assets are gathered five levels deep. The affected users are the
 * owner, the steward (when set) and the `usage.topUsers` of each of those
 * assets. The impact level is derived purely from the number of downstream
 * assets: more than 20 is `critical`, more than 10 is `high`, more than 5 is
 * `medium`, otherwise `low`.
 *
 * @returns The impact summary including a description and recommendations.
 * @throws Rethrows any error after logging it.
 */
async analyzeImpact(assetId: string): Promise<ImpactAnalysis> {
  try {
    // Go deep so indirect consumers are included in the analysis.
    const impacted = await this.getDownstreamAssets(assetId, 5);

    const userIds = new Set<string>();
    for (const asset of impacted) {
      userIds.add(asset.owner);
      if (asset.steward) {
        userIds.add(asset.steward);
      }
      // NOTE(review): assumes every asset carries `usage.topUsers` — confirm against the DataAsset type.
      asset.usage.topUsers.forEach(user => userIds.add(user.userId));
    }

    const assetCount = impacted.length;
    const estimatedImpact: 'low' | 'medium' | 'high' | 'critical' =
      assetCount > 20 ? 'critical'
        : assetCount > 10 ? 'high'
          : assetCount > 5 ? 'medium'
            : 'low';

    const affectedUsers = Array.from(userIds);
    const impact: ImpactAnalysis = {
      downstreamAssets: impacted.map(asset => asset.id),
      affectedUsers,
      estimatedImpact,
      impactDescription: this.generateImpactDescription(assetCount, affectedUsers.length),
      recommendations: this.generateRecommendations(estimatedImpact, assetCount)
    };

    this.logger.info('Impact analysis completed', {
      assetId,
      impactLevel: estimatedImpact,
      affectedAssets: assetCount,
      affectedUsers: userIds.size
    });
    return impact;
  } catch (error) {
    this.logger.error('Failed to analyze impact', { assetId, error });
    throw error;
  }
}
/**
 * Resolves a lineage query into a de-duplicated list of assets.
 *
 * For every id in `query.assetIds` the upstream and/or downstream assets are
 * collected according to `query.direction` (`'both'` collects both). Assets are
 * returned in the order they were first encountered. A query without
 * `assetIds` yields an empty list.
 *
 * @throws Rethrows any error after logging it.
 */
async queryLineage(query: LineageQuery): Promise<DataAsset[]> {
  try {
    // Keyed by asset id; only the first occurrence of each id is kept.
    const firstSeen = new Map<string, DataAsset>();
    const collect = (assets: DataAsset[]): void => {
      for (const asset of assets) {
        if (!firstSeen.has(asset.id)) {
          firstSeen.set(asset.id, asset);
        }
      }
    };

    if (query.assetIds) {
      for (const assetId of query.assetIds) {
        if (query.direction === 'upstream' || query.direction === 'both') {
          collect(await this.getUpstreamAssets(assetId, query.depth));
        }
        if (query.direction === 'downstream' || query.direction === 'both') {
          collect(await this.getDownstreamAssets(assetId, query.depth));
        }
      }
    }

    return Array.from(firstSeen.values());
  } catch (error) {
    this.logger.error('Failed to query lineage', { query, error });
    throw error;
  }
}
/**
 * Builds a node/edge representation of the lineage around `assetId`.
 *
 * @param assetId - Asset the graph is centred on.
 * @param direction - Which links to follow while building the graph.
 * @param depth - Number of levels to follow; defaults to 3.
 * @returns An object with `nodes` (array of node descriptors) and `edges`.
 * @throws Rethrows any error after logging it.
 */
async getLineageGraph(assetId: string, direction: LineageDirection, depth: number = 3): Promise<any> {
  try {
    // Nodes are accumulated in a Map keyed by asset id and flattened on return.
    const accumulator = {
      nodes: new Map(),
      edges: []
    };
    await this.buildLineageGraph(assetId, direction, depth, new Set<string>(), accumulator);

    const nodeList = Array.from(accumulator.nodes.values());
    return {
      nodes: nodeList,
      edges: accumulator.edges
    };
  } catch (error) {
    this.logger.error('Failed to get lineage graph', { assetId, direction, depth, error });
    throw error;
  }
}
/**
 * Scans every stored lineage record for dependency cycles along downstream
 * links.
 *
 * @returns One array of asset ids per detected cycle; empty when none exist.
 * @throws Rethrows any error after logging it.
 */
async detectCircularDependencies(): Promise<string[][]> {
  try {
    const foundCycles: string[][] = [];
    const explored = new Set<string>();
    const activePath = new Set<string>();

    for (const assetId of this.lineages.keys()) {
      // Assets reached by an earlier search have already been examined.
      if (explored.has(assetId)) {
        continue;
      }
      await this.detectCycleDFS(assetId, explored, activePath, [], foundCycles);
    }

    if (foundCycles.length > 0) {
      this.logger.warn('Circular dependencies detected', { cycleCount: foundCycles.length });
    }
    return foundCycles;
  } catch (error) {
    this.logger.error('Failed to detect circular dependencies', { error });
    throw error;
  }
}
// Private helper methods
/**
 * Creates a blank lineage record for `assetId`: a fresh id, no upstream or
 * downstream links, no transformations and a `low` placeholder impact.
 * The record is returned but not stored.
 */
private createEmptyLineage(assetId: string): DataLineage {
  const id = this.generateId();
  const emptyImpact = {
    downstreamAssets: [],
    affectedUsers: [],
    estimatedImpact: 'low',
    impactDescription: '',
    recommendations: []
  };
  const blank: DataLineage = {
    id,
    assetId,
    upstreamAssets: [],
    downstreamAssets: [],
    transformations: [],
    impact: emptyImpact,
    createdAt: new Date(),
    updatedAt: new Date()
  };
  return blank;
}
/**
 * Records `downstreamAssetId` in the downstream list of `upstreamAssetId`,
 * creating the upstream asset's lineage record when it does not exist yet.
 * Nothing is written when the link is already present.
 */
private async addDownstreamToUpstream(upstreamAssetId: string, downstreamAssetId: string): Promise<void> {
  const record = this.lineages.get(upstreamAssetId) || this.createEmptyLineage(upstreamAssetId);
  if (record.downstreamAssets.includes(downstreamAssetId)) {
    return;
  }
  record.downstreamAssets.push(downstreamAssetId);
  record.updatedAt = new Date();
  this.lineages.set(upstreamAssetId, record);
}
/**
 * Records `upstreamAssetId` in the upstream list of `downstreamAssetId`,
 * creating the downstream asset's lineage record when it does not exist yet.
 * The optional transformation is appended only when the link is new; nothing
 * is written when the link is already present.
 */
private async addUpstreamToDownstream(
  downstreamAssetId: string,
  upstreamAssetId: string,
  transformation?: LineageTransformation
): Promise<void> {
  const record = this.lineages.get(downstreamAssetId) || this.createEmptyLineage(downstreamAssetId);
  if (record.upstreamAssets.includes(upstreamAssetId)) {
    return;
  }
  record.upstreamAssets.push(upstreamAssetId);
  if (transformation) {
    record.transformations.push(transformation);
  }
  record.updatedAt = new Date();
  this.lineages.set(downstreamAssetId, record);
}
/**
 * Removes `downstreamAssetId` from the downstream list of `upstreamAssetId`.
 * Does nothing when the upstream asset has no lineage record.
 */
private async removeDownstreamFromUpstream(upstreamAssetId: string, downstreamAssetId: string): Promise<void> {
  const record = this.lineages.get(upstreamAssetId);
  if (!record) {
    return;
  }
  record.downstreamAssets = record.downstreamAssets.filter(id => id !== downstreamAssetId);
  record.updatedAt = new Date();
  this.lineages.set(upstreamAssetId, record);
}
/**
 * Removes `upstreamAssetId` from the upstream list of `downstreamAssetId`.
 * Does nothing when the downstream asset has no lineage record.
 */
private async removeUpstreamFromDownstream(downstreamAssetId: string, upstreamAssetId: string): Promise<void> {
  const record = this.lineages.get(downstreamAssetId);
  if (!record) {
    return;
  }
  record.upstreamAssets = record.upstreamAssets.filter(id => id !== upstreamAssetId);
  record.updatedAt = new Date();
  this.lineages.set(downstreamAssetId, record);
}
/**
 * Appends to `result` every known asset reachable from `assetId` by following
 * upstream links for at most `remainingDepth` levels.
 *
 * The walk is level-order (breadth-first), so each asset is first expanded at
 * its shortest distance from the start. The previous depth-first walk shared
 * one `visited` set across branches: an asset first reached through a long
 * path, with little depth left, was never expanded again through a shorter
 * path, so assets inside the requested depth could be missed.
 *
 * @param assetId - Asset to start from.
 * @param remainingDepth - Levels to follow; 0 collects nothing. A negative
 *   value never reaches 0 and therefore walks until no new assets are found.
 * @param visited - Ids already expanded; updated in place. Ids present on
 *   entry are not expanded.
 * @param result - Output list, appended in place without duplicate ids. Ids
 *   missing from the asset map are followed but not added.
 */
private async traverseUpstream(
  assetId: string,
  remainingDepth: number,
  visited: Set<string>,
  result: DataAsset[]
): Promise<void> {
  // Track ids already in `result` so duplicate checks are O(1).
  const collectedIds = new Set<string>(result.map(asset => asset.id));
  let frontier: string[] = [assetId];
  let depthLeft = remainingDepth;

  while (frontier.length > 0 && depthLeft !== 0) {
    const nextFrontier: string[] = [];
    for (const currentId of frontier) {
      if (visited.has(currentId)) {
        continue;
      }
      visited.add(currentId);

      const lineage = this.lineages.get(currentId);
      if (!lineage) {
        continue;
      }
      for (const upstreamId of lineage.upstreamAssets) {
        const asset = this.assets.get(upstreamId);
        if (asset && !collectedIds.has(asset.id)) {
          collectedIds.add(asset.id);
          result.push(asset);
        }
        nextFrontier.push(upstreamId);
      }
    }
    frontier = nextFrontier;
    depthLeft -= 1;
  }
}
/**
 * Appends to `result` every known asset reachable from `assetId` by following
 * downstream links for at most `remainingDepth` levels.
 *
 * The walk is level-order (breadth-first), so each asset is first expanded at
 * its shortest distance from the start. The previous depth-first walk shared
 * one `visited` set across branches: an asset first reached through a long
 * path, with little depth left, was never expanded again through a shorter
 * path, so assets inside the requested depth could be missed.
 *
 * @param assetId - Asset to start from.
 * @param remainingDepth - Levels to follow; 0 collects nothing. A negative
 *   value never reaches 0 and therefore walks until no new assets are found.
 * @param visited - Ids already expanded; updated in place. Ids present on
 *   entry are not expanded.
 * @param result - Output list, appended in place without duplicate ids. Ids
 *   missing from the asset map are followed but not added.
 */
private async traverseDownstream(
  assetId: string,
  remainingDepth: number,
  visited: Set<string>,
  result: DataAsset[]
): Promise<void> {
  // Track ids already in `result` so duplicate checks are O(1).
  const collectedIds = new Set<string>(result.map(asset => asset.id));
  let frontier: string[] = [assetId];
  let depthLeft = remainingDepth;

  while (frontier.length > 0 && depthLeft !== 0) {
    const nextFrontier: string[] = [];
    for (const currentId of frontier) {
      if (visited.has(currentId)) {
        continue;
      }
      visited.add(currentId);

      const lineage = this.lineages.get(currentId);
      if (!lineage) {
        continue;
      }
      for (const downstreamId of lineage.downstreamAssets) {
        const asset = this.assets.get(downstreamId);
        if (asset && !collectedIds.has(asset.id)) {
          collectedIds.add(asset.id);
          result.push(asset);
        }
        nextFrontier.push(downstreamId);
      }
    }
    frontier = nextFrontier;
    depthLeft -= 1;
  }
}
/**
 * Recursively fills `graph` with nodes and edges around `assetId`.
 *
 * A node (id, name, type, classification) is added only when the asset exists
 * in the asset map. Edges always point from the upstream asset to the
 * downstream asset and are tagged with the direction that was being followed.
 *
 * NOTE(review): an edge is pushed before the recursive call, and the call
 * returns without adding a node once the depth is exhausted, so edges can
 * reference ids that have no node. With direction `'both'` the same link can
 * also be emitted twice (once per endpoint). Confirm whether consumers rely
 * on this.
 *
 * @param assetId - Asset currently being expanded.
 * @param direction - `'upstream'`, `'downstream'` or `'both'`.
 * @param remainingDepth - Levels left to expand; 0 stops the recursion.
 * @param visited - Ids already expanded; updated in place.
 * @param graph - Accumulator with a `nodes` Map and an `edges` array.
 */
private async buildLineageGraph(
  assetId: string,
  direction: LineageDirection,
  remainingDepth: number,
  visited: Set<string>,
  graph: any
): Promise<void> {
  if (visited.has(assetId) || remainingDepth === 0) {
    return;
  }
  visited.add(assetId);

  const asset = this.assets.get(assetId);
  const lineage = this.lineages.get(assetId);

  if (asset) {
    const { name, type, classification } = asset;
    graph.nodes.set(assetId, { id: assetId, name, type, classification });
  }
  if (!lineage) {
    return;
  }

  const followUpstream = direction === 'upstream' || direction === 'both';
  const followDownstream = direction === 'downstream' || direction === 'both';
  const nextDepth = remainingDepth - 1;

  if (followUpstream) {
    for (const upstreamId of lineage.upstreamAssets) {
      graph.edges.push({ source: upstreamId, target: assetId, type: 'upstream' });
      await this.buildLineageGraph(upstreamId, direction, nextDepth, visited, graph);
    }
  }

  if (followDownstream) {
    for (const downstreamId of lineage.downstreamAssets) {
      graph.edges.push({ source: assetId, target: downstreamId, type: 'downstream' });
      await this.buildLineageGraph(downstreamId, direction, nextDepth, visited, graph);
    }
  }
}
/**
 * Depth-first search along downstream links that records every cycle found.
 *
 * @param assetId - Asset currently being expanded.
 * @param visited - Ids that have been expanded at any point; updated in place.
 * @param recursionStack - Ids on the current search path; updated in place.
 * @param path - Current search path in visiting order; updated in place.
 * @param cycles - Output list; each detected cycle is appended as the slice of
 *   `path` starting at the asset the search looped back to.
 */
private async detectCycleDFS(
  assetId: string,
  visited: Set<string>,
  recursionStack: Set<string>,
  path: string[],
  cycles: string[][]
): Promise<void> {
  visited.add(assetId);
  recursionStack.add(assetId);
  path.push(assetId);

  const lineage = this.lineages.get(assetId);
  if (lineage) {
    for (const downstreamId of lineage.downstreamAssets) {
      if (visited.has(downstreamId)) {
        // A visited asset that is still on the current path closes a cycle.
        if (recursionStack.has(downstreamId)) {
          const loopStart = path.indexOf(downstreamId);
          cycles.push(path.slice(loopStart));
        }
        continue;
      }
      await this.detectCycleDFS(downstreamId, visited, recursionStack, path, cycles);
    }
  }

  // Backtrack: this asset is no longer on the active path.
  path.pop();
  recursionStack.delete(assetId);
}
/**
 * Produces the human-readable impact sentence for the given downstream asset
 * and user counts; a fixed message is used when there are no downstream assets.
 */
private generateImpactDescription(assetCount: number, userCount: number): string {
  return assetCount === 0
    ? 'No downstream dependencies identified.'
    : `Changes to this asset may affect ${assetCount} downstream asset(s) and ${userCount} user(s).`;
}
/**
 * Returns the list of recommended actions for an impact level.
 *
 * @param impact - `'critical'`, `'high'` or `'medium'`; any other value gets
 *   the default recommendation.
 * @param assetCount - Currently unused.
 * @returns A new array on every call.
 */
private generateRecommendations(impact: string, assetCount: number): string[] {
  switch (impact) {
    case 'critical':
      return [
        'Schedule maintenance window',
        'Notify all stakeholders in advance',
        'Prepare rollback plan',
        'Consider phased rollout'
      ];
    case 'high':
      return [
        'Notify affected users',
        'Test changes thoroughly',
        'Monitor downstream systems'
      ];
    case 'medium':
      return [
        'Test with subset of data',
        'Monitor for issues'
      ];
    default:
      return ['Standard testing procedures apply'];
  }
}
/**
 * Generates an id of the form `lineage_<epoch millis>_<random base-36 suffix>`.
 * The suffix is up to nine characters taken from `Math.random()`, so ids are
 * not guaranteed to be unique.
 */
private generateId(): string {
  // slice(2, 11) skips the leading "0." and replaces the deprecated substr(2, 9).
  return `lineage_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
}
/**
 * Injects the asset map used to resolve asset ids (typically supplied by
 * DataCatalogService). The map is stored by reference, not copied.
 */
setAssets(assets: Map<string, DataAsset>): void {
this.assets = assets;
}
}

View file

@ -0,0 +1,734 @@
import { EventBus } from '@stock-bot/event-bus';
import { Logger } from '@stock-bot/utils';
import {
DataQuality,
QualityDimension,
QualityRule,
QualityIssue,
QualityTrend,
DataAsset,
QualityAssessmentRequest,
QualityRuleType,
QualitySeverity
} from '../types/DataCatalog';
/**
 * Contract for assessing and tracking data quality per catalog asset.
 * The behaviour notes below describe DataQualityServiceImpl in this file.
 */
export interface DataQualityService {
/** Runs the dimension checks selected in `request` and stores the result; throws when the asset is unknown. */
assessQuality(assetId: string, request: QualityAssessmentRequest): Promise<DataQuality>;
/** Returns the stored quality record for the asset, or `null` when there is none. */
getQuality(assetId: string): Promise<DataQuality | null>;
/** Merges `quality` into the stored record; resolves to `null` when the asset has no record. */
updateQuality(assetId: string, quality: Partial<DataQuality>): Promise<DataQuality | null>;
/** Attaches a rule to the asset, creating the quality record if needed; a missing `rule.id` is filled in. */
addQualityRule(assetId: string, rule: QualityRule): Promise<void>;
/** Removes the rule with the given id; throws when the asset has no quality record. */
removeQualityRule(assetId: string, ruleId: string): Promise<void>;
/** Executes one rule; resolves to `false` when it fails or when the asset, record or rule is missing. */
validateRule(assetId: string, ruleId: string): Promise<boolean>;
/** Records a new issue against the asset; `id` and `detectedAt` are assigned by the service. */
reportIssue(assetId: string, issue: Omit<QualityIssue, 'id' | 'detectedAt'>): Promise<void>;
/** Marks an issue as resolved; throws when the record or the issue is missing. */
resolveIssue(assetId: string, issueId: string): Promise<void>;
/** Returns the score trend limited to `timeframe` ('day', 'week', 'month'; anything else means all data). */
getTrendAnalysis(assetId: string, timeframe: string): Promise<QualityTrend>;
/** Aggregates metrics over stored quality records, optionally filtered by asset `type`, `owner` or `classification`. */
getQualityMetrics(filters?: Record<string, any>): Promise<any>;
/** Builds a summary report with per-asset details and recommendations for the given assets. */
generateQualityReport(assetIds: string[]): Promise<any>;
}
/**
 * In-memory implementation of DataQualityService.
 *
 * Quality records live in a Map keyed by asset id; nothing is persisted.
 * Dimension assessments and rule execution are mock implementations that
 * produce random scores and outcomes. State changes are announced on the
 * event bus under `data.quality.*` event names, and every public method logs
 * and rethrows errors.
 */
export class DataQualityServiceImpl implements DataQualityService {
  private qualities: Map<string, DataQuality> = new Map();
  private assets: Map<string, DataAsset> = new Map();

  constructor(
    private eventBus: EventBus,
    private logger: Logger
  ) {}

  /**
   * Runs the dimension checks selected in `request`, recomputes the overall
   * score, appends it to the trend and stores the record.
   *
   * @returns The updated quality record.
   * @throws Error when `assetId` is not in the injected asset map.
   */
  async assessQuality(assetId: string, request: QualityAssessmentRequest): Promise<DataQuality> {
    try {
      const asset = this.assets.get(assetId);
      if (!asset) {
        throw new Error(`Asset with id ${assetId} not found`);
      }

      let quality = this.qualities.get(assetId);
      if (!quality) {
        quality = this.createEmptyQuality(assetId);
      }

      // Perform quality assessment based on request
      const assessmentResults = await this.performQualityAssessment(asset, request);

      // Update quality metrics
      quality.dimensions = assessmentResults.dimensions;
      quality.overallScore = this.calculateOverallScore(assessmentResults.dimensions);
      quality.lastAssessment = new Date();

      // Update trend data
      this.updateQualityTrend(quality, quality.overallScore);
      this.qualities.set(assetId, quality);

      this.logger.info('Quality assessment completed', {
        assetId,
        overallScore: quality.overallScore,
        dimensionCount: quality.dimensions.length
      });
      await this.eventBus.emit('data.quality.assessed', {
        assetId,
        quality,
        request,
        timestamp: new Date()
      });
      return quality;
    } catch (error) {
      this.logger.error('Failed to assess quality', { assetId, request, error });
      throw error;
    }
  }

  /** Returns the stored quality record for `assetId`, or `null` when there is none. */
  async getQuality(assetId: string): Promise<DataQuality | null> {
    try {
      return this.qualities.get(assetId) || null;
    } catch (error) {
      this.logger.error('Failed to get quality', { assetId, error });
      throw error;
    }
  }

  /**
   * Shallow-merges `quality` into the stored record and stamps
   * `lastAssessment` with the current time.
   *
   * @returns The merged record, or `null` when the asset has no record.
   */
  async updateQuality(assetId: string, quality: Partial<DataQuality>): Promise<DataQuality | null> {
    try {
      const existingQuality = this.qualities.get(assetId);
      if (!existingQuality) {
        return null;
      }

      const updatedQuality: DataQuality = {
        ...existingQuality,
        ...quality,
        lastAssessment: new Date()
      };
      this.qualities.set(assetId, updatedQuality);

      this.logger.info('Quality updated', { assetId, changes: quality });
      await this.eventBus.emit('data.quality.updated', {
        assetId,
        quality: updatedQuality,
        changes: quality,
        timestamp: new Date()
      });
      return updatedQuality;
    } catch (error) {
      this.logger.error('Failed to update quality', { assetId, quality, error });
      throw error;
    }
  }

  /**
   * Attaches `rule` to the asset's quality record, creating the record when
   * needed. A rule without an id gets one assigned on the passed-in object.
   */
  async addQualityRule(assetId: string, rule: QualityRule): Promise<void> {
    try {
      let quality = this.qualities.get(assetId);
      if (!quality) {
        quality = this.createEmptyQuality(assetId);
      }

      // Ensure rule has an ID
      if (!rule.id) {
        rule.id = this.generateId();
      }
      quality.rules.push(rule);
      this.qualities.set(assetId, quality);

      this.logger.info('Quality rule added', { assetId, ruleId: rule.id, ruleType: rule.type });
      await this.eventBus.emit('data.quality.rule.added', {
        assetId,
        rule,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to add quality rule', { assetId, rule, error });
      throw error;
    }
  }

  /**
   * Removes the rule with id `ruleId` from the asset's record.
   *
   * @throws Error when the asset has no quality record.
   */
  async removeQualityRule(assetId: string, ruleId: string): Promise<void> {
    try {
      const quality = this.qualities.get(assetId);
      if (!quality) {
        throw new Error(`Quality not found for asset ${assetId}`);
      }

      quality.rules = quality.rules.filter(rule => rule.id !== ruleId);
      this.qualities.set(assetId, quality);

      this.logger.info('Quality rule removed', { assetId, ruleId });
      await this.eventBus.emit('data.quality.rule.removed', {
        assetId,
        ruleId,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to remove quality rule', { assetId, ruleId, error });
      throw error;
    }
  }

  /**
   * Executes one rule against the asset. A failing rule is recorded as an
   * unresolved issue and announced via `data.quality.issue.detected`.
   *
   * @returns `true` when the rule passes; `false` when it fails or when the
   *   asset, its quality record or the rule cannot be found.
   */
  async validateRule(assetId: string, ruleId: string): Promise<boolean> {
    try {
      const quality = this.qualities.get(assetId);
      const asset = this.assets.get(assetId);
      if (!quality || !asset) {
        return false;
      }

      const rule = quality.rules.find(r => r.id === ruleId);
      if (!rule) {
        return false;
      }

      const isValid = await this.executeQualityRule(asset, rule);
      if (!isValid) {
        // Create quality issue
        const issue: QualityIssue = {
          id: this.generateId(),
          ruleId: rule.id,
          type: rule.type,
          severity: rule.severity,
          message: `Quality rule validation failed: ${rule.description}`,
          detectedAt: new Date(),
          resolved: false
        };
        quality.issues.push(issue);
        this.qualities.set(assetId, quality);

        await this.eventBus.emit('data.quality.issue.detected', {
          assetId,
          issue,
          rule,
          timestamp: new Date()
        });
      }
      return isValid;
    } catch (error) {
      this.logger.error('Failed to validate quality rule', { assetId, ruleId, error });
      throw error;
    }
  }

  /**
   * Records a new issue against the asset, creating the quality record when
   * needed. The issue's `id` and `detectedAt` are assigned here.
   */
  async reportIssue(assetId: string, issue: Omit<QualityIssue, 'id' | 'detectedAt'>): Promise<void> {
    try {
      let quality = this.qualities.get(assetId);
      if (!quality) {
        quality = this.createEmptyQuality(assetId);
      }

      const fullIssue: QualityIssue = {
        ...issue,
        id: this.generateId(),
        detectedAt: new Date()
      };
      quality.issues.push(fullIssue);
      this.qualities.set(assetId, quality);

      this.logger.info('Quality issue reported', {
        assetId,
        issueId: fullIssue.id,
        severity: fullIssue.severity
      });
      await this.eventBus.emit('data.quality.issue.reported', {
        assetId,
        issue: fullIssue,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to report quality issue', { assetId, issue, error });
      throw error;
    }
  }

  /**
   * Marks the issue as resolved and stamps `resolvedAt`.
   *
   * @throws Error when the asset has no quality record or the issue is unknown.
   */
  async resolveIssue(assetId: string, issueId: string): Promise<void> {
    try {
      const quality = this.qualities.get(assetId);
      if (!quality) {
        throw new Error(`Quality not found for asset ${assetId}`);
      }

      const issue = quality.issues.find(i => i.id === issueId);
      if (!issue) {
        throw new Error(`Issue ${issueId} not found for asset ${assetId}`);
      }

      issue.resolved = true;
      issue.resolvedAt = new Date();
      this.qualities.set(assetId, quality);

      this.logger.info('Quality issue resolved', { assetId, issueId });
      await this.eventBus.emit('data.quality.issue.resolved', {
        assetId,
        issue,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to resolve quality issue', { assetId, issueId, error });
      throw error;
    }
  }

  /**
   * Returns the asset's score trend restricted to `timeframe`, with direction
   * and change rate recomputed over the remaining data points.
   *
   * @param timeframe - `'day'`, `'week'` or `'month'`; any other value keeps all data.
   * @throws Error when the asset has no quality record.
   */
  async getTrendAnalysis(assetId: string, timeframe: string): Promise<QualityTrend> {
    try {
      const quality = this.qualities.get(assetId);
      if (!quality) {
        throw new Error(`Quality not found for asset ${assetId}`);
      }

      // Filter trend data by timeframe
      const filteredTrend = this.filterTrendByTimeframe(quality.trend, timeframe);

      // Calculate trend direction and change rate
      const trendAnalysis = this.analyzeTrend(filteredTrend.dataPoints);
      return {
        ...filteredTrend,
        trend: trendAnalysis.direction,
        changeRate: trendAnalysis.changeRate
      };
    } catch (error) {
      this.logger.error('Failed to get trend analysis', { assetId, timeframe, error });
      throw error;
    }
  }

  /**
   * Aggregates metrics over the stored quality records.
   *
   * @param filters - Optional asset filters; only the keys `type`, `owner` and
   *   `classification` are applied, other keys are ignored.
   */
  async getQualityMetrics(filters?: Record<string, any>): Promise<any> {
    try {
      let qualities = Array.from(this.qualities.values());

      // Apply filters if provided
      if (filters) {
        const criteria = Object.entries(filters);
        // Collect matching asset ids once so each record is checked in O(1).
        const matchingAssetIds = new Set<string>();
        for (const asset of this.assets.values()) {
          const matches = criteria.every(([key, value]) => {
            if (key === 'type') return asset.type === value;
            if (key === 'owner') return asset.owner === value;
            if (key === 'classification') return asset.classification === value;
            return true;
          });
          if (matches) {
            matchingAssetIds.add(asset.id);
          }
        }
        qualities = qualities.filter(quality => matchingAssetIds.has(quality.assetId));
      }

      // Calculate aggregate metrics
      const metrics = {
        totalAssets: qualities.length,
        averageQualityScore: this.calculateAverageScore(qualities),
        qualityDistribution: this.calculateQualityDistribution(qualities),
        topIssues: this.getTopQualityIssues(qualities),
        trendSummary: this.getTrendSummary(qualities),
        ruleCompliance: this.calculateRuleCompliance(qualities)
      };

      this.logger.info('Quality metrics calculated', {
        totalAssets: metrics.totalAssets,
        averageScore: metrics.averageQualityScore
      });
      return metrics;
    } catch (error) {
      this.logger.error('Failed to get quality metrics', { filters, error });
      throw error;
    }
  }

  /**
   * Builds a quality report for the given assets.
   *
   * Only ids that have both an asset and a quality record contribute details.
   * `summary.totalAssets` is the number of ids requested, while
   * `summary.averageScore` is averaged over the assets that actually
   * contributed a score (0 when none did). Previously the sum was divided by
   * the number of requested ids, which produced NaN for an empty list and
   * understated the average when some ids had no data.
   */
  async generateQualityReport(assetIds: string[]): Promise<any> {
    try {
      const reportData = {
        summary: {
          totalAssets: assetIds.length,
          assessmentDate: new Date(),
          averageScore: 0,
          criticalIssues: 0,
          highIssues: 0
        },
        assetDetails: [] as any[],
        recommendations: [] as string[]
      };

      let totalScore = 0;
      let criticalCount = 0;
      let highCount = 0;

      for (const assetId of assetIds) {
        const quality = this.qualities.get(assetId);
        const asset = this.assets.get(assetId);
        if (!quality || !asset) {
          continue;
        }

        totalScore += quality.overallScore;
        const openIssues = quality.issues.filter(i => !i.resolved);
        const criticalIssuesCount = openIssues.filter(i => i.severity === 'critical').length;
        const highIssuesCount = openIssues.filter(i => i.severity === 'high').length;
        criticalCount += criticalIssuesCount;
        highCount += highIssuesCount;

        reportData.assetDetails.push({
          assetId,
          assetName: asset.name,
          qualityScore: quality.overallScore,
          dimensions: quality.dimensions,
          openIssues: openIssues.length,
          criticalIssues: criticalIssuesCount,
          highIssues: highIssuesCount,
          lastAssessment: quality.lastAssessment
        });
      }

      // Average over assets that contributed a score; avoids 0/0 and dilution.
      const assessedCount = reportData.assetDetails.length;
      reportData.summary.averageScore = assessedCount > 0 ? Math.round(totalScore / assessedCount) : 0;
      reportData.summary.criticalIssues = criticalCount;
      reportData.summary.highIssues = highCount;

      // Generate recommendations
      reportData.recommendations = this.generateQualityRecommendations(reportData);

      this.logger.info('Quality report generated', {
        assetCount: assetIds.length,
        averageScore: reportData.summary.averageScore,
        criticalIssues: criticalCount
      });
      return reportData;
    } catch (error) {
      this.logger.error('Failed to generate quality report', { assetIds, error });
      throw error;
    }
  }

  // Private helper methods

  /** Creates a blank quality record (score 100, no dimensions, rules or issues). It is not stored. */
  private createEmptyQuality(assetId: string): DataQuality {
    return {
      id: this.generateId(),
      assetId,
      overallScore: 100,
      dimensions: [],
      rules: [],
      issues: [],
      trend: {
        timeframe: 'week',
        dataPoints: [],
        trend: 'stable',
        changeRate: 0
      },
      lastAssessment: new Date()
    };
  }

  /**
   * Runs each dimension check whose `check*` flag is set on `request`, in the
   * fixed order completeness, accuracy, consistency, validity, timeliness,
   * uniqueness.
   */
  private async performQualityAssessment(
    asset: DataAsset,
    request: QualityAssessmentRequest
  ): Promise<{ dimensions: QualityDimension[] }> {
    const dimensions: QualityDimension[] = [];

    // Completeness assessment
    if (request.checkCompleteness) {
      dimensions.push(await this.assessCompleteness(asset));
    }
    // Accuracy assessment
    if (request.checkAccuracy) {
      dimensions.push(await this.assessAccuracy(asset));
    }
    // Consistency assessment
    if (request.checkConsistency) {
      dimensions.push(await this.assessConsistency(asset));
    }
    // Validity assessment
    if (request.checkValidity) {
      dimensions.push(await this.assessValidity(asset));
    }
    // Timeliness assessment
    if (request.checkTimeliness) {
      dimensions.push(await this.assessTimeliness(asset));
    }
    // Uniqueness assessment
    if (request.checkUniqueness) {
      dimensions.push(await this.assessUniqueness(asset));
    }
    return { dimensions };
  }

  /** Mock completeness check: random integer score in 80..99; `asset` is not inspected. */
  private async assessCompleteness(asset: DataAsset): Promise<QualityDimension> {
    // Mock implementation - in real scenario, this would analyze actual data
    const score = Math.floor(Math.random() * 20) + 80;
    return {
      name: 'completeness',
      score,
      description: 'Measures the degree to which data is complete',
      rules: [`No null values in required fields`],
      threshold: 95,
      lastChecked: new Date()
    };
  }

  /** Mock accuracy check: random integer score in 85..99; `asset` is not inspected. */
  private async assessAccuracy(asset: DataAsset): Promise<QualityDimension> {
    const score = Math.floor(Math.random() * 15) + 85;
    return {
      name: 'accuracy',
      score,
      description: 'Measures how well data represents real-world values',
      rules: [`Values within expected ranges`, `Format validation`],
      threshold: 90,
      lastChecked: new Date()
    };
  }

  /** Mock consistency check: random integer score in 75..99; `asset` is not inspected. */
  private async assessConsistency(asset: DataAsset): Promise<QualityDimension> {
    const score = Math.floor(Math.random() * 25) + 75;
    return {
      name: 'consistency',
      score,
      description: 'Measures uniformity of data across datasets',
      rules: [`Consistent data types`, `Standardized formats`],
      threshold: 85,
      lastChecked: new Date()
    };
  }

  /** Mock validity check: random integer score in 80..99; `asset` is not inspected. */
  private async assessValidity(asset: DataAsset): Promise<QualityDimension> {
    const score = Math.floor(Math.random() * 20) + 80;
    return {
      name: 'validity',
      score,
      description: 'Measures conformity to defined business rules',
      rules: [`Business rule compliance`, `Schema validation`],
      threshold: 90,
      lastChecked: new Date()
    };
  }

  /** Mock timeliness check: random integer score in 70..99; `asset` is not inspected. */
  private async assessTimeliness(asset: DataAsset): Promise<QualityDimension> {
    const score = Math.floor(Math.random() * 30) + 70;
    return {
      name: 'timeliness',
      score,
      description: 'Measures how up-to-date the data is',
      rules: [`Data refreshed within SLA`, `Timestamp validation`],
      threshold: 85,
      lastChecked: new Date()
    };
  }

  /** Mock uniqueness check: random integer score in 75..99; `asset` is not inspected. */
  private async assessUniqueness(asset: DataAsset): Promise<QualityDimension> {
    const score = Math.floor(Math.random() * 25) + 75;
    return {
      name: 'uniqueness',
      score,
      description: 'Measures absence of duplicate records',
      rules: [`No duplicate primary keys`, `Unique constraints enforced`],
      threshold: 95,
      lastChecked: new Date()
    };
  }

  /**
   * Mock rule execution: passes at random, with probability 0.9 for
   * `critical` rules and 0.95 for all others. `asset` is not inspected.
   */
  private async executeQualityRule(asset: DataAsset, rule: QualityRule): Promise<boolean> {
    // Mock implementation - in real scenario, this would execute the actual rule
    const passRate = rule.severity === 'critical' ? 0.9 : 0.95;
    return Math.random() < passRate;
  }

  /** Rounded mean of the dimension scores; 100 when there are no dimensions. */
  private calculateOverallScore(dimensions: QualityDimension[]): number {
    if (dimensions.length === 0) return 100;
    const totalScore = dimensions.reduce((sum, dim) => sum + dim.score, 0);
    return Math.round(totalScore / dimensions.length);
  }

  /**
   * Appends `newScore` to the record's trend, keeps only the 30 most recent
   * data points and refreshes the trend direction and change rate.
   */
  private updateQualityTrend(quality: DataQuality, newScore: number): void {
    quality.trend.dataPoints.push({
      timestamp: new Date(),
      value: newScore
    });

    // Keep only last 30 data points
    if (quality.trend.dataPoints.length > 30) {
      quality.trend.dataPoints = quality.trend.dataPoints.slice(-30);
    }

    // Update trend analysis
    const trendAnalysis = this.analyzeTrend(quality.trend.dataPoints);
    quality.trend.trend = trendAnalysis.direction;
    quality.trend.changeRate = trendAnalysis.changeRate;
  }

  /**
   * Returns a copy of `trend` whose data points fall within `timeframe`
   * counted back from now: `'day'` is 24 hours, `'week'` is 7 days, `'month'`
   * is 30 days. Any other value keeps every data point.
   */
  private filterTrendByTimeframe(trend: QualityTrend, timeframe: string): QualityTrend {
    const dayMs = 24 * 60 * 60 * 1000;
    const now = new Date();
    let cutoffDate: Date;
    switch (timeframe) {
      case 'day':
        cutoffDate = new Date(now.getTime() - dayMs);
        break;
      case 'week':
        cutoffDate = new Date(now.getTime() - 7 * dayMs);
        break;
      case 'month':
        cutoffDate = new Date(now.getTime() - 30 * dayMs);
        break;
      default:
        cutoffDate = new Date(0); // All time
    }

    const filteredDataPoints = trend.dataPoints.filter(dp => dp.timestamp >= cutoffDate);
    return {
      ...trend,
      timeframe,
      dataPoints: filteredDataPoints
    };
  }

  /**
   * Compares the first and last data point and classifies the change.
   *
   * The change rate is the relative change in percent, rounded to two
   * decimals; an absolute change below 2% counts as `stable`. With fewer than
   * two points the trend is `stable` with rate 0. A relative change from a
   * zero baseline is undefined, so it is reported as 0 when the last value is
   * also 0 and as +/-100 otherwise, instead of NaN or Infinity.
   */
  private analyzeTrend(dataPoints: { timestamp: Date; value: number }[]): { direction: 'improving' | 'declining' | 'stable'; changeRate: number } {
    if (dataPoints.length < 2) {
      return { direction: 'stable', changeRate: 0 };
    }

    const firstValue = dataPoints[0].value;
    const lastValue = dataPoints[dataPoints.length - 1].value;

    let changeRate: number;
    if (firstValue === 0) {
      // Guard against division by zero.
      changeRate = lastValue === 0 ? 0 : Math.sign(lastValue) * 100;
    } else {
      changeRate = ((lastValue - firstValue) / firstValue) * 100;
    }

    let direction: 'improving' | 'declining' | 'stable';
    if (Math.abs(changeRate) < 2) {
      direction = 'stable';
    } else if (changeRate > 0) {
      direction = 'improving';
    } else {
      direction = 'declining';
    }
    return { direction, changeRate: Math.round(changeRate * 100) / 100 };
  }

  /** Rounded mean of the overall scores; 0 for an empty list. */
  private calculateAverageScore(qualities: DataQuality[]): number {
    if (qualities.length === 0) return 0;
    const totalScore = qualities.reduce((sum, quality) => sum + quality.overallScore, 0);
    return Math.round(totalScore / qualities.length);
  }

  /** Buckets records by overall score: excellent (>= 90), good (>= 80), fair (>= 70), poor (below 70). */
  private calculateQualityDistribution(qualities: DataQuality[]): Record<string, number> {
    const distribution = { excellent: 0, good: 0, fair: 0, poor: 0 };
    for (const quality of qualities) {
      if (quality.overallScore >= 90) distribution.excellent++;
      else if (quality.overallScore >= 80) distribution.good++;
      else if (quality.overallScore >= 70) distribution.fair++;
      else distribution.poor++;
    }
    return distribution;
  }

  /** Returns the five most frequent types among unresolved issues, most frequent first. */
  private getTopQualityIssues(qualities: DataQuality[]): Array<{ type: string; count: number }> {
    const issueTypes = new Map<string, number>();
    for (const quality of qualities) {
      for (const issue of quality.issues) {
        if (!issue.resolved) {
          issueTypes.set(issue.type, (issueTypes.get(issue.type) || 0) + 1);
        }
      }
    }
    return Array.from(issueTypes.entries())
      .map(([type, count]) => ({ type, count }))
      .sort((a, b) => b.count - a.count)
      .slice(0, 5);
  }

  /** Counts records per trend direction. */
  private getTrendSummary(qualities: DataQuality[]): Record<string, number> {
    const trends = { improving: 0, declining: 0, stable: 0 };
    for (const quality of qualities) {
      trends[quality.trend.trend]++;
    }
    return trends;
  }

  /**
   * Mock compliance percentage: each record is assumed to pass the share of
   * its rules equal to its overall score. Returns 100 when there are no rules.
   */
  private calculateRuleCompliance(qualities: DataQuality[]): number {
    let totalRules = 0;
    let passedRules = 0;
    for (const quality of qualities) {
      totalRules += quality.rules.length;
      // Mock compliance calculation
      passedRules += Math.floor(quality.rules.length * (quality.overallScore / 100));
    }
    return totalRules > 0 ? Math.round((passedRules / totalRules) * 100) : 100;
  }

  /**
   * Derives report recommendations from the summary figures and per-asset
   * scores. The average-score rule is applied only when at least one asset
   * contributed details, so an empty report is not flagged as low quality.
   */
  private generateQualityRecommendations(reportData: any): string[] {
    const recommendations: string[] = [];

    if (reportData.assetDetails.length > 0 && reportData.summary.averageScore < 80) {
      recommendations.push('Overall data quality is below acceptable threshold. Consider implementing comprehensive data quality monitoring.');
    }
    if (reportData.summary.criticalIssues > 0) {
      recommendations.push(`${reportData.summary.criticalIssues} critical quality issues require immediate attention.`);
    }
    if (reportData.summary.highIssues > 5) {
      recommendations.push('High number of quality issues detected. Review data validation processes.');
    }

    // Asset-specific recommendations
    const lowScoreAssets = reportData.assetDetails.filter((asset: any) => asset.qualityScore < 70);
    if (lowScoreAssets.length > 0) {
      recommendations.push(`${lowScoreAssets.length} assets have quality scores below 70% and need immediate remediation.`);
    }

    if (recommendations.length === 0) {
      recommendations.push('Data quality is within acceptable ranges. Continue monitoring and maintain current practices.');
    }
    return recommendations;
  }

  /**
   * Generates an id of the form `quality_<epoch millis>_<random base-36 suffix>`.
   * Ids are not guaranteed to be unique.
   */
  private generateId(): string {
    // slice(2, 11) skips the leading "0." and replaces the deprecated substr(2, 9).
    return `quality_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
  }

  /**
   * Injects the asset map used to resolve asset ids (typically supplied by
   * DataCatalogService). The map is stored by reference, not copied.
   */
  setAssets(assets: Map<string, DataAsset>): void {
    this.assets = assets;
  }
}

View file

@ -0,0 +1,801 @@
import { EventBus } from '@stock-bot/event-bus';
import { Logger } from '@stock-bot/utils';
import {
DataAsset,
SearchQuery,
SearchResult,
SearchFilters,
SearchSuggestion,
DataAssetType,
DataClassification
} from '../types/DataCatalog';
/**
 * Search operations offered over the data catalog: querying, search-history
 * reporting and maintenance of the underlying index.
 */
export interface SearchService {
  // --- Querying ---------------------------------------------------------

  /** Runs the search described by `query` and resolves with its result. */
  search(query: SearchQuery): Promise<SearchResult>;

  /** Resolves with completion suggestions for a partially typed term. */
  suggest(partial: string): Promise<SearchSuggestion[]>;

  /** Resolves with the assets selected by the given facet name -> values map. */
  searchByFacets(facets: Record<string, string[]>): Promise<DataAsset[]>;

  /** Resolves with up to `limit` assets considered similar to `assetId`. */
  searchSimilar(assetId: string, limit?: number): Promise<DataAsset[]>;

  // --- Search history ---------------------------------------------------

  /** Resolves with up to `limit` of the most frequently issued queries. */
  getPopularSearches(limit?: number): Promise<string[]>;

  /** Resolves with up to `limit` of the latest queries issued by `userId`. */
  getRecentSearches(userId: string, limit?: number): Promise<string[]>;

  /** Resolves with aggregate search statistics for the given timeframe. */
  getSearchAnalytics(timeframe?: string): Promise<any>;

  // --- Index maintenance ------------------------------------------------

  /** Adds `asset` to the search index. */
  indexAsset(asset: DataAsset): Promise<void>;

  /** Removes the asset with the given id from the search index. */
  removeFromIndex(assetId: string): Promise<void>;

  /** Rebuilds the search index from scratch. */
  reindexAll(): Promise<void>;
}
/** One recorded search, kept in memory for history and analytics. */
interface SearchHistoryEntry {
  query: string;
  userId?: string;
  timestamp: Date;
  resultCount: number;
}

/**
 * In-memory implementation of {@link SearchService}.
 *
 * Assets are held in a map keyed by id and are additionally indexed in several
 * inverted indices (word, tag, type, classification, owner -> asset ids).
 * Search history is kept in memory and capped at a fixed number of entries.
 * Nothing is persisted.
 */
export class SearchServiceImpl implements SearchService {
  /** Page size used by `search` when the query carries no `limit`. */
  private static readonly DEFAULT_PAGE_SIZE = 20;
  /** Maximum number of entries retained in the search history. */
  private static readonly MAX_HISTORY_ENTRIES = 1000;

  /** Assets exactly as they were last indexed, keyed by asset id. */
  private searchIndex: Map<string, DataAsset> = new Map();
  private searchHistory: SearchHistoryEntry[] = [];
  /** All known assets keyed by id; may be a map injected through `setAssets`. */
  private assets: Map<string, DataAsset> = new Map();

  // In-memory inverted indices: key -> ids of the assets carrying that key.
  private wordToAssets: Map<string, Set<string>> = new Map();
  private tagToAssets: Map<string, Set<string>> = new Map();
  private typeToAssets: Map<string, Set<string>> = new Map();
  private classificationToAssets: Map<string, Set<string>> = new Map();
  private ownerToAssets: Map<string, Set<string>> = new Map();

  constructor(
    private eventBus: EventBus,
    private logger: Logger
  ) {}

  /**
   * Runs a search: text match (or all assets when no text is given), then
   * filters, sorting and pagination. Facets are computed over the full
   * (unpaginated) result set. The search is recorded in the history and a
   * `data.catalog.search.performed` event is emitted.
   *
   * `offset` defaults to 0 and `limit` to 20 only when they are absent
   * (`null`/`undefined`); an explicit `limit: 0` yields an empty page.
   *
   * @param query Search text, filters, sort and paging options.
   * @returns The requested page plus total count, facets and suggestions.
   * @throws Re-throws any error after logging it.
   */
  async search(query: SearchQuery): Promise<SearchResult> {
    try {
      const startTime = Date.now();

      let results: DataAsset[] = query.text
        ? await this.performTextSearch(query.text)
        : Array.from(this.assets.values());

      if (query.filters) {
        results = this.applyFilters(results, query.filters);
      }
      results = this.sortResults(results, query.sortBy, query.sortOrder);

      const total = results.length;
      // `??` rather than `||` so that an explicit 0 is honoured.
      const offset = query.offset ?? 0;
      const limit = query.limit ?? SearchServiceImpl.DEFAULT_PAGE_SIZE;
      const paginatedResults = results.slice(offset, offset + limit);

      const facets = this.calculateFacets(results);
      const searchTime = Date.now() - startTime;

      const searchResult: SearchResult = {
        assets: paginatedResults,
        total,
        offset,
        limit,
        searchTime,
        facets,
        suggestions: await this.generateSearchSuggestions(query.text ?? '', results)
      };

      this.recordSearch(query.text ?? '', query.userId, total);

      this.logger.info('Search completed', {
        query: query.text,
        resultCount: total,
        searchTime
      });
      await this.eventBus.emit('data.catalog.search.performed', {
        query,
        resultCount: total,
        searchTime,
        timestamp: new Date()
      });

      return searchResult;
    } catch (error) {
      this.logger.error('Search failed', { query, error });
      throw error;
    }
  }

  /**
   * Produces up to ten completion suggestions for a partially typed term.
   *
   * Candidates are asset names, tags, owners and up to five popular past
   * queries that contain the (trimmed, case-insensitive) term. Identical
   * candidates of the same kind are merged and carry the number of matching
   * assets as `count`. Prefix matches are ranked first, then higher counts.
   *
   * @param partial Text typed so far; fewer than two characters yields `[]`.
   * @returns Ranked suggestions, each with a `<mark>`-highlighted form.
   * @throws Re-throws any error after logging it.
   */
  async suggest(partial: string): Promise<SearchSuggestion[]> {
    try {
      const term = partial.trim();
      const needle = term.toLowerCase();
      if (needle.length < 2) {
        return [];
      }

      const matches = (candidate: string): boolean =>
        candidate.toLowerCase().includes(needle);

      const nameCounts = new Map<string, number>();
      const tagCounts = new Map<string, number>();
      const ownerCounts = new Map<string, number>();
      for (const asset of this.assets.values()) {
        if (matches(asset.name)) {
          SearchServiceImpl.increment(nameCounts, asset.name);
        }
        for (const tag of asset.tags) {
          if (matches(tag)) {
            SearchServiceImpl.increment(tagCounts, tag);
          }
        }
        if (matches(asset.owner)) {
          SearchServiceImpl.increment(ownerCounts, asset.owner);
        }
      }

      const popularCounts = new Map<string, number>();
      for (const popular of this.getPopularSearchTerms().filter(matches).slice(0, 5)) {
        popularCounts.set(popular, this.getSearchCount(popular));
      }

      const suggestions: SearchSuggestion[] = [
        ...this.toSuggestions('asset_name', nameCounts, term),
        ...this.toSuggestions('tag', tagCounts, term),
        ...this.toSuggestions('owner', ownerCounts, term),
        ...this.toSuggestions('popular_search', popularCounts, term)
      ];

      return suggestions
        .sort((a, b) => {
          // Prefix matches first ...
          const aPrefix = a.text.toLowerCase().startsWith(needle) ? 1 : 0;
          const bPrefix = b.text.toLowerCase().startsWith(needle) ? 1 : 0;
          if (aPrefix !== bPrefix) return bPrefix - aPrefix;
          // ... then the more frequent candidate.
          return b.count - a.count;
        })
        .slice(0, 10);
    } catch (error) {
      this.logger.error('Suggestion generation failed', { partial, error });
      throw error;
    }
  }

  /**
   * Looks assets up through the inverted indices.
   *
   * Values within one facet are OR-ed; different facets are AND-ed.
   * Recognised facet names are `type`, `classification`, `owner` and `tags`;
   * any other name matches nothing and therefore empties the result.
   *
   * @param facets Facet name -> accepted values.
   * @returns Matching assets (empty when `facets` has no entries).
   * @throws Re-throws any error after logging it.
   */
  async searchByFacets(facets: Record<string, string[]>): Promise<DataAsset[]> {
    try {
      let matchedIds = new Set<string>();
      let isFirstFacet = true;

      for (const [facetType, values] of Object.entries(facets)) {
        const index = this.indexForFacet(facetType);
        const facetIds = new Set<string>();
        for (const value of values) {
          const ids = index?.get(value);
          if (ids) {
            for (const id of ids) {
              facetIds.add(id);
            }
          }
        }

        if (isFirstFacet) {
          matchedIds = facetIds;
          isFirstFacet = false;
        } else {
          // Intersection with what earlier facets selected.
          matchedIds = new Set([...matchedIds].filter(id => facetIds.has(id)));
        }
      }

      const assets = Array.from(matchedIds)
        .map(id => this.assets.get(id))
        .filter((asset): asset is DataAsset => asset !== undefined);

      this.logger.info('Facet search completed', {
        facets,
        resultCount: assets.length
      });
      return assets;
    } catch (error) {
      this.logger.error('Facet search failed', { facets, error });
      throw error;
    }
  }

  /**
   * Finds assets similar to the given one, best match first.
   *
   * @param assetId Id of the reference asset.
   * @param limit Maximum number of assets to return (default 10).
   * @returns Assets scoring above 0.1 in `calculateSimilarity`; `[]` when the
   *          reference asset is unknown.
   * @throws Re-throws any error after logging it.
   */
  async searchSimilar(assetId: string, limit: number = 10): Promise<DataAsset[]> {
    try {
      const targetAsset = this.assets.get(assetId);
      if (!targetAsset) {
        return [];
      }

      const scored: Array<{ asset: DataAsset; score: number }> = [];
      for (const asset of this.assets.values()) {
        if (asset.id === assetId) continue;
        const score = this.calculateSimilarity(targetAsset, asset);
        if (score > 0.1) { // Minimum similarity threshold
          scored.push({ asset, score });
        }
      }

      const results = scored
        .sort((a, b) => b.score - a.score)
        .slice(0, limit)
        .map(item => item.asset);

      this.logger.info('Similar assets found', {
        assetId,
        similarCount: results.length
      });
      return results;
    } catch (error) {
      this.logger.error('Similar asset search failed', { assetId, error });
      throw error;
    }
  }

  /**
   * Returns the most frequently recorded non-empty queries.
   *
   * @param limit Maximum number of queries to return (default 10).
   * @throws Re-throws any error after logging it.
   */
  async getPopularSearches(limit: number = 10): Promise<string[]> {
    try {
      return this.getPopularSearchTerms().slice(0, limit);
    } catch (error) {
      this.logger.error('Failed to get popular searches', { error });
      throw error;
    }
  }

  /**
   * Returns the latest non-empty queries recorded for a user, newest first.
   *
   * @param userId User whose history is read.
   * @param limit Maximum number of queries to return (default 10).
   * @throws Re-throws any error after logging it.
   */
  async getRecentSearches(userId: string, limit: number = 10): Promise<string[]> {
    try {
      return this.searchHistory
        .filter(search => search.userId === userId && search.query)
        .sort((a, b) => b.timestamp.getTime() - a.timestamp.getTime())
        .slice(0, limit)
        .map(search => search.query);
    } catch (error) {
      this.logger.error('Failed to get recent searches', { userId, error });
      throw error;
    }
  }

  /**
   * Adds an asset to the index, or refreshes it when already indexed, and
   * emits `data.catalog.asset.indexed`.
   *
   * When an earlier version of the asset is indexed, its inverted-index
   * entries are removed first so that words, tags or owners the asset no
   * longer carries stop matching it.
   *
   * @param asset Asset to index.
   * @throws Re-throws any error after logging it.
   */
  async indexAsset(asset: DataAsset): Promise<void> {
    try {
      const previous = this.searchIndex.get(asset.id);
      if (previous) {
        this.removeFromInvertedIndices(previous);
      }

      this.searchIndex.set(asset.id, asset);
      this.assets.set(asset.id, asset);
      this.updateInvertedIndices(asset);

      this.logger.debug('Asset indexed', { assetId: asset.id, name: asset.name });
      await this.eventBus.emit('data.catalog.asset.indexed', {
        assetId: asset.id,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to index asset', { asset, error });
      throw error;
    }
  }

  /**
   * Removes an asset from the asset map and from every index, then emits
   * `data.catalog.asset.unindexed`. Does nothing when the id is not indexed.
   *
   * @param assetId Id of the asset to remove.
   * @throws Re-throws any error after logging it.
   */
  async removeFromIndex(assetId: string): Promise<void> {
    try {
      const asset = this.searchIndex.get(assetId);
      if (!asset) {
        return;
      }

      this.searchIndex.delete(assetId);
      this.assets.delete(assetId);
      this.removeFromInvertedIndices(asset);

      this.logger.debug('Asset removed from index', { assetId });
      await this.eventBus.emit('data.catalog.asset.unindexed', {
        assetId,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to remove asset from index', { assetId, error });
      throw error;
    }
  }

  /**
   * Clears every index and re-indexes all assets in the asset map one by one
   * (each emits its own `data.catalog.asset.indexed` event), then emits
   * `data.catalog.index.rebuilt`.
   *
   * @throws Re-throws any error after logging it.
   */
  async reindexAll(): Promise<void> {
    try {
      this.searchIndex.clear();
      this.wordToAssets.clear();
      this.tagToAssets.clear();
      this.typeToAssets.clear();
      this.classificationToAssets.clear();
      this.ownerToAssets.clear();

      for (const asset of this.assets.values()) {
        await this.indexAsset(asset);
      }

      this.logger.info('Search index rebuilt', { assetCount: this.assets.size });
      await this.eventBus.emit('data.catalog.index.rebuilt', {
        assetCount: this.assets.size,
        timestamp: new Date()
      });
    } catch (error) {
      this.logger.error('Failed to rebuild search index', { error });
      throw error;
    }
  }

  /**
   * Aggregates the in-memory search history over a time window.
   *
   * @param timeframe `'day'`, `'week'` (default) or `'month'` (30 days); any
   *                  other value covers the whole retained history.
   * @returns Totals, top queries, a per-day trend and (mock) facet usage.
   * @throws Re-throws any error after logging it.
   */
  async getSearchAnalytics(timeframe: string = 'week'): Promise<any> {
    try {
      const dayMs = 24 * 60 * 60 * 1000;
      const now = Date.now();
      let cutoffDate: Date;
      switch (timeframe) {
        case 'day':
          cutoffDate = new Date(now - dayMs);
          break;
        case 'week':
          cutoffDate = new Date(now - 7 * dayMs);
          break;
        case 'month':
          cutoffDate = new Date(now - 30 * dayMs);
          break;
        default:
          cutoffDate = new Date(0);
      }

      const recentSearches = this.searchHistory.filter(search => search.timestamp >= cutoffDate);
      const totalResults = recentSearches.reduce((sum, s) => sum + s.resultCount, 0);

      return {
        totalSearches: recentSearches.length,
        uniqueQueries: new Set(recentSearches.map(s => s.query)).size,
        averageResults: recentSearches.length > 0 ? totalResults / recentSearches.length : 0,
        noResultQueries: recentSearches.filter(s => s.resultCount === 0).length,
        topQueries: this.getTopQueries(recentSearches, 10),
        searchTrend: this.calculateSearchTrend(recentSearches, timeframe),
        facetUsage: this.getFacetUsage(recentSearches)
      };
    } catch (error) {
      this.logger.error('Failed to get search analytics', { timeframe, error });
      throw error;
    }
  }

  /**
   * Injects the asset map (typically the one owned by DataCatalogService) and
   * starts a rebuild of the index.
   *
   * The map is stored by reference. The rebuild runs asynchronously and is
   * not awaited here, so the index may still be incomplete when this method
   * returns; call `reindexAll()` and await it when completion matters.
   *
   * @param assets Assets keyed by id.
   */
  setAssets(assets: Map<string, DataAsset>): void {
    this.assets = assets;
    void this.reindexAll().catch(() => {
      // reindexAll() has already logged the failure; swallowing it here only
      // prevents an unhandled promise rejection from this fire-and-forget call.
    });
  }

  // ---------------------------------------------------------------------
  // Private helper methods
  // ---------------------------------------------------------------------

  /** Adds one to the counter stored under `key`. */
  private static increment(counts: Map<string, number>, key: string): void {
    counts.set(key, (counts.get(key) ?? 0) + 1);
  }

  /** Turns a value -> count map into suggestions of the given kind. */
  private toSuggestions(
    type: SearchSuggestion['type'],
    counts: Map<string, number>,
    term: string
  ): SearchSuggestion[] {
    return Array.from(counts, ([text, count]) => ({
      text,
      type,
      count,
      highlight: this.highlightMatch(text, term)
    }));
  }

  /** Maps a facet name used by `searchByFacets` to its inverted index. */
  private indexForFacet(facetType: string): Map<string, Set<string>> | undefined {
    switch (facetType) {
      case 'type':
        return this.typeToAssets;
      case 'classification':
        return this.classificationToAssets;
      case 'owner':
        return this.ownerToAssets;
      case 'tags':
        return this.tagToAssets;
      default:
        return undefined;
    }
  }

  /**
   * Scores assets by how many query words hit the word index (a word repeated
   * in the query counts each time) and returns them best match first.
   */
  private async performTextSearch(text: string): Promise<DataAsset[]> {
    const scores = new Map<string, number>();
    for (const word of this.tokenize(text)) {
      for (const assetId of this.wordToAssets.get(word) ?? []) {
        SearchServiceImpl.increment(scores, assetId);
      }
    }

    return Array.from(scores.entries())
      .sort((a, b) => b[1] - a[1])
      .map(([assetId]) => this.assets.get(assetId))
      .filter((asset): asset is DataAsset => asset !== undefined);
  }

  /**
   * Keeps the assets that satisfy every filter that is set. List filters
   * (`types`, `classifications`, `owners`, `tags`) match when the asset has
   * any of the listed values; empty lists are ignored.
   */
  private applyFilters(assets: DataAsset[], filters: SearchFilters): DataAsset[] {
    return assets.filter(asset => {
      if (filters.types && filters.types.length > 0) {
        if (!filters.types.includes(asset.type)) return false;
      }
      if (filters.classifications && filters.classifications.length > 0) {
        if (!filters.classifications.includes(asset.classification)) return false;
      }
      if (filters.owners && filters.owners.length > 0) {
        if (!filters.owners.includes(asset.owner)) return false;
      }
      if (filters.tags && filters.tags.length > 0) {
        if (!filters.tags.some(tag => asset.tags.includes(tag))) return false;
      }
      if (filters.createdAfter) {
        if (asset.createdAt < filters.createdAfter) return false;
      }
      if (filters.createdBefore) {
        if (asset.createdAt > filters.createdBefore) return false;
      }
      return true;
    });
  }

  /**
   * Returns the assets ordered by `sortBy`. Without `sortBy` the input is
   * returned untouched (relevance order); otherwise a sorted copy is
   * returned and the input array is left unmodified. Unknown sort keys keep
   * the incoming order.
   */
  private sortResults(assets: DataAsset[], sortBy?: string, sortOrder?: 'asc' | 'desc'): DataAsset[] {
    if (!sortBy) {
      return assets;
    }

    const direction = sortOrder === 'desc' ? -1 : 1;
    return [...assets].sort((a, b) => {
      let comparison = 0;
      switch (sortBy) {
        case 'name':
          comparison = a.name.localeCompare(b.name);
          break;
        case 'createdAt':
          comparison = a.createdAt.getTime() - b.createdAt.getTime();
          break;
        case 'updatedAt':
          comparison = a.updatedAt.getTime() - b.updatedAt.getTime();
          break;
        case 'lastAccessed':
          // Never-accessed assets sort as if accessed at the epoch.
          comparison = (a.lastAccessed?.getTime() || 0) - (b.lastAccessed?.getTime() || 0);
          break;
        case 'usage':
          comparison = a.usage.accessCount - b.usage.accessCount;
          break;
        default:
          comparison = 0;
      }
      return comparison * direction;
    });
  }

  /**
   * Counts how often each type, classification, owner and tag occurs among
   * the given assets; every facet's values are ordered by descending count.
   */
  private calculateFacets(assets: DataAsset[]): Record<string, Array<{ value: string; count: number }>> {
    const types = new Map<string, number>();
    const classifications = new Map<string, number>();
    const owners = new Map<string, number>();
    const tags = new Map<string, number>();

    for (const asset of assets) {
      SearchServiceImpl.increment(types, asset.type);
      SearchServiceImpl.increment(classifications, asset.classification);
      SearchServiceImpl.increment(owners, asset.owner);
      for (const tag of asset.tags) {
        SearchServiceImpl.increment(tags, tag);
      }
    }

    const ranked = (counts: Map<string, number>): Array<{ value: string; count: number }> =>
      Array.from(counts, ([value, count]) => ({ value, count }))
        .sort((a, b) => b.count - a.count);

    return {
      types: ranked(types),
      classifications: ranked(classifications),
      owners: ranked(owners),
      tags: ranked(tags)
    };
  }

  /**
   * Proposes refined queries of the form "<query> <tag>" using the three most
   * common tags among the first ten results.
   */
  private async generateSearchSuggestions(query: string, results: DataAsset[]): Promise<string[]> {
    if (!query || results.length === 0) {
      return [];
    }

    const tagCounts = new Map<string, number>();
    for (const asset of results.slice(0, 10)) {
      for (const tag of asset.tags) {
        SearchServiceImpl.increment(tagCounts, tag);
      }
    }

    return Array.from(tagCounts.entries())
      .sort((a, b) => b[1] - a[1])
      .slice(0, 3)
      .map(([tag]) => `${query} ${tag}`);
  }

  /** Registers the asset in the word, tag, type, classification and owner indices. */
  private updateInvertedIndices(asset: DataAsset): void {
    const words = [
      ...this.tokenize(asset.name),
      ...this.tokenize(asset.description)
    ];
    this.addIdToIndex(this.wordToAssets, words, asset.id);
    this.addIdToIndex(this.tagToAssets, asset.tags, asset.id);
    this.addIdToIndex(this.typeToAssets, [asset.type], asset.id);
    this.addIdToIndex(this.classificationToAssets, [asset.classification], asset.id);
    this.addIdToIndex(this.ownerToAssets, [asset.owner], asset.id);
  }

  /** Removes the asset from every inverted index, using the asset's own field values as keys. */
  private removeFromInvertedIndices(asset: DataAsset): void {
    const words = [
      ...this.tokenize(asset.name),
      ...this.tokenize(asset.description)
    ];
    this.removeIdFromIndex(this.wordToAssets, words, asset.id);
    this.removeIdFromIndex(this.tagToAssets, asset.tags, asset.id);
    this.removeIdFromIndex(this.typeToAssets, [asset.type], asset.id);
    this.removeIdFromIndex(this.classificationToAssets, [asset.classification], asset.id);
    this.removeIdFromIndex(this.ownerToAssets, [asset.owner], asset.id);
  }

  /** Adds `assetId` under each of `keys`, creating buckets as needed. */
  private addIdToIndex(index: Map<string, Set<string>>, keys: string[], assetId: string): void {
    for (const key of keys) {
      let bucket = index.get(key);
      if (!bucket) {
        bucket = new Set();
        index.set(key, bucket);
      }
      bucket.add(assetId);
    }
  }

  /**
   * Removes `assetId` from the bucket of each of `keys` and drops buckets
   * that become empty.
   *
   * Deliberately not named `removeFromIndex`: a private helper with that
   * name would collide with, and at runtime replace, the public
   * `removeFromIndex(assetId)` method.
   */
  private removeIdFromIndex(index: Map<string, Set<string>>, keys: string[], assetId: string): void {
    for (const key of keys) {
      const bucket = index.get(key);
      if (bucket) {
        bucket.delete(assetId);
        if (bucket.size === 0) {
          index.delete(key);
        }
      }
    }
  }

  /**
   * Splits text into lower-case words: every character that is neither a
   * word character (`\w`) nor whitespace acts as a separator, and words of
   * two characters or fewer are dropped. Missing text yields no words.
   */
  private tokenize(text: string | null | undefined): string[] {
    return (text ?? '')
      .toLowerCase()
      .replace(/[^\w\s]/g, ' ')
      .split(/\s+/)
      .filter(word => word.length > 2);
  }

  /**
   * Similarity score in [0, 1]: 0.3 for equal type, 0.2 for equal
   * classification, 0.1 for equal owner, plus up to 0.4 weighted by the
   * Jaccard similarity of the two tag sets.
   */
  private calculateSimilarity(asset1: DataAsset, asset2: DataAsset): number {
    let score = 0;
    if (asset1.type === asset2.type) score += 0.3;
    if (asset1.classification === asset2.classification) score += 0.2;
    if (asset1.owner === asset2.owner) score += 0.1;

    const tags1 = new Set<string>(asset1.tags);
    const tags2 = new Set<string>(asset2.tags);
    const shared = [...tags1].filter(tag => tags2.has(tag)).length;
    const combined = new Set<string>([...tags1, ...tags2]).size;
    if (combined > 0) {
      score += (shared / combined) * 0.4;
    }
    return score;
  }

  /**
   * Wraps every case-insensitive occurrence of `query` in `text` with
   * `<mark>…</mark>`.
   *
   * The query is matched literally: regular-expression metacharacters in it
   * are escaped, so user input such as `c++` or `(` cannot break or alter the
   * pattern. An empty query returns the text unchanged.
   *
   * NOTE: `text` itself is not HTML-escaped here; callers that render the
   * result as HTML must make sure the underlying values are safe.
   */
  private highlightMatch(text: string, query: string): string {
    if (!query) {
      return text;
    }
    const literal = query.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    return text.replace(new RegExp(`(${literal})`, 'gi'), '<mark>$1</mark>');
  }

  /** Appends a search to the history, keeping only the most recent entries. */
  private recordSearch(query: string, userId?: string, resultCount: number = 0): void {
    this.searchHistory.push({
      query,
      userId,
      timestamp: new Date(),
      resultCount
    });

    if (this.searchHistory.length > SearchServiceImpl.MAX_HISTORY_ENTRIES) {
      this.searchHistory = this.searchHistory.slice(-SearchServiceImpl.MAX_HISTORY_ENTRIES);
    }
  }

  /** All distinct non-empty queries in the history, most frequent first. */
  private getPopularSearchTerms(): string[] {
    return this.getTopQueries(this.searchHistory, Number.POSITIVE_INFINITY)
      .map(entry => entry.query);
  }

  /** Number of history entries whose query equals `query` exactly. */
  private getSearchCount(query: string): number {
    return this.searchHistory.filter(search => search.query === query).length;
  }

  /** The `limit` most frequent non-empty queries among `searches`, with counts. */
  private getTopQueries(searches: SearchHistoryEntry[], limit: number): Array<{ query: string; count: number }> {
    const queryCounts = new Map<string, number>();
    for (const search of searches) {
      if (search.query) {
        SearchServiceImpl.increment(queryCounts, search.query);
      }
    }

    return Array.from(queryCounts, ([query, count]) => ({ query, count }))
      .sort((a, b) => b.count - a.count)
      .slice(0, limit);
  }

  /**
   * Groups searches by UTC calendar day (from `toISOString()`) and classifies
   * the resulting daily counts with `analyzeTrend`. The timeframe argument is
   * currently not used by the calculation.
   */
  private calculateSearchTrend(
    searches: SearchHistoryEntry[],
    _timeframe: string
  ): { dataPoints: Array<{ date: string; count: number }>; trend: string } {
    const dailyCounts = new Map<string, number>();
    for (const search of searches) {
      const day = search.timestamp.toISOString().split('T')[0];
      SearchServiceImpl.increment(dailyCounts, day);
    }

    const dataPoints = Array.from(dailyCounts, ([date, count]) => ({ date, count }))
      .sort((a, b) => a.date.localeCompare(b.date));

    return {
      dataPoints,
      trend: this.analyzeTrend(dataPoints.map(point => point.count))
    };
  }

  /**
   * Compares the average of the first half of `values` with the average of
   * the second half: a change of less than 10% is `'stable'`, otherwise
   * `'increasing'` or `'decreasing'`. Fewer than two values are `'stable'`.
   */
  private analyzeTrend(values: number[]): string {
    if (values.length < 2) return 'stable';

    const middle = Math.floor(values.length / 2);
    const average = (items: number[]): number =>
      items.reduce((sum, value) => sum + value, 0) / items.length;
    const firstAvg = average(values.slice(0, middle));
    const secondAvg = average(values.slice(middle));

    // A zero baseline has no meaningful percentage change; decide by sign.
    if (firstAvg === 0) {
      if (secondAvg === 0) return 'stable';
      return secondAvg > 0 ? 'increasing' : 'decreasing';
    }

    const changePercent = ((secondAvg - firstAvg) / firstAvg) * 100;
    if (Math.abs(changePercent) < 10) return 'stable';
    return changePercent > 0 ? 'increasing' : 'decreasing';
  }

  /** Mock facet usage: fixed fractions of the number of searches, not measured data. */
  private getFacetUsage(searches: SearchHistoryEntry[]): Record<string, number> {
    return {
      types: Math.floor(searches.length * 0.3),
      classifications: Math.floor(searches.length * 0.2),
      owners: Math.floor(searches.length * 0.1),
      tags: Math.floor(searches.length * 0.4)
    };
  }
}