work up marketdatagateway
This commit is contained in:
parent
58ae897e90
commit
d22f7aafa0
17 changed files with 653 additions and 494 deletions
|
|
@ -1,5 +1,7 @@
|
|||
import { Context } from 'hono';
|
||||
import { logger } from '@stock-bot/utils';
|
||||
import { getLogger } from '@stock-bot/logger';
|
||||
|
||||
const logger = getLogger('HealthController');
|
||||
|
||||
export class HealthController {
|
||||
async getHealth(c: Context): Promise<Response> {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
import { Context } from 'hono';
|
||||
import { logger } from '@stock-bot/utils';
|
||||
import { getLogger } from '@stock-bot/logger';
|
||||
|
||||
const logger = getLogger('JobController');
|
||||
import { DataPipelineOrchestrator } from '../core/DataPipelineOrchestrator';
|
||||
import { JobStatus } from '../types/DataPipeline';
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
import { Context } from 'hono';
|
||||
import { logger } from '@stock-bot/utils';
|
||||
import { getLogger } from '@stock-bot/logger';
|
||||
import { DataPipelineOrchestrator } from '../core/DataPipelineOrchestrator';
|
||||
import { DataPipeline, PipelineStatus } from '../types/DataPipeline';
|
||||
|
||||
const logger = getLogger('pipeline-controller');
|
||||
|
||||
export class PipelineController {
|
||||
constructor(private orchestrator: DataPipelineOrchestrator) {}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import { EventBus } from '@stock-bot/event-bus';
|
||||
import { logger } from '@stock-bot/utils';
|
||||
import { EventBus, EventBusConfig } from '@stock-bot/event-bus';
|
||||
import { DataPipelineEvent, DataJobEvent } from '@stock-bot/types';
|
||||
import { getLogger } from '@stock-bot/logger';
|
||||
import { DataPipeline, PipelineStatus, PipelineJob, JobStatus } from '../types/DataPipeline';
|
||||
import { DataIngestionService } from '../services/DataIngestionService';
|
||||
import { DataTransformationService } from '../services/DataTransformationService';
|
||||
|
|
@ -8,34 +9,39 @@ import { DataQualityService } from '../services/DataQualityService';
|
|||
import { PipelineScheduler } from './PipelineScheduler';
|
||||
import { JobQueue } from './JobQueue';
|
||||
|
||||
const logger = getLogger('data-pipeline-orchestrator');
|
||||
|
||||
export class DataPipelineOrchestrator {
|
||||
private eventBus: EventBus;
|
||||
private scheduler: PipelineScheduler;
|
||||
private jobQueue: JobQueue;
|
||||
private pipelines: Map<string, DataPipeline> = new Map();
|
||||
private runningJobs: Map<string, PipelineJob> = new Map();
|
||||
|
||||
constructor(
|
||||
private ingestionService: DataIngestionService,
|
||||
private transformationService: DataTransformationService,
|
||||
private validationService: DataValidationService,
|
||||
private qualityService: DataQualityService
|
||||
) {
|
||||
this.eventBus = new EventBus();
|
||||
const eventBusConfig: EventBusConfig = {
|
||||
redisHost: process.env.REDIS_HOST || 'localhost',
|
||||
redisPort: parseInt(process.env.REDIS_PORT || '6379'),
|
||||
redisPassword: process.env.REDIS_PASSWORD
|
||||
};
|
||||
this.eventBus = new EventBus(eventBusConfig);
|
||||
this.scheduler = new PipelineScheduler(this);
|
||||
this.jobQueue = new JobQueue(this);
|
||||
}
|
||||
|
||||
async initialize(): Promise<void> {
|
||||
logger.info('🔄 Initializing Data Pipeline Orchestrator...');
|
||||
|
||||
await this.eventBus.initialize();
|
||||
// EventBus doesn't have initialize method, it connects automatically
|
||||
await this.scheduler.initialize();
|
||||
await this.jobQueue.initialize();
|
||||
|
||||
// Subscribe to pipeline events
|
||||
await this.eventBus.subscribe('data.pipeline.*', this.handlePipelineEvent.bind(this));
|
||||
await this.eventBus.subscribe('data.job.*', this.handleJobEvent.bind(this));
|
||||
this.eventBus.subscribe('data.pipeline.*', this.handlePipelineEvent.bind(this));
|
||||
this.eventBus.subscribe('data.job.*', this.handleJobEvent.bind(this));
|
||||
|
||||
// Load existing pipelines
|
||||
await this.loadPipelines();
|
||||
|
|
@ -50,14 +56,14 @@ export class DataPipelineOrchestrator {
|
|||
status: PipelineStatus.DRAFT,
|
||||
createdAt: new Date(),
|
||||
updatedAt: new Date(),
|
||||
};
|
||||
|
||||
this.pipelines.set(pipelineWithId.id, pipelineWithId);
|
||||
}; this.pipelines.set(pipelineWithId.id, pipelineWithId);
|
||||
|
||||
await this.eventBus.publish('data.pipeline.created', {
|
||||
type: 'PIPELINE_CREATED',
|
||||
pipelineId: pipelineWithId.id,
|
||||
pipeline: pipelineWithId,
|
||||
});
|
||||
pipelineName: pipelineWithId.name,
|
||||
timestamp: new Date()
|
||||
} as DataPipelineEvent);
|
||||
|
||||
logger.info(`📋 Created pipeline: ${pipelineWithId.name} (${pipelineWithId.id})`);
|
||||
return pipelineWithId;
|
||||
|
|
@ -91,15 +97,15 @@ export class DataPipelineOrchestrator {
|
|||
};
|
||||
|
||||
this.runningJobs.set(job.id, job);
|
||||
|
||||
// Queue the job for execution
|
||||
// Queue the job for execution
|
||||
await this.jobQueue.enqueueJob(job);
|
||||
|
||||
await this.eventBus.publish('data.job.queued', {
|
||||
type: 'JOB_STARTED',
|
||||
jobId: job.id,
|
||||
pipelineId,
|
||||
job,
|
||||
});
|
||||
timestamp: new Date()
|
||||
} as DataJobEvent);
|
||||
|
||||
logger.info(`🚀 Queued pipeline job: ${job.id} for pipeline: ${pipeline.name}`);
|
||||
return job;
|
||||
|
|
@ -111,15 +117,15 @@ export class DataPipelineOrchestrator {
|
|||
throw new Error(`Pipeline not found: ${job.pipelineId}`);
|
||||
}
|
||||
|
||||
const startTime = Date.now();
|
||||
job.status = JobStatus.RUNNING;
|
||||
const startTime = Date.now(); job.status = JobStatus.RUNNING;
|
||||
job.startedAt = new Date();
|
||||
|
||||
await this.eventBus.publish('data.job.started', {
|
||||
type: 'JOB_STARTED',
|
||||
jobId: job.id,
|
||||
pipelineId: job.pipelineId,
|
||||
job,
|
||||
});
|
||||
timestamp: new Date()
|
||||
} as DataJobEvent);
|
||||
|
||||
try {
|
||||
logger.info(`⚙️ Executing pipeline job: ${job.id}`);
|
||||
|
|
@ -131,30 +137,30 @@ export class DataPipelineOrchestrator {
|
|||
await this.executeQualityChecks(pipeline, job);
|
||||
|
||||
// Complete the job
|
||||
job.status = JobStatus.COMPLETED;
|
||||
job.completedAt = new Date();
|
||||
job.status = JobStatus.COMPLETED; job.completedAt = new Date();
|
||||
job.metrics.processingTimeMs = Date.now() - startTime;
|
||||
|
||||
await this.eventBus.publish('data.job.completed', {
|
||||
type: 'JOB_COMPLETED',
|
||||
jobId: job.id,
|
||||
pipelineId: job.pipelineId,
|
||||
job,
|
||||
});
|
||||
timestamp: new Date()
|
||||
} as DataJobEvent);
|
||||
|
||||
logger.info(`✅ Pipeline job completed: ${job.id} in ${job.metrics.processingTimeMs}ms`);
|
||||
|
||||
} catch (error) {
|
||||
job.status = JobStatus.FAILED;
|
||||
job.completedAt = new Date();
|
||||
job.error = error instanceof Error ? error.message : 'Unknown error';
|
||||
job.completedAt = new Date(); job.error = error instanceof Error ? error.message : 'Unknown error';
|
||||
job.metrics.processingTimeMs = Date.now() - startTime;
|
||||
|
||||
await this.eventBus.publish('data.job.failed', {
|
||||
type: 'JOB_FAILED',
|
||||
jobId: job.id,
|
||||
pipelineId: job.pipelineId,
|
||||
job,
|
||||
error: job.error,
|
||||
});
|
||||
timestamp: new Date()
|
||||
} as DataJobEvent);
|
||||
|
||||
logger.error(`❌ Pipeline job failed: ${job.id}`, error);
|
||||
throw error;
|
||||
|
|
@ -228,14 +234,15 @@ export class DataPipelineOrchestrator {
|
|||
pipeline.schedule = {
|
||||
cronExpression,
|
||||
enabled: true,
|
||||
lastRun: null,
|
||||
nextRun: this.scheduler.getNextRunTime(cronExpression),
|
||||
lastRun: null, nextRun: this.scheduler.getNextRunTime(cronExpression),
|
||||
};
|
||||
|
||||
await this.eventBus.publish('data.pipeline.scheduled', {
|
||||
type: 'PIPELINE_STARTED',
|
||||
pipelineId,
|
||||
cronExpression,
|
||||
});
|
||||
pipelineName: pipeline.name,
|
||||
timestamp: new Date()
|
||||
} as DataPipelineEvent);
|
||||
|
||||
logger.info(`📅 Scheduled pipeline: ${pipeline.name} with cron: ${cronExpression}`);
|
||||
}
|
||||
|
|
@ -280,13 +287,12 @@ export class DataPipelineOrchestrator {
|
|||
private generateJobId(): string {
|
||||
return `job_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
|
||||
}
|
||||
|
||||
async shutdown(): Promise<void> {
|
||||
logger.info('🔄 Shutting down Data Pipeline Orchestrator...');
|
||||
|
||||
await this.scheduler.shutdown();
|
||||
await this.jobQueue.shutdown();
|
||||
await this.eventBus.disconnect();
|
||||
await this.eventBus.close();
|
||||
|
||||
logger.info('✅ Data Pipeline Orchestrator shutdown complete');
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,8 +1,10 @@
|
|||
import Queue from 'bull';
|
||||
import { logger } from '@stock-bot/utils';
|
||||
import { getLogger } from '@stock-bot/logger';
|
||||
import { PipelineJob } from '../types/DataPipeline';
|
||||
import { DataPipelineOrchestrator } from './DataPipelineOrchestrator';
|
||||
|
||||
const logger = getLogger('job-queue');
|
||||
|
||||
export class JobQueue {
|
||||
private queue: Queue.Queue;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
import { CronJob } from 'cron';
|
||||
import { logger } from '@stock-bot/utils';
|
||||
import { getLogger } from '@stock-bot/logger';
|
||||
import { DataPipelineOrchestrator } from './DataPipelineOrchestrator';
|
||||
|
||||
const logger = getLogger('pipeline-scheduler');
|
||||
|
||||
export class PipelineScheduler {
|
||||
private scheduledJobs: Map<string, CronJob> = new Map();
|
||||
|
||||
|
|
@ -45,10 +47,9 @@ export class PipelineScheduler {
|
|||
logger.info(`🚫 Cancelled schedule for pipeline: ${pipelineId}`);
|
||||
}
|
||||
}
|
||||
|
||||
getNextRunTime(cronExpression: string): Date {
|
||||
const job = new CronJob(cronExpression);
|
||||
return job.nextDate().toDate();
|
||||
const job = new CronJob(cronExpression, () => {}, null, false);
|
||||
return job.nextDate().toJSDate();
|
||||
}
|
||||
|
||||
getScheduledPipelines(): string[] {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
import { logger } from '@stock-bot/utils';
|
||||
import { getLogger } from '@stock-bot/logger';
|
||||
|
||||
const logger = getLogger('DataIngestionService');
|
||||
import { IngestionStep, ProcessingResult, DataSource } from '../types/DataPipeline';
|
||||
import axios from 'axios';
|
||||
import * as csv from 'csv-parser';
|
||||
import csv from 'csv-parser';
|
||||
import * as fs from 'fs';
|
||||
|
||||
export class DataIngestionService {
|
||||
|
|
@ -112,11 +114,9 @@ export class DataIngestionService {
|
|||
return new Promise((resolve, reject) => {
|
||||
const records: any[] = [];
|
||||
const errors: any[] = [];
|
||||
let recordCount = 0;
|
||||
|
||||
fs.createReadStream(filePath)
|
||||
let recordCount = 0; fs.createReadStream(filePath)
|
||||
.pipe(csv())
|
||||
.on('data', (data) => {
|
||||
.on('data', (data: any) => {
|
||||
recordCount++;
|
||||
try {
|
||||
records.push(data);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
import { logger } from '@stock-bot/utils';
|
||||
import { getLogger } from '@stock-bot/logger';
|
||||
|
||||
const logger = getLogger('DataQualityService');
|
||||
import { QualityCheckStep, ProcessingResult, QualityCheck, QualityThresholds } from '../types/DataPipeline';
|
||||
|
||||
export class DataQualityService {
|
||||
|
|
@ -277,11 +279,12 @@ export class DataQualityService {
|
|||
private storeQualityMetrics(metrics: any): void {
|
||||
const key = `metrics_${Date.now()}`;
|
||||
this.qualityMetrics.set(key, metrics);
|
||||
|
||||
// Keep only last 100 metrics
|
||||
// Keep only last 100 metrics
|
||||
if (this.qualityMetrics.size > 100) {
|
||||
const oldestKey = this.qualityMetrics.keys().next().value;
|
||||
this.qualityMetrics.delete(oldestKey);
|
||||
if (oldestKey) {
|
||||
this.qualityMetrics.delete(oldestKey);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
import { logger } from '@stock-bot/utils';
|
||||
import { getLogger } from '@stock-bot/logger';
|
||||
|
||||
const logger = getLogger('DataTransformationService');
|
||||
import { TransformationStep, ProcessingResult } from '../types/DataPipeline';
|
||||
|
||||
export class DataTransformationService {
|
||||
|
|
@ -163,21 +165,22 @@ export class DataTransformationService {
|
|||
}
|
||||
acc[key].push(record);
|
||||
return acc;
|
||||
}, {});
|
||||
}, {} as Record<string, any[]>);
|
||||
|
||||
const aggregated = Object.entries(grouped).map(([key, records]: [string, any[]]) => {
|
||||
const aggregated = Object.entries(grouped).map(([key, records]) => {
|
||||
const recordsArray = records as any[];
|
||||
const result: any = { [groupBy]: key };
|
||||
|
||||
if (aggregations.includes('avg')) {
|
||||
result.avgPrice = records.reduce((sum, r) => sum + (r.price || 0), 0) / records.length;
|
||||
result.avgPrice = recordsArray.reduce((sum: number, r: any) => sum + (r.price || 0), 0) / recordsArray.length;
|
||||
}
|
||||
|
||||
if (aggregations.includes('sum')) {
|
||||
result.totalVolume = records.reduce((sum, r) => sum + (r.volume || 0), 0);
|
||||
result.totalVolume = recordsArray.reduce((sum: number, r: any) => sum + (r.volume || 0), 0);
|
||||
}
|
||||
|
||||
if (aggregations.includes('count')) {
|
||||
result.count = records.length;
|
||||
result.count = recordsArray.length;
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
import { logger } from '@stock-bot/utils';
|
||||
import { getLogger } from '@stock-bot/logger';
|
||||
|
||||
const logger = getLogger('DataValidationService');
|
||||
import { ValidationStep, ProcessingResult, ValidationRule } from '../types/DataPipeline';
|
||||
import Joi from 'joi';
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue