adding data-services
This commit is contained in:
parent
e3bfd05b90
commit
405b818c86
139 changed files with 55943 additions and 416 deletions
|
|
@ -0,0 +1,293 @@
|
|||
import { EventBus } from '@stock-bot/event-bus';
|
||||
import { logger } from '@stock-bot/utils';
|
||||
import { DataPipeline, PipelineStatus, PipelineJob, JobStatus } from '../types/DataPipeline';
|
||||
import { DataIngestionService } from '../services/DataIngestionService';
|
||||
import { DataTransformationService } from '../services/DataTransformationService';
|
||||
import { DataValidationService } from '../services/DataValidationService';
|
||||
import { DataQualityService } from '../services/DataQualityService';
|
||||
import { PipelineScheduler } from './PipelineScheduler';
|
||||
import { JobQueue } from './JobQueue';
|
||||
|
||||
export class DataPipelineOrchestrator {
|
||||
private eventBus: EventBus;
|
||||
private scheduler: PipelineScheduler;
|
||||
private jobQueue: JobQueue;
|
||||
private pipelines: Map<string, DataPipeline> = new Map();
|
||||
private runningJobs: Map<string, PipelineJob> = new Map();
|
||||
|
||||
constructor(
|
||||
private ingestionService: DataIngestionService,
|
||||
private transformationService: DataTransformationService,
|
||||
private validationService: DataValidationService,
|
||||
private qualityService: DataQualityService
|
||||
) {
|
||||
this.eventBus = new EventBus();
|
||||
this.scheduler = new PipelineScheduler(this);
|
||||
this.jobQueue = new JobQueue(this);
|
||||
}
|
||||
|
||||
async initialize(): Promise<void> {
|
||||
logger.info('🔄 Initializing Data Pipeline Orchestrator...');
|
||||
|
||||
await this.eventBus.initialize();
|
||||
await this.scheduler.initialize();
|
||||
await this.jobQueue.initialize();
|
||||
|
||||
// Subscribe to pipeline events
|
||||
await this.eventBus.subscribe('data.pipeline.*', this.handlePipelineEvent.bind(this));
|
||||
await this.eventBus.subscribe('data.job.*', this.handleJobEvent.bind(this));
|
||||
|
||||
// Load existing pipelines
|
||||
await this.loadPipelines();
|
||||
|
||||
logger.info('✅ Data Pipeline Orchestrator initialized');
|
||||
}
|
||||
|
||||
async createPipeline(pipeline: Omit<DataPipeline, 'id' | 'createdAt' | 'updatedAt'>): Promise<DataPipeline> {
|
||||
const pipelineWithId: DataPipeline = {
|
||||
...pipeline,
|
||||
id: this.generatePipelineId(),
|
||||
status: PipelineStatus.DRAFT,
|
||||
createdAt: new Date(),
|
||||
updatedAt: new Date(),
|
||||
};
|
||||
|
||||
this.pipelines.set(pipelineWithId.id, pipelineWithId);
|
||||
|
||||
await this.eventBus.publish('data.pipeline.created', {
|
||||
pipelineId: pipelineWithId.id,
|
||||
pipeline: pipelineWithId,
|
||||
});
|
||||
|
||||
logger.info(`📋 Created pipeline: ${pipelineWithId.name} (${pipelineWithId.id})`);
|
||||
return pipelineWithId;
|
||||
}
|
||||
|
||||
async runPipeline(pipelineId: string, parameters?: Record<string, any>): Promise<PipelineJob> {
|
||||
const pipeline = this.pipelines.get(pipelineId);
|
||||
if (!pipeline) {
|
||||
throw new Error(`Pipeline not found: ${pipelineId}`);
|
||||
}
|
||||
|
||||
if (pipeline.status !== PipelineStatus.ACTIVE) {
|
||||
throw new Error(`Pipeline is not active: ${pipeline.status}`);
|
||||
}
|
||||
|
||||
const job: PipelineJob = {
|
||||
id: this.generateJobId(),
|
||||
pipelineId,
|
||||
status: JobStatus.PENDING,
|
||||
parameters: parameters || {},
|
||||
createdAt: new Date(),
|
||||
startedAt: null,
|
||||
completedAt: null,
|
||||
error: null,
|
||||
metrics: {
|
||||
recordsProcessed: 0,
|
||||
recordsSuccessful: 0,
|
||||
recordsFailed: 0,
|
||||
processingTimeMs: 0,
|
||||
},
|
||||
};
|
||||
|
||||
this.runningJobs.set(job.id, job);
|
||||
|
||||
// Queue the job for execution
|
||||
await this.jobQueue.enqueueJob(job);
|
||||
|
||||
await this.eventBus.publish('data.job.queued', {
|
||||
jobId: job.id,
|
||||
pipelineId,
|
||||
job,
|
||||
});
|
||||
|
||||
logger.info(`🚀 Queued pipeline job: ${job.id} for pipeline: ${pipeline.name}`);
|
||||
return job;
|
||||
}
|
||||
|
||||
async executePipelineJob(job: PipelineJob): Promise<void> {
|
||||
const pipeline = this.pipelines.get(job.pipelineId);
|
||||
if (!pipeline) {
|
||||
throw new Error(`Pipeline not found: ${job.pipelineId}`);
|
||||
}
|
||||
|
||||
const startTime = Date.now();
|
||||
job.status = JobStatus.RUNNING;
|
||||
job.startedAt = new Date();
|
||||
|
||||
await this.eventBus.publish('data.job.started', {
|
||||
jobId: job.id,
|
||||
pipelineId: job.pipelineId,
|
||||
job,
|
||||
});
|
||||
|
||||
try {
|
||||
logger.info(`⚙️ Executing pipeline job: ${job.id}`);
|
||||
|
||||
// Execute pipeline steps
|
||||
await this.executeIngestionStep(pipeline, job);
|
||||
await this.executeTransformationStep(pipeline, job);
|
||||
await this.executeValidationStep(pipeline, job);
|
||||
await this.executeQualityChecks(pipeline, job);
|
||||
|
||||
// Complete the job
|
||||
job.status = JobStatus.COMPLETED;
|
||||
job.completedAt = new Date();
|
||||
job.metrics.processingTimeMs = Date.now() - startTime;
|
||||
|
||||
await this.eventBus.publish('data.job.completed', {
|
||||
jobId: job.id,
|
||||
pipelineId: job.pipelineId,
|
||||
job,
|
||||
});
|
||||
|
||||
logger.info(`✅ Pipeline job completed: ${job.id} in ${job.metrics.processingTimeMs}ms`);
|
||||
|
||||
} catch (error) {
|
||||
job.status = JobStatus.FAILED;
|
||||
job.completedAt = new Date();
|
||||
job.error = error instanceof Error ? error.message : 'Unknown error';
|
||||
job.metrics.processingTimeMs = Date.now() - startTime;
|
||||
|
||||
await this.eventBus.publish('data.job.failed', {
|
||||
jobId: job.id,
|
||||
pipelineId: job.pipelineId,
|
||||
job,
|
||||
error: job.error,
|
||||
});
|
||||
|
||||
logger.error(`❌ Pipeline job failed: ${job.id}`, error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
private async executeIngestionStep(pipeline: DataPipeline, job: PipelineJob): Promise<void> {
|
||||
if (!pipeline.steps.ingestion) return;
|
||||
|
||||
logger.info(`📥 Executing ingestion step for job: ${job.id}`);
|
||||
|
||||
const result = await this.ingestionService.ingestData(
|
||||
pipeline.steps.ingestion,
|
||||
job.parameters
|
||||
);
|
||||
|
||||
job.metrics.recordsProcessed += result.recordsProcessed;
|
||||
job.metrics.recordsSuccessful += result.recordsSuccessful;
|
||||
job.metrics.recordsFailed += result.recordsFailed;
|
||||
}
|
||||
|
||||
private async executeTransformationStep(pipeline: DataPipeline, job: PipelineJob): Promise<void> {
|
||||
if (!pipeline.steps.transformation) return;
|
||||
|
||||
logger.info(`🔄 Executing transformation step for job: ${job.id}`);
|
||||
|
||||
const result = await this.transformationService.transformData(
|
||||
pipeline.steps.transformation,
|
||||
job.parameters
|
||||
);
|
||||
|
||||
job.metrics.recordsProcessed += result.recordsProcessed;
|
||||
job.metrics.recordsSuccessful += result.recordsSuccessful;
|
||||
job.metrics.recordsFailed += result.recordsFailed;
|
||||
}
|
||||
|
||||
private async executeValidationStep(pipeline: DataPipeline, job: PipelineJob): Promise<void> {
|
||||
if (!pipeline.steps.validation) return;
|
||||
|
||||
logger.info(`✅ Executing validation step for job: ${job.id}`);
|
||||
|
||||
const result = await this.validationService.validateData(
|
||||
pipeline.steps.validation,
|
||||
job.parameters
|
||||
);
|
||||
|
||||
job.metrics.recordsProcessed += result.recordsProcessed;
|
||||
job.metrics.recordsSuccessful += result.recordsSuccessful;
|
||||
job.metrics.recordsFailed += result.recordsFailed;
|
||||
}
|
||||
|
||||
private async executeQualityChecks(pipeline: DataPipeline, job: PipelineJob): Promise<void> {
|
||||
if (!pipeline.steps.qualityChecks) return;
|
||||
|
||||
logger.info(`🔍 Executing quality checks for job: ${job.id}`);
|
||||
|
||||
await this.qualityService.runQualityChecks(
|
||||
pipeline.steps.qualityChecks,
|
||||
job.parameters
|
||||
);
|
||||
}
|
||||
|
||||
async schedulePipeline(pipelineId: string, cronExpression: string): Promise<void> {
|
||||
const pipeline = this.pipelines.get(pipelineId);
|
||||
if (!pipeline) {
|
||||
throw new Error(`Pipeline not found: ${pipelineId}`);
|
||||
}
|
||||
|
||||
await this.scheduler.schedulePipeline(pipelineId, cronExpression);
|
||||
|
||||
pipeline.schedule = {
|
||||
cronExpression,
|
||||
enabled: true,
|
||||
lastRun: null,
|
||||
nextRun: this.scheduler.getNextRunTime(cronExpression),
|
||||
};
|
||||
|
||||
await this.eventBus.publish('data.pipeline.scheduled', {
|
||||
pipelineId,
|
||||
cronExpression,
|
||||
});
|
||||
|
||||
logger.info(`📅 Scheduled pipeline: ${pipeline.name} with cron: ${cronExpression}`);
|
||||
}
|
||||
|
||||
// Pipeline CRUD operations
|
||||
getPipeline(pipelineId: string): DataPipeline | undefined {
|
||||
return this.pipelines.get(pipelineId);
|
||||
}
|
||||
|
||||
listPipelines(): DataPipeline[] {
|
||||
return Array.from(this.pipelines.values());
|
||||
}
|
||||
|
||||
getJob(jobId: string): PipelineJob | undefined {
|
||||
return this.runningJobs.get(jobId);
|
||||
}
|
||||
|
||||
listJobs(pipelineId?: string): PipelineJob[] {
|
||||
const jobs = Array.from(this.runningJobs.values());
|
||||
return pipelineId ? jobs.filter(job => job.pipelineId === pipelineId) : jobs;
|
||||
}
|
||||
|
||||
private async handlePipelineEvent(event: any): Promise<void> {
|
||||
logger.debug('📨 Received pipeline event:', event);
|
||||
// Handle pipeline-level events
|
||||
}
|
||||
|
||||
private async handleJobEvent(event: any): Promise<void> {
|
||||
logger.debug('📨 Received job event:', event);
|
||||
// Handle job-level events
|
||||
}
|
||||
|
||||
private async loadPipelines(): Promise<void> {
|
||||
// In a real implementation, load pipelines from persistent storage
|
||||
logger.info('📂 Loading existing pipelines...');
|
||||
}
|
||||
|
||||
private generatePipelineId(): string {
|
||||
return `pipeline_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
|
||||
}
|
||||
|
||||
private generateJobId(): string {
|
||||
return `job_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
|
||||
}
|
||||
|
||||
async shutdown(): Promise<void> {
|
||||
logger.info('🔄 Shutting down Data Pipeline Orchestrator...');
|
||||
|
||||
await this.scheduler.shutdown();
|
||||
await this.jobQueue.shutdown();
|
||||
await this.eventBus.disconnect();
|
||||
|
||||
logger.info('✅ Data Pipeline Orchestrator shutdown complete');
|
||||
}
|
||||
}
|
||||
77
apps/data-services/data-processor/src/core/JobQueue.ts
Normal file
77
apps/data-services/data-processor/src/core/JobQueue.ts
Normal file
|
|
@ -0,0 +1,77 @@
|
|||
import Queue from 'bull';
|
||||
import { logger } from '@stock-bot/utils';
|
||||
import { PipelineJob } from '../types/DataPipeline';
|
||||
import { DataPipelineOrchestrator } from './DataPipelineOrchestrator';
|
||||
|
||||
export class JobQueue {
|
||||
private queue: Queue.Queue;
|
||||
|
||||
constructor(private orchestrator: DataPipelineOrchestrator) {
|
||||
this.queue = new Queue('data-pipeline-jobs', {
|
||||
redis: {
|
||||
host: process.env.REDIS_HOST || 'localhost',
|
||||
port: parseInt(process.env.REDIS_PORT || '6379'),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
async initialize(): Promise<void> {
|
||||
logger.info('🔄 Initializing Job Queue...');
|
||||
|
||||
// Process jobs with a maximum of 5 concurrent jobs
|
||||
this.queue.process('pipeline-job', 5, async (job) => {
|
||||
const pipelineJob: PipelineJob = job.data;
|
||||
await this.orchestrator.executePipelineJob(pipelineJob);
|
||||
});
|
||||
|
||||
// Handle job events
|
||||
this.queue.on('completed', (job) => {
|
||||
logger.info(`✅ Job completed: ${job.id}`);
|
||||
});
|
||||
|
||||
this.queue.on('failed', (job, error) => {
|
||||
logger.error(`❌ Job failed: ${job.id}`, error);
|
||||
});
|
||||
|
||||
this.queue.on('stalled', (job) => {
|
||||
logger.warn(`⚠️ Job stalled: ${job.id}`);
|
||||
});
|
||||
|
||||
logger.info('✅ Job Queue initialized');
|
||||
}
|
||||
|
||||
async enqueueJob(job: PipelineJob): Promise<void> {
|
||||
await this.queue.add('pipeline-job', job, {
|
||||
jobId: job.id,
|
||||
removeOnComplete: 100, // Keep last 100 completed jobs
|
||||
removeOnFail: 50, // Keep last 50 failed jobs
|
||||
attempts: 3, // Retry failed jobs up to 3 times
|
||||
backoff: {
|
||||
type: 'exponential',
|
||||
delay: 2000,
|
||||
},
|
||||
});
|
||||
|
||||
logger.info(`📤 Enqueued job: ${job.id}`);
|
||||
}
|
||||
|
||||
async getJobStats(): Promise<any> {
|
||||
const waiting = await this.queue.getWaiting();
|
||||
const active = await this.queue.getActive();
|
||||
const completed = await this.queue.getCompleted();
|
||||
const failed = await this.queue.getFailed();
|
||||
|
||||
return {
|
||||
waiting: waiting.length,
|
||||
active: active.length,
|
||||
completed: completed.length,
|
||||
failed: failed.length,
|
||||
};
|
||||
}
|
||||
|
||||
async shutdown(): Promise<void> {
|
||||
logger.info('🔄 Shutting down Job Queue...');
|
||||
await this.queue.close();
|
||||
logger.info('✅ Job Queue shutdown complete');
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,69 @@
|
|||
import { CronJob } from 'cron';
|
||||
import { logger } from '@stock-bot/utils';
|
||||
import { DataPipelineOrchestrator } from './DataPipelineOrchestrator';
|
||||
|
||||
export class PipelineScheduler {
|
||||
private scheduledJobs: Map<string, CronJob> = new Map();
|
||||
|
||||
constructor(private orchestrator: DataPipelineOrchestrator) {}
|
||||
|
||||
async initialize(): Promise<void> {
|
||||
logger.info('🔄 Initializing Pipeline Scheduler...');
|
||||
logger.info('✅ Pipeline Scheduler initialized');
|
||||
}
|
||||
|
||||
async schedulePipeline(pipelineId: string, cronExpression: string): Promise<void> {
|
||||
// Cancel existing schedule if it exists
|
||||
if (this.scheduledJobs.has(pipelineId)) {
|
||||
this.cancelSchedule(pipelineId);
|
||||
}
|
||||
|
||||
const cronJob = new CronJob(
|
||||
cronExpression,
|
||||
async () => {
|
||||
try {
|
||||
logger.info(`⏰ Scheduled execution triggered for pipeline: ${pipelineId}`);
|
||||
await this.orchestrator.runPipeline(pipelineId);
|
||||
} catch (error) {
|
||||
logger.error(`❌ Scheduled pipeline execution failed: ${pipelineId}`, error);
|
||||
}
|
||||
},
|
||||
null,
|
||||
true, // Start immediately
|
||||
'UTC'
|
||||
);
|
||||
|
||||
this.scheduledJobs.set(pipelineId, cronJob);
|
||||
logger.info(`📅 Scheduled pipeline ${pipelineId} with cron: ${cronExpression}`);
|
||||
}
|
||||
|
||||
cancelSchedule(pipelineId: string): void {
|
||||
const job = this.scheduledJobs.get(pipelineId);
|
||||
if (job) {
|
||||
job.stop();
|
||||
this.scheduledJobs.delete(pipelineId);
|
||||
logger.info(`🚫 Cancelled schedule for pipeline: ${pipelineId}`);
|
||||
}
|
||||
}
|
||||
|
||||
getNextRunTime(cronExpression: string): Date {
|
||||
const job = new CronJob(cronExpression);
|
||||
return job.nextDate().toDate();
|
||||
}
|
||||
|
||||
getScheduledPipelines(): string[] {
|
||||
return Array.from(this.scheduledJobs.keys());
|
||||
}
|
||||
|
||||
async shutdown(): Promise<void> {
|
||||
logger.info('🔄 Shutting down Pipeline Scheduler...');
|
||||
|
||||
for (const [pipelineId, job] of this.scheduledJobs) {
|
||||
job.stop();
|
||||
logger.info(`🚫 Stopped scheduled job for pipeline: ${pipelineId}`);
|
||||
}
|
||||
|
||||
this.scheduledJobs.clear();
|
||||
logger.info('✅ Pipeline Scheduler shutdown complete');
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue