work up marketdatagateway

This commit is contained in:
Bojan Kucera 2025-06-03 19:43:22 -04:00
parent 58ae897e90
commit d22f7aafa0
17 changed files with 653 additions and 494 deletions

View file

@@ -1,5 +1,7 @@
 import { Context } from 'hono';
-import { logger } from '@stock-bot/utils';
+import { getLogger } from '@stock-bot/logger';
+const logger = getLogger('HealthController');
 export class HealthController {
   async getHealth(c: Context): Promise<Response> {
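This logger swap is the backbone of the commit and repeats in every file below: the ambient `logger` from `@stock-bot/utils` is replaced with a per-module named logger from `@stock-bot/logger`. That package is not part of this diff; a minimal sketch of the factory its call sites imply (every detail here is an assumption) could be:

// Hypothetical sketch only — the real @stock-bot/logger is not shown in this commit.
export interface Logger {
  info(message: string, ...meta: unknown[]): void;
  error(message: string, ...meta: unknown[]): void;
}

export function getLogger(name: string): Logger {
  const prefix = () => `[${new Date().toISOString()}] [${name}]`;
  return {
    info: (message, ...meta) => console.log(prefix(), message, ...meta),
    error: (message, ...meta) => console.error(prefix(), message, ...meta),
  };
}

The practical gain is that every log line carries its module name ('HealthController', 'job-queue', ...) instead of coming from one shared ambient logger.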

View file

@@ -1,5 +1,7 @@
 import { Context } from 'hono';
-import { logger } from '@stock-bot/utils';
+import { getLogger } from '@stock-bot/logger';
+const logger = getLogger('JobController');
 import { DataPipelineOrchestrator } from '../core/DataPipelineOrchestrator';
 import { JobStatus } from '../types/DataPipeline';

View file

@@ -1,8 +1,10 @@
 import { Context } from 'hono';
-import { logger } from '@stock-bot/utils';
+import { getLogger } from '@stock-bot/logger';
 import { DataPipelineOrchestrator } from '../core/DataPipelineOrchestrator';
 import { DataPipeline, PipelineStatus } from '../types/DataPipeline';
+const logger = getLogger('pipeline-controller');
 export class PipelineController {
   constructor(private orchestrator: DataPipelineOrchestrator) {}

View file

@@ -1,5 +1,6 @@
-import { EventBus } from '@stock-bot/event-bus';
-import { logger } from '@stock-bot/utils';
+import { EventBus, EventBusConfig } from '@stock-bot/event-bus';
+import { DataPipelineEvent, DataJobEvent } from '@stock-bot/types';
+import { getLogger } from '@stock-bot/logger';
 import { DataPipeline, PipelineStatus, PipelineJob, JobStatus } from '../types/DataPipeline';
 import { DataIngestionService } from '../services/DataIngestionService';
 import { DataTransformationService } from '../services/DataTransformationService';
@@ -8,34 +9,39 @@ import { DataQualityService } from '../services/DataQualityService';
 import { PipelineScheduler } from './PipelineScheduler';
 import { JobQueue } from './JobQueue';
+const logger = getLogger('data-pipeline-orchestrator');
 export class DataPipelineOrchestrator {
   private eventBus: EventBus;
   private scheduler: PipelineScheduler;
   private jobQueue: JobQueue;
   private pipelines: Map<string, DataPipeline> = new Map();
   private runningJobs: Map<string, PipelineJob> = new Map();
   constructor(
     private ingestionService: DataIngestionService,
     private transformationService: DataTransformationService,
     private validationService: DataValidationService,
     private qualityService: DataQualityService
   ) {
-    this.eventBus = new EventBus();
+    const eventBusConfig: EventBusConfig = {
+      redisHost: process.env.REDIS_HOST || 'localhost',
+      redisPort: parseInt(process.env.REDIS_PORT || '6379'),
+      redisPassword: process.env.REDIS_PASSWORD
+    };
+    this.eventBus = new EventBus(eventBusConfig);
     this.scheduler = new PipelineScheduler(this);
     this.jobQueue = new JobQueue(this);
   }
   async initialize(): Promise<void> {
     logger.info('🔄 Initializing Data Pipeline Orchestrator...');
-    await this.eventBus.initialize();
+    // EventBus doesn't have initialize method, it connects automatically
     await this.scheduler.initialize();
     await this.jobQueue.initialize();
     // Subscribe to pipeline events
-    await this.eventBus.subscribe('data.pipeline.*', this.handlePipelineEvent.bind(this));
-    await this.eventBus.subscribe('data.job.*', this.handleJobEvent.bind(this));
+    this.eventBus.subscribe('data.pipeline.*', this.handlePipelineEvent.bind(this));
+    this.eventBus.subscribe('data.job.*', this.handleJobEvent.bind(this));
     // Load existing pipelines
     await this.loadPipelines();
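The constructor now builds an explicit EventBusConfig from environment variables instead of relying on a zero-argument EventBus. A hedged variant that hardens the parsing (eventBusConfigFromEnv is hypothetical, not part of this commit):

function eventBusConfigFromEnv(): EventBusConfig {
  // Same defaults as the constructor above, with an explicit radix and NaN fallback.
  const port = Number.parseInt(process.env.REDIS_PORT ?? '6379', 10);
  return {
    redisHost: process.env.REDIS_HOST || 'localhost',
    redisPort: Number.isNaN(port) ? 6379 : port,
    redisPassword: process.env.REDIS_PASSWORD,
  };
}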
@@ -50,14 +56,14 @@ export class DataPipelineOrchestrator {
       status: PipelineStatus.DRAFT,
       createdAt: new Date(),
       updatedAt: new Date(),
-    };
-    this.pipelines.set(pipelineWithId.id, pipelineWithId);
+    }; this.pipelines.set(pipelineWithId.id, pipelineWithId);
     await this.eventBus.publish('data.pipeline.created', {
       type: 'PIPELINE_CREATED',
       pipelineId: pipelineWithId.id,
-      pipeline: pipelineWithId,
-    });
+      pipelineName: pipelineWithId.name,
+      timestamp: new Date()
+    } as DataPipelineEvent);
     logger.info(`📋 Created pipeline: ${pipelineWithId.name} (${pipelineWithId.id})`);
     return pipelineWithId;
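The publish payloads are now cast to shared event types from @stock-bot/types, which this diff does not show. Inferred from the publish calls across this file alone, they would need at least the shapes below; the real definitions may carry more fields. Worth flagging: 'data.job.queued' is published with type 'JOB_STARTED' and 'data.pipeline.scheduled' with type 'PIPELINE_STARTED', which suggests the unions lack dedicated QUEUED/SCHEDULED members.

// Inferred from call sites in this commit only; not the actual definitions.
interface DataPipelineEvent {
  type: 'PIPELINE_CREATED' | 'PIPELINE_STARTED';
  pipelineId: string;
  pipelineName: string;
  cronExpression?: string; // present only on the scheduled event
  timestamp: Date;
}

interface DataJobEvent {
  type: 'JOB_STARTED' | 'JOB_COMPLETED' | 'JOB_FAILED';
  jobId: string;
  pipelineId: string;
  timestamp: Date;
}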
@@ -91,15 +97,15 @@ export class DataPipelineOrchestrator {
     };
     this.runningJobs.set(job.id, job);
-    // Queue the job for execution
+    // Queue the job for execution
     await this.jobQueue.enqueueJob(job);
     await this.eventBus.publish('data.job.queued', {
       type: 'JOB_STARTED',
       jobId: job.id,
       pipelineId,
-      job,
-    });
+      timestamp: new Date()
+    } as DataJobEvent);
     logger.info(`🚀 Queued pipeline job: ${job.id} for pipeline: ${pipeline.name}`);
     return job;
@@ -111,15 +117,15 @@ export class DataPipelineOrchestrator {
       throw new Error(`Pipeline not found: ${job.pipelineId}`);
     }
-    const startTime = Date.now();
-    job.status = JobStatus.RUNNING;
+    const startTime = Date.now(); job.status = JobStatus.RUNNING;
     job.startedAt = new Date();
     await this.eventBus.publish('data.job.started', {
       type: 'JOB_STARTED',
       jobId: job.id,
       pipelineId: job.pipelineId,
-      job,
-    });
+      timestamp: new Date()
+    } as DataJobEvent);
     try {
       logger.info(`⚙️ Executing pipeline job: ${job.id}`);
@@ -131,30 +137,30 @@ export class DataPipelineOrchestrator {
       await this.executeQualityChecks(pipeline, job);
       // Complete the job
-      job.status = JobStatus.COMPLETED;
-      job.completedAt = new Date();
+      job.status = JobStatus.COMPLETED; job.completedAt = new Date();
       job.metrics.processingTimeMs = Date.now() - startTime;
       await this.eventBus.publish('data.job.completed', {
         type: 'JOB_COMPLETED',
         jobId: job.id,
         pipelineId: job.pipelineId,
-        job,
-      });
+        timestamp: new Date()
+      } as DataJobEvent);
       logger.info(`✅ Pipeline job completed: ${job.id} in ${job.metrics.processingTimeMs}ms`);
     } catch (error) {
       job.status = JobStatus.FAILED;
-      job.completedAt = new Date();
-      job.error = error instanceof Error ? error.message : 'Unknown error';
+      job.completedAt = new Date(); job.error = error instanceof Error ? error.message : 'Unknown error';
       job.metrics.processingTimeMs = Date.now() - startTime;
       await this.eventBus.publish('data.job.failed', {
         type: 'JOB_FAILED',
         jobId: job.id,
         pipelineId: job.pipelineId,
-        job,
-        error: job.error,
-      });
+        timestamp: new Date()
+      } as DataJobEvent);
       logger.error(`❌ Pipeline job failed: ${job.id}`, error);
       throw error;
@@ -228,14 +234,15 @@ export class DataPipelineOrchestrator {
     pipeline.schedule = {
       cronExpression,
       enabled: true,
-      lastRun: null,
-      nextRun: this.scheduler.getNextRunTime(cronExpression),
+      lastRun: null, nextRun: this.scheduler.getNextRunTime(cronExpression),
     };
     await this.eventBus.publish('data.pipeline.scheduled', {
       type: 'PIPELINE_STARTED',
       pipelineId,
       cronExpression,
-    });
+      pipelineName: pipeline.name,
+      timestamp: new Date()
+    } as DataPipelineEvent);
     logger.info(`📅 Scheduled pipeline: ${pipeline.name} with cron: ${cronExpression}`);
   }
@@ -280,13 +287,12 @@ export class DataPipelineOrchestrator {
   private generateJobId(): string {
     return `job_${Date.now()}_${Math.random().toString(36).substr(2, 9)}`;
   }
   async shutdown(): Promise<void> {
     logger.info('🔄 Shutting down Data Pipeline Orchestrator...');
     await this.scheduler.shutdown();
     await this.jobQueue.shutdown();
-    await this.eventBus.disconnect();
+    await this.eventBus.close();
     logger.info('✅ Data Pipeline Orchestrator shutdown complete');
   }
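Taken together, the orchestrator changes assume an EventBus surface where subscribe is synchronous (the awaits were dropped), publish stays async, and close() replaces disconnect(). Inferred from usage in this file only; the real @stock-bot/event-bus interface may differ:

// The contract this file now relies on, reconstructed from its call sites.
interface EventBusLike {
  subscribe(pattern: string, handler: (event: unknown) => void): void; // fire-and-forget
  publish(topic: string, event: DataPipelineEvent | DataJobEvent): Promise<void>;
  close(): Promise<void>; // replaces the old disconnect()
}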

View file

@@ -1,8 +1,10 @@
 import Queue from 'bull';
-import { logger } from '@stock-bot/utils';
+import { getLogger } from '@stock-bot/logger';
 import { PipelineJob } from '../types/DataPipeline';
 import { DataPipelineOrchestrator } from './DataPipelineOrchestrator';
+const logger = getLogger('job-queue');
 export class JobQueue {
   private queue: Queue.Queue;
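JobQueue wraps a bull queue backed by Redis. A minimal usage sketch of that wrapper's shape follows; the queue name, Redis URL, and processor body are assumptions for illustration, since none of them appear in this diff:

import Queue from 'bull';

// All names below are hypothetical.
const jobs = new Queue('pipeline-jobs', process.env.REDIS_URL || 'redis://localhost:6379');

jobs.process(async (job) => {
  // In JobQueue this is presumably where the orchestrator executes the PipelineJob.
  return job.data;
});

void jobs.add({ jobId: 'job_123' }); // enqueue, mirroring enqueueJob above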

View file

@@ -1,7 +1,9 @@
 import { CronJob } from 'cron';
-import { logger } from '@stock-bot/utils';
+import { getLogger } from '@stock-bot/logger';
 import { DataPipelineOrchestrator } from './DataPipelineOrchestrator';
+const logger = getLogger('pipeline-scheduler');
 export class PipelineScheduler {
   private scheduledJobs: Map<string, CronJob> = new Map();
@@ -45,10 +47,9 @@ export class PipelineScheduler {
       logger.info(`🚫 Cancelled schedule for pipeline: ${pipelineId}`);
     }
   }
   getNextRunTime(cronExpression: string): Date {
-    const job = new CronJob(cronExpression);
-    return job.nextDate().toDate();
+    const job = new CronJob(cronExpression, () => {}, null, false);
+    return job.nextDate().toJSDate();
   }
   getScheduledPipelines(): string[] {
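The getNextRunTime change tracks an API break in the cron package: from v2 on, nextDate() returns a Luxon DateTime, so the conversion is toJSDate() rather than the old Moment toDate(), and the constructor expects an onTick callback plus an explicit start flag so the probe job never actually runs. A standalone check, assuming cron v2+:

import { CronJob } from 'cron';

// start = false: the job exists only to compute its next fire time.
const probe = new CronJob('0 9 * * 1-5', () => {}, null, false);
console.log(probe.nextDate().toJSDate()); // next weekday 09:00 as a plain Date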

View file

@@ -1,7 +1,9 @@
-import { logger } from '@stock-bot/utils';
+import { getLogger } from '@stock-bot/logger';
+const logger = getLogger('DataIngestionService');
 import { IngestionStep, ProcessingResult, DataSource } from '../types/DataPipeline';
 import axios from 'axios';
-import * as csv from 'csv-parser';
+import csv from 'csv-parser';
 import * as fs from 'fs';
 export class DataIngestionService {
@@ -112,11 +114,9 @@ export class DataIngestionService {
     return new Promise((resolve, reject) => {
       const records: any[] = [];
       const errors: any[] = [];
-      let recordCount = 0;
-      fs.createReadStream(filePath)
+      let recordCount = 0; fs.createReadStream(filePath)
         .pipe(csv())
-        .on('data', (data) => {
+        .on('data', (data: any) => {
           recordCount++;
           try {
             records.push(data);
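The import fix matters because csv-parser's sole export is a factory function: with esModuleInterop, `import csv from 'csv-parser'` is the callable form, whereas `import * as csv` produces a namespace object that is not callable under strict settings. Minimal standalone usage, with 'sample.csv' as a placeholder path:

import csv from 'csv-parser';
import * as fs from 'fs';

fs.createReadStream('sample.csv') // placeholder
  .pipe(csv())
  .on('data', (row: Record<string, string>) => console.log(row))
  .on('end', () => console.log('parse complete'));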

View file

@@ -1,4 +1,6 @@
-import { logger } from '@stock-bot/utils';
+import { getLogger } from '@stock-bot/logger';
+const logger = getLogger('DataQualityService');
 import { QualityCheckStep, ProcessingResult, QualityCheck, QualityThresholds } from '../types/DataPipeline';
 export class DataQualityService {
@@ -277,11 +279,12 @@ export class DataQualityService {
   private storeQualityMetrics(metrics: any): void {
     const key = `metrics_${Date.now()}`;
     this.qualityMetrics.set(key, metrics);
-    // Keep only last 100 metrics
+    // Keep only last 100 metrics
     if (this.qualityMetrics.size > 100) {
       const oldestKey = this.qualityMetrics.keys().next().value;
-      this.qualityMetrics.delete(oldestKey);
+      if (oldestKey) {
+        this.qualityMetrics.delete(oldestKey);
+      }
     }
   }
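The new guard exists because Map.prototype.keys().next().value is typed as possibly undefined under recent TypeScript lib definitions, so the unguarded delete no longer compiles under strict settings. The eviction itself works because Map preserves insertion order: the first key is always the oldest entry. A generic sketch of the same pattern (not part of this commit):

// Bounded-map eviction, generalized from storeQualityMetrics above.
function evictOldest<K, V>(map: Map<K, V>, maxSize: number): void {
  while (map.size > maxSize) {
    const oldestKey = map.keys().next().value; // oldest = first inserted
    if (oldestKey === undefined) break; // satisfies strict typings
    map.delete(oldestKey);
  }
}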

View file

@@ -1,4 +1,6 @@
-import { logger } from '@stock-bot/utils';
+import { getLogger } from '@stock-bot/logger';
+const logger = getLogger('DataTransformationService');
 import { TransformationStep, ProcessingResult } from '../types/DataPipeline';
 export class DataTransformationService {
@@ -163,21 +165,22 @@ export class DataTransformationService {
       }
       acc[key].push(record);
       return acc;
-    }, {});
-    const aggregated = Object.entries(grouped).map(([key, records]: [string, any[]]) => {
+    }, {} as Record<string, any[]>);
+    const aggregated = Object.entries(grouped).map(([key, records]) => {
+      const recordsArray = records as any[];
       const result: any = { [groupBy]: key };
       if (aggregations.includes('avg')) {
-        result.avgPrice = records.reduce((sum, r) => sum + (r.price || 0), 0) / records.length;
+        result.avgPrice = recordsArray.reduce((sum: number, r: any) => sum + (r.price || 0), 0) / recordsArray.length;
       }
       if (aggregations.includes('sum')) {
-        result.totalVolume = records.reduce((sum, r) => sum + (r.volume || 0), 0);
+        result.totalVolume = recordsArray.reduce((sum: number, r: any) => sum + (r.volume || 0), 0);
       }
       if (aggregations.includes('count')) {
-        result.count = records.length;
+        result.count = recordsArray.length;
       }
       return result;
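Typing the reduce seed as Record<string, any[]> is what lets acc[key].push(record) type-check; a side effect is that Object.entries(grouped) then yields [string, any[]] pairs anyway, so the recordsArray cast above could arguably be dropped. A self-contained sketch of the same grouping step (groupRecords is hypothetical, not part of this commit):

// Standalone version of the grouping logic above.
function groupRecords(records: any[], groupBy: string): Record<string, any[]> {
  return records.reduce((acc, record) => {
    const key = String(record[groupBy] ?? 'unknown');
    (acc[key] ??= []).push(record); // create the bucket on first sight
    return acc;
  }, {} as Record<string, any[]>);
}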

View file

@@ -1,4 +1,6 @@
-import { logger } from '@stock-bot/utils';
+import { getLogger } from '@stock-bot/logger';
+const logger = getLogger('DataValidationService');
 import { ValidationStep, ProcessingResult, ValidationRule } from '../types/DataPipeline';
 import Joi from 'joi';