import { getLogger } from '@stock-bot/logger';
import { IngestionStep, ProcessingResult, DataSource } from '../types/DataPipeline';
import axios from 'axios';
import csv from 'csv-parser';
import * as fs from 'fs';

const logger = getLogger('DataIngestionService');

/**
 * Pulls data from a configured source (HTTP API or local CSV/JSON file) and
 * reports how many records were read.
 *
 * Database and stream sources are recognised but not implemented yet; they
 * currently fail with an explanatory error.
 */
export class DataIngestionService {
  /** Registry of open connections; nothing in this class populates it yet. */
  private readonly activeConnections: Map<string, unknown> = new Map();

  /** Logs service start-up. No other initialization work is performed. */
  async initialize(): Promise<void> {
    logger.info('🔄 Initializing Data Ingestion Service...');
    logger.info('✅ Data Ingestion Service initialized');
  }

  /**
   * Runs a single ingestion step, dispatching on `step.source.type`.
   *
   * Failures raised by the source-specific handlers (including the
   * "unsupported type" error) are caught, logged, and converted into a result
   * with zero records and a single `INGESTION_ERROR` entry.
   *
   * @param step - Ingestion step whose `source` describes where to read from.
   * @param parameters - Runtime parameters forwarded to the source handler.
   * @returns Record counts, collected errors, and source-specific metadata.
   */
  async ingestData(step: IngestionStep, parameters: Record<string, any>): Promise<ProcessingResult> {
    const startTime = Date.now();
    logger.info(`📥 Starting data ingestion from ${step.source.type}: ${step.source.connection.url || step.source.connection.host}`);

    try {
      switch (step.source.type) {
        case 'api':
          return await this.ingestFromApi(step.source, parameters);
        case 'file':
          return await this.ingestFromFile(step.source, parameters);
        case 'database':
          return await this.ingestFromDatabase(step.source, parameters);
        case 'stream':
          return await this.ingestFromStream(step.source, parameters);
        default:
          throw new Error(`Unsupported ingestion type: ${step.source.type}`);
      }
    } catch (error) {
      const processingTime = Date.now() - startTime;
      logger.error(`❌ Data ingestion failed after ${processingTime}ms:`, error);

      return {
        recordsProcessed: 0,
        recordsSuccessful: 0,
        recordsFailed: 0,
        errors: [{
          record: 0,
          message: error instanceof Error ? error.message : 'Unknown error',
          code: 'INGESTION_ERROR'
        }],
        metadata: { processingTimeMs: processingTime }
      };
    }
  }

  /**
   * Issues a GET request to `source.connection.url` and counts the records in
   * the response body.
   *
   * `parameters` are merged over `source.connection.params` as query
   * parameters. When `source.connection.apiKey` is set it is sent as a Bearer
   * token.
   *
   * @throws Whatever `axios` rejects with (network failure, error status, ...).
   */
  private async ingestFromApi(source: DataSource, parameters: Record<string, any>): Promise<ProcessingResult> {
    // Work on a copy: writing Authorization straight into
    // `source.connection.headers` would mutate the caller's configuration
    // object and leave the token behind in it.
    const headers = { ...(source.connection.headers || {}) };
    if (source.connection.apiKey) {
      headers['Authorization'] = `Bearer ${source.connection.apiKey}`;
    }

    const config = {
      method: 'GET',
      url: source.connection.url,
      headers,
      params: { ...source.connection.params, ...parameters },
    };

    const response = await axios(config);
    const data = response.data;
    const records = this.extractRecords(data);

    // JSON.stringify returns undefined for an undefined body; report size 0
    // instead of throwing on `.length`.
    const serialized = JSON.stringify(data);
    const responseSize = typeof serialized === 'string' ? serialized.length : 0;

    logger.info(`📊 Ingested ${records.length} records from API: ${source.connection.url}`);

    return {
      recordsProcessed: records.length,
      recordsSuccessful: records.length,
      recordsFailed: 0,
      errors: [],
      metadata: {
        source: 'api',
        url: source.connection.url,
        statusCode: response.status,
        responseSize
      }
    };
  }

  /**
   * Normalises an API response body into a list of records.
   *
   * Accepts a bare array, or an object wrapping the array under `data` or
   * `results`; any other non-null value is treated as a single record. A
   * `null`/`undefined` body yields no records.
   */
  private extractRecords(data: any): any[] {
    if (data === null || data === undefined) {
      return [];
    }
    if (Array.isArray(data)) {
      return data;
    }
    if (Array.isArray(data.data)) {
      return data.data;
    }
    if (Array.isArray(data.results)) {
      return data.results;
    }
    return [data];
  }

  /**
   * Ingests a local file, choosing the parser from `source.format`.
   *
   * The path is taken from `source.connection.url`, falling back to
   * `parameters.filePath`.
   *
   * @throws Error if no path is available or the format is not `csv`/`json`.
   */
  private async ingestFromFile(source: DataSource, parameters: Record<string, any>): Promise<ProcessingResult> {
    const filePath = source.connection.url || parameters.filePath;

    if (!filePath) {
      throw new Error('File path is required for file ingestion');
    }

    switch (source.format) {
      case 'csv':
        return await this.ingestCsvFile(filePath);
      case 'json':
        return await this.ingestJsonFile(filePath);
      default:
        throw new Error(`Unsupported file format: ${source.format}`);
    }
  }

  /**
   * Streams a CSV file through `csv-parser` and counts the parsed rows.
   *
   * Rows are counted rather than retained, so memory use does not grow with
   * the size of the file.
   *
   * @returns A promise that rejects if the file cannot be read or the parser
   *          reports an error.
   */
  private async ingestCsvFile(filePath: string): Promise<ProcessingResult> {
    return new Promise<ProcessingResult>((resolve, reject) => {
      let recordCount = 0;

      const input = fs.createReadStream(filePath);

      // pipe() does not forward errors from the source stream, so a missing
      // or unreadable file must be handled here; otherwise the 'error' event
      // is unhandled and this promise never settles.
      input.on('error', reject);

      input
        .pipe(csv())
        .on('data', () => {
          recordCount++;
        })
        .on('end', () => {
          logger.info(`📊 Ingested ${recordCount} records from CSV: ${filePath}`);
          resolve({
            recordsProcessed: recordCount,
            recordsSuccessful: recordCount,
            recordsFailed: 0,
            errors: [],
            metadata: {
              source: 'file',
              format: 'csv',
              filePath
            }
          });
        })
        .on('error', (error: Error) => {
          // A failed destination is unpiped but the source stays open;
          // close it explicitly so the file descriptor is released.
          input.destroy();
          reject(error);
        });
    });
  }

  /**
   * Reads and parses a JSON file. A top-level array is treated as a list of
   * records; any other value counts as a single record.
   *
   * @throws If the file cannot be read or does not contain valid JSON.
   */
  private async ingestJsonFile(filePath: string): Promise<ProcessingResult> {
    const fileContent = await fs.promises.readFile(filePath, 'utf8');
    const data = JSON.parse(fileContent);

    const records: any[] = Array.isArray(data) ? data : [data];

    logger.info(`📊 Ingested ${records.length} records from JSON: ${filePath}`);

    return {
      recordsProcessed: records.length,
      recordsSuccessful: records.length,
      recordsFailed: 0,
      errors: [],
      metadata: {
        source: 'file',
        format: 'json',
        filePath,
        // Size in bytes; string length would under-report multi-byte text.
        fileSize: Buffer.byteLength(fileContent, 'utf8')
      }
    };
  }

  /**
   * Placeholder for database ingestion.
   *
   * A real implementation would connect to a database (PostgreSQL, MySQL,
   * MongoDB, etc.) and execute queries.
   *
   * @throws Error always; not yet implemented.
   */
  private async ingestFromDatabase(source: DataSource, parameters: Record<string, any>): Promise<ProcessingResult> {
    throw new Error('Database ingestion not yet implemented');
  }

  /**
   * Placeholder for stream ingestion.
   *
   * A real implementation would connect to a streaming source (Kafka,
   * Kinesis, WebSocket, etc.).
   *
   * @throws Error always; not yet implemented.
   */
  private async ingestFromStream(source: DataSource, parameters: Record<string, any>): Promise<ProcessingResult> {
    throw new Error('Stream ingestion not yet implemented');
  }

  /**
   * Reports the number of tracked connections together with the static lists
   * of source types and formats this service advertises.
   */
  async getIngestionMetrics(): Promise<{
    activeConnections: number;
    supportedSources: string[];
    supportedFormats: string[];
  }> {
    return {
      activeConnections: this.activeConnections.size,
      supportedSources: ['api', 'file', 'database', 'stream'],
      supportedFormats: ['json', 'csv', 'xml', 'parquet', 'avro']
    };
  }
}