adding data-services
This commit is contained in:
parent
e3bfd05b90
commit
405b818c86
139 changed files with 55943 additions and 416 deletions
|
|
@ -0,0 +1,200 @@
|
|||
import { logger } from '@stock-bot/utils';
|
||||
import { IngestionStep, ProcessingResult, DataSource } from '../types/DataPipeline';
|
||||
import axios from 'axios';
|
||||
import * as csv from 'csv-parser';
|
||||
import * as fs from 'fs';
|
||||
|
||||
/**
 * Reads records from an external source and summarises the outcome as a
 * `ProcessingResult`.
 *
 * Implemented sources are HTTP APIs (via axios) and local CSV / JSON files.
 * Database and stream sources are placeholders that throw "not yet
 * implemented"; `ingestData` converts that into an `INGESTION_ERROR` result.
 */
export class DataIngestionService {
  // Nothing in this class adds entries to this map; only its size is
  // reported by getIngestionMetrics().
  private activeConnections: Map<string, any> = new Map();

  /** Logs start-up messages; there is no other initialisation work. */
  async initialize(): Promise<void> {
    logger.info('🔄 Initializing Data Ingestion Service...');
    logger.info('✅ Data Ingestion Service initialized');
  }

  /**
   * Runs one ingestion step, dispatching on `step.source.type`.
   *
   * Errors raised while ingesting are caught, logged and returned as a result
   * with zero counts and a single `INGESTION_ERROR` entry rather than being
   * rethrown.
   *
   * @param step - Step whose `source` describes where to read from.
   * @param parameters - Extra values: merged into the query string for API
   *   sources, and consulted for `filePath` by file sources.
   * @returns The ingestion summary; `metadata.processingTimeMs` is set on
   *   both the success and the failure path.
   */
  async ingestData(step: IngestionStep, parameters: Record<string, any>): Promise<ProcessingResult> {
    const startTime = Date.now();
    logger.info(`📥 Starting data ingestion from ${step.source.type}: ${step.source.connection.url || step.source.connection.host}`);

    try {
      const result = await this.ingestFromSource(step.source, parameters);
      // Report timing on success too, so both outcomes carry the same metric.
      return {
        ...result,
        metadata: { ...result.metadata, processingTimeMs: Date.now() - startTime }
      };
    } catch (error) {
      const processingTime = Date.now() - startTime;
      logger.error(`❌ Data ingestion failed after ${processingTime}ms:`, error);

      return {
        recordsProcessed: 0,
        recordsSuccessful: 0,
        recordsFailed: 0,
        errors: [{
          record: 0,
          message: error instanceof Error ? error.message : 'Unknown error',
          code: 'INGESTION_ERROR'
        }],
        metadata: { processingTimeMs: processingTime }
      };
    }
  }

  /** Routes a source to the handler for its type; throws on an unknown type. */
  private async ingestFromSource(source: DataSource, parameters: Record<string, any>): Promise<ProcessingResult> {
    switch (source.type) {
      case 'api':
        return await this.ingestFromApi(source, parameters);
      case 'file':
        return await this.ingestFromFile(source, parameters);
      case 'database':
        return await this.ingestFromDatabase(source, parameters);
      case 'stream':
        return await this.ingestFromStream(source, parameters);
      default:
        throw new Error(`Unsupported ingestion type: ${source.type}`);
    }
  }

  /**
   * Issues a GET request to `source.connection.url` and counts the records in
   * the response body.
   *
   * Query parameters are `source.connection.params` overlaid with
   * `parameters`. When `source.connection.apiKey` is set it is sent as a
   * Bearer token.
   */
  private async ingestFromApi(source: DataSource, parameters: Record<string, any>): Promise<ProcessingResult> {
    // Work on a copy: writing Authorization into the caller's headers object
    // would leak the API key into the shared source definition.
    const headers: Record<string, any> = { ...(source.connection.headers || {}) };
    if (source.connection.apiKey) {
      headers['Authorization'] = `Bearer ${source.connection.apiKey}`;
    }

    const config = {
      method: 'GET' as const,
      url: source.connection.url,
      headers,
      params: { ...source.connection.params, ...parameters },
    };

    const response = await axios(config);
    const data = response.data;
    const records = this.extractRecords(data);

    logger.info(`📊 Ingested ${records.length} records from API: ${source.connection.url}`);

    // JSON.stringify yields undefined (not a string) for an undefined body.
    const serialized = JSON.stringify(data);

    return {
      recordsProcessed: records.length,
      recordsSuccessful: records.length,
      recordsFailed: 0,
      errors: [],
      metadata: {
        source: 'api',
        url: source.connection.url,
        statusCode: response.status,
        responseSize: typeof serialized === 'string' ? serialized.length : 0
      }
    };
  }

  /**
   * Normalises an API payload to an array of records: a bare array, an array
   * under `data` or `results`, or otherwise the payload itself as a single
   * record. A null/undefined payload yields no records.
   */
  private extractRecords(payload: any): any[] {
    if (payload === null || payload === undefined) {
      return [];
    }
    if (Array.isArray(payload)) {
      return payload;
    }
    if (Array.isArray(payload.data)) {
      return payload.data;
    }
    if (Array.isArray(payload.results)) {
      return payload.results;
    }
    return [payload];
  }

  /**
   * Ingests a local file whose path is `source.connection.url`, falling back
   * to `parameters.filePath`.
   *
   * @throws Error when no path is available or `source.format` is neither
   *   `csv` nor `json`.
   */
  private async ingestFromFile(source: DataSource, parameters: Record<string, any>): Promise<ProcessingResult> {
    const filePath = source.connection.url || parameters.filePath;

    if (!filePath) {
      throw new Error('File path is required for file ingestion');
    }

    switch (source.format) {
      case 'csv':
        return await this.ingestCsvFile(filePath);
      case 'json':
        return await this.ingestJsonFile(filePath);
      default:
        throw new Error(`Unsupported file format: ${source.format}`);
    }
  }

  /**
   * Streams a CSV file through csv-parser and counts the parsed rows.
   * Rejects if either the file read or the parser reports an error.
   */
  private async ingestCsvFile(filePath: string): Promise<ProcessingResult> {
    return new Promise<ProcessingResult>((resolve, reject) => {
      const records: any[] = [];
      const input = fs.createReadStream(filePath);

      // pipe() does not forward source errors to the destination, so a missing
      // or unreadable file has to be handled on the read stream itself.
      input.on('error', reject);

      input
        .pipe(csv())
        .on('data', (row: any) => {
          records.push(row);
        })
        .on('end', () => {
          logger.info(`📊 Ingested ${records.length} records from CSV: ${filePath}`);
          resolve({
            recordsProcessed: records.length,
            recordsSuccessful: records.length,
            recordsFailed: 0,
            errors: [],
            metadata: {
              source: 'file',
              format: 'csv',
              filePath
            }
          });
        })
        .on('error', (err: Error) => {
          // Stop reading once parsing has failed so the file handle is released.
          input.destroy();
          reject(err);
        });
    });
  }

  /**
   * Reads and parses a whole JSON file. A top-level array is treated as the
   * record list; any other value counts as one record.
   */
  private async ingestJsonFile(filePath: string): Promise<ProcessingResult> {
    const fileContent = await fs.promises.readFile(filePath, 'utf8');
    const data = JSON.parse(fileContent);

    const records: any[] = Array.isArray(data) ? data : [data];

    logger.info(`📊 Ingested ${records.length} records from JSON: ${filePath}`);

    return {
      recordsProcessed: records.length,
      recordsSuccessful: records.length,
      recordsFailed: 0,
      errors: [],
      metadata: {
        source: 'file',
        format: 'json',
        filePath,
        // Length of the decoded string, i.e. characters rather than bytes.
        fileSize: fileContent.length
      }
    };
  }

  /** Placeholder: always throws until database ingestion is implemented. */
  private async ingestFromDatabase(source: DataSource, parameters: Record<string, any>): Promise<ProcessingResult> {
    // In a real implementation, this would connect to various databases
    // (PostgreSQL, MySQL, MongoDB, etc.) and execute queries
    throw new Error('Database ingestion not yet implemented');
  }

  /** Placeholder: always throws until stream ingestion is implemented. */
  private async ingestFromStream(source: DataSource, parameters: Record<string, any>): Promise<ProcessingResult> {
    // In a real implementation, this would connect to streaming sources
    // (Kafka, Kinesis, WebSocket, etc.)
    throw new Error('Stream ingestion not yet implemented');
  }

  /**
   * Returns the number of tracked connections plus static capability lists.
   *
   * NOTE(review): the lists are hard-coded and broader than what this class
   * implements (only `api`/`file` sources and `csv`/`json` files work today).
   */
  async getIngestionMetrics(): Promise<any> {
    return {
      activeConnections: this.activeConnections.size,
      supportedSources: ['api', 'file', 'database', 'stream'],
      supportedFormats: ['json', 'csv', 'xml', 'parquet', 'avro']
    };
  }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue