added initial py analytics / rust core / ts orchestrator services
This commit is contained in:
parent
680b5fd2ae
commit
c862ed496b
62 changed files with 13459 additions and 0 deletions
435
apps/stock/orchestrator/src/data/DataManager.ts
Normal file
435
apps/stock/orchestrator/src/data/DataManager.ts
Normal file
|
|
@ -0,0 +1,435 @@
|
|||
import { logger } from '@stock-bot/logger';
|
||||
import { StorageService } from '../services/StorageService';
|
||||
import { MarketData, Bar } from '../types';
|
||||
import { EventEmitter } from 'events';
|
||||
|
||||
/**
 * A named bar resolution (e.g. '1m', '1h') paired with its bucket width.
 */
export interface DataResolution {
  /** Human-readable interval key, e.g. 'tick', '1m', '1d'. */
  interval: string;
  /** Bucket width in milliseconds; 0 for 'tick' (no fixed width). */
  milliseconds: number;
}
|
||||
|
||||
/**
 * A corporate action affecting historical price data for a symbol.
 * Which optional fields are populated depends on `type`:
 * splits carry `factor`, dividends carry `amount`, spinoffs carry `newSymbol`.
 */
export interface CorporateAction {
  symbol: string;
  /** Effective date of the action; bars strictly before it get adjusted. */
  date: Date;
  type: 'split' | 'dividend' | 'spinoff';
  /** Split ratio, e.g. 2 for a 2-for-1 split (prices divided, volume multiplied). */
  factor?: number;
  /** Cash dividend per share, in price units. */
  amount?: number;
  /** Symbol of the spun-off entity (spinoff handling not yet implemented below). */
  newSymbol?: string;
}
|
||||
|
||||
/**
 * A single data-quality finding recorded while validating historical bars.
 */
export interface DataQualityIssue {
  /** Timestamp of the offending bar. */
  timestamp: Date;
  symbol: string;
  /** Comma-joined list of detected problems for this bar. */
  issue: string;
  /** 'error' issues cause the bar to be interpolated or dropped; 'warning' keeps it. */
  severity: 'warning' | 'error';
  /** The raw bar that triggered the issue, kept for debugging. */
  details?: any;
}
|
||||
|
||||
export class DataManager extends EventEmitter {
|
||||
private static RESOLUTIONS: Record<string, DataResolution> = {
|
||||
'tick': { interval: 'tick', milliseconds: 0 },
|
||||
'1s': { interval: '1s', milliseconds: 1000 },
|
||||
'5s': { interval: '5s', milliseconds: 5000 },
|
||||
'10s': { interval: '10s', milliseconds: 10000 },
|
||||
'30s': { interval: '30s', milliseconds: 30000 },
|
||||
'1m': { interval: '1m', milliseconds: 60000 },
|
||||
'5m': { interval: '5m', milliseconds: 300000 },
|
||||
'15m': { interval: '15m', milliseconds: 900000 },
|
||||
'30m': { interval: '30m', milliseconds: 1800000 },
|
||||
'1h': { interval: '1h', milliseconds: 3600000 },
|
||||
'4h': { interval: '4h', milliseconds: 14400000 },
|
||||
'1d': { interval: '1d', milliseconds: 86400000 },
|
||||
};
|
||||
|
||||
private dataCache: Map<string, MarketData[]> = new Map();
|
||||
private aggregatedCache: Map<string, Map<string, Bar[]>> = new Map();
|
||||
private corporateActions: Map<string, CorporateAction[]> = new Map();
|
||||
private dataQualityIssues: DataQualityIssue[] = [];
|
||||
|
||||
constructor(private storageService: StorageService) {
|
||||
super();
|
||||
}
|
||||
|
||||
async loadHistoricalData(
|
||||
symbols: string[],
|
||||
startDate: Date,
|
||||
endDate: Date,
|
||||
resolution: string = '1m',
|
||||
includeExtendedHours: boolean = false
|
||||
): Promise<Map<string, MarketData[]>> {
|
||||
const result = new Map<string, MarketData[]>();
|
||||
|
||||
for (const symbol of symbols) {
|
||||
try {
|
||||
// Load raw data
|
||||
const data = await this.storageService.getHistoricalBars(
|
||||
symbol,
|
||||
startDate,
|
||||
endDate,
|
||||
resolution
|
||||
);
|
||||
|
||||
// Apply corporate actions
|
||||
const adjustedData = await this.applyCorporateActions(symbol, data, startDate, endDate);
|
||||
|
||||
// Quality checks
|
||||
const cleanedData = this.performQualityChecks(symbol, adjustedData);
|
||||
|
||||
// Convert to MarketData format
|
||||
const marketData = this.convertToMarketData(symbol, cleanedData);
|
||||
|
||||
result.set(symbol, marketData);
|
||||
this.dataCache.set(`${symbol}:${resolution}`, marketData);
|
||||
|
||||
logger.info(`Loaded ${marketData.length} bars for ${symbol} at ${resolution} resolution`);
|
||||
} catch (error) {
|
||||
logger.error(`Failed to load data for ${symbol}:`, error);
|
||||
this.emit('dataError', { symbol, error });
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
async applyCorporateActions(
|
||||
symbol: string,
|
||||
data: any[],
|
||||
startDate: Date,
|
||||
endDate: Date
|
||||
): Promise<any[]> {
|
||||
// Load corporate actions for the period
|
||||
const actions = await this.loadCorporateActions(symbol, startDate, endDate);
|
||||
if (actions.length === 0) return data;
|
||||
|
||||
// Sort actions by date (newest first)
|
||||
actions.sort((a, b) => b.date.getTime() - a.date.getTime());
|
||||
|
||||
// Apply adjustments
|
||||
return data.map(bar => {
|
||||
const barDate = new Date(bar.timestamp);
|
||||
let adjustedBar = { ...bar };
|
||||
|
||||
for (const action of actions) {
|
||||
if (barDate < action.date) {
|
||||
switch (action.type) {
|
||||
case 'split':
|
||||
if (action.factor) {
|
||||
adjustedBar.open /= action.factor;
|
||||
adjustedBar.high /= action.factor;
|
||||
adjustedBar.low /= action.factor;
|
||||
adjustedBar.close /= action.factor;
|
||||
adjustedBar.volume *= action.factor;
|
||||
}
|
||||
break;
|
||||
|
||||
case 'dividend':
|
||||
if (action.amount) {
|
||||
// Adjust for dividends (simplified)
|
||||
const adjustment = 1 - (action.amount / adjustedBar.close);
|
||||
adjustedBar.open *= adjustment;
|
||||
adjustedBar.high *= adjustment;
|
||||
adjustedBar.low *= adjustment;
|
||||
adjustedBar.close *= adjustment;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return adjustedBar;
|
||||
});
|
||||
}
|
||||
|
||||
performQualityChecks(symbol: string, data: any[]): any[] {
|
||||
const cleaned: any[] = [];
|
||||
|
||||
for (let i = 0; i < data.length; i++) {
|
||||
const bar = data[i];
|
||||
const prevBar = i > 0 ? data[i - 1] : null;
|
||||
const issues: string[] = [];
|
||||
|
||||
// Check for missing data
|
||||
if (!bar.open || !bar.high || !bar.low || !bar.close || bar.volume === undefined) {
|
||||
issues.push('Missing OHLCV data');
|
||||
}
|
||||
|
||||
// Check for invalid prices
|
||||
if (bar.low > bar.high) {
|
||||
issues.push('Low > High');
|
||||
}
|
||||
if (bar.open > bar.high || bar.open < bar.low) {
|
||||
issues.push('Open outside High/Low range');
|
||||
}
|
||||
if (bar.close > bar.high || bar.close < bar.low) {
|
||||
issues.push('Close outside High/Low range');
|
||||
}
|
||||
|
||||
// Check for zero or negative prices
|
||||
if (bar.open <= 0 || bar.high <= 0 || bar.low <= 0 || bar.close <= 0) {
|
||||
issues.push('Zero or negative prices');
|
||||
}
|
||||
|
||||
// Check for extreme price movements (>20% in one bar)
|
||||
if (prevBar) {
|
||||
const priceChange = Math.abs((bar.close - prevBar.close) / prevBar.close);
|
||||
if (priceChange > 0.2) {
|
||||
issues.push(`Extreme price movement: ${(priceChange * 100).toFixed(1)}%`);
|
||||
}
|
||||
}
|
||||
|
||||
// Check for volume spikes (>10x average)
|
||||
if (i >= 20) {
|
||||
const avgVolume = data.slice(i - 20, i)
|
||||
.reduce((sum, b) => sum + b.volume, 0) / 20;
|
||||
if (bar.volume > avgVolume * 10) {
|
||||
issues.push('Volume spike detected');
|
||||
}
|
||||
}
|
||||
|
||||
// Handle issues
|
||||
if (issues.length > 0) {
|
||||
const severity = issues.some(issue =>
|
||||
issue.includes('Missing') || issue.includes('Zero')
|
||||
) ? 'error' : 'warning';
|
||||
|
||||
this.dataQualityIssues.push({
|
||||
timestamp: new Date(bar.timestamp),
|
||||
symbol,
|
||||
issue: issues.join(', '),
|
||||
severity,
|
||||
details: bar
|
||||
});
|
||||
|
||||
// For errors, try to interpolate or skip
|
||||
if (severity === 'error') {
|
||||
if (prevBar && i < data.length - 1) {
|
||||
// Interpolate from surrounding bars
|
||||
const nextBar = data[i + 1];
|
||||
cleaned.push({
|
||||
...bar,
|
||||
open: (prevBar.close + nextBar.open) / 2,
|
||||
high: Math.max(prevBar.high, nextBar.high) * 0.9,
|
||||
low: Math.min(prevBar.low, nextBar.low) * 1.1,
|
||||
close: (prevBar.close + nextBar.close) / 2,
|
||||
volume: (prevBar.volume + nextBar.volume) / 2,
|
||||
interpolated: true
|
||||
});
|
||||
}
|
||||
// Skip if we can't interpolate
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
cleaned.push(bar);
|
||||
}
|
||||
|
||||
return cleaned;
|
||||
}
|
||||
|
||||
aggregateData(
|
||||
data: MarketData[],
|
||||
fromResolution: string,
|
||||
toResolution: string
|
||||
): Bar[] {
|
||||
const fromMs = DataManager.RESOLUTIONS[fromResolution]?.milliseconds;
|
||||
const toMs = DataManager.RESOLUTIONS[toResolution]?.milliseconds;
|
||||
|
||||
if (!fromMs || !toMs || fromMs >= toMs) {
|
||||
throw new Error(`Cannot aggregate from ${fromResolution} to ${toResolution}`);
|
||||
}
|
||||
|
||||
const bars: Bar[] = [];
|
||||
let currentBar: Partial<Bar> | null = null;
|
||||
let barStartTime = 0;
|
||||
|
||||
for (const item of data) {
|
||||
if (item.type !== 'bar') continue;
|
||||
|
||||
const bar = item.data;
|
||||
const timestamp = bar.timestamp;
|
||||
const alignedTime = Math.floor(timestamp / toMs) * toMs;
|
||||
|
||||
if (!currentBar || alignedTime > barStartTime) {
|
||||
// Finalize previous bar
|
||||
if (currentBar && currentBar.open !== undefined) {
|
||||
bars.push(currentBar as Bar);
|
||||
}
|
||||
|
||||
// Start new bar
|
||||
currentBar = {
|
||||
timestamp: alignedTime,
|
||||
open: bar.open,
|
||||
high: bar.high,
|
||||
low: bar.low,
|
||||
close: bar.close,
|
||||
volume: bar.volume,
|
||||
vwap: bar.vwap
|
||||
};
|
||||
barStartTime = alignedTime;
|
||||
} else {
|
||||
// Update current bar
|
||||
currentBar.high = Math.max(currentBar.high!, bar.high);
|
||||
currentBar.low = Math.min(currentBar.low!, bar.low);
|
||||
currentBar.close = bar.close;
|
||||
currentBar.volume! += bar.volume;
|
||||
|
||||
// Recalculate VWAP if available
|
||||
if (bar.vwap && currentBar.vwap) {
|
||||
const totalValue = (currentBar.vwap * (currentBar.volume! - bar.volume)) +
|
||||
(bar.vwap * bar.volume);
|
||||
currentBar.vwap = totalValue / currentBar.volume!;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add final bar
|
||||
if (currentBar && currentBar.open !== undefined) {
|
||||
bars.push(currentBar as Bar);
|
||||
}
|
||||
|
||||
return bars;
|
||||
}
|
||||
|
||||
downsampleData(
|
||||
data: MarketData[],
|
||||
targetPoints: number
|
||||
): MarketData[] {
|
||||
if (data.length <= targetPoints) return data;
|
||||
|
||||
// Use LTTB (Largest Triangle Three Buckets) algorithm
|
||||
const downsampled: MarketData[] = [];
|
||||
const bucketSize = (data.length - 2) / (targetPoints - 2);
|
||||
|
||||
// Always include first point
|
||||
downsampled.push(data[0]);
|
||||
|
||||
for (let i = 0; i < targetPoints - 2; i++) {
|
||||
const bucketStart = Math.floor((i) * bucketSize) + 1;
|
||||
const bucketEnd = Math.floor((i + 1) * bucketSize) + 1;
|
||||
|
||||
// Find point with maximum area in bucket
|
||||
let maxArea = -1;
|
||||
let maxAreaPoint = 0;
|
||||
|
||||
const prevPoint = downsampled[downsampled.length - 1];
|
||||
const prevTime = prevPoint.data.timestamp;
|
||||
const prevPrice = this.getPrice(prevPoint);
|
||||
|
||||
// Calculate average of next bucket for area calculation
|
||||
let nextBucketStart = Math.floor((i + 1) * bucketSize) + 1;
|
||||
let nextBucketEnd = Math.floor((i + 2) * bucketSize) + 1;
|
||||
if (nextBucketEnd >= data.length) {
|
||||
nextBucketEnd = data.length - 1;
|
||||
}
|
||||
|
||||
let avgTime = 0;
|
||||
let avgPrice = 0;
|
||||
for (let j = nextBucketStart; j < nextBucketEnd; j++) {
|
||||
avgTime += data[j].data.timestamp;
|
||||
avgPrice += this.getPrice(data[j]);
|
||||
}
|
||||
avgTime /= (nextBucketEnd - nextBucketStart);
|
||||
avgPrice /= (nextBucketEnd - nextBucketStart);
|
||||
|
||||
// Find point with max area
|
||||
for (let j = bucketStart; j < bucketEnd && j < data.length; j++) {
|
||||
const time = data[j].data.timestamp;
|
||||
const price = this.getPrice(data[j]);
|
||||
|
||||
// Calculate triangle area
|
||||
const area = Math.abs(
|
||||
(prevTime - avgTime) * (price - prevPrice) -
|
||||
(prevTime - time) * (avgPrice - prevPrice)
|
||||
);
|
||||
|
||||
if (area > maxArea) {
|
||||
maxArea = area;
|
||||
maxAreaPoint = j;
|
||||
}
|
||||
}
|
||||
|
||||
downsampled.push(data[maxAreaPoint]);
|
||||
}
|
||||
|
||||
// Always include last point
|
||||
downsampled.push(data[data.length - 1]);
|
||||
|
||||
return downsampled;
|
||||
}
|
||||
|
||||
private getPrice(data: MarketData): number {
|
||||
switch (data.type) {
|
||||
case 'bar':
|
||||
return data.data.close;
|
||||
case 'trade':
|
||||
return data.data.price;
|
||||
case 'quote':
|
||||
return (data.data.bid + data.data.ask) / 2;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
private convertToMarketData(symbol: string, bars: any[]): MarketData[] {
|
||||
return bars.map(bar => ({
|
||||
type: 'bar' as const,
|
||||
data: {
|
||||
symbol,
|
||||
open: bar.open,
|
||||
high: bar.high,
|
||||
low: bar.low,
|
||||
close: bar.close,
|
||||
volume: bar.volume,
|
||||
vwap: bar.vwap,
|
||||
timestamp: new Date(bar.timestamp).getTime(),
|
||||
interpolated: bar.interpolated
|
||||
}
|
||||
}));
|
||||
}
|
||||
|
||||
private async loadCorporateActions(
|
||||
symbol: string,
|
||||
startDate: Date,
|
||||
endDate: Date
|
||||
): Promise<CorporateAction[]> {
|
||||
// Check cache first
|
||||
const cached = this.corporateActions.get(symbol);
|
||||
if (cached) {
|
||||
return cached.filter(action =>
|
||||
action.date >= startDate && action.date <= endDate
|
||||
);
|
||||
}
|
||||
|
||||
// In real implementation, load from database
|
||||
// For now, return empty array
|
||||
return [];
|
||||
}
|
||||
|
||||
getDataQualityReport(): {
|
||||
totalIssues: number;
|
||||
bySymbol: Record<string, number>;
|
||||
bySeverity: Record<string, number>;
|
||||
issues: DataQualityIssue[];
|
||||
} {
|
||||
const bySymbol: Record<string, number> = {};
|
||||
const bySeverity: Record<string, number> = { warning: 0, error: 0 };
|
||||
|
||||
for (const issue of this.dataQualityIssues) {
|
||||
bySymbol[issue.symbol] = (bySymbol[issue.symbol] || 0) + 1;
|
||||
bySeverity[issue.severity]++;
|
||||
}
|
||||
|
||||
return {
|
||||
totalIssues: this.dataQualityIssues.length,
|
||||
bySymbol,
|
||||
bySeverity,
|
||||
issues: this.dataQualityIssues
|
||||
};
|
||||
}
|
||||
|
||||
clearCache(): void {
|
||||
this.dataCache.clear();
|
||||
this.aggregatedCache.clear();
|
||||
this.dataQualityIssues = [];
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue