added initial py analytics / rust core / ts orchestrator services

This commit is contained in:
Boki 2025-07-01 11:16:25 -04:00
parent 680b5fd2ae
commit c862ed496b
62 changed files with 13459 additions and 0 deletions

View file

@@ -0,0 +1,435 @@
import { logger } from '@stock-bot/logger';
import { StorageService } from '../services/StorageService';
import { MarketData, Bar } from '../types';
import { EventEmitter } from 'events';
/** A bar resolution: its interval label and the bar length in milliseconds. */
export interface DataResolution {
/** Human-readable interval label, e.g. '1m', '1h', '1d'. */
interval: string;
/** Bar duration in milliseconds; 0 denotes tick data (no fixed period). */
milliseconds: number;
}
/** A corporate action used to back-adjust historical price series. */
export interface CorporateAction {
/** Symbol the action applies to. */
symbol: string;
/** Effective (ex-) date; bars strictly before this date get adjusted. */
date: Date;
/** Action kind; 'split' uses `factor`, 'dividend' uses `amount`. */
type: 'split' | 'dividend' | 'spinoff';
/** Split ratio (e.g. 2 for a 2-for-1 split). */
factor?: number;
/** Dividend amount per share. */
amount?: number;
/** Symbol of the spun-off entity — presumably for 'spinoff'; not yet consumed by DataManager. */
newSymbol?: string;
}
/** A recorded data-quality problem found while validating a bar series. */
export interface DataQualityIssue {
/** Timestamp of the offending bar. */
timestamp: Date;
/** Symbol whose data raised the issue. */
symbol: string;
/** Comma-joined human-readable issue descriptions. */
issue: string;
/** 'error' for unusable bars (missing/non-positive data), else 'warning'. */
severity: 'warning' | 'error';
/** The raw bar that triggered the issue, kept for diagnostics. */
details?: any;
}
export class DataManager extends EventEmitter {
  /**
   * Supported bar resolutions keyed by interval label.
   * `tick` has no fixed period and is represented by 0 ms.
   */
  private static RESOLUTIONS: Record<string, DataResolution> = {
    'tick': { interval: 'tick', milliseconds: 0 },
    '1s': { interval: '1s', milliseconds: 1000 },
    '5s': { interval: '5s', milliseconds: 5000 },
    '10s': { interval: '10s', milliseconds: 10000 },
    '30s': { interval: '30s', milliseconds: 30000 },
    '1m': { interval: '1m', milliseconds: 60000 },
    '5m': { interval: '5m', milliseconds: 300000 },
    '15m': { interval: '15m', milliseconds: 900000 },
    '30m': { interval: '30m', milliseconds: 1800000 },
    '1h': { interval: '1h', milliseconds: 3600000 },
    '4h': { interval: '4h', milliseconds: 14400000 },
    '1d': { interval: '1d', milliseconds: 86400000 },
  };

  /** Loaded series keyed by `${symbol}:${resolution}`. */
  private dataCache: Map<string, MarketData[]> = new Map();
  /** Aggregated bars per symbol per resolution (cleared by clearCache()). */
  private aggregatedCache: Map<string, Map<string, Bar[]>> = new Map();
  /** Per-symbol corporate-action cache consulted by loadCorporateActions(). */
  private corporateActions: Map<string, CorporateAction[]> = new Map();
  /** Issues accumulated by performQualityChecks(); reset by clearCache(). */
  private dataQualityIssues: DataQualityIssue[] = [];

  constructor(private storageService: StorageService) {
    super();
  }

  /**
   * Loads historical bars for each symbol, applies corporate-action
   * adjustments, runs quality checks, and caches the cleaned result.
   *
   * A failure for one symbol is logged and emitted as a `dataError` event
   * but does not abort loading of the remaining symbols.
   *
   * @param symbols Symbols to load.
   * @param startDate Start of the requested range.
   * @param endDate End of the requested range.
   * @param resolution One of the keys of RESOLUTIONS (default '1m').
   * @param includeExtendedHours Currently unused — kept for API
   *   compatibility; extended-hours filtering is not implemented here.
   * @returns Map from symbol to its cleaned MarketData series; symbols that
   *   failed to load are absent.
   */
  async loadHistoricalData(
    symbols: string[],
    startDate: Date,
    endDate: Date,
    resolution: string = '1m',
    includeExtendedHours: boolean = false
  ): Promise<Map<string, MarketData[]>> {
    const result = new Map<string, MarketData[]>();
    for (const symbol of symbols) {
      try {
        const raw = await this.storageService.getHistoricalBars(
          symbol,
          startDate,
          endDate,
          resolution
        );
        const adjusted = await this.applyCorporateActions(symbol, raw, startDate, endDate);
        const cleaned = this.performQualityChecks(symbol, adjusted);
        const marketData = this.convertToMarketData(symbol, cleaned);
        result.set(symbol, marketData);
        this.dataCache.set(`${symbol}:${resolution}`, marketData);
        logger.info(`Loaded ${marketData.length} bars for ${symbol} at ${resolution} resolution`);
      } catch (error) {
        // Keep going: one bad symbol must not poison the whole batch.
        logger.error(`Failed to load data for ${symbol}:`, error);
        this.emit('dataError', { symbol, error });
      }
    }
    return result;
  }

  /**
   * Back-adjusts bars for splits and dividends inside [startDate, endDate].
   * Bars strictly before an action's date are scaled; later bars are
   * untouched. Actions are applied newest-first so adjustments compound.
   *
   * NOTE(review): the dividend adjustment uses each bar's own (already
   * adjusted) close rather than the close on the ex-dividend date — a
   * simplification inherited from the original implementation; confirm
   * against the data vendor's adjustment methodology.
   */
  async applyCorporateActions(
    symbol: string,
    data: any[],
    startDate: Date,
    endDate: Date
  ): Promise<any[]> {
    const actions = await this.loadCorporateActions(symbol, startDate, endDate);
    if (actions.length === 0) return data;
    // Newest first, as in the original ordering.
    actions.sort((a, b) => b.date.getTime() - a.date.getTime());
    return data.map(bar => {
      const barDate = new Date(bar.timestamp);
      const adjustedBar = { ...bar };
      for (const action of actions) {
        // Only bars before the action date need adjustment.
        if (barDate >= action.date) continue;
        switch (action.type) {
          case 'split':
            if (action.factor) {
              // Prices scale down by the split factor, volume scales up.
              adjustedBar.open /= action.factor;
              adjustedBar.high /= action.factor;
              adjustedBar.low /= action.factor;
              adjustedBar.close /= action.factor;
              adjustedBar.volume *= action.factor;
            }
            break;
          case 'dividend':
            if (action.amount) {
              // Simplified proportional dividend adjustment.
              const adjustment = 1 - (action.amount / adjustedBar.close);
              adjustedBar.open *= adjustment;
              adjustedBar.high *= adjustment;
              adjustedBar.low *= adjustment;
              adjustedBar.close *= adjustment;
            }
            break;
          // 'spinoff' is intentionally unhandled, as in the original.
        }
      }
      return adjustedBar;
    });
  }

  /**
   * Validates each bar (missing fields, OHLC consistency, non-positive
   * prices, extreme moves, volume spikes), records issues, and returns a
   * cleaned series. Error-severity bars are replaced by an interpolation of
   * their neighbours when both exist, otherwise dropped; warning-severity
   * bars are kept as-is.
   */
  performQualityChecks(symbol: string, data: any[]): any[] {
    const cleaned: any[] = [];
    for (let i = 0; i < data.length; i++) {
      const bar = data[i];
      const prevBar = i > 0 ? data[i - 1] : null;
      const issues = this.detectBarIssues(bar, prevBar, data, i);
      if (issues.length === 0) {
        cleaned.push(bar);
        continue;
      }
      // Missing fields or non-positive prices make the bar unusable.
      const severity = issues.some(issue =>
        issue.includes('Missing') || issue.includes('Zero')
      ) ? 'error' : 'warning';
      this.dataQualityIssues.push({
        timestamp: new Date(bar.timestamp),
        symbol,
        issue: issues.join(', '),
        severity,
        details: bar
      });
      if (severity !== 'error') {
        cleaned.push(bar);
        continue;
      }
      // Unusable bar: interpolate from both neighbours when possible,
      // otherwise drop it (first/last bar cannot be interpolated).
      if (prevBar && i < data.length - 1) {
        const nextBar = data[i + 1];
        cleaned.push({
          ...bar,
          open: (prevBar.close + nextBar.open) / 2,
          close: (prevBar.close + nextBar.close) / 2,
          // Bound the synthetic bar by the neighbours' true range; the
          // previous *0.9/*1.1 fudge factors could produce low > high.
          high: Math.max(prevBar.high, nextBar.high),
          low: Math.min(prevBar.low, nextBar.low),
          volume: (prevBar.volume + nextBar.volume) / 2,
          interpolated: true
        });
      }
    }
    return cleaned;
  }

  /** Collects human-readable issue descriptions for a single bar. */
  private detectBarIssues(bar: any, prevBar: any, data: any[], i: number): string[] {
    const issues: string[] = [];
    if (!bar.open || !bar.high || !bar.low || !bar.close || bar.volume === undefined) {
      issues.push('Missing OHLCV data');
    }
    if (bar.low > bar.high) {
      issues.push('Low > High');
    }
    if (bar.open > bar.high || bar.open < bar.low) {
      issues.push('Open outside High/Low range');
    }
    if (bar.close > bar.high || bar.close < bar.low) {
      issues.push('Close outside High/Low range');
    }
    if (bar.open <= 0 || bar.high <= 0 || bar.low <= 0 || bar.close <= 0) {
      issues.push('Zero or negative prices');
    }
    // Extreme single-bar move (>20%); guard against dividing by a
    // zero/invalid previous close, which produced Infinity before.
    if (prevBar && prevBar.close > 0) {
      const priceChange = Math.abs((bar.close - prevBar.close) / prevBar.close);
      if (priceChange > 0.2) {
        issues.push(`Extreme price movement: ${(priceChange * 100).toFixed(1)}%`);
      }
    }
    // Volume spike vs the trailing 20-bar average; skip when the average is
    // zero so illiquid history does not flag every bar with any volume.
    if (i >= 20) {
      const avgVolume = data.slice(i - 20, i)
        .reduce((sum, b) => sum + b.volume, 0) / 20;
      if (avgVolume > 0 && bar.volume > avgVolume * 10) {
        issues.push('Volume spike detected');
      }
    }
    return issues;
  }

  /**
   * Aggregates bar-type entries from a finer resolution into coarser bars
   * aligned to the target interval. Non-bar entries are skipped. Assumes
   * input is in ascending timestamp order.
   *
   * @throws Error when either resolution is unknown, the source has no
   *   fixed period (tick), or the target is not strictly coarser.
   */
  aggregateData(
    data: MarketData[],
    fromResolution: string,
    toResolution: string
  ): Bar[] {
    const fromMs = DataManager.RESOLUTIONS[fromResolution]?.milliseconds;
    const toMs = DataManager.RESOLUTIONS[toResolution]?.milliseconds;
    // Explicit comparisons: the previous truthiness check (`!fromMs`)
    // conflated "unknown resolution" with tick's legitimate 0 ms value.
    if (fromMs === undefined || toMs === undefined || fromMs <= 0 || fromMs >= toMs) {
      throw new Error(`Cannot aggregate from ${fromResolution} to ${toResolution}`);
    }
    const bars: Bar[] = [];
    let currentBar: Partial<Bar> | null = null;
    let barStartTime = 0;
    for (const item of data) {
      if (item.type !== 'bar') continue;
      const bar = item.data;
      // Align each source bar to the start of its target-interval bucket.
      const alignedTime = Math.floor(bar.timestamp / toMs) * toMs;
      if (!currentBar || alignedTime > barStartTime) {
        // Finalize the previous bucket, then open a new one.
        if (currentBar && currentBar.open !== undefined) {
          bars.push(currentBar as Bar);
        }
        currentBar = {
          timestamp: alignedTime,
          open: bar.open,
          high: bar.high,
          low: bar.low,
          close: bar.close,
          volume: bar.volume,
          vwap: bar.vwap
        };
        barStartTime = alignedTime;
      } else {
        currentBar.high = Math.max(currentBar.high!, bar.high);
        currentBar.low = Math.min(currentBar.low!, bar.low);
        currentBar.close = bar.close;
        currentBar.volume! += bar.volume;
        // Volume-weighted running VWAP over the bucket.
        if (bar.vwap && currentBar.vwap) {
          const totalValue = (currentBar.vwap * (currentBar.volume! - bar.volume)) +
            (bar.vwap * bar.volume);
          currentBar.vwap = totalValue / currentBar.volume!;
        }
      }
    }
    // Flush the last open bucket.
    if (currentBar && currentBar.open !== undefined) {
      bars.push(currentBar as Bar);
    }
    return bars;
  }

  /**
   * Downsamples a series to `targetPoints` using LTTB (Largest Triangle
   * Three Buckets), always keeping the first and last points and the
   * visually most significant point of each intermediate bucket.
   */
  downsampleData(
    data: MarketData[],
    targetPoints: number
  ): MarketData[] {
    if (data.length <= targetPoints) return data;
    // Degenerate targets: LTTB needs at least three buckets. The previous
    // code divided by (targetPoints - 2), which is 0 or negative here.
    if (targetPoints <= 0) return [];
    if (targetPoints === 1) return [data[0]];
    if (targetPoints === 2) return [data[0], data[data.length - 1]];
    const downsampled: MarketData[] = [data[0]];
    const bucketSize = (data.length - 2) / (targetPoints - 2);
    for (let i = 0; i < targetPoints - 2; i++) {
      const bucketStart = Math.floor(i * bucketSize) + 1;
      const bucketEnd = Math.floor((i + 1) * bucketSize) + 1;
      const prevPoint = downsampled[downsampled.length - 1];
      const prevTime = prevPoint.data.timestamp;
      const prevPrice = this.getPrice(prevPoint);
      // Anchor the triangle on the average of the next bucket; fall back to
      // the final data point when that bucket is empty (avoids 0/0 = NaN).
      const nextStart = bucketEnd;
      const nextEnd = Math.min(Math.floor((i + 2) * bucketSize) + 1, data.length - 1);
      let avgTime: number;
      let avgPrice: number;
      if (nextEnd > nextStart) {
        let timeSum = 0;
        let priceSum = 0;
        for (let j = nextStart; j < nextEnd; j++) {
          timeSum += data[j].data.timestamp;
          priceSum += this.getPrice(data[j]);
        }
        avgTime = timeSum / (nextEnd - nextStart);
        avgPrice = priceSum / (nextEnd - nextStart);
      } else {
        const last = data[data.length - 1];
        avgTime = last.data.timestamp;
        avgPrice = this.getPrice(last);
      }
      // Pick the bucket point forming the largest triangle with the
      // previously selected point and the next-bucket anchor. Initialize to
      // a point inside this bucket (not index 0, which could duplicate the
      // first element when a bucket is empty).
      let maxArea = -1;
      let maxAreaPoint = Math.min(bucketStart, data.length - 1);
      for (let j = bucketStart; j < bucketEnd && j < data.length; j++) {
        const time = data[j].data.timestamp;
        const price = this.getPrice(data[j]);
        const area = Math.abs(
          (prevTime - avgTime) * (price - prevPrice) -
          (prevTime - time) * (avgPrice - prevPrice)
        );
        if (area > maxArea) {
          maxArea = area;
          maxAreaPoint = j;
        }
      }
      downsampled.push(data[maxAreaPoint]);
    }
    downsampled.push(data[data.length - 1]);
    return downsampled;
  }

  /** Extracts a representative price from any MarketData variant. */
  private getPrice(data: MarketData): number {
    switch (data.type) {
      case 'bar':
        return data.data.close;
      case 'trade':
        return data.data.price;
      case 'quote':
        // Mid-price of the spread.
        return (data.data.bid + data.data.ask) / 2;
      default:
        return 0;
    }
  }

  /** Wraps raw bar records in the MarketData 'bar' envelope. */
  private convertToMarketData(symbol: string, bars: any[]): MarketData[] {
    return bars.map(bar => ({
      type: 'bar' as const,
      data: {
        symbol,
        open: bar.open,
        high: bar.high,
        low: bar.low,
        close: bar.close,
        volume: bar.volume,
        vwap: bar.vwap,
        // Normalizes Date/string/number timestamps to epoch milliseconds.
        timestamp: new Date(bar.timestamp).getTime(),
        interpolated: bar.interpolated
      }
    }));
  }

  /**
   * Returns cached corporate actions within [startDate, endDate].
   * NOTE(review): no persistent source is wired up yet — uncached symbols
   * always yield an empty list.
   */
  private async loadCorporateActions(
    symbol: string,
    startDate: Date,
    endDate: Date
  ): Promise<CorporateAction[]> {
    const cached = this.corporateActions.get(symbol);
    if (cached) {
      return cached.filter(action =>
        action.date >= startDate && action.date <= endDate
      );
    }
    return [];
  }

  /**
   * Summarizes accumulated data-quality issues by symbol and severity.
   * The returned `issues` array is a defensive copy so callers cannot
   * mutate internal state through the report.
   */
  getDataQualityReport(): {
    totalIssues: number;
    bySymbol: Record<string, number>;
    bySeverity: Record<string, number>;
    issues: DataQualityIssue[];
  } {
    const bySymbol: Record<string, number> = {};
    const bySeverity: Record<string, number> = { warning: 0, error: 0 };
    for (const issue of this.dataQualityIssues) {
      bySymbol[issue.symbol] = (bySymbol[issue.symbol] || 0) + 1;
      bySeverity[issue.severity]++;
    }
    return {
      totalIssues: this.dataQualityIssues.length,
      bySymbol,
      bySeverity,
      issues: [...this.dataQualityIssues]
    };
  }

  /** Clears all caches and recorded quality issues. */
  clearCache(): void {
    this.dataCache.clear();
    this.aggregatedCache.clear();
    this.dataQualityIssues = [];
  }
}