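more work: schedule intraday crawls via operationRegistry.getSymbolsForIntradayCrawl instead of querying eodSymbols directly; type intraday handlers' `this` as EodHandler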

This commit is contained in:
Boki 2025-07-09 23:49:08 -04:00
parent 7a99d08d04
commit b87a931a2b
11 changed files with 595 additions and 183 deletions

View file

@@ -1,5 +1,6 @@
import type { BaseHandler } from '@stock-bot/handlers';
import type { DataIngestionServices } from '../../../types';
import type { EodHandler } from '../eod.handler';
import { EOD_CONFIG } from '../shared';
import { getEodExchangeSuffix } from '../shared/utils';
@@ -36,7 +37,7 @@ const MAX_DAYS_PER_INTERVAL = {
};
export async function scheduleIntradayCrawl(
this: BaseHandler<DataIngestionServices>
this: EodHandler
): Promise<{ success: boolean; jobsScheduled: number }> {
const logger = this.logger;
@@ -46,61 +47,74 @@ export async function scheduleIntradayCrawl(
// Get Canadian exchanges for now
const canadianExchanges = ['TO', 'V', 'CN', 'NEO'];
// Find active symbols that need intraday data
const symbols = await this.mongodb.collection('eodSymbols').find({
Exchange: { $in: canadianExchanges },
delisted: false,
// Only symbols without complete intraday data
$or: [
{ 'intradayState.1m.finished': { $ne: true } },
{ 'intradayState.5m.finished': { $ne: true } },
{ 'intradayState.1h.finished': { $ne: true } },
{ 'intradayState': { $exists: false } }
]
}).limit(100).toArray(); // Limit to avoid too many jobs at once
// Use OperationTracker to find symbols needing intraday crawl
const intervals: Array<'1m' | '5m' | '1h'> = ['1m', '5m', '1h'];
const operationNames = ['intraday_1m', 'intraday_5m', 'intraday_1h'];
if (!symbols || symbols.length === 0) {
let allSymbolsForCrawl: any[] = [];
// Get symbols needing crawl for each interval
for (let i = 0; i < intervals.length; i++) {
const interval = intervals[i];
const operationName = operationNames[i];
const allSymbolsForInterval = await this.operationRegistry.getSymbolsForIntradayCrawl('eod', operationName, {
limit: 500 // Get more symbols to filter from
});
// Filter for Canadian exchanges and non-delisted symbols
const symbolsForInterval = allSymbolsForInterval.filter(item =>
canadianExchanges.includes(item.symbol.Exchange) &&
item.symbol.delisted === false
).slice(0, 100);
// Add interval info to each symbol
symbolsForInterval.forEach(item => {
allSymbolsForCrawl.push({
symbol: item.symbol,
interval: interval,
operationName: operationName,
crawlState: item.crawlState
});
});
}
if (!allSymbolsForCrawl || allSymbolsForCrawl.length === 0) {
logger.info('No symbols need intraday crawl');
return { success: true, jobsScheduled: 0 };
}
logger.info(`Found ${symbols.length} symbols needing intraday data`, {
count: symbols.length,
samples: symbols.slice(0, 5).map(s => ({
symbol: s.Code,
exchange: s.Exchange,
name: s.Name,
intradayState: s.intradayState
logger.info(`Found ${allSymbolsForCrawl.length} symbol/interval combinations needing intraday data`, {
count: allSymbolsForCrawl.length,
samples: allSymbolsForCrawl.slice(0, 5).map(s => ({
symbol: s.symbol.Code,
exchange: s.symbol.Exchange,
name: s.symbol.Name,
interval: s.interval,
crawlState: s.crawlState
}))
});
let jobsScheduled = 0;
const intervals: Array<'1m' | '5m' | '1h'> = ['1m', '5m', '1h'];
// Schedule crawl jobs for each symbol and interval
for (const symbol of symbols) {
for (const interval of intervals) {
// Check if this interval is already finished
const isFinished = symbol.intradayState?.[interval]?.finished;
if (isFinished) {
continue;
}
await this.scheduleOperation('crawl-intraday', {
symbol: symbol.Code,
exchange: symbol.eodExchange || symbol.Exchange, // Use eodExchange if available
interval,
country: symbol.Country
}, {
attempts: 3,
backoff: {
type: 'exponential',
delay: 10000
},
delay: jobsScheduled * 500 // Stagger jobs by 500ms
});
jobsScheduled++;
}
// Schedule crawl jobs for each symbol/interval combination
for (const item of allSymbolsForCrawl) {
const { symbol, interval } = item;
await this.scheduleOperation('crawl-intraday', {
symbol: symbol.Code,
exchange: symbol.eodExchange || symbol.Exchange, // Use eodExchange if available
interval,
country: symbol.Country
}, {
attempts: 3,
backoff: {
type: 'exponential',
delay: 10000
},
delay: jobsScheduled * 500 // Stagger jobs by 500ms
});
jobsScheduled++;
}
logger.info(`Successfully scheduled ${jobsScheduled} intraday crawl jobs`);
@@ -116,7 +130,7 @@ export async function scheduleIntradayCrawl(
}
export async function crawlIntraday(
this: BaseHandler<DataIngestionServices>,
this: EodHandler,
input: CrawlIntradayInput
): Promise<{ success: boolean; recordsProcessed: number; finished: boolean }> {
const logger = this.logger;
@@ -240,7 +254,7 @@ export async function crawlIntraday(
}
export async function fetchIntraday(
this: BaseHandler<DataIngestionServices>,
this: EodHandler,
input: FetchIntradayInput
): Promise<{ success: boolean; recordsSaved: number }> {
const logger = this.logger;