This commit is contained in:
Boki 2026-03-21 13:06:46 -04:00
parent 4705550359
commit deeb934526
5 changed files with 444 additions and 32 deletions

View file

@ -12,6 +12,9 @@ interface FetchIntradayInput {
interface CrawlIntradayInput {
eodSearchCode: string;
interval: '1m' | '5m' | '1h';
fromDate?: Date;
toDate?: Date;
isInitial?: boolean; // To distinguish initial vs continuation jobs
}
interface ScheduleIntradayConfig {
@ -28,7 +31,7 @@ const MAX_DAYS_PER_INTERVAL = {
};
// Default exchanges to process for intraday data
const DEFAULT_INTRADAY_EXCHANGES = ['US', 'TO', 'V', 'CN', 'NEO'];
// const DEFAULT_INTRADAY_EXCHANGES = [],//['US', 'TO', 'V', 'CN', 'NEO', 'CC'];
export async function scheduleIntradayCrawl(
this: EodHandler,
@ -39,7 +42,7 @@ export async function scheduleIntradayCrawl(
try {
logger.info('Scheduling intraday crawl jobs', {
config: {
exchanges: config?.exchanges || DEFAULT_INTRADAY_EXCHANGES,
exchanges: config?.exchanges, //|| DEFAULT_INTRADAY_EXCHANGES,
symbolTypes: config?.symbolTypes || 'all',
limit: config?.limit || 'unlimited'
}
@ -71,13 +74,12 @@ export async function scheduleIntradayCrawl(
logger.debug(`Getting stale symbols for ${operationName}...`);
// Get symbols with all filters applied at the database level
const targetExchanges = config?.exchanges || DEFAULT_INTRADAY_EXCHANGES;
const desiredLimit = config?.limit || 5000;
const targetExchanges = config?.exchanges //|| DEFAULT_INTRADAY_EXCHANGES;
const desiredLimit = config?.limit || 1000000;
const staleSymbols = await this.operationRegistry.getStaleSymbols('eod', operationName, {
limit: desiredLimit,
exchanges: targetExchanges,
delisted: false
});
logger.debug(`getStaleSymbols returned ${staleSymbols.length} symbols for ${operationName}`);
@ -98,21 +100,42 @@ export async function scheduleIntradayCrawl(
// 2. Are not finished (!operationStatus.finished)
// 3. Are finished but need new data (newest date > 30 days old)
const needsNewData = operationStatus?.finished && operationStatus?.newestDateReached &&
new Date(operationStatus.newestDateReached) < new Date(Date.now() - 24 * 60 * 60 * 1000);
new Date(operationStatus.newestDateReached) < new Date(Date.now() - 30* 24 * 60 * 60 * 1000);
if (!operationStatus || !operationStatus.finished || needsNewData) {
// Calculate initial date range for the job
const maxDays = MAX_DAYS_PER_INTERVAL[interval];
let toDate = new Date();
let fromDate = new Date();
let isInitial = true;
if (operationStatus?.lastProcessedDate) {
// Continue from where we left off
toDate = new Date(operationStatus.lastProcessedDate);
isInitial = false;
}
// Calculate from date (going backwards)
fromDate = new Date(toDate);
fromDate.setDate(fromDate.getDate() - maxDays);
allSymbolsForCrawl.push({
symbol: symbol,
interval: interval,
operationName: operationName,
lastRun: staleSymbol.lastRun,
lastSuccess: staleSymbol.lastSuccess
lastSuccess: staleSymbol.lastSuccess,
fromDate: fromDate,
toDate: toDate,
isInitial: isInitial
});
logger.debug(`Added ${symbol.Code}.${symbol.Exchange} for ${interval} crawl`, {
hasOperation: !!operationStatus,
finished: operationStatus?.finished,
needsNewData
needsNewData,
dateRange: `${fromDate.toISOString().split('T')[0]} to ${toDate.toISOString().split('T')[0]}`,
isInitial
});
}
}
@ -136,6 +159,8 @@ export async function scheduleIntradayCrawl(
name: s.symbol.Name,
eodSearchCode: s.symbol.eodSearchCode,
interval: s.interval,
dateRange: `${s.fromDate.toISOString().split('T')[0]} to ${s.toDate.toISOString().split('T')[0]}`,
isInitial: s.isInitial,
lastRun: s.lastRun ? new Date(s.lastRun).toISOString() : 'never',
lastSuccess: s.lastSuccess ? new Date(s.lastSuccess).toISOString() : 'never'
}))
@ -145,16 +170,22 @@ export async function scheduleIntradayCrawl(
// Schedule crawl jobs for each symbol/interval combination
for (const item of allSymbolsForCrawl) {
const { symbol, interval } = item;
const { symbol, interval, fromDate, toDate, isInitial } = item;
// Create jobId based on whether it's initial or continuation
const dateStr = isInitial ? 'initial' : `${fromDate.toISOString().split('T')[0]}-${toDate.toISOString().split('T')[0]}`;
const jobId = `crawl-intraday-${symbol.eodSearchCode}-${interval}-${dateStr}`;
const jobId = `crawl-intraday-${symbol.eodSearchCode}-${interval}`;
try {
await this.scheduleOperation('crawl-intraday', {
eodSearchCode: symbol.eodSearchCode,
interval
interval,
fromDate,
toDate,
isInitial
}, {
jobId,
priority: 5, // Initial crawl jobs get priority 5 (lower priority)
priority: isInitial ? 5 : 3, // Initial crawl jobs get lower priority
attempts: 3,
backoff: {
type: 'exponential',
@ -163,6 +194,15 @@ export async function scheduleIntradayCrawl(
delay: jobsScheduled * 500 // Stagger jobs by 500ms
});
jobsScheduled++;
logger.debug(`Scheduled crawl job`, {
jobId,
symbol: symbol.Code,
exchange: symbol.Exchange,
interval,
dateRange: `${fromDate.toISOString().split('T')[0]} to ${toDate.toISOString().split('T')[0]}`,
isInitial
});
} catch (error: any) {
if (error?.message?.includes('Job already exists')) {
logger.debug(`Job already exists: ${jobId}`);
@ -189,7 +229,7 @@ export async function crawlIntraday(
input: CrawlIntradayInput
): Promise<{ success: boolean; recordsProcessed: number; finished: boolean }> {
const logger = this.logger;
const { eodSearchCode, interval } = input;
const { eodSearchCode, interval, fromDate: providedFromDate, toDate: providedToDate, isInitial } = input;
try {
// Lookup symbol using eodSearchCode
@ -234,19 +274,39 @@ export async function crawlIntraday(
// Determine date range for this batch
const maxDays = MAX_DAYS_PER_INTERVAL[interval];
let toDate = new Date();
let fromDate = new Date();
let toDate: Date;
let fromDate: Date;
if (operationStatus.lastProcessedDate) {
// Continue from where we left off - the last processed date becomes the new toDate
toDate = new Date(operationStatus.lastProcessedDate);
// No need to subtract a day - lastProcessedDate is the fromDate of the last batch
// Use provided dates if available (from scheduled job)
if (providedFromDate && providedToDate) {
fromDate = new Date(providedFromDate);
toDate = new Date(providedToDate);
logger.info(`Using provided date range for ${symbol}.${exchange} - ${interval}`, {
fromDate: fromDate.toISOString(),
toDate: toDate.toISOString(),
isInitial
});
} else {
// Fallback to original logic (for backward compatibility)
toDate = new Date();
fromDate = new Date();
if (operationStatus.lastProcessedDate) {
// Continue from where we left off - the last processed date becomes the new toDate
toDate = new Date(operationStatus.lastProcessedDate);
}
// Calculate from date (going backwards)
fromDate = new Date(toDate);
fromDate.setDate(fromDate.getDate() - maxDays);
logger.info(`Calculated date range for ${symbol}.${exchange} - ${interval}`, {
fromDate: fromDate.toISOString(),
toDate: toDate.toISOString(),
basedOn: operationStatus.lastProcessedDate ? 'lastProcessedDate' : 'current date'
});
}
// Calculate from date (going backwards)
fromDate = new Date(toDate);
fromDate.setDate(fromDate.getDate() - maxDays);
logger.info(`Fetching intraday batch for ${symbol}.${exchange} - ${interval} from ${fromDate.toISOString().split('T')[0]} to ${toDate.toISOString().split('T')[0]}`, {
symbol,
exchange,
@ -334,11 +394,22 @@ export async function crawlIntraday(
// If not finished, schedule next batch
if (!updateData.finished) {
// Calculate next batch date range
const nextToDate = fromDate; // Next batch's toDate is current batch's fromDate
const nextFromDate = new Date(nextToDate);
nextFromDate.setDate(nextFromDate.getDate() - maxDays);
const dateStr = `${nextFromDate.toISOString().split('T')[0]}-${nextToDate.toISOString().split('T')[0]}`;
const jobId = `crawl-intraday-${eodSearchCode}-${interval}-${dateStr}`;
await this.scheduleOperation('crawl-intraday', {
eodSearchCode,
interval
interval,
fromDate: nextFromDate,
toDate: nextToDate,
isInitial: false
}, {
jobId: `crawl-intraday-${eodSearchCode}-${interval}`,
jobId,
priority: 3, // Continuation jobs get higher priority (3) than initial jobs (5)
attempts: 3,
backoff: {
@ -352,9 +423,16 @@ export async function crawlIntraday(
symbol,
exchange,
interval,
currentBatchFrom: fromDate.toISOString(),
currentBatchTo: toDate.toISOString(),
recordsSaved: result.recordsSaved,
jobId,
currentBatch: {
from: fromDate.toISOString().split('T')[0],
to: toDate.toISOString().split('T')[0],
recordsSaved: result.recordsSaved
},
nextBatch: {
from: nextFromDate.toISOString().split('T')[0],
to: nextToDate.toISOString().split('T')[0]
},
totalDaysProcessed: updateData.totalDaysProcessed
});
}

View file

@ -38,21 +38,21 @@ export const EOD_OPERATIONS: OperationConfig[] = [
type: 'intraday_crawl',
description: 'Crawl 1-minute intraday data',
requiresFinishedFlag: true,
defaultStaleHours: 24 // Daily check for new data
defaultStaleHours: 50000 // ~50000h (~5.7 years) — effectively disables periodic re-checks; was 24h daily check
},
{
name: 'intraday_5m',
type: 'intraday_crawl',
description: 'Crawl 5-minute intraday data',
requiresFinishedFlag: true,
defaultStaleHours: 24 // Daily check for new data
defaultStaleHours: 50000 // ~50000h (~5.7 years) — effectively disables periodic re-checks; was 24h daily check
},
{
name: 'intraday_1h',
type: 'intraday_crawl',
description: 'Crawl 1-hour intraday data',
requiresFinishedFlag: true,
defaultStaleHours: 24 // Daily check for new data
defaultStaleHours: 50000 // ~50000h (~5.7 years) — effectively disables periodic re-checks; was 24h daily check
},
// Fundamental data