test
This commit is contained in:
parent
4705550359
commit
deeb934526
5 changed files with 444 additions and 32 deletions
|
|
@ -12,6 +12,9 @@ interface FetchIntradayInput {
|
|||
interface CrawlIntradayInput {
|
||||
eodSearchCode: string;
|
||||
interval: '1m' | '5m' | '1h';
|
||||
fromDate?: Date;
|
||||
toDate?: Date;
|
||||
isInitial?: boolean; // To distinguish initial vs continuation jobs
|
||||
}
|
||||
|
||||
interface ScheduleIntradayConfig {
|
||||
|
|
@ -28,7 +31,7 @@ const MAX_DAYS_PER_INTERVAL = {
|
|||
};
|
||||
|
||||
// Default exchanges to process for intraday data
|
||||
const DEFAULT_INTRADAY_EXCHANGES = ['US', 'TO', 'V', 'CN', 'NEO'];
|
||||
// const DEFAULT_INTRADAY_EXCHANGES = [],//['US', 'TO', 'V', 'CN', 'NEO', 'CC'];
|
||||
|
||||
export async function scheduleIntradayCrawl(
|
||||
this: EodHandler,
|
||||
|
|
@ -39,7 +42,7 @@ export async function scheduleIntradayCrawl(
|
|||
try {
|
||||
logger.info('Scheduling intraday crawl jobs', {
|
||||
config: {
|
||||
exchanges: config?.exchanges || DEFAULT_INTRADAY_EXCHANGES,
|
||||
exchanges: config?.exchanges, //|| DEFAULT_INTRADAY_EXCHANGES,
|
||||
symbolTypes: config?.symbolTypes || 'all',
|
||||
limit: config?.limit || 'unlimited'
|
||||
}
|
||||
|
|
@ -71,13 +74,12 @@ export async function scheduleIntradayCrawl(
|
|||
logger.debug(`Getting stale symbols for ${operationName}...`);
|
||||
|
||||
// Get symbols with all filters applied at the database level
|
||||
const targetExchanges = config?.exchanges || DEFAULT_INTRADAY_EXCHANGES;
|
||||
const desiredLimit = config?.limit || 5000;
|
||||
const targetExchanges = config?.exchanges //|| DEFAULT_INTRADAY_EXCHANGES;
|
||||
const desiredLimit = config?.limit || 1000000;
|
||||
|
||||
const staleSymbols = await this.operationRegistry.getStaleSymbols('eod', operationName, {
|
||||
limit: desiredLimit,
|
||||
exchanges: targetExchanges,
|
||||
delisted: false
|
||||
});
|
||||
|
||||
logger.debug(`getStaleSymbols returned ${staleSymbols.length} symbols for ${operationName}`);
|
||||
|
|
@ -98,21 +100,42 @@ export async function scheduleIntradayCrawl(
|
|||
// 2. Are not finished (!operationStatus.finished)
|
||||
// 3. Are finished but need new data (newest date reached is older than the cutoff computed below)
|
||||
const needsNewData = operationStatus?.finished && operationStatus?.newestDateReached &&
|
||||
new Date(operationStatus.newestDateReached) < new Date(Date.now() - 24 * 60 * 60 * 1000);
|
||||
new Date(operationStatus.newestDateReached) < new Date(Date.now() - 30* 24 * 60 * 60 * 1000);
|
||||
|
||||
if (!operationStatus || !operationStatus.finished || needsNewData) {
|
||||
// Calculate initial date range for the job
|
||||
const maxDays = MAX_DAYS_PER_INTERVAL[interval];
|
||||
let toDate = new Date();
|
||||
let fromDate = new Date();
|
||||
let isInitial = true;
|
||||
|
||||
if (operationStatus?.lastProcessedDate) {
|
||||
// Continue from where we left off
|
||||
toDate = new Date(operationStatus.lastProcessedDate);
|
||||
isInitial = false;
|
||||
}
|
||||
|
||||
// Calculate from date (going backwards)
|
||||
fromDate = new Date(toDate);
|
||||
fromDate.setDate(fromDate.getDate() - maxDays);
|
||||
|
||||
allSymbolsForCrawl.push({
|
||||
symbol: symbol,
|
||||
interval: interval,
|
||||
operationName: operationName,
|
||||
lastRun: staleSymbol.lastRun,
|
||||
lastSuccess: staleSymbol.lastSuccess
|
||||
lastSuccess: staleSymbol.lastSuccess,
|
||||
fromDate: fromDate,
|
||||
toDate: toDate,
|
||||
isInitial: isInitial
|
||||
});
|
||||
|
||||
logger.debug(`Added ${symbol.Code}.${symbol.Exchange} for ${interval} crawl`, {
|
||||
hasOperation: !!operationStatus,
|
||||
finished: operationStatus?.finished,
|
||||
needsNewData
|
||||
needsNewData,
|
||||
dateRange: `${fromDate.toISOString().split('T')[0]} to ${toDate.toISOString().split('T')[0]}`,
|
||||
isInitial
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -136,6 +159,8 @@ export async function scheduleIntradayCrawl(
|
|||
name: s.symbol.Name,
|
||||
eodSearchCode: s.symbol.eodSearchCode,
|
||||
interval: s.interval,
|
||||
dateRange: `${s.fromDate.toISOString().split('T')[0]} to ${s.toDate.toISOString().split('T')[0]}`,
|
||||
isInitial: s.isInitial,
|
||||
lastRun: s.lastRun ? new Date(s.lastRun).toISOString() : 'never',
|
||||
lastSuccess: s.lastSuccess ? new Date(s.lastSuccess).toISOString() : 'never'
|
||||
}))
|
||||
|
|
@ -145,16 +170,22 @@ export async function scheduleIntradayCrawl(
|
|||
|
||||
// Schedule crawl jobs for each symbol/interval combination
|
||||
for (const item of allSymbolsForCrawl) {
|
||||
const { symbol, interval } = item;
|
||||
const { symbol, interval, fromDate, toDate, isInitial } = item;
|
||||
|
||||
// Create jobId based on whether it's initial or continuation
|
||||
const dateStr = isInitial ? 'initial' : `${fromDate.toISOString().split('T')[0]}-${toDate.toISOString().split('T')[0]}`;
|
||||
const jobId = `crawl-intraday-${symbol.eodSearchCode}-${interval}-${dateStr}`;
|
||||
|
||||
const jobId = `crawl-intraday-${symbol.eodSearchCode}-${interval}`;
|
||||
try {
|
||||
await this.scheduleOperation('crawl-intraday', {
|
||||
eodSearchCode: symbol.eodSearchCode,
|
||||
interval
|
||||
interval,
|
||||
fromDate,
|
||||
toDate,
|
||||
isInitial
|
||||
}, {
|
||||
jobId,
|
||||
priority: 5, // Initial crawl jobs get priority 5 (lower priority)
|
||||
priority: isInitial ? 5 : 3, // Initial crawl jobs get lower priority
|
||||
attempts: 3,
|
||||
backoff: {
|
||||
type: 'exponential',
|
||||
|
|
@ -163,6 +194,15 @@ export async function scheduleIntradayCrawl(
|
|||
delay: jobsScheduled * 500 // Stagger jobs by 500ms
|
||||
});
|
||||
jobsScheduled++;
|
||||
|
||||
logger.debug(`Scheduled crawl job`, {
|
||||
jobId,
|
||||
symbol: symbol.Code,
|
||||
exchange: symbol.Exchange,
|
||||
interval,
|
||||
dateRange: `${fromDate.toISOString().split('T')[0]} to ${toDate.toISOString().split('T')[0]}`,
|
||||
isInitial
|
||||
});
|
||||
} catch (error: any) {
|
||||
if (error?.message?.includes('Job already exists')) {
|
||||
logger.debug(`Job already exists: ${jobId}`);
|
||||
|
|
@ -189,7 +229,7 @@ export async function crawlIntraday(
|
|||
input: CrawlIntradayInput
|
||||
): Promise<{ success: boolean; recordsProcessed: number; finished: boolean }> {
|
||||
const logger = this.logger;
|
||||
const { eodSearchCode, interval } = input;
|
||||
const { eodSearchCode, interval, fromDate: providedFromDate, toDate: providedToDate, isInitial } = input;
|
||||
|
||||
try {
|
||||
// Lookup symbol using eodSearchCode
|
||||
|
|
@ -234,19 +274,39 @@ export async function crawlIntraday(
|
|||
|
||||
// Determine date range for this batch
|
||||
const maxDays = MAX_DAYS_PER_INTERVAL[interval];
|
||||
let toDate = new Date();
|
||||
let fromDate = new Date();
|
||||
let toDate: Date;
|
||||
let fromDate: Date;
|
||||
|
||||
if (operationStatus.lastProcessedDate) {
|
||||
// Continue from where we left off - the last processed date becomes the new toDate
|
||||
toDate = new Date(operationStatus.lastProcessedDate);
|
||||
// No need to subtract a day - lastProcessedDate is the fromDate of the last batch
|
||||
// Use provided dates if available (from scheduled job)
|
||||
if (providedFromDate && providedToDate) {
|
||||
fromDate = new Date(providedFromDate);
|
||||
toDate = new Date(providedToDate);
|
||||
logger.info(`Using provided date range for ${symbol}.${exchange} - ${interval}`, {
|
||||
fromDate: fromDate.toISOString(),
|
||||
toDate: toDate.toISOString(),
|
||||
isInitial
|
||||
});
|
||||
} else {
|
||||
// Fallback to original logic (for backward compatibility)
|
||||
toDate = new Date();
|
||||
fromDate = new Date();
|
||||
|
||||
if (operationStatus.lastProcessedDate) {
|
||||
// Continue from where we left off - the last processed date becomes the new toDate
|
||||
toDate = new Date(operationStatus.lastProcessedDate);
|
||||
}
|
||||
|
||||
// Calculate from date (going backwards)
|
||||
fromDate = new Date(toDate);
|
||||
fromDate.setDate(fromDate.getDate() - maxDays);
|
||||
|
||||
logger.info(`Calculated date range for ${symbol}.${exchange} - ${interval}`, {
|
||||
fromDate: fromDate.toISOString(),
|
||||
toDate: toDate.toISOString(),
|
||||
basedOn: operationStatus.lastProcessedDate ? 'lastProcessedDate' : 'current date'
|
||||
});
|
||||
}
|
||||
|
||||
// Calculate from date (going backwards)
|
||||
fromDate = new Date(toDate);
|
||||
fromDate.setDate(fromDate.getDate() - maxDays);
|
||||
|
||||
logger.info(`Fetching intraday batch for ${symbol}.${exchange} - ${interval} from ${fromDate.toISOString().split('T')[0]} to ${toDate.toISOString().split('T')[0]}`, {
|
||||
symbol,
|
||||
exchange,
|
||||
|
|
@ -334,11 +394,22 @@ export async function crawlIntraday(
|
|||
|
||||
// If not finished, schedule next batch
|
||||
if (!updateData.finished) {
|
||||
// Calculate next batch date range
|
||||
const nextToDate = fromDate; // Next batch's toDate is current batch's fromDate
|
||||
const nextFromDate = new Date(nextToDate);
|
||||
nextFromDate.setDate(nextFromDate.getDate() - maxDays);
|
||||
|
||||
const dateStr = `${nextFromDate.toISOString().split('T')[0]}-${nextToDate.toISOString().split('T')[0]}`;
|
||||
const jobId = `crawl-intraday-${eodSearchCode}-${interval}-${dateStr}`;
|
||||
|
||||
await this.scheduleOperation('crawl-intraday', {
|
||||
eodSearchCode,
|
||||
interval
|
||||
interval,
|
||||
fromDate: nextFromDate,
|
||||
toDate: nextToDate,
|
||||
isInitial: false
|
||||
}, {
|
||||
jobId: `crawl-intraday-${eodSearchCode}-${interval}`,
|
||||
jobId,
|
||||
priority: 3, // Continuation jobs get higher priority (3) than initial jobs (5)
|
||||
attempts: 3,
|
||||
backoff: {
|
||||
|
|
@ -352,9 +423,16 @@ export async function crawlIntraday(
|
|||
symbol,
|
||||
exchange,
|
||||
interval,
|
||||
currentBatchFrom: fromDate.toISOString(),
|
||||
currentBatchTo: toDate.toISOString(),
|
||||
recordsSaved: result.recordsSaved,
|
||||
jobId,
|
||||
currentBatch: {
|
||||
from: fromDate.toISOString().split('T')[0],
|
||||
to: toDate.toISOString().split('T')[0],
|
||||
recordsSaved: result.recordsSaved
|
||||
},
|
||||
nextBatch: {
|
||||
from: nextFromDate.toISOString().split('T')[0],
|
||||
to: nextToDate.toISOString().split('T')[0]
|
||||
},
|
||||
totalDaysProcessed: updateData.totalDaysProcessed
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,21 +38,21 @@ export const EOD_OPERATIONS: OperationConfig[] = [
|
|||
type: 'intraday_crawl',
|
||||
description: 'Crawl 1-minute intraday data',
|
||||
requiresFinishedFlag: true,
|
||||
defaultStaleHours: 24 // Daily check for new data
|
||||
defaultStaleHours: 50000 // ~5.7 years (50000 h), so this is no longer a daily check for new data
|
||||
},
|
||||
{
|
||||
name: 'intraday_5m',
|
||||
type: 'intraday_crawl',
|
||||
description: 'Crawl 5-minute intraday data',
|
||||
requiresFinishedFlag: true,
|
||||
defaultStaleHours: 24 // Daily check for new data
|
||||
defaultStaleHours: 50000 // ~5.7 years (50000 h), so this is no longer a daily check for new data
|
||||
},
|
||||
{
|
||||
name: 'intraday_1h',
|
||||
type: 'intraday_crawl',
|
||||
description: 'Crawl 1-hour intraday data',
|
||||
requiresFinishedFlag: true,
|
||||
defaultStaleHours: 24 // Daily check for new data
|
||||
defaultStaleHours: 50000 // ~5.7 years (50000 h), so this is no longer a daily check for new data
|
||||
},
|
||||
|
||||
// Fundamental data
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue