Reworked operationTracker crawl-state handling and added eodSearchCode lookups
This commit is contained in:
parent
18289f0a04
commit
7486a1fa65
11 changed files with 237 additions and 143 deletions
|
|
@ -1,5 +1,4 @@
|
|||
|
||||
import type { CrawlState } from '../../../shared/operation-manager/types';
|
||||
import type { EodHandler } from '../eod.handler';
|
||||
import { EOD_CONFIG } from '../shared';
|
||||
|
||||
|
|
@ -19,7 +18,6 @@ interface CrawlIntradayInput {
|
|||
country?: string;
|
||||
}
|
||||
|
||||
// CrawlState is imported from operation-manager types
|
||||
|
||||
// Max days per interval based on EOD limits
|
||||
const MAX_DAYS_PER_INTERVAL = {
|
||||
|
|
@ -47,16 +45,18 @@ export async function scheduleIntradayCrawl(
|
|||
const interval = intervals[i];
|
||||
const operationName = operationNames[i]!; // Non-null assertion since we know the array has 3 elements
|
||||
|
||||
const symbolsForInterval = await this.operationRegistry.getStaleSymbols('eod', operationName, {
|
||||
limit: 1000, // Get more to filter
|
||||
symbolFilter: { symbol: 'AAPL' } // Filter for AAPL only
|
||||
});
|
||||
// For intraday, we want to check even finished crawls for new data
|
||||
// So we'll query the symbols directly
|
||||
const symbolsForInterval = await this.mongodb.collection('eodSymbols').find({
|
||||
eodSearchCode: 'AAPL.US',
|
||||
delisted: false
|
||||
}).toArray();
|
||||
|
||||
// Filter out delisted symbols and ensure we get AAPL with US exchange
|
||||
const activeSymbols = symbolsForInterval.filter(item =>
|
||||
item.symbol.delisted === false &&
|
||||
item.symbol.Code === 'AAPL' &&
|
||||
(item.symbol.eodExchange === 'US' || item.symbol.Exchange === 'US')
|
||||
item.symbol.eodExchange === 'US'
|
||||
);
|
||||
|
||||
// Add interval info to each symbol
|
||||
|
|
@ -153,27 +153,38 @@ export async function crawlIntraday(
|
|||
throw new Error(`Symbol ${symbol}.${exchange} not found`);
|
||||
}
|
||||
|
||||
logger.debug('Found symbol document', {
|
||||
symbol,
|
||||
exchange,
|
||||
hasOperations: !!symbolDoc.operations,
|
||||
operationKeys: Object.keys(symbolDoc.operations || {})
|
||||
});
|
||||
|
||||
// Get operation status from tracker
|
||||
const operationName = `intraday_${interval}`;
|
||||
const operationStatus = symbolDoc.operations?.[operationName];
|
||||
const crawlState: CrawlState = operationStatus?.crawlState || {
|
||||
finished: false
|
||||
};
|
||||
const operationStatus = symbolDoc.operations?.[operationName] || {};
|
||||
|
||||
logger.info(`Current crawl state for ${symbol}.${exchange} - ${interval}`, {
|
||||
hasOperationStatus: !!symbolDoc.operations?.[operationName],
|
||||
operationStatus: operationStatus,
|
||||
lastProcessedDate: operationStatus.lastProcessedDate,
|
||||
finished: operationStatus.finished
|
||||
});
|
||||
|
||||
// Determine date range for this batch
|
||||
const maxDays = MAX_DAYS_PER_INTERVAL[interval];
|
||||
let toDate = new Date();
|
||||
let fromDate = new Date();
|
||||
|
||||
if (crawlState.lastProcessedDate) {
|
||||
// Continue from where we left off
|
||||
toDate = new Date(crawlState.lastProcessedDate);
|
||||
toDate.setDate(toDate.getDate() - 1); // Start from day before last processed
|
||||
if (operationStatus.lastProcessedDate) {
|
||||
// Continue from where we left off - the last processed date becomes the new toDate
|
||||
toDate = new Date(operationStatus.lastProcessedDate);
|
||||
// No need to subtract a day - lastProcessedDate is the fromDate of the last batch
|
||||
}
|
||||
|
||||
// Calculate from date (going backwards)
|
||||
fromDate = new Date(toDate);
|
||||
fromDate.setDate(fromDate.getDate() - maxDays + 1);
|
||||
fromDate.setDate(fromDate.getDate() - maxDays);
|
||||
|
||||
logger.info(`Fetching intraday batch for ${symbol}.${exchange} - ${interval} from ${fromDate.toISOString().split('T')[0]} to ${toDate.toISOString().split('T')[0]}`, {
|
||||
symbol,
|
||||
|
|
@ -182,9 +193,9 @@ export async function crawlIntraday(
|
|||
fromDate: fromDate.toISOString(),
|
||||
toDate: toDate.toISOString(),
|
||||
maxDays,
|
||||
crawlState: {
|
||||
lastProcessedDate: crawlState.lastProcessedDate,
|
||||
totalDaysProcessed: crawlState.totalDaysProcessed || 0
|
||||
operationStatus: {
|
||||
lastProcessedDate: operationStatus.lastProcessedDate,
|
||||
totalDaysProcessed: operationStatus.totalDaysProcessed || 0
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -198,47 +209,73 @@ export async function crawlIntraday(
|
|||
country
|
||||
});
|
||||
|
||||
// Update crawl state
|
||||
const newState: CrawlState = {
|
||||
...crawlState,
|
||||
// Prepare update data
|
||||
const updateData: any = {
|
||||
status: 'partial',
|
||||
recordCount: result.recordsSaved,
|
||||
finished: false,
|
||||
lastProcessedDate: fromDate,
|
||||
totalDaysProcessed: (crawlState.totalDaysProcessed || 0) + 1
|
||||
lastProcessedDate: fromDate, // Store the fromDate so next batch continues from here
|
||||
totalDaysProcessed: (operationStatus.totalDaysProcessed || 0) + maxDays
|
||||
};
|
||||
|
||||
// Set oldest date reached
|
||||
if (!newState.oldestDateReached || fromDate < newState.oldestDateReached) {
|
||||
newState.oldestDateReached = fromDate;
|
||||
if (!operationStatus.oldestDateReached || fromDate < operationStatus.oldestDateReached) {
|
||||
updateData.oldestDateReached = fromDate;
|
||||
} else {
|
||||
updateData.oldestDateReached = operationStatus.oldestDateReached;
|
||||
}
|
||||
|
||||
// Set newest date reached
|
||||
if (!newState.newestDateReached || toDate > newState.newestDateReached) {
|
||||
newState.newestDateReached = toDate;
|
||||
if (!operationStatus.newestDateReached || toDate > operationStatus.newestDateReached) {
|
||||
updateData.newestDateReached = toDate;
|
||||
} else {
|
||||
updateData.newestDateReached = operationStatus.newestDateReached;
|
||||
}
|
||||
|
||||
// Check if we're finished (no data returned means we've reached the end)
|
||||
if (result.recordsSaved === 0) {
|
||||
newState.finished = true;
|
||||
logger.info(`Intraday crawl finished for ${symbol}.${exchange} - ${interval} (${newState.oldestDateReached?.toISOString().split('T')[0]} to ${newState.newestDateReached?.toISOString().split('T')[0]})`, {
|
||||
// Check if we're finished
|
||||
// Only mark as finished if:
|
||||
// 1. We got no data from the API (empty response)
|
||||
// 2. We've been crawling for a while and consistently getting no new records
|
||||
if (result.recordsSaved === 0 && result.recordsFetched === 0) {
|
||||
// No data returned from API - we've reached the end
|
||||
updateData.finished = true;
|
||||
updateData.status = 'success';
|
||||
logger.info(`Intraday crawl finished for ${symbol}.${exchange} - ${interval} (no more data available)`, {
|
||||
symbol,
|
||||
exchange,
|
||||
interval,
|
||||
oldestDate: newState.oldestDateReached?.toISOString(),
|
||||
newestDate: newState.newestDateReached?.toISOString(),
|
||||
totalDaysProcessed: newState.totalDaysProcessed,
|
||||
oldestDate: updateData.oldestDateReached?.toISOString(),
|
||||
newestDate: updateData.newestDateReached?.toISOString(),
|
||||
totalDaysProcessed: updateData.totalDaysProcessed,
|
||||
noDataReturned: true
|
||||
});
|
||||
} else if (result.recordsSaved === 0 && result.recordsFetched > 0) {
|
||||
// Data was fetched but all records already exist - continue crawling
|
||||
logger.info(`All ${result.recordsFetched} records already exist for ${symbol}.${exchange} - ${interval}, continuing crawl`, {
|
||||
symbol,
|
||||
exchange,
|
||||
interval,
|
||||
fromDate: fromDate.toISOString(),
|
||||
toDate: toDate.toISOString(),
|
||||
recordsFetched: result.recordsFetched
|
||||
});
|
||||
}
|
||||
|
||||
// Update operation tracker with crawl state
|
||||
await this.operationRegistry.updateOperation('eod', symbol, operationName, {
|
||||
status: newState.finished ? 'success' : 'partial',
|
||||
// Update operation tracker
|
||||
logger.info(`Updating operation tracker for ${symbol}.${exchange} - ${interval}`, {
|
||||
status: updateData.status,
|
||||
recordCount: result.recordsSaved,
|
||||
crawlState: newState
|
||||
lastProcessedDate: updateData.lastProcessedDate,
|
||||
finished: updateData.finished
|
||||
});
|
||||
|
||||
const eodSearchCode = `${symbol}.${exchange}`;
|
||||
await this.operationRegistry.updateOperation('eod', eodSearchCode, operationName, updateData);
|
||||
|
||||
logger.info(`Operation tracker updated for ${symbol}.${exchange} - ${interval}`);
|
||||
|
||||
// If not finished, schedule next batch
|
||||
if (!newState.finished) {
|
||||
if (!updateData.finished) {
|
||||
await this.scheduleOperation('crawl-intraday', {
|
||||
symbol,
|
||||
exchange,
|
||||
|
|
@ -261,14 +298,14 @@ export async function crawlIntraday(
|
|||
currentBatchFrom: fromDate.toISOString(),
|
||||
currentBatchTo: toDate.toISOString(),
|
||||
recordsSaved: result.recordsSaved,
|
||||
totalDaysProcessed: newState.totalDaysProcessed
|
||||
totalDaysProcessed: updateData.totalDaysProcessed
|
||||
});
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
recordsProcessed: result.recordsSaved,
|
||||
finished: newState.finished
|
||||
finished: updateData.finished
|
||||
};
|
||||
} catch (error) {
|
||||
logger.error('Failed to crawl intraday data', { error, symbol, exchange, interval });
|
||||
|
|
@ -279,7 +316,7 @@ export async function crawlIntraday(
|
|||
export async function fetchIntraday(
|
||||
this: EodHandler,
|
||||
input: FetchIntradayInput
|
||||
): Promise<{ success: boolean; recordsSaved: number }> {
|
||||
): Promise<{ success: boolean; recordsSaved: number; recordsFetched: number }> {
|
||||
const logger = this.logger;
|
||||
const { symbol, exchange, interval, fromDate, toDate, country } = input;
|
||||
|
||||
|
|
@ -345,7 +382,7 @@ export async function fetchIntraday(
|
|||
|
||||
if (data.length === 0) {
|
||||
logger.info('No intraday data returned', { symbol, exchange, interval });
|
||||
return { success: true, recordsSaved: 0 };
|
||||
return { success: true, recordsSaved: 0, recordsFetched: 0 };
|
||||
}
|
||||
|
||||
logger.info(`Fetched ${data.length} intraday records for ${symbol}.${exchange} - ${interval}`);
|
||||
|
|
@ -383,7 +420,8 @@ export async function fetchIntraday(
|
|||
|
||||
return {
|
||||
success: true,
|
||||
recordsSaved: result.insertedCount
|
||||
recordsSaved: result.insertedCount,
|
||||
recordsFetched: data.length
|
||||
};
|
||||
} catch (error) {
|
||||
logger.error('Failed to fetch intraday data', { error, symbol, exchange, interval });
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue