From f4366f7289a64840f81fb225de980dff1efb8a2f Mon Sep 17 00:00:00 2001 From: Boki Date: Tue, 8 Jul 2025 08:51:33 -0400 Subject: [PATCH] fixed up eod and added more aggressive skip logic to te --- .../handlers/eod/actions/corporate-actions.ts | 5 +-- .../src/handlers/eod/actions/fundamentals.ts | 9 +++--- .../src/handlers/eod/actions/intraday.ts | 5 +-- .../src/handlers/eod/actions/prices.ts | 5 +-- .../src/handlers/eod/shared/utils.ts | 10 ++++-- .../src/handlers/te/actions/spider.action.ts | 31 +++++++++++++++++-- .../src/handlers/te/te.handler.ts | 2 +- docs/todo.md | 1 + 8 files changed, 53 insertions(+), 15 deletions(-) diff --git a/apps/stock/data-ingestion/src/handlers/eod/actions/corporate-actions.ts b/apps/stock/data-ingestion/src/handlers/eod/actions/corporate-actions.ts index 1938d61..6fd0994 100644 --- a/apps/stock/data-ingestion/src/handlers/eod/actions/corporate-actions.ts +++ b/apps/stock/data-ingestion/src/handlers/eod/actions/corporate-actions.ts @@ -1,6 +1,7 @@ import type { BaseHandler } from '@stock-bot/handlers'; import type { DataIngestionServices } from '../../../types'; import { EOD_CONFIG } from '../shared'; +import { getEodExchangeSuffix } from '../shared/utils'; interface FetchCorporateActionsInput { symbol: string; @@ -131,8 +132,8 @@ export async function fetchCorporateActions( } // Build URL based on action type - // For US symbols (Country: "USA"), use :US suffix instead of specific exchange code - const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange; + // Use utility function to handle US symbols and EUFUND special case + const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry); const endpoint = actionType === 'dividends' ? 'div' : 'splits'; const url = new URL(`https://eodhd.com/api/${endpoint}/${symbol}.${exchangeSuffix}`); diff --git a/apps/stock/data-ingestion/src/handlers/eod/actions/fundamentals.ts b/apps/stock/data-ingestion/src/handlers/eod/actions/fundamentals.ts index 66eb33d..e5daad1 100644 --- a/apps/stock/data-ingestion/src/handlers/eod/actions/fundamentals.ts +++ b/apps/stock/data-ingestion/src/handlers/eod/actions/fundamentals.ts @@ -1,6 +1,7 @@ import type { BaseHandler } from '@stock-bot/handlers'; import type { DataIngestionServices } from '../../../types'; import { EOD_CONFIG } from '../shared'; +import { getEodExchangeSuffix } from '../shared/utils'; interface BulkFundamentalsInput { symbols: Array<{ symbol: string; exchange: string; country?: string }>; @@ -147,8 +148,8 @@ export async function fetchBulkFundamentals( if (!acc[exchange]) { acc[exchange] = []; } - // For US symbols (Country: "USA"), use :US suffix - const exchangeSuffix = country === 'USA' ? 'US' : exchange; + // Use utility function to handle US symbols and EUFUND special case + const exchangeSuffix = getEodExchangeSuffix(exchange, country); acc[exchange].push(`${symbol}.${exchangeSuffix}`); return acc; }, {} as Record); @@ -281,8 +282,8 @@ export async function fetchSingleFundamentals( } // Build URL for single fundamentals endpoint - // For US symbols (Country: "USA"), use :US suffix instead of specific exchange code - const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange; + // Use utility function to handle US symbols and EUFUND special case + const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry); const url = new URL(`https://eodhd.com/api/fundamentals/${symbol}.${exchangeSuffix}`); url.searchParams.append('api_token', apiKey); diff --git a/apps/stock/data-ingestion/src/handlers/eod/actions/intraday.ts b/apps/stock/data-ingestion/src/handlers/eod/actions/intraday.ts index 04340a8..5f82ae2 100644 --- a/apps/stock/data-ingestion/src/handlers/eod/actions/intraday.ts +++ b/apps/stock/data-ingestion/src/handlers/eod/actions/intraday.ts @@ -1,6 +1,7 @@ import type { BaseHandler } from '@stock-bot/handlers'; import type { DataIngestionServices } from '../../../types'; import { EOD_CONFIG } from '../shared'; +import { getEodExchangeSuffix } from '../shared/utils'; interface FetchIntradayInput { symbol: string; @@ -272,8 +273,8 @@ export async function fetchIntraday( } // Build URL - // For US symbols (Country: "USA"), use :US suffix instead of specific exchange code - const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange; + // Use utility function to handle US symbols and EUFUND special case + const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry); const url = new URL(`https://eodhd.com/api/intraday/${symbol}.${exchangeSuffix}`); url.searchParams.append('api_token', apiKey); diff --git a/apps/stock/data-ingestion/src/handlers/eod/actions/prices.ts b/apps/stock/data-ingestion/src/handlers/eod/actions/prices.ts index 0ae5f0a..acb8b78 100644 --- a/apps/stock/data-ingestion/src/handlers/eod/actions/prices.ts +++ b/apps/stock/data-ingestion/src/handlers/eod/actions/prices.ts @@ -1,6 +1,7 @@ import type { BaseHandler } from '@stock-bot/handlers'; import type { DataIngestionServices } from '../../../types'; import { EOD_CONFIG } from '../shared'; +import { getEodExchangeSuffix } from '../shared/utils'; interface FetchPricesInput { symbol: string; @@ -112,8 +113,8 @@ export async function fetchPrices( } // Build URL for EOD price data - // For US symbols (Country: "USA"), use :US suffix instead of specific exchange code - const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange; + // Use utility function to handle US symbols and EUFUND special case + const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry); const url = new URL(`https://eodhd.com/api/eod/${symbol}.${exchangeSuffix}`); url.searchParams.append('api_token', apiKey); diff --git a/apps/stock/data-ingestion/src/handlers/eod/shared/utils.ts b/apps/stock/data-ingestion/src/handlers/eod/shared/utils.ts index b0a8690..bd172ca 100644 --- a/apps/stock/data-ingestion/src/handlers/eod/shared/utils.ts +++ b/apps/stock/data-ingestion/src/handlers/eod/shared/utils.ts @@ -1,8 +1,14 @@ /** - * Get the exchange suffix for EOD API calls based on country - * US symbols use :US suffix, others use their actual exchange code + * Get the exchange suffix for EOD API calls based on country and exchange + * US symbols use :US suffix, except EUFUND and GBOND which always use their own codes + * Others use their actual exchange code */ export function getEodExchangeSuffix(exchange: string, country?: string): string { + // Special cases that always use their own exchange code + if (exchange === 'EUFUND' || exchange === 'GBOND') { + return exchange; + } + // US symbols use :US suffix return country === 'USA' ? 'US' : exchange; } diff --git a/apps/stock/data-ingestion/src/handlers/te/actions/spider.action.ts b/apps/stock/data-ingestion/src/handlers/te/actions/spider.action.ts index d5f27c6..d7cc71b 100644 --- a/apps/stock/data-ingestion/src/handlers/te/actions/spider.action.ts +++ b/apps/stock/data-ingestion/src/handlers/te/actions/spider.action.ts @@ -8,14 +8,22 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom const reqUrl = payload && payload.url ? TE_CONFIG.MAIN_URL + payload.url : TE_CONFIG.MAIN_URL; this.logger.info(`Spiderring URL: ${reqUrl}`, {reqUrl}); + // if( mongoRecord){ + // const url = mongoRecord.url; + // if (shouldSkipUrl(url)) { + // logger.info(`Skipping URL ${url} as its too deep`); + // return null; + // } + // } const mongoRecord = await mongodb?.findOne('teUrls', { url: payload?.url || '/' }); - if(payload && payload.url && mongoRecord && mongoRecord.lastCrawled && mongoRecord.lastCrawled.getTime() > Date.now() - 7* 24 * 60 * 60 * 1000) { + if(payload && payload.url && mongoRecord && mongoRecord.lastCrawled && mongoRecord.lastCrawled.getTime() > Date.now() - 30 * 24 * 60 * 60 * 1000) { this.logger.info(`Skipping URL ${reqUrl} as it was already crawled in the last 24 hours`); return null; // Skip if already crawled in the last 24 hours } + if (!payload) { - const oneDayAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000); + const oneDayAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000); const records = await mongodb?.find('teUrls', { $or: [ { lastCrawled: { $lt: oneDayAgo } }, // Crawled more than 24 hours ago @@ -24,6 +32,12 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom }); this.logger.info(`Found ${records?.length || 0} records to process`); for (const record of records || []) { + const url = record.url; + if (shouldSkipUrl(url)) { + logger.info(`Skipping URL ${url} as its too deep`); + continue; + } + await this.scheduleOperation('te-spider', { url: record.url, }, { @@ -121,6 +135,10 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom } for (const url of urls) { + if (shouldSkipUrl(url)) { + logger.info(`Skipping URL ${url} as its too deep`); + continue; // Skip if it's a subpage or already crawled + } this.scheduleOperation('te-spider', { url: url, }, { @@ -137,4 +155,13 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom }); return null; } +} + +function shouldSkipUrl(url?: string): boolean { + // Skip if it's a subpage or already crawled in the last 24 hours + if (!url) { + return false; + } + const matches = url.match(/\//g); + return matches !== null && matches.length >= 4; } \ No newline at end of file diff --git a/apps/stock/data-ingestion/src/handlers/te/te.handler.ts b/apps/stock/data-ingestion/src/handlers/te/te.handler.ts index d5fe07c..35d5559 100644 --- a/apps/stock/data-ingestion/src/handlers/te/te.handler.ts +++ b/apps/stock/data-ingestion/src/handlers/te/te.handler.ts @@ -8,7 +8,7 @@ import type { DataIngestionServices } from '../../types'; import { fetchCountries, spiderUrl } from './actions'; @Handler('te') -@Disabled() +// @Disabled() export class TeHandler extends BaseHandler { constructor(services: any) { super(services); diff --git a/docs/todo.md b/docs/todo.md index b431ed9..f4479a8 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -6,6 +6,7 @@ Data Ingestion - CEO - watch all and save them to posts. do it every minute - Test our handler specific rate limits - Fix up handler worker counts +- In EOD get Symbols. instead of using their exchange in the symbol list save the original we searched for to avoid issues Servers