Fixed up EOD exchange-suffix handling and added more aggressive skip logic to the TE spider

This commit is contained in:
Boki 2025-07-08 08:51:33 -04:00
parent d47f77fdc7
commit f4366f7289
8 changed files with 53 additions and 15 deletions

View file

@ -1,6 +1,7 @@
import type { BaseHandler } from '@stock-bot/handlers';
import type { DataIngestionServices } from '../../../types';
import { EOD_CONFIG } from '../shared';
import { getEodExchangeSuffix } from '../shared/utils';
interface FetchCorporateActionsInput {
symbol: string;
@ -131,8 +132,8 @@ export async function fetchCorporateActions(
}
// Build URL based on action type
// For US symbols (Country: "USA"), use :US suffix instead of specific exchange code
const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange;
// Use utility function to handle US symbols and EUFUND special case
const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry);
const endpoint = actionType === 'dividends' ? 'div' : 'splits';
const url = new URL(`https://eodhd.com/api/${endpoint}/${symbol}.${exchangeSuffix}`);

View file

@ -1,6 +1,7 @@
import type { BaseHandler } from '@stock-bot/handlers';
import type { DataIngestionServices } from '../../../types';
import { EOD_CONFIG } from '../shared';
import { getEodExchangeSuffix } from '../shared/utils';
interface BulkFundamentalsInput {
symbols: Array<{ symbol: string; exchange: string; country?: string }>;
@ -147,8 +148,8 @@ export async function fetchBulkFundamentals(
if (!acc[exchange]) {
acc[exchange] = [];
}
// For US symbols (Country: "USA"), use :US suffix
const exchangeSuffix = country === 'USA' ? 'US' : exchange;
// Use utility function to handle US symbols and EUFUND special case
const exchangeSuffix = getEodExchangeSuffix(exchange, country);
acc[exchange].push(`${symbol}.${exchangeSuffix}`);
return acc;
}, {} as Record<string, string[]>);
@ -281,8 +282,8 @@ export async function fetchSingleFundamentals(
}
// Build URL for single fundamentals endpoint
// For US symbols (Country: "USA"), use :US suffix instead of specific exchange code
const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange;
// Use utility function to handle US symbols and EUFUND special case
const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry);
const url = new URL(`https://eodhd.com/api/fundamentals/${symbol}.${exchangeSuffix}`);
url.searchParams.append('api_token', apiKey);

View file

@ -1,6 +1,7 @@
import type { BaseHandler } from '@stock-bot/handlers';
import type { DataIngestionServices } from '../../../types';
import { EOD_CONFIG } from '../shared';
import { getEodExchangeSuffix } from '../shared/utils';
interface FetchIntradayInput {
symbol: string;
@ -272,8 +273,8 @@ export async function fetchIntraday(
}
// Build URL
// For US symbols (Country: "USA"), use :US suffix instead of specific exchange code
const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange;
// Use utility function to handle US symbols and EUFUND special case
const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry);
const url = new URL(`https://eodhd.com/api/intraday/${symbol}.${exchangeSuffix}`);
url.searchParams.append('api_token', apiKey);

View file

@ -1,6 +1,7 @@
import type { BaseHandler } from '@stock-bot/handlers';
import type { DataIngestionServices } from '../../../types';
import { EOD_CONFIG } from '../shared';
import { getEodExchangeSuffix } from '../shared/utils';
interface FetchPricesInput {
symbol: string;
@ -112,8 +113,8 @@ export async function fetchPrices(
}
// Build URL for EOD price data
// For US symbols (Country: "USA"), use :US suffix instead of specific exchange code
const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange;
// Use utility function to handle US symbols and EUFUND special case
const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry);
const url = new URL(`https://eodhd.com/api/eod/${symbol}.${exchangeSuffix}`);
url.searchParams.append('api_token', apiKey);

View file

@ -1,8 +1,14 @@
/**
* Get the exchange suffix for EOD API calls based on country
* US symbols use :US suffix, others use their actual exchange code
* Get the exchange suffix for EOD API calls based on country and exchange.
* US symbols use the generic "US" suffix (e.g. SYMBOL.US), except EUFUND and GBOND, which always use their own codes.
* All other symbols use their actual exchange code.
*/
export function getEodExchangeSuffix(exchange: string, country?: string): string {
  // EUFUND and GBOND are always addressed by their own exchange code,
  // no matter which country the symbol is attributed to.
  const keepsOwnCode = ['EUFUND', 'GBOND'].includes(exchange);

  // Every other symbol whose country is exactly "USA" shares the generic US suffix.
  if (!keepsOwnCode && country === 'USA') {
    return 'US';
  }

  // Everything else (including a missing country) uses the exchange code as-is.
  return exchange;
}

View file

@ -8,14 +8,22 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom
const reqUrl = payload && payload.url ? TE_CONFIG.MAIN_URL + payload.url : TE_CONFIG.MAIN_URL;
this.logger.info(`Spiderring URL: ${reqUrl}`, {reqUrl});
// if( mongoRecord){
// const url = mongoRecord.url;
// if (shouldSkipUrl(url)) {
// logger.info(`Skipping URL ${url} as its too deep`);
// return null;
// }
// }
const mongoRecord = await mongodb?.findOne('teUrls', { url: payload?.url || '/' });
if(payload && payload.url && mongoRecord && mongoRecord.lastCrawled && mongoRecord.lastCrawled.getTime() > Date.now() - 7* 24 * 60 * 60 * 1000) {
if(payload && payload.url && mongoRecord && mongoRecord.lastCrawled && mongoRecord.lastCrawled.getTime() > Date.now() - 30 * 24 * 60 * 60 * 1000) {
this.logger.info(`Skipping URL ${reqUrl} as it was already crawled in the last 24 hours`);
return null; // Skip if already crawled in the last 24 hours
}
if (!payload) {
const oneDayAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000);
const oneDayAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000);
const records = await mongodb?.find('teUrls', {
$or: [
{ lastCrawled: { $lt: oneDayAgo } }, // Crawled more than 24 hours ago
@ -24,6 +32,12 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom
});
this.logger.info(`Found ${records?.length || 0} records to process`);
for (const record of records || []) {
const url = record.url;
if (shouldSkipUrl(url)) {
logger.info(`Skipping URL ${url} as its too deep`);
continue;
}
await this.scheduleOperation('te-spider', {
url: record.url,
}, {
@ -121,6 +135,10 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom
}
for (const url of urls) {
if (shouldSkipUrl(url)) {
logger.info(`Skipping URL ${url} as its too deep`);
continue; // Skip if it's a subpage or already crawled
}
this.scheduleOperation('te-spider', {
url: url,
}, {
@ -137,4 +155,13 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom
});
return null;
}
}
/**
 * Decide whether a URL is nested too deeply to be crawled.
 *
 * Depth is measured purely by the number of "/" characters in the string:
 * four or more means the URL is skipped.
 *
 * @param url - The URL (or path) to inspect; may be absent.
 * @returns `true` when the URL contains at least four slashes, `false`
 *          otherwise, including when no URL (or an empty one) is given.
 */
function shouldSkipUrl(url?: string): boolean {
  // Nothing to measure: a missing or empty URL is never skipped.
  if (!url) {
    return false;
  }

  // Splitting on "/" yields one more piece than there are slashes.
  const slashCount = url.split('/').length - 1;
  return slashCount >= 4;
}

View file

@ -8,7 +8,7 @@ import type { DataIngestionServices } from '../../types';
import { fetchCountries, spiderUrl } from './actions';
@Handler('te')
@Disabled()
// @Disabled()
export class TeHandler extends BaseHandler<DataIngestionServices> {
constructor(services: any) {
super(services);