fixed up eod and added more aggressive skip logic to te
This commit is contained in:
parent
d47f77fdc7
commit
f4366f7289
8 changed files with 53 additions and 15 deletions
|
|
@ -1,6 +1,7 @@
|
|||
import type { BaseHandler } from '@stock-bot/handlers';
|
||||
import type { DataIngestionServices } from '../../../types';
|
||||
import { EOD_CONFIG } from '../shared';
|
||||
import { getEodExchangeSuffix } from '../shared/utils';
|
||||
|
||||
interface FetchCorporateActionsInput {
|
||||
symbol: string;
|
||||
|
|
@ -131,8 +132,8 @@ export async function fetchCorporateActions(
|
|||
}
|
||||
|
||||
// Build URL based on action type
|
||||
// For US symbols (Country: "USA"), use :US suffix instead of specific exchange code
|
||||
const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange;
|
||||
// Use utility function to handle US symbols and EUFUND special case
|
||||
const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry);
|
||||
|
||||
const endpoint = actionType === 'dividends' ? 'div' : 'splits';
|
||||
const url = new URL(`https://eodhd.com/api/${endpoint}/${symbol}.${exchangeSuffix}`);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import type { BaseHandler } from '@stock-bot/handlers';
|
||||
import type { DataIngestionServices } from '../../../types';
|
||||
import { EOD_CONFIG } from '../shared';
|
||||
import { getEodExchangeSuffix } from '../shared/utils';
|
||||
|
||||
interface BulkFundamentalsInput {
|
||||
symbols: Array<{ symbol: string; exchange: string; country?: string }>;
|
||||
|
|
@ -147,8 +148,8 @@ export async function fetchBulkFundamentals(
|
|||
if (!acc[exchange]) {
|
||||
acc[exchange] = [];
|
||||
}
|
||||
// For US symbols (Country: "USA"), use :US suffix
|
||||
const exchangeSuffix = country === 'USA' ? 'US' : exchange;
|
||||
// Use utility function to handle US symbols and EUFUND special case
|
||||
const exchangeSuffix = getEodExchangeSuffix(exchange, country);
|
||||
acc[exchange].push(`${symbol}.${exchangeSuffix}`);
|
||||
return acc;
|
||||
}, {} as Record<string, string[]>);
|
||||
|
|
@ -281,8 +282,8 @@ export async function fetchSingleFundamentals(
|
|||
}
|
||||
|
||||
// Build URL for single fundamentals endpoint
|
||||
// For US symbols (Country: "USA"), use :US suffix instead of specific exchange code
|
||||
const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange;
|
||||
// Use utility function to handle US symbols and EUFUND special case
|
||||
const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry);
|
||||
|
||||
const url = new URL(`https://eodhd.com/api/fundamentals/${symbol}.${exchangeSuffix}`);
|
||||
url.searchParams.append('api_token', apiKey);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import type { BaseHandler } from '@stock-bot/handlers';
|
||||
import type { DataIngestionServices } from '../../../types';
|
||||
import { EOD_CONFIG } from '../shared';
|
||||
import { getEodExchangeSuffix } from '../shared/utils';
|
||||
|
||||
interface FetchIntradayInput {
|
||||
symbol: string;
|
||||
|
|
@ -272,8 +273,8 @@ export async function fetchIntraday(
|
|||
}
|
||||
|
||||
// Build URL
|
||||
// For US symbols (Country: "USA"), use :US suffix instead of specific exchange code
|
||||
const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange;
|
||||
// Use utility function to handle US symbols and EUFUND special case
|
||||
const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry);
|
||||
|
||||
const url = new URL(`https://eodhd.com/api/intraday/${symbol}.${exchangeSuffix}`);
|
||||
url.searchParams.append('api_token', apiKey);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import type { BaseHandler } from '@stock-bot/handlers';
|
||||
import type { DataIngestionServices } from '../../../types';
|
||||
import { EOD_CONFIG } from '../shared';
|
||||
import { getEodExchangeSuffix } from '../shared/utils';
|
||||
|
||||
interface FetchPricesInput {
|
||||
symbol: string;
|
||||
|
|
@ -112,8 +113,8 @@ export async function fetchPrices(
|
|||
}
|
||||
|
||||
// Build URL for EOD price data
|
||||
// For US symbols (Country: "USA"), use :US suffix instead of specific exchange code
|
||||
const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange;
|
||||
// Use utility function to handle US symbols and EUFUND special case
|
||||
const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry);
|
||||
|
||||
const url = new URL(`https://eodhd.com/api/eod/${symbol}.${exchangeSuffix}`);
|
||||
url.searchParams.append('api_token', apiKey);
|
||||
|
|
|
|||
|
|
@ -1,8 +1,14 @@
|
|||
/**
 * Resolve the exchange code that is appended to a symbol in EOD API requests.
 *
 * Rules, in order:
 * - `EUFUND` and `GBOND` always keep their own exchange code, regardless of country.
 * - Otherwise, a symbol whose country is exactly `'USA'` uses the shared `US` code.
 * - Everything else (including a missing country) uses the exchange code as given.
 *
 * Callers join the result to the symbol with a dot, e.g. `${symbol}.${suffix}`.
 *
 * @param exchange - Exchange code stored for the symbol.
 * @param country - Country of the symbol; only the exact value `'USA'` is special-cased.
 * @returns The exchange code to place after the symbol.
 */
export function getEodExchangeSuffix(exchange: string, country?: string): string {
  // Special cases that always use their own exchange code, even for US symbols
  if (exchange === 'EUFUND' || exchange === 'GBOND') {
    return exchange;
  }

  // US symbols share the generic US code instead of their specific exchange
  return country === 'USA' ? 'US' : exchange;
}
|
||||
|
||||
|
|
|
|||
|
|
@ -8,14 +8,22 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom
|
|||
const reqUrl = payload && payload.url ? TE_CONFIG.MAIN_URL + payload.url : TE_CONFIG.MAIN_URL;
|
||||
this.logger.info(`Spiderring URL: ${reqUrl}`, {reqUrl});
|
||||
|
||||
// if( mongoRecord){
|
||||
// const url = mongoRecord.url;
|
||||
// if (shouldSkipUrl(url)) {
|
||||
// logger.info(`Skipping URL ${url} as its too deep`);
|
||||
// return null;
|
||||
// }
|
||||
// }
|
||||
const mongoRecord = await mongodb?.findOne('teUrls', { url: payload?.url || '/' });
|
||||
if(payload && payload.url && mongoRecord && mongoRecord.lastCrawled && mongoRecord.lastCrawled.getTime() > Date.now() - 7* 24 * 60 * 60 * 1000) {
|
||||
if(payload && payload.url && mongoRecord && mongoRecord.lastCrawled && mongoRecord.lastCrawled.getTime() > Date.now() - 30 * 24 * 60 * 60 * 1000) {
|
||||
this.logger.info(`Skipping URL ${reqUrl} as it was already crawled in the last 24 hours`);
|
||||
return null; // Skip if already crawled in the last 24 hours
|
||||
}
|
||||
|
||||
|
||||
if (!payload) {
|
||||
const oneDayAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000);
|
||||
const oneDayAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000);
|
||||
const records = await mongodb?.find('teUrls', {
|
||||
$or: [
|
||||
{ lastCrawled: { $lt: oneDayAgo } }, // Crawled more than 24 hours ago
|
||||
|
|
@ -24,6 +32,12 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom
|
|||
});
|
||||
this.logger.info(`Found ${records?.length || 0} records to process`);
|
||||
for (const record of records || []) {
|
||||
const url = record.url;
|
||||
if (shouldSkipUrl(url)) {
|
||||
logger.info(`Skipping URL ${url} as its too deep`);
|
||||
continue;
|
||||
}
|
||||
|
||||
await this.scheduleOperation('te-spider', {
|
||||
url: record.url,
|
||||
}, {
|
||||
|
|
@ -121,6 +135,10 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom
|
|||
}
|
||||
|
||||
for (const url of urls) {
|
||||
if (shouldSkipUrl(url)) {
|
||||
logger.info(`Skipping URL ${url} as its too deep`);
|
||||
continue; // Skip if it's a subpage or already crawled
|
||||
}
|
||||
this.scheduleOperation('te-spider', {
|
||||
url: url,
|
||||
}, {
|
||||
|
|
@ -138,3 +156,12 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom
|
|||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Decide whether a URL is nested too deeply to be worth spidering.
 *
 * Depth is measured purely by the number of `/` characters in the string:
 * four or more means "skip". A missing or empty URL is never skipped.
 * This check does not look at crawl history.
 *
 * NOTE(review): callers appear to pass site-relative paths; an absolute URL
 * would also count the slashes in its scheme — confirm against callers.
 *
 * @param url - URL or path to inspect.
 * @returns `true` when the URL contains at least four slashes.
 */
function shouldSkipUrl(url?: string): boolean {
  if (!url) {
    return false;
  }

  // split() yields one more piece than there are separators
  const slashCount = url.split('/').length - 1;
  return slashCount >= 4;
}
|
||||
|
|
@ -8,7 +8,7 @@ import type { DataIngestionServices } from '../../types';
|
|||
import { fetchCountries, spiderUrl } from './actions';
|
||||
|
||||
@Handler('te')
|
||||
@Disabled()
|
||||
// @Disabled()
|
||||
export class TeHandler extends BaseHandler<DataIngestionServices> {
|
||||
constructor(services: any) {
|
||||
super(services);
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ Data Ingestion
|
|||
- CEO - watch all and save them to posts. do it every minute
|
||||
- Test our handler specific rate limits
|
||||
- Fix up handler worker counts
|
||||
- In EOD "get symbols": instead of using the exchange returned in the symbol list, save the original exchange we searched for, to avoid issues
|
||||
|
||||
|
||||
Servers
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue