Fixed up EOD exchange-suffix handling and added more aggressive skip logic to the TE spider
This commit is contained in:
parent
d47f77fdc7
commit
f4366f7289
8 changed files with 53 additions and 15 deletions
|
|
@ -1,6 +1,7 @@
|
||||||
import type { BaseHandler } from '@stock-bot/handlers';
|
import type { BaseHandler } from '@stock-bot/handlers';
|
||||||
import type { DataIngestionServices } from '../../../types';
|
import type { DataIngestionServices } from '../../../types';
|
||||||
import { EOD_CONFIG } from '../shared';
|
import { EOD_CONFIG } from '../shared';
|
||||||
|
import { getEodExchangeSuffix } from '../shared/utils';
|
||||||
|
|
||||||
interface FetchCorporateActionsInput {
|
interface FetchCorporateActionsInput {
|
||||||
symbol: string;
|
symbol: string;
|
||||||
|
|
@ -131,8 +132,8 @@ export async function fetchCorporateActions(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build URL based on action type
|
// Build URL based on action type
|
||||||
// For US symbols (Country: "USA"), use :US suffix instead of specific exchange code
|
// Use utility function to handle US symbols and EUFUND special case
|
||||||
const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange;
|
const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry);
|
||||||
|
|
||||||
const endpoint = actionType === 'dividends' ? 'div' : 'splits';
|
const endpoint = actionType === 'dividends' ? 'div' : 'splits';
|
||||||
const url = new URL(`https://eodhd.com/api/${endpoint}/${symbol}.${exchangeSuffix}`);
|
const url = new URL(`https://eodhd.com/api/${endpoint}/${symbol}.${exchangeSuffix}`);
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
import type { BaseHandler } from '@stock-bot/handlers';
|
import type { BaseHandler } from '@stock-bot/handlers';
|
||||||
import type { DataIngestionServices } from '../../../types';
|
import type { DataIngestionServices } from '../../../types';
|
||||||
import { EOD_CONFIG } from '../shared';
|
import { EOD_CONFIG } from '../shared';
|
||||||
|
import { getEodExchangeSuffix } from '../shared/utils';
|
||||||
|
|
||||||
interface BulkFundamentalsInput {
|
interface BulkFundamentalsInput {
|
||||||
symbols: Array<{ symbol: string; exchange: string; country?: string }>;
|
symbols: Array<{ symbol: string; exchange: string; country?: string }>;
|
||||||
|
|
@ -147,8 +148,8 @@ export async function fetchBulkFundamentals(
|
||||||
if (!acc[exchange]) {
|
if (!acc[exchange]) {
|
||||||
acc[exchange] = [];
|
acc[exchange] = [];
|
||||||
}
|
}
|
||||||
// For US symbols (Country: "USA"), use :US suffix
|
// Use utility function to handle US symbols and EUFUND special case
|
||||||
const exchangeSuffix = country === 'USA' ? 'US' : exchange;
|
const exchangeSuffix = getEodExchangeSuffix(exchange, country);
|
||||||
acc[exchange].push(`${symbol}.${exchangeSuffix}`);
|
acc[exchange].push(`${symbol}.${exchangeSuffix}`);
|
||||||
return acc;
|
return acc;
|
||||||
}, {} as Record<string, string[]>);
|
}, {} as Record<string, string[]>);
|
||||||
|
|
@ -281,8 +282,8 @@ export async function fetchSingleFundamentals(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build URL for single fundamentals endpoint
|
// Build URL for single fundamentals endpoint
|
||||||
// For US symbols (Country: "USA"), use :US suffix instead of specific exchange code
|
// Use utility function to handle US symbols and EUFUND special case
|
||||||
const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange;
|
const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry);
|
||||||
|
|
||||||
const url = new URL(`https://eodhd.com/api/fundamentals/${symbol}.${exchangeSuffix}`);
|
const url = new URL(`https://eodhd.com/api/fundamentals/${symbol}.${exchangeSuffix}`);
|
||||||
url.searchParams.append('api_token', apiKey);
|
url.searchParams.append('api_token', apiKey);
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
import type { BaseHandler } from '@stock-bot/handlers';
|
import type { BaseHandler } from '@stock-bot/handlers';
|
||||||
import type { DataIngestionServices } from '../../../types';
|
import type { DataIngestionServices } from '../../../types';
|
||||||
import { EOD_CONFIG } from '../shared';
|
import { EOD_CONFIG } from '../shared';
|
||||||
|
import { getEodExchangeSuffix } from '../shared/utils';
|
||||||
|
|
||||||
interface FetchIntradayInput {
|
interface FetchIntradayInput {
|
||||||
symbol: string;
|
symbol: string;
|
||||||
|
|
@ -272,8 +273,8 @@ export async function fetchIntraday(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build URL
|
// Build URL
|
||||||
// For US symbols (Country: "USA"), use :US suffix instead of specific exchange code
|
// Use utility function to handle US symbols and EUFUND special case
|
||||||
const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange;
|
const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry);
|
||||||
|
|
||||||
const url = new URL(`https://eodhd.com/api/intraday/${symbol}.${exchangeSuffix}`);
|
const url = new URL(`https://eodhd.com/api/intraday/${symbol}.${exchangeSuffix}`);
|
||||||
url.searchParams.append('api_token', apiKey);
|
url.searchParams.append('api_token', apiKey);
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
import type { BaseHandler } from '@stock-bot/handlers';
|
import type { BaseHandler } from '@stock-bot/handlers';
|
||||||
import type { DataIngestionServices } from '../../../types';
|
import type { DataIngestionServices } from '../../../types';
|
||||||
import { EOD_CONFIG } from '../shared';
|
import { EOD_CONFIG } from '../shared';
|
||||||
|
import { getEodExchangeSuffix } from '../shared/utils';
|
||||||
|
|
||||||
interface FetchPricesInput {
|
interface FetchPricesInput {
|
||||||
symbol: string;
|
symbol: string;
|
||||||
|
|
@ -112,8 +113,8 @@ export async function fetchPrices(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build URL for EOD price data
|
// Build URL for EOD price data
|
||||||
// For US symbols (Country: "USA"), use :US suffix instead of specific exchange code
|
// Use utility function to handle US symbols and EUFUND special case
|
||||||
const exchangeSuffix = symbolCountry === 'USA' ? 'US' : exchange;
|
const exchangeSuffix = getEodExchangeSuffix(exchange, symbolCountry);
|
||||||
|
|
||||||
const url = new URL(`https://eodhd.com/api/eod/${symbol}.${exchangeSuffix}`);
|
const url = new URL(`https://eodhd.com/api/eod/${symbol}.${exchangeSuffix}`);
|
||||||
url.searchParams.append('api_token', apiKey);
|
url.searchParams.append('api_token', apiKey);
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,14 @@
|
||||||
/**
|
/**
|
||||||
* Get the exchange suffix for EOD API calls based on country
|
* Get the exchange suffix for EOD API calls based on country and exchange
|
||||||
* US symbols use :US suffix, others use their actual exchange code
|
* US symbols use the .US suffix, except EUFUND and GBOND which always use their own codes
|
||||||
|
* Others use their actual exchange code
|
||||||
*/
|
*/
|
||||||
export function getEodExchangeSuffix(exchange: string, country?: string): string {
|
export function getEodExchangeSuffix(exchange: string, country?: string): string {
|
||||||
|
// Special cases that always use their own exchange code
|
||||||
|
if (exchange === 'EUFUND' || exchange === 'GBOND') {
|
||||||
|
return exchange;
|
||||||
|
}
|
||||||
|
// US symbols use the .US suffix
|
||||||
return country === 'USA' ? 'US' : exchange;
|
return country === 'USA' ? 'US' : exchange;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,14 +8,22 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom
|
||||||
const reqUrl = payload && payload.url ? TE_CONFIG.MAIN_URL + payload.url : TE_CONFIG.MAIN_URL;
|
const reqUrl = payload && payload.url ? TE_CONFIG.MAIN_URL + payload.url : TE_CONFIG.MAIN_URL;
|
||||||
this.logger.info(`Spiderring URL: ${reqUrl}`, {reqUrl});
|
this.logger.info(`Spiderring URL: ${reqUrl}`, {reqUrl});
|
||||||
|
|
||||||
|
// if( mongoRecord){
|
||||||
|
// const url = mongoRecord.url;
|
||||||
|
// if (shouldSkipUrl(url)) {
|
||||||
|
// logger.info(`Skipping URL ${url} as its too deep`);
|
||||||
|
// return null;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
const mongoRecord = await mongodb?.findOne('teUrls', { url: payload?.url || '/' });
|
const mongoRecord = await mongodb?.findOne('teUrls', { url: payload?.url || '/' });
|
||||||
if(payload && payload.url && mongoRecord && mongoRecord.lastCrawled && mongoRecord.lastCrawled.getTime() > Date.now() - 7* 24 * 60 * 60 * 1000) {
|
if(payload && payload.url && mongoRecord && mongoRecord.lastCrawled && mongoRecord.lastCrawled.getTime() > Date.now() - 30 * 24 * 60 * 60 * 1000) {
|
||||||
this.logger.info(`Skipping URL ${reqUrl} as it was already crawled in the last 24 hours`);
|
this.logger.info(`Skipping URL ${reqUrl} as it was already crawled in the last 24 hours`);
|
||||||
return null; // Skip if already crawled in the last 24 hours
|
return null; // Skip if already crawled in the last 24 hours
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (!payload) {
|
if (!payload) {
|
||||||
const oneDayAgo = new Date(Date.now() - 7 * 24 * 60 * 60 * 1000);
|
const oneDayAgo = new Date(Date.now() - 30 * 24 * 60 * 60 * 1000);
|
||||||
const records = await mongodb?.find('teUrls', {
|
const records = await mongodb?.find('teUrls', {
|
||||||
$or: [
|
$or: [
|
||||||
{ lastCrawled: { $lt: oneDayAgo } }, // Crawled more than 24 hours ago
|
{ lastCrawled: { $lt: oneDayAgo } }, // Crawled more than 24 hours ago
|
||||||
|
|
@ -24,6 +32,12 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom
|
||||||
});
|
});
|
||||||
this.logger.info(`Found ${records?.length || 0} records to process`);
|
this.logger.info(`Found ${records?.length || 0} records to process`);
|
||||||
for (const record of records || []) {
|
for (const record of records || []) {
|
||||||
|
const url = record.url;
|
||||||
|
if (shouldSkipUrl(url)) {
|
||||||
|
logger.info(`Skipping URL ${url} as its too deep`);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
await this.scheduleOperation('te-spider', {
|
await this.scheduleOperation('te-spider', {
|
||||||
url: record.url,
|
url: record.url,
|
||||||
}, {
|
}, {
|
||||||
|
|
@ -121,6 +135,10 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const url of urls) {
|
for (const url of urls) {
|
||||||
|
if (shouldSkipUrl(url)) {
|
||||||
|
logger.info(`Skipping URL ${url} as its too deep`);
|
||||||
|
continue; // Skip if it's a subpage or already crawled
|
||||||
|
}
|
||||||
this.scheduleOperation('te-spider', {
|
this.scheduleOperation('te-spider', {
|
||||||
url: url,
|
url: url,
|
||||||
}, {
|
}, {
|
||||||
|
|
@ -137,4 +155,13 @@ export async function spiderUrl(this: TeHandler, payload: { url: string }): Prom
|
||||||
});
|
});
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function shouldSkipUrl(url?: string): boolean {
|
||||||
|
// Skip URLs that are too deep, i.e. contain four or more '/' characters; a missing URL is never skipped
|
||||||
|
if (!url) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const matches = url.match(/\//g);
|
||||||
|
return matches !== null && matches.length >= 4;
|
||||||
}
|
}
|
||||||
|
|
@ -8,7 +8,7 @@ import type { DataIngestionServices } from '../../types';
|
||||||
import { fetchCountries, spiderUrl } from './actions';
|
import { fetchCountries, spiderUrl } from './actions';
|
||||||
|
|
||||||
@Handler('te')
|
@Handler('te')
|
||||||
@Disabled()
|
// @Disabled()
|
||||||
export class TeHandler extends BaseHandler<DataIngestionServices> {
|
export class TeHandler extends BaseHandler<DataIngestionServices> {
|
||||||
constructor(services: any) {
|
constructor(services: any) {
|
||||||
super(services);
|
super(services);
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ Data Ingestion
|
||||||
- CEO - watch all and save them to posts. do it every minute
|
- CEO - watch all and save them to posts. do it every minute
|
||||||
- Test our handler specific rate limits
|
- Test our handler specific rate limits
|
||||||
- Fix up handler worker counts
|
- Fix up handler worker counts
|
||||||
|
- In EOD get Symbols: instead of using the exchange from the returned symbol list, save the original exchange we searched for, to avoid issues
|
||||||
|
|
||||||
|
|
||||||
Servers
|
Servers
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue