initial data-ingestion refactor

This commit is contained in:
Boki 2025-06-21 15:18:25 -04:00
parent 09d907a10c
commit 4f89affc2b
19 changed files with 309 additions and 549 deletions

View file

@ -3,11 +3,12 @@
*/
import { OperationContext } from '@stock-bot/utils';
import type { ServiceContainer } from '@stock-bot/connection-factory';
import { initializeQMResources } from './session.operations';
export async function fetchExchanges(): Promise<unknown[] | null> {
const ctx = OperationContext.create('qm', 'exchanges');
export async function fetchExchanges(container: ServiceContainer): Promise<unknown[] | null> {
const ctx = OperationContext.create('qm', 'exchanges', { container });
try {
// Ensure resources are initialized
@ -15,7 +16,7 @@ export async function fetchExchanges(): Promise<unknown[] | null> {
const sessionManager = QMSessionManager.getInstance();
if (!sessionManager.getInitialized()) {
await initializeQMResources();
await initializeQMResources(container);
}
ctx.logger.info('QM exchanges fetch - not implemented yet');
@ -37,5 +38,7 @@ export async function fetchExchanges(): Promise<unknown[] | null> {
} catch (error) {
ctx.logger.error('Failed to fetch QM exchanges', { error });
return null;
} finally {
await ctx.dispose();
}
}

View file

@ -5,13 +5,14 @@
import { OperationContext } from '@stock-bot/utils';
import { isShutdownSignalReceived } from '@stock-bot/shutdown';
import { getRandomProxy } from '@stock-bot/utils';
import type { ServiceContainer } from '@stock-bot/connection-factory';
import { QMSessionManager } from '../shared/session-manager';
import { QM_SESSION_IDS, QM_CONFIG, SESSION_CONFIG, getQmHeaders } from '../shared/config';
import type { QMSession } from '../shared/types';
export async function createSessions(): Promise<void> {
const ctx = OperationContext.create('qm', 'session');
export async function createSessions(container: ServiceContainer): Promise<void> {
const ctx = OperationContext.create('qm', 'session', { container });
try {
ctx.logger.info('Creating QM sessions...');
@ -21,7 +22,7 @@ export async function createSessions(): Promise<void> {
// Check if already initialized
if (!sessionManager.getInitialized()) {
await initializeQMResources();
await initializeQMResources(container);
}
// Clean up failed sessions first
@ -67,6 +68,8 @@ export async function createSessions(): Promise<void> {
} catch (error) {
ctx.logger.error('Failed to create QM sessions', { error });
throw error;
} finally {
await ctx.dispose();
}
}
@ -161,8 +164,8 @@ async function createSingleSession(
}
}
export async function initializeQMResources(): Promise<void> {
const ctx = OperationContext.create('qm', 'init');
export async function initializeQMResources(container?: ServiceContainer): Promise<void> {
const ctx = OperationContext.create('qm', 'init', container ? { container } : undefined);
// Check if already initialized
const alreadyInitialized = await ctx.cache.get('initialized');
@ -181,4 +184,6 @@ export async function initializeQMResources(): Promise<void> {
sessionManager.setInitialized(true);
ctx.logger.info('QM resources initialized successfully');
await ctx.dispose();
}

View file

@ -7,14 +7,16 @@ import { QueueManager } from '@stock-bot/queue';
import { QMSessionManager } from '../shared/session-manager';
import { QM_SESSION_IDS } from '../shared/config';
import type { ServiceContainer } from '@stock-bot/connection-factory';
import type { SymbolSpiderJob, SpiderResult } from '../shared/types';
import { initializeQMResources } from './session.operations';
import { searchQMSymbolsAPI } from './symbols.operations';
export async function spiderSymbolSearch(
payload: SymbolSpiderJob
payload: SymbolSpiderJob,
container: ServiceContainer
): Promise<SpiderResult> {
const ctx = OperationContext.create('qm', 'spider');
const ctx = OperationContext.create('qm', 'spider', { container });
try {
const { prefix, depth, source = 'qm', maxDepth = 4 } = payload;
@ -37,7 +39,7 @@ export async function spiderSymbolSearch(
// Ensure resources are initialized
const sessionManager = QMSessionManager.getInstance();
if (!sessionManager.getInitialized()) {
await initializeQMResources();
await initializeQMResources(container);
}
let result: SpiderResult;
@ -47,7 +49,7 @@ export async function spiderSymbolSearch(
result = await createAlphabetJobs(source, maxDepth, ctx);
} else {
// Leaf job: Search for symbols with this prefix
result = await searchAndSpawnJobs(prefix, depth, source, maxDepth, ctx);
result = await searchAndSpawnJobs(prefix, depth, source, maxDepth, ctx, container);
}
// Cache the result
@ -88,6 +90,8 @@ export async function spiderSymbolSearch(
await ctx.cache.set(cacheKey, failedResult, { ttl: 300 });
return failedResult;
} finally {
await ctx.dispose();
}
}
@ -154,7 +158,8 @@ async function searchAndSpawnJobs(
depth: number,
source: string,
maxDepth: number,
ctx: OperationContext
ctx: OperationContext,
container: ServiceContainer
): Promise<SpiderResult> {
try {
// Ensure sessions exist for symbol search
@ -164,14 +169,14 @@ async function searchAndSpawnJobs(
if (!lookupSession) {
ctx.logger.info('No lookup sessions available, creating sessions first...');
const { createSessions } = await import('./session.operations');
await createSessions();
await createSessions(container);
// Wait a bit for session creation
await new Promise(resolve => setTimeout(resolve, 1000));
}
// Search for symbols with this prefix
const symbols = await searchQMSymbolsAPI(prefix);
const symbols = await searchQMSymbolsAPI(prefix, container);
const symbolCount = symbols.length;
ctx.logger.info(`Prefix "${prefix}" returned ${symbolCount} symbols`);

View file

@ -4,6 +4,7 @@
import { OperationContext } from '@stock-bot/utils';
import { getRandomProxy } from '@stock-bot/utils';
import type { ServiceContainer } from '@stock-bot/connection-factory';
import { QMSessionManager } from '../shared/session-manager';
import { QM_SESSION_IDS, QM_CONFIG, SESSION_CONFIG } from '../shared/config';
@ -11,13 +12,13 @@ import type { SymbolSpiderJob, Exchange } from '../shared/types';
import { initializeQMResources } from './session.operations';
import { spiderSymbolSearch } from './spider.operations';
export async function fetchSymbols(): Promise<unknown[] | null> {
const ctx = OperationContext.create('qm', 'symbols');
export async function fetchSymbols(container: ServiceContainer): Promise<unknown[] | null> {
const ctx = OperationContext.create('qm', 'symbols', { container });
try {
const sessionManager = QMSessionManager.getInstance();
if (!sessionManager.getInitialized()) {
await initializeQMResources();
await initializeQMResources(container);
}
ctx.logger.info('Starting QM spider-based symbol search...');
@ -57,11 +58,13 @@ export async function fetchSymbols(): Promise<unknown[] | null> {
} catch (error) {
ctx.logger.error('Failed to start QM spider symbol search', { error });
return null;
} finally {
await ctx.dispose();
}
}
export async function searchQMSymbolsAPI(query: string): Promise<any[]> {
const ctx = OperationContext.create('qm', 'api-search');
export async function searchQMSymbolsAPI(query: string, container: ServiceContainer): Promise<any[]> {
const ctx = OperationContext.create('qm', 'api-search', { container });
const proxyInfo = await getRandomProxy();
if (!proxyInfo) {
@ -191,5 +194,7 @@ export async function searchQMSymbolsAPI(query: string): Promise<any[]> {
});
throw error;
} finally {
await ctx.dispose();
}
}

View file

@ -4,12 +4,13 @@ import {
handlerRegistry,
type HandlerConfigWithSchedule
} from '@stock-bot/queue';
import type { ServiceContainer } from '@stock-bot/connection-factory';
import type { SymbolSpiderJob } from './shared/types';
const handlerLogger = getLogger('qm-handler');
// Initialize and register the QM provider
export function initializeQMProvider() {
export function initializeQMProvider(container: ServiceContainer) {
handlerLogger.debug('Registering QM provider with scheduled jobs...');
const qmProviderConfig: HandlerConfigWithSchedule = {
@ -17,12 +18,12 @@ export function initializeQMProvider() {
operations: {
'create-sessions': createJobHandler(async () => {
const { createSessions } = await import('./operations/session.operations');
await createSessions();
await createSessions(container);
return { success: true, message: 'QM sessions created successfully' };
}),
'search-symbols': createJobHandler(async () => {
const { fetchSymbols } = await import('./operations/symbols.operations');
const symbols = await fetchSymbols();
const symbols = await fetchSymbols(container);
if (symbols && symbols.length > 0) {
return {
@ -41,9 +42,7 @@ export function initializeQMProvider() {
}),
'spider-symbol-search': createJobHandler(async (payload: SymbolSpiderJob) => {
const { spiderSymbolSearch } = await import('./operations/spider.operations');
const result = await spiderSymbolSearch(payload);
return result;
return await spiderSymbolSearch(payload, container);
}),
},

View file

@ -1,420 +0,0 @@
import { getRandomUserAgent } from '@stock-bot/http';
import { getLogger } from '@stock-bot/logger';
import { getMongoDBClient } from '@stock-bot/mongodb-client';
import { QueueManager } from '@stock-bot/queue';
import { isShutdownSignalReceived } from '@stock-bot/shutdown';
import { getRandomProxy } from '@stock-bot/utils';
// Shared instances (module-scoped, not global)
// `isInitialized` is flipped to true by initializeQMResources(); the other functions in this
// module check it before touching `logger`.
let isInitialized = false; // Track if resources are initialized
// Assigned by initializeQMResources() via getLogger('qm-tasks'); unset until that has run.
let logger: ReturnType<typeof getLogger>;
// Disabled cache handle, kept from an earlier iteration.
// let cache: CacheProvider;
/**
 * One authenticated QM session held in the in-memory session cache.
 * Instances are created by createSessions() and their counters are updated by
 * searchQMSymbolsAPI().
 */
export interface QMSession {
// Proxy URL used for every request made with this session (built as protocol://[user:pass@]host:port).
proxy: string;
// Request headers; createSessions() adds the 'Datatool-Token' entry after authenticating.
headers: Record<string, string>;
// Incremented after each successful symbol lookup made with this session.
successfulCalls: number;
// Incremented after each failed lookup; createSessions() drops sessions with more than 10 failures.
failedCalls: number;
// Set at creation and refreshed after every lookup attempt.
lastUsed: Date;
}
/**
 * Payload of a 'spider-symbol-search' queue job.
 * A job without a prefix is the root job that enqueues one job per letter A-Z; a job with a
 * prefix searches that prefix and may enqueue 26 child jobs (prefix + A..Z).
 */
export interface SymbolSpiderJob {
prefix: string | null; // null = root job (A-Z)
depth: number; // 1=A, 2=AA, 3=AAA, etc.
source: string; // 'qm'
maxDepth?: number; // optional max depth limit
}
/**
 * Exchange record derived from symbol lookup results.
 * searchQMSymbolsAPI() copies these fields from each returned symbol and upserts them into the
 * 'qmExchanges' collection keyed by `exchange`.
 */
interface Exchange {
exchange: string;
exchangeCode: string;
exchangeShortName: string;
countryCode: string;
// Data provider tag; searchQMSymbolsAPI() always writes 'qm'.
source: string;
}
/**
 * Builds the header set sent with QM requests.
 *
 * The user agent comes from getRandomUserAgent() on every call; all other
 * headers are fixed values.
 *
 * @returns A fresh, mutable header map (callers add the session token to it).
 */
function getQmHeaders(): Record<string, string> {
  const userAgent = getRandomUserAgent();
  const headers: Record<string, string> = {
    'User-Agent': userAgent,
    Accept: '*/*',
    'Accept-Language': 'en',
    'Sec-Fetch-Mode': 'cors',
    Origin: 'https://www.quotemedia.com',
    Referer: 'https://www.quotemedia.com/',
  };
  return headers;
}
// In-memory pool of QM sessions, keyed by QM session id (the id appended to the authenticate
// URL in createSessions()). Each value is the list of sessions created for that id.
// Only ids present as keys here get sessions; the commented-out entries are currently disabled.
// The trailing notes on each entry appear to be a short code plus the QM endpoints that id
// serves — TODO confirm.
const sessionCache: Record<string, QMSession[]> = {
// '5ad521e05faf5778d567f6d0012ec34d6cdbaeb2462f41568f66558bc7b4ced9': [], //4488d072b
// cc1cbdaf040f76db8f4c94f7d156b9b9b716e1a7509ec9c74a48a47f6b6b9f87: [], //97ff00cf3 // getQuotes
// '74963ff42f1db2320d051762b5d3950ff9eab23f9d5c5b592551b4ca0441d086': [], //32ca24e394b // getSplitsBySymbol getBrokerRatingsBySymbol getDividendsBySymbol getEarningsSurprisesBySymbol getEarningsEventsBySymbol
// '1e1d7cb1de1fd2fe52684abdea41a446919a5fe12776dfab88615ac1ce1ec2f6': [], //fb5721812d2c // getEnhancedQuotes getProfiles
// a900a06cc6b3e8036afb9eeb1bbf9783f0007698ed8f5cb1e373dc790e7be2e5: [], //cc882cd95f9 // getEnhancedQuotes
// a863d519e38f80e45d10e280fb1afc729816e23f0218db2f3e8b23005a9ad8dd: [], //05a09a41225 // getCompanyFilings getEnhancedQuotes
// b3cdb1873f3682c5aeeac097be6181529bfb755945e5a412a24f4b9316291427: [], //6a63f56a6 // getHeadlinesTickerStory
dc8c9930437f65d30f6597768800957017bac203a0a50342932757c8dfa158d6: [], //fceb3c4bdd // lookup
// '97b24911d7b034620aafad9441afdb2bc906ee5c992d86933c5903254ca29709': [], //c56424868d // detailed-quotes
// '8a394f09cb8540c8be8988780660a7ae5b583c331a1f6cb12834f051a0169a8f': [], //2a86d214e50e5 // getGlobalIndustrySectorPeers getKeyRatiosBySymbol getGlobalIndustrySectorCodeList
// '2f059f75e2a839437095c9e7e4991d2365bafa7bbb086672a87ae0cf8d92eb01': [], // 48fa36d // getNethouseBySymbol
// d7ae7e0091dd1d7011948c3dc4af09b5ec552285d92bb188be2618968bc78e3f: [], // 63548ee //getRecentTradesBySymbol getQuotes getLevel2Quote getRecentTradesBySymbol
// d22d1db8f67fe6e420b4028e5129b289ca64862aa6cee8459193747b68c01de3: [], // 84e9e
// '6e0b22a7cbc02ac3fa07d45e2880b7696aaebeb29574dce81789e570570c9002': [], //
};
/**
 * Lazily prepares the module-level resources used by the QM tasks.
 *
 * On the first call it creates the 'qm-tasks' logger and marks the module as
 * initialized; later calls do nothing.
 */
export async function initializeQMResources(): Promise<void> {
  if (!isInitialized) {
    logger = getLogger('qm-tasks');
    isInitialized = true;
  }
}
/**
 * Tops up the in-memory session pool for every QM session id in `sessionCache`.
 *
 * For each id, sessions with more than 10 failed calls are discarded, then new
 * sessions are requested through random proxies until one of the following holds:
 *   - the pool contains 10 sessions,
 *   - a shutdown signal has been received,
 *   - no proxy is available,
 *   - the per-id attempt budget for this invocation is used up.
 *
 * A failed attempt (network error, non-OK status, response without a token) only
 * costs one attempt; it does not abort the remaining work.
 *
 * @returns Resolves when the pass is finished. Unexpected errors are logged and
 *          swallowed, so the promise never rejects.
 */
export async function createSessions(): Promise<void> {
  const targetSessionsPerId = 10;
  const maxFailedCalls = 10;
  // Upper bound on authentication attempts per id in one invocation. Without it, an
  // endpoint that keeps answering with a non-OK status would keep the loop spinning forever.
  const maxAttemptsPerId = targetSessionsPerId * 3;

  try {
    if (!isInitialized) {
      await initializeQMResources();
    }

    logger.info('Creating QM sessions...');

    for (const [sessionId, sessionArray] of Object.entries(sessionCache)) {
      // Drop sessions that have failed too often before topping the pool up again.
      const initialCount = sessionArray.length;
      const filteredArray = sessionArray.filter(session => session.failedCalls <= maxFailedCalls);
      sessionCache[sessionId] = filteredArray;

      const removedCount = initialCount - filteredArray.length;
      if (removedCount > 0) {
        logger.info(
          `Removed ${removedCount} sessions with excessive failures for ${sessionId}. Remaining: ${filteredArray.length}`
        );
      }

      let attempts = 0;
      while (sessionCache[sessionId].length < targetSessionsPerId) {
        if (isShutdownSignalReceived()) {
          logger.info('Shutting down, skipping session creation');
          break; // Exit if shutting down
        }

        if (attempts >= maxAttemptsPerId) {
          logger.error('Giving up on QM session creation after repeated failures', {
            sessionId,
            attempts,
            sessionCount: sessionCache[sessionId].length,
          });
          break;
        }
        attempts++;

        logger.info(`Creating new session for ${sessionId}`);
        const proxyInfo = await getRandomProxy();
        if (!proxyInfo) {
          logger.error('No proxy available for QM session creation');
          break; // Nothing can be created for this id without a proxy
        }

        // Convert ProxyInfo to URL form; credentials are included only when both parts exist.
        const auth =
          proxyInfo.username && proxyInfo.password
            ? `${proxyInfo.username}:${proxyInfo.password}@`
            : '';
        const proxy = `${proxyInfo.protocol}://${auth}${proxyInfo.host}:${proxyInfo.port}`;
        // Credential-free form for log output, so proxy passwords never reach the logs.
        const proxyLabel = `${proxyInfo.host}:${proxyInfo.port}`;

        const newSession: QMSession = {
          proxy,
          headers: getQmHeaders(),
          successfulCalls: 0,
          failedCalls: 0,
          lastUsed: new Date(),
        };

        try {
          const sessionResponse = await fetch(
            `https://app.quotemedia.com/auth/g/authenticate/dataTool/v0/500/${sessionId}`,
            {
              method: 'GET',
              proxy: newSession.proxy,
              headers: newSession.headers,
            }
          );

          logger.debug('Session response received', {
            status: sessionResponse.status,
            sessionId,
          });

          if (!sessionResponse.ok) {
            logger.error('Failed to create QM session', {
              sessionId,
              status: sessionResponse.status,
              statusText: sessionResponse.statusText,
            });
            continue; // Counts as one attempt; try again with another proxy
          }

          const sessionData = await sessionResponse.json();
          const token = sessionData?.token;
          if (typeof token !== 'string' || token.length === 0) {
            // A session without a token cannot authenticate later calls, so do not store it.
            logger.error('QM session response did not contain a token', {
              sessionId,
              proxy: proxyLabel,
            });
            continue;
          }

          newSession.headers['Datatool-Token'] = token;
          sessionCache[sessionId].push(newSession);

          // The raw response is deliberately not logged because it carries the session token.
          logger.info('QM session created successfully', {
            sessionId,
            proxy: proxyLabel,
            sessionCount: sessionCache[sessionId].length,
          });
        } catch (error) {
          // A dead proxy or network error should not abort session creation for every id.
          logger.error('QM session request failed', { sessionId, proxy: proxyLabel, error });
        }
      }
    }
  } catch (error) {
    logger.error('❌ Failed to fetch QM session', { error });
  }
}
// Spider-based symbol search functions
/**
 * Entry point for one spider job.
 *
 * A payload without a prefix (null, undefined or empty string) is the root job
 * and enqueues the A-Z jobs. Any other payload searches its prefix and may
 * enqueue child jobs.
 *
 * @param payload Spider job description; `source` defaults to 'qm' and `maxDepth` to 4.
 * @returns Outcome counters. Any error is logged and reported as
 *          `{ success: false, symbolsFound: 0, jobsCreated: 0 }` instead of being thrown.
 */
export async function spiderSymbolSearch(
  payload: SymbolSpiderJob
): Promise<{ success: boolean; symbolsFound: number; jobsCreated: number }> {
  try {
    if (!isInitialized) {
      await initializeQMResources();
    }

    const { prefix, depth, source = 'qm', maxDepth = 4 } = payload;
    const logContext = { prefix: prefix || 'ROOT', depth, source };
    logger.info('Starting spider search', logContext);

    // Leaf job: a non-empty prefix means "search this prefix".
    if (prefix != null && prefix !== '') {
      const leafResult = await searchAndSpawnJobs(prefix, depth, source, maxDepth);
      return leafResult;
    }

    // Root job: fan out into one job per letter.
    const rootResult = await createAlphabetJobs(source, maxDepth);
    return rootResult;
  } catch (error) {
    logger.error('Spider symbol search failed', { error, payload });
    return { success: false, symbolsFound: 0, jobsCreated: 0 };
  }
}
/**
 * Enqueues the 26 first-level spider jobs (prefixes 'A' through 'Z') on the 'qm' queue.
 *
 * Jobs are added one after another; each is delayed 100ms more than the previous one.
 *
 * @param source   Source tag copied into every job payload.
 * @param maxDepth Depth limit copied into every job payload.
 * @returns `jobsCreated` is the number of jobs added; on any error the failure
 *          result with zero counters is returned instead.
 */
async function createAlphabetJobs(
  source: string,
  maxDepth: number
): Promise<{ success: boolean; symbolsFound: number; jobsCreated: number }> {
  try {
    const queue = QueueManager.getInstance().getQueue('qm');
    const letters = Array.from({ length: 26 }, (_, offset) => String.fromCharCode(65 + offset)); // 65 = 'A'

    let jobsCreated = 0;
    for (const [index, letter] of letters.entries()) {
      const payload: SymbolSpiderJob = { prefix: letter, depth: 1, source, maxDepth };
      const jobData = {
        handler: 'qm',
        operation: 'spider-symbol-search',
        payload,
      };
      const jobOptions = {
        priority: 5,
        delay: index * 100, // Stagger jobs by 100ms
        attempts: 3,
        backoff: { type: 'exponential', delay: 2000 },
      };

      await queue.add('spider-symbol-search', jobData, jobOptions);
      jobsCreated += 1;
    }

    logger.info(`Created ${jobsCreated} alphabet jobs (A-Z)`);
    return { success: true, symbolsFound: 0, jobsCreated };
  } catch (error) {
    logger.error('Failed to create alphabet jobs', { error });
    return { success: false, symbolsFound: 0, jobsCreated: 0 };
  }
}
/**
 * Searches QM for one prefix and, when the result looks truncated, fans out into
 * 26 child jobs (prefix + 'A' … prefix + 'Z').
 *
 * Fan-out happens only when the search returned at least 50 symbols and `depth`
 * is still below `maxDepth`; otherwise the prefix is treated as terminal.
 * If the lookup session pool is empty, sessions are created first.
 *
 * @param prefix   Symbol prefix to search for.
 * @param depth    Depth of the current job; children get `depth + 1`.
 * @param source   Source tag copied into child payloads.
 * @param maxDepth Depth at which fan-out stops.
 * @returns Counters for this job; errors are logged and reported as a failure result.
 */
async function searchAndSpawnJobs(
  prefix: string,
  depth: number,
  source: string,
  maxDepth: number
): Promise<{ success: boolean; symbolsFound: number; jobsCreated: number }> {
  try {
    // Ensure sessions exist
    const lookupSessionId = 'dc8c9930437f65d30f6597768800957017bac203a0a50342932757c8dfa158d6';
    const pool = sessionCache[lookupSessionId] || [];
    if (pool.length === 0) {
      logger.info('No sessions found, creating sessions first...');
      await createSessions();
      await new Promise(resolve => setTimeout(resolve, 1000));
    }

    // Search for symbols with this prefix
    const found = await searchQMSymbolsAPI(prefix);
    const symbolCount = found.length;
    logger.info(`Prefix "${prefix}" returned ${symbolCount} symbols`);

    const shouldFanOut = symbolCount >= 50 && depth < maxDepth;
    if (!shouldFanOut) {
      // Terminal case: save symbols and exchanges (already done in searchQMSymbolsAPI)
      logger.info(`Terminal case for prefix "${prefix}": ${symbolCount} symbols saved`);
      return { success: true, symbolsFound: symbolCount, jobsCreated: 0 };
    }

    const queue = QueueManager.getInstance().getQueue('qm');
    logger.info(`Spawning sub-jobs for prefix "${prefix}" (${symbolCount} >= 50 symbols)`);

    const childDepth = depth + 1;
    const childPriority = Math.max(1, 6 - depth); // Higher priority for deeper jobs
    let jobsCreated = 0;

    // Create jobs for prefixA, prefixB, prefixC... prefixZ
    for (let offset = 0; offset < 26; offset += 1) {
      const childPayload: SymbolSpiderJob = {
        prefix: prefix + String.fromCharCode(65 + offset),
        depth: childDepth,
        source,
        maxDepth,
      };

      await queue.add(
        'spider-symbol-search',
        { handler: 'qm', operation: 'spider-symbol-search', payload: childPayload },
        {
          priority: childPriority,
          delay: offset * 50, // Stagger sub-jobs by 50ms
          attempts: 3,
          backoff: { type: 'exponential', delay: 2000 },
        }
      );
      jobsCreated += 1;
    }

    logger.info(`Created ${jobsCreated} sub-jobs for prefix "${prefix}"`);
    return { success: true, symbolsFound: symbolCount, jobsCreated };
  } catch (error) {
    logger.error(`Failed to search and spawn jobs for prefix "${prefix}"`, { error, depth });
    return { success: false, symbolsFound: 0, jobsCreated: 0 };
  }
}
/**
 * Calls the QM symbol lookup endpoint for `query` and persists what it returns.
 *
 * A random session from the lookup pool supplies the headers and proxy. Each
 * returned entry with a string `symbol` field is upserted into 'qmSymbols'
 * (keyed by `qmSearchCode`, the original "SYMBOL:EXCHANGE" code), and the distinct
 * exchanges are upserted into 'qmExchanges' (keyed by `exchange`). The session's
 * success/failure counters and `lastUsed` are updated accordingly.
 *
 * NOTE(review): the declared return type is `string[]`, but the value returned is
 * the raw array of lookup entries (objects). Callers in this file only read
 * `.length`; the annotation is left unchanged to keep the signature stable.
 *
 * @param query Symbol prefix to look up.
 * @returns The raw entries returned by the lookup endpoint.
 * @throws Error when no proxy or session is available, when the HTTP request
 *         fails or returns a non-OK status, or when the payload is not an array.
 */
async function searchQMSymbolsAPI(query: string): Promise<string[]> {
  // Only used as an availability check; requests go through the session's own proxy.
  const proxyInfo = await getRandomProxy();
  if (!proxyInfo) {
    throw new Error('No proxy available for QM API call');
  }

  const sessionId = 'dc8c9930437f65d30f6597768800957017bac203a0a50342932757c8dfa158d6'; // Use the session ID for symbol lookup
  const pool = sessionCache[sessionId] || [];
  const session = pool[Math.floor(Math.random() * pool.length)]; // lookup session
  if (!session) {
    throw new Error(`No active session found for QM API with ID: ${sessionId}`);
  }

  try {
    // QM lookup endpoint for symbol search
    const apiUrl = `https://app.quotemedia.com/datatool/lookup.json?marketType=equity&pathName=%2Fdemo%2Fportal%2Fcompany-summary.php&q=${encodeURIComponent(query)}&qmodTool=SmartSymbolLookup&searchType=symbol&showFree=false&showHisa=false&webmasterId=500`;

    const response = await fetch(apiUrl, {
      method: 'GET',
      headers: session.headers,
      proxy: session.proxy,
    });

    if (!response.ok) {
      throw new Error(`QM API request failed: ${response.status} ${response.statusText}`);
    }

    const symbols = await response.json();
    if (!Array.isArray(symbols)) {
      throw new Error(`QM API returned a non-array payload for query "${query}"`);
    }

    const updatedSymbols: Record<string, unknown>[] = [];
    // Keyed by exchange name so each exchange is recorded once (first occurrence wins).
    const exchangesByName = new Map<unknown, Exchange>();
    let skipped = 0;

    for (const entry of symbols) {
      if (!entry || typeof entry.symbol !== 'string') {
        // Without a string symbol there is no search code to key the upsert on.
        skipped += 1;
        continue;
      }

      updatedSymbols.push({
        ...entry,
        qmSearchCode: entry.symbol, // Store original symbol for reference
        symbol: entry.symbol.split(':')[0], // Extract symbol from "symbol:exchange"
      });

      if (!exchangesByName.has(entry.exchange)) {
        exchangesByName.set(entry.exchange, {
          exchange: entry.exchange,
          exchangeCode: entry.exchangeCode,
          exchangeShortName: entry.exchangeShortName,
          countryCode: entry.countryCode,
          source: 'qm',
        });
      }
    }

    if (skipped > 0) {
      logger.info(`Skipped ${skipped} QM lookup entries without a symbol for query: ${query}`);
    }

    // Skip the writes when the lookup produced nothing to store; an empty batch is a no-op at best.
    if (updatedSymbols.length > 0) {
      const mongoClient = getMongoDBClient();
      await mongoClient.batchUpsert('qmSymbols', updatedSymbols, ['qmSearchCode']);
      await mongoClient.batchUpsert('qmExchanges', Array.from(exchangesByName.values()), [
        'exchange',
      ]);
    }

    session.successfulCalls++;
    session.lastUsed = new Date();

    // Strip "user:pass@" from the proxy URL so credentials do not end up in the logs.
    const proxyLabel = session.proxy.replace(/\/\/[^@/]*@/, '//');
    logger.info(
      `QM API returned ${symbols.length} symbols for query: ${query} with proxy ${proxyLabel}`
    );
    return symbols;
  } catch (error) {
    logger.error(`Error searching QM symbols for query "${query}":`, error);
    session.failedCalls++;
    session.lastUsed = new Date();
    throw error;
  }
}
/**
 * Kicks off the spider-based symbol search by running the root spider job
 * (no prefix, depth 0, source 'qm', max depth 4).
 *
 * @returns A one-element array with a status message when the root job succeeded,
 *          or `null` when it failed or an error was thrown (errors are logged).
 */
export async function fetchSymbols(): Promise<unknown[] | null> {
  try {
    if (!isInitialized) {
      await initializeQMResources();
    }

    logger.info('🔄 Starting QM spider-based symbol search...');

    // Root job creates A-Z jobs
    const rootJob: SymbolSpiderJob = { prefix: null, depth: 0, source: 'qm', maxDepth: 4 };
    const { success, jobsCreated } = await spiderSymbolSearch(rootJob);

    if (!success) {
      logger.error('Failed to initiate QM spider search');
      return null;
    }

    logger.info(`QM spider search initiated successfully. Created ${jobsCreated} initial jobs`);
    return [`Spider search initiated with ${jobsCreated} jobs`];
  } catch (error) {
    logger.error('❌ Failed to start QM spider symbol search', { error });
    return null;
  }
}
/**
 * Placeholder for fetching QM exchanges.
 *
 * Currently it only makes sure the module is initialized and logs that the
 * feature is not implemented.
 *
 * @returns Always `null`, both on the normal path and when an error is caught.
 */
export async function fetchExchanges(): Promise<unknown[] | null> {
  let exchanges: unknown[] | null = null;
  try {
    if (!isInitialized) {
      await initializeQMResources();
    }
    logger.info('🔄 QM exchanges fetch - not implemented yet');
    // TODO: Implement QM exchanges fetching logic
  } catch (error) {
    logger.error('❌ Failed to fetch QM exchanges', { error });
    exchanges = null;
  }
  return exchanges;
}
/** Public task functions of this module, bundled into one object and keyed by function name. */
export const qmTasks = {
  createSessions: createSessions,
  fetchSymbols: fetchSymbols,
  fetchExchanges: fetchExchanges,
  spiderSymbolSearch: spiderSymbolSearch,
};