added deduplication and exchange stats
This commit is contained in:
parent
133f37e755
commit
100efb575f
7 changed files with 309 additions and 6 deletions
|
|
@ -77,8 +77,8 @@
|
||||||
"port": 6379,
|
"port": 6379,
|
||||||
"db": 1
|
"db": 1
|
||||||
},
|
},
|
||||||
"workers": 10,
|
"workers": 1,
|
||||||
"concurrency": 4,
|
"concurrency": 1,
|
||||||
"enableScheduledJobs": true,
|
"enableScheduledJobs": true,
|
||||||
"defaultJobOptions": {
|
"defaultJobOptions": {
|
||||||
"attempts": 3,
|
"attempts": 3,
|
||||||
|
|
|
||||||
|
|
@ -10,4 +10,5 @@ export { schedulePriceUpdates, updatePrices } from './prices.action';
|
||||||
export { checkSessions, createSession } from './session.action';
|
export { checkSessions, createSession } from './session.action';
|
||||||
export { scheduleSymbolInfoUpdates, updateSymbolInfo } from './symbol-info.action';
|
export { scheduleSymbolInfoUpdates, updateSymbolInfo } from './symbol-info.action';
|
||||||
export { searchSymbols, spiderSymbol } from './symbol.action';
|
export { searchSymbols, spiderSymbol } from './symbol.action';
|
||||||
|
export { deduplicateSymbols, updateExchangeStats, updateExchangeStatsAndDeduplicate } from './symbol-dedup.action';
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,269 @@
|
||||||
|
/**
|
||||||
|
* QM Symbol Deduplication and Exchange Statistics
|
||||||
|
* - Updates exchange statistics (symbol count, market cap)
|
||||||
|
* - Assigns priority to exchanges
|
||||||
|
* - Marks active/inactive symbols to eliminate duplicates
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { ExecutionContext } from '@stock-bot/handlers';
|
||||||
|
import type { QMHandler } from '../qm.handler';
|
||||||
|
import type { ExchangeStats } from '../shared/types';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Exchange priority mapping
|
||||||
|
*/
|
||||||
|
const EXCHANGE_PRIORITIES: Record<string, number> = {
|
||||||
|
// Major US exchanges
|
||||||
|
'NYSE': 10,
|
||||||
|
'NASDAQ': 10,
|
||||||
|
'NYSE American': 9,
|
||||||
|
|
||||||
|
// Other US exchanges
|
||||||
|
'EDGX': 5,
|
||||||
|
'BZX': 5,
|
||||||
|
'ARCA': 5,
|
||||||
|
|
||||||
|
// Canadian main exchanges
|
||||||
|
'TSX': 8,
|
||||||
|
'TSXV': 8,
|
||||||
|
'CSE': 7,
|
||||||
|
|
||||||
|
// Canadian quote exchanges
|
||||||
|
'CCQ TSX': 3,
|
||||||
|
'CCQ TSXV': 3,
|
||||||
|
|
||||||
|
// Default for others
|
||||||
|
'default': 1
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update exchange statistics
|
||||||
|
*/
|
||||||
|
export async function updateExchangeStats(
|
||||||
|
this: QMHandler,
|
||||||
|
_input: any,
|
||||||
|
_context: ExecutionContext
|
||||||
|
): Promise<{ message: string; exchangesUpdated: number }> {
|
||||||
|
this.logger.info('Updating exchange statistics');
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Get all exchanges
|
||||||
|
const exchanges = await this.mongodb.find('qmExchanges', {});
|
||||||
|
|
||||||
|
if (!exchanges || exchanges.length === 0) {
|
||||||
|
return { message: 'No exchanges found', exchangesUpdated: 0 };
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate stats for each exchange
|
||||||
|
const bulkOps = [];
|
||||||
|
|
||||||
|
for (const exchange of exchanges) {
|
||||||
|
// Get symbol statistics for this exchange
|
||||||
|
const stats = await this.mongodb.aggregate('qmSymbols', [
|
||||||
|
{ $match: { exchange: exchange.exchange } },
|
||||||
|
{
|
||||||
|
$group: {
|
||||||
|
_id: null,
|
||||||
|
symbolCount: { $sum: 1 },
|
||||||
|
totalMarketCap: { $sum: { $ifNull: ['$marketCap', 0] } },
|
||||||
|
avgMarketCap: { $avg: { $ifNull: ['$marketCap', 0] } }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]);
|
||||||
|
|
||||||
|
const exchangeStats: ExchangeStats = stats[0] || {
|
||||||
|
symbolCount: 0,
|
||||||
|
totalMarketCap: 0,
|
||||||
|
avgMarketCap: 0
|
||||||
|
};
|
||||||
|
|
||||||
|
// Get priority for this exchange
|
||||||
|
const priority = EXCHANGE_PRIORITIES[exchange.exchange] || EXCHANGE_PRIORITIES.default;
|
||||||
|
|
||||||
|
// Prepare update
|
||||||
|
bulkOps.push({
|
||||||
|
updateOne: {
|
||||||
|
filter: { exchange: exchange.exchange },
|
||||||
|
update: {
|
||||||
|
$set: {
|
||||||
|
'stats.symbolCount': exchangeStats.symbolCount,
|
||||||
|
'stats.totalMarketCap': exchangeStats.totalMarketCap,
|
||||||
|
'stats.avgMarketCap': Math.round(exchangeStats.avgMarketCap),
|
||||||
|
priority,
|
||||||
|
updated_at: new Date()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Execute bulk update
|
||||||
|
if (bulkOps.length > 0) {
|
||||||
|
const collection = this.mongodb.collection('qmExchanges');
|
||||||
|
await collection.bulkWrite(bulkOps);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
this.logger.info(`Updated statistics for ${exchanges.length} exchanges`);
|
||||||
|
return {
|
||||||
|
message: `Updated statistics for ${exchanges.length} exchanges`,
|
||||||
|
exchangesUpdated: exchanges.length
|
||||||
|
};
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.error('Failed to update exchange statistics', { error });
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Deduplicate symbols based on country and exchange rules
|
||||||
|
*/
|
||||||
|
export async function deduplicateSymbols(
|
||||||
|
this: QMHandler,
|
||||||
|
_input: any,
|
||||||
|
_context: ExecutionContext
|
||||||
|
): Promise<{ message: string; symbolsProcessed: number; deactivated: number }> {
|
||||||
|
this.logger.info('Starting symbol deduplication');
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Get all unique symbol names with their documents
|
||||||
|
const symbolGroups = await this.mongodb.aggregate('qmSymbols', [
|
||||||
|
{
|
||||||
|
$group: {
|
||||||
|
_id: '$symbol',
|
||||||
|
symbols: {
|
||||||
|
$push: {
|
||||||
|
qmSearchCode: '$qmSearchCode',
|
||||||
|
exchange: '$exchange',
|
||||||
|
countryCode: '$countryCode',
|
||||||
|
marketCap: { $ifNull: ['$marketCap', 0] },
|
||||||
|
_id: '$_id'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]);
|
||||||
|
|
||||||
|
let totalProcessed = 0;
|
||||||
|
let totalDeactivated = 0;
|
||||||
|
const bulkOps = [];
|
||||||
|
|
||||||
|
// Get exchange priorities for sorting
|
||||||
|
const exchangePriorities = await this.mongodb.find('qmExchanges', {}, {
|
||||||
|
projection: { exchange: 1, priority: 1 }
|
||||||
|
});
|
||||||
|
const priorityMap = new Map(exchangePriorities.map(e => [e.exchange, e.priority || 1]));
|
||||||
|
|
||||||
|
for (const group of symbolGroups) {
|
||||||
|
const symbol = group._id;
|
||||||
|
const documents = group.symbols;
|
||||||
|
|
||||||
|
// Group by country
|
||||||
|
const byCountry = new Map<string, any[]>();
|
||||||
|
for (const doc of documents) {
|
||||||
|
const country = doc.countryCode || 'UNKNOWN';
|
||||||
|
if (!byCountry.has(country)) {
|
||||||
|
byCountry.set(country, []);
|
||||||
|
}
|
||||||
|
byCountry.get(country)!.push(doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process each country separately
|
||||||
|
for (const [country, countryDocs] of byCountry) {
|
||||||
|
if (countryDocs.length === 1) {
|
||||||
|
// Only one symbol in this country, mark as active
|
||||||
|
bulkOps.push({
|
||||||
|
updateOne: {
|
||||||
|
filter: { _id: countryDocs[0]._id },
|
||||||
|
update: { $set: { active: true } }
|
||||||
|
}
|
||||||
|
});
|
||||||
|
totalProcessed++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Multiple symbols in same country - need to choose one
|
||||||
|
let activeDoc = null;
|
||||||
|
|
||||||
|
if (country === 'US') {
|
||||||
|
// For US, prefer where qmSearchCode === symbol
|
||||||
|
activeDoc = countryDocs.find(doc => doc.qmSearchCode === symbol);
|
||||||
|
} else {
|
||||||
|
// For non-US, prefer symbol:COUNTRYCODE format
|
||||||
|
const expectedFormat = `${symbol}:${country}`;
|
||||||
|
activeDoc = countryDocs.find(doc => doc.qmSearchCode === expectedFormat);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no preferred format found, use highest priority exchange
|
||||||
|
if (!activeDoc) {
|
||||||
|
// Sort by exchange priority (desc) and market cap (desc)
|
||||||
|
countryDocs.sort((a, b) => {
|
||||||
|
const priorityA = priorityMap.get(a.exchange) || 1;
|
||||||
|
const priorityB = priorityMap.get(b.exchange) || 1;
|
||||||
|
if (priorityA !== priorityB) return priorityB - priorityA;
|
||||||
|
return (b.marketCap || 0) - (a.marketCap || 0);
|
||||||
|
});
|
||||||
|
activeDoc = countryDocs[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark active and inactive documents
|
||||||
|
for (const doc of countryDocs) {
|
||||||
|
const isActive = doc._id.equals(activeDoc._id);
|
||||||
|
bulkOps.push({
|
||||||
|
updateOne: {
|
||||||
|
filter: { _id: doc._id },
|
||||||
|
update: { $set: { active: isActive } }
|
||||||
|
}
|
||||||
|
});
|
||||||
|
totalProcessed++;
|
||||||
|
if (!isActive) totalDeactivated++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Execute bulk update
|
||||||
|
if (bulkOps.length > 0) {
|
||||||
|
const collection = this.mongodb.collection('qmSymbols');
|
||||||
|
await collection.bulkWrite(bulkOps, { ordered: false });
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
this.logger.info(`Deduplication complete: processed ${totalProcessed} symbols, deactivated ${totalDeactivated}`);
|
||||||
|
return {
|
||||||
|
message: `Processed ${totalProcessed} symbols, deactivated ${totalDeactivated} duplicates`,
|
||||||
|
symbolsProcessed: totalProcessed,
|
||||||
|
deactivated: totalDeactivated
|
||||||
|
};
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.error('Failed to deduplicate symbols', { error });
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run both exchange stats and symbol deduplication
|
||||||
|
*/
|
||||||
|
export async function updateExchangeStatsAndDeduplicate(
|
||||||
|
this: QMHandler,
|
||||||
|
input: any,
|
||||||
|
context: ExecutionContext
|
||||||
|
): Promise<{ message: string }> {
|
||||||
|
this.logger.info('Running exchange stats update and symbol deduplication');
|
||||||
|
|
||||||
|
try {
|
||||||
|
// First update exchange stats
|
||||||
|
const statsResult = await updateExchangeStats.call(this, input, context);
|
||||||
|
|
||||||
|
// Then deduplicate symbols
|
||||||
|
const dedupResult = await deduplicateSymbols.call(this, input, context);
|
||||||
|
|
||||||
|
return {
|
||||||
|
message: `${statsResult.message}. ${dedupResult.message}`
|
||||||
|
};
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.error('Failed to update stats and deduplicate', { error });
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -9,6 +9,7 @@ import type { DataIngestionServices } from '../../types';
|
||||||
import {
|
import {
|
||||||
checkSessions,
|
checkSessions,
|
||||||
createSession,
|
createSession,
|
||||||
|
deduplicateSymbols,
|
||||||
scheduleCorporateActionsUpdates,
|
scheduleCorporateActionsUpdates,
|
||||||
scheduleFilingsUpdates,
|
scheduleFilingsUpdates,
|
||||||
scheduleFinancialsUpdates,
|
scheduleFinancialsUpdates,
|
||||||
|
|
@ -18,6 +19,8 @@ import {
|
||||||
searchSymbols,
|
searchSymbols,
|
||||||
spiderSymbol,
|
spiderSymbol,
|
||||||
updateCorporateActions,
|
updateCorporateActions,
|
||||||
|
updateExchangeStats,
|
||||||
|
updateExchangeStatsAndDeduplicate,
|
||||||
updateFilings,
|
updateFilings,
|
||||||
updateFinancials,
|
updateFinancials,
|
||||||
updateIntradayBars,
|
updateIntradayBars,
|
||||||
|
|
@ -63,6 +66,25 @@ export class QMHandler extends BaseHandler<DataIngestionServices> {
|
||||||
})
|
})
|
||||||
spiderSymbol = spiderSymbol;
|
spiderSymbol = spiderSymbol;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* EXCHANGE STATS & DEDUPLICATION
|
||||||
|
*/
|
||||||
|
@Operation('update-exchange-stats')
|
||||||
|
updateExchangeStats = updateExchangeStats;
|
||||||
|
|
||||||
|
@Operation('deduplicate-symbols')
|
||||||
|
deduplicateSymbols = deduplicateSymbols;
|
||||||
|
|
||||||
|
@Operation('update-exchange-stats-and-deduplicate')
|
||||||
|
updateExchangeStatsAndDeduplicate = updateExchangeStatsAndDeduplicate;
|
||||||
|
|
||||||
|
@ScheduledOperation('schedule-exchange-stats-and-dedup', '0 1 * * *', {
|
||||||
|
priority: 7,
|
||||||
|
immediately: false,
|
||||||
|
description: 'Update exchange statistics and deduplicate symbols daily at 1 AM'
|
||||||
|
})
|
||||||
|
scheduleExchangeStatsAndDedup = updateExchangeStatsAndDeduplicate;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* SYMBOL INFO
|
* SYMBOL INFO
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -42,8 +42,8 @@ export const QM_CONFIG = {
|
||||||
|
|
||||||
// Session management settings
|
// Session management settings
|
||||||
export const SESSION_CONFIG = {
|
export const SESSION_CONFIG = {
|
||||||
MIN_SESSIONS: 100,
|
MIN_SESSIONS: 2,
|
||||||
MAX_SESSIONS: 100,
|
MAX_SESSIONS: 2,
|
||||||
MAX_FAILED_CALLS: 3,
|
MAX_FAILED_CALLS: 3,
|
||||||
SESSION_TIMEOUT: 5000, // 10 seconds
|
SESSION_TIMEOUT: 5000, // 10 seconds
|
||||||
API_TIMEOUT: 30000, // 15 seconds
|
API_TIMEOUT: 30000, // 15 seconds
|
||||||
|
|
|
||||||
|
|
@ -217,6 +217,7 @@ export class QMOperationTracker {
|
||||||
})();
|
})();
|
||||||
|
|
||||||
const filter: any = {
|
const filter: any = {
|
||||||
|
active: { $ne: false }, // Only active symbols (active: true or active doesn't exist)
|
||||||
$or: [
|
$or: [
|
||||||
{ [`operations.${operationName}.lastSuccessAt`]: { $lt: cutoffDate } },
|
{ [`operations.${operationName}.lastSuccessAt`]: { $lt: cutoffDate } },
|
||||||
{ [`operations.${operationName}.lastSuccessAt`]: { $exists: false } },
|
{ [`operations.${operationName}.lastSuccessAt`]: { $exists: false } },
|
||||||
|
|
@ -249,7 +250,9 @@ export class QMOperationTracker {
|
||||||
): Promise<IntradayCrawlSymbol[]> {
|
): Promise<IntradayCrawlSymbol[]> {
|
||||||
const { limit = 100, includeFinished = false } = options;
|
const { limit = 100, includeFinished = false } = options;
|
||||||
|
|
||||||
const filter: any = {};
|
const filter: any = {
|
||||||
|
active: { $ne: false } // Only active symbols
|
||||||
|
};
|
||||||
if (!includeFinished) {
|
if (!includeFinished) {
|
||||||
filter[`operations.${operationName}.crawlState.finished`] = { $ne: true };
|
filter[`operations.${operationName}.crawlState.finished`] = { $ne: true };
|
||||||
}
|
}
|
||||||
|
|
@ -308,7 +311,9 @@ export class QMOperationTracker {
|
||||||
} = {}
|
} = {}
|
||||||
): Promise<Array<{ qmSearchCode: string; lastRecordDate?: Date }>> {
|
): Promise<Array<{ qmSearchCode: string; lastRecordDate?: Date }>> {
|
||||||
const { limit = 500 } = options;
|
const { limit = 500 } = options;
|
||||||
const filter: any = {};
|
const filter: any = {
|
||||||
|
active: { $ne: false } // Only active symbols
|
||||||
|
};
|
||||||
|
|
||||||
if (options.neverRun) {
|
if (options.neverRun) {
|
||||||
filter[`operations.${operationName}`] = { $exists: false };
|
filter[`operations.${operationName}`] = { $exists: false };
|
||||||
|
|
|
||||||
|
|
@ -91,3 +91,9 @@ export interface IntradayCrawlSymbol {
|
||||||
lastCrawlDirection?: 'forward' | 'backward';
|
lastCrawlDirection?: 'forward' | 'backward';
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface ExchangeStats {
|
||||||
|
symbolCount: number;
|
||||||
|
totalMarketCap: number;
|
||||||
|
avgMarketCap: number;
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue