work on ib and cleanup
This commit is contained in:
parent
a20a11c1aa
commit
d686a72591
41 changed files with 601 additions and 2793 deletions
|
|
@ -1,249 +0,0 @@
|
|||
import type { Document } from 'mongodb';
|
||||
import type { MongoDBClient } from './client';
|
||||
import type { CollectionNames } from './types';
|
||||
|
||||
/**
 * MongoDB Aggregation Builder
 *
 * Fluent builder for MongoDB aggregation pipelines. Every stage method appends
 * exactly one stage to an internal array and returns the builder so calls can
 * be chained; `execute()` runs the accumulated stages against the collection
 * selected with `from()`.
 */
export class MongoDBAggregationBuilder {
  private pipeline: any[] = [];
  private readonly client: MongoDBClient;
  private collection: CollectionNames | null = null;

  constructor(client: MongoDBClient) {
    this.client = client;
  }

  /**
   * Set the collection to aggregate on
   */
  from(collection: CollectionNames): this {
    this.collection = collection;
    return this;
  }

  /**
   * Add a match stage
   */
  match(filter: any): this {
    this.pipeline.push({ $match: filter });
    return this;
  }

  /**
   * Add a group stage
   */
  group(groupBy: any): this {
    this.pipeline.push({ $group: groupBy });
    return this;
  }

  /**
   * Add a sort stage
   */
  sort(sortBy: any): this {
    this.pipeline.push({ $sort: sortBy });
    return this;
  }

  /**
   * Add a limit stage
   */
  limit(count: number): this {
    this.pipeline.push({ $limit: count });
    return this;
  }

  /**
   * Add a skip stage
   */
  skip(count: number): this {
    this.pipeline.push({ $skip: count });
    return this;
  }

  /**
   * Add a project stage
   */
  project(projection: any): this {
    this.pipeline.push({ $project: projection });
    return this;
  }

  /**
   * Add an unwind stage.
   *
   * Without `options` the short form `{ $unwind: field }` is emitted; with
   * `options` the document form is emitted with `path` set to `field` and the
   * options spread after it. `field` is passed through untouched.
   */
  unwind(field: string, options?: any): this {
    this.pipeline.push({
      $unwind: options ? { path: field, ...options } : field,
    });
    return this;
  }

  /**
   * Add a lookup stage (join)
   */
  lookup(from: string, localField: string, foreignField: string, as: string): this {
    this.pipeline.push({
      $lookup: {
        from,
        localField,
        foreignField,
        as,
      },
    });
    return this;
  }

  /**
   * Add a custom stage. The stage object is appended as-is.
   */
  addStage(stage: any): this {
    this.pipeline.push(stage);
    return this;
  }

  /**
   * Execute the aggregation pipeline and collect every result into an array.
   *
   * @throws Error when no collection has been selected with `from()`.
   */
  async execute<T extends Document = Document>(): Promise<T[]> {
    if (!this.collection) {
      throw new Error('Collection not specified. Use .from() to set the collection.');
    }

    const collection = this.client.getCollection(this.collection);
    return await collection.aggregate<T>(this.pipeline).toArray();
  }

  /**
   * Get the pipeline array.
   *
   * Returns a shallow copy: adding or removing entries on the result does not
   * affect the builder, but the stage objects themselves are shared.
   */
  getPipeline(): any[] {
    return [...this.pipeline];
  }

  /**
   * Reset the pipeline and clear the selected collection
   */
  reset(): this {
    this.pipeline = [];
    this.collection = null;
    return this;
  }

  // Convenience methods for common aggregations.
  // Each one selects its collection and APPENDS stages to whatever is already
  // in the pipeline; call reset() first when starting a fresh query.

  /**
   * Sentiment analysis aggregation.
   *
   * Optionally filters `sentiment_data` by symbol and/or an inclusive
   * timestamp range, then groups by (symbol, sentiment_label).
   */
  sentimentAnalysis(symbol?: string, timeframe?: { start: Date; end: Date }): this {
    this.from('sentiment_data');

    const matchConditions: any = {};
    if (symbol) {
      matchConditions.symbol = symbol;
    }
    if (timeframe) {
      matchConditions.timestamp = {
        $gte: timeframe.start,
        $lte: timeframe.end,
      };
    }

    // Skip the $match stage entirely when no filter was requested.
    if (Object.keys(matchConditions).length > 0) {
      this.match(matchConditions);
    }

    return this.group({
      _id: {
        symbol: '$symbol',
        sentiment: '$sentiment_label',
      },
      count: { $sum: 1 },
      avgScore: { $avg: '$sentiment_score' },
      avgConfidence: { $avg: '$confidence' },
    });
  }

  /**
   * News article aggregation by publication.
   *
   * When a non-empty `symbols` list is given, only articles whose `symbols`
   * field matches one of them are counted.
   */
  newsByPublication(symbols?: string[]): this {
    this.from('news_articles');

    if (symbols && symbols.length > 0) {
      this.match({ symbols: { $in: symbols } });
    }

    return this.group({
      _id: '$publication',
      articleCount: { $sum: 1 },
      symbols: { $addToSet: '$symbols' },
      avgSentiment: { $avg: '$sentiment_score' },
      latestArticle: { $max: '$published_date' },
    });
  }

  /**
   * SEC filings by company, optionally restricted to the given filing types
   */
  secFilingsByCompany(filingTypes?: string[]): this {
    this.from('sec_filings');

    if (filingTypes && filingTypes.length > 0) {
      this.match({ filing_type: { $in: filingTypes } });
    }

    return this.group({
      _id: {
        cik: '$cik',
        company: '$company_name',
      },
      filingCount: { $sum: 1 },
      filingTypes: { $addToSet: '$filing_type' },
      latestFiling: { $max: '$filing_date' },
      symbols: { $addToSet: '$symbols' },
    });
  }

  /**
   * Document processing status summary for the given collection
   */
  processingStatusSummary(collection: CollectionNames): this {
    this.from(collection);

    return this.group({
      _id: '$processing_status',
      count: { $sum: 1 },
      avgSizeBytes: { $avg: '$size_bytes' },
      oldestDocument: { $min: '$created_at' },
      newestDocument: { $max: '$created_at' },
    });
  }

  /**
   * Time-based aggregation (hourly/daily/weekly/monthly counts).
   *
   * Groups documents by `dateField` formatted with `$dateToString` and sorts
   * the buckets ascending by their label.
   *
   * @param collection Collection to aggregate on.
   * @param dateField  Name of the date field, without the leading `$`.
   * @param interval   Bucket size; defaults to `'day'`.
   */
  timeBasedCounts(
    collection: CollectionNames,
    dateField: string = 'created_at',
    interval: 'hour' | 'day' | 'week' | 'month' = 'day'
  ): this {
    this.from(collection);

    // %V is the ISO 8601 week number, so it must be paired with the ISO
    // week-numbering year (%G). Pairing it with the calendar year (%Y)
    // mislabels days around New Year, e.g. 2021-01-01 would become "2021-W53".
    const formatByInterval = {
      hour: '%Y-%m-%d %H:00:00',
      day: '%Y-%m-%d',
      week: '%G-W%V',
      month: '%Y-%m',
    } as const;
    const dateRef = `$${dateField}`;

    return this.group({
      _id: { $dateToString: { format: formatByInterval[interval], date: dateRef } },
      count: { $sum: 1 },
      firstDocument: { $min: dateRef },
      lastDocument: { $max: dateRef },
    }).sort({ _id: 1 });
  }
}
|
||||
|
|
@ -1,110 +1,68 @@
|
|||
import {
|
||||
Collection,
|
||||
Db,
|
||||
Document,
|
||||
MongoClient,
|
||||
MongoClientOptions,
|
||||
OptionalUnlessRequiredId,
|
||||
WithId,
|
||||
} from 'mongodb';
|
||||
import * as yup from 'yup';
|
||||
import { Collection, Db, MongoClient, OptionalUnlessRequiredId } from 'mongodb';
|
||||
import { mongodbConfig } from '@stock-bot/config';
|
||||
import { getLogger } from '@stock-bot/logger';
|
||||
import { MongoDBHealthMonitor } from './health';
|
||||
import { schemaMap } from './schemas';
|
||||
import type {
|
||||
AnalystReport,
|
||||
CollectionNames,
|
||||
DocumentBase,
|
||||
EarningsTranscript,
|
||||
MongoDBClientConfig,
|
||||
MongoDBConnectionOptions,
|
||||
NewsArticle,
|
||||
RawDocument,
|
||||
SecFiling,
|
||||
SentimentData,
|
||||
} from './types';
|
||||
import type { DocumentBase } from './types';
|
||||
|
||||
/**
|
||||
* MongoDB Client for Stock Bot
|
||||
* Simplified MongoDB Client for Stock Bot Data Service
|
||||
*
|
||||
* Provides type-safe access to MongoDB collections with built-in
|
||||
* health monitoring, connection pooling, and schema validation.
|
||||
* A singleton MongoDB client focused solely on batch upsert operations
|
||||
* with minimal configuration and no health monitoring complexity.
|
||||
*/
|
||||
export class MongoDBClient {
|
||||
private static instance: MongoDBClient | null = null;
|
||||
private client: MongoClient | null = null;
|
||||
private db: Db | null = null;
|
||||
private readonly config: MongoDBClientConfig;
|
||||
private readonly options: MongoDBConnectionOptions;
|
||||
private readonly logger: ReturnType<typeof getLogger>;
|
||||
private readonly healthMonitor: MongoDBHealthMonitor;
|
||||
private readonly logger = getLogger('mongodb-client-simple');
|
||||
private isConnected = false;
|
||||
|
||||
constructor(config?: Partial<MongoDBClientConfig>, options?: MongoDBConnectionOptions) {
|
||||
this.config = this.buildConfig(config);
|
||||
this.options = {
|
||||
retryAttempts: 3,
|
||||
retryDelay: 1000,
|
||||
healthCheckInterval: 30000,
|
||||
...options,
|
||||
};
|
||||
private constructor() {}
|
||||
|
||||
this.logger = getLogger('mongodb-client');
|
||||
this.healthMonitor = new MongoDBHealthMonitor(this);
|
||||
/**
 * Return the shared MongoDBClient, constructing it the first time it is requested.
 */
static getInstance(): MongoDBClient {
  // Lazily create the single instance; later calls reuse it.
  MongoDBClient.instance ??= new MongoDBClient();
  return MongoDBClient.instance;
}
|
||||
|
||||
/**
|
||||
* Connect to MongoDB
|
||||
* Connect to MongoDB with simple configuration
|
||||
*/
|
||||
async connect(): Promise<void> {
|
||||
if (this.isConnected && this.client) {
|
||||
return;
|
||||
}
|
||||
|
||||
const uri = this.buildConnectionUri();
|
||||
const clientOptions = this.buildClientOptions();
|
||||
try {
|
||||
const uri = this.buildConnectionUri();
|
||||
this.logger.info('Connecting to MongoDB...');
|
||||
|
||||
let lastError: Error | null = null;
|
||||
this.client = new MongoClient(uri, {
|
||||
maxPoolSize: 10,
|
||||
minPoolSize: 1,
|
||||
connectTimeoutMS: 10000,
|
||||
socketTimeoutMS: 30000,
|
||||
serverSelectionTimeoutMS: 5000,
|
||||
});
|
||||
|
||||
for (let attempt = 1; attempt <= this.options.retryAttempts!; attempt++) {
|
||||
try {
|
||||
this.logger.info(
|
||||
`Connecting to MongoDB (attempt ${attempt}/${this.options.retryAttempts})...`
|
||||
);
|
||||
await this.client.connect();
|
||||
await this.client.db(mongodbConfig.MONGODB_DATABASE).admin().ping();
|
||||
|
||||
this.client = new MongoClient(uri, clientOptions);
|
||||
await this.client.connect();
|
||||
this.db = this.client.db(mongodbConfig.MONGODB_DATABASE);
|
||||
this.isConnected = true;
|
||||
|
||||
// Test the connection
|
||||
await this.client.db(this.config.database).admin().ping();
|
||||
|
||||
this.db = this.client.db(this.config.database);
|
||||
this.isConnected = true;
|
||||
|
||||
this.logger.info('Successfully connected to MongoDB');
|
||||
|
||||
// Start health monitoring
|
||||
this.healthMonitor.start();
|
||||
|
||||
return;
|
||||
} catch (error) {
|
||||
lastError = error as Error;
|
||||
this.logger.error(`MongoDB connection attempt ${attempt} failed:`, error);
|
||||
|
||||
if (this.client) {
|
||||
await this.client.close();
|
||||
this.client = null;
|
||||
}
|
||||
|
||||
if (attempt < this.options.retryAttempts!) {
|
||||
await this.delay(this.options.retryDelay! * attempt);
|
||||
}
|
||||
this.logger.info('Successfully connected to MongoDB');
|
||||
} catch (error) {
|
||||
this.logger.error('MongoDB connection failed:', error);
|
||||
if (this.client) {
|
||||
await this.client.close();
|
||||
this.client = null;
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
|
||||
throw new Error(
|
||||
`Failed to connect to MongoDB after ${this.options.retryAttempts} attempts: ${lastError?.message}`
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -116,7 +74,6 @@ export class MongoDBClient {
|
|||
}
|
||||
|
||||
try {
|
||||
this.healthMonitor.stop();
|
||||
await this.client.close();
|
||||
this.isConnected = false;
|
||||
this.client = null;
|
||||
|
|
@ -128,10 +85,138 @@ export class MongoDBClient {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Batch upsert documents for high-performance operations.
 * Supports single or multiple unique keys for matching.
 *
 * Documents are processed in chunks; each chunk becomes one unordered
 * `bulkWrite` of `updateOne` operations with `upsert: true`. For every
 * document, `updated_at` is set to the current time, and `created_at` (the
 * caller's value if truthy, otherwise the current time) is written only when
 * the document is inserted.
 *
 * A chunk that fails — including one containing a document that lacks a
 * unique key — is logged and its error collected in `errors`; remaining
 * chunks are still processed.
 *
 * @param collectionName Target collection name.
 * @param documents      Documents to upsert; an empty array is a no-op.
 * @param uniqueKeys     Field name(s) whose values form the match filter.
 * @param options        `chunkSize`: documents per bulk write (default 10000).
 * @returns Totals of upserted and modified documents plus collected chunk errors.
 * @throws Error when the client is not connected, when no unique key is
 *         given, or when `chunkSize` is not greater than zero.
 */
async batchUpsert<T extends DocumentBase>(
  collectionName: string,
  documents: Array<
    Omit<T, '_id' | 'created_at' | 'updated_at'> & Partial<Pick<T, 'created_at' | 'updated_at'>>
  >,
  uniqueKeys: string | string[],
  options: {
    chunkSize?: number;
  } = {}
): Promise<{ insertedCount: number; updatedCount: number; errors: unknown[] }> {
  if (!this.db) {
    throw new Error('MongoDB client not connected');
  }

  if (documents.length === 0) {
    return { insertedCount: 0, updatedCount: 0, errors: [] };
  }

  // Normalize uniqueKeys to array
  const keyFields = Array.isArray(uniqueKeys) ? uniqueKeys : [uniqueKeys];

  if (keyFields.length === 0) {
    throw new Error('At least one unique key must be provided');
  }

  const { chunkSize = 10000 } = options;

  // A zero, negative or NaN chunk size would never advance the loop below to
  // the end of the array (0 and negatives loop forever), so reject it early.
  if (!(chunkSize > 0)) {
    throw new Error(`chunkSize must be greater than 0, received: ${chunkSize}`);
  }

  const collection = this.db.collection<T>(collectionName);
  // Short random tag used only to correlate the log lines of this call.
  const operationId = Math.random().toString(36).substring(7);

  let totalInserted = 0;
  let totalUpdated = 0;
  const errors: unknown[] = [];

  this.logger.info(`Starting batch upsert operation [${operationId}]`, {
    collection: collectionName,
    totalDocuments: documents.length,
    uniqueKeys: keyFields,
    chunkSize,
  });

  // Process documents in chunks to avoid memory issues
  for (let i = 0; i < documents.length; i += chunkSize) {
    const chunk = documents.slice(i, i + chunkSize);
    const chunkNumber = Math.floor(i / chunkSize) + 1;

    try {
      const startTime = Date.now();

      // Prepare bulk operations
      const bulkOps = chunk.map(doc => {
        const now = new Date();
        const docWithTimestamps = {
          ...doc,
          created_at: doc.created_at || now,
          updated_at: now,
        };

        // Create filter using multiple unique keys
        const filter: Record<string, unknown> = {};
        keyFields.forEach(key => {
          const value = (doc as Record<string, unknown>)[key];
          if (value === undefined || value === null) {
            throw new Error(`Document missing required unique key: ${key}`);
          }
          filter[key] = value;
        });

        // Remove created_at from $set to avoid conflict with $setOnInsert
        const { created_at, ...updateFields } = docWithTimestamps;

        return {
          updateOne: {
            filter,
            update: {
              $set: updateFields,
              $setOnInsert: { created_at },
            },
            upsert: true,
          },
        };
      });

      // Execute bulk operation with type assertion to handle complex MongoDB types
      const result = await collection.bulkWrite(bulkOps as never, { ordered: false });

      const executionTime = Date.now() - startTime;
      const inserted = result.upsertedCount;
      const updated = result.modifiedCount;

      totalInserted += inserted;
      totalUpdated += updated;

      this.logger.debug(`Batch upsert chunk processed [${operationId}]`, {
        chunkNumber,
        chunkSize: chunk.length,
        inserted,
        updated,
        executionTime,
        collection: collectionName,
      });
    } catch (error) {
      // Record the failure and keep going so one bad chunk does not abort the batch.
      this.logger.error(`Batch upsert failed on chunk [${operationId}]`, {
        error,
        collection: collectionName,
        chunkNumber,
        chunkStart: i,
        chunkSize: chunk.length,
        uniqueKeys: keyFields,
      });
      errors.push(error);
    }
  }

  this.logger.info(`Batch upsert completed [${operationId}]`, {
    collection: collectionName,
    totalRecords: documents.length,
    inserted: totalInserted,
    updated: totalUpdated,
    errors: errors.length,
    uniqueKeys: keyFields,
  });

  return { insertedCount: totalInserted, updatedCount: totalUpdated, errors };
}
|
||||
|
||||
/**
|
||||
* Get a typed collection
|
||||
*/
|
||||
getCollection<T extends DocumentBase>(name: CollectionNames): Collection<T> {
|
||||
getCollection<T extends DocumentBase>(name: string): Collection<T> {
|
||||
if (!this.db) {
|
||||
throw new Error('MongoDB client not connected');
|
||||
}
|
||||
|
|
@ -139,162 +224,26 @@ export class MongoDBClient {
|
|||
}
|
||||
|
||||
/**
|
||||
* Insert a document with validation
|
||||
* Simple insert operation
|
||||
*/
|
||||
async insertOne<T extends DocumentBase>(
|
||||
collectionName: CollectionNames,
|
||||
collectionName: string,
|
||||
document: Omit<T, '_id' | 'created_at' | 'updated_at'> &
|
||||
Partial<Pick<T, 'created_at' | 'updated_at'>>
|
||||
): Promise<T> {
|
||||
const collection = this.getCollection<T>(collectionName);
|
||||
|
||||
// Add timestamps
|
||||
const now = new Date();
|
||||
const docWithTimestamps = {
|
||||
...document,
|
||||
created_at: document.created_at || now,
|
||||
updated_at: now,
|
||||
} as T; // Validate document if schema exists
|
||||
if (collectionName in schemaMap) {
|
||||
try {
|
||||
(schemaMap as any)[collectionName].validateSync(docWithTimestamps);
|
||||
} catch (error) {
|
||||
if (error instanceof yup.ValidationError) {
|
||||
this.logger.error(`Document validation failed for ${collectionName}:`, error.errors);
|
||||
throw new Error(`Document validation failed: ${error.errors?.map(e => e).join(', ')}`);
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
} as T;
|
||||
|
||||
const result = await collection.insertOne(docWithTimestamps as OptionalUnlessRequiredId<T>);
|
||||
return { ...docWithTimestamps, _id: result.insertedId } as T;
|
||||
}
|
||||
|
||||
/**
 * Apply a `$set` update to the first document matching `filter`.
 *
 * `updated_at` is always stamped with the current time, overriding any value
 * supplied in `update`.
 *
 * @returns `true` when the driver reports at least one modified document.
 */
async updateOne<T extends DocumentBase>(
  collectionName: CollectionNames,
  filter: any,
  update: Partial<T>
): Promise<boolean> {
  // Spread first so the fresh timestamp wins over a caller-provided one.
  const patch = { ...update, updated_at: new Date() };

  const { modifiedCount } = await this.getCollection<T>(collectionName).updateOne(filter, {
    $set: patch,
  });
  return modifiedCount > 0;
}
|
||||
/**
 * Fetch every document matching `filter` from the named collection.
 *
 * `filter` and `options` are handed to the driver's `find` unchanged and both
 * default to an empty object. No schema validation is performed here.
 */
async find<T extends DocumentBase>(
  collectionName: CollectionNames,
  filter: any = {},
  options: any = {}
): Promise<T[]> {
  const cursor = this.getCollection<T>(collectionName).find(filter, options);
  const documents = await cursor.toArray();
  return documents as T[];
}
|
||||
|
||||
/**
 * Fetch a single document matching `filter`, or `null` when nothing matches.
 */
async findOne<T extends DocumentBase>(
  collectionName: CollectionNames,
  filter: any
): Promise<T | null> {
  const match = await this.getCollection<T>(collectionName).findOne(filter);
  return match as T | null;
}
|
||||
|
||||
/**
 * Run an aggregation pipeline on the named collection and return all results
 * as an array typed as `T`.
 */
async aggregate<T extends DocumentBase>(
  collectionName: CollectionNames,
  pipeline: any[]
): Promise<T[]> {
  const cursor = this.getCollection<T>(collectionName).aggregate<T>(pipeline);
  return await cursor.toArray();
}
|
||||
|
||||
/**
 * Count the documents in the named collection that match `filter`
 * (all documents when `filter` is omitted).
 */
async countDocuments(collectionName: CollectionNames, filter: any = {}): Promise<number> {
  const total = await this.getCollection(collectionName).countDocuments(filter);
  return total;
}
|
||||
|
||||
/**
 * Create the indexes used by the sentiment, news, SEC filing and raw document
 * collections.
 *
 * Index creation runs sequentially, one collection at a time. Any failure is
 * logged and rethrown.
 *
 * @throws Error when the client is not connected.
 */
async createIndexes(): Promise<void> {
  if (!this.db) {
    throw new Error('MongoDB client not connected');
  }

  // Collection name -> keys of its plain (non-unique) indexes, in creation order.
  const plainIndexes: Array<[string, Array<Record<string, 1 | -1>>]> = [
    [
      'sentiment_data',
      [{ symbol: 1, timestamp: -1 }, { sentiment_label: 1 }, { source_type: 1 }, { created_at: -1 }],
    ],
    [
      'news_articles',
      [{ symbols: 1, published_date: -1 }, { publication: 1 }, { categories: 1 }, { created_at: -1 }],
    ],
    [
      'sec_filings',
      [{ symbols: 1, filing_date: -1 }, { filing_type: 1 }, { cik: 1 }, { created_at: -1 }],
    ],
  ];

  try {
    for (const [collectionName, keys] of plainIndexes) {
      await this.db.collection(collectionName).createIndexes(keys.map(key => ({ key })));
    }

    // Raw documents: the unique content-hash index is created first, then the plain ones.
    await this.db.collection('raw_documents').createIndex({ content_hash: 1 }, { unique: true });
    await this.db
      .collection('raw_documents')
      .createIndexes([
        { key: { processing_status: 1 } },
        { key: { document_type: 1 } },
        { key: { created_at: -1 } },
      ]);

    this.logger.info('MongoDB indexes created successfully');
  } catch (error) {
    this.logger.error('Error creating MongoDB indexes:', error);
    throw error;
  }
}
|
||||
|
||||
/**
 * Return the statistics reported by the connected database's `stats()` call.
 *
 * @throws Error when the client is not connected.
 */
async getStats(): Promise<any> {
  if (this.db) {
    return await this.db.stats();
  }
  throw new Error('MongoDB client not connected');
}
|
||||
|
||||
/**
|
||||
* Check if client is connected
|
||||
*/
|
||||
|
|
@ -302,13 +251,6 @@ export class MongoDBClient {
|
|||
return this.isConnected && !!this.client;
|
||||
}
|
||||
|
||||
/**
 * Expose the underlying driver client (`null` while no client is held).
 */
get mongoClient(): MongoClient | null {
  const { client } = this;
  return client;
}
|
||||
|
||||
/**
|
||||
* Get the database instance
|
||||
*/
|
||||
|
|
@ -316,81 +258,24 @@ export class MongoDBClient {
|
|||
return this.db;
|
||||
}
|
||||
|
||||
/**
 * Merge caller-supplied settings with the defaults from `mongodbConfig`.
 *
 * Top-level scalar fields use `||`, so a falsy override (empty string, 0,
 * undefined) falls back to the `mongodbConfig` value. Nested groups start
 * from the `mongodbConfig` defaults and are overlaid with the caller's
 * object via spread.
 */
private buildConfig(config?: Partial<MongoDBClientConfig>): MongoDBClientConfig {
  const overrides: Partial<MongoDBClientConfig> = config ?? {};

  const poolSettings = {
    maxPoolSize: mongodbConfig.MONGODB_MAX_POOL_SIZE,
    minPoolSize: mongodbConfig.MONGODB_MIN_POOL_SIZE,
    maxIdleTime: mongodbConfig.MONGODB_MAX_IDLE_TIME,
    ...overrides.poolSettings,
  };

  const timeouts = {
    connectTimeout: mongodbConfig.MONGODB_CONNECT_TIMEOUT,
    socketTimeout: mongodbConfig.MONGODB_SOCKET_TIMEOUT,
    serverSelectionTimeout: mongodbConfig.MONGODB_SERVER_SELECTION_TIMEOUT,
    ...overrides.timeouts,
  };

  const tls = {
    enabled: mongodbConfig.MONGODB_TLS,
    insecure: mongodbConfig.MONGODB_TLS_INSECURE,
    caFile: mongodbConfig.MONGODB_TLS_CA_FILE,
    ...overrides.tls,
  };

  const options = {
    retryWrites: mongodbConfig.MONGODB_RETRY_WRITES,
    journal: mongodbConfig.MONGODB_JOURNAL,
    readPreference: mongodbConfig.MONGODB_READ_PREFERENCE as any,
    writeConcern: mongodbConfig.MONGODB_WRITE_CONCERN,
    ...overrides.options,
  };

  return {
    host: overrides.host || mongodbConfig.MONGODB_HOST,
    port: overrides.port || mongodbConfig.MONGODB_PORT,
    database: overrides.database || mongodbConfig.MONGODB_DATABASE,
    username: overrides.username || mongodbConfig.MONGODB_USERNAME,
    password: overrides.password || mongodbConfig.MONGODB_PASSWORD,
    authSource: overrides.authSource || mongodbConfig.MONGODB_AUTH_SOURCE,
    uri: overrides.uri || mongodbConfig.MONGODB_URI,
    poolSettings,
    timeouts,
    tls,
    options,
  };
}
|
||||
|
||||
private buildConnectionUri(): string {
|
||||
if (this.config.uri) {
|
||||
return this.config.uri;
|
||||
if (mongodbConfig.MONGODB_URI) {
|
||||
return mongodbConfig.MONGODB_URI;
|
||||
}
|
||||
|
||||
const { host, port, username, password, database, authSource } = this.config;
|
||||
const {
|
||||
MONGODB_HOST: host,
|
||||
MONGODB_PORT: port,
|
||||
MONGODB_USERNAME: username,
|
||||
MONGODB_PASSWORD: password,
|
||||
MONGODB_DATABASE: database,
|
||||
MONGODB_AUTH_SOURCE: authSource,
|
||||
} = mongodbConfig;
|
||||
|
||||
// Build URI components
|
||||
const auth = username && password ? `${username}:${password}@` : '';
|
||||
const authDb = authSource ? `?authSource=${authSource}` : '';
|
||||
const authParam = authSource && username ? `?authSource=${authSource}` : '';
|
||||
|
||||
return `mongodb://${auth}${host}:${port}/${database}${authDb}`;
|
||||
}
|
||||
|
||||
/**
 * Translate the resolved client configuration into driver `MongoClientOptions`.
 *
 * The configured write concern is a string: `'majority'` is passed through,
 * anything else is parsed as a base-10 integer. A value that does not parse
 * as a number falls back to `w: 1`; an explicit `"0"` is preserved as `w: 0`
 * rather than being silently replaced by 1. When no write concern is
 * configured the option is left undefined.
 */
private buildClientOptions(): MongoClientOptions {
  const { poolSettings, timeouts, options, tls } = this.config;

  const configuredConcern = options?.writeConcern;
  let writeConcern: { w: 'majority' | number } | undefined;
  if (configuredConcern) {
    if (configuredConcern === 'majority') {
      writeConcern = { w: 'majority' };
    } else {
      const parsed = parseInt(configuredConcern, 10);
      // Only fall back when parsing failed; `parsed || 1` would also discard a valid 0.
      writeConcern = { w: Number.isNaN(parsed) ? 1 : parsed };
    }
  }

  return {
    maxPoolSize: poolSettings?.maxPoolSize,
    minPoolSize: poolSettings?.minPoolSize,
    maxIdleTimeMS: poolSettings?.maxIdleTime,
    connectTimeoutMS: timeouts?.connectTimeout,
    socketTimeoutMS: timeouts?.socketTimeout,
    serverSelectionTimeoutMS: timeouts?.serverSelectionTimeout,
    retryWrites: options?.retryWrites,
    journal: options?.journal,
    readPreference: options?.readPreference,
    writeConcern,
    tls: tls?.enabled,
    tlsInsecure: tls?.insecure,
    tlsCAFile: tls?.caFile,
  };
}
|
||||
|
||||
private delay(ms: number): Promise<void> {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
return `mongodb://${auth}${host}:${port}/${database}${authParam}`;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,56 +1,19 @@
|
|||
import { mongodbConfig } from '@stock-bot/config';
|
||||
import { MongoDBClient } from './client';
|
||||
import type { MongoDBClientConfig, MongoDBConnectionOptions } from './types';
|
||||
|
||||
/**
 * Build a new MongoDBClient from the given (optional) configuration overrides
 * and connection options. Each call constructs a separate instance.
 */
export function createMongoDBClient(
  config?: Partial<MongoDBClientConfig>,
  options?: MongoDBConnectionOptions
): MongoDBClient {
  const client = new MongoDBClient(config, options);
  return client;
}
|
||||
|
||||
/**
 * Build a MongoDBClient whose host, port, database, credentials and URI are
 * taken directly from `mongodbConfig`.
 */
export function createDefaultMongoDBClient(): MongoDBClient {
  const {
    MONGODB_HOST: host,
    MONGODB_PORT: port,
    MONGODB_DATABASE: database,
    MONGODB_USERNAME: username,
    MONGODB_PASSWORD: password,
    MONGODB_URI: uri,
  } = mongodbConfig;

  const config: Partial<MongoDBClientConfig> = { host, port, database, username, password, uri };
  return new MongoDBClient(config);
}
|
||||
|
||||
/**
|
||||
* Singleton MongoDB client instance
|
||||
*/
|
||||
let defaultClient: MongoDBClient | null = null;
|
||||
|
||||
/**
|
||||
* Get or create the default MongoDB client instance
|
||||
* Get the singleton MongoDB client instance
|
||||
*/
|
||||
export function getMongoDBClient(): MongoDBClient {
|
||||
if (!defaultClient) {
|
||||
defaultClient = createDefaultMongoDBClient();
|
||||
}
|
||||
return defaultClient;
|
||||
return MongoDBClient.getInstance();
|
||||
}
|
||||
|
||||
/**
|
||||
* Connect to MongoDB using the default client
|
||||
* Connect to MongoDB using the singleton client
|
||||
*/
|
||||
export async function connectMongoDB(): Promise<MongoDBClient> {
|
||||
const client = getMongoDBClient();
|
||||
if (!client.connected) {
|
||||
await client.connect();
|
||||
await client.createIndexes();
|
||||
}
|
||||
return client;
|
||||
}
|
||||
|
|
@ -59,8 +22,8 @@ export async function connectMongoDB(): Promise<MongoDBClient> {
|
|||
* Disconnect from MongoDB
|
||||
*/
|
||||
export async function disconnectMongoDB(): Promise<void> {
|
||||
if (defaultClient) {
|
||||
await defaultClient.disconnect();
|
||||
defaultClient = null;
|
||||
const client = getMongoDBClient();
|
||||
if (client.connected) {
|
||||
await client.disconnect();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,233 +0,0 @@
|
|||
import { getLogger } from '@stock-bot/logger';
|
||||
import type { MongoDBClient } from './client';
|
||||
import type { MongoDBHealthCheck, MongoDBHealthStatus, MongoDBMetrics } from './types';
|
||||
|
||||
/**
|
||||
* MongoDB Health Monitor
|
||||
*
|
||||
* Monitors MongoDB connection health and provides metrics
|
||||
*/
|
||||
export class MongoDBHealthMonitor {
|
||||
private readonly client: MongoDBClient;
|
||||
private readonly logger: ReturnType<typeof getLogger>;
|
||||
private healthCheckInterval: NodeJS.Timeout | null = null;
|
||||
private metrics: MongoDBMetrics;
|
||||
private lastHealthCheck: MongoDBHealthCheck | null = null;
|
||||
|
||||
constructor(client: MongoDBClient) {
|
||||
this.client = client;
|
||||
this.logger = getLogger('mongodb-health-monitor');
|
||||
this.metrics = {
|
||||
operationsPerSecond: 0,
|
||||
averageLatency: 0,
|
||||
errorRate: 0,
|
||||
connectionPoolUtilization: 0,
|
||||
documentsProcessed: 0,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Start health monitoring
|
||||
*/
|
||||
start(intervalMs: number = 30000): void {
|
||||
if (this.healthCheckInterval) {
|
||||
this.stop();
|
||||
}
|
||||
|
||||
this.logger.info(`Starting MongoDB health monitoring (interval: ${intervalMs}ms)`);
|
||||
|
||||
this.healthCheckInterval = setInterval(async () => {
|
||||
try {
|
||||
await this.performHealthCheck();
|
||||
} catch (error) {
|
||||
this.logger.error('Health check failed:', error);
|
||||
}
|
||||
}, intervalMs);
|
||||
|
||||
// Perform initial health check
|
||||
this.performHealthCheck().catch(error => {
|
||||
this.logger.error('Initial health check failed:', error);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop health monitoring
|
||||
*/
|
||||
stop(): void {
|
||||
if (this.healthCheckInterval) {
|
||||
clearInterval(this.healthCheckInterval);
|
||||
this.healthCheckInterval = null;
|
||||
this.logger.info('Stopped MongoDB health monitoring');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current health status
|
||||
*/
|
||||
async getHealth(): Promise<MongoDBHealthCheck> {
|
||||
if (!this.lastHealthCheck) {
|
||||
await this.performHealthCheck();
|
||||
}
|
||||
return this.lastHealthCheck!;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get current metrics
|
||||
*/
|
||||
getMetrics(): MongoDBMetrics {
|
||||
return { ...this.metrics };
|
||||
}
|
||||
|
||||
/**
 * Run one health check and store the outcome in `lastHealthCheck`.
 *
 * Steps, in order:
 *  1. Verify the client reports itself connected and exposes client/database handles.
 *  2. Ping the database.
 *  3. Fetch `serverStatus` to refresh metrics and inspect pool utilization.
 *  4. Flag the check as slow when it has taken more than 1000 ms so far.
 *
 * The status becomes `unhealthy` when the client is unavailable or the ping
 * path throws, and `degraded` when `serverStatus` fails, pool utilization
 * exceeds 90%, or latency is high. Errors raised while probing are recorded
 * in the result's `errors` list instead of being rethrown.
 */
private async performHealthCheck(): Promise<void> {
  const startTime = Date.now();
  const errors: string[] = [];
  let status: MongoDBHealthStatus = 'healthy';

  // A thrown value is not guaranteed to be an Error; reading `.message` off an
  // arbitrary value would put "undefined" into the report.
  const describe = (thrown: unknown): string =>
    thrown instanceof Error ? thrown.message : String(thrown);

  try {
    if (!this.client.connected) {
      errors.push('MongoDB client not connected');
      status = 'unhealthy';
    } else {
      // Test basic connectivity
      const mongoClient = this.client.mongoClient;
      const db = this.client.database;

      if (!mongoClient || !db) {
        errors.push('MongoDB client or database not available');
        status = 'unhealthy';
      } else {
        // Ping the database
        await db.admin().ping();

        // Server status is best-effort: a failure here only degrades the
        // result, because the ping above already proved connectivity.
        try {
          const serverStatus = await db.admin().serverStatus();
          this.updateMetricsFromServerStatus(serverStatus);

          // Check connection pool status
          const poolStats = this.getConnectionPoolStats(serverStatus);
          if (poolStats.utilization > 0.9) {
            errors.push('High connection pool utilization');
            status = status === 'healthy' ? 'degraded' : status;
          }

          // Check for high latency (covers the ping and serverStatus round trips)
          const elapsed = Date.now() - startTime;
          if (elapsed > 1000) {
            errors.push(`High latency: ${elapsed}ms`);
            status = status === 'healthy' ? 'degraded' : status;
          }
        } catch (statusError) {
          errors.push(`Failed to get server status: ${describe(statusError)}`);
          status = 'degraded';
        }
      }
    }
  } catch (error) {
    errors.push(`Health check failed: ${describe(error)}`);
    status = 'unhealthy';
  }

  const latency = Date.now() - startTime;

  // Get connection stats
  const connectionStats = this.getConnectionStats();

  this.lastHealthCheck = {
    status,
    timestamp: new Date(),
    latency,
    connections: connectionStats,
    // Omit the list entirely when nothing went wrong.
    errors: errors.length > 0 ? errors : undefined,
  };

  // Non-healthy results are logged at warn level, healthy ones at debug level.
  if (status !== 'healthy') {
    this.logger.warn(`MongoDB health status: ${status}`, { errors, latency });
  } else {
    this.logger.debug(`MongoDB health check passed (${latency}ms)`);
  }
}
|
||||
|
||||
/**
 * Refresh the in-memory metrics from a MongoDB `serverStatus` document.
 *
 * Reads the `opcounters`, `connections` and `dur` sections; any section that
 * is missing is treated as empty. Parsing problems are logged at debug level
 * and never propagate to the caller.
 *
 * @param serverStatus - Result of `db.admin().serverStatus()`.
 */
private updateMetricsFromServerStatus(serverStatus: any): void {
  try {
    const opcounters = serverStatus.opcounters || {};
    const connections = serverStatus.connections || {};
    const dur = serverStatus.dur || {};

    // NOTE(review): the MongoDB docs describe serverStatus opcounters as
    // cumulative totals, so this is a running total rather than a true
    // per-second rate. Confirm how consumers interpret `operationsPerSecond`.
    const totalOps = Object.values(opcounters).reduce(
      (sum: number, count: any) => sum + (count || 0),
      0
    );
    this.metrics.operationsPerSecond = totalOps;

    // Connection pool utilization. Compare against null/undefined rather than
    // truthiness: a count of 0 is valid, and `available === 0` is exactly the
    // fully-saturated case that must be reported as 100%.
    if (connections.current != null && connections.available != null) {
      const current = Number(connections.current);
      const total = current + Number(connections.available);
      // Skip the update when the total is 0 or NaN to avoid storing NaN.
      if (total > 0) {
        this.metrics.connectionPoolUtilization = current / total;
      }
    }

    // Average latency (from durability stats if available)
    if (dur.timeMS) {
      this.metrics.averageLatency = dur.timeMS.dt || 0;
    }
  } catch (error) {
    this.logger.debug('Error parsing server status for metrics:', error as any);
  }
}
|
||||
|
||||
/**
 * Derive connection pool figures from a `serverStatus` document.
 *
 * Missing or falsy `connections.current` / `connections.available` values
 * count as 0. Utilization is `active / (active + available)`, or 0 when that
 * sum is not positive.
 *
 * @param serverStatus - Result of `db.admin().serverStatus()`.
 */
private getConnectionPoolStats(serverStatus: any): {
  utilization: number;
  active: number;
  available: number;
} {
  const { current, available: free } = serverStatus.connections || {};
  const active = current || 0;
  const available = free || 0;
  const total = active + available;

  let utilization = 0;
  if (total > 0) {
    utilization = active / total;
  }

  return { utilization, active, available };
}
|
||||
|
||||
/**
 * Return connection counts for the health report.
 *
 * These are fixed placeholder values (1 active, 9 available, 10 total), not
 * figures read from the driver's connection pool.
 */
private getConnectionStats(): { active: number; available: number; total: number } {
  // This would ideally come from the MongoDB driver's connection pool
  // For now, we'll return estimated values
  const active = 1;
  const available = 9;

  return { active, available, total: active + available };
}
|
||||
|
||||
/**
 * Record the error rate as `errorCount / totalOperations`.
 *
 * The rate is stored as 0 when `totalOperations` is not greater than zero.
 */
updateErrorRate(errorCount: number, totalOperations: number): void {
  if (totalOperations > 0) {
    this.metrics.errorRate = errorCount / totalOperations;
    return;
  }

  this.metrics.errorRate = 0;
}
|
||||
|
||||
/**
 * Add `count` to the running total of processed documents.
 */
updateDocumentsProcessed(count: number): void {
  const previous = this.metrics.documentsProcessed;
  this.metrics.documentsProcessed = previous + count;
}
|
||||
}
|
||||
|
|
@ -1,40 +1,22 @@
|
|||
/**
|
||||
* MongoDB Client Library for Stock Bot
|
||||
* Simplified MongoDB Client Library for Stock Bot Data Service
|
||||
*
|
||||
* Provides type-safe MongoDB access for document storage, sentiment data,
|
||||
* and raw content processing.
|
||||
* Provides a singleton MongoDB client focused on batch upsert operations
|
||||
* for high-performance data ingestion.
|
||||
*/
|
||||
|
||||
export { MongoDBClient } from './client';
|
||||
export { MongoDBHealthMonitor } from './health';
|
||||
export { MongoDBTransactionManager } from './transactions';
|
||||
export { MongoDBAggregationBuilder } from './aggregation';
|
||||
|
||||
// Types
|
||||
export type {
|
||||
MongoDBClientConfig,
|
||||
MongoDBConnectionOptions,
|
||||
MongoDBHealthStatus,
|
||||
MongoDBMetrics,
|
||||
CollectionNames,
|
||||
DocumentBase,
|
||||
SentimentData,
|
||||
RawDocument,
|
||||
NewsArticle,
|
||||
SecFiling,
|
||||
EarningsTranscript,
|
||||
AnalystReport,
|
||||
DocumentBase,
|
||||
EarningsTranscript,
|
||||
NewsArticle,
|
||||
RawDocument,
|
||||
SecFiling,
|
||||
SentimentData,
|
||||
} from './types';
|
||||
|
||||
// Schemas
|
||||
export {
|
||||
sentimentDataSchema,
|
||||
rawDocumentSchema,
|
||||
newsArticleSchema,
|
||||
secFilingSchema,
|
||||
earningsTranscriptSchema,
|
||||
analystReportSchema,
|
||||
} from './schemas';
|
||||
|
||||
// Utils
|
||||
export { createMongoDBClient } from './factory';
|
||||
// Factory functions
|
||||
export { connectMongoDB, disconnectMongoDB, getMongoDBClient } from './factory';
|
||||
|
|
|
|||
|
|
@ -1,146 +0,0 @@
|
|||
import * as yup from 'yup';
|
||||
|
||||
/**
|
||||
* Yup Schemas for MongoDB Document Validation
|
||||
*/
|
||||
|
||||
/**
 * Fields common to every stored document. The collection-specific schemas in
 * this file extend it through `.shape(...)`.
 */
export const documentBaseSchema = yup.object({
  _id: yup.mixed().optional(),
  created_at: yup.date().required(),
  updated_at: yup.date().required(),
  source: yup.string().required(),
  metadata: yup.object().optional(),
});
|
||||
|
||||
/** One named entity detected in a piece of sentiment text. */
const sentimentEntitySchema = yup.object({
  name: yup.string().required(),
  type: yup.string().required(),
  confidence: yup.number().min(0).max(1).required(),
});

/**
 * Sentiment record for a symbol: a score in [-1, 1], a label, a confidence in
 * [0, 1], the analysed text and where it came from.
 */
export const sentimentDataSchema = documentBaseSchema.shape({
  symbol: yup.string().min(1).max(10).required(),
  sentiment_score: yup.number().min(-1).max(1).required(),
  sentiment_label: yup.string().oneOf(['positive', 'negative', 'neutral']).required(),
  confidence: yup.number().min(0).max(1).required(),
  text: yup.string().min(1).required(),
  source_type: yup.string().oneOf(['reddit', 'twitter', 'news', 'forums']).required(),
  source_id: yup.string().required(),
  timestamp: yup.date().required(),
  processed_at: yup.date().required(),
  language: yup.string().default('en'),
  keywords: yup.array(yup.string()).required(),
  entities: yup.array(sentimentEntitySchema).required(),
});
|
||||
|
||||
/**
 * Unprocessed source document: the raw content with its hash and size,
 * optional descriptive fields, and a processing status.
 */
export const rawDocumentSchema = documentBaseSchema.shape({
  document_type: yup.string().oneOf(['html', 'pdf', 'text', 'json', 'xml']).required(),
  content: yup.string().required(),
  content_hash: yup.string().required(),
  url: yup.string().url().optional(),
  title: yup.string().optional(),
  author: yup.string().optional(),
  published_date: yup.date().optional(),
  extracted_text: yup.string().optional(),
  processing_status: yup.string().oneOf(['pending', 'processed', 'failed']).required(),
  size_bytes: yup.number().positive().required(),
  language: yup.string().optional(),
});
|
||||
|
||||
/**
 * News article with publication details, the symbols it mentions, and
 * optional sentiment/relevance scores.
 */
export const newsArticleSchema = documentBaseSchema.shape({
  headline: yup.string().min(1).required(),
  content: yup.string().min(1).required(),
  summary: yup.string().optional(),
  author: yup.string().required(),
  publication: yup.string().required(),
  published_date: yup.date().required(),
  url: yup.string().url().required(),
  symbols: yup.array(yup.string()).required(),
  categories: yup.array(yup.string()).required(),
  sentiment_score: yup.number().min(-1).max(1).optional(),
  relevance_score: yup.number().min(0).max(1).optional(),
  image_url: yup.string().url().optional(),
  tags: yup.array(yup.string()).required(),
});
|
||||
|
||||
/** One financial statement extracted from a filing. */
const financialStatementSchema = yup.object({
  statement_type: yup.string().required(),
  data: yup.object().required(),
});

/**
 * SEC filing: identifying numbers, company and dates, the filing content,
 * optionally extracted data and statements, and a processing status.
 */
export const secFilingSchema = documentBaseSchema.shape({
  cik: yup.string().required(),
  accession_number: yup.string().required(),
  filing_type: yup.string().required(),
  company_name: yup.string().required(),
  symbols: yup.array(yup.string()).required(),
  filing_date: yup.date().required(),
  period_end_date: yup.date().required(),
  url: yup.string().url().required(),
  content: yup.string().required(),
  extracted_data: yup.object().optional(),
  financial_statements: yup.array(financialStatementSchema).optional(),
  processing_status: yup.string().oneOf(['pending', 'processed', 'failed']).required(),
});
|
||||
|
||||
/** A person taking part in an earnings call. */
const transcriptParticipantSchema = yup.object({
  name: yup.string().required(),
  title: yup.string().required(),
  type: yup.string().oneOf(['executive', 'analyst']).required(),
});

/** Sentiment derived from a transcript: an overall score plus per-topic values. */
const transcriptSentimentSchema = yup.object({
  overall_sentiment: yup.number().min(-1).max(1).required(),
  topic_sentiments: yup.object().required(),
});

/**
 * Earnings call transcript for a symbol and fiscal period, with participants,
 * key topics and optional sentiment analysis / financial highlights.
 */
export const earningsTranscriptSchema = documentBaseSchema.shape({
  symbol: yup.string().min(1).max(10).required(),
  company_name: yup.string().required(),
  quarter: yup.string().required(),
  // A year is a whole number; without `.integer()` a value such as 2024.5
  // would pass the range check.
  year: yup.number().integer().min(2000).max(3000).required(),
  call_date: yup.date().required(),
  transcript: yup.string().required(),
  participants: yup.array(transcriptParticipantSchema).required(),
  key_topics: yup.array(yup.string()).required(),
  sentiment_analysis: transcriptSentimentSchema.optional(),
  financial_highlights: yup.object().optional(),
});
|
||||
|
||||
/**
 * Analyst report on a symbol: author, rating (with optional price target and
 * previous rating), full content, summary and key points.
 */
export const analystReportSchema = documentBaseSchema.shape({
  symbol: yup.string().min(1).max(10).required(),
  analyst_firm: yup.string().required(),
  analyst_name: yup.string().required(),
  report_title: yup.string().required(),
  report_date: yup.date().required(),
  rating: yup.string().oneOf(['buy', 'hold', 'sell', 'strong_buy', 'strong_sell']).required(),
  price_target: yup.number().positive().optional(),
  previous_rating: yup.string().optional(),
  content: yup.string().required(),
  summary: yup.string().required(),
  key_points: yup.array(yup.string()).required(),
  financial_projections: yup.object().optional(),
});
|
||||
|
||||
/**
 * Lookup table from collection name to the schema that validates its
 * documents. Declared `as const` so the keys keep their literal types.
 */
export const schemaMap = {
  sentiment_data: sentimentDataSchema,
  raw_documents: rawDocumentSchema,
  news_articles: newsArticleSchema,
  sec_filings: secFilingSchema,
  earnings_transcripts: earningsTranscriptSchema,
  analyst_reports: analystReportSchema,
} as const;
|
||||
|
|
@ -1,238 +0,0 @@
|
|||
import type { OptionalUnlessRequiredId, WithId } from 'mongodb';
|
||||
import { getLogger } from '@stock-bot/logger';
|
||||
import type { MongoDBClient } from './client';
|
||||
import type { CollectionNames, DocumentBase } from './types';
|
||||
|
||||
/**
 * MongoDB Transaction Manager
 *
 * Wraps the driver's session/transaction API so that multi-document
 * operations (batch inserts, batch updates, moves between collections and
 * archiving) run inside a transaction.
 */
export class MongoDBTransactionManager {
  private readonly client: MongoDBClient;
  private readonly logger: ReturnType<typeof getLogger>;

  /**
   * @param client - Client whose `mongoClient` and `getCollection` are used
   *   for every operation.
   */
  constructor(client: MongoDBClient) {
    this.client = client;
    this.logger = getLogger('mongodb-transaction-manager');
  }

  /**
   * Execute operations within a transaction.
   *
   * Starts a session, runs `operations` through the session's
   * `withTransaction`, and always ends the session afterwards.
   *
   * @param operations - Callback that receives the session; pass that session
   *   to every driver call that should be part of the transaction.
   * @param options - Optional transaction settings. Defaults: read concern
   *   `'majority'`, write concern `{ w: 'majority' }`, `maxCommitTimeMS` 10000.
   * @returns Whatever the session's `withTransaction` call resolves to.
   * @throws Error when the client has no underlying `mongoClient`; any error
   *   from the transaction is logged and rethrown.
   */
  async withTransaction<T>(
    operations: (session: any) => Promise<T>,
    options?: {
      readPreference?: string;
      readConcern?: string;
      writeConcern?: any;
      maxCommitTimeMS?: number;
    }
  ): Promise<T> {
    const mongoClient = this.client.mongoClient;
    if (!mongoClient) {
      throw new Error('MongoDB client not connected');
    }

    const session = mongoClient.startSession();

    try {
      this.logger.debug('Starting MongoDB transaction');

      const result = await session.withTransaction(
        async () => {
          return await operations(session);
        },
        {
          readPreference: options?.readPreference as any,
          readConcern: { level: options?.readConcern || 'majority' } as any,
          writeConcern: options?.writeConcern || { w: 'majority' },
          maxCommitTimeMS: options?.maxCommitTimeMS || 10000,
        }
      );

      this.logger.debug('MongoDB transaction completed successfully');
      return result;
    } catch (error) {
      this.logger.error('MongoDB transaction failed:', error);
      throw error;
    } finally {
      // Release the session whether the transaction committed or failed.
      await session.endSession();
    }
  }

  /**
   * Batch insert documents across collections within a transaction.
   *
   * Each document gets `updated_at` set to the current time and keeps its own
   * `created_at` when present (otherwise the current time). Entries with no
   * documents are skipped.
   *
   * @param operations - Target collection and documents for each batch.
   * @param options - `ordered` defaults to true, `bypassDocumentValidation`
   *   to false.
   */
  async batchInsert(
    operations: Array<{
      collection: CollectionNames;
      documents: DocumentBase[];
    }>,
    options?: { ordered?: boolean; bypassDocumentValidation?: boolean }
  ): Promise<void> {
    await this.withTransaction(async session => {
      for (const operation of operations) {
        // The driver rejects insertMany with an empty batch, which would abort
        // the whole transaction over a no-op entry.
        if (operation.documents.length === 0) {
          this.logger.debug(`Skipping empty insert batch for ${operation.collection}`);
          continue;
        }

        const collection = this.client.getCollection(operation.collection);

        // Add timestamps to all documents
        const now = new Date();
        const documentsWithTimestamps = operation.documents.map(doc => ({
          ...doc,
          created_at: doc.created_at || now,
          updated_at: now,
        }));

        await collection.insertMany(documentsWithTimestamps, {
          session,
          ordered: options?.ordered ?? true,
          bypassDocumentValidation: options?.bypassDocumentValidation ?? false,
        });

        this.logger.debug(
          `Inserted ${documentsWithTimestamps.length} documents into ${operation.collection}`
        );
      }
    });
  }

  /**
   * Batch update documents across collections within a transaction.
   *
   * Every update has `$set.updated_at` set to the current time, merged with
   * any `$set` fields the caller supplied. `update` is spread as an object, so
   * it is expected to be an update-operator document.
   *
   * @param operations - Collection, filter, update document and optional
   *   driver options for each `updateMany` call.
   */
  async batchUpdate(
    operations: Array<{
      collection: CollectionNames;
      filter: any;
      update: any;
      options?: any;
    }>
  ): Promise<void> {
    await this.withTransaction(async session => {
      for (const operation of operations) {
        const collection = this.client.getCollection(operation.collection);

        // Add updated timestamp
        const updateWithTimestamp = {
          ...operation.update,
          $set: {
            ...operation.update.$set,
            updated_at: new Date(),
          },
        };

        // `session` goes last so caller-supplied options can never replace it
        // and silently take this write out of the transaction.
        const result = await collection.updateMany(operation.filter, updateWithTimestamp, {
          ...operation.options,
          session,
        });

        this.logger.debug(`Updated ${result.modifiedCount} documents in ${operation.collection}`);
      }
    });
  }

  /**
   * Move documents between collections within a transaction.
   *
   * Matching documents are read from the source, optionally transformed,
   * stamped with a fresh `updated_at`, inserted into the target and then
   * deleted from the source using the same filter.
   *
   * @param fromCollection - Collection to read and delete from.
   * @param toCollection - Collection to insert into.
   * @param filter - Selects the documents to move.
   * @param transform - Optional mapping applied to each document before insert.
   * @returns Number of documents deleted from the source (0 when none match).
   */
  async moveDocuments<T extends DocumentBase>(
    fromCollection: CollectionNames,
    toCollection: CollectionNames,
    filter: any,
    transform?: (doc: T) => T
  ): Promise<number> {
    return await this.withTransaction(async session => {
      const sourceCollection = this.client.getCollection<T>(fromCollection);
      const targetCollection = this.client.getCollection<T>(toCollection);

      // Find documents to move
      const documents = await sourceCollection.find(filter, { session }).toArray();

      if (documents.length === 0) {
        return 0;
      }

      // Transform documents if needed
      const documentsToInsert = transform
        ? documents.map((doc: WithId<T>) => transform(doc as T))
        : documents;

      // Add updated timestamp
      const now = new Date();
      documentsToInsert.forEach(doc => {
        doc.updated_at = now;
      });

      // Insert into target collection
      await targetCollection.insertMany(documentsToInsert as OptionalUnlessRequiredId<T>[], {
        session,
      });

      // Remove from source collection
      const deleteResult = await sourceCollection.deleteMany(filter, { session });

      this.logger.info(
        `Moved ${documents.length} documents from ${fromCollection} to ${toCollection}`
      );

      return deleteResult.deletedCount || 0;
    });
  }

  /**
   * Archive old documents, one transaction per batch.
   *
   * Repeatedly takes up to `batchSize` documents whose `created_at` is before
   * `cutoffDate`, copies them to the archive collection with `archived_at` and
   * `archived_from` added, and deletes them from the source by `_id`. Stops
   * when a batch archives nothing.
   *
   * @param sourceCollection - Collection to archive from.
   * @param archiveCollection - Collection receiving the archived copies.
   * @param cutoffDate - Documents created before this date are archived.
   * @param batchSize - Maximum documents per transaction (default 1000).
   * @returns Total number of documents deleted from the source.
   */
  async archiveDocuments(
    sourceCollection: CollectionNames,
    archiveCollection: CollectionNames,
    cutoffDate: Date,
    batchSize: number = 1000
  ): Promise<number> {
    let totalArchived = 0;

    while (true) {
      const batchArchived = await this.withTransaction(async session => {
        const collection = this.client.getCollection(sourceCollection);
        const archiveCol = this.client.getCollection(archiveCollection);

        // Find old documents
        const documents = await collection
          .find({ created_at: { $lt: cutoffDate } }, { limit: batchSize, session })
          .toArray();

        if (documents.length === 0) {
          return 0;
        }

        // Add archive metadata
        const now = new Date();
        const documentsToArchive = documents.map(doc => ({
          ...doc,
          archived_at: now,
          archived_from: sourceCollection,
        }));

        // Insert into archive collection
        await archiveCol.insertMany(documentsToArchive, { session });

        // Remove exactly the documents that were copied.
        const ids = documents.map(doc => doc._id);
        const deleteResult = await collection.deleteMany({ _id: { $in: ids } }, { session });

        return deleteResult.deletedCount || 0;
      });

      totalArchived += batchArchived;

      if (batchArchived === 0) {
        break;
      }

      this.logger.debug(`Archived batch of ${batchArchived} documents`);
    }

    this.logger.info(
      `Archived ${totalArchived} documents from ${sourceCollection} to ${archiveCollection}`
    );
    return totalArchived;
  }
}
|
||||
|
|
@ -1,5 +1,4 @@
|
|||
import type { ObjectId } from 'mongodb';
|
||||
import * as yup from 'yup';
|
||||
|
||||
/**
|
||||
* MongoDB Client Configuration
|
||||
|
|
@ -69,20 +68,6 @@ export interface MongoDBMetrics {
|
|||
documentsProcessed: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Collection Names
|
||||
*/
|
||||
export type CollectionNames =
|
||||
| 'sentiment_data'
|
||||
| 'raw_documents'
|
||||
| 'news_articles'
|
||||
| 'sec_filings'
|
||||
| 'earnings_transcripts'
|
||||
| 'analyst_reports'
|
||||
| 'social_media_posts'
|
||||
| 'market_events'
|
||||
| 'economic_indicators';
|
||||
|
||||
/**
|
||||
* Base Document Interface
|
||||
*/
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue