running prettier for cleanup

This commit is contained in:
Boki 2025-06-11 10:13:25 -04:00
parent fe7733aeb5
commit d85cd58acd
151 changed files with 29158 additions and 27966 deletions

View file

@@ -1,247 +1,247 @@
import type { Document } from 'mongodb';
import type { MongoDBClient } from './client';
import type { CollectionNames } from './types';
/**
 * MongoDB Aggregation Builder
 *
 * Fluent helper for assembling MongoDB aggregation pipelines stage by
 * stage, plus a handful of canned aggregations used across the app.
 */
export class MongoDBAggregationBuilder {
  // Accumulated pipeline stages, in execution order.
  private stages: any[] = [];
  private readonly client: MongoDBClient;
  // Target collection; must be set via from() before execute().
  private targetCollection: CollectionNames | null = null;

  constructor(client: MongoDBClient) {
    this.client = client;
  }

  /**
   * Choose the collection the pipeline will run against.
   */
  from(collection: CollectionNames): this {
    this.targetCollection = collection;
    return this;
  }

  /**
   * Append a $match stage.
   */
  match(filter: any): this {
    return this.addStage({ $match: filter });
  }

  /**
   * Append a $group stage.
   */
  group(groupBy: any): this {
    return this.addStage({ $group: groupBy });
  }

  /**
   * Append a $sort stage.
   */
  sort(sortBy: any): this {
    return this.addStage({ $sort: sortBy });
  }

  /**
   * Append a $limit stage.
   */
  limit(count: number): this {
    return this.addStage({ $limit: count });
  }

  /**
   * Append a $skip stage.
   */
  skip(count: number): this {
    return this.addStage({ $skip: count });
  }

  /**
   * Append a $project stage.
   */
  project(projection: any): this {
    return this.addStage({ $project: projection });
  }

  /**
   * Append an $unwind stage. When options are supplied the long form
   * ({ path, ...options }) is emitted; otherwise the shorthand string.
   */
  unwind(field: string, options?: any): this {
    if (options) {
      return this.addStage({ $unwind: { path: field, ...options } });
    }
    return this.addStage({ $unwind: field });
  }

  /**
   * Append a $lookup (join) stage.
   */
  lookup(from: string, localField: string, foreignField: string, as: string): this {
    return this.addStage({ $lookup: { from, localField, foreignField, as } });
  }

  /**
   * Append an arbitrary caller-supplied stage.
   */
  addStage(stage: any): this {
    this.stages.push(stage);
    return this;
  }

  /**
   * Run the assembled pipeline against the selected collection.
   *
   * @throws Error when from() was never called.
   */
  async execute<T extends Document = Document>(): Promise<T[]> {
    if (!this.targetCollection) {
      throw new Error('Collection not specified. Use .from() to set the collection.');
    }
    const collection = this.client.getCollection(this.targetCollection);
    return await collection.aggregate<T>(this.stages).toArray();
  }

  /**
   * Return a defensive copy of the stages assembled so far.
   */
  getPipeline(): any[] {
    return this.stages.slice();
  }

  /**
   * Clear all stages and forget the target collection.
   */
  reset(): this {
    this.stages = [];
    this.targetCollection = null;
    return this;
  }

  // ---- Convenience builders for common aggregations ----

  /**
   * Sentiment counts/averages grouped by symbol and sentiment label,
   * optionally restricted to a single symbol and/or a time window.
   */
  sentimentAnalysis(symbol?: string, timeframe?: { start: Date; end: Date }): this {
    this.from('sentiment_data');
    const conditions: any = {};
    if (symbol) {
      conditions.symbol = symbol;
    }
    if (timeframe) {
      conditions.timestamp = { $gte: timeframe.start, $lte: timeframe.end };
    }
    if (Object.keys(conditions).length > 0) {
      this.match(conditions);
    }
    return this.group({
      _id: { symbol: '$symbol', sentiment: '$sentiment_label' },
      count: { $sum: 1 },
      avgScore: { $avg: '$sentiment_score' },
      avgConfidence: { $avg: '$confidence' },
    });
  }

  /**
   * Article counts and sentiment per publication, optionally limited to
   * articles mentioning any of the given symbols.
   */
  newsByPublication(symbols?: string[]): this {
    this.from('news_articles');
    if (symbols && symbols.length > 0) {
      this.match({ symbols: { $in: symbols } });
    }
    return this.group({
      _id: '$publication',
      articleCount: { $sum: 1 },
      symbols: { $addToSet: '$symbols' },
      avgSentiment: { $avg: '$sentiment_score' },
      latestArticle: { $max: '$published_date' },
    });
  }

  /**
   * SEC filing counts grouped by company (CIK + name), optionally
   * limited to specific filing types.
   */
  secFilingsByCompany(filingTypes?: string[]): this {
    this.from('sec_filings');
    if (filingTypes && filingTypes.length > 0) {
      this.match({ filing_type: { $in: filingTypes } });
    }
    return this.group({
      _id: { cik: '$cik', company: '$company_name' },
      filingCount: { $sum: 1 },
      filingTypes: { $addToSet: '$filing_type' },
      latestFiling: { $max: '$filing_date' },
      symbols: { $addToSet: '$symbols' },
    });
  }

  /**
   * Per-status document counts with size and age aggregates.
   */
  processingStatusSummary(collection: CollectionNames): this {
    this.from(collection);
    return this.group({
      _id: '$processing_status',
      count: { $sum: 1 },
      avgSizeBytes: { $avg: '$size_bytes' },
      oldestDocument: { $min: '$created_at' },
      newestDocument: { $max: '$created_at' },
    });
  }

  /**
   * Document counts bucketed by time interval on the given date field,
   * sorted chronologically by bucket.
   */
  timeBasedCounts(
    collection: CollectionNames,
    dateField: string = 'created_at',
    interval: 'hour' | 'day' | 'week' | 'month' = 'day'
  ): this {
    this.from(collection);
    // $dateToString format string for each supported bucket size.
    const formats: Record<'hour' | 'day' | 'week' | 'month', string> = {
      hour: '%Y-%m-%d %H:00:00',
      day: '%Y-%m-%d',
      week: '%Y-W%V',
      month: '%Y-%m',
    };
    const bucket = { $dateToString: { format: formats[interval], date: `$${dateField}` } };
    return this.group({
      _id: bucket,
      count: { $sum: 1 },
      firstDocument: { $min: `$${dateField}` },
      lastDocument: { $max: `$${dateField}` },
    }).sort({ _id: 1 });
  }
}
import type { Document } from 'mongodb';
import type { MongoDBClient } from './client';
import type { CollectionNames } from './types';
/**
 * MongoDB Aggregation Builder
 *
 * Fluent helper for assembling MongoDB aggregation pipelines stage by
 * stage, plus a handful of canned aggregations used across the app.
 */
export class MongoDBAggregationBuilder {
  // Accumulated pipeline stages, in execution order.
  private stages: any[] = [];
  private readonly client: MongoDBClient;
  // Target collection; must be set via from() before execute().
  private targetCollection: CollectionNames | null = null;

  constructor(client: MongoDBClient) {
    this.client = client;
  }

  /**
   * Choose the collection the pipeline will run against.
   */
  from(collection: CollectionNames): this {
    this.targetCollection = collection;
    return this;
  }

  /**
   * Append a $match stage.
   */
  match(filter: any): this {
    return this.addStage({ $match: filter });
  }

  /**
   * Append a $group stage.
   */
  group(groupBy: any): this {
    return this.addStage({ $group: groupBy });
  }

  /**
   * Append a $sort stage.
   */
  sort(sortBy: any): this {
    return this.addStage({ $sort: sortBy });
  }

  /**
   * Append a $limit stage.
   */
  limit(count: number): this {
    return this.addStage({ $limit: count });
  }

  /**
   * Append a $skip stage.
   */
  skip(count: number): this {
    return this.addStage({ $skip: count });
  }

  /**
   * Append a $project stage.
   */
  project(projection: any): this {
    return this.addStage({ $project: projection });
  }

  /**
   * Append an $unwind stage. When options are supplied the long form
   * ({ path, ...options }) is emitted; otherwise the shorthand string.
   */
  unwind(field: string, options?: any): this {
    if (options) {
      return this.addStage({ $unwind: { path: field, ...options } });
    }
    return this.addStage({ $unwind: field });
  }

  /**
   * Append a $lookup (join) stage.
   */
  lookup(from: string, localField: string, foreignField: string, as: string): this {
    return this.addStage({ $lookup: { from, localField, foreignField, as } });
  }

  /**
   * Append an arbitrary caller-supplied stage.
   */
  addStage(stage: any): this {
    this.stages.push(stage);
    return this;
  }

  /**
   * Run the assembled pipeline against the selected collection.
   *
   * @throws Error when from() was never called.
   */
  async execute<T extends Document = Document>(): Promise<T[]> {
    if (!this.targetCollection) {
      throw new Error('Collection not specified. Use .from() to set the collection.');
    }
    const collection = this.client.getCollection(this.targetCollection);
    return await collection.aggregate<T>(this.stages).toArray();
  }

  /**
   * Return a defensive copy of the stages assembled so far.
   */
  getPipeline(): any[] {
    return this.stages.slice();
  }

  /**
   * Clear all stages and forget the target collection.
   */
  reset(): this {
    this.stages = [];
    this.targetCollection = null;
    return this;
  }

  // ---- Convenience builders for common aggregations ----

  /**
   * Sentiment counts/averages grouped by symbol and sentiment label,
   * optionally restricted to a single symbol and/or a time window.
   */
  sentimentAnalysis(symbol?: string, timeframe?: { start: Date; end: Date }): this {
    this.from('sentiment_data');
    const conditions: any = {};
    if (symbol) {
      conditions.symbol = symbol;
    }
    if (timeframe) {
      conditions.timestamp = { $gte: timeframe.start, $lte: timeframe.end };
    }
    if (Object.keys(conditions).length > 0) {
      this.match(conditions);
    }
    return this.group({
      _id: { symbol: '$symbol', sentiment: '$sentiment_label' },
      count: { $sum: 1 },
      avgScore: { $avg: '$sentiment_score' },
      avgConfidence: { $avg: '$confidence' },
    });
  }

  /**
   * Article counts and sentiment per publication, optionally limited to
   * articles mentioning any of the given symbols.
   */
  newsByPublication(symbols?: string[]): this {
    this.from('news_articles');
    if (symbols && symbols.length > 0) {
      this.match({ symbols: { $in: symbols } });
    }
    return this.group({
      _id: '$publication',
      articleCount: { $sum: 1 },
      symbols: { $addToSet: '$symbols' },
      avgSentiment: { $avg: '$sentiment_score' },
      latestArticle: { $max: '$published_date' },
    });
  }

  /**
   * SEC filing counts grouped by company (CIK + name), optionally
   * limited to specific filing types.
   */
  secFilingsByCompany(filingTypes?: string[]): this {
    this.from('sec_filings');
    if (filingTypes && filingTypes.length > 0) {
      this.match({ filing_type: { $in: filingTypes } });
    }
    return this.group({
      _id: { cik: '$cik', company: '$company_name' },
      filingCount: { $sum: 1 },
      filingTypes: { $addToSet: '$filing_type' },
      latestFiling: { $max: '$filing_date' },
      symbols: { $addToSet: '$symbols' },
    });
  }

  /**
   * Per-status document counts with size and age aggregates.
   */
  processingStatusSummary(collection: CollectionNames): this {
    this.from(collection);
    return this.group({
      _id: '$processing_status',
      count: { $sum: 1 },
      avgSizeBytes: { $avg: '$size_bytes' },
      oldestDocument: { $min: '$created_at' },
      newestDocument: { $max: '$created_at' },
    });
  }

  /**
   * Document counts bucketed by time interval on the given date field,
   * sorted chronologically by bucket.
   */
  timeBasedCounts(
    collection: CollectionNames,
    dateField: string = 'created_at',
    interval: 'hour' | 'day' | 'week' | 'month' = 'day'
  ): this {
    this.from(collection);
    // $dateToString format string for each supported bucket size.
    const formats: Record<'hour' | 'day' | 'week' | 'month', string> = {
      hour: '%Y-%m-%d %H:00:00',
      day: '%Y-%m-%d',
      week: '%Y-W%V',
      month: '%Y-%m',
    };
    const bucket = { $dateToString: { format: formats[interval], date: `$${dateField}` } };
    return this.group({
      _id: bucket,
      count: { $sum: 1 },
      firstDocument: { $min: `$${dateField}` },
      lastDocument: { $max: `$${dateField}` },
    }).sort({ _id: 1 });
  }
}

View file

@@ -1,379 +1,396 @@
import { MongoClient, Db, Collection, MongoClientOptions, Document, WithId, OptionalUnlessRequiredId } from 'mongodb';
import { mongodbConfig } from '@stock-bot/config';
import { getLogger } from '@stock-bot/logger';
import type {
MongoDBClientConfig,
MongoDBConnectionOptions,
CollectionNames,
DocumentBase,
SentimentData,
RawDocument,
NewsArticle,
SecFiling,
EarningsTranscript,
AnalystReport
} from './types';
import { MongoDBHealthMonitor } from './health';
import { schemaMap } from './schemas';
import * as yup from 'yup';
/**
 * MongoDB Client for Stock Bot
 *
 * Provides type-safe access to MongoDB collections with built-in
 * health monitoring, connection pooling, and schema validation.
 */
export class MongoDBClient {
  private client: MongoClient | null = null;
  private db: Db | null = null;
  private readonly config: MongoDBClientConfig;
  private readonly options: MongoDBConnectionOptions;
  private readonly logger: ReturnType<typeof getLogger>;
  private readonly healthMonitor: MongoDBHealthMonitor;
  private isConnected = false;

  constructor(config?: Partial<MongoDBClientConfig>, options?: MongoDBConnectionOptions) {
    this.config = this.buildConfig(config);
    // Connection-option defaults; caller-supplied values win via spread.
    this.options = {
      retryAttempts: 3,
      retryDelay: 1000,
      healthCheckInterval: 30000,
      ...options,
    };
    this.logger = getLogger('mongodb-client');
    this.healthMonitor = new MongoDBHealthMonitor(this);
  }

  /**
   * Connect to MongoDB, retrying with linear backoff (retryDelay * attempt).
   * No-op if already connected.
   *
   * @throws Error when every attempt fails (includes the last error message).
   */
  async connect(): Promise<void> {
    if (this.isConnected && this.client) {
      return;
    }
    const uri = this.buildConnectionUri();
    const clientOptions = this.buildClientOptions();
    let lastError: Error | null = null;
    for (let attempt = 1; attempt <= this.options.retryAttempts!; attempt++) {
      try {
        this.logger.info(
          `Connecting to MongoDB (attempt ${attempt}/${this.options.retryAttempts})...`
        );
        this.client = new MongoClient(uri, clientOptions);
        await this.client.connect();
        // Ping to verify the server is actually reachable before declaring success.
        await this.client.db(this.config.database).admin().ping();
        this.db = this.client.db(this.config.database);
        this.isConnected = true;
        this.logger.info('Successfully connected to MongoDB');
        // Start health monitoring
        this.healthMonitor.start();
        return;
      } catch (error) {
        lastError = error as Error;
        this.logger.error(`MongoDB connection attempt ${attempt} failed:`, error);
        // Release any half-open connection before retrying.
        if (this.client) {
          await this.client.close();
          this.client = null;
        }
        if (attempt < this.options.retryAttempts!) {
          await this.delay(this.options.retryDelay! * attempt);
        }
      }
    }
    throw new Error(
      `Failed to connect to MongoDB after ${this.options.retryAttempts} attempts: ${lastError?.message}`
    );
  }

  /**
   * Disconnect from MongoDB and stop health monitoring. No-op if not connected.
   */
  async disconnect(): Promise<void> {
    if (!this.client) {
      return;
    }
    try {
      this.healthMonitor.stop();
      await this.client.close();
      this.isConnected = false;
      this.client = null;
      this.db = null;
      this.logger.info('Disconnected from MongoDB');
    } catch (error) {
      this.logger.error('Error disconnecting from MongoDB:', error);
      throw error;
    }
  }

  /**
   * Get a typed collection.
   *
   * @throws Error when the client is not connected.
   */
  getCollection<T extends DocumentBase>(name: CollectionNames): Collection<T> {
    if (!this.db) {
      throw new Error('MongoDB client not connected');
    }
    return this.db.collection<T>(name);
  }

  /**
   * Insert a document, stamping created_at/updated_at and validating it
   * against the collection's yup schema when one is registered.
   *
   * @returns the inserted document including its generated _id.
   * @throws Error when schema validation fails.
   */
  async insertOne<T extends DocumentBase>(
    collectionName: CollectionNames,
    document: Omit<T, '_id' | 'created_at' | 'updated_at'> &
      Partial<Pick<T, 'created_at' | 'updated_at'>>
  ): Promise<T> {
    const collection = this.getCollection<T>(collectionName);
    // Stamp timestamps; an explicit created_at from the caller is preserved.
    const now = new Date();
    const docWithTimestamps = {
      ...document,
      created_at: document.created_at || now,
      updated_at: now,
    } as T;
    // Validate document if a schema exists for this collection.
    if (collectionName in schemaMap) {
      try {
        (schemaMap as any)[collectionName].validateSync(docWithTimestamps);
      } catch (error) {
        if (error instanceof yup.ValidationError) {
          this.logger.error(`Document validation failed for ${collectionName}:`, error.errors);
          // error.errors is already a string[]; the previous `.map(e => e)` was a no-op.
          throw new Error(`Document validation failed: ${error.errors.join(', ')}`);
        }
        throw error;
      }
    }
    const result = await collection.insertOne(docWithTimestamps as OptionalUnlessRequiredId<T>);
    return { ...docWithTimestamps, _id: result.insertedId } as T;
  }

  /**
   * Update one document matching the filter, refreshing updated_at.
   *
   * @returns true when a document was actually modified.
   */
  async updateOne<T extends DocumentBase>(
    collectionName: CollectionNames,
    filter: any,
    update: Partial<T>
  ): Promise<boolean> {
    const collection = this.getCollection<T>(collectionName);
    const updateWithTimestamp = {
      ...update,
      updated_at: new Date(),
    };
    const result = await collection.updateOne(filter, { $set: updateWithTimestamp });
    return result.modifiedCount > 0;
  }

  /**
   * Find documents matching the filter.
   */
  async find<T extends DocumentBase>(
    collectionName: CollectionNames,
    filter: any = {},
    options: any = {}
  ): Promise<T[]> {
    const collection = this.getCollection<T>(collectionName);
    return (await collection.find(filter, options).toArray()) as T[];
  }

  /**
   * Find one document matching the filter, or null.
   */
  async findOne<T extends DocumentBase>(
    collectionName: CollectionNames,
    filter: any
  ): Promise<T | null> {
    const collection = this.getCollection<T>(collectionName);
    return (await collection.findOne(filter)) as T | null;
  }

  /**
   * Run an aggregation pipeline with a typed result.
   */
  async aggregate<T extends DocumentBase>(
    collectionName: CollectionNames,
    pipeline: any[]
  ): Promise<T[]> {
    const collection = this.getCollection<T>(collectionName);
    return await collection.aggregate<T>(pipeline).toArray();
  }

  /**
   * Count documents matching the filter.
   */
  async countDocuments(collectionName: CollectionNames, filter: any = {}): Promise<number> {
    const collection = this.getCollection(collectionName);
    return await collection.countDocuments(filter);
  }

  /**
   * Create the standard index set for all collections.
   *
   * @throws Error when the client is not connected or index creation fails.
   */
  async createIndexes(): Promise<void> {
    if (!this.db) {
      throw new Error('MongoDB client not connected');
    }
    try {
      // Sentiment data indexes
      await this.db.collection('sentiment_data').createIndexes([
        { key: { symbol: 1, timestamp: -1 } },
        { key: { sentiment_label: 1 } },
        { key: { source_type: 1 } },
        { key: { created_at: -1 } },
      ]);
      // News articles indexes
      await this.db.collection('news_articles').createIndexes([
        { key: { symbols: 1, published_date: -1 } },
        { key: { publication: 1 } },
        { key: { categories: 1 } },
        { key: { created_at: -1 } },
      ]);
      // SEC filings indexes
      await this.db.collection('sec_filings').createIndexes([
        { key: { symbols: 1, filing_date: -1 } },
        { key: { filing_type: 1 } },
        { key: { cik: 1 } },
        { key: { created_at: -1 } },
      ]);
      // Raw documents indexes (content_hash is unique for dedupe)
      await this.db.collection('raw_documents').createIndex({ content_hash: 1 }, { unique: true });
      await this.db.collection('raw_documents').createIndexes([
        { key: { processing_status: 1 } },
        { key: { document_type: 1 } },
        { key: { created_at: -1 } },
      ]);
      this.logger.info('MongoDB indexes created successfully');
    } catch (error) {
      this.logger.error('Error creating MongoDB indexes:', error);
      throw error;
    }
  }

  /**
   * Get database statistics.
   */
  async getStats(): Promise<any> {
    if (!this.db) {
      throw new Error('MongoDB client not connected');
    }
    return await this.db.stats();
  }

  /**
   * Check if client is connected.
   */
  get connected(): boolean {
    return this.isConnected && !!this.client;
  }

  /**
   * Get the underlying MongoDB client.
   */
  get mongoClient(): MongoClient | null {
    return this.client;
  }

  /**
   * Get the database instance.
   */
  get database(): Db | null {
    return this.db;
  }

  // Merge caller overrides with environment-driven defaults.
  private buildConfig(config?: Partial<MongoDBClientConfig>): MongoDBClientConfig {
    return {
      host: config?.host || mongodbConfig.MONGODB_HOST,
      port: config?.port || mongodbConfig.MONGODB_PORT,
      database: config?.database || mongodbConfig.MONGODB_DATABASE,
      username: config?.username || mongodbConfig.MONGODB_USERNAME,
      password: config?.password || mongodbConfig.MONGODB_PASSWORD,
      authSource: config?.authSource || mongodbConfig.MONGODB_AUTH_SOURCE,
      uri: config?.uri || mongodbConfig.MONGODB_URI,
      poolSettings: {
        maxPoolSize: mongodbConfig.MONGODB_MAX_POOL_SIZE,
        minPoolSize: mongodbConfig.MONGODB_MIN_POOL_SIZE,
        maxIdleTime: mongodbConfig.MONGODB_MAX_IDLE_TIME,
        ...config?.poolSettings,
      },
      timeouts: {
        connectTimeout: mongodbConfig.MONGODB_CONNECT_TIMEOUT,
        socketTimeout: mongodbConfig.MONGODB_SOCKET_TIMEOUT,
        serverSelectionTimeout: mongodbConfig.MONGODB_SERVER_SELECTION_TIMEOUT,
        ...config?.timeouts,
      },
      tls: {
        enabled: mongodbConfig.MONGODB_TLS,
        insecure: mongodbConfig.MONGODB_TLS_INSECURE,
        caFile: mongodbConfig.MONGODB_TLS_CA_FILE,
        ...config?.tls,
      },
      options: {
        retryWrites: mongodbConfig.MONGODB_RETRY_WRITES,
        journal: mongodbConfig.MONGODB_JOURNAL,
        readPreference: mongodbConfig.MONGODB_READ_PREFERENCE as any,
        writeConcern: mongodbConfig.MONGODB_WRITE_CONCERN,
        ...config?.options,
      },
    };
  }

  // An explicit uri takes precedence over the component-based form.
  private buildConnectionUri(): string {
    if (this.config.uri) {
      return this.config.uri;
    }
    const { host, port, username, password, database, authSource } = this.config;
    const auth = username && password ? `${username}:${password}@` : '';
    const authDb = authSource ? `?authSource=${authSource}` : '';
    return `mongodb://${auth}${host}:${port}/${database}${authDb}`;
  }

  /**
   * Translate the configured write-concern string into a driver setting.
   * 'majority' passes through; numeric strings are parsed, preserving an
   * explicit 0 (the previous `parseInt(...) || 1` silently upgraded w=0
   * to w=1); unparseable values fall back to 1.
   */
  private buildWriteConcern(): { w: 'majority' | number } | undefined {
    const writeConcern = this.config.options?.writeConcern;
    if (!writeConcern) {
      return undefined;
    }
    if (writeConcern === 'majority') {
      return { w: 'majority' as const };
    }
    const parsed = parseInt(writeConcern, 10);
    return { w: Number.isNaN(parsed) ? 1 : parsed };
  }

  private buildClientOptions(): MongoClientOptions {
    return {
      maxPoolSize: this.config.poolSettings?.maxPoolSize,
      minPoolSize: this.config.poolSettings?.minPoolSize,
      maxIdleTimeMS: this.config.poolSettings?.maxIdleTime,
      connectTimeoutMS: this.config.timeouts?.connectTimeout,
      socketTimeoutMS: this.config.timeouts?.socketTimeout,
      serverSelectionTimeoutMS: this.config.timeouts?.serverSelectionTimeout,
      retryWrites: this.config.options?.retryWrites,
      journal: this.config.options?.journal,
      readPreference: this.config.options?.readPreference,
      writeConcern: this.buildWriteConcern(),
      tls: this.config.tls?.enabled,
      tlsInsecure: this.config.tls?.insecure,
      tlsCAFile: this.config.tls?.caFile,
    };
  }

  private delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}
import {
Collection,
Db,
Document,
MongoClient,
MongoClientOptions,
OptionalUnlessRequiredId,
WithId,
} from 'mongodb';
import * as yup from 'yup';
import { mongodbConfig } from '@stock-bot/config';
import { getLogger } from '@stock-bot/logger';
import { MongoDBHealthMonitor } from './health';
import { schemaMap } from './schemas';
import type {
AnalystReport,
CollectionNames,
DocumentBase,
EarningsTranscript,
MongoDBClientConfig,
MongoDBConnectionOptions,
NewsArticle,
RawDocument,
SecFiling,
SentimentData,
} from './types';
/**
 * MongoDB Client for Stock Bot
 *
 * Provides type-safe access to MongoDB collections with built-in
 * health monitoring, connection pooling, and schema validation.
 */
export class MongoDBClient {
  private client: MongoClient | null = null;
  private db: Db | null = null;
  private readonly config: MongoDBClientConfig;
  private readonly options: MongoDBConnectionOptions;
  private readonly logger: ReturnType<typeof getLogger>;
  private readonly healthMonitor: MongoDBHealthMonitor;
  private isConnected = false;

  constructor(config?: Partial<MongoDBClientConfig>, options?: MongoDBConnectionOptions) {
    this.config = this.buildConfig(config);
    // Connection-option defaults; caller-supplied values win via spread.
    this.options = {
      retryAttempts: 3,
      retryDelay: 1000,
      healthCheckInterval: 30000,
      ...options,
    };
    this.logger = getLogger('mongodb-client');
    this.healthMonitor = new MongoDBHealthMonitor(this);
  }

  /**
   * Connect to MongoDB, retrying with linear backoff (retryDelay * attempt).
   * No-op if already connected.
   *
   * @throws Error when every attempt fails (includes the last error message).
   */
  async connect(): Promise<void> {
    if (this.isConnected && this.client) {
      return;
    }
    const uri = this.buildConnectionUri();
    const clientOptions = this.buildClientOptions();
    let lastError: Error | null = null;
    for (let attempt = 1; attempt <= this.options.retryAttempts!; attempt++) {
      try {
        this.logger.info(
          `Connecting to MongoDB (attempt ${attempt}/${this.options.retryAttempts})...`
        );
        this.client = new MongoClient(uri, clientOptions);
        await this.client.connect();
        // Ping to verify the server is actually reachable before declaring success.
        await this.client.db(this.config.database).admin().ping();
        this.db = this.client.db(this.config.database);
        this.isConnected = true;
        this.logger.info('Successfully connected to MongoDB');
        // Start health monitoring
        this.healthMonitor.start();
        return;
      } catch (error) {
        lastError = error as Error;
        this.logger.error(`MongoDB connection attempt ${attempt} failed:`, error);
        // Release any half-open connection before retrying.
        if (this.client) {
          await this.client.close();
          this.client = null;
        }
        if (attempt < this.options.retryAttempts!) {
          await this.delay(this.options.retryDelay! * attempt);
        }
      }
    }
    throw new Error(
      `Failed to connect to MongoDB after ${this.options.retryAttempts} attempts: ${lastError?.message}`
    );
  }

  /**
   * Disconnect from MongoDB and stop health monitoring. No-op if not connected.
   */
  async disconnect(): Promise<void> {
    if (!this.client) {
      return;
    }
    try {
      this.healthMonitor.stop();
      await this.client.close();
      this.isConnected = false;
      this.client = null;
      this.db = null;
      this.logger.info('Disconnected from MongoDB');
    } catch (error) {
      this.logger.error('Error disconnecting from MongoDB:', error);
      throw error;
    }
  }

  /**
   * Get a typed collection.
   *
   * @throws Error when the client is not connected.
   */
  getCollection<T extends DocumentBase>(name: CollectionNames): Collection<T> {
    if (!this.db) {
      throw new Error('MongoDB client not connected');
    }
    return this.db.collection<T>(name);
  }

  /**
   * Insert a document, stamping created_at/updated_at and validating it
   * against the collection's yup schema when one is registered.
   *
   * @returns the inserted document including its generated _id.
   * @throws Error when schema validation fails.
   */
  async insertOne<T extends DocumentBase>(
    collectionName: CollectionNames,
    document: Omit<T, '_id' | 'created_at' | 'updated_at'> &
      Partial<Pick<T, 'created_at' | 'updated_at'>>
  ): Promise<T> {
    const collection = this.getCollection<T>(collectionName);
    // Stamp timestamps; an explicit created_at from the caller is preserved.
    const now = new Date();
    const docWithTimestamps = {
      ...document,
      created_at: document.created_at || now,
      updated_at: now,
    } as T;
    // Validate document if a schema exists for this collection.
    if (collectionName in schemaMap) {
      try {
        (schemaMap as any)[collectionName].validateSync(docWithTimestamps);
      } catch (error) {
        if (error instanceof yup.ValidationError) {
          this.logger.error(`Document validation failed for ${collectionName}:`, error.errors);
          // error.errors is already a string[]; the previous `.map(e => e)` was a no-op.
          throw new Error(`Document validation failed: ${error.errors.join(', ')}`);
        }
        throw error;
      }
    }
    const result = await collection.insertOne(docWithTimestamps as OptionalUnlessRequiredId<T>);
    return { ...docWithTimestamps, _id: result.insertedId } as T;
  }

  /**
   * Update one document matching the filter, refreshing updated_at.
   *
   * @returns true when a document was actually modified.
   */
  async updateOne<T extends DocumentBase>(
    collectionName: CollectionNames,
    filter: any,
    update: Partial<T>
  ): Promise<boolean> {
    const collection = this.getCollection<T>(collectionName);
    const updateWithTimestamp = {
      ...update,
      updated_at: new Date(),
    };
    const result = await collection.updateOne(filter, { $set: updateWithTimestamp });
    return result.modifiedCount > 0;
  }

  /**
   * Find documents matching the filter.
   */
  async find<T extends DocumentBase>(
    collectionName: CollectionNames,
    filter: any = {},
    options: any = {}
  ): Promise<T[]> {
    const collection = this.getCollection<T>(collectionName);
    return (await collection.find(filter, options).toArray()) as T[];
  }

  /**
   * Find one document matching the filter, or null.
   */
  async findOne<T extends DocumentBase>(
    collectionName: CollectionNames,
    filter: any
  ): Promise<T | null> {
    const collection = this.getCollection<T>(collectionName);
    return (await collection.findOne(filter)) as T | null;
  }

  /**
   * Run an aggregation pipeline with a typed result.
   */
  async aggregate<T extends DocumentBase>(
    collectionName: CollectionNames,
    pipeline: any[]
  ): Promise<T[]> {
    const collection = this.getCollection<T>(collectionName);
    return await collection.aggregate<T>(pipeline).toArray();
  }

  /**
   * Count documents matching the filter.
   */
  async countDocuments(collectionName: CollectionNames, filter: any = {}): Promise<number> {
    const collection = this.getCollection(collectionName);
    return await collection.countDocuments(filter);
  }

  /**
   * Create the standard index set for all collections.
   *
   * @throws Error when the client is not connected or index creation fails.
   */
  async createIndexes(): Promise<void> {
    if (!this.db) {
      throw new Error('MongoDB client not connected');
    }
    try {
      // Sentiment data indexes
      await this.db.collection('sentiment_data').createIndexes([
        { key: { symbol: 1, timestamp: -1 } },
        { key: { sentiment_label: 1 } },
        { key: { source_type: 1 } },
        { key: { created_at: -1 } },
      ]);
      // News articles indexes
      await this.db.collection('news_articles').createIndexes([
        { key: { symbols: 1, published_date: -1 } },
        { key: { publication: 1 } },
        { key: { categories: 1 } },
        { key: { created_at: -1 } },
      ]);
      // SEC filings indexes
      await this.db.collection('sec_filings').createIndexes([
        { key: { symbols: 1, filing_date: -1 } },
        { key: { filing_type: 1 } },
        { key: { cik: 1 } },
        { key: { created_at: -1 } },
      ]);
      // Raw documents indexes (content_hash is unique for dedupe)
      await this.db.collection('raw_documents').createIndex({ content_hash: 1 }, { unique: true });
      await this.db.collection('raw_documents').createIndexes([
        { key: { processing_status: 1 } },
        { key: { document_type: 1 } },
        { key: { created_at: -1 } },
      ]);
      this.logger.info('MongoDB indexes created successfully');
    } catch (error) {
      this.logger.error('Error creating MongoDB indexes:', error);
      throw error;
    }
  }

  /**
   * Get database statistics.
   */
  async getStats(): Promise<any> {
    if (!this.db) {
      throw new Error('MongoDB client not connected');
    }
    return await this.db.stats();
  }

  /**
   * Check if client is connected.
   */
  get connected(): boolean {
    return this.isConnected && !!this.client;
  }

  /**
   * Get the underlying MongoDB client.
   */
  get mongoClient(): MongoClient | null {
    return this.client;
  }

  /**
   * Get the database instance.
   */
  get database(): Db | null {
    return this.db;
  }

  // Merge caller overrides with environment-driven defaults.
  private buildConfig(config?: Partial<MongoDBClientConfig>): MongoDBClientConfig {
    return {
      host: config?.host || mongodbConfig.MONGODB_HOST,
      port: config?.port || mongodbConfig.MONGODB_PORT,
      database: config?.database || mongodbConfig.MONGODB_DATABASE,
      username: config?.username || mongodbConfig.MONGODB_USERNAME,
      password: config?.password || mongodbConfig.MONGODB_PASSWORD,
      authSource: config?.authSource || mongodbConfig.MONGODB_AUTH_SOURCE,
      uri: config?.uri || mongodbConfig.MONGODB_URI,
      poolSettings: {
        maxPoolSize: mongodbConfig.MONGODB_MAX_POOL_SIZE,
        minPoolSize: mongodbConfig.MONGODB_MIN_POOL_SIZE,
        maxIdleTime: mongodbConfig.MONGODB_MAX_IDLE_TIME,
        ...config?.poolSettings,
      },
      timeouts: {
        connectTimeout: mongodbConfig.MONGODB_CONNECT_TIMEOUT,
        socketTimeout: mongodbConfig.MONGODB_SOCKET_TIMEOUT,
        serverSelectionTimeout: mongodbConfig.MONGODB_SERVER_SELECTION_TIMEOUT,
        ...config?.timeouts,
      },
      tls: {
        enabled: mongodbConfig.MONGODB_TLS,
        insecure: mongodbConfig.MONGODB_TLS_INSECURE,
        caFile: mongodbConfig.MONGODB_TLS_CA_FILE,
        ...config?.tls,
      },
      options: {
        retryWrites: mongodbConfig.MONGODB_RETRY_WRITES,
        journal: mongodbConfig.MONGODB_JOURNAL,
        readPreference: mongodbConfig.MONGODB_READ_PREFERENCE as any,
        writeConcern: mongodbConfig.MONGODB_WRITE_CONCERN,
        ...config?.options,
      },
    };
  }

  // An explicit uri takes precedence over the component-based form.
  private buildConnectionUri(): string {
    if (this.config.uri) {
      return this.config.uri;
    }
    const { host, port, username, password, database, authSource } = this.config;
    const auth = username && password ? `${username}:${password}@` : '';
    const authDb = authSource ? `?authSource=${authSource}` : '';
    return `mongodb://${auth}${host}:${port}/${database}${authDb}`;
  }

  /**
   * Translate the configured write-concern string into a driver setting.
   * 'majority' passes through; numeric strings are parsed, preserving an
   * explicit 0 (the previous `parseInt(...) || 1` silently upgraded w=0
   * to w=1); unparseable values fall back to 1.
   */
  private buildWriteConcern(): { w: 'majority' | number } | undefined {
    const writeConcern = this.config.options?.writeConcern;
    if (!writeConcern) {
      return undefined;
    }
    if (writeConcern === 'majority') {
      return { w: 'majority' as const };
    }
    const parsed = parseInt(writeConcern, 10);
    return { w: Number.isNaN(parsed) ? 1 : parsed };
  }

  private buildClientOptions(): MongoClientOptions {
    return {
      maxPoolSize: this.config.poolSettings?.maxPoolSize,
      minPoolSize: this.config.poolSettings?.minPoolSize,
      maxIdleTimeMS: this.config.poolSettings?.maxIdleTime,
      connectTimeoutMS: this.config.timeouts?.connectTimeout,
      socketTimeoutMS: this.config.timeouts?.socketTimeout,
      serverSelectionTimeoutMS: this.config.timeouts?.serverSelectionTimeout,
      retryWrites: this.config.options?.retryWrites,
      journal: this.config.options?.journal,
      readPreference: this.config.options?.readPreference,
      writeConcern: this.buildWriteConcern(),
      tls: this.config.tls?.enabled,
      tlsInsecure: this.config.tls?.insecure,
      tlsCAFile: this.config.tls?.caFile,
    };
  }

  private delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }
}

View file

@@ -1,66 +1,66 @@
import { MongoDBClient } from './client';
import { mongodbConfig } from '@stock-bot/config';
import type { MongoDBClientConfig, MongoDBConnectionOptions } from './types';
/**
 * Build a MongoDB client from an explicit (partial) configuration and
 * optional connection options.
 */
export function createMongoDBClient(
  config?: Partial<MongoDBClientConfig>,
  options?: MongoDBConnectionOptions
): MongoDBClient {
  const client = new MongoDBClient(config, options);
  return client;
}
/**
 * Build a MongoDB client wired to the environment-driven defaults
 * exposed by @stock-bot/config.
 */
export function createDefaultMongoDBClient(): MongoDBClient {
  return new MongoDBClient({
    host: mongodbConfig.MONGODB_HOST,
    port: mongodbConfig.MONGODB_PORT,
    database: mongodbConfig.MONGODB_DATABASE,
    username: mongodbConfig.MONGODB_USERNAME,
    password: mongodbConfig.MONGODB_PASSWORD,
    uri: mongodbConfig.MONGODB_URI,
  });
}
/**
 * Module-level singleton client, created lazily by getMongoDBClient().
 */
let defaultClient: MongoDBClient | null = null;
/**
 * Return the shared default client, creating it on first use.
 */
export function getMongoDBClient(): MongoDBClient {
  if (defaultClient === null) {
    defaultClient = createDefaultMongoDBClient();
  }
  return defaultClient;
}
/**
 * Ensure the shared default client is connected (creating indexes on
 * first connect) and return it.
 */
export async function connectMongoDB(): Promise<MongoDBClient> {
  const client = getMongoDBClient();
  if (client.connected) {
    return client;
  }
  await client.connect();
  await client.createIndexes();
  return client;
}
/**
 * Disconnect and discard the shared default client, if one exists.
 */
export async function disconnectMongoDB(): Promise<void> {
  if (defaultClient === null) {
    return;
  }
  // Disconnect first; the singleton is only cleared on success.
  await defaultClient.disconnect();
  defaultClient = null;
}
import { mongodbConfig } from '@stock-bot/config';
import { MongoDBClient } from './client';
import type { MongoDBClientConfig, MongoDBConnectionOptions } from './types';
/**
 * Build a MongoDB client from an explicit (partial) configuration and
 * optional connection options.
 */
export function createMongoDBClient(
  config?: Partial<MongoDBClientConfig>,
  options?: MongoDBConnectionOptions
): MongoDBClient {
  const client = new MongoDBClient(config, options);
  return client;
}
/**
 * Build a MongoDB client wired to the environment-driven defaults
 * exposed by @stock-bot/config.
 */
export function createDefaultMongoDBClient(): MongoDBClient {
  return new MongoDBClient({
    host: mongodbConfig.MONGODB_HOST,
    port: mongodbConfig.MONGODB_PORT,
    database: mongodbConfig.MONGODB_DATABASE,
    username: mongodbConfig.MONGODB_USERNAME,
    password: mongodbConfig.MONGODB_PASSWORD,
    uri: mongodbConfig.MONGODB_URI,
  });
}
/**
 * Singleton MongoDB client instance.
 * Module-level cache, created lazily by getMongoDBClient() below.
 */
let defaultClient: MongoDBClient | null = null;
/**
 * Get or create the default MongoDB client instance (lazy singleton).
 */
export function getMongoDBClient(): MongoDBClient {
  // defaultClient is only ever null or a client, so a strict null check suffices.
  if (defaultClient === null) {
    defaultClient = createDefaultMongoDBClient();
  }
  return defaultClient;
}
/**
 * Connect to MongoDB using the default client.
 * Safe to call repeatedly: a connected client is returned unchanged.
 */
export async function connectMongoDB(): Promise<MongoDBClient> {
  const client = getMongoDBClient();
  if (client.connected) {
    return client;
  }
  await client.connect();
  // Make sure the expected collection indexes exist on the new connection.
  await client.createIndexes();
  return client;
}
/**
 * Disconnect from MongoDB and reset the default client singleton.
 *
 * The cached instance is cleared before awaiting disconnect() so that a
 * failing shutdown cannot leave a dead client cached: previously, if
 * disconnect() rejected, defaultClient kept pointing at the broken client
 * and every later getMongoDBClient() returned it.
 */
export async function disconnectMongoDB(): Promise<void> {
  const client = defaultClient;
  if (client === null) {
    return;
  }
  defaultClient = null;
  await client.disconnect();
}

View file

@ -1,226 +1,233 @@
import { getLogger } from '@stock-bot/logger';
import type { MongoDBClient } from './client';
import type { MongoDBHealthCheck, MongoDBHealthStatus, MongoDBMetrics } from './types';
/**
 * MongoDB Health Monitor
 *
 * Periodically pings MongoDB through a MongoDBClient, derives a health
 * status ('healthy' | 'degraded' | 'unhealthy') and keeps rolling metrics.
 */
export class MongoDBHealthMonitor {
  private readonly client: MongoDBClient;
  private readonly logger: ReturnType<typeof getLogger>;
  // Interval handle for the periodic check; null while monitoring is stopped.
  private healthCheckInterval: NodeJS.Timeout | null = null;
  private metrics: MongoDBMetrics;
  // Result of the most recent check; null until the first check completes.
  private lastHealthCheck: MongoDBHealthCheck | null = null;
  constructor(client: MongoDBClient) {
    this.client = client;
    this.logger = getLogger('mongodb-health-monitor');
    this.metrics = {
      operationsPerSecond: 0,
      averageLatency: 0,
      errorRate: 0,
      connectionPoolUtilization: 0,
      documentsProcessed: 0
    };
  }
  /**
   * Start health monitoring.
   *
   * @param intervalMs - Milliseconds between checks (default 30s). If
   *   monitoring is already running, the timer is restarted.
   */
  start(intervalMs: number = 30000): void {
    if (this.healthCheckInterval) {
      this.stop();
    }
    this.logger.info(`Starting MongoDB health monitoring (interval: ${intervalMs}ms)`);
    this.healthCheckInterval = setInterval(async () => {
      try {
        await this.performHealthCheck();
      } catch (error) {
        this.logger.error('Health check failed:', error);
      }
    }, intervalMs);
    // Perform initial health check immediately so callers need not wait a full interval
    this.performHealthCheck().catch(error => {
      this.logger.error('Initial health check failed:', error);
    });
  }
  /**
   * Stop health monitoring
   */
  stop(): void {
    if (this.healthCheckInterval) {
      clearInterval(this.healthCheckInterval);
      this.healthCheckInterval = null;
      this.logger.info('Stopped MongoDB health monitoring');
    }
  }
  /**
   * Get current health status.
   * Runs a check on demand if none has completed yet.
   */
  async getHealth(): Promise<MongoDBHealthCheck> {
    if (!this.lastHealthCheck) {
      await this.performHealthCheck();
    }
    return this.lastHealthCheck!;
  }
  /**
   * Get current metrics (returns a defensive copy).
   */
  getMetrics(): MongoDBMetrics {
    return { ...this.metrics };
  }
  /**
   * Perform a health check: verify connectivity, ping the database, sample
   * server status, and record status/latency/errors into lastHealthCheck.
   */
  private async performHealthCheck(): Promise<void> {
    const startTime = Date.now();
    const errors: string[] = [];
    let status: MongoDBHealthStatus = 'healthy';
    try {
      if (!this.client.connected) {
        errors.push('MongoDB client not connected');
        status = 'unhealthy';
      } else {
        // Test basic connectivity
        const mongoClient = this.client.mongoClient;
        const db = this.client.database;
        if (!mongoClient || !db) {
          errors.push('MongoDB client or database not available');
          status = 'unhealthy';
        } else {
          // Ping the database
          await db.admin().ping();
          // Get server status for metrics
          try {
            const serverStatus = await db.admin().serverStatus();
            this.updateMetricsFromServerStatus(serverStatus);
            // Check connection pool status
            const poolStats = this.getConnectionPoolStats(serverStatus);
            if (poolStats.utilization > 0.9) {
              errors.push('High connection pool utilization');
              status = status === 'healthy' ? 'degraded' : status;
            }
            // Check for high latency
            const latency = Date.now() - startTime;
            if (latency > 1000) {
              errors.push(`High latency: ${latency}ms`);
              status = status === 'healthy' ? 'degraded' : status;
            }
          } catch (statusError) {
            // serverStatus can fail even when the ping succeeded (e.g. privileges),
            // so this only degrades rather than marking the server unhealthy.
            errors.push(`Failed to get server status: ${(statusError as Error).message}`);
            status = 'degraded';
          }
        }
      }
    } catch (error) {
      errors.push(`Health check failed: ${(error as Error).message}`);
      status = 'unhealthy';
    }
    const latency = Date.now() - startTime;
    // Get connection stats
    const connectionStats = this.getConnectionStats();
    this.lastHealthCheck = {
      status,
      timestamp: new Date(),
      latency,
      connections: connectionStats,
      errors: errors.length > 0 ? errors : undefined
    };
    // Log health status changes
    if (status !== 'healthy') {
      this.logger.warn(`MongoDB health status: ${status}`, { errors, latency });
    } else {
      this.logger.debug(`MongoDB health check passed (${latency}ms)`);
    }
  }
  /**
   * Update metrics from MongoDB server status.
   * NOTE(review): opcounters are cumulative since server start, so
   * operationsPerSecond currently holds a lifetime total, not a per-second
   * rate — confirm whether downstream consumers expect a rate.
   */
  private updateMetricsFromServerStatus(serverStatus: any): void {
    try {
      const opcounters = serverStatus.opcounters || {};
      const connections = serverStatus.connections || {};
      const dur = serverStatus.dur || {};
      // Calculate operations per second (approximate)
      const totalOps = Object.values(opcounters).reduce((sum: number, count: any) => sum + (count || 0), 0);
      this.metrics.operationsPerSecond = totalOps;
      // Connection pool utilization
      if (connections.current && connections.available) {
        const total = connections.current + connections.available;
        this.metrics.connectionPoolUtilization = connections.current / total;
      }
      // Average latency (from durability stats if available)
      if (dur.timeMS) {
        this.metrics.averageLatency = dur.timeMS.dt || 0;
      } } catch (error) {
      this.logger.debug('Error parsing server status for metrics:', error as any);
    }
  }
  /**
   * Get connection pool statistics derived from a serverStatus document.
   */
  private getConnectionPoolStats(serverStatus: any): { utilization: number; active: number; available: number } {
    const connections = serverStatus.connections || {};
    const active = connections.current || 0;
    const available = connections.available || 0;
    const total = active + available;
    return {
      utilization: total > 0 ? active / total : 0,
      active,
      available
    };
  }
  /**
   * Get connection statistics
   */
  private getConnectionStats(): { active: number; available: number; total: number } {
    // This would ideally come from the MongoDB driver's connection pool
    // For now, we'll return estimated values
    return {
      active: 1,
      available: 9,
      total: 10
    };
  }
  /**
   * Update error rate metric
   */
  updateErrorRate(errorCount: number, totalOperations: number): void {
    this.metrics.errorRate = totalOperations > 0 ? errorCount / totalOperations : 0;
  }
  /**
   * Update documents processed metric
   */
  updateDocumentsProcessed(count: number): void {
    this.metrics.documentsProcessed += count;
  }
}
import { getLogger } from '@stock-bot/logger';
import type { MongoDBClient } from './client';
import type { MongoDBHealthCheck, MongoDBHealthStatus, MongoDBMetrics } from './types';
/**
* MongoDB Health Monitor
*
* Monitors MongoDB connection health and provides metrics
*/
export class MongoDBHealthMonitor {
private readonly client: MongoDBClient;
private readonly logger: ReturnType<typeof getLogger>;
private healthCheckInterval: NodeJS.Timeout | null = null;
private metrics: MongoDBMetrics;
private lastHealthCheck: MongoDBHealthCheck | null = null;
constructor(client: MongoDBClient) {
this.client = client;
this.logger = getLogger('mongodb-health-monitor');
this.metrics = {
operationsPerSecond: 0,
averageLatency: 0,
errorRate: 0,
connectionPoolUtilization: 0,
documentsProcessed: 0,
};
}
/**
* Start health monitoring
*/
start(intervalMs: number = 30000): void {
if (this.healthCheckInterval) {
this.stop();
}
this.logger.info(`Starting MongoDB health monitoring (interval: ${intervalMs}ms)`);
this.healthCheckInterval = setInterval(async () => {
try {
await this.performHealthCheck();
} catch (error) {
this.logger.error('Health check failed:', error);
}
}, intervalMs);
// Perform initial health check
this.performHealthCheck().catch(error => {
this.logger.error('Initial health check failed:', error);
});
}
/**
* Stop health monitoring
*/
stop(): void {
if (this.healthCheckInterval) {
clearInterval(this.healthCheckInterval);
this.healthCheckInterval = null;
this.logger.info('Stopped MongoDB health monitoring');
}
}
/**
* Get current health status
*/
async getHealth(): Promise<MongoDBHealthCheck> {
if (!this.lastHealthCheck) {
await this.performHealthCheck();
}
return this.lastHealthCheck!;
}
/**
* Get current metrics
*/
getMetrics(): MongoDBMetrics {
return { ...this.metrics };
}
/**
* Perform a health check
*/
private async performHealthCheck(): Promise<void> {
const startTime = Date.now();
const errors: string[] = [];
let status: MongoDBHealthStatus = 'healthy';
try {
if (!this.client.connected) {
errors.push('MongoDB client not connected');
status = 'unhealthy';
} else {
// Test basic connectivity
const mongoClient = this.client.mongoClient;
const db = this.client.database;
if (!mongoClient || !db) {
errors.push('MongoDB client or database not available');
status = 'unhealthy';
} else {
// Ping the database
await db.admin().ping();
// Get server status for metrics
try {
const serverStatus = await db.admin().serverStatus();
this.updateMetricsFromServerStatus(serverStatus);
// Check connection pool status
const poolStats = this.getConnectionPoolStats(serverStatus);
if (poolStats.utilization > 0.9) {
errors.push('High connection pool utilization');
status = status === 'healthy' ? 'degraded' : status;
}
// Check for high latency
const latency = Date.now() - startTime;
if (latency > 1000) {
errors.push(`High latency: ${latency}ms`);
status = status === 'healthy' ? 'degraded' : status;
}
} catch (statusError) {
errors.push(`Failed to get server status: ${(statusError as Error).message}`);
status = 'degraded';
}
}
}
} catch (error) {
errors.push(`Health check failed: ${(error as Error).message}`);
status = 'unhealthy';
}
const latency = Date.now() - startTime;
// Get connection stats
const connectionStats = this.getConnectionStats();
this.lastHealthCheck = {
status,
timestamp: new Date(),
latency,
connections: connectionStats,
errors: errors.length > 0 ? errors : undefined,
};
// Log health status changes
if (status !== 'healthy') {
this.logger.warn(`MongoDB health status: ${status}`, { errors, latency });
} else {
this.logger.debug(`MongoDB health check passed (${latency}ms)`);
}
}
/**
* Update metrics from MongoDB server status
*/
private updateMetricsFromServerStatus(serverStatus: any): void {
try {
const opcounters = serverStatus.opcounters || {};
const connections = serverStatus.connections || {};
const dur = serverStatus.dur || {};
// Calculate operations per second (approximate)
const totalOps = Object.values(opcounters).reduce(
(sum: number, count: any) => sum + (count || 0),
0
);
this.metrics.operationsPerSecond = totalOps;
// Connection pool utilization
if (connections.current && connections.available) {
const total = connections.current + connections.available;
this.metrics.connectionPoolUtilization = connections.current / total;
}
// Average latency (from durability stats if available)
if (dur.timeMS) {
this.metrics.averageLatency = dur.timeMS.dt || 0;
}
} catch (error) {
this.logger.debug('Error parsing server status for metrics:', error as any);
}
}
/**
* Get connection pool statistics
*/
private getConnectionPoolStats(serverStatus: any): {
utilization: number;
active: number;
available: number;
} {
const connections = serverStatus.connections || {};
const active = connections.current || 0;
const available = connections.available || 0;
const total = active + available;
return {
utilization: total > 0 ? active / total : 0,
active,
available,
};
}
/**
* Get connection statistics
*/
private getConnectionStats(): { active: number; available: number; total: number } {
// This would ideally come from the MongoDB driver's connection pool
// For now, we'll return estimated values
return {
active: 1,
available: 9,
total: 10,
};
}
/**
* Update error rate metric
*/
updateErrorRate(errorCount: number, totalOperations: number): void {
this.metrics.errorRate = totalOperations > 0 ? errorCount / totalOperations : 0;
}
/**
* Update documents processed metric
*/
updateDocumentsProcessed(count: number): void {
this.metrics.documentsProcessed += count;
}
}

View file

@ -1,40 +1,40 @@
/**
 * MongoDB Client Library for Stock Bot
 *
 * Provides type-safe MongoDB access for document storage, sentiment data,
 * and raw content processing.
 */
// Core classes
export { MongoDBClient } from './client';
export { MongoDBHealthMonitor } from './health';
export { MongoDBTransactionManager } from './transactions';
export { MongoDBAggregationBuilder } from './aggregation';
// Types (type-only re-exports; erased at compile time)
export type {
  MongoDBClientConfig,
  MongoDBConnectionOptions,
  MongoDBHealthStatus,
  MongoDBMetrics,
  CollectionNames,
  DocumentBase,
  SentimentData,
  RawDocument,
  NewsArticle,
  SecFiling,
  EarningsTranscript,
  AnalystReport
} from './types';
// Schemas (yup validation schemas, one per collection)
export {
  sentimentDataSchema,
  rawDocumentSchema,
  newsArticleSchema,
  secFilingSchema,
  earningsTranscriptSchema,
  analystReportSchema
} from './schemas';
// Utils
export { createMongoDBClient } from './factory';
/**
 * MongoDB Client Library for Stock Bot
 *
 * Provides type-safe MongoDB access for document storage, sentiment data,
 * and raw content processing.
 */
// Core classes
export { MongoDBClient } from './client';
export { MongoDBHealthMonitor } from './health';
export { MongoDBTransactionManager } from './transactions';
export { MongoDBAggregationBuilder } from './aggregation';
// Types (type-only re-exports; erased at compile time)
export type {
  MongoDBClientConfig,
  MongoDBConnectionOptions,
  MongoDBHealthStatus,
  MongoDBMetrics,
  CollectionNames,
  DocumentBase,
  SentimentData,
  RawDocument,
  NewsArticle,
  SecFiling,
  EarningsTranscript,
  AnalystReport,
} from './types';
// Schemas (yup validation schemas, one per collection)
export {
  sentimentDataSchema,
  rawDocumentSchema,
  newsArticleSchema,
  secFilingSchema,
  earningsTranscriptSchema,
  analystReportSchema,
} from './schemas';
// Utils
export { createMongoDBClient } from './factory';

View file

@ -1,132 +1,146 @@
import * as yup from 'yup';
/**
 * Yup Schemas for MongoDB Document Validation
 *
 * Every collection schema below extends documentBaseSchema via .shape().
 */
// Base schema for all documents
export const documentBaseSchema = yup.object({
  _id: yup.mixed().optional(),
  created_at: yup.date().required(),
  updated_at: yup.date().required(),
  source: yup.string().required(),
  metadata: yup.object().optional(),
});
// Sentiment Data Schema
export const sentimentDataSchema = documentBaseSchema.shape({
  symbol: yup.string().min(1).max(10).required(),
  sentiment_score: yup.number().min(-1).max(1).required(), // -1 most negative .. 1 most positive
  sentiment_label: yup.string().oneOf(['positive', 'negative', 'neutral']).required(),
  confidence: yup.number().min(0).max(1).required(),
  text: yup.string().min(1).required(),
  source_type: yup.string().oneOf(['reddit', 'twitter', 'news', 'forums']).required(),
  source_id: yup.string().required(),
  timestamp: yup.date().required(),
  processed_at: yup.date().required(),
  language: yup.string().default('en'), // defaults to English when omitted
  keywords: yup.array(yup.string()).required(),
  entities: yup.array(yup.object({
    name: yup.string().required(),
    type: yup.string().required(),
    confidence: yup.number().min(0).max(1).required(),
  })).required(),
});
// Raw Document Schema
export const rawDocumentSchema = documentBaseSchema.shape({
  document_type: yup.string().oneOf(['html', 'pdf', 'text', 'json', 'xml']).required(),
  content: yup.string().required(),
  content_hash: yup.string().required(), // presumably used for de-duplication — verify against writers
  url: yup.string().url().optional(),
  title: yup.string().optional(),
  author: yup.string().optional(),
  published_date: yup.date().optional(),
  extracted_text: yup.string().optional(),
  processing_status: yup.string().oneOf(['pending', 'processed', 'failed']).required(),
  size_bytes: yup.number().positive().required(),
  language: yup.string().optional(),
});
// News Article Schema
export const newsArticleSchema = documentBaseSchema.shape({
  headline: yup.string().min(1).required(),
  content: yup.string().min(1).required(),
  summary: yup.string().optional(),
  author: yup.string().required(),
  publication: yup.string().required(),
  published_date: yup.date().required(),
  url: yup.string().url().required(),
  symbols: yup.array(yup.string()).required(),
  categories: yup.array(yup.string()).required(),
  sentiment_score: yup.number().min(-1).max(1).optional(),
  relevance_score: yup.number().min(0).max(1).optional(),
  image_url: yup.string().url().optional(),
  tags: yup.array(yup.string()).required(),
});
// SEC Filing Schema
export const secFilingSchema = documentBaseSchema.shape({
  cik: yup.string().required(), // SEC Central Index Key of the filer
  accession_number: yup.string().required(),
  filing_type: yup.string().required(),
  company_name: yup.string().required(),
  symbols: yup.array(yup.string()).required(),
  filing_date: yup.date().required(),
  period_end_date: yup.date().required(),
  url: yup.string().url().required(),
  content: yup.string().required(),
  extracted_data: yup.object().optional(),
  financial_statements: yup.array(yup.object({
    statement_type: yup.string().required(),
    data: yup.object().required(),
  })).optional(),
  processing_status: yup.string().oneOf(['pending', 'processed', 'failed']).required(),
});
// Earnings Transcript Schema
export const earningsTranscriptSchema = documentBaseSchema.shape({
  symbol: yup.string().min(1).max(10).required(),
  company_name: yup.string().required(),
  quarter: yup.string().required(),
  year: yup.number().min(2000).max(3000).required(),
  call_date: yup.date().required(),
  transcript: yup.string().required(),
  participants: yup.array(yup.object({
    name: yup.string().required(),
    title: yup.string().required(),
    type: yup.string().oneOf(['executive', 'analyst']).required(),
  })).required(),
  key_topics: yup.array(yup.string()).required(),
  sentiment_analysis: yup.object({
    overall_sentiment: yup.number().min(-1).max(1).required(),
    topic_sentiments: yup.object().required(),
  }).optional(),
  financial_highlights: yup.object().optional(),
});
// Analyst Report Schema
export const analystReportSchema = documentBaseSchema.shape({
  symbol: yup.string().min(1).max(10).required(),
  analyst_firm: yup.string().required(),
  analyst_name: yup.string().required(),
  report_title: yup.string().required(),
  report_date: yup.date().required(),
  rating: yup.string().oneOf(['buy', 'hold', 'sell', 'strong_buy', 'strong_sell']).required(),
  price_target: yup.number().positive().optional(),
  previous_rating: yup.string().optional(),
  content: yup.string().required(),
  summary: yup.string().required(),
  key_points: yup.array(yup.string()).required(),
  financial_projections: yup.object().optional(),
});
// Schema mapping for collections
// NOTE(review): keys look like they mirror CollectionNames in types.ts — verify.
export const schemaMap = {
  sentiment_data: sentimentDataSchema,
  raw_documents: rawDocumentSchema,
  news_articles: newsArticleSchema,
  sec_filings: secFilingSchema,
  earnings_transcripts: earningsTranscriptSchema,
  analyst_reports: analystReportSchema,
} as const;
import * as yup from 'yup';
/**
 * Yup Schemas for MongoDB Document Validation
 *
 * Every collection schema below extends documentBaseSchema via .shape().
 */
// Base schema for all documents
export const documentBaseSchema = yup.object({
  _id: yup.mixed().optional(),
  created_at: yup.date().required(),
  updated_at: yup.date().required(),
  source: yup.string().required(),
  metadata: yup.object().optional(),
});
// Sentiment Data Schema
export const sentimentDataSchema = documentBaseSchema.shape({
  symbol: yup.string().min(1).max(10).required(),
  sentiment_score: yup.number().min(-1).max(1).required(), // -1 most negative .. 1 most positive
  sentiment_label: yup.string().oneOf(['positive', 'negative', 'neutral']).required(),
  confidence: yup.number().min(0).max(1).required(),
  text: yup.string().min(1).required(),
  source_type: yup.string().oneOf(['reddit', 'twitter', 'news', 'forums']).required(),
  source_id: yup.string().required(),
  timestamp: yup.date().required(),
  processed_at: yup.date().required(),
  language: yup.string().default('en'), // defaults to English when omitted
  keywords: yup.array(yup.string()).required(),
  entities: yup
    .array(
      yup.object({
        name: yup.string().required(),
        type: yup.string().required(),
        confidence: yup.number().min(0).max(1).required(),
      })
    )
    .required(),
});
// Raw Document Schema
export const rawDocumentSchema = documentBaseSchema.shape({
  document_type: yup.string().oneOf(['html', 'pdf', 'text', 'json', 'xml']).required(),
  content: yup.string().required(),
  content_hash: yup.string().required(), // presumably used for de-duplication — verify against writers
  url: yup.string().url().optional(),
  title: yup.string().optional(),
  author: yup.string().optional(),
  published_date: yup.date().optional(),
  extracted_text: yup.string().optional(),
  processing_status: yup.string().oneOf(['pending', 'processed', 'failed']).required(),
  size_bytes: yup.number().positive().required(),
  language: yup.string().optional(),
});
// News Article Schema
export const newsArticleSchema = documentBaseSchema.shape({
  headline: yup.string().min(1).required(),
  content: yup.string().min(1).required(),
  summary: yup.string().optional(),
  author: yup.string().required(),
  publication: yup.string().required(),
  published_date: yup.date().required(),
  url: yup.string().url().required(),
  symbols: yup.array(yup.string()).required(),
  categories: yup.array(yup.string()).required(),
  sentiment_score: yup.number().min(-1).max(1).optional(),
  relevance_score: yup.number().min(0).max(1).optional(),
  image_url: yup.string().url().optional(),
  tags: yup.array(yup.string()).required(),
});
// SEC Filing Schema
export const secFilingSchema = documentBaseSchema.shape({
  cik: yup.string().required(), // SEC Central Index Key of the filer
  accession_number: yup.string().required(),
  filing_type: yup.string().required(),
  company_name: yup.string().required(),
  symbols: yup.array(yup.string()).required(),
  filing_date: yup.date().required(),
  period_end_date: yup.date().required(),
  url: yup.string().url().required(),
  content: yup.string().required(),
  extracted_data: yup.object().optional(),
  financial_statements: yup
    .array(
      yup.object({
        statement_type: yup.string().required(),
        data: yup.object().required(),
      })
    )
    .optional(),
  processing_status: yup.string().oneOf(['pending', 'processed', 'failed']).required(),
});
// Earnings Transcript Schema
export const earningsTranscriptSchema = documentBaseSchema.shape({
  symbol: yup.string().min(1).max(10).required(),
  company_name: yup.string().required(),
  quarter: yup.string().required(),
  year: yup.number().min(2000).max(3000).required(),
  call_date: yup.date().required(),
  transcript: yup.string().required(),
  participants: yup
    .array(
      yup.object({
        name: yup.string().required(),
        title: yup.string().required(),
        type: yup.string().oneOf(['executive', 'analyst']).required(),
      })
    )
    .required(),
  key_topics: yup.array(yup.string()).required(),
  sentiment_analysis: yup
    .object({
      overall_sentiment: yup.number().min(-1).max(1).required(),
      topic_sentiments: yup.object().required(),
    })
    .optional(),
  financial_highlights: yup.object().optional(),
});
// Analyst Report Schema
export const analystReportSchema = documentBaseSchema.shape({
  symbol: yup.string().min(1).max(10).required(),
  analyst_firm: yup.string().required(),
  analyst_name: yup.string().required(),
  report_title: yup.string().required(),
  report_date: yup.date().required(),
  rating: yup.string().oneOf(['buy', 'hold', 'sell', 'strong_buy', 'strong_sell']).required(),
  price_target: yup.number().positive().optional(),
  previous_rating: yup.string().optional(),
  content: yup.string().required(),
  summary: yup.string().required(),
  key_points: yup.array(yup.string()).required(),
  financial_projections: yup.object().optional(),
});
// Schema mapping for collections
// NOTE(review): keys look like they mirror CollectionNames in types.ts — verify.
export const schemaMap = {
  sentiment_data: sentimentDataSchema,
  raw_documents: rawDocumentSchema,
  news_articles: newsArticleSchema,
  sec_filings: secFilingSchema,
  earnings_transcripts: earningsTranscriptSchema,
  analyst_reports: analystReportSchema,
} as const;

View file

@ -1,238 +1,238 @@
import { getLogger } from '@stock-bot/logger';
import type { MongoDBClient } from './client';
import type { CollectionNames, DocumentBase } from './types';
import type { WithId, OptionalUnlessRequiredId } from 'mongodb';
/**
 * MongoDB Transaction Manager
 *
 * Provides transaction support for multi-document operations:
 * batch inserts/updates, moving documents between collections, and archiving.
 */
export class MongoDBTransactionManager {
  private readonly client: MongoDBClient;
  private readonly logger: ReturnType<typeof getLogger>;
  constructor(client: MongoDBClient) {
    this.client = client;
    this.logger = getLogger('mongodb-transaction-manager');
  }
  /**
   * Execute operations within a transaction.
   *
   * @param operations - Callback receiving the session; every collection call
   *   inside it must pass that session to participate in the transaction.
   * @param options - Optional overrides; defaults: 'majority' read concern,
   *   { w: 'majority' } write concern, 10s max commit time.
   * @throws Rethrows any error from the callback after the transaction aborts.
   */
  async withTransaction<T>(
    operations: (session: any) => Promise<T>,
    options?: {
      readPreference?: string;
      readConcern?: string;
      writeConcern?: any;
      maxCommitTimeMS?: number;
    }
  ): Promise<T> {
    const mongoClient = this.client.mongoClient;
    if (!mongoClient) {
      throw new Error('MongoDB client not connected');
    }
    const session = mongoClient.startSession();
    try {
      this.logger.debug('Starting MongoDB transaction');
      const result = await session.withTransaction(
        async () => {
          return await operations(session);
        }, {
          readPreference: options?.readPreference as any,
          readConcern: { level: options?.readConcern || 'majority' } as any,
          writeConcern: options?.writeConcern || { w: 'majority' },
          maxCommitTimeMS: options?.maxCommitTimeMS || 10000
        }
      );
      this.logger.debug('MongoDB transaction completed successfully');
      return result;
    } catch (error) {
      this.logger.error('MongoDB transaction failed:', error);
      throw error;
    } finally {
      // Always release the session, whether the transaction committed or not.
      await session.endSession();
    }
  }
  /**
   * Batch insert documents across collections within a transaction.
   * Sets updated_at on every document and created_at where missing.
   */
  async batchInsert(
    operations: Array<{
      collection: CollectionNames;
      documents: DocumentBase[];
    }>,
    options?: { ordered?: boolean; bypassDocumentValidation?: boolean }
  ): Promise<void> {
    await this.withTransaction(async (session) => {
      for (const operation of operations) {
        const collection = this.client.getCollection(operation.collection);
        // Add timestamps to all documents
        const now = new Date();
        const documentsWithTimestamps = operation.documents.map(doc => ({
          ...doc,
          created_at: doc.created_at || now,
          updated_at: now
        }));
        await collection.insertMany(documentsWithTimestamps, {
          session,
          ordered: options?.ordered ?? true,
          bypassDocumentValidation: options?.bypassDocumentValidation ?? false
        });
        this.logger.debug(`Inserted ${documentsWithTimestamps.length} documents into ${operation.collection}`);
      }
    });
  }
  /**
   * Batch update documents across collections within a transaction.
   * Each update's $set is augmented with a fresh updated_at timestamp.
   */
  async batchUpdate(
    operations: Array<{
      collection: CollectionNames;
      filter: any;
      update: any;
      options?: any;
    }>
  ): Promise<void> {
    await this.withTransaction(async (session) => {
      const results = [];
      for (const operation of operations) {
        const collection = this.client.getCollection(operation.collection);
        // Add updated timestamp
        const updateWithTimestamp = {
          ...operation.update,
          $set: {
            ...operation.update.$set,
            updated_at: new Date()
          }
        };
        const result = await collection.updateMany(
          operation.filter,
          updateWithTimestamp,
          {
            session,
            ...operation.options
          }
        );
        results.push(result);
        this.logger.debug(`Updated ${result.modifiedCount} documents in ${operation.collection}`);
      }
      return results;
    });
  }
  /**
   * Move documents between collections within a transaction.
   *
   * @param transform - Optional per-document mapper applied before insertion.
   * @returns Number of documents deleted from the source collection.
   */
  async moveDocuments<T extends DocumentBase>(
    fromCollection: CollectionNames,
    toCollection: CollectionNames,
    filter: any,
    transform?: (doc: T) => T
  ): Promise<number> {
    return await this.withTransaction(async (session) => {
      const sourceCollection = this.client.getCollection<T>(fromCollection);
      const targetCollection = this.client.getCollection<T>(toCollection);
      // Find documents to move
      const documents = await sourceCollection.find(filter, { session }).toArray();
      if (documents.length === 0) {
        return 0;
      } // Transform documents if needed
      const documentsToInsert = transform
        ? documents.map((doc: WithId<T>) => transform(doc as T))
        : documents;
      // Add updated timestamp
      const now = new Date();
      documentsToInsert.forEach(doc => {
        doc.updated_at = now;
      }); // Insert into target collection
      await targetCollection.insertMany(documentsToInsert as OptionalUnlessRequiredId<T>[], { session });
      // Remove from source collection
      const deleteResult = await sourceCollection.deleteMany(filter, { session });
      this.logger.info(`Moved ${documents.length} documents from ${fromCollection} to ${toCollection}`);
      return deleteResult.deletedCount || 0;
    });
  }
  /**
   * Archive old documents within a transaction.
   * Copies documents older than cutoffDate into the archive collection in
   * batches, deleting each batch from the source inside the same transaction.
   *
   * @returns Total number of documents archived across all batches.
   */
  async archiveDocuments(
    sourceCollection: CollectionNames,
    archiveCollection: CollectionNames,
    cutoffDate: Date,
    batchSize: number = 1000
  ): Promise<number> {
    let totalArchived = 0;
    while (true) {
      const batchArchived = await this.withTransaction(async (session) => {
        const collection = this.client.getCollection(sourceCollection);
        const archiveCol = this.client.getCollection(archiveCollection);
        // Find old documents
        const documents = await collection.find(
          { created_at: { $lt: cutoffDate } },
          { limit: batchSize, session }
        ).toArray();
        if (documents.length === 0) {
          return 0;
        }
        // Add archive metadata
        const now = new Date();
        const documentsToArchive = documents.map(doc => ({
          ...doc,
          archived_at: now,
          archived_from: sourceCollection
        }));
        // Insert into archive collection
        await archiveCol.insertMany(documentsToArchive, { session });
        // Remove from source collection
        const ids = documents.map(doc => doc._id);
        const deleteResult = await collection.deleteMany(
          { _id: { $in: ids } },
          { session }
        );
        return deleteResult.deletedCount || 0;
      });
      totalArchived += batchArchived;
      if (batchArchived === 0) {
        break;
      }
      this.logger.debug(`Archived batch of ${batchArchived} documents`);
    }
    this.logger.info(`Archived ${totalArchived} documents from ${sourceCollection} to ${archiveCollection}`);
    return totalArchived;
  }
}
import type { OptionalUnlessRequiredId, WithId } from 'mongodb';
import { getLogger } from '@stock-bot/logger';
import type { MongoDBClient } from './client';
import type { CollectionNames, DocumentBase } from './types';
/**
* MongoDB Transaction Manager
*
* Provides transaction support for multi-document operations
*/
export class MongoDBTransactionManager {
private readonly client: MongoDBClient;
private readonly logger: ReturnType<typeof getLogger>;
constructor(client: MongoDBClient) {
this.client = client;
this.logger = getLogger('mongodb-transaction-manager');
}
/**
* Execute operations within a transaction
*/
async withTransaction<T>(
operations: (session: any) => Promise<T>,
options?: {
readPreference?: string;
readConcern?: string;
writeConcern?: any;
maxCommitTimeMS?: number;
}
): Promise<T> {
const mongoClient = this.client.mongoClient;
if (!mongoClient) {
throw new Error('MongoDB client not connected');
}
const session = mongoClient.startSession();
try {
this.logger.debug('Starting MongoDB transaction');
const result = await session.withTransaction(
async () => {
return await operations(session);
},
{
readPreference: options?.readPreference as any,
readConcern: { level: options?.readConcern || 'majority' } as any,
writeConcern: options?.writeConcern || { w: 'majority' },
maxCommitTimeMS: options?.maxCommitTimeMS || 10000,
}
);
this.logger.debug('MongoDB transaction completed successfully');
return result;
} catch (error) {
this.logger.error('MongoDB transaction failed:', error);
throw error;
} finally {
await session.endSession();
}
}
/**
* Batch insert documents across collections within a transaction
*/
async batchInsert(
operations: Array<{
collection: CollectionNames;
documents: DocumentBase[];
}>,
options?: { ordered?: boolean; bypassDocumentValidation?: boolean }
): Promise<void> {
await this.withTransaction(async session => {
for (const operation of operations) {
const collection = this.client.getCollection(operation.collection);
// Add timestamps to all documents
const now = new Date();
const documentsWithTimestamps = operation.documents.map(doc => ({
...doc,
created_at: doc.created_at || now,
updated_at: now,
}));
await collection.insertMany(documentsWithTimestamps, {
session,
ordered: options?.ordered ?? true,
bypassDocumentValidation: options?.bypassDocumentValidation ?? false,
});
this.logger.debug(
`Inserted ${documentsWithTimestamps.length} documents into ${operation.collection}`
);
}
});
}
/**
 * Apply several `updateMany` operations atomically.
 *
 * All updates run inside one transaction via `withTransaction`. Each
 * update's `$set` is augmented with a fresh `updated_at` timestamp;
 * all other update operators are passed through unchanged.
 *
 * @param operations - One entry per collection: filter, update document,
 *                     and optional per-operation `updateMany` options.
 */
async batchUpdate(
  operations: Array<{
    collection: CollectionNames;
    filter: any;
    update: any;
    options?: any;
  }>
): Promise<void> {
  await this.withTransaction(async session => {
    // NOTE: no results are accumulated — the method returns void and the
    // transaction callback's return value was previously discarded dead code.
    for (const operation of operations) {
      const collection = this.client.getCollection(operation.collection);
      // Merge updated_at into $set without clobbering the caller's $set fields.
      const updateWithTimestamp = {
        ...operation.update,
        $set: {
          ...operation.update.$set,
          updated_at: new Date(),
        },
      };
      const result = await collection.updateMany(operation.filter, updateWithTimestamp, {
        session,
        ...operation.options,
      });
      this.logger.debug(`Updated ${result.modifiedCount} documents in ${operation.collection}`);
    }
  });
}
/**
 * Atomically move documents matching `filter` between collections.
 *
 * Runs inside a transaction: documents are copied into the target
 * collection and then removed from the source, or nothing happens at all.
 * Each moved document has `updated_at` refreshed; an optional `transform`
 * callback may rewrite documents before insertion.
 *
 * @param fromCollection - Source collection name.
 * @param toCollection - Target collection name.
 * @param filter - Query selecting the documents to move.
 * @param transform - Optional per-document rewrite applied before insert.
 * @returns Number of documents removed from the source collection.
 */
async moveDocuments<T extends DocumentBase>(
  fromCollection: CollectionNames,
  toCollection: CollectionNames,
  filter: any,
  transform?: (doc: T) => T
): Promise<number> {
  return await this.withTransaction(async session => {
    const sourceCollection = this.client.getCollection<T>(fromCollection);
    const targetCollection = this.client.getCollection<T>(toCollection);
    // Find documents to move
    const documents = await sourceCollection.find(filter, { session }).toArray();
    if (documents.length === 0) {
      return 0;
    }
    // Transform documents if needed
    const documentsToInsert = transform
      ? documents.map((doc: WithId<T>) => transform(doc as T))
      : documents;
    // Refresh the updated timestamp on every moved document.
    const now = new Date();
    documentsToInsert.forEach(doc => {
      doc.updated_at = now;
    });
    // Insert into target collection
    await targetCollection.insertMany(documentsToInsert as OptionalUnlessRequiredId<T>[], {
      session,
    });
    // Delete exactly the documents that were copied (by _id) instead of
    // re-running the caller's filter, so the removal set cannot diverge
    // from the insert set (same pattern as archiveDocuments).
    const ids = documents.map(doc => doc._id);
    const deleteResult = await sourceCollection.deleteMany({ _id: { $in: ids } } as any, {
      session,
    });
    this.logger.info(
      `Moved ${documents.length} documents from ${fromCollection} to ${toCollection}`
    );
    return deleteResult.deletedCount || 0;
  });
}
/**
* Archive old documents within a transaction
*/
async archiveDocuments(
sourceCollection: CollectionNames,
archiveCollection: CollectionNames,
cutoffDate: Date,
batchSize: number = 1000
): Promise<number> {
let totalArchived = 0;
while (true) {
const batchArchived = await this.withTransaction(async session => {
const collection = this.client.getCollection(sourceCollection);
const archiveCol = this.client.getCollection(archiveCollection);
// Find old documents
const documents = await collection
.find({ created_at: { $lt: cutoffDate } }, { limit: batchSize, session })
.toArray();
if (documents.length === 0) {
return 0;
}
// Add archive metadata
const now = new Date();
const documentsToArchive = documents.map(doc => ({
...doc,
archived_at: now,
archived_from: sourceCollection,
}));
// Insert into archive collection
await archiveCol.insertMany(documentsToArchive, { session });
// Remove from source collection
const ids = documents.map(doc => doc._id);
const deleteResult = await collection.deleteMany({ _id: { $in: ids } }, { session });
return deleteResult.deletedCount || 0;
});
totalArchived += batchArchived;
if (batchArchived === 0) {
break;
}
this.logger.debug(`Archived batch of ${batchArchived} documents`);
}
this.logger.info(
`Archived ${totalArchived} documents from ${sourceCollection} to ${archiveCollection}`
);
return totalArchived;
}
}

// NOTE(review): diff-viewer artifact removed here ("View file", "@ -1,215 +1,215 @@").
// The type declarations below appear twice (pre/post-prettier copies of the same
// types module); duplicate type aliases will not compile and the copies should
// live in their own file — confirm against the repository layout.
import * as yup from 'yup';
import type { ObjectId } from 'mongodb';
/**
 * MongoDB Client Configuration
 *
 * Connection settings for constructing a MongoDB client. `uri` is an
 * alternative to host/port — presumably it takes precedence when set;
 * TODO confirm against the client implementation.
 */
export interface MongoDBClientConfig {
  host: string;
  port: number;
  database: string; // target database name
  username?: string;
  password?: string;
  authSource?: string; // database used for authentication
  uri?: string; // full connection string alternative to host/port
  poolSettings?: {
    maxPoolSize: number;
    minPoolSize: number;
    maxIdleTime: number; // presumably milliseconds — confirm
  };
  timeouts?: {
    connectTimeout: number; // presumably milliseconds — confirm
    socketTimeout: number;
    serverSelectionTimeout: number;
  };
  tls?: {
    enabled: boolean;
    insecure: boolean; // NOTE(review): presumably skips cert validation — confirm
    caFile?: string; // path to a CA certificate file
  };
  options?: {
    retryWrites: boolean;
    journal: boolean;
    readPreference: 'primary' | 'primaryPreferred' | 'secondary' | 'secondaryPreferred' | 'nearest';
    writeConcern: string;
  };
}
/**
 * MongoDB Connection Options
 *
 * Retry and health-check tuning for establishing and monitoring a connection.
 */
export interface MongoDBConnectionOptions {
  retryAttempts?: number; // how many times to retry a failed connect
  retryDelay?: number; // presumably milliseconds between retries — confirm
  healthCheckInterval?: number; // presumably milliseconds — confirm
}
/**
 * Health Status Types
 *
 * Coarse health classification reported by health checks.
 */
export type MongoDBHealthStatus = 'healthy' | 'degraded' | 'unhealthy';
/** Snapshot of a single health-check run. */
export interface MongoDBHealthCheck {
  status: MongoDBHealthStatus;
  timestamp: Date; // when the check ran
  latency: number; // presumably milliseconds — confirm
  connections: {
    active: number;
    available: number;
    total: number;
  };
  errors?: string[]; // presumably populated when status is not healthy — confirm
}
/** Aggregate client performance metrics. */
export interface MongoDBMetrics {
  operationsPerSecond: number;
  averageLatency: number; // presumably milliseconds — confirm
  errorRate: number; // presumably a 0..1 ratio — confirm
  connectionPoolUtilization: number; // presumably a 0..1 ratio — confirm
  documentsProcessed: number;
}
/**
 * Collection Names
 *
 * Union of every collection this module addresses; used to type collection
 * lookups instead of free-form strings.
 */
export type CollectionNames =
  | 'sentiment_data'
  | 'raw_documents'
  | 'news_articles'
  | 'sec_filings'
  | 'earnings_transcripts'
  | 'analyst_reports'
  | 'social_media_posts'
  | 'market_events'
  | 'economic_indicators';
/**
 * Base Document Interface
 *
 * Fields shared by every stored document type.
 */
export interface DocumentBase {
  _id?: ObjectId; // assigned by MongoDB on insert when absent
  created_at: Date;
  updated_at: Date;
  source: string; // NOTE(review): presumably an origin/feed identifier — confirm
  metadata?: Record<string, any>; // free-form extra attributes
}
/**
 * Sentiment Data Document
 *
 * One sentiment measurement extracted from a piece of text.
 */
export interface SentimentData extends DocumentBase {
  symbol: string; // ticker symbol the sentiment applies to
  sentiment_score: number; // presumably in [-1, 1] — confirm
  sentiment_label: 'positive' | 'negative' | 'neutral';
  confidence: number; // presumably in [0, 1] — confirm
  text: string; // the analyzed text itself
  source_type: 'reddit' | 'twitter' | 'news' | 'forums';
  source_id: string; // id of the post/article at the source
  timestamp: Date; // presumably publication time of the text — confirm
  processed_at: Date; // when sentiment analysis ran
  language: string;
  keywords: string[];
  entities: Array<{
    name: string;
    type: string;
    confidence: number;
  }>;
}
/**
 * Raw Document
 *
 * An ingested document in its original form, prior to processing.
 */
export interface RawDocument extends DocumentBase {
  document_type: 'html' | 'pdf' | 'text' | 'json' | 'xml';
  content: string; // raw payload
  content_hash: string; // hash of content — presumably for dedupe; confirm
  url?: string;
  title?: string;
  author?: string;
  published_date?: Date;
  extracted_text?: string; // plain text pulled out of content
  processing_status: 'pending' | 'processed' | 'failed';
  size_bytes: number;
  language?: string;
}
/**
 * News Article
 */
export interface NewsArticle extends DocumentBase {
  headline: string;
  content: string; // full article body
  summary?: string;
  author: string;
  publication: string; // publishing outlet name
  published_date: Date;
  url: string;
  symbols: string[]; // ticker symbols the article covers
  categories: string[];
  sentiment_score?: number; // presumably filled after analysis — confirm
  relevance_score?: number;
  image_url?: string;
  tags: string[];
}
/**
 * SEC Filing
 */
export interface SecFiling extends DocumentBase {
  cik: string; // SEC Central Index Key of the filer
  accession_number: string; // unique SEC filing identifier
  filing_type: string; // presumably an SEC form type (e.g. 10-K) — confirm
  company_name: string;
  symbols: string[];
  filing_date: Date;
  period_end_date: Date; // end of the reporting period covered
  url: string;
  content: string;
  extracted_data?: Record<string, any>;
  financial_statements?: Array<{
    statement_type: string;
    data: Record<string, number>;
  }>;
  processing_status: 'pending' | 'processed' | 'failed';
}
/**
 * Earnings Transcript
 */
export interface EarningsTranscript extends DocumentBase {
  symbol: string;
  company_name: string;
  quarter: string; // presumably e.g. "Q1" — confirm format
  year: number;
  call_date: Date;
  transcript: string; // full call transcript text
  participants: Array<{
    name: string;
    title: string;
    type: 'executive' | 'analyst';
  }>;
  key_topics: string[];
  sentiment_analysis?: {
    overall_sentiment: number;
    topic_sentiments: Record<string, number>; // per-topic sentiment scores
  };
  financial_highlights?: Record<string, number>;
}
/**
 * Analyst Report
 */
export interface AnalystReport extends DocumentBase {
  symbol: string;
  analyst_firm: string;
  analyst_name: string;
  report_title: string;
  report_date: Date;
  rating: 'buy' | 'hold' | 'sell' | 'strong_buy' | 'strong_sell';
  price_target?: number; // analyst's price target, when given
  previous_rating?: string; // prior rating before any change
  content: string;
  summary: string;
  key_points: string[];
  financial_projections?: Record<string, number>;
}
import type { ObjectId } from 'mongodb';
import * as yup from 'yup';
/**
 * MongoDB Client Configuration
 *
 * Connection settings for constructing a MongoDB client. `uri` is an
 * alternative to host/port — presumably it takes precedence when set;
 * TODO confirm against the client implementation.
 */
export interface MongoDBClientConfig {
  host: string;
  port: number;
  database: string; // target database name
  username?: string;
  password?: string;
  authSource?: string; // database used for authentication
  uri?: string; // full connection string alternative to host/port
  poolSettings?: {
    maxPoolSize: number;
    minPoolSize: number;
    maxIdleTime: number; // presumably milliseconds — confirm
  };
  timeouts?: {
    connectTimeout: number; // presumably milliseconds — confirm
    socketTimeout: number;
    serverSelectionTimeout: number;
  };
  tls?: {
    enabled: boolean;
    insecure: boolean; // NOTE(review): presumably skips cert validation — confirm
    caFile?: string; // path to a CA certificate file
  };
  options?: {
    retryWrites: boolean;
    journal: boolean;
    readPreference: 'primary' | 'primaryPreferred' | 'secondary' | 'secondaryPreferred' | 'nearest';
    writeConcern: string;
  };
}
/**
 * MongoDB Connection Options
 *
 * Retry and health-check tuning for establishing and monitoring a connection.
 */
export interface MongoDBConnectionOptions {
  retryAttempts?: number; // how many times to retry a failed connect
  retryDelay?: number; // presumably milliseconds between retries — confirm
  healthCheckInterval?: number; // presumably milliseconds — confirm
}
/**
 * Health Status Types
 *
 * Coarse health classification reported by health checks.
 */
export type MongoDBHealthStatus = 'healthy' | 'degraded' | 'unhealthy';
/** Snapshot of a single health-check run. */
export interface MongoDBHealthCheck {
  status: MongoDBHealthStatus;
  timestamp: Date; // when the check ran
  latency: number; // presumably milliseconds — confirm
  connections: {
    active: number;
    available: number;
    total: number;
  };
  errors?: string[]; // presumably populated when status is not healthy — confirm
}
/** Aggregate client performance metrics. */
export interface MongoDBMetrics {
  operationsPerSecond: number;
  averageLatency: number; // presumably milliseconds — confirm
  errorRate: number; // presumably a 0..1 ratio — confirm
  connectionPoolUtilization: number; // presumably a 0..1 ratio — confirm
  documentsProcessed: number;
}
/**
 * Collection Names
 *
 * Union of every collection this module addresses; used to type collection
 * lookups instead of free-form strings.
 */
export type CollectionNames =
  | 'sentiment_data'
  | 'raw_documents'
  | 'news_articles'
  | 'sec_filings'
  | 'earnings_transcripts'
  | 'analyst_reports'
  | 'social_media_posts'
  | 'market_events'
  | 'economic_indicators';
/**
 * Base Document Interface
 *
 * Fields shared by every stored document type.
 */
export interface DocumentBase {
  _id?: ObjectId; // assigned by MongoDB on insert when absent
  created_at: Date;
  updated_at: Date;
  source: string; // NOTE(review): presumably an origin/feed identifier — confirm
  metadata?: Record<string, any>; // free-form extra attributes
}
/**
 * Sentiment Data Document
 *
 * One sentiment measurement extracted from a piece of text.
 */
export interface SentimentData extends DocumentBase {
  symbol: string; // ticker symbol the sentiment applies to
  sentiment_score: number; // presumably in [-1, 1] — confirm
  sentiment_label: 'positive' | 'negative' | 'neutral';
  confidence: number; // presumably in [0, 1] — confirm
  text: string; // the analyzed text itself
  source_type: 'reddit' | 'twitter' | 'news' | 'forums';
  source_id: string; // id of the post/article at the source
  timestamp: Date; // presumably publication time of the text — confirm
  processed_at: Date; // when sentiment analysis ran
  language: string;
  keywords: string[];
  entities: Array<{
    name: string;
    type: string;
    confidence: number;
  }>;
}
/**
 * Raw Document
 *
 * An ingested document in its original form, prior to processing.
 */
export interface RawDocument extends DocumentBase {
  document_type: 'html' | 'pdf' | 'text' | 'json' | 'xml';
  content: string; // raw payload
  content_hash: string; // hash of content — presumably for dedupe; confirm
  url?: string;
  title?: string;
  author?: string;
  published_date?: Date;
  extracted_text?: string; // plain text pulled out of content
  processing_status: 'pending' | 'processed' | 'failed';
  size_bytes: number;
  language?: string;
}
/**
 * News Article
 */
export interface NewsArticle extends DocumentBase {
  headline: string;
  content: string; // full article body
  summary?: string;
  author: string;
  publication: string; // publishing outlet name
  published_date: Date;
  url: string;
  symbols: string[]; // ticker symbols the article covers
  categories: string[];
  sentiment_score?: number; // presumably filled after analysis — confirm
  relevance_score?: number;
  image_url?: string;
  tags: string[];
}
/**
 * SEC Filing
 */
export interface SecFiling extends DocumentBase {
  cik: string; // SEC Central Index Key of the filer
  accession_number: string; // unique SEC filing identifier
  filing_type: string; // presumably an SEC form type (e.g. 10-K) — confirm
  company_name: string;
  symbols: string[];
  filing_date: Date;
  period_end_date: Date; // end of the reporting period covered
  url: string;
  content: string;
  extracted_data?: Record<string, any>;
  financial_statements?: Array<{
    statement_type: string;
    data: Record<string, number>;
  }>;
  processing_status: 'pending' | 'processed' | 'failed';
}
/**
 * Earnings Transcript
 */
export interface EarningsTranscript extends DocumentBase {
  symbol: string;
  company_name: string;
  quarter: string; // presumably e.g. "Q1" — confirm format
  year: number;
  call_date: Date;
  transcript: string; // full call transcript text
  participants: Array<{
    name: string;
    title: string;
    type: 'executive' | 'analyst';
  }>;
  key_topics: string[];
  sentiment_analysis?: {
    overall_sentiment: number;
    topic_sentiments: Record<string, number>; // per-topic sentiment scores
  };
  financial_highlights?: Record<string, number>;
}
/**
 * Analyst Report
 */
export interface AnalystReport extends DocumentBase {
  symbol: string;
  analyst_firm: string;
  analyst_name: string;
  report_title: string;
  report_date: Date;
  rating: 'buy' | 'hold' | 'sell' | 'strong_buy' | 'strong_sell';
  price_target?: number; // analyst's price target, when given
  previous_rating?: string; // prior rating before any change
  content: string;
  summary: string;
  key_points: string[];
  financial_projections?: Record<string, number>;
}