// MongoDB Initialization Script for Trading Bot
// This script creates collections and indexes for sentiment and document storage
//
// Layout matters here: `//` comments run to the end of the physical line, so
// every statement must sit on its own line(s). If statements and comments are
// collapsed onto one line, the code after the first `//` is silently disabled.
//
// NOTE(review): db.createCollection() fails when the collection already exists
// and the sample inserts are unconditional, so this script presumably is meant
// to run exactly once against a fresh database -- confirm with the deployment.

// Switch to the trading_documents database
db = db.getSiblingDB('trading_documents');

// Create collections with validation schemas

// Sentiment Analysis Collection
// NOTE(review): `bsonType: 'double'` matches doubles only; a writer that sends
// an integer-typed value (e.g. 1 instead of 1.0) for sentiment_score or
// confidence will fail validation -- confirm that this strictness is intended.
db.createCollection('sentiment_analysis', {
  validator: {
    $jsonSchema: {
      bsonType: 'object',
      required: ['symbol', 'source', 'timestamp', 'sentiment_score'],
      properties: {
        symbol: {
          bsonType: 'string',
          description: 'Stock symbol (e.g., AAPL, GOOGL)'
        },
        source: {
          bsonType: 'string',
          description: 'Data source (news, social, earnings_call, etc.)'
        },
        timestamp: {
          bsonType: 'date',
          description: 'When the sentiment was recorded'
        },
        sentiment_score: {
          bsonType: 'double',
          minimum: -1.0,
          maximum: 1.0,
          description: 'Sentiment score between -1 (negative) and 1 (positive)'
        },
        confidence: {
          bsonType: 'double',
          minimum: 0.0,
          maximum: 1.0,
          description: 'Confidence level of the sentiment analysis'
        },
        text_snippet: {
          bsonType: 'string',
          description: 'Original text that was analyzed'
        },
        metadata: {
          bsonType: 'object',
          description: 'Additional metadata about the sentiment source'
        }
      }
    }
  }
});

// Raw Documents Collection (for news articles, social media posts, etc.)
db.createCollection('raw_documents', {
  validator: {
    $jsonSchema: {
      bsonType: 'object',
      required: ['source', 'document_type', 'timestamp', 'content'],
      properties: {
        source: {
          bsonType: 'string',
          description: 'Document source (news_api, twitter, reddit, etc.)'
        },
        document_type: {
          bsonType: 'string',
          enum: ['news_article', 'social_post', 'earnings_transcript', 'research_report', 'press_release'],
          description: 'Type of document'
        },
        timestamp: {
          bsonType: 'date',
          description: 'When the document was created/published'
        },
        symbols: {
          bsonType: 'array',
          items: {
            bsonType: 'string'
          },
          description: 'Array of stock symbols mentioned in the document'
        },
        title: {
          bsonType: 'string',
          description: 'Document title or headline'
        },
        content: {
          bsonType: 'string',
          description: 'Full document content'
        },
        url: {
          bsonType: 'string',
          description: 'Original URL of the document'
        },
        author: {
          bsonType: 'string',
          description: 'Document author or source account'
        },
        processed: {
          bsonType: 'bool',
          description: 'Whether this document has been processed for sentiment'
        },
        metadata: {
          bsonType: 'object',
          description: 'Additional document metadata'
        }
      }
    }
  }
});

// Market Events Collection (for significant market events and their impact)
db.createCollection('market_events', {
  validator: {
    $jsonSchema: {
      bsonType: 'object',
      required: ['event_type', 'timestamp', 'description'],
      properties: {
        event_type: {
          bsonType: 'string',
          enum: ['earnings', 'merger', 'acquisition', 'ipo', 'dividend', 'split', 'regulatory', 'economic_indicator'],
          description: 'Type of market event'
        },
        timestamp: {
          bsonType: 'date',
          description: 'When the event occurred or was announced'
        },
        symbols: {
          bsonType: 'array',
          items: {
            bsonType: 'string'
          },
          description: 'Stock symbols affected by this event'
        },
        description: {
          bsonType: 'string',
          description: 'Event description'
        },
        impact_score: {
          bsonType: 'double',
          minimum: -5.0,
          maximum: 5.0,
          description: 'Expected market impact score'
        },
        source_documents: {
          bsonType: 'array',
          items: {
            bsonType: 'objectId'
          },
          description: 'References to raw_documents that reported this event'
        }
      }
    }
  }
});

// Create indexes for efficient querying
// Every index ends with `timestamp: -1`, i.e. newest-first within each key.

// Sentiment Analysis indexes
db.sentiment_analysis.createIndex({ symbol: 1, timestamp: -1 });
db.sentiment_analysis.createIndex({ source: 1, timestamp: -1 });
db.sentiment_analysis.createIndex({ timestamp: -1 });
db.sentiment_analysis.createIndex({ symbol: 1, source: 1, timestamp: -1 });

// Raw Documents indexes
db.raw_documents.createIndex({ symbols: 1, timestamp: -1 });
db.raw_documents.createIndex({ source: 1, timestamp: -1 });
db.raw_documents.createIndex({ document_type: 1, timestamp: -1 });
db.raw_documents.createIndex({ processed: 1, timestamp: -1 });
db.raw_documents.createIndex({ timestamp: -1 });

// Market Events indexes
db.market_events.createIndex({ symbols: 1, timestamp: -1 });
db.market_events.createIndex({ event_type: 1, timestamp: -1 });
db.market_events.createIndex({ timestamp: -1 });

// Insert some sample data for testing

// Sample sentiment data
db.sentiment_analysis.insertMany([
  {
    symbol: 'AAPL',
    source: 'news_analysis',
    timestamp: new Date(),
    sentiment_score: 0.75,
    confidence: 0.89,
    text_snippet: 'Apple reports strong quarterly earnings...',
    metadata: {
      article_id: 'news_001',
      provider: 'financial_news_api'
    }
  },
  {
    symbol: 'GOOGL',
    source: 'social_media',
    timestamp: new Date(),
    sentiment_score: -0.25,
    confidence: 0.67,
    text_snippet: 'Concerns about Google AI regulation...',
    metadata: {
      platform: 'twitter',
      engagement_score: 450
    }
  }
]);

// Sample raw document
db.raw_documents.insertOne({
  source: 'financial_news_api',
  document_type: 'news_article',
  timestamp: new Date(),
  symbols: ['AAPL', 'MSFT'],
  title: 'Tech Giants Show Strong Q4 Performance',
  content: 'Apple and Microsoft both reported better than expected earnings for Q4...',
  url: 'https://example.com/tech-earnings-q4',
  author: 'Financial Reporter',
  processed: true,
  metadata: {
    word_count: 850,
    readability_score: 0.75
  }
});

// Sample market event
// source_documents is left empty: no raw_documents ObjectId is captured above.
db.market_events.insertOne({
  event_type: 'earnings',
  timestamp: new Date(),
  symbols: ['AAPL'],
  description: 'Apple Q4 2024 Earnings Report',
  impact_score: 2.5,
  source_documents: []
});

print('MongoDB initialization completed successfully!');
print('Created collections: sentiment_analysis, raw_documents, market_events');
print('Created indexes for efficient querying');
print('Inserted sample data for testing');