// MongoDB Initialization Script for Trading Bot
//
// Selects the 'stock' database and reports what the script actually did.
// The collection, index, and sample-data setup below is currently DISABLED
// (commented out) and is kept only as a reference for the intended schema.
// NOTE(review): confirm whether the setup is disabled on purpose; if it is
// re-enabled, update the status messages at the bottom of this file to match.

// Switch to the 'stock' database (getSiblingDB does not change any data).
db = db.getSiblingDB('stock');

// ---------------------------------------------------------------------------
// DISABLED: create collections with validation schemas
// ---------------------------------------------------------------------------

// Sentiment Analysis Collection
// db.createCollection('sentiment_analysis', {
//   validator: {
//     $jsonSchema: {
//       bsonType: 'object',
//       required: ['symbol', 'source', 'timestamp', 'sentiment_score'],
//       properties: {
//         symbol: {
//           bsonType: 'string',
//           description: 'Stock symbol (e.g., AAPL, GOOGL)'
//         },
//         source: {
//           bsonType: 'string',
//           description: 'Data source (news, social, earnings_call, etc.)'
//         },
//         timestamp: {
//           bsonType: 'date',
//           description: 'When the sentiment was recorded'
//         },
//         sentiment_score: {
//           bsonType: 'double',
//           minimum: -1.0,
//           maximum: 1.0,
//           description: 'Sentiment score between -1 (negative) and 1 (positive)'
//         },
//         confidence: {
//           bsonType: 'double',
//           minimum: 0.0,
//           maximum: 1.0,
//           description: 'Confidence level of the sentiment analysis'
//         },
//         text_snippet: {
//           bsonType: 'string',
//           description: 'Original text that was analyzed'
//         },
//         metadata: {
//           bsonType: 'object',
//           description: 'Additional metadata about the sentiment source'
//         }
//       }
//     }
//   }
// });

// Raw Documents Collection (for news articles, social media posts, etc.)
// db.createCollection('raw_documents', {
//   validator: {
//     $jsonSchema: {
//       bsonType: 'object',
//       required: ['source', 'document_type', 'timestamp', 'content'],
//       properties: {
//         source: {
//           bsonType: 'string',
//           description: 'Document source (news_api, twitter, reddit, etc.)'
//         },
//         document_type: {
//           bsonType: 'string',
//           enum: ['news_article', 'social_post', 'earnings_transcript', 'research_report', 'press_release'],
//           description: 'Type of document'
//         },
//         timestamp: {
//           bsonType: 'date',
//           description: 'When the document was created/published'
//         },
//         symbols: {
//           bsonType: 'array',
//           items: {
//             bsonType: 'string'
//           },
//           description: 'Array of stock symbols mentioned in the document'
//         },
//         title: {
//           bsonType: 'string',
//           description: 'Document title or headline'
//         },
//         content: {
//           bsonType: 'string',
//           description: 'Full document content'
//         },
//         url: {
//           bsonType: 'string',
//           description: 'Original URL of the document'
//         },
//         author: {
//           bsonType: 'string',
//           description: 'Document author or source account'
//         },
//         processed: {
//           bsonType: 'bool',
//           description: 'Whether this document has been processed for sentiment'
//         },
//         metadata: {
//           bsonType: 'object',
//           description: 'Additional document metadata'
//         }
//       }
//     }
//   }
// });

// Market Events Collection (for significant market events and their impact)
// db.createCollection('market_events', {
//   validator: {
//     $jsonSchema: {
//       bsonType: 'object',
//       required: ['event_type', 'timestamp', 'description'],
//       properties: {
//         event_type: {
//           bsonType: 'string',
//           enum: ['earnings', 'merger', 'acquisition', 'ipo', 'dividend', 'split', 'regulatory', 'economic_indicator'],
//           description: 'Type of market event'
//         },
//         timestamp: {
//           bsonType: 'date',
//           description: 'When the event occurred or was announced'
//         },
//         symbols: {
//           bsonType: 'array',
//           items: {
//             bsonType: 'string'
//           },
//           description: 'Stock symbols affected by this event'
//         },
//         description: {
//           bsonType: 'string',
//           description: 'Event description'
//         },
//         impact_score: {
//           bsonType: 'double',
//           minimum: -5.0,
//           maximum: 5.0,
//           description: 'Expected market impact score'
//         },
//         source_documents: {
//           bsonType: 'array',
//           items: {
//             bsonType: 'objectId'
//           },
//           description: 'References to raw_documents that reported this event'
//         }
//       }
//     }
//   }
// });

// ---------------------------------------------------------------------------
// DISABLED: create indexes for efficient querying
// ---------------------------------------------------------------------------

// Sentiment Analysis indexes
// db.sentiment_analysis.createIndex({ symbol: 1, timestamp: -1 });
// db.sentiment_analysis.createIndex({ source: 1, timestamp: -1 });
// db.sentiment_analysis.createIndex({ timestamp: -1 });
// db.sentiment_analysis.createIndex({ symbol: 1, source: 1, timestamp: -1 });

// Raw Documents indexes
// db.raw_documents.createIndex({ symbols: 1, timestamp: -1 });
// db.raw_documents.createIndex({ source: 1, timestamp: -1 });
// db.raw_documents.createIndex({ document_type: 1, timestamp: -1 });
// db.raw_documents.createIndex({ processed: 1, timestamp: -1 });
// db.raw_documents.createIndex({ timestamp: -1 });

// Market Events indexes
// db.market_events.createIndex({ symbols: 1, timestamp: -1 });
// db.market_events.createIndex({ event_type: 1, timestamp: -1 });
// db.market_events.createIndex({ timestamp: -1 });

// ---------------------------------------------------------------------------
// DISABLED: insert some sample data for testing
// ---------------------------------------------------------------------------

// Sample sentiment data
// db.sentiment_analysis.insertMany([
//   {
//     symbol: 'AAPL',
//     source: 'news_analysis',
//     timestamp: new Date(),
//     sentiment_score: 0.75,
//     confidence: 0.89,
//     text_snippet: 'Apple reports strong quarterly earnings...',
//     metadata: {
//       article_id: 'news_001',
//       provider: 'financial_news_api'
//     }
//   },
//   {
//     symbol: 'GOOGL',
//     source: 'social_media',
//     timestamp: new Date(),
//     sentiment_score: -0.25,
//     confidence: 0.67,
//     text_snippet: 'Concerns about Google AI regulation...',
//     metadata: {
//       platform: 'twitter',
//       engagement_score: 450
//     }
//   }
// ]);

// Sample raw document
// db.raw_documents.insertOne({
//   source: 'financial_news_api',
//   document_type: 'news_article',
//   timestamp: new Date(),
//   symbols: ['AAPL', 'MSFT'],
//   title: 'Tech Giants Show Strong Q4 Performance',
//   content: 'Apple and Microsoft both reported better than expected earnings for Q4...',
//   url: 'https://example.com/tech-earnings-q4',
//   author: 'Financial Reporter',
//   processed: true,
//   metadata: {
//     word_count: 850,
//     readability_score: 0.75
//   }
// });

// Sample market event
// db.market_events.insertOne({
//   event_type: 'earnings',
//   timestamp: new Date(),
//   symbols: ['AAPL'],
//   description: 'Apple Q4 2024 Earnings Report',
//   impact_score: 2.5,
//   source_documents: []
// });

// Status output. The messages describe only what this script really executes:
// the previous text claimed collections, indexes, and sample data had been
// created even though every one of those statements is commented out.
print('MongoDB initialization completed successfully!');
print('Selected database: stock');
print('Skipped collection, index, and sample data setup (disabled in this script)');