234 lines
7.6 KiB
JavaScript
234 lines
7.6 KiB
JavaScript
// MongoDB Initialization Script for Trading Bot
|
|
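// This script selects the database used for sentiment and document storage.
// NOTE: the collection, index, and sample-data setup below is currently commented out, so none of it runs.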
|
|
|
|
// Switch to the 'stock' database: reassign the shell's `db` handle to the
// sibling database named 'stock' so later `db.*` statements target it.
// NOTE(review): this comment previously named `trading_documents`; the code
// selects 'stock' — confirm which database name is intended.
|
|
db = db.getSiblingDB('stock');
|
|
|
|
// // Create collections with validation schemas
|
|
|
|
// Sentiment Analysis Collection
|
|
// db.createCollection('sentiment_analysis', {
|
|
// validator: {
|
|
// $jsonSchema: {
|
|
// bsonType: 'object',
|
|
// required: ['symbol', 'source', 'timestamp', 'sentiment_score'],
|
|
// properties: {
|
|
// symbol: {
|
|
// bsonType: 'string',
|
|
// description: 'Stock symbol (e.g., AAPL, GOOGL)'
|
|
// },
|
|
// source: {
|
|
// bsonType: 'string',
|
|
// description: 'Data source (news, social, earnings_call, etc.)'
|
|
// },
|
|
// timestamp: {
|
|
// bsonType: 'date',
|
|
// description: 'When the sentiment was recorded'
|
|
// },
|
|
// sentiment_score: {
|
|
// bsonType: 'double',
|
|
// minimum: -1.0,
|
|
// maximum: 1.0,
|
|
// description: 'Sentiment score between -1 (negative) and 1 (positive)'
|
|
// },
|
|
// confidence: {
|
|
// bsonType: 'double',
|
|
// minimum: 0.0,
|
|
// maximum: 1.0,
|
|
// description: 'Confidence level of the sentiment analysis'
|
|
// },
|
|
// text_snippet: {
|
|
// bsonType: 'string',
|
|
// description: 'Original text that was analyzed'
|
|
// },
|
|
// metadata: {
|
|
// bsonType: 'object',
|
|
// description: 'Additional metadata about the sentiment source'
|
|
// }
|
|
// }
|
|
// }
|
|
// }
|
|
// });
|
|
|
|
// // Raw Documents Collection (for news articles, social media posts, etc.)
|
|
// db.createCollection('raw_documents', {
|
|
// validator: {
|
|
// $jsonSchema: {
|
|
// bsonType: 'object',
|
|
// required: ['source', 'document_type', 'timestamp', 'content'],
|
|
// properties: {
|
|
// source: {
|
|
// bsonType: 'string',
|
|
// description: 'Document source (news_api, twitter, reddit, etc.)'
|
|
// },
|
|
// document_type: {
|
|
// bsonType: 'string',
|
|
// enum: ['news_article', 'social_post', 'earnings_transcript', 'research_report', 'press_release'],
|
|
// description: 'Type of document'
|
|
// },
|
|
// timestamp: {
|
|
// bsonType: 'date',
|
|
// description: 'When the document was created/published'
|
|
// },
|
|
// symbols: {
|
|
// bsonType: 'array',
|
|
// items: {
|
|
// bsonType: 'string'
|
|
// },
|
|
// description: 'Array of stock symbols mentioned in the document'
|
|
// },
|
|
// title: {
|
|
// bsonType: 'string',
|
|
// description: 'Document title or headline'
|
|
// },
|
|
// content: {
|
|
// bsonType: 'string',
|
|
// description: 'Full document content'
|
|
// },
|
|
// url: {
|
|
// bsonType: 'string',
|
|
// description: 'Original URL of the document'
|
|
// },
|
|
// author: {
|
|
// bsonType: 'string',
|
|
// description: 'Document author or source account'
|
|
// },
|
|
// processed: {
|
|
// bsonType: 'bool',
|
|
// description: 'Whether this document has been processed for sentiment'
|
|
// },
|
|
// metadata: {
|
|
// bsonType: 'object',
|
|
// description: 'Additional document metadata'
|
|
// }
|
|
// }
|
|
// }
|
|
// }
|
|
// });
|
|
|
|
// // Market Events Collection (for significant market events and their impact)
|
|
// db.createCollection('market_events', {
|
|
// validator: {
|
|
// $jsonSchema: {
|
|
// bsonType: 'object',
|
|
// required: ['event_type', 'timestamp', 'description'],
|
|
// properties: {
|
|
// event_type: {
|
|
// bsonType: 'string',
|
|
// enum: ['earnings', 'merger', 'acquisition', 'ipo', 'dividend', 'split', 'regulatory', 'economic_indicator'],
|
|
// description: 'Type of market event'
|
|
// },
|
|
// timestamp: {
|
|
// bsonType: 'date',
|
|
// description: 'When the event occurred or was announced'
|
|
// },
|
|
// symbols: {
|
|
// bsonType: 'array',
|
|
// items: {
|
|
// bsonType: 'string'
|
|
// },
|
|
// description: 'Stock symbols affected by this event'
|
|
// },
|
|
// description: {
|
|
// bsonType: 'string',
|
|
// description: 'Event description'
|
|
// },
|
|
// impact_score: {
|
|
// bsonType: 'double',
|
|
// minimum: -5.0,
|
|
// maximum: 5.0,
|
|
// description: 'Expected market impact score'
|
|
// },
|
|
// source_documents: {
|
|
// bsonType: 'array',
|
|
// items: {
|
|
// bsonType: 'objectId'
|
|
// },
|
|
// description: 'References to raw_documents that reported this event'
|
|
// }
|
|
// }
|
|
// }
|
|
// }
|
|
// });
|
|
|
|
// // Create indexes for efficient querying
|
|
|
|
// // Sentiment Analysis indexes
|
|
// db.sentiment_analysis.createIndex({ symbol: 1, timestamp: -1 });
|
|
// db.sentiment_analysis.createIndex({ source: 1, timestamp: -1 });
|
|
// db.sentiment_analysis.createIndex({ timestamp: -1 });
|
|
// db.sentiment_analysis.createIndex({ symbol: 1, source: 1, timestamp: -1 });
|
|
|
|
// // Raw Documents indexes
|
|
// db.raw_documents.createIndex({ symbols: 1, timestamp: -1 });
|
|
// db.raw_documents.createIndex({ source: 1, timestamp: -1 });
|
|
// db.raw_documents.createIndex({ document_type: 1, timestamp: -1 });
|
|
// db.raw_documents.createIndex({ processed: 1, timestamp: -1 });
|
|
// db.raw_documents.createIndex({ timestamp: -1 });
|
|
|
|
// // Market Events indexes
|
|
// db.market_events.createIndex({ symbols: 1, timestamp: -1 });
|
|
// db.market_events.createIndex({ event_type: 1, timestamp: -1 });
|
|
// db.market_events.createIndex({ timestamp: -1 });
|
|
|
|
// // Insert some sample data for testing
|
|
|
|
// // Sample sentiment data
|
|
// db.sentiment_analysis.insertMany([
|
|
// {
|
|
// symbol: 'AAPL',
|
|
// source: 'news_analysis',
|
|
// timestamp: new Date(),
|
|
// sentiment_score: 0.75,
|
|
// confidence: 0.89,
|
|
// text_snippet: 'Apple reports strong quarterly earnings...',
|
|
// metadata: {
|
|
// article_id: 'news_001',
|
|
// provider: 'financial_news_api'
|
|
// }
|
|
// },
|
|
// {
|
|
// symbol: 'GOOGL',
|
|
// source: 'social_media',
|
|
// timestamp: new Date(),
|
|
// sentiment_score: -0.25,
|
|
// confidence: 0.67,
|
|
// text_snippet: 'Concerns about Google AI regulation...',
|
|
// metadata: {
|
|
// platform: 'twitter',
|
|
// engagement_score: 450
|
|
// }
|
|
// }
|
|
// ]);
|
|
|
|
// // Sample raw document
|
|
// db.raw_documents.insertOne({
|
|
// source: 'financial_news_api',
|
|
// document_type: 'news_article',
|
|
// timestamp: new Date(),
|
|
// symbols: ['AAPL', 'MSFT'],
|
|
// title: 'Tech Giants Show Strong Q4 Performance',
|
|
// content: 'Apple and Microsoft both reported better than expected earnings for Q4...',
|
|
// url: 'https://example.com/tech-earnings-q4',
|
|
// author: 'Financial Reporter',
|
|
// processed: true,
|
|
// metadata: {
|
|
// word_count: 850,
|
|
// readability_score: 0.75
|
|
// }
|
|
// });
|
|
|
|
// // Sample market event
|
|
// db.market_events.insertOne({
|
|
// event_type: 'earnings',
|
|
// timestamp: new Date(),
|
|
// symbols: ['AAPL'],
|
|
// description: 'Apple Q4 2024 Earnings Report',
|
|
// impact_score: 2.5,
|
|
// source_documents: []
|
|
// });
|
|
|
|
// Report what this run actually did. Every createCollection, createIndex,
// and insert statement in this script is commented out, so the only effect
// of running it is selecting the database. The messages state that instead
// of claiming collections, indexes, or sample data were created.
print('MongoDB initialization completed successfully!');
print('Using database: ' + db.getName());
print('Skipped collection, index, and sample data setup (statements are commented out)');
|