// File listing metadata: 234 lines, 6.8 KiB, JavaScript
// MongoDB Initialization Script for Trading Bot
|
|
// This script creates collections and indexes for sentiment and document storage
|
|
|
|
// Point the shell's global `db` handle at the trading_documents database so
// that every collection call made after this line operates on that database.
db = db.getSiblingDB('trading_documents');
|
|
|
|
// Create collections with validation schemas
|
|
|
|
// Sentiment Analysis Collection
// Holds one document per sentiment reading for a symbol from a given source.
// The collection is created with a $jsonSchema validator; 'symbol', 'source',
// 'timestamp' and 'sentiment_score' are mandatory, the other fields optional.
db.createCollection('sentiment_analysis', {
  validator: {
    $jsonSchema: {
      bsonType: 'object',
      required: ['symbol', 'source', 'timestamp', 'sentiment_score'],
      properties: {
        symbol: {
          bsonType: 'string',
          description: 'Stock symbol (e.g., AAPL, GOOGL)'
        },
        source: {
          bsonType: 'string',
          description: 'Data source (news, social, earnings_call, etc.)'
        },
        timestamp: {
          bsonType: 'date',
          description: 'When the sentiment was recorded'
        },
        sentiment_score: {
          // Accept every BSON numeric type rather than only 'double'.
          // mongosh stores whole-number values (-1, 0, 1) as Int32, so a
          // 'double'-only rule would reject scores that are inside the
          // documented range. minimum/maximum still bound the value.
          bsonType: ['double', 'int', 'long', 'decimal'],
          minimum: -1.0,
          maximum: 1.0,
          description: 'Sentiment score between -1 (negative) and 1 (positive)'
        },
        confidence: {
          // Same reasoning as sentiment_score: 0 and 1 are valid confidence
          // values and may arrive as integers.
          bsonType: ['double', 'int', 'long', 'decimal'],
          minimum: 0.0,
          maximum: 1.0,
          description: 'Confidence level of the sentiment analysis'
        },
        text_snippet: {
          bsonType: 'string',
          description: 'Original text that was analyzed'
        },
        metadata: {
          // Free-form sub-document; no inner schema is imposed here.
          bsonType: 'object',
          description: 'Additional metadata about the sentiment source'
        }
      }
    }
  }
});
|
|
|
|
// Raw Documents Collection (for news articles, social media posts, etc.)
// Created with a $jsonSchema validator: 'source', 'document_type',
// 'timestamp' and 'content' are mandatory; every other field is optional
// but type-checked when present.
db.createCollection('raw_documents', {
  validator: {
    $jsonSchema: {
      bsonType: 'object',
      required: ['source', 'document_type', 'timestamp', 'content'],
      properties: {
        source: {
          bsonType: 'string',
          description: 'Document source (news_api, twitter, reddit, etc.)'
        },
        document_type: {
          // Closed set of values; anything else fails validation.
          bsonType: 'string',
          enum: ['news_article', 'social_post', 'earnings_transcript', 'research_report', 'press_release'],
          description: 'Type of document'
        },
        timestamp: {
          bsonType: 'date',
          description: 'When the document was created/published'
        },
        symbols: {
          // Array whose elements must all be strings.
          bsonType: 'array',
          items: {
            bsonType: 'string'
          },
          description: 'Array of stock symbols mentioned in the document'
        },
        title: {
          bsonType: 'string',
          description: 'Document title or headline'
        },
        content: {
          bsonType: 'string',
          description: 'Full document content'
        },
        url: {
          bsonType: 'string',
          description: 'Original URL of the document'
        },
        author: {
          bsonType: 'string',
          description: 'Document author or source account'
        },
        processed: {
          bsonType: 'bool',
          description: 'Whether this document has been processed for sentiment'
        },
        metadata: {
          // Free-form sub-document; no inner schema is imposed here.
          bsonType: 'object',
          description: 'Additional document metadata'
        }
      }
    }
  }
});
|
|
|
|
// Market Events Collection (for significant market events and their impact)
// Created with a $jsonSchema validator: 'event_type', 'timestamp' and
// 'description' are mandatory; the remaining fields are optional but
// type-checked when present.
db.createCollection('market_events', {
  validator: {
    $jsonSchema: {
      bsonType: 'object',
      required: ['event_type', 'timestamp', 'description'],
      properties: {
        event_type: {
          // Closed set of values; anything else fails validation.
          bsonType: 'string',
          enum: ['earnings', 'merger', 'acquisition', 'ipo', 'dividend', 'split', 'regulatory', 'economic_indicator'],
          description: 'Type of market event'
        },
        timestamp: {
          bsonType: 'date',
          description: 'When the event occurred or was announced'
        },
        symbols: {
          // Array whose elements must all be strings.
          bsonType: 'array',
          items: {
            bsonType: 'string'
          },
          description: 'Stock symbols affected by this event'
        },
        description: {
          bsonType: 'string',
          description: 'Event description'
        },
        impact_score: {
          // Accept every BSON numeric type rather than only 'double'.
          // mongosh stores whole-number values (e.g. 3 or -5) as Int32, so a
          // 'double'-only rule would reject scores that are inside the
          // allowed range. minimum/maximum still bound the value.
          bsonType: ['double', 'int', 'long', 'decimal'],
          minimum: -5.0,
          maximum: 5.0,
          description: 'Expected market impact score'
        },
        source_documents: {
          // Array of ObjectId values; per the description these point at
          // documents in raw_documents (not enforced by the validator).
          bsonType: 'array',
          items: {
            bsonType: 'objectId'
          },
          description: 'References to raw_documents that reported this event'
        }
      }
    }
  }
});
|
|
|
|
// Create indexes for efficient querying
|
|
|
|
// Sentiment Analysis indexes
// Each entry is one index key pattern (1 = ascending, -1 = descending).
// The indexes are created one at a time, in the order listed.
[
  { symbol: 1, timestamp: -1 },
  { source: 1, timestamp: -1 },
  { timestamp: -1 },
  { symbol: 1, source: 1, timestamp: -1 }
].forEach(function (keyPattern) {
  db.sentiment_analysis.createIndex(keyPattern);
});
|
|
|
|
// Raw Documents indexes
// Each entry is one index key pattern (1 = ascending, -1 = descending).
// The indexes are created one at a time, in the order listed.
[
  { symbols: 1, timestamp: -1 },
  { source: 1, timestamp: -1 },
  { document_type: 1, timestamp: -1 },
  { processed: 1, timestamp: -1 },
  { timestamp: -1 }
].forEach(function (keyPattern) {
  db.raw_documents.createIndex(keyPattern);
});
|
|
|
|
// Market Events indexes
// Each entry is one index key pattern (1 = ascending, -1 = descending).
// The indexes are created one at a time, in the order listed.
[
  { symbols: 1, timestamp: -1 },
  { event_type: 1, timestamp: -1 },
  { timestamp: -1 }
].forEach(function (keyPattern) {
  db.market_events.createIndex(keyPattern);
});
|
|
|
|
// Insert some sample data for testing
|
|
|
|
// Sample sentiment data
// Seeds sentiment_analysis with two example readings in a single insertMany
// call: one positive news-based score and one negative social-media score.
// Each record takes its own `new Date()` timestamp at insertion time.
db.sentiment_analysis.insertMany([
  {
    symbol: 'AAPL',
    source: 'news_analysis',
    timestamp: new Date(),
    sentiment_score: 0.75,
    confidence: 0.89,
    text_snippet: 'Apple reports strong quarterly earnings...',
    metadata: {
      article_id: 'news_001',
      provider: 'financial_news_api'
    }
  },
  {
    symbol: 'GOOGL',
    source: 'social_media',
    timestamp: new Date(),
    sentiment_score: -0.25,
    confidence: 0.67,
    text_snippet: 'Concerns about Google AI regulation...',
    metadata: {
      platform: 'twitter',
      engagement_score: 450
    }
  }
]);
|
|
|
|
// Sample raw document
// Seeds raw_documents with one example news article that mentions two
// symbols and is already flagged as processed.
db.raw_documents.insertOne({
  source: 'financial_news_api',
  document_type: 'news_article',
  timestamp: new Date(),
  symbols: ['AAPL', 'MSFT'],
  title: 'Tech Giants Show Strong Q4 Performance',
  content: 'Apple and Microsoft both reported better than expected earnings for Q4...',
  url: 'https://example.com/tech-earnings-q4',
  author: 'Financial Reporter',
  processed: true,
  metadata: {
    word_count: 850,
    readability_score: 0.75
  }
});
|
|
|
|
// Sample market event
// Seeds market_events with one example earnings event for AAPL.
// source_documents is left empty: no raw_documents are linked to it.
db.market_events.insertOne({
  event_type: 'earnings',
  timestamp: new Date(),
  symbols: ['AAPL'],
  description: 'Apple Q4 2024 Earnings Report',
  impact_score: 2.5,
  source_documents: []
});
|
|
|
|
// Report what the script did. Messages are printed one per line, in order.
// A wrapper function is used so print() receives only the message text and
// not the extra (index, array) arguments forEach passes to its callback.
[
  'MongoDB initialization completed successfully!',
  'Created collections: sentiment_analysis, raw_documents, market_events',
  'Created indexes for efficient querying',
  'Inserted sample data for testing'
].forEach(function (message) {
  print(message);
});
|