work on ib and cleanup

This commit is contained in:
Boki 2025-06-14 09:17:48 -04:00
parent a20a11c1aa
commit d686a72591
41 changed files with 601 additions and 2793 deletions

View file

@ -2,231 +2,231 @@
// This script creates collections and indexes for sentiment and document storage
// Switch to the stock database; every statement below runs against it.
// (A preceding switch to 'trading_documents' was immediately overwritten by
// this assignment, so it was a dead store and has been removed.)
db = db.getSiblingDB('stock');
// Create collections with validation schemas
// // Create collections with validation schemas
// Sentiment Analysis Collection
// Creates `sentiment_analysis` with a $jsonSchema validator.
// Required fields: symbol, source, timestamp, sentiment_score.
// confidence, text_snippet and metadata are type-checked only when present.
//
// The numeric fields accept every BSON numeric type rather than only
// 'double': integer-valued JavaScript numbers (0, 1, -1) are stored as Int32
// by mongosh, so a 'double'-only schema rejects values that sit exactly on
// the allowed bounds.
db.createCollection('sentiment_analysis', {
  validator: {
    $jsonSchema: {
      bsonType: 'object',
      required: ['symbol', 'source', 'timestamp', 'sentiment_score'],
      properties: {
        symbol: {
          bsonType: 'string',
          description: 'Stock symbol (e.g., AAPL, GOOGL)'
        },
        source: {
          bsonType: 'string',
          description: 'Data source (news, social, earnings_call, etc.)'
        },
        timestamp: {
          bsonType: 'date',
          description: 'When the sentiment was recorded'
        },
        sentiment_score: {
          // Any numeric BSON type, still range-limited to [-1, 1].
          bsonType: ['double', 'int', 'long', 'decimal'],
          minimum: -1.0,
          maximum: 1.0,
          description: 'Sentiment score between -1 (negative) and 1 (positive)'
        },
        confidence: {
          // Any numeric BSON type, still range-limited to [0, 1].
          bsonType: ['double', 'int', 'long', 'decimal'],
          minimum: 0.0,
          maximum: 1.0,
          description: 'Confidence level of the sentiment analysis'
        },
        text_snippet: {
          bsonType: 'string',
          description: 'Original text that was analyzed'
        },
        metadata: {
          bsonType: 'object',
          description: 'Additional metadata about the sentiment source'
        }
      }
    }
  }
});
// db.createCollection('sentiment_analysis', {
// validator: {
// $jsonSchema: {
// bsonType: 'object',
// required: ['symbol', 'source', 'timestamp', 'sentiment_score'],
// properties: {
// symbol: {
// bsonType: 'string',
// description: 'Stock symbol (e.g., AAPL, GOOGL)'
// },
// source: {
// bsonType: 'string',
// description: 'Data source (news, social, earnings_call, etc.)'
// },
// timestamp: {
// bsonType: 'date',
// description: 'When the sentiment was recorded'
// },
// sentiment_score: {
// bsonType: 'double',
// minimum: -1.0,
// maximum: 1.0,
// description: 'Sentiment score between -1 (negative) and 1 (positive)'
// },
// confidence: {
// bsonType: 'double',
// minimum: 0.0,
// maximum: 1.0,
// description: 'Confidence level of the sentiment analysis'
// },
// text_snippet: {
// bsonType: 'string',
// description: 'Original text that was analyzed'
// },
// metadata: {
// bsonType: 'object',
// description: 'Additional metadata about the sentiment source'
// }
// }
// }
// }
// });
// Raw Documents Collection (for news articles, social media posts, etc.)
// Creates `raw_documents` with a $jsonSchema validator. Required fields:
// source, document_type, timestamp, content. The block scope keeps the
// helper names out of the shell's global namespace.
{
  // Builds a `{ bsonType, description }` field definition.
  const field = (bsonType, description) => ({ bsonType, description });

  const rawDocumentProperties = {
    source: field('string', 'Document source (news_api, twitter, reddit, etc.)'),
    document_type: {
      bsonType: 'string',
      enum: ['news_article', 'social_post', 'earnings_transcript', 'research_report', 'press_release'],
      description: 'Type of document'
    },
    timestamp: field('date', 'When the document was created/published'),
    symbols: {
      bsonType: 'array',
      items: { bsonType: 'string' },
      description: 'Array of stock symbols mentioned in the document'
    },
    title: field('string', 'Document title or headline'),
    content: field('string', 'Full document content'),
    url: field('string', 'Original URL of the document'),
    author: field('string', 'Document author or source account'),
    processed: field('bool', 'Whether this document has been processed for sentiment'),
    metadata: field('object', 'Additional document metadata')
  };

  const rawDocumentSchema = {
    bsonType: 'object',
    required: ['source', 'document_type', 'timestamp', 'content'],
    properties: rawDocumentProperties
  };

  db.createCollection('raw_documents', {
    validator: { $jsonSchema: rawDocumentSchema }
  });
}
// // Raw Documents Collection (for news articles, social media posts, etc.)
// db.createCollection('raw_documents', {
// validator: {
// $jsonSchema: {
// bsonType: 'object',
// required: ['source', 'document_type', 'timestamp', 'content'],
// properties: {
// source: {
// bsonType: 'string',
// description: 'Document source (news_api, twitter, reddit, etc.)'
// },
// document_type: {
// bsonType: 'string',
// enum: ['news_article', 'social_post', 'earnings_transcript', 'research_report', 'press_release'],
// description: 'Type of document'
// },
// timestamp: {
// bsonType: 'date',
// description: 'When the document was created/published'
// },
// symbols: {
// bsonType: 'array',
// items: {
// bsonType: 'string'
// },
// description: 'Array of stock symbols mentioned in the document'
// },
// title: {
// bsonType: 'string',
// description: 'Document title or headline'
// },
// content: {
// bsonType: 'string',
// description: 'Full document content'
// },
// url: {
// bsonType: 'string',
// description: 'Original URL of the document'
// },
// author: {
// bsonType: 'string',
// description: 'Document author or source account'
// },
// processed: {
// bsonType: 'bool',
// description: 'Whether this document has been processed for sentiment'
// },
// metadata: {
// bsonType: 'object',
// description: 'Additional document metadata'
// }
// }
// }
// }
// });
// Market Events Collection (for significant market events and their impact)
// Creates `market_events` with a $jsonSchema validator.
// Required fields: event_type, timestamp, description.
// symbols, impact_score and source_documents are type-checked only when
// present.
//
// impact_score accepts every BSON numeric type rather than only 'double':
// integer-valued JavaScript numbers (e.g. 0, 2, -5) are stored as Int32 by
// mongosh, so a 'double'-only schema rejects whole-number scores that are
// inside the allowed range.
db.createCollection('market_events', {
  validator: {
    $jsonSchema: {
      bsonType: 'object',
      required: ['event_type', 'timestamp', 'description'],
      properties: {
        event_type: {
          bsonType: 'string',
          enum: ['earnings', 'merger', 'acquisition', 'ipo', 'dividend', 'split', 'regulatory', 'economic_indicator'],
          description: 'Type of market event'
        },
        timestamp: {
          bsonType: 'date',
          description: 'When the event occurred or was announced'
        },
        symbols: {
          bsonType: 'array',
          items: {
            bsonType: 'string'
          },
          description: 'Stock symbols affected by this event'
        },
        description: {
          bsonType: 'string',
          description: 'Event description'
        },
        impact_score: {
          // Any numeric BSON type, still range-limited to [-5, 5].
          bsonType: ['double', 'int', 'long', 'decimal'],
          minimum: -5.0,
          maximum: 5.0,
          description: 'Expected market impact score'
        },
        source_documents: {
          bsonType: 'array',
          items: {
            bsonType: 'objectId'
          },
          description: 'References to raw_documents that reported this event'
        }
      }
    }
  }
});
// // Market Events Collection (for significant market events and their impact)
// db.createCollection('market_events', {
// validator: {
// $jsonSchema: {
// bsonType: 'object',
// required: ['event_type', 'timestamp', 'description'],
// properties: {
// event_type: {
// bsonType: 'string',
// enum: ['earnings', 'merger', 'acquisition', 'ipo', 'dividend', 'split', 'regulatory', 'economic_indicator'],
// description: 'Type of market event'
// },
// timestamp: {
// bsonType: 'date',
// description: 'When the event occurred or was announced'
// },
// symbols: {
// bsonType: 'array',
// items: {
// bsonType: 'string'
// },
// description: 'Stock symbols affected by this event'
// },
// description: {
// bsonType: 'string',
// description: 'Event description'
// },
// impact_score: {
// bsonType: 'double',
// minimum: -5.0,
// maximum: 5.0,
// description: 'Expected market impact score'
// },
// source_documents: {
// bsonType: 'array',
// items: {
// bsonType: 'objectId'
// },
// description: 'References to raw_documents that reported this event'
// }
// }
// }
// }
// });
// Create indexes for efficient querying
// // Create indexes for efficient querying
// Sentiment Analysis indexes
// Each key pattern is created as its own index, in the order listed.
for (const keyPattern of [
  { symbol: 1, timestamp: -1 },
  { source: 1, timestamp: -1 },
  { timestamp: -1 },
  { symbol: 1, source: 1, timestamp: -1 }
]) {
  db.sentiment_analysis.createIndex(keyPattern);
}
// // Sentiment Analysis indexes
// db.sentiment_analysis.createIndex({ symbol: 1, timestamp: -1 });
// db.sentiment_analysis.createIndex({ source: 1, timestamp: -1 });
// db.sentiment_analysis.createIndex({ timestamp: -1 });
// db.sentiment_analysis.createIndex({ symbol: 1, source: 1, timestamp: -1 });
// Raw Documents indexes
// Each key pattern is created as its own index, in the order listed.
for (const keyPattern of [
  { symbols: 1, timestamp: -1 },
  { source: 1, timestamp: -1 },
  { document_type: 1, timestamp: -1 },
  { processed: 1, timestamp: -1 },
  { timestamp: -1 }
]) {
  db.raw_documents.createIndex(keyPattern);
}
// // Raw Documents indexes
// db.raw_documents.createIndex({ symbols: 1, timestamp: -1 });
// db.raw_documents.createIndex({ source: 1, timestamp: -1 });
// db.raw_documents.createIndex({ document_type: 1, timestamp: -1 });
// db.raw_documents.createIndex({ processed: 1, timestamp: -1 });
// db.raw_documents.createIndex({ timestamp: -1 });
// Market Events indexes
// Each key pattern is created as its own index, in the order listed.
for (const keyPattern of [
  { symbols: 1, timestamp: -1 },
  { event_type: 1, timestamp: -1 },
  { timestamp: -1 }
]) {
  db.market_events.createIndex(keyPattern);
}
// // Market Events indexes
// db.market_events.createIndex({ symbols: 1, timestamp: -1 });
// db.market_events.createIndex({ event_type: 1, timestamp: -1 });
// db.market_events.createIndex({ timestamp: -1 });
// Insert some sample data for testing
// // Insert some sample data for testing
// Sample sentiment data
// Two hand-written records (one positive, one negative score) inserted in a
// single insertMany call. Block-scoped so the names do not leak into the
// shell's global namespace.
{
  const appleSample = {
    symbol: 'AAPL',
    source: 'news_analysis',
    timestamp: new Date(),
    sentiment_score: 0.75,
    confidence: 0.89,
    text_snippet: 'Apple reports strong quarterly earnings...',
    metadata: { article_id: 'news_001', provider: 'financial_news_api' }
  };

  const googleSample = {
    symbol: 'GOOGL',
    source: 'social_media',
    timestamp: new Date(),
    sentiment_score: -0.25,
    confidence: 0.67,
    text_snippet: 'Concerns about Google AI regulation...',
    metadata: { platform: 'twitter', engagement_score: 450 }
  };

  db.sentiment_analysis.insertMany([appleSample, googleSample]);
}
// // Sample sentiment data
// db.sentiment_analysis.insertMany([
// {
// symbol: 'AAPL',
// source: 'news_analysis',
// timestamp: new Date(),
// sentiment_score: 0.75,
// confidence: 0.89,
// text_snippet: 'Apple reports strong quarterly earnings...',
// metadata: {
// article_id: 'news_001',
// provider: 'financial_news_api'
// }
// },
// {
// symbol: 'GOOGL',
// source: 'social_media',
// timestamp: new Date(),
// sentiment_score: -0.25,
// confidence: 0.67,
// text_snippet: 'Concerns about Google AI regulation...',
// metadata: {
// platform: 'twitter',
// engagement_score: 450
// }
// }
// ]);
// Sample raw document
// One news article mentioning two symbols, inserted already marked as
// processed.
{
  const sampleArticle = {
    source: 'financial_news_api',
    document_type: 'news_article',
    timestamp: new Date(),
    symbols: ['AAPL', 'MSFT'],
    title: 'Tech Giants Show Strong Q4 Performance',
    content: 'Apple and Microsoft both reported better than expected earnings for Q4...',
    url: 'https://example.com/tech-earnings-q4',
    author: 'Financial Reporter',
    processed: true,
    metadata: { word_count: 850, readability_score: 0.75 }
  };

  db.raw_documents.insertOne(sampleArticle);
}
// // Sample raw document
// db.raw_documents.insertOne({
// source: 'financial_news_api',
// document_type: 'news_article',
// timestamp: new Date(),
// symbols: ['AAPL', 'MSFT'],
// title: 'Tech Giants Show Strong Q4 Performance',
// content: 'Apple and Microsoft both reported better than expected earnings for Q4...',
// url: 'https://example.com/tech-earnings-q4',
// author: 'Financial Reporter',
// processed: true,
// metadata: {
// word_count: 850,
// readability_score: 0.75
// }
// });
// Sample market event
// One earnings event for AAPL with no linked source documents.
{
  const sampleEvent = {
    event_type: 'earnings',
    timestamp: new Date(),
    symbols: ['AAPL'],
    description: 'Apple Q4 2024 Earnings Report',
    impact_score: 2.5,
    source_documents: []
  };

  db.market_events.insertOne(sampleEvent);
}
// // Sample market event
// db.market_events.insertOne({
// event_type: 'earnings',
// timestamp: new Date(),
// symbols: ['AAPL'],
// description: 'Apple Q4 2024 Earnings Report',
// impact_score: 2.5,
// source_documents: []
// });
// Report completion to the shell output, one message per line.
for (const message of [
  'MongoDB initialization completed successfully!',
  'Created collections: sentiment_analysis, raw_documents, market_events'
]) {
  print(message);
}

View file

View file