work on ib and cleanup
This commit is contained in:
parent
a20a11c1aa
commit
d686a72591
41 changed files with 601 additions and 2793 deletions
|
|
@ -2,231 +2,231 @@
|
|||
// This script creates collections and indexes for sentiment and document storage
|
||||
|
||||
// Switch to the target database (this diff replaces 'trading_documents' with 'stock')
|
||||
db = db.getSiblingDB('trading_documents');
|
||||
db = db.getSiblingDB('stock');
|
||||
|
||||
// Create collections with validation schemas
|
||||
// // Create collections with validation schemas
|
||||
|
||||
// Sentiment Analysis Collection
|
||||
db.createCollection('sentiment_analysis', {
|
||||
validator: {
|
||||
$jsonSchema: {
|
||||
bsonType: 'object',
|
||||
required: ['symbol', 'source', 'timestamp', 'sentiment_score'],
|
||||
properties: {
|
||||
symbol: {
|
||||
bsonType: 'string',
|
||||
description: 'Stock symbol (e.g., AAPL, GOOGL)'
|
||||
},
|
||||
source: {
|
||||
bsonType: 'string',
|
||||
description: 'Data source (news, social, earnings_call, etc.)'
|
||||
},
|
||||
timestamp: {
|
||||
bsonType: 'date',
|
||||
description: 'When the sentiment was recorded'
|
||||
},
|
||||
sentiment_score: {
|
||||
bsonType: 'double',
|
||||
minimum: -1.0,
|
||||
maximum: 1.0,
|
||||
description: 'Sentiment score between -1 (negative) and 1 (positive)'
|
||||
},
|
||||
confidence: {
|
||||
bsonType: 'double',
|
||||
minimum: 0.0,
|
||||
maximum: 1.0,
|
||||
description: 'Confidence level of the sentiment analysis'
|
||||
},
|
||||
text_snippet: {
|
||||
bsonType: 'string',
|
||||
description: 'Original text that was analyzed'
|
||||
},
|
||||
metadata: {
|
||||
bsonType: 'object',
|
||||
description: 'Additional metadata about the sentiment source'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
// db.createCollection('sentiment_analysis', {
|
||||
// validator: {
|
||||
// $jsonSchema: {
|
||||
// bsonType: 'object',
|
||||
// required: ['symbol', 'source', 'timestamp', 'sentiment_score'],
|
||||
// properties: {
|
||||
// symbol: {
|
||||
// bsonType: 'string',
|
||||
// description: 'Stock symbol (e.g., AAPL, GOOGL)'
|
||||
// },
|
||||
// source: {
|
||||
// bsonType: 'string',
|
||||
// description: 'Data source (news, social, earnings_call, etc.)'
|
||||
// },
|
||||
// timestamp: {
|
||||
// bsonType: 'date',
|
||||
// description: 'When the sentiment was recorded'
|
||||
// },
|
||||
// sentiment_score: {
|
||||
// bsonType: 'double',
|
||||
// minimum: -1.0,
|
||||
// maximum: 1.0,
|
||||
// description: 'Sentiment score between -1 (negative) and 1 (positive)'
|
||||
// },
|
||||
// confidence: {
|
||||
// bsonType: 'double',
|
||||
// minimum: 0.0,
|
||||
// maximum: 1.0,
|
||||
// description: 'Confidence level of the sentiment analysis'
|
||||
// },
|
||||
// text_snippet: {
|
||||
// bsonType: 'string',
|
||||
// description: 'Original text that was analyzed'
|
||||
// },
|
||||
// metadata: {
|
||||
// bsonType: 'object',
|
||||
// description: 'Additional metadata about the sentiment source'
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// });
|
||||
|
||||
// Raw Documents Collection (for news articles, social media posts, etc.)
|
||||
db.createCollection('raw_documents', {
|
||||
validator: {
|
||||
$jsonSchema: {
|
||||
bsonType: 'object',
|
||||
required: ['source', 'document_type', 'timestamp', 'content'],
|
||||
properties: {
|
||||
source: {
|
||||
bsonType: 'string',
|
||||
description: 'Document source (news_api, twitter, reddit, etc.)'
|
||||
},
|
||||
document_type: {
|
||||
bsonType: 'string',
|
||||
enum: ['news_article', 'social_post', 'earnings_transcript', 'research_report', 'press_release'],
|
||||
description: 'Type of document'
|
||||
},
|
||||
timestamp: {
|
||||
bsonType: 'date',
|
||||
description: 'When the document was created/published'
|
||||
},
|
||||
symbols: {
|
||||
bsonType: 'array',
|
||||
items: {
|
||||
bsonType: 'string'
|
||||
},
|
||||
description: 'Array of stock symbols mentioned in the document'
|
||||
},
|
||||
title: {
|
||||
bsonType: 'string',
|
||||
description: 'Document title or headline'
|
||||
},
|
||||
content: {
|
||||
bsonType: 'string',
|
||||
description: 'Full document content'
|
||||
},
|
||||
url: {
|
||||
bsonType: 'string',
|
||||
description: 'Original URL of the document'
|
||||
},
|
||||
author: {
|
||||
bsonType: 'string',
|
||||
description: 'Document author or source account'
|
||||
},
|
||||
processed: {
|
||||
bsonType: 'bool',
|
||||
description: 'Whether this document has been processed for sentiment'
|
||||
},
|
||||
metadata: {
|
||||
bsonType: 'object',
|
||||
description: 'Additional document metadata'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
// // Raw Documents Collection (for news articles, social media posts, etc.)
|
||||
// db.createCollection('raw_documents', {
|
||||
// validator: {
|
||||
// $jsonSchema: {
|
||||
// bsonType: 'object',
|
||||
// required: ['source', 'document_type', 'timestamp', 'content'],
|
||||
// properties: {
|
||||
// source: {
|
||||
// bsonType: 'string',
|
||||
// description: 'Document source (news_api, twitter, reddit, etc.)'
|
||||
// },
|
||||
// document_type: {
|
||||
// bsonType: 'string',
|
||||
// enum: ['news_article', 'social_post', 'earnings_transcript', 'research_report', 'press_release'],
|
||||
// description: 'Type of document'
|
||||
// },
|
||||
// timestamp: {
|
||||
// bsonType: 'date',
|
||||
// description: 'When the document was created/published'
|
||||
// },
|
||||
// symbols: {
|
||||
// bsonType: 'array',
|
||||
// items: {
|
||||
// bsonType: 'string'
|
||||
// },
|
||||
// description: 'Array of stock symbols mentioned in the document'
|
||||
// },
|
||||
// title: {
|
||||
// bsonType: 'string',
|
||||
// description: 'Document title or headline'
|
||||
// },
|
||||
// content: {
|
||||
// bsonType: 'string',
|
||||
// description: 'Full document content'
|
||||
// },
|
||||
// url: {
|
||||
// bsonType: 'string',
|
||||
// description: 'Original URL of the document'
|
||||
// },
|
||||
// author: {
|
||||
// bsonType: 'string',
|
||||
// description: 'Document author or source account'
|
||||
// },
|
||||
// processed: {
|
||||
// bsonType: 'bool',
|
||||
// description: 'Whether this document has been processed for sentiment'
|
||||
// },
|
||||
// metadata: {
|
||||
// bsonType: 'object',
|
||||
// description: 'Additional document metadata'
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// });
|
||||
|
||||
// Market Events Collection (for significant market events and their impact)
|
||||
db.createCollection('market_events', {
|
||||
validator: {
|
||||
$jsonSchema: {
|
||||
bsonType: 'object',
|
||||
required: ['event_type', 'timestamp', 'description'],
|
||||
properties: {
|
||||
event_type: {
|
||||
bsonType: 'string',
|
||||
enum: ['earnings', 'merger', 'acquisition', 'ipo', 'dividend', 'split', 'regulatory', 'economic_indicator'],
|
||||
description: 'Type of market event'
|
||||
},
|
||||
timestamp: {
|
||||
bsonType: 'date',
|
||||
description: 'When the event occurred or was announced'
|
||||
},
|
||||
symbols: {
|
||||
bsonType: 'array',
|
||||
items: {
|
||||
bsonType: 'string'
|
||||
},
|
||||
description: 'Stock symbols affected by this event'
|
||||
},
|
||||
description: {
|
||||
bsonType: 'string',
|
||||
description: 'Event description'
|
||||
},
|
||||
impact_score: {
|
||||
bsonType: 'double',
|
||||
minimum: -5.0,
|
||||
maximum: 5.0,
|
||||
description: 'Expected market impact score'
|
||||
},
|
||||
source_documents: {
|
||||
bsonType: 'array',
|
||||
items: {
|
||||
bsonType: 'objectId'
|
||||
},
|
||||
description: 'References to raw_documents that reported this event'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
// // Market Events Collection (for significant market events and their impact)
|
||||
// db.createCollection('market_events', {
|
||||
// validator: {
|
||||
// $jsonSchema: {
|
||||
// bsonType: 'object',
|
||||
// required: ['event_type', 'timestamp', 'description'],
|
||||
// properties: {
|
||||
// event_type: {
|
||||
// bsonType: 'string',
|
||||
// enum: ['earnings', 'merger', 'acquisition', 'ipo', 'dividend', 'split', 'regulatory', 'economic_indicator'],
|
||||
// description: 'Type of market event'
|
||||
// },
|
||||
// timestamp: {
|
||||
// bsonType: 'date',
|
||||
// description: 'When the event occurred or was announced'
|
||||
// },
|
||||
// symbols: {
|
||||
// bsonType: 'array',
|
||||
// items: {
|
||||
// bsonType: 'string'
|
||||
// },
|
||||
// description: 'Stock symbols affected by this event'
|
||||
// },
|
||||
// description: {
|
||||
// bsonType: 'string',
|
||||
// description: 'Event description'
|
||||
// },
|
||||
// impact_score: {
|
||||
// bsonType: 'double',
|
||||
// minimum: -5.0,
|
||||
// maximum: 5.0,
|
||||
// description: 'Expected market impact score'
|
||||
// },
|
||||
// source_documents: {
|
||||
// bsonType: 'array',
|
||||
// items: {
|
||||
// bsonType: 'objectId'
|
||||
// },
|
||||
// description: 'References to raw_documents that reported this event'
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// });
|
||||
|
||||
// Create indexes for efficient querying
|
||||
// // Create indexes for efficient querying
|
||||
|
||||
// Sentiment Analysis indexes
|
||||
db.sentiment_analysis.createIndex({ symbol: 1, timestamp: -1 });
|
||||
db.sentiment_analysis.createIndex({ source: 1, timestamp: -1 });
|
||||
db.sentiment_analysis.createIndex({ timestamp: -1 });
|
||||
db.sentiment_analysis.createIndex({ symbol: 1, source: 1, timestamp: -1 });
|
||||
// // Sentiment Analysis indexes
|
||||
// db.sentiment_analysis.createIndex({ symbol: 1, timestamp: -1 });
|
||||
// db.sentiment_analysis.createIndex({ source: 1, timestamp: -1 });
|
||||
// db.sentiment_analysis.createIndex({ timestamp: -1 });
|
||||
// db.sentiment_analysis.createIndex({ symbol: 1, source: 1, timestamp: -1 });
|
||||
|
||||
// Raw Documents indexes
|
||||
db.raw_documents.createIndex({ symbols: 1, timestamp: -1 });
|
||||
db.raw_documents.createIndex({ source: 1, timestamp: -1 });
|
||||
db.raw_documents.createIndex({ document_type: 1, timestamp: -1 });
|
||||
db.raw_documents.createIndex({ processed: 1, timestamp: -1 });
|
||||
db.raw_documents.createIndex({ timestamp: -1 });
|
||||
// // Raw Documents indexes
|
||||
// db.raw_documents.createIndex({ symbols: 1, timestamp: -1 });
|
||||
// db.raw_documents.createIndex({ source: 1, timestamp: -1 });
|
||||
// db.raw_documents.createIndex({ document_type: 1, timestamp: -1 });
|
||||
// db.raw_documents.createIndex({ processed: 1, timestamp: -1 });
|
||||
// db.raw_documents.createIndex({ timestamp: -1 });
|
||||
|
||||
// Market Events indexes
|
||||
db.market_events.createIndex({ symbols: 1, timestamp: -1 });
|
||||
db.market_events.createIndex({ event_type: 1, timestamp: -1 });
|
||||
db.market_events.createIndex({ timestamp: -1 });
|
||||
// // Market Events indexes
|
||||
// db.market_events.createIndex({ symbols: 1, timestamp: -1 });
|
||||
// db.market_events.createIndex({ event_type: 1, timestamp: -1 });
|
||||
// db.market_events.createIndex({ timestamp: -1 });
|
||||
|
||||
// Insert some sample data for testing
|
||||
// // Insert some sample data for testing
|
||||
|
||||
// Sample sentiment data
|
||||
db.sentiment_analysis.insertMany([
|
||||
{
|
||||
symbol: 'AAPL',
|
||||
source: 'news_analysis',
|
||||
timestamp: new Date(),
|
||||
sentiment_score: 0.75,
|
||||
confidence: 0.89,
|
||||
text_snippet: 'Apple reports strong quarterly earnings...',
|
||||
metadata: {
|
||||
article_id: 'news_001',
|
||||
provider: 'financial_news_api'
|
||||
}
|
||||
},
|
||||
{
|
||||
symbol: 'GOOGL',
|
||||
source: 'social_media',
|
||||
timestamp: new Date(),
|
||||
sentiment_score: -0.25,
|
||||
confidence: 0.67,
|
||||
text_snippet: 'Concerns about Google AI regulation...',
|
||||
metadata: {
|
||||
platform: 'twitter',
|
||||
engagement_score: 450
|
||||
}
|
||||
}
|
||||
]);
|
||||
// // Sample sentiment data
|
||||
// db.sentiment_analysis.insertMany([
|
||||
// {
|
||||
// symbol: 'AAPL',
|
||||
// source: 'news_analysis',
|
||||
// timestamp: new Date(),
|
||||
// sentiment_score: 0.75,
|
||||
// confidence: 0.89,
|
||||
// text_snippet: 'Apple reports strong quarterly earnings...',
|
||||
// metadata: {
|
||||
// article_id: 'news_001',
|
||||
// provider: 'financial_news_api'
|
||||
// }
|
||||
// },
|
||||
// {
|
||||
// symbol: 'GOOGL',
|
||||
// source: 'social_media',
|
||||
// timestamp: new Date(),
|
||||
// sentiment_score: -0.25,
|
||||
// confidence: 0.67,
|
||||
// text_snippet: 'Concerns about Google AI regulation...',
|
||||
// metadata: {
|
||||
// platform: 'twitter',
|
||||
// engagement_score: 450
|
||||
// }
|
||||
// }
|
||||
// ]);
|
||||
|
||||
// Sample raw document
|
||||
db.raw_documents.insertOne({
|
||||
source: 'financial_news_api',
|
||||
document_type: 'news_article',
|
||||
timestamp: new Date(),
|
||||
symbols: ['AAPL', 'MSFT'],
|
||||
title: 'Tech Giants Show Strong Q4 Performance',
|
||||
content: 'Apple and Microsoft both reported better than expected earnings for Q4...',
|
||||
url: 'https://example.com/tech-earnings-q4',
|
||||
author: 'Financial Reporter',
|
||||
processed: true,
|
||||
metadata: {
|
||||
word_count: 850,
|
||||
readability_score: 0.75
|
||||
}
|
||||
});
|
||||
// // Sample raw document
|
||||
// db.raw_documents.insertOne({
|
||||
// source: 'financial_news_api',
|
||||
// document_type: 'news_article',
|
||||
// timestamp: new Date(),
|
||||
// symbols: ['AAPL', 'MSFT'],
|
||||
// title: 'Tech Giants Show Strong Q4 Performance',
|
||||
// content: 'Apple and Microsoft both reported better than expected earnings for Q4...',
|
||||
// url: 'https://example.com/tech-earnings-q4',
|
||||
// author: 'Financial Reporter',
|
||||
// processed: true,
|
||||
// metadata: {
|
||||
// word_count: 850,
|
||||
// readability_score: 0.75
|
||||
// }
|
||||
// });
|
||||
|
||||
// Sample market event
|
||||
db.market_events.insertOne({
|
||||
event_type: 'earnings',
|
||||
timestamp: new Date(),
|
||||
symbols: ['AAPL'],
|
||||
description: 'Apple Q4 2024 Earnings Report',
|
||||
impact_score: 2.5,
|
||||
source_documents: []
|
||||
});
|
||||
// // Sample market event
|
||||
// db.market_events.insertOne({
|
||||
// event_type: 'earnings',
|
||||
// timestamp: new Date(),
|
||||
// symbols: ['AAPL'],
|
||||
// description: 'Apple Q4 2024 Earnings Report',
|
||||
// impact_score: 2.5,
|
||||
// source_documents: []
|
||||
// });
|
||||
|
||||
print('MongoDB initialization completed successfully!');
|
||||
print('Created collections: sentiment_analysis, raw_documents, market_events');
|
||||
|
|
|
|||
0
database/postgres/providers/01-ib-simple.sql
Normal file
0
database/postgres/providers/01-ib-simple.sql
Normal file
0
database/postgres/scripts/ib.ts
Normal file
0
database/postgres/scripts/ib.ts
Normal file
0
database/postgres/scripts/populate-ib-exchanges.ts
Normal file
0
database/postgres/scripts/populate-ib-exchanges.ts
Normal file
0
database/postgres/scripts/setup-ib-fast.ts
Normal file
0
database/postgres/scripts/setup-ib-fast.ts
Normal file
0
database/postgres/scripts/setup-ib-schema-simple.ts
Normal file
0
database/postgres/scripts/setup-ib-schema-simple.ts
Normal file
0
database/postgres/scripts/setup-ib-schema.ts
Normal file
0
database/postgres/scripts/setup-ib-schema.ts
Normal file
0
database/postgres/scripts/setup.ts
Normal file
0
database/postgres/scripts/setup.ts
Normal file
Loading…
Add table
Add a link
Reference in a new issue