stock-bot/libs/mongodb-client/src/schemas.ts
2025-06-09 20:19:25 -04:00

132 lines
4.9 KiB
TypeScript

import * as yup from 'yup';
/**
* Yup Schemas for MongoDB Document Validation
*/
/**
 * Field validators shared by every document schema in this module.
 *
 * - `_id`: any value; may be omitted.
 * - `created_at` / `updated_at`: required dates.
 * - `source`: required string.
 * - `metadata`: optional object with no declared inner shape.
 */
const baseDocumentFields = {
  _id: yup.mixed().optional(),
  created_at: yup.date().required(),
  updated_at: yup.date().required(),
  source: yup.string().required(),
  metadata: yup.object().optional(),
};

/** Base schema for all documents; the other schemas in this file extend it via `.shape()`. */
export const documentBaseSchema = yup.object(baseDocumentFields);
/**
 * Schema for a single element of `entities`: required `name` and `type`
 * strings plus a required `confidence` number in the range [0, 1].
 */
const sentimentEntitySchema = yup.object({
  name: yup.string().required(),
  type: yup.string().required(),
  confidence: yup.number().min(0).max(1).required(),
});

/**
 * Sentiment Data Schema.
 *
 * Extends `documentBaseSchema` with the fields below. Every field is required
 * except `language`, which is not marked required and defaults to `'en'`.
 */
export const sentimentDataSchema = documentBaseSchema.shape({
  // 1 to 10 characters.
  symbol: yup.string().min(1).max(10).required(),

  // Score in [-1, 1], a label from a fixed list, and a confidence in [0, 1].
  sentiment_score: yup.number().min(-1).max(1).required(),
  sentiment_label: yup.string().oneOf(['positive', 'negative', 'neutral']).required(),
  confidence: yup.number().min(0).max(1).required(),

  // Non-empty text and where it came from.
  text: yup.string().min(1).required(),
  source_type: yup.string().oneOf(['reddit', 'twitter', 'news', 'forums']).required(),
  source_id: yup.string().required(),

  timestamp: yup.date().required(),
  processed_at: yup.date().required(),
  language: yup.string().default('en'),

  // The arrays themselves are required; `array().of(x)` is the long form of `array(x)`.
  keywords: yup.array().of(yup.string()).required(),
  entities: yup.array().of(sentimentEntitySchema).required(),
});
/**
 * Raw Document Schema.
 *
 * Extends `documentBaseSchema` with the fields below.
 *
 * Required: `document_type` (one of html/pdf/text/json/xml), `content`,
 * `content_hash`, `processing_status` (one of pending/processed/failed) and
 * `size_bytes`.
 * Optional: `url` (must be a valid URL when present), `title`, `author`,
 * `published_date`, `extracted_text` and `language`.
 */
export const rawDocumentSchema = documentBaseSchema.shape({
  document_type: yup.string().oneOf(['html', 'pdf', 'text', 'json', 'xml']).required(),
  content: yup.string().required(),
  content_hash: yup.string().required(),
  url: yup.string().url().optional(),
  title: yup.string().optional(),
  author: yup.string().optional(),
  published_date: yup.date().optional(),
  extracted_text: yup.string().optional(),
  processing_status: yup.string().oneOf(['pending', 'processed', 'failed']).required(),
  // A byte count is a whole number: `.positive()` alone would accept values
  // such as 0.5, so `.integer()` is required as well.
  size_bytes: yup.number().positive().integer().required(),
  language: yup.string().optional(),
});
/**
 * Field validators that `newsArticleSchema` adds on top of the base document
 * fields.
 */
const newsArticleFields = {
  // Required, non-empty text fields.
  headline: yup.string().min(1).required(),
  content: yup.string().min(1).required(),
  summary: yup.string().optional(),

  // Required attribution and publication details; `url` must be a valid URL.
  author: yup.string().required(),
  publication: yup.string().required(),
  published_date: yup.date().required(),
  url: yup.string().url().required(),

  // Required string arrays (`array().of(x)` is the long form of `array(x)`).
  symbols: yup.array().of(yup.string()).required(),
  categories: yup.array().of(yup.string()).required(),

  // Optional scores: sentiment in [-1, 1], relevance in [0, 1].
  sentiment_score: yup.number().min(-1).max(1).optional(),
  relevance_score: yup.number().min(0).max(1).optional(),

  image_url: yup.string().url().optional(),
  tags: yup.array().of(yup.string()).required(),
};

/** News Article Schema: `documentBaseSchema` extended with `newsArticleFields`. */
export const newsArticleSchema = documentBaseSchema.shape(newsArticleFields);
/**
 * Schema for a single element of `financial_statements`: a required
 * `statement_type` string and a required `data` object with no declared
 * inner shape.
 */
const financialStatementSchema = yup.object({
  statement_type: yup.string().required(),
  data: yup.object().required(),
});

/**
 * SEC Filing Schema.
 *
 * Extends `documentBaseSchema` with the fields below. Only `extracted_data`
 * and `financial_statements` are optional.
 */
export const secFilingSchema = documentBaseSchema.shape({
  // Required identifying strings.
  cik: yup.string().required(),
  accession_number: yup.string().required(),
  filing_type: yup.string().required(),
  company_name: yup.string().required(),
  symbols: yup.array().of(yup.string()).required(),

  // Required dates.
  filing_date: yup.date().required(),
  period_end_date: yup.date().required(),

  // `url` must be a valid URL.
  url: yup.string().url().required(),
  content: yup.string().required(),

  extracted_data: yup.object().optional(),
  financial_statements: yup.array().of(financialStatementSchema).optional(),

  processing_status: yup.string().oneOf(['pending', 'processed', 'failed']).required(),
});
/**
 * Earnings Transcript Schema.
 *
 * Extends `documentBaseSchema` with the fields below.
 *
 * Required: `symbol` (1 to 10 characters), `company_name`, `quarter`, `year`
 * (whole number from 2000 to 3000), `call_date`, `transcript`, `participants`
 * and `key_topics`.
 * Optional: `sentiment_analysis` and `financial_highlights`.
 */
export const earningsTranscriptSchema = documentBaseSchema.shape({
  symbol: yup.string().min(1).max(10).required(),
  company_name: yup.string().required(),
  quarter: yup.string().required(),
  // A year is a whole number: `.integer()` rejects values such as 2024.5,
  // which the min/max bounds alone would accept.
  year: yup.number().integer().min(2000).max(3000).required(),
  call_date: yup.date().required(),
  transcript: yup.string().required(),
  // Each participant needs a name, a title and a type of 'executive' or 'analyst'.
  participants: yup.array(yup.object({
    name: yup.string().required(),
    title: yup.string().required(),
    type: yup.string().oneOf(['executive', 'analyst']).required(),
  })).required(),
  key_topics: yup.array(yup.string()).required(),
  // The object as a whole is optional; its two fields are marked required.
  sentiment_analysis: yup.object({
    overall_sentiment: yup.number().min(-1).max(1).required(),
    topic_sentiments: yup.object().required(),
  }).optional(),
  financial_highlights: yup.object().optional(),
});
/**
 * Analyst Report Schema.
 *
 * Extends `documentBaseSchema` with the fields below. Only `price_target`,
 * `previous_rating` and `financial_projections` are optional.
 */
export const analystReportSchema = documentBaseSchema.shape({
  // 1 to 10 characters.
  symbol: yup.string().min(1).max(10).required(),

  // Required report attribution.
  analyst_firm: yup.string().required(),
  analyst_name: yup.string().required(),
  report_title: yup.string().required(),
  report_date: yup.date().required(),

  // `rating` must be one of the five listed values.
  rating: yup.string().oneOf(['buy', 'hold', 'sell', 'strong_buy', 'strong_sell']).required(),
  // Must be greater than zero when present.
  price_target: yup.number().positive().optional(),
  // NOTE(review): unlike `rating`, this accepts any string; confirm whether it
  // should be limited to the same value list.
  previous_rating: yup.string().optional(),

  content: yup.string().required(),
  summary: yup.string().required(),
  // `array().of(x)` is the long form of `array(x)`.
  key_points: yup.array().of(yup.string()).required(),
  financial_projections: yup.object().optional(),
});
/**
 * Schema mapping for collections.
 *
 * Each key is paired with the schema used for that collection. The `as const`
 * assertion makes the properties read-only at the type level.
 */
export const schemaMap = {
  sentiment_data: sentimentDataSchema,
  raw_documents: rawDocumentSchema,
  news_articles: newsArticleSchema,
  sec_filings: secFilingSchema,
  earnings_transcripts: earningsTranscriptSchema,
  analyst_reports: analystReportSchema,
} as const;