import * as yup from 'yup';

/**
 * Yup Schemas for MongoDB Document Validation
 */

// ---------------------------------------------------------------------------
// Private field builders.
// Each call returns a fresh schema so the same constraint can be reused by
// several document schemas without repeating the whole chain.
// ---------------------------------------------------------------------------

/** Required string of 1 to 10 characters, used for the `symbol` fields. */
const tickerField = () => yup.string().min(1).max(10).required();

/** Number constrained to [-1, 1]; callers append `.required()` / `.optional()`. */
const signedUnitNumber = () => yup.number().min(-1).max(1);

/** Number constrained to [0, 1]; callers append `.required()` / `.optional()`. */
const unitNumber = () => yup.number().min(0).max(1);

/** Required array whose elements are validated as strings. */
const requiredStringList = () => yup.array(yup.string()).required();

/** Required string restricted to 'pending' | 'processed' | 'failed'. */
const processingStatusField = () =>
  yup.string().oneOf(['pending', 'processed', 'failed']).required();

// ---------------------------------------------------------------------------
// Nested object shapes used inside the document schemas below.
// ---------------------------------------------------------------------------

/** One element of `sentimentDataSchema.entities`. */
const entityShape = yup.object({
  name: yup.string().required(),
  type: yup.string().required(),
  confidence: unitNumber().required(),
});

/** One element of `secFilingSchema.financial_statements`. */
const financialStatementShape = yup.object({
  statement_type: yup.string().required(),
  data: yup.object().required(),
});

/** One element of `earningsTranscriptSchema.participants`. */
const participantShape = yup.object({
  name: yup.string().required(),
  title: yup.string().required(),
  type: yup.string().oneOf(['executive', 'analyst']).required(),
});

/** Shape of the optional `earningsTranscriptSchema.sentiment_analysis` field. */
const transcriptSentimentShape = yup.object({
  overall_sentiment: signedUnitNumber().required(),
  topic_sentiments: yup.object().required(),
});

// ---------------------------------------------------------------------------
// Exported document schemas.
// ---------------------------------------------------------------------------

/**
 * Base schema for all documents.
 * Every schema below is built from this one via `.shape(...)`.
 */
export const documentBaseSchema = yup.object({
  _id: yup.mixed().optional(),
  created_at: yup.date().required(),
  updated_at: yup.date().required(),
  source: yup.string().required(),
  metadata: yup.object().optional(),
});

/** Sentiment Data Schema. */
export const sentimentDataSchema = documentBaseSchema.shape({
  symbol: tickerField(),
  sentiment_score: signedUnitNumber().required(),
  sentiment_label: yup
    .string()
    .oneOf(['positive', 'negative', 'neutral'])
    .required(),
  confidence: unitNumber().required(),
  text: yup.string().min(1).required(),
  source_type: yup
    .string()
    .oneOf(['reddit', 'twitter', 'news', 'forums'])
    .required(),
  source_id: yup.string().required(),
  timestamp: yup.date().required(),
  processed_at: yup.date().required(),
  // Not required; the schema declares 'en' as its default value.
  language: yup.string().default('en'),
  keywords: requiredStringList(),
  entities: yup.array(entityShape).required(),
});

/** Raw Document Schema. */
export const rawDocumentSchema = documentBaseSchema.shape({
  document_type: yup
    .string()
    .oneOf(['html', 'pdf', 'text', 'json', 'xml'])
    .required(),
  content: yup.string().required(),
  content_hash: yup.string().required(),
  url: yup.string().url().optional(),
  title: yup.string().optional(),
  author: yup.string().optional(),
  published_date: yup.date().optional(),
  extracted_text: yup.string().optional(),
  processing_status: processingStatusField(),
  size_bytes: yup.number().positive().required(),
  language: yup.string().optional(),
});

/** News Article Schema. */
export const newsArticleSchema = documentBaseSchema.shape({
  headline: yup.string().min(1).required(),
  content: yup.string().min(1).required(),
  summary: yup.string().optional(),
  author: yup.string().required(),
  publication: yup.string().required(),
  published_date: yup.date().required(),
  url: yup.string().url().required(),
  symbols: requiredStringList(),
  categories: requiredStringList(),
  sentiment_score: signedUnitNumber().optional(),
  relevance_score: unitNumber().optional(),
  image_url: yup.string().url().optional(),
  tags: requiredStringList(),
});

/** SEC Filing Schema. */
export const secFilingSchema = documentBaseSchema.shape({
  cik: yup.string().required(),
  accession_number: yup.string().required(),
  filing_type: yup.string().required(),
  company_name: yup.string().required(),
  symbols: requiredStringList(),
  filing_date: yup.date().required(),
  period_end_date: yup.date().required(),
  url: yup.string().url().required(),
  content: yup.string().required(),
  extracted_data: yup.object().optional(),
  financial_statements: yup.array(financialStatementShape).optional(),
  processing_status: processingStatusField(),
});

/** Earnings Transcript Schema. */
export const earningsTranscriptSchema = documentBaseSchema.shape({
  symbol: tickerField(),
  company_name: yup.string().required(),
  quarter: yup.string().required(),
  year: yup.number().min(2000).max(3000).required(),
  call_date: yup.date().required(),
  transcript: yup.string().required(),
  participants: yup.array(participantShape).required(),
  key_topics: requiredStringList(),
  sentiment_analysis: transcriptSentimentShape.optional(),
  financial_highlights: yup.object().optional(),
});

/** Analyst Report Schema. */
export const analystReportSchema = documentBaseSchema.shape({
  symbol: tickerField(),
  analyst_firm: yup.string().required(),
  analyst_name: yup.string().required(),
  report_title: yup.string().required(),
  report_date: yup.date().required(),
  rating: yup
    .string()
    .oneOf(['buy', 'hold', 'sell', 'strong_buy', 'strong_sell'])
    .required(),
  price_target: yup.number().positive().optional(),
  previous_rating: yup.string().optional(),
  content: yup.string().required(),
  summary: yup.string().required(),
  key_points: requiredStringList(),
  financial_projections: yup.object().optional(),
});

/**
 * Schema mapping for collections.
 * Keys are collection names; values are the schemas defined above.
 */
export const schemaMap = {
  sentiment_data: sentimentDataSchema,
  raw_documents: rawDocumentSchema,
  news_articles: newsArticleSchema,
  sec_filings: secFilingSchema,
  earnings_transcripts: earningsTranscriptSchema,
  analyst_reports: analystReportSchema,
} as const;