132 lines
4.9 KiB
TypeScript
132 lines
4.9 KiB
TypeScript
import * as yup from 'yup';
|
|
|
|
/**
 * Yup schemas for MongoDB document validation.
 */
|
|
|
|
// Base schema for all documents
|
|
/**
 * Field definitions common to every document schema in this module.
 *
 * - `_id`: any value (`yup.mixed()`), may be omitted.
 * - `created_at` / `updated_at`: required dates.
 * - `source`: required string.
 * - `metadata`: object, may be omitted.
 */
const documentBaseFields = {
  _id: yup.mixed().optional(),
  created_at: yup.date().required(),
  updated_at: yup.date().required(),
  source: yup.string().required(),
  metadata: yup.object().optional(),
};

/**
 * Base object schema that the collection-specific schemas extend with
 * `.shape(...)`.
 */
export const documentBaseSchema = yup.object(documentBaseFields);
|
|
|
|
// Sentiment Data Schema
|
|
/**
 * One element of `sentimentDataSchema.entities`: a required `name` and
 * `type`, plus a required `confidence` constrained to the range 0..1.
 */
const sentimentEntitySchema = yup.object({
  name: yup.string().required(),
  type: yup.string().required(),
  confidence: yup.number().min(0).max(1).required(),
});

/**
 * Sentiment document schema: the base document fields plus the sentiment
 * result for a single piece of text.
 *
 * Every field added here is required except `language`, which has a default
 * of `'en'`.
 */
export const sentimentDataSchema = documentBaseSchema.shape({
  // String length limited to 1..10.
  symbol: yup.string().min(1).max(10).required(),
  // Score limited to -1..1; confidence limited to 0..1.
  sentiment_score: yup.number().min(-1).max(1).required(),
  sentiment_label: yup.string().oneOf(['positive', 'negative', 'neutral']).required(),
  confidence: yup.number().min(0).max(1).required(),
  text: yup.string().min(1).required(),
  source_type: yup.string().oneOf(['reddit', 'twitter', 'news', 'forums']).required(),
  source_id: yup.string().required(),
  timestamp: yup.date().required(),
  processed_at: yup.date().required(),
  language: yup.string().default('en'),
  keywords: yup.array(yup.string()).required(),
  entities: yup.array(sentimentEntitySchema).required(),
});
|
|
|
|
// Raw Document Schema
|
|
/**
 * Fields that `rawDocumentSchema` adds on top of the base document fields.
 *
 * Required: `document_type`, `content`, `content_hash`, `processing_status`
 * and `size_bytes` (must be a positive number). Everything else may be
 * omitted; `url`, when present, must pass yup's URL check.
 */
const rawDocumentFields = {
  document_type: yup.string().oneOf(['html', 'pdf', 'text', 'json', 'xml']).required(),
  content: yup.string().required(),
  content_hash: yup.string().required(),
  url: yup.string().url().optional(),
  title: yup.string().optional(),
  author: yup.string().optional(),
  published_date: yup.date().optional(),
  extracted_text: yup.string().optional(),
  processing_status: yup.string().oneOf(['pending', 'processed', 'failed']).required(),
  size_bytes: yup.number().positive().required(),
  language: yup.string().optional(),
};

/** Raw document schema: base document fields plus `rawDocumentFields`. */
export const rawDocumentSchema = documentBaseSchema.shape(rawDocumentFields);
|
|
|
|
// News Article Schema
|
|
/**
 * Fields that `newsArticleSchema` adds on top of the base document fields.
 *
 * Optional: `summary`, `sentiment_score` (-1..1), `relevance_score` (0..1)
 * and `image_url` (must be a URL when present). All other fields are
 * required.
 */
const newsArticleFields = {
  headline: yup.string().min(1).required(),
  content: yup.string().min(1).required(),
  summary: yup.string().optional(),
  author: yup.string().required(),
  publication: yup.string().required(),
  published_date: yup.date().required(),
  url: yup.string().url().required(),
  symbols: yup.array(yup.string()).required(),
  categories: yup.array(yup.string()).required(),
  sentiment_score: yup.number().min(-1).max(1).optional(),
  relevance_score: yup.number().min(0).max(1).optional(),
  image_url: yup.string().url().optional(),
  tags: yup.array(yup.string()).required(),
};

/** News article schema: base document fields plus `newsArticleFields`. */
export const newsArticleSchema = documentBaseSchema.shape(newsArticleFields);
|
|
|
|
// SEC Filing Schema
|
|
/**
 * One element of `secFilingSchema.financial_statements`: a required
 * `statement_type` string and a required `data` object.
 */
const secFinancialStatementSchema = yup.object({
  statement_type: yup.string().required(),
  data: yup.object().required(),
});

/**
 * SEC filing schema: base document fields plus filing identification,
 * dates, content and processing state.
 *
 * Only `extracted_data` and `financial_statements` may be omitted.
 */
export const secFilingSchema = documentBaseSchema.shape({
  cik: yup.string().required(),
  accession_number: yup.string().required(),
  filing_type: yup.string().required(),
  company_name: yup.string().required(),
  symbols: yup.array(yup.string()).required(),
  filing_date: yup.date().required(),
  period_end_date: yup.date().required(),
  url: yup.string().url().required(),
  content: yup.string().required(),
  extracted_data: yup.object().optional(),
  financial_statements: yup.array(secFinancialStatementSchema).optional(),
  processing_status: yup.string().oneOf(['pending', 'processed', 'failed']).required(),
});
|
|
|
|
// Earnings Transcript Schema
|
|
/**
 * One element of `earningsTranscriptSchema.participants`: required `name`
 * and `title`, and a `type` that must be `'executive'` or `'analyst'`.
 */
const earningsParticipantSchema = yup.object({
  name: yup.string().required(),
  title: yup.string().required(),
  type: yup.string().oneOf(['executive', 'analyst']).required(),
});

/**
 * Optional sentiment summary attached to a transcript.
 *
 * yup casts a value before validating it, and an object schema has a built-in
 * default that builds out its shape. Without `.default(undefined)`, a
 * transcript that omits `sentiment_analysis` would be cast to an object whose
 * children are missing and then rejected because `overall_sentiment` is
 * required. Resetting the default to `undefined` makes the field truly
 * optional while still enforcing the children whenever the object is supplied.
 */
const earningsSentimentAnalysisSchema = yup
  .object({
    overall_sentiment: yup.number().min(-1).max(1).required(),
    topic_sentiments: yup.object().required(),
  })
  .optional()
  .default(undefined);

/**
 * Earnings call transcript schema: base document fields plus call
 * identification, transcript text, participants and topics.
 *
 * `sentiment_analysis` and `financial_highlights` may be omitted; every other
 * field added here is required.
 */
export const earningsTranscriptSchema = documentBaseSchema.shape({
  // String length limited to 1..10.
  symbol: yup.string().min(1).max(10).required(),
  company_name: yup.string().required(),
  quarter: yup.string().required(),
  year: yup.number().min(2000).max(3000).required(),
  call_date: yup.date().required(),
  transcript: yup.string().required(),
  participants: yup.array(earningsParticipantSchema).required(),
  key_topics: yup.array(yup.string()).required(),
  sentiment_analysis: earningsSentimentAnalysisSchema,
  financial_highlights: yup.object().optional(),
});
|
|
|
|
// Analyst Report Schema
|
|
/**
 * Fields that `analystReportSchema` adds on top of the base document fields.
 *
 * Optional: `price_target` (must be positive when present),
 * `previous_rating` and `financial_projections`. All other fields are
 * required, and `rating` must be one of the five listed values.
 */
const analystReportFields = {
  // String length limited to 1..10.
  symbol: yup.string().min(1).max(10).required(),
  analyst_firm: yup.string().required(),
  analyst_name: yup.string().required(),
  report_title: yup.string().required(),
  report_date: yup.date().required(),
  rating: yup.string().oneOf(['buy', 'hold', 'sell', 'strong_buy', 'strong_sell']).required(),
  price_target: yup.number().positive().optional(),
  previous_rating: yup.string().optional(),
  content: yup.string().required(),
  summary: yup.string().required(),
  key_points: yup.array(yup.string()).required(),
  financial_projections: yup.object().optional(),
};

/** Analyst report schema: base document fields plus `analystReportFields`. */
export const analystReportSchema = documentBaseSchema.shape(analystReportFields);
|
|
|
|
// Schema mapping for collections
|
|
/**
 * Lookup table from collection key to the yup schema that validates its
 * documents. Declared `as const` so the key set and each schema's type stay
 * exact for `keyof typeof schemaMap` style lookups.
 *
 * NOTE(review): keys are presumably the MongoDB collection names — confirm
 * against the callers.
 */
export const schemaMap = {
  sentiment_data: sentimentDataSchema,
  raw_documents: rawDocumentSchema,
  news_articles: newsArticleSchema,
  sec_filings: secFilingSchema,
  earnings_transcripts: earningsTranscriptSchema,
  analyst_reports: analystReportSchema,
} as const;
|