added initial integration tests with bun

This commit is contained in:
Bojan Kucera 2025-06-04 12:26:55 -04:00
parent 3e451558ac
commit fb22815450
52 changed files with 7588 additions and 364 deletions

View file

@ -0,0 +1,132 @@
import { z } from 'zod';
/**
* Zod Schemas for MongoDB Document Validation
*/
/**
 * Fields common to every document schema defined in this module.
 *
 * - `_id` is optional and accepts any value.
 * - `created_at` / `updated_at` must be `Date` instances.
 * - `source` must be a string.
 * - `metadata` is an optional string-keyed record of arbitrary values.
 */
const baseDocumentShape = {
  _id: z.any().optional(),
  created_at: z.date(),
  updated_at: z.date(),
  source: z.string(),
  metadata: z.record(z.any()).optional(),
};

/** Base schema that the collection-specific schemas extend. */
export const documentBaseSchema = z.object(baseDocumentShape);
/**
 * A single entry of the `entities` array on a sentiment document:
 * a name, a free-form type string and a confidence in the range [0, 1].
 */
const sentimentEntitySchema = z.object({
  name: z.string(),
  type: z.string(),
  confidence: z.number().gte(0).lte(1),
});

/**
 * Sentiment Data Schema.
 *
 * Extends the base document with a sentiment reading for one `symbol`
 * (1-10 characters): a score in [-1, 1], a categorical label, a confidence
 * in [0, 1], the non-empty analysed `text`, where it came from, and the
 * keywords/entities attached to it. `language` falls back to `'en'` when
 * it is not supplied.
 */
export const sentimentDataSchema = documentBaseSchema.extend({
  symbol: z.string().min(1).max(10),
  sentiment_score: z.number().gte(-1).lte(1),
  sentiment_label: z.enum(['positive', 'negative', 'neutral']),
  confidence: z.number().gte(0).lte(1),
  text: z.string().min(1),
  source_type: z.enum(['reddit', 'twitter', 'news', 'forums']),
  source_id: z.string(),
  timestamp: z.date(),
  processed_at: z.date(),
  language: z.string().default('en'),
  keywords: z.array(z.string()),
  entities: z.array(sentimentEntitySchema),
});
/**
 * Raw Document Schema.
 *
 * Extends the base document with the raw `content` of a fetched document,
 * its `content_hash`, its declared `document_type`, and its
 * `processing_status`. Descriptive fields (`url`, `title`, `author`,
 * `published_date`, `extracted_text`, `language`) are optional; `url`,
 * when present, must be a valid URL.
 */
export const rawDocumentSchema = documentBaseSchema.extend({
  document_type: z.enum(['html', 'pdf', 'text', 'json', 'xml']),
  content: z.string(),
  content_hash: z.string(),
  url: z.string().url().optional(),
  title: z.string().optional(),
  author: z.string().optional(),
  published_date: z.date().optional(),
  extracted_text: z.string().optional(),
  processing_status: z.enum(['pending', 'processed', 'failed']),
  // A byte count is a whole number, so fractional sizes are rejected.
  // NOTE(review): `content` may be an empty string while `size_bytes` must
  // be > 0 — confirm whether zero-byte documents should be accepted.
  size_bytes: z.number().int().positive(),
  language: z.string().optional(),
});
/**
 * Article-specific fields layered on top of the base document.
 *
 * `headline` and `content` must be non-empty; `url` (and `image_url`, when
 * given) must be valid URLs. The optional scores are bounded:
 * `sentiment_score` to [-1, 1] and `relevance_score` to [0, 1].
 */
const newsArticleFields = {
  headline: z.string().min(1),
  content: z.string().min(1),
  summary: z.string().optional(),
  author: z.string(),
  publication: z.string(),
  published_date: z.date(),
  url: z.string().url(),
  symbols: z.array(z.string()),
  categories: z.array(z.string()),
  sentiment_score: z.number().gte(-1).lte(1).optional(),
  relevance_score: z.number().gte(0).lte(1).optional(),
  image_url: z.string().url().optional(),
  tags: z.array(z.string()),
};

/** News Article Schema. */
export const newsArticleSchema = documentBaseSchema.extend(newsArticleFields);
/**
 * One entry of a filing's `financial_statements` array: a statement type
 * plus a string-keyed record of numeric values.
 */
const secFinancialStatementSchema = z.object({
  statement_type: z.string(),
  data: z.record(z.number()),
});

/**
 * SEC Filing Schema.
 *
 * Extends the base document with the identifiers of a filing (`cik`,
 * `accession_number`, `filing_type`), the company it belongs to, its dates,
 * a valid `url`, the filing `content`, and a `processing_status`.
 * `extracted_data` and `financial_statements` are optional.
 */
export const secFilingSchema = documentBaseSchema.extend({
  cik: z.string(),
  accession_number: z.string(),
  filing_type: z.string(),
  company_name: z.string(),
  symbols: z.array(z.string()),
  filing_date: z.date(),
  period_end_date: z.date(),
  url: z.string().url(),
  content: z.string(),
  extracted_data: z.record(z.any()).optional(),
  financial_statements: z.array(secFinancialStatementSchema).optional(),
  processing_status: z.enum(['pending', 'processed', 'failed']),
});
/**
 * Earnings Transcript Schema.
 *
 * Extends the base document with one earnings call for a `symbol`
 * (1-10 characters): the company name, the `quarter`/`year` it covers, the
 * call date, the full `transcript`, its participants (each either an
 * `executive` or an `analyst`) and the key topics discussed.
 * `sentiment_analysis` and `financial_highlights` are optional.
 */
export const earningsTranscriptSchema = documentBaseSchema.extend({
  symbol: z.string().min(1).max(10),
  company_name: z.string(),
  quarter: z.string(),
  // Years are whole numbers; without `.int()` a value such as 2024.5
  // would pass the range check.
  year: z.number().int().min(2000).max(3000),
  call_date: z.date(),
  transcript: z.string(),
  participants: z.array(z.object({
    name: z.string(),
    title: z.string(),
    type: z.enum(['executive', 'analyst']),
  })),
  key_topics: z.array(z.string()),
  sentiment_analysis: z.object({
    // Overall score is bounded to [-1, 1]; per-topic scores are unbounded numbers.
    overall_sentiment: z.number().min(-1).max(1),
    topic_sentiments: z.record(z.number()),
  }).optional(),
  financial_highlights: z.record(z.number()).optional(),
});
/** The closed set of values accepted for an analyst report's `rating`. */
const analystRatingSchema = z.enum(['buy', 'hold', 'sell', 'strong_buy', 'strong_sell']);

/**
 * Analyst Report Schema.
 *
 * Extends the base document with one analyst report on a `symbol`
 * (1-10 characters): who wrote it, its title and date, the `rating`, the
 * report `content`, a `summary` and its key points. `price_target` (which
 * must be greater than zero when present), `previous_rating` (a free-form
 * string) and `financial_projections` are optional.
 */
export const analystReportSchema = documentBaseSchema.extend({
  symbol: z.string().min(1).max(10),
  analyst_firm: z.string(),
  analyst_name: z.string(),
  report_title: z.string(),
  report_date: z.date(),
  rating: analystRatingSchema,
  price_target: z.number().positive().optional(),
  previous_rating: z.string().optional(),
  content: z.string(),
  summary: z.string(),
  key_points: z.array(z.string()),
  financial_projections: z.record(z.number()).optional(),
});
/**
 * Schema mapping for collections.
 *
 * Each key is a collection name and each value is the Zod schema defined in
 * this module for documents of that collection. `as const` keeps the keys
 * as literal, read-only properties at the type level, so the key set can be
 * derived with `keyof typeof schemaMap`.
 */
export const schemaMap = {
sentiment_data: sentimentDataSchema,
raw_documents: rawDocumentSchema,
news_articles: newsArticleSchema,
sec_filings: secFilingSchema,
earnings_transcripts: earningsTranscriptSchema,
analyst_reports: analystReportSchema,
} as const;