added initial py analytics / rust core / ts orchestrator services

Boki 2025-07-01 11:16:25 -04:00
parent 680b5fd2ae
commit c862ed496b
62 changed files with 13459 additions and 0 deletions


@@ -0,0 +1,410 @@
import numpy as np
import pandas as pd
from scipy import stats
from typing import Dict, List, Tuple, Optional
import logging
from dataclasses import dataclass
from sklearn.model_selection import TimeSeriesSplit
import warnings
logger = logging.getLogger(__name__)
@dataclass
class ValidationResult:
"""Results from statistical validation tests"""
is_overfit: bool
confidence_level: float
psr: float # Probabilistic Sharpe Ratio
dsr: float # Deflated Sharpe Ratio
monte_carlo_percentile: float
out_of_sample_degradation: float
statistical_significance: bool
warnings: List[str]
recommendations: List[str]
class StatisticalValidator:
"""
Statistical validation for backtesting results
Detects overfitting and validates strategy robustness
"""
def __init__(self, min_trades: int = 30, confidence_level: float = 0.95):
self.min_trades = min_trades
self.confidence_level = confidence_level
def validate_backtest(
self,
returns: np.ndarray,
trades: pd.DataFrame,
parameters: Dict,
market_returns: Optional[np.ndarray] = None
) -> ValidationResult:
"""
Comprehensive validation of backtest results
"""
warnings_list = []
recommendations = []
# Check minimum requirements
if len(trades) < self.min_trades:
warnings_list.append(f"Insufficient trades ({len(trades)} < {self.min_trades})")
recommendations.append("Extend backtest period or reduce trading filters")
# Calculate key metrics
sharpe = self.calculate_sharpe_ratio(returns)
psr = self.calculate_probabilistic_sharpe_ratio(sharpe, len(returns))
dsr = self.calculate_deflated_sharpe_ratio(
sharpe, len(returns), len(parameters)
)
# Monte Carlo analysis
mc_percentile = self.monte_carlo_test(returns, trades)
# Out-of-sample testing
oos_degradation = self.out_of_sample_test(returns, trades)
# Statistical significance tests
is_significant = self.test_statistical_significance(returns, market_returns)
# Overfitting detection
is_overfit = self.detect_overfitting(
psr, dsr, mc_percentile, oos_degradation, len(parameters)
)
# Generate recommendations
if dsr < 0.95:
recommendations.append("Reduce strategy complexity or increase sample size")
if mc_percentile < 0.95:
recommendations.append("Strategy may be exploiting random patterns")
if oos_degradation > 0.5:
recommendations.append("Consider walk-forward optimization")
return ValidationResult(
is_overfit=is_overfit,
confidence_level=1 - is_overfit * 0.5, # Simple confidence measure
psr=psr,
dsr=dsr,
monte_carlo_percentile=mc_percentile,
out_of_sample_degradation=oos_degradation,
statistical_significance=is_significant,
warnings=warnings_list,
recommendations=recommendations
)
def calculate_sharpe_ratio(self, returns: np.ndarray) -> float:
"""Calculate annualized Sharpe ratio"""
if len(returns) == 0:
return 0.0
# Assume daily returns
mean_return = np.mean(returns)
std_return = np.std(returns, ddof=1)
if std_return == 0:
return 0.0
# Annualize
sharpe = mean_return / std_return * np.sqrt(252)
return sharpe
def calculate_probabilistic_sharpe_ratio(
self,
sharpe: float,
num_observations: int
) -> float:
"""
Calculate Probabilistic Sharpe Ratio (PSR)
Adjusts the Sharpe ratio for estimation error given the sample size (returns assumed approximately normal)
"""
if num_observations < 2:
return 0.0
# Adjust for sample size
psr = stats.norm.cdf(
sharpe * np.sqrt(num_observations - 1) /
np.sqrt(1 + 0.5 * sharpe**2)
)
return psr
def calculate_deflated_sharpe_ratio(
self,
sharpe: float,
num_observations: int,
num_parameters: int,
num_trials: int = 1
) -> float:
"""
Calculate Deflated Sharpe Ratio (DSR)
Accounts for multiple testing and parameter optimization
"""
if num_observations < num_parameters + 2:
return 0.0
# Expected maximum Sharpe under null hypothesis
expected_max_sharpe = np.sqrt(2 * np.log(num_trials)) / np.sqrt(num_observations)
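# Asymptotic approximation: E[max SR over N independent trials] ~= sqrt(2 * ln(N)) / sqrt(T);
# with the default num_trials=1 this term is zero and no deflation is applied,
# so callers should pass the number of strategy configurations actually tested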
# Standard error of Sharpe ratio
se_sharpe = np.sqrt(
(1 + 0.5 * sharpe**2) / (num_observations - 1)
)
# Deflated Sharpe Ratio
dsr = (sharpe - expected_max_sharpe) / se_sharpe
# Convert to probability
return stats.norm.cdf(dsr)
def monte_carlo_test(
self,
returns: np.ndarray,
trades: pd.DataFrame,
num_simulations: int = 1000
) -> float:
"""
Monte Carlo randomization test
Tests whether the strategy outperforms a no-edge (sign-randomized) null
"""
original_sharpe = self.calculate_sharpe_ratio(returns)
# Generate random strategies
random_sharpes = []
for _ in range(num_simulations):
# Randomly flip the sign of each return (sign-randomization test);
# a plain shuffle would leave the mean and standard deviation, and
# therefore the Sharpe ratio, unchanged
flips = np.random.choice([-1, 1], size=len(returns))
random_sharpe = self.calculate_sharpe_ratio(returns * flips)
random_sharpes.append(random_sharpe)
# Calculate percentile
percentile = np.sum(original_sharpe > np.array(random_sharpes)) / num_simulations
return percentile
def out_of_sample_test(
self,
returns: np.ndarray,
trades: pd.DataFrame,
test_size: float = 0.3
) -> float:
"""
Test performance degradation out-of-sample
"""
if len(returns) < 100: # Need sufficient data
return 0.0
# Split data
split_point = int(len(returns) * (1 - test_size))
in_sample_returns = returns[:split_point]
out_sample_returns = returns[split_point:]
# Calculate Sharpe ratios
is_sharpe = self.calculate_sharpe_ratio(in_sample_returns)
oos_sharpe = self.calculate_sharpe_ratio(out_sample_returns)
# Calculate degradation
if is_sharpe > 0:
degradation = max(0, 1 - oos_sharpe / is_sharpe)
else:
degradation = 1.0
return degradation
def test_statistical_significance(
self,
strategy_returns: np.ndarray,
market_returns: Optional[np.ndarray] = None
) -> bool:
"""
Test if returns are statistically significant
"""
# Test against zero returns
t_stat, p_value = stats.ttest_1samp(strategy_returns, 0)
if p_value < (1 - self.confidence_level):
return True
# If market returns provided, test for alpha
if market_returns is not None and len(market_returns) == len(strategy_returns):
excess_returns = strategy_returns - market_returns
t_stat, p_value = stats.ttest_1samp(excess_returns, 0)
return p_value < (1 - self.confidence_level)
return False
def detect_overfitting(
self,
psr: float,
dsr: float,
mc_percentile: float,
oos_degradation: float,
num_parameters: int
) -> bool:
"""
Detect potential overfitting based on multiple criteria
"""
overfitting_score = 0
# Check PSR
if psr < 0.95:
overfitting_score += 1
# Check DSR
if dsr < 0.95:
overfitting_score += 2 # More weight on DSR
# Check Monte Carlo
if mc_percentile < 0.95:
overfitting_score += 1
# Check out-of-sample degradation
if oos_degradation > 0.5:
overfitting_score += 2
# Check parameter count
if num_parameters > 10:
overfitting_score += 1
# Decision threshold
return overfitting_score >= 3
def walk_forward_analysis(
self,
data: pd.DataFrame,
strategy_func,
window_size: int,
step_size: int,
num_windows: int = 5
) -> Dict:
"""
Perform walk-forward analysis
"""
results = {
'in_sample_sharpes': [],
'out_sample_sharpes': [],
'parameters': [],
'stability_score': 0
}
tscv = TimeSeriesSplit(n_splits=num_windows)
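# TimeSeriesSplit yields expanding training windows followed by the next
# chronological test fold; the window_size and step_size arguments are not
# applied by this splitter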
for train_idx, test_idx in tscv.split(data):
train_data = data.iloc[train_idx]
test_data = data.iloc[test_idx]
# Optimize on training data
best_params = self.optimize_parameters(train_data, strategy_func)
results['parameters'].append(best_params)
# Test on out-of-sample data
is_returns = strategy_func(train_data, best_params)
oos_returns = strategy_func(test_data, best_params)
is_sharpe = self.calculate_sharpe_ratio(is_returns)
oos_sharpe = self.calculate_sharpe_ratio(oos_returns)
results['in_sample_sharpes'].append(is_sharpe)
results['out_sample_sharpes'].append(oos_sharpe)
# Calculate stability score
param_stability = self.calculate_parameter_stability(results['parameters'])
performance_stability = 1 - np.std(results['out_sample_sharpes']) / (np.mean(results['out_sample_sharpes']) + 1e-6)
results['stability_score'] = (param_stability + performance_stability) / 2
return results
def calculate_parameter_stability(self, parameters_list: List[Dict]) -> float:
"""
Calculate how stable parameters are across different periods
"""
if len(parameters_list) < 2:
return 1.0
# Convert to DataFrame for easier analysis
params_df = pd.DataFrame(parameters_list)
# Calculate coefficient of variation for each parameter
stabilities = []
for col in params_df.columns:
if params_df[col].dtype in [np.float64, np.int64]:
mean_val = params_df[col].mean()
std_val = params_df[col].std()
if mean_val != 0:
cv = std_val / abs(mean_val)
stability = 1 / (1 + cv) # Convert to 0-1 scale
stabilities.append(stability)
return np.mean(stabilities) if stabilities else 0.5
def optimize_parameters(self, data: pd.DataFrame, strategy_func) -> Dict:
"""
Placeholder for parameter optimization
In practice, this would use grid search, Bayesian optimization, etc.
"""
# Simple example - would be replaced with actual optimization
return {'param1': 20, 'param2': 2.0}
def bootstrap_confidence_intervals(
self,
returns: np.ndarray,
metric_func,
confidence_level: float = 0.95,
num_samples: int = 1000
) -> Tuple[float, float, float]:
"""
Calculate bootstrap confidence intervals for any metric
"""
bootstrap_metrics = []
for _ in range(num_samples):
# Resample with replacement
sample_returns = np.random.choice(returns, size=len(returns), replace=True)
metric = metric_func(sample_returns)
bootstrap_metrics.append(metric)
# Calculate percentiles
lower_percentile = (1 - confidence_level) / 2
upper_percentile = 1 - lower_percentile
lower_bound = np.percentile(bootstrap_metrics, lower_percentile * 100)
upper_bound = np.percentile(bootstrap_metrics, upper_percentile * 100)
point_estimate = metric_func(returns)
return lower_bound, point_estimate, upper_bound
def generate_report(self, validation_result: ValidationResult) -> str:
"""
Generate human-readable validation report
"""
report = f"""
Statistical Validation Report
============================
Overall Assessment: {'PASSED' if not validation_result.is_overfit else 'FAILED'}
Confidence Level: {validation_result.confidence_level:.1%}
Key Metrics:
-----------
Probabilistic Sharpe Ratio (PSR): {validation_result.psr:.3f}
Deflated Sharpe Ratio (DSR): {validation_result.dsr:.3f}
Monte Carlo Percentile: {validation_result.monte_carlo_percentile:.1%}
Out-of-Sample Degradation: {validation_result.out_of_sample_degradation:.1%}
Statistical Significance: {'Yes' if validation_result.statistical_significance else 'No'}
Warnings:
---------
"""
for warning in validation_result.warnings:
report += f"- {warning}\n"
report += """
Recommendations:
---------------
"""
for rec in validation_result.recommendations:
report += f"- {rec}\n"
return report
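
A minimal usage sketch for the validator above (illustrative only; the module name statistical_validator and the synthetic inputs are assumptions, not part of this commit):

import numpy as np
import pandas as pd
from statistical_validator import StatisticalValidator  # assumed module name for the file above

# Synthetic daily returns and a dummy trade log, purely for illustration
rng = np.random.default_rng(42)
returns = rng.normal(loc=0.0005, scale=0.01, size=252)
trades = pd.DataFrame({"pnl": rng.normal(loc=5.0, scale=20.0, size=60)})

validator = StatisticalValidator(min_trades=30, confidence_level=0.95)
result = validator.validate_backtest(
    returns=returns,
    trades=trades,
    parameters={"lookback": 20, "z_entry": 2.0},
)
print(validator.generate_report(result))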