added initial py analytics / rust core / ts orchestrator services
This commit is contained in:
parent
680b5fd2ae
commit
c862ed496b
62 changed files with 13459 additions and 0 deletions
410
apps/stock/analytics/src/analysis/statistical_validation.py
Normal file
410
apps/stock/analytics/src/analysis/statistical_validation.py
Normal file
|
|
@ -0,0 +1,410 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy import stats
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from sklearn.model_selection import TimeSeriesSplit
|
||||
import warnings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@dataclass
class ValidationResult:
    """Results from statistical validation tests

    Produced by StatisticalValidator.validate_backtest; each field mirrors
    one of the checks run there.
    """
    # True when detect_overfitting()'s weighted score crosses its threshold.
    is_overfit: bool
    # Derived in validate_backtest as 1 - is_overfit * 0.5, i.e. 1.0 when
    # clean, 0.5 when flagged as overfit.
    confidence_level: float
    psr: float  # Probabilistic Sharpe Ratio
    dsr: float  # Deflated Sharpe Ratio
    # Fraction of Monte Carlo simulations the real strategy's Sharpe beat.
    monte_carlo_percentile: float
    # Relative Sharpe drop from in-sample to out-of-sample (0 = no drop).
    out_of_sample_degradation: float
    # True when returns (or excess returns vs. the market) pass the t-test.
    statistical_significance: bool
    # Human-readable issues found during validation.
    warnings: List[str]
    # Suggested remediations for the issues above.
    recommendations: List[str]
|
||||
|
||||
class StatisticalValidator:
    """Validate backtest results statistically.

    Bundles overfitting detection (PSR/DSR/Monte Carlo), out-of-sample and
    walk-forward checks, and significance tests for strategy returns.
    """

    def __init__(self, min_trades: int = 30, confidence_level: float = 0.95):
        """Configure validation thresholds.

        Args:
            min_trades: fewer executed trades than this triggers a warning.
            confidence_level: confidence level used by the significance tests.
        """
        # Stored as-is; consumed by validate_backtest and the t-tests.
        self.min_trades = min_trades
        self.confidence_level = confidence_level
|
||||
|
||||
    def validate_backtest(
        self,
        returns: np.ndarray,
        trades: pd.DataFrame,
        parameters: Dict,
        market_returns: Optional[np.ndarray] = None
    ) -> ValidationResult:
        """
        Comprehensive validation of backtest results

        Runs every check in this class (PSR, DSR, Monte Carlo permutation,
        out-of-sample degradation, significance) and aggregates the outcomes
        into a single ValidationResult.

        Args:
            returns: per-period strategy returns (daily assumed downstream).
            trades: executed trades; only the row count is used here.
            parameters: strategy parameters; only len() feeds the DSR and
                the overfitting check.
            market_returns: optional benchmark series for the alpha t-test.
        """
        warnings_list = []
        recommendations = []

        # Check minimum requirements: too few trades makes every statistic
        # below unreliable, so warn but still proceed.
        if len(trades) < self.min_trades:
            warnings_list.append(f"Insufficient trades ({len(trades)} < {self.min_trades})")
            recommendations.append("Extend backtest period or reduce trading filters")

        # Calculate key metrics
        sharpe = self.calculate_sharpe_ratio(returns)
        psr = self.calculate_probabilistic_sharpe_ratio(sharpe, len(returns))
        dsr = self.calculate_deflated_sharpe_ratio(
            sharpe, len(returns), len(parameters)
        )

        # Monte Carlo analysis (percentile vs. randomized strategies)
        mc_percentile = self.monte_carlo_test(returns, trades)

        # Out-of-sample testing (Sharpe degradation on the holdout tail)
        oos_degradation = self.out_of_sample_test(returns, trades)

        # Statistical significance tests (vs. zero, then vs. market if given)
        is_significant = self.test_statistical_significance(returns, market_returns)

        # Overfitting detection: weighted vote over the metrics above
        is_overfit = self.detect_overfitting(
            psr, dsr, mc_percentile, oos_degradation, len(parameters)
        )

        # Generate recommendations keyed to the individual failing checks
        if dsr < 0.95:
            recommendations.append("Reduce strategy complexity or increase sample size")
        if mc_percentile < 0.95:
            recommendations.append("Strategy may be exploiting random patterns")
        if oos_degradation > 0.5:
            recommendations.append("Consider walk-forward optimization")

        return ValidationResult(
            is_overfit=is_overfit,
            # bool arithmetic: 1.0 when clean, 0.5 when overfit — a crude
            # confidence proxy, not a calibrated probability.
            confidence_level=1 - is_overfit * 0.5,
            psr=psr,
            dsr=dsr,
            monte_carlo_percentile=mc_percentile,
            out_of_sample_degradation=oos_degradation,
            statistical_significance=is_significant,
            warnings=warnings_list,
            recommendations=recommendations
        )
|
||||
|
||||
def calculate_sharpe_ratio(self, returns: np.ndarray) -> float:
|
||||
"""Calculate annualized Sharpe ratio"""
|
||||
if len(returns) == 0:
|
||||
return 0.0
|
||||
|
||||
# Assume daily returns
|
||||
mean_return = np.mean(returns)
|
||||
std_return = np.std(returns, ddof=1)
|
||||
|
||||
if std_return == 0:
|
||||
return 0.0
|
||||
|
||||
# Annualize
|
||||
sharpe = mean_return / std_return * np.sqrt(252)
|
||||
return sharpe
|
||||
|
||||
def calculate_probabilistic_sharpe_ratio(
|
||||
self,
|
||||
sharpe: float,
|
||||
num_observations: int
|
||||
) -> float:
|
||||
"""
|
||||
Calculate Probabilistic Sharpe Ratio (PSR)
|
||||
Adjusts for sample size and non-normality
|
||||
"""
|
||||
if num_observations < 2:
|
||||
return 0.0
|
||||
|
||||
# Adjust for sample size
|
||||
psr = stats.norm.cdf(
|
||||
sharpe * np.sqrt(num_observations - 1) /
|
||||
np.sqrt(1 + 0.5 * sharpe**2)
|
||||
)
|
||||
|
||||
return psr
|
||||
|
||||
def calculate_deflated_sharpe_ratio(
|
||||
self,
|
||||
sharpe: float,
|
||||
num_observations: int,
|
||||
num_parameters: int,
|
||||
num_trials: int = 1
|
||||
) -> float:
|
||||
"""
|
||||
Calculate Deflated Sharpe Ratio (DSR)
|
||||
Accounts for multiple testing and parameter optimization
|
||||
"""
|
||||
if num_observations < num_parameters + 2:
|
||||
return 0.0
|
||||
|
||||
# Expected maximum Sharpe under null hypothesis
|
||||
expected_max_sharpe = np.sqrt(2 * np.log(num_trials)) / np.sqrt(num_observations)
|
||||
|
||||
# Standard error of Sharpe ratio
|
||||
se_sharpe = np.sqrt(
|
||||
(1 + 0.5 * sharpe**2) / (num_observations - 1)
|
||||
)
|
||||
|
||||
# Deflated Sharpe Ratio
|
||||
dsr = (sharpe - expected_max_sharpe) / se_sharpe
|
||||
|
||||
# Convert to probability
|
||||
return stats.norm.cdf(dsr)
|
||||
|
||||
def monte_carlo_test(
|
||||
self,
|
||||
returns: np.ndarray,
|
||||
trades: pd.DataFrame,
|
||||
num_simulations: int = 1000
|
||||
) -> float:
|
||||
"""
|
||||
Monte Carlo permutation test
|
||||
Tests if strategy is better than random
|
||||
"""
|
||||
original_sharpe = self.calculate_sharpe_ratio(returns)
|
||||
|
||||
# Generate random strategies
|
||||
random_sharpes = []
|
||||
|
||||
for _ in range(num_simulations):
|
||||
# Randomly shuffle trade outcomes
|
||||
shuffled_returns = np.random.permutation(returns)
|
||||
random_sharpe = self.calculate_sharpe_ratio(shuffled_returns)
|
||||
random_sharpes.append(random_sharpe)
|
||||
|
||||
# Calculate percentile
|
||||
percentile = np.sum(original_sharpe > np.array(random_sharpes)) / num_simulations
|
||||
|
||||
return percentile
|
||||
|
||||
def out_of_sample_test(
|
||||
self,
|
||||
returns: np.ndarray,
|
||||
trades: pd.DataFrame,
|
||||
test_size: float = 0.3
|
||||
) -> float:
|
||||
"""
|
||||
Test performance degradation out-of-sample
|
||||
"""
|
||||
if len(returns) < 100: # Need sufficient data
|
||||
return 0.0
|
||||
|
||||
# Split data
|
||||
split_point = int(len(returns) * (1 - test_size))
|
||||
in_sample_returns = returns[:split_point]
|
||||
out_sample_returns = returns[split_point:]
|
||||
|
||||
# Calculate Sharpe ratios
|
||||
is_sharpe = self.calculate_sharpe_ratio(in_sample_returns)
|
||||
oos_sharpe = self.calculate_sharpe_ratio(out_sample_returns)
|
||||
|
||||
# Calculate degradation
|
||||
if is_sharpe > 0:
|
||||
degradation = max(0, 1 - oos_sharpe / is_sharpe)
|
||||
else:
|
||||
degradation = 1.0
|
||||
|
||||
return degradation
|
||||
|
||||
def test_statistical_significance(
|
||||
self,
|
||||
strategy_returns: np.ndarray,
|
||||
market_returns: Optional[np.ndarray] = None
|
||||
) -> bool:
|
||||
"""
|
||||
Test if returns are statistically significant
|
||||
"""
|
||||
# Test against zero returns
|
||||
t_stat, p_value = stats.ttest_1samp(strategy_returns, 0)
|
||||
|
||||
if p_value < (1 - self.confidence_level):
|
||||
return True
|
||||
|
||||
# If market returns provided, test for alpha
|
||||
if market_returns is not None and len(market_returns) == len(strategy_returns):
|
||||
excess_returns = strategy_returns - market_returns
|
||||
t_stat, p_value = stats.ttest_1samp(excess_returns, 0)
|
||||
|
||||
return p_value < (1 - self.confidence_level)
|
||||
|
||||
return False
|
||||
|
||||
def detect_overfitting(
|
||||
self,
|
||||
psr: float,
|
||||
dsr: float,
|
||||
mc_percentile: float,
|
||||
oos_degradation: float,
|
||||
num_parameters: int
|
||||
) -> bool:
|
||||
"""
|
||||
Detect potential overfitting based on multiple criteria
|
||||
"""
|
||||
overfitting_score = 0
|
||||
|
||||
# Check PSR
|
||||
if psr < 0.95:
|
||||
overfitting_score += 1
|
||||
|
||||
# Check DSR
|
||||
if dsr < 0.95:
|
||||
overfitting_score += 2 # More weight on DSR
|
||||
|
||||
# Check Monte Carlo
|
||||
if mc_percentile < 0.95:
|
||||
overfitting_score += 1
|
||||
|
||||
# Check out-of-sample degradation
|
||||
if oos_degradation > 0.5:
|
||||
overfitting_score += 2
|
||||
|
||||
# Check parameter count
|
||||
if num_parameters > 10:
|
||||
overfitting_score += 1
|
||||
|
||||
# Decision threshold
|
||||
return overfitting_score >= 3
|
||||
|
||||
    def walk_forward_analysis(
        self,
        data: pd.DataFrame,
        strategy_func,
        window_size: int,
        step_size: int,
        num_windows: int = 5
    ) -> Dict:
        """
        Perform walk-forward analysis

        Repeatedly optimizes on a training fold and evaluates on the
        following out-of-sample fold, then scores how stable both the chosen
        parameters and the out-of-sample Sharpe ratios are across folds.

        Args:
            data: full time-ordered dataset to split into folds.
            strategy_func: callable (data, params) -> return series.
            window_size: NOTE(review): currently unused — fold sizes come
                entirely from TimeSeriesSplit(n_splits=num_windows).
            step_size: NOTE(review): currently unused, same reason.
            num_windows: number of expanding-window train/test folds.

        Returns:
            Dict with per-fold in/out-of-sample Sharpes, chosen parameters,
            and a combined stability_score in roughly [0, 1].
        """
        results = {
            'in_sample_sharpes': [],
            'out_sample_sharpes': [],
            'parameters': [],
            'stability_score': 0
        }

        # Expanding-window chronological splits (train always precedes test).
        tscv = TimeSeriesSplit(n_splits=num_windows)

        for train_idx, test_idx in tscv.split(data):
            train_data = data.iloc[train_idx]
            test_data = data.iloc[test_idx]

            # Optimize on training data only; parameters are then frozen.
            best_params = self.optimize_parameters(train_data, strategy_func)
            results['parameters'].append(best_params)

            # Evaluate the frozen parameters on both folds.
            is_returns = strategy_func(train_data, best_params)
            oos_returns = strategy_func(test_data, best_params)

            is_sharpe = self.calculate_sharpe_ratio(is_returns)
            oos_sharpe = self.calculate_sharpe_ratio(oos_returns)

            results['in_sample_sharpes'].append(is_sharpe)
            results['out_sample_sharpes'].append(oos_sharpe)

        # Stability: average of parameter consistency across folds and
        # (1 - coefficient of variation) of the out-of-sample Sharpes;
        # the 1e-6 guards against a zero mean.
        param_stability = self.calculate_parameter_stability(results['parameters'])
        performance_stability = 1 - np.std(results['out_sample_sharpes']) / (np.mean(results['out_sample_sharpes']) + 1e-6)

        results['stability_score'] = (param_stability + performance_stability) / 2

        return results
|
||||
|
||||
def calculate_parameter_stability(self, parameters_list: List[Dict]) -> float:
|
||||
"""
|
||||
Calculate how stable parameters are across different periods
|
||||
"""
|
||||
if len(parameters_list) < 2:
|
||||
return 1.0
|
||||
|
||||
# Convert to DataFrame for easier analysis
|
||||
params_df = pd.DataFrame(parameters_list)
|
||||
|
||||
# Calculate coefficient of variation for each parameter
|
||||
stabilities = []
|
||||
for col in params_df.columns:
|
||||
if params_df[col].dtype in [np.float64, np.int64]:
|
||||
mean_val = params_df[col].mean()
|
||||
std_val = params_df[col].std()
|
||||
|
||||
if mean_val != 0:
|
||||
cv = std_val / abs(mean_val)
|
||||
stability = 1 / (1 + cv) # Convert to 0-1 scale
|
||||
stabilities.append(stability)
|
||||
|
||||
return np.mean(stabilities) if stabilities else 0.5
|
||||
|
||||
def optimize_parameters(self, data: pd.DataFrame, strategy_func) -> Dict:
|
||||
"""
|
||||
Placeholder for parameter optimization
|
||||
In practice, this would use grid search, Bayesian optimization, etc.
|
||||
"""
|
||||
# Simple example - would be replaced with actual optimization
|
||||
return {'param1': 20, 'param2': 2.0}
|
||||
|
||||
def bootstrap_confidence_intervals(
|
||||
self,
|
||||
returns: np.ndarray,
|
||||
metric_func,
|
||||
confidence_level: float = 0.95,
|
||||
num_samples: int = 1000
|
||||
) -> Tuple[float, float, float]:
|
||||
"""
|
||||
Calculate bootstrap confidence intervals for any metric
|
||||
"""
|
||||
bootstrap_metrics = []
|
||||
|
||||
for _ in range(num_samples):
|
||||
# Resample with replacement
|
||||
sample_returns = np.random.choice(returns, size=len(returns), replace=True)
|
||||
metric = metric_func(sample_returns)
|
||||
bootstrap_metrics.append(metric)
|
||||
|
||||
# Calculate percentiles
|
||||
lower_percentile = (1 - confidence_level) / 2
|
||||
upper_percentile = 1 - lower_percentile
|
||||
|
||||
lower_bound = np.percentile(bootstrap_metrics, lower_percentile * 100)
|
||||
upper_bound = np.percentile(bootstrap_metrics, upper_percentile * 100)
|
||||
point_estimate = metric_func(returns)
|
||||
|
||||
return lower_bound, point_estimate, upper_bound
|
||||
|
||||
    def generate_report(self, validation_result: ValidationResult) -> str:
        """
        Generate human-readable validation report

        Renders the ValidationResult fields into a plain-text summary, then
        appends one bullet per warning and per recommendation.
        """
        # Header and key-metric section; PASSED/FAILED tracks is_overfit.
        report = f"""
Statistical Validation Report
============================

Overall Assessment: {'PASSED' if not validation_result.is_overfit else 'FAILED'}
Confidence Level: {validation_result.confidence_level:.1%}

Key Metrics:
-----------
Probabilistic Sharpe Ratio (PSR): {validation_result.psr:.3f}
Deflated Sharpe Ratio (DSR): {validation_result.dsr:.3f}
Monte Carlo Percentile: {validation_result.monte_carlo_percentile:.1%}
Out-of-Sample Degradation: {validation_result.out_of_sample_degradation:.1%}
Statistical Significance: {'Yes' if validation_result.statistical_significance else 'No'}

Warnings:
---------
"""
        # One bullet per warning collected during validation.
        for warning in validation_result.warnings:
            report += f"- {warning}\n"

        report += """
Recommendations:
---------------
"""
        # One bullet per remediation suggestion.
        for rec in validation_result.recommendations:
            report += f"- {rec}\n"

        return report
|
||||
Loading…
Add table
Add a link
Reference in a new issue