added initial py analytics / rust core / ts orchestrator services
This commit is contained in:
parent
680b5fd2ae
commit
c862ed496b
62 changed files with 13459 additions and 0 deletions
410
apps/stock/analytics/src/analysis/statistical_validation.py
Normal file
410
apps/stock/analytics/src/analysis/statistical_validation.py
Normal file
|
|
@ -0,0 +1,410 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy import stats
|
||||
from typing import Dict, List, Tuple, Optional
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
from sklearn.model_selection import TimeSeriesSplit
|
||||
import warnings
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@dataclass
class ValidationResult:
    """Outcome of the statistical validation of a backtest.

    Bundles the overfitting verdict with the individual diagnostics that
    produced it, plus any human-readable warnings and suggested remediations.
    """
    is_overfit: bool                 # final verdict from the combined criteria
    confidence_level: float          # simple confidence measure in the verdict
    psr: float                       # Probabilistic Sharpe Ratio
    dsr: float                       # Deflated Sharpe Ratio
    monte_carlo_percentile: float    # rank of the real Sharpe vs. randomized nulls
    out_of_sample_degradation: float # 0 = no degradation, 1 = full degradation
    statistical_significance: bool   # returns significantly different from zero
    warnings: List[str]              # data-quality / sample-size warnings
    recommendations: List[str]       # suggested follow-up actions
|
||||
|
||||
class StatisticalValidator:
|
||||
"""
|
||||
Statistical validation for backtesting results
|
||||
Detects overfitting and validates strategy robustness
|
||||
"""
|
||||
|
||||
def __init__(self, min_trades: int = 30, confidence_level: float = 0.95):
|
||||
self.min_trades = min_trades
|
||||
self.confidence_level = confidence_level
|
||||
|
||||
def validate_backtest(
|
||||
self,
|
||||
returns: np.ndarray,
|
||||
trades: pd.DataFrame,
|
||||
parameters: Dict,
|
||||
market_returns: Optional[np.ndarray] = None
|
||||
) -> ValidationResult:
|
||||
"""
|
||||
Comprehensive validation of backtest results
|
||||
"""
|
||||
warnings_list = []
|
||||
recommendations = []
|
||||
|
||||
# Check minimum requirements
|
||||
if len(trades) < self.min_trades:
|
||||
warnings_list.append(f"Insufficient trades ({len(trades)} < {self.min_trades})")
|
||||
recommendations.append("Extend backtest period or reduce trading filters")
|
||||
|
||||
# Calculate key metrics
|
||||
sharpe = self.calculate_sharpe_ratio(returns)
|
||||
psr = self.calculate_probabilistic_sharpe_ratio(sharpe, len(returns))
|
||||
dsr = self.calculate_deflated_sharpe_ratio(
|
||||
sharpe, len(returns), len(parameters)
|
||||
)
|
||||
|
||||
# Monte Carlo analysis
|
||||
mc_percentile = self.monte_carlo_test(returns, trades)
|
||||
|
||||
# Out-of-sample testing
|
||||
oos_degradation = self.out_of_sample_test(returns, trades)
|
||||
|
||||
# Statistical significance tests
|
||||
is_significant = self.test_statistical_significance(returns, market_returns)
|
||||
|
||||
# Overfitting detection
|
||||
is_overfit = self.detect_overfitting(
|
||||
psr, dsr, mc_percentile, oos_degradation, len(parameters)
|
||||
)
|
||||
|
||||
# Generate recommendations
|
||||
if dsr < 0.95:
|
||||
recommendations.append("Reduce strategy complexity or increase sample size")
|
||||
if mc_percentile < 0.95:
|
||||
recommendations.append("Strategy may be exploiting random patterns")
|
||||
if oos_degradation > 0.5:
|
||||
recommendations.append("Consider walk-forward optimization")
|
||||
|
||||
return ValidationResult(
|
||||
is_overfit=is_overfit,
|
||||
confidence_level=1 - is_overfit * 0.5, # Simple confidence measure
|
||||
psr=psr,
|
||||
dsr=dsr,
|
||||
monte_carlo_percentile=mc_percentile,
|
||||
out_of_sample_degradation=oos_degradation,
|
||||
statistical_significance=is_significant,
|
||||
warnings=warnings_list,
|
||||
recommendations=recommendations
|
||||
)
|
||||
|
||||
def calculate_sharpe_ratio(self, returns: np.ndarray) -> float:
|
||||
"""Calculate annualized Sharpe ratio"""
|
||||
if len(returns) == 0:
|
||||
return 0.0
|
||||
|
||||
# Assume daily returns
|
||||
mean_return = np.mean(returns)
|
||||
std_return = np.std(returns, ddof=1)
|
||||
|
||||
if std_return == 0:
|
||||
return 0.0
|
||||
|
||||
# Annualize
|
||||
sharpe = mean_return / std_return * np.sqrt(252)
|
||||
return sharpe
|
||||
|
||||
def calculate_probabilistic_sharpe_ratio(
|
||||
self,
|
||||
sharpe: float,
|
||||
num_observations: int
|
||||
) -> float:
|
||||
"""
|
||||
Calculate Probabilistic Sharpe Ratio (PSR)
|
||||
Adjusts for sample size and non-normality
|
||||
"""
|
||||
if num_observations < 2:
|
||||
return 0.0
|
||||
|
||||
# Adjust for sample size
|
||||
psr = stats.norm.cdf(
|
||||
sharpe * np.sqrt(num_observations - 1) /
|
||||
np.sqrt(1 + 0.5 * sharpe**2)
|
||||
)
|
||||
|
||||
return psr
|
||||
|
||||
def calculate_deflated_sharpe_ratio(
|
||||
self,
|
||||
sharpe: float,
|
||||
num_observations: int,
|
||||
num_parameters: int,
|
||||
num_trials: int = 1
|
||||
) -> float:
|
||||
"""
|
||||
Calculate Deflated Sharpe Ratio (DSR)
|
||||
Accounts for multiple testing and parameter optimization
|
||||
"""
|
||||
if num_observations < num_parameters + 2:
|
||||
return 0.0
|
||||
|
||||
# Expected maximum Sharpe under null hypothesis
|
||||
expected_max_sharpe = np.sqrt(2 * np.log(num_trials)) / np.sqrt(num_observations)
|
||||
|
||||
# Standard error of Sharpe ratio
|
||||
se_sharpe = np.sqrt(
|
||||
(1 + 0.5 * sharpe**2) / (num_observations - 1)
|
||||
)
|
||||
|
||||
# Deflated Sharpe Ratio
|
||||
dsr = (sharpe - expected_max_sharpe) / se_sharpe
|
||||
|
||||
# Convert to probability
|
||||
return stats.norm.cdf(dsr)
|
||||
|
||||
def monte_carlo_test(
|
||||
self,
|
||||
returns: np.ndarray,
|
||||
trades: pd.DataFrame,
|
||||
num_simulations: int = 1000
|
||||
) -> float:
|
||||
"""
|
||||
Monte Carlo permutation test
|
||||
Tests if strategy is better than random
|
||||
"""
|
||||
original_sharpe = self.calculate_sharpe_ratio(returns)
|
||||
|
||||
# Generate random strategies
|
||||
random_sharpes = []
|
||||
|
||||
for _ in range(num_simulations):
|
||||
# Randomly shuffle trade outcomes
|
||||
shuffled_returns = np.random.permutation(returns)
|
||||
random_sharpe = self.calculate_sharpe_ratio(shuffled_returns)
|
||||
random_sharpes.append(random_sharpe)
|
||||
|
||||
# Calculate percentile
|
||||
percentile = np.sum(original_sharpe > np.array(random_sharpes)) / num_simulations
|
||||
|
||||
return percentile
|
||||
|
||||
def out_of_sample_test(
|
||||
self,
|
||||
returns: np.ndarray,
|
||||
trades: pd.DataFrame,
|
||||
test_size: float = 0.3
|
||||
) -> float:
|
||||
"""
|
||||
Test performance degradation out-of-sample
|
||||
"""
|
||||
if len(returns) < 100: # Need sufficient data
|
||||
return 0.0
|
||||
|
||||
# Split data
|
||||
split_point = int(len(returns) * (1 - test_size))
|
||||
in_sample_returns = returns[:split_point]
|
||||
out_sample_returns = returns[split_point:]
|
||||
|
||||
# Calculate Sharpe ratios
|
||||
is_sharpe = self.calculate_sharpe_ratio(in_sample_returns)
|
||||
oos_sharpe = self.calculate_sharpe_ratio(out_sample_returns)
|
||||
|
||||
# Calculate degradation
|
||||
if is_sharpe > 0:
|
||||
degradation = max(0, 1 - oos_sharpe / is_sharpe)
|
||||
else:
|
||||
degradation = 1.0
|
||||
|
||||
return degradation
|
||||
|
||||
def test_statistical_significance(
|
||||
self,
|
||||
strategy_returns: np.ndarray,
|
||||
market_returns: Optional[np.ndarray] = None
|
||||
) -> bool:
|
||||
"""
|
||||
Test if returns are statistically significant
|
||||
"""
|
||||
# Test against zero returns
|
||||
t_stat, p_value = stats.ttest_1samp(strategy_returns, 0)
|
||||
|
||||
if p_value < (1 - self.confidence_level):
|
||||
return True
|
||||
|
||||
# If market returns provided, test for alpha
|
||||
if market_returns is not None and len(market_returns) == len(strategy_returns):
|
||||
excess_returns = strategy_returns - market_returns
|
||||
t_stat, p_value = stats.ttest_1samp(excess_returns, 0)
|
||||
|
||||
return p_value < (1 - self.confidence_level)
|
||||
|
||||
return False
|
||||
|
||||
def detect_overfitting(
|
||||
self,
|
||||
psr: float,
|
||||
dsr: float,
|
||||
mc_percentile: float,
|
||||
oos_degradation: float,
|
||||
num_parameters: int
|
||||
) -> bool:
|
||||
"""
|
||||
Detect potential overfitting based on multiple criteria
|
||||
"""
|
||||
overfitting_score = 0
|
||||
|
||||
# Check PSR
|
||||
if psr < 0.95:
|
||||
overfitting_score += 1
|
||||
|
||||
# Check DSR
|
||||
if dsr < 0.95:
|
||||
overfitting_score += 2 # More weight on DSR
|
||||
|
||||
# Check Monte Carlo
|
||||
if mc_percentile < 0.95:
|
||||
overfitting_score += 1
|
||||
|
||||
# Check out-of-sample degradation
|
||||
if oos_degradation > 0.5:
|
||||
overfitting_score += 2
|
||||
|
||||
# Check parameter count
|
||||
if num_parameters > 10:
|
||||
overfitting_score += 1
|
||||
|
||||
# Decision threshold
|
||||
return overfitting_score >= 3
|
||||
|
||||
def walk_forward_analysis(
|
||||
self,
|
||||
data: pd.DataFrame,
|
||||
strategy_func,
|
||||
window_size: int,
|
||||
step_size: int,
|
||||
num_windows: int = 5
|
||||
) -> Dict:
|
||||
"""
|
||||
Perform walk-forward analysis
|
||||
"""
|
||||
results = {
|
||||
'in_sample_sharpes': [],
|
||||
'out_sample_sharpes': [],
|
||||
'parameters': [],
|
||||
'stability_score': 0
|
||||
}
|
||||
|
||||
tscv = TimeSeriesSplit(n_splits=num_windows)
|
||||
|
||||
for train_idx, test_idx in tscv.split(data):
|
||||
train_data = data.iloc[train_idx]
|
||||
test_data = data.iloc[test_idx]
|
||||
|
||||
# Optimize on training data
|
||||
best_params = self.optimize_parameters(train_data, strategy_func)
|
||||
results['parameters'].append(best_params)
|
||||
|
||||
# Test on out-of-sample data
|
||||
is_returns = strategy_func(train_data, best_params)
|
||||
oos_returns = strategy_func(test_data, best_params)
|
||||
|
||||
is_sharpe = self.calculate_sharpe_ratio(is_returns)
|
||||
oos_sharpe = self.calculate_sharpe_ratio(oos_returns)
|
||||
|
||||
results['in_sample_sharpes'].append(is_sharpe)
|
||||
results['out_sample_sharpes'].append(oos_sharpe)
|
||||
|
||||
# Calculate stability score
|
||||
param_stability = self.calculate_parameter_stability(results['parameters'])
|
||||
performance_stability = 1 - np.std(results['out_sample_sharpes']) / (np.mean(results['out_sample_sharpes']) + 1e-6)
|
||||
|
||||
results['stability_score'] = (param_stability + performance_stability) / 2
|
||||
|
||||
return results
|
||||
|
||||
def calculate_parameter_stability(self, parameters_list: List[Dict]) -> float:
|
||||
"""
|
||||
Calculate how stable parameters are across different periods
|
||||
"""
|
||||
if len(parameters_list) < 2:
|
||||
return 1.0
|
||||
|
||||
# Convert to DataFrame for easier analysis
|
||||
params_df = pd.DataFrame(parameters_list)
|
||||
|
||||
# Calculate coefficient of variation for each parameter
|
||||
stabilities = []
|
||||
for col in params_df.columns:
|
||||
if params_df[col].dtype in [np.float64, np.int64]:
|
||||
mean_val = params_df[col].mean()
|
||||
std_val = params_df[col].std()
|
||||
|
||||
if mean_val != 0:
|
||||
cv = std_val / abs(mean_val)
|
||||
stability = 1 / (1 + cv) # Convert to 0-1 scale
|
||||
stabilities.append(stability)
|
||||
|
||||
return np.mean(stabilities) if stabilities else 0.5
|
||||
|
||||
def optimize_parameters(self, data: pd.DataFrame, strategy_func) -> Dict:
|
||||
"""
|
||||
Placeholder for parameter optimization
|
||||
In practice, this would use grid search, Bayesian optimization, etc.
|
||||
"""
|
||||
# Simple example - would be replaced with actual optimization
|
||||
return {'param1': 20, 'param2': 2.0}
|
||||
|
||||
def bootstrap_confidence_intervals(
|
||||
self,
|
||||
returns: np.ndarray,
|
||||
metric_func,
|
||||
confidence_level: float = 0.95,
|
||||
num_samples: int = 1000
|
||||
) -> Tuple[float, float, float]:
|
||||
"""
|
||||
Calculate bootstrap confidence intervals for any metric
|
||||
"""
|
||||
bootstrap_metrics = []
|
||||
|
||||
for _ in range(num_samples):
|
||||
# Resample with replacement
|
||||
sample_returns = np.random.choice(returns, size=len(returns), replace=True)
|
||||
metric = metric_func(sample_returns)
|
||||
bootstrap_metrics.append(metric)
|
||||
|
||||
# Calculate percentiles
|
||||
lower_percentile = (1 - confidence_level) / 2
|
||||
upper_percentile = 1 - lower_percentile
|
||||
|
||||
lower_bound = np.percentile(bootstrap_metrics, lower_percentile * 100)
|
||||
upper_bound = np.percentile(bootstrap_metrics, upper_percentile * 100)
|
||||
point_estimate = metric_func(returns)
|
||||
|
||||
return lower_bound, point_estimate, upper_bound
|
||||
|
||||
def generate_report(self, validation_result: ValidationResult) -> str:
|
||||
"""
|
||||
Generate human-readable validation report
|
||||
"""
|
||||
report = f"""
|
||||
Statistical Validation Report
|
||||
============================
|
||||
|
||||
Overall Assessment: {'PASSED' if not validation_result.is_overfit else 'FAILED'}
|
||||
Confidence Level: {validation_result.confidence_level:.1%}
|
||||
|
||||
Key Metrics:
|
||||
-----------
|
||||
Probabilistic Sharpe Ratio (PSR): {validation_result.psr:.3f}
|
||||
Deflated Sharpe Ratio (DSR): {validation_result.dsr:.3f}
|
||||
Monte Carlo Percentile: {validation_result.monte_carlo_percentile:.1%}
|
||||
Out-of-Sample Degradation: {validation_result.out_of_sample_degradation:.1%}
|
||||
Statistical Significance: {'Yes' if validation_result.statistical_significance else 'No'}
|
||||
|
||||
Warnings:
|
||||
---------
|
||||
"""
|
||||
for warning in validation_result.warnings:
|
||||
report += f"- {warning}\n"
|
||||
|
||||
report += """
|
||||
Recommendations:
|
||||
---------------
|
||||
"""
|
||||
for rec in validation_result.recommendations:
|
||||
report += f"- {rec}\n"
|
||||
|
||||
return report
|
||||
217
apps/stock/analytics/src/analytics/performance.py
Normal file
217
apps/stock/analytics/src/analytics/performance.py
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class PerformanceAnalyzer:
    """
    Comprehensive performance analysis for trading strategies and portfolios.

    All rate-based metrics assume daily returns and annualize with 252
    trading days.
    """

    def __init__(self, risk_free_rate: float = 0.02):
        # Annual risk-free rate; converted to a daily rate where needed.
        self.risk_free_rate = risk_free_rate

    def calculate_metrics(
        self,
        portfolio_id: str,
        start_date: datetime,
        end_date: datetime
    ) -> Dict:
        """
        Calculate comprehensive performance metrics for a portfolio.

        NOTE: real data access is not wired up yet — returns are sampled
        synthetically for the requested date range.
        """
        # In real implementation, would fetch data from database
        returns = self._generate_sample_returns(start_date, end_date)

        # Guard the empty-range case: np.max/np.min raise on empty arrays.
        has_data = len(returns) > 0

        metrics = {
            'total_return': self._calculate_total_return(returns),
            'annualized_return': self._calculate_annualized_return(returns),
            'volatility': self._calculate_volatility(returns),
            'sharpe_ratio': self._calculate_sharpe_ratio(returns),
            'sortino_ratio': self._calculate_sortino_ratio(returns),
            'max_drawdown': self._calculate_max_drawdown(returns),
            'calmar_ratio': self._calculate_calmar_ratio(returns),
            'win_rate': self._calculate_win_rate(returns),
            'profit_factor': self._calculate_profit_factor(returns),
            'avg_win': np.mean(returns[returns > 0]) if has_data and np.any(returns > 0) else 0,
            'avg_loss': np.mean(returns[returns < 0]) if has_data and np.any(returns < 0) else 0,
            'total_trades': len(returns),
            'best_day': np.max(returns) if has_data else 0,
            'worst_day': np.min(returns) if has_data else 0,
            'skewness': self._calculate_skewness(returns),
            'kurtosis': self._calculate_kurtosis(returns)
        }

        return metrics

    def calculate_risk_metrics(
        self,
        portfolio_id: str,
        window: int = 252,
        confidence_levels: List[float] = [0.95, 0.99]
    ) -> Dict:
        """
        Calculate risk metrics including VaR and CVaR over a trailing window.
        """
        # Generate sample returns (placeholder for real data access).
        returns = self._generate_sample_returns(
            datetime.now() - timedelta(days=window),
            datetime.now()
        )

        risk_metrics = {
            'volatility': self._calculate_volatility(returns),
            'downside_deviation': self._calculate_downside_deviation(returns),
            'beta': self._calculate_beta(returns),  # Would need market returns
            'tracking_error': 0.0,  # Placeholder
        }

        # VaR/CVaR at each requested confidence level, keyed e.g. 'var_95'.
        for confidence in confidence_levels:
            risk_metrics[f'var_{int(confidence*100)}'] = self._calculate_var(returns, confidence)
            risk_metrics[f'cvar_{int(confidence*100)}'] = self._calculate_cvar(returns, confidence)

        return risk_metrics

    def analyze_backtest(self, backtest_id: str) -> Dict:
        """
        Analyze backtest results.

        NOTE: returns a comprehensive mock analysis until backtest storage
        is wired up.
        """
        return {
            'metrics': {
                'total_return': 0.156,
                'sharpe_ratio': 1.45,
                'max_drawdown': 0.087,
                'win_rate': 0.58,
                'profit_factor': 1.78
            },
            'statistics': {
                'total_trades': 245,
                'winning_trades': 142,
                'losing_trades': 103,
                'avg_holding_period': 3.5,
                'max_consecutive_wins': 8,
                'max_consecutive_losses': 5
            },
            'risk_analysis': {
                'var_95': 0.024,
                'cvar_95': 0.031,
                'downside_deviation': 0.018,
                'ulcer_index': 0.045
            },
            'trade_analysis': {
                'best_trade': 0.087,
                'worst_trade': -0.043,
                'avg_win': 0.023,
                'avg_loss': -0.015,
                'largest_winner': 0.087,
                'largest_loser': -0.043
            }
        }

    # Helper methods
    def _generate_sample_returns(self, start_date: datetime, end_date: datetime) -> np.ndarray:
        """Generate sample daily returns with mild autocorrelation (testing only)."""
        days = (end_date - start_date).days
        if days <= 0:
            return np.array([])
        returns = np.random.normal(0.0005, 0.02, days)
        # AR(1)-style smoothing to mimic real return autocorrelation.
        for i in range(1, len(returns)):
            returns[i] = 0.1 * returns[i-1] + 0.9 * returns[i]
        return returns

    def _calculate_total_return(self, returns: np.ndarray) -> float:
        """Calculate total cumulative (compounded) return."""
        return np.prod(1 + returns) - 1

    def _calculate_annualized_return(self, returns: np.ndarray) -> float:
        """Calculate annualized return (0.0 for an empty series).

        BUG FIX: the original divided by `years` unconditionally, raising
        ZeroDivisionError for empty input.
        """
        if len(returns) == 0:
            return 0.0
        total_return = self._calculate_total_return(returns)
        years = len(returns) / 252
        return (1 + total_return) ** (1 / years) - 1

    def _calculate_volatility(self, returns: np.ndarray) -> float:
        """Calculate annualized volatility (0.0 for an empty series)."""
        if len(returns) == 0:
            return 0.0
        return np.std(returns) * np.sqrt(252)

    def _calculate_sharpe_ratio(self, returns: np.ndarray) -> float:
        """Calculate annualized Sharpe ratio; 0.0 for empty/constant series."""
        if len(returns) == 0:
            return 0.0
        excess_returns = returns - self.risk_free_rate / 252
        std = np.std(excess_returns)
        if std == 0:
            return 0.0
        return np.mean(excess_returns) / std * np.sqrt(252)

    def _calculate_sortino_ratio(self, returns: np.ndarray) -> float:
        """Calculate Sortino ratio (downside deviation in the denominator)."""
        if len(returns) == 0:
            return 0.0
        excess_returns = returns - self.risk_free_rate / 252
        downside_returns = excess_returns[excess_returns < 0]
        # Epsilon fallback keeps the ratio finite when there are no losses.
        downside_std = np.std(downside_returns) if len(downside_returns) > 0 else 1e-6
        return np.mean(excess_returns) / downside_std * np.sqrt(252)

    def _calculate_max_drawdown(self, returns: np.ndarray) -> float:
        """Calculate maximum drawdown (a non-positive number)."""
        if len(returns) == 0:
            return 0.0
        cumulative = (1 + returns).cumprod()
        running_max = np.maximum.accumulate(cumulative)
        drawdown = (cumulative - running_max) / running_max
        return np.min(drawdown)

    def _calculate_calmar_ratio(self, returns: np.ndarray) -> float:
        """Calculate Calmar ratio (annualized return / |max drawdown|)."""
        annual_return = self._calculate_annualized_return(returns)
        max_dd = abs(self._calculate_max_drawdown(returns))
        return annual_return / max_dd if max_dd > 0 else 0

    def _calculate_win_rate(self, returns: np.ndarray) -> float:
        """Calculate fraction of positive-return periods."""
        return np.sum(returns > 0) / len(returns) if len(returns) > 0 else 0

    def _calculate_profit_factor(self, returns: np.ndarray) -> float:
        """Calculate profit factor (gross gains / gross losses)."""
        gains = returns[returns > 0]
        losses = returns[returns < 0]
        total_gains = np.sum(gains) if len(gains) > 0 else 0
        # Epsilon fallback avoids division by zero when there are no losses.
        total_losses = abs(np.sum(losses)) if len(losses) > 0 else 1e-6
        return total_gains / total_losses

    def _calculate_downside_deviation(self, returns: np.ndarray, mar: float = 0) -> float:
        """Calculate annualized downside deviation below `mar`."""
        downside_returns = returns[returns < mar]
        return np.std(downside_returns) * np.sqrt(252) if len(downside_returns) > 0 else 0

    def _calculate_var(self, returns: np.ndarray, confidence: float) -> float:
        """Calculate (historical) Value at Risk as a return quantile."""
        return np.percentile(returns, (1 - confidence) * 100)

    def _calculate_cvar(self, returns: np.ndarray, confidence: float) -> float:
        """Calculate Conditional VaR (mean of returns at or below the VaR)."""
        var = self._calculate_var(returns, confidence)
        return np.mean(returns[returns <= var])

    def _calculate_beta(self, returns: np.ndarray, market_returns: Optional[np.ndarray] = None) -> float:
        """Calculate beta relative to the market (mocked when not supplied)."""
        if market_returns is None:
            # Generate mock market returns until real data is available.
            market_returns = np.random.normal(0.0003, 0.015, len(returns))

        covariance = np.cov(returns, market_returns)[0, 1]
        market_variance = np.var(market_returns)
        return covariance / market_variance if market_variance > 0 else 1.0

    def _calculate_skewness(self, returns: np.ndarray) -> float:
        """Calculate skewness of returns (0 for constant/empty series)."""
        if len(returns) == 0:
            return 0
        mean = np.mean(returns)
        std = np.std(returns)
        return np.mean(((returns - mean) / std) ** 3) if std > 0 else 0

    def _calculate_kurtosis(self, returns: np.ndarray) -> float:
        """Calculate excess kurtosis of returns (0 for constant/empty series)."""
        if len(returns) == 0:
            return 0
        mean = np.mean(returns)
        std = np.std(returns)
        return np.mean(((returns - mean) / std) ** 4) - 3 if std > 0 else 0
|
||||
284
apps/stock/analytics/src/analytics/regime.py
Normal file
284
apps/stock/analytics/src/analytics/regime.py
Normal file
|
|
@ -0,0 +1,284 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, List, Tuple
|
||||
from scipy import stats
|
||||
from sklearn.mixture import GaussianMixture
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class RegimeDetector:
    """
    Market regime detection using various statistical and ML methods.

    Combines trend, volatility and momentum indicators into a single
    labeled regime with a confidence score.
    """

    def __init__(self):
        # Canonical regime labels this detector can emit as the headline regime.
        self.regimes = ['bull', 'bear', 'sideways', 'high_volatility', 'low_volatility']

    def detect_current_regime(self, lookback_days: int = 60) -> Dict:
        """
        Detect the current market regime using multiple indicators.

        NOTE: real data access is not wired up yet — market data is sampled
        synthetically for the lookback window.
        """
        # In real implementation, would fetch market data
        market_data = self._generate_market_data(lookback_days)

        # Calculate various regime indicators
        trend_regime = self._detect_trend_regime(market_data)
        volatility_regime = self._detect_volatility_regime(market_data)
        momentum_regime = self._detect_momentum_regime(market_data)

        # Combine indicators for final regime
        regime, confidence = self._combine_regime_indicators(
            trend_regime,
            volatility_regime,
            momentum_regime
        )

        return {
            'regime': regime,
            'confidence': confidence,
            'indicators': {
                'trend': trend_regime,
                'volatility': volatility_regime,
                'momentum': momentum_regime,
                'market_breadth': self._calculate_market_breadth(market_data),
                'fear_greed_index': self._calculate_fear_greed_index(market_data)
            },
            'sub_regimes': {
                'trend_strength': self._calculate_trend_strength(market_data),
                'volatility_percentile': self._calculate_volatility_percentile(market_data),
                'correlation_regime': self._detect_correlation_regime(market_data)
            }
        }

    def _generate_market_data(self, days: int) -> pd.DataFrame:
        """Generate sample market data for testing.

        Layout contract used by the other helpers: asset price columns first,
        then a 'Market' index column, then a 'Volatility' column (last two).
        """
        dates = pd.date_range(end=datetime.now(), periods=days, freq='D')

        # Generate correlated returns for multiple assets
        n_assets = 10
        returns = np.random.multivariate_normal(
            mean=[0.0005] * n_assets,
            cov=np.eye(n_assets) * 0.0004 + np.ones((n_assets, n_assets)) * 0.0001,
            size=days
        )

        # Create price series
        prices = pd.DataFrame(
            (1 + returns).cumprod(axis=0) * 100,
            index=dates,
            columns=[f'Asset_{i}' for i in range(n_assets)]
        )

        # Add market index
        prices['Market'] = prices.mean(axis=1)

        # Add volatility index (like VIX).
        # BUG FIX: the original built this Series with a default RangeIndex,
        # so assigning it into the date-indexed frame aligned on nothing and
        # produced an all-NaN column (breaking the fear/greed index).
        prices['Volatility'] = (
            pd.Series(returns[:, 0], index=dates).rolling(20).std() * np.sqrt(252) * 100
        )

        return prices

    def _detect_trend_regime(self, data: pd.DataFrame) -> Dict:
        """Detect trend regime using moving averages and linear regression."""
        market = data['Market']

        # Calculate moving averages
        ma_short = market.rolling(20).mean()
        ma_long = market.rolling(50).mean()

        # Trend strength: distance of the last price from the long MA.
        current_price = market.iloc[-1]
        trend_score = (current_price - ma_long.iloc[-1]) / ma_long.iloc[-1]

        # Linear regression trend over the whole window.
        x = np.arange(len(market))
        slope, _, r_value, _, _ = stats.linregress(x, market.values)

        # Determine regime: require both price displacement and MA alignment.
        if trend_score > 0.05 and ma_short.iloc[-1] > ma_long.iloc[-1]:
            regime = 'bull'
        elif trend_score < -0.05 and ma_short.iloc[-1] < ma_long.iloc[-1]:
            regime = 'bear'
        else:
            regime = 'sideways'

        return {
            'regime': regime,
            'trend_score': trend_score,
            'slope': slope,
            'r_squared': r_value ** 2
        }

    def _detect_volatility_regime(self, data: pd.DataFrame) -> Dict:
        """Detect volatility regime from rolling realized volatility."""
        returns = data['Market'].pct_change().dropna()

        # Calculate rolling annualized volatility at two horizons.
        vol_short = returns.rolling(10).std() * np.sqrt(252)
        vol_long = returns.rolling(30).std() * np.sqrt(252)

        current_vol = vol_short.iloc[-1]
        vol_percentile = stats.percentileofscore(vol_long.dropna(), current_vol)

        # Volatility regime from the percentile rank.
        if vol_percentile > 75:
            regime = 'high_volatility'
        elif vol_percentile < 25:
            regime = 'low_volatility'
        else:
            regime = 'normal_volatility'

        # Volatility of volatility (second-order risk).
        vol_of_vol = vol_short.rolling(20).std().iloc[-1]

        return {
            'regime': regime,
            'current_volatility': current_vol,
            'volatility_percentile': vol_percentile,
            'vol_of_vol': vol_of_vol
        }

    def _detect_momentum_regime(self, data: pd.DataFrame) -> Dict:
        """Detect momentum regime using RSI and rate of change."""
        market = data['Market']

        # Calculate RSI
        rsi = self._calculate_rsi(market, period=14)

        # Rate of change over ~1 week and ~1 month.
        roc_short = (market.iloc[-1] / market.iloc[-5] - 1) * 100
        roc_long = (market.iloc[-1] / market.iloc[-20] - 1) * 100

        # Momentum regime
        if rsi > 70 and roc_short > 0:
            regime = 'overbought'
        elif rsi < 30 and roc_short < 0:
            regime = 'oversold'
        elif roc_short > 2 and roc_long > 5:
            regime = 'strong_momentum'
        elif roc_short < -2 and roc_long < -5:
            regime = 'weak_momentum'
        else:
            regime = 'neutral_momentum'

        return {
            'regime': regime,
            'rsi': rsi,
            'roc_short': roc_short,
            'roc_long': roc_long
        }

    def _detect_correlation_regime(self, data: pd.DataFrame) -> str:
        """Classify the average pairwise correlation among the asset columns.

        BUG FIX: the original reduced a rolling-correlation panel with
        .sum().sum() (a scalar) and then called .iloc[-1] on it, which
        raised AttributeError at runtime.  We instead compute the plain
        correlation matrix over the last 30 return observations and average
        its off-diagonal entries.
        """
        # Asset columns only: the last two columns are 'Market'/'Volatility'.
        asset_returns = data.iloc[:, :-2].pct_change().dropna()
        corr_matrix = asset_returns.tail(30).corr()

        n_assets = len(corr_matrix)
        if n_assets < 2:
            return 'normal_correlation'

        # The diagonal contributes n ones; remove it before averaging.
        current_avg_corr = (corr_matrix.values.sum() - n_assets) / (n_assets * (n_assets - 1))

        if current_avg_corr > 0.7:
            return 'high_correlation'
        elif current_avg_corr < 0.3:
            return 'low_correlation'
        else:
            return 'normal_correlation'

    def _calculate_rsi(self, prices: pd.Series, period: int = 14) -> float:
        """Calculate the latest RSI value (simple-MA variant)."""
        delta = prices.diff()
        gain = (delta.where(delta > 0, 0)).rolling(period).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(period).mean()

        # With zero losses rs is +inf and RSI correctly saturates at 100.
        rs = gain / loss
        rsi = 100 - (100 / (1 + rs))

        return rsi.iloc[-1]

    def _calculate_market_breadth(self, data: pd.DataFrame) -> float:
        """Calculate market breadth (share of advancing assets on the last day)."""
        # Last-day returns for the asset columns only.
        returns = data.iloc[:, :-2].pct_change().iloc[-1]

        advancing = (returns > 0).sum()
        declining = (returns < 0).sum()

        return advancing / (advancing + declining) if (advancing + declining) > 0 else 0.5

    def _calculate_fear_greed_index(self, data: pd.DataFrame) -> float:
        """Simplified fear & greed index (0 = extreme fear, 100 = extreme greed)."""
        # Combine multiple indicators
        volatility = data['Volatility'].iloc[-1]
        momentum = self._detect_momentum_regime(data)['roc_short']
        breadth = self._calculate_market_breadth(data)

        # Normalize and combine
        vol_score = 1 - min(volatility / 40, 1)  # Lower vol = higher greed
        momentum_score = (momentum + 10) / 20  # Normalize to 0-1

        fear_greed = (vol_score + momentum_score + breadth) / 3

        return fear_greed * 100

    def _calculate_trend_strength(self, data: pd.DataFrame) -> float:
        """Calculate trend strength using an ADX-like indicator (0..100)."""
        market = data['Market']

        # Directional movement from 2-day highs/lows.
        high = market.rolling(2).max()
        low = market.rolling(2).min()

        plus_dm = (high - high.shift(1)).where(lambda x: x > 0, 0)
        minus_dm = (low.shift(1) - low).where(lambda x: x > 0, 0)

        # Smooth and normalize by local dispersion.
        period = 14
        plus_di = plus_dm.rolling(period).mean() / market.rolling(period).std()
        minus_di = minus_dm.rolling(period).mean() / market.rolling(period).std()

        # ADX-style directional index.
        dx = abs(plus_di - minus_di) / (plus_di + minus_di)
        adx = dx.rolling(period).mean().iloc[-1]

        # Fall back to a neutral 50 when the window is too short (NaN).
        return min(adx * 100, 100) if not np.isnan(adx) else 50

    def _calculate_volatility_percentile(self, data: pd.DataFrame) -> float:
        """Calculate current volatility percentile (delegates to the vol regime)."""
        volatility_regime = self._detect_volatility_regime(data)
        return volatility_regime['volatility_percentile']

    def _combine_regime_indicators(
        self,
        trend: Dict,
        volatility: Dict,
        momentum: Dict
    ) -> Tuple[str, float]:
        """Combine multiple indicators to determine the overall regime.

        Simple weighted vote: each contributing indicator proposes a regime
        with a weight; the heaviest proposal wins, confidence capped at 1.0.
        """
        regimes = []
        weights = []

        # Trend regime (only directional trends vote).
        if trend['regime'] in ['bull', 'bear']:
            regimes.append(trend['regime'])
            weights.append(abs(trend['trend_score']) * 10)

        # Volatility regime (only the high-vol state overrides).
        if volatility['regime'] == 'high_volatility':
            regimes.append('high_volatility')
            weights.append(volatility['volatility_percentile'] / 100)

        # Default when nothing votes.
        if not regimes:
            return 'sideways', 0.5

        # Heaviest vote wins.
        dominant_idx = np.argmax(weights)
        regime = regimes[dominant_idx]
        confidence = min(weights[dominant_idx], 1.0)

        return regime, confidence
|
||||
79
apps/stock/analytics/src/api/app.py
Normal file
79
apps/stock/analytics/src/api/app.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from contextlib import asynccontextmanager
|
||||
import logging
|
||||
from typing import Dict, Any
|
||||
|
||||
from .endpoints import optimization, analytics, models
|
||||
from ..analytics.performance import PerformanceAnalyzer
|
||||
from ..analytics.regime import RegimeDetector
|
||||
from ..optimization.portfolio_optimizer import PortfolioOptimizer
|
||||
|
||||
# Configure logging
# Root logger at INFO via basicConfig; module-level logger for service messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Global instances
# Module-level singletons created once at import time and handed to request
# handlers through the get_* dependency providers defined later in this file.
# NOTE(review): assumes these constructors do no heavy I/O at import — confirm.
performance_analyzer = PerformanceAnalyzer()
regime_detector = RegimeDetector()
portfolio_optimizer = PortfolioOptimizer()
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: log on startup, yield to the server, log on shutdown."""
    logger.info("Starting Trading Analytics Service...")
    # Startup work (connections, model loading, etc.) would go here.
    yield
    logger.info("Shutting down Trading Analytics Service...")
# Create FastAPI app
# The lifespan hook above wires startup/shutdown logging into the server.
app = FastAPI(
    title="Trading Analytics Service",
    description="Complex analytics, optimization, and ML inference for trading",
    version="0.1.0",
    lifespan=lifespan
)

# Configure CORS
# SECURITY: wildcard origins combined with allow_credentials=True is unsafe
# outside local development — restrict allow_origins before production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Configure appropriately for production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
# Each endpoint module mounts under its own URL prefix and OpenAPI tag.
app.include_router(optimization.router, prefix="/optimize", tags=["optimization"])
app.include_router(analytics.router, prefix="/analytics", tags=["analytics"])
app.include_router(models.router, prefix="/models", tags=["models"])
@app.get("/")
async def root():
    """Service banner: name, operational status, and version."""
    return dict(
        service="Trading Analytics",
        status="operational",
        version="0.1.0",
    )
@app.get("/health")
async def health_check():
    """Liveness probe reporting a static per-component status map."""
    component_names = ("performance_analyzer", "regime_detector", "portfolio_optimizer")
    return {
        "status": "healthy",
        "components": {name: "operational" for name in component_names},
    }
# Dependency injection
# FastAPI Depends() providers that hand out the module-level singletons
# defined earlier in this file.

def get_performance_analyzer():
    """Provide the shared PerformanceAnalyzer instance."""
    return performance_analyzer

def get_regime_detector():
    """Provide the shared RegimeDetector instance."""
    return regime_detector

def get_portfolio_optimizer():
    """Provide the shared PortfolioOptimizer instance."""
    return portfolio_optimizer
163
apps/stock/analytics/src/api/endpoints/analytics.py
Normal file
163
apps/stock/analytics/src/api/endpoints/analytics.py
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
from fastapi import APIRouter, HTTPException, Query, Depends
|
||||
from datetime import datetime, date
|
||||
from typing import List, Optional
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
from ...analytics.performance import PerformanceAnalyzer
|
||||
from ...analytics.regime import RegimeDetector
|
||||
from ..app import get_performance_analyzer, get_regime_detector
|
||||
|
||||
# Router for the analytics endpoints; mounted by the app under its own prefix.
router = APIRouter()
@router.get("/performance/{portfolio_id}")
async def get_performance_metrics(
    portfolio_id: str,
    start_date: datetime = Query(..., description="Start date for analysis"),
    end_date: datetime = Query(..., description="End date for analysis"),
    analyzer: PerformanceAnalyzer = Depends(get_performance_analyzer)
):
    """Calculate comprehensive performance metrics for a portfolio.

    Any analyzer failure is surfaced as a 500 with the underlying message.
    """
    try:
        # In real implementation, would fetch data from database
        # For now, using mock data
        return analyzer.calculate_metrics(
            portfolio_id=portfolio_id,
            start_date=start_date,
            end_date=end_date,
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to calculate performance metrics: {str(e)}")
@router.get("/risk/{portfolio_id}")
async def get_risk_metrics(
    portfolio_id: str,
    window: int = Query(252, description="Rolling window for risk calculations"),
    analyzer: PerformanceAnalyzer = Depends(get_performance_analyzer)
):
    """Calculate risk metrics including VaR and CVaR.

    Delegates to the shared analyzer; failures become a 500 response.
    """
    try:
        return analyzer.calculate_risk_metrics(portfolio_id=portfolio_id, window=window)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to calculate risk metrics: {str(e)}")
@router.get("/regime")
async def detect_market_regime(
    lookback_days: int = Query(60, description="Days to look back for regime detection"),
    detector: RegimeDetector = Depends(get_regime_detector)
):
    """Detect current market regime using various indicators.

    Returns the detector's regime/confidence/indicators fields plus a
    UTC timestamp.
    """
    try:
        regime = detector.detect_current_regime(lookback_days=lookback_days)

        payload = {field: regime[field] for field in ("regime", "confidence", "indicators")}
        # NOTE(review): datetime.utcnow() is naive and deprecated in 3.12+;
        # switching to datetime.now(timezone.utc) changes the isoformat output.
        payload["timestamp"] = datetime.utcnow().isoformat()
        return payload

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to detect market regime: {str(e)}")
@router.post("/correlation")
async def calculate_correlation_matrix(
    symbols: List[str],
    start_date: Optional[date] = None,
    end_date: Optional[date] = None,
    method: str = Query("pearson", pattern="^(pearson|spearman|kendall)$")
):
    """Calculate correlation matrix for given symbols.

    Currently a mock: a random PSD matrix is built and normalized to unit
    diagonal; `method`, `start_date` and `end_date` are echoed but unused.
    """
    try:
        # In real implementation, would fetch price data
        # For now, return mock correlation matrix
        n = len(symbols)

        # Generate realistic correlation matrix: A @ A.T is positive
        # semi-definite by construction; fixed seed keeps it reproducible.
        np.random.seed(42)
        A = np.random.randn(n, n)
        cov = A @ A.T

        # Normalize to correlation: divide out the diagonal scale.
        D = np.sqrt(np.diag(np.diag(cov)))
        D_inv = np.linalg.inv(D)
        corr = D_inv @ cov @ D_inv

        return {
            "symbols": symbols,
            "matrix": corr.tolist(),
            "method": method,
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to calculate correlation: {str(e)}")
@router.get("/backtest/{backtest_id}")
async def analyze_backtest_results(
    backtest_id: str,
    analyzer: PerformanceAnalyzer = Depends(get_performance_analyzer)
):
    """Analyze results from a completed backtest.

    Repackages the analyzer's section dicts under the backtest id.
    """
    try:
        analysis = analyzer.analyze_backtest(backtest_id)

        result = {"backtest_id": backtest_id}
        for section in ("metrics", "statistics", "risk_analysis", "trade_analysis"):
            result[section] = analysis[section]
        return result

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to analyze backtest: {str(e)}")
@router.post("/attribution")
async def performance_attribution(
    portfolio_id: str,
    benchmark: str,
    start_date: date,
    end_date: date,
    method: str = Query("brinson", pattern="^(brinson|factor|risk)$")
):
    """Perform performance attribution analysis.

    Currently returns fixed placeholder effects; `method` is echoed back.
    """
    try:
        # Placeholder for attribution analysis
        effects = {
            "allocation_effect": 0.0023,
            "selection_effect": 0.0045,
            "interaction_effect": 0.0001,
            "total_effect": 0.0069,
        }
        return {
            "portfolio_id": portfolio_id,
            "benchmark": benchmark,
            "period": {
                "start": start_date.isoformat(),
                "end": end_date.isoformat(),
            },
            "method": method,
            "attribution": effects,
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to perform attribution: {str(e)}")
182
apps/stock/analytics/src/api/endpoints/models.py
Normal file
182
apps/stock/analytics/src/api/endpoints/models.py
Normal file
|
|
@ -0,0 +1,182 @@
|
|||
from fastapi import APIRouter, HTTPException, UploadFile, File
|
||||
from pydantic import BaseModel
|
||||
from typing import Dict, Any, List, Optional
|
||||
import numpy as np
|
||||
import onnxruntime as ort
|
||||
import json
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)

router = APIRouter()

# In-memory model storage (in production, use proper model registry)
# Maps model_id -> {'session': onnxruntime InferenceSession, 'input_features',
# 'type', 'version', 'metadata'}; written by /load, read by the prediction
# and listing endpoints in this module.
# NOTE(review): plain module-level dict shared across requests — confirm
# single-process deployment or add locking.
loaded_models = {}
class PredictionRequest(BaseModel):
    """Single-row inference request: model id plus named feature values."""
    # NOTE(review): "model_"-prefixed fields trigger pydantic v2's protected
    # namespace warning — confirm the pydantic version in use.
    model_id: str
    features: Dict[str, float]


class PredictionResponse(BaseModel):
    """Inference result; `probability` is populated only for classification models."""
    model_id: str
    prediction: float
    probability: Optional[Dict[str, float]] = None
    metadata: Optional[Dict[str, Any]] = None


class ModelInfo(BaseModel):
    """Registry entry describing a loaded model (as returned by /models/list)."""
    model_id: str
    name: str
    version: str
    type: str
    input_features: List[str]
    # One shape per model output. ONNX dims can be ints, None, or symbolic
    # strings, and list_models passes a list of shapes — the original
    # List[int] annotation made response validation fail for any real model.
    output_shape: List[Any]
    metadata: Dict[str, Any]
@router.post("/predict", response_model=PredictionResponse)
async def predict(request: PredictionRequest):
    """Run inference on a loaded model.

    Builds a single-row float32 feature vector in the model's declared feature
    order (missing features default to 0.0) and runs the ONNX session. For
    classification models with a multi-class output, per-class scores are
    returned and the prediction is the argmax class index; otherwise the first
    output value is the prediction.

    Raises:
        HTTPException 404: unknown model_id.
        HTTPException 500: inference failure.
    """
    # Validate before the try block: the original raised the 404 inside `try`
    # and the blanket `except Exception` re-wrapped it as a 500.
    if request.model_id not in loaded_models:
        raise HTTPException(status_code=404, detail=f"Model {request.model_id} not found")

    try:
        model_info = loaded_models[request.model_id]
        session = model_info['session']

        # Prepare input: one row, columns in the model's feature order
        input_features = model_info['input_features']
        input_array = np.array([[request.features.get(f, 0.0) for f in input_features]], dtype=np.float32)

        # Run inference
        input_name = session.get_inputs()[0].name
        output = session.run(None, {input_name: input_array})

        # First output's first row: a scalar (regression) or a class-score
        # vector (classification).
        raw = np.asarray(output[0][0])

        probability = None
        if model_info['type'] == 'classification' and raw.ndim > 0 and raw.size > 1:
            # Multi-class output: report per-class scores and predict argmax.
            # (The original called float() on the whole vector, which raised
            # before probabilities were ever computed.)
            probability = {f"class_{i}": float(p) for i, p in enumerate(raw)}
            prediction = float(np.argmax(raw))
        else:
            prediction = float(raw.reshape(-1)[0]) if raw.ndim > 0 else float(raw)

        return PredictionResponse(
            model_id=request.model_id,
            prediction=prediction,
            probability=probability,
            metadata={
                "model_version": model_info['version'],
                "timestamp": np.datetime64('now').tolist()
            }
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Prediction failed: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")
@router.post("/load")
async def load_model(
    model_id: str,
    model_file: UploadFile = File(...),
    metadata: Optional[str] = None  # was `str = None` — annotation fixed
):
    """Load an ONNX model for inference.

    Args:
        model_id: key under which the model is registered (replaces any
            previous model with the same id).
        model_file: serialized ONNX model bytes.
        metadata: optional JSON string with 'feature_names', 'model_type',
            'version', etc.

    Raises:
        HTTPException 400: malformed metadata JSON.
        HTTPException 500: model cannot be loaded.
    """
    try:
        # Read model file fully into memory; fine for moderately sized models.
        content = await model_file.read()

        # Create ONNX session
        session = ort.InferenceSession(content)

        # Parse metadata; surface malformed JSON as a client error rather than
        # letting json.JSONDecodeError fall into the blanket 500 below.
        try:
            model_metadata = json.loads(metadata) if metadata else {}
        except json.JSONDecodeError as e:
            raise HTTPException(status_code=400, detail=f"Invalid metadata JSON: {str(e)}")

        # Extract model info from the session graph
        input_features = [inp.name for inp in session.get_inputs()]
        output_shape = [out.shape for out in session.get_outputs()]

        # Store model
        loaded_models[model_id] = {
            'session': session,
            'input_features': model_metadata.get('feature_names', input_features),
            'type': model_metadata.get('model_type', 'regression'),
            'version': model_metadata.get('version', '1.0'),
            'metadata': model_metadata
        }

        return {
            "message": f"Model {model_id} loaded successfully",
            "input_features": input_features,
            "output_shape": output_shape
        }

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Failed to load model: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Failed to load model: {str(e)}")
@router.get("/list", response_model=List[ModelInfo])
async def list_models():
    """List all loaded models with their registry metadata and output shapes."""
    return [
        ModelInfo(
            model_id=model_id,
            name=info['metadata'].get('name', model_id),
            version=info['version'],
            type=info['type'],
            input_features=info['input_features'],
            output_shape=[out.shape for out in info['session'].get_outputs()],
            metadata=info['metadata'],
        )
        for model_id, info in loaded_models.items()
    ]
@router.delete("/{model_id}")
async def unload_model(model_id: str):
    """Unload a model from memory; 404 if the id is not registered."""
    if model_id not in loaded_models:
        raise HTTPException(status_code=404, detail=f"Model {model_id} not found")

    loaded_models.pop(model_id)
    return {"message": f"Model {model_id} unloaded successfully"}
@router.post("/batch_predict")
async def batch_predict(
    model_id: str,
    features: List[Dict[str, float]]
):
    """Run batch predictions by invoking the single-row predict endpoint per item.

    Raises:
        HTTPException 404: unknown model_id.
        HTTPException 500: any per-item prediction failure.
    """
    # Validate outside the try block so a missing model surfaces as 404 —
    # the original raised the 404 inside `try` and the blanket
    # `except Exception` converted it to a 500.
    if model_id not in loaded_models:
        raise HTTPException(status_code=404, detail=f"Model {model_id} not found")

    try:
        predictions = []
        for feature_set in features:
            request = PredictionRequest(model_id=model_id, features=feature_set)
            result = await predict(request)
            predictions.append(result.dict())

        return {
            "model_id": model_id,
            "predictions": predictions,
            "count": len(predictions)
        }

    except HTTPException:
        # Preserve status codes raised by predict() itself.
        raise
    except Exception as e:
        logger.error(f"Batch prediction failed: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Batch prediction failed: {str(e)}")
120
apps/stock/analytics/src/api/endpoints/optimization.py
Normal file
120
apps/stock/analytics/src/api/endpoints/optimization.py
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
from fastapi import APIRouter, HTTPException, Depends
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Optional, Dict
|
||||
import numpy as np
|
||||
|
||||
from ...optimization.portfolio_optimizer import PortfolioOptimizer
|
||||
from ..app import get_portfolio_optimizer
|
||||
|
||||
# Router for the optimization endpoints; mounted by the app under /optimize.
router = APIRouter()

class OptimizationConstraints(BaseModel):
    """Optional bounds and targets applied during portfolio optimization."""
    # Per-asset weight bounds, both restricted to [0, 1].
    min_weight: Optional[float] = Field(0.0, ge=0.0, le=1.0)
    max_weight: Optional[float] = Field(1.0, ge=0.0, le=1.0)
    target_return: Optional[float] = None
    max_risk: Optional[float] = None

class PortfolioOptimizationRequest(BaseModel):
    """Optimization input: symbols, a returns matrix, constraints, and method.

    `returns` is row-major with one column per symbol (the /portfolio handler
    validates len(symbols) against the matrix's column count).
    """
    symbols: List[str]
    returns: List[List[float]]
    constraints: Optional[OptimizationConstraints] = None
    # Method restricted to the optimizer's supported strategies.
    method: str = Field("mean_variance", pattern="^(mean_variance|min_variance|max_sharpe|risk_parity|black_litterman)$")

class PortfolioWeights(BaseModel):
    """Optimized weights plus summary statistics, aligned with `symbols`."""
    symbols: List[str]
    weights: List[float]
    expected_return: float
    expected_risk: float
    sharpe_ratio: float
@router.post("/portfolio", response_model=PortfolioWeights)
async def optimize_portfolio(
    request: PortfolioOptimizationRequest,
    optimizer: PortfolioOptimizer = Depends(get_portfolio_optimizer)
):
    """Optimize portfolio weights using various methods.

    The returns payload must be a rectangular matrix with one column per
    symbol.

    Raises:
        HTTPException 400: malformed payload or optimizer ValueError.
        HTTPException 500: unexpected optimizer failure.
    """
    try:
        # Convert returns to numpy array
        returns_array = np.array(request.returns)

        # Validate dimensions. A ragged or empty payload produces a non-2-D
        # array, which previously crashed on shape[1] and surfaced as a 500.
        if returns_array.ndim != 2 or len(request.symbols) != returns_array.shape[1]:
            raise HTTPException(
                status_code=400,
                detail="Number of symbols must match number of return columns"
            )

        # Run optimization
        result = optimizer.optimize(
            returns=returns_array,
            method=request.method,
            constraints=request.constraints.dict() if request.constraints else None
        )

        return PortfolioWeights(
            symbols=request.symbols,
            weights=result['weights'].tolist(),
            expected_return=float(result['expected_return']),
            expected_risk=float(result['expected_risk']),
            sharpe_ratio=float(result['sharpe_ratio'])
        )

    except HTTPException:
        # HTTPException is an Exception subclass — without this clause the
        # blanket handler below re-wrapped our own 400 as a 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Optimization failed: {str(e)}")
@router.post("/efficient_frontier")
async def calculate_efficient_frontier(
    request: PortfolioOptimizationRequest,
    num_portfolios: int = 100,
    optimizer: PortfolioOptimizer = Depends(get_portfolio_optimizer)
):
    """Calculate the efficient frontier for a set of assets.

    Delegates to the optimizer; failures become a 500 response.
    """
    try:
        frontier = optimizer.calculate_efficient_frontier(
            returns=np.array(request.returns),
            num_portfolios=num_portfolios
        )
        return {"symbols": request.symbols, "frontier": frontier}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Failed to calculate efficient frontier: {str(e)}")
@router.post("/rebalance")
async def suggest_rebalance(
    current_weights: Dict[str, float],
    target_weights: Dict[str, float],
    constraints: Optional[Dict[str, float]] = None
):
    """Suggest trades to rebalance portfolio from current to target weights.

    Symbols present only in current_weights are treated as target 0 (full
    exit) — the original iterated only target keys, so dropped positions were
    never sold. Symbols present only in target_weights are treated as
    current 0 (new position). `constraints` is accepted but currently unused.
    """
    try:
        trades = {}
        # Union of both books so positions absent from the target get closed.
        for symbol in set(current_weights) | set(target_weights):
            diff = target_weights.get(symbol, 0.0) - current_weights.get(symbol, 0.0)

            if abs(diff) > 0.001:  # Ignore tiny differences
                trades[symbol] = diff

        return {
            "trades": trades,
            "total_turnover": sum(abs(t) for t in trades.values())
        }

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Rebalance calculation failed: {str(e)}")
481
apps/stock/analytics/src/ml/feature_engineering.py
Normal file
481
apps/stock/analytics/src/ml/feature_engineering.py
Normal file
|
|
@ -0,0 +1,481 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
from typing import Dict, List, Tuple, Optional, Union
|
||||
import talib
|
||||
from scipy import stats
|
||||
from sklearn.preprocessing import StandardScaler, RobustScaler
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class FeatureEngineer:
|
||||
"""
|
||||
Feature engineering for financial ML models
|
||||
"""
|
||||
|
||||
def __init__(self, lookback_periods: Optional[List[int]] = None):
    """Initialize with the rolling-window lengths used across features.

    Args:
        lookback_periods: window sizes in bars; defaults to
            [5, 10, 20, 50, 100, 200]. Note that an explicitly passed empty
            list also falls back to the default (``or`` semantics).
    """
    self.lookback_periods = lookback_periods or [5, 10, 20, 50, 100, 200]
    self.scaler = RobustScaler()  # Robust to outliers
    # Populated by create_features() with the generated column names.
    self.feature_names: List[str] = []
def create_features(
    self,
    data: pd.DataFrame,
    include_technical: bool = True,
    include_microstructure: bool = True,
    include_fundamental: bool = False,
    include_sentiment: bool = False
) -> pd.DataFrame:
    """Create comprehensive feature set for ML models.

    Args:
        data: OHLC(V) frame; optional 'earnings', 'sentiment', 'timestamp',
            'symbol' columns enable additional feature families.
        include_*: toggles for the optional families (fundamental and
            sentiment additionally require their source columns).

    Returns:
        Feature DataFrame aligned to ``data.index``; also records the column
        names in ``self.feature_names``.
    """
    # Collect each feature family and concatenate ONCE at the end: the
    # original re-ran pd.concat on the growing frame per family, copying
    # everything each time (quadratic). One concat is linear and yields the
    # same columns in the same order.
    parts: List[pd.DataFrame] = []

    # Price-based features
    logger.info("Creating price-based features...")
    parts.append(self._create_price_features(data))

    # Technical indicators
    if include_technical:
        logger.info("Creating technical indicators...")
        parts.append(self._create_technical_features(data))

    # Microstructure features
    if include_microstructure:
        logger.info("Creating microstructure features...")
        parts.append(self._create_microstructure_features(data))

    # Fundamental features (if available)
    if include_fundamental and 'earnings' in data.columns:
        logger.info("Creating fundamental features...")
        parts.append(self._create_fundamental_features(data))

    # Sentiment features (if available)
    if include_sentiment and 'sentiment' in data.columns:
        logger.info("Creating sentiment features...")
        parts.append(self._create_sentiment_features(data))

    # Time-based features
    logger.info("Creating time-based features...")
    parts.append(self._create_time_features(data))

    # Cross-sectional features (if multiple symbols)
    if 'symbol' in data.columns and data['symbol'].nunique() > 1:
        logger.info("Creating cross-sectional features...")
        parts.append(self._create_cross_sectional_features(data))

    features = pd.concat(parts, axis=1)

    # Store feature names
    self.feature_names = features.columns.tolist()

    # Handle missing values
    features = self._handle_missing_values(features)

    return features
def _create_price_features(self, data: pd.DataFrame) -> pd.DataFrame:
    """Create price-based features.

    Expects OHLC columns ('open', 'high', 'low', 'close'); volume-derived
    columns are added only when a 'volume' column exists. Rolling windows
    produce leading NaNs until enough history accumulates.
    """
    features = pd.DataFrame(index=data.index)

    # Returns at different horizons
    for period in self.lookback_periods:
        features[f'returns_{period}'] = data['close'].pct_change(period)
        features[f'log_returns_{period}'] = np.log(data['close'] / data['close'].shift(period))

    # Price ratios
    # NOTE(review): assumes low/open are never zero — confirm upstream
    # cleaning, otherwise these divisions emit inf.
    features['high_low_ratio'] = data['high'] / data['low']
    features['close_open_ratio'] = data['close'] / data['open']

    # Price position in range (0 = at low, 1 = at high); a zero-range bar
    # is mapped to NaN instead of dividing by zero
    features['price_position'] = (data['close'] - data['low']) / (data['high'] - data['low']).replace(0, np.nan)

    # Volume-weighted metrics
    if 'volume' in data.columns:
        features['vwap'] = (data['close'] * data['volume']).rolling(20).sum() / data['volume'].rolling(20).sum()
        features['volume_ratio'] = data['volume'] / data['volume'].rolling(20).mean()
        features['dollar_volume'] = data['close'] * data['volume']

    # Volatility measures: return std annualized by sqrt(252) trading days;
    # realized variance is the un-annualized rolling sum of squared returns
    for period in [5, 20, 50]:
        features[f'volatility_{period}'] = data['close'].pct_change().rolling(period).std() * np.sqrt(252)
        features[f'realized_var_{period}'] = (data['close'].pct_change() ** 2).rolling(period).sum()

    # Price momentum over ~1/3/6 months of trading days (20/60/120 bars)
    features['momentum_1m'] = data['close'] / data['close'].shift(20) - 1
    features['momentum_3m'] = data['close'] / data['close'].shift(60) - 1
    features['momentum_6m'] = data['close'] / data['close'].shift(120) - 1

    # Relative strength: ratio of short to long simple moving averages
    for short, long in [(10, 30), (20, 50), (50, 200)]:
        features[f'rs_{short}_{long}'] = (
            data['close'].rolling(short).mean() /
            data['close'].rolling(long).mean()
        )

    return features
def _create_technical_features(self, data: pd.DataFrame) -> pd.DataFrame:
    """Create technical indicator features via TA-Lib.

    Operates on the raw numpy values of the OHLC(V) columns; each indicator
    is written back as one or more columns aligned to ``data.index``.
    TA-Lib emits NaN over every indicator's warm-up span.
    """
    features = pd.DataFrame(index=data.index)

    # Moving averages (simple and exponential) for every configured window,
    # plus close/SMA as a normalized trend measure
    for period in self.lookback_periods:
        sma = talib.SMA(data['close'].values, timeperiod=period)
        ema = talib.EMA(data['close'].values, timeperiod=period)
        features[f'sma_{period}'] = sma
        features[f'ema_{period}'] = ema
        features[f'price_to_sma_{period}'] = data['close'] / sma

    # Bollinger Bands at 2 standard deviations; band width and the close's
    # position within the band
    for period in [20, 50]:
        upper, middle, lower = talib.BBANDS(
            data['close'].values,
            timeperiod=period,
            nbdevup=2,
            nbdevdn=2
        )
        features[f'bb_upper_{period}'] = upper
        features[f'bb_lower_{period}'] = lower
        features[f'bb_width_{period}'] = (upper - lower) / middle
        features[f'bb_position_{period}'] = (data['close'] - lower) / (upper - lower)

    # RSI at the standard 14-bar period and a slower 28-bar variant
    for period in [14, 28]:
        features[f'rsi_{period}'] = talib.RSI(data['close'].values, timeperiod=period)

    # MACD with TA-Lib default parameters
    macd, signal, hist = talib.MACD(data['close'].values)
    features['macd'] = macd
    features['macd_signal'] = signal
    features['macd_hist'] = hist

    # Stochastic oscillator (slow %K / %D, TA-Lib defaults)
    slowk, slowd = talib.STOCH(
        data['high'].values,
        data['low'].values,
        data['close'].values
    )
    features['stoch_k'] = slowk
    features['stoch_d'] = slowd

    # ADX (Average Directional Index) — trend strength, default period
    features['adx'] = talib.ADX(
        data['high'].values,
        data['low'].values,
        data['close'].values
    )

    # ATR (Average True Range) — volatility in price units
    for period in [14, 20]:
        features[f'atr_{period}'] = talib.ATR(
            data['high'].values,
            data['low'].values,
            data['close'].values,
            timeperiod=period
        )

    # CCI (Commodity Channel Index)
    features['cci'] = talib.CCI(
        data['high'].values,
        data['low'].values,
        data['close'].values
    )

    # Williams %R
    features['williams_r'] = talib.WILLR(
        data['high'].values,
        data['low'].values,
        data['close'].values
    )

    # OBV (On Balance Volume) plus a 20-period EMA smoothing of it
    if 'volume' in data.columns:
        features['obv'] = talib.OBV(data['close'].values, data['volume'].values)
        features['obv_ema'] = talib.EMA(features['obv'].values, timeperiod=20)

    return features
def _create_microstructure_features(self, data: pd.DataFrame) -> pd.DataFrame:
    """Create market microstructure features: spread proxies, illiquidity,
    price-impact proxies, and intraday timing.

    Unlike the original, this no longer writes 'hour'/'minute' columns back
    into the caller's DataFrame, and `returns` is always defined before the
    sections that use it (it was previously bound only when len(data) > 2).
    """
    features = pd.DataFrame(index=data.index)

    # Spread estimation (using high-low)
    features['hl_spread'] = 2 * (data['high'] - data['low']) / (data['high'] + data['low'])
    features['hl_spread_ma'] = features['hl_spread'].rolling(20).mean()

    # Close-to-close returns used by several sections below
    returns = data['close'].pct_change()

    # Roll's implied spread (sqrt of minus the serial covariance; NaN where
    # the covariance is positive)
    if len(data) > 2:
        features['roll_spread'] = 2 * np.sqrt(-returns.rolling(20).cov(returns.shift(1)))

    # Amihud illiquidity
    if 'volume' in data.columns:
        features['amihud'] = (returns.abs() / (data['volume'] * data['close'])).rolling(20).mean() * 1e6
        features['log_amihud'] = np.log(features['amihud'].replace(0, np.nan) + 1e-10)

    # Kyle's lambda (price impact)
    if 'volume' in data.columns:
        # Simplified version: rolling correlation scaled by the std ratio as
        # a proxy. Inputs hoisted out of the loop (they don't depend on period).
        price_changes = data['close'].pct_change()
        signed_volume = data['volume'] * np.sign(price_changes)
        for period in [20, 50]:
            features[f'kyle_lambda_{period}'] = (
                price_changes.rolling(period).corr(signed_volume) *
                price_changes.rolling(period).std() /
                signed_volume.rolling(period).std()
            )

    # Intraday patterns — derived from a local parse of the timestamp column
    # so the input frame is not mutated
    if 'timestamp' in data.columns:
        ts = pd.to_datetime(data['timestamp'])
        hour = ts.dt.hour
        minute = ts.dt.minute

        # Time since market open (assuming 9:30 AM open)
        features['minutes_since_open'] = (hour - 9) * 60 + minute - 30
        features['minutes_to_close'] = 390 - features['minutes_since_open']  # 6.5 hour day

        # Normalized time of day
        features['time_of_day_norm'] = features['minutes_since_open'] / 390

    # Order flow imbalance proxy (epsilon keeps zero-range bars finite)
    features['high_low_imbalance'] = (data['high'] - data['close']) / (data['close'] - data['low'] + 1e-10)
    features['close_position_in_range'] = (data['close'] - data['low']) / (data['high'] - data['low'] + 1e-10)

    return features
def _create_fundamental_features(self, data: pd.DataFrame) -> pd.DataFrame:
    """Create fundamental analysis features.

    Each ratio is emitted only when every column it needs is present — the
    original guarded only some columns and indexed 'shareholders_equity',
    'revenue' and 'shares_outstanding' unchecked, raising KeyError when the
    frame lacked them.
    """
    features = pd.DataFrame(index=data.index)
    cols = set(data.columns)

    # Price to earnings
    if 'earnings' in cols:
        features['pe_ratio'] = data['close'] / data['earnings']
        features['earnings_yield'] = data['earnings'] / data['close']
        features['pe_relative'] = features['pe_ratio'] / features['pe_ratio'].rolling(252).mean()

    # Price to book
    if 'book_value' in cols:
        features['pb_ratio'] = data['close'] / data['book_value']
        features['pb_relative'] = features['pb_ratio'] / features['pb_ratio'].rolling(252).mean()

    # Dividend yield
    if 'dividends' in cols:
        features['dividend_yield'] = data['dividends'].rolling(252).sum() / data['close']
        features['dividend_growth'] = data['dividends'].pct_change(252)

    # Sales/Revenue metrics
    if 'revenue' in cols:
        if 'shares_outstanding' in cols:
            features['price_to_sales'] = data['close'] * data['shares_outstanding'] / data['revenue']
        features['revenue_growth'] = data['revenue'].pct_change(4)  # YoY for quarterly

    # Profitability metrics
    if 'net_income' in cols and 'total_assets' in cols:
        if 'shareholders_equity' in cols:
            features['roe'] = data['net_income'] / data['shareholders_equity']
        features['roa'] = data['net_income'] / data['total_assets']
        if 'revenue' in cols:
            features['profit_margin'] = data['net_income'] / data['revenue']

    return features
def _create_sentiment_features(self, data: pd.DataFrame) -> pd.DataFrame:
    """Create sentiment-based features from sentiment/news/social columns when present."""
    features = pd.DataFrame(index=data.index)

    if 'sentiment' in data.columns:
        sentiment = data['sentiment']
        rolling_mean = sentiment.rolling(20).mean()
        rolling_std = sentiment.rolling(20).std()

        # Raw sentiment and its 20-bar rolling moments
        features['sentiment'] = sentiment
        features['sentiment_ma'] = rolling_mean
        features['sentiment_std'] = rolling_std

        # Sentiment momentum
        features['sentiment_change'] = sentiment.pct_change(5)
        features['sentiment_momentum'] = sentiment - sentiment.shift(20)

        # Sentiment extremes (z-score against the 20-bar window)
        features['sentiment_zscore'] = (sentiment - rolling_mean) / rolling_std

        # Divergence between the sentiment and price z-scores
        close = data['close']
        price_zscore = (close - close.rolling(20).mean()) / close.rolling(20).std()
        features['sentiment_price_divergence'] = features['sentiment_zscore'] - price_zscore

    # News volume features
    if 'news_count' in data.columns:
        news = data['news_count']
        features['news_volume'] = news
        features['news_volume_ma'] = news.rolling(5).mean()
        features['news_spike'] = news / features['news_volume_ma']

    # Social media features
    if 'twitter_mentions' in data.columns:
        mentions = data['twitter_mentions']
        features['social_volume'] = mentions
        features['social_momentum'] = mentions.pct_change(1)
        features['social_vs_avg'] = mentions / mentions.rolling(20).mean()

    return features
def _create_time_features(self, data: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Create time-based features"""
|
||||
features = pd.DataFrame(index=data.index)
|
||||
|
||||
if 'timestamp' in data.columns:
|
||||
timestamps = pd.to_datetime(data['timestamp'])
|
||||
|
||||
# Day of week
|
||||
features['day_of_week'] = timestamps.dt.dayofweek
|
||||
features['is_monday'] = (features['day_of_week'] == 0).astype(int)
|
||||
features['is_friday'] = (features['day_of_week'] == 4).astype(int)
|
||||
|
||||
# Month
|
||||
features['month'] = timestamps.dt.month
|
||||
features['is_quarter_end'] = timestamps.dt.month.isin([3, 6, 9, 12]).astype(int)
|
||||
features['is_year_end'] = timestamps.dt.month.eq(12).astype(int)
|
||||
|
||||
# Trading day in month
|
||||
features['trading_day_of_month'] = timestamps.dt.day
|
||||
features['trading_day_of_year'] = timestamps.dt.dayofyear
|
||||
|
||||
# Seasonality features
|
||||
features['sin_day_of_year'] = np.sin(2 * np.pi * features['trading_day_of_year'] / 365)
|
||||
features['cos_day_of_year'] = np.cos(2 * np.pi * features['trading_day_of_year'] / 365)
|
||||
|
||||
# Options expiration week (third Friday)
|
||||
features['is_opex_week'] = self._is_options_expiration_week(timestamps)
|
||||
|
||||
# Fed meeting weeks (approximate)
|
||||
features['is_fed_week'] = self._is_fed_meeting_week(timestamps)
|
||||
|
||||
return features
|
||||
|
||||
def _create_cross_sectional_features(self, data: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Create features comparing across multiple symbols"""
|
||||
features = pd.DataFrame(index=data.index)
|
||||
|
||||
# Calculate market averages
|
||||
market_returns = data.groupby('timestamp')['close'].mean().pct_change()
|
||||
market_volume = data.groupby('timestamp')['volume'].mean()
|
||||
|
||||
# Relative performance
|
||||
data['returns'] = data.groupby('symbol')['close'].pct_change()
|
||||
features['relative_returns'] = data['returns'] - market_returns[data['timestamp']].values
|
||||
features['relative_volume'] = data['volume'] / market_volume[data['timestamp']].values
|
||||
|
||||
# Sector/market correlation
|
||||
for period in [20, 50]:
|
||||
rolling_corr = data.groupby('symbol')['returns'].rolling(period).corr(market_returns)
|
||||
features[f'market_correlation_{period}'] = rolling_corr
|
||||
|
||||
# Cross-sectional momentum
|
||||
features['cross_sectional_rank'] = data.groupby('timestamp')['returns'].rank(pct=True)
|
||||
|
||||
return features
|
||||
|
||||
def _handle_missing_values(self, features: pd.DataFrame) -> pd.DataFrame:
|
||||
"""Handle missing values in features"""
|
||||
# Forward fill for small gaps
|
||||
features = features.fillna(method='ffill', limit=5)
|
||||
|
||||
# For remaining NaNs, use median of non-missing values
|
||||
for col in features.columns:
|
||||
if features[col].isna().any():
|
||||
median_val = features[col].median()
|
||||
features[col].fillna(median_val, inplace=True)
|
||||
|
||||
# Replace any infinities
|
||||
features = features.replace([np.inf, -np.inf], np.nan)
|
||||
features = features.fillna(0)
|
||||
|
||||
return features
|
||||
|
||||
def _is_options_expiration_week(self, timestamps: pd.Series) -> pd.Series:
|
||||
"""Identify options expiration weeks (third Friday of month)"""
|
||||
# This is a simplified version
|
||||
is_third_week = (timestamps.dt.day >= 15) & (timestamps.dt.day <= 21)
|
||||
is_friday = timestamps.dt.dayofweek == 4
|
||||
return (is_third_week & is_friday).astype(int)
|
||||
|
||||
def _is_fed_meeting_week(self, timestamps: pd.Series) -> pd.Series:
|
||||
"""Identify approximate Fed meeting weeks"""
|
||||
# Fed typically meets 8 times per year, roughly every 6 weeks
|
||||
# This is a simplified approximation
|
||||
week_of_year = timestamps.dt.isocalendar().week
|
||||
return (week_of_year % 6 == 0).astype(int)
|
||||
|
||||
def transform_features(
    self,
    features: pd.DataFrame,
    method: str = 'robust',
    clip_outliers: bool = True,
    clip_quantile: float = 0.01
) -> pd.DataFrame:
    """Scale features for ML models, optionally clipping tail outliers first.

    method: 'robust' (median/IQR) or 'standard' (mean/std) scaling.
    clip_outliers: winsorize each column to [clip_quantile, 1 - clip_quantile].
    Raises ValueError for an unknown method. The fitted scaler is kept on
    ``self.scaler`` so the same transform can be reapplied later.
    """
    # Winsorize per column before fitting the scaler
    if clip_outliers:
        low = features.quantile(clip_quantile)
        high = features.quantile(1 - clip_quantile)
        working = features.clip(lower=low, upper=high, axis=1)
    else:
        working = features.copy()

    if method == 'robust':
        scaler = RobustScaler()
    elif method == 'standard':
        scaler = StandardScaler()
    else:
        raise ValueError(f"Unknown scaling method: {method}")

    transformed = pd.DataFrame(
        scaler.fit_transform(working),
        index=features.index,
        columns=features.columns,
    )

    # Keep the fitted scaler for reuse on new data
    self.scaler = scaler
    return transformed
||||
def get_feature_importance(
    self,
    features: pd.DataFrame,
    target: pd.Series,
    method: str = 'mutual_info'
) -> pd.DataFrame:
    """Score each feature's relevance to ``target``.

    method: 'mutual_info' (sklearn mutual information), 'correlation'
    (absolute Pearson correlation), or 'random_forest' (impurity-based
    importances from a 100-tree forest, random_state=42).

    Returns a DataFrame indexed by feature name, sorted descending by the
    score column. Raises ValueError for an unknown method (previously an
    unknown method fell through and crashed with an IndexError on the
    empty score dict; this also matches transform_features' behavior).
    """
    importance_scores = {}

    if method == 'mutual_info':
        from sklearn.feature_selection import mutual_info_regression
        importance_scores['mutual_info'] = mutual_info_regression(features, target)

    elif method == 'correlation':
        importance_scores['correlation'] = features.corrwith(target).abs().values

    elif method == 'random_forest':
        from sklearn.ensemble import RandomForestRegressor
        rf = RandomForestRegressor(n_estimators=100, random_state=42)
        rf.fit(features, target)
        importance_scores['rf_importance'] = rf.feature_importances_

    else:
        raise ValueError(f"Unknown importance method: {method}")

    score_col = next(iter(importance_scores))
    importance_df = pd.DataFrame(
        importance_scores,
        index=features.columns,
    ).sort_values(by=score_col, ascending=False)

    return importance_df
||||
354
apps/stock/analytics/src/optimization/portfolio_optimizer.py
Normal file
354
apps/stock/analytics/src/optimization/portfolio_optimizer.py
Normal file
|
|
@ -0,0 +1,354 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
import cvxpy as cp
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class PortfolioOptimizer:
    """Portfolio weight optimizers built on cvxpy.

    Methods cover mean-variance, minimum-variance, maximum-Sharpe,
    risk-parity and (simplified) Black-Litterman allocation. Inputs are
    per-period return matrices of shape (n_periods, n_assets); reported
    metrics are annualized assuming 252 periods per year.
    """

    def __init__(self, risk_free_rate: float = 0.02):
        # Annual risk-free rate used in Sharpe-ratio computations.
        self.risk_free_rate = risk_free_rate
||||
def optimize(
    self,
    returns: np.ndarray,
    method: str = 'mean_variance',
    constraints: Optional[Dict] = None
) -> Dict:
    """Optimize portfolio weights using the named method.

    method: one of 'mean_variance', 'min_variance', 'max_sharpe',
    'risk_parity', 'black_litterman'. Raises ValueError otherwise.
    ``constraints`` is forwarded to every method except risk parity,
    which takes none.
    """
    # Dispatch table instead of an if/elif chain; lambdas defer the call
    # so only the selected optimizer runs.
    dispatch = {
        'mean_variance': lambda: self._mean_variance_optimization(returns, constraints),
        'min_variance': lambda: self._minimum_variance_optimization(returns, constraints),
        'max_sharpe': lambda: self._maximum_sharpe_optimization(returns, constraints),
        'risk_parity': lambda: self._risk_parity_optimization(returns),
        'black_litterman': lambda: self._black_litterman_optimization(returns, constraints),
    }
    try:
        run = dispatch[method]
    except KeyError:
        raise ValueError(f"Unknown optimization method: {method}") from None
    return run()
||||
def _mean_variance_optimization(
    self,
    returns: np.ndarray,
    constraints: Optional[Dict] = None
) -> Dict:
    """Classical Markowitz mean-variance optimization (quadratic utility).

    returns: (n_periods, n_assets) per-period returns.
    constraints: optional dict with 'min_weight', 'max_weight',
        'target_return' (per-period) and/or 'max_risk' (per-period vol).
    Returns a dict with 'weights' and annualized 'expected_return',
    'expected_risk' and 'sharpe_ratio'. Falls back to equal weights if
    the solver does not reach 'optimal'.
    """
    n_assets = returns.shape[1]

    # Sample moments of the per-period returns
    expected_returns = np.mean(returns, axis=0)
    cov_matrix = np.cov(returns.T)

    # Small ridge on the diagonal for numerical stability
    cov_matrix += np.eye(n_assets) * 1e-6

    # Decision variable: portfolio weights
    weights = cp.Variable(n_assets)

    # Maximize return minus a quadratic risk penalty
    risk_aversion = 2.0  # fixed investor risk aversion; could be parameterized
    portfolio_return = expected_returns @ weights
    portfolio_risk = cp.quad_form(weights, cov_matrix)
    objective = cp.Maximize(portfolio_return - risk_aversion * portfolio_risk)

    # Fully invested, long-only by default
    constraints_list = [
        cp.sum(weights) == 1,
        weights >= 0,
    ]

    # Optional user-supplied constraints
    if constraints:
        if 'min_weight' in constraints:
            constraints_list.append(weights >= constraints['min_weight'])
        if 'max_weight' in constraints:
            constraints_list.append(weights <= constraints['max_weight'])
        if 'target_return' in constraints:
            constraints_list.append(portfolio_return >= constraints['target_return'])
        if 'max_risk' in constraints:
            # 'max_risk' is a volatility; compare against variance
            constraints_list.append(portfolio_risk <= constraints['max_risk'] ** 2)

    problem = cp.Problem(objective, constraints_list)
    problem.solve()

    if problem.status != 'optimal':
        logger.warning(f"Optimization status: {problem.status}")
        # Equal-weight fallback keeps the caller functional
        weights_array = np.ones(n_assets) / n_assets
    else:
        weights_array = weights.value

    # Realized per-period portfolio metrics
    realized_return = expected_returns @ weights_array
    realized_risk = np.sqrt(weights_array @ cov_matrix @ weights_array)
    # BUGFIX: use the per-period risk-free rate (annual / 252), consistent
    # with the other optimizers; previously the full annual rate was
    # subtracted from a per-period return, badly understating the Sharpe.
    sharpe_ratio = (realized_return - self.risk_free_rate / 252) / realized_risk

    return {
        'weights': weights_array,
        'expected_return': realized_return * 252,  # annualized
        'expected_risk': realized_risk * np.sqrt(252),  # annualized
        'sharpe_ratio': sharpe_ratio * np.sqrt(252),
    }
||||
def _minimum_variance_optimization(
    self,
    returns: np.ndarray,
    constraints: Optional[Dict] = None
) -> Dict:
    """Find the fully-invested, long-only portfolio with minimum variance.

    returns: (n_periods, n_assets) per-period returns. ``constraints`` is
    accepted for interface parity but unused here. Falls back to equal
    weights when the solver does not reach 'optimal'. Metrics are
    annualized (252 periods/year).
    """
    n_assets = returns.shape[1]
    # Ridge on the diagonal for numerical stability
    cov_matrix = np.cov(returns.T) + np.eye(n_assets) * 1e-6

    # Minimize portfolio variance subject to full investment, no shorts
    w = cp.Variable(n_assets)
    variance = cp.quad_form(w, cov_matrix)
    problem = cp.Problem(
        cp.Minimize(variance),
        [cp.sum(w) == 1, w >= 0],
    )
    problem.solve()

    weights_array = (
        w.value if problem.status == 'optimal' else np.ones(n_assets) / n_assets
    )

    # Per-period metrics, annualized on output
    expected_returns = np.mean(returns, axis=0)
    port_ret = expected_returns @ weights_array
    port_vol = np.sqrt(weights_array @ cov_matrix @ weights_array)
    sharpe = (port_ret - self.risk_free_rate / 252) / port_vol

    return {
        'weights': weights_array,
        'expected_return': port_ret * 252,
        'expected_risk': port_vol * np.sqrt(252),
        'sharpe_ratio': sharpe * np.sqrt(252),
    }
||||
def _maximum_sharpe_optimization(
    self,
    returns: np.ndarray,
    constraints: Optional[Dict] = None
) -> Dict:
    """Maximize the Sharpe ratio by scanning the efficient frontier.

    The Sharpe ratio itself is not convex, so we solve 50 convex
    minimum-variance problems at evenly spaced target returns and keep
    the portfolio with the highest Sharpe. ``constraints`` is accepted
    for interface parity but unused. Falls back to equal weights if no
    subproblem solves. Metrics are annualized (252 periods/year).
    """
    n_assets = returns.shape[1]
    expected_returns = np.mean(returns, axis=0)
    cov_matrix = np.cov(returns.T)
    # FIX: add the same 1e-6 diagonal ridge used by every other
    # optimizer here; it was missing, leaving this method exposed to
    # near-singular covariance matrices.
    cov_matrix += np.eye(n_assets) * 1e-6

    # Scan target returns spanning the assets' mean returns
    target_returns = np.linspace(
        np.min(expected_returns),
        np.max(expected_returns),
        50,
    )

    best_sharpe = -np.inf
    best_weights = None

    for target_ret in target_returns:
        # Minimum-variance portfolio achieving at least target_ret
        weights = cp.Variable(n_assets)
        portfolio_risk = cp.quad_form(weights, cov_matrix)

        objective = cp.Minimize(portfolio_risk)
        constraints_list = [
            cp.sum(weights) == 1,
            weights >= 0,
            expected_returns @ weights >= target_ret,
        ]

        problem = cp.Problem(objective, constraints_list)
        problem.solve()

        if problem.status == 'optimal':
            w = weights.value
            ret = expected_returns @ w
            risk = np.sqrt(w @ cov_matrix @ w)
            sharpe = (ret - self.risk_free_rate / 252) / risk

            if sharpe > best_sharpe:
                best_sharpe = sharpe
                best_weights = w

    # Equal-weight fallback if every subproblem failed
    if best_weights is None:
        best_weights = np.ones(n_assets) / n_assets

    portfolio_return = expected_returns @ best_weights
    portfolio_risk = np.sqrt(best_weights @ cov_matrix @ best_weights)

    return {
        'weights': best_weights,
        'expected_return': portfolio_return * 252,
        'expected_risk': portfolio_risk * np.sqrt(252),
        'sharpe_ratio': best_sharpe * np.sqrt(252),
    }
||||
def _risk_parity_optimization(self, returns: np.ndarray) -> Dict:
|
||||
"""
|
||||
Risk parity optimization - equal risk contribution
|
||||
"""
|
||||
n_assets = returns.shape[1]
|
||||
cov_matrix = np.cov(returns.T)
|
||||
|
||||
# Initial guess - equal weights
|
||||
weights = np.ones(n_assets) / n_assets
|
||||
|
||||
# Iterative algorithm
|
||||
for _ in range(100):
|
||||
# Calculate marginal risk contributions
|
||||
portfolio_vol = np.sqrt(weights @ cov_matrix @ weights)
|
||||
marginal_contrib = cov_matrix @ weights / portfolio_vol
|
||||
contrib = weights * marginal_contrib
|
||||
|
||||
# Target equal contribution
|
||||
target_contrib = portfolio_vol / n_assets
|
||||
|
||||
# Update weights
|
||||
weights = weights * (target_contrib / contrib)
|
||||
weights = weights / np.sum(weights)
|
||||
|
||||
# Calculate metrics
|
||||
expected_returns = np.mean(returns, axis=0)
|
||||
portfolio_return = expected_returns @ weights
|
||||
portfolio_risk = np.sqrt(weights @ cov_matrix @ weights)
|
||||
sharpe_ratio = (portfolio_return - self.risk_free_rate / 252) / portfolio_risk
|
||||
|
||||
return {
|
||||
'weights': weights,
|
||||
'expected_return': portfolio_return * 252,
|
||||
'expected_risk': portfolio_risk * np.sqrt(252),
|
||||
'sharpe_ratio': sharpe_ratio * np.sqrt(252)
|
||||
}
|
||||
|
||||
def _black_litterman_optimization(
    self,
    returns: np.ndarray,
    constraints: Optional[Dict] = None,
    views: Optional[Dict] = None
) -> Dict:
    """Simplified Black-Litterman allocation.

    Implied equilibrium returns are derived from equal "market" weights
    (a stand-in for market-cap weights) and fed into a mean-variance
    solve. ``views`` blending is not implemented yet; ``constraints`` is
    accepted for interface parity but unused. Metrics are annualized
    (252 periods/year).
    """
    n_assets = returns.shape[1]

    # Market equilibrium weights — equal weights as a market-cap proxy
    market_weights = np.ones(n_assets) / n_assets

    # Implied equilibrium returns: λ Σ w_mkt
    cov_matrix = np.cov(returns.T)
    risk_aversion = 2.5
    implied_returns = risk_aversion * cov_matrix @ market_weights

    # Without views this reduces to the market portfolio; view blending
    # would adjust implied_returns here.
    if views:
        # TODO: blend investor views with the implied returns
        pass

    expected_returns = implied_returns

    # Mean-variance solve using the implied returns
    w = cp.Variable(n_assets)
    port_ret_expr = expected_returns @ w
    port_risk_expr = cp.quad_form(w, cov_matrix)
    problem = cp.Problem(
        cp.Maximize(port_ret_expr - risk_aversion * port_risk_expr),
        [cp.sum(w) == 1, w >= 0],
    )
    problem.solve()

    weights_array = w.value if problem.status == 'optimal' else market_weights

    # Per-period metrics, annualized on output
    port_ret = expected_returns @ weights_array
    port_vol = np.sqrt(weights_array @ cov_matrix @ weights_array)
    sharpe = (port_ret - self.risk_free_rate / 252) / port_vol

    return {
        'weights': weights_array,
        'expected_return': port_ret * 252,
        'expected_risk': port_vol * np.sqrt(252),
        'sharpe_ratio': sharpe * np.sqrt(252),
    }
||||
def calculate_efficient_frontier(
    self,
    returns: np.ndarray,
    num_portfolios: int = 100
) -> List[Dict]:
    """Trace the efficient frontier by minimum-variance solves.

    Solves one convex problem per target return (evenly spaced between
    the lowest and highest asset mean return). Infeasible targets are
    skipped, so fewer than ``num_portfolios`` points may come back.
    Each point holds annualized 'return', 'risk' and the weight list.
    """
    n_assets = returns.shape[1]
    expected_returns = np.mean(returns, axis=0)
    cov_matrix = np.cov(returns.T)

    # Sweep targets across the attainable return range
    target_returns = np.linspace(
        np.min(expected_returns),
        np.max(expected_returns),
        num_portfolios,
    )

    frontier = []
    for target_ret in target_returns:
        # Minimum variance subject to reaching this target return
        w = cp.Variable(n_assets)
        problem = cp.Problem(
            cp.Minimize(cp.quad_form(w, cov_matrix)),
            [
                cp.sum(w) == 1,
                w >= 0,
                expected_returns @ w >= target_ret,
            ],
        )
        problem.solve()

        if problem.status != 'optimal':
            continue

        weights_array = w.value
        vol = np.sqrt(weights_array @ cov_matrix @ weights_array)
        frontier.append({
            'return': target_ret * 252,
            'risk': vol * np.sqrt(252),
            'weights': weights_array.tolist(),
        })

    return frontier
||||
Loading…
Add table
Add a link
Reference in a new issue