test
This commit is contained in:
parent
4705550359
commit
deeb934526
5 changed files with 444 additions and 32 deletions
|
|
@ -12,6 +12,9 @@ interface FetchIntradayInput {
|
|||
interface CrawlIntradayInput {
|
||||
eodSearchCode: string;
|
||||
interval: '1m' | '5m' | '1h';
|
||||
fromDate?: Date;
|
||||
toDate?: Date;
|
||||
isInitial?: boolean; // To distinguish initial vs continuation jobs
|
||||
}
|
||||
|
||||
interface ScheduleIntradayConfig {
|
||||
|
|
@ -28,7 +31,7 @@ const MAX_DAYS_PER_INTERVAL = {
|
|||
};
|
||||
|
||||
// Default exchanges to process for intraday data
|
||||
const DEFAULT_INTRADAY_EXCHANGES = ['US', 'TO', 'V', 'CN', 'NEO'];
|
||||
// const DEFAULT_INTRADAY_EXCHANGES = [],//['US', 'TO', 'V', 'CN', 'NEO', 'CC'];
|
||||
|
||||
export async function scheduleIntradayCrawl(
|
||||
this: EodHandler,
|
||||
|
|
@ -39,7 +42,7 @@ export async function scheduleIntradayCrawl(
|
|||
try {
|
||||
logger.info('Scheduling intraday crawl jobs', {
|
||||
config: {
|
||||
exchanges: config?.exchanges || DEFAULT_INTRADAY_EXCHANGES,
|
||||
exchanges: config?.exchanges, //|| DEFAULT_INTRADAY_EXCHANGES,
|
||||
symbolTypes: config?.symbolTypes || 'all',
|
||||
limit: config?.limit || 'unlimited'
|
||||
}
|
||||
|
|
@ -71,13 +74,12 @@ export async function scheduleIntradayCrawl(
|
|||
logger.debug(`Getting stale symbols for ${operationName}...`);
|
||||
|
||||
// Get symbols with all filters applied at the database level
|
||||
const targetExchanges = config?.exchanges || DEFAULT_INTRADAY_EXCHANGES;
|
||||
const desiredLimit = config?.limit || 5000;
|
||||
const targetExchanges = config?.exchanges //|| DEFAULT_INTRADAY_EXCHANGES;
|
||||
const desiredLimit = config?.limit || 1000000;
|
||||
|
||||
const staleSymbols = await this.operationRegistry.getStaleSymbols('eod', operationName, {
|
||||
limit: desiredLimit,
|
||||
exchanges: targetExchanges,
|
||||
delisted: false
|
||||
});
|
||||
|
||||
logger.debug(`getStaleSymbols returned ${staleSymbols.length} symbols for ${operationName}`);
|
||||
|
|
@ -98,21 +100,42 @@ export async function scheduleIntradayCrawl(
|
|||
// 2. Are not finished (!operationStatus.finished)
|
||||
// 3. Are finished but need new data (newest date > 1 day old)
|
||||
const needsNewData = operationStatus?.finished && operationStatus?.newestDateReached &&
|
||||
new Date(operationStatus.newestDateReached) < new Date(Date.now() - 24 * 60 * 60 * 1000);
|
||||
new Date(operationStatus.newestDateReached) < new Date(Date.now() - 30* 24 * 60 * 60 * 1000);
|
||||
|
||||
if (!operationStatus || !operationStatus.finished || needsNewData) {
|
||||
// Calculate initial date range for the job
|
||||
const maxDays = MAX_DAYS_PER_INTERVAL[interval];
|
||||
let toDate = new Date();
|
||||
let fromDate = new Date();
|
||||
let isInitial = true;
|
||||
|
||||
if (operationStatus?.lastProcessedDate) {
|
||||
// Continue from where we left off
|
||||
toDate = new Date(operationStatus.lastProcessedDate);
|
||||
isInitial = false;
|
||||
}
|
||||
|
||||
// Calculate from date (going backwards)
|
||||
fromDate = new Date(toDate);
|
||||
fromDate.setDate(fromDate.getDate() - maxDays);
|
||||
|
||||
allSymbolsForCrawl.push({
|
||||
symbol: symbol,
|
||||
interval: interval,
|
||||
operationName: operationName,
|
||||
lastRun: staleSymbol.lastRun,
|
||||
lastSuccess: staleSymbol.lastSuccess
|
||||
lastSuccess: staleSymbol.lastSuccess,
|
||||
fromDate: fromDate,
|
||||
toDate: toDate,
|
||||
isInitial: isInitial
|
||||
});
|
||||
|
||||
logger.debug(`Added ${symbol.Code}.${symbol.Exchange} for ${interval} crawl`, {
|
||||
hasOperation: !!operationStatus,
|
||||
finished: operationStatus?.finished,
|
||||
needsNewData
|
||||
needsNewData,
|
||||
dateRange: `${fromDate.toISOString().split('T')[0]} to ${toDate.toISOString().split('T')[0]}`,
|
||||
isInitial
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -136,6 +159,8 @@ export async function scheduleIntradayCrawl(
|
|||
name: s.symbol.Name,
|
||||
eodSearchCode: s.symbol.eodSearchCode,
|
||||
interval: s.interval,
|
||||
dateRange: `${s.fromDate.toISOString().split('T')[0]} to ${s.toDate.toISOString().split('T')[0]}`,
|
||||
isInitial: s.isInitial,
|
||||
lastRun: s.lastRun ? new Date(s.lastRun).toISOString() : 'never',
|
||||
lastSuccess: s.lastSuccess ? new Date(s.lastSuccess).toISOString() : 'never'
|
||||
}))
|
||||
|
|
@ -145,16 +170,22 @@ export async function scheduleIntradayCrawl(
|
|||
|
||||
// Schedule crawl jobs for each symbol/interval combination
|
||||
for (const item of allSymbolsForCrawl) {
|
||||
const { symbol, interval } = item;
|
||||
const { symbol, interval, fromDate, toDate, isInitial } = item;
|
||||
|
||||
// Create jobId based on whether it's initial or continuation
|
||||
const dateStr = isInitial ? 'initial' : `${fromDate.toISOString().split('T')[0]}-${toDate.toISOString().split('T')[0]}`;
|
||||
const jobId = `crawl-intraday-${symbol.eodSearchCode}-${interval}-${dateStr}`;
|
||||
|
||||
const jobId = `crawl-intraday-${symbol.eodSearchCode}-${interval}`;
|
||||
try {
|
||||
await this.scheduleOperation('crawl-intraday', {
|
||||
eodSearchCode: symbol.eodSearchCode,
|
||||
interval
|
||||
interval,
|
||||
fromDate,
|
||||
toDate,
|
||||
isInitial
|
||||
}, {
|
||||
jobId,
|
||||
priority: 5, // Initial crawl jobs get priority 5 (lower priority)
|
||||
priority: isInitial ? 5 : 3, // Initial crawl jobs get lower priority
|
||||
attempts: 3,
|
||||
backoff: {
|
||||
type: 'exponential',
|
||||
|
|
@ -163,6 +194,15 @@ export async function scheduleIntradayCrawl(
|
|||
delay: jobsScheduled * 500 // Stagger jobs by 500ms
|
||||
});
|
||||
jobsScheduled++;
|
||||
|
||||
logger.debug(`Scheduled crawl job`, {
|
||||
jobId,
|
||||
symbol: symbol.Code,
|
||||
exchange: symbol.Exchange,
|
||||
interval,
|
||||
dateRange: `${fromDate.toISOString().split('T')[0]} to ${toDate.toISOString().split('T')[0]}`,
|
||||
isInitial
|
||||
});
|
||||
} catch (error: any) {
|
||||
if (error?.message?.includes('Job already exists')) {
|
||||
logger.debug(`Job already exists: ${jobId}`);
|
||||
|
|
@ -189,7 +229,7 @@ export async function crawlIntraday(
|
|||
input: CrawlIntradayInput
|
||||
): Promise<{ success: boolean; recordsProcessed: number; finished: boolean }> {
|
||||
const logger = this.logger;
|
||||
const { eodSearchCode, interval } = input;
|
||||
const { eodSearchCode, interval, fromDate: providedFromDate, toDate: providedToDate, isInitial } = input;
|
||||
|
||||
try {
|
||||
// Lookup symbol using eodSearchCode
|
||||
|
|
@ -234,19 +274,39 @@ export async function crawlIntraday(
|
|||
|
||||
// Determine date range for this batch
|
||||
const maxDays = MAX_DAYS_PER_INTERVAL[interval];
|
||||
let toDate = new Date();
|
||||
let fromDate = new Date();
|
||||
let toDate: Date;
|
||||
let fromDate: Date;
|
||||
|
||||
// Use provided dates if available (from scheduled job)
|
||||
if (providedFromDate && providedToDate) {
|
||||
fromDate = new Date(providedFromDate);
|
||||
toDate = new Date(providedToDate);
|
||||
logger.info(`Using provided date range for ${symbol}.${exchange} - ${interval}`, {
|
||||
fromDate: fromDate.toISOString(),
|
||||
toDate: toDate.toISOString(),
|
||||
isInitial
|
||||
});
|
||||
} else {
|
||||
// Fallback to original logic (for backward compatibility)
|
||||
toDate = new Date();
|
||||
fromDate = new Date();
|
||||
|
||||
if (operationStatus.lastProcessedDate) {
|
||||
// Continue from where we left off - the last processed date becomes the new toDate
|
||||
toDate = new Date(operationStatus.lastProcessedDate);
|
||||
// No need to subtract a day - lastProcessedDate is the fromDate of the last batch
|
||||
}
|
||||
|
||||
// Calculate from date (going backwards)
|
||||
fromDate = new Date(toDate);
|
||||
fromDate.setDate(fromDate.getDate() - maxDays);
|
||||
|
||||
logger.info(`Calculated date range for ${symbol}.${exchange} - ${interval}`, {
|
||||
fromDate: fromDate.toISOString(),
|
||||
toDate: toDate.toISOString(),
|
||||
basedOn: operationStatus.lastProcessedDate ? 'lastProcessedDate' : 'current date'
|
||||
});
|
||||
}
|
||||
|
||||
logger.info(`Fetching intraday batch for ${symbol}.${exchange} - ${interval} from ${fromDate.toISOString().split('T')[0]} to ${toDate.toISOString().split('T')[0]}`, {
|
||||
symbol,
|
||||
exchange,
|
||||
|
|
@ -334,11 +394,22 @@ export async function crawlIntraday(
|
|||
|
||||
// If not finished, schedule next batch
|
||||
if (!updateData.finished) {
|
||||
// Calculate next batch date range
|
||||
const nextToDate = fromDate; // Next batch's toDate is current batch's fromDate
|
||||
const nextFromDate = new Date(nextToDate);
|
||||
nextFromDate.setDate(nextFromDate.getDate() - maxDays);
|
||||
|
||||
const dateStr = `${nextFromDate.toISOString().split('T')[0]}-${nextToDate.toISOString().split('T')[0]}`;
|
||||
const jobId = `crawl-intraday-${eodSearchCode}-${interval}-${dateStr}`;
|
||||
|
||||
await this.scheduleOperation('crawl-intraday', {
|
||||
eodSearchCode,
|
||||
interval
|
||||
interval,
|
||||
fromDate: nextFromDate,
|
||||
toDate: nextToDate,
|
||||
isInitial: false
|
||||
}, {
|
||||
jobId: `crawl-intraday-${eodSearchCode}-${interval}`,
|
||||
jobId,
|
||||
priority: 3, // Continuation jobs get higher priority (3) than initial jobs (5)
|
||||
attempts: 3,
|
||||
backoff: {
|
||||
|
|
@ -352,9 +423,16 @@ export async function crawlIntraday(
|
|||
symbol,
|
||||
exchange,
|
||||
interval,
|
||||
currentBatchFrom: fromDate.toISOString(),
|
||||
currentBatchTo: toDate.toISOString(),
|
||||
recordsSaved: result.recordsSaved,
|
||||
jobId,
|
||||
currentBatch: {
|
||||
from: fromDate.toISOString().split('T')[0],
|
||||
to: toDate.toISOString().split('T')[0],
|
||||
recordsSaved: result.recordsSaved
|
||||
},
|
||||
nextBatch: {
|
||||
from: nextFromDate.toISOString().split('T')[0],
|
||||
to: nextToDate.toISOString().split('T')[0]
|
||||
},
|
||||
totalDaysProcessed: updateData.totalDaysProcessed
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,21 +38,21 @@ export const EOD_OPERATIONS: OperationConfig[] = [
|
|||
type: 'intraday_crawl',
|
||||
description: 'Crawl 1-minute intraday data',
|
||||
requiresFinishedFlag: true,
|
||||
defaultStaleHours: 24 // Daily check for new data
|
||||
defaultStaleHours: 50000 // Daily check for new data
|
||||
},
|
||||
{
|
||||
name: 'intraday_5m',
|
||||
type: 'intraday_crawl',
|
||||
description: 'Crawl 5-minute intraday data',
|
||||
requiresFinishedFlag: true,
|
||||
defaultStaleHours: 24 // Daily check for new data
|
||||
defaultStaleHours: 50000 // Daily check for new data
|
||||
},
|
||||
{
|
||||
name: 'intraday_1h',
|
||||
type: 'intraday_crawl',
|
||||
description: 'Crawl 1-hour intraday data',
|
||||
requiresFinishedFlag: true,
|
||||
defaultStaleHours: 24 // Daily check for new data
|
||||
defaultStaleHours: 50000 // Daily check for new data
|
||||
},
|
||||
|
||||
// Fundamental data
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ services: # Dragonfly - Redis replacement for caching and events
|
|||
image: dpage/pgadmin4:latest
|
||||
container_name: trading-bot-pgadmin
|
||||
environment:
|
||||
PGADMIN_DEFAULT_EMAIL: admin@tradingbot.local
|
||||
PGADMIN_DEFAULT_EMAIL: admin@tradingbot.com
|
||||
PGADMIN_DEFAULT_PASSWORD: admin123
|
||||
PGADMIN_CONFIG_SERVER_MODE: 'False'
|
||||
PGADMIN_DISABLE_POSTFIX: 'true'
|
||||
|
|
|
|||
106
docs/backtest-validation.md
Normal file
106
docs/backtest-validation.md
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
I'll search for specific information about Timothy Masters' recommendations on permutation tests for trading and newer alternatives in 2025. Based on my research, here's a comprehensive overview of **Timothy Masters' recommendations and newer alternatives for permutation and randomization tests in trading (2025)**:
|
||||
|
||||
## Timothy Masters' Permutation Test Recommendations
|
||||
|
||||
Timothy Masters, in his book "Permutation and Randomization Tests for Trading System Development," advocates for several key permutation tests:
|
||||
|
||||
### 1. **Core Permutation Methods**
|
||||
The book covers: testing for overfitting at the earliest possible stage; evaluating the luckiness-versus-skill of a fully developed system before deploying it; testing the effectiveness and reliability of a trading system factory; and removing selection bias when screening a large number of indicators.
|
||||
|
||||
### 2. **Monte Carlo Permutation Test**
|
||||
The Monte Carlo permutation test of Timothy Masters involves simulating multiple iterations of the strategy using randomly permuted versions of historical data. The process:
|
||||
- Transforms absolute prices into log-returns for better statistical properties
|
||||
- Creates 500 or 1000 permuted versions of the data; if the strategy picks up a real "signal", it should perform worse on well over 90% of the permuted versions than on the original data
|
||||
|
||||
### 3. **Key Applications**
|
||||
One use of the Monte Carlo permutation test is to discover the degree of data overfitting without using the out-of-sample data:
|
||||
- Tests whether trading performance beats 99% of what could be expected by luck
|
||||
- Preserves the single-usage principle of out-of-sample data
|
||||
- If our trading system beats 99% of what can be expected by good luck, we can conclude that our indicator, trading system or trading system development method is actually worthwhile
|
||||
|
||||
### 4. **Selection Bias Testing**
|
||||
In Masters' own words: "On March 30, 2020, I released a third edition of the book which includes an important new section that introduces an alternative permutation test for selection bias that is substantially superior to the traditional test."
|
||||
|
||||
## Newer Alternatives and Advances in 2025
|
||||
|
||||
### 1. **System Parameter Permutation (SPP) and Randomization (SPR)**
|
||||
System Parameter Randomization (SPR) is introduced as a close cousin of SPP to address computational limitations and provide a better alternative
|
||||
|
||||
**Advantages over Masters' approach:**
|
||||
- The idea behind System Parameter Permutation is that we must test ALL the possible parameter combinations and only from the data of all optimization runs we can make some assumptions about the realistic strategy performance
|
||||
- Computes median values for all performance statistics (Net profit, Drawdown, Sharpe ratio, etc.)
|
||||
- More computationally feasible for complex trading systems
|
||||
|
||||
### 2. **Advanced Bootstrap Methods**
|
||||
|
||||
**Block Bootstrap Variants:**
|
||||
Advanced block bootstrap methods are specifically tailored for dependent time series data, including stationary bootstrap, circular bootstrap, and tapered block bootstrap
|
||||
- **Stationary Bootstrap**: Uses random block lengths from geometric distribution
|
||||
- **Circular Bootstrap**: Attempts to solve edge effects by wrapping the series
|
||||
- **Tapered Block Bootstrap**: Offers smoother transitions at block boundaries
|
||||
|
||||
### 3. **White's Reality Check and Hansen's SPA Test**
|
||||
White's Reality Check bootstrap methodology evaluates simple technical trading rules while quantifying the data-snooping bias
|
||||
|
||||
**Superior Predictive Ability (SPA) Test:**
|
||||
- The stepwise SPA test is more powerful than the stepwise Reality Check test of Romano and Wolf
|
||||
- Addresses data snooping bias more effectively
|
||||
- Can identify predictive models without potential bias
|
||||
|
||||
### 4. **Conformal Prediction for Trading**
|
||||
|
||||
This is one of the most promising newer alternatives:
|
||||
|
||||
**Key Features:**
|
||||
- Distribution-free nature - unlike Bayesian approaches and other statistical methods that require specific assumptions about the data distribution, conformal prediction makes no such assumptions
|
||||
- Provides valid prediction intervals with guaranteed coverage
|
||||
- The EnbPI (Ensemble batch Prediction Intervals) method removes the data exchangeability requirement and can thus be applied in time series forecasting
|
||||
|
||||
**Applications in Trading:**
|
||||
- Uncertainty quantification for price predictions
|
||||
- Risk-adjusted position sizing based on prediction intervals
|
||||
- Model confidence assessment without distributional assumptions
|
||||
|
||||
### 5. **Synthetic Data Generation Methods**
|
||||
|
||||
Deep generative models produce synthetic time-series data, enhancing the amount of data available for training predictive models
|
||||
|
||||
**Benefits for Trading Strategy Validation:**
|
||||
- Creates realistic market scenarios while preserving statistical properties
|
||||
- Synthetic data obtained this way replicates the distribution properties of real historical data, leads to better performance, and enables thorough validation of predictive models
|
||||
- Addresses data scarcity issues in certain market conditions
|
||||
- In the finance sector, synthetic data is used for fraud detection, risk assessment, and algorithmic trading
|
||||
|
||||
### 6. **Hybrid Approaches**
|
||||
|
||||
**Combining Multiple Methods:**
|
||||
The variance test takes the historical trades and creates a probable distribution to resample, selecting random trades from this distribution until it reaches the backtest's total trade count
|
||||
|
||||
**Modern Robustness Tests:**
|
||||
- Noise testing, "Vs Shifted" testing, variance testing, and Monte Carlo permutation testing each evaluate multiple price paths
|
||||
- Address limitations of traditional walk-forward analysis
|
||||
- Four-step approach: in-sample excellence, Monte Carlo permutation testing, walk-forward testing, and walk-forward Monte Carlo testing
|
||||
|
||||
## Key Advantages of Newer Methods
|
||||
|
||||
1. **Computational Efficiency**: Modern methods like SPR and conformal prediction are more computationally efficient than exhaustive permutation testing
|
||||
|
||||
2. **Better Coverage**: CopulaCPTS produces significantly sharper and more calibrated uncertainty estimates than state-of-the-art baselines
|
||||
|
||||
3. **No Distributional Assumptions**: Conformal prediction and synthetic data methods work without assuming specific data distributions
|
||||
|
||||
4. **Adaptive Capabilities**: Methods like EnbPI update intervals as new data arrives, maintaining adaptiveness
|
||||
|
||||
5. **Multi-Path Testing**: Unlike single-path methods, newer approaches test multiple possible market scenarios simultaneously
|
||||
|
||||
## Recommendations for 2025
|
||||
|
||||
For modern trading system development, consider:
|
||||
|
||||
1. **Start with Masters' Monte Carlo Permutation** for basic validation
|
||||
2. **Implement SPP/SPR** for parameter robustness testing
|
||||
3. **Use Conformal Prediction** for uncertainty quantification and risk management
|
||||
4. **Apply Synthetic Data Generation** for stress testing under various market conditions
|
||||
5. **Combine multiple methods** for comprehensive validation
|
||||
|
||||
The field has evolved significantly, with newer methods offering better computational efficiency, fewer assumptions, and more robust validation while maintaining the core principles that Masters established.
|
||||
228
docs/focus.md
Normal file
228
docs/focus.md
Normal file
|
|
@ -0,0 +1,228 @@
|
|||
Based on my experience and research, here's a comprehensive breakdown of **strategy development components and where you should focus**:
|
||||
|
||||
## The Complete Strategy Development Pipeline
|
||||
|
||||
### 1. **Idea Generation & Hypothesis Formation** (15% of effort)
|
||||
**What it involves:**
|
||||
- Market microstructure understanding
|
||||
- Economic rationale for the edge
|
||||
- Literature review and academic research
|
||||
- Observing market inefficiencies
|
||||
|
||||
**Focus Level: MEDIUM**
|
||||
- Don't over-research; many good ideas are simple
|
||||
- Ensure there's a logical reason WHY the strategy should work
|
||||
- Avoid pure data mining without economic rationale
|
||||
|
||||
### 2. **Data Infrastructure** (25% of effort)
|
||||
**Critical Components:**
|
||||
- Data quality and cleaning
|
||||
- Survivorship bias handling
|
||||
- Corporate actions adjustment
|
||||
- Proper point-in-time data
|
||||
|
||||
**Focus Level: VERY HIGH** ⭐
|
||||
- **This is where most strategies fail in production**
|
||||
- Bad data = invalid backtests = losing money
|
||||
- Look-ahead bias is the silent killer
|
||||
- The time-series nature of financial datasets limits the effective amount of data available to train, validate and retrain models since special care must be taken not to include future data in any way
|
||||
|
||||
### 3. **Feature Engineering** (20% of effort)
|
||||
**Key Areas:**
|
||||
- Market microstructure features (order flow, volume profiles)
|
||||
- Cross-sectional features (relative value metrics)
|
||||
- Alternative data integration
|
||||
- Regime indicators
|
||||
|
||||
**Focus Level: HIGH**
|
||||
- Features matter more than models
|
||||
- Domain expertise pays off here
|
||||
- Keep features interpretable when possible
|
||||
|
||||
### 4. **Strategy Logic & Signal Generation** (15% of effort)
|
||||
**Components:**
|
||||
- Entry/exit rules
|
||||
- Position sizing algorithms
|
||||
- Risk limits and constraints
|
||||
- Portfolio construction
|
||||
|
||||
**Focus Level: MEDIUM**
|
||||
- Simpler is often better
|
||||
- Complexity should come from combining simple, robust signals
|
||||
- Avoid overfitting with too many rules
|
||||
|
||||
### 5. **Backtesting Framework** (10% of effort)
|
||||
**Essential Elements:**
|
||||
- Transaction cost modeling
|
||||
- Market impact estimation
|
||||
- Proper execution assumptions
|
||||
- Realistic capacity constraints
|
||||
|
||||
**Focus Level: HIGH** ⭐
|
||||
- Most backtests are too optimistic
|
||||
- Walk-forward analysis only tests a single price path, whereas other tests such as noise testing, "Vs Shifted" testing, variance testing, and Monte Carlo permutation testing evaluate multiple price paths
|
||||
- Focus on realistic execution assumptions
|
||||
|
||||
### 6. **Statistical Validation** (10% of effort)
|
||||
**Including:**
|
||||
- Permutation tests (as discussed)
|
||||
- Out-of-sample testing
|
||||
- Statistical significance tests
|
||||
- Robustness checks
|
||||
|
||||
**Focus Level: MEDIUM-HIGH**
|
||||
- Important but don't over-optimize
|
||||
- Testing for overfitting at the earliest possible stage
|
||||
|
||||
### 7. **Risk Management** (5% of effort but 90% of survival)
|
||||
**Critical Aspects:**
|
||||
- Drawdown controls
|
||||
- Correlation management
|
||||
- Tail risk hedging
|
||||
- Position limits
|
||||
|
||||
**Focus Level: VERY HIGH** ⭐
|
||||
- **This determines survival**
|
||||
- Good risk management saves bad strategies
|
||||
- Bad risk management kills good strategies
|
||||
|
||||
## Where You Should REALLY Focus
|
||||
|
||||
### 🎯 **Priority 1: Data Quality & Infrastructure**
|
||||
**Why:**
|
||||
- 80% of production failures come from data issues
|
||||
- It's unsexy but absolutely critical
|
||||
- Garbage in = garbage out
|
||||
|
||||
**Specific Actions:**
|
||||
- Build robust data pipelines
|
||||
- Implement comprehensive data quality checks
|
||||
- Create point-in-time data snapshots
|
||||
- Test for survivorship bias
|
||||
|
||||
### 🎯 **Priority 2: Transaction Costs & Market Impact**
|
||||
**Why:**
|
||||
- The difference between paper and real trading
|
||||
- Can turn profitable strategies unprofitable
|
||||
- Often underestimated in backtests
|
||||
|
||||
**Key Considerations:**
|
||||
- Bid-ask spreads during your trading times
|
||||
- Market impact models for your size
|
||||
- Slippage estimates based on real execution data
|
||||
- Hidden costs (borrow costs for shorts, etc.)
|
||||
|
||||
### 🎯 **Priority 3: Regime Awareness**
|
||||
**Why:**
|
||||
- Markets change; strategies that don't adapt die
|
||||
- Market dynamics can change, but there is no solution to this risk
|
||||
|
||||
**Implementation:**
|
||||
- Build regime detection systems
|
||||
- Adjust position sizing by regime
|
||||
- Have strategy on/off switches
|
||||
- Monitor strategy degradation metrics
|
||||
|
||||
## Common Traps to Avoid
|
||||
|
||||
### 1. **Over-Optimization**
|
||||
- Too many parameters = overfitting
|
||||
- It is highly recommended for your strategy to have as few configurable parameters (degrees of freedom) as possible
|
||||
|
||||
### 2. **Selection Bias**
|
||||
- Testing 1000 strategies and picking the best
|
||||
- Not accounting for multiple testing
|
||||
- One of the permutation tests created by Timothy Masters (author of "Permutation and Randomization Tests for Trading System Development") detected a hidden selection bias problem in a trading system
|
||||
|
||||
### 3. **Ignoring Capacity**
|
||||
- Strategy works with $100k but not $10M
|
||||
- Market impact kills returns at scale
|
||||
- Liquidity constraints become binding
|
||||
|
||||
### 4. **Complexity Bias**
|
||||
- Complex != better
|
||||
- Simple strategies are more robust
|
||||
- Many strategies that look profitable actually perform just as well on completely random data
|
||||
|
||||
## Modern Best Practices (2025)
|
||||
|
||||
### 1. **Machine Learning Integration**
|
||||
- Use ML for feature selection, not just prediction
|
||||
- Ensemble methods for robustness
|
||||
- Deep generative models to produce synthetic time-series data, enhancing the amount of data available for training
|
||||
|
||||
### 2. **Real-Time Monitoring**
|
||||
- Live performance tracking vs. backtest
|
||||
- Automatic strategy shutdown triggers
|
||||
- A/B testing framework for improvements
|
||||
|
||||
### 3. **Alternative Data**
|
||||
- Sentiment analysis
|
||||
- Satellite data
|
||||
- Web scraping (where legal)
|
||||
- But validate the alpha decay
|
||||
|
||||
### 4. **Execution Alpha**
|
||||
- Smart order routing
|
||||
- Optimal execution algorithms
|
||||
- Dark pool access
|
||||
- This is often an easier edge to capture than signal alpha
|
||||
|
||||
## Recommended Development Process
|
||||
|
||||
### Phase 1: Research (2-4 weeks)
|
||||
1. Hypothesis formation with economic rationale
|
||||
2. Initial data exploration
|
||||
3. Simple prototype testing
|
||||
4. **Go/No-go decision**
|
||||
|
||||
### Phase 2: Development (4-8 weeks)
|
||||
1. Full data pipeline build
|
||||
2. Feature engineering
|
||||
3. Strategy implementation
|
||||
4. Initial backtesting
|
||||
|
||||
### Phase 3: Validation (2-4 weeks)
|
||||
1. Permutation tests
|
||||
2. Out-of-sample testing
|
||||
3. Sensitivity analysis
|
||||
4. **Go/No-go decision**
|
||||
|
||||
### Phase 4: Production Prep (2-4 weeks)
|
||||
1. Execution infrastructure
|
||||
2. Risk management systems
|
||||
3. Monitoring and alerting
|
||||
4. Paper trading
|
||||
|
||||
### Phase 5: Go-Live (Ongoing)
|
||||
1. Gradual position scaling
|
||||
2. Live performance monitoring
|
||||
3. Continuous improvement
|
||||
4. Regular strategy review
|
||||
|
||||
## The Reality Check
|
||||
|
||||
**What Actually Matters Most:**
|
||||
1. **Data quality** (can't emphasize enough)
|
||||
2. **Transaction costs** (the silent killer)
|
||||
3. **Risk management** (determines survival)
|
||||
4. **Execution quality** (often overlooked)
|
||||
5. **Regime adaptability** (markets change)
|
||||
|
||||
**What's Often Overemphasized:**
|
||||
1. Complex models (simple often better)
|
||||
2. Perfect optimization (robustness > perfection)
|
||||
3. High Sharpe ratios in backtest (usually unrealistic)
|
||||
4. Academic purity (markets are messy)
|
||||
|
||||
## Final Advice
|
||||
|
||||
Focus on building **robust, simple strategies** with:
|
||||
- Clean data pipelines
|
||||
- Realistic execution assumptions
|
||||
- Strong risk management
|
||||
- Adaptability to changing markets
|
||||
|
||||
Remember: Over 90% of strategies that look amazing fail in production. The difference between success and failure is usually in the unglamorous details of data quality, execution, and risk management, not in having the most sophisticated model.
|
||||
|
||||
Start simple, test thoroughly, and scale gradually. The market will teach you humility quickly enough.
|
||||
Loading…
Add table
Add a link
Reference in a new issue