You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
736 lines
368 KiB
736 lines
368 KiB
|
4 days ago
|
{
|
||
|
|
"cells": [
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"metadata": {},
|
||
|
|
"source": [
|
||
|
|
"# CTA 1D Baseline XGBoost Model\n",
|
||
|
|
"\n",
|
||
|
|
"Train and evaluate a baseline XGBoost model for CTA 1-day return prediction.\n",
|
||
|
|
"\n",
|
||
|
|
"**Purpose**: Establish a baseline performance benchmark with standard configuration."
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 1,
|
||
|
|
"metadata": {
|
||
|
|
"execution": {
|
||
|
|
"iopub.execute_input": "2026-02-14T08:12:19.244972Z",
|
||
|
|
"iopub.status.busy": "2026-02-14T08:12:19.244658Z",
|
||
|
|
"iopub.status.idle": "2026-02-14T08:12:20.730424Z",
|
||
|
|
"shell.execute_reply": "2026-02-14T08:12:20.729462Z"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"outputs": [],
|
||
|
|
"source": [
|
||
|
|
"import pandas as pd\n",
|
||
|
|
"import numpy as np\n",
|
||
|
|
"import matplotlib.pyplot as plt\n",
|
||
|
|
"import json\n",
|
||
|
|
"from datetime import datetime\n",
|
||
|
|
"\n",
|
||
|
|
"# Use the new API from src/\n",
|
||
|
|
"from src.loader_parquet import CTA1DLoaderParquet\n",
|
||
|
|
"from src.train import train_model, TrainConfig\n",
|
||
|
|
"from src.backtest import run_backtest, BacktestConfig\n",
|
||
|
|
"from src.labels import get_blend_weights\n",
|
||
|
|
"\n",
|
||
|
|
"import sys\n",
|
||
|
|
"sys.path.insert(0, '../')\n",
|
||
|
|
"from common.plotting import setup_plot_style, plot_ic_series, plot_cumulative_returns\n",
|
||
|
|
"from common.paths import create_experiment_dir\n",
|
||
|
|
"\n",
|
||
|
|
"setup_plot_style()"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"metadata": {},
|
||
|
|
"source": [
|
||
|
|
"## 1. Configuration\n",
|
||
|
|
"\n",
|
||
|
|
"Edit this cell to modify experiment parameters."
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 2,
|
||
|
|
"metadata": {
|
||
|
|
"execution": {
|
||
|
|
"iopub.execute_input": "2026-02-14T08:12:20.733937Z",
|
||
|
|
"iopub.status.busy": "2026-02-14T08:12:20.733741Z",
|
||
|
|
"iopub.status.idle": "2026-02-14T08:12:20.739463Z",
|
||
|
|
"shell.execute_reply": "2026-02-14T08:12:20.738798Z"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"Configuration:\n",
|
||
|
|
" Experiment: baseline_xgb\n",
|
||
|
|
" Train: 2020-01-01 to 2021-12-31\n",
|
||
|
|
" Valid: 2022-01-01 to 2022-06-30\n",
|
||
|
|
" Test: 2022-07-01 to 2023-12-31\n",
|
||
|
|
" Fit: 2020-01-01 to 2021-12-31 (normalization)\n",
|
||
|
|
" Blend: default: [0.2, 0.1, 0.3, 0.4]\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"CONFIG = {\n",
|
||
|
|
" # Experiment\n",
|
||
|
|
" 'experiment_name': 'baseline_xgb', # A timestamp will be appended to this name\n",
|
||
|
|
" \n",
|
||
|
|
" # Date ranges (YYYY-MM-DD strings, as used in the values below)\n",
|
||
|
|
" 'dt_range': ['2020-01-01', '2023-12-31'],\n",
|
||
|
|
" 'train_range': ['2020-01-01', '2021-12-31'],\n",
|
||
|
|
" 'valid_range': ['2022-01-01', '2022-06-30'],\n",
|
||
|
|
" 'test_range': ['2022-07-01', '2023-12-31'],\n",
|
||
|
|
" 'fit_range': ['2020-01-01', '2021-12-31'], # MUST match train_range - prevents data leakage. NOTE: loader.load() below is passed CONFIG['train_range'] directly, not this key\n",
|
||
|
|
" \n",
|
||
|
|
" # Data\n",
|
||
|
|
" 'feature_sets': ['alpha158', 'hffactor'],\n",
|
||
|
|
" 'return_type': 'o2c_twap1min',\n",
|
||
|
|
" 'normalization': 'dual',\n",
|
||
|
|
" 'blend_weights': None, # Use default [0.2, 0.1, 0.3, 0.4] or specify name/list\n",
|
||
|
|
" 'weight_factors': {'positive': 1.0, 'negative': 2.0},\n",
|
||
|
|
" \n",
|
||
|
|
" # Model (XGBoost parameters with regularization)\n",
|
||
|
|
" 'model_params': {\n",
|
||
|
|
" 'objective': 'reg:squarederror',\n",
|
||
|
|
" 'eval_metric': 'rmse',\n",
|
||
|
|
" 'eta': 0.05,\n",
|
||
|
|
" 'max_depth': 4, # Reduced to prevent overfitting\n",
|
||
|
|
" 'subsample': 0.8,\n",
|
||
|
|
" 'colsample_bytree': 0.8,\n",
|
||
|
|
" 'lambda': 1.0, # L2 regularization\n",
|
||
|
|
" 'alpha': 0.1, # L1 regularization\n",
|
||
|
|
" 'seed': 42\n",
|
||
|
|
" },\n",
|
||
|
|
" \n",
|
||
|
|
" # Backtest\n",
|
||
|
|
" 'num_trades': 4,\n",
|
||
|
|
" 'signal_dist': 'normal',\n",
|
||
|
|
" 'pos_weight': True,\n",
|
||
|
|
" \n",
|
||
|
|
" # Output\n",
|
||
|
|
" 'save_results': True,\n",
|
||
|
|
"}\n",
|
||
|
|
"\n",
|
||
|
|
"print(\"Configuration:\")\n",
|
||
|
|
"print(f\" Experiment: {CONFIG['experiment_name']}\")\n",
|
||
|
|
"print(f\" Train: {CONFIG['train_range'][0]} to {CONFIG['train_range'][1]}\")\n",
|
||
|
|
"print(f\" Valid: {CONFIG['valid_range'][0]} to {CONFIG['valid_range'][1]}\")\n",
|
||
|
|
"print(f\" Test: {CONFIG['test_range'][0]} to {CONFIG['test_range'][1]}\")\n",
|
||
|
|
"print(f\" Fit: {CONFIG['fit_range'][0]} to {CONFIG['fit_range'][1]} (normalization)\")\n",
|
||
|
|
"blend_desc = \"default: [0.2, 0.1, 0.3, 0.4]\" if CONFIG['blend_weights'] is None else str(CONFIG['blend_weights'])\n",
|
||
|
|
"print(f\" Blend: {blend_desc}\")"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"metadata": {},
|
||
|
|
"source": [
|
||
|
|
"## 2. Load Dataset and Train Model"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 3,
|
||
|
|
"metadata": {
|
||
|
|
"execution": {
|
||
|
|
"iopub.execute_input": "2026-02-14T08:12:20.797194Z",
|
||
|
|
"iopub.status.busy": "2026-02-14T08:12:20.796882Z",
|
||
|
|
"iopub.status.idle": "2026-02-14T08:13:57.887738Z",
|
||
|
|
"shell.execute_reply": "2026-02-14T08:13:57.886964Z"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"Loading dataset and training model...\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stderr",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"INFO:src.train:Loaded 175 features\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stderr",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"INFO:src.train:Train size: 29749, Valid: 7527, Test: 23799\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"Dataset loaded with 175 features\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stderr",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"INFO:src.train:Training XGBoost model...\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"[0]\ttrain-rmse:0.45902\tvalid-rmse:0.41803\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"[50]\ttrain-rmse:0.44688\tvalid-rmse:0.41783\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"[65]\ttrain-rmse:0.44472\tvalid-rmse:0.41812\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stderr",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"INFO:src.train:train - IC: 0.2707, R²: 0.0596\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stderr",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"INFO:src.train:valid - IC: 0.0799, R²: -0.0001\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stderr",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"INFO:src.train:test - IC: 0.0898, R²: 0.0014\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"\n",
|
||
|
|
"Training metrics:\n",
|
||
|
|
" train_ic: 0.2707\n",
|
||
|
|
" train_r2: 0.0596\n",
|
||
|
|
" valid_ic: 0.0799\n",
|
||
|
|
" valid_r2: -0.0001\n",
|
||
|
|
" test_ic: 0.0898\n",
|
||
|
|
" test_r2: 0.0014\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"print(\"Loading dataset and training model...\")\n",
|
||
|
|
"\n",
|
||
|
|
"# Load dataset first - we'll use this for both training and prediction\n",
|
||
|
|
"loader = CTA1DLoaderParquet(\n",
|
||
|
|
" return_type=CONFIG['return_type'],\n",
|
||
|
|
" normalization=CONFIG['normalization'],\n",
|
||
|
|
" feature_sets=CONFIG['feature_sets'],\n",
|
||
|
|
" blend_weights=CONFIG['blend_weights'],\n",
|
||
|
|
")\n",
|
||
|
|
"\n",
|
||
|
|
"dataset = loader.load(\n",
|
||
|
|
" dt_range=CONFIG['dt_range'],\n",
|
||
|
|
" fit_range=CONFIG['train_range'] # Use train range for normalization - prevents data leakage\n",
|
||
|
|
")\n",
|
||
|
|
"feature_cols = dataset.features\n",
|
||
|
|
"df_full = dataset.to_pandas().data\n",
|
||
|
|
"print(f\"Dataset loaded with {len(feature_cols)} features\")\n",
|
||
|
|
"\n",
|
||
|
|
"# Create training config\n",
|
||
|
|
"train_config = TrainConfig(\n",
|
||
|
|
" dt_range=CONFIG['dt_range'],\n",
|
||
|
|
" feature_sets=CONFIG['feature_sets'],\n",
|
||
|
|
" normalization=CONFIG['normalization'],\n",
|
||
|
|
" blend_weights=CONFIG['blend_weights'],\n",
|
||
|
|
" model_type='xgb',\n",
|
||
|
|
" model_params=CONFIG['model_params'],\n",
|
||
|
|
" segments={\n",
|
||
|
|
" 'train': (CONFIG['train_range'][0], CONFIG['train_range'][1]),\n",
|
||
|
|
" 'valid': (CONFIG['valid_range'][0], CONFIG['valid_range'][1]),\n",
|
||
|
|
" 'test': (CONFIG['test_range'][0], CONFIG['test_range'][1]),\n",
|
||
|
|
" }\n",
|
||
|
|
")\n",
|
||
|
|
"\n",
|
||
|
|
"# Train model using the pre-loaded dataset to ensure feature consistency\n",
|
||
|
|
"model, metrics = train_model(\n",
|
||
|
|
" config=train_config,\n",
|
||
|
|
" output_dir=None,\n",
|
||
|
|
" dataset=dataset # Pass pre-loaded dataset\n",
|
||
|
|
")\n",
|
||
|
|
"\n",
|
||
|
|
"print(f\"\\nTraining metrics:\")\n",
|
||
|
|
"for key, value in metrics.items():\n",
|
||
|
|
" print(f\" {key}: {value:.4f}\")"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 4,
|
||
|
|
"metadata": {
|
||
|
|
"execution": {
|
||
|
|
"iopub.execute_input": "2026-02-14T08:13:57.889494Z",
|
||
|
|
"iopub.status.busy": "2026-02-14T08:13:57.889304Z",
|
||
|
|
"iopub.status.idle": "2026-02-14T08:13:58.092001Z",
|
||
|
|
"shell.execute_reply": "2026-02-14T08:13:58.091114Z"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"\n",
|
||
|
|
"Top 10 Features:\n",
|
||
|
|
" feature importance\n",
|
||
|
|
"57 f58 15.151920\n",
|
||
|
|
"22 f22 14.377318\n",
|
||
|
|
"23 f23 10.843798\n",
|
||
|
|
"21 f21 10.386007\n",
|
||
|
|
"64 f68 8.473670\n",
|
||
|
|
"99 f119 7.243452\n",
|
||
|
|
"96 f116 6.621992\n",
|
||
|
|
"97 f117 5.903418\n",
|
||
|
|
"19 f19 4.761685\n",
|
||
|
|
"101 f121 4.529542\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA94AAAJOCAYAAABBfN/cAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAAZ6NJREFUeJzt3X98zfX///H7dmZbmCERU6Te+XnszK+FZaI3vSvxJqVs+PhRjETNSH5GUfmVX/1Q1LZkoqhI3uiHaumg1SHsnZAfUWkT5sd2tu8fvs67NbTZXnud83K7Xi4un7PX67mz+zmPPu/tvteP+eXl5eUJAAAAAAAYwt/sAAAAAAAAWBnFGwAAAAAAA1G8AQAAAAAwEMUbAAAAAAADUbwBAAAAADAQxRsAAAAAAANRvAEAAAAAMBDFGwAAAAAAA1G8AQAAAAAwUIDZAQAAkKRRo0bp3XffveSaFi1aKCkpybAMhw4d0owZM/TVV1/p5MmTuuGGG/Twww+rY8eOnjV5eXl69dVXtXTpUv3888+69tprdf/996t///7y8/O76HO3a9dOBw8evOj+N998U82aNSvR1+MN5syZo7lz5+q7775TUFCQ2XEAADAFxRsA4BWefPJJPf74456Px48fr+3bt2vZsmWebWXKlDHs6x87dkyxsbG65pprNGfOHIWGhuqtt97S0KFD9dprrykqKkqSNH/+fL3yyiuaOHGimjVrpq1bt2rs2LFyu90aOHDgJb9G+/btNXHixAvuq1ixYkm/JN12222aOnWqIiMjS/y5rWjkyJGqWbOmHnnkEbOjAAAshuINAPAKISEhCgkJ8XwcFBQkm82ma665plS+/urVq3XgwAElJiYqLCxMkjR69GitX79e7777rqKionT69Gm9+uqr6tOnj7p06SJJqlmzptLT0/XKK6/o//7v/y55VDcoKKjUXs+RI0d06NChYj/P2bNnFRgYWAKJvFd2drbKlCmjb775RjVr1jQ7DgDAgrjGGwDgUz7++GPdd999aty4sRwOhx544AGlpqZ69n/xxReqW7euPv/8cw0ZMkQRERFq2rSpRo0apVOnTl30ebt166bPPvvMU7olyc/PT5UrV9Yvv/wiSdq6dauysrLUpk2bfJ/btm1bnTx5Ulu3bi3268vJydGcOXPUvn17NWrUSNHR0Xruued09uxZzxq3263Zs2erY8eOaty4sVq3bq2hQ4fqwIEDkqRNmzZ5Mvbq1Uvt2rWTJMXGxuq+++7L9/U2bdqkunXr6rPPPpMkvfPOO56P27dvn2/96tWr1aVLF9ntdrVo0ULDhw/XkSNHivT6zs9n06ZN6tevnxo3bqzo6GitWLFCR44c0YABA+RwOBQdHa3k5GTP5y1dulR169aVy+VS7969FR4erltuuUVTp06V2+32rDt+/LjGjx+vqKgoz/s3efLkfLOPjY1VXFycZs+erYiICL355puqW7eu9u3bp7lz56pu3bqe9/KDDz5Q165d1aRJEzVt2lQPPPCAvv76a89z7d27V3Xr1tXq1as1efJkRUZGqmnTpnrooYfyvTd5eXl65ZVX1L59e9ntdnXs2FGJiYn53pvt27erX79+ioiIUHh4uGJiYkrkvykAgPko3gAAn/Hll19q0KBBuvnmm7V06VItXrxY1apVU//+/fX9999Lkmw2myRp8uTJ6tixo9577z2NHj1aH3zwgZ577rmLPndgYKCqVauWb9uhQ4e0c+dORURESJL27NkjSbruuuvyrTv/8fn9xfHUU09pwYIFGjBggFatWqWRI0dq2bJlGj9+vGfNyy+/rJdfflmPPvqo1qxZoxdffFEHDhzQ0KFDJUkRERGaPn26pHPXWP/5dP3CWrBggZ555hm9/PLLks4V0OHDh6tFixZasWKF5s2bpx9++EF9+vTJ90uBvxMQcO5ku5kzZyomJkYrVqzQDTfcoAkTJmjUqFG6//77tWLFCjVt2lTPPP
OMpwCfn+v48ePVt29fvf/+++rfv78WLVqkRYsWeZ5/4MCBWr9+vcaPH69Vq1YpISFBK1euVEJCQr4cP/zwg/bs2aPly5era9eu2rBhgySpb9+++vzzz1W9enVt3rxZjz/+uFq3bq0VK1bo7bff1nXXXaeHH37YU6rPv5758+erWrVqevvttzVz5kx9/fXXmjVrlufrzZ07Vy+++KIeeeQRrVq1SgMGDNDUqVP15ptvSpL27dunmJgY5eTkKDExUUuXLlWVKlX0f//3f/rxxx8L/f4CALwTxRsA4DNee+01XXfddZo0aZLq1aunBg0aaOrUqSpbtqzn6Oj5G5xFR0erU6dOuu6669StWzd16tRJ77//vvLy8gr1tc6cOaPHH39coaGh6tOnjyTpjz/+kCSVL18+39rzH5/ff7l+/fVXLVu2TH369FGPHj1Uq1Yt3XnnnYqLi9OKFSs8R95jYmK0du1a3XnnnapRo4YaN26se++9V9u3b9fvv/+uwMBAVahQQZIUGhqqypUrFzlLx44dFRkZ6fllxIsvvqiIiAiNHj1aN954o5o3b65nn31WP/74o9atW1fk57/tttt02223qU6dOurRo4dOnTqlFi1a6Pbbb1ft2rXVp08fud1u7dy5U9L/5tqlSxdFR0fr+uuvV//+/dWiRQu99957kqRvvvlGmzdvVnx8vP75z3+qVq1auuuuuzRgwACtXbs236n3hw4d0vjx41WnTh1VqFBBVapUkSSVLVtW11xzjWw2mxo1aqR169bp0Ucf1fXXX686dero4YcfVlZWVoEj0XXq1NGAAQN0/fXXq02bNrrlllv03XffSTp3uv7rr7+unj17qkuXLrr++ut17733avDgwTpx4oQk6fXXX5ckvfDCC7Lb7apbt66effZZlS9fXm+88UaR318AgHfhGm8AgM9wuVxq3759vruHBwcHq169ep6Cdl6TJk3yfdygQQO98847+uWXXwoc2f6rEydOaNCgQfrvf/+rhQsXeorrpe5aXpj9a9eu9Rw9/6svvvhCLpdLbre7wM3QbrnlFuXm5iotLU0dOnSQJC1atEgbN27U0aNH5Xa7lZOTI0nKyMi4rKL9V40aNfI8PnHihH744YcCN49r0KCBKlasqK1bt+rOO+8s0vPXq1fP8/j8jeUaNGjg2VapUiVJ504d/7O/zrVhw4aeX7q4XC5JUvPmzfOtcTgckqSdO3eqRo0aks5dm/93N7QLCgrSunXr9N577+ngwYPKzs72/OImMzMz39rw8PB8H4eGhmrbtm2Szp0JceLECTVs2DDfmsGDB3sef/vtt6pbt26+TEFBQYqIiOB0cwCwAIo3AMBnnDhxQqGhoQW2h4aGFrjW+M83apOkq666SpJ0+vTpS36NX375RQMGDNDRo0eVlJSk+vXre/adP4p8/PhxlS1bNl+u8zkuJSoqSqNHj77gvquuuspTMuPi4uTv/7+T0s6XvV9//VXSuTvAf/755xo5cqQiIiIUHBystWvXatq0aZf8+kXx5/fv/Ot77bXXClyXfOrUKU+uoggODvY8Pv8Liwtt++sZCheaa3Z2tnJyci46h/NzO7//z9suJTk5WVOnTlW/fv30r3/9SxUqVNCRI0cUGxtbYO2f/3s4n/989vNz/fPr+6vjx4/r4MGDBX4xc/bs2UJlBQB4N4o3AMBnhISEFDjSKJ07+vjXQnbs2LF8H588eVKSVK5cuYs+f2Zmpue08qVLl3qOjp534403SpJ++umnfEfNz1/bfX7/xZQtW1a1atW66P7zhfH5559X3bp1C+yvXLmyzp49q/Xr16tfv37q0aOHZ9/fHW2/mL/7RYT0v7Lbu3fvAjdnkwqWTiNdaK5BQUEKCAjw5MzMzMx3OcD5/2aKWmBXr14th8OR7/rwv379wvhzroupUKGCrr32Wk2ePLnAvj//EgYA4Jv4X3IAgM8IDw+X0+nMdxT05MmT+v7772W32/Ot/eabb/J9vG
PHDlWuXNlzLe9f5eXlaejQocrNzVVycnKB0i2du2lZaGio50Zc523YsEEVK1b0nNJ8uRo1aiSbzabDhw+rVq1ann/
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 1000x600 with 1 Axes>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "display_data"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Feature importance\n",
|
||
|
|
"importance = model.get_score(importance_type='gain')\n",
|
||
|
|
"importance_df = pd.DataFrame([\n",
|
||
|
|
" {'feature': k, 'importance': v}\n",
|
||
|
|
" for k, v in importance.items()\n",
|
||
|
|
"]).sort_values('importance', ascending=False)\n",
|
||
|
|
"\n",
|
||
|
|
"print(\"\\nTop 10 Features:\")\n",
|
||
|
|
"print(importance_df.head(10))\n",
|
||
|
|
"\n",
|
||
|
|
"# Plot\n",
|
||
|
|
"fig, ax = plt.subplots(figsize=(10, 6))\n",
|
||
|
|
"importance_df.head(20).plot(x='feature', y='importance', kind='barh', ax=ax)\n",
|
||
|
|
"ax.set_title('Top 20 Feature Importance')\n",
|
||
|
|
"plt.tight_layout()\n",
|
||
|
|
"plt.show()"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"metadata": {},
|
||
|
|
"source": [
|
||
|
|
"## 3. Feature Importance"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 5,
|
||
|
|
"metadata": {
|
||
|
|
"execution": {
|
||
|
|
"iopub.execute_input": "2026-02-14T08:13:58.094128Z",
|
||
|
|
"iopub.status.busy": "2026-02-14T08:13:58.093959Z",
|
||
|
|
"iopub.status.idle": "2026-02-14T08:13:58.526368Z",
|
||
|
|
"shell.execute_reply": "2026-02-14T08:13:58.525387Z"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"Running backtest on test set...\n",
|
||
|
|
"\n",
|
||
|
|
"Predictions generated: 23799 samples\n",
|
||
|
|
"Signal statistics:\n",
|
||
|
|
" Mean: 0.0242\n",
|
||
|
|
" Std: 0.0614\n"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABWgAAAGGCAYAAADmcmilAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsnXmYFOW9/U9V9+wzzMIuCBiQfQZQVNwVNSJxi4EIXqO5GkJcYlwSl1wNJvqLiQY10eiVRIWbRYxo3OKCSqIh4i6ygwuI7AwzA8Ns3V31/v6ovbqql5neZvp8noeH6arqqrf2rlPnPV9JCCFACCGEEEIIIYQQQgghJOPI2W4AIYQQQgghhBBCCCGE5CsUaAkhhBBCCCGEEEIIISRLUKAlhBBCCCGEEEIIIYSQLEGBlhBCCCGEEEIIIYQQQrIEBVpCCCGEEEIIIYQQQgjJEhRoCSGEEEIIIYQQQgghJEtQoCWEEEIIIYQQQgghhJAsQYGWEEIIIYQQQgghhBBCsgQFWkIIIYQQQgghhBBCCMkSFGgJITnFqlWrcM0112Dq1Kmora3FlClTMHv2bDz77LOO6aZOnYrrrrsuK23ctm0bRo0ahSeeeMJ3mgceeACjRo0y/40bNw4nnHACfvCDH2Dp0qVR03/nO9/Bt7/97ZS31WhHR0eHuZzvfOc7KV+O17IIIYQQQnoqN998M0aNGoVbbrnFd5qrr74ao0aNwgMPPJDBlsWnqakJ999/P77xjW/giCOOwKRJk3DmmWfiV7/6FQ4cOGBOl+3fdvF+Hz/zzDMYNWoUtm3blvJlT5061fFbfuLEiZg2bRpuv/12bN68OeXLI4QQCrSEkJzh3XffxezZs1FYWIj58+fj1VdfxaOPPoqxY8fipptuwsKFC81plyxZgl/84hfZa2yCLFu2DMuXL8frr7+O3/3udzjssMNwww034JprrkEkEjGne+CBB7BgwYKE5/vOO+9g6tSpcae77LLLsHz5chQVFXWq/bG46aabHA8c6VwWIYQQQkiuUVpaiqVLl6K9vT1q3IEDB/Dmm2+ipKQkCy3zJxQK4eKLL8bLL7+MH/3oR3j22WfxzDPPYO7cuXj22Wdx6aWXQlVVAPxtd9ppp2H58uVYvnw5nnvuOdxwww344osvcN555+Gll15Ken6J/n4nhOQnwWw3gBBCDJ544gn07dsX99xzDyRJAgAccsghGDduHNra2rB27Vpz2pqammw1Myn69Olj/qgdOHAgjjjiCJx88sm4/PLL8eCDD+Laa68FAFRVVSU1348//jjm+EgkgkAggLKyMpSVlXWm6Qm1YfDgwebndC6LEEIIISTXGDNmDD777DO88cYb+MY3vuEY9+qrr2LIkCFoa2vLUuu8WbFiBT799FM8/vjjOO6448zhhx12GKqrq/Hggw/iiy++wIgRI/L+t11RURH69u1rfh46dChOP/103HbbbbjxxhsxcuRIjBgxIuH5xfv9TgjJb+igJYTkDOFwGIqiIBwOR4375S9/iXvuucf87I44+Oyzz/Cd73wHdXV1OOGEE/Dwww9j4cKFGDVqlDm/WbNm4corr8Rrr72Gs846C7W1tTjrrLPwxhtvOJb14osv4oILLsARRxyBI488ErNnz8Z7772XsvWcMmUKLrjgAixatMgRPWDvwvXaa6/hW9/6Fo444ggcccQRmDVrFt5++20AWpe6+++/H9u3bze7zRmxC0899RRmzZqFuro6NDc3+3ZNe+GFF/D1r38d48ePx/Tp0/Hmm2+a4/y+M2rUKPzmN78x//7yyy/x4IMPml3LvL73zDPP4JxzzkFtbS2OPPJIXH755Q6h/W9/+xtGjRqFTz/9FHPnzsXEiRNx/PHH4+c//7nDYUwIIYQQkmsEAgGccsopeO6556LGPf/88zjttNOihkciETzwwAM47bTTMH78eJx88sm4++67EQqFzGkURcHvfvc7nHnmmairq8Pxxx
+Pa665xtGVv7O/oYzfxfblGZx66ql4+umnTdHR/dsuHA7jzjvvxJQpUzBp0iT84Ac/wGeffYZRo0bhb3/7W1Lt+vLLL/HDH/4Qxx13HGpra3HGGWfg4YcfNt27ybB9+3b893//NyZMmIBjjjkGv/rVr6AoChoaGlBbW+sZMXH55Zdj5syZSS9LkiT89Kc/RUlJCR5//PGE18fr9zsAtLa24s4778RJJ52E8ePH44wzzsCCBQsghEi6bYSQ7g0FWkJIznDKKadgz549mD17Nl577TU0Nzcn9L1QKITvf//72LVrF/7whz9g0aJF2LRpk5kRW1BQYP7/2Wef4W9/+xvmz5+PZ599Fn369MGNN96IgwcPAgA++OAD3HDDDTj++OPx7LPP4qmnnsKhhx6KuXPnYvfu3Slb11NPPRWtra1YvXp11LjNmzfj2muvxZlnnonnnnsOTz31FGpra/H9738fO3fuxP/8z//gtNNOw4ABA7B8+XJcdtll5ncff/xxzJgxA6+++irKy8s9l71582b8/e9/x29+8xssWbIE/fv3xw9/+EPs2rUr4fYvW7YMgNX1beDAgVHTLFmyBLfccgumTp2KZ599Fo899hhCoRAuueQSc1sGg1pHjnnz5uGb3/wmXnzxRVx22WX461//ihdeeCHh9hBCCCGEZINvfOMb+M9//oN9+/aZw3bu3In3338f06dPj5r+F7/4Bf7whz9gzpw5+Mc//oGbbroJS5Yswbx588xpHnnkETzyyCP40Y9+hFdeeQUPP/wwtm3bhmuuucacprO/oY444gj06tUL119/Pf74xz/iyy+/THhdH3zwQfzlL3/BFVdcgWeffRannXaaaZgw2pNIu4QQ+P73v4/t27fjkUcewSuvvIJrrrkGv//97/GXv/wl4fYY3HnnnZg5cyaee+45zJkzBwsXLsSiRYtQU1ODr3/963j22WcdgmdDQwPeeeedTgm0gBZtMWXKFLz77rsJr4/f7/cf/vCHeOGFF3DzzTfjH//4By6//HI88MAD+P3vf9+pthFCui8UaAkhOcOMGTPwgx/8AJ9++imuvvpqHH300bjgggtw7733xgzjf//997F9+3bccMMNOOaYYzB8+HDcfffdnnlgu3btwq9//WuMHTsWw4cPx8UXX4yDBw/iiy++AACMHz8er7/+On70ox9hyJAh+NrXvoa5c+eitbUVH330UcrW9ZBDDgEA7NmzJ2rc+vXrEYlEcMEFF+DQQw/F8OHDccstt+BPf/oTevXqhYqKChQVFSEQCKBv376OrmcjRozAjBkzcOihh0KWvS/xjY2N+PWvf426ujqMHj0ad955Jzo6OvDKK68k3P4+ffoA0H6g9u3bF4FAIGqaP/zhDzjmmGNw3XXXYfjw4ZgwYQJ+85vfoK2tDUuWLHFMO336dEybNg2DBw/GZZddhrKyMqxatSrh9hBCCCGEZIPjjz8elZWVDlH0xRdfxOGHH47Ro0c7pt27dy+WLFmC7373u5g1axaGDh2K6dOn48orr8Szzz5r/i68+OKLsXTpUkyfPh2HHHII6urqMGPGDKxduxYNDQ2OeSb7G6qmpgYPPvggampqcM899+DrX/86TjrpJNx0001YtmxZTOfmM888g5NOOgmXXnophg4dipkzZ+Lkk0/2nDZeu/7v//4Pjz76KGprazFo0CCcc845GDNmDP7973/7b2wfzj//fEyfPh3Dhg3D9773PRx11FGmq3n27NnYtm2bKaYCwCuvvILCwkJPAT1RBg4c6PgdH299vH6/r1q1CsuXL8f111+P6dOnY+jQoZg1axYuvPBCLFy40NPlTAjpuVCgJYTkDJIk4brrrsPy5ctx33334dvf/jba2trwyCOPYPr06b5v1D/99FMAwIQJE8xhBQUFOPHEE6OmHTJkiCO/trKyEgCwf/9+AFrW1Ouvv46ZM2fi6KOPxqRJk/Ctb30LgFbxNlUYP7gKCwujxh155JHo3bs3Lr
nkEixcuBAbNmyALMuYNGlS3Byw8ePHx132kCFDHHlagwYNQmVlpSlSp4KDBw9iy5YtOProox3D+/fvjwEDBmDDhg2
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 1400x400 with 2 Axes>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "display_data"
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"print(\"Running backtest on test set...\")\n",
|
||
|
|
"\n",
|
||
|
|
"# Filter test data by date (datetime index is now datetime64 type)\n",
|
||
|
|
"# Use string dates for comparison since pandas will convert them appropriately\n",
|
||
|
|
"test_start = CONFIG['test_range'][0]\n",
|
||
|
|
"test_end = CONFIG['test_range'][1]\n",
|
||
|
|
"\n",
|
||
|
|
"# Get test data by filtering the pandas DataFrame\n",
|
||
|
|
"df_test = df_full.loc[\n",
|
||
|
|
" (df_full.index.get_level_values(0) >= test_start) &\n",
|
||
|
|
" (df_full.index.get_level_values(0) <= test_end)\n",
|
||
|
|
"].copy()\n",
|
||
|
|
"\n",
|
||
|
|
"# Extract features and labels\n",
|
||
|
|
"X_test = df_test[feature_cols].values\n",
|
||
|
|
"y_test = df_test['label'].values\n",
|
||
|
|
"\n",
|
||
|
|
"# Get predictions\n",
|
||
|
|
"import xgboost as xgb\n",
|
||
|
|
"dtest = xgb.DMatrix(X_test)\n",
|
||
|
|
"predictions = model.predict(dtest)\n",
|
||
|
|
"\n",
|
||
|
|
"# Create signal Series with proper index for backtest\n",
|
||
|
|
"signal_series = pd.Series(predictions, index=df_test.index)\n",
|
||
|
|
"\n",
|
||
|
|
"print(f\"\\nPredictions generated: {len(predictions)} samples\")\n",
|
||
|
|
"print(f\"Signal statistics:\")\n",
|
||
|
|
"print(f\" Mean: {predictions.mean():.4f}\")\n",
|
||
|
|
"print(f\" Std: {predictions.std():.4f}\")\n",
|
||
|
|
"\n",
|
||
|
|
"# Plot signal distribution\n",
|
||
|
|
"fig, axes = plt.subplots(1, 2, figsize=(14, 4))\n",
|
||
|
|
"\n",
|
||
|
|
"axes[0].hist(predictions, bins=100, edgecolor='black')\n",
|
||
|
|
"axes[0].set_title('Signal Distribution')\n",
|
||
|
|
"axes[0].axvline(x=0, color='red', linestyle='--')\n",
|
||
|
|
"\n",
|
||
|
|
"# Group by date for mean signal plot\n",
|
||
|
|
"signal_by_date = signal_series.groupby(level=0).mean()\n",
|
||
|
|
"axes[1].plot(signal_by_date.index, signal_by_date.values)\n",
|
||
|
|
"axes[1].set_title('Mean Signal by Date')\n",
|
||
|
|
"axes[1].axhline(y=0, color='red', linestyle='--')\n",
|
||
|
|
"\n",
|
||
|
|
"plt.tight_layout()\n",
|
||
|
|
"plt.show()"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"metadata": {},
|
||
|
|
"source": [
|
||
|
|
"## 4. Generate Predictions"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 6,
|
||
|
|
"metadata": {
|
||
|
|
"execution": {
|
||
|
|
"iopub.execute_input": "2026-02-14T08:13:58.528619Z",
|
||
|
|
"iopub.status.busy": "2026-02-14T08:13:58.528462Z",
|
||
|
|
"iopub.status.idle": "2026-02-14T08:13:58.798553Z",
|
||
|
|
"shell.execute_reply": "2026-02-14T08:13:58.797346Z"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"\n",
|
||
|
|
"Backtest Summary:\n",
|
||
|
|
" 0: ic_mean 0.074090\n",
|
||
|
|
"ic_std 0.196369\n",
|
||
|
|
"ic_ir 0.377298\n",
|
||
|
|
"ic_positive_ratio 0.659401\n",
|
||
|
|
"annual_return 20.731708\n",
|
||
|
|
"annual_volatility 3.728406\n",
|
||
|
|
"sharpe_ratio 5.560475\n",
|
||
|
|
"max_drawdown -0.758262\n",
|
||
|
|
"Name: 0, dtype: float64\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Run backtest using CTABacktester\n",
|
||
|
|
"from qshare.eval.cta.backtest import CTABacktester\n",
|
||
|
|
"\n",
|
||
|
|
"# Create return Series from test data (need actual returns, not normalized labels)\n",
|
||
|
|
"# The 'return' column should have the raw returns if available\n",
|
||
|
|
"if 'return' in df_test.columns:\n",
|
||
|
|
" return_series = df_test['return']\n",
|
||
|
|
"else:\n",
|
||
|
|
" # Use label as proxy (it's normalized returns)\n",
|
||
|
|
" return_series = df_test['label']\n",
|
||
|
|
"\n",
|
||
|
|
"backtester = CTABacktester(\n",
|
||
|
|
" num_trades=CONFIG['num_trades'],\n",
|
||
|
|
" signal_dist=CONFIG['signal_dist'],\n",
|
||
|
|
" pos_weight=CONFIG['pos_weight']\n",
|
||
|
|
")\n",
|
||
|
|
"\n",
|
||
|
|
"results = backtester.run(return_series, signal_series)\n",
|
||
|
|
"summary = backtester.summary()\n",
|
||
|
|
"\n",
|
||
|
|
"print(\"\\nBacktest Summary:\")\n",
|
||
|
|
"for key, value in summary.items():\n",
|
||
|
|
" if isinstance(value, float):\n",
|
||
|
|
" print(f\" {key}: {value:.4f}\")\n",
|
||
|
|
" else:\n",
|
||
|
|
" print(f\" {key}: {value}\")"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 7,
|
||
|
|
"metadata": {
|
||
|
|
"execution": {
|
||
|
|
"iopub.execute_input": "2026-02-14T08:13:58.801046Z",
|
||
|
|
"iopub.status.busy": "2026-02-14T08:13:58.800882Z",
|
||
|
|
"iopub.status.idle": "2026-02-14T08:13:58.965447Z",
|
||
|
|
"shell.execute_reply": "2026-02-14T08:13:58.964134Z"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABW0AAAGGCAYAAAAAW6PhAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQABAABJREFUeJzsnXd4HMX9/99XdOrNVXJv2LgigynGlFBCNx1imoEvLRhTEmIgYAIJvYaAQ34mQGw6mGpCCcXg0NwwtuXebRVLsqRTO12//f1xmfXc3O7d7t3e3Ur6vJ7Hj6XT3uzs7uyU97znMxZJkiQQBEEQBEEQBEEQBEEQBEEQpsCa6QwQBEEQBEEQBEEQBEEQBEEQByDRliAIgiAIgiAIgiAIgiAIwkSQaEsQBEEQBEEQBEEQBEEQBGEiSLQlCIIgCIIgCIIgCIIgCIIwESTaEgRBEARBEARBEARBEARBmAgSbQmCIAiCIAiCIAiCIAiCIEwEibYEQRAEQRAEQRAEQRAEQRAmgkRbgiAIgiAIgiAIgiAIgiAIE0GiLUEQBEEQBEEQBEEQBEEQhImwZzoDBEEQBEEQROp47rnnMG/ePKxbtw7Z2dny5263G6+88go+++wz7NmzBwAwYMAAnHTSSbjqqqvQq1evuGlXVlbiX//6F1atWoXm5mYUFxdjzJgxuOiii3D66aen7JriceKJJ6KmpibmMbNnz0ZNTQ2+++47/PDDD2nKmTJ/+tOfsH79evzud7/DtddeG/f4r7/+GoMGDUpDzoCWlhYsWLAAX375Jfbt2wdJktCvXz+ccMIJmDVrFoqKihJO++uvv8acOXPw5ptvYsyYMQbmmiAIgiAIoutjkSRJynQmCIIgCIIgiNSgJNo6nU5cddVVaGlpwezZs3H44YdDkiT8/PPPeO6552C1WvHKK69g8ODBqum+8847uP/++3HqqadixowZGDx4MFpbW/Hll1/ixRdfxOmnn45HH30UFoslXZcq09zcjGAwKP9+9tln48gjj8Q999wjf5aXl4dQKAS/369JoE4Vb775Jp566il89NFH6Nu3L1pbW+W/LV++HLfffjuee+45TJ48Wf68V69esNlsSZ132bJluPvuu7FkyRLVY3w+H84//3z4/X7cfvvtOPjggxEMBvHLL7/g8ccfR3l5Od577z1YrdoX711++eU4//zzcf755wMAnnjiCXz22Wf48MMPkxKACYIgCIIguhvktCUIgiAIguhh/PnPf0Z9fT0+/PBDlJWVyZ8PHz4cxxxzDM466yzMmzcPjz32mOL3N2/ejD//+c+YOXMm7rrrLvnzAQMGYOzYsTjkkENw/fXXY9KkSbjssstSdh2BQAA2my1KGBZFWKvVipycHPTt2zdleUmElpYWPP300/i///s/DBw4EAAi8shEzOLiYsPz/ssvv8Q95qeffsK2bdvwr3/9C0cffbT8+fDhw1FaWop58+Zh586dGDVqlKZzBgIBrF+/XhZsAWDWrFl47733MH/+fMyZM0f/hRAEQRAEQXRTKKYtQRAEQRBED6Kmpgaff/45rrnmmgjBllFWVoYPP/wQjzzyiGoar7zyCvLz8/G73/1O8e/HH388TjjhBCxcuBAAMGfOHBx33HEQF3h98sknGDNmDDZu3AgA2Lt3L26++WYcfvjhmDhxIs4//3x888038vHV1dUYM2YMFi1ahBkzZmDSpElob2/XfQ8Yd911F6ZNmyb/fuyxx+LJJ5/EvHnzMHXqVEyePBl/+MMf4PV6MW/ePEybNg2HHnoobr755ojzBgIBPPfcczjppJMwYcIEHH/88Xj88cfh8/linv+VV15BIBDAzJkzdeW7qakJf/zjHzF16lRMmDABZ555Jt59992IY7788ktccMEFOPTQQ3HooYdixowZ+PHHH+XrfuaZZ1BTU4MxY8bgueeeUzyP3+8HAMXrOOGEE/Dee+9FCLYbNmzANddcg8mTJ+OQQw
7B5ZdfjtWrVwMIP7vx48fD7Xbjj3/8oxwOIT8/H1dccQVee+21CJcxQRAEQRBET4dEW4IgCIIgiB7EypUrIUkSjj/+eNVjBg0aFHPJ+/Lly3HUUUdFxMgVOeGEE7Bnzx5UVVVh+vTpqK+vj3J3fvrppzjooIMwbtw4tLS04NJLL8XevXvxj3/8Ax9++CEOPfRQzJo1C8uWLYv43r/+9S9ceOGF+M9//oOCggKNVx6frKwsfPnll/B6vXjzzTdx66234uOPP8bVV18Nl8uF1157DX/+85/xxRdf4JVXXpG/95e//AX//Oc/cd111+GTTz7BnXfeiXfffRf33XdfzPN9+eWXOOKII3Rdg8/nw1VXXYVly5bh0Ucfxccff4zp06fjnnvuwYcffggA2LVrF2677Taceuqp+Oijj7Bo0SJMnDgR119/Pfbt24d77rkHJ510EsrKyvD999/j//7v/xTPdeihh6KoqAi///3v8eKLL8qxj5XYs2cPLr/8cgQCAbzyyit455130KdPH1x99dXYuXMnysvL8frrrwMA7r77bnz//ffyd0866SR4PJ6IzwiCIAiCIHo6JNoSBEEQBEH0IBoaGgCEQxkkk0Z5eXnMY1j69fX1OProo9G7d298/vnn8t87Ojrw3//+F2effTYA4N1330VjYyP++te/YsqUKRg5ciTuuecejB49Gi+++GJE2qNGjcKFF16IwYMH64qnqpXbb78dw4YNw5VXXom8vDw0NTXhjjvuwPDhwzF9+nSMGjVKdgfv378f7777Lq666irMmDEDQ4cOxRlnnIFZs2bhww8/lO+3SGtrK7Zu3YopU6boytvXX3+NrVu34oEHHsDxxx+P4cOH47e//S1OPPFEzJ8/HwCwadMmBAIBnH/++Rg8eDBGjhyJP/7xj3j11VdRVFSEwsJCZGdnw2azoW/fvsjPz1c8V69evTBv3jz06tULTzzxBE455RQcd9xxuPPOO7FkyZII5/SCBQsAAH/7298wceJEjBkzBo899hgKCgqwcOFC2Gw2lJaWAgAKCwsjwj0cfPDBKCoqwvLly3XdC4IgCIIgiO4MibYEQRAEQRA9CLs9vKVBKBRKOI2srKy432d/t9lssNvtOP300/HFF1/IQt9XX32FQCAgi7Zr165F3759MWLECDkNi8WCo446Sl5iz5gwYULCeY/HwQcfHHH+kpISHHzwwRFxc0tLS9HW1gYAqKysRDAYxJFHHhmRzlFHHYVQKIQ1a9Yonmf//v0AoDtW7dq1a2GxWHDEEUdEfD516lTs3LkTLS0tOOyww9C7d2/MnDkTCxYswObNm2G1WjF58mRVgVaNI488El988QVef/11zJ49G0OHDsUnn3yCG2+8EVdeeSW8Xq+crzFjxqCkpET+bnZ2NiZPnhz1/JTo27evfE8IgiAIgiAI2oiMIAiCIAiiR9G/f38A4fixiYqfZWVlqKqqinlMbW0tAMiO3OnTp+O1117D2rVrUVFRgc8++wxHHHGEHFe3vb0djY2NmDx5ckQ6fr8ffr8fbrdb/qywsDChfGshNzc34neLxaL4GYPFtp01a1aE65eJ02pCJIvfyjYb00p7ezskSYoSiQOBAACgsbERo0aNwqJFi/Dyyy9jwYIFeOSRRzBw4EDcdNNNuOCCC3SdDwhv5DZlyhTZFdzS0oJnnnkGb775Jt58801cddVVaG9vR01NTdTz8/l8mq6xqKhIFsIJgiAIgiAIEm0JgiAIgiB6FFOnToXNZsOXX36pKtp+//33KCoqwqRJkxT/fuyxx+Kdd95BR0eHajzWpUuXYsyYMbIoW1FRgSFDhuDzzz/H8OHD8cMPP+Avf/mLfHxRUREGDRoUFQqBESt+biYpLi4GADzxxBPy5lo8vXr1ivk9vUJlUVERsrOz5fi1IkwkHzhwIO69917ce++92L59O1577TXcfffdGDx4cJRLVw1JkuB0OqOuoaSkBPfddx8+++wzbNmyRc5XWV
kZHnzwwah0tISwaGtrw9ChQzXliyAIgiAIoidA4REIgiAIgiB6EL169cL06dPxyiuvYMeOHVF/r62txZ133okXXnh
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 1400x400 with 1 Axes>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "display_data"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"\n",
|
||
|
|
"IC Statistics:\n",
|
||
|
|
" Mean: 0.0741\n",
|
||
|
|
" Std: 0.1964\n",
|
||
|
|
" IR: 0.3773\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# IC Analysis\n",
|
||
|
|
"# Results from CTABacktester are expected to be a dict; extract the IC series from it\n",
|
||
|
|
"if isinstance(results, dict):\n",
|
||
|
|
" # Results is a dict with various metrics\n",
|
||
|
|
" ic_by_date = results.get('df_ic', pd.Series())\n",
|
||
|
|
"else:\n",
|
||
|
|
" # Try the old approach if results is a DataFrame\n",
|
||
|
|
" ic_by_date = results.groupby(results.index.get_level_values(0))['ic'].mean()\n",
|
||
|
|
"\n",
|
||
|
|
"if len(ic_by_date) > 0:\n",
|
||
|
|
" fig = plot_ic_series(ic_by_date, title=\"IC Over Time (Test Set)\")\n",
|
||
|
|
" plt.show()\n",
|
||
|
|
"\n",
|
||
|
|
" print(f\"\\nIC Statistics:\")\n",
|
||
|
|
" print(f\" Mean: {ic_by_date.mean():.4f}\")\n",
|
||
|
|
" print(f\" Std: {ic_by_date.std():.4f}\")\n",
|
||
|
|
" print(f\" IR: {ic_by_date.mean() / ic_by_date.std():.4f}\")\n",
|
||
|
|
"else:\n",
|
||
|
|
" print(\"No IC data available in results\")"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 8,
|
||
|
|
"metadata": {
|
||
|
|
"execution": {
|
||
|
|
"iopub.execute_input": "2026-02-14T08:13:58.968285Z",
|
||
|
|
"iopub.status.busy": "2026-02-14T08:13:58.968127Z",
|
||
|
|
"iopub.status.idle": "2026-02-14T08:13:59.499839Z",
|
||
|
|
"shell.execute_reply": "2026-02-14T08:13:59.498582Z"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"data": {
|
||
|
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABKEAAAJOCAYAAABvBRRKAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjYsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvq6yFwwAAAAlwSFlzAAAPYQAAD2EBqD+naQAAw7tJREFUeJzs3Xd8VFX+//HXvXcmnSQEQgsQSiC00AVpKooFsbuWxYqIbfWrrr/ddVdXV1fX7tp2XcF1sa0Nsa0FBZWiKCK9IxAgBEJLII3MzL3398fAYAyBBCbMJLyfjwcPMufee+7n5jAk+eSczzFc13URERERERERERGpQ2akAxARERERERERkYZPSSgREREREREREalzSkKJiIiIiIiIiEidUxJKRERERERERETqnJJQIiIiIiIiIiJS55SEEhERERERERGROqcklIiIiIiIiIiI1DkloUREREREREREpM4pCSUiIiIiIiIiInVOSSgREZEwmzFjBtdddx3HH388PXr04KSTTuLWW29l7ty5kQ6tku+//57s7GxmzJhxRP08++yzZGdnU1FREabIqldUVMRTTz3FqFGj6Nu3L3369OH000/n4YcfZvfu3XV+/2gxefJksrOzq/wZMGAAl1xyCZ988kmkQxQRERGpQkkoERGRMHrqqae4/vrradeuHePHj2fKlCk89NBDlJeXc8UVV/DWW29FOsQj9oc//IFnn3029Pqaa65h1qxZxMbG1ul9fT4fl19+OZ9++im33nor77//PpMnT+b666/n/fff56qrrsJxnNC5PXr0IC8vLyz3fvfdd7niiivC0lc4vf7668yaNSv055VXXqFv377cfvvt/Oc//6l1f8OHD+f777+vg0hFREREwBPpAERERBqK6dOn8/zzz/OXv/yFX//616H2jIwMjj/+eG699VYef/xxzjjjDFJSUiIY6ZGZP38+rVu3Dr1OTEwkMTGxzu87e/ZsVq9ezX/+8x8GDx4cam/fvj2NGzfmueeeY+3atWRlZbF48WL8fn+1fbmui23beDw1+1Zo/vz5Rxx/XWjcuDHp6emh1+np6XTp0oVVq1bxn//8hzFjxtS4r4KCAvLz8484Jp/PR0xMzBH3IyIiIg2PZkKJiIiEyUsvvURWVlalBNQ+hmFw//33M23atFAC6uSTT+b222+vdN6+ZVZr1qwB4O9//ztDhgxh/vz5nH322eTk5HDOOeewdOlS5s6dy3nnnUfPnj0566yz+OGHH0L93HnnnQwZMqRS33l5eWRnZ/PGG29U+wz/+9//uOCCC+jbty/9+vXj17/+NXPmzAkdz87OZv369Tz33HNkZ2eTl5dXaTne7373O0444QRc163U78cff0x2djbLli0DYMOGDdxyyy0cd9xx5OTkcMEFF/DVV18d9PO7L6nk8/mqHBs+fDjvvvsuWVlZTJ48mdGjRwNwyimnhGYwnXzyyTzwwAP86U9/olevXnz99dcALF68mLFjxzJw4EB69erFmWeeyZtvvhnq+4orruCdd95hzpw5ZGdnM3nyZAB27NjBH//4RwYNGkSPHj0YNWoUkyZNqhRXQUEBN9xwA71792bgwIE8+OCDfPLJJ2RnZ5Obm8sjjzxCnz59KC0trXTdggULyM7OZvr06Qf9nFQnOzubrVu3hmaGAXz77bdceuml9OrVi759+3LdddeF/p19//33nHDCCQBceeWVnHzyyaFnv/jiiyv1/ctlnPv+zc6YMYNTTjkldP6ll17KTTfdxBdffMHIkSPJyclh5MiRTJs2LdTXrl27uOuuuxg2bBg9evTgxBNP5IEHHmDPnj2H9dwiIiIS3ZSEEhERCQO/38+8efNCP8gfSGpqKsnJybXq1+PxsGfPHp5//nkeeughXnvtNQoLC7nzzjt5+umnuf/++3nrrbdwXZc//vGPR/QMc+fO5Y477mDIkCG8//77vPPOO7Rp04brr7+egoICAL788k
tg/xK8li1bVurj7LPPpqCgoMrMoU8++YROnTrRrVs3ioqKGD16NBs2bOD555/n/fffp2/fvtx0001899131cbXt29fkpOT+e1vf8uLL77I+vXrD3jemWeeyf/7f/8PgHfeeafS0sGZM2eSmJjIRx99xKBBgygtLWXMmDGYpskrr7zCxx9/zCWXXMK9994betZnn32W7t2706dPH2bNmsWZZ56Jz+fj6quv5rvvvuPhhx/mo48+4uyzz+auu+7i/fffD93v1ltvZcGCBTz55JO8/fbbuK7L008/DQTH9uKLL6a8vJwpU6ZUeob//e9/tGzZkmHDhlX7+TiYtWvX0rJlS0wz+K3e3Llzufbaa2nVqhVvv/02EydOpLy8nMsvv5ydO3fSp08fnnjiidDz/jKZVhMTJkzgb3/7Gy+88AIAXq+Xn376ibfffpsnnniC999/n6ZNm/L73/+ekpISAB544AEWLVrEM888w+eff84DDzzAtGnTeOihhw7ruUVERCS6KQklIiISBkVFRfh8vipJmXAoKSnhxhtvpEePHvTq1YtTTz2VVatWcdttt9GzZ0+6du3K+eefz8aNGykuLj7s+/To0YOpU6dy66230rZtWzp06MD1119PWVkZ8+bNA6Bp06YAJCQkkJ6ejmVZlfoYPHgwTZo04bPPPqsU/4wZMzjnnHMAmDRpEtu3b+fvf/87/fv3p2PHjtx111107tyZF198sdr40tLSeO6550hLS+Oxxx7jtNNO44QTTuAPf/gDX375ZWj2VVxcHElJSaFrUlNTK8Vy5513kpmZSWJiInFxcXz00Uf8/e9/Jzs7m9atW3PVVVfRpEkTZs6cCQSThx6PB6/XS3p6OnFxcUybNo1Vq1bx17/+lRNPPJH27dtzww03cPLJJ4eSMLm5ucyfP59x48Zx8sknk5mZyd13311pKWb79u0ZOHBgaHYVgOM4fPbZZ1xwwQWhJFJNlZWV8fLLL/PVV19x2WWXhdrHjx9P8+bNeeSRR8jOzqZnz548+eSTFBcXM2nSJGJiYkIJ0pSUFNLS0mp1X4DTTz+dgQMH0rx581Dbli1beOSRR+jWrRsdO3bk8ssvp6SkhLVr1wKwdOnSUIH5Vq1aMWzYMF5++eVaLSMUERGR+kM1oURERMJgX22hny9/CqcuXbqEPt6XVOnWrVuorXHjxgDs3r2bRo0aHdY9YmNjmTp1Kh9++CGbNm3C7/eHEjtFRUU16sPj8TBy5Eg+//xz/vjHP2IYBlOnTiUQCISSUAsXLiQ9PZ0OHTqErjMMg+OPP5533nnnoP0PHDiQzz//nHnz5jF79mzmzJnDxx9/zPvvv8/AgQOZMGHCQQukd+nSpVLizLIsVq5cyUsvvcRPP/1EeXk5AOXl5Qd95oULF2IYBgMGDKjUPmjQIL788kuKior46aefAOjVq1elc4YPH87ChQtDry+99FJuv/12Nm7cSJs2bZgzZw47duzgwgsvPOjnAuDCCy/EMIzQ6/Lycpo1a8Ydd9zBtddeWyneoUOH4vV6Q23p6el06tQplGA8Uj169KjS1rZt20oJrX0JuF27dgFw6qmnMmHCBGzb5qSTTmLgwIG0bds2LPGIiIhI9FESSkREJAxSU1OJjY1l48aNddJ/fHx86ON9SYcDtf2yFlNtvPbaazz88MOMHTuWkSNHkpycTEFBQa13hTv77LN57bXXWLhwIb179+bTTz9lwIABtGjRAoDi4mK2b99Onz59Kl3n9/vx+/2Ul5dXerZfMk2T/v37079/fyCYIHvqqad44403eOONN7j66qurvfaXyyGXLVvGb37zG4YOHcrTTz9N06ZNMU3zkM9cXFyM67oMHDiwUnsgEABg+/btoSVnv0wKNmnSpNLrESNG0KRJEyZPnsytt97KJ598wuDBg8nIyDhoDADPPfccbdq0AaC0tJQrr7ySk046ieuuu65KvFOmTAktMdynoqKiymy2w3Wg5GdCQkKl17/8d3
r77bfTvn17Jk+ezG233YbrupxyyincddddlWZUiYiISMOgJJSIiEgYGIbB4MGD+eqrr/jTn/50wB/sd+3axZQpU7j
|
||
|
|
"text/plain": [
|
||
|
|
"<Figure size 1200x600 with 1 Axes>"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
"metadata": {},
|
||
|
|
"output_type": "display_data"
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"\n",
|
||
|
|
"Return Statistics:\n",
|
||
|
|
" Total Return: 2990.33%\n",
|
||
|
|
" Annual Return: 954.65%\n",
|
||
|
|
" Sharpe Ratio: 5.50\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"# Cumulative returns\n",
|
||
|
|
"# Get the accumulated-return series from the backtest results (treated below as cumulative, despite the name `daily_returns`)\n",
|
||
|
|
"daily_returns = results.get('df_return_accum', pd.Series())\n",
|
||
|
|
"if len(daily_returns) > 0:\n",
|
||
|
|
" # If it's cumulative, calculate daily from it\n",
|
||
|
|
" if hasattr(daily_returns, 'diff'):\n",
|
||
|
|
" daily_rets = daily_returns.diff().fillna(daily_returns.iloc[0] if len(daily_returns) > 0 else 0)\n",
|
||
|
|
" else:\n",
|
||
|
|
" daily_rets = daily_returns\n",
|
||
|
|
"else:\n",
|
||
|
|
" # Fallback to simple IC-based approximation\n",
|
||
|
|
" daily_rets = results.get('df_ic', pd.Series())\n",
|
||
|
|
"    daily_rets = daily_rets * 0.01  # Rough approximation: ad-hoc 0.01 scale turns IC into a pseudo-return, not an actual PnL\n",
|
||
|
|
"\n",
|
||
|
|
"fig = plot_cumulative_returns(daily_rets, title=\"Cumulative Strategy Returns\")\n",
|
||
|
|
"plt.show()\n",
|
||
|
|
"\n",
|
||
|
|
"if len(daily_rets) > 0:\n",
|
||
|
|
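"    # NOTE(review): when 'df_return_accum' is missing, daily_returns is the empty pd.Series() default, which still has .iloc, so this yields 0 instead of daily_rets.sum() on the IC-fallback path\n",
"    total_return = daily_rets.sum() if not hasattr(daily_returns, 'iloc') else daily_returns.iloc[-1] if len(daily_returns) > 0 else 0\n",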
|
||
|
|
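"    # NOTE(review): geometric annualisation presumes total_return is a compounded return; if 'df_return_accum' is an additive running sum, an arithmetic annualisation (daily_rets.mean() * 252) would be the consistent choice - TODO confirm\n",
"    annual_return = (1 + total_return) ** (252 / len(daily_rets)) - 1 if len(daily_rets) > 0 else 0\n",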
|
||
|
|
" sharpe = daily_rets.mean() / daily_rets.std() * np.sqrt(252) if daily_rets.std() > 0 else 0\n",
|
||
|
|
"\n",
|
||
|
|
" print(f\"\\nReturn Statistics:\")\n",
|
||
|
|
" print(f\" Total Return: {total_return:.2%}\")\n",
|
||
|
|
" print(f\" Annual Return: {annual_return:.2%}\")\n",
|
||
|
|
" print(f\" Sharpe Ratio: {sharpe:.2f}\")"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"metadata": {},
|
||
|
|
"source": [
|
||
|
|
"## 6. Save Results\n",
|
||
|
|
"\n",
|
||
|
|
"Save model, predictions, and metrics for later analysis."
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 9,
|
||
|
|
"metadata": {
|
||
|
|
"execution": {
|
||
|
|
"iopub.execute_input": "2026-02-14T08:13:59.502588Z",
|
||
|
|
"iopub.status.busy": "2026-02-14T08:13:59.502439Z",
|
||
|
|
"iopub.status.idle": "2026-02-14T08:13:59.564977Z",
|
||
|
|
"shell.execute_reply": "2026-02-14T08:13:59.564063Z"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"Saving results to: /home/guofu/Workspaces/alpha_lab/cta_1d/../results/cta_1d/baseline_xgb\n",
|
||
|
|
"\n",
|
||
|
|
"Files saved:\n",
|
||
|
|
" - model.json\n",
|
||
|
|
" - backtest_df_short_accum.csv\n",
|
||
|
|
" - summary.json\n",
|
||
|
|
" - backtest_df_return_accum.csv\n",
|
||
|
|
" - backtest_df_long_accum.csv\n",
|
||
|
|
" - backtest_df_signal_dist.csv\n",
|
||
|
|
" - predictions.csv\n",
|
||
|
|
" - feature_importance.csv\n",
|
||
|
|
" - backtest_df_num_trade_short.csv\n",
|
||
|
|
" - backtest_df_return_per_trade.csv\n",
|
||
|
|
" - backtest_df_num_trade_long.csv\n",
|
||
|
|
" - backtest_df_ic.csv\n",
|
||
|
|
" - backtest_df_num_trade.csv\n",
|
||
|
|
" - config.json\n",
|
||
|
|
" - backtest_df_ic_test.csv\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"if CONFIG['save_results']:\n",
|
||
|
|
" # Create output directory\n",
|
||
|
|
" output_dir = create_experiment_dir('cta_1d', CONFIG['experiment_name'])\n",
|
||
|
|
" print(f\"Saving results to: {output_dir}\")\n",
|
||
|
|
" \n",
|
||
|
|
" # Save config\n",
|
||
|
|
" with open(output_dir / 'config.json', 'w') as f:\n",
|
||
|
|
" json.dump(CONFIG, f, indent=2, default=str)\n",
|
||
|
|
" \n",
|
||
|
|
" # Save model\n",
|
||
|
|
" model.save_model(str(output_dir / 'model.json'))\n",
|
||
|
|
" \n",
|
||
|
|
" # Save feature importance\n",
|
||
|
|
" importance_df.to_csv(output_dir / 'feature_importance.csv', index=False)\n",
|
||
|
|
" \n",
|
||
|
|
" # Save predictions\n",
|
||
|
|
" signal_series.to_csv(output_dir / 'predictions.csv')\n",
|
||
|
|
" \n",
|
||
|
|
" # Save backtest results (handle dict or DataFrame)\n",
|
||
|
|
" if isinstance(results, dict):\n",
|
||
|
|
" # Save each DataFrame in the results dict separately\n",
|
||
|
|
" for key, value in results.items():\n",
|
||
|
|
" if isinstance(value, pd.DataFrame):\n",
|
||
|
|
" value.to_csv(output_dir / f'backtest_{key}.csv')\n",
|
||
|
|
" elif isinstance(value, pd.Series):\n",
|
||
|
|
" value.to_csv(output_dir / f'backtest_{key}.csv')\n",
|
||
|
|
" else:\n",
|
||
|
|
" results.to_csv(output_dir / 'backtest_results.csv')\n",
|
||
|
|
" \n",
|
||
|
|
" # Save summary\n",
|
||
|
|
" with open(output_dir / 'summary.json', 'w') as f:\n",
|
||
|
|
" json.dump(summary, f, indent=2, default=str)\n",
|
||
|
|
" \n",
|
||
|
|
" print(\"\\nFiles saved:\")\n",
|
||
|
|
" for f in output_dir.iterdir():\n",
|
||
|
|
" print(f\" - {f.name}\")\n",
|
||
|
|
"else:\n",
|
||
|
|
" print(\"Results not saved (save_results=False)\")"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"kernelspec": {
|
||
|
|
"display_name": "Python 3",
|
||
|
|
"language": "python",
|
||
|
|
"name": "python3"
|
||
|
|
},
|
||
|
|
"language_info": {
|
||
|
|
"codemirror_mode": {
|
||
|
|
"name": "ipython",
|
||
|
|
"version": 3
|
||
|
|
},
|
||
|
|
"file_extension": ".py",
|
||
|
|
"mimetype": "text/x-python",
|
||
|
|
"name": "python",
|
||
|
|
"nbconvert_exporter": "python",
|
||
|
|
"pygments_lexer": "ipython3",
|
||
|
|
"version": "3.12.3"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"nbformat": 4,
|
||
|
|
"nbformat_minor": 4
|
||
|
|
}
|