# CTA 1D Parquet Dataset Requirements
# This file specifies the required Parquet tables for the alpha_lab CTA 1D task.

# Table 1: Alpha158 Features
# NOTE(review): nesting was reconstructed from a copy whose indentation was
# lost. Confirm that description/priority belong at table level rather than
# under `target`.
cta_alpha158_1d:
  # Where the data is read from: database, table, and server address.
  source:
    database: dfs://daily_stock_run
    # NOTE(review): table name says "alpha159" while the section title and
    # description say Alpha158 — confirm this is the intended table.
    table: stg_1day_tinysoft_cta_alpha159_0_7_beta
    host: 192.168.1.146
    port: 8848
  # Where the exported dataset is written and how it is laid out.
  target:
    path: cta_alpha158_1d/
    partition_freq: 1D
    col_datetime: m_nDate
    code_format: tscode
  description: Alpha158 features for CTA futures (~163 columns)
  priority: medium

# Table 2: HFFactor Features (requires pivot)
# NOTE(review): nesting was reconstructed from a copy whose indentation was
# lost. `pivot` is placed under `source` and `filter` under `pivot` because
# they directly follow those keys; confirm against the consumer's schema.
cta_hffactor_1d:
  # Where the data is read from: database, table, and server address.
  source:
    database: dfs://daily_stock_run
    table: stg_1day_tinysoft_cta_hffactor
    host: 192.168.1.146
    port: 8848
    # Long format: code, m_nDate, factor_name, value
    # Pivot to wide format during export
    pivot:
      index: [code, m_nDate]
      columns: factor_name
      values: value
      filter:  # Only these 8 columns needed
        - vol_1min
        - skew_1min
        - volp_1min
        - volp_ratio_1min
        - voln_ratio_1min
        - trend_strength_1min
        - pv_corr_1min
        - flowin_ratio_1min
  # Where the exported dataset is written and how it is laid out.
  target:
    path: cta_hffactor_1d/
    partition_freq: 1D
    col_datetime: m_nDate
    code_format: tscode
  description: High-frequency factor features (8 columns, pivoted from long format)
  priority: medium
  notes: Requires pivot transformation from long to wide format

# Table 3: Dominant Contract Mapping
# NOTE(review): nesting was reconstructed from a copy whose indentation was
# lost. group_by/filter/agg are placed under `source` because they directly
# follow the connection keys; confirm against the consumer's schema.
cta_dom_1d:
  # Where the data is read from: database, table, and server address.
  source:
    database: dfs://daily_stock_run
    table: dwm_1day_cta_dom
    host: 192.168.1.146
    port: 8848
    # Group and aggregate during export
    group_by: [m_nDate, code_init]
    # Double-quoted because the expression itself contains single quotes.
    filter: "version='vp_csmax_roll2_cummax'"
    agg: "first(code) as code"
  # Where the exported dataset is written and how it is laid out.
  target:
    path: cta_dom_1d/
    partition_freq: 1D
    col_datetime: m_nDate
    code_format: tscode
  description: Dominant contract mapping for continuous contracts
  priority: medium
  notes: Requires group_by + aggregation, filter by version

# Table 4: Return Labels
# NOTE(review): nesting was reconstructed from a copy whose indentation was
# lost. `indicators` is placed under `source` because it directly follows the
# connection keys; confirm against the consumer's schema.
cta_labels_1d:
  # Where the data is read from: database, table, and server address.
  source:
    database: dfs://daily_stock_run
    table: stg_1day_tinysoft_cta_hfvalue
    host: 192.168.1.146
    port: 8848
    # Filter for specific indicators
    indicators:
      - twap_open1m@1_twap_close1m@1  # o2c_twap1min
      - twap_open1m@1_twap_open1m@2  # o2o_twap1min
  # Where the exported dataset is written and how it is laid out.
  target:
    path: cta_labels_1d/
    partition_freq: 1D
    col_datetime: m_nDate
    code_format: tscode
  description: Return labels for different return types
  priority: medium
  notes: Filter indicator column for specific return types