特征工程的关键要点:
1)领域知识;2)特征多样性;3)正则化;4)持续优化。
项目六:机器学习择时策略
本节介绍特征工程的方法和技术,包括特征构造、选择和变换。
import pandas as pd
from sklearn.feature_selection import SelectKBest
from sklearn.preprocessing import StandardScaler
class FeatureEngine:
    """Feature-engineering helper for price/volume data.

    Builds return, volatility, moving-average and volume features from a
    DataFrame, standardizes them with a ``StandardScaler`` and can keep the
    ``k`` best-scoring columns with ``SelectKBest``.
    """

    def __init__(self):
        # One scaler per engine so statistics fitted on one data set can be
        # reused on another (see ``normalize_features(fit=False)``).
        self.scaler = StandardScaler()
        # Most recently fitted SelectKBest; None until select_features runs.
        self.selector = None

    def create_features(self, data):
        """Build the feature table from raw market data.

        Args:
            data: DataFrame providing ``close`` and ``volume`` columns.

        Returns:
            DataFrame sharing ``data``'s index with the columns
            ``return_1d``, ``return_5d``, ``return_20d``, ``volatility_5d``,
            ``volatility_20d``, ``ma_5``, ``ma_20``, ``ma_diff`` and
            ``volume_ratio``. Rows whose rolling window is not yet full are
            NaN, and infinite values are replaced with NaN.
        """
        close = data['close']
        volume = data['volume']
        features = pd.DataFrame(index=data.index)

        # Return features
        features['return_1d'] = close.pct_change(1)
        features['return_5d'] = close.pct_change(5)
        features['return_20d'] = close.pct_change(20)

        # Volatility features: rolling std of the daily return
        features['volatility_5d'] = features['return_1d'].rolling(5).std()
        features['volatility_20d'] = features['return_1d'].rolling(20).std()

        # Technical indicators: moving averages and their relative spread
        features['ma_5'] = close.rolling(5).mean()
        features['ma_20'] = close.rolling(20).mean()
        features['ma_diff'] = (
            (features['ma_5'] - features['ma_20']) / features['ma_20']
        )

        # Volume feature: today's volume relative to its 20-period mean
        features['volume_ratio'] = volume / volume.rolling(20).mean()

        # Division by zero (e.g. a zero previous close) yields +/-inf, which
        # StandardScaler refuses; NaN marks the value as missing instead.
        inf = float('inf')
        return features.replace([inf, -inf], float('nan'))

    def normalize_features(self, features, fit=True):
        """Standardize the feature columns with the engine's scaler.

        Args:
            features: DataFrame of numeric features.
            fit: When True (default, the original behavior) the scaler is
                refitted on ``features`` before transforming. Pass False to
                reuse the statistics from an earlier fit, e.g. to scale
                out-of-sample data without look-ahead leakage.

        Returns:
            DataFrame with the same index and columns as ``features``
            holding the scaled values.
        """
        if fit:
            scaled = self.scaler.fit_transform(features)
        else:
            scaled = self.scaler.transform(features)
        return pd.DataFrame(
            scaled,
            index=features.index,
            columns=features.columns,
        )

    def select_features(self, X, y, k=20):
        """Keep the ``k`` best-scoring features using ``SelectKBest``.

        Args:
            X: Feature matrix.
            y: Target values.
            k: Number of features to keep. An integer larger than the
                number of columns in a 2-D ``X`` is clamped to that count,
                since an oversized ``k`` is rejected or warned about by
                scikit-learn depending on its version. Other values
                (such as ``'all'``) are passed through unchanged.

        Returns:
            The result of ``SelectKBest.fit_transform(X, y)``. The fitted
            selector is stored in ``self.selector``.
        """
        shape = getattr(X, 'shape', None)
        if isinstance(k, int) and shape is not None and len(shape) == 2:
            k = min(k, shape[1])
        selector = SelectKBest(k=k)
        X_selected = selector.fit_transform(X, y)
        # Only remember the selector once fitting has succeeded.
        self.selector = selector
        return X_selected
特征工程关键:1)领域知识;2)特征多样性;3)正则化;4)持续优化。