💻 过滤法实现
from sklearn.feature_selection import (
SelectKBest, f_classif, mutual_info_classif,
VarianceThreshold, chi2
)
# Filter methods: score every feature independently of any downstream model.
# X (feature matrix) and y (class labels) must already be defined.

# 1) Variance filter: unsupervised, drops low-variance features
#    (threshold=0.1). The result is stored in X_high_var only; the selectors
#    below are fitted on the full X, not on this reduced matrix.
selector = VarianceThreshold(threshold=0.1)
X_high_var = selector.fit_transform(X)

# 2) Univariate ANOVA F-test: keep the 10 highest-scoring features.
selector = SelectKBest(f_classif, k=10)
X_selected = selector.fit_transform(X, y)
# Column indices (into X) of the features the F-test kept.
selected_features = selector.get_support(indices=True)
print(f'选中的特征索引: {selected_features}')

# 3) Mutual information: keep the 10 highest-scoring features.
#    mutual_info_classif injects random noise while estimating scores, so an
#    unseeded call can pick a different top-10 on every run. Binding a fixed
#    random_state makes the selection reproducible.
from functools import partial

selector = SelectKBest(partial(mutual_info_classif, random_state=42), k=10)
# NOTE: this overwrites the F-test based `selector` / `X_selected` above.
X_selected = selector.fit_transform(X, y)
📦 包装法实现
from sklearn.feature_selection import RFE, RFECV
from sklearn.ensemble import RandomForestClassifier
# Wrapper methods: repeatedly fit an estimator and prune the weakest features.
# X (feature matrix) and y (class labels) must already be defined.

# Seeded random forest used as the base estimator for both selectors below.
model = RandomForestClassifier(random_state=42, n_estimators=100)

# Recursive feature elimination down to a fixed number of features (10).
rfe = RFE(estimator=model, n_features_to_select=10)
rfe.fit(X, y)
X_selected = rfe.transform(X)
# ranking_ assigns rank 1 to every selected feature.
print(f'特征排名: {rfe.ranking_}')

# Cross-validated RFE: the number of features to keep is chosen by 5-fold
# cross-validation scored on accuracy instead of being fixed up front.
rfecv = RFECV(estimator=model, scoring='accuracy', cv=5)
rfecv.fit(X, y)
# NOTE: this overwrites the X_selected produced by the fixed-size RFE above.
X_selected = rfecv.transform(X)
print(f'最佳特征数: {rfecv.n_features_}')
🧠 嵌入法实现
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import Lasso
import pandas as pd

# Embedded methods: the selection signal comes from a fitted model's own
# coefficients or feature importances.
# X, y and feature_names must already be defined.

# 1) L1-regularised linear model: SelectFromModel keeps the features whose
#    coefficient magnitude reaches its default threshold.
# NOTE(review): Lasso is a regressor, so y is treated as a numeric target
# here. If y holds class labels (the rest of this script uses classifiers),
# an L1-penalised classifier may be the better fit - confirm intent.
lasso = Lasso(alpha=0.01)
selector = SelectFromModel(lasso)
X_selected = selector.fit_transform(X, y)

# 2) Tree-based importances: keep features whose importance is at least the
#    median importance. The forest is seeded so that both the selection and
#    the ranking printed below are reproducible.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
selector = SelectFromModel(rf, threshold='median')
# NOTE: this overwrites the Lasso-based `selector` / `X_selected` above.
X_selected = selector.fit_transform(X, y)

# SelectFromModel fits a *clone* of the estimator it is given. Reuse that
# fitted clone rather than training a second forest, so the importances shown
# are exactly the ones the selection above was based on.
rf = selector.estimator_

# Top-10 features by importance.
# Assumes feature_names has one label per column of X - TODO confirm.
importance = pd.Series(rf.feature_importances_, index=feature_names)
print(importance.sort_values(ascending=False).head(10))