■コード
1.パラメータ
# Learning rate used by the LightGBM parameter dict below.
LR = 0.001

# Parameters passed to lgb.train() for a 3-class classification task.
lightgbm_params = {
    # "gbdt" or "dart" (dart often generalizes better).
    # NOTE: LightGBM's early_stopping callback is disabled in dart mode,
    # so with "dart" training always runs for the full number of rounds.
    "boosting_type": "dart",
    "learning_rate": LR,
    "objective": "multiclass",
    "num_class": 3,
    "metric": "multi_logloss",  # "multi_error" or "multi_logloss"
    "verbosity": 1,
    # Use the GPU.
    "device": "gpu",
    "gpu_platform_id": 1,
    # If the GPU is not an NVIDIA one, change this number to 0 or 1.
    "gpu_device_id": 1,
}
2.学習コード
import time

import lightgbm as lgb
import numpy as np
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold, StratifiedKFold
from tqdm import tqdm
# Stratified K-fold cross-validation training of a LightGBM multiclass model.
# Expects `train_df` (a DataFrame with a 'target' column) and
# `lightgbm_params` to be defined earlier in the file.

# Per-fold macro-F1 scores and trained boosters, filled in by the loop below.
valid_scores = []
models = []

SEED = 42
FOLD = 5
# Maximum number of boosting rounds per fold.
NUM_BOOST_ROUND = 2000
# Per-class sample weights (class label -> weight), for imbalanced data.
class_weights = {0: 2, 1: 3, 2: 6}

# PLACEHOLDER: replace the right-hand side with the list of feature column
# names. As written it is a bare identifier and raises NameError.
feature_list = ここで特徴量列のリスト定義

# `kf` is the non-stratified alternative; the loop below uses `skf`.
# To switch, iterate over `kf.split(train_df)` instead.
kf = KFold(n_splits=FOLD, shuffle=True, random_state=SEED)
skf = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=SEED)

folds = skf.split(train_df[feature_list], train_df['target'])
# total=FOLD lets tqdm draw a real progress bar (enumerate has no length).
for fold, (tr_idx, va_idx) in tqdm(enumerate(folds), total=FOLD):
    print(f"=========={fold+1}FOLD目=======================")
    # Start timing this fold.
    time_start = time.perf_counter()
    print('経過時間測定開始')

    # Receives the per-iteration metric history via record_evaluation.
    evals_result = {}

    X_tr = train_df[feature_list].iloc[tr_idx]
    X_va = train_df[feature_list].iloc[va_idx]
    y_tr = train_df['target'].iloc[tr_idx]
    y_va = train_df['target'].iloc[va_idx]

    # Only the training set is weighted; the validation set is unweighted.
    lgb_train = lgb.Dataset(X_tr, y_tr, weight=y_tr.map(class_weights))
    lgb_eval = lgb.Dataset(X_va, y_va, reference=lgb_train)

    model = lgb.train(
        lightgbm_params,
        lgb_train,
        num_boost_round=NUM_BOOST_ROUND,
        valid_sets=[lgb_train, lgb_eval],
        valid_names=['train', 'valid'],
        callbacks=[
            # Early-stopping patience in rounds.
            # NOTE: LightGBM disables this callback when boosting is "dart".
            lgb.early_stopping(10),
            # Log the evaluation metric every 50 rounds.
            lgb.log_evaluation(50),
            lgb.record_evaluation(evals_result),
        ],
    )

    y_va_pred = model.predict(X_va, num_iteration=model.best_iteration)

    # Macro-averaged F1 on the arg-max class of the predicted probabilities.
    score = f1_score(y_va, np.argmax(y_va_pred, axis=-1), average='macro')
    print(f'fold {fold+1} Validation F1 Score (Macro): {score:.3f}')
    print('')

    # Plot the learning curve recorded during training.
    lgb.plot_metric(evals_result)

    # Keep this fold's score and model.
    valid_scores.append(score)
    models.append(model)

    # Stop timing this fold and report the elapsed minutes.
    time_end = time.perf_counter()
    print('経過時間測定終了')
    tim = time_end - time_start
    print('経過時間(分):', round(tim/60))

print('===========End===================================')