【AI】[LightGBM]多クラス分類のコード | 夕湖津のブログ

夕湖津のブログ

問題解決に役立つ情報の提供を目指します

■コード

1.パラメータ

# Learning rate, referenced by the LightGBM parameter dict below.
LR = 0.001

# Parameters passed to lgb.train() for 3-class classification.
lightgbm_params = {
    # "gbdt" or "dart" (dart often generalizes better).
    # NOTE(review): LightGBM reportedly disables the early_stopping callback
    # in dart mode (it logs a warning instead) — confirm against the
    # installed version; switch to "gbdt" if early stopping is required.
    "boosting_type": "dart",
    "learning_rate": LR,
    "objective": "multiclass",
    "num_class": 3,
    "metric": "multi_logloss",  # alternatives: multi_error / multi_logloss
    "verbosity": 1,
    # Train on the GPU.
    "device": 'gpu',
    'gpu_platform_id': 1,
    'gpu_device_id': 1,    # if the GPU is not NVIDIA, change this number to 0 or 1
}

 

2.学習コード

import time

import lightgbm as lgb
import numpy as np
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold, StratifiedKFold
from tqdm import tqdm

 

# Per-fold results collected by the training loop:
# one validation score and one trained model per fold.
valid_scores = []
models = []

# Random seed and number of folds shared by both splitters below.
SEED=42
FOLD=5

 

# Placeholder: replace the right-hand side with the list of feature column
# names of train_df (the identifier reads "define the feature column list
# here" and is not a real variable).
feature_list = ここで特徴量列のリスト定義

 

# Plain K-fold splitter, kept as an alternative to the stratified one.
kf = KFold(n_splits=FOLD, shuffle=True, random_state=SEED)
# Stratified K-fold splitter; the training loop splits on train_df['target'].
skf = StratifiedKFold(n_splits=FOLD, shuffle=True, random_state=SEED)

 

# Cross-validated training: for each fold, train a LightGBM multiclass model,
# score it with macro F1 on the held-out part, and keep the score and model.

# Class weights for imbalanced data: every training row gets the weight
# mapped from its target class. Fold-independent, so defined once.
class_weights = {0: 2, 1: 3, 2: 6}

# Upper bound on the number of boosting rounds per fold.
NUM_BOOST_ROUND = 2000

# Stratified split on the target column. For a plain K-fold split, iterate
# over kf.split(train_df) instead.
# tqdm wraps the splitter itself and is told the fold count, so the progress
# bar can show completed/total folds (wrapping enumerate() hides the length).
fold_splits = skf.split(train_df[feature_list], train_df['target'])
for fold, (tr_idx, va_idx) in enumerate(tqdm(fold_splits, total=skf.get_n_splits())):
    print(f"=========={fold+1}FOLD目=======================")
    # Start timing this fold.
    time_start = time.perf_counter()
    print('経過時間測定開始')

    # Filled by lgb.record_evaluation with the metric history of this fold.
    evals_result = {}

    # Positional split of features and target into train / validation parts.
    X_tr = train_df[feature_list].iloc[tr_idx]
    X_va = train_df[feature_list].iloc[va_idx]
    y_tr = train_df['target'].iloc[tr_idx]
    y_va = train_df['target'].iloc[va_idx]

    # Only the training set is weighted; the validation set is left unweighted.
    lgb_train = lgb.Dataset(X_tr, y_tr, weight=y_tr.map(class_weights))
    lgb_eval = lgb.Dataset(X_va, y_va, reference=lgb_train)

    model = lgb.train(
        lightgbm_params,
        lgb_train,
        num_boost_round=NUM_BOOST_ROUND,
        valid_sets=[lgb_train, lgb_eval],
        valid_names=['train', 'valid'],
        callbacks=[
            # Early-stopping criterion: 10 rounds without improvement.
            # NOTE(review): with boosting_type "dart" LightGBM reportedly
            # disables early stopping — confirm for the installed version.
            lgb.early_stopping(10),
            # Log the evaluation metrics every 50 rounds.
            lgb.log_evaluation(50),
            lgb.record_evaluation(evals_result),
        ],
    )

    # Per-class scores for the validation rows; argmax over the last axis
    # below turns them into predicted class labels.
    y_va_pred = model.predict(X_va, num_iteration=model.best_iteration)

    # Macro-averaged F1 on the validation part.
    score = f1_score(y_va, np.argmax(y_va_pred, axis=-1), average='macro')
    print(f'fold {fold+1} Validation F1 Score (Macro): {score:.3f}')
    print('')

    # Plot the learning curves recorded for this fold.
    lgb.plot_metric(evals_result)

    # Keep this fold's score and model.
    valid_scores.append(score)
    models.append(model)

    # Stop timing this fold.
    time_end = time.perf_counter()
    print('経過時間測定終了')
    tim = time_end - time_start
    # One decimal place, so folds shorter than half a minute do not print 0.
    print('経過時間(分):', round(tim / 60, 1))

print('===========End===================================')