Skip to content

Kernel Crashes After Stage 2 #53

@Yeoonsu

Description

@Yeoonsu

Hello,

The kernel keeps crashing after stage 2. Could you please advise on how to resolve this issue?
I am using a laptop with 16 GB of RAM and an i7 CPU.

Thank you!

import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from datetime import timedelta
from openfe import OpenFE, transform

# Start dates of the test periods; each entry is parsed with pd.to_datetime.
test_dates = [
    pd.to_datetime(day)
    for day in (
        '2023-01-05',
        '2023-02-08',
        '2023-03-07',
        '2023-04-11',
        '2023-05-07',
        '2023-06-08',
        '2023-07-09',
        '2023-08-08',
        '2023-09-06',
        '2024-01-10',
        '2024-02-11',
        '2024-03-08',
        '2024-04-06',
    )
]

# Collects one result row (a dict) per evaluated test period.
results = []

# LightGBM parameters. They are identical for every test period, so the dict
# is built once here rather than on every loop iteration.
# The number of boosting rounds is intentionally not part of this dict: the
# original code set both 'n_estimators': 200 here and num_boost_round=100 in
# lgb.train(), which contradict each other. LightGBM documents 'n_estimators'
# as an alias of 'num_iterations'; the round count is now stated exactly once,
# in num_boost_round below.
params = {
    'objective': 'regression',
    'metric': 'rmse',
    'boosting_type': 'gbdt',
    'learning_rate': 0.05,
    'num_leaves': 31,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'bagging_freq': 10,
    'verbose': 0
}
# NOTE(review): 200 keeps the value that was given via 'n_estimators';
# confirm this (and not 100) is the intended number of rounds.
num_boost_round = 200

# For each test start date: split df into train/test by time, generate
# features with OpenFE, train a LightGBM model and record the test RMSE.
# `df` and `var` are expected to be defined earlier in the notebook.
for test_start_date in test_dates:
    # End of the 90-day test window, kept in its original form for reporting.
    test_end_date = test_start_date + timedelta(days=90) - timedelta(seconds=1)
    # Exclusive upper bound of the same window, used for filtering.
    test_stop = test_start_date + timedelta(days=90)

    # Half-open intervals: train is everything strictly before the start,
    # test is [start, start + 90 days). Unlike the "minus one second" closed
    # bounds, this does not drop rows whose timestamp has a sub-second part.
    tm = df['tm']
    train = df[tm < test_start_date]
    test = df[(tm >= test_start_date) & (tm < test_stop)]

    # Skip the period when either split has no rows.
    if train.empty or test.empty:
        print(f"Skipping period {test_start_date} to {test_end_date} due to insufficient data.")
        continue

    # Features (X) and target (y).
    x_train = train[var]
    y_train = train['demand']
    x_test = test[var]
    y_test = test['demand']

    # Feature engineering with OpenFE: fit on the training split only, then
    # apply the resulting features to both the train and test splits.
    ofe = OpenFE()
    features = ofe.fit(data=x_train, label=y_train, n_jobs=4)
    x_train, x_test = transform(x_train, x_test, features, n_jobs=4)

    # Build the LightGBM dataset and train the model.
    train_data = lgb.Dataset(x_train, label=y_train)
    model_lgb = lgb.train(params, train_data, num_boost_round=num_boost_round)

    # Predict on the test split and compute the RMSE.
    preds_lgb = model_lgb.predict(x_test)
    rmse = np.sqrt(mean_squared_error(y_test, preds_lgb))

    # Store the result for this period.
    results.append({
        'Test Start Date': test_start_date,
        'Test End Date': test_end_date,
        'RMSE': rmse
    })

    print(f"Period {test_start_date} to {test_end_date} - RMSE: {rmse}")

# Convert the collected results into a DataFrame. The columns are listed
# explicitly so that the frame keeps the same schema even when `results` is
# empty (e.g. every period was skipped); for non-empty results the column
# order matches the keys of the appended dicts.
results_df = pd.DataFrame(
    results,
    columns=['Test Start Date', 'Test End Date', 'RMSE'],
)
# Bare expression: displays the frame when run as the last line of a
# notebook cell.
results_df

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions