
15. XGBoost 01

HicKee 2023. 3. 10. 15:40
from sklearn import datasets
from sklearn.model_selection import train_test_split
# accuracy, confusion matrix (true vs. predicted labels), classification report
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import xgboost as xgb

br_cancer = datasets.load_breast_cancer()
X = br_cancer.data
y = br_cancer.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)

dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test, label=y_test)
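
XGBoost's native API takes DMatrix objects rather than raw numpy arrays. As a quick sanity check of the split, a minimal sketch (the breast-cancer set has 569 samples and 30 features, so a 70/30 split gives 398 train and 171 test rows):

# DMatrix wraps features and labels in XGBoost's internal format.
print(dtrain.num_row(), dtrain.num_col())   # expected: 398 30
print(dtest.num_row(), dtest.num_col())     # expected: 171 30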

params = {
    'max_depth': 3,                  # maximum tree depth
    'eta': 0.1,                      # learning rate
    'objective': 'binary:logistic',
    'num_boost_round': 10,           # NOTE: not a valid param key; this is an argument of xgb.train() and is ignored here
    'eval_metric': 'logloss',        # evaluation (loss) metric
    'early_stoppings': 100           # NOTE: misspelled; the real option is early_stopping_rounds in xgb.train(), so this is ignored too
}

wlist = [(dtrain, 'train'), (dtest, 'eval')]
xgb_model = xgb.train(params=params,
                      dtrain=dtrain,
                      num_boost_round=1000,   # number of boosting rounds (weak learners)
                      evals=wlist)
Training log (truncated):

...

[994] train-logloss:0.00539 eval-logloss:0.09461
[995] train-logloss:0.00539 eval-logloss:0.09461
[996] train-logloss:0.00539 eval-logloss:0.09461
[997] train-logloss:0.00539 eval-logloss:0.09461
[998] train-logloss:0.00539 eval-logloss:0.09461
[999] train-logloss:0.00539 eval-logloss:0.09461
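
The log confirms that training ran for all 1000 rounds: the two misplaced keys in params were silently ignored. To actually enable early stopping with the native API, pass early_stopping_rounds to xgb.train() itself. A minimal sketch (the name xgb_model_es is only for illustration):

# Supported early stopping: early_stopping_rounds is an argument of
# xgb.train(), not an entry in the params dict.
xgb_model_es = xgb.train(params=params,
                         dtrain=dtrain,
                         num_boost_round=1000,
                         evals=wlist,
                         early_stopping_rounds=100)  # stop when eval logloss has not improved for 100 rounds
print(xgb_model_es.best_iteration)  # round with the best eval logloss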

y_predprob = xgb_model.predict(dtest)
print(y_predprob[:5])
# In sklearn's breast-cancer data, 0 = malignant and 1 = benign, so the model outputs P(benign);
# 0.95 is an unusually strict cut-off (0.5 is the common default)
y_pred = [1 if x > 0.95 else 0 for x in y_predprob]
print(y_pred[:5])
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
Output:

[9.9982399e-01 9.9977666e-01 6.9134818e-05 9.9401939e-01 3.0826402e-04]
[1, 1, 0, 1, 0]
0.9707602339181286
[[67  1]
 [ 4 99]]
              precision    recall  f1-score   support

           0       0.94      0.99      0.96        68
           1       0.99      0.96      0.98       103

    accuracy                           0.97       171
   macro avg       0.97      0.97      0.97       171
weighted avg       0.97      0.97      0.97       171
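
binary:logistic outputs P(class 1), i.e. P(benign) here, and the conventional cut-off is 0.5 rather than the strict 0.95 used above. For comparison, a minimal sketch of the default rule (y_pred_default is just an illustrative name):

import numpy as np

# Conventional 0.5 decision threshold on the predicted probabilities.
y_pred_default = (np.asarray(y_predprob) >= 0.5).astype(int)
print(accuracy_score(y_test, y_pred_default))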


from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import xgboost as xgb

b_iris = datasets.load_iris()

X = b_iris.data
y = b_iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)

d_train = xgb.DMatrix(data=X_train, label=y_train)
d_test = xgb.DMatrix(data=X_test, label=y_test)

params = {
    'max_depth': 3,                   # maximum tree depth
    'eta': 0.1,                       # learning rate
    'objective': 'reg:squarederror',  # regression objective; a poor fit for the 3-class iris target (see the results below)
    'eval_metric': 'logloss',         # logloss expects probabilities in [0, 1], which a regression objective does not guarantee
    'early_stoppings': 500            # NOTE: not a recognized key, so it is silently ignored (see the first example)
}

wlist = [(d_train, 'train'), (d_test, 'eval')]
xgb_model = xgb.train(params=params, dtrain=d_train, num_boost_round=1000, evals=wlist)

y_predprob = xgb_model.predict(d_test)

print(y_predprob[:5])
# comment left over from the breast-cancer example; iris has three classes (0, 1, 2),
# so thresholding the regression output at 0.95 can never predict class 2
y_pred = [1 if x >= 0.95 else 0 for x in y_predprob]
print(y_pred[:5])
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
Output:

[ 1.0771132e+00  1.9628091e+00  1.9999171e+00  9.8521781e-01
 -1.1360166e-03]
[1, 1, 1, 1, 0]
0.6
[[18  0  0]
 [ 1  9  0]
 [ 0 17  0]]
              precision    recall  f1-score   support

           0       0.95      1.00      0.97        18
           1       0.35      0.90      0.50        10
           2       0.00      0.00      0.00        17

    accuracy                           0.60        45
   macro avg       0.43      0.63      0.49        45
weighted avg       0.46      0.60      0.50        45
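
The report makes the failure visible: class 2 is never predicted, since a regression objective plus a binary threshold cannot represent three classes. XGBoost's multiclass objectives handle this directly; a minimal sketch (the mc_* names are only for illustration):

# Multiclass setup: multi:softmax predicts class labels directly;
# mlogloss is the multiclass counterpart of logloss.
mc_params = {
    'max_depth': 3,
    'eta': 0.1,
    'objective': 'multi:softmax',   # 'multi:softprob' would return per-class probabilities instead
    'num_class': 3,                 # required by the multiclass objectives
    'eval_metric': 'mlogloss'
}
mc_model = xgb.train(params=mc_params, dtrain=d_train,
                     num_boost_round=1000, evals=wlist,
                     early_stopping_rounds=100)
mc_pred = mc_model.predict(d_test)   # already class labels 0, 1, 2
print(accuracy_score(y_test, mc_pred))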

