from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
# accuracy, confusion matrix (true vs. predicted), classification report
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import xgboost as xgb
br_cancer = datasets.load_breast_cancer()
X = br_cancer.data
y = br_cancer.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test, label=y_test)
params = {
    'max_depth': 3,                  # tree depth
    'eta': 0.1,                      # learning rate
    'objective': 'binary:logistic',  # binary classification objective
    'eval_metric': 'logloss'         # error (loss) function
}
# note: the number of boosting rounds and early stopping are arguments to
# xgb.train, not entries in params, so they are passed below instead
wlist = [(dtrain,'train'),(dtest,'eval')]
xgb_model = xgb.train(params=params,
                      dtrain=dtrain,
                      num_boost_round=1000,        # number of weak learners (boosting rounds)
                      evals=wlist,
                      early_stopping_rounds=100)   # stop if eval loss doesn't improve for 100 rounds
...
[994] train-logloss:0.00539 eval-logloss:0.09461
[995] train-logloss:0.00539 eval-logloss:0.09461
[996] train-logloss:0.00539 eval-logloss:0.09461
[997] train-logloss:0.00539 eval-logloss:0.09461
[998] train-logloss:0.00539 eval-logloss:0.09461
[999] train-logloss:0.00539 eval-logloss:0.09461
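If early stopping actually triggers, the booster records the best round and score. A minimal sketch of reading them back, assuming early_stopping_rounds was passed to xgb.train as above (these attributes are only set when early stopping was active):

# best_iteration / best_score exist only when early stopping was used
print(xgb_model.best_iteration)   # round with the best eval-logloss
print(xgb_model.best_score)       # best eval-logloss value
# restrict prediction to the trees up to the best round
y_best = xgb_model.predict(dtest, iteration_range=(0, xgb_model.best_iteration + 1))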
y_predprob = xgb_model.predict(dtest)
print(y_predprob[:5])
# in sklearn's breast cancer data, 0 = malignant, 1 = benign
# threshold the predicted probabilities; 0.95 is a deliberately strict cutoff (0.5 is the usual default)
y_pred = [1 if x > 0.95 else 0 for x in y_predprob]
print(y_pred[:5])
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
[9.9982399e-01 9.9977666e-01 6.9134818e-05 9.9401939e-01 3.0826402e-04]
[1, 1, 0, 1, 0]
0.9707602339181286
[[67  1]
 [ 4 99]]
              precision    recall  f1-score   support

           0       0.94      0.99      0.96        68
           1       0.99      0.96      0.98       103

    accuracy                           0.97       171
   macro avg       0.97      0.97      0.97       171
weighted avg       0.97      0.97      0.97       171
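GridSearchCV is imported above but never used. A minimal sketch of tuning the same breast-cancer model with it, using xgboost's scikit-learn wrapper XGBClassifier; the grid values below are illustrative assumptions, not tuned choices:

from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from xgboost import XGBClassifier

X, y = datasets.load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)

# illustrative grid, not tuned choices
param_grid = {
    'max_depth': [3, 5],
    'learning_rate': [0.05, 0.1],
    'n_estimators': [100, 300]
}
grid = GridSearchCV(XGBClassifier(eval_metric='logloss'),
                    param_grid, cv=3, scoring='accuracy')
grid.fit(X_train, y_train)
print(grid.best_params_)            # best hyperparameter combination
print(grid.score(X_test, y_test))   # accuracy of the refit best model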
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import xgboost as xgb
b_iris = datasets.load_iris()
X = b_iris.data
y = b_iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)
d_train = xgb.DMatrix(data=X_train, label=y_train)
d_test = xgb.DMatrix(data=X_test, label=y_test)
# note: iris has three classes (0, 1, 2), but this run treats the integer
# labels as a regression target, so the model outputs continuous values
params = {
    'max_depth': 3,                    # tree depth
    'eta': 0.1,                        # learning rate
    'objective': 'reg:squarederror',   # regression objective (see note above)
    'eval_metric': 'logloss'           # a binary-classification metric; inconsistent with a regression objective
}
wlist = [(d_train, 'train'), (d_test, 'eval')]
xgb_model = xgb.train(params=params, dtrain=d_train, num_boost_round=1000,
                      evals=wlist, early_stopping_rounds=500)  # stop if eval loss doesn't improve for 500 rounds
y_predprob = xgb_model.predict(d_test)
print(y_predprob[:5])
# iris classes are 0 = setosa, 1 = versicolor, 2 = virginica, not malignant/benign;
# thresholding at 0.95 maps every regression output above it to 1, so class 2 is never predicted
y_pred = [1 if x >= 0.95 else 0 for x in y_predprob]
print(y_pred[:5])
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
[ 1.0771132e+00  1.9628091e+00  1.9999171e+00  9.8521781e-01
 -1.1360166e-03]
[1, 1, 1, 1, 0]
0.6
[[18  0  0]
 [ 1  9  0]
 [ 0 17  0]]
              precision    recall  f1-score   support

           0       0.95      1.00      0.97        18
           1       0.35      0.90      0.50        10
           2       0.00      0.00      0.00        17

    accuracy                           0.60        45
   macro avg       0.43      0.63      0.49        45
weighted avg       0.46      0.60      0.50        45
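For reference, a minimal sketch of the multiclass setup the iris data actually calls for: 'multi:softprob' with 'num_class': 3 and the 'mlogloss' metric, taking the predicted class as the argmax of the per-class probabilities. The hyperparameter values mirror the run above and are assumptions, not tuned choices.

import numpy as np
import xgboost as xgb
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X, y = datasets.load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)
d_train = xgb.DMatrix(data=X_train, label=y_train)
d_test = xgb.DMatrix(data=X_test, label=y_test)
params = {
    'max_depth': 3,
    'eta': 0.1,
    'objective': 'multi:softprob',   # per-class probabilities for 3 classes
    'num_class': 3,
    'eval_metric': 'mlogloss'        # multiclass log loss
}
model = xgb.train(params=params, dtrain=d_train, num_boost_round=1000,
                  evals=[(d_train, 'train'), (d_test, 'eval')],
                  early_stopping_rounds=100)
y_prob = model.predict(d_test)       # shape (n_samples, 3)
y_pred = np.argmax(y_prob, axis=1)   # predicted class = highest probability
print(accuracy_score(y_test, y_pred))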