import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# Load the iris dataset (150 samples, 4 features, 3 classes)
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hold out 30% of the data for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=123)
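The plain split above does not guarantee equal class proportions in the train and test sets. If that matters, train_test_split accepts a stratify argument; a minimal variant for reference (the recorded results below were produced without it, and the *_s names are illustrative only):

# Hypothetical stratified variant that preserves the 1:1:1 class ratio
X_tr_s, X_te_s, y_tr_s, y_te_s = train_test_split(
    X, y, test_size=0.3, random_state=123, stratify=y)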
# Hyperparameter grid for the random forest
rf_params = {
    'n_estimators': [80, 100],
    'max_depth': [3, 5, 7],
    'min_samples_leaf': [8, 12, 15],
    'max_features': [2, 3, 4]
}

# Exhaustive search over the grid with 5-fold CV (the GridSearchCV default)
model = RandomForestClassifier()
rf_gd = GridSearchCV(model, param_grid=rf_params, n_jobs=-1)
rf_gd.fit(X_train, y_train)
# Best combination found in this run:
# {'max_depth': 3, 'max_features': 2, 'min_samples_leaf': 15, 'n_estimators': 80}
print('Best parameters:', rf_gd.best_params_)
print('Best CV accuracy:', rf_gd.best_score_)
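Beyond best_params_, GridSearchCV exposes every candidate's cross-validation score through its cv_results_ attribute; a short sketch for ranking them:

# Rank all grid candidates by mean CV accuracy
cv_df = pd.DataFrame(rf_gd.cv_results_)
print(cv_df[['params', 'mean_test_score', 'std_test_score']]
      .sort_values('mean_test_score', ascending=False)
      .head())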
# Refit a fresh model with the best parameters found by the grid search
model = RandomForestClassifier(**rf_gd.best_params_)
model.fit(X_train, y_train)
# Evaluate on the held-out test set
y_pred = model.predict(X_test)
print('Test accuracy:', accuracy_score(y_test, y_pred))
# Note: for classifiers, .score() returns mean accuracy, not R^2
print('Test accuracy (score):', model.score(X_test, y_test))
print('Train accuracy:', model.score(X_train, y_train))
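Since confusion_matrix and classification_report are already imported, the same predictions can also be broken down per class; a quick sketch:

# Per-class view of the test predictions
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred,
                            target_names=iris.target_names))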
# Rank features by impurity-based importance
feature_imp = pd.Series(model.feature_importances_,
                        index=iris.feature_names).sort_values(ascending=False)

# Visualize the ranking
sns.barplot(x=feature_imp, y=feature_imp.index)
plt.xlabel('Feature Importance Score')
plt.ylabel('Features')
plt.show()
Output:
Best parameters: {'max_depth': 3, 'max_features': 2, 'min_samples_leaf': 15, 'n_estimators': 80}
Best CV accuracy: 0.9333333333333333
Test accuracy: 0.9555555555555556
Test accuracy (score): 0.9555555555555556
Train accuracy: 0.9523809523809523
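Impurity-based importances like those plotted above can be biased toward features with many possible split points. As a cross-check, a minimal sketch using permutation importance from sklearn.inspection (an addition, not part of the original run):

from sklearn.inspection import permutation_importance

# Accuracy drop on the test set when each feature is shuffled
perm = permutation_importance(model, X_test, y_test,
                              n_repeats=10, random_state=123)
for name, imp in sorted(zip(iris.feature_names, perm.importances_mean),
                        key=lambda t: t[1], reverse=True):
    print(f'{name}: {imp:.3f}')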