评估指标
1.分类问题:
①混淆矩阵
import itertools

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix
#画混淆矩阵
def plot_confusion_matrix(cm, classes, title='Confusion matrix', cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: 2-D confusion matrix; rows are drawn along the "True label"
            axis and columns along the "Predicted label" axis.
        classes: Class names used as tick labels on both axes.
        title: Title drawn above the plot.
        cmap: Matplotlib colormap used to colour the cells.

    The function only draws; creating the figure and calling
    ``plt.show()`` is left to the caller.
    """
    cm = np.asarray(cm)
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed as-is; format(value, 'd') raises on floats,
    # so a float (e.g. normalized) matrix falls back to two decimals.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    # Flip the text colour at half the maximum so the numbers stay readable
    # on both light and dark cells.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
# Compute the confusion matrix for the test predictions and draw it on a new figure.
class_names = ['unsurvived', 'survived']
matrix = confusion_matrix(y_test, y_pred)
plt.figure()
plot_confusion_matrix(
    matrix,
    classes=class_names,
    title='Confusion matrix',
)
plt.show()
②precision,recall,f1-score指标
使用classification_report得到这些指标
from sklearn.metrics import classification_report
# Display names passed to classification_report as target_names.
target_names = ['unsurvived', 'survived']
print(classification_report(y_test, y_pred, target_names=target_names))
③AUC指标
AUC(ROC曲线下的面积)主要用于二元分类问题;下面用到的roc_curve只支持二元分类。
from sklearn import metrics
# y_pred_prob is each sample's probability of belonging to the positive class;
# pos_label is the label of the positive class (every other label counts as
# negative). roc_curve returns a set of fpr/tpr values at different
# thresholds, which together trace out the ROC curve.
fpr,tpr,thresholds=metrics.roc_curve(y_test,y_pred_prob,pos_label=1)
auc=metrics.auc(fpr,tpr)  # the area under the ROC curve is the AUC value
print(auc)
2.回归问题
回归问题一般是采用均方误差(Mean Squared Error, MSE)或者均方根误差(Root Mean Squared Error, RMSE)来评估模型。下面是计算交叉验证的RMSE
from sklearn.model_selection import cross_val_score
def rmse_cv(model, x_train, y_train):
    """Return the per-fold cross-validated RMSE of ``model``.

    Runs ``cross_val_score`` with ``cv=5`` and
    ``scoring='neg_mean_squared_error'``, then converts each fold's score
    into a root mean squared error.

    Args:
        model: Estimator passed through to ``cross_val_score``.
        x_train: Training features.
        y_train: Training targets.

    Returns:
        The RMSE values computed from the scores returned by
        ``cross_val_score`` (one per fold).
    """
    # 'neg_mean_squared_error' is the negated MSE, so flip the sign back
    # before taking the square root.
    neg_mse = cross_val_score(model, x_train, y_train,
                              scoring='neg_mean_squared_error', cv=5)
    rmse = np.sqrt(-neg_mse)
    return rmse