评估指标

1.分类问题:

①混淆矩阵

import itertools

import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(cm, classes, title='Confusion matrix', cmap=plt.cm.Blues):
    """Draw a confusion matrix as an annotated heat map on the current figure.

    Args:
        cm: 2-D array-like of cell values. Rows are drawn along the y axis
            ("True label") and columns along the x axis ("Predicted label").
        classes: Sequence of tick labels, applied to both axes in order.
        title: Title placed above the plot.
        cmap: Matplotlib colormap used to shade the cells.

    The function only draws; the caller is responsible for creating the
    figure beforehand and calling ``plt.show()`` / ``plt.savefig()`` after.
    """
    cm = np.asarray(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Integer counts are printed verbatim; anything else (e.g. a row-normalised
    # matrix) gets two decimals, because the 'd' format code raises ValueError
    # on floats.
    fmt = 'd' if np.issubdtype(cm.dtype, np.integer) else '.2f'
    # Cells darker than the midpoint get white text so the number stays legible.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so the axis labels are included in it.
    plt.tight_layout()

# Build the confusion matrix from the true labels (y_test) and the model's
# predictions (y_pred) -- both are expected to be defined before this point --
# then draw it on a fresh figure and display it.
matrix = confusion_matrix(y_test, y_pred)
plt.figure()
plot_confusion_matrix(
    matrix,
    classes=['unsurvived', 'survived'],
    title='Confusion matrix',
)
plt.show()

②precision,recall,f1-score指标

使用classification_report得到这些指标

from sklearn.metrics import classification_report

# Display names for the report rows, given in the same order as the labels.
# NOTE(review): assumes label 0 means "did not survive" and 1 means "survived"
# -- confirm against the dataset. Spelling matches the tick labels used for
# the confusion-matrix plot ('unsurvived').
target_names = ['unsurvived', 'survived']
print(classification_report(y_test, y_pred, target_names=target_names))

③AUC指标

ROC曲线与AUC最初是针对二元分类问题定义的指标(下面的roc_curve只处理二分类;多分类需要借助one-vs-rest等方式推广)

from sklearn import metrics

# y_pred_prob holds each sample's probability of belonging to the positive
# class; pos_label names the positive class label and every other label is
# treated as negative. roc_curve returns the fpr/tpr pairs obtained at the
# different thresholds, which together trace the ROC curve.
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred_prob, pos_label=1)
# The AUC value is the area under that ROC curve.
auc = metrics.auc(fpr, tpr)
print(auc)

2.回归问题

回归问题一般是采用均方误差(Mean Squared Error, MSE)或者均方根误差(Root Mean Squared Error, RMSE)来评估模型。下面是计算交叉验证的RMSE

from sklearn.model_selection import cross_val_score

def rmse_cv(model, x_train, y_train, cv=5):
    """Return the cross-validated RMSE of ``model``, one value per fold.

    Args:
        model: Estimator passed straight through to ``cross_val_score``.
        x_train: Training features.
        y_train: Training targets.
        cv: Cross-validation setting forwarded to ``cross_val_score``
            (defaults to 5, the value previously hard-coded).

    Returns:
        The square root of the per-fold mean squared errors, as returned by
        ``np.sqrt`` applied to the array of fold scores.
    """
    # scoring='neg_mean_squared_error' yields the MSE with its sign flipped,
    # so negate it again before taking the square root.
    neg_mse = cross_val_score(
        model, x_train, y_train,
        scoring='neg_mean_squared_error', cv=cv,
    )
    return np.sqrt(-neg_mse)

results matching ""

    No results matching ""