import numpy as np
import matplotlib.pyplot as plt
import warnings


def true_positives(y_true, y_pred):
    tp = 0
    for label, pred in zip(y_true, y_pred):
        if pred == 1 and label == 1:
            tp += 1
    return tp


def true_negatives(y_true, y_pred):
    tn = 0
    for label, pred in zip(y_true, y_pred):
        if pred == 0 and label == 0:
            tn += 1
    return tn


def false_positives(y_true, y_pred):
    fp = 0
    for label, pred in zip(y_true, y_pred):
        if pred == 1 and label == 0:
            fp += 1
    return fp


def false_negatives(y_true, y_pred):
    fn = 0
    for label, pred in zip(y_true, y_pred):
        if pred == 0 and label == 1:
            fn += 1
    return fn


def binary_accuracy(y_true, y_pred):
    tp = true_positives(y_true, y_pred)
    tn = true_negatives(y_true, y_pred)
    fp = false_positives(y_true, y_pred)
    fn = false_negatives(y_true, y_pred)
    return (tp + tn) / (tp + tn + fp + fn)


def precision(y_true, y_pred):
    """
    Fraction of true positives divided by the total number of positive predictions.

    How I view it: assuming we say someone has cancer, how often are we correct?
    It tells us how much we can trust the model when it predicts an individual as positive.
    """
    tp = true_positives(y_true, y_pred)
    fp = false_positives(y_true, y_pred)
    return tp / (tp + fp)


def recall(y_true, y_pred):
    """
    Recall measures the model's predictive accuracy for the positive class.

    How I view it: out of all the people that have cancer, how often are we able to detect it?
    """
    tp = true_positives(y_true, y_pred)
    fn = false_negatives(y_true, y_pred)
    return tp / (tp + fn)
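# Quick sanity check of the count-based binary metrics on a tiny, made-up
# cancer-screening example (1 = has cancer, 0 = healthy); the labels and
# predictions below are illustrative only.
_y_true_demo = [1, 1, 1, 0, 0, 0, 0, 1]
_y_pred_demo = [1, 0, 1, 0, 1, 0, 0, 1]
# TP = 3, FN = 1, FP = 1, TN = 3
assert binary_accuracy(_y_true_demo, _y_pred_demo) == 0.75  # (3 + 3) / 8
assert precision(_y_true_demo, _y_pred_demo) == 0.75        # 3 / (3 + 1)
assert recall(_y_true_demo, _y_pred_demo) == 0.75           # 3 / (3 + 1)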
def multiclass_accuracy(y_true, y_pred):
    correct = 0
    total = len(y_true)
    for label, pred in zip(y_true, y_pred):
        correct += label == pred
    return correct / total


def confusion_matrix(y_true, y_pred):
    # assumes class labels are integers 0..K-1; rows are true labels, columns are predictions
    y_true = np.array(y_true)
    y_pred = np.array(y_pred)
    assert y_true.shape == y_pred.shape
    unique_classes = np.unique(np.concatenate([y_true, y_pred], axis=0)).shape[0]
    cm = np.zeros((unique_classes, unique_classes), dtype=np.int64)
    for label, pred in zip(y_true, y_pred):
        cm[label, pred] += 1
    return cm


def accuracy_cm(cm):
    return np.trace(cm) / np.sum(cm)


def balanced_accuracy_cm(cm):
    correctly_classified = np.diagonal(cm)
    rows_sum = np.sum(cm, axis=1)
    indices = np.nonzero(rows_sum)[0]
    if rows_sum.shape[0] != indices.shape[0]:
        warnings.warn("y_pred contains classes not in y_true")
    accuracy_per_class = correctly_classified[indices] / rows_sum[indices]
    return np.sum(accuracy_per_class) / accuracy_per_class.shape[0]


def precision_cm(cm, average="specific", class_label=1, eps=1e-12):
    tp = np.diagonal(cm)
    fp = np.sum(cm, axis=0) - tp
    if average == "none":
        return tp / (tp + fp + eps)
    if average == "specific":
        precisions = tp / (tp + fp + eps)
        return precisions[class_label]
    if average == "micro":
        # every sample contributes equally to the average, so highly
        # populated classes dominate poorly populated ones
        return np.sum(tp) / (np.sum(tp) + np.sum(fp) + eps)
    if average == "macro":
        # every class contributes equally to the average, regardless of
        # how many samples it contains
        precisions = tp / (tp + fp + eps)
        return np.sum(precisions) / precisions.shape[0]
    if average == "weighted":
        # per-class precision weighted by class support (number of true samples)
        precisions = tp / (tp + fp + eps)
        support = np.sum(cm, axis=1)
        return np.sum(precisions * support) / np.sum(support)


def recall_cm(cm, average="specific", class_label=1, eps=1e-12):
    tp = np.diagonal(cm)
    fn = np.sum(cm, axis=1) - tp
    if average == "none":
        return tp / (tp + fn + eps)
    if average == "specific":
        recalls = tp / (tp + fn + eps)
        return recalls[class_label]
    if average == "micro":
        return np.sum(tp) / (np.sum(tp) + np.sum(fn) + eps)
    if average == "macro":
        recalls = tp / (tp + fn + eps)
        return np.sum(recalls) / recalls.shape[0]
    if average == "weighted":
        # per-class recall weighted by class support (number of true samples)
        recalls = tp / (tp + fn + eps)
        support = np.sum(cm, axis=1)
        return np.sum(recalls * support) / np.sum(support)


def f1score_cm(cm, average="specific", class_label=1):
    prec = precision_cm(cm, average, class_label)
    rec = recall_cm(cm, average, class_label)
    return 2 * (prec * rec) / (prec + rec)


# true positive rate <-> sensitivity <-> recall
# true negative rate <-> specificity <-> recall for the negative class
# ROC curve
# AUC from ROC
# Precision-Recall curve
# Log Loss
# Matthews Correlation Coefficient
# Cohen's Kappa score
# --> REGRESSION METRICS


def roc_curve(y_true, y_preds, plot_graph=True, calculate_AUC=True, threshold_step=0.01):
    y_preds = np.asarray(y_preds)
    TPR, FPR = [], []
    for threshold in np.arange(np.min(y_preds), np.max(y_preds), threshold_step):
        predictions = (y_preds > threshold) * 1
        cm = confusion_matrix(y_true, predictions)
        recalls = recall_cm(cm, average="none")
        # note: TPR == sensitivity == recall
        tpr = recalls[1]
        # note: TNR == specificity (the recall of the negative class)
        tnr = recalls[0]
        TPR.append(tpr)
        FPR.append(1 - tnr)
    if plot_graph:
        plt.plot(FPR, TPR)
        plt.xlabel("False Positive Rate")
        plt.ylabel("True Positive Rate")
        plt.title("ROC curve")
        plt.show()
    if calculate_AUC:
        print(np.abs(np.trapz(TPR, FPR)))


def precision_recall_curve(y_true, y_preds, plot_graph=True, calculate_AUC=True, threshold_step=0.01):
    y_preds = np.asarray(y_preds)
    recalls, precisions = [], []
    for threshold in np.arange(np.min(y_preds), np.max(y_preds), threshold_step):
        predictions = (y_preds > threshold) * 1
        cm = confusion_matrix(y_true, predictions)
        rec = recall_cm(cm, average="specific", class_label=1)
        prec = precision_cm(cm, average="specific", class_label=1)
        recalls.append(rec)
        precisions.append(prec)
    recalls.append(0)
    precisions.append(1)
    if plot_graph:
        plt.plot(recalls, precisions)
        plt.xlabel("Recall")
        plt.ylabel("Precision")
        plt.title("Precision-Recall curve")
        plt.show()
    if calculate_AUC:
        print(np.abs(np.trapz(precisions, recalls)))


y = []
probs = []
with open("data.txt") as f:
    for line in f.readlines():
        label, pred = line.split()
        y.append(int(label))
        probs.append(float(pred))

precision_recall_curve(y, probs, threshold_step=0.001)

#from sklearn.metrics import precision_recall_curve
#precisions, recalls, _ = precision_recall_curve(y, probs)
#plt.plot(recalls, precisions)
#plt.xlabel("Recall")
#plt.ylabel("Precision")
#plt.title("Precision-Recall curve")
#plt.show()
#print(np.abs(np.trapz(precisions, recalls)))
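# Rough sanity check of the confusion-matrix based metrics on a small,
# made-up 3-class example; the optional scikit-learn cross-check below is
# a hedged sketch and assumes scikit-learn is installed. Note that sklearn's
# macro F1 averages per-class F1 scores, which is not the same quantity as
# f1score_cm(cm, average="macro") (harmonic mean of macro precision and recall).
#y_mc = [0, 0, 1, 1, 1, 2, 2, 2]
#y_mc_pred = [0, 1, 1, 1, 2, 2, 2, 0]
#cm_mc = confusion_matrix(y_mc, y_mc_pred)
#print(accuracy_cm(cm_mc), precision_cm(cm_mc, average="macro"), recall_cm(cm_mc, average="macro"))
#from sklearn.metrics import precision_score, recall_score
#print(precision_score(y_mc, y_mc_pred, average="macro"), recall_score(y_mc, y_mc_pred, average="macro"))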