added_mcts_and_metrics

dino
2022-09-29 11:18:12 +02:00
parent 6c792599cf
commit 088bdb63e9
3 changed files with 568 additions and 0 deletions

ML/ml_metrics/data.txt Normal file

@@ -0,0 +1,100 @@
0 0.827142151760153
0 0.6044595910412887
0 0.7916340858282026
0 0.16080518180592987
0 0.611222921705038
0 0.2555087295500818
0 0.5681507664364468
0 0.05990570219972058
0 0.6644434078306367
0 0.11293577405861703
0 0.06152372321587048
0 0.35250697207600584
0 0.3226701829081975
0 0.43339115381458776
0 0.2280744262436838
0 0.7219848389339433
0 0.23527698971402375
0 0.2850245335200196
0 0.4107047877448165
0 0.2008356196164621
0 0.3711921802697385
0 0.4234822657253734
0 0.4876482027124213
0 0.4234822657253734
0 0.5750985220664769
0 0.6734047730095499
0 0.7355892648444824
0 0.7137899092959652
0 0.3873972469024071
0 0.24042033264833723
0 0.1663411647259707
0 0.1663411647259707
0 0.2850245335200196
0 0.3683741846950643
0 0.17375784896208155
0 0.43636290738886574
0 0.7219848389339433
0 0.46745878087292836
0 0.23527698971402375
0 0.17202866439941822
0 0.17786913865061538
0 0.44335359557308707
0 0.2768503833164947
0 0.06891755391553003
0 0.21414010746535972
0 0.27120595352357546
0 0.26328216986315905
0 0.48056205121673834
0 0.08848560476699129
0 0.2555087295500818
1 0.5681507664364468
1 0.2850245335200196
1 0.842216416418616
1 0.5280820469827786
1 0.6302728469340095
1 0.9325162813331325
1 0.062225621463076315
1 0.8823445035377085
1 0.670739773835188
1 0.891663414209465
1 0.6489254823470298
1 0.5552119758821265
1 0.7510275470993321
1 0.23310831157247616
1 0.2933421288888426
1 0.6044595910412887
1 0.6302728469340095
1 0.9585115007613662
1 0.9342800686704079
1 0.3226701829081975
1 0.7982301827889998
1 0.22102862644325694
1 0.9390780973389883
1 0.5078780077620866
1 0.7379344573081708
1 0.8750078631067137
1 0.4704701704107932
1 0.44335359557308707
1 0.5651814720676593
1 0.8658845001112441
1 0.897024614730928
1 0.9712637967845552
1 0.5651814720676593
1 0.517987379389242
1 0.40385540386469254
1 0.9435470013187671
1 0.5780506539476005
1 0.594744923406366
1 0.3970432858350056
1 0.7916340858282026
1 0.7219848389339433
1 0.7916340858282026
1 0.2850245335200196
1 0.7658513560779588
1 0.7379344573081708
1 0.7137899092959652
1 0.4876482027124213
1 0.6302728469340095
1 0.5310944974701136
1 0.35250697207600584

ML/ml_metrics/metrics.py Normal file

@@ -0,0 +1,240 @@
import numpy as np
from scipy.integrate import simpson
import matplotlib.pyplot as plt
import warnings
def true_positives(y_true, y_pred):
tp = 0
for label, pred in zip(y_true, y_pred):
if pred == 1 and label == 1:
tp += 1
return tp
def true_negatives(y_true, y_pred):
tn = 0
for label, pred in zip(y_true, y_pred):
if pred == 0 and label == 0:
tn += 1
return tn
def false_positives(y_true, y_pred):
fp = 0
for label, pred in zip(y_true, y_pred):
if pred == 1 and label == 0:
fp += 1
return fp
def false_negatives(y_true, y_pred):
fn = 0
for label, pred in zip(y_true, y_pred):
if pred == 0 and label == 1:
fn += 1
return fn
def binary_accuracy(y_true, y_pred):
tp = true_positives(y_true, y_pred)
tn = true_negatives(y_true, y_pred)
fp = false_positives(y_true, y_pred)
fn = false_negatives(y_true, y_pred)
return (tp + tn) / (tp + tn + fp + fn)
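# Quick sanity check of the counters above on a tiny made-up example:
# with these toy labels we expect tp=2, tn=1, fp=1, fn=1 and accuracy 3/5.
_toy_true = [1, 0, 1, 1, 0]
_toy_pred = [1, 0, 0, 1, 1]
assert true_positives(_toy_true, _toy_pred) == 2
assert true_negatives(_toy_true, _toy_pred) == 1
assert false_positives(_toy_true, _toy_pred) == 1
assert false_negatives(_toy_true, _toy_pred) == 1
assert abs(binary_accuracy(_toy_true, _toy_pred) - 0.6) < 1e-12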
def precision(y_true, y_pred):
    """
    Fraction of true positives among all samples predicted as positive.
    How I view it: assuming we say someone has cancer, how often are we correct?
    It tells us how much we can trust the model when it predicts an individual as positive.
    """
    tp = true_positives(y_true, y_pred)
    fp = false_positives(y_true, y_pred)
    return tp / (tp + fp)
def recall(y_true, y_pred):
    """
    Recall measures the model's predictive accuracy for the positive class.
    How I view it: out of all the people that have cancer, how often are
    we able to detect it?
    """
    tp = true_positives(y_true, y_pred)
    fn = false_negatives(y_true, y_pred)
    return tp / (tp + fn)
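# Tiny illustration of the difference between the two (made-up labels,
# class 1 = "has cancer"): the model flags 3 people, 2 of them correctly
# (precision 2/3), but only finds 2 of the 4 actual positives (recall 1/2).
_toy_true = [1, 1, 1, 1, 0, 0]
_toy_pred = [1, 1, 0, 0, 1, 0]
assert abs(precision(_toy_true, _toy_pred) - 2 / 3) < 1e-12
assert abs(recall(_toy_true, _toy_pred) - 0.5) < 1e-12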
def multiclass_accuracy(y_true, y_pred):
correct = 0
total = len(y_true)
for label, pred in zip(y_true, y_pred):
correct += label == pred
return correct/total
def confusion_matrix(y_true, y_pred):
y_true = np.array(y_true)
y_pred = np.array(y_pred)
assert y_true.shape == y_pred.shape
    # assumes class labels are consecutive integers starting at 0
    unique_classes = np.unique(np.concatenate([y_true, y_pred], axis=0)).shape[0]
cm = np.zeros((unique_classes, unique_classes), dtype=np.int64)
for label, pred in zip(y_true, y_pred):
cm[label, pred] += 1
return cm
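# How the matrix is laid out (toy labels for illustration): rows index the
# true class, columns the predicted class, so cm[i, j] counts samples of
# class i that were predicted as class j.
_toy_cm = confusion_matrix([0, 0, 1, 1, 2], [0, 1, 1, 1, 0])
assert (_toy_cm == np.array([[1, 1, 0],
                             [0, 2, 0],
                             [1, 0, 0]])).all()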
def accuracy_cm(cm):
return np.trace(cm)/np.sum(cm)
def balanced_accuracy_cm(cm):
correctly_classified = np.diagonal(cm)
rows_sum = np.sum(cm, axis=1)
indices = np.nonzero(rows_sum)[0]
if rows_sum.shape[0] != indices.shape[0]:
warnings.warn("y_pred contains classes not in y_true")
accuracy_per_class = correctly_classified[indices]/(rows_sum[indices])
return np.sum(accuracy_per_class)/accuracy_per_class.shape[0]
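# Why balanced accuracy matters (numbers made up for illustration): on a
# heavily imbalanced confusion matrix the plain accuracy looks great even
# though the minority class is only recognised half of the time.
_imbalanced_cm = np.array([[90, 0],
                           [ 5, 5]])
assert abs(accuracy_cm(_imbalanced_cm) - 0.95) < 1e-12
assert abs(balanced_accuracy_cm(_imbalanced_cm) - 0.75) < 1e-12  # (90/90 + 5/10) / 2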
def precision_cm(cm, average="specific", class_label=1, eps=1e-12):
tp = np.diagonal(cm)
fp = np.sum(cm, axis=0) - tp
#precisions = np.diagonal(cm)/np.maximum(np.sum(cm, axis=0), 1e-12)
if average == "none":
return tp/(tp+fp+eps)
if average == "specific":
precisions = tp / (tp + fp + eps)
return precisions[class_label]
if average == "micro":
# all samples equally contribute to the average,
# hence there is a distinction between highly
# and poorly populated classes
return np.sum(tp) / (np.sum(tp) + np.sum(fp) + eps)
if average == "macro":
# all classes equally contribute to the average,
# no distinction between highly and poorly populated classes.
precisions = tp / (tp + fp + eps)
return np.sum(precisions)/precisions.shape[0]
    if average == "weighted":
        # classes contribute proportionally to their support (number of true samples per class)
        support = np.sum(cm, axis=1)
        precisions = tp / (tp + fp + eps)
        return np.sum(precisions * support) / (np.sum(support) + eps)
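# Quick comparison of the averaging modes on a deliberately imbalanced toy
# confusion matrix: micro is pulled towards the big class, macro treats both
# classes equally, weighted (support-weighted, as sketched above) sits in between.
_avg_cm = np.array([[95, 5],
                    [ 8, 2]])
print("precision per class:", precision_cm(_avg_cm, average="none"))
print("precision micro:    ", precision_cm(_avg_cm, average="micro"))
print("precision macro:    ", precision_cm(_avg_cm, average="macro"))
print("precision weighted: ", precision_cm(_avg_cm, average="weighted"))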
def recall_cm(cm, average="specific", class_label=1, eps=1e-12):
tp = np.diagonal(cm)
fn = np.sum(cm, axis=1) - tp
if average == "none":
return tp / (tp + fn + eps)
if average == "specific":
recalls = tp / (tp + fn + eps)
return recalls[class_label]
if average == "micro":
        return np.sum(tp) / (np.sum(tp) + np.sum(fn) + eps)
if average == "macro":
recalls = tp / (tp + fn + eps)
return np.sum(recalls)/recalls.shape[0]
    if average == "weighted":
        # classes contribute proportionally to their support (number of true samples per class)
        support = np.sum(cm, axis=1)
        recalls = tp / (tp + fn + eps)
        return np.sum(recalls * support) / (np.sum(support) + eps)
def f1score_cm(cm, average="specific", class_label=1):
precision = precision_cm(cm, average, class_label)
recall = recall_cm(cm, average, class_label)
return 2 * (precision*recall)/(precision+recall)
# true positive rate <-> sensitivity <-> recall
# true negative rate <-> specificity <-> recall for neg. class (see the small check below)
# ROC curve
# AUC from ROC
# Precision-Recall Curve
# Log Loss
# Matthews Correlation Coefficient (MCC)
# Cohen Kappa score
# --> REGRESSION METRICS
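# Small check of the equivalences noted above (toy labels for illustration):
# the TPR is just the recall of the positive class and the TNR is the recall
# of the negative class, which is exactly what roc_curve below relies on.
_rate_cm = confusion_matrix([0, 0, 0, 1, 1], [0, 1, 0, 1, 1])
_rate_recalls = recall_cm(_rate_cm, average="none")
assert abs(_rate_recalls[1] - 1.0) < 1e-6      # TPR: both positives detected
assert abs(_rate_recalls[0] - 2 / 3) < 1e-6    # TNR: 2 of 3 negatives detected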
def roc_curve(y_true, y_preds, plot_graph=True, calculate_AUC=True, threshold_step=0.01):
    # accept plain Python lists as well as numpy arrays
    y_preds = np.asarray(y_preds)
    TPR, FPR = [], []
for threshold in np.arange(np.min(y_preds), np.max(y_preds), threshold_step):
predictions = (y_preds > threshold) * 1
cm = confusion_matrix(y_true, predictions)
recalls = recall_cm(cm, average="none")
# note TPR == sensitivity == recall
tpr = recalls[1]
# note tnr == specificity (which is same as recall for the negative class)
tnr = recalls[0]
TPR.append(tpr)
FPR.append(1-tnr)
if plot_graph:
plt.plot(FPR, TPR)
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC curve")
plt.show()
if calculate_AUC:
print(np.abs(np.trapz(TPR, FPR)))
def precision_recall_curve(y_true, y_preds, plot_graph=True, calculate_AUC=True, threshold_step=0.01):
    # accept plain Python lists as well as numpy arrays
    y_preds = np.asarray(y_preds)
    recalls, precisions = [], []
for threshold in np.arange(np.min(y_preds), np.max(y_preds), threshold_step):
predictions = (y_preds > threshold) * 1
cm = confusion_matrix(y_true, predictions)
recall = recall_cm(cm, average="specific", class_label=1)
precision = precision_cm(cm, average="specific", class_label=1)
recalls.append(recall)
precisions.append(precision)
recalls.append(0)
precisions.append(1)
if plot_graph:
plt.plot(recalls, precisions)
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall curve")
plt.show()
if calculate_AUC:
print(np.abs(np.trapz(precisions, recalls)))
y = []
probs = []
with open("data.txt") as f:
for line in f.readlines():
label, pred = line.split()
label = int(label)
pred = float(pred)
y.append(label)
probs.append(pred)
precision_recall_curve(y, probs, threshold_step=0.001)
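# The same labels and scores can be fed to roc_curve above; left commented out
# so the script keeps plotting only the precision-recall curve by default.
# roc_curve(y, probs, threshold_step=0.001)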
#from sklearn.metrics import precision_recall_curve
#precisions, recalls, _ = precision_recall_curve(y, probs)
#plt.plot(recalls, precisions)
#plt.xlabel("Recall")
#plt.ylabel("Precision")
#plt.title("Precision-Recall curve")
#plt.show()
#print(np.abs(np.trapz(precisions, recalls)))