added_mcts_and_metrics

dino
2022-09-29 11:18:12 +02:00
parent 6c792599cf
commit 088bdb63e9
3 changed files with 568 additions and 0 deletions

ML/ml_metrics/data.txt Normal file

@@ -0,0 +1,100 @@
0 0.827142151760153
0 0.6044595910412887
0 0.7916340858282026
0 0.16080518180592987
0 0.611222921705038
0 0.2555087295500818
0 0.5681507664364468
0 0.05990570219972058
0 0.6644434078306367
0 0.11293577405861703
0 0.06152372321587048
0 0.35250697207600584
0 0.3226701829081975
0 0.43339115381458776
0 0.2280744262436838
0 0.7219848389339433
0 0.23527698971402375
0 0.2850245335200196
0 0.4107047877448165
0 0.2008356196164621
0 0.3711921802697385
0 0.4234822657253734
0 0.4876482027124213
0 0.4234822657253734
0 0.5750985220664769
0 0.6734047730095499
0 0.7355892648444824
0 0.7137899092959652
0 0.3873972469024071
0 0.24042033264833723
0 0.1663411647259707
0 0.1663411647259707
0 0.2850245335200196
0 0.3683741846950643
0 0.17375784896208155
0 0.43636290738886574
0 0.7219848389339433
0 0.46745878087292836
0 0.23527698971402375
0 0.17202866439941822
0 0.17786913865061538
0 0.44335359557308707
0 0.2768503833164947
0 0.06891755391553003
0 0.21414010746535972
0 0.27120595352357546
0 0.26328216986315905
0 0.48056205121673834
0 0.08848560476699129
0 0.2555087295500818
1 0.5681507664364468
1 0.2850245335200196
1 0.842216416418616
1 0.5280820469827786
1 0.6302728469340095
1 0.9325162813331325
1 0.062225621463076315
1 0.8823445035377085
1 0.670739773835188
1 0.891663414209465
1 0.6489254823470298
1 0.5552119758821265
1 0.7510275470993321
1 0.23310831157247616
1 0.2933421288888426
1 0.6044595910412887
1 0.6302728469340095
1 0.9585115007613662
1 0.9342800686704079
1 0.3226701829081975
1 0.7982301827889998
1 0.22102862644325694
1 0.9390780973389883
1 0.5078780077620866
1 0.7379344573081708
1 0.8750078631067137
1 0.4704701704107932
1 0.44335359557308707
1 0.5651814720676593
1 0.8658845001112441
1 0.897024614730928
1 0.9712637967845552
1 0.5651814720676593
1 0.517987379389242
1 0.40385540386469254
1 0.9435470013187671
1 0.5780506539476005
1 0.594744923406366
1 0.3970432858350056
1 0.7916340858282026
1 0.7219848389339433
1 0.7916340858282026
1 0.2850245335200196
1 0.7658513560779588
1 0.7379344573081708
1 0.7137899092959652
1 0.4876482027124213
1 0.6302728469340095
1 0.5310944974701136
1 0.35250697207600584

ML/ml_metrics/metrics.py Normal file

@@ -0,0 +1,240 @@
import numpy as np
from scipy.integrate import simpson
import matplotlib.pyplot as plt
import warnings
def true_positives(y_true, y_pred):
tp = 0
for label, pred in zip(y_true, y_pred):
if pred == 1 and label == 1:
tp += 1
return tp
def true_negatives(y_true, y_pred):
tn = 0
for label, pred in zip(y_true, y_pred):
if pred == 0 and label == 0:
tn += 1
return tn
def false_positives(y_true, y_pred):
fp = 0
for label, pred in zip(y_true, y_pred):
if pred == 1 and label == 0:
fp += 1
return fp
def false_negatives(y_true, y_pred):
fn = 0
for label, pred in zip(y_true, y_pred):
if pred == 0 and label == 1:
fn += 1
return fn
def binary_accuracy(y_true, y_pred):
tp = true_positives(y_true, y_pred)
tn = true_negatives(y_true, y_pred)
fp = false_positives(y_true, y_pred)
fn = false_negatives(y_true, y_pred)
return (tp + tn) / (tp + tn + fp + fn)
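# Quick sanity check of the counters above on a tiny made-up example:
# with these toy labels we expect tp=2, tn=1, fp=1, fn=1 and accuracy 3/5.
_toy_true = [1, 0, 1, 1, 0]
_toy_pred = [1, 0, 0, 1, 1]
assert true_positives(_toy_true, _toy_pred) == 2
assert true_negatives(_toy_true, _toy_pred) == 1
assert false_positives(_toy_true, _toy_pred) == 1
assert false_negatives(_toy_true, _toy_pred) == 1
assert abs(binary_accuracy(_toy_true, _toy_pred) - 0.6) < 1e-12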
def precision(y_true, y_pred):
    """
    Fraction of true positives among all samples predicted as positive.
    How I view it: assuming we say someone has cancer, how often are we correct?
    It tells us how much we can trust the model when it predicts an individual as positive.
    """
    tp = true_positives(y_true, y_pred)
    fp = false_positives(y_true, y_pred)
    return tp / (tp + fp)
def recall(y_true, y_pred):
    """
    Recall measures the model's predictive accuracy for the positive class.
    How I view it: out of all the people that have cancer, how often are
    we able to detect it?
    """
    tp = true_positives(y_true, y_pred)
    fn = false_negatives(y_true, y_pred)
    return tp / (tp + fn)
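# Tiny illustration of the difference between the two (made-up labels,
# class 1 = "has cancer"): the model flags 3 people, 2 of them correctly
# (precision 2/3), but only finds 2 of the 4 actual positives (recall 1/2).
_toy_true = [1, 1, 1, 1, 0, 0]
_toy_pred = [1, 1, 0, 0, 1, 0]
assert abs(precision(_toy_true, _toy_pred) - 2 / 3) < 1e-12
assert abs(recall(_toy_true, _toy_pred) - 0.5) < 1e-12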
def multiclass_accuracy(y_true, y_pred):
correct = 0
total = len(y_true)
for label, pred in zip(y_true, y_pred):
correct += label == pred
return correct/total
def confusion_matrix(y_true, y_pred):
y_true = np.array(y_true)
y_pred = np.array(y_pred)
assert y_true.shape == y_pred.shape
    # assumes class labels are consecutive integers starting at 0
    unique_classes = np.unique(np.concatenate([y_true, y_pred], axis=0)).shape[0]
cm = np.zeros((unique_classes, unique_classes), dtype=np.int64)
for label, pred in zip(y_true, y_pred):
cm[label, pred] += 1
return cm
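# How the matrix is laid out (toy labels for illustration): rows index the
# true class, columns the predicted class, so cm[i, j] counts samples of
# class i that were predicted as class j.
_toy_cm = confusion_matrix([0, 0, 1, 1, 2], [0, 1, 1, 1, 0])
assert (_toy_cm == np.array([[1, 1, 0],
                             [0, 2, 0],
                             [1, 0, 0]])).all()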
def accuracy_cm(cm):
return np.trace(cm)/np.sum(cm)
def balanced_accuracy_cm(cm):
correctly_classified = np.diagonal(cm)
rows_sum = np.sum(cm, axis=1)
indices = np.nonzero(rows_sum)[0]
if rows_sum.shape[0] != indices.shape[0]:
warnings.warn("y_pred contains classes not in y_true")
accuracy_per_class = correctly_classified[indices]/(rows_sum[indices])
return np.sum(accuracy_per_class)/accuracy_per_class.shape[0]
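# Why balanced accuracy matters (numbers made up for illustration): on a
# heavily imbalanced confusion matrix the plain accuracy looks great even
# though the minority class is only recognised half of the time.
_imbalanced_cm = np.array([[90, 0],
                           [ 5, 5]])
assert abs(accuracy_cm(_imbalanced_cm) - 0.95) < 1e-12
assert abs(balanced_accuracy_cm(_imbalanced_cm) - 0.75) < 1e-12  # (90/90 + 5/10) / 2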
def precision_cm(cm, average="specific", class_label=1, eps=1e-12):
tp = np.diagonal(cm)
fp = np.sum(cm, axis=0) - tp
#precisions = np.diagonal(cm)/np.maximum(np.sum(cm, axis=0), 1e-12)
if average == "none":
return tp/(tp+fp+eps)
if average == "specific":
precisions = tp / (tp + fp + eps)
return precisions[class_label]
if average == "micro":
# all samples equally contribute to the average,
# hence there is a distinction between highly
# and poorly populated classes
return np.sum(tp) / (np.sum(tp) + np.sum(fp) + eps)
if average == "macro":
# all classes equally contribute to the average,
# no distinction between highly and poorly populated classes.
precisions = tp / (tp + fp + eps)
return np.sum(precisions)/precisions.shape[0]
    if average == "weighted":
        # classes contribute proportionally to their support (number of true samples per class)
        support = np.sum(cm, axis=1)
        precisions = tp / (tp + fp + eps)
        return np.sum(precisions * support) / (np.sum(support) + eps)
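# Quick comparison of the averaging modes on a deliberately imbalanced toy
# confusion matrix: micro is pulled towards the big class, macro treats both
# classes equally, weighted (support-weighted, as sketched above) sits in between.
_avg_cm = np.array([[95, 5],
                    [ 8, 2]])
print("precision per class:", precision_cm(_avg_cm, average="none"))
print("precision micro:    ", precision_cm(_avg_cm, average="micro"))
print("precision macro:    ", precision_cm(_avg_cm, average="macro"))
print("precision weighted: ", precision_cm(_avg_cm, average="weighted"))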
def recall_cm(cm, average="specific", class_label=1, eps=1e-12):
tp = np.diagonal(cm)
fn = np.sum(cm, axis=1) - tp
if average == "none":
return tp / (tp + fn + eps)
if average == "specific":
recalls = tp / (tp + fn + eps)
return recalls[class_label]
if average == "micro":
        return np.sum(tp) / (np.sum(tp) + np.sum(fn) + eps)
if average == "macro":
recalls = tp / (tp + fn + eps)
return np.sum(recalls)/recalls.shape[0]
    if average == "weighted":
        # classes contribute proportionally to their support (number of true samples per class)
        support = np.sum(cm, axis=1)
        recalls = tp / (tp + fn + eps)
        return np.sum(recalls * support) / (np.sum(support) + eps)
def f1score_cm(cm, average="specific", class_label=1):
precision = precision_cm(cm, average, class_label)
recall = recall_cm(cm, average, class_label)
return 2 * (precision*recall)/(precision+recall)
# true positive rate <-> sensitivity <-> recall
# true negative rate <-> specificity <-> recall for neg. class (see the small check below)
# ROC curve
# AUC from ROC
# Precision-Recall Curve
# Log Loss
# Matthews Correlation Coefficient (MCC)
# Cohen Kappa score
# --> REGRESSION METRICS
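# Small check of the equivalences noted above (toy labels for illustration):
# the TPR is just the recall of the positive class and the TNR is the recall
# of the negative class, which is exactly what roc_curve below relies on.
_rate_cm = confusion_matrix([0, 0, 0, 1, 1], [0, 1, 0, 1, 1])
_rate_recalls = recall_cm(_rate_cm, average="none")
assert abs(_rate_recalls[1] - 1.0) < 1e-6      # TPR: both positives detected
assert abs(_rate_recalls[0] - 2 / 3) < 1e-6    # TNR: 2 of 3 negatives detected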
def roc_curve(y_true, y_preds, plot_graph=True, calculate_AUC=True, threshold_step=0.01):
    # accept plain Python lists as well as numpy arrays
    y_preds = np.asarray(y_preds)
    TPR, FPR = [], []
for threshold in np.arange(np.min(y_preds), np.max(y_preds), threshold_step):
predictions = (y_preds > threshold) * 1
cm = confusion_matrix(y_true, predictions)
recalls = recall_cm(cm, average="none")
# note TPR == sensitivity == recall
tpr = recalls[1]
# note tnr == specificity (which is same as recall for the negative class)
tnr = recalls[0]
TPR.append(tpr)
FPR.append(1-tnr)
if plot_graph:
plt.plot(FPR, TPR)
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC curve")
plt.show()
if calculate_AUC:
print(np.abs(np.trapz(TPR, FPR)))
def precision_recall_curve(y_true, y_preds, plot_graph=True, calculate_AUC=True, threshold_step=0.01):
    # accept plain Python lists as well as numpy arrays
    y_preds = np.asarray(y_preds)
    recalls, precisions = [], []
for threshold in np.arange(np.min(y_preds), np.max(y_preds), threshold_step):
predictions = (y_preds > threshold) * 1
cm = confusion_matrix(y_true, predictions)
recall = recall_cm(cm, average="specific", class_label=1)
precision = precision_cm(cm, average="specific", class_label=1)
recalls.append(recall)
precisions.append(precision)
recalls.append(0)
precisions.append(1)
if plot_graph:
plt.plot(recalls, precisions)
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.title("Precision-Recall curve")
plt.show()
if calculate_AUC:
print(np.abs(np.trapz(precisions, recalls)))
y = []
probs = []
with open("data.txt") as f:
for line in f.readlines():
label, pred = line.split()
label = int(label)
pred = float(pred)
y.append(label)
probs.append(pred)
precision_recall_curve(y, probs, threshold_step=0.001)
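# The same labels and scores can be fed to roc_curve above; left commented out
# so the script keeps plotting only the precision-recall curve by default.
# roc_curve(y, probs, threshold_step=0.001)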
#from sklearn.metrics import precision_recall_curve
#precisions, recalls, _ = precision_recall_curve(y, probs)
#plt.plot(recalls, precisions)
#plt.xlabel("Recall")
#plt.ylabel("Precision")
#plt.title("Precision-Recall curve")
#plt.show()
#print(np.abs(np.trapz(precisions, recalls)))