Mirror of https://github.com/aladdinpersson/Machine-Learning-Collection.git
Initial commit
ML/TensorFlow/Basics/tutorial12-tensorflowdatasets.py (new file, 135 lines)
@@ -0,0 +1,135 @@
import os

import matplotlib.pyplot as plt

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # must be set before TensorFlow is imported
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds

physical_devices = tf.config.list_physical_devices("GPU")
if physical_devices:  # guard so the script also runs on CPU-only machines
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

(ds_train, ds_test), ds_info = tfds.load(
    "mnist",
    split=["train", "test"],
    shuffle_files=True,
    as_supervised=True,  # returns tuple (img, label) rather than a dict
    with_info=True,  # also returns a tfds.core.DatasetInfo with dataset metadata
)

# fig = tfds.show_examples(ds_train, ds_info, rows=4, cols=4)
# print(ds_info)
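
# A couple of optional sanity checks on what tfds.load returned; the numbers
# in the comments assume the standard MNIST splits:
# print(ds_info.splits["train"].num_examples)  # 60000
# print(ds_info.features["label"].num_classes)  # 10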


def normalize_img(image, label):
    """Normalizes images from uint8 [0, 255] to float32 [0, 1]."""
    return tf.cast(image, tf.float32) / 255.0, label


AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 128

# Setup for train dataset
ds_train = ds_train.map(normalize_img, num_parallel_calls=AUTOTUNE)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples)
ds_train = ds_train.batch(BATCH_SIZE)
ds_train = ds_train.prefetch(AUTOTUNE)

# Setup for test dataset
ds_test = ds_test.map(normalize_img, num_parallel_calls=AUTOTUNE)
ds_test = ds_test.batch(BATCH_SIZE)
ds_test = ds_test.prefetch(AUTOTUNE)
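
# Optionally pull a single batch to verify the pipeline's output, e.g.:
# for images, labels in ds_train.take(1):
#     print(images.shape, images.dtype)  # (128, 28, 28, 1) float32
#     print(labels.shape, labels.dtype)  # (128,) int64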

model = keras.Sequential(
    [
        keras.Input((28, 28, 1)),
        layers.Conv2D(32, 3, activation="relu"),
        layers.Flatten(),
        layers.Dense(10, activation="softmax"),
    ]
)

model.compile(
    optimizer=keras.optimizers.Adam(0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

model.fit(ds_train, epochs=5, verbose=2)
model.evaluate(ds_test)
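
# Example inference sketch: the final layer is a softmax over 10 classes, so
# argmax over the last axis gives the predicted digit, e.g.:
# for images, labels in ds_test.take(1):
#     predictions = tf.argmax(model(images), axis=-1)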


(ds_train, ds_test), ds_info = tfds.load(
    "imdb_reviews",
    split=["train", "test"],
    shuffle_files=True,
    as_supervised=True,  # returns tuple (text, label) rather than a dict
    with_info=True,  # also returns a tfds.core.DatasetInfo with dataset metadata
)

# On newer tensorflow_datasets releases the text utilities below live under
# tfds.deprecated.text instead of tfds.features.text
tokenizer = tfds.features.text.Tokenizer()


def build_vocabulary():
    """Collects the set of all lowercased tokens in the training split."""
    vocabulary = set()
    for text, _ in ds_train:
        vocabulary.update(tokenizer.tokenize(text.numpy().lower()))
    return vocabulary


vocabulary = build_vocabulary()
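
# Note that build_vocabulary iterates the entire training split eagerly, so
# this pass can take a while; a quick check of the result:
# print(len(vocabulary))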

encoder = tfds.features.text.TokenTextEncoder(
    list(vocabulary), oov_token="<UNK>", lowercase=True, tokenizer=tokenizer
)
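
# Example round trip through the encoder; token ids start at 1 (0 is reserved
# for padding), which is why the embedding below uses len(vocabulary) + 2:
# ids = encoder.encode("this movie was great")
# print(encoder.decode(ids))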


def my_enc(text_tensor, label):
    # Runs eagerly (called via tf.py_function below), so .numpy() is available
    encoded_text = encoder.encode(text_tensor.numpy())
    return encoded_text, label


def encode_map_fn(text, label):
    # py_function doesn't set the shape of the returned tensors.
    encoded_text, label = tf.py_function(
        my_enc, inp=[text, label], Tout=(tf.int64, tf.int64)
    )

    # `tf.data.Dataset` transformations work best if all components have a
    # shape set, so set the shapes manually:
    encoded_text.set_shape([None])
    label.set_shape([])

    return encoded_text, label


AUTOTUNE = tf.data.experimental.AUTOTUNE
ds_train = ds_train.map(encode_map_fn, num_parallel_calls=AUTOTUNE)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(1000)
ds_train = ds_train.padded_batch(32, padded_shapes=([None], ()))
ds_train = ds_train.prefetch(AUTOTUNE)

ds_test = ds_test.map(encode_map_fn)
ds_test = ds_test.padded_batch(32, padded_shapes=([None], ()))
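
# padded_batch pads every review in a batch with zeros up to the longest
# review in that batch, so the sequence dimension varies per batch, e.g.:
# for texts, labels in ds_train.take(1):
#     print(texts.shape)  # (32, longest_review_in_this_batch)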

model = keras.Sequential(
    [
        layers.Masking(mask_value=0),  # treat padded zeros as masked timesteps
        layers.Embedding(input_dim=len(vocabulary) + 2, output_dim=32),
        layers.GlobalAveragePooling1D(),
        layers.Dense(64, activation="relu"),
        layers.Dense(1),  # single logit; from_logits=True in the loss below
    ]
)

model.compile(
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(3e-4, clipnorm=1),
    metrics=["accuracy"],
)

model.fit(ds_train, epochs=15, verbose=2)
model.evaluate(ds_test)
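
# Example inference on a raw string, reusing the encoder from above; the model
# outputs a single logit, so sigmoid maps it to P(positive):
# ids = tf.constant([encoder.encode("a wonderful, heartfelt film")], dtype=tf.int64)
# print(tf.sigmoid(model(ids)))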