Mirror of https://github.com/aladdinpersson/Machine-Learning-Collection.git
Initial commit
ML/TensorFlow/Basics/tutorial12-tensorflowdatasets.py (new file, 135 lines)
@@ -0,0 +1,135 @@
import os

import matplotlib.pyplot as plt

os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"  # must be set before TensorFlow is imported
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_datasets as tfds

physical_devices = tf.config.list_physical_devices("GPU")
if physical_devices:  # guard so the script also runs on CPU-only machines
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

(ds_train, ds_test), ds_info = tfds.load(
    "mnist",
    split=["train", "test"],
    shuffle_files=True,
    as_supervised=True,  # returns tuple (img, label) rather than a dict
    with_info=True,  # also returns a tfds.core.DatasetInfo with dataset metadata
)

# fig = tfds.show_examples(ds_train, ds_info, rows=4, cols=4)
# print(ds_info)
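
# A couple of optional sanity checks on what tfds.load returned; the numbers
# in the comments assume the standard MNIST splits:
# print(ds_info.splits["train"].num_examples)  # 60000
# print(ds_info.features["label"].num_classes)  # 10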


def normalize_img(image, label):
    """Normalizes images from uint8 [0, 255] to float32 [0, 1]."""
    return tf.cast(image, tf.float32) / 255.0, label


AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 128

# Setup for train dataset
ds_train = ds_train.map(normalize_img, num_parallel_calls=AUTOTUNE)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples)
ds_train = ds_train.batch(BATCH_SIZE)
ds_train = ds_train.prefetch(AUTOTUNE)

# Setup for test dataset
ds_test = ds_test.map(normalize_img, num_parallel_calls=AUTOTUNE)
ds_test = ds_test.batch(BATCH_SIZE)
ds_test = ds_test.prefetch(AUTOTUNE)
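
# Optionally pull a single batch to verify the pipeline's output, e.g.:
# for images, labels in ds_train.take(1):
#     print(images.shape, images.dtype)  # (128, 28, 28, 1) float32
#     print(labels.shape, labels.dtype)  # (128,) int64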

model = keras.Sequential(
    [
        keras.Input((28, 28, 1)),
        layers.Conv2D(32, 3, activation="relu"),
        layers.Flatten(),
        layers.Dense(10, activation="softmax"),
    ]
)

model.compile(
    optimizer=keras.optimizers.Adam(0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)

model.fit(ds_train, epochs=5, verbose=2)
model.evaluate(ds_test)
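
# Example inference sketch: the final layer is a softmax over 10 classes, so
# argmax over the last axis gives the predicted digit, e.g.:
# for images, labels in ds_test.take(1):
#     predictions = tf.argmax(model(images), axis=-1)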


(ds_train, ds_test), ds_info = tfds.load(
    "imdb_reviews",
    split=["train", "test"],
    shuffle_files=True,
    as_supervised=True,  # returns tuple (text, label) rather than a dict
    with_info=True,  # also returns a tfds.core.DatasetInfo with dataset metadata
)

# On newer tensorflow_datasets releases the text utilities below live under
# tfds.deprecated.text instead of tfds.features.text
tokenizer = tfds.features.text.Tokenizer()


def build_vocabulary():
    """Collects the set of all lowercased tokens in the training split."""
    vocabulary = set()
    for text, _ in ds_train:
        vocabulary.update(tokenizer.tokenize(text.numpy().lower()))
    return vocabulary


vocabulary = build_vocabulary()
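
# Note that build_vocabulary iterates the entire training split eagerly, so
# this pass can take a while; a quick check of the result:
# print(len(vocabulary))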

encoder = tfds.features.text.TokenTextEncoder(
    list(vocabulary), oov_token="<UNK>", lowercase=True, tokenizer=tokenizer
)
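
# Example round trip through the encoder; token ids start at 1 (0 is reserved
# for padding), which is why the embedding below uses len(vocabulary) + 2:
# ids = encoder.encode("this movie was great")
# print(encoder.decode(ids))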


def my_enc(text_tensor, label):
    # Runs eagerly (called via tf.py_function below), so .numpy() is available
    encoded_text = encoder.encode(text_tensor.numpy())
    return encoded_text, label


def encode_map_fn(text, label):
    # py_function doesn't set the shape of the returned tensors.
    encoded_text, label = tf.py_function(
        my_enc, inp=[text, label], Tout=(tf.int64, tf.int64)
    )

    # `tf.data.Dataset` transformations work best if all components have a
    # shape set, so set the shapes manually:
    encoded_text.set_shape([None])
    label.set_shape([])

    return encoded_text, label


AUTOTUNE = tf.data.experimental.AUTOTUNE
ds_train = ds_train.map(encode_map_fn, num_parallel_calls=AUTOTUNE)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(1000)
ds_train = ds_train.padded_batch(32, padded_shapes=([None], ()))
ds_train = ds_train.prefetch(AUTOTUNE)

ds_test = ds_test.map(encode_map_fn)
ds_test = ds_test.padded_batch(32, padded_shapes=([None], ()))
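
# padded_batch pads every review in a batch with zeros up to the longest
# review in that batch, so the sequence dimension varies per batch, e.g.:
# for texts, labels in ds_train.take(1):
#     print(texts.shape)  # (32, longest_review_in_this_batch)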

model = keras.Sequential(
    [
        layers.Masking(mask_value=0),  # treat padded zeros as masked timesteps
        layers.Embedding(input_dim=len(vocabulary) + 2, output_dim=32),
        layers.GlobalAveragePooling1D(),
        layers.Dense(64, activation="relu"),
        layers.Dense(1),  # single logit; from_logits=True in the loss below
    ]
)

model.compile(
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=keras.optimizers.Adam(3e-4, clipnorm=1),
    metrics=["accuracy"],
)

model.fit(ds_train, epochs=15, verbose=2)
model.evaluate(ds_test)
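
# Example inference on a raw string, reusing the encoder from above; the model
# outputs a single logit, so sigmoid maps it to P(positive):
# ids = tf.constant([encoder.encode("a wonderful, heartfelt film")], dtype=tf.int64)
# print(tf.sigmoid(model(ids)))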