Tensorflow read images with labels
Using `slice_input_producer` provides a much cleaner solution. A slice input producer lets us create an input queue containing arbitrarily many separable values. With it, the snippet from the question would look like this:
def read_labeled_image_list(image_list_file):
    """Read a text file listing image paths and their integer labels.

    Each non-blank line must hold an image path followed by an integer
    label, separated by whitespace. The label is taken from the last
    whitespace-separated field, so the path itself may contain spaces.
    Blank lines are skipped.

    Args:
      image_list_file: path to a text file with one
        ``/path/to/image <label>`` entry per line.
    Returns:
      A tuple ``(filenames, labels)``: a list of path strings and a list of
      ints, in file order.
    Raises:
      ValueError: if a non-blank line has no label field or the label is
        not an integer.
    """
    filenames = []
    labels = []
    # The context manager closes the file even if a malformed line raises.
    with open(image_list_file, 'r') as f:
        for line in f:
            # strip() instead of line[:-1]: the last line may lack a trailing
            # newline, in which case slicing would cut off part of the label.
            line = line.strip()
            if not line:
                continue
            filename, label = line.rsplit(None, 1)
            filenames.append(filename)
            labels.append(int(label))
    return filenames, labels
def read_images_from_disk(input_queue):
    """Load and decode the image referenced by one input-queue slice.

    Args:
      input_queue: an indexable pair; element 0 is passed to
        ``tf.read_file`` as the image file name and element 1 is the label.
    Returns:
      Two values: the image decoded as a 3-channel PNG, and the label
      exactly as it was found in ``input_queue[1]``.
    """
    label, image_path = input_queue[1], input_queue[0]
    raw_png = tf.read_file(image_path)
    decoded = tf.image.decode_png(raw_png, channels=3)
    return decoded, label
# Load the image paths and their integer labels from the list file.
image_list, label_list = read_labeled_image_list(filename)

# Turn the Python lists into tensors: string paths and int32 labels.
images = ops.convert_to_tensor(image_list, dtype=dtypes.string)
labels = ops.convert_to_tensor(label_list, dtype=dtypes.int32)

# Build a shuffled input queue that hands out one (path, label) slice at a
# time; num_epochs bounds how many times each slice is produced.
input_queue = tf.train.slice_input_producer(
    [images, labels],
    num_epochs=num_epochs,
    shuffle=True,
)

# Decode the image file referenced by the dequeued slice.
image, label = read_images_from_disk(input_queue)

# Optional preprocessing / data augmentation. preprocess_image and
# preprocess_label are not defined in this snippet; supply your own
# (tf.image implements most of the standard image augmentations).
image = preprocess_image(image)
label = preprocess_label(label)

# Optional batching of the image and label tensors.
image_batch, label_batch = tf.train.batch(
    [image, label],
    batch_size=batch_size,
)
See also the `generic_input_producer` from the TensorVision examples for a full input pipeline.
Convert folder of images with labels in CSV file into a tensorflow Dataset
Based on the answers:
- https://stackoverflow.com/a/72343548/
- https://stackoverflow.com/a/54752691/
I put together the following myself. I am sure there is a simpler way, but this is at least functional. I was hoping for more built-in support, though:
import os.path
from typing import Dict, Tuple
import pandas as pd
import tensorflow as tf
def get_full_dataset(
    batch_size: int = 32, image_size: Tuple[int, int] = (256, 256)
) -> tf.data.Dataset:
    """Build a batched dataset of (resized image, integer label) pairs.

    Reads ``images.csv`` under ``DATA_ABS_PATH``. Its ``image`` column is
    mapped to ``<DATA_ABS_PATH>/images/<value>.jpg`` and its ``label`` column
    holds class names, which are lower-cased before being numbered.

    Class indices are assigned in sorted class-name order, so the same CSV
    always yields the same name-to-index mapping.

    Args:
        batch_size: number of examples per batch.
        image_size: target size passed to ``tf.image.resize``.

    Returns:
        A dataset yielding batches of ``(image, label)``. Labels are stored
        as ``tf.uint8``.
    """
    data = pd.read_csv(os.path.join(DATA_ABS_PATH, "images.csv"))
    images_path = os.path.join(DATA_ABS_PATH, "images")
    data["image"] = data["image"].map(lambda x: os.path.join(images_path, f"{x}.jpg"))
    filenames: tf.Tensor = tf.constant(data["image"], dtype=tf.string)
    data["label"] = data["label"].str.lower()
    # Iterating a set of strings gives a different order on each interpreter
    # run (hash randomization), which would silently renumber the classes.
    # Sorting pins the mapping; key=str keeps a missing (NaN) label sortable.
    class_names = sorted(set(data["label"]), key=str)
    class_name_to_label: Dict[str, int] = {
        label: i for i, label in enumerate(class_names)
    }
    labels: tf.Tensor = tf.constant(
        data["label"].map(class_name_to_label.__getitem__), dtype=tf.uint8
    )
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))

    def _parse_function(filename, label):
        # Read and decode one JPEG, then resize it to the requested size.
        jpg_image: tf.Tensor = tf.io.decode_jpeg(tf.io.read_file(filename))
        return tf.image.resize(jpg_image, size=image_size), label

    dataset = dataset.map(_parse_function)
    return dataset.batch(batch_size)
Create Tensorflow Dataset with dataframe of images and labels
You can actually pass a dataframe's columns directly to `tf.data.Dataset.from_tensor_slices`:
import tensorflow as tf
import numpy as np
import pandas as pd
# Toy dataframe: 100 random 64x64x3 arrays with alternating string labels.
df = pd.DataFrame(
    data={
        'images': [np.random.random((64, 64, 3)) for _ in range(100)],
        'labels': ['ok', 'not ok'] * 50,
    }
)

# The image column is turned into a plain list of arrays; the label column
# is passed as its underlying values array.
image_arrays = list(df['images'].values)
label_values = df['labels'].values
dataset = tf.data.Dataset.from_tensor_slices((image_arrays, label_values)).batch(2)

# Inspect the first batch.
for x, y in dataset.take(1):
    print(x.shape, y)
# (2, 64, 64, 3) tf.Tensor([b'ok' b'not ok'], shape=(2,), dtype=string)
Get labels from dataset when using tensorflow image_dataset_from_directory
If I were you, I would iterate over the entire testData, saving the predictions and labels along the way, and build the confusion matrix at the end.
# Load the evaluation images with one-hot ('categorical') labels.
testData = tf.keras.preprocessing.image_dataset_from_directory(
    dataDirectory,
    labels='inferred',
    label_mode='categorical',
    seed=324893,
    image_size=(height, width),
    batch_size=32)

# Accumulate predicted and true class indices batch by batch, so each
# prediction stays paired with the label from the same batch.
predictions = np.array([])
labels = np.array([])
for x, y in testData:
    # Model.predict_classes was removed from Keras (TF 2.6). Taking the
    # argmax over the predicted class scores is the documented replacement
    # for multi-class outputs.
    batch_predictions = np.argmax(model.predict(x), axis=-1)
    predictions = np.concatenate([predictions, batch_predictions])
    # Labels are one-hot, so argmax recovers the class index.
    labels = np.concatenate([labels, np.argmax(y.numpy(), axis=-1)])

tf.math.confusion_matrix(labels=labels, predictions=predictions).numpy()
and the result is
Found 4 files belonging to 2 classes.
array([[2, 0],
[2, 0]], dtype=int32)
Using queues in TensorFlow to load images and labels from text file
It might be caused by `num_epochs=1` in `tf.train.slice_input_producer([filenames, labels], num_epochs=1, shuffle=True)`. You can check the API documentation of `slice_input_producer`, which explains: "num_epochs: An integer (optional). If specified, slice_input_producer produces each slice num_epochs times before generating an OutOfRange error."
Related Topics
Pandas Groupby and Select Rows with the Minimum Value in a Specific Column
Sorting Text File by Using Python
Why Does Str(Float) Return More Digits in Python 3 Than Python 2
How to Pass an Operator to a Python Function
Print a String as Hexadecimal Bytes
Matplotlib Xticks Not Lining Up with Histogram
Why Is Semicolon Allowed in This Python Snippet
Python JSON Parser Allow Duplicate Keys
What Do the Python File Extensions, .Pyc .Pyd .Pyo Stand For
Python - 'Ascii' Codec Can't Decode Byte
Writing a Dict to Txt File and Reading It Back
How to Set Up a Virtual Environment for Python in Visual Studio Code
Repeating Each Element of a Numpy Array 5 Times