---
title: datagenerator
keywords: fastai
sidebar: home_sidebar
summary: "API details."
description: "API details."
nb_path: "nbs/02_datagenerator.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
{% endraw %}

Data generator

components

Components are methods that can be easily overridden:

  • image path gen
  • image label gen
  • image resizer

The generator object will also support callbacks that can update the components.

{% raw %}

benchmark[source]

benchmark(dataset, num_epochs=2, fake_infer_time=0.001)

Use this function to benchmark your Dataset's loading time.

{% endraw %} {% raw %}
{% endraw %} {% raw %}

get_filenames[source]

get_filenames(root_dir)

{% endraw %} {% raw %}

get_label[source]

get_label(filename)

{% endraw %} {% raw %}
{% endraw %} {% raw %}

class ImageSizeList[source]

ImageSizeList(img_sz_list=None)

{% endraw %} {% raw %}
{% endraw %} {% raw %}
# Example: with no sizes supplied (None), get_size() reports that the list is
# empty and returns the last set size, which is None here (see the output below).
img_sz_list = ImageSizeList(None)
img_sz_list.get_size()
No item present in the image size list
Returning the last set size which is: None
{% endraw %} {% raw %}
class LabelEncoder:
    """Translate labels into integer indices based on their position in ``labels``."""

    def __init__(self, labels):
        """Store ``labels`` and precompute the label -> index lookup table."""
        self.labels = labels
        # Position in the iteration order becomes the index; if a label is
        # repeated, the last occurrence determines its index.
        lookup = {}
        for position, name in enumerate(self.labels):
            lookup[name] = position
        self.label_to_idx = lookup

    def encode(self, label):
        """Return the integer index of ``label``; raises ``KeyError`` if unknown."""
        index = self.label_to_idx[label]
        return index
{% endraw %} {% raw %}

class Pipeline[source]

Pipeline(funcs:Union[Callable, list, tuple]=[])

{% endraw %} {% raw %}
{% endraw %} {% raw %}

class Dataset[source]

Dataset(train_dir:Union[str, Path], image_size=[], transforms=None, default_encode=True, **kwargs)

{% endraw %} {% raw %}
{% endraw %} {% raw %}
# Build a Dataset over cat_dog_path, passing three candidate image sizes.
ds = Dataset(cat_dog_path, image_size=[(28, 28), (32, 32), (64, 64)])
# Alternative setups kept for reference (tiny-imagenet; default size vs. fixed 224x224):
# ds = Dataset('/data/aniket/tiny-imagenet/data/tiny-imagenet-200/train')
# ds = Dataset('/data/aniket/tiny-imagenet/data/tiny-imagenet-200/train', image_size=(224,224))
{% endraw %} {% raw %}
#     return glob(f'{path}/*/images/*')

# def get_label(path):
#     return path.split('/')[-3]
{% endraw %} {% raw %}
# ds.update_component('get_label', get_label)
{% endraw %} {% raw %}
# Peek at the first item the generator yields and print the dtype of its first
# element alongside its second element (presumably image and label — confirm).
# NOTE(review): the meaning of the positional True argument is not shown here.
for e in ds.generator(True):
    print(e[0].dtype, e[1])
    break  # only the first item is needed for this check
<dtype: 'float32'> 0
{% endraw %} {% raw %}
# Obtain the dataset object produced by Dataset.get_tf_dataset().
dl = ds.get_tf_dataset()
{% endraw %} {% raw %}
# Take a single element from dl and print the shape of its first component.
for e in dl.take(1):
    print(e[0].shape)
(28, 28, 3)
{% endraw %} {% raw %}
 
Converted 03_datagenerator.ipynb.
{% endraw %}