# Source code for fortuna.data.loader.array_loaders
from __future__ import annotations
from typing import (
Optional,
Tuple,
)
import numpy as np
from fortuna.data.loader.base import (
BaseDataLoaderABC,
BaseInputsLoader,
BaseTargetsLoader,
)
from fortuna.data.loader.utils import IterableData
from fortuna.typing import (
Array,
Batch,
Shape,
)
class DataLoader(BaseDataLoaderABC):
    """
    Data loader whose iteration yields batches of ``(inputs, targets)`` arrays.
    """

    @property
    def num_unique_labels(self) -> Optional[int]:
        """
        Number of distinct target values in the data loader.

        The value is computed on first access, by materializing all targets, and then cached.

        Returns
        -------
        Optional[int]
            The number of unique target values.
        """
        # NOTE(review): `_num_unique_labels` is presumably initialized by the base class - confirm.
        if self._num_unique_labels is None:
            self._num_unique_labels = len(np.unique(self.to_array_targets()))
        return self._num_unique_labels

    @classmethod
    def from_array_data(
        cls,
        data: Batch,
        batch_size: Optional[int] = None,
        shuffle: bool = False,
        prefetch: bool = False,
    ) -> DataLoader:
        """
        Build a :class:`~fortuna.data.loader.DataLoader` object from a tuple of arrays of input and target variables,
        respectively.

        Parameters
        ----------
        data: Batch
            Input and target arrays of data.
        batch_size: Optional[int]
            The batch size. If not given, the data will not be batched.
        shuffle: bool
            Whether the data loader should shuffle at every call.
        prefetch: bool
            Whether to prefetch the next batch.

        Returns
        -------
        DataLoader
            A data loader built out of the tuple of arrays.
        """
        return cls(
            iterable=IterableData.from_batch_array_data(
                data, batch_size=batch_size, shuffle=shuffle, prefetch=prefetch
            )
        )

    def to_inputs_loader(self) -> InputsLoader:
        """
        Reduce a data loader to an inputs loader.

        Returns
        -------
        InputsLoader
            The inputs loader derived from the data loader.
        """
        return InputsLoader(IterableData.data_loader_to_inputs_iterable(self))

    def to_targets_loader(self) -> TargetsLoader:
        """
        Reduce a data loader to a targets loader.

        Returns
        -------
        TargetsLoader
            The targets loader derived from the data loader.
        """
        return TargetsLoader(IterableData.data_loader_to_targets_iterable(self))

    def to_array_data(self) -> Batch:
        """
        Reduce a data loader to a tuple of input and target arrays.

        All batches are concatenated along the first axis.

        Returns
        -------
        Batch
            Tuple of input and target arrays.
        """
        inputs, targets = [], []
        for batch_inputs, batch_targets in self:
            inputs.append(batch_inputs)
            targets.append(batch_targets)
        return np.concatenate(inputs, 0), np.concatenate(targets, 0)

    def to_array_inputs(self) -> Array:
        """
        Reduce a data loader to an array of input data.

        All input batches are concatenated along the first axis.

        Returns
        -------
        Array
            Array of input data.
        """
        return np.concatenate([batch_inputs for batch_inputs, _ in self], 0)

    def to_array_targets(self) -> Array:
        """
        Reduce a data loader to an array of target data.

        All target batches are concatenated along the first axis.

        Returns
        -------
        Array
            Array of target data.
        """
        return np.concatenate([batch_targets for _, batch_targets in self], 0)

    def chop(self, divisor: int) -> DataLoader:
        """
        Chop the last part of each batch of the data loader, to make sure the number of data points per batch is a
        multiple of `divisor`.

        Non-empty batches with fewer than `divisor` data points cannot be made divisible and are dropped.

        Parameters
        ----------
        divisor : int
            Number that must divide the number of data points of each batch.

        Returns
        -------
        DataLoader
            A data loader with chopped batches.
        """

        def fun():
            for inputs, targets in self:
                remainder = targets.shape[0] % divisor
                if remainder == 0:
                    yield inputs, targets
                elif targets.shape[0] > divisor:
                    # Drop the trailing data points that exceed a multiple of `divisor`.
                    yield inputs[:-remainder], targets[:-remainder]

        return self.from_callable_iterable(fun)

    def split(self, n_data: int) -> Tuple[DataLoader, DataLoader]:
        """
        Split a data loader into two data loaders.

        Parameters
        ----------
        n_data: int
            Number of data point after which the data loader should be split. The first returned data loader will
            contain exactly `n_data` data points (or all of them, if fewer are available). The second one will
            contain the remaining ones.

        Returns
        -------
        Tuple[DataLoader, DataLoader]
            The two data loaders made out of the original one.

        Raises
        ------
        ValueError
            If `n_data` is negative.
        """
        if n_data < 0:
            raise ValueError(f"`n_data` must be non-negative, but {n_data} was given.")

        def data_loader1():
            remaining = n_data
            if remaining == 0:
                return
            for inputs, targets in self:
                size = inputs.shape[0]
                if size > remaining:
                    # This batch straddles the split point: keep only its head and stop.
                    yield inputs[:remaining], targets[:remaining]
                    return
                remaining -= size
                yield inputs, targets
                if remaining == 0:
                    # Stop here to avoid fetching a batch that would be discarded.
                    return

        def data_loader2():
            count = 0
            for inputs, targets in self:
                if count > n_data:
                    # Already past the split point: forward batches untouched.
                    yield inputs, targets
                    continue
                offset = n_data - count
                count += inputs.shape[0]
                if count > n_data:
                    # This batch reaches past the split point: keep only its tail.
                    yield inputs[offset:], targets[offset:]

        return self.from_callable_iterable(data_loader1), self.from_callable_iterable(
            data_loader2
        )

    def sample(self, seed: int, n_samples: int) -> DataLoader:
        """
        Sample from the data loader, with replacement.

        The data loader is traversed repeatedly. At each pass, every data point of a batch is kept or discarded
        according to a random 0/1 draw, until `n_samples` data points have been collected. A data point may therefore
        appear more than once across passes.

        Parameters
        ----------
        seed: int
            Random seed.
        n_samples: int
            Number of samples.

        Returns
        -------
        DataLoader
            A data loader made of the sampled data points.

        Raises
        ------
        ValueError
            If `n_samples` is negative. When iterating over the returned data loader, a `ValueError` is also raised
            if samples are requested but the original data loader contains no data points.
        """
        if n_samples < 0:
            raise ValueError(
                f"`n_samples` must be non-negative, but {n_samples} was given."
            )

        def fun():
            # The generator is seeded at every call, so each iteration reproduces the same samples.
            rng = np.random.default_rng(seed)
            count = 0
            while count < n_samples:
                n_seen = 0
                for inputs, targets in self:
                    n_seen += inputs.shape[0]
                    idx = rng.choice(2, inputs.shape[0]).astype("bool")
                    inputs, targets = inputs[idx], targets[idx]
                    if count + inputs.shape[0] > n_samples:
                        # Trim the batch so that exactly `n_samples` are returned overall.
                        inputs, targets = (
                            inputs[: n_samples - count],
                            targets[: n_samples - count],
                        )
                    count += inputs.shape[0]
                    if inputs.shape[0] > 0:
                        yield inputs, targets
                    if count == n_samples:
                        return
                if n_seen == 0:
                    # Without this guard, an empty data loader would make the outer loop spin forever.
                    raise ValueError("Cannot sample from an empty data loader.")

        return self.from_callable_iterable(fun)

    @property
    def input_shape(self) -> Shape:
        """
        Shape of a single input, i.e. the shape of the first batch of inputs without its leading (batch) axis.

        Returns
        -------
        Shape
            The shape of one input data point.

        Raises
        ------
        ValueError
            If the data loader yields no batches.
        """
        for inputs, _ in self:
            return inputs.shape[1:]
        raise ValueError(
            "Cannot infer the input shape from an empty data loader."
        )
class InputsLoader(BaseInputsLoader):
    """
    Loader whose iteration yields batches of input arrays.
    """

    @classmethod
    def from_array_inputs(
        cls,
        inputs: Array,
        batch_size: Optional[int] = None,
        shuffle: bool = False,
        prefetch: bool = False,
    ) -> InputsLoader:
        """
        Build a :class:`~fortuna.data.loader.InputsLoader` object from an array of input data.

        Parameters
        ----------
        inputs: Array
            Input array of data.
        batch_size: Optional[int]
            The batch size. If not given, the inputs will not be batched.
        shuffle: bool
            Whether the inputs loader should shuffle at every call.
        prefetch: bool
            Whether to prefetch the next batch.

        Returns
        -------
        InputsLoader
            An inputs loader built out of the array of inputs.
        """
        return cls(
            iterable=IterableData.from_array_data(
                inputs, batch_size=batch_size, shuffle=shuffle, prefetch=prefetch
            )
        )

    def to_array_inputs(self) -> Array:
        """
        Reduce an inputs loader to an array of inputs.

        All batches are concatenated along the first axis.

        Returns
        -------
        Array
            Array of input data.
        """
        return np.concatenate(list(self), 0)

    def chop(self, divisor: int) -> InputsLoader:
        """
        Chop the last part of each batch of the inputs loader, to make sure the number of data points per batch is a
        multiple of `divisor`.

        Non-empty batches with fewer than `divisor` data points cannot be made divisible and are dropped.

        Parameters
        ----------
        divisor : int
            Number that must divide the number of data points of each batch.

        Returns
        -------
        InputsLoader
            An inputs loader with chopped batches.
        """

        def fun():
            for inputs in self:
                remainder = inputs.shape[0] % divisor
                if remainder == 0:
                    yield inputs
                elif inputs.shape[0] > divisor:
                    # Drop the trailing data points that exceed a multiple of `divisor`.
                    yield inputs[:-remainder]

        return self.from_callable_iterable(fun)

    def sample(self, seed: int, n_samples: int) -> InputsLoader:
        """
        Sample from the inputs loader, with replacement.

        The inputs loader is traversed repeatedly. At each pass, every input of a batch is kept or discarded
        according to a random 0/1 draw, until `n_samples` inputs have been collected. An input may therefore appear
        more than once across passes.

        Parameters
        ----------
        seed: int
            Random seed.
        n_samples: int
            Number of samples.

        Returns
        -------
        InputsLoader
            An inputs loader made of the sampled inputs.

        Raises
        ------
        ValueError
            If `n_samples` is negative. When iterating over the returned inputs loader, a `ValueError` is also
            raised if samples are requested but the original inputs loader contains no inputs.
        """
        if n_samples < 0:
            raise ValueError(
                f"`n_samples` must be non-negative, but {n_samples} was given."
            )

        def fun():
            # The generator is seeded at every call, so each iteration reproduces the same samples.
            rng = np.random.default_rng(seed)
            count = 0
            while count < n_samples:
                n_seen = 0
                for inputs in self:
                    n_seen += inputs.shape[0]
                    idx = rng.choice(2, inputs.shape[0]).astype("bool")
                    inputs = inputs[idx]
                    if count + inputs.shape[0] > n_samples:
                        # Trim the batch so that exactly `n_samples` are returned overall.
                        inputs = inputs[: n_samples - count]
                    count += inputs.shape[0]
                    if inputs.shape[0] > 0:
                        yield inputs
                    if count == n_samples:
                        return
                if n_seen == 0:
                    # Without this guard, an empty inputs loader would make the outer loop spin forever.
                    raise ValueError("Cannot sample from an empty inputs loader.")

        return self.from_callable_iterable(fun)

    def split(self, n_data: int) -> Tuple[InputsLoader, InputsLoader]:
        """
        Split an inputs loader into two inputs loaders.

        Parameters
        ----------
        n_data: int
            Number of data point after which the inputs loader should be split. The first returned inputs loader will
            contain exactly `n_data` inputs (or all of them, if fewer are available). The second one will contain the
            remaining ones.

        Returns
        -------
        Tuple[InputsLoader, InputsLoader]
            The two inputs loaders made out of the original one.

        Raises
        ------
        ValueError
            If `n_data` is negative.
        """
        if n_data < 0:
            raise ValueError(f"`n_data` must be non-negative, but {n_data} was given.")

        def inputs_loader1():
            remaining = n_data
            if remaining == 0:
                return
            for inputs in self:
                size = inputs.shape[0]
                if size > remaining:
                    # This batch straddles the split point: keep only its head and stop.
                    yield inputs[:remaining]
                    return
                remaining -= size
                yield inputs
                if remaining == 0:
                    # Stop here to avoid fetching a batch that would be discarded.
                    return

        def inputs_loader2():
            count = 0
            for inputs in self:
                if count > n_data:
                    # Already past the split point: forward batches untouched.
                    yield inputs
                    continue
                offset = n_data - count
                count += inputs.shape[0]
                if count > n_data:
                    # This batch reaches past the split point: keep only its tail.
                    yield inputs[offset:]

        return self.from_callable_iterable(inputs_loader1), self.from_callable_iterable(
            inputs_loader2
        )
class TargetsLoader(BaseTargetsLoader):
    """
    Loader whose iteration yields batches of target arrays.
    """

    @classmethod
    def from_array_targets(
        cls,
        targets: Array,
        batch_size: Optional[int] = None,
        shuffle: bool = False,
        prefetch: bool = False,
    ) -> TargetsLoader:
        """
        Build a :class:`~fortuna.data.loader.TargetsLoader` object from an array of target data.

        Parameters
        ----------
        targets: Array
            Target array of data.
        batch_size: Optional[int]
            The batch size. If not given, the targets will not be batched.
        shuffle: bool
            Whether the target loader should shuffle at every call.
        prefetch: bool
            Whether to prefetch the next batch.

        Returns
        -------
        TargetsLoader
            A targets loader built out of the array of targets.
        """
        return cls(
            iterable=IterableData.from_array_data(
                targets, batch_size=batch_size, shuffle=shuffle, prefetch=prefetch
            )
        )

    def to_array_targets(self) -> Array:
        """
        Reduce a targets loader to an array of targets.

        All batches are concatenated along the first axis.

        Returns
        -------
        Array
            Array of target data.
        """
        return np.concatenate(list(self), 0)

    def chop(self, divisor: int) -> TargetsLoader:
        """
        Chop the last part of each batch of the targets loader, to make sure the number of data points per batch is a
        multiple of `divisor`.

        Non-empty batches with fewer than `divisor` data points cannot be made divisible and are dropped.

        Parameters
        ----------
        divisor : int
            Number that must divide the number of data points of each batch.

        Returns
        -------
        TargetsLoader
            A targets loader with chopped batches.
        """

        def fun():
            for targets in self:
                remainder = targets.shape[0] % divisor
                if remainder == 0:
                    yield targets
                elif targets.shape[0] > divisor:
                    # Drop the trailing data points that exceed a multiple of `divisor`.
                    yield targets[:-remainder]

        return self.from_callable_iterable(fun)

    def sample(self, seed: int, n_samples: int) -> TargetsLoader:
        """
        Sample from the targets loader, with replacement.

        The targets loader is traversed repeatedly. At each pass, every target of a batch is kept or discarded
        according to a random 0/1 draw, until `n_samples` targets have been collected. A target may therefore appear
        more than once across passes.

        Parameters
        ----------
        seed: int
            Random seed.
        n_samples: int
            Number of samples.

        Returns
        -------
        TargetsLoader
            A targets loader made of the sampled targets.

        Raises
        ------
        ValueError
            If `n_samples` is negative. When iterating over the returned targets loader, a `ValueError` is also
            raised if samples are requested but the original targets loader contains no targets.
        """
        if n_samples < 0:
            raise ValueError(
                f"`n_samples` must be non-negative, but {n_samples} was given."
            )

        def fun():
            # The generator is seeded at every call, so each iteration reproduces the same samples.
            rng = np.random.default_rng(seed)
            count = 0
            while count < n_samples:
                n_seen = 0
                for targets in self:
                    n_seen += targets.shape[0]
                    idx = rng.choice(2, targets.shape[0]).astype("bool")
                    targets = targets[idx]
                    if count + targets.shape[0] > n_samples:
                        # Trim the batch so that exactly `n_samples` are returned overall.
                        targets = targets[: n_samples - count]
                    count += targets.shape[0]
                    if targets.shape[0] > 0:
                        yield targets
                    if count == n_samples:
                        return
                if n_seen == 0:
                    # Without this guard, an empty targets loader would make the outer loop spin forever.
                    raise ValueError("Cannot sample from an empty targets loader.")

        return self.from_callable_iterable(fun)

    def split(self, n_data: int) -> Tuple[TargetsLoader, TargetsLoader]:
        """
        Split a targets loader into two targets loaders.

        Parameters
        ----------
        n_data: int
            Number of data point after which the targets loader should be split. The first returned targets loader
            will contain exactly `n_data` targets (or all of them, if fewer are available). The second one will
            contain the remaining ones.

        Returns
        -------
        Tuple[TargetsLoader, TargetsLoader]
            The two targets loaders made out of the original one.

        Raises
        ------
        ValueError
            If `n_data` is negative.
        """
        if n_data < 0:
            raise ValueError(f"`n_data` must be non-negative, but {n_data} was given.")

        def targets_loader1():
            remaining = n_data
            if remaining == 0:
                return
            for targets in self:
                size = targets.shape[0]
                if size > remaining:
                    # This batch straddles the split point: keep only its head and stop.
                    yield targets[:remaining]
                    return
                remaining -= size
                yield targets
                if remaining == 0:
                    # Stop here to avoid fetching a batch that would be discarded.
                    return

        def targets_loader2():
            count = 0
            for targets in self:
                if count > n_data:
                    # Already past the split point: forward batches untouched.
                    yield targets
                    continue
                offset = n_data - count
                count += targets.shape[0]
                if count > n_data:
                    # This batch reaches past the split point: keep only its tail.
                    yield targets[offset:]

        return self.from_callable_iterable(
            targets_loader1
        ), self.from_callable_iterable(targets_loader2)