Skip to content

supervised

Supervised denoiser implementation.

@author: Nicola VIGANÒ, CEA-MEM, Grenoble, France

Classes:

Supervised

Supervised(
    model: int | str | NetworkParams | Module | Mapping,
    data_scale_bias: DataScaleBias | None = None,
    reg_val: float | LossRegularizer | None = None,
    device: str = "cuda" if is_available() else "cpu",
    batch_size: int | None = None,
    augmentation: str | Sequence[str] | None = None,
    save_epochs_dir: str | None = None,
    verbose: bool = True,
)

Bases: Denoiser

Supervised denoising class.

Parameters:

  • model (int | str | NetworkParams | Module | Mapping) –

    Type of neural network to use, a specific network (or state) to use, or an epoch number (int) whose saved state is loaded from save_epochs_dir

  • data_scale_bias (DataScaleBias | None, default: None ) –

    Scale and bias of the input data, by default None

  • reg_val (float | None, default: None ) –

    Regularization value, by default 1e-5

  • device (str, default: 'cuda' if is_available() else 'cpu' ) –

    Device to use, by default "cuda" if cuda is available, otherwise "cpu"

  • save_epochs_dir (str | None, default: None ) –

    Directory where to save network states at each epoch. If None disabled, by default None

  • verbose (bool, default: True ) –

    Whether to produce verbose output, by default True

Methods:

  • infer

    Inference, given an initial stack of images.

  • prepare_data

    Prepare input data for training.

  • train

    Supervised training.

Attributes:

  • n_dims (int) –

    Returns the expected signal dimensions.

Source code in src/autoden/algorithms/denoiser.py
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
def __init__(
    self,
    model: int | str | NetworkParams | pt.nn.Module | Mapping,
    data_scale_bias: DataScaleBias | None = None,
    reg_val: float | LossRegularizer | None = None,
    device: str = "cuda" if pt.cuda.is_available() else "cpu",
    batch_size: int | None = None,
    augmentation: str | Sequence[str] | None = None,
    save_epochs_dir: str | None = None,
    verbose: bool = True,
) -> None:
    """Initialize the denoiser.

    Parameters
    ----------
    model : int | str | NetworkParams | pt.nn.Module | Mapping
        Type of neural network to use or a specific network (or state) to use.
        If an int is passed, it is used as the epoch number of a model state
        that is loaded from `save_epochs_dir`.
    data_scale_bias : DataScaleBias | None, optional
        Scale and bias of the input data, by default None
    reg_val : float | LossRegularizer | None, optional
        Regularization value or regularizer, stored as-is, by default None
    device : str, optional
        Device to use, by default "cuda" if cuda is available, otherwise "cpu"
    batch_size : int | None, optional
        Batch size, stored as-is for later use, by default None
    augmentation : str | Sequence[str] | None, optional
        Name(s) of the augmentation(s). A single string or a sequence is
        normalized to a list of lower-case strings, while None becomes an
        empty list. By default None
    save_epochs_dir : str | None, optional
        Directory where to save network states at each epoch.
        If None disabled, by default None
    verbose : bool, optional
        Whether to produce verbose output, by default True

    Raises
    ------
    ValueError
        If `model` is an epoch number but `save_epochs_dir` is None, or if
        `model` is of an unsupported type.
    """
    if isinstance(model, int):
        # Use the constructor argument here: `self.save_epochs_dir` is only
        # assigned at the end of this method, so reading it at this point
        # would fail with an AttributeError.
        if save_epochs_dir is None:
            raise ValueError("Directory for saving epochs not specified")

        model = load_model_state(save_epochs_dir, epoch_num=model)

    if isinstance(model, (str, NetworkParams, Mapping, pt.nn.Module)):
        self.model = create_network(model, device=device)
    else:
        raise ValueError(f"Invalid model {type(model)}")
    if verbose:
        get_num_parameters(self.model, verbose=True)

    # Normalize the augmentation specification to a list of lower-case names
    if augmentation is None:
        augmentation = []
    elif isinstance(augmentation, str):
        augmentation = [augmentation.lower()]
    elif isinstance(augmentation, Sequence):
        augmentation = [str(a).lower() for a in augmentation]

    self.data_sb = data_scale_bias

    self.reg_val = reg_val
    self.device = device
    self.batch_size = batch_size
    self.augmentation = augmentation
    self.save_epochs_dir = save_epochs_dir
    self.verbose = verbose

n_dims property

n_dims: int

Returns the expected signal dimensions.

If the model is an instance of SerializableModel and has an init_params attribute containing the key "n_dims", this property returns the value associated with "n_dims". Otherwise, it defaults to 2.

Returns:

  • int

    The expected signal dimensions.

infer

infer(inp: NDArray) -> NDArray

Inference, given an initial stack of images.

Parameters:

  • inp (NDArray) –

    The input stack of images

Returns:

  • NDArray

    The denoised stack of images

Source code in src/autoden/algorithms/denoiser.py
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
def infer(self, inp: NDArray) -> NDArray:
    """Run the model on a stack of images and return the denoised result.

    Parameters
    ----------
    inp : NDArray
        The input stack of images

    Returns
    -------
    NDArray
        The denoised stack of images
    """
    # Bring the input into the network's working range (when scaling is known)
    sb = self.data_sb
    net_input = inp if sb is None else inp * sb.scale_inp - sb.bias_inp

    inp_t = data_to_tensor(net_input, device=self.device, n_dims=self.n_dims)

    self.model.eval()
    with pt.inference_mode():
        out_t: pt.Tensor = self.model(inp_t)
        # Drop the two leading axes and move the result back to the host
        out_cpu = out_t.squeeze(dim=(0, 1)).to("cpu")
        denoised = out_cpu.numpy()

    # Undo the output scaling (when scaling is known)
    sb = self.data_sb
    if sb is None:
        return denoised
    return (denoised + sb.bias_out) / sb.scale_out

prepare_data

prepare_data(
    inp: NDArray,
    tgt: NDArray,
    num_tst_ratio: float = 0.2,
    strategy: str = "pixel-mask",
) -> tuple[NDArray, NDArray, NDArray | list[int]]

Prepare input data for training.

Parameters:

  • inp (NDArray) –

    The input data to be used for training. This should be a NumPy array of shape (N, [D, H], W), where N is the number of samples, and D, H and W are the depth, height and width of each sample, respectively.

  • tgt (NDArray) –

    The target data to be used for training. This should be a NumPy array of shape (N, [D, H], W), where N is the number of samples, and D, H and W are the depth, height and width of each sample, respectively.

  • num_tst_ratio (float, default: 0.2 ) –

    The ratio of the input data to be used for testing. The remaining data will be used for training. Default is 0.2.

  • strategy (str, default: 'pixel-mask' ) –

    The strategy to be used for creating training and testing sets. The available strategies are: - "pixel-mask": Use randomly chosen pixels in the images as test set. - "self-similar": Use entire randomly chosen images as test set. Default is "pixel-mask".

Returns:

  • tuple[NDArray, NDArray, NDArray | list[int]]

    A tuple containing: - The input data array. - The target data array. - Either the mask array indicating the testing pixels or the list of test indices.

Source code in src/autoden/algorithms/supervised.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def prepare_data(
    self, inp: NDArray, tgt: NDArray, num_tst_ratio: float = 0.2, strategy: str = "pixel-mask"
) -> tuple[NDArray, NDArray, NDArray | list[int]]:
    """
    Prepare input data for training.

    Parameters
    ----------
    inp : NDArray
        The input data to be used for training. This should be a NumPy array of shape (N, [D, H], W), where N is the
        number of samples, and D, H and W are the depth, height and width of each sample, respectively.
    tgt : NDArray
        The target data to be used for training. This should be a NumPy array of shape (N, [D, H], W), where N is the
        number of samples, and D, H and W are the depth, height and width of each sample, respectively.
    num_tst_ratio : float, optional
        The ratio of the input data to be used for testing. The remaining data will be used for training.
        Default is 0.2.
    strategy : str, optional
        The strategy to be used for creating training and testing sets. The available strategies are:
        - "pixel-mask": Use randomly chosen pixels in the images as test set.
        - "self-similar": Use entire randomly chosen images as test set.
        Default is "pixel-mask".

    Returns
    -------
    tuple[NDArray, NDArray, NDArray]
        A tuple containing:
        - The input data array.
        - The target data array.
        - Either the mask array indicating the testing pixels or the list of test indices.
    """
    if inp.ndim < self.n_dims:
        raise ValueError(f"Target data should at least be of {self.n_dims} dimensions, but its shape is {inp.shape}")

    num_imgs = inp.shape[0]
    if tgt.ndim == (inp.ndim - 1):
        tgt = np.tile(tgt[None, ...], [num_imgs, *np.ones_like(tgt.shape)])

    if inp.shape != tgt.shape:
        raise ValueError(
            f"Input and target data must have the same shape. Input shape: {inp.shape}, Target shape: {tgt.shape}"
        )

    if strategy.lower() == "pixel-mask":
        mask_tst = get_random_pixel_mask(inp.shape, mask_pixel_ratio=num_tst_ratio)
    elif strategy.lower() == "self-similar":
        mask_tst = get_random_image_indices(num_imgs, num_tst_ratio=num_tst_ratio)
    else:
        raise ValueError(f"Strategy {strategy} not implemented. Please choose one of: ['pixel-mask', 'self-similar']")

    return inp, tgt, mask_tst

train

train(
    inp: NDArray,
    tgt: NDArray,
    tst_inds: Sequence[int] | NDArray,
    epochs: int,
    learning_rate: float = 0.001,
    optimizer: str = "adam",
    lower_limit: float | NDArray | None = None,
    restarts: int | None = None,
    accum_grads: bool = False,
) -> dict[str, NDArray]

Supervised training.

Parameters:

  • inp (NDArray) –

    The input images

  • tgt (NDArray) –

    The target images

  • tst_inds (Sequence[int] | NDArray) –

    The validation set indices (either image indices if Sequence[int] or pixel indices if NDArray)

  • epochs (int) –

    Number of training epochs

  • learning_rate (float, default: 0.001 ) –

    The learning rate for the optimizer. Default is 1e-3.

  • optimizer (str, default: 'adam' ) –

    The optimization algorithm to be used for training. Default is "adam".

  • lower_limit (float | NDArray | None, default: None ) –

    The lower limit for the input data. If provided, the input data will be clipped to this limit. Default is None.

  • restarts (int | None, default: None ) –

    The number of times to restart the cosine annealing of the learning rate. If provided, the cosine annealing of the learning rate will be restarted the specified number of times. Default is None.

  • accum_grads (bool, default: False ) –

    Whether to accumulate gradients over multiple batches. If True, gradients will be accumulated over multiple batches before updating the model parameters. Default is False.

Source code in src/autoden/algorithms/supervised.py
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
def train(
    self,
    inp: NDArray,
    tgt: NDArray,
    tst_inds: Sequence[int] | NDArray,
    epochs: int,
    learning_rate: float = 1e-3,
    optimizer: str = "adam",
    lower_limit: float | NDArray | None = None,
    restarts: int | None = None,
    accum_grads: bool = False,
) -> dict[str, NDArray]:
    """Supervised training.

    Parameters
    ----------
    inp : NDArray
        The input images
    tgt : NDArray
        The target images
    tst_inds : Sequence[int] | NDArray
        The validation set indices (either image indices if Sequence[int] or pixel indices if NDArray)
    epochs : int
        Number of training epochs
    learning_rate : float, optional
        The learning rate for the optimizer. Default is 1e-3.
    optimizer : str, optional
        The optimization algorithm to be used for training. Default is "adam".
    lower_limit : float | NDArray | None, optional
        The lower limit for the input data. If provided, the input data will be clipped to this limit.
        Default is None.
    restarts : int | None, optional
        The number of times to restart the cosine annealing of the learning rate. If provided, the cosine annealing
        of the learning rate will be restarted the specified number of times. Default is None.
    accum_grads : bool, optional
        Whether to accumulate gradients over multiple batches. If True, gradients will be accumulated over multiple
        batches before updating the model parameters. Default is False.
    """
    num_imgs = inp.shape[0]

    if self.data_sb is None:
        self.data_sb = compute_scaling_supervised(inp, tgt)

    # Rescale the datasets
    inp = inp * self.data_sb.scale_inp - self.data_sb.bias_inp
    tgt = tgt * self.data_sb.scale_tgt - self.data_sb.bias_tgt

    inp = inp.astype(np.float32)
    tgt = tgt.astype(np.float32)

    reg = self._get_regularization()

    if isinstance(tst_inds, Sequence):
        tst_inds = np.array(tst_inds, dtype=int)
        if np.any(tst_inds < 0) or np.any(tst_inds >= num_imgs):
            raise ValueError(
                "Each cross-validation index should be greater or equal than 0,"
                f" and less than the number of images {num_imgs}"
            )
        trn_inds = np.delete(np.arange(num_imgs), obj=tst_inds)

        # Create datasets
        dset_trn = (inp[trn_inds], tgt[trn_inds])
        dset_tst = (inp[tst_inds], tgt[tst_inds])

        losses = self._train_selfsimilar_batched(
            dset_trn,
            dset_tst,
            epochs=epochs,
            learning_rate=learning_rate,
            optimizer=optimizer,
            regularizer=reg,
            lower_limit=lower_limit,
            restarts=restarts,
            accum_grads=accum_grads,
        )
    elif isinstance(tst_inds, np.ndarray):
        losses = self._train_pixelmask_batched(
            inp,
            tgt,
            tst_inds,
            epochs=epochs,
            learning_rate=learning_rate,
            optimizer=optimizer,
            regularizer=reg,
            lower_limit=lower_limit,
            restarts=restarts,
            accum_grads=accum_grads,
        )
    else:
        raise ValueError(
            "`tst_inds` should either be a Sequence[int] or NDArray. Please use the the `prepare_data` function if unsure."
        )

    if self.verbose:
        self._plot_loss_curves(losses, f"Supervised {optimizer.upper()}")

    return losses