Skip to content

declearn.main.privacy.DPTrainingManager

Bases: TrainingManager

TrainingManager subclass adding Differential Privacy mechanisms.

This class extends the base TrainingManager class in three key ways:

  • Perform per-sample gradients clipping (through the Model API), parametrized by the added, optional sclip_norm attribute.
  • Add noise to batch-averaged gradients at each step of training, calibrated from an (epsilon, delta) DP budget and the planned training computational effort (number of steps, sample rate...).
  • Keep track of the spent privacy budget during training, and block training once the monitored budget is fully spent (early-stop the training routine if the next step would result in over-spending).

This TrainingManager therefore implements the differentially-private stochastic gradient descent (DP-SGD) algorithm [1], in a modular fashion that enables using any kind of optimizer plug-in supported by its (non-DP) parent.

References

[1] Abadi et al, 2016. Deep Learning with Differential Privacy. https://arxiv.org/abs/1607.00133

Source code in declearn/main/privacy/_dp_trainer.py
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
class DPTrainingManager(TrainingManager):
    """TrainingManager subclass adding Differential Privacy mechanisms.

    This class extends the base TrainingManager class in three key ways:

    * Perform per-sample gradients clipping (through the Model API),
      parametrized by the added, optional `sclip_norm` attribute.
    * Add noise to batch-averaged gradients at each step of training,
      calibrated from an (epsilon, delta) DP budget and the planned
      training computational effort (number of steps, sample rate...).
    * Keep track of the spent privacy budget during training, and block
      training once the monitored budget is fully spent (early-stop the
      training routine if the next step would result in over-spending).

    This TrainingManager therefore implements the differentially-private
    stochastic gradient descent (DP-SGD) algorithm [1], in a modular
    fashion that enables using any kind of optimizer plug-in supported
    by its (non-DP) parent.

    References
    ----------
    [1] Abadi et al, 2016.
        Deep Learning with Differential Privacy.
        https://arxiv.org/abs/1607.00133
    """

    def __init__(
        self,
        model: Model,
        optim: Optimizer,
        aggrg: Aggregator,
        train_data: Dataset,
        valid_data: Optional[Dataset] = None,
        metrics: Union[MetricSet, List[MetricInputType], None] = None,
        logger: Union[logging.Logger, str, None] = None,
        verbose: bool = True,
    ) -> None:
        """Instantiate the manager, with DP features disabled by default.

        All arguments are forwarded as-is to the parent constructor.
        DP-specific attributes are initialized to neutral values, and
        are only populated when `make_private` is called.
        """
        # inherited signature; pylint: disable=too-many-arguments
        super().__init__(
            model=model,
            optim=optim,
            aggrg=aggrg,
            train_data=train_data,
            valid_data=valid_data,
            metrics=metrics,
            logger=logger,
            verbose=verbose,
        )
        # Add DP-related fields: accountant, clipping norm and budget.
        self.accountant = None  # type: Optional[IAccountant]
        self.sclip_norm = None  # type: Optional[float]
        # (epsilon, delta) budget; overwritten by `make_private`.
        self._dp_budget = (0.0, 0.0)
        # (noise_multiplier, sample_rate) in use during a training round.
        self._dp_states = None  # type: Optional[Tuple[float, float]]

    def make_private(
        self,
        message: messaging.PrivacyRequest,
    ) -> None:
        """Set up the use of DP-SGD based on a received PrivacyRequest.

        Parameters
        ----------
        message: PrivacyRequest
            PrivacyRequest message specifying the privacy budget, type
            of accountant and expected use of the training data.
        """
        # REVISE: add support for fixed requested noise multiplier
        # Compute the noise multiplier to use based on the budget
        # and the planned training duration and parameters.
        noise_multiplier = self._fit_noise_multiplier(
            budget=message.budget,
            n_samples=self.train_data.get_data_specs().n_samples,
            batch_size=message.batches["batch_size"],
            n_round=message.rounds,
            n_epoch=message.n_epoch,
            n_steps=message.n_steps,
            accountant=message.accountant,
            drop_remainder=message.batches.get("drop_remainder", True),
        )
        # Add a gaussian noise addition module to the optimizer's pipeline.
        # It is placed first, which is where `get_noise_multiplier` looks.
        noise_module = GaussianNoiseModule(
            std=noise_multiplier * message.sclip_norm,
            safe_mode=message.use_csprng,
            seed=message.seed,
        )
        self.optim.modules.insert(0, noise_module)
        # Create an accountant and store the clipping norm and privacy budget.
        self.accountant = create_accountant(message.accountant)
        self.sclip_norm = message.sclip_norm
        self._dp_budget = message.budget

    def _fit_noise_multiplier(
        self,
        budget: Tuple[float, float],
        n_samples: int,
        batch_size: int,
        n_round: int,
        n_epoch: Optional[int] = None,
        n_steps: Optional[int] = None,
        accountant: str = "rdp",
        drop_remainder: bool = True,
    ) -> float:
        """Parametrize a DP noise multiplier based on a training schedule.

        Parameters
        ----------
        budget: (float, float)
            Target (epsilon, delta) privacy budget.
        n_samples: int
            Number of samples in the training dataset.
        batch_size: int
            Number of samples per batch.
        n_round: int
            Number of planned training rounds.
        n_epoch: int or None
            Optional maximum number of epochs per round.
        n_steps: int or None
            Optional maximum number of steps per round.
        accountant: str
            Name of the accountant, passed to `get_noise_multiplier`.
        drop_remainder: bool
            Whether the last, smaller-than-`batch_size` batch of an
            epoch is dropped rather than used.

        Returns
        -------
        noise_multiplier: float
            Value returned by `get_noise_multiplier` for the target
            budget, sample rate and total number of steps.
        """
        # arguments are all required; pylint: disable=too-many-arguments
        # Compute the expected number of batches per epoch.
        n_batches = n_samples // batch_size
        if not drop_remainder:
            # Count the trailing partial batch, if any. The remainder must
            # be that of the *samples* count divided by the batch size.
            n_batches += bool(n_samples % batch_size)
        # Compute the total number of steps that will be performed.
        steps = n_round
        if n_epoch and n_steps:
            steps *= min(n_steps, n_epoch * n_batches)
        elif n_steps:  # i.e. n_epoch is None
            steps *= n_steps
        elif n_epoch:  # i.e. n_steps is None
            steps *= n_epoch * n_batches
        else:  # i.e. both None: then default n_epoch=1 is used
            steps *= n_batches
            if n_epoch is None:
                self.logger.warning(
                    "Both `n_epoch` and `n_steps` are None in the received "
                    "PrivacyRequest. As a result, the noise used for DP is "
                    "calibrated assuming `n_epoch=1` per round, which might "
                    "be wrong, and result in under- or over-spending the "
                    "privacy budget during the actual training rounds."
                )
        # Use the former information to choose the noise multiplier.
        # Note: this resolves to the module-level function, not the method.
        return get_noise_multiplier(
            target_epsilon=budget[0],
            target_delta=budget[1],
            sample_rate=batch_size / n_samples,
            steps=steps,
            accountant=accountant,
        )

    def get_noise_multiplier(self) -> Optional[float]:
        """Return the noise multiplier used for DP-SGD, if any.

        Returns
        -------
        noise_multiplier: float or None
            Standard deviation of the gaussian noise-addition module
            placed at the start of the wrapped optimizer's pipeline,
            divided by the sample-wise clipping norm (or by 1.0 when
            the latter is unset). None if no such module is present.
        """
        if self.optim.modules:
            if isinstance(self.optim.modules[0], GaussianNoiseModule):
                return self.optim.modules[0].std / (self.sclip_norm or 1.0)
        return None

    def get_privacy_spent(self) -> Tuple[float, float]:
        """Return the (epsilon, delta) privacy budget spent so far.

        Returns
        -------
        epsilon: float
            epsilon component of the privacy budget spent.
        delta: float
            delta component of the privacy budget spent.

        Raises
        ------
        RuntimeError
            If no accountant is set, i.e. DP is not used.
        """
        if self.accountant is None:
            raise RuntimeError("Cannot return spent privacy: DP is not used.")
        # Delta is fixed by the budget; epsilon is queried for that delta.
        delta = self._dp_budget[1]
        epsilon = self.accountant.get_epsilon(delta=delta)
        return epsilon, delta

    def _run_train_step(
        self,
        batch: Batch,
    ) -> None:
        """Run a training step, with DP accounting and gradient clipping.

        Raises
        ------
        StopIteration
            If taking the step would exceed the DP budget.
        """
        # Optionally have the DP accountant authorize or prevent the step.
        # Note that once the step is authorized, it is also accounted for.
        self._prevent_budget_overspending()
        # Use fixed-threshold sample-wise gradients clipping, in addition
        # to all the features implemented at the parent level.
        # Note: in the absence of `make_private`, no clipping is performed.
        self.optim.run_train_step(self.model, batch, sclip=self.sclip_norm)

    def _prevent_budget_overspending(self) -> None:
        """Raise a StopIteration if a step would overspend the DP budget.

        This method relies on the private attribute `_dp_states` to have
        been properly set as part of the `_training_round` routine.

        Raises
        ------
        StopIteration
            If the epsilon spent after accounting for the next step
            exceeds the epsilon component of the budget.
        """
        if self.accountant is not None and self._dp_states is not None:
            noise, srate = self._dp_states
            # Tentatively account for the step, then check the budget.
            self.accountant.step(noise_multiplier=noise, sample_rate=srate)
            if self.get_privacy_spent()[0] > self._dp_budget[0]:
                # Remove the step from the history as it will not be taken.
                last = self.accountant.history.pop(-1)
                if last[-1] > 1:  # number of steps with that (noise, srate)
                    last = (last[0], last[1], last[2] - 1)
                    self.accountant.history.append(last)
                # Prevent the step from being taken.
                raise StopIteration(
                    "Local DP budget would be exceeded by taking the next "
                    "training step."
                )

    def _training_round(
        self,
        message: messaging.TrainRequest,
    ) -> messaging.TrainReply:
        """Run a training round, setting up and cleaning up DP states.

        Raises
        ------
        RuntimeError
            If an accountant is set but no noise multiplier is found.
        """
        # When using differential privacy, store accountant-required values.
        if self.accountant is not None:
            n_smp = self.train_data.get_data_specs().n_samples
            srate = message.batches["batch_size"] / n_smp  # type: float
            noise = self.get_noise_multiplier()
            if noise is None:
                raise RuntimeError(
                    "Noise multiplier not found: something is wrong with "
                    "the local DP setup."
                )
            self._dp_states = (noise, srate)
        # Delegate all of the actual training routine to the parent class.
        # DP budget saturation will cause training to be interrupted.
        reply = super()._training_round(message)
        # When using DP, clean up things and log about the spent budget.
        if self.accountant is not None:
            self._dp_states = None  # remove now out-of-scope state values
            self.logger.info(
                "Local DP budget spent at the end of the round: %s",
                self.get_privacy_spent(),
            )
        return reply

get_noise_multiplier()

Return the noise multiplier used for DP-SGD, if any.

Returns:

Name Type Description
noise_multiplier float or None

Standard deviation of the gaussian noise-addition module placed at the start of the wrapped optimizer's pipeline, divided by the sample-wise clipping norm, if such a module is indeed present; None otherwise.

Source code in declearn/main/privacy/_dp_trainer.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
def get_noise_multiplier(self) -> Optional[float]:
    """Return the DP-SGD noise multiplier, when a noise module is set.

    Returns
    -------
    noise_multiplier: float or None
        `std` of the gaussian noise module found first in the wrapped
        optimizer's modules, divided by `sclip_norm` (or by 1.0 when
        that attribute is None or zero). None when the optimizer has
        no modules, or when the first one is not a gaussian noise one.
    """
    modules = self.optim.modules
    # Guard: nothing plugged into the optimizer.
    if not modules:
        return None
    head = modules[0]
    # Guard: the leading module is not the DP noise-addition one.
    if not isinstance(head, GaussianNoiseModule):
        return None
    scale = self.sclip_norm or 1.0
    return head.std / scale

get_privacy_spent()

Return the (epsilon, delta) privacy budget spent so far.

Returns:

Name Type Description
epsilon float

epsilon component of the privacy budget spent.

delta float

delta component of the privacy budget spent.

Source code in declearn/main/privacy/_dp_trainer.py
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
def get_privacy_spent(self) -> Tuple[float, float]:
    """Report the (epsilon, delta) privacy budget consumed so far.

    Returns
    -------
    epsilon: float
        Value returned by the accountant's `get_epsilon` for `delta`.
    delta: float
        Second component of the stored `_dp_budget`.

    Raises
    ------
    RuntimeError
        If the `accountant` attribute is None.
    """
    accountant = self.accountant
    if accountant is None:
        raise RuntimeError("Cannot return spent privacy: DP is not used.")
    # Delta comes from the stored budget; epsilon is derived from it.
    _, target_delta = self._dp_budget[0], self._dp_budget[1]
    return accountant.get_epsilon(delta=target_delta), target_delta

make_private(message)

Set up the use of DP-SGD based on a received PrivacyRequest.

Parameters:

Name Type Description Default
message messaging.PrivacyRequest

PrivacyRequest message specifying the privacy budget, type of accountant and expected use of the training data.

required
Source code in declearn/main/privacy/_dp_trainer.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def make_private(
    self,
    message: messaging.PrivacyRequest,
) -> None:
    """Enable DP-SGD, configured from a received PrivacyRequest.

    A noise multiplier is fitted from the request's budget and training
    schedule, a gaussian noise module is inserted first in the optimizer's
    modules, and the accountant, clipping norm and budget are stored.

    Parameters
    ----------
    message: PrivacyRequest
        Request holding the privacy budget, the accountant type and
        the planned use of the training data.
    """
    # REVISE: add support for fixed requested noise multiplier
    batches = message.batches
    clip_norm = message.sclip_norm
    # Gather the planned training schedule, then fit the noise to it.
    schedule = {
        "budget": message.budget,
        "n_samples": self.train_data.get_data_specs().n_samples,
        "batch_size": batches["batch_size"],
        "n_round": message.rounds,
        "n_epoch": message.n_epoch,
        "n_steps": message.n_steps,
        "accountant": message.accountant,
        "drop_remainder": batches.get("drop_remainder", True),
    }
    multiplier = self._fit_noise_multiplier(**schedule)
    # Plug the gaussian noise module at the head of the optimizer pipeline.
    self.optim.modules.insert(
        0,
        GaussianNoiseModule(
            std=multiplier * clip_norm,
            safe_mode=message.use_csprng,
            seed=message.seed,
        ),
    )
    # Record the accountant, the clipping norm and the privacy budget.
    self.accountant = create_accountant(message.accountant)
    self.sclip_norm = clip_norm
    self._dp_budget = message.budget