# Source code for opacus.scripts.compute_dp_sgd_privacy

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Command-line script for computing privacy of a model trained with DP-SGD.
The script applies the RDP accountant to estimate privacy budget of an iterated
Sampled Gaussian Mechanism.

The code is mainly based on Google's TF Privacy:
https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/analysis/compute_dp_sgd_privacy.py


Example:

    To call this script from command line, you can enter:

    $ python -m opacus.scripts.compute_dp_sgd_privacy --epochs=3 --delta=1e-5 --sample-rate 0.01 --noise-multiplier 1.0 --alphas 2 5 10 20 100

    DP-SGD with
    - sampling rate = 1%,
    - noise_multiplier = 1.0,
    - iterated over 300 steps

    satisfies differential privacy with
    - epsilon = 2.39,
    - delta = 1e-05.

    The optimal alpha is 5.0.
"""
import argparse
import math
from typing import List, Tuple

from opacus.accountants.analysis.rdp import compute_rdp, get_privacy_spent


def _apply_dp_sgd_analysis(
    *,
    sample_rate: float,
    noise_multiplier: float,
    steps: int,
    alphas: List[float],
    delta: float,
    verbose: bool = True,
) -> Tuple[float, float]:
    """
    Runs RDP accounting for DP-SGD and converts the result into an
    (epsilon, delta) guarantee at the requested delta.

    The RDP values are obtained from ``compute_rdp`` for every order in
    ``alphas`` and then handed to ``get_privacy_spent`` together with ``delta``.

    Args:
        sample_rate : The sample rate in SGD
        noise_multiplier : The ratio of the standard deviation of the Gaussian
            noise to the L2-sensitivity of the function to which the noise is added
        steps : The number of steps
        alphas : A list of RDP orders
        delta : Target delta
        verbose : If enabled, will print the results of DP-SGD analysis

    Returns:
        Pair of privacy loss epsilon and optimal order alpha
    """
    rdp_per_order = compute_rdp(
        q=sample_rate,
        noise_multiplier=noise_multiplier,
        steps=steps,
        orders=alphas,
    )
    eps, opt_alpha = get_privacy_spent(orders=alphas, rdp=rdp_per_order, delta=delta)

    # Nothing to report: hand the result straight back.
    if not verbose:
        return eps, opt_alpha

    summary = (
        f"DP-SGD with\n\tsampling rate = {100 * sample_rate:.3g}%,"
        f"\n\tnoise_multiplier = {noise_multiplier},"
        f"\n\titerated over {steps} steps,\nsatisfies "
        f"differential privacy with\n\tepsilon = {eps:.3g},"
        f"\n\tdelta = {delta}."
        f"\nThe optimal alpha is {opt_alpha}."
    )
    print(summary)

    # When the selected order is the largest or smallest one supplied, hint
    # that a wider set of orders may give a better estimate.
    if opt_alpha == max(alphas) or opt_alpha == min(alphas):
        hint = (
            "The privacy estimate is likely to be improved by expanding "
            "the set of alpha orders."
        )
        print(hint)

    return eps, opt_alpha


def compute_dp_sgd_privacy(
    *,
    sample_rate: float,
    noise_multiplier: float,
    epochs: int,
    delta: float,
    alphas: List[float],
    verbose: bool = True,
) -> Tuple[float, float]:
    """
    Performs the DP-SGD privacy analysis.

    Derives the number of steps from the number of epochs and the sample rate
    (``ceil(1 / sample_rate)`` steps per epoch), and calls DP-SGD privacy
    analysis to find the privacy loss epsilon and optimal order alpha.

    Args:
        sample_rate : probability of each sample from the dataset to be selected
            for a next batch; must lie in the interval (0, 1]
        noise_multiplier : The ratio of the standard deviation of the Gaussian
            noise to the L2-sensitivity of the function to which the noise is added
        epochs : Number of epochs
        delta : Target delta
        alphas : A list of RDP orders
        verbose : If enabled, will print the results of DP-SGD analysis

    Returns:
        Pair of privacy loss epsilon and optimal order alpha

    Raises:
        ValueError
            When ``sample_rate`` is greater than 1, or is not strictly positive
    """
    if sample_rate > 1:
        raise ValueError("sample_rate must be no greater than 1")
    # A zero rate would divide by zero below and a negative rate would yield a
    # negative step count. Phrased as ``not (... > 0)`` so NaN is rejected too.
    if not sample_rate > 0:
        raise ValueError("sample_rate must be greater than 0")

    steps_per_epoch = math.ceil(1 / sample_rate)
    steps = epochs * steps_per_epoch

    return _apply_dp_sgd_analysis(
        sample_rate=sample_rate,
        noise_multiplier=noise_multiplier,
        steps=steps,
        alphas=alphas,
        delta=delta,
        verbose=verbose,
    )
def main():
    """
    Command-line entry point.

    Parses the sample rate, noise multiplier, number of epochs, target delta
    and RDP orders from the command line and passes them to
    ``compute_dp_sgd_privacy``.
    """
    parser = argparse.ArgumentParser(
        description="Estimate privacy of a model trained with DP-SGD using RDP accountant",
    )
    parser.add_argument(
        "-r",
        "--sample-rate",
        type=float,
        required=True,
        help="Input sample rate (probability of each sample from the dataset to be selected for a next batch)",
    )
    parser.add_argument(
        "-n",
        "--noise-multiplier",
        type=float,
        required=True,
        help="Noise multiplier",
    )
    parser.add_argument(
        "-e",
        "--epochs",
        type=int,
        required=True,
        help="Number of epochs to train",
    )
    parser.add_argument(
        "-d",
        "--delta",
        type=float,
        default=1e-5,
        help="Targeted delta (default: 1e-5)",
    )
    parser.add_argument(
        "-a",
        "--alphas",
        action="store",
        dest="alphas",
        type=float,
        nargs="+",
        default=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
        # Note the trailing space after "E.g.,": the adjacent literals are
        # concatenated verbatim, so without it the example reads "E.g.,-a".
        help="List of alpha values (alpha orders of Renyi-DP evaluation). "
        "A default list is provided. Else, space separated numbers. E.g., "
        "-a 10 100",
    )

    args = parser.parse_args()

    compute_dp_sgd_privacy(
        sample_rate=args.sample_rate,
        noise_multiplier=args.noise_multiplier,
        epochs=args.epochs,
        delta=args.delta,
        alphas=args.alphas,
    )


if __name__ == "__main__":
    main()