require import AllCore Ring. require import Distr List Aprhl StdRing StdOrder StdBigop. (*---*) import IntID IntOrder RField RealOrder. (* we define two positive constants eps, del that we will use to reason about our privacy budget*) op eps: { real | 0%r <= eps } as ge0_eps. op del: { real | 0%r <= del } as ge0_delta. op gs: { int | 0 <= gs } as ge0_gs. hint exact : ge0_eps. hint exact : ge0_delta. hint exact : ge0_gs. (* used by trivial only if exact *) (* Our first example shows how we can use the laplace distribution to reason about differential privacy for two values that differ by at most one. *) module M = { var x: int proc f (): int = { var r <- 0; r <$ lap eps x; return r; } }. (* An equivalence judgment in aprhl is parametrized by two parameters epsilon and delta. *) lemma lem1 : aequiv [ [eps & 0%r] M.f ~ M.f : (`|M.x{1} - M.x{2}|<= 1) ==> res{2} = res{1} ]. proof. proc. seq 1 1 :(`|M.x{1} - M.x{2}|<= 1 /\ r{1}=r{2} /\ r{1}=0). wp. (* here we would like to apply the skip rule from aprhl but the same result can be achieved by switching to pHL if we guarantee that this doesn't affect the privacy parameters. This is achieved by making them not available in pHL. *) toequiv. auto. (* to prove this we can use the lap tactic which takes two parameters k1 and k2 and generate two subgoals (simplified) 1) k2 * local_eps <= global eps 2) |(M.x{1} - M.x{2})| <= k2 *) lap 0 1. qed. lemma lem1Fail1 : aequiv [ [eps & 0%r] M.f ~ M.f : (`|M.x{1} - M.x{2}|<= 1) ==> res{2} = res{1} ]. proof. proc. seq 1 1 :(`|M.x{1} - M.x{2}|<= 1 /\ r{1}=r{2} /\ r{1}=0). wp. toequiv. auto. lap 0 0. trivial. simplify. (* we cannot prove this *) admit. qed. lemma lem2 : aequiv [ [(2%r * eps) & 0%r] M.f ~ M.f : (`|M.x{1} - M.x{2}|<= 1) ==> res{2} = res{1} ]. proof. proc. seq 1 1 :(`|M.x{1} - M.x{2}|<= 1 /\ r{1}=r{2} /\ r{1}=0). wp. toequiv. auto. lap 0 1. smt. qed. lemma lem3fail : aequiv [ [eps & 0%r] M.f ~ M.f : (`|M.x{1} - M.x{2}|<= 2) ==> res{2} = res{1} ]. proof. proc. seq 1 1 :(`|M.x{1} - M.x{2}|<= 2 /\ r{1}=r{2} /\ r{1}=0). wp. toequiv. auto. lap 0 2. admit. qed. lemma lem3 : aequiv [ [(2%r * eps) & 0%r] M.f ~ M.f : (`|M.x{1} - M.x{2}|<= 2) ==> res{2} = res{1} ]. proof. proc. seq 1 1 :(`|M.x{1} - M.x{2}|<= 2 /\ r{1}=r{2} /\ r{1}=0). wp. toequiv. auto. lap 0 2. qed. module M' = { var x: int proc f (): int = { var r; r <$ lap (eps/2%r) x; return r; } }. lemma lem4 : aequiv [ [eps & 0%r] M'.f ~ M'.f : (`|M'.x{1} - M'.x{2}|<= 2) ==> res{2} = res{1} ]. proof. proc. lap 0 2. smt. qed. (* since we have (eps,0)-DP we can also prove (eps,delta)-DP *) lemma lem1withDelta : aequiv [ [eps & del] M.f ~ M.f : (`|M.x{1} - M.x{2}|<= 1) ==> res{2} = res{1} ]. proof. proc. seq 1 1 :(`|M.x{1} - M.x{2}|<= 1 /\ r{1}=r{2} /\ r{1}=0). wp. toequiv. auto. lap 0 1 => //. qed. (* So far we have assumed that we have only one data point and we know that this is the data that may differ. However, in general we are interested in situations where we have a whole dataset We now want to consider a dataset as a list of integers and consider a notion of adjacency. We will start with some additional notion. *) pred adj (ms ns : int list) = size ms = size ns /\ (exists (i : int), forall (j : int), !(i=j)=> nth 0 ms j = nth 0 ns j). (***************************************) (* we can now consider the Laplace mechanism *) module LapMech = { proc lm (d:int list , q:int list -> int): int = { var r; r <$ lap (eps/gs%r) (q d); return r; } }. lemma lapMech : aequiv [ [eps & 0%r] LapMech.lm ~ LapMech.lm : (adj d{1} d{2} /\ ={q} /\ forall d d', (adj d d') => (`|(q{1} d)-(q{2} d')| <= gs) ) ==> res{2} = res{1} ]. proof. proc. simplify. lap 0 gs. smt. trivial. simplify. smt(). qed. (* we now want to look at composition *) module M1 = { var d: int * int proc f (): int * int = { var r <- 0; var s <- 0; var (z,y)<-d; r <$ lap eps z; s <$ lap eps y; return (r,s); } }. (* we assume that the data are now a pair of integers, and we want to release both of them *) lemma lem5 : aequiv [ [(eps*2%r) & del] M1.f ~ M1.f : (`|M1.d{1}.`1 - M1.d{2}.`1|<= 1 /\ `|M1.d{1}.`2 - M1.d{2}.`2|<= 1) ==> res{2} = res{1}]. proof. proc. seq 3 3 :( `|z{1} - z{2}|<= 1/\ `|y{1} - y{2}|<= 1). wp. toequiv. skip. trivial. seq 1 1 : (r{1}=r{2} /\ `|y{1} - y{2}|<= 1) <[ eps & del ]>. lap 0 1 => //. lap 0 1. smt(). qed. (* if we knew that part of the data is always the same we could still apply Laplace but only pay for the difference *) lemma lem5par : aequiv [ [eps & del] M1.f ~ M1.f : (`|M1.d{1}.`1 - M1.d{2}.`1|<= 1 /\ M1.d{1}.`2 = M1.d{2}.`2) ==> res{2} = res{1}]. proof. proc. seq 3 3 :( `|z{1} - z{2}|<= 1/\ y{1} =y{2}). wp. toequiv. skip. trivial. seq 1 1 : (r{1}=r{2} /\ y{1} = y{2}) <[ eps & del ]>. lap 0 1 => //. lap 0 0. (* we could have just avoid using laplace at all in the algorithm, but the point of this example is to show how the same algorithm can be analyzed in different ways. We will also see that this idea is behind the idea of parallel composition *) qed. (* We now want to consider a dataset as a list of integers and consider a notion of adjacency which guarantees that the element that changes has a bounded impact. We will start with some additional notion. *) module M2 = { proc sum (ls : int list) : int = { var s : int <- 0; var z : int; var i : int; i <- 0; while (i < size ls) { s <- s + (nth 0 ls i); i <- i + 1; } z <$ lap eps s; return z; } }. pred eq_in_range (ms ns : int list, i j : int) = forall (k : int), i <= k <= j => nth 0 ms k = nth 0 ns k. lemma eq_in_range_succ (ms ns : int list, i j : int) : eq_in_range ms ns i j => eq_in_range ms ns (i + 1) j. proof. move => eir_i. rewrite /eq_in_range => k le_iplus1_k_j. rewrite eir_i /#. qed. pred adjacent_e (ms ns : int list) (i:int)= size ms = size ns /\ 0 <= i < size ms /\ eq_in_range ms ns 0 (i - 1) /\ `|nth 0 ms i - nth 0 ns i| <= 1 /\ eq_in_range ms ns (i + 1) (size ms - 1). (* here is our definition of adjacency *) pred adjacent (ms ns : int list) = exists i, adjacent_e ms ns i. (* we can prove some lemma about this. *) lemma size_eq_adjacent (ms ns : int list) : adjacent ms ns => size ms = size ns. proof. rewrite /adjacent. rewrite /adjacent_e => [#]. smt(). qed. lemma adjacent_sub_abs_bound (ms ns : int list, i : int) : adjacent_e ms ns i => 0 <= i < size ms => `|nth 0 ms i - nth 0 ns i| <= 1. proof. rewrite /adjacent_e. smt(). qed. lemma adjacent_ne_sub_eq_after (ms ns : int list, i : int) : adjacent_e ms ns i => 0 <= i < size ms => nth 0 ms i <> nth 0 ns i => eq_in_range ms ns (i + 1) (size ms - 1). proof. rewrite /adjacent_e. smt(). qed. (* we can now prove the previous program differentially private *) lemma lem6 : aequiv [[ eps & 0%r] M2.sum ~ M2.sum : adjacent ls{1} ls{2} ==> res{1} = res{2}]. proof. proc. seq 2 2: (adjacent ls{1} ls{2} /\ ={i, s} /\ i{1} = 0 /\ s{1} = 0). auto. toequiv. auto. seq 1 1 : (`|s{1} - s{2}| <= 1). toequiv. while (adjacent ls{1} ls{2} /\ ={i} /\ 0 <= i{1} <= size ls{1} /\ (! ={s} => `|s{1} - s{2}| <= 1 /\ eq_in_range ls{1} ls{2} i{1} (size ls{1} - 1))). wp. skip. progress. smt(). smt(). smt(adjacent_sub_abs_bound). smt(adjacent_ne_sub_eq_after). smt(size_eq_adjacent). smt(size_eq_adjacent). auto; progress. rewrite size_ge0. smt(size_eq_adjacent). smt(size_eq_adjacent). smt(addzN). lap 0 1. qed. (* the previous example still adds noise only at the end. What shall we do if we want to add noise in an iterative way? Let's consider releasing all the partial sums of a vector: e.g. on input [1,2,3,4,5] we want to release [1,3,6,10,15] *) module M4 = { proc dummy_sum (ls : int list) : int list = { var s :int <- 0; var output : int list; var z : int; var i :int <- 0; output <- []; while (i < size ls ) { s <- s + (nth 0 ls i); i <- i + 1; (* notice that the laplace noise is now added in the loop *) z <$ lap eps s; output <- z :: output; } return output; } }. (* helper lemma *) lemma bigops_help : forall n, 0<=n => eps * n%r = bigi predT (fun (_ : int) => eps) 0 n. proof. move => n H. rewrite sumr_const count_predT size_range ler_maxr. smt(). rewrite intmulr; auto. qed. (* we now want to prove that the budget that we spend depends on the size of the list, that is we want to prove that dummy_sum is n*eps-DP *) lemma dummy_sum n : 0<=n => aequiv [ [ (eps *n%r) & 0%r] M4.dummy_sum ~ M4.dummy_sum : (adjacent ls{1} ls{2} /\ n = size ls{1}) ==> res{2} = res{1} ]. proof. move => H. proc. seq 3 3: (adjacent ls{1} ls{2} /\ ={i, s, output} /\ i{1} = 0 /\ s{1} = 0 /\ 0<= n /\ n = size ls{1} ). toequiv; auto. (* we could try to use the usual while rule but this would not bring us very far. Instead, we use the approximate while rule which takes as additional parameters two functions describing how the privacy budget change at each iteration, and how the number of iterations decreases. *) awhile [ (fun _ => eps) & (fun _ => 0%r) ] n [n-i-1] (adjacent ls{1} ls{2} /\ ={i, output} /\ 0 <= i{1} <= n /\ (! ={s} => `|s{1} - s{2}| <= 1 /\ eq_in_range ls{1} ls{2} i{1} (n - 1)) /\ 0<= n /\ n = size ls{1}); first 3 try (auto; progress;smt(ge0_eps)). smt(bigops_help). rewrite /sumr sumr_const intmulr;auto. move => v. seq 2 2: (adjacent ls{1} ls{2} /\ ={i, output} /\ 0 <= i{1} <= size ls{1} /\ (! ={s} => `|s{1} - s{2}| <= 1 /\ eq_in_range ls{1} ls{2} i{1} (size ls{1} - 1)) /\ 0<= n /\ n = size ls{1} /\ v=n-i{1}). toequiv; auto; progress. smt(). smt(). smt(adjacent_sub_abs_bound). smt(adjacent_ne_sub_eq_after). smt(). seq 1 1: (adjacent ls{1} ls{2} /\ ={i, output} /\ 0 <= i{1} <= size ls{1} /\ (! ={s} => `|s{1} - s{2}| <= 1 /\ eq_in_range ls{1} ls{2} i{1} (size ls{1} - 1)) /\ 0<= n /\ n = size ls{1} /\ v=n-i{1} /\ z{1}=z{2}) <[ eps & 0%r ]>. lap 0 1. auto; progress. smt(). toequiv; auto; progress. smt(). smt(). qed. (* Here another approach where we add noise to each element *) module M3 = { proc noisy_sum (ls : int list) : int list = { var s :int <- 0; var output : int list; var z : int; var i :int <- 0; output <- []; while (i < size ls ) { (* we add noise before summing *) z <$ lap eps (nth 0 ls i); s <- s + z; output <- z :: output; i <- i + 1; } return output; } }. (* other helper lemmas *) lemma count_iota_0 : forall n, count (transpose (=) n) (iota_ 0 n) = 0. proof. move => n. apply count_eq0. rewrite hasPn. move => x. smt(mem_iota). qed. lemma count_iota : forall n j, n=size(iota_ 0 n) => 0<=j< n => (count (fun (i : int) => j = n-i-1) (iota_ 0 n)) = 1. proof. elim/natind. move => n Hn j Hs Hj. smt(size_iota). move => n Hn HI j Hs Hj. rewrite iotaSr. assumption. simplify. rewrite -cats1. rewrite count_cat. rewrite /count. case (j=0) => H. rewrite H. simplify. have H0 : (n+1-n-1=0). smt(). rewrite H0. simplify. have H1 :(fun i => i = n) =(fun i => 0=n+1-i-1). smt(). rewrite -H1. rewrite count_iota_0. smt(). have H0 : (n+1-n-1=0). smt(). rewrite H0 H. simplify. have H1 :(fun i => j-1 = n-i-1) =(fun i => j=n+1-i-1). smt(). rewrite -H1. rewrite HI. smt(size_iota). smt(). smt(). qed. lemma bigi_eps : forall n j, 0<= j < n=> eps = bigi (fun (i : int) => j = n - i - 1) (fun (_ : int) => eps) 0 n. proof. move => ? ? ?. rewrite big_const. rewrite count_iota. smt(size_iota). assumption. smt(iter1). qed. (* We can now prove that that noisy_sum is eps-DP *) lemma noisy_sum n j : 0<= j < n => aequiv [ [ eps & 0%r] M3.noisy_sum ~ M3.noisy_sum : adjacent_e ls{1} ls{2} j /\ n=size ls{1} ==> res{2} = res{1} ]. proof. move => H. proc. seq 3 3: (adjacent_e ls{1} ls{2} j /\ ={i, s, output} /\ i{1} = 0 /\ s{1} = 0 /\ n=size ls{1}). toequiv; auto. (* notice the budget function we are using for epsilon *) awhile [ (fun x => if j=n-x -1 then eps else 0%r ) & (fun _ => 0%r) ] n [n -i-1] (adjacent_e ls{1} ls{2} j /\ ={i, output} /\ 0 <= i{1} <= size ls{1} /\ n=size ls{1} /\ (i{1}=j => `|(nth 0 ls{1} i{1}) - (nth 0 ls{2} i{2}) | <= 1 /\ eq_in_range ls{1} ls{2} (i{1}+1) (size ls{1} - 1)) /\ 0<= size ls{1}); first 4 try (auto; progress;smt(ge0_eps)). search pred1. search iota_. rewrite /bigi. rewrite -big_mkcond. apply bigi_eps => //. rewrite /sumr sumr_const intmulr; smt(). move => k. have H1: (j = n - k - 1)\/ (j <> n - k - 1). smt(). elim H1 => [?|?]. wp. progress. lap 0 1. smt(). progress. smt(). progress. smt(). smt(). smt(). smt(adjacent_sub_abs_bound). smt(adjacent_ne_sub_eq_after). smt(size_eq_adjacent). wp. lap 0 0. smt(). progress. smt(size_eq_adjacent). auto; progress. smt(). smt(). smt(adjacent_sub_abs_bound). smt(adjacent_ne_sub_eq_after). smt(size_eq_adjacent). smt(size_eq_adjacent). qed.