F# Data


F# Data: Anonymizing JSON

This tutorial shows how to implement an anonymizer for a JSON document (represented using the JsonValue type discussed in JSON parser article) This functionality is not directly available in the F# Data library, but it can be very easily implemented by recursively walking over the JSON document.

If you want to use the JSON anonymizer in your code, you can copy the source from GitHub and just include it in your project. If you use these functions often and would like to see them in the F# Data library, please submit a feature request.

DISCLAIMER: Don't use this for sensitive data as it's just a sample

 1: 
 2: 
 3: 
 4: 
 5: 
 6: 
 7: 
 8: 
 9: 
10: 
11: 
12: 
13: 
14: 
15: 
16: 
17: 
18: 
19: 
20: 
21: 
22: 
23: 
24: 
25: 
26: 
27: 
28: 
29: 
30: 
31: 
32: 
33: 
34: 
35: 
36: 
37: 
38: 
39: 
40: 
41: 
42: 
43: 
44: 
45: 
46: 
47: 
48: 
49: 
50: 
51: 
52: 
53: 
54: 
55: 
56: 
57: 
58: 
59: 
60: 
61: 
62: 
63: 
64: 
65: 
66: 
67: 
68: 
69: 
#r "../../../bin/FSharp.Data.dll"
open System
open System.Globalization
open FSharp.Data

type JsonAnonymizer(?propertiesToSkip, ?valuesToSkip) = 

    let propertiesToSkip = Set.ofList (defaultArg propertiesToSkip [])
    let valuesToSkip = Set.ofList (defaultArg valuesToSkip [])

    let rng = Random()

    let digits = [| '0' .. '9' |]
    let lowerLetters = [| 'a' .. 'z' |]
    let upperLetters = [| 'A' .. 'Z' |]

    let getRandomChar (c:char) =
        if Char.IsDigit c then digits.[rng.Next(10)]
        elif Char.IsLetter c then
            if Char.IsLower c
            then lowerLetters.[rng.Next(26)]
            else upperLetters.[rng.Next(26)]
        else c

    let randomize (str:string) =
        String(str.ToCharArray() |> Array.map getRandomChar)

    let rec anonymize json =
        match json with
        | JsonValue.String s when valuesToSkip.Contains s -> json
        | JsonValue.String s ->
            let typ = Runtime.StructuralInference.inferPrimitiveType CultureInfo.InvariantCulture s
            if typ = typeof<Guid> then Guid.NewGuid().ToString()
            elif typ = typeof<Runtime.StructuralTypes.Bit0> || typ = typeof<Runtime.StructuralTypes.Bit1> then s
            elif typ = typeof<DateTime> then s
            else 
                let prefix, s =
                    if s.StartsWith "http://" then "http://", s.Substring("http://".Length)
                    elif s.StartsWith "https://" then "https://", s.Substring("https://".Length)
                    else "", s
                prefix + randomize s
            |> JsonValue.String
        | JsonValue.Number d -> 
            let typ = Runtime.StructuralInference.inferPrimitiveType CultureInfo.InvariantCulture (d.ToString())
            if typ = typeof<Runtime.StructuralTypes.Bit0> || typ = typeof<Runtime.StructuralTypes.Bit1>
            then json
            else d.ToString() |> randomize |> Decimal.Parse |> JsonValue.Number
        | JsonValue.Float f -> 
            f.ToString()
            |> randomize 
            |> Double.Parse 
            |> JsonValue.Float
        | JsonValue.Boolean _  | JsonValue.Null -> json
        | JsonValue.Record props -> 
            props 
            |> Array.map (fun (key, value) -> key, if propertiesToSkip.Contains key then value else anonymize value)
            |> JsonValue.Record
        | JsonValue.Array array -> 
            array 
            |> Array.map anonymize 
            |> JsonValue.Array

    member __.Anonymize json = anonymize json

let json = JsonValue.Load (__SOURCE_DIRECTORY__ + "../../data/TwitterStream.json")
printfn "%O" json

let anonymizedJson = (JsonAnonymizer ["lang"]).Anonymize json
printfn "%O" anonymizedJson

Related articles

namespace System
namespace System.Globalization
namespace FSharp
namespace FSharp.Data
Multiple items
type JsonAnonymizer =
  new : ?propertiesToSkip:string list * ?valuesToSkip:string list -> JsonAnonymizer
  member Anonymize : json:JsonValue -> JsonValue

Full name: JsonAnonymizer.JsonAnonymizer

--------------------
new : ?propertiesToSkip:string list * ?valuesToSkip:string list -> JsonAnonymizer
val propertiesToSkip : string list option
val valuesToSkip : string list option
val propertiesToSkip : Set<string>
Multiple items
module Set

from Microsoft.FSharp.Collections

--------------------
type Set<'T (requires comparison)> =
  interface IComparable
  interface IEnumerable
  interface IEnumerable<'T>
  interface ICollection<'T>
  new : elements:seq<'T> -> Set<'T>
  member Add : value:'T -> Set<'T>
  member Contains : value:'T -> bool
  override Equals : obj -> bool
  member IsProperSubsetOf : otherSet:Set<'T> -> bool
  member IsProperSupersetOf : otherSet:Set<'T> -> bool
  ...

Full name: Microsoft.FSharp.Collections.Set<_>

--------------------
new : elements:seq<'T> -> Set<'T>
val ofList : elements:'T list -> Set<'T> (requires comparison)

Full name: Microsoft.FSharp.Collections.Set.ofList
val defaultArg : arg:'T option -> defaultValue:'T -> 'T

Full name: Microsoft.FSharp.Core.Operators.defaultArg
val valuesToSkip : Set<string>
val rng : Random
Multiple items
type Random =
  new : unit -> Random + 1 overload
  member Next : unit -> int + 2 overloads
  member NextBytes : buffer:byte[] -> unit
  member NextDouble : unit -> float

Full name: System.Random

--------------------
Random() : unit
Random(Seed: int) : unit
val digits : char []
val lowerLetters : char []
val upperLetters : char []
val getRandomChar : (char -> char)
val c : char
Multiple items
val char : value:'T -> char (requires member op_Explicit)

Full name: Microsoft.FSharp.Core.Operators.char

--------------------
type char = Char

Full name: Microsoft.FSharp.Core.char
type Char =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 1 overload
    static val MaxValue : char
    static val MinValue : char
    static member ConvertFromUtf32 : utf32:int -> string
    static member ConvertToUtf32 : highSurrogate:char * lowSurrogate:char -> int + 1 overload
    static member GetNumericValue : c:char -> float + 1 overload
    ...
  end

Full name: System.Char
Char.IsDigit(c: char) : bool
Char.IsDigit(s: string, index: int) : bool
Random.Next() : int
Random.Next(maxValue: int) : int
Random.Next(minValue: int, maxValue: int) : int
Char.IsLetter(c: char) : bool
Char.IsLetter(s: string, index: int) : bool
Char.IsLower(c: char) : bool
Char.IsLower(s: string, index: int) : bool
val randomize : (string -> String)
val str : string
Multiple items
val string : value:'T -> string

Full name: Microsoft.FSharp.Core.Operators.string

--------------------
type string = String

Full name: Microsoft.FSharp.Core.string
Multiple items
type String =
  new : value:char -> string + 7 overloads
  member Chars : int -> char
  member Clone : unit -> obj
  member CompareTo : value:obj -> int + 1 overload
  member Contains : value:string -> bool
  member CopyTo : sourceIndex:int * destination:char[] * destinationIndex:int * count:int -> unit
  member EndsWith : value:string -> bool + 2 overloads
  member Equals : obj:obj -> bool + 2 overloads
  member GetEnumerator : unit -> CharEnumerator
  member GetHashCode : unit -> int
  ...

Full name: System.String

--------------------
String(value: nativeptr<char>) : unit
String(value: nativeptr<sbyte>) : unit
String(value: char []) : unit
String(c: char, count: int) : unit
String(value: nativeptr<char>, startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int) : unit
String(value: char [], startIndex: int, length: int) : unit
String(value: nativeptr<sbyte>, startIndex: int, length: int, enc: Text.Encoding) : unit
String.ToCharArray() : char []
String.ToCharArray(startIndex: int, length: int) : char []
type Array =
  member Clone : unit -> obj
  member CopyTo : array:Array * index:int -> unit + 1 overload
  member GetEnumerator : unit -> IEnumerator
  member GetLength : dimension:int -> int
  member GetLongLength : dimension:int -> int64
  member GetLowerBound : dimension:int -> int
  member GetUpperBound : dimension:int -> int
  member GetValue : params indices:int[] -> obj + 7 overloads
  member Initialize : unit -> unit
  member IsFixedSize : bool
  ...

Full name: System.Array
val map : mapping:('T -> 'U) -> array:'T [] -> 'U []

Full name: Microsoft.FSharp.Collections.Array.map
val anonymize : (JsonValue -> JsonValue)
val json : JsonValue
type JsonValue =
  | String of string
  | Number of decimal
  | Float of float
  | Record of properties: (string * JsonValue) []
  | Array of elements: JsonValue []
  | Boolean of bool
  | Null
  member Post : uri:string * ?headers:(string * string) list -> HttpResponse
  member Request : uri:string * ?httpMethod:string * ?headers:(string * string) list -> HttpResponse
  member RequestAsync : uri:string * ?httpMethod:string * ?headers:(string * string) list -> Async<HttpResponse>
  override ToString : unit -> string
  member ToString : saveOptions:JsonSaveOptions -> string
  static member AsyncLoad : uri:string * ?cultureInfo:CultureInfo -> Async<JsonValue>
  static member Load : uri:string * ?cultureInfo:CultureInfo -> JsonValue
  static member Load : reader:TextReader * ?cultureInfo:CultureInfo -> JsonValue
  static member Load : stream:Stream * ?cultureInfo:CultureInfo -> JsonValue
  static member Parse : text:string * ?cultureInfo:CultureInfo -> JsonValue
  static member ParseMultiple : text:string * ?cultureInfo:CultureInfo -> seq<JsonValue>
  static member ParseSample : text:string * ?cultureInfo:CultureInfo -> JsonValue

Full name: FSharp.Data.JsonValue
union case JsonValue.String: string -> JsonValue
val s : string
member Set.Contains : value:'T -> bool
val typ : Type
Multiple items
namespace FSharp.Data.Runtime

--------------------
namespace System.Runtime
module StructuralInference

from FSharp.Data.Runtime
val inferPrimitiveType : cultureInfo:CultureInfo -> value:string -> Type

Full name: FSharp.Data.Runtime.StructuralInference.inferPrimitiveType
Multiple items
type CultureInfo =
  new : name:string -> CultureInfo + 3 overloads
  member Calendar : Calendar
  member ClearCachedData : unit -> unit
  member Clone : unit -> obj
  member CompareInfo : CompareInfo
  member CultureTypes : CultureTypes
  member DateTimeFormat : DateTimeFormatInfo with get, set
  member DisplayName : string
  member EnglishName : string
  member Equals : value:obj -> bool
  ...

Full name: System.Globalization.CultureInfo

--------------------
CultureInfo(name: string) : unit
CultureInfo(culture: int) : unit
CultureInfo(name: string, useUserOverride: bool) : unit
CultureInfo(culture: int, useUserOverride: bool) : unit
property CultureInfo.InvariantCulture: CultureInfo
val typeof<'T> : Type

Full name: Microsoft.FSharp.Core.Operators.typeof
Multiple items
type Guid =
  struct
    new : b:byte[] -> Guid + 4 overloads
    member CompareTo : value:obj -> int + 1 overload
    member Equals : o:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member ToByteArray : unit -> byte[]
    member ToString : unit -> string + 2 overloads
    static val Empty : Guid
    static member NewGuid : unit -> Guid
    static member Parse : input:string -> Guid
    static member ParseExact : input:string * format:string -> Guid
    ...
  end

Full name: System.Guid

--------------------
Guid()
Guid(b: byte []) : unit
Guid(g: string) : unit
Guid(a: int, b: int16, c: int16, d: byte []) : unit
Guid(a: uint32, b: uint16, c: uint16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : unit
Guid(a: int, b: int16, c: int16, d: byte, e: byte, f: byte, g: byte, h: byte, i: byte, j: byte, k: byte) : unit
Guid.NewGuid() : Guid
namespace FSharp.Data.Runtime.StructuralTypes
Multiple items
union case Runtime.StructuralTypes.Bit0.Bit0: Runtime.StructuralTypes.Bit0

--------------------
type Bit0 = | Bit0

Full name: FSharp.Data.Runtime.StructuralTypes.Bit0
Multiple items
union case Runtime.StructuralTypes.Bit1.Bit1: Runtime.StructuralTypes.Bit1

--------------------
type Bit1 = | Bit1

Full name: FSharp.Data.Runtime.StructuralTypes.Bit1
Multiple items
type DateTime =
  struct
    new : ticks:int64 -> DateTime + 10 overloads
    member Add : value:TimeSpan -> DateTime
    member AddDays : value:float -> DateTime
    member AddHours : value:float -> DateTime
    member AddMilliseconds : value:float -> DateTime
    member AddMinutes : value:float -> DateTime
    member AddMonths : months:int -> DateTime
    member AddSeconds : value:float -> DateTime
    member AddTicks : value:int64 -> DateTime
    member AddYears : value:int -> DateTime
    ...
  end

Full name: System.DateTime

--------------------
DateTime()
   (+0 other overloads)
DateTime(ticks: int64) : unit
   (+0 other overloads)
DateTime(ticks: int64, kind: DateTimeKind) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, calendar: Calendar) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, kind: DateTimeKind) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, calendar: Calendar) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int) : unit
   (+0 other overloads)
DateTime(year: int, month: int, day: int, hour: int, minute: int, second: int, millisecond: int, kind: DateTimeKind) : unit
   (+0 other overloads)
val prefix : string
String.StartsWith(value: string) : bool
String.StartsWith(value: string, comparisonType: StringComparison) : bool
String.StartsWith(value: string, ignoreCase: bool, culture: CultureInfo) : bool
String.Substring(startIndex: int) : string
String.Substring(startIndex: int, length: int) : string
union case JsonValue.Number: decimal -> JsonValue
val d : decimal
Decimal.ToString() : string
Decimal.ToString(provider: IFormatProvider) : string
Decimal.ToString(format: string) : string
Decimal.ToString(format: string, provider: IFormatProvider) : string
Multiple items
type Decimal =
  struct
    new : value:int -> decimal + 7 overloads
    member CompareTo : value:obj -> int + 1 overload
    member Equals : value:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 3 overloads
    static val Zero : decimal
    static val One : decimal
    static val MinusOne : decimal
    static val MaxValue : decimal
    ...
  end

Full name: System.Decimal

--------------------
Decimal()
Decimal(value: int) : unit
Decimal(value: uint32) : unit
Decimal(value: int64) : unit
Decimal(value: uint64) : unit
Decimal(value: float32) : unit
Decimal(value: float) : unit
Decimal(bits: int []) : unit
Decimal(lo: int, mid: int, hi: int, isNegative: bool, scale: byte) : unit
Decimal.Parse(s: string) : decimal
Decimal.Parse(s: string, provider: IFormatProvider) : decimal
Decimal.Parse(s: string, style: NumberStyles) : decimal
Decimal.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : decimal
union case JsonValue.Float: float -> JsonValue
val f : float
Double.ToString() : string
Double.ToString(provider: IFormatProvider) : string
Double.ToString(format: string) : string
Double.ToString(format: string, provider: IFormatProvider) : string
type Double =
  struct
    member CompareTo : value:obj -> int + 1 overload
    member Equals : obj:obj -> bool + 1 overload
    member GetHashCode : unit -> int
    member GetTypeCode : unit -> TypeCode
    member ToString : unit -> string + 3 overloads
    static val MinValue : float
    static val MaxValue : float
    static val Epsilon : float
    static val NegativeInfinity : float
    static val PositiveInfinity : float
    ...
  end

Full name: System.Double
Double.Parse(s: string) : float
Double.Parse(s: string, provider: IFormatProvider) : float
Double.Parse(s: string, style: NumberStyles) : float
Double.Parse(s: string, style: NumberStyles, provider: IFormatProvider) : float
union case JsonValue.Boolean: bool -> JsonValue
union case JsonValue.Null: JsonValue
union case JsonValue.Record: properties: (string * JsonValue) [] -> JsonValue
val props : (string * JsonValue) []
val key : string
val value : JsonValue
union case JsonValue.Array: elements: JsonValue [] -> JsonValue
Multiple items
val array : JsonValue []

--------------------
type 'T array = 'T []

Full name: Microsoft.FSharp.Core.array<_>
member JsonAnonymizer.Anonymize : json:JsonValue -> JsonValue

Full name: JsonAnonymizer.JsonAnonymizer.Anonymize
val json : JsonValue

Full name: JsonAnonymizer.json
static member JsonValue.Load : uri:string * ?cultureInfo:CultureInfo -> JsonValue
static member JsonValue.Load : reader:IO.TextReader * ?cultureInfo:CultureInfo -> JsonValue
static member JsonValue.Load : stream:IO.Stream * ?cultureInfo:CultureInfo -> JsonValue
val printfn : format:Printf.TextWriterFormat<'T> -> 'T

Full name: Microsoft.FSharp.Core.ExtraTopLevelOperators.printfn
val anonymizedJson : JsonValue

Full name: JsonAnonymizer.anonymizedJson
Fork me on GitHub