Sample Requests for Protegrity Anonymization

Modify and use the sample requests provided here for anonymizing your dataset. Use these requests as a template or as a guideline for building the required request.

Tree-based Aggregation for Attributes with k-Anonymity

This sample uses the following attributes:

  • Source: Local file system
  • Target: Amazon S3 bucket
  • Data set: 1 Quasi Identifier
  • Suppression: 0.01
  • Privacy Model: K-Anonymity with k value as 50

In this example, the data has custom delimiters.

{
    "source": {
        "type": "File",
        "file": {
            "name": "samples/adult.csv",
            "props": {
                "sep": ";"
            }
        }
    },
    "attributes": [
        {
            "name": "age",
            "dataType": "String",
            "classificationType": "Quasi Identifier",
            "dataTransformationType": "Generalization",
            "generalization": {
                "type": "Masking Based",
                "hierarchyType": "Rule",
                "rule": {
                    "masking": {
                        "maskOrder": "Right To Left",
                        "maskChar": "*",
                        "maxDomainSize": 2
                    }
                }
            }
        }
    ],
    "privacyModel": {
        "k": {
            "kValue": 50
        }
    },
    "config": {
        "maxSuppression": 0.01
    },
    "target": {
        "type": "File",
        "file": {
            "name": "s3://<Your-S3-BucketName>/anon-adult-e1.csv",
            "props": {
                "lineterminator": "\n"
            },
            "accessOptions": {
                "key": "<Your-S3-API Key>",
                "secret": "<Your-S3-API Secret>"
            }
        }
    }
}
#import  the anonsdk library
import anonsdk as asdk
import pandas as pd

# S3 bucket credentials: replace the placeholders with your own values.
# The placeholders are quoted so that the script is valid Python as written.
s3_key = "<AWS_Key>"
s3_secret = "<AWS_Secret>"

# Set the source path for anonymization
# dataset path
source_csv_path = "adult.csv"
# create Store Object source_datastore
source_datastore = asdk.FileDataStore(source_csv_path)

# Set the target path for the anonymized result
# anonymized file path
target_csv_path = "s3://target/anon-adult-e1.csv"
# create Store Object target_datastore; the S3 credentials are passed as access options
target_datastore = asdk.FileDataStore(target_csv_path, access_options={"key": s3_key, "secret": s3_secret})

# Create connection Object with Rest API server
conn = asdk.Connection("https://anon.protegrity.com/")
# The source file uses ";" as the column delimiter
df = pd.read_csv(source_csv_path, sep=";")
df.head()

# create AnonObject with connection, dataframe metadata and source path
anon_object = asdk.AnonElement(conn, df, source_datastore)
# configure masking-based generalization for the "age" attribute, using "*" as the mask character
anon_object["age"] = asdk.Gen_Mask(maskchar="*", maskOrder="R", maxLength=2)

# Configure K-anonymity (k = 50) and the suppression allowed in the dataset
anon_object.config.k = asdk.K(50)
anon_object.config['maxSuppression'] = 0.01

# Send Anonymization request with Transformation Configuration with the target store
job = asdk.anonymize(anon_object, target_datastore, force=True)

# check the status of the job <check the status iteratively until 'status': 'Completed'>
job.status()

# check the comparative risk statistics from the source and result dataset
job.riskStat()

# check the comparative utility statistics from the source and result dataset
job.utilityStat()

Tree-based Aggregation for Attributes with k-Anonymity, l-Diversity, and t-Closeness

This sample uses the following attributes:

  • Source: Local file system
  • Target: Amazon S3 bucket
  • Data set: 4 Quasi Identifiers, 2 Sensitive Attributes
  • Suppression: 0.10
  • Privacy Model: K with value 3, T-closeness with value 0.2, and L-diversity with value 2

In this example, for an attribute, the generalization hierarchy is a part of the request.

{
    "source": {
        "type": "File",
        "file": {
            "name": "samples/adult.csv",
            "props": {
                "sep": ";",
                "decimal": ",",
                "quotechar": "\"",
                "escapechar": "\\",
                "encoding": "utf-8"
            }
        }
    },
    "attributes": [
        {
            "name": "marital-status",
            "dataType": "String",
            "classificationType": "Quasi Identifier",
            "dataTransformationType": "Generalization",
            "generalization": {
                "type": "Tree Based",
                "hierarchyType": "Data Store",
                "dataStore": {
                    "type": "File",
                    "format": "CSV",
                    "file": {
                        "name": "samples/hierarchy/adult_hierarchy_marital-status.csv",
                        "props": {
                            "delimiter": ";",
                            "quotechar": "\"",
                            "header": null
                        }
                    }
                }
            }
        },
        {
            "name": "native-country",
            "dataType": "String",
            "classificationType": "Quasi Identifier",
            "dataTransformationType": "Generalization",
            "generalization": {
                "type": "Tree Based",
                "hierarchyType": "Data Store",
                "dataStore": {
                    "type": "File",
                    "format": "CSV",
                    "file": {
                        "name": "samples/hierarchy/adult_hierarchy_native-country.csv",
                        "props": {
                            "delimiter": ";",
                            "quotechar": "\"",
                            "header": null
                        }
                    }
                }
            }
        },
        {
            "name": "occupation",
            "dataType": "String",
            "classificationType": "Quasi Identifier",
            "dataTransformationType": "Generalization",
            "generalization": {
                "type": "Tree Based",
                "hierarchyType": "Data Store",
                "dataStore": {
                    "type": "File",
                    "format": "CSV",
                    "file": {
                        "name": "samples/hierarchy/adult_hierarchy_occupation.csv",
                        "props": {
                            "delimiter": ";",
                            "quotechar": "\"",
                            "header": null
                        }
                    }
                }
            }
        },
        {
            "name": "race",
            "dataType": "String",
            "classificationType": "Quasi Identifier",
            "dataTransformationType": "Generalization",
            "generalization": {
                "type": "Tree Based",
                "hierarchyType": "Data",
                "data": {
                    "hierarchy": [
                        [
                            "White",
                            "*"
                        ],
                        [
                            "Asian-Pac-Islander",
                            "*"
                        ],
                        [
                            "Amer-Indian-Eskimo",
                            "*"
                        ],
                        [
                            "Black",
                            "*"
                        ]
                    ],
                    "defaultHierarchy": [
                        "Other",
                        "*"
                    ]
                }
            }
        },
        {
            "name": "sex",
            "dataType": "String",
            "classificationType": "Sensitive Attribute"
        },
        {
            "name": "salary-class",
            "dataType": "String",
            "classificationType": "Sensitive Attribute"
        }
    ],
    "config": {
        "maxSuppression": 0.10
    },
    "privacyModel": {
        "k": {
            "kValue": 3
        },
        "tcloseness": [
            {
                "name": "salary-class",
                "emdType": "EMD with equal ground distance",
                "tFactor": 0.2
            }
        ],
        "ldiversity": [
            {
                "name": "sex",
                "lFactor": 2,
                "lType": "Distinct-l-diversity"
            }
        ]
    },
    "target": {
        "type": "File",
        "file": {
            "name": "s3://<Your-S3-BucketName>/anon-adult_klt.csv",
            "props": {
                "lineterminator": "\n"
            },
            "accessOptions": {
                "key": "<Your-S3-API Key>",
                "secret": "<Your-S3-API Secret>"
            }
        }
    }
}
#import the anonsdk library
import anonsdk as asdk
import pandas as pd

# S3 bucket credentials: replace the placeholders with your own values.
# The placeholders are quoted so that the script is valid Python as written.
s3_key = "<AWS_Key>"
s3_secret = "<AWS_Secret>"

# Set the source path for anonymization
# dataset path
source_csv_path = "adult.csv"
# create Store Object source_datastore
source_datastore = asdk.FileDataStore(source_csv_path)

# Set the target path for the anonymized result
# anonymized file path
target_csv_path = "s3://target/anon-adult_klt.csv"

# create Store Object target_datastore; the S3 credentials are passed as access options
target_datastore = asdk.FileDataStore(target_csv_path, access_options={"key": s3_key, "secret": s3_secret})

# Create connection Object with Rest API server
conn = asdk.Connection("https://anon.protegrity.com/")

# create AnonObject with connection, dataframe metadata and source path
df = pd.read_csv(source_csv_path, sep=";")
df.head()
anon_object = asdk.AnonElement(conn, df, source_datastore)

# Tree-based generalization configuration.
# pd.read_csv already returns an in-memory pandas DataFrame, so no .compute()
# call is needed (pandas DataFrames do not provide that method).
hierarchy_marital_status_path = "samples/hierarchy/adult_hierarchy_marital-status.csv"
df_ms = pd.read_csv(hierarchy_marital_status_path, sep=";")
print(df_ms)
anon_object['marital-status'] = asdk.Gen_Tree(df_ms)

# The attribute key must match the dataset column name "native-country"
hierarchy_native_country_path = "samples/hierarchy/adult_hierarchy_native-country.csv"
df_nc = pd.read_csv(hierarchy_native_country_path, sep=";")
print(df_nc)
anon_object['native-country'] = asdk.Gen_Tree(df_nc)

# Same location and ";" delimiter as the other hierarchy files
hierarchy_occupation_path = "samples/hierarchy/adult_hierarchy_occupation.csv"
df_occ = pd.read_csv(hierarchy_occupation_path, sep=";")
print(df_occ)
anon_object['occupation'] = asdk.Gen_Tree(df_occ)

# Inline hierarchy for "race": every value generalizes to "*" at level 1.
# The values match the hierarchy given in the request above.
df_race = pd.DataFrame(
    data={
        "lvl0": ["White", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Black", "Other"],
        "lvl1": ["*", "*", "*", "*", "*"],
    }
)
anon_object['race'] = asdk.Gen_Tree(df_race)

# Configure K-anonymity (k = 3) and the suppression allowed in the dataset
anon_object.config.k = asdk.K(3)
anon_object.config['maxSuppression'] = 0.10

# Configure L-diversity and T-closeness on the sensitive attributes
anon_object["sex"] = asdk.LDiv(lfactor=2)
anon_object["salary-class"] = asdk.TClose(tfactor=0.2)

# Send Anonymization request with Transformation Configuration with the target store
job = asdk.anonymize(anon_object, target_datastore, force=True)

# check the status of the job
job.status()

# check the comparative risk statistics from the source and result dataset
job.riskStat()

# check the comparative utility statistics from the source and result dataset
job.utilityStat()

Micro-Aggregation and Generalization with Aggregates

This sample uses the following attributes:

  • Source: Local file system
  • Target: Amazon S3 bucket
  • Data set: 2 Quasi Identifiers, 1 Aggregation-based Quasi Identifier, 2 Micro Aggregations, and 2 Sensitive Attributes
  • Suppression: 0.50
  • Privacy Model: K with value 5, T-closeness with value 0.2, and L-diversity with value 2
{
    "source": {
        "type": "File",
        "file": {
            "name": "samples/adult.csv",
            "props": {
                "sep": ";"
            }
        }
    },
    "attributes": [
        {
            "name": "age",
            "dataType": "Integer",
            "classificationType": "Quasi Identifier",
            "dataTransformationType": "Micro Aggregation",
            "aggregateFn": "GMean"
        },
        {
            "name": "marital-status",
            "dataType": "String",
            "classificationType": "Quasi Identifier",
            "dataTransformationType": "Micro Aggregation",
            "aggregateFn": "Mode"
        },
        {
            "name": "native-country",
            "dataType": "String",
            "classificationType": "Quasi Identifier",
            "dataTransformationType": "Generalization",
            "generalization": {
                "type": "Tree Based",
                "hierarchyType": "Data Store",
                "dataStore": {
                    "type": "File",
                    "format": "CSV",
                    "file": {
                        "name": "samples/hierarchy/adult_hierarchy_native-country.csv",
                        "props": {
                            "delimiter": ";",
                            "quotechar": "\"",
                            "header": null
                        }
                    }
                }
            }
        },
        {
            "name": "occupation",
            "dataType": "String",
            "classificationType": "Quasi Identifier",
            "dataTransformationType": "Generalization",
            "generalization": {
                "type": "Tree Based",
                "hierarchyType": "Data Store",
                "dataStore": {
                    "type": "File",
                    "format": "CSV",
                    "file": {
                        "name": "samples/hierarchy/adult_hierarchy_occupation.csv",
                        "props": {
                            "delimiter": ";",
                            "quotechar": "\"",
                            "header": null
                        }
                    }
                }
            }
        },
        {
            "name": "race",
            "dataType": "String",
            "classificationType": "Quasi Identifier",
            "dataTransformationType": "Generalization",
            "generalization": {
                "type": "Aggregation Based",
                "hierarchyType": "Aggregate",
                "aggregateFn": "Mode"
            }
        },
        {
            "name": "sex",
            "classificationType": "Sensitive Attribute",
            "dataType": "String"
        },
        {
            "name": "salary-class",
            "classificationType": "Sensitive Attribute",
            "dataType": "String"
        }
    ],
    "config": {
        "maxSuppression": 0.50
    },
    "privacyModel": {
        "k": {
            "kValue": 5
        },
        "tcloseness": [
            {
                "name": "salary-class",
                "emdType": "EMD with equal ground distance",
                "tFactor": 0.2
            }
        ],
        "ldiversity": [
            {
                "name": "sex",
                "lType": "Distinct-l-diversity",
                "lFactor": 2
            }
        ]
    },
    "target": {
        "type": "File",
        "file": {
            "name": "s3://<Your-S3-BucketName>/anon-adult_micro.csv",
            "props": {
                "lineterminator": "\n"
            },
            "accessOptions": {
                "key": "<Your-S3-API Key>",
                "secret": "<Your-S3-API Secret>"
            }
        }
    }
}
#import the anonsdk library
import anonsdk as asdk
import pandas as pd

# S3 bucket credentials: replace the placeholders with your own values.
# The placeholders are quoted so that the script is valid Python as written.
s3_key = "<AWS_Key>"
s3_secret = "<AWS_Secret>"

# Set the source path for anonymization
# dataset path
source_csv_path = "adult.csv"
# create Store Object source_datastore
source_datastore = asdk.FileDataStore(source_csv_path)

# Set the target path for the anonymized result
# anonymized file path
target_csv_path = "s3://target/anon-adult_micro.csv"
# create Store Object target_datastore; the S3 credentials are passed as access options
target_datastore = asdk.FileDataStore(target_csv_path, access_options={"key": s3_key, "secret": s3_secret})

# Create connection Object with Rest API server
conn = asdk.Connection("https://anon.protegrity.com/")
df = pd.read_csv(source_csv_path, sep=";")
df.head()

# create AnonObject with connection, dataframe metadata and source path
anon_object = asdk.AnonElement(conn, df, source_datastore)

# Tree-based generalization configuration.
# The attribute key must match the dataset column name "native-country".
hierarchy_native_country_path = "samples/hierarchy/adult_hierarchy_native-country.csv"
df_nc = pd.read_csv(hierarchy_native_country_path, sep=";")
print(df_nc)
anon_object['native-country'] = asdk.Gen_Tree(df_nc)

# The occupation hierarchy belongs to the "occupation" attribute; the file
# uses the same ";" delimiter as the other hierarchy files.
hierarchy_occupation_path = "samples/hierarchy/adult_hierarchy_occupation.csv"
df_occ = pd.read_csv(hierarchy_occupation_path, sep=";")
print(df_occ)
anon_object['occupation'] = asdk.Gen_Tree(df_occ)

# applying aggregation rules
anon_object['age'] = asdk.MicroAgg(asdk.AggregateFunction.GMean)
anon_object['race'] = asdk.Gen_Agg(asdk.AggregateFunction.Mode)

# applying micro-aggregation rule
anon_object['marital-status'] = asdk.MicroAgg(asdk.AggregateFunction.Mode)

# Configure K-anonymity (k = 5) and the suppression allowed in the dataset
anon_object.config.k = asdk.K(5)
anon_object.config['maxSuppression'] = 0.50

# Configure L-diversity and T-closeness on the sensitive attributes
anon_object["sex"] = asdk.LDiv(lfactor=2)
anon_object["salary-class"] = asdk.TClose(tfactor=0.2)

# Send Anonymization request with Transformation Configuration with the target store
job = asdk.anonymize(anon_object, target_datastore, force=True)

# check the status of the job
job.status()

# check the comparative risk statistics from the source and result dataset
job.riskStat()

# check the comparative utility statistics from the source and result dataset
job.utilityStat()

Parquet File Format

This sample uses the following attributes:

  • Source: Local file system
  • Target: Amazon S3 bucket in the Parquet format
  • Data set: 4 Quasi Identifiers, 1 Aggregation-based Quasi Identifier, 1 Micro Aggregation, and 1 Sensitive Attribute
  • Suppression: 0.4
  • Privacy Model: K with value 350 and L-diversity with value 2

In this example, for an attribute, the generalization hierarchy is part of the request.

    {
        "source": {
            "type": "File",
            "file": {
                "name": "samples/adult.csv",
                "props": {
                    "sep": ";",
                    "decimal": ",",
                    "quotechar": "\"",
                    "escapechar": "\\",
                    "encoding": "utf-8"
                }
            }
        },
        "attributes": [
            {
                "name": "age",
                "dataType": "Integer",
                "classificationType": "Quasi Identifier",
                "dataTransformationType": "Generalization",
                "generalization": {
                    "hierarchyType": "Rule",
                    "type": "Rounding",
                    "rule": {
                        "interval": {
                            "levels": [
                                "5",
                                "10",
                                "50",
                                "100"
                            ],
                            "lowerBound":"5",
                            "upperBound":"100"
                        }
                    }
                }
            },
            {
                "name": "marital-status",
                "dataType": "String",
                "classificationType": "Quasi Identifier",
                "dataTransformationType": "Micro Aggregation",
                "aggregateFn": "Mode"
            },
            {
                "name": "citizenSince",
                "dataType": "Date",
                "classificationType": "Quasi Identifier",
                "dataTransformationType": "Generalization",
                "generalization": {
                    "type": "Rounding",
                    "hierarchyType": "Rule",
                    "rule": {
                        "daterange": {
                            "levels": [
                                "WD.M.Y",
                                "FD.M.Y",
                                "QTR.Y",
                                "Y"
                            ]
                        }
                    }
                },
                "props": {
                    "dateformat": "dd-mm-yyyy"
                }
            },
            {
                "name": "occupation",
                "dataType": "String",
                "classificationType": "Quasi Identifier",
                "dataTransformationType": "Generalization",
                "generalization": {
                    "type": "Tree Based",
                    "hierarchyType": "Data Store",
                    "dataStore": {
                        "type": "File",
                        "format": "CSV",
                        "file": {
                            "name": "samples/hierarchy/adult_hierarchy_occupation.csv",
                            "props": {
                                "delimiter": ";",
                                "quotechar": "\"",
                                "header": null
                            }
                        }
                    }
                }
            },
            {
                "name": "race",
                "classificationType": "Quasi Identifier",
                "dataTransformationType": "Generalization",
                "dataType": "String",
                "generalization": {
                    "type": "Aggregation Based",
                    "hierarchyType": "Aggregate",
                    "aggregateFn": "Mode"
                }
            },
            {
                "name": "salary-class",
                "dataType": "String",
                "classificationType": "Quasi Identifier",
                "dataTransformationType": "Generalization",
                "generalization": {
                    "type": "Masking Based",
                    "hierarchyType": "Rule",
                    "rule": {
                        "masking": {
                            "maskOrder": "Left To Right",
                            "maskChar": "*",
                            "maxDomainSize": 3
                        }
                    }
                }
            },
            {
                "name": "sex",
                "dataType": "String",
                "classificationType": "Sensitive Attribute"
            }
        ],
        "config": {
            "maxSuppression": 0.4,
            "redactOutliers": true,
            "suppressionData": "Any"
        },
        "privacyModel": {
            "k": {
                "kValue": 350
            },
            "ldiversity": [
                {
                    "name": "sex",
                    "lType": "Distinct-l-diversity",
                    "lFactor": 2
                }
            ]
        },
        "target": {
            "type": "File",
            "file": {
                "name": "s3://<Your-S3-BucketName>/anon-adult-rules",
                "format": "Parquet",
                "accessOptions": {
                    "key": "<Your-S3-API Key>",
                    "secret": "<Your-S3-API Secret>"
                }
            }
        }
    }
This sample is not applicable for the SDK functions, so no equivalent SDK code is provided.

Retaining and Redacting

This sample uses the following attributes:

  • Source: Local file system
  • Target: Amazon S3 bucket in the Parquet format
  • Data set: 2 Quasi Identifiers, 1 Aggregation-based Quasi Identifier, 1 Micro Aggregation, 1 Non-Sensitive Attribute, 1 Identifying Attribute, and 2 Sensitive Attributes
  • Suppression: 0.10
  • Privacy Model: K with value 200 and L-diversity with value 2

In this example, for an attribute, the generalization hierarchy is part of the request.

    {
        "source": {
            "type": "File",
            "file": {
                "name": "samples/adult.csv",
                "props": {
                    "sep": ";",
                    "decimal": ",",
                    "quotechar": "\"",
                    "escapechar": "\\",
                    "encoding": "utf-8"
                }
            }
        },
        "attributes": [
            {
                "name": "age",
                "dataType": "Integer",
                "classificationType": "Quasi Identifier",
                "dataTransformationType": "Generalization",
                "generalization": {
                    "type": "Rounding",
                    "hierarchyType": "Rule",
                    "rule": {
                        "interval": {
                            "levels": [
                                "5",
                                "10",
                                "50",
                                "100"
                            ]
                        }
                    }
                }
            },
            {
                "name": "marital-status",
                "dataType": "String",
                "classificationType": "Quasi Identifier",
                "dataTransformationType": "Micro Aggregation",
                "aggregateFn": "Mode"
            },
            {
                "name": "occupation",
                "dataType": "String",
                "classificationType": "Quasi Identifier",
                "dataTransformationType": "Generalization",
                "generalization": {
                    "type": "Tree Based",
                    "hierarchyType": "Data Store",
                    "dataStore": {
                        "type": "File",
                        "format": "CSV",
                        "file": {
                            "name": "samples/hierarchy/adult_hierarchy_occupation.csv",
                            "props": {
                                "delimiter": ";",
                                "quotechar": "\"",
                                "header": null
                            }
                        }
                    }
                }
            },
            {
                "name": "race",
                "dataType": "String",
                "classificationType": "Quasi Identifier",
                "dataTransformationType": "Generalization",
                "generalization": {
                    "type": "Aggregation Based",
                    "hierarchyType": "Aggregate",
                    "aggregateFn": "Mode"
                }
            },
            {
                "name": "citizenSince",
                "dataType": "Date",
                "classificationType": "Identifying Attribute"
            },
            {
                "name": "education",
                "dataType": "String",
                "classificationType": "Non-Sensitive Attribute"
            },
            {
                "name": "salary-class",
                "dataType": "String",
                "classificationType": "Sensitive Attribute"
            },
            {
                "name": "sex",
                "dataType": "String",
                "classificationType": "Sensitive Attribute"
            }
        ],
        "config": {
            "maxSuppression": 0.10,
            "suppressionData": "Any"
        },
        "privacyModel": {
            "k": {
                "kValue": 200
            },
            "ldiversity": [
                {
                    "name": "sex",
                    "lType": "Distinct-l-diversity",
                    "lFactor": 2
                },
                {
                    "name": "salary-class",
                    "lType": "Distinct-l-diversity",
                    "lFactor": 2
                }
            ]
        },
        "target": {
            "type": "File",
            "file": {
                "name": "s3://<Your-S3-BucketName>/anon-adult_retd",
                "format": "Parquet",
                "accessOptions": {
                    "key": "<Your-S3-API Key>",
                    "secret": "<Your-S3-API Secret>"
                }
            }
        }
    }
# import the anonsdk library
import anonsdk as asdk
import pandas as pd

# S3 bucket credentials: replace the placeholders with your own values.
# The placeholders are quoted so that the script is valid Python as written.
s3_key = "<AWS_Key>"
s3_secret = "<AWS_Secret>"

# set the source path for anonymization
# dataset path
source_csv_path = "adult.csv"
# create Store Object source_datastore
source_datastore = asdk.FileDataStore(source_csv_path)

# Set the target path for the anonymized result
# anonymized file path
target_csv_path = "s3://target/anon-adult_retd"

# create Store Object target_datastore; the S3 credentials are passed as access options
target_datastore = asdk.FileDataStore(target_csv_path, access_options={"key": s3_key, "secret": s3_secret})

# Create connection Object with Rest API server
conn = asdk.Connection("https://anon.protegrity.com/")
df = pd.read_csv(source_csv_path, sep=";")
df.head()

# create AnonObject with connection, dataframe metadata and source path
anon_object = asdk.AnonElement(conn, df, source_datastore)

# configuration
hierarchy_occupation_path = "samples/hierarchy/adult_hierarchy_occupation.csv"
df_occ = pd.read_csv(hierarchy_occupation_path, sep=";")
print(df_occ)
# The occupation hierarchy belongs to the "occupation" attribute;
# "marital-status" is configured separately with micro-aggregation.
anon_object['occupation'] = asdk.Gen_Tree(df_occ)
anon_object['marital-status'] = asdk.MicroAgg(asdk.AggregateFunction.Mode)
anon_object['race'] = asdk.Gen_Agg(asdk.AggregateFunction.Mode)
anon_object['age'] = asdk.Gen_Interval([5, 10, 50, 100])
# NOTE(review): the request above classifies citizenSince as an Identifying
# Attribute and education as a Non-Sensitive Attribute - confirm that
# Preserve() is the intended setting for citizenSince.
anon_object['citizenSince'] = asdk.Preserve()
anon_object['education'] = asdk.Preserve()
# NOTE(review): these two keys are assigned again in the L-diversity section
# below; presumably the later assignment takes effect - confirm.
anon_object['salary-class'] = asdk.Redact()
anon_object['sex'] = asdk.Redact()

# Configure K-anonymity (k = 200) and the suppression allowed in the dataset
anon_object.config.k = asdk.K(200)
anon_object.config['maxSuppression'] = 0.10

# Configure L-diversity on the sensitive attributes
anon_object["sex"] = asdk.LDiv(lfactor=2)
anon_object["salary-class"] = asdk.LDiv(lfactor=2)

# Send Anonymization request with Transformation Configuration with the target store
job = asdk.anonymize(anon_object, target_datastore, force=True)

# check the status of the job
job.status()

# check the comparative risk statistics from the source and result dataset
job.riskStat()

# check the comparative utility statistics from the source and result dataset
job.utilityStat()

Last modified : June 27, 2025