values.yaml

Configuration for setting up the Protegrity Anonymization API.

The values.yaml file contains the configuration for setting up the Protegrity Anonymization API. Use the template provided with the Protegrity Anonymization API or copy the following code to a .yaml file and modify it as per your requirements before running it.

## PREREQUISITES
## Create a separate namespace before installing, e.g.: kubectl create ns anon-ns
## Then set the same namespace name below.

## Namespace in which all Protegrity Anonymization API pods run.
namespace:
  name: anon-ns                           # Update the namespace if required.

## Prerequisite for setting up the Database and MinIO pods.
## Persistent storage lets a newly created Database pod reuse the same storage if the running Database pod gets disrupted.
## This persistence also helps persist Anon-storage data.
persistence:
  ## 1. Get the list of nodes in the cluster. CMD: kubectl get nodes
  ## 2. Get the name of the node running in the same zone where the external storage is created. CMD: kubectl describe nodes
  nodename: "<Node_name>"                    # Update the Node name

  ## Fetch the zone in which the node is running using the `kubectl describe node/<nodename>` command or the following command.
  ## CMD: ` kubectl describe node/<nodename> | grep topology.kubernetes.io/zone | grep -oP 'topology.kubernetes.io/zone=\K[^ ]+' `
  zone: "<Zone in which above Node is running>"

  ## For an EKS cluster, supply the volumeID of the aws-ebs volume.
  ## For an AKS cluster, supply the subscriptionID of the azure-disk.
  dbstorageId: "<Provide dbstorage ID>"           # To persist database schemas.
  anonstorageId: "<Provide anonstorage ID>"       # To persist anonymized data.
  notebookstorageId: "<Provide Notebookstorage ID>" # To persist user-created notebooks.

  fsType: ext4

anonstorage:
  ## Refer to the following command for creating your own secret.
  ## CMD: kubectl create secret generic my-minio-secret --from-literal=rootUser=foobarbaz --from-literal=rootPassword=foobarbazqux
  existingSecret: ""                # Supply your own secret name to ignore the default credentials below.
  bucket_name: "anonstorage"        # Default bucket name for minio
  ## NOTE(review): the credentials below are defaults stored in plain text in this file;
  ## prefer existingSecret (or change these values) for anything other than a test setup.
  secret:
    name: "storage-creds"           # Secret to access minio-server
    access_key: "anonuser"          # Access key for minio-server
    secret_key: "protegrity"        # Secret key for minio-server

## This section is required if the image is pulled from the Azure Container Registry.
## Create the image pull secrets, then uncomment the lines below and specify the secret names.
## Remove the [] after 'imagePullSecrets:' once you specify the secrets.
#imagePullSecrets: []
#  - name: regcred

image:
  minio_repo: quay.io/minio/minio                    # Public repo path for Minio Image.
  minio_tag: RELEASE.2022-10-29T06-21-33Z            # Tag name for Minio image.

  ## Keep the values below quoted when replacing the placeholders, so that a
  ## numeric-looking tag (for example 1.10) is read as a string and not as a number.
  repository: "<Repo_path>"                          # Repo path for the Container Registry in Azure, GCP, AWS.
  anonapi_tag: "<AnonImage_tag>"                     # Tag name of the ANON-API Image.
  anonworkstation_tag: "<WorkstationImage_tag>"      # Tag name of the ANON-Workstation Image.
  syndataapi_tag: "<SyntheticDataImage_tag>"         # Tag name for the synthetic data image.
  mlflow_tag: "<MlflowImage_tag>"                    # Tag name for Mlflow Image.

  pullPolicy: Always

## Set up and configure NGINX-INGRESS as described in the documentation before deploying the application.
ingress:
  ## Hostnames; use the hostname that was used as CN while creating the server certificates.
  ## A wildcard such as *.protegrity.com can be used as both CN and SAN, as in this example.
  ## Update the hosts according to your server certificates.
  anonhost: "anon.protegrity.com"
  sdatahost: "syndata.protegrity.com"

  ## Kubernetes TLS secret containing the certificate and key; it must be provided
  ## to terminate TLS on the Ingress Controller Load Balancer.
  ## Update this to match your own secret name.
  secret: "anon-protegrity-tls"

  ## Secret of the CA certificate used to sign both the server and client certificates;
  ## used to validate the client certificate against the server certificate above.
  ## Update this to match your own CA secret name.
  ca_secret: "ca-protegrity"

  ingress_class: "nginx-anon"

  ## External IP address of the Ingress server.
  ## CMD: kubectl get service -n nginx
  ingressIP: "<IP Address of Ingress Server>"

  ## Ingress connection timeout (connect/read/send time-out interval)
  timeout: 600
## Typically the deployment includes checksums of secrets/config,
## so that when these change on a subsequent helm install, the deployment/statefulset
## is restarted. Set this to "true" to disable that behaviour.
ignoreChartChecksums: false

####################### WORKER CONFIGURATIONS #########################
## Increase the number of worker pods as per your requirement
workers:
  hpa: anon-worker-hpa
  labels:
    app: dask-worker
  replicaCount: 1

  ## Resources defined for the worker pod
  worker_resources:
    requests:
      cpu: 2
      memory: 6Gi
    limits:
      cpu: 2
      memory: 6Gi

  ## Specs with which the worker container should start
  containerSpecs:
    memLimit: "6G"
    nthreads: 2

  ## Worker pod env reads its values from a ConfigMap manifest.
  ## The ConfigMap (wrkr-specs) is used to set these values.
  workerPodEnv:
    - name: worker_mem_limit
      valueFrom:
        configMapKeyRef:
          name: wrkr-specs
          key: worker-mem-limit
    - name: num_threads
      valueFrom:
        configMapKeyRef:
          name: wrkr-specs
          key: num-threads

  autoscaling:
    minReplicas: 1                        # Min number of worker pods which will be running when the cluster starts.
    maxReplicas: 3                        # Max number of worker pods which will autoscale in the cluster.
    targetMemoryThreshold: 4Gi            # Threshold memory-load beyond which worker pods will autoscale.

## FOR MORE INFO ABOUT PROCESSING LARGE DATASETS REFER TO THE DOCUMENTATION
########################################################################

## Create the volumes and specify the names here.
## Remove the [] after 'volumes:' once you specify volumes.
volumes: []
  ## Example: this secret is used when the user wants to read and write data to Google Cloud Storage. Refer to the documentation.
  #- name: gcs-secret
    #secret:
      #secretName: adc-gcs-creds

## Create the volumeMounts and specify the names here.
## Remove the [] after 'volumeMounts:' once you specify volumeMounts.
volumeMounts: []
  ## Example: mount for a volume named gcs-secret.
  #- name: gcs-secret
    #mountPath: /home/anonuser/gcs

## Service account created for Anonymization
serviceaccount:
  name: anon-service-account

## Pod security context: run as a non-root user with the user ID and fsGroup given below.
podSecurityContext:
  runAsNonRoot: true
  runAsUser: 1000
  fsGroup: 1000

## Configure the delays for the Liveness Probe here
livenessProbe:
  initialDelaySeconds: 50
  periodSeconds: 40

## Configure the delays for the Readiness Probe here
readinessProbe:
  initialDelaySeconds: 15
  periodSeconds: 20

## MLFLOW-APP ##
mlflow:
  name: "mlflow-depl"
  ## Service name, labels, and port for the MLflow app
  service:
    name: "mlflow-svc"
    labels:
      appname: "mlflow"
    mlflowPort: 8200

## SYNDATA-APP ##
syndataapp:
  name: "syndata-app-depl"
  ## Service name, labels, and port for the synthetic data app
  service:
    name: "syndata-app-svc"
    labels:
      appname: "syndataapp"
    syndataPort: 8095

## ANON-APP ##
anonapp:
  name: "anon-app-depl"
  ## To get logs at DEBUG, set loglevel to DEBUG and do a helm upgrade.
  loglevel: "INFO"
  ## Service name, labels, and port for the anonymization app
  service:
    name: "anon-app-svc"
    labels:
      appname: "anonapp"
    anonPort: 8090

## ANON-DATABASE ##
database:
  name: "anon-db-depl"
  labels:
    app: "anon-db"
  service:
    name: "anon-db-svc"
    dbport: 5432
  ## Persistence Volume names, access mode, and size
  persistence:
    pvName: "anon-db-pv"
    pvcName: "anon-db-pvc"
    accessMode: "ReadWriteOnce"
    storageDB:
      size: "20Gi"

## ANON-WORKSTATION ##
anonlab:
  name: "anon-workstation-depl"
  labels:
    app: "anon-lab"
  service:
    name: "anon-lab-svc"
    labport: 8888
  ## Persistence Volume names, access mode, and size
  persistence:
    pvName: "anon-nb-pv"
    pvcName: "anon-nb-pvc"
    accessMode: "ReadWriteOnce"
    size: "2Gi"

## ANON-DASK ##
dask:
  ## Names for the scheduler and worker
  scheduler:
    name: "anon-scheduler-depl"
  worker:
    name: "anon-worker-depl"
  ## Service name, labels, and ports
  service:
    name: "anon-dask-svc"
    labels:
      appname: "dask"
    daskMasterPort: 8786
    daskUiPort: 8787

## ANON-STORAGE ##
storage:
  service:
    name: "anon-minio-svc"
    port: 8100
  persistence:
    volumeName: "anon-storage-pv"
    claimName: "anon-storage-pvc"
    accessMode: "ReadWriteOnce"
    size: "20Gi"
    ## Path where the PV would be mounted on the MinIO Pod
    mountPath: "/data"
  securityContext:
    runAsUser: 1000
    runAsGroup: 1000
    fsGroup: 1000
    fsGroupChangePolicy: "OnRootMismatch"
  resources:
    requests:
      cpu: 1
      memory: "2Gi"
  certsPath: "/etc/minio/certs/"
  configPathmc: "/etc/minio/mc/"

Last modified : November 13, 2025