Privacy Engineering as a Service for Data Engineers, Scientists, Researchers
Privacy engineering tools delivered to you as APIs. Synthesize and transform data in minutes. Build trust with your users and community.
Get started for free
By signing up you are agreeing to our Privacy Policy and full Terms of Service.


Discover. Transform. Share.
Create an anonymized or synthetic dataset to safely work with data while preserving privacy.

hello_gretel.py
# hello_gretel.py — label sensitive fields in a CSV using the Gretel cloud API.
from gretel_client import get_cloud_client

# NOTE(review): GRETEL_API_KEY must be defined before this runs (e.g. read
# from the environment) — it is not set anywhere in this snippet.
client = get_cloud_client("api", GRETEL_API_KEY)

# NOTE(review): load_from_csv is not imported here — presumably a
# gretel_client helper; confirm the import before running.
my_data = load_from_csv("my_sensitive_data.csv")

# Ask the API to detect labeled entities (locations, names, hashes, ...)
# in the records, then show the per-entity summary it returns.
labeled_data = client.detect_entities(my_data)
print(labeled_data)
$ python hello_gretel.py
{
"project_record_count": 11515,
"total_entity_count": 8,
"data": {
"entities": [
{
"entity": "location",
"count": 76151.0,
"last_seen": "2020-07-06T20:55:54Z",
"approx_card": 32748
},
{
"entity": "latitude",
"count": 23030.0,
"last_seen": "2020-07-06T20:55:54Z",
"approx_card": 10678
},
{
"entity": "longitude",
"count": 23030.0,
"last_seen": "2020-07-06T20:55:54Z",
"approx_card": 10905
},
{
"entity": "md5",
"count": 17353.0,
"last_seen": "2020-07-06T20:55:54Z",
"approx_card": 14377
},
{
"entity": "url",
"count": 17353.0,
"last_seen": "2020-07-06T20:55:54Z",
"approx_card": 109
},
{
"entity": "uuid",
"count": 15885.0,
"last_seen": "2020-07-06T20:55:54Z",
"approx_card": 13385
},
{
"entity": "datetime",
"count": 11515.0,
"last_seen": "2020-07-06T20:55:54Z",
"approx_card": 119
},
{
"entity": "person_name",
"count": 1013.0,
"last_seen": "2020-07-06T20:55:42Z",
"approx_card": 49
}
]
}
}
{
"project_record_count": 11515,
"total_entity_count": 8,
"data": {
"entities": [
{
"entity": "location",
"count": 76151.0,
"last_seen": "2020-07-06T20:55:54Z",
"approx_card": 32748
},
{
"entity": "latitude",
"count": 23030.0,
"last_seen": "2020-07-06T20:55:54Z",
"approx_card": 10678
},
{
"entity": "longitude",
"count": 23030.0,
"last_seen": "2020-07-06T20:55:54Z",
"approx_card": 10905
},
{
"entity": "md5",
"count": 17353.0,
"last_seen": "2020-07-06T20:55:54Z",
"approx_card": 14377
},
{
"entity": "url",
"count": 17353.0,
"last_seen": "2020-07-06T20:55:54Z",
"approx_card": 109
},
{
"entity": "uuid",
"count": 15885.0,
"last_seen": "2020-07-06T20:55:54Z",
"approx_card": 13385
},
{
"entity": "datetime",
"count": 11515.0,
"last_seen": "2020-07-06T20:55:54Z",
"approx_card": 119
},
{
"entity": "person_name",
"count": 1013.0,
"last_seen": "2020-07-06T20:55:42Z",
"approx_card": 49
}
]
}
}
hello_gretel.py
# hello_gretel.py — train a synthetic-data model on a CSV and sample from it.
import pandas as pd
from gretel_synthetics.batch import DataFrameBatch

# Training configuration for gretel-synthetics.
config = {
    "epochs": 15,             # training passes over the data
    "vocab_size": 20000,      # tokenizer vocabulary size
    "dp": True,               # enable differential privacy
    "field_delimiter": ",",
    "checkpoint_dir": "my_models",  # where model checkpoints are written
}

df = pd.read_csv("my_sensitive_data.csv")
gretel = DataFrameBatch(df=df, config=config)
gretel.create_training_data()
gretel.train_all_batches()         # build AI models (one per column batch)
gretel.generate_all_batch_lines()  # sample synthetic rows from the models

# Reassemble the generated batches into a single DataFrame that is
# statistically similar to the source data.
synthetic_data_df = gretel.batches_to_df()
synthetic_data_df.head()
transform.py
# transform.py — replace PII fields with fake values and redact the rest.
from gretel_client.transformers import (
    DataPath,
    DataTransformPipeline,
    FakeConstantConfig,
    RedactWithLabelConfig,
)

SEED = 8675309  # fixed seed so the generated fake values are reproducible

RECORD = {
    "name": "Clark Kent",
    "address": "344 Clinton Street, Apartment 3D, Metropolis",
    "mobile": "800-579-7866",
    "email": "clark@daily-planet.com"
}

# Deterministic fake replacements for the name and address fields.
name = FakeConstantConfig(seed=SEED, fake_method="name")
address = FakeConstantConfig(seed=SEED, fake_method="address")
# Redact any remaining field whose detected label matches one of these.
auto_redact = RedactWithLabelConfig(labels=["phone_number", "email_address"])

paths = [
    DataPath(input="name", xforms=[name]),
    DataPath(input="address", xforms=[address]),
    DataPath(input="*", xforms=[auto_redact])  # wildcard catches everything else
]

pipe = DataTransformPipeline(paths)
print(pipe.transform_record(RECORD))
$ python transform.py
{
"name": "Dr. Julia Evans, MD",
"address": "1061 Autumn Station Apt. 934 Lake Tiffany, FL 56915",
"mobile": "PHONE_NUMBER",
"email": "EMAIL_ADDRESS"
}
{
"name": "Dr. Julia Evans, MD",
"address": "1061 Autumn Station Apt. 934 Lake Tiffany, FL 56915",
"mobile": "PHONE_NUMBER",
"email": "EMAIL_ADDRESS"
}
transform.py
# transform.py — continuously sanitize labeled records and forward them
# to an analysis store (Elasticsearch-style document endpoint).
import requests
from gretel_client import project_from_uri
from gretel_client.transformers import DataPath, DataTransformPipeline, FakeConstantConfig

# We don't want to write sensitive data into our analysis store:
# replace every field labeled as PII with a deterministic fake value.
faker = FakeConstantConfig(
    seed=12345,
    labels=["email_address", "person_name", "phone_number", "date_of_birth", "address"])
path = [DataPath(input="*", xforms=[faker])]
transformer = DataTransformPipeline(path)

# NOTE(review): GRETEL_URI must be defined before this runs (e.g. read
# from the environment) — it is not set anywhere in this snippet.
project = project_from_uri(GRETEL_URI)

# Stream labeled data from the Gretel API, sanitize each record, and
# send it to the analysis store. Runs forever by design (streaming worker).
while True:
    for record in project.iter_records():
        safe_record = transformer.transform_record(record)
        requests.post("https://analysis.store:9200/customers/_doc", json=safe_record)
Accelerate your work with data
Gretel APIs grant immediate access to creating anonymized or synthetic datasets so you can work safely with data while preserving privacy.
Synthetics
Train machine learning models on your dataset and generate synthetic data that is statistically equivalent.
- AI-based and open source.
- Differential privacy enabled.
- Generate unlimited data.
Transforms
Automatically label data and perform privacy preserving transformations on a dataset.
- Encrypt or replace sensitive data.
- Anonymize data in real-time.
Data catalog
Explore records, labels and fields from any CSV.
- Stream data via our API or SDKs.
- Explore records, labels and fields.
- Create and share datasets.

Start creating safe data
Sign up now to start using our public beta. Gretel is free to use during our beta period.