Create safe data
The first APIs that let you balance, anonymize, and share your data, with privacy guarantees.

Discover. Transform. Share.
Create an anonymized or synthetic dataset to safely work with data while preserving privacy.

hello_gretel.py
from gretel_client import get_cloud_client

# Connect to the Gretel cloud API
client = get_cloud_client("api", GRETEL_API_KEY)

# Load records from a local CSV and auto-label the sensitive entities
my_data = load_from_csv("my_sensitive_data.csv")
labeled_data = client.detect_entities(my_data)
print(labeled_data)
$ python hello_gretel.py
{
  "project_record_count": 11515,
  "total_entity_count": 8,
  "data": {
    "entities": [
      {
        "entity": "location",
        "count": 76151.0,
        "last_seen": "2020-07-06T20:55:54Z",
        "approx_card": 32748
      },
      {
        "entity": "latitude",
        "count": 23030.0,
        "last_seen": "2020-07-06T20:55:54Z",
        "approx_card": 10678
      },
      {
        "entity": "longitude",
        "count": 23030.0,
        "last_seen": "2020-07-06T20:55:54Z",
        "approx_card": 10905
      },
      {
        "entity": "md5",
        "count": 17353.0,
        "last_seen": "2020-07-06T20:55:54Z",
        "approx_card": 14377
      },
      {
        "entity": "url",
        "count": 17353.0,
        "last_seen": "2020-07-06T20:55:54Z",
        "approx_card": 109
      },
      {
        "entity": "uuid",
        "count": 15885.0,
        "last_seen": "2020-07-06T20:55:54Z",
        "approx_card": 13385
      },
      {
        "entity": "datetime",
        "count": 11515.0,
        "last_seen": "2020-07-06T20:55:54Z",
        "approx_card": 119
      },
      {
        "entity": "person_name",
        "count": 1013.0,
        "last_seen": "2020-07-06T20:55:42Z",
        "approx_card": 49
      }
    ]
  }
}
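The report is plain JSON, so it drops straight into your existing tooling. As a minimal sketch (assuming the report above was saved to a file named entity_report.json, which is illustrative only), you could rank the detected entities by how many times they appear:

import json
import pandas as pd

# Load the entity report and rank entities by occurrence count.
# The file name is hypothetical; use whatever you saved the report to.
with open("entity_report.json") as f:
    report = json.load(f)

entities = pd.DataFrame(report["data"]["entities"])
print(entities.sort_values("count", ascending=False))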
hello_gretel.py
import pandas as pd
from gretel_synthetics.batch import DataFrameBatch

config = {
    "epochs": 15,
    "vocab_size": 20000,
    "dp": True,  # enable differential privacy
    "field_delimiter": ",",
    "checkpoint_dir": "my_models"
}

# Train batch models on the source data, then generate synthetic records
df = pd.read_csv("my_sensitive_data.csv")
gretel = DataFrameBatch(df=df, config=config)
gretel.create_training_data()
gretel.train_all_batches()  # build AI models
gretel.generate_all_batch_lines()

# Collect the generated records into a new, shareable DataFrame
synthetic_data_df = gretel.batches_to_df()
synthetic_data_df.head()
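Before sharing the synthetic frame, it is worth a quick sanity check that it resembles the source data. A minimal sketch, assuming both frames contain numeric columns (the comparison is plain pandas, not part of the Gretel API):

# Compare summary statistics of the real and synthetic data side by side.
comparison = pd.concat(
    [df.describe().T.add_suffix("_real"),
     synthetic_data_df.describe().T.add_suffix("_synthetic")],
    axis=1)
print(comparison)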
transform.py
from gretel_client.transformers import (
    DataPath, DataTransformPipeline, FakeConstantConfig, RedactWithLabelConfig)

SEED = 8675309

RECORD = {
    "name": "Clark Kent",
    "address": "344 Clinton Street, Apartment 3D, Metropolis",
    "mobile": "800-579-7866",
    "email": "clark@daily-planet.com"
}

# Replace names and addresses with seeded fake values
name = FakeConstantConfig(seed=SEED, fake_method="name")
address = FakeConstantConfig(seed=SEED, fake_method="address")

# Redact any field labeled as a phone number or email address
auto_redact = RedactWithLabelConfig(labels=["phone_number", "email_address"])

paths = [
    DataPath(input="name", xforms=[name]),
    DataPath(input="address", xforms=[address]),
    DataPath(input="*", xforms=[auto_redact])
]

pipe = DataTransformPipeline(paths)
print(pipe.transform_record(RECORD))
$ python transform.py
{
  "name": "Dr. Julia Evans, MD",
  "address": "1061 Autumn Station Apt. 934 Lake Tiffany, FL 56915",
  "mobile": "PHONE_NUMBER",
  "email": "EMAIL_ADDRESS"
}
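Because each FakeConstantConfig above is built with a fixed SEED, re-running the pipeline should replace the same inputs with the same fake values, keeping downstream joins and comparisons consistent. A quick check, assuming the pipeline from the script above and that output is deterministic for a fixed seed (the assertion is illustrative):

# With a fixed seed, repeated runs should produce identical output.
assert pipe.transform_record(RECORD) == pipe.transform_record(RECORD)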
transform.py
import requests
from gretel_client import project_from_uri
from gretel_client.transformers import DataPath, DataTransformPipeline, FakeConstantConfig

# We don't want to write sensitive data into our analysis store
faker = FakeConstantConfig(
    seed=12345,
    labels=["email_address", "person_name", "phone_number", "date_of_birth", "address"])
path = [DataPath(input="*", xforms=[faker])]
transformer = DataTransformPipeline(path)
project = project_from_uri(GRETEL_URI)

# stream labeled data from the Gretel API, sanitize and
# send it to the analysis store
while True:
    for record in project.iter_records():
        safe_record = transformer.transform_record(record)
        requests.post("https://analysis.store:9200/customers/_doc", json=safe_record)
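A long-running stream will eventually hit transient network failures, so the indexing call deserves some protection. A minimal hardening sketch (the retry helper and its parameters are illustrative, not part of the Gretel client):

import time

def post_with_retry(record, retries=3, backoff=2.0):
    # Retry the POST with exponential backoff so one failed request
    # does not stop the stream; give up after a few attempts.
    for attempt in range(retries):
        try:
            resp = requests.post(
                "https://analysis.store:9200/customers/_doc", json=record)
            resp.raise_for_status()
            return resp
        except requests.RequestException:
            time.sleep(backoff ** attempt)
    return None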
Workflow agnostic, data forward
Gretel offers you all the tools you need to augment and improve your current data processing workflows.
Data Catalog
- Stream data via our API or SDKs
- Explore records, labels and fields
- Create and share datasets
Transformation
- Simple APIs and clear docs
- Encrypt or replace sensitive data
- Anonymize data in real-time
Synthetics
- AI-based and open source
- Differential privacy enabled
- Generate unlimited data
Data transformation made easy
Gretel gives you the tools to make customer data useful and safe for developers and companies.
Synthetics
Generate a synthetic dataset for training ML models and safely sharing data with privacy guaranteed.
Explore Synthetics

Transformations
De-identify, anonymize, and mask datasets to share internally or with the public.
Explore Transformers

Start creating safe data
Sign up now to start using our public beta. Gretel is free to use during our beta period.