From 2b92ec801c30798d5e16f6448c9c31fca4b518ed Mon Sep 17 00:00:00 2001 From: jzonthemtn Date: Wed, 23 Oct 2024 16:24:32 -0400 Subject: [PATCH] Deployed 201ce14 with MkDocs version: 1.6.1 --- 404.html | 106 ++ deidentification/bucketing/index.html | 106 ++ deidentification/date-shifting/index.html | 106 ++ deidentification/deidentification/index.html | 106 ++ deidentification/encryption/index.html | 106 ++ deidentification/pii_phi_nppi/index.html | 106 ++ .../redaction-and-masking/index.html | 106 ++ deidentification/replacement/index.html | 106 ++ evaluating-performance/index.html | 106 ++ index.html | 106 ++ other_features/alerts/index.html | 106 ++ other_features/anonymization/index.html | 106 ++ other_features/span_disambiguation/index.html | 106 ++ policies/document_analysis/index.html | 1071 +++++++++++++++ .../excluding_by_document_type/index.html | 1172 ++++++++++++++++ policies/filter_policies/index.html | 106 ++ policies/filter_strategies/index.html | 108 +- .../filters/common_filters/ages/index.html | 106 ++ .../bank-routing-numbers/index.html | 106 ++ .../bitcoin-addresses/index.html | 106 ++ .../common_filters/creditcards/index.html | 106 ++ .../filters/common_filters/dates/index.html | 106 ++ .../drivers-license-numbers/index.html | 106 ++ .../common_filters/email-addresses/index.html | 106 ++ .../common_filters/iban-codes/index.html | 106 ++ .../common_filters/ip-addresses/index.html | 106 ++ .../common_filters/mac-addresses/index.html | 106 ++ .../passport-numbers/index.html | 106 ++ .../phone-number-extensions/index.html | 106 ++ .../common_filters/phone-numbers/index.html | 106 ++ .../common_filters/sections/index.html | 106 ++ .../common_filters/ssns-and-tins/index.html | 106 ++ .../tracking-numbers/index.html | 106 ++ .../filters/common_filters/urls/index.html | 106 ++ .../filters/common_filters/vins/index.html | 106 ++ .../common_filters/zip-codes/index.html | 106 ++ .../custom_filters/dictionary/index.html | 106 ++ 
.../custom_filters/identifier/index.html | 106 ++ policies/filters/index.html | 106 ++ policies/filters/locations/cities/index.html | 106 ++ .../filters/locations/counties/index.html | 106 ++ .../hospital-abbreviations/index.html | 106 ++ .../filters/locations/hospitals/index.html | 106 ++ .../locations/state-abbreviations/index.html | 106 ++ policies/filters/locations/states/index.html | 106 ++ .../persons_names/first-names/index.html | 106 ++ .../persons-names-ner/index.html | 106 ++ .../physician-names-ner/index.html | 106 ++ .../filters/persons_names/surnames/index.html | 106 ++ .../ignoring_sensitive_information/index.html | 108 +- policies/sample_policies/index.html | 106 ++ policies/splitting_input_text/index.html | 1178 +++++++++++++++++ quick_starts/quick_start_aws/index.html | 106 ++ quick_starts/quick_start_azure/index.html | 106 ++ quick_starts/quick_start_gcp/index.html | 106 ++ search/search_index.json | 2 +- settings/index.html | 106 ++ sitemap.xml | 12 + sitemap.xml.gz | Bin 683 -> 726 bytes system_requirements/index.html | 106 ++ upgrading/index.html | 106 ++ 61 files changed, 9266 insertions(+), 3 deletions(-) create mode 100644 policies/document_analysis/index.html create mode 100644 policies/excluding_by_document_type/index.html create mode 100644 policies/splitting_input_text/index.html diff --git a/404.html b/404.html index b225dcf..fbebde2 100644 --- a/404.html +++ b/404.html @@ -670,6 +670,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/deidentification/bucketing/index.html b/deidentification/bucketing/index.html index c0f26cf..6118275 100644 --- a/deidentification/bucketing/index.html +++ b/deidentification/bucketing/index.html @@ -695,6 +695,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/deidentification/date-shifting/index.html b/deidentification/date-shifting/index.html index a06f5cb..d58adf7 100644 --- a/deidentification/date-shifting/index.html +++ b/deidentification/date-shifting/index.html @@ -695,6 +695,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/deidentification/deidentification/index.html b/deidentification/deidentification/index.html index e8f188e..63e26cb 100644 --- a/deidentification/deidentification/index.html +++ b/deidentification/deidentification/index.html @@ -693,6 +693,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/deidentification/encryption/index.html b/deidentification/encryption/index.html index d9264f3..34c7e27 100644 --- a/deidentification/encryption/index.html +++ b/deidentification/encryption/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/deidentification/pii_phi_nppi/index.html b/deidentification/pii_phi_nppi/index.html index e632690..9f273c5 100644 --- a/deidentification/pii_phi_nppi/index.html +++ b/deidentification/pii_phi_nppi/index.html @@ -732,6 +732,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/deidentification/redaction-and-masking/index.html b/deidentification/redaction-and-masking/index.html index 2ae79fe..7d710cb 100644 --- a/deidentification/redaction-and-masking/index.html +++ b/deidentification/redaction-and-masking/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/deidentification/replacement/index.html b/deidentification/replacement/index.html index 31c4244..089371e 100644 --- a/deidentification/replacement/index.html +++ b/deidentification/replacement/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/evaluating-performance/index.html b/evaluating-performance/index.html index 78dd5ec..ab881ab 100644 --- a/evaluating-performance/index.html +++ b/evaluating-performance/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/index.html b/index.html index 004841f..4f13dc7 100644 --- a/index.html +++ b/index.html @@ -689,6 +689,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/other_features/alerts/index.html b/other_features/alerts/index.html index 61a8137..47e1e3c 100644 --- a/other_features/alerts/index.html +++ b/other_features/alerts/index.html @@ -681,6 +681,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/other_features/anonymization/index.html b/other_features/anonymization/index.html index 767ef5e..5755cdf 100644 --- a/other_features/anonymization/index.html +++ b/other_features/anonymization/index.html @@ -681,6 +681,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/other_features/span_disambiguation/index.html b/other_features/span_disambiguation/index.html index feb75c3..571fd38 100644 --- a/other_features/span_disambiguation/index.html +++ b/other_features/span_disambiguation/index.html @@ -681,6 +681,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/document_analysis/index.html b/policies/document_analysis/index.html new file mode 100644 index 0000000..14209f1 --- /dev/null +++ b/policies/document_analysis/index.html @@ -0,0 +1,1071 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Document Analysis - Philter + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + + + +
    + + +
    + +
    + + + + + + +
    +
    + + + +
    +
    +
    + + + + + +
    +
    +
    + + + +
    +
    +
    + + + +
    +
    +
    + + + +
    +
    + + + + + + + +

    Document Analysis

    +

    Philter analyzes received documents prior to redacting the document. This analysis is done to help Philter get a better understanding of the document. The results of the analysis are used to exclude certain document types from redaction and to improve Philter's redaction performance.

    +

    While not recommended, the automatic document analysis can be disabled in a policy. By default, document analysis is enabled.

    +
    +

    Warning: Disabling document analysis will cause any policy features dependent on the results of the document analysis to not function.

    +
    +

    An example policy with disabled document analysis is shown below.

    +
    {
    +  "name": "email-and-phone-numbers",
    +  "config": {
    +    "analysis": {
    +      "enabled": false
    +    }
    +  },
    +  "identifiers": {
    +    "emailAddress": {
    +      "emailAddressFilterStrategies": [
    +        {
    +          "strategy": "REDACT",
    +          "redactionFormat": "{{{REDACTED-%t}}}"
    +        }
    +      ]
    +    }
    +  }
    +}
    +
    + + + + + + + + + + + + + +
    +
    + + + +
    + +
    + + + +
    +
    +
    +
    + + + + + + + + + + \ No newline at end of file diff --git a/policies/excluding_by_document_type/index.html b/policies/excluding_by_document_type/index.html new file mode 100644 index 0000000..c97d539 --- /dev/null +++ b/policies/excluding_by_document_type/index.html @@ -0,0 +1,1172 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Excluding by DocumentType - Philter + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + + + +
    + + +
    + +
    + + + + + + +
    +
    + + + +
    +
    +
    + + + + + +
    +
    +
    + + + +
    +
    +
    + + + +
    +
    +
    + + + +
    +
    + + + + + + + +

    Excluding by Document Type

    +

    Philter can automatically detect certain types of documents and exclude those documents from redaction of certain sensitive information. For example, you may want to redact SSNs/TINs in all but one type of document.

    +

    To exclude a document type from a specific filter, set the excludeDocumentTypes value to a list of document types to exclude for a filter strategy. Filter strategies for all filter types support the excludeDocumentTypes property.

    +

    An example to exclude email addresses from being redacted in a subpoena document is given below:

    +
    {
    +   "name": "email-address",
    +   "identifiers": {
    +      "emailAddress": {
    +         "emailAddressFilterStrategies": [
    +            {
    +               "strategy": "REDACT",
    +               "redactionFormat": "{{{REDACTED-%t}}}",
    +               "excludeDocumentTypes": ["SUBPOENA"]
    +            }
    +         ]
    +      }
    +   }
    +}
    +
    +

    In this example, email addresses are redacted in all document types except documents Philter identifies as being subpoena documents.

    +

    Document Types Supported by Automatic Detection

    +

    Philter currently supports automatically detecting the following document types.

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Document Type | Document Description
    Subpoena | Form 2540 - Federal Bankruptcy - SUBPOENA FOR RULE 2004 EXAMINATION
    Subpoena | Form 2550 - Federal Bankruptcy - SUBPOENA TO APPEAR AND TESTIFY
    Subpoena | Form 2560 - Federal Bankruptcy - SUBPOENA TO TESTIFY AT A DEPOSITION
    Subpoena | Form 2570 - Federal Bankruptcy - SUBPOENA TO PRODUCE DOCUMENTS
    Subpoena | AO 88 - SUBPOENA TO APPEAR AND TESTIFY AT A HEARING OR TRIAL IN A CIVIL ACTION
    Subpoena | AO 88A - SUBPOENA TO TESTIFY AT A DEPOSITION IN A CIVIL ACTION
    Subpoena | AO 88B - SUBPOENA TO PRODUCE DOCUMENTS, INFORMATION, OR OBJECTS
    Subpoena | AO 89 - SUBPOENA TO TESTIFY AT A HEARING OR TRIAL IN A CRIMINAL CASE
    Subpoena | AO 90 - SUBPOENA TO TESTIFY AT A DEPOSITION IN A CRIMINAL CASE
    Subpoena | AO 110 - SUBPOENA TO TESTIFY BEFORE A GRAND JURY
    + + + + + + + + + + + + + +
    +
    + + + +
    + +
    + + + +
    +
    +
    +
    + + + + + + + + + + \ No newline at end of file diff --git a/policies/filter_policies/index.html b/policies/filter_policies/index.html index f795a73..17c6303 100644 --- a/policies/filter_policies/index.html +++ b/policies/filter_policies/index.html @@ -750,6 +750,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filter_strategies/index.html b/policies/filter_strategies/index.html index f6cb82f..ed7dfa5 100644 --- a/policies/filter_strategies/index.html +++ b/policies/filter_strategies/index.html @@ -18,7 +18,7 @@ - + @@ -834,6 +834,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/ages/index.html b/policies/filters/common_filters/ages/index.html index b4ae40d..6cad354 100644 --- a/policies/filters/common_filters/ages/index.html +++ b/policies/filters/common_filters/ages/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/bank-routing-numbers/index.html b/policies/filters/common_filters/bank-routing-numbers/index.html index e99f3d9..1bb4c37 100644 --- a/policies/filters/common_filters/bank-routing-numbers/index.html +++ b/policies/filters/common_filters/bank-routing-numbers/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/bitcoin-addresses/index.html b/policies/filters/common_filters/bitcoin-addresses/index.html index 9abc577..386cefb 100644 --- a/policies/filters/common_filters/bitcoin-addresses/index.html +++ b/policies/filters/common_filters/bitcoin-addresses/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/creditcards/index.html b/policies/filters/common_filters/creditcards/index.html index 4b52f29..1126d55 100644 --- a/policies/filters/common_filters/creditcards/index.html +++ b/policies/filters/common_filters/creditcards/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/dates/index.html b/policies/filters/common_filters/dates/index.html index 51cc91e..affa2b8 100644 --- a/policies/filters/common_filters/dates/index.html +++ b/policies/filters/common_filters/dates/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/drivers-license-numbers/index.html b/policies/filters/common_filters/drivers-license-numbers/index.html index 9039e03..43ce353 100644 --- a/policies/filters/common_filters/drivers-license-numbers/index.html +++ b/policies/filters/common_filters/drivers-license-numbers/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/email-addresses/index.html b/policies/filters/common_filters/email-addresses/index.html index 7fdacff..e45e209 100644 --- a/policies/filters/common_filters/email-addresses/index.html +++ b/policies/filters/common_filters/email-addresses/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/iban-codes/index.html b/policies/filters/common_filters/iban-codes/index.html index 450407d..dc5d759 100644 --- a/policies/filters/common_filters/iban-codes/index.html +++ b/policies/filters/common_filters/iban-codes/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/ip-addresses/index.html b/policies/filters/common_filters/ip-addresses/index.html index c25c916..919b91c 100644 --- a/policies/filters/common_filters/ip-addresses/index.html +++ b/policies/filters/common_filters/ip-addresses/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/mac-addresses/index.html b/policies/filters/common_filters/mac-addresses/index.html index b16c1b8..b4cc6b3 100644 --- a/policies/filters/common_filters/mac-addresses/index.html +++ b/policies/filters/common_filters/mac-addresses/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/passport-numbers/index.html b/policies/filters/common_filters/passport-numbers/index.html index 073345b..9e07d1f 100644 --- a/policies/filters/common_filters/passport-numbers/index.html +++ b/policies/filters/common_filters/passport-numbers/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/phone-number-extensions/index.html b/policies/filters/common_filters/phone-number-extensions/index.html index 28cffce..a015261 100644 --- a/policies/filters/common_filters/phone-number-extensions/index.html +++ b/policies/filters/common_filters/phone-number-extensions/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/phone-numbers/index.html b/policies/filters/common_filters/phone-numbers/index.html index 8737428..eb6b442 100644 --- a/policies/filters/common_filters/phone-numbers/index.html +++ b/policies/filters/common_filters/phone-numbers/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/sections/index.html b/policies/filters/common_filters/sections/index.html index 4acbfe2..6502605 100644 --- a/policies/filters/common_filters/sections/index.html +++ b/policies/filters/common_filters/sections/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/ssns-and-tins/index.html b/policies/filters/common_filters/ssns-and-tins/index.html index 1121dec..8ef2e9e 100644 --- a/policies/filters/common_filters/ssns-and-tins/index.html +++ b/policies/filters/common_filters/ssns-and-tins/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/tracking-numbers/index.html b/policies/filters/common_filters/tracking-numbers/index.html index 1092d7f..225f568 100644 --- a/policies/filters/common_filters/tracking-numbers/index.html +++ b/policies/filters/common_filters/tracking-numbers/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/urls/index.html b/policies/filters/common_filters/urls/index.html index b379e4a..60db3a0 100644 --- a/policies/filters/common_filters/urls/index.html +++ b/policies/filters/common_filters/urls/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/vins/index.html b/policies/filters/common_filters/vins/index.html index bd666b6..0e4828e 100644 --- a/policies/filters/common_filters/vins/index.html +++ b/policies/filters/common_filters/vins/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/common_filters/zip-codes/index.html b/policies/filters/common_filters/zip-codes/index.html index 9663c22..a81cbd8 100644 --- a/policies/filters/common_filters/zip-codes/index.html +++ b/policies/filters/common_filters/zip-codes/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/custom_filters/dictionary/index.html b/policies/filters/custom_filters/dictionary/index.html index 1ebc4a0..92fd002 100644 --- a/policies/filters/custom_filters/dictionary/index.html +++ b/policies/filters/custom_filters/dictionary/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/custom_filters/identifier/index.html b/policies/filters/custom_filters/identifier/index.html index 96eec9d..0671fa8 100644 --- a/policies/filters/custom_filters/identifier/index.html +++ b/policies/filters/custom_filters/identifier/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/index.html b/policies/filters/index.html index 4661aac..bb1268c 100644 --- a/policies/filters/index.html +++ b/policies/filters/index.html @@ -765,6 +765,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/locations/cities/index.html b/policies/filters/locations/cities/index.html index a885a1a..8a198ed 100644 --- a/policies/filters/locations/cities/index.html +++ b/policies/filters/locations/cities/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/locations/counties/index.html b/policies/filters/locations/counties/index.html index a685d85..04c25b6 100644 --- a/policies/filters/locations/counties/index.html +++ b/policies/filters/locations/counties/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/locations/hospital-abbreviations/index.html b/policies/filters/locations/hospital-abbreviations/index.html index c43a1c2..613b9f9 100644 --- a/policies/filters/locations/hospital-abbreviations/index.html +++ b/policies/filters/locations/hospital-abbreviations/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/locations/hospitals/index.html b/policies/filters/locations/hospitals/index.html index 017b249..3fbe938 100644 --- a/policies/filters/locations/hospitals/index.html +++ b/policies/filters/locations/hospitals/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/locations/state-abbreviations/index.html b/policies/filters/locations/state-abbreviations/index.html index f96e526..bb08b1a 100644 --- a/policies/filters/locations/state-abbreviations/index.html +++ b/policies/filters/locations/state-abbreviations/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/locations/states/index.html b/policies/filters/locations/states/index.html index 5695def..adbd3e9 100644 --- a/policies/filters/locations/states/index.html +++ b/policies/filters/locations/states/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/persons_names/first-names/index.html b/policies/filters/persons_names/first-names/index.html index def5fcf..d3d0d71 100644 --- a/policies/filters/persons_names/first-names/index.html +++ b/policies/filters/persons_names/first-names/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/persons_names/persons-names-ner/index.html b/policies/filters/persons_names/persons-names-ner/index.html index a37c40b..21ecc56 100644 --- a/policies/filters/persons_names/persons-names-ner/index.html +++ b/policies/filters/persons_names/persons-names-ner/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/persons_names/physician-names-ner/index.html b/policies/filters/persons_names/physician-names-ner/index.html index 809afdc..e4dd21b 100644 --- a/policies/filters/persons_names/physician-names-ner/index.html +++ b/policies/filters/persons_names/physician-names-ner/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/filters/persons_names/surnames/index.html b/policies/filters/persons_names/surnames/index.html index 353a3d2..56501e6 100644 --- a/policies/filters/persons_names/surnames/index.html +++ b/policies/filters/persons_names/surnames/index.html @@ -677,6 +677,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/ignoring_sensitive_information/index.html b/policies/ignoring_sensitive_information/index.html index dec33a8..7ec5f88 100644 --- a/policies/ignoring_sensitive_information/index.html +++ b/policies/ignoring_sensitive_information/index.html @@ -15,7 +15,7 @@ - + @@ -681,6 +681,112 @@ + + + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + diff --git a/policies/sample_policies/index.html b/policies/sample_policies/index.html index 4dee7a2..f9acc5c 100644 --- a/policies/sample_policies/index.html +++ b/policies/sample_policies/index.html @@ -795,6 +795,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/policies/splitting_input_text/index.html b/policies/splitting_input_text/index.html new file mode 100644 index 0000000..cf9d97c --- /dev/null +++ b/policies/splitting_input_text/index.html @@ -0,0 +1,1178 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Splitting Input Text - Philter + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + + + + Skip to content + + +
    +
    + +
    + + + + + + +
    + + +
    + +
    + + + + + + +
    +
    + + + +
    +
    +
    + + + + + +
    +
    +
    + + + +
    +
    +
    + + + +
    +
    +
    + + + +
    +
    + + + + + + + +

    Splitting Input Text

    +

    On a per-policy basis, Philter can split input text to process each split individually. This can improve performance and allows for handling long input text. Splitting is disabled by default.

    +

    An example split configuration in a policy is shown below.

    +
    {
    +  "name": "default",
    +  "identifiers": {}, 
    +  "config": {
    +    "splitting": {
    +      "enabled": true,
    +      "threshold": 10000,
    +      "method": "newline"
    +    }
    +  }
    +}
    +
    +

    In this example policy, splitting is enabled for inputs greater than or equal to 10,000 characters in length.

    +

    The method of splitting the text will be the newline method. This method will cause Philter to split the text based on the locations of new line characters in the input text. Additional methods of text splitting may be added in future versions.

    +

    Because the newline method splits text based on the locations of new line characters in the text, the text contained in the reassembled filter responses may not be an exact match of the input text. This is due to white space and other characters that may reside near the new line characters that get omitted during processing.

    +

    Text Splitting Policy Properties

    + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    Property | Description | Allowed Values | Default Value
    enabled | Whether or not input texts are split. When false, requests with text exceeding the threshold generate an HTTP 413 PayloadTooLarge error response. | true or false | false
    threshold | When to split the input text. Set to -1 to disable splitting. | Any integer value. | 10000
    method | How to split the text. | newline | newline
    +

    Alternative to Philter Splitting Text

    +

    In some cases it may be best to split your input text client side prior to sending the text to Philter. This gives you full control over how the text will be split and provides more predictable responses from Philter because you know how the text is split.

    +

    An example of splitting text into chunks prior to sending the text to Philter is given in the commands below:

    +
    # Given a large file called largefile.txt, split it into 10k pieces.
    +$ split -b 10k largefile.txt segment
    +
    +# Now process the pieces.
    +$ curl -s -X POST -k "https://philter:8080/api/filter?d=document1" --data "@/tmp/segmentaa" -H "Content-type: text/plain" > out1
    +$ curl -s -X POST -k "https://philter:8080/api/filter?d=document1" --data "@/tmp/segmentab" -H "Content-type: text/plain" > out2
    +
    +# Now recombine the outputs into a single file.
    +$ cat out1 out2 > filtered.txt
    +
    + + + + + + + + + + + + + +
    +
    + + + +
    + +
    + + + +
    +
    +
    +
    + + + + + + + + + + \ No newline at end of file diff --git a/quick_starts/quick_start_aws/index.html b/quick_starts/quick_start_aws/index.html index 7c15691..b8cedaa 100644 --- a/quick_starts/quick_start_aws/index.html +++ b/quick_starts/quick_start_aws/index.html @@ -774,6 +774,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/quick_starts/quick_start_azure/index.html b/quick_starts/quick_start_azure/index.html index 00d94f7..f442529 100644 --- a/quick_starts/quick_start_azure/index.html +++ b/quick_starts/quick_start_azure/index.html @@ -774,6 +774,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/quick_starts/quick_start_gcp/index.html b/quick_starts/quick_start_gcp/index.html index f9bf3a1..2fa56da 100644 --- a/quick_starts/quick_start_gcp/index.html +++ b/quick_starts/quick_start_gcp/index.html @@ -774,6 +774,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/search/search_index.json b/search/search_index.json index e37773d..2a9fd76 100644 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Philter","text":"

    Philter is an application that finds, identifies, and removes sensitive information, such as protected health information (PHI) and personally identifiable information (PII), and user-defined sensitive information from natural language text. Philter is ideal for use in text processing pipelines where sensitive information needs to be removed, encrypted, or redacted from the text.

    This documentation applies to Philter 2.4.0. If you are upgrading to this version see Upgrading Philter.

    To get going fast, jump to the Quick Starts to launch Philter on AWS, Azure, or Google Cloud.

    "},{"location":"evaluating-performance/","title":"How to Evaluate Phileas' Performance","text":"

    A common question we receive is how well Phileas performs. Our answer to this question is probably less than satisfactory because it simply depends. What does it depend on? Phileas' performance is heavily dependent upon your individual data. Comparing metrics of Phileas' performance between different customer datasets is like comparing apples and oranges.

    If your data is not exactly like another customer's data then the metrics will not be applicable to your data. In terms of the classic information retrieval metrics precision and recall, comparing these values between customers can give false impressions about Phileas' performance, both good and bad.

    This guide walks you through how to evaluate Phileas' performance. If you are just getting started with Phileas please see the Quick Starts instead. Then you can come back here to learn how to evaluate Phileas' performance.

    "},{"location":"evaluating-performance/#guide-to-evaluating-performance","title":"Guide to Evaluating Performance","text":"

    We have created this guide to help you evaluate Phileas' performance on your data. The guide involves determining the types of sensitive information you want to redact, configuring those filters, optimizing the configuration, and then capturing the performance metrics.

    If you are using Philter we will gladly perform these steps for you and provide you a detailed Phileas performance report generated from your data. Please contact us to start the process.

    "},{"location":"evaluating-performance/#what-you-need","title":"What You Need","text":"

    To evaluate Phileas' performance you need:

    • An application using Phileas.
    • A list of the types of sensitive information you want to redact.
    • A data set representative of the text you will be redacting using Phileas. It's important the data set be representative so the evaluation results will transfer to the actual data redaction.
    • The same data set but with annotated sensitive information. These annotations will be used to calculate the precision and recall metrics.
    "},{"location":"evaluating-performance/#configuring-phileas","title":"Configuring Phileas","text":"

    Before we can begin our evaluation we need to create a policy. A policy is a file that defines the types of sensitive information that will be redacted and how it will be redacted. The policies are stored on the Phileas instance under /opt/Phileas/policies. You can edit the policies directly there using a text editor or you can use Phileas' API to upload a policy. In this case we recommend just using a text editor on the Phileas instance to create a policy.

    When using a text editor to create and edit a policy, be sure to save the policy often. Frequent saving can make editing a policy easier.

    We also recommend placing your policy directory under source control to have a history and change log of your policies.

    "},{"location":"evaluating-performance/#creating-a-policy","title":"Creating a Policy","text":"

    Make a copy of the default policy, and we will modify the copy for our needs.

    cp /opt/Phileas/policies/default.json /opt/Phileas/policies/evaluation.json

    Now open /opt/Phileas/policies/evaluation.json in a text editor. (The content of evaluation.json will be similar to what's shown below but may have minor differences between different versions of Phileas.)

    {\n   \"name\": \"default\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n

    The first thing we need to do is to set the name of the policy. Replace default with evaluation and save the file.

    "},{"location":"evaluating-performance/#identifying-the-filters-you-need","title":"Identifying the Filters You Need","text":"

    The rest of the file contains the filters that are enabled in the default policy. We need to make sure that each type of sensitive information that you want to redact is represented by a filter in this file. Look through the rest of the policy and determine which filters are listed that you do not need and also which filters you do need that are not listed.

    "},{"location":"evaluating-performance/#disabling-filters-we-do-not-need","title":"Disabling Filters We Do Not Need","text":"

    If a filter is listed in the policy and you do not need the filter you have two options. You can either delete those lines from the policy and save the file, or you can set the filter's enabled property to false. Using the enabled property allows you to keep the filter configuration in the policy in case it is needed later but both options have the same effect.

    "},{"location":"evaluating-performance/#enabling-filters-not-in-the-default-policy","title":"Enabling Filters Not in the Default Policy","text":"

    Let's say you want to redact bitcoin addresses. The bitcoin address filter is not in the default policy. To add the bitcoin address filter we will refer to Phileas' documentation on the bitcoin address filter, get the configuration, and copy it into the policy.

    From the bitcoin address filter documentation we see the configuration for the bitcoin address filter is:

          \"bitcoinAddress\": {\n         \"bitcoinAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n

    We can copy this configuration and paste it into our policy:

    {\n   \"name\": \"evaluation\",\n   \"identifiers\": {\n      \"bitcoinAddress\": {\n         \"bitcoinAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n

    The order of the filters in the policy does not matter and has no impact on performance. We typically place the filters in the policy alphabetically just to improve readability.

    Repeat these steps until you have added a filter for each of the types of sensitive information you want to redact. Typically, the default redaction strategy and redactionFormat values for each filter should be fine for evaluation.

    When finished modifying the policy, save the file and close the text editor. Now restart Phileas for the policy changes to be loaded:

    sudo systemctl restart Phileas\n
    "},{"location":"evaluating-performance/#submitting-text-for-redaction","title":"Submitting Text for Redaction","text":"

    With our policy in place we can now send text to Phileas for redaction using that policy:

    PhileasConfiguration phileasConfiguration = ConfigFactory.create(PhileasConfiguration.class);\n\nFilterService filterService = new PhileasFilterService(phileasConfiguration);\n\nFilterResponse response = filterService.filter(policies, context, documentId, body, MimeType.TEXT_PLAIN);\n

    The explain API endpoint produces a detailed description of the redaction. The response will include a list of spans that contain the start and stop positions of redacted text and the type of sensitive information that was redacted. Using this information we can compare the redacted information to our annotated file to calculate precision and recall metrics.

    "},{"location":"evaluating-performance/#calculating-precision-and-recall","title":"Calculating Precision and Recall","text":"

    Now we can calculate the precision and recall metrics.

    • Precision is the number of true positives divided by the number of true positives plus false positives.
    • Recall is the number of true positives divided by the number of false negatives plus true positives.

    • The F-1 score is the harmonic mean of precision and recall.

    "},{"location":"settings/","title":"Settings","text":"

    Phileas has settings to control how it operates. The settings and how to configure each are described below.

    The configuration for the types of sensitive information that Phileas identifies are defined in filter policies outside of Phileas' configuration properties described on this page.

    "},{"location":"settings/#configuring-phileas","title":"Configuring Phileas","text":""},{"location":"settings/#the-phileas-settings-file","title":"The Phileas Settings File","text":"

    Phileas looks for its settings in an application.properties file.

    "},{"location":"settings/#using-environment-variables","title":"Using Environment Variables","text":"

    Properties set via environment variables take precedence over properties set in Phileas' settings file.

    All following properties can also be set as environment variables by prepending PHILTER_ to the property name and changing periods to underscores. For example, the property filter.profiles.directory can be set using the environment variable PHILTER_FILTER_PROFILES_DIRECTORY by:

    export PHILTER_FILTER_PROFILES_DIRECTORY=/profiles/\n

    Using environment variables to configure Phileas instead of using Phileas' settings file can allow for easier configuration management when deploying Phileas.

    "},{"location":"settings/#policies","title":"Policies","text":"Setting Description Allowed Values Default Value filter.policies.directory The directory in which to look for policies. Any valid directory path. ./policies/"},{"location":"settings/#span-disambiguation","title":"Span Disambiguation","text":"

    These values configure Phileas' span disambiguation feature to determine the most appropriate type of sensitive information when duplicate spans are identified. In a deployment of multiple Phileas instances, you must enable the cache service for span disambiguation to work as expected.

    Setting Description Allowed Values Default Value span.disambiguation.enabled Whether or not to enable span disambiguation. true, false false"},{"location":"settings/#cache-service","title":"Cache Service","text":"

    The cache service is required to use consistent anonymization and policies stored in Amazon S3. Phileas supports Redis as the backend cache. When Redis is not used, an in-memory cache is used instead. The in-memory cache is not recommended because all contents will be stored in memory on the local Phileas instance.

    The cache will contain sensitive information. It is important that you take the necessary precautions to secure the cache itself and all communication between Phileas and the cache.

    Setting Description Allowed Values Default Value cache.redis.enabled Whether or not to use Redis as the cache. true, false false cache.redis.host The hostname or IP address of the Redis cache. Any valid Redis endpoint. None cache.redis.port The Redis cache port. Any valid port. 6379 cache.redis.auth.token The Redis auth token. Any valid token. None cache.redis.ssl Whether or not to use SSL for communication with the Redis cache. true, false false

    The following Redis settings are only required when using a self-signed SSL certificate.

    Setting Description Allowed Values Default Value cache.redis.truststore The path to the trust store. Any valid file path. None cache.redis.truststore.password The trust store password. Any valid password. None cache.redis.keystore The path to the keystore. Any valid file path. None cache.redis.keystore.password The keystore password. Any valid password. None"},{"location":"settings/#advanced-settings","title":"Advanced Settings","text":"

    In most cases the settings below do not need to be changed. Contact us for more information on any of these settings.

    Setting Description Allowed Values Default Value ner.timeout.sec Controls the timeout in seconds when performing named entity recognition. Longer text may require longer processing times. An integer value. 600 ner.max.idle.connections The maximum number of idle connections to maintain for named entity recognition. More connections may improve performance in some cases. An integer value. 30 ner.keep.alive.duration.ms The amount of time in milliseconds to keep named entity recognition connections alive. Longer text may require longer processing times. An integer value. 60"},{"location":"system_requirements/","title":"System Requirements","text":"

    When launched from a cloud marketplace, Philter is pre-configured and contains all required dependencies.

    Philter requires the following:

    • 2 vCPU (e.g., m5.large instance type on AWS)
    • 8 GB of RAM
    • Java 17
    "},{"location":"upgrading/","title":"Upgrading Philter","text":"

    We recommend reviewing the Philter Release Notes prior to upgrading.

    "},{"location":"upgrading/#upgrading-from-a-2x-version","title":"Upgrading from a 2.x Version","text":"

    Upgrading Philter to the newest version requires moving Philter's configuration to the new version of Philter. To upgrade Philter from a 2.x version, follow the steps below.

    1. Launch a new instance of the newest version of Philter.
    2. Copy your policies from /opt/philter/policies to the new instance.
    3. Copy your /opt/philter/philter.properties to the new instance.
    4. Copy your /opt/philter/philter-ui.properties to the new instance.
    5. Replace the new virtual machine's properties files with the copies from steps 3 and 4.
    6. Copy your policies from /opt/philter/policies to the new instance.
    7. If you have configured any SSL certificates for Philter, copy those files over to the new instance.
    8. Restart Philter: sudo systemctl restart philter.service && sudo systemctl restart philter-ui.service && sudo systemctl restart philter-ner.service
    9. Test the new Philter virtual machine to make sure it is behaving as expected.
    10. Decommission the old Philter instance.
    "},{"location":"upgrading/#upgrading-from-a-1x-version","title":"Upgrading from a 1.x Version","text":"

    Upgrading Philter to the newest version requires moving Philter's configuration to the new version of Philter. To upgrade Philter from a 1.x version, follow the steps below.

    1. Make local copies of your current Philter's properties files.

    2. /opt/philter/philter.properties (prior to 1.10.1 the filename was /opt/philter/application.properties)

    3. /opt/philter/philter-ui.properties (not applicable prior to version 1.10)

    4. Launch a new instance of the newest version of Philter.

    5. Replace the new virtual machine's properties file with your copy from step 1.
    6. Restart Philter: sudo systemctl restart philter.service && sudo systemctl restart philter-ui.service && sudo systemctl restart philter-ner.service
    7. Test the new Philter virtual machine to make sure it is behaving appropriately.
    8. Decommission the old Philter instance.
    "},{"location":"deidentification/bucketing/","title":"Bucketing","text":""},{"location":"deidentification/date-shifting/","title":"Date Shifting","text":""},{"location":"deidentification/deidentification/","title":"De-identification Methods","text":"

    There are several ways data can be de-identified, and which you use depends on the types of data you want to de-identify and your use-case for de-identifying the data. The terminology around the different methods is often used interchangeably, but there are differences between each method.

    In this User's Guide, we may use the terms filter and redact interchangeably.

    In Philter, de-identification methods vary for each type of sensitive information. For example, all types can be replaced or redacted, but only dates can be shifted and only zip codes can be truncated. How a de-identification method is applied by Philter is called a filter strategy. Each type of sensitive information can have one or more filter strategies, and the combination of the filter strategies you select is called a policy. A policy determines how a document will be de-identified.

    The following is a list of de-identification methods that describes how each method works and its applicability to our Philter software. De-identifying a document is likely to require a combination of the following methods. For instance, you may want to redact names, encrypt credit card numbers, and shift appointment dates.

    De-identification MethodDescriptionReplacementReplaces sensitive information with a defined value. For example, you might want to replace a credit card number with the literal value \"CREDIT_CARD_NUMBER\".Redaction and MaskingRemoves sensitive information. Our Philter software gives you a choice of how to remove the sensitive information, whether it is by replacing it with ***** (masking) or by some other set of characters.EncryptionEncrypts sensitive information.Date ShiftingShifts dates either forward or backward by some interval.BucketingCategorizes data into buckets based on the data. For example, Philter can bucket dates into years and zip codes by population.

    A difference between Philter and other services is that Philter does not send your data to a third-party for de-identification. Philter runs in your cloud and your data stays in your cloud.

    "},{"location":"deidentification/encryption/","title":"Encryption","text":""},{"location":"deidentification/pii_phi_nppi/","title":"PII, PHI, and NPPI","text":"

    Philter has many predefined types of sensitive information called filters that can be redacted. The individual types are described below.

    • Personally identifiable information (PII) is any information that could potentially be used to identify a specific person.
    • Protected health information (PHI) is any information about health status, provision of health care, or payment for health care that can be linked to an individual. The Health Insurance Portability And Accountability Act (HIPAA) defines 18 types of PHI.
    "},{"location":"deidentification/pii_phi_nppi/#predefined-types-of-pii-and-phi","title":"Predefined Types of PII and PHI","text":"

    The types of sensitive information that Philter will identify are customizable. For example, if you are not interested in VIN numbers you can have Philter ignore them. This configuration is performed through Policies.

    Because Philter only operates on text, the biometric identifiers and face images outlined in the HIPAA regulations as PHI are not applicable to Philter. The types of sensitive information and how Philter identifies each one are listed in the table below.

    Type of PHI How Philter Identifies It 1

    Names

    Ex: John Smith, Jane Doe

    • Philter identifies names in natural language text using state of the art machine learning algorithms and natural language processing techniques to identify named-person entities.
    • Philter also uses common first name and surname dictionaries with spellcheck capability to identify common names per the US census.
    2

    All geographical identifiers smaller than a state, except for the initial three digits of a zip code if, according to the current publicly available data from the U.S. Bureau of the Census: the geographic unit formed by combining all zip codes with the same three initial digits contains more than 20,000 people; and the initial three digits of a zip code for all such geographic units containing 20,000 or fewer people is changed to 000

    Ex: 85055, 90213-1544

    • Philter can identify many US cities, US counties, and all US states (full names and abbreviations).
    • Philter uses a dictionary with spelling correction to identify misspelled locations.
    • Filter conditions in policies can be used to apply logic based on zip code population according to the US census. (Filter strategies can truncate the zip code.)
    • Philter also uses state of the art machine learning algorithms and natural language processing techniques to identify locations.
    • Philter includes a dictionary of some hospital locations to quickly identify medical locations.
    3

    Dates (other than year) directly related to an individual

    Ex: 10-10-2000, 10/10/2000, October 10, 2000

    • Philter can identify dates in many formats such as with hyphens (10-10-2000), with slashes (10/10/2000), or spelled out (May 1, 2000).
    • Philter can also identify ages, e.g. 57 years, 57yrs.
    4

    Phone Numbers

    Ex: (304) 555-5555, 304-555-5555, 1-800-123-4567

    • Philter can identify phone numbers in many formats. (Philter is currently limited to US phone numbers.)
    5

    Fax numbers

    Ex: (304) 555-5555, 304-555-5555, 1-800-123-4567

    • Philter can identify fax numbers in many formats. (Philter is currently limited to US phone numbers.)
    6

    Email addresses

    Ex: john.fake.address@hotmail.com

    • Philter can identify email addresses per the email standard (summarized on Wikipedia).
    7

    Social Security numbers

    Ex: 123-45-6789, 123456789

    • Philter can identify social security numbers (SSNs) in multiple formats such as with spaces and hyphens.
    8

    Medical record numbers

    Ex: 86637729, AB473-6021, 473-6AB021

    • Philter can identify alphanumeric identifiers.
    9

    Health insurance beneficiary numbers

    Ex: 86637729, AB473-6021, 473-6AB021

    • Philter can identify alphanumeric identifiers.
    10

    Account numbers

    Ex: 86637729, AB473-6021, 473-6AB021

    • Philter can identify alphanumeric identifiers, as well as credit card numbers from all major types of credit cards.
    11

    Certificate/license numbers

    Ex: 86637729, AB473-6021, 473-6AB021

    • Philter can identify alphanumeric identifiers.
    12

    Vehicle identifiers and serial numbers, including license plate numbers

    Ex: WBAPM7G50ANL19218, 1GBJC34K3RE176005

    • Philter can identify vehicle serial numbers (17-character VIN numbers). License plates will be identified as alphanumeric identifiers.
    13

    Device identifiers and serial numbers

    Ex: H3SNPUHYEE7JD3H, 33778376

    • Philter can identify alphanumeric identifiers.
    14

    Web Uniform Resource Locators (URLs)

    Ex: myhomepage.com, http://myhomepage.com/folder/page.html, www.myhomepage.com/folder/page.html

    • Philter can identify URLs adhering to the URL naming standard.
    15

    Internet Protocol (IP) address numbers

    Ex: 127.0.0.1, 192.168.3.58, 2001:0db8:85a3:0000:0000:8a2e:0370:7334

    • Philter can identify IPv4 and IPv6 addresses.
    16 Biometric identifiers, including finger, retinal and voice prints
    • Not applicable \u2013 Philter only identifies PHI in text.
    17 Full face photographic images and any comparable images
    • Not applicable \u2013 Philter only identifies PHI in text.
    18

    Any other unique identifying number, characteristic, or code except the unique code assigned by the investigator to code the data

    Ex: 86637729, AB473-6021, 473-6AB021

    • Philter can identify alphanumeric identifiers.
    "},{"location":"deidentification/redaction-and-masking/","title":"Redaction and Masking","text":"

    Redaction and masking are two methods of de-identification that are often used interchangeably. The term redaction refers to removing a sensitive value from a document. When we hear the term redaction we often think of an image of a document with black bars across pieces of the text.

    Masking is similar to redaction but allows for configuring how the sensitive value is removed. The most common example is using asterisks (i.e. ******) in place of a sensitive value.

    "},{"location":"deidentification/replacement/","title":"Replacement","text":"

    Replacement is a method of de-identification that simply replaces a sensitive value with another value. Replacement is useful when the sensitive value is not needed once the document has been de-identified. Philter can replace a sensitive value with a preset value or with a random value.

    In Philter's filter strategies, replacement is achieved by setting the strategy to REDACT, STATIC_REPLACE, or RANDOM_REPLACE.

    "},{"location":"other_features/alerts/","title":"Alerts","text":"

    Phileas can optionally generate alerts when a particular type of sensitive information is identified.

    "},{"location":"other_features/alerts/#alert-conditions","title":"Alert Conditions","text":"

    In a policy, each type of sensitive information can have zero or more filter strategies. Each filter strategy can optionally have a condition associated with it. When a condition is present, the filter strategy will only be satisfied when the condition is satisfied. For example, a condition may be created to only filter phone numbers that start with the digits 123 or only filter names that start with John. Filter strategy conditions give you granular control over the filtering process.

    When a filter strategy condition is satisfied, Phileas can optionally generate an alert. This feature allows you to be notified when a particular type of sensitive information is identified.

    "},{"location":"other_features/alerts/#enabling-alerts","title":"Enabling Alerts","text":"

    Alerts are enabled on a per-condition basis. For instance, given the following policy to identify email addresses, a condition has been added to only match the email address test@test.com. Because the alert property is set to true, an alert will be generated when this condition is satisfied. By default, the alert property is set to false, disabling alerts for the condition.

    {\n  \"name\": \"email-address-alert\",\n  \"identifiers\": {\n    \"emailAddress\": {\n      \"emailAddressFilterStrategies\": [\n        {\n          \"id\": \"my-email-strategy\",\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\",\n          \"condition\": \"token == \\\"test@test.com\\\"\",\n          \"alert\": true\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"other_features/alerts/#structure-of-an-alert","title":"Structure of an Alert","text":"

    An alert contains the following information:

    Property Name Description id A unique ID for the alert formatted as a UUID. filterProfile The name of the policy triggering the alert. strategyId The ID of the filter strategy triggering the alert. In the example above the id would be my-email-strategy. context The context. documentId The ID of the document which triggered the alert. filterType The filter type (\"email-address\", \"credit-card\", etc.) triggering the alert. date A timestamp when the alert was generated formatted as yyyy-MM-dd'T'HH:mm:ss.SSS'Z'."},{"location":"other_features/alerts/#retrieving-and-deleting-alerts","title":"Retrieving and Deleting Alerts","text":"

    The alerts that Phileas has generated are available through Phileas' alerts API. This API allows for retrieving and deleting alerts. Using this API you can build sophisticated notification systems around Phileas' capabilities.

    "},{"location":"other_features/anonymization/","title":"Consistent Anonymization","text":"

    Anonymization in the context of Phileas is the process of replacing certain values with random but similar values. For example, the identified name of \u201cJohn Smith\u201d may be replaced with \u201cDavid Jones\u201d, or an identified phone number of 123-555-9358 may be replaced by 842-436-2042. A VIN number will be replaced by a 17 character randomly selected VIN number that adheres to the standard for VIN numbers.

    Anonymization is useful in instances where you want to remove sensitive information from text without changing the meaning of the text. Anonymization can be enabled for each type of sensitive information in the policy by setting the filter strategy to RANDOM_REPLACE. (See Policies for more information.)

    "},{"location":"other_features/anonymization/#consistent-anonymization_1","title":"Consistent Anonymization","text":"

    Consistent anonymization refers to the process of always anonymizing the same sensitive information with the same replacement values. For example, if the name \"John Smith\" is randomly replaced with \"Pete Baker\", all other occurrences of \"John Smith\" will also be replaced by \"Pete Baker.\"

    Consistent anonymization can be done on the document level or on the context level. When enabled on the document level, \"John Smith\" will only be replaced by \"Pete Baker\" in the same document. If \"John Smith\" occurs in a separate document it will be anonymized with a different random name. When enabled on the context level, \"John Smith\" will be replaced by \"Pete Baker\" whenever \"John Smith\" is found in all documents in the same context.

    Enabling consistent anonymization on the context level requires a cache to store the sensitive information and the corresponding replacement values. If a single instance of Phileas is running, its internal cache service (enabled by default) is the best choice and no additional configuration is required.

    If multiple instances of Phileas are deployed together, Phileas requires access to a Redis cache service as shown below. See Phileas' Settings on how to configure the cache.

    When Phileas is deployed in a cluster, a Redis cache is required to enable consistent anonymization.

    The anonymization cache will contain PHI. It is important that you take the necessary precautions to secure the cache and all communication to and from the cache.

    "},{"location":"other_features/span_disambiguation/","title":"Span Disambiguation","text":"

    Span disambiguation is an optional feature in Phileas that is disabled by default. Refer to Phileas' Settings to enable and configure span disambiguation.

    In Phileas, a span is a piece of the input text that Phileas has identified as sensitive information. A span has start and end positions, a confidence, a type, and other attributes. Ideally, each piece of identified sensitive information will only have a single span associated with it. In this case, the type of sensitive information is unambiguous. The goal of span disambiguation is to provide more accurate filtering by removing the potential ambiguities in the types of sensitive information for duplicate spans.

    However, sometimes a piece of text can be identified by multiple spans, each having a different type of sensitive information. In an example hypothetical scenario, let's say given the input text My SSN is 123456789. , Phileas identifies 123456789 as an SSN and as a phone number. This type of scenario can be quite common, and its likelihood increases as the number of enabled filters in a policy increases.

    "},{"location":"other_features/span_disambiguation/#how-phileas-span-disambiguation-works","title":"How Phileas' Span Disambiguation Works","text":"

    When we read the sentence My SSN is 123456789. we can tell the span in question should be identified as an SSN because we can look at the text surrounding the span. We use the surrounding words to deduce the correct type of sensitive information for 123456789.

    That is exactly how Phileas' span disambiguation works. When presented with identical spans differing only by the type of sensitive information, Phileas looks at the text surrounding the span in question in combination with the previous spans it has seen in the same context to determine which type of sensitive information is most likely to be correct. Phileas then removes the ambiguous spans from the results and replaces them with a single span.

    "},{"location":"other_features/span_disambiguation/#improves-over-time","title":"Improves Over Time","text":"

    Because Phileas is able to consider previously seen text to make its decision concerning ambiguous spans, Phileas' span disambiguation gets \"smarter\" as more text is filtered. This is because Phileas will have more text to consider in its calculations.

    "},{"location":"other_features/span_disambiguation/#more-details","title":"More Details","text":""},{"location":"other_features/span_disambiguation/#span-disambiguation-and-confidence-values","title":"Span Disambiguation and Confidence Values","text":"

    Span disambiguation is only invoked for spans that differ only by the type of sensitive information. This means the span's location (start and end positions), confidence, and all other values must match. If two spans have identical locations but have different confidence values, span disambiguation will not be applied and the span having the highest confidence will be used.

    "},{"location":"other_features/span_disambiguation/#cache-service","title":"Cache Service","text":"

    When multiple applications using Phileas are deployed alongside each other behind a load balancer, Phileas' cache service should be configured and enabled. Phileas will store the information needed to disambiguate spans in the cache such that the information is available to each instance of Phileas. If only a single instance of Phileas is running then the cache service is not required. However, the information needed to disambiguate spans will be stored in memory and will be lost when Phileas is stopped or restarted. Because of this, we recommend the cache service always be used unless there is a specific reason not to.

    "},{"location":"other_features/span_disambiguation/#fine-tuning-the-span-disambiguation","title":"Fine-Tuning the Span Disambiguation","text":"

    There are properties available to fine-tune how the span disambiguation operates. These properties are not documented because improper use of the properties could have a negative impact on performance. We will be glad to walk through these properties upon request.

    "},{"location":"policies/filter_policies/","title":"Filter Policies","text":"

    The types of sensitive information identified by Phileas and how that information is de-identified are controlled through policies. A policy is a file stored under Phileas\u2019s policies directory, which by default is located at /opt/Phileas/policies/. You can have an unlimited number of policies.

    Each policy has a name that is used by Phileas to apply the appropriate de-identification methods. The name is passed to Phileas\u2019s API along with the text to be filtered when submitting text to Phileas. This provides flexibility and allows you to de-identify different types of documents in differing manners with a single instance of Phileas. For example, you may have a policy for bankruptcy documents and a separate policy for financial documents.

    There are sample policies available for immediate use or customization to fit your use-cases.

    "},{"location":"policies/filter_policies/#the-structure-of-a-policy","title":"The Structure of a Policy","text":"

    A policy:

    • Must have a name that uniquely identifies it.
    • Must have a list of identifiers that are filters for sensitive information.
      • Each identifier, or filter, can have zero or more filter strategies. A filter strategy tells Phileas how to manipulate that type of sensitive information when it is identified.
    • Can have an optional list of terms or patterns.
    • Can have encryption keys to support encryption of sensitive information.
    "},{"location":"policies/filter_policies/#an-example-policy","title":"An Example Policy","text":"

    The following is an example policy. In the example below you can see the types of sensitive information that are enabled and the strategy for manipulating each type when found. This policy identifies email addresses and phone numbers and redacts each with the format given.

    {\n   \"name\": \"email-and-phone-numbers\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n

    When an email address is identified by this policy, the email address is replaced with the text {{{REDACTED-email-address}}}. The %t gets replaced by the type of the filter. Likewise, when a phone number is found it is replaced with the text {{{REDACTED-phone-number}}}. You are free to change the redaction formats to whatever fits your use-case. See Filter Strategies for all replacement options.

    The name of the policy is email-and-phone-numbers. Policies can be named anything you like but their names must be unique from all other policies. As a best practice, the policy should be saved as [name].json, e.g. email-and-phone-numbers.json.

    "},{"location":"policies/filter_policies/#applying-a-policy-to-text","title":"Applying a Policy to Text","text":"

    To use this policy we will save it as /opt/Phileas/policies/email-and-phone-numbers.json. We must restart Phileas for the new policy to be available for use. To apply the policy we will pass the policy's name to Phileas when making a filter request, as shown in the example request below.

    curl -k -X POST \"https://localhost:8080/api/filter?c=context&p=email-and-phone-numbers\" \\\n  -d @file.txt -H \"Content-Type: text/plain\"\n

    In this command, we have provided the parameter p along with a value that is the name of the policy we want to use for this request. If we had multiple policies in Phileas we could choose a different policy for this request simply by changing the name given to the parameter p. For more details see Phileas\u2019s API.

    Phileas will process the contents of file.txt by applying the policy named email-and-phone-numbers. As we saw in the policy above, this policy redacts email addresses and phone numbers. Phileas will return the redacted text in response to the API call.

    To manipulate the sensitive information by methods other than redaction, see the Filter Strategies.

    "},{"location":"policies/filter_strategies/","title":"Filter Strategies","text":"

    A filter strategy defines how sensitive information identified by Phileas should be manipulated, whether it is redacted, replaced, encrypted, or manipulated in some other fashion.

    In a policy, you list the types of sensitive information that should be filtered. How Phileas replaces each type of sensitive information is specific to each type. For instance, zip codes can be truncated based on the leading digits or zip code population while phone numbers are redacted. These replacements are performed by \"filter strategies.\"

    Each filter can have one or more filter strategies and conditions can be used to determine when to apply each filter strategy.

    A sample policy containing a filter strategy is shown below. In this example, email addresses will be redacted.

    {\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n

    Most of the filter strategies apply to all types of data, however, some filter strategies only apply to a few types. For example, the TRUNCATE filter strategy only applies to a zip code filter.

    "},{"location":"policies/filter_strategies/#filter-strategies_1","title":"Filter Strategies","text":"

    The filter strategies are described below. Each filter type can specify zero or more filter strategies. When no filter strategies are given, Phileas will default to REDACT for that filter type. When multiple filter strategies are given for a single filter type, the filter strategies will be applied in order as they are listed in the policy, top to bottom.

    • REDACT
    • CRYPTO_REPLACE(AES encryption)
    • HASH_SHA256_REPLACE(SHA256 hashing)
    • FPE_ENCRYPT_REPLACE(Format-preserving encryption)
    • RANDOM_REPLACE
    • STATIC_REPLACE
    • TRUNCATE
    • ZERO_LEADING
    "},{"location":"policies/filter_strategies/#the-redact-filter-strategy","title":"The REDACT Filter Strategy","text":"

    The REDACT filter strategy replaces sensitive information with a given redaction format. You can put variables in the redaction format that Phileas will replace when performing the redaction.

    The available redaction variables are:

    Redaction Variable Description %t Will be replaced with the type of sensitive information. This is to allow you to know the type of sensitive information that was identified and redacted. %l Will be replaced by the given classification for the type of sensitive information. %v Will be replaced by the original value of the sensitive text. With %v you can annotate sensitive information instead of masking or removing it.

    To redact sensitive information by replacing it with the type of sensitive information, the redaction format would be REDACTED-%t.

    An example filter using the REDACT filter strategy:

    {\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#the-crypto_replace-filter-strategy-idcrypto","title":"The CRYPTO_REPLACE Filter Strategy {id=\"crypto\"}","text":"

    The CRYPTO_REPLACE filter strategy replaces each identified piece of sensitive information by encrypting it using the AES encryption algorithm. To use this filter strategy, the policy must include the details of the encryption key as shown below:

    {\n   \"name\":\"sample-profile\",\n   \"crypto\": {\n     \"key\": \"....\",\n     \"iv\": \"....\"\n   },\n   ...\n

    In the snippet of a policy shown above, a crypto element is defined with a key and an initialization vector (iv). These two items are required to encrypt the sensitive information. To generate a key, run the following command:

    openssl enc -e -aes-256-cbc -a -salt -P\n

    You will be prompted to enter an encryption password. Once entered, the values of the key and iv will be shown. Copy and paste those values into the policy.

    An example policy using the CRYPTO_REPLACE filter strategy:

    {\n   \"name\": \"email-address\",\n   \"crypto\": {\n     \"key\": \"....\",\n     \"iv\": \"....\"\n   },\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"CRYPTO_REPLACE\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#the-hash_sha256_replace-filter-strategy-idhash","title":"The HASH_SHA256_REPLACE Filter Strategy {id=\"hash\"}","text":"

    The HASH_SHA256_REPLACE filter strategy replaces sensitive information with the SHA256 hash value of the sensitive information. To append a random salt value to each value prior to hashing, set the salt property to true. The salt value used will be returned in the explain response from Phileas' API.

    An example policy using the HASH_SHA256_REPLACE filter strategy:

    {\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"HASH_SHA256_REPLACE\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#the-fpe_encrypt_replace-filter-strategy-idfpe","title":"The FPE_ENCRYPT_REPLACE Filter Strategy {id=\"fpe\"}","text":"

    The FPE_ENCRYPT_REPLACE filter strategy uses format-preserving encryption (FPE) to encrypt the sensitive information. Phileas uses the FF3-1 algorithm for format-preserving encryption. The FPE_ENCRYPT_REPLACE filter strategy requires a key and a tweak value. These values control the format-preserving encryption. For more information on these values and format-preserving encryption, refer to the resources below:

    • https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-38Gr1-draft.pdf
    • https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-38g.pdf

    An example policy using the FPE_ENCRYPT_REPLACE filter strategy:

    {\n   \"name\": \"credit-cards\",\n   \"identifiers\": {\n      \"creditCard\": {\n         \"creditCardFilterStrategies\": [\n            {\n               \"strategy\": \"FPE_ENCRYPT_REPLACE\",\n               \"key\": \"...\",\n               \"tweak\": \"...\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#the-random_replace-filter-strategy-idrandom","title":"The RANDOM_REPLACE Filter Strategy {id=\"random\"}","text":"

    Replaces the identified text with a fake value but of the same type. For example, an SSN will be replaced by a random text having the format ###-##-####, such as 123-45-6789. An email address will be replaced with a randomly generated email address. Available to all filter types.

    An example policy using the RANDOM_REPLACE filter strategy:

    {\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"RANDOM_REPLACE\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#the-static_replace-filter-strategy-idstatic","title":"The STATIC_REPLACE Filter Strategy {id=\"static\"}","text":"

    Replaces the identified text with a given static value. Available to all filter types.

    An example policy using the STATIC_REPLACE filter strategy:

    {\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"STATIC_REPLACE\",\n               \"staticReplacement\": \"some new value\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#the-truncate-filter-strategy-idtruncate","title":"The TRUNCATE Filter Strategy {id=\"truncate\"}","text":"

    Available only to zip codes, this strategy allows for truncating zip codes to only a select number of digits. Specify truncateDigits to set the desired number of leading digits to leave. For example, if truncateDigits is 2, the zip code 90210 will be truncated to 90***.

    The TRUNCATE filter strategy is available only to the zip code filter. An example policy using the TRUNCATE filter strategy:

    {\n   \"name\": \"zip-codes\",\n   \"identifiers\": {\n      \"zipCode\": {\n         \"zipCodeFilterStrategies\": [\n            {\n               \"strategy\": \"TRUNCATE\",\n               \"truncateDigits\": 3\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#the-zero_leading-filter-strategy-idzero_leading","title":"The ZERO_LEADING Filter Strategy {id=\"zero_leading\"}","text":"

    Available only to zip codes, this strategy changes the first 3 digits of a zip code to be 0. For example, the zip code 90210 will be changed to 00010.

    The ZERO_LEADING filter strategy is only available to zip code filters. An example zip code filter using the ZERO_LEADING filter strategy:

    {\n   \"name\": \"zip-codes\",\n   \"identifiers\": {\n      \"zipCode\": {\n         \"zipCodeFilterStrategies\": [\n            {\n               \"strategy\": \"ZERO_LEADING\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#filter-strategy-conditions","title":"Filter Strategy Conditions","text":"

    A replacement strategy can be applied based on the sensitive information meeting one or more conditions. For example, you can create a condition such that only dates of 11/05/2010 are replaced by using the condition token == \"11/05/2010\". The conditions that can be applied vary based on the type of sensitive information. For instance, zip codes can have conditions based on their population. Refer to each specific filter type for the conditions available.

    The following is an example policy for credit cards that contains a condition to only redact credit card numbers that start with the digits 3000:

    {\n  \"name\": \"default\",\n  \"identifiers\": {\n    \"creditCard\": {\n      \"creditCardFilterStrategies\": [\n        {\n          \"condition\": \"token startswith \\\"3000\\\"\",\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/filter_strategies/#combining-conditions","title":"Combining Conditions","text":"

    Conditions can be joined through the use of the and keyword. When conditions are joined, each condition must be satisfied for the identified text to be filtered. If any of the conditions are not satisfied the identified text will not be filtered. Below is an example joined condition:

    token != \"123-45-6789\" and context == \"my-context\"\n

    This condition requires that the identified text (the token) not be equal to 123-45-6789 and the context be equal to my-context. Both of these conditions must be satisfied for the identified text to be filtered.

    Conversely, conditions can be OR'd through the use of multiple filter strategies. For example, if we want to OR a condition on the token and a condition on the context, we would use two filter strategies:

    \"ssnFilterStrategies\": [\n  {\n    \"condition\": \"token != \\\"123-45-6789\\\"\",\n    \"strategy\": \"REDACT\",\n    \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n  },\n  {\n    \"condition\": \"context == \\\"my-context\\\"\",\n    \"strategy\": \"REDACT\",\n    \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n  }        \n]\n
    "},{"location":"policies/filters/","title":"Filters","text":"

    A \"filter\" corresponds to a type of sensitive information. Phileas has filters for sensitive information such as names, addresses, ages, and lots of others.

    There are predefined filters that are ready to be used as well as custom filters that let you define your own filters to identify sensitive information outside of what the predefined filters can identify. An example of a custom filter is a filter to identify your patient account numbers, where the structure of an account number is specific to your organization.

    Each filter is capable of identifying and redacting a specific type of sensitive information. For example, there is a filter for phone numbers, a filter for US social security numbers, and a filter for person's names. You can enable any combination of these filters based on the types of sensitive information you need to redact.

    This section of the documentation describes the filters available in Phileas. The configuration options for each filter can vary due to the type of the sensitive information. For instance, only the zip code filter has a configuration to truncate the zip code.

    A selection of filters and their configurations is called a policy. A policy describes how to de-identify a document.

    "},{"location":"policies/filters/#predefined-filters","title":"Predefined Filters","text":""},{"location":"policies/filters/#persons-names","title":"Person's Names","text":"

    Phileas uses several methods to identify person's names.

    Type Description First Names Identifies common first names Surnames Identifies common surnames Person's Names (NER) Identifies full names using natural language processing analysis Physician's Names (NER) Identifies physician names using natural language processing analysis"},{"location":"policies/filters/#other-filters","title":"Other Filters","text":"Type Description Ages Identifies ages such as 3.5 years old Bank Routing Numbers Identifies bank routing numbers Bitcoin Addresses Identifies Bitcoin addresses such as 127NVqnjf8gB9BFAW2dnQeM6wqmy1gbGtv Cities Identifies common cities Counties Identifies common counties Credit Card Numbers Identifies VISA, American Express, MasterCard, and Discover credit card numbers Dates Identifies dates in many formats such as May 22, 1999 Driver's License Numbers Identifies driver's license numbers for all 50 US states Email Addresses Identifies email addresses Hospitals Identifies common hospital names Hospital Abbreviations Identifies common hospitals by their name abbreviations IBAN Codes Identifies international bank account numbers IP Addresses Identifies IPv4 and IPv6 addresses MAC Addresses Identifies network MAC addresses Passport Numbers Identifies US passport numbers Phone Numbers Identifies phone numbers Phone Number Extensions Identifies phone number extensions Sections Identifies sections in text denoted by SSNs and TINs Identifies US SSNs and TINs States Identifies US state names State Abbreviations Identifies US state names by their abbreviations Tracking Numbers Identifies UPS, FedEx, and USPS tracking numbers URLs Identifies URLs VINs Identifies vehicle identification numbers Zip Codes Identifies US zip codes"},{"location":"policies/filters/#custom-filter-types-of-sensitive-information","title":"Custom Filter Types of Sensitive Information","text":"

    In addition to the predefined types of sensitive information listed in the table above, you can also define your own types of sensitive information. Through custom identifiers and dictionaries, Phileas can identify many other types of information that may be sensitive in your use-case. For example, if you have patient identifiers that follow a pattern of AA-00000 you can define a custom identifier for this sensitive information.

    Phileas can be configured to identify sensitive information based on custom dictionaries. When a term in the dictionary is found in the text, Phileas will treat the term as sensitive information and apply the given filter strategy.

    Custom dictionaries support fuzziness to accommodate for misspellings. The replacement strategy for a custom dictionary has a sensitivityLevel that controls the amount of allowed fuzziness.

    Type Description Custom Dictionaries Identifies sensitive information based on dictionary values. Custom Identifiers Identifies custom alphanumeric identifiers that may be used for medical record numbers, patient identifiers, account numbers, or other specific identifiers."},{"location":"policies/ignoring_sensitive_information/","title":"Ignoring Sensitive Information","text":"

    Phileas can optionally ignore a list of terms and prevent those terms from being redacted. For example, if the name John Smith is being redacted and you do not want it to be redacted, you can add John Smith to an ignore list. Each time Phileas identifies sensitive information it will check the ignore lists to see if the sensitive information is to be ignored.

    Phileas can ignore terms and patterns per-policy, meaning each policy can have its own unique list of terms or patterns to ignore.

    "},{"location":"policies/ignoring_sensitive_information/#ignore-lists","title":"Ignore Lists","text":"

    Ignore lists can be specified at the policy level and/or for each filter in the policy. When set for the policy, the list of ignored terms will be applied to all filter types. When set for a filter, the list of ignored terms will be applied only to that filter.

    "},{"location":"policies/ignoring_sensitive_information/#ignore-list-for-a-policy","title":"Ignore List for a Policy","text":"

    In the policy shown below, an ignore list is set at the level of the policy. The terms specified in the list will be ignored for all filter types enabled in the policy. Only the terms property is required. The name and caseSensitive properties are optional.

    {\n   \"name\": \"example-policy\",\n   \"ignored\": [\n     {\n       \"name\": \"names to ignore\",\n       \"terms\": [\"john smith\", \"jane doe\"],\n       \"caseSensitive\": false\n     }\n   ],\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n

    Terms to be ignored at the policy level can also be read from one or more files located on the local file system. The file must be formatted as one term per line.

    {\n   \"name\": \"example-policy\",\n   \"ignored\": [\n     {\n       \"name\": \"names to ignore\",\n       \"terms\": [\"john smith\", \"jane doe\"],\n       \"files\": [\"/tmp/names.txt\"],\n       \"caseSensitive\": false\n     }\n   ],   \n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/ignoring_sensitive_information/#ignore-list-for-a-filter","title":"Ignore List for a Filter","text":"

    In the policy shown below, an ignore list is set at the level of a filter. The terms specified in the list will be ignored only for that filter type. Each filter in a policy can have its own list of ignored terms. The listed terms are matched case-sensitively, meaning \"John\" will be ignored if \"John\" is an ignored term but will not be ignored if only \"john\" is an ignored term.

    {\n   \"name\": \"example-filter-profile\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"ignored\": [\"john smith\", \"jane doe\"],\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/ignoring_sensitive_information/#ignoring-patterns","title":"Ignoring Patterns","text":"

    Phileas can ignore information based on a regular expression pattern. An example use of this feature is to ignore terms that are present in your text but are dynamic, such as logged timestamps. When using the date filter these timestamps may be identified as being sensitive but you do not want them redacted. With an ignore pattern we can ignore the logged timestamps.

    "},{"location":"policies/ignoring_sensitive_information/#ignore-patterns","title":"Ignore Patterns","text":"

    Ignore patterns can be specified at the policy level and/or at the level of each type of filter. When set at the policy level, the list of ignored patterns will be applied to all filter types. When set for an individual filter, the list of ignored patterns will be applied only to that filter.

    "},{"location":"policies/ignoring_sensitive_information/#ignore-patterns-for-a-policy","title":"Ignore Patterns for a Policy","text":"

    In the policy shown below, ignore patterns are set at the level of the policy. The patterns specified in the list will be ignored for all filter types enabled in the policy.

    {\n   \"name\": \"example-policy\",\n   \"ignoredPatterns\": [\n     {\n       \"name\": \"ignore-room-numbers\",\n       \"pattern\": \"Room [A-Z0-4]{4}\"\n     }\n   ],\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/ignoring_sensitive_information/#ignore-patterns-for-a-filter","title":"Ignore Patterns for a Filter","text":"

    In the policy shown below, ignore patterns are set at the level of a filter. The patterns specified in the list will be ignored only for that filter type. Each filter in a policy can have its own list of ignored patterns.

    {\n   \"name\": \"example-policy\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"ignoredPatterns\": [\n           {\n             \"name\": \"ignore-room-numbers\",\n             \"pattern\": \"Room [A-Z0-4]{4}\"\n           }\n         ],\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/sample_policies/","title":"Sample Policies","text":"

    This page lists some sample policies. You can use these policies either as-is or as starting points for customizing them to meet your specific de-identification needs.

    These policies are examples and not an exhaustive list of all the sensitive information Phileas can identify. Items from each of these policies can be combined to make policies to meet your use-cases.

    "},{"location":"policies/sample_policies/#email-addresses-and-phone-numbers","title":"Email Addresses and Phone Numbers","text":"

    This policy finds email addresses and phone numbers and redacts them with {{{REDACTED-email-address}}} and {{{REDACTED-phone-number}}}, respectively.

    {\n  \"name\": \"email-and-phone-numbers\",\n  \"identifiers\": {\n    \"emailAddress\": {\n      \"emailAddressFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"phoneNumber\": {\n      \"phoneNumberFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/sample_policies/#persons-names-and-ssns","title":"Persons Names and SSNs","text":"

    This policy finds persons names and SSNs and redacts them with {{{REDACTED-entity}}} and {{{REDACTED-ssn}}}, respectively.

    {\n  \"name\": \"persons-names-ssn\",\n  \"identifiers\": {\n    \"ner\": {\n      \"nerFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"ssn\": {\n      \"ssnFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/sample_policies/#dates-urls-and-vins","title":"Dates, URLs, and VINs","text":"

    This policy finds dates, URLs, and VINs. Dates and URLs are redacted with {{{REDACTED-date}}} and {{{REDACTED-url}}}, respectively. Each VIN is replaced by a randomly generated VIN.

    {\n  \"name\": \"dates-urls-vin\",\n  \"identifiers\": {\n    \"date\": {\n      \"dateFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"url\": {\n      \"urlFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"vin\": {\n      \"vinFilterStrategies\": [\n        {\n          \"strategy\": \"RANDOM_REPLACE\"\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/sample_policies/#ip-addresses","title":"IP Addresses","text":"

    This policy finds IP addresses and replaces each identified IP address with the static text IP_ADDRESS as long as the IP address is not 127.0.0.1. (A condition on the filter strategy sets the IP address requirement.)

    {\n  \"name\": \"ip-addresses\",\n  \"identifiers\": {\n    \"ipAddress\": {\n      \"ipAddressFilterStrategies\": [\n        {\n          \"strategy\": \"STATIC_REPLACE\",\n          \"redactionFormat\": \"IP_ADDRESS\",\n          \"condition\": \"token != \\\"127.0.0.1\\\"\"\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/sample_policies/#zip-codes","title":"Zip Codes","text":"

    This policy finds ZIP codes starting with 90 and truncates the zip code to just the first two digits.

    {\n  \"name\": \"zip-codes\",\n  \"identifiers\": {\n    \"zipCode\": {\n      \"zipCodeFilterStrategies\": [\n        {\n          \"condition\": \"token startswith \\\"90\\\"\",\n          \"strategy\": \"TRUNCATE\",\n          \"truncateDigits\": 2\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/sample_policies/#enable-text-splitting","title":"Enable Text Splitting","text":"

    This policy enables text splitting for input over 10,000 characters.

    {\n  \"name\": \"default-split-enabled\",\n  \"config\": {\n    \"splitting\": {\n      \"enabled\": true,\n      \"threshold\": 10000,\n      \"method\": \"newline\"\n    }\n  },\n  \"identifiers\": {\n    \"ssn\": {\n      \"ssnFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/sample_policies/#globally-ignored-terms","title":"Globally Ignored Terms","text":"

    This policy has a list of globally ignored terms.

    {\n  \"name\": \"default-global-ignore\",\n  \"ignored\": [\n    {\n      \"name\": \"ignored credit cards\",\n      \"terms\": [\"4111111111111111\", \"0000000000000000\"]\n    }\n  ],\n  \"identifiers\": {\n    \"creditCard\": {\n      \"creditCardFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/sample_policies/#generating-alerts","title":"Generating Alerts","text":"

    This policy generates an alert when a matching email address is identified.

    {\n  \"name\": \"email-address-alert\",\n  \"identifiers\": {\n    \"emailAddress\": {\n      \"emailAddressFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\",\n          \"condition\": \"token == \\\"test@test.com\\\"\",\n          \"alert\": true\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/filters/common_filters/ages/","title":"Ages","text":""},{"location":"policies/filters/common_filters/ages/#filter","title":"Filter","text":"

    This filter identifies ages such as 3.5 years old in text.

    "},{"location":"policies/filters/common_filters/ages/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/ages/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value ageFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/ages/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/ages/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. The filter will only be applied when the condition is satisfied. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/ages/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"ages-example\",\n   \"identifiers\": {\n      \"age\": {\n         \"ageFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/bank-routing-numbers/","title":"Bank Routing Numbers","text":""},{"location":"policies/filters/common_filters/bank-routing-numbers/#filter","title":"Filter","text":"

    This filter identifies bank routing numbers (ABA routing transit numbers) such as 111000025 in text. Identified routing numbers must pass checksum validation.

    "},{"location":"policies/filters/common_filters/bank-routing-numbers/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/bank-routing-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value bankRoutingNumberFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/bank-routing-numbers/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/bank-routing-numbers/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. The filter will only be applied when the condition is satisfied. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/bank-routing-numbers/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"bank-routing-number-example\",\n   \"identifiers\": {\n      \"bankRoutingNumber\": {\n         \"bankRoutingNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/bitcoin-addresses/","title":"Bitcoin Addresses","text":""},{"location":"policies/filters/common_filters/bitcoin-addresses/#filter","title":"Filter","text":"

    This filter identifies bitcoin addresses such as 1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2 in text.

    "},{"location":"policies/filters/common_filters/bitcoin-addresses/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/bitcoin-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value bitcoinAddressFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/bitcoin-addresses/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/bitcoin-addresses/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/bitcoin-addresses/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"bitcoin-address-example\",\n   \"identifiers\": {\n      \"bitcoinAddress\": {\n         \"bitcoinAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/creditcards/","title":"Credit Cards","text":""},{"location":"policies/filters/common_filters/creditcards/#filter","title":"Filter","text":"

    This filter identifies credit cards such as 378282246310005 in text.

    "},{"location":"policies/filters/common_filters/creditcards/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/creditcards/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value creditCardFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None onlyValidCreditCardNumbers When set to true, only valid credit card numbers will be filtered. true ignoreWhenInUnixTimestamp When set to true, only credit card numbers that do not match the pattern for a Unix timestamp will be filtered. false"},{"location":"policies/filters/common_filters/creditcards/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE) LAST_4 Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/creditcards/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/creditcards/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"credit-cards-example\",\n   \"identifiers\": {\n      \"creditCard\": {\n         \"onlyValidCreditCardNumbers\": false,\n         \"creditCardFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/dates/","title":"Dates","text":""},{"location":"policies/filters/common_filters/dates/#filter","title":"Filter","text":"

    This filter identifies dates such as May 22, 2014 in text. The supported date formats are:

    Format Example yyyy-MM-d 2020-05-10 MM-dd-yyyy 05-10-2020 M-d-y 5-10-2020 MMM dd May 5 or May 05 MMMM dd, yyyy May 5, 2020 or May 5 2020"},{"location":"policies/filters/common_filters/dates/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/dates/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value dateFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None onlyValidDates When set to true, only valid dates will be filtered. false"},{"location":"policies/filters/common_filters/dates/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. SHIFT Shift the date by a number of months, days, and/or years. SHIFTRANDOM Shift the date by a random number of months, days, and years. RELATIVE Replace the date with words relative to the date."},{"location":"policies/filters/common_filters/dates/#filter-strategy-options","title":"Filter Strategy Options","text":"

    The following filter strategy options are available for the RELATIVE filter strategy.

    Option Description Default Value futureDates When true, future dates are replaced by relative words. When false, future dates are redacted. false

    The following filter strategy options are available for the SHIFT filter strategy.

    Option Description Default Value shiftDays The number of days to shift the date. Can be a negative or positive integer. Defaults to 0 if not specified. 0 shiftMonths The number of months to shift the date. Can be a negative or positive integer. Defaults to 0 if not specified. 0 shiftYears The number of years to shift the date. Can be a negative or positive integer. Defaults to 0 if not specified. 0"},{"location":"policies/filters/common_filters/dates/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != TOKEN Compares the sensitive text to some category, e.g. birthdate. is CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/dates/#differentiating-between-dates-and-birth-dates","title":"Differentiating Between Dates and Birth Dates","text":"

    In some cases it may be necessary to redact birth dates and dates differently. Using conditions it is possible to determine if an identified date is a birth date. The conditional token is birthdate will determine if the identified date (token) is a birth date by analyzing the content surrounding the date.

    "},{"location":"policies/filters/common_filters/dates/#example-policy-to-redact-dates","title":"Example Policy to Redact Dates","text":"

    The following policy redacts dates.

    {\n   \"name\": \"dates-example\",\n   \"identifiers\": {\n      \"date\": {\n         \"onlyValidDates\": false,\n         \"dateFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/dates/#example-policy-to-shift-dates","title":"Example Policy to Shift Dates","text":"

    The following policy shifts dates forward by 2 days and 4 months.

    {\n   \"name\": \"dates-example\",\n   \"identifiers\": {\n      \"date\": {\n         \"onlyValidDates\": false,\n         \"dateFilterStrategies\": [\n            {\n               \"strategy\": \"SHIFT\",\n               \"shiftDays\": 2,\n               \"shiftMonths\": 4,\n               \"shiftYears\": 0\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/drivers-license-numbers/","title":"Driver's License Numbers","text":""},{"location":"policies/filters/common_filters/drivers-license-numbers/#filter","title":"Filter","text":"

    This filter identifies driver's license numbers such as 194784357 in text. Driver's license number formats for all 50 US states are supported.

    "},{"location":"policies/filters/common_filters/drivers-license-numbers/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/drivers-license-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value driversLicenseFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/drivers-license-numbers/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/drivers-license-numbers/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/drivers-license-numbers/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"drivers-license-example\",\n   \"identifiers\": {\n      \"driversLicense\": {\n         \"driversLicenseFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/email-addresses/","title":"Email Addresses","text":""},{"location":"policies/filters/common_filters/email-addresses/#filter","title":"Filter","text":"

    This filter identifies email addresses such as john.fake.address@hotmail.com in text.

    "},{"location":"policies/filters/common_filters/email-addresses/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/email-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value emailAddressFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None onlyStrictMatches When set to false, the pattern for identifying email addresses will be relaxed. Filtered email addresses will have a lower confidence, but filter performance will increase. true onlyValidTLDs When set to true, only email addresses that are for a top-level domain are filtered. false"},{"location":"policies/filters/common_filters/email-addresses/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/email-addresses/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/email-addresses/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"email-address-example\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/iban-codes/","title":"IBAN Codes","text":""},{"location":"policies/filters/common_filters/iban-codes/#filter","title":"Filter","text":"

    This filter identifies International Bank Account Number (IBAN) codes such as HU4211773016111110180000000 in text. By default, only valid IBAN codes are filtered.

    "},{"location":"policies/filters/common_filters/iban-codes/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/iban-codes/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value allowSpaces When true, IBAN codes will be allowed to contain spaces and grouped in sections of 4. Set to false to disallow spaces in IBAN codes. true ibanCodeFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None onlyValidIBANCodes When set to true, only valid IBAN codes will be filtered. true"},{"location":"policies/filters/common_filters/iban-codes/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE) LAST_4 Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/iban-codes/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/iban-codes/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"iban-example\",\n   \"identifiers\": {\n      \"ibanCode\": {\n         \"onlyValidIBANCodes\": false,\n         \"ibanCodeFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/ip-addresses/","title":"IP Addresses","text":""},{"location":"policies/filters/common_filters/ip-addresses/#filter","title":"Filter","text":"

    This filter identifies IPv4 and IPv6 addresses such as 127.0.0.1, 192.168.3.58, and 2001:0db8:85a3:0000:0000:8a2e:0370:7334 in text.

    "},{"location":"policies/filters/common_filters/ip-addresses/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/ip-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value ipAddressFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/ip-addresses/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/ip-addresses/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/ip-addresses/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"ip-address-example\",\n   \"identifiers\": {\n      \"ipAddress\": {\n         \"ipAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/mac-addresses/","title":"MAC Addresses","text":""},{"location":"policies/filters/common_filters/mac-addresses/#filter","title":"Filter","text":"

    This filter identifies MAC addresses in text.

    "},{"location":"policies/filters/common_filters/mac-addresses/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/mac-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value macAddressFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/mac-addresses/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/mac-addresses/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/mac-addresses/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"mac-address-example\",\n   \"identifiers\": {\n      \"macAddress\": {\n         \"macAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/passport-numbers/","title":"Passport Numbers","text":""},{"location":"policies/filters/common_filters/passport-numbers/#filter","title":"Filter","text":"

    This filter identifies US passport numbers in text.

    "},{"location":"policies/filters/common_filters/passport-numbers/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/passport-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value passportNumberFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/passport-numbers/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/passport-numbers/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CLASSIFICATION Compares the issuing country of the passport number. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/passport-numbers/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"passport-number-example\",\n   \"identifiers\": {\n      \"passportNumber\": {\n         \"passportNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/phone-number-extensions/","title":"Phone Number Extensions","text":""},{"location":"policies/filters/common_filters/phone-number-extensions/#filter","title":"Filter","text":"

    This filter identifies phone number extensions such as \"x100\" in text.

    "},{"location":"policies/filters/common_filters/phone-number-extensions/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/phone-number-extensions/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value phoneNumberExtensionFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/phone-number-extensions/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/phone-number-extensions/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/phone-number-extensions/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"phone-number-ext-example\",\n   \"identifiers\": {\n      \"phoneNumberExtension\": {\n         \"phoneNumberExtensionFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      } \n   }     \n}\n
    "},{"location":"policies/filters/common_filters/phone-numbers/","title":"Phone Numbers","text":""},{"location":"policies/filters/common_filters/phone-numbers/#filter","title":"Filter","text":"

    This filter identifies phone and fax numbers such as (304) 555-5555, 304-555-5555, and 1-800-123-4567 in text.

    "},{"location":"policies/filters/common_filters/phone-numbers/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/phone-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value phoneNumberFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/phone-numbers/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/phone-numbers/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/phone-numbers/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"phone-number-example\",\n   \"identifiers\": {\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }     \n}\n
    "},{"location":"policies/filters/common_filters/sections/","title":"Sections","text":""},{"location":"policies/filters/common_filters/sections/#filter","title":"Filter","text":"

    This filter identifies sections in text between a given start regular expression pattern and a given end regular expression pattern.

    "},{"location":"policies/filters/common_filters/sections/#required-parameters","title":"Required Parameters","text":"Parameter Description Default Value startPattern A regular expression denoting the start of the section. None endPattern A regular expression denoting the end of the section. None"},{"location":"policies/filters/common_filters/sections/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value sectionFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/sections/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/sections/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/sections/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"sections-example\",\n   \"identifiers\": {\n      \"section\": {\n         \"startPattern\": \"START\",\n         \"endPattern\": \"END\",\n         \"sectionFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/ssns-and-tins/","title":"SSNs and TINs","text":""},{"location":"policies/filters/common_filters/ssns-and-tins/#filter","title":"Filter","text":"

    This filter identifies US SSNs and TINs such as 123-45-6789 and 123456789 in text.

    "},{"location":"policies/filters/common_filters/ssns-and-tins/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/ssns-and-tins/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value ssnFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/ssns-and-tins/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE) LAST_4 Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/ssns-and-tins/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/ssns-and-tins/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"ssn-tin-example\",\n   \"identifiers\": {\n      \"ssn\": {\n         \"ssnFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/tracking-numbers/","title":"Tracking Numbers","text":""},{"location":"policies/filters/common_filters/tracking-numbers/#filter","title":"Filter","text":"

    This filter identifies tracking numbers in text. FedEx, UPS, and USPS tracking number formats are supported.

    "},{"location":"policies/filters/common_filters/tracking-numbers/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/tracking-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value trackingNumberFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/tracking-numbers/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE) LAST_4 Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/tracking-numbers/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/tracking-numbers/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"tracking-numbers-example\",\n   \"identifiers\": {\n      \"trackingNumber\": {\n         \"trackingNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/urls/","title":"URLs","text":""},{"location":"policies/filters/common_filters/urls/#filter","title":"Filter","text":"

    This filter identifies URLs such as myhomepage.com, http://myhomepage.com/folder/page.html, and www.myhomepage.com/folder/page.html in text.

    "},{"location":"policies/filters/common_filters/urls/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/urls/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value urlFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None requireHttpWwwPrefix When set to true, only URLs that begin with http or www will be filtered. true"},{"location":"policies/filters/common_filters/urls/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/urls/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/urls/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"urls-example\",\n   \"identifiers\": {\n      \"url\": {\n         \"requireHttpWwwPrefix\": true,\n         \"urlFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/vins/","title":"VINs","text":""},{"location":"policies/filters/common_filters/vins/#filter","title":"Filter","text":"

    This filter identifies 17-character vehicle identification numbers (VINs) such as WBAPM7G50ANL19218 and 1GBJC34K3RE176005 in text.

    "},{"location":"policies/filters/common_filters/vins/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/vins/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value vinFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/vins/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE) LAST_4 Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/vins/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/vins/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"vins-example\",\n   \"identifiers\": {\n      \"vin\": {\n         \"vinFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/zip-codes/","title":"Zip Codes","text":""},{"location":"policies/filters/common_filters/zip-codes/#filter","title":"Filter","text":"

    This filter identifies zip codes in text.

    "},{"location":"policies/filters/common_filters/zip-codes/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/zip-codes/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value zipCodeFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None requireDelimiter When set to false, the filter will not require a dash in 9 digit zip codes, e.g. 12345-6789. Setting to false may increase the number of zip code false positives. true"},{"location":"policies/filters/common_filters/zip-codes/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. TRUNCATE Replace the sensitive text by removing the last x digits. (Set the number of digits using the truncateDigits parameter of the filter strategy.) ZERO_LEADING Replace the sensitive text by zeroing the first 3 digits."},{"location":"policies/filters/common_filters/zip-codes/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, != POPULATION Compares the population of the zip code against the 2010 census values. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/zip-codes/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"zip-code-example\",\n   \"identifiers\": {\n      \"zipCode\": {\n         \"zipCodeFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/custom_filters/dictionary/","title":"Dictionary","text":""},{"location":"policies/filters/custom_filters/dictionary/#filter","title":"Filter","text":"

    This filter identifies custom text based on a given dictionary.

    "},{"location":"policies/filters/custom_filters/dictionary/#required-parameters","title":"Required Parameters","text":"

    At least one of terms or files must be provided.

    Parameter Description Default Value terms A list of terms in the dictionary. None files A list of files containing terms one per line. None"},{"location":"policies/filters/custom_filters/dictionary/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None fuzzy When set to true, the dictionary will employ fuzzy comparisons. Use the sensitivity parameter to control the level of fuzziness. Setting this value to false will disable fuzziness and provide a higher level of performance. false classification Used to apply an arbitrary label to the identifier, such as \"patient-id\", or \"account-number.\" \"custom-identifier\" sensitivity Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are low, medium, and high. Only applies when fuzzy is set to true. medium"},{"location":"policies/filters/custom_filters/dictionary/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/custom_filters/dictionary/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/custom_filters/dictionary/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"dictionary-example\",\n   \"identifiers\": {\n      \"dictionaries\": [\n         {\n            \"terms\": [\"john\", \"jane\", \"doe\"],\n            \"files\": [\"c:\\\\temp\\\\dictionary.txt\"],\n            \"fuzzy\": true,\n            \"sensitivity\": \"medium\",\n            \"sectionFilterStrategies\": [\n               {\n                  \"strategy\": \"REDACT\",\n                  \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n               }\n            ]\n         }\n      ]\n   }   \n}\n
    "},{"location":"policies/filters/custom_filters/identifier/","title":"Identifier","text":""},{"location":"policies/filters/custom_filters/identifier/#filter","title":"Filter","text":"

    This filter identifies custom text based on a given regular expression.

    The Identifier filter accepts a list of regular expression-based identifiers. See the policy at the bottom of this page for an example.

    Note that backslashes in the regular expression will need to be escaped for the policy to be valid JSON.

    "},{"location":"policies/filters/custom_filters/identifier/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/custom_filters/identifier/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None caseSensitive When set to true, the regular expression will be case sensitive. true classification Used to apply an arbitrary label to the identifier, such as \"patient-id\", or \"account-number.\" \"custom-identifier\" pattern A regular expression for the identifier. Note that backslashes will need to be escaped. \\b[A-Z0-9_-]{4,}\\b"},{"location":"policies/filters/custom_filters/identifier/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. LAST_4 Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/custom_filters/identifier/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, != CLASSIFICATION Compares the classification of the sensitive text. == , !="},{"location":"policies/filters/custom_filters/identifier/#example-policy","title":"Example Policy","text":"
    {\n  \"name\": \"default\",\n  \"identifiers\": {\n    \"identifiers\": [\n      {\n        \"pattern\": \"[A-Z]{9}\",\n        \"caseSensitive\": false,\n        \"classification\": \"custom-identifier\",\n        \"enabled\": true,\n        \"identifierFilterStrategies\": [\n          {\n            \"strategy\": \"REDACT\",\n            \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n          }\n        ]        \n      }\n    ]\n  }\n}\n
    "},{"location":"policies/filters/locations/cities/","title":"Cities","text":""},{"location":"policies/filters/locations/cities/#filter","title":"Filter","text":"

    This filter identifies common US cities as determined by the US census in text.

    "},{"location":"policies/filters/locations/cities/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/locations/cities/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value cityFilterStrategies A list of filter strategies. None sensitivity Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are low, medium, and high. medium"},{"location":"policies/filters/locations/cities/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/cities/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/locations/cities/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"cities-example\",\n   \"identifiers\": {\n      \"city\": {\n         \"sensitivity\": \"medium\",\n         \"cityFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/locations/counties/","title":"Counties","text":""},{"location":"policies/filters/locations/counties/#filter","title":"Filter","text":"

    This filter identifies common US counties as determined by the US census in text.

    "},{"location":"policies/filters/locations/counties/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/locations/counties/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value countyFilterStrategies A list of filter strategies. None sensitivity Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are low, medium, and high. medium"},{"location":"policies/filters/locations/counties/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/counties/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/locations/counties/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"counties-example\",\n   \"identifiers\": {\n      \"county\": {\n         \"sensitivity\": \"medium\",\n         \"countyFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/locations/hospital-abbreviations/","title":"Hospital Abbreviations","text":""},{"location":"policies/filters/locations/hospital-abbreviations/#filter","title":"Filter","text":"

    This filter identifies US hospital abbreviations in text.

    "},{"location":"policies/filters/locations/hospital-abbreviations/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/locations/hospital-abbreviations/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value hospitalAbbreviationFilterStrategies A list of filter strategies. None sensitivity Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are low, medium, and high. medium"},{"location":"policies/filters/locations/hospital-abbreviations/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/hospital-abbreviations/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/locations/hospital-abbreviations/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"hospital-abbreviations-example\",\n   \"identifiers\": {\n      \"hospitalAbbreviation\": {\n         \"sensitivity\": \"medium\",\n         \"hospitalAbbreviationFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/locations/hospitals/","title":"Hospitals","text":""},{"location":"policies/filters/locations/hospitals/#filter","title":"Filter","text":"

    This filter identifies US hospitals in text.

    "},{"location":"policies/filters/locations/hospitals/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/locations/hospitals/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value hospitalFilterStrategies A list of filter strategies. None sensitivity Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are low, medium, and high. medium"},{"location":"policies/filters/locations/hospitals/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/hospitals/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/locations/hospitals/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"hospitals-example\",\n   \"identifiers\": {\n      \"hospital\": {\n         \"sensitivity\": \"medium\",\n         \"hospitalFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/locations/state-abbreviations/","title":"State Abbreviations","text":""},{"location":"policies/filters/locations/state-abbreviations/#filter","title":"Filter","text":"

    This filter identifies US state abbreviations in text.

    "},{"location":"policies/filters/locations/state-abbreviations/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/locations/state-abbreviations/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value stateAbbreviationsFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/locations/state-abbreviations/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/state-abbreviations/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/locations/state-abbreviations/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"states-abbreviations-example\",\n   \"identifiers\": {\n      \"stateAbbreviation\": {\n         \"stateAbbreviationFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/locations/states/","title":"States","text":""},{"location":"policies/filters/locations/states/#filter","title":"Filter","text":"

    This filter identifies US states in text.

    "},{"location":"policies/filters/locations/states/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/locations/states/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value stateFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/locations/states/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/states/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/locations/states/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"states-example\",\n   \"identifiers\": {\n      \"state\": {\n         \"stateFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/persons_names/first-names/","title":"First Names","text":""},{"location":"policies/filters/persons_names/first-names/#filter","title":"Filter","text":"

    This filter identifies common first names as identified by the US census in text.

    "},{"location":"policies/filters/persons_names/first-names/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/persons_names/first-names/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value sensitivity Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are low, medium, and high. medium firstNameFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/first-names/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/persons_names/first-names/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/persons_names/first-names/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"first-names-example\",\n   \"identifiers\": {\n      \"firstName\": {\n         \"firstNameFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/persons_names/persons-names-ner/","title":"Person's Names (NER)","text":""},{"location":"policies/filters/persons_names/persons-names-ner/#filter","title":"Filter","text":"

    This filter identifies person's names based on natural language processing (NLP) and named-entity recognition (NER) in text.

    "},{"location":"policies/filters/persons_names/persons-names-ner/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/persons_names/persons-names-ner/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value removePunctuation When set to true, punctuation will be removed prior to analysis. false firstNameFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/persons-names-ner/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. ABBREVIATE Replace the sensitive text with the initials of the text."},{"location":"policies/filters/persons_names/persons-names-ner/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/persons_names/persons-names-ner/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"ner-example\",\n   \"identifiers\": {\n      \"ner\": {\n         \"nerFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/persons_names/physician-names-ner/","title":"Physician Names","text":""},{"location":"policies/filters/persons_names/physician-names-ner/#filter","title":"Filter","text":"

    This filter identifies physician names (e.g. Dr. John Smith) in text.

    "},{"location":"policies/filters/persons_names/physician-names-ner/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/persons_names/physician-names-ner/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value physicianNameFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/physician-names-ner/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/persons_names/physician-names-ner/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/persons_names/physician-names-ner/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"physician-names-example\",\n   \"identifiers\": {\n      \"physicianName\": {\n         \"physicianNameFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/persons_names/surnames/","title":"Surnames","text":""},{"location":"policies/filters/persons_names/surnames/#filter","title":"Filter","text":"

    This filter identifies common surnames as identified by the US census in text.

    "},{"location":"policies/filters/persons_names/surnames/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/persons_names/surnames/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value sensitivity Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are low, medium, and high. medium surnameFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/surnames/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/persons_names/surnames/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/persons_names/surnames/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"surnames-example\",\n   \"identifiers\": {\n      \"surname\": {\n         \"surnameFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"quick_starts/quick_start_aws/","title":"Philter Quick Start on AWS","text":"

    Philter on AWS is a virtual machine-based product. It runs in EC2 on its own EC2 instance. A free trial period is available during which there is no charge for the Philter software but there may be charges for the underlying AWS infrastructure.

    Cloud virtual machines launched from a cloud marketplace may not be immediately suitable for a HIPAA environment. Refer to your compliance officer for your organization's requirements to ensure compliance with all relevant regulations.

    Here\u2019s a brief screencast showing how to launch Philter in AWS.

    "},{"location":"quick_starts/quick_start_aws/#launch-philter-in-aws","title":"Launch Philter in AWS","text":"
    1. Go to Philter in the AWS Marketplace. On this page you can see the Philter overview, the pricing, and the supported EC2 instance types.
    2. Select an instance type. We recommend m5.large. The smaller instance types are intended only for testing and are not well-suited for production usage.
    3. Click the Continue to Subscribe button.
    4. View and accept Philter\u2019s license agreement. Then click Accept Terms.
    5. The subscription will now be created and you will be notified when it is ready! This usually takes less than a minute.
    6. Click the Continue to Configuration button to select the AMI, the version, and the region. We recommend using the newest version if multiple are available.
    7. Click the Continue to Launch button to launch Philter in your AWS account!

    AWS will automatically open ports 22 (SSH) and 8080 (Philter API) for the Philter instance's security group. These ports are required to be open but you may want to modify the security groups to limit their scope of availability by restricting access to specific CIDR ranges.

    Congratulations! You have deployed Philter in AWS. You are now ready to filter text!

    "},{"location":"quick_starts/quick_start_aws/#try-it-out","title":"Try it out!","text":"

    With Philter now running we can take it for a spin. We will send some text to Philter and inspect the response we get back. The Philter virtual machine running in your cloud account should have a public IP address (unless you customized the deployment). We will use that public IP address to interact with Philter.

    Philter, by default, will be configured with an HTTPS listener on port 8080 using a self-signed certificate. It is recommended that prior to use in a production environment the self-signed certificate is replaced by a valid certificate owned by your organization.

    In the command below, replace <PUBLIC_IP> with the virtual machine\u2019s public IP address or public host name.

    curl -k -X POST https://<PUBLIC_IP>:8080/api/filter --data \"George Washington was a patient and his SSN is 123-45-6789.\" -H \"Content-type: text/plain\"\n

    With this command we are sending the text in the command to Philter for filtering. Philter will identify the patient name (George Washington) and the SSN (123-45-6789) and redact those values in the response. You can always use curl to send text to Philter as in these examples but there are also SDKs you can use to integrate Philter with your applications.

    "},{"location":"quick_starts/quick_start_aws/#redacting-sensitive-information-from-text","title":"Redacting Sensitive Information from Text","text":"

    The types of sensitive information that Philter identifies and removes are controlled by policies. By default, Philter includes a policy that covers many of the types of sensitive information, such as names and social security numbers. We can send text to Philter for filtering using this default policy with the following command:

    curl -k -X POST https://localhost:8080/api/filter -d @file.txt -H \"Content-Type: text/plain\"\n

    This command sends the contents of the file file.txt to Philter. Philter will apply the enabled filters and return a plain-text response consisting of the filtered text. (Replace localhost with the IP address or host name of Philter if you are not running the command where Philter is running.) You can also send text directly in the request instead of sending it as a file:

    curl -k -X POST https://localhost:8080/api/filter --data \"Your text goes here...\" -H \"Content-type: text/plain\"\n
    "},{"location":"quick_starts/quick_start_aws/#next-steps","title":"Next Steps","text":"

    Now that you have Philter running and know how to send text to it you are ready to integrate Philter into your existing workflow and systems. Philter\u2019s API details how to send files to Philter. Clients for some languages for Philter\u2019s API are available on GitHub.

    Be sure to check out Policies to see how you can customize the types of sensitive information Philter redacts!

    "},{"location":"quick_starts/quick_start_aws/#example-uses","title":"Example Uses","text":"

    Here are a few examples showing how to use Philter with some common big-data and streaming applications.

    Description Technologies Remove sensitive information from text in an Apache NiFi dataflow Apache NiFi Remove sensitive information from text using AWS Lambda in an Amazon Kinesis Firehose pipeline Amazon Kinesis, AWS Lambda Removing PII/PHI from OpenAI ChatGPT API Requests ChatGPT Redacting PHI and PII in Apache Kafka Data streams Apache Kafka"},{"location":"quick_starts/quick_start_azure/","title":"Philter Quick Start on Microsoft Azure","text":"

    Philter on Microsoft Azure is a virtual machine-based product. A free trial period is available during which there is no charge for the Philter software but there may be charges for the underlying Azure infrastructure.

    Cloud virtual machines launched from a cloud marketplace may not be immediately suitable for a HIPAA environment. Refer to your compliance officer for your organization's requirements to ensure compliance with all relevant regulations.

    "},{"location":"quick_starts/quick_start_azure/#launch-philter-on-microsoft-azure","title":"Launch Philter on Microsoft Azure","text":"
    1. Go to Philter in the Azure Marketplace.
    2. Click the Get It Now button.
    3. Review the information that is shown on the popup and click Continue when ready.
    4. You will now be asked to log in to your Microsoft Azure account if you were not already logged in.
    5. Click the Create button to begin making a Philter virtual machine.
    6. Enter the required details of the virtual machine and click the Review + create button.
    7. Review the virtual machine details and click Create when ready!

    Your Philter virtual machine will now be launching.

    Microsoft Azure will automatically open ports 22 (SSH) and 8080 (Philter API). These ports are required to be open but you may want to modify the security groups to limit their scope of availability by restricting access to specific CIDR ranges.

    Congratulations! You have deployed Philter in Azure. You are now ready to filter text!

    "},{"location":"quick_starts/quick_start_azure/#try-it-out","title":"Try it out!","text":"

    With Philter now running we can take it for a spin. We will send some text to Philter and inspect the response we get back. The Philter virtual machine running in your cloud account should have a public IP address (unless you customized the deployment). We will use that public IP address to interact with Philter.

    Philter, by default, will be configured with an HTTPS listener on port 8080 using a self-signed certificate. It is recommended that prior to use in a production environment the self-signed certificate is replaced by a valid certificate owned by your organization.

    In the command below, replace <PUBLIC_IP> with the virtual machine\u2019s public IP address or public host name.

    curl -k -X POST https://<PUBLIC_IP>:8080/api/filter --data \"George Washington was a patient and his SSN is 123-45-6789.\" -H \"Content-type: text/plain\"\n

    With this command we are sending the text in the command to Philter for filtering. Philter will identify the patient name (George Washington) and the SSN (123-45-6789) and redact those values in the response. You can always use curl to send text to Philter as in these examples but there are also SDKs you can use to integrate Philter with your applications.

    "},{"location":"quick_starts/quick_start_azure/#redacting-sensitive-information-from-text","title":"Redacting Sensitive Information from Text","text":"

    The types of sensitive information that Philter identifies and removes are controlled by policies. By default, Philter includes a policy that covers many of the types of sensitive information, such as names and social security numbers. We can send text to Philter for filtering using this default policy with the following command:

    curl -k -X POST https://localhost:8080/api/filter -d @file.txt -H \"Content-Type: text/plain\"\n

    This command sends the contents of the file file.txt to Philter. Philter will apply the enabled filters and return a plain-text response consisting of the filtered text. (Replace localhost with the IP address or host name of Philter if you are not running the command where Philter is running.) You can also send text directly in the request instead of sending it as a file:

    curl -k -X POST https://localhost:8080/api/filter --data \"Your text goes here...\" -H \"Content-type: text/plain\"\n
    "},{"location":"quick_starts/quick_start_azure/#next-steps","title":"Next Steps","text":"

    Now that you have Philter running and know how to send text to it you are ready to integrate Philter into your existing workflow and systems. Philter\u2019s API details how to send files to Philter. Clients for some languages for Philter\u2019s API are available on GitHub.

    Be sure to check out Policies to see how you can customize the types of sensitive information Philter redacts!

    "},{"location":"quick_starts/quick_start_azure/#example-uses","title":"Example Uses","text":"

    Here are a few examples showing how to use Philter with some common big-data and streaming applications.

    Description Technologies Remove sensitive information from text in an Apache NiFi dataflow Apache NiFi Remove sensitive information from text using AWS Lambda in an Amazon Kinesis Firehose pipeline Amazon Kinesis, AWS Lambda Removing PII/PHI from OpenAI ChatGPT API Requests ChatGPT Redacting PHI and PII in Apache Kafka Data streams Apache Kafka

    "},{"location":"quick_starts/quick_start_gcp/","title":"Philter Quick Start on Google Cloud","text":"

    Philter on Google Cloud is a virtual machine-based product. A free trial period is available during which there is no charge for the Philter software but there may be charges for the underlying Google Cloud infrastructure.

    Cloud virtual machines launched from a cloud marketplace may not be immediately suitable for a HIPAA environment. Refer to your compliance officer for your organization's requirements to ensure compliance with all relevant regulations.

    "},{"location":"quick_starts/quick_start_gcp/#launch-philter-in-google-cloud","title":"Launch Philter in Google Cloud","text":"
    1. Go to Philter in the Google Cloud Marketplace.
    2. Click the Launch on Compute Engine button.

    Virtual Machine Recommendations

    The general purpose machine type is n2-standard-2 and this machine type should be adequate for most use-cases. We recommend 8 vCPUs and 8-16 GB of RAM for a production deployment.

    Google Cloud will automatically open ports 22 (SSH) and 8080 (Philter API). These ports are required to be open but you may want to modify the security groups to limit their scope of availability by restricting access to specific CIDR ranges.

    Congratulations! You have deployed Philter in Google Cloud. You are now ready to filter text!

    "},{"location":"quick_starts/quick_start_gcp/#try-it-out","title":"Try it out!","text":"

    With Philter now running we can take it for a spin. We will send some text to Philter and inspect the response we get back. The Philter virtual machine running in your cloud account should have a public IP address (unless you customized the deployment). We will use that public IP address to interact with Philter.

    Philter, by default, will be configured with an HTTPS listener on port 8080 using a self-signed certificate. It is recommended that prior to use in a production environment the self-signed certificate is replaced by a valid certificate owned by your organization.

    In the command below, replace <PUBLIC_IP> with the virtual machine\u2019s public IP address or public host name.

    curl -k -X POST https://<PUBLIC_IP>:8080/api/filter --data \"George Washington was a patient and his SSN is 123-45-6789.\" -H \"Content-type: text/plain\"\n

    With this command we are sending the text in the command to Philter for filtering. Philter will identify the patient name (George Washington) and the SSN (123-45-6789) and redact those values in the response. You can always use curl to send text to Philter as in these examples but there are also SDKs you can use, too, to integrate Philter with your applications.

    "},{"location":"quick_starts/quick_start_gcp/#redacting-sensitive-information-from-text","title":"Redacting Sensitive Information from Text","text":"

    The types of sensitive information that Philter identifies and removes are controlled by policies. By default, Philter includes a filter profile that covers many types of sensitive information, such as names and social security numbers. We can send text to Philter for filtering using this default filter profile with the following command:

    curl -k -X POST https://localhost:8080/api/filter -d @file.txt -H \"Content-Type: text/plain\"\n

    This command sends the contents of the file file.txt to Philter. Philter will apply the enabled filters and return a plain-text response consisting of the filtered text. (Replace localhost with the IP address or host name of Philter if you are not running the command where Philter is running.) You can also send text directly in the request instead of sending it as a file:

    curl -k -X POST https://localhost:8080/api/filter --data \"Your text goes here...\" -H \"Content-type: text/plain\"\n
    "},{"location":"quick_starts/quick_start_gcp/#next-steps","title":"Next Steps","text":"

    Now that you have Philter running and know how to send text to it you are ready to integrate Philter into your existing workflow and systems. Philter\u2019s API details how to send files to Philter. Clients for some languages for Philter\u2019s API are available on GitHub.

    Be sure to check out Policies to see how you can customize the types of sensitive information Philter redacts!

    "},{"location":"quick_starts/quick_start_gcp/#example-uses","title":"Example Uses","text":"

    Here are a few examples showing how to use Philter with some common big-data and streaming applications.

    Description Technologies Remove sensitive information from text in an Apache NiFi dataflow Apache NiFi Remove sensitive information from text using AWS Lambda in an Amazon Kinesis Firehose pipeline Amazon Kinesis, AWS Lambda Removing PII/PHI from OpenAI ChatGPT API Requests ChatGPT Redacting PHI and PII in Apache Kafka Data streams Apache Kafka

    "}]} \ No newline at end of file +{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Philter","text":"

    Philter is an application that finds, identifies, and removes sensitive information, such as protected health information (PHI) and personally identifiable information (PII), and user-defined sensitive information from natural language text. Philter is ideal for usage in text processing pipelines where sensitive information needs to be removed, encrypted, or redacted from the text.

    This documentation applies to Philter 2.4.0. If you are upgrading to this version see Upgrading Philter.

    To get going fast, jump to the Quick Starts to launch Philter on AWS, Azure, or Google Cloud.

    "},{"location":"evaluating-performance/","title":"How to Evaluate Phileas' Performance","text":"

    A common question we receive is how well does Phileas perform? Our answer to this question is probably less than satisfactory because it simply depends. What does it depend on? Phileas' performance is heavily dependent upon your individual data. Comparing metrics of Phileas' performance across different customer datasets is like comparing apples and oranges.

    If your data is not exactly like another customer's data then the metrics will not be applicable to your data. In terms of the classic information retrieval metrics precision and recall, comparing these values between customers can give false impressions about Phileas' performance, both good and bad.

    This guide walks you through how to evaluate Phileas' performance. If you are just getting started with Phileas please see the Quick Starts instead. Then you can come back here to learn how to evaluate Phileas' performance.

    "},{"location":"evaluating-performance/#guide-to-evaluating-performance","title":"Guide to Evaluating Performance","text":"

    We have created this guide to help you evaluate Phileas' performance on your data. The guide involves determining the types of sensitive information you want to redact, configuring those filters, optimizing the configuration, and then capturing the performance metrics.

    If you are using Philter we will gladly perform these steps for you and provide you a detailed Phileas performance report generated from your data. Please contact us to start the process.

    "},{"location":"evaluating-performance/#what-you-need","title":"What You Need","text":"

    To evaluate Phileas' performance you need:

    • An application using Phileas.
    • A list of the types of sensitive information you want to redact.
    • A data set representative of the text you will be redacting using Phileas. It's important the data set be representative so the evaluation results will transfer to the actual data redaction.
    • The same data set but with annotated sensitive information. These annotations will be used to calculate the precision and recall metrics.
    "},{"location":"evaluating-performance/#configuring-phileas","title":"Configuring Phileas","text":"

    Before we can begin our evaluation we need to create a policy. A policy is a file that defines the types of sensitive information that will be redacted and how it will be redacted. The policies are stored on the Phileas instance under /opt/Phileas/policies. You can edit the policies directly there using a text editor or you can use Phileas' API to upload a policy. In this case we recommend just using a text editor on the Phileas instance to create a policy.

    When using a text editor to create and edit a policy, be sure to save the policy often. Frequent saving can make editing a policy easier.

    We also recommend placing your policy directory under source control to have a history and change log of your policies.

    "},{"location":"evaluating-performance/#creating-a-policy","title":"Creating a Policy","text":"

    Make a copy of the default policy, and we will modify the copy for our needs.

    cp /opt/Phileas/policies/default.json /opt/Phileas/policies/evaluation.json

    Now open /opt/Phileas/policies/evaluation.json in a text editor. (The content of evaluation.json will be similar to what's shown below but may have minor differences between different versions of Phileas.)

    {\n   \"name\": \"default\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n

    The first thing we need to do is to set the name of the policy. Replace default with evaluation and save the file.

    "},{"location":"evaluating-performance/#identifying-the-filters-you-need","title":"Identifying the Filters You Need","text":"

    The rest of the file contains the filters that are enabled in the default policy. We need to make sure that each type of sensitive information that you want to redact is represented by a filter in this file. Look through the rest of the policy and determine which filters are listed that you do not need and also which filters you do need that are not listed.

    "},{"location":"evaluating-performance/#disabling-filters-we-do-not-need","title":"Disabling Filters We Do Not Need","text":"

    If a filter is listed in the policy and you do not need the filter you have two options. You can either delete those lines from the policy and save the file, or you can set the filter's enabled property to false. Using the enabled property allows you to keep the filter configuration in the policy in case it is needed later but both options have the same effect.

    "},{"location":"evaluating-performance/#enabling-filters-not-in-the-default-policy","title":"Enabling Filters Not in the Default Policy","text":"

    Let's say you want to redact bitcoin addresses. The bitcoin address filter is not in the default policy. To add the bitcoin address filter we will refer to Phileas' documentation on the bitcoin address filter, get the configuration, and copy it into the policy.

    From the bitcoin address filter documentation we see the configuration for the bitcoin address filter is:

          \"bitcoinAddress\": {\n         \"bitcoinAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n

    We can copy this configuration and paste it into our policy:

    {\n   \"name\": \"evaluation\",\n   \"identifiers\": {\n      \"bitcoinAddress\": {\n         \"bitcoinAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n

    The order of the filters in the policy does not matter and has no impact on performance. We typically place the filters in the policy alphabetically just to improve readability.

    Repeat these steps until you have added a filter for each of the types of sensitive information you want to redact. Typically, the default redaction strategy and redactionFormat values for each filter should be fine for evaluation.

    When finished modifying the policy, save the file and close the text editor. Now restart Phileas for the policy changes to be loaded:

    sudo systemctl restart Phileas\n
    "},{"location":"evaluating-performance/#submitting-text-for-redaction","title":"Submitting Text for Redaction","text":"

    With our policy in place we can now send text to Phileas for redaction using that policy:

    PhileasConfiguration phileasConfiguration = ConfigFactory.create(PhileasConfiguration.class);\n\nFilterService filterService = new PhileasFilterService(phileasConfiguration);\n\nFilterResponse response = filterService.filter(policies, context, documentId, body, MimeType.TEXT_PLAIN);\n

    The explain API endpoint produces a detailed description of the redaction. The response will include a list of spans that contain the start and stop positions of redacted text and the type of sensitive information that was redacted. Using this information we can compare the redacted information to our annotated file to calculate precision and recall metrics.

    "},{"location":"evaluating-performance/#calculating-precision-and-recall","title":"Calculating Precision and Recall","text":"

    Now we can calculate the precision and recall metrics.

    • Precision is the number of true positives divided by the number of true positives plus false positives.
    • Recall is the number of true positives divided by the number of false negatives plus true positives.

    • The F-1 score is the harmonic mean of precision and recall.

    "},{"location":"settings/","title":"Settings","text":"

    Phileas has settings to control how it operates. The settings and how to configure each are described below.

    The configuration for the types of sensitive information that Phileas identifies is defined in filter policies outside of Phileas' configuration properties described on this page.

    "},{"location":"settings/#configuring-phileas","title":"Configuring Phileas","text":""},{"location":"settings/#the-phileas-settings-file","title":"The Phileas Settings File","text":"

    Phileas looks for its settings in an application.properties file.

    "},{"location":"settings/#using-environment-variables","title":"Using Environment Variables","text":"

    Properties set via environment variables take precedence over properties set in Phileas' settings file.

    All following properties can also be set as environment variables by prepending PHILTER_ to the property name and changing periods to underscores. For example, the property filter.profiles.directory can be set using the environment variable PHILTER_FILTER_PROFILES_DIRECTORY by:

    export PHILTER_FILTER_PROFILES_DIRECTORY=/profiles/\n

    Using environment variables to configure Phileas instead of using Phileas' settings file can allow for easier configuration management when deploying Phileas.

    "},{"location":"settings/#policies","title":"Policies","text":"Setting Description Allowed Values Default Value filter.policies.directory The directory in which to look for policies. Any valid directory path. ./policies/"},{"location":"settings/#span-disambiguation","title":"Span Disambiguation","text":"

    These values configure Phileas' span disambiguation feature to determine the most appropriate type of sensitive information when duplicate spans are identified. In a deployment of multiple Phileas instances, you must enable the cache service for span disambiguation to work as expected.

    Description Allowed Values Default Value span.disambiguation.enabled Whether or not to enable span disambiguation. true, false false"},{"location":"settings/#cache-service","title":"Cache Service","text":"

    The cache service is required to use consistent anonymization and policies stored in Amazon S3. Phileas supports Redis as the backend cache. When Redis is not used, an in-memory cache is used instead. The in-memory cache is not recommended because all contents will be stored in memory on the local Phileas instance.

    The cache will contain sensitive information. It is important that you take the necessary precautions to secure the cache itself and all communication between Phileas and the cache.

    Setting Description Allowed Values Default Value cache.redis.enabled Whether or not to use Redis as the cache. true, false false cache.redis.host The hostname or IP address of the Redis cache. Any valid Redis endpoint. None cache.redis.port The Redis cache port. Any valid port. 6379 cache.redis.auth.token The Redis auth token. Any valid token. None cache.redis.ssl Whether or not to use SSL for communication with the Redis cache. true, false false

    The following Redis settings are only required when using a self-signed SSL certificate.

    Setting Description Allowed Values Default Value cache.redis.truststore The path to the trust store. Any valid file path. None cache.redis.truststore.password The trust store password. Any valid password. None cache.redis.keystore The path to the keystore. Any valid file path. None cache.redis.keystore.password The keystore password. Any valid password. None"},{"location":"settings/#advanced-settings","title":"Advanced Settings","text":"

    In most cases the settings below do not need to be changed. Contact us for more information on any of these settings.

    Setting Description Allowed Values Default Value ner.timeout.sec Controls the timeout in seconds when performing named entity recognition. Longer text may require longer processing times. An integer value. 600 ner.max.idle.connections The maximum number of idle connections to maintain for named entity recognition. More connections may improve performance in some cases. An integer value. 30 ner.keep.alive.duration.ms The amount of time in milliseconds to keep named entity recognition connections alive. Longer text may require longer processing times. An integer value. 60"},{"location":"system_requirements/","title":"System Requirements","text":"

    When launched from a cloud marketplace, Philter is pre-configured and contains all required dependencies.

    Philter requires the following:

    • 2 vCPU (e.g., m5.large instance type on AWS)
    • 8 GB of RAM
    • Java 17
    "},{"location":"upgrading/","title":"Upgrading Philter","text":"

    We recommend reviewing the Philter Release Notes prior to upgrading.

    "},{"location":"upgrading/#upgrading-from-a-2x-version","title":"Upgrading from a 2.x Version","text":"

    Upgrading Philter to the newest version requires moving Philter's configuration to the new version of Philter. To upgrade Philter from a 2.x version, follow the steps below.

    1. Launch a new instance of the newest version of Philter.
    2. Copy your policies from /opt/philter/policies to the new instance.
    3. Copy your /opt/philter/philter.properties to the new instance.
    4. Copy your /opt/philter/philter-ui.properties to the new instance.
    5. Replace the new virtual machine's properties files with your copies from steps 3 and 4.
    6. Copy your policies from /opt/philter/policies to the new instance.
    7. If you have configured any SSL certificates for Philter, copy those files over to the new instance.
    8. Restart Philter: sudo systemctl restart philter.service && sudo systemctl restart philter-ui.service && sudo systemctl restart philter-ner.service
    9. Test the new Philter virtual machine to make sure it is behaving as expected.
    10. Decommission the old Philter instance.
    "},{"location":"upgrading/#upgrading-from-a-1x-version","title":"Upgrading from a 1.x Version","text":"

    Upgrading Philter to the newest version requires moving Philter's configuration to the new version of Philter. To upgrade Philter from a 1.x version, follow the steps below.

    1. Make local copies of your current Philter's properties files.

    2. /opt/philter/philter.properties (prior to 1.10.1 the filename was /opt/philter/application.properties)

    3. /opt/philter/philter-ui.properties (not applicable prior to version 1.10)

    4. Launch a new instance of the newest version of Philter.

    5. Replace the new virtual machine's properties file with your copy from step 1.
    6. Restart Philter: sudo systemctl restart philter.service && sudo systemctl restart philter-ui.service && sudo systemctl restart philter-ner.service
    7. Test the new Philter virtual machine to make sure it is behaving appropriately.
    8. Decommission the old Philter instance.
    "},{"location":"deidentification/bucketing/","title":"Bucketing","text":""},{"location":"deidentification/date-shifting/","title":"Date Shifting","text":""},{"location":"deidentification/deidentification/","title":"De-identification Methods","text":"

    There are several ways data can be de-identified, and which you use depends on the types of data you want to de-identify and your use-case for de-identifying the data. The terminology around the different methods is often used interchangeably, but there are differences between each method.

    In this User's Guide, we may use the terms filter and redact interchangeably.

    In Philter, de-identification methods vary for each type of sensitive information. For example, all types can be replaced or redacted, but only dates can be shifted and only zip codes can be truncated. How a de-identification method is applied by Philter is called a filter strategy. Each type of sensitive information can have one or more filter strategies, and the combination of the filter strategies you select is called a policy. A policy determines how a document will be de-identified.

    The following is a list of de-identification methods that describes how each method works and its applicability to our Philter software. De-identifying a document is likely to require a combination of the following methods. For instance, you may want to redact names, encrypt credit card numbers, and shift appointment dates.

    De-identification MethodDescriptionReplacementReplaces sensitive information with a defined value. For example, you might want to replace a credit card number with the literal value \"CREDIT_CARD_NUMBER\".Redaction and MaskingRemoves sensitive information. Our Philter software gives you a choice of how to remove the sensitive information, whether it is by replacing it with ***** (masking) or by some other set of characters.EncryptionEncrypts sensitive information.Date ShiftingShifts dates either forward or backward by some interval.BucketingCategorizes data into buckets based on the data. For example, Philter can bucket dates into years, and zip codes by population.

    A difference between Philter and other services is that Philter does not send your data to a third-party for de-identification. Philter runs in your cloud and your data stays in your cloud.

    "},{"location":"deidentification/encryption/","title":"Encryption","text":""},{"location":"deidentification/pii_phi_nppi/","title":"PII, PHI, and NPPI","text":"

    Philter has many predefined types of sensitive information called filters that can be redacted. The individual types are described below.

    • Personally identifiable information (PII) is any information that could potentially be used to identify a specific person.
    • Protected health information (PHI) is any information about health status, provision of health care, or payment for health care that can be linked to an individual. The Health Insurance Portability And Accountability Act (HIPAA) defines 18 types of PHI.
    "},{"location":"deidentification/pii_phi_nppi/#predefined-types-of-pii-and-phi","title":"Predefined Types of PII and PHI","text":"

    The types of sensitive information that Philter will identify are customizable. For example, if you are not interested in VIN numbers you can have Philter ignore them. This configuration is performed through Policies.

    Because Philter only operates on text, the biometric identifiers and face images outlined in the HIPAA regulations as PHI are not applicable to Philter. The types of sensitive information and how Philter identifies each one is listed in the table below.

    Type of PHI How Philter Identifies It 1

    Names

    Ex: John Smith, Jane Doe

    • Philter identifies names in natural language text using state of the art machine learning algorithms and natural language processing techniques to identify named-person entities.
    • Philter also uses common first name and surname dictionaries with spellcheck capability to identify common names per the US census.
    2

    All geographical identifiers smaller than a state, except for the initial three digits of a zip code if, according to the current publicly available data from the U.S. Bureau of the Census: the geographic unit formed by combining all zip codes with the same three initial digits contains more than 20,000 people; and the initial three digits of a zip code for all such geographic units containing 20,000 or fewer people is changed to 000

    Ex: 85055, 90213-1544

    • Philter can identify many US cities, US counties, and all US states (full names and abbreviations).
    • Philter uses a dictionary with spelling correction to identify misspelled locations.
    • Filter conditions in policies can be used to apply logic based on zip code population according to the US census. (Filter strategies can truncate the zip code.)
    • Philter also uses state of the art machine learning algorithms and natural language processing techniques to identify locations.
    • Philter includes a dictionary of some hospital locations to quickly identify medical locations.
    3

    Dates (other than year) directly related to an individual

    Ex: 10-10-2000, 10/10/2000, October 10, 2000

    • Philter can identify dates in many formats such as with hyphens (10-10-2000), with slashes (10/10/2000), or spelled out (May 1, 2000).
    • Philter can also identify ages, e.g. 57 years, 57yrs.
    4

    Phone Numbers

    Ex: (304) 555-5555, 304-555-5555, 1-800-123-4567

    • Philter can identify phone numbers in many formats. (Philter is currently limited to US phone numbers.)
    5

    Fax numbers

    Ex: (304) 555-5555, 304-555-5555, 1-800-123-4567

    • Philter can identify fax numbers in many formats. (Philter is currently limited to US phone numbers.)
    6

    Email addresses

    Ex: john.fake.address@hotmail.com

    • Philter can identify email addresses per the email standard (summarized on Wikipedia).
    7

    Social Security numbers

    Ex: 123-45-6789, 123456789

    • Philter can identify social security numbers (SSNs) in multiple formats such as with spaces and hyphens.
    8

    Medical record numbers

    Ex: 86637729, AB473-6021, 473-6AB021

    • Philter can identify alphanumeric identifiers.
    9

    Health insurance beneficiary numbers

    Ex: 86637729, AB473-6021, 473-6AB021

    • Philter can identify alphanumeric identifiers.
    10

    Account numbers

    Ex: 86637729, AB473-6021, 473-6AB021

    • Philter can identify alphanumeric identifiers, as well as credit card numbers from all major types of credit cards.
    11

    Certificate/license numbers

    Ex: 86637729, AB473-6021, 473-6AB021

    • Philter can identify alphanumeric identifiers.
    12

    Vehicle identifiers and serial numbers, including license plate numbers

    Ex: WBAPM7G50ANL19218, 1GBJC34K3RE176005

    • Philter can identify vehicle serial numbers (17-character VIN numbers). License plates will be identified as alphanumeric identifiers.
    13

    Device identifiers and serial numbers

    Ex: H3SNPUHYEE7JD3H, 33778376

    • Philter can identify alphanumeric identifiers.
    14

    Web Uniform Resource Locators (URLs)

    Ex: myhomepage.com, http://myhomepage.com/folder/page.html, www.myhomepage.com/folder/page.html

    • Philter can identify URLs adhering to the URL naming standard.
    15

    Internet Protocol (IP) address numbers

    Ex: 127.0.0.1, 192.168.3.58, 2001:0db8:85a3:0000:0000:8a2e:0370:7334

    • Philter can identify IPv4 and IPv6 addresses.
    16 Biometric identifiers, including finger, retinal and voice prints
    • Not applicable \u2013 Philter only identifies PHI in text.
    17 Full face photographic images and any comparable images
    • Not applicable \u2013 Philter only identifies PHI in text.
    18

    Any other unique identifying number, characteristic, or code except the unique code assigned by the investigator to code the data

    Ex: 86637729, AB473-6021, 473-6AB021

    • Philter can identify alphanumeric identifiers.
    "},{"location":"deidentification/redaction-and-masking/","title":"Redaction and Masking","text":"

    Redaction and masking are two methods of de-identification that are often used interchangeably. The term redaction refers to removing a sensitive value from a document. When we hear the term redaction we often think of an image of a document with black bars across pieces of the text.

    Masking is similar to redaction but allows for configuring how the sensitive value is removed. The most common example is using asterisks (i.e. ******) in place of a sensitive value.

    "},{"location":"deidentification/replacement/","title":"Replacement","text":"

    Replacement is a method of de-identification that simply replaces a sensitive value with another value. Replacement is useful when the sensitive value is not needed once the document has been de-identified. Philter can replace a sensitive value with a preset value or with a random value.

    In Philter's filter strategies, replacement is achieved by setting the strategy to REDACT, STATIC_REPLACE, or RANDOM_REPLACE.

    "},{"location":"other_features/alerts/","title":"Alerts","text":"

    Phileas can optionally generate alerts when a particular type of sensitive information is identified.

    "},{"location":"other_features/alerts/#alert-conditions","title":"Alert Conditions","text":"

    In a policy, each type of sensitive information can have zero or more filter strategies. Each filter strategy can optionally have a condition associated with it. When a condition is present, the filter strategy will only be satisfied when the condition is satisfied. For example, a condition may be created to only filter phone numbers that start with the digits 123 or only filter names that start with John. Filter strategy conditions give you granular control over the filtering process.

    When a filter strategy condition is satisfied, Phileas can optionally generate an alert. This feature allows you to be notified when a particular type of sensitive information is identified.

    "},{"location":"other_features/alerts/#enabling-alerts","title":"Enabling Alerts","text":"

    Alerts are enabled on a per-condition basis. For instance, given the following policy to identify email addresses, a condition has been added to only match the email address test@test.com. Because of the property alert set to true, an alert will be generated when this condition is satisfied. By default, the alert property is set to false disabling alerts for the condition.

    {\n  \"name\": \"email-address-alert\",\n  \"identifiers\": {\n    \"emailAddress\": {\n      \"emailAddressFilterStrategies\": [\n        {\n          \"id\": \"my-email-strategy\",\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\",\n          \"condition\": \"token == \\\"test@test.com\\\"\",\n          \"alert\": true\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"other_features/alerts/#structure-of-an-alert","title":"Structure of an Alert","text":"

    An alert contains the following information:

    Property Name Description id A unique ID for the alert formatted as a UUID. filterProfile The name of the policy triggering the alert. strategyId The ID of the filter strategy triggering the alert. In the example above the id would be my-email-strategy. context The context. documentId The ID of the document which triggered the alert. filterType The filter type (\"email-address\", \"credit-card\", etc.) triggering the alert. date A timestamp when the alert was generated formatted as yyyy-MM-dd'T'HH:mm:ss.SSS'Z'."},{"location":"other_features/alerts/#retrieving-and-deleting-alerts","title":"Retrieving and Deleting Alerts","text":"

    The alerts that Phileas has generated are available through Phileas' alerts API. This API allows for retrieving and deleting alerts. Using this API you can build sophisticated notification systems around Phileas' capabilities.

    "},{"location":"other_features/anonymization/","title":"Consistent Anonymization","text":"

    Anonymization in the context of Phileas is the process of replacing certain values with random but similar values. For example, the identified name of \u201cJohn Smith\u201d may be replaced with \u201cDavid Jones\u201d, or an identified phone number of 123-555-9358 may be replaced by 842-436-2042. A VIN number will be replaced by a 17 character randomly selected VIN number that adheres to the standard for VIN numbers.

    Anonymization is useful in instances where you want to remove sensitive information from text without changing the meaning of the text. Anonymization can be enabled for each type of sensitive information in the policy by setting the filter strategy to RANDOM_REPLACE. (See Policies for more information.)

    "},{"location":"other_features/anonymization/#consistent-anonymization_1","title":"Consistent Anonymization","text":"

    Consistent anonymization refers to the process of always anonymizing the same sensitive information with the same replacement values. For example, if the name \"John Smith\" is randomly replaced with \"Pete Baker\", all other occurrences of \"John Smith\" will also be replaced by \"Pete Baker.\"

    Consistent anonymization can be done on the document level or on the context level. When enabled on the document level, \"John Smith\" will only be replaced by \"Pete Baker\" in the same document. If \"John Smith\" occurs in a separate document it will be anonymized with a different random name. When enabled on the context level, \"John Smith\" will be replaced by \"Pete Baker\" whenever \"John Smith\" is found in all documents in the same context.

    Enabling consistent anonymization on the context level requires a cache to store the sensitive information and the corresponding replacement values. If a single instance of Phileas is running, its internal cache service (enabled by default) is the best choice and no additional configuration is required.

    If multiple instances of Phileas are deployed together, Phileas requires access to a Redis cache service as shown below. See Phileas' Settings on how to configure the cache.

    When Phileas is deployed in a cluster, a Redis cache is required to enable consistent anonymization.

    The anonymization cache will contain PHI. It is important that you take the necessary precautions to secure the cache and all communication to and from the cache.

    "},{"location":"other_features/span_disambiguation/","title":"Span Disambiguation","text":"

    Span disambiguation is an optional feature in Phileas that is disabled by default. Refer to Phileas' Settings to enable and configure span disambiguation.

    In Phileas, a span is a piece of the input text that Phileas has identified as sensitive information. A span has start and end positions, a confidence, a type, and other attributes. Ideally, each piece of identified sensitive information will only have a single span associated with it. In this case, the type of sensitive information is unambiguous. The goal of span disambiguation is to provide more accurate filtering by removing the potential ambiguities in the types of sensitive information for duplicate spans.

    However, sometimes a piece of text can be identified by multiple spans, each having a different type of sensitive information. In a hypothetical scenario, given the input text My SSN is 123456789., Phileas identifies 123456789 as an SSN and as a phone number. This type of scenario can be quite common, and its likelihood increases as the number of enabled filters in a policy increases.

    "},{"location":"other_features/span_disambiguation/#how-phileas-span-disambiguation-works","title":"How Phileas' Span Disambiguation Works","text":"

    When we read the sentence My SSN is 123456789. we can tell the span in question should be identified as an SSN because we can look at the text surrounding the span. We use the surrounding words to deduce the correct type of sensitive information for 123456789.

    That is exactly how Phileas' span disambiguation works. When presented with identical spans differing only by the type of sensitive information, Phileas looks at the text surrounding the span in question in combination with the previous spans it has seen in the same context to determine which type of sensitive information is most likely to be correct. Phileas then removes the ambiguous spans from the results and replaces them with a single span.

    "},{"location":"other_features/span_disambiguation/#improves-over-time","title":"Improves Over Time","text":"

    Because Phileas is able to consider previously seen text to make its decision concerning ambiguous spans, Phileas' span disambiguation gets \"smarter\" as more text is filtered. This is because Phileas will have more text to consider in its calculations.

    "},{"location":"other_features/span_disambiguation/#more-details","title":"More Details","text":""},{"location":"other_features/span_disambiguation/#span-disambiguation-and-confidence-values","title":"Span Disambiguation and Confidence Values","text":"

    Span disambiguation is only invoked for spans that differ only by the type of sensitive information. This means the span's location (start and end positions), confidence, and all other values must match. If two spans have identical locations but have different confidence values, span disambiguation will not be applied and the span having the highest confidence will be used.

    "},{"location":"other_features/span_disambiguation/#cache-service","title":"Cache Service","text":"

    When multiple applications using Phileas are deployed alongside each other behind a load balancer, Phileas' cache service should be configured and enabled. Phileas will store the information needed to disambiguate spans in the cache such that the information is available to each instance of Phileas. If only a single instance of Phileas is running then the cache service is not required, however, the information needed to disambiguate spans will be stored in memory and will be lost when Phileas is stopped or restarted. Because of this, we recommend the cache service always be used unless there is a specific reason not to.

    "},{"location":"other_features/span_disambiguation/#fine-tuning-the-span-disambiguation","title":"Fine-Tuning the Span Disambiguation","text":"

    There are properties available to fine-tune how the span disambiguation operates. These properties are not documented because improper use of the properties could have a negative impact on performance. We will be glad to walk through these properties upon request.

    "},{"location":"policies/document_analysis/","title":"Document Analysis","text":"

    Philter analyzes received documents prior to redacting the document. This analysis is done to help Philter get a better understanding of the document. The results of the analysis are used to exclude certain document types from redaction and to improve Philter's redaction performance.

    While not recommended, the automatic document analysis can be disabled in a policy. By default, document analysis is enabled.

    Warning: Disabling document analysis will cause any policy features that depend on the results of the document analysis to not function.

    An example policy with disabled document analysis is shown below.

    {\n  \"name\": \"email-and-phone-numbers\",\n  \"config\": {\n    \"analysis\": {\n      \"enabled\": false\n    }\n  },\n  \"identifiers\": {\n    \"emailAddress\": {\n      \"emailAddressFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/excluding_by_document_type/","title":"Excluding by Document Type","text":"

    Philter can automatically detect certain types of documents and exclude those documents from redaction of certain sensitive information. For example, you may want to redact SSNs/TINs in all but one type of document.

    To exclude a document type from a specific filter, set the excludeDocumentTypes value to a list of document types to exclude for a filter strategy. Filter strategies for all filter types support the excludeDocumentTypes property.

    An example to exclude email addresses from being redacted in a subpoena document is given below:

    {\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\",\n               \"excludeDocumentTypes\": [\"SUBPOENA\"]\n            }\n         ]\n      }\n   }\n}\n

    In this example, email addresses are redacted in all document types except documents Philter identifies as being subpoena documents.

    "},{"location":"policies/excluding_by_document_type/#document-types-supported-by-automatic-detection","title":"Document Types Supported by Automatic Detection","text":"

    Philter currently supports automatically detecting the following document types.

    Document Type Document Description Subpoena Form 2540 - Federal Bankruptcy - SUBPOENA FOR RULE 2004 EXAMINATION Subpoena Form 2550 - Federal Bankruptcy - SUBPOENA TO APPEAR AND TESTIFY Subpoena Form 2560 - Federal Bankruptcy - SUBPOENA TO TESTIFY AT A DEPOSITION Subpoena Form 2570 - Federal Bankruptcy - SUBPOENA TO PRODUCE DOCUMENTS Subpoena AO 88 - SUBPOENA TO APPEAR AND TESTIFY AT A HEARING OR TRIAL IN A CIVIL ACTION Subpoena AO 88A - SUBPOENA TO TESTIFY AT A DEPOSITION IN A CIVIL ACTION Subpoena AO 88B - SUBPOENA TO PRODUCE DOCUMENTS, INFORMATION, OR OBJECTS Subpoena AO 89 - SUBPOENA TO TESTIFY AT A HEARING OR TRIAL IN A CRIMINAL CASE Subpoena AO 90 - SUBPOENA TO TESTIFY AT A DEPOSITION IN A CRIMINAL CASE Subpoena AO 110 - SUBPOENA TO TESTIFY BEFORE A GRAND JURY"},{"location":"policies/filter_policies/","title":"Filter Policies","text":"

    The types of sensitive information identified by Phileas and how that information is de-identified are controlled through policies. A policy is a file stored under Phileas\u2019s policies directory, which by default is located at /opt/Phileas/policies/. You can have an unlimited number of policies.

    Each policy has a name that is used by Phileas to apply the appropriate de-identification methods. The name is passed to Phileas\u2019s API along with the text to be filtered when submitting text to Phileas. This provides flexibility and allows you to de-identify different types of documents in differing manners with a single instance of Phileas. For example, you may have a policy for bankruptcy documents and a separate policy for financial documents.

    There are sample policies available for immediate use or customization to fit your use-cases.

    "},{"location":"policies/filter_policies/#the-structure-of-a-policy","title":"The Structure of a Policy","text":"

    A policy:

    • Must have a name that uniquely identifies it.
    • Must have a list of identifiers that are filters for sensitive information.
      • Each identifier, or filter, can have zero or more filter strategies. A filter strategy tells Phileas how to manipulate that type of sensitive information when it is identified.
    • Can have an optional list of terms or patterns.
    • Can have encryption keys to support encryption of sensitive information.
    "},{"location":"policies/filter_policies/#an-example-policy","title":"An Example Policy","text":"

    The following is an example policy. In the example below you can see the types of sensitive information that are enabled and the strategy for manipulating each type when found. This policy identifies email addresses and phone numbers and redacts each with the format given.

    {\n   \"name\": \"email-and-phone-numbers\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n

    When an email address is identified by this policy, the email address is replaced with the text {{{REDACTED-email-address}}}. The %t gets replaced by the type of the filter. Likewise, when a phone number is found it is replaced with the text {{{REDACTED-phone-number}}}. You are free to change the redaction formats to whatever fits your use-case. See Filter Strategies for all replacement options.

    The name of the policy is email-and-phone-numbers. Policies can be named anything you like but their names must be unique from all other policies. As a best practice, the policy should be saved as [name].json, e.g. email-and-phone-numbers.json.

    "},{"location":"policies/filter_policies/#applying-a-policy-to-text","title":"Applying a Policy to Text","text":"

    To use this policy we will save it as /opt/Phileas/policies/email-and-phone-numbers.json. We must restart Phileas for the new policy to be available for use. To apply the policy we will pass the policy's name to Phileas when making a filter request, as shown in the example request below.

    curl -k -X POST \"https://localhost:8080/api/filter?c=context&p=email-and-phone-numbers\" \\\n  -d @file.txt -H \"Content-Type: text/plain\"\n

    In this command, we have provided the parameter p along with a value that is the name of the policy we want to use for this request. If we had multiple policies in Phileas we could choose a different policy for this request simply by changing the name given to the parameter p. For more details see Phileas\u2019s API.

    Phileas will process the contents of file.txt by applying the policy named email-and-phone-numbers. As we saw in the policy above, this policy redacts email addresses and phone numbers. Phileas will return the redacted text in response to the API call.

    To manipulate the sensitive information by methods other than redaction, see the Filter Strategies.

    "},{"location":"policies/filter_strategies/","title":"Filter Strategies","text":"

    A filter strategy defines how sensitive information identified by Phileas should be manipulated, whether it is redacted, replaced, encrypted, or manipulated in some other fashion.

    In a policy, you list the types of sensitive information that should be filtered. How Phileas replaces each type of sensitive information is specific to each type. For instance, zip codes can be truncated based on the leading digits or zip code population while phone numbers are redacted. These replacements are performed by \"filter strategies.\"

    Each filter can have one or more filter strategies and conditions can be used to determine when to apply each filter strategy.

    A sample policy containing a filter strategy is shown below. In this example, email addresses will be redacted.

    {\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n

    Most of the filter strategies apply to all types of data, however, some filter strategies only apply to a few types. For example, the TRUNCATE filter strategy only applies to a zip code filter.

    "},{"location":"policies/filter_strategies/#filter-strategies_1","title":"Filter Strategies","text":"

    The filter strategies are described below. Each filter type can specify zero or more filter strategies. When no filter strategies are given, Phileas will default to REDACT for that filter type. When multiple filter strategies are given for a single filter type, the filter strategies will be applied in order as they are listed in the policy, top to bottom.

    • REDACT
    • CRYPTO_REPLACE (AES encryption)
    • HASH_SHA256_REPLACE (SHA256 hashing)
    • FPE_ENCRYPT_REPLACE (Format-preserving encryption)
    • RANDOM_REPLACE
    • STATIC_REPLACE
    • TRUNCATE
    • ZERO_LEADING
    "},{"location":"policies/filter_strategies/#the-redact-filter-strategy","title":"The REDACT Filter Strategy","text":"

    The REDACT filter strategy replaces sensitive information with a given redaction format. You can put variables in the redaction format that Phileas will replace when performing the redaction.

    The available redaction variables are:

    Redaction Variable Description %t Will be replaced with the type of sensitive information. This is to allow you to know the type of sensitive information that was identified and redacted. %l Will be replaced by the given classification for the type of sensitive information. %v Will be replaced by the original value of the sensitive text. With %v you can annotate sensitive information instead of masking or removing it.

    To redact sensitive information by replacing it with the type of sensitive information, the redaction format would be REDACTED-%t.

    An example filter using the REDACT filter strategy:

    {\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#the-crypto_replace-filter-strategy-idcrypto","title":"The CRYPTO_REPLACE Filter Strategy {id=\"crypto\"}","text":"

    The CRYPTO_REPLACE filter strategy replaces each identified piece of sensitive information by encrypting it using the AES encryption algorithm. To use this filter strategy, the policy must include the details of the encryption key as shown below:

    {\n   \"name\":\"sample-profile\",\n   \"crypto\": {\n     \"key\": \"....\",\n     \"iv\": \"....\"\n   },\n   ...\n

    In the snippet of a policy shown above, a crypto element is defined with a key and an initialization vector (iv). These two items are required to encrypt the sensitive information. To generate a key, run the following command:

    openssl enc -e -aes-256-cbc -a -salt -P\n

    You will be prompted to enter an encryption password. Once entered, the values of the key and iv will be shown. Copy and paste those values into the policy.

    An example policy using the CRYPTO_REPLACE filter strategy:

    {\n   \"name\": \"email-address\",\n   \"crypto\": {\n     \"key\": \"....\",\n     \"iv\": \"....\"\n   },\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"CRYPTO_REPLACE\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#the-hash_sha256_replace-filter-strategy-idhash","title":"The HASH_SHA256_REPLACE Filter Strategy {id=\"hash\"}","text":"

    The HASH_SHA256_REPLACE filter strategy replaces sensitive information with the SHA256 hash value of the sensitive information. To append a random salt value to each value prior to hashing, set the salt property to true. The salt value used will be returned in the explain response from Phileas' API.

    An example policy using the HASH_SHA256_REPLACE filter strategy:

    {\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"HASH_SHA256_REPLACE\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#the-fpe_encrypt_replace-filter-strategy-idfpe","title":"The FPE_ENCRYPT_REPLACE Filter Strategy {id=\"fpe\"}","text":"

    The FPE_ENCRYPT_REPLACE filter strategy uses format-preserving encryption (FPE) to encrypt the sensitive information. Phileas uses the FF3-1 algorithm for format-preserving encryption. The FPE_ENCRYPT_REPLACE filter strategy requires a key and a tweak value. These values control the format-preserving encryption. For more information on these values and format-preserving encryption, refer to the resources below:

    • https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-38Gr1-draft.pdf
    • https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-38g.pdf

    An example policy using the FPE_ENCRYPT_REPLACE filter strategy:

    {\n   \"name\": \"credit-cards\",\n   \"identifiers\": {\n      \"creditCard\": {\n         \"creditCardFilterStrategies\": [\n            {\n               \"strategy\": \"FPE_ENCRYPT_REPLACE\",\n               \"key\": \"...\",\n               \"tweak\": \"...\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#the-random_replace-filter-strategy-idrandom","title":"The RANDOM_REPLACE Filter Strategy {id=\"random\"}","text":"

    Replaces the identified text with a fake value but of the same type. For example, an SSN will be replaced by a random text having the format ###-##-####, such as 123-45-6789. An email address will be replaced with a randomly generated email address. Available to all filter types.

    An example policy using the RANDOM_REPLACE filter strategy:

    {\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"RANDOM_REPLACE\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#the-static_replace-filter-strategy-idstatic","title":"The STATIC_REPLACE Filter Strategy {id=\"static\"}","text":"

    Replaces the identified text with a given static value. Available to all filter types.

    An example policy using the STATIC_REPLACE filter strategy:

    {\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"STATIC_REPLACE\",\n               \"staticReplacement\": \"some new value\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#the-truncate-filter-strategy-idtruncate","title":"The TRUNCATE Filter Strategy {id=\"truncate\"}","text":"

    Available only to zip codes, this strategy allows for truncating zip codes to only a select number of digits. Specify truncateDigits to set the desired number of leading digits to leave. For example, if truncateDigits is 2, the zip code 90210 will be truncated to 90***.

    The TRUNCATE filter strategy is available only to the zip code filter. An example policy using the TRUNCATE filter strategy:

    {\n   \"name\": \"zip-codes\",\n   \"identifiers\": {\n      \"zipCode\": {\n         \"zipCodeFilterStrategies\": [\n            {\n               \"strategy\": \"TRUNCATE\",\n               \"truncateDigits\": 3\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#the-zero_leading-filter-strategy-idzero_leading","title":"The ZERO_LEADING Filter Strategy {id=\"zero_leading\"}","text":"

    Available only to zip codes, this strategy changes the first 3 digits of a zip code to be 0. For example, the zip code 90210 will be changed to 00010.

    The ZERO_LEADING filter strategy is only available to zip code filters. An example zip code filter using the ZERO_LEADING filter strategy:

    {\n   \"name\": \"zip-codes\",\n   \"identifiers\": {\n      \"zipCode\": {\n         \"zipCodeFilterStrategies\": [\n            {\n               \"strategy\": \"ZERO_LEADING\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filter_strategies/#filter-strategy-conditions","title":"Filter Strategy Conditions","text":"

    A replacement strategy can be applied based on the sensitive information meeting one or more conditions. For example, you can create a condition such that only dates of 11/05/2010 are replaced by using the condition token == \"11/05/2010\". The conditions that can be applied vary based on the type of sensitive information. For instance, zip codes can have conditions based on their population. Refer to each specific filter type for the conditions available.

    The following is an example policy for credit cards that contains a condition to only redact credit card numbers that start with the digits 3000:

    {\n  \"name\": \"default\",\n  \"identifiers\": {\n    \"creditCard\": {\n      \"creditCardFilterStrategies\": [\n        {\n          \"condition\": \"token startswith \\\"3000\\\"\",\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/filter_strategies/#combining-conditions","title":"Combining Conditions","text":"

    Conditions can be joined through the use of the and keyword. When conditions are joined, each condition must be satisfied for the identified text to be filtered. If any of the conditions are not satisfied the identified text will not be filtered. Below is an example joined condition:

    token != \"123-45-6789\" and context == \"my-context\"\n

    This condition requires that the identified text (the token) not be equal to 123-45-6789 and the context be equal to my-context. Both of these conditions must be satisfied for the identified text to be filtered.

    Conversely, conditions can be OR'd through the use of multiple filter strategies. For example, if we want to OR a condition on the token and a condition on the context, we would use two filter strategies:

    \"ssnFilterStrategies\": [\n  {\n    \"condition\": \"token != \\\"123-45-6789\\\"\",\n    \"strategy\": \"REDACT\",\n    \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n  },\n  {\n    \"condition\": \"context == \\\"my-context\\\"\",\n    \"strategy\": \"REDACT\",\n    \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n  }        \n]\n
    "},{"location":"policies/filters/","title":"Filters","text":"

    A \"filter\" corresponds to a type of sensitive information. Phileas has filters for sensitive information such as names, addresses, ages, and lots of others.

    There are predefined filters that are ready to be used, as well as custom filters that let you define your own filters to identify sensitive information beyond what the predefined filters can identify. An example of a custom filter is a filter to identify your patient account numbers, where the structure of an account number is specific to your organization.

    Each filter is capable of identifying and redacting a specific type of sensitive information. For example, there is a filter for phone numbers, a filter for US social security numbers, and a filter for person's names. You can enable any combination of these filters based on the types of sensitive information you need to redact.

    This section of the documentation describes the filters available in Phileas. The configuration options for each filter can vary due to the type of the sensitive information. For instance, only the zip code filter has a configuration to truncate the zip code.

    A selection of filters and their configurations is called a policy. A policy describes how to de-identify a document.

    "},{"location":"policies/filters/#predefined-filters","title":"Predefined Filters","text":""},{"location":"policies/filters/#persons-names","title":"Person's Names","text":"

    Phileas uses several methods to identify person's names.

    Type Description First Names Identifies common first names Surnames Identifies common surnames Person's Names (NER) Identifies full names using natural language processing analysis Physician's Names (NER) Identifies physician names using natural language processing analysis"},{"location":"policies/filters/#other-filters","title":"Other Filters","text":"Type Description Ages Identifies ages such as 3.5 years old Bank Routing Numbers Identifies bank routing numbers Bitcoin Addresses Identifies Bitcoin addresses such as 127NVqnjf8gB9BFAW2dnQeM6wqmy1gbGtv Cities Identifies common cities Counties Identifies common counties Credit Card Numbers Identifies VISA, American Express, MasterCard, and Discover credit card numbers Dates Identifies dates in many formats such as May 22, 1999 Driver's License Numbers Identifies driver's license numbers for all 50 US states Email Addresses Identifies email addresses Hospitals Identifies common hospital names Hospital Abbreviations Identifies common hospitals by their name abbreviations IBAN Codes Identifies international bank account numbers IP Addresses Identifies IPv4 and IPv6 addresses MAC Addresses Identifies network MAC addresses Passport Numbers Identifies US passport numbers Phone Numbers Identifies phone numbers Phone Number Extensions Identifies phone number extensions Sections Identifies sections in text denoted by SSNs and TINs Identifies US SSNs and TINs States Identifies US state names State Abbreviations Identifies US state names by their abbreviations Tracking Numbers Identifies UPS, FedEx, and USPS tracking numbers URLs Identifies URLs VINs Identifies vehicle identification numbers Zip Codes Identifies US zip codes"},{"location":"policies/filters/#custom-filter-types-of-sensitive-information","title":"Custom Filter Types of Sensitive Information","text":"

    In addition to the predefined types of sensitive information listed in the table above, you can also define your own types of sensitive information. Through custom identifiers and dictionaries, Phileas can identify many other types of information that may be sensitive in your use-case. For example, if you have patient identifiers that follow a pattern of AA-00000 you can define a custom identifier for this sensitive information.

    Phileas can be configured to identify sensitive information based on custom dictionaries. When a term in the dictionary is found in the text, Phileas will treat the term as sensitive information and apply the given filter strategy.

    Custom dictionaries support fuzziness to accommodate for misspellings. The replacement strategy for a custom dictionary has a sensitivityLevel that controls the amount of allowed fuzziness.

    Type Description Custom Dictionaries Identifies sensitive information based on dictionary values. Custom Identifiers Identifies custom alphanumeric identifiers that may be used for medical record numbers, patient identifiers, account numbers, or other specific identifiers."},{"location":"policies/ignoring_sensitive_information/","title":"Ignoring Sensitive Information","text":"

    Phileas can optionally ignore a list of terms and prevent those terms from being redacted. For example, if the name John Smith is being redacted and you do not want it to be redacted, you can add John Smith to an ignore list. Each time Phileas identifies sensitive information it will check the ignore lists to see if the sensitive information is to be ignored.

    Phileas can ignore terms and patterns per-policy, meaning each policy can have its own unique list of terms or patterns to ignore.

    "},{"location":"policies/ignoring_sensitive_information/#ignore-lists","title":"Ignore Lists","text":"

    Ignore lists can be specified at the policy level and/or for each filter in the policy. When set for the policy, the list of ignored terms will be applied to all filter types. When set for a filter, the list of ignored terms will be applied only to that filter.

    "},{"location":"policies/ignoring_sensitive_information/#ignore-list-for-a-policy","title":"Ignore List for a Policy","text":"

    In the policy shown below, an ignore list is set at the level of the policy. The terms specified in the list will be ignored for all filter types enabled in the policy. Only the terms property is required. The name and caseSensitive properties are optional.

    {\n   \"name\": \"example-policy\",\n   \"ignored\": [\n     {\n       \"name\": \"names to ignore\",\n       \"terms\": [\"john smith\", \"jane doe\"],\n       \"caseSensitive\": false\n     }\n   ],\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n

    Terms to be ignored at the policy level can also be read from one or more files located on the local file system. The file must be formatted as one term per line.

    {\n   \"name\": \"example-policy\",\n   \"ignored\": [\n     {\n       \"name\": \"names to ignore\",\n       \"terms\": [\"john smith\", \"jane doe\"],\n       \"files\": [\"/tmp/names.txt\"],\n       \"caseSensitive\": false\n     }\n   ],   \n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/ignoring_sensitive_information/#ignore-list-for-a-filter","title":"Ignore List for a Filter","text":"

    In the policy shown below, an ignore list is set at the level of a filter. The terms specified in the list will be ignored only for that filter type. Each filter in a policy can have its own list of ignored terms. Terms in a filter-level ignore list are matched case-sensitively, meaning \"John\" will be ignored if \"John\" is an ignored term but will not be ignored if only \"john\" is an ignored term.

    {\n   \"name\": \"example-filter-profile\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"ignored\": [\"john smith\", \"jane doe\"],\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/ignoring_sensitive_information/#ignoring-patterns","title":"Ignoring Patterns","text":"

    Phileas can ignore information based on a regular expression pattern. An example use of this feature is to ignore terms that are present in your text but are dynamic, such as logged timestamps. When using the date filter these timestamps may be identified as being sensitive but you do not want them redacted. With an ignore pattern we can ignore the logged timestamps.

    "},{"location":"policies/ignoring_sensitive_information/#ignore-patterns","title":"Ignore Patterns","text":"

    Ignore patterns can be specified at the policy level and/or at the level of each type of filter. When set at the policy level, the list of ignored patterns will be applied to all filter types. When set for an individual filter, the list of ignored patterns will be applied only to that filter.

    "},{"location":"policies/ignoring_sensitive_information/#ignore-patterns-for-a-policy","title":"Ignore Patterns for a Policy","text":"

    In the policy shown below, ignore patterns are set at the level of the policy. The patterns specified in the list will be ignored for all filter types enabled in the policy.

    {\n   \"name\": \"example-policy\",\n   \"ignoredPatterns\": [\n     {\n       \"name\": \"ignore-room-numbers\",\n       \"pattern\": \"Room [A-Z0-4]{4}\"\n     }\n   ],\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/ignoring_sensitive_information/#ignore-patterns-for-a-filter","title":"Ignore Patterns for a Filter","text":"

    In the policy shown below, ignore patterns are set at the level of a filter. The patterns specified in the list will be ignored only for that filter type. Each filter in a policy can have its own list of ignored patterns.

    {\n   \"name\": \"example-policy\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"ignoredPatterns\": [\n           {\n             \"name\": \"ignore-room-numbers\",\n             \"pattern\": \"Room [A-Z0-4]{4}\"\n           }\n         ],\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/sample_policies/","title":"Sample Policies","text":"

    This page lists some sample policies. You can use these policies either as-is or as starting points for customizing them to meet your specific de-identification needs.

    These policies are examples and not an exhaustive list of all the sensitive information Phileas can identify. Items from each of these policies can be combined to make policies to meet your use-cases.

    "},{"location":"policies/sample_policies/#email-addresses-and-phone-numbers","title":"Email Addresses and Phone Numbers","text":"

    This policy finds email addresses and phone numbers and redacts them with {{{REDACTED-email-address}}} and {{{REDACTED-phone-number}}}, respectively.

    {\n  \"name\": \"email-and-phone-numbers\",\n  \"identifiers\": {\n    \"emailAddress\": {\n      \"emailAddressFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"phoneNumber\": {\n      \"phoneNumberFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/sample_policies/#persons-names-and-ssns","title":"Persons Names and SSNs","text":"

    This policy finds persons names and SSNs and redacts them with {{{REDACTED-entity}}} and {{{REDACTED-ssn}}}, respectively.

    {\n  \"name\": \"persons-names-ssn\",\n  \"identifiers\": {\n    \"ner\": {\n      \"nerFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"ssn\": {\n      \"ssnFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/sample_policies/#dates-urls-and-vins","title":"Dates, URLs, and VINs","text":"

    This policy finds dates, URLs, and VINs. Dates and URLs are redacted with {{{REDACTED-date}}} and {{{REDACTED-url}}}, respectively. Each VIN is replaced by a randomly generated VIN.

    {\n  \"name\": \"dates-urls-vin\",\n  \"identifiers\": {\n    \"date\": {\n      \"dateFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"url\": {\n      \"urlFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"vin\": {\n      \"vinFilterStrategies\": [\n        {\n          \"strategy\": \"RANDOM_REPLACE\"\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/sample_policies/#ip-addresses","title":"IP Addresses","text":"

    This policy finds IP addresses and replaces each identified IP address with the static text IP_ADDRESS as long as the IP address is not 127.0.0.1. (A condition on the filter strategy sets the IP address requirement.)

    {\n  \"name\": \"ip-addresses\",\n  \"identifiers\": {\n    \"ipAddress\": {\n      \"ipAddressFilterStrategies\": [\n        {\n          \"strategy\": \"STATIC_REPLACE\",\n          \"redactionFormat\": \"IP_ADDRESS\",\n          \"condition\": \"token != \\\"127.0.0.1\\\"\"\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/sample_policies/#zip-codes","title":"Zip Codes","text":"

    This policy finds ZIP codes starting with 90 and truncates the zip code to just the first two digits.

    {\n  \"name\": \"zip-codes\",\n  \"identifiers\": {\n    \"zipCode\": {\n      \"zipCodeFilterStrategies\": [\n        {\n          \"condition\": \"token startswith \\\"90\\\"\",\n          \"strategy\": \"TRUNCATE\",\n          \"truncateDigits\": 2\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/sample_policies/#enable-text-splitting","title":"Enable Text Splitting","text":"

    This policy enables text splitting for input over 10,000 characters.

    {\n  \"name\": \"default-split-enabled\",\n  \"config\": {\n    \"splitting\": {\n      \"enabled\": true,\n      \"threshold\": 10000,\n      \"method\": \"newline\"\n    }\n  },\n  \"identifiers\": {\n    \"ssn\": {\n      \"ssnFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/sample_policies/#globally-ignored-terms","title":"Globally Ignored Terms","text":"

    This policy has a list of globally ignored terms.

    {\n  \"name\": \"default-global-ignore\",\n  \"ignored\": [\n    {\n      \"name\": \"ignored credit cards\",\n      \"terms\": [\"4111111111111111\", \"0000000000000000\"]\n    }\n  ],\n  \"identifiers\": {\n    \"creditCard\": {\n      \"creditCardFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/sample_policies/#generating-alerts","title":"Generating Alerts","text":"

    This policy generates an alert when a matching email address is identified.

    {\n  \"name\": \"email-address-alert\",\n  \"identifiers\": {\n    \"emailAddress\": {\n      \"emailAddressFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\",\n          \"condition\": \"token == \\\"test@test.com\\\"\",\n          \"alert\": true\n        }\n      ]\n    }\n  }\n}\n
    "},{"location":"policies/splitting_input_text/","title":"Splitting Input Text","text":"

    On a per-policy basis, Philter can split input text to process each split individually. This can improve performance and allows for handling long input text. Splitting is disabled by default.

    An example split configuration in a policy is shown below

    {\n  \"name\": \"default\",\n  \"identifiers\": {}, \n  \"config\": {\n    \"splitting\": {\n      \"enabled\": true,\n      \"threshold\": 10000,\n      \"method\": \"newline\"\n    }\n  }\n}\n

    In this example policy, splitting is enabled for inputs greater than or equal to 10,000 characters in length.

    The method of splitting the text will be the newline method. This method will cause Philter to split the text based on the locations of new line characters in the input text. Additional methods of text splitting may be added in future versions.

    Because the newline method splits text based on the locations of new line characters in the text, the text contained in the reassembled filter responses may not be an exact match of the input text. This is due to white space and other characters that may reside near the new line characters that get omitted during processing.

    "},{"location":"policies/splitting_input_text/#text-splitting-policy-properties","title":"Text Splitting Policy Properties","text":"Property Description Allowed Values Default Value enabled Whether or not input texts are split. When false, requests with text exceeding the threshold generate an HTTP 413 PayloadTooLarge error response. true or false false threshold When to split the input text. Set to -1 to disable splitting. Any integer value. 10000 method How to split the text. newline newline"},{"location":"policies/splitting_input_text/#alternative-to-philter-splitting-text","title":"Alternative to Philter Splitting Text","text":"

    In some cases it may be best to split your input text client side prior to sending the text to Philter. This gives you full control over how the text will be split and provides more predictable responses from Philter because you know how the text is split.

    An example of splitting text into chunks prior to sending the text to Philter is given in the commands below:

    # Given a large file called largefile.txt, split it into 10k pieces.\n$ split -b 10k largefile.txt segment\n\n# Now process the pieces.\n$ curl -s -X POST -k \"https://philter:8080/api/filter?d=document1\" --data \"@/tmp/segmentaa\" -H \"Content-type: text/plain\" > out1\n$ curl -s -X POST -k \"https://philter:8080/api/filter?d=document1\" --data \"@/tmp/segmentab\" -H \"Content-type: text/plain\" > out2\n\n# Now recombine the outputs into a single file.\n$ cat out1 out2 > filtered.txt\n
    "},{"location":"policies/filters/common_filters/ages/","title":"Ages","text":""},{"location":"policies/filters/common_filters/ages/#filter","title":"Filter","text":"

    This filter identifies ages such as 3.5 years old in text.

    "},{"location":"policies/filters/common_filters/ages/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/ages/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value ageFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/ages/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/ages/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. The filter will only be applied when the condition is satisfied. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/ages/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"ages-example\",\n   \"identifiers\": {\n      \"age\": {\n         \"ageFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/bank-routing-numbers/","title":"Bank Routing Numbers","text":""},{"location":"policies/filters/common_filters/bank-routing-numbers/#filter","title":"Filter","text":"

    This filter identifies bank routing numbers (ABA routing transit numbers) such as 111000025 in text. Identified routing numbers must pass checksum validation.

    "},{"location":"policies/filters/common_filters/bank-routing-numbers/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/bank-routing-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value bankRoutingNumberFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/bank-routing-numbers/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/bank-routing-numbers/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. The filter will only be applied when the condition is satisfied. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/bank-routing-numbers/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"bank-routing-number-example\",\n   \"identifiers\": {\n      \"bankRoutingNumber\": {\n         \"bankRoutingNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/bitcoin-addresses/","title":"Bitcoin Addresses","text":""},{"location":"policies/filters/common_filters/bitcoin-addresses/#filter","title":"Filter","text":"

    This filter identifies bitcoin addresses such as 1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2 in text.

    "},{"location":"policies/filters/common_filters/bitcoin-addresses/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/bitcoin-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value bitcoinAddressFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/bitcoin-addresses/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/bitcoin-addresses/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/bitcoin-addresses/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"bitcoin-address-example\",\n   \"identifiers\": {\n      \"bitcoinAddress\": {\n         \"bitcoinAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/creditcards/","title":"Credit Cards","text":""},{"location":"policies/filters/common_filters/creditcards/#filter","title":"Filter","text":"

    This filter identifies credit cards such as 378282246310005 in text.

    "},{"location":"policies/filters/common_filters/creditcards/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/creditcards/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value creditCardFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None onlyValidCreditCardNumbers When set to true, only valid credit card numbers will be filtered. true ignoreWhenInUnixTimestamp When set to true, only credit card numbers that do not match the pattern for a Unix timestamp will be filtered. false"},{"location":"policies/filters/common_filters/creditcards/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE) LAST_4 Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/creditcards/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/creditcards/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"credit-cards-example\",\n   \"identifiers\": {\n      \"creditCard\": {\n         \"onlyValidCreditCardNumbers\": false,\n         \"creditCardFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/dates/","title":"Dates","text":""},{"location":"policies/filters/common_filters/dates/#filter","title":"Filter","text":"

    This filter identifies dates such as May 22, 2014 in text. The supported date formats are:

    Format Example yyyy-MM-d 2020-05-10 MM-dd-yyyy 05-10-2020 M-d-y 5-10-2020 MMM dd May 5 or May 05 MMMM dd, yyyy May 5, 2020 or May 5 2020"},{"location":"policies/filters/common_filters/dates/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/dates/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value dateFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None onlyValidDates When set to true, only valid dates will be filtered. false"},{"location":"policies/filters/common_filters/dates/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. SHIFT Shift the date by a number of months, days, and/or years. SHIFTRANDOM Shift the date by a random number of months, days, and years. RELATIVE Replace the date with words relative to the date."},{"location":"policies/filters/common_filters/dates/#filter-strategy-options","title":"Filter Strategy Options","text":"

    The following filter strategy options are available for the RELATIVE filter strategy.

    Option Description Default Value futureDates When true, future dates are replaced by relative words. When false, future dates are redacted. false

    The following filter strategy options are available for the SHIFT filter strategy.

    Option Description Default Value shiftDays The number of days to shift the date. Can be a negative or positive integer. Defaults to 0 if not specified. 0 shiftMonths The number of months to shift the date. Can be a negative or positive integer. Defaults to 0 if not specified. 0 shiftYears The number of years to shift the date. Can be a negative or positive integer. Defaults to 0 if not specified. 0"},{"location":"policies/filters/common_filters/dates/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != TOKEN Compares the sensitive text to some category, e.g. birthdate. is CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/dates/#differentiating-between-dates-and-birth-dates","title":"Differentiating Between Dates and Birth Dates","text":"

    In some cases it may be necessary to redact birth dates and dates differently. Using conditions it is possible to determine if an identified date is a birth date. The conditional token is birthdate will determine if the identified date (token) is a birth date by analyzing the content surrounding the date.

    "},{"location":"policies/filters/common_filters/dates/#example-policy-to-redact-dates","title":"Example Policy to Redact Dates","text":"

    The following policy redacts dates.

    {\n   \"name\": \"dates-example\",\n   \"identifiers\": {\n      \"date\": {\n         \"onlyValidDates\": false,\n         \"dateFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/dates/#example-policy-to-shift-dates","title":"Example Policy to Shift Dates","text":"

    The following policy shifts dates forward by 2 days and 4 months.

    {\n   \"name\": \"dates-example\",\n   \"identifiers\": {\n      \"date\": {\n         \"onlyValidDates\": false,\n         \"dateFilterStrategies\": [\n            {\n               \"strategy\": \"SHIFT\",\n               \"shiftDays\": 2,\n               \"shiftMonths\": 4,\n               \"shiftYears\": 0\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/drivers-license-numbers/","title":"Driver's License Numbers","text":""},{"location":"policies/filters/common_filters/drivers-license-numbers/#filter","title":"Filter","text":"

    This filter identifies driver's license numbers such as 194784357 in text. Driver's license number formats for all 50 US states are supported.

    "},{"location":"policies/filters/common_filters/drivers-license-numbers/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/drivers-license-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value driversLicenseFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/drivers-license-numbers/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/drivers-license-numbers/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/drivers-license-numbers/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"drivers-license-example\",\n   \"identifiers\": {\n      \"driversLicense\": {\n         \"driversLicenseFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/email-addresses/","title":"Email Addresses","text":""},{"location":"policies/filters/common_filters/email-addresses/#filter","title":"Filter","text":"

    This filter identifies email addresses such as john.fake.address@hotmail.com in text.

    "},{"location":"policies/filters/common_filters/email-addresses/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/email-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value emailAddressFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None onlyStrictMatches When set to false, the pattern for identifying email addresses will be relaxed. Filtered email addresses will have a lower confidence, but filter performance will increase. true onlyValidTLDs When set to true, only email addresses that are for a top-level domain are filtered. false"},{"location":"policies/filters/common_filters/email-addresses/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/email-addresses/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/email-addresses/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"email-address-example\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/iban-codes/","title":"IBAN Codes","text":""},{"location":"policies/filters/common_filters/iban-codes/#filter","title":"Filter","text":"

    This filter identifies IBAN (International Bank Account Number) codes such as HU4211773016111110180000000 in text.

    "},{"location":"policies/filters/common_filters/iban-codes/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/iban-codes/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value allowSpaces When true, IBAN codes will be allowed to contain spaces and grouped in sections of 4. Set to false to disallow spaces in IBAN codes. true ibanCodeFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None onlyValidIBANCodes When set to true, only valid IBAN codes will be filtered. true"},{"location":"policies/filters/common_filters/iban-codes/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE) LAST_4 Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/iban-codes/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/iban-codes/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"iban-example\",\n   \"identifiers\": {\n      \"ibanCode\": {\n         \"onlyValidIBANCodes\": false,\n         \"ibanCodeFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/ip-addresses/","title":"IP Addresses","text":""},{"location":"policies/filters/common_filters/ip-addresses/#filter","title":"Filter","text":"

    This filter identifies IPv4 and IPv6 addresses such as 127.0.0.1, 192.168.3.58, and 2001:0db8:85a3:0000:0000:8a2e:0370:7334 in text.

    "},{"location":"policies/filters/common_filters/ip-addresses/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/ip-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value ipAddressFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/ip-addresses/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/ip-addresses/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/ip-addresses/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"ip-address-example\",\n   \"identifiers\": {\n      \"ipAddress\": {\n         \"ipAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/mac-addresses/","title":"MAC Addresses","text":""},{"location":"policies/filters/common_filters/mac-addresses/#filter","title":"Filter","text":"

    This filter identifies MAC addresses in text.

    "},{"location":"policies/filters/common_filters/mac-addresses/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/mac-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value macAddressFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/mac-addresses/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/mac-addresses/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/mac-addresses/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"mac-address-example\",\n   \"identifiers\": {\n      \"macAddress\": {\n         \"macAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/passport-numbers/","title":"Passport Numbers","text":""},{"location":"policies/filters/common_filters/passport-numbers/#filter","title":"Filter","text":"

    This filter identifies US passport numbers in text.

    "},{"location":"policies/filters/common_filters/passport-numbers/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/passport-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value passportNumberFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/passport-numbers/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/passport-numbers/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CLASSIFICATION Compares the issuing country of the passport number. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/passport-numbers/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"passport-number-example\",\n   \"identifiers\": {\n      \"passportNumber\": {\n         \"passportNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/phone-number-extensions/","title":"Phone Number Extensions","text":""},{"location":"policies/filters/common_filters/phone-number-extensions/#filter","title":"Filter","text":"

    This filter identifies phone number extensions such as \"x100\" in text.

    "},{"location":"policies/filters/common_filters/phone-number-extensions/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/phone-number-extensions/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value phoneNumberExtensionFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/phone-number-extensions/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/phone-number-extensions/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/phone-number-extensions/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"phone-number-ext-example\",\n   \"identifiers\": {\n      \"phoneNumberExtension\": {\n         \"phoneNumberExtensionFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      } \n   }     \n}\n
    "},{"location":"policies/filters/common_filters/phone-numbers/","title":"Phone Numbers","text":""},{"location":"policies/filters/common_filters/phone-numbers/#filter","title":"Filter","text":"

    This filter identifies phone and fax numbers such as (304) 555-5555, 304-555-5555, and 1-800-123-4567 in text.

    "},{"location":"policies/filters/common_filters/phone-numbers/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/phone-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value phoneNumberFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/phone-numbers/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/phone-numbers/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/phone-numbers/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"phone-number-example\",\n   \"identifiers\": {\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }     \n}\n
    "},{"location":"policies/filters/common_filters/sections/","title":"Sections","text":""},{"location":"policies/filters/common_filters/sections/#filter","title":"Filter","text":"

    This filter identifies sections in text between a given start regular expression pattern and a given end regular expression pattern.

    "},{"location":"policies/filters/common_filters/sections/#required-parameters","title":"Required Parameters","text":"Parameter Description Default Value startPattern A regular expression denoting the start of the section. None endPattern A regular expression denoting the end of the section. None"},{"location":"policies/filters/common_filters/sections/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value sectionFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/sections/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/sections/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/sections/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"sections-example\",\n   \"identifiers\": {\n      \"section\": {\n         \"startPattern\": \"START\",\n         \"endPattern\": \"END\",\n         \"sectionFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/ssns-and-tins/","title":"SSNs and TINs","text":""},{"location":"policies/filters/common_filters/ssns-and-tins/#filter","title":"Filter","text":"

    This filter identifies US SSNs and TINs such as 123-45-6789 and 123456789 in text.

    "},{"location":"policies/filters/common_filters/ssns-and-tins/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/ssns-and-tins/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value ssnFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/ssns-and-tins/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE) LAST_4 Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/ssns-and-tins/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/ssns-and-tins/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"ssn-tin-example\",\n   \"identifiers\": {\n      \"ssn\": {\n         \"ssnFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/tracking-numbers/","title":"Tracking Numbers","text":""},{"location":"policies/filters/common_filters/tracking-numbers/#filter","title":"Filter","text":"

    This filter identifies tracking numbers in text. FedEx, UPS, and USPS tracking number formats are supported.

    "},{"location":"policies/filters/common_filters/tracking-numbers/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/tracking-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value trackingNumberFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/tracking-numbers/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE) LAST_4 Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/tracking-numbers/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/tracking-numbers/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"tracking-numbers-example\",\n   \"identifiers\": {\n      \"trackingNumber\": {\n         \"trackingNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/urls/","title":"URLs","text":""},{"location":"policies/filters/common_filters/urls/#filter","title":"Filter","text":"

    This filter identifies URLs such as myhomepage.com, http://myhomepage.com/folder/page.html, and www.myhomepage.com/folder/page.html in text.

    "},{"location":"policies/filters/common_filters/urls/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/urls/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value urlFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None requireHttpWwwPrefix When set to true, only URLs that begin with http or www will be filtered. true"},{"location":"policies/filters/common_filters/urls/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/urls/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/urls/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"urls-example\",\n   \"identifiers\": {\n      \"url\": {\n         \"requireHttpWwwPrefix\": true,\n         \"urlFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/vins/","title":"VINs","text":""},{"location":"policies/filters/common_filters/vins/#filter","title":"Filter","text":"

    This filter identifies 17-character vehicle identification numbers (VINs) such as WBAPM7G50ANL19218 and 1GBJC34K3RE176005 in text.

    "},{"location":"policies/filters/common_filters/vins/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/vins/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value vinFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/vins/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. FPE_ENCRYPT_REPLACE Replace the sensitive text with a value generated by format-preserving encryption (FPE) LAST_4 Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/vins/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/vins/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"vins-example\",\n   \"identifiers\": {\n      \"vin\": {\n         \"vinFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/common_filters/zip-codes/","title":"Zip Codes","text":""},{"location":"policies/filters/common_filters/zip-codes/#filter","title":"Filter","text":"

    This filter identifies zip codes in text.

    "},{"location":"policies/filters/common_filters/zip-codes/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/common_filters/zip-codes/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value zipCodeFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None requireDelimiter When set to false, the filter will not require a dash in 9 digit zip codes, e.g. 12345-6789. Setting to false may increase the number of zip code false positives. true"},{"location":"policies/filters/common_filters/zip-codes/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. TRUNCATE Replace the sensitive text by removing the last x digits. (Set the number of digits using the truncateDigits parameter of the filter strategy.) ZERO_LEADING Replace the sensitive text by zeroing the first 3 digits."},{"location":"policies/filters/common_filters/zip-codes/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, != POPULATION Compares the population of the zip code against the 2010 census values. < , <=, > , >=, ==, !="},{"location":"policies/filters/common_filters/zip-codes/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"zip-code-example\",\n   \"identifiers\": {\n      \"zipCode\": {\n         \"zipCodeFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/custom_filters/dictionary/","title":"Dictionary","text":""},{"location":"policies/filters/custom_filters/dictionary/#filter","title":"Filter","text":"

    This filter identifies custom text based on a given dictionary.

    "},{"location":"policies/filters/custom_filters/dictionary/#required-parameters","title":"Required Parameters","text":"

    At least one of terms or files must be provided.

    Parameter Description Default Value terms A list of terms in the dictionary. None files A list of files containing terms one per line. None"},{"location":"policies/filters/custom_filters/dictionary/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None fuzzy When set to true, the dictionary will employ fuzzy comparisons. Use the sensitivity parameter to control the level of fuzziness. Setting this value to false will disable fuzziness and provide a higher level of performance. false classification Used to apply an arbitrary label to the identifier, such as \"patient-id\", or \"account-number.\" \"custom-identifier\" sensitivity Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are low, medium, and high. Only applies when fuzzy is set to true. medium"},{"location":"policies/filters/custom_filters/dictionary/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/custom_filters/dictionary/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/custom_filters/dictionary/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"dictionary-example\",\n   \"identifiers\": {\n      \"dictionaries\": [\n         \"customDictionary\": {\n            \"terms\": [\"john\", \"jane\", \"doe\"],\n            \"files\": \"c:\\\\temp\\\\dictionary.txt\",\n            \"fuzzy\": true,\n            \"sensitivity\": \"medium\",\n            \"sectionFilterStrategies\": [\n               {\n                  \"strategy\": \"REDACT\",\n                  \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n               }\n            ]\n         }\n      ]\n   }   \n}\n
    "},{"location":"policies/filters/custom_filters/identifier/","title":"Identifier","text":""},{"location":"policies/filters/custom_filters/identifier/#filter","title":"Filter","text":"

    This filter identifies custom text based on a given regular expression.

    The Identifier filter accepts a list of regular expression-based identifiers. See the policy at the bottom of this page for an example.

    Note that backslashes in the regular expression will need to be escaped for the policy to be valid JSON.

    "},{"location":"policies/filters/custom_filters/identifier/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/custom_filters/identifier/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None caseSensitive When set to true, the regular expression will be case sensitive. true classification Used to apply an arbitrary label to the identifier, such as \"patient-id\", or \"account-number.\" \"custom-identifier\" pattern A regular expression for the identifier. Note that backslashes will need to be escaped. \\b[A-Z0-9_-]{4,}\\b"},{"location":"policies/filters/custom_filters/identifier/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. LAST_4 Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/custom_filters/identifier/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, != CLASSIFICATION Compares the classification of the sensitive text. == , !="},{"location":"policies/filters/custom_filters/identifier/#example-policy","title":"Example Policy","text":"
    {\n  \"name\": \"default\",\n  \"identifiers\": {\n    \"identifiers\": [\n      {\n        \"pattern\": \"[A-Z]{9}\",\n        \"caseSensitive\": false,\n        \"classification\": \"custom-identifier\",\n        \"enabled\": true,\n        \"identifierFilterStrategies\": [\n          {\n            \"strategy\": \"REDACT\",\n            \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n          }\n        ]        \n      }\n    ]\n  }\n}\n
    "},{"location":"policies/filters/locations/cities/","title":"Cities","text":""},{"location":"policies/filters/locations/cities/#filter","title":"Filter","text":"

    This filter identifies common US cities as determined by the US census in text.

    "},{"location":"policies/filters/locations/cities/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/locations/cities/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value cityFilterStrategies A list of filter strategies. None sensitivity Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are low, medium, and high. medium"},{"location":"policies/filters/locations/cities/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/cities/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/locations/cities/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"cities-example\",\n   \"identifiers\": {\n      \"city\": {\n         \"sensitivity\": \"medium\",\n         \"cityFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/locations/counties/","title":"Counties","text":""},{"location":"policies/filters/locations/counties/#filter","title":"Filter","text":"

    This filter identifies common US counties as determined by the US census in text.

    "},{"location":"policies/filters/locations/counties/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/locations/counties/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value countyFilterStrategies A list of filter strategies. None sensitivity Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are low, medium, and high. medium"},{"location":"policies/filters/locations/counties/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/counties/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/locations/counties/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"counties-example\",\n   \"identifiers\": {\n      \"county\": {\n         \"sensitivity\": \"medium\",\n         \"countyFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/locations/hospital-abbreviations/","title":"Hospital Abbreviations","text":""},{"location":"policies/filters/locations/hospital-abbreviations/#filter","title":"Filter","text":"

    This filter identifies US hospital abbreviations in text.

    "},{"location":"policies/filters/locations/hospital-abbreviations/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/locations/hospital-abbreviations/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value hospitalAbbreviationFilterStrategies A list of filter strategies. None sensitivity Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are low, medium, and high. medium"},{"location":"policies/filters/locations/hospital-abbreviations/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/hospital-abbreviations/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/locations/hospital-abbreviations/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"hospital-abbreviations-example\",\n   \"identifiers\": {\n      \"hospitalAbbreviation\": {\n         \"sensitivity\": \"medium\",\n         \"hospitalAbbreviationFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/locations/hospitals/","title":"Hospitals","text":""},{"location":"policies/filters/locations/hospitals/#filter","title":"Filter","text":"

    This filter identifies US hospitals in text.

    "},{"location":"policies/filters/locations/hospitals/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/locations/hospitals/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value hospitalFilterStrategies A list of filter strategies. None sensitivity Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are low, medium, and high. medium"},{"location":"policies/filters/locations/hospitals/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/hospitals/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/locations/hospitals/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"hospitals-example\",\n   \"identifiers\": {\n      \"hospital\": {\n         \"sensitivity\": \"medium\",\n         \"hospitalFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/locations/state-abbreviations/","title":"State Abbreviations","text":""},{"location":"policies/filters/locations/state-abbreviations/#filter","title":"Filter","text":"

    This filter identifies US state abbreviations in text.

    "},{"location":"policies/filters/locations/state-abbreviations/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/locations/state-abbreviations/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value stateAbbreviationFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/locations/state-abbreviations/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/state-abbreviations/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/locations/state-abbreviations/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"states-abbreviations-example\",\n   \"identifiers\": {\n      \"stateAbbreviation\": {\n         \"stateAbbreviationFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/locations/states/","title":"States","text":""},{"location":"policies/filters/locations/states/#filter","title":"Filter","text":"

    This filter identifies US states in text.

    "},{"location":"policies/filters/locations/states/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/locations/states/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value stateFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/locations/states/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/states/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/locations/states/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"states-example\",\n   \"identifiers\": {\n      \"state\": {\n         \"stateFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/persons_names/first-names/","title":"First Names","text":""},{"location":"policies/filters/persons_names/first-names/#filter","title":"Filter","text":"

    This filter identifies common first names as identified by the US census in text.

    "},{"location":"policies/filters/persons_names/first-names/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/persons_names/first-names/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value sensitivity Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are low, medium, and high. medium firstNameFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/first-names/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/persons_names/first-names/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/persons_names/first-names/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"first-names-example\",\n   \"identifiers\": {\n      \"firstName\": {\n         \"firstNameFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/persons_names/persons-names-ner/","title":"Person's Names (NER)","text":""},{"location":"policies/filters/persons_names/persons-names-ner/#filter","title":"Filter","text":"

    This filter identifies person's names based on natural language processing (NLP) and named-entity recognition (NER) in text.

    "},{"location":"policies/filters/persons_names/persons-names-ner/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/persons_names/persons-names-ner/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value removePunctuation When set to true, punctuation will be removed prior to analysis. false nerFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/persons-names-ner/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value. ABBREVIATE Replace the sensitive text with the initials of the text."},{"location":"policies/filters/persons_names/persons-names-ner/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/persons_names/persons-names-ner/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"ner-example\",\n   \"identifiers\": {\n      \"ner\": {\n         \"nerFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/persons_names/physician-names-ner/","title":"Physician Names","text":""},{"location":"policies/filters/persons_names/physician-names-ner/#filter","title":"Filter","text":"

    This filter identifies physician names (e.g. Dr. John Smith) in text.

    "},{"location":"policies/filters/persons_names/physician-names-ner/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/persons_names/physician-names-ner/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value physicianNameFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/physician-names-ner/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/persons_names/physician-names-ner/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/persons_names/physician-names-ner/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"physician-names-example\",\n   \"identifiers\": {\n      \"physicianName\": {\n         \"physicianNameFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"policies/filters/persons_names/surnames/","title":"Surnames","text":""},{"location":"policies/filters/persons_names/surnames/#filter","title":"Filter","text":"

    This filter identifies common surnames as identified by the US census in text.

    "},{"location":"policies/filters/persons_names/surnames/#required-parameters","title":"Required Parameters","text":"

    This filter has no required parameters.

    "},{"location":"policies/filters/persons_names/surnames/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value sensitivity Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are low, medium, and high. medium surnameFilterStrategies A list of filter strategies. None enabled When set to false, the filter will be disabled and not applied true ignored A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/surnames/#filter-strategies","title":"Filter Strategies","text":"

    The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of REDACT is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.

    Strategy Description REDACT Replace the sensitive text with a placeholder. RANDOM_REPLACE Replace the sensitive text with a similar, random value. STATIC_REPLACE Replace the sensitive text with a given value. CRYPTO_REPLACE Replace the sensitive text with its encrypted value. HASH_SHA256_REPLACE Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/persons_names/surnames/#conditions","title":"Conditions","text":"

    Each filter strategy may have one condition. See Conditions for details.

    Conditional Description Operators TOKEN Compares the value of the sensitive text. == , != CONTEXT Compares the filtering context. == , != CONFIDENCE Compares the confidence in the sensitive text against a threshold value. < , <=, > , >=, ==, !="},{"location":"policies/filters/persons_names/surnames/#example-policy","title":"Example Policy","text":"
    {\n   \"name\": \"surnames-example\",\n   \"identifiers\": {\n      \"surname\": {\n         \"surnameFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n
    "},{"location":"quick_starts/quick_start_aws/","title":"Philter Quick Start on AWS","text":"

    Philter on AWS is a virtual machine-based product. It runs in EC2 on its own EC2 instance. A free trial period is available during which there is no charge for the Philter software but there may be charges for the underlying AWS infrastructure.

    Cloud virtual machines launched from a cloud marketplace may not be immediately suitable for a HIPAA environment. Refer to your compliance officer for your organization's requirements to ensure compliance with all relevant regulations.

    Here\u2019s a brief screen cast showing how to launch Philter in AWS.

    "},{"location":"quick_starts/quick_start_aws/#launch-philter-in-aws","title":"Launch Philter in AWS","text":"
    1. Go to Philter in the AWS Marketplace. On this page you can see the Philter overview, the pricing, and the supported EC2 instance types.
    2. Select an instance type. We recommend m5.large. The smaller instance types are intended only for testing and are not well-suited for production usage.
    3. Click the Continue to Subscribe button.
    4. View and accept Philter\u2019s license agreement. Then click Accept Terms.
    5. The subscription will now be created and you will be notified when it is ready! This usually takes less than a minute.
    6. Click the Continue to Configuration button to select the AMI, the version, and the region. We recommend using the newest version if multiple are available.
    7. Click the Continue to Launch button to launch Philter in your AWS account!

    AWS will automatically open ports 22 (SSH) and 8080 (Philter API) for the Philter instance's security group. These ports are required to be open but you may want to modify the security groups to limit their scope of availability by restricting access to specific CIDR ranges.

    Congratulations! You have deployed Philter in AWS. You are now ready to filter text!

    "},{"location":"quick_starts/quick_start_aws/#try-it-out","title":"Try it out!","text":"

    With Philter now running we can take it for a spin. We will send some text to Philter and inspect the response we get back. The Philter virtual machine running in your cloud account should have a public IP address (unless you customized the deployment). We will use that public IP address to interact with Philter.

    Philter, by default, will be configured with an HTTPS listener on port 8080 using a self-signed certificate. It is recommended that prior to use in a production environment the self-signed certificate is replaced by a valid certificate owned by your organization.

    In the command below, replace <PUBLIC_IP> with the virtual machine\u2019s public IP address or public host name.

    curl -k -X POST https://<PUBLIC_IP>:8080/api/filter --data \"George Washington was a patient and his SSN is 123-45-6789.\" -H \"Content-type: text/plain\"\n

    With this command we are sending the text in the command to Philter for filtering. Philter will identify the patient name (George Washington) and the SSN (123-45-6789) and redact those values in the response. You can always use curl to send text to Philter as in these examples, but there are also SDKs you can use to integrate Philter with your applications.

    "},{"location":"quick_starts/quick_start_aws/#redacting-sensitive-information-from-text","title":"Redacting Sensitive Information from Text","text":"

    The types of sensitive information that Philter identifies and removes are controlled by policies. By default, Philter includes a filter profile that includes many of the types of sensitive information, such as names and social security numbers. We can send text to Philter for filtering using this default filter profile with the following command:

    curl -k -X POST https://localhost:8080/api/filter -d @file.txt -H \"Content-Type: text/plain\"\n

    This command sends the contents of the file file.txt to Philter. Philter will apply the enabled filters and return a plain-text response consisting of the filtered text. (Replace localhost with the IP address or host name of Philter if you are not running the command where Philter is running.) You can also send text directly in the request instead of sending it as a file:

    curl -k -X POST https://localhost:8080/api/filter --data \"Your text goes here...\" -H \"Content-type: text/plain\"\n
    "},{"location":"quick_starts/quick_start_aws/#next-steps","title":"Next Steps","text":"

    Now that you have Philter running and know how to send text to it you are ready to integrate Philter into your existing workflow and systems. Philter\u2019s API details how to send files to Philter. Clients for some languages for Philter\u2019s API are available on GitHub.

    Be sure to check out Policies to see how you can customize the types of sensitive information Philter redacts!

    "},{"location":"quick_starts/quick_start_aws/#example-uses","title":"Example Uses","text":"

    Here are a few examples showing how to use Philter with some common big-data and streaming applications.

    Description Technologies Remove sensitive information from text in an Apache NiFi dataflow Apache NiFi Remove sensitive information from text using AWS Lambda in an Amazon Kinesis Firehose pipeline Amazon Kinesis, AWS Lambda Removing PII/PHI from OpenAI ChatGPT API Requests ChatGPT Redacting PHI and PII in Apache Kafka Data streams Apache Kafka"},{"location":"quick_starts/quick_start_azure/","title":"Philter Quick Start on Microsoft Azure","text":"

    Philter on Microsoft Azure is a virtual machine-based product. A free trial period is available during which there is no charge for the Philter software but there may be charges for the underlying Azure infrastructure.

    Cloud virtual machines launched from a cloud marketplace may not be immediately suitable for a HIPAA environment. Refer to your compliance officer for your organization's requirements to ensure compliance with all relevant regulations.

    "},{"location":"quick_starts/quick_start_azure/#launch-philter-on-microsoft-azure","title":"Launch Philter on Microsoft Azure","text":"
    1. Go to Philter in the Azure Marketplace.
    2. Click the Get It Now button.
    3. Review the information that is shown on the popup and click Continue when ready.
    4. You will now be asked to log in to your Microsoft Azure account if you were not already logged in.
    5. Click the Create button to begin making a Philter virtual machine.
    6. Enter the required details of the virtual machine and click the Review + create button.
    7. Review the virtual machine details and click Create when ready!

    Your Philter virtual machine will now be launching.

    Microsoft Azure will automatically open ports 22 (SSH) and 8080 (Philter API). These ports are required to be open but you may want to modify the virtual machine's network security group to limit their scope of availability by restricting access to specific CIDR ranges.

    Congratulations! You have deployed Philter in Azure. You are now ready to filter text!

    "},{"location":"quick_starts/quick_start_azure/#try-it-out","title":"Try it out!","text":"

    With Philter now running we can take it for a spin. We will send some text to Philter and inspect the response we get back. The Philter virtual machine running in your cloud account should have a public IP address (unless you customized the deployment). We will use that public IP address to interact with Philter.

    Philter, by default, will be configured with an HTTPS listener on port 8080 using a self-signed certificate. It is recommended that prior to use in a production environment the self-signed certificate is replaced by a valid certificate owned by your organization.

    In the command below, replace <PUBLIC_IP> with the virtual machine\u2019s public IP address or public host name.

    curl -k -X POST https://<PUBLIC_IP>:8080/api/filter --data \"George Washington was a patient and his SSN is 123-45-6789.\" -H \"Content-type: text/plain\"\n

    With this command we are sending the text in the command to Philter for filtering. Philter will identify the patient name (George Washington) and the SSN (123-45-6789) and redact those values in the response. You can always use curl to send text to Philter as in these examples, but there are also SDKs you can use to integrate Philter with your applications.

    "},{"location":"quick_starts/quick_start_azure/#redacting-sensitive-information-from-text","title":"Redacting Sensitive Information from Text","text":"

    The types of sensitive information that Philter identifies and removes are controlled by policies. By default, Philter includes a filter profile that includes many of the types of sensitive information, such as names and social security numbers. We can send text to Philter for filtering using this default filter profile with the following command:

    curl -k -X POST https://localhost:8080/api/filter -d @file.txt -H \"Content-Type: text/plain\"\n

    This command sends the contents of the file file.txt to Philter. Philter will apply the enabled filters and return a plain-text response consisting of the filtered text. (Replace localhost with the IP address or host name of Philter if you are not running the command where Philter is running.) You can also send text directly in the request instead of sending it as a file:

    curl -k -X POST https://localhost:8080/api/filter --data \"Your text goes here...\" -H \"Content-type: text/plain\"\n
    "},{"location":"quick_starts/quick_start_azure/#next-steps","title":"Next Steps","text":"

    Now that you have Philter running and know how to send text to it you are ready to integrate Philter into your existing workflow and systems. Philter\u2019s API details how to send files to Philter. Clients for some languages for Philter\u2019s API are available on GitHub.

    Be sure to check out Policies to see how you can customize the types of sensitive information Philter redacts!

    "},{"location":"quick_starts/quick_start_azure/#example-uses","title":"Example Uses","text":"

    Here are a few examples showing how to use Philter with some common big-data and streaming applications.

    Description Technologies Remove sensitive information from text in an Apache NiFi dataflow Apache NiFi Remove sensitive information from text using AWS Lambda in an Amazon Kinesis Firehose pipeline Amazon Kinesis, AWS Lambda Removing PII/PHI from OpenAI ChatGPT API Requests ChatGPT Redacting PHI and PII in Apache Kafka Data streams Apache Kafka

    "},{"location":"quick_starts/quick_start_gcp/","title":"Philter Quick Start on Google Cloud","text":"

    Philter on Google Cloud is a virtual machine-based product. A free trial period is available during which there is no charge for the Philter software but there may be charges for the underlying Google Cloud infrastructure.

    Cloud virtual machines launched from a cloud marketplace may not be immediately suitable for a HIPAA environment. Refer to your compliance officer for your organization's requirements to ensure compliance with all relevant regulations.

    "},{"location":"quick_starts/quick_start_gcp/#launch-philter-in-google-cloud","title":"Launch Philter in Google Cloud","text":"
    1. Go to Philter in the Google Cloud Marketplace.
    2. Click the Launch on Compute Engine button.

    Virtual Machine Recommendations

    The general purpose machine type is n2-standard-2 and this machine type should be adequate for most use-cases. We recommend 8 vCPUs and 8-16 GB of RAM for a production deployment.

    Google Cloud will automatically open ports 22 (SSH) and 8080 (Philter API). These ports are required to be open but you may want to modify the firewall rules to limit their scope of availability by restricting access to specific CIDR ranges.

    Congratulations! You have deployed Philter in Google Cloud. You are now ready to filter text!

    "},{"location":"quick_starts/quick_start_gcp/#try-it-out","title":"Try it out!","text":"

    With Philter now running we can take it for a spin. We will send some text to Philter and inspect the response we get back. The Philter virtual machine running in your cloud account should have a public IP address (unless you customized the deployment). We will use that public IP address to interact with Philter.

    Philter, by default, will be configured with an HTTPS listener on port 8080 using a self-signed certificate. It is recommended that prior to use in a production environment the self-signed certificate is replaced by a valid certificate owned by your organization.

    In the command below, replace <PUBLIC_IP> with the virtual machine\u2019s public IP address or public host name.

    curl -k -X POST https://<PUBLIC_IP>:8080/api/filter --data \"George Washington was a patient and his SSN is 123-45-6789.\" -H \"Content-type: text/plain\"\n

    With this command we are sending the text in the command to Philter for filtering. Philter will identify the patient name (George Washington) and the SSN (123-45-6789) and redact those values in the response. You can always use curl to send text to Philter as in these examples, but there are also SDKs you can use to integrate Philter with your applications.

    "},{"location":"quick_starts/quick_start_gcp/#redacting-sensitive-information-from-text","title":"Redacting Sensitive Information from Text","text":"

    The types of sensitive information that Philter identifies and removes are controlled by policies. By default, Philter includes a filter profile that includes many of the types of sensitive information, such as names and social security numbers. We can send text to Philter for filtering using this default filter profile with the following command:

    curl -k -X POST https://localhost:8080/api/filter -d @file.txt -H \"Content-Type: text/plain\"\n

    This command sends the contents of the file file.txt to Philter. Philter will apply the enabled filters and return a plain-text response consisting of the filtered text. (Replace localhost with the IP address or host name of Philter if you are not running the command where Philter is running.) You can also send text directly in the request instead of sending it as a file:

    curl -k -X POST https://localhost:8080/api/filter --data \"Your text goes here...\" -H \"Content-type: text/plain\"\n
    "},{"location":"quick_starts/quick_start_gcp/#next-steps","title":"Next Steps","text":"

    Now that you have Philter running and know how to send text to it you are ready to integrate Philter into your existing workflow and systems. Philter\u2019s API details how to send files to Philter. Clients for some languages for Philter\u2019s API are available on GitHub.

    Be sure to check out Policies to see how you can customize the types of sensitive information Philter redacts!

    "},{"location":"quick_starts/quick_start_gcp/#example-uses","title":"Example Uses","text":"

    Here are a few examples showing how to use Philter with some common big-data and streaming applications.

    Description Technologies Remove sensitive information from text in an Apache NiFi dataflow Apache NiFi Remove sensitive information from text using AWS Lambda in an Amazon Kinesis Firehose pipeline Amazon Kinesis, AWS Lambda Removing PII/PHI from OpenAI ChatGPT API Requests ChatGPT Redacting PHI and PII in Apache Kafka Data streams Apache Kafka

    "}]} \ No newline at end of file diff --git a/settings/index.html b/settings/index.html index a7b6630..cfaa491 100644 --- a/settings/index.html +++ b/settings/index.html @@ -679,6 +679,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/sitemap.xml b/sitemap.xml index c9aa42c..0c9b70c 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -60,6 +60,14 @@ https://philterd.github.io/philter/other_features/span_disambiguation/ 2024-10-23 + + https://philterd.github.io/philter/policies/document_analysis/ + 2024-10-23 + + + https://philterd.github.io/philter/policies/excluding_by_document_type/ + 2024-10-23 + https://philterd.github.io/philter/policies/filter_policies/ 2024-10-23 @@ -80,6 +88,10 @@ https://philterd.github.io/philter/policies/sample_policies/ 2024-10-23 + + https://philterd.github.io/philter/policies/splitting_input_text/ + 2024-10-23 + https://philterd.github.io/philter/policies/filters/common_filters/ages/ 2024-10-23 diff --git a/sitemap.xml.gz b/sitemap.xml.gz index 5433a1beeda7af50fbcaa7469fe158fdae9ed0b5..c31547df749c4877a729ed75a4d56e08d2f86b49 100644 GIT binary patch delta 715 zcmV;+0yO=r1=a9iIJZ7Qdu9+?OFI@nVZn~_^j+jQ z%{bFbFRTtSAOxRbf6?yg#ZIdeL_+T#*Y}(4dIiq-9^LS`{{8D`^KJdKzuV!~<=Rc&?8_*`!-Ns1 z1hL-?n8w^~&|jO{U0XVSd{0xdzTZD=AHFvC+ved5cX?~OOw_*5^q+IWNl#fPUVtV* zJosp}Gf)MEm%MOLoylk7T~raCGe)rBQXP&QhNz1?R++sA?D?M8yx43jt*iMd-CDrAE8HVLN^lBT^BO9|W=cH$lh%bf>Rj+2Y=^dksAWX@im@6poF59lU@c#=Glkv#$ zO5d~ZA)(7@IIv<+9ebI4C4~@57O@;nG~A1g9*CAjuo@@fl8CI?jq$xyydZr2tTw_4 zgC+_-CV$bY5=QT?%G|JbE&+{q@>yjP5-dr(P&jh594BxrA(N1E40Dto=#=AkkzgmO z+xp7RM3Se=l)U{L*x;hGI?f{TuBUI6zQBCK24s$nfN&; xZK5$L$9ntxoY5Q=>p}S<^P!^jSsKu5jvxzZFya4!mLGz&{{tNDIf_Lh0046xWAp$3 delta 672 zcmV;R0$=^s1*-*+7k`+&a@#NrfcHFw&$}!q?a=mYXUh|`vyLKR31{&q0BOg0`a*J? 
z=A7%2MbID-6h(d_0TAHk@x>2|BM@T@+vWXgy<9-B(P8Me%Rj%Lsvpb8-QA|pz>pWx zIPiAa=bV1k_33n4F=p^4@hVbRv+YIT`mM5FuYN9fcMG}34u3AC>Dmm@?#5nNx=B3_ z8OW_V%zbHB7_U|JrXCx9{LXOp(d{1A58tc%b@lK~n!MFr2CCmj`r?>yG($02CZI|{ z`$*md3$viel#d)d&){d4Rcr{J3lew;c^;0EIx?d^&N8|Kbn@%CM=LrJL){j8fUE|` znG6;8xL1U9TYsT~O1v7KlOCRg*vQK!q=Y(%2%NEFsWQP;;>QnaREi)C#!gOX7ND4W zpk@yy7Xq%$05p>~yd?=SoISqIXJ|J-oJ?pOvhgi;Wj?GF2ec?a`$=xzWeqT#N!)g) z1ZQRl>_Q~*Gz~+Lz!8toV3@+L`ISf1I1QjmdAPRG`+pdkt7tl$- zyQ=*U6tj&e+n#gc!dU5aTG^ik)R5E3H%6jL$q2^`w~|)kVnDSnO5Tiva!o|B?8-)` z6)!2DKX0sZ!ep3Jq^#OhQXj+Bm@9b6WB3q*epa1?3{&o>6pjN=S*gSlIteABw+HQk z&UlKe5`XNdby;6!Gtsou70%KB3v6(*Swm-)c-KRmnGW(l;mLRA;^VC3nOfHL163?R zBUc|eVKx$$wk^RCjiQa>Go?B(&f^)q>baW`l~5(Q2~!h{pK6IE44ujf@!q5hU=_4@ zx*N#;%<^86Hr1G1XufrRE@%&p*ui+P|3PTXNf{f^Ya!5u)Y + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +
  • diff --git a/upgrading/index.html b/upgrading/index.html index 80ccb00..6b7076d 100644 --- a/upgrading/index.html +++ b/upgrading/index.html @@ -739,6 +739,112 @@ + + + + +
  • + + + + + + + + + + +
  • + + + + + + + + + + +
  • + + + + + Splitting Input Text + + + + +
  • + + + + + + + + + +