From 2b92ec801c30798d5e16f6448c9c31fca4b518ed Mon Sep 17 00:00:00 2001
From: jzonthemtn <jeff.zemerick@philterd.ai>
Date: Wed, 23 Oct 2024 16:24:32 -0400
Subject: [PATCH] Deployed 201ce14 with MkDocs version: 1.6.1

---
 404.html                                      |  106 ++
 deidentification/bucketing/index.html         |  106 ++
 deidentification/date-shifting/index.html     |  106 ++
 deidentification/deidentification/index.html  |  106 ++
 deidentification/encryption/index.html        |  106 ++
 deidentification/pii_phi_nppi/index.html      |  106 ++
 .../redaction-and-masking/index.html          |  106 ++
 deidentification/replacement/index.html       |  106 ++
 evaluating-performance/index.html             |  106 ++
 index.html                                    |  106 ++
 other_features/alerts/index.html              |  106 ++
 other_features/anonymization/index.html       |  106 ++
 other_features/span_disambiguation/index.html |  106 ++
 policies/document_analysis/index.html         | 1071 +++++++++++++++
 .../excluding_by_document_type/index.html     | 1172 ++++++++++++++++
 policies/filter_policies/index.html           |  106 ++
 policies/filter_strategies/index.html         |  108 +-
 .../filters/common_filters/ages/index.html    |  106 ++
 .../bank-routing-numbers/index.html           |  106 ++
 .../bitcoin-addresses/index.html              |  106 ++
 .../common_filters/creditcards/index.html     |  106 ++
 .../filters/common_filters/dates/index.html   |  106 ++
 .../drivers-license-numbers/index.html        |  106 ++
 .../common_filters/email-addresses/index.html |  106 ++
 .../common_filters/iban-codes/index.html      |  106 ++
 .../common_filters/ip-addresses/index.html    |  106 ++
 .../common_filters/mac-addresses/index.html   |  106 ++
 .../passport-numbers/index.html               |  106 ++
 .../phone-number-extensions/index.html        |  106 ++
 .../common_filters/phone-numbers/index.html   |  106 ++
 .../common_filters/sections/index.html        |  106 ++
 .../common_filters/ssns-and-tins/index.html   |  106 ++
 .../tracking-numbers/index.html               |  106 ++
 .../filters/common_filters/urls/index.html    |  106 ++
 .../filters/common_filters/vins/index.html    |  106 ++
 .../common_filters/zip-codes/index.html       |  106 ++
 .../custom_filters/dictionary/index.html      |  106 ++
 .../custom_filters/identifier/index.html      |  106 ++
 policies/filters/index.html                   |  106 ++
 policies/filters/locations/cities/index.html  |  106 ++
 .../filters/locations/counties/index.html     |  106 ++
 .../hospital-abbreviations/index.html         |  106 ++
 .../filters/locations/hospitals/index.html    |  106 ++
 .../locations/state-abbreviations/index.html  |  106 ++
 policies/filters/locations/states/index.html  |  106 ++
 .../persons_names/first-names/index.html      |  106 ++
 .../persons-names-ner/index.html              |  106 ++
 .../physician-names-ner/index.html            |  106 ++
 .../filters/persons_names/surnames/index.html |  106 ++
 .../ignoring_sensitive_information/index.html |  108 +-
 policies/sample_policies/index.html           |  106 ++
 policies/splitting_input_text/index.html      | 1178 +++++++++++++++++
 quick_starts/quick_start_aws/index.html       |  106 ++
 quick_starts/quick_start_azure/index.html     |  106 ++
 quick_starts/quick_start_gcp/index.html       |  106 ++
 search/search_index.json                      |    2 +-
 settings/index.html                           |  106 ++
 sitemap.xml                                   |   12 +
 sitemap.xml.gz                                |  Bin 683 -> 726 bytes
 system_requirements/index.html                |  106 ++
 upgrading/index.html                          |  106 ++
 61 files changed, 9266 insertions(+), 3 deletions(-)
 create mode 100644 policies/document_analysis/index.html
 create mode 100644 policies/excluding_by_document_type/index.html
 create mode 100644 policies/splitting_input_text/index.html

diff --git a/404.html b/404.html
index b225dcf..fbebde2 100644
--- a/404.html
+++ b/404.html
@@ -670,6 +670,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="/philter/policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="/philter/policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="/philter/policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="/philter/policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/deidentification/bucketing/index.html b/deidentification/bucketing/index.html
index c0f26cf..6118275 100644
--- a/deidentification/bucketing/index.html
+++ b/deidentification/bucketing/index.html
@@ -695,6 +695,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/deidentification/date-shifting/index.html b/deidentification/date-shifting/index.html
index a06f5cb..d58adf7 100644
--- a/deidentification/date-shifting/index.html
+++ b/deidentification/date-shifting/index.html
@@ -695,6 +695,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/deidentification/deidentification/index.html b/deidentification/deidentification/index.html
index e8f188e..63e26cb 100644
--- a/deidentification/deidentification/index.html
+++ b/deidentification/deidentification/index.html
@@ -693,6 +693,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/deidentification/encryption/index.html b/deidentification/encryption/index.html
index d9264f3..34c7e27 100644
--- a/deidentification/encryption/index.html
+++ b/deidentification/encryption/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/deidentification/pii_phi_nppi/index.html b/deidentification/pii_phi_nppi/index.html
index e632690..9f273c5 100644
--- a/deidentification/pii_phi_nppi/index.html
+++ b/deidentification/pii_phi_nppi/index.html
@@ -732,6 +732,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/deidentification/redaction-and-masking/index.html b/deidentification/redaction-and-masking/index.html
index 2ae79fe..7d710cb 100644
--- a/deidentification/redaction-and-masking/index.html
+++ b/deidentification/redaction-and-masking/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/deidentification/replacement/index.html b/deidentification/replacement/index.html
index 31c4244..089371e 100644
--- a/deidentification/replacement/index.html
+++ b/deidentification/replacement/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/evaluating-performance/index.html b/evaluating-performance/index.html
index 78dd5ec..ab881ab 100644
--- a/evaluating-performance/index.html
+++ b/evaluating-performance/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/index.html b/index.html
index 004841f..4f13dc7 100644
--- a/index.html
+++ b/index.html
@@ -689,6 +689,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/other_features/alerts/index.html b/other_features/alerts/index.html
index 61a8137..47e1e3c 100644
--- a/other_features/alerts/index.html
+++ b/other_features/alerts/index.html
@@ -681,6 +681,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/other_features/anonymization/index.html b/other_features/anonymization/index.html
index 767ef5e..5755cdf 100644
--- a/other_features/anonymization/index.html
+++ b/other_features/anonymization/index.html
@@ -681,6 +681,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/other_features/span_disambiguation/index.html b/other_features/span_disambiguation/index.html
index feb75c3..571fd38 100644
--- a/other_features/span_disambiguation/index.html
+++ b/other_features/span_disambiguation/index.html
@@ -681,6 +681,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/document_analysis/index.html b/policies/document_analysis/index.html
new file mode 100644
index 0000000..14209f1
--- /dev/null
+++ b/policies/document_analysis/index.html
@@ -0,0 +1,1071 @@
+
+<!doctype html>
+<html lang="en" class="no-js">
+  <head>
+    
+      <meta charset="utf-8">
+      <meta name="viewport" content="width=device-width,initial-scale=1">
+      
+        <meta name="description" content="User guide for Philter, the open source PII/PHI redaction engine.">
+      
+      
+        <meta name="author" content="philterd">
+      
+      
+        <link rel="canonical" href="https://philterd.github.io/philter/policies/document_analysis/">
+      
+      
+        <link rel="prev" href="../filter_strategies/">
+      
+      
+        <link rel="next" href="../excluding_by_document_type/">
+      
+      
+      <link rel="icon" href="../../assets/images/favicon.png">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.5.42">
+    
+    
+      
+        <title>Document Analysis - Philter</title>
+      
+    
+    
+      <link rel="stylesheet" href="../../assets/stylesheets/main.0253249f.min.css">
+      
+        
+        <link rel="stylesheet" href="../../assets/stylesheets/palette.06af60db.min.css">
+      
+      
+
+
+    
+    
+      
+    
+    
+      
+        
+        
+        <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+        <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
+        <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
+      
+    
+    
+    <script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
+    
+      
+
+    
+    
+    
+  </head>
+  
+  
+    
+    
+    
+    
+    
+    <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="red" data-md-color-accent="indigo">
+  
+    
+    <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
+    <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
+    <label class="md-overlay" for="__drawer"></label>
+    <div data-md-component="skip">
+      
+        
+        <a href="#document-analysis" class="md-skip">
+          Skip to content
+        </a>
+      
+    </div>
+    <div data-md-component="announce">
+      
+    </div>
+    
+    
+      
+
+  
+
+<header class="md-header md-header--shadow" data-md-component="header">
+  <nav class="md-header__inner md-grid" aria-label="Header">
+    <a href="../.." title="Philter" class="md-header__button md-logo" aria-label="Philter" data-md-component="logo">
+      
+  
+  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
+
+    </a>
+    <label class="md-header__button md-icon" for="__drawer">
+      
+      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
+    </label>
+    <div class="md-header__title" data-md-component="header-title">
+      <div class="md-header__ellipsis">
+        <div class="md-header__topic">
+          <span class="md-ellipsis">
+            Philter
+          </span>
+        </div>
+        <div class="md-header__topic" data-md-component="header-topic">
+          <span class="md-ellipsis">
+            
+              Document Analysis
+            
+          </span>
+        </div>
+      </div>
+    </div>
+    
+      
+    
+    
+    
+    
+      <label class="md-header__button md-icon" for="__search">
+        
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
+      </label>
+      <div class="md-search" data-md-component="search" role="dialog">
+  <label class="md-search__overlay" for="__search"></label>
+  <div class="md-search__inner" role="search">
+    <form class="md-search__form" name="search">
+      <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
+      <label class="md-search__icon md-icon" for="__search">
+        
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
+        
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
+      </label>
+      <nav class="md-search__options" aria-label="Search">
+        
+        <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
+          
+          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
+        </button>
+      </nav>
+      
+    </form>
+    <div class="md-search__output">
+      <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
+        <div class="md-search-result" data-md-component="search-result">
+          <div class="md-search-result__meta">
+            Initializing search
+          </div>
+          <ol class="md-search-result__list" role="presentation"></ol>
+        </div>
+      </div>
+    </div>
+  </div>
+</div>
+    
+    
+      <div class="md-header__source">
+        <a href="https://github.com/philterd/philter" title="Go to repository" class="md-source" data-md-component="source">
+  <div class="md-source__icon md-icon">
+    
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
+  </div>
+  <div class="md-source__repository">
+    GitHub
+  </div>
+</a>
+      </div>
+    
+  </nav>
+  
+</header>
+    
+    <div class="md-container" data-md-component="container">
+      
+      
+        
+          
+        
+      
+      <main class="md-main" data-md-component="main">
+        <div class="md-main__inner md-grid">
+          
+            
+              
+              <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
+                <div class="md-sidebar__scrollwrap">
+                  <div class="md-sidebar__inner">
+                    
+
+
+
+<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
+  <label class="md-nav__title" for="__drawer">
+    <a href="../.." title="Philter" class="md-nav__button md-logo" aria-label="Philter" data-md-component="logo">
+      
+  
+  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
+
+    </a>
+    Philter
+  </label>
+  
+    <div class="md-nav__source">
+      <a href="https://github.com/philterd/philter" title="Go to repository" class="md-source" data-md-component="source">
+  <div class="md-source__icon md-icon">
+    
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
+  </div>
+  <div class="md-source__repository">
+    GitHub
+  </div>
+</a>
+    </div>
+  
+  <ul class="md-nav__list" data-md-scrollfix>
+    
+      
+      
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../.." class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Home
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../upgrading/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Upgrading
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../system_requirements/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    System Requirements
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
+        
+          
+          <label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Quick Starts
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_4">
+            <span class="md-nav__icon md-icon"></span>
+            Quick Starts
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../quick_starts/quick_start_aws/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Philter Quick Start on AWS
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../quick_starts/quick_start_azure/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Philter Quick Start on Microsoft Azure
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../quick_starts/quick_start_gcp/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Philter Quick Start on Google Cloud
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    PII and De-identification
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_5">
+            <span class="md-nav__icon md-icon"></span>
+            PII and De-identification
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../deidentification/pii_phi_nppi/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    PII, PHI, and NPPI
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../deidentification/deidentification/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Deidentification
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_3" >
+        
+          
+          <label class="md-nav__link" for="__nav_5_3" id="__nav_5_3_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Deidentification Methods
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_3_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_5_3">
+            <span class="md-nav__icon md-icon"></span>
+            Deidentification Methods
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../deidentification/bucketing/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Bucketing
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../deidentification/date-shifting/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Date Shifting
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+    
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--active md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" checked>
+        
+          
+          <label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Policies
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="true">
+          <label class="md-nav__title" for="__nav_6">
+            <span class="md-nav__icon md-icon"></span>
+            Policies
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../sample_policies/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Sample Policies
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../filter_policies/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Filter Policies
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../filters/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Filters
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../filter_strategies/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Filter Strategies
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+    
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--active md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" checked>
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="true">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+    
+  
+  
+  
+    <li class="md-nav__item md-nav__item--active">
+      
+      <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
+      
+      
+        
+      
+      
+      <a href="./" class="md-nav__link md-nav__link--active">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../ignoring_sensitive_information/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Ignoring Sensitive Information
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" >
+        
+          
+          <label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Other Features
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_7">
+            <span class="md-nav__icon md-icon"></span>
+            Other Features
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../other_features/alerts/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Alerts
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../other_features/anonymization/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Anonymization
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../other_features/span_disambiguation/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Span Disambiguation
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../settings/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Settings
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+    
+  </ul>
+</nav>
+                  </div>
+                </div>
+              </div>
+            
+            
+              
+              <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
+                <div class="md-sidebar__scrollwrap">
+                  <div class="md-sidebar__inner">
+                    
+
+<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
+  
+  
+  
+    
+  
+  
+</nav>
+                  </div>
+                </div>
+              </div>
+            
+          
+          
+            <div class="md-content" data-md-component="content">
+              <article class="md-content__inner md-typeset">
+                
+                  
+
+  
+  
+
+
+<h1 id="document-analysis">Document Analysis</h1>
+<p>Philter analyzes each received document before redacting it. This analysis helps Philter get a better understanding of the document. The results of the analysis are used to <a href="../excluding_by_document_type/">exclude certain document types from redaction</a> and to improve Philter's redaction performance.</p>
+<p>While not recommended, the automatic document analysis can be disabled in a policy. By default, document analysis is enabled.</p>
+<blockquote>
+<p><strong>Warning:</strong> Disabling document analysis will cause any policy features that depend on the results of the
+document analysis to not function.</p>
+</blockquote>
+<p>An example policy with disabled document analysis is shown below.</p>
+<pre><code>{
+  &quot;name&quot;: &quot;email-and-phone-numbers&quot;,
+  &quot;config&quot;: {
+    &quot;analysis&quot;: {
+      &quot;enabled&quot;: false
+    }
+  },
+  &quot;identifiers&quot;: {
+    &quot;emailAddress&quot;: {
+      &quot;emailAddressFilterStrategies&quot;: [
+        {
+          &quot;strategy&quot;: &quot;REDACT&quot;,
+          &quot;redactionFormat&quot;: &quot;{{{REDACTED-%t}}}&quot;
+        }
+      ]
+    }
+  }
+}
+</code></pre>
+
+
+
+
+
+
+
+
+
+
+
+
+                
+              </article>
+            </div>
+          
+          
+<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
+        </div>
+        
+      </main>
+      
+        <footer class="md-footer">
+  
+  <div class="md-footer-meta md-typeset">
+    <div class="md-footer-meta__inner md-grid">
+      <div class="md-copyright">
+  
+    <div class="md-copyright__highlight">
+      Copyright 2024 Philterd, LLC
+    </div>
+  
+  
+    Made with
+    <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
+      Material for MkDocs
+    </a>
+  
+</div>
+      
+    </div>
+  </div>
+</footer>
+      
+    </div>
+    <div class="md-dialog" data-md-component="dialog">
+      <div class="md-dialog__inner md-typeset"></div>
+    </div>
+    
+    
+    <script id="__config" type="application/json">{"base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.6ce7567c.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
+    
+    
+      <script src="../../assets/javascripts/bundle.83f73b43.min.js"></script>
+      
+    
+  </body>
+</html>
\ No newline at end of file
diff --git a/policies/excluding_by_document_type/index.html b/policies/excluding_by_document_type/index.html
new file mode 100644
index 0000000..c97d539
--- /dev/null
+++ b/policies/excluding_by_document_type/index.html
@@ -0,0 +1,1172 @@
+
+<!doctype html>
+<html lang="en" class="no-js">
+  <head>
+    
+      <meta charset="utf-8">
+      <meta name="viewport" content="width=device-width,initial-scale=1">
+      
+        <meta name="description" content="User guide for Philter, the open source PII/PHI redaction engine.">
+      
+      
+        <meta name="author" content="philterd">
+      
+      
+        <link rel="canonical" href="https://philterd.github.io/philter/policies/excluding_by_document_type/">
+      
+      
+        <link rel="prev" href="../document_analysis/">
+      
+      
+        <link rel="next" href="../splitting_input_text/">
+      
+      
+      <link rel="icon" href="../../assets/images/favicon.png">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.5.42">
+    
+    
+      
+        <title>Excluding by DocumentType - Philter</title>
+      
+    
+    
+      <link rel="stylesheet" href="../../assets/stylesheets/main.0253249f.min.css">
+      
+        
+        <link rel="stylesheet" href="../../assets/stylesheets/palette.06af60db.min.css">
+      
+      
+
+
+    
+    
+      
+    
+    
+      
+        
+        
+        <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+        <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
+        <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
+      
+    
+    
+    <script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
+    
+      
+
+    
+    
+    
+  </head>
+  
+  
+    
+    
+    
+    
+    
+    <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="red" data-md-color-accent="indigo">
+  
+    
+    <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
+    <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
+    <label class="md-overlay" for="__drawer"></label>
+    <div data-md-component="skip">
+      
+        
+        <a href="#excluding-by-document-type" class="md-skip">
+          Skip to content
+        </a>
+      
+    </div>
+    <div data-md-component="announce">
+      
+    </div>
+    
+    
+      
+
+  
+
+<header class="md-header md-header--shadow" data-md-component="header">
+  <nav class="md-header__inner md-grid" aria-label="Header">
+    <a href="../.." title="Philter" class="md-header__button md-logo" aria-label="Philter" data-md-component="logo">
+      
+  
+  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
+
+    </a>
+    <label class="md-header__button md-icon" for="__drawer">
+      
+      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
+    </label>
+    <div class="md-header__title" data-md-component="header-title">
+      <div class="md-header__ellipsis">
+        <div class="md-header__topic">
+          <span class="md-ellipsis">
+            Philter
+          </span>
+        </div>
+        <div class="md-header__topic" data-md-component="header-topic">
+          <span class="md-ellipsis">
+            
+              Excluding by DocumentType
+            
+          </span>
+        </div>
+      </div>
+    </div>
+    
+      
+    
+    
+    
+    
+      <label class="md-header__button md-icon" for="__search">
+        
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
+      </label>
+      <div class="md-search" data-md-component="search" role="dialog">
+  <label class="md-search__overlay" for="__search"></label>
+  <div class="md-search__inner" role="search">
+    <form class="md-search__form" name="search">
+      <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
+      <label class="md-search__icon md-icon" for="__search">
+        
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
+        
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
+      </label>
+      <nav class="md-search__options" aria-label="Search">
+        
+        <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
+          
+          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
+        </button>
+      </nav>
+      
+    </form>
+    <div class="md-search__output">
+      <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
+        <div class="md-search-result" data-md-component="search-result">
+          <div class="md-search-result__meta">
+            Initializing search
+          </div>
+          <ol class="md-search-result__list" role="presentation"></ol>
+        </div>
+      </div>
+    </div>
+  </div>
+</div>
+    
+    
+      <div class="md-header__source">
+        <a href="https://github.com/philterd/philter" title="Go to repository" class="md-source" data-md-component="source">
+  <div class="md-source__icon md-icon">
+    
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
+  </div>
+  <div class="md-source__repository">
+    GitHub
+  </div>
+</a>
+      </div>
+    
+  </nav>
+  
+</header>
+    
+    <div class="md-container" data-md-component="container">
+      
+      
+        
+          
+        
+      
+      <main class="md-main" data-md-component="main">
+        <div class="md-main__inner md-grid">
+          
+            
+              
+              <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
+                <div class="md-sidebar__scrollwrap">
+                  <div class="md-sidebar__inner">
+                    
+
+
+
+<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
+  <label class="md-nav__title" for="__drawer">
+    <a href="../.." title="Philter" class="md-nav__button md-logo" aria-label="Philter" data-md-component="logo">
+      
+  
+  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
+
+    </a>
+    Philter
+  </label>
+  
+    <div class="md-nav__source">
+      <a href="https://github.com/philterd/philter" title="Go to repository" class="md-source" data-md-component="source">
+  <div class="md-source__icon md-icon">
+    
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
+  </div>
+  <div class="md-source__repository">
+    GitHub
+  </div>
+</a>
+    </div>
+  
+  <ul class="md-nav__list" data-md-scrollfix>
+    
+      
+      
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../.." class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Home
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../upgrading/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Upgrading
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../system_requirements/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    System Requirements
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
+        
+          
+          <label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Quick Starts
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_4">
+            <span class="md-nav__icon md-icon"></span>
+            Quick Starts
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../quick_starts/quick_start_aws/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Philter Quick Start on AWS
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../quick_starts/quick_start_azure/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Philter Quick Start on Microsoft Azure
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../quick_starts/quick_start_gcp/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Philter Quick Start on Google Cloud
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    PII and De-identification
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_5">
+            <span class="md-nav__icon md-icon"></span>
+            PII and De-identification
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../deidentification/pii_phi_nppi/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    PII, PHI, and NPPI
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../deidentification/deidentification/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Deidentification
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_3" >
+        
+          
+          <label class="md-nav__link" for="__nav_5_3" id="__nav_5_3_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Deidentification Methods
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_3_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_5_3">
+            <span class="md-nav__icon md-icon"></span>
+            Deidentification Methods
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../deidentification/bucketing/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Bucketing
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../deidentification/date-shifting/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Date Shifting
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+    
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--active md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" checked>
+        
+          
+          <label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Policies
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="true">
+          <label class="md-nav__title" for="__nav_6">
+            <span class="md-nav__icon md-icon"></span>
+            Policies
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../sample_policies/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Sample Policies
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../filter_policies/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Filter Policies
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../filters/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Filters
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../filter_strategies/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Filter Strategies
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+    
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--active md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" checked>
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="true">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+    
+  
+  
+  
+    <li class="md-nav__item md-nav__item--active">
+      
+      <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
+      
+      
+        
+      
+      
+        <label class="md-nav__link md-nav__link--active" for="__toc">
+          
+  
+  <span class="md-ellipsis">
+    Excluding by Document Type
+  </span>
+  
+
+          <span class="md-nav__icon md-icon"></span>
+        </label>
+      
+      <a href="./" class="md-nav__link md-nav__link--active">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by Document Type
+  </span>
+  
+
+      </a>
+      
+        
+
+<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
+  
+  
+  
+    
+  
+  
+    <label class="md-nav__title" for="__toc">
+      <span class="md-nav__icon md-icon"></span>
+      Table of contents
+    </label>
+    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
+      
+        <li class="md-nav__item">
+  <a href="#document-types-supported-by-automatic-detection" class="md-nav__link">
+    <span class="md-ellipsis">
+      Document Types Supported by Automatic Detection
+    </span>
+  </a>
+  
+</li>
+      
+    </ul>
+  
+</nav>
+      
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../ignoring_sensitive_information/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Ignoring Sensitive Information
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" >
+        
+          
+          <label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Other Features
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_7">
+            <span class="md-nav__icon md-icon"></span>
+            Other Features
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../other_features/alerts/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Alerts
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../other_features/anonymization/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Anonymization
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../other_features/span_disambiguation/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Span Disambiguation
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../settings/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Settings
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+    
+  </ul>
+</nav>
+                  </div>
+                </div>
+              </div>
+            
+            
+              
+              <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
+                <div class="md-sidebar__scrollwrap">
+                  <div class="md-sidebar__inner">
+                    
+
+<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
+  
+  
+  
+    
+  
+  
+    <label class="md-nav__title" for="__toc">
+      <span class="md-nav__icon md-icon"></span>
+      Table of contents
+    </label>
+    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
+      
+        <li class="md-nav__item">
+  <a href="#document-types-supported-by-automatic-detection" class="md-nav__link">
+    <span class="md-ellipsis">
+      Document Types Supported by Automatic Detection
+    </span>
+  </a>
+  
+</li>
+      
+    </ul>
+  
+</nav>
+                  </div>
+                </div>
+              </div>
+            
+          
+          
+            <div class="md-content" data-md-component="content">
+              <article class="md-content__inner md-typeset">
+                
+                  
+
+  
+  
+
+
+<h1 id="excluding-by-document-type">Excluding by Document Type</h1>
+<p>Philter can automatically detect certain types of documents and exclude those documents from redaction of certain sensitive information. For example, you may want to redact SSN/TINs in all but one type of document.</p>
+<p>To exclude a document type from a specific filter, set the <code>excludeDocumentTypes</code> value to a list of document types to exclude for a filter strategy. Filter strategies for all filter types support the <code>excludeDocumentTypes</code> property.</p>
+<p>The following example excludes email addresses from being redacted in subpoena documents:</p>
+<pre><code>{
+   &quot;name&quot;: &quot;email-address&quot;,
+   &quot;identifiers&quot;: {
+      &quot;emailAddress&quot;: {
+         &quot;emailAddressFilterStrategies&quot;: [
+            {
+               &quot;strategy&quot;: &quot;REDACT&quot;,
+               &quot;redactionFormat&quot;: &quot;{{{REDACTED-%t}}}&quot;,
+               &quot;excludeDocumentTypes&quot;: [&quot;SUBPOENA&quot;]
+            }
+         ]
+      }
+   }
+}
+</code></pre>
+<p>In this example, email addresses are redacted in all document types except documents Philter identifies as being subpoena documents.</p>
+<h2 id="document-types-supported-by-automatic-detection">Document Types Supported by Automatic Detection</h2>
+<p>Philter currently supports automatically detecting the following document types.</p>
+<table>
+<thead>
+<tr>
+<th>Document Type</th>
+<th>Document Description</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>Subpoena</td>
+<td>Form 2540 - Federal Bankruptcy - SUBPOENA FOR RULE 2004 EXAMINATION</td>
+</tr>
+<tr>
+<td>Subpoena</td>
+<td>Form 2550 - Federal Bankruptcy - SUBPOENA TO APPEAR AND TESTIFY</td>
+</tr>
+<tr>
+<td>Subpoena</td>
+<td>Form 2560 - Federal Bankruptcy - SUBPOENA TO TESTIFY AT A DEPOSITION</td>
+</tr>
+<tr>
+<td>Subpoena</td>
+<td>Form 2570 - Federal Bankruptcy - SUBPOENA TO PRODUCE DOCUMENTS</td>
+</tr>
+<tr>
+<td>Subpoena</td>
+<td>AO 88 - SUBPOENA TO APPEAR AND TESTIFY AT A HEARING OR TRIAL IN A CIVIL ACTION</td>
+</tr>
+<tr>
+<td>Subpoena</td>
+<td>AO 88A - SUBPOENA TO TESTIFY AT A DEPOSITION IN A CIVIL ACTION</td>
+</tr>
+<tr>
+<td>Subpoena</td>
+<td>AO 88B - SUBPOENA TO PRODUCE DOCUMENTS, INFORMATION, OR OBJECTS</td>
+</tr>
+<tr>
+<td>Subpoena</td>
+<td>AO 89 - SUBPOENA TO TESTIFY AT A HEARING OR TRIAL IN A CRIMINAL CASE</td>
+</tr>
+<tr>
+<td>Subpoena</td>
+<td>AO 90 - SUBPOENA TO TESTIFY AT A DEPOSITION IN A CRIMINAL CASE</td>
+</tr>
+<tr>
+<td>Subpoena</td>
+<td>AO 110 - SUBPOENA TO TESTIFY BEFORE A GRAND JURY</td>
+</tr>
+</tbody>
+</table>
+
+
+
+
+
+
+
+
+
+
+
+
+                
+              </article>
+            </div>
+          
+          
+<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
+        </div>
+        
+      </main>
+      
+        <footer class="md-footer">
+  
+  <div class="md-footer-meta md-typeset">
+    <div class="md-footer-meta__inner md-grid">
+      <div class="md-copyright">
+  
+    <div class="md-copyright__highlight">
+      Copyright 2024 Philterd, LLC
+    </div>
+  
+  
+    Made with
+    <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
+      Material for MkDocs
+    </a>
+  
+</div>
+      
+    </div>
+  </div>
+</footer>
+      
+    </div>
+    <div class="md-dialog" data-md-component="dialog">
+      <div class="md-dialog__inner md-typeset"></div>
+    </div>
+    
+    
+    <script id="__config" type="application/json">{"base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.6ce7567c.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
+    
+    
+      <script src="../../assets/javascripts/bundle.83f73b43.min.js"></script>
+      
+    
+  </body>
+</html>
\ No newline at end of file
diff --git a/policies/filter_policies/index.html b/policies/filter_policies/index.html
index f795a73..17c6303 100644
--- a/policies/filter_policies/index.html
+++ b/policies/filter_policies/index.html
@@ -750,6 +750,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by Document Type
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filter_strategies/index.html b/policies/filter_strategies/index.html
index f6cb82f..ed7dfa5 100644
--- a/policies/filter_strategies/index.html
+++ b/policies/filter_strategies/index.html
@@ -18,7 +18,7 @@
         <link rel="prev" href="../filters/">
       
       
-        <link rel="next" href="../ignoring_sensitive_information/">
+        <link rel="next" href="../document_analysis/">
       
       
       <link rel="icon" href="../../assets/images/favicon.png">
@@ -834,6 +834,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by Document Type
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/ages/index.html b/policies/filters/common_filters/ages/index.html
index b4ae40d..6cad354 100644
--- a/policies/filters/common_filters/ages/index.html
+++ b/policies/filters/common_filters/ages/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by Document Type
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/bank-routing-numbers/index.html b/policies/filters/common_filters/bank-routing-numbers/index.html
index e99f3d9..1bb4c37 100644
--- a/policies/filters/common_filters/bank-routing-numbers/index.html
+++ b/policies/filters/common_filters/bank-routing-numbers/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by Document Type
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/bitcoin-addresses/index.html b/policies/filters/common_filters/bitcoin-addresses/index.html
index 9abc577..386cefb 100644
--- a/policies/filters/common_filters/bitcoin-addresses/index.html
+++ b/policies/filters/common_filters/bitcoin-addresses/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/creditcards/index.html b/policies/filters/common_filters/creditcards/index.html
index 4b52f29..1126d55 100644
--- a/policies/filters/common_filters/creditcards/index.html
+++ b/policies/filters/common_filters/creditcards/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/dates/index.html b/policies/filters/common_filters/dates/index.html
index 51cc91e..affa2b8 100644
--- a/policies/filters/common_filters/dates/index.html
+++ b/policies/filters/common_filters/dates/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/drivers-license-numbers/index.html b/policies/filters/common_filters/drivers-license-numbers/index.html
index 9039e03..43ce353 100644
--- a/policies/filters/common_filters/drivers-license-numbers/index.html
+++ b/policies/filters/common_filters/drivers-license-numbers/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/email-addresses/index.html b/policies/filters/common_filters/email-addresses/index.html
index 7fdacff..e45e209 100644
--- a/policies/filters/common_filters/email-addresses/index.html
+++ b/policies/filters/common_filters/email-addresses/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/iban-codes/index.html b/policies/filters/common_filters/iban-codes/index.html
index 450407d..dc5d759 100644
--- a/policies/filters/common_filters/iban-codes/index.html
+++ b/policies/filters/common_filters/iban-codes/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/ip-addresses/index.html b/policies/filters/common_filters/ip-addresses/index.html
index c25c916..919b91c 100644
--- a/policies/filters/common_filters/ip-addresses/index.html
+++ b/policies/filters/common_filters/ip-addresses/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/mac-addresses/index.html b/policies/filters/common_filters/mac-addresses/index.html
index b16c1b8..b4cc6b3 100644
--- a/policies/filters/common_filters/mac-addresses/index.html
+++ b/policies/filters/common_filters/mac-addresses/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/passport-numbers/index.html b/policies/filters/common_filters/passport-numbers/index.html
index 073345b..9e07d1f 100644
--- a/policies/filters/common_filters/passport-numbers/index.html
+++ b/policies/filters/common_filters/passport-numbers/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/phone-number-extensions/index.html b/policies/filters/common_filters/phone-number-extensions/index.html
index 28cffce..a015261 100644
--- a/policies/filters/common_filters/phone-number-extensions/index.html
+++ b/policies/filters/common_filters/phone-number-extensions/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/phone-numbers/index.html b/policies/filters/common_filters/phone-numbers/index.html
index 8737428..eb6b442 100644
--- a/policies/filters/common_filters/phone-numbers/index.html
+++ b/policies/filters/common_filters/phone-numbers/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/sections/index.html b/policies/filters/common_filters/sections/index.html
index 4acbfe2..6502605 100644
--- a/policies/filters/common_filters/sections/index.html
+++ b/policies/filters/common_filters/sections/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/ssns-and-tins/index.html b/policies/filters/common_filters/ssns-and-tins/index.html
index 1121dec..8ef2e9e 100644
--- a/policies/filters/common_filters/ssns-and-tins/index.html
+++ b/policies/filters/common_filters/ssns-and-tins/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/tracking-numbers/index.html b/policies/filters/common_filters/tracking-numbers/index.html
index 1092d7f..225f568 100644
--- a/policies/filters/common_filters/tracking-numbers/index.html
+++ b/policies/filters/common_filters/tracking-numbers/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/urls/index.html b/policies/filters/common_filters/urls/index.html
index b379e4a..60db3a0 100644
--- a/policies/filters/common_filters/urls/index.html
+++ b/policies/filters/common_filters/urls/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/vins/index.html b/policies/filters/common_filters/vins/index.html
index bd666b6..0e4828e 100644
--- a/policies/filters/common_filters/vins/index.html
+++ b/policies/filters/common_filters/vins/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/common_filters/zip-codes/index.html b/policies/filters/common_filters/zip-codes/index.html
index 9663c22..a81cbd8 100644
--- a/policies/filters/common_filters/zip-codes/index.html
+++ b/policies/filters/common_filters/zip-codes/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/custom_filters/dictionary/index.html b/policies/filters/custom_filters/dictionary/index.html
index 1ebc4a0..92fd002 100644
--- a/policies/filters/custom_filters/dictionary/index.html
+++ b/policies/filters/custom_filters/dictionary/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/custom_filters/identifier/index.html b/policies/filters/custom_filters/identifier/index.html
index 96eec9d..0671fa8 100644
--- a/policies/filters/custom_filters/identifier/index.html
+++ b/policies/filters/custom_filters/identifier/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/index.html b/policies/filters/index.html
index 4661aac..bb1268c 100644
--- a/policies/filters/index.html
+++ b/policies/filters/index.html
@@ -765,6 +765,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/locations/cities/index.html b/policies/filters/locations/cities/index.html
index a885a1a..8a198ed 100644
--- a/policies/filters/locations/cities/index.html
+++ b/policies/filters/locations/cities/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/locations/counties/index.html b/policies/filters/locations/counties/index.html
index a685d85..04c25b6 100644
--- a/policies/filters/locations/counties/index.html
+++ b/policies/filters/locations/counties/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/locations/hospital-abbreviations/index.html b/policies/filters/locations/hospital-abbreviations/index.html
index c43a1c2..613b9f9 100644
--- a/policies/filters/locations/hospital-abbreviations/index.html
+++ b/policies/filters/locations/hospital-abbreviations/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/locations/hospitals/index.html b/policies/filters/locations/hospitals/index.html
index 017b249..3fbe938 100644
--- a/policies/filters/locations/hospitals/index.html
+++ b/policies/filters/locations/hospitals/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/locations/state-abbreviations/index.html b/policies/filters/locations/state-abbreviations/index.html
index f96e526..bb08b1a 100644
--- a/policies/filters/locations/state-abbreviations/index.html
+++ b/policies/filters/locations/state-abbreviations/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/locations/states/index.html b/policies/filters/locations/states/index.html
index 5695def..adbd3e9 100644
--- a/policies/filters/locations/states/index.html
+++ b/policies/filters/locations/states/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/persons_names/first-names/index.html b/policies/filters/persons_names/first-names/index.html
index def5fcf..d3d0d71 100644
--- a/policies/filters/persons_names/first-names/index.html
+++ b/policies/filters/persons_names/first-names/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/persons_names/persons-names-ner/index.html b/policies/filters/persons_names/persons-names-ner/index.html
index a37c40b..21ecc56 100644
--- a/policies/filters/persons_names/persons-names-ner/index.html
+++ b/policies/filters/persons_names/persons-names-ner/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/persons_names/physician-names-ner/index.html b/policies/filters/persons_names/physician-names-ner/index.html
index 809afdc..e4dd21b 100644
--- a/policies/filters/persons_names/physician-names-ner/index.html
+++ b/policies/filters/persons_names/physician-names-ner/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/filters/persons_names/surnames/index.html b/policies/filters/persons_names/surnames/index.html
index 353a3d2..56501e6 100644
--- a/policies/filters/persons_names/surnames/index.html
+++ b/policies/filters/persons_names/surnames/index.html
@@ -677,6 +677,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/ignoring_sensitive_information/index.html b/policies/ignoring_sensitive_information/index.html
index dec33a8..7ec5f88 100644
--- a/policies/ignoring_sensitive_information/index.html
+++ b/policies/ignoring_sensitive_information/index.html
@@ -15,7 +15,7 @@
         <link rel="canonical" href="https://philterd.github.io/philter/policies/ignoring_sensitive_information/">
       
       
-        <link rel="prev" href="../filter_strategies/">
+        <link rel="prev" href="../splitting_input_text/">
       
       
         <link rel="next" href="../../other_features/alerts/">
@@ -681,6 +681,112 @@
                 
   
   
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
     
   
   
diff --git a/policies/sample_policies/index.html b/policies/sample_policies/index.html
index 4dee7a2..f9acc5c 100644
--- a/policies/sample_policies/index.html
+++ b/policies/sample_policies/index.html
@@ -795,6 +795,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/policies/splitting_input_text/index.html b/policies/splitting_input_text/index.html
new file mode 100644
index 0000000..cf9d97c
--- /dev/null
+++ b/policies/splitting_input_text/index.html
@@ -0,0 +1,1178 @@
+
+<!doctype html>
+<html lang="en" class="no-js">
+  <head>
+    
+      <meta charset="utf-8">
+      <meta name="viewport" content="width=device-width,initial-scale=1">
+      
+        <meta name="description" content="User guide for Philter, the open source PII/PHI redaction engine.">
+      
+      
+        <meta name="author" content="philterd">
+      
+      
+        <link rel="canonical" href="https://philterd.github.io/philter/policies/splitting_input_text/">
+      
+      
+        <link rel="prev" href="../excluding_by_document_type/">
+      
+      
+        <link rel="next" href="../ignoring_sensitive_information/">
+      
+      
+      <link rel="icon" href="../../assets/images/favicon.png">
+      <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.5.42">
+    
+    
+      
+        <title>Splitting Input Text - Philter</title>
+      
+    
+    
+      <link rel="stylesheet" href="../../assets/stylesheets/main.0253249f.min.css">
+      
+        
+        <link rel="stylesheet" href="../../assets/stylesheets/palette.06af60db.min.css">
+      
+      
+
+
+    
+    
+      
+    
+    
+      
+        
+        
+        <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+        <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback">
+        <style>:root{--md-text-font:"Roboto";--md-code-font:"Roboto Mono"}</style>
+      
+    
+    
+    <script>__md_scope=new URL("../..",location),__md_hash=e=>[...e].reduce(((e,_)=>(e<<5)-e+_.charCodeAt(0)),0),__md_get=(e,_=localStorage,t=__md_scope)=>JSON.parse(_.getItem(t.pathname+"."+e)),__md_set=(e,_,t=localStorage,a=__md_scope)=>{try{t.setItem(a.pathname+"."+e,JSON.stringify(_))}catch(e){}}</script>
+    
+      
+
+    
+    
+    
+  </head>
+  
+  
+    
+    
+    
+    
+    
+    <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="red" data-md-color-accent="indigo">
+  
+    
+    <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off">
+    <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off">
+    <label class="md-overlay" for="__drawer"></label>
+    <div data-md-component="skip">
+      
+        
+        <a href="#splitting-input-text" class="md-skip">
+          Skip to content
+        </a>
+      
+    </div>
+    <div data-md-component="announce">
+      
+    </div>
+    
+    
+      
+
+  
+
+<header class="md-header md-header--shadow" data-md-component="header">
+  <nav class="md-header__inner md-grid" aria-label="Header">
+    <a href="../.." title="Philter" class="md-header__button md-logo" aria-label="Philter" data-md-component="logo">
+      
+  
+  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
+
+    </a>
+    <label class="md-header__button md-icon" for="__drawer">
+      
+      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg>
+    </label>
+    <div class="md-header__title" data-md-component="header-title">
+      <div class="md-header__ellipsis">
+        <div class="md-header__topic">
+          <span class="md-ellipsis">
+            Philter
+          </span>
+        </div>
+        <div class="md-header__topic" data-md-component="header-topic">
+          <span class="md-ellipsis">
+            
+              Splitting Input Text
+            
+          </span>
+        </div>
+      </div>
+    </div>
+    
+      
+    
+    
+    
+    
+      <label class="md-header__button md-icon" for="__search">
+        
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
+      </label>
+      <div class="md-search" data-md-component="search" role="dialog">
+  <label class="md-search__overlay" for="__search"></label>
+  <div class="md-search__inner" role="search">
+    <form class="md-search__form" name="search">
+      <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required>
+      <label class="md-search__icon md-icon" for="__search">
+        
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg>
+        
+        <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg>
+      </label>
+      <nav class="md-search__options" aria-label="Search">
+        
+        <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1">
+          
+          <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg>
+        </button>
+      </nav>
+      
+    </form>
+    <div class="md-search__output">
+      <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix>
+        <div class="md-search-result" data-md-component="search-result">
+          <div class="md-search-result__meta">
+            Initializing search
+          </div>
+          <ol class="md-search-result__list" role="presentation"></ol>
+        </div>
+      </div>
+    </div>
+  </div>
+</div>
+    
+    
+      <div class="md-header__source">
+        <a href="https://github.com/philterd/philter" title="Go to repository" class="md-source" data-md-component="source">
+  <div class="md-source__icon md-icon">
+    
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
+  </div>
+  <div class="md-source__repository">
+    GitHub
+  </div>
+</a>
+      </div>
+    
+  </nav>
+  
+</header>
+    
+    <div class="md-container" data-md-component="container">
+      
+      
+        
+          
+        
+      
+      <main class="md-main" data-md-component="main">
+        <div class="md-main__inner md-grid">
+          
+            
+              
+              <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" >
+                <div class="md-sidebar__scrollwrap">
+                  <div class="md-sidebar__inner">
+                    
+
+
+
+<nav class="md-nav md-nav--primary" aria-label="Navigation" data-md-level="0">
+  <label class="md-nav__title" for="__drawer">
+    <a href="../.." title="Philter" class="md-nav__button md-logo" aria-label="Philter" data-md-component="logo">
+      
+  
+  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a3 3 0 0 0 3-3 3 3 0 0 0-3-3 3 3 0 0 0-3 3 3 3 0 0 0 3 3m0 3.54C9.64 9.35 6.5 8 3 8v11c3.5 0 6.64 1.35 9 3.54 2.36-2.19 5.5-3.54 9-3.54V8c-3.5 0-6.64 1.35-9 3.54"/></svg>
+
+    </a>
+    Philter
+  </label>
+  
+    <div class="md-nav__source">
+      <a href="https://github.com/philterd/philter" title="Go to repository" class="md-source" data-md-component="source">
+  <div class="md-source__icon md-icon">
+    
+    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 6.6.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2024 Fonticons, Inc.--><path d="M439.55 236.05 244 40.45a28.87 28.87 0 0 0-40.81 0l-40.66 40.63 51.52 51.52c27.06-9.14 52.68 16.77 43.39 43.68l49.66 49.66c34.23-11.8 61.18 31 35.47 56.69-26.49 26.49-70.21-2.87-56-37.34L240.22 199v121.85c25.3 12.54 22.26 41.85 9.08 55a34.34 34.34 0 0 1-48.55 0c-17.57-17.6-11.07-46.91 11.25-56v-123c-20.8-8.51-24.6-30.74-18.64-45L142.57 101 8.45 235.14a28.86 28.86 0 0 0 0 40.81l195.61 195.6a28.86 28.86 0 0 0 40.8 0l194.69-194.69a28.86 28.86 0 0 0 0-40.81"/></svg>
+  </div>
+  <div class="md-source__repository">
+    GitHub
+  </div>
+</a>
+    </div>
+  
+  <ul class="md-nav__list" data-md-scrollfix>
+    
+      
+      
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../.." class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Home
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../upgrading/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Upgrading
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../system_requirements/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    System Requirements
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" >
+        
+          
+          <label class="md-nav__link" for="__nav_4" id="__nav_4_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Quick Starts
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_4">
+            <span class="md-nav__icon md-icon"></span>
+            Quick Starts
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../quick_starts/quick_start_aws/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Philter Quick Start on AWS
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../quick_starts/quick_start_azure/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Philter Quick Start on Microsoft Azure
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../quick_starts/quick_start_gcp/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Philter Quick Start on Google Cloud
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_5" id="__nav_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    PII and De-identification
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_5">
+            <span class="md-nav__icon md-icon"></span>
+            PII and De-identification
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../deidentification/pii_phi_nppi/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    PII, PHI, and NPPI
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../deidentification/deidentification/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Deidentification
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_5_3" >
+        
+          
+          <label class="md-nav__link" for="__nav_5_3" id="__nav_5_3_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Deidentification Methods
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_5_3_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_5_3">
+            <span class="md-nav__icon md-icon"></span>
+            Deidentification Methods
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../deidentification/bucketing/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Bucketing
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../deidentification/date-shifting/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Date Shifting
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+    
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--active md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6" checked>
+        
+          
+          <label class="md-nav__link" for="__nav_6" id="__nav_6_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Policies
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_6_label" aria-expanded="true">
+          <label class="md-nav__title" for="__nav_6">
+            <span class="md-nav__icon md-icon"></span>
+            Policies
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../sample_policies/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Sample Policies
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../filter_policies/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Filter Policies
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../filters/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Filters
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../filter_strategies/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Filter Strategies
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+    
+  
+  
+  
+    <li class="md-nav__item md-nav__item--active">
+      
+      <input class="md-nav__toggle md-toggle" type="checkbox" id="__toc">
+      
+      
+        
+      
+      
+        <label class="md-nav__link md-nav__link--active" for="__toc">
+          
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+          <span class="md-nav__icon md-icon"></span>
+        </label>
+      
+      <a href="./" class="md-nav__link md-nav__link--active">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+      
+        
+
+<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
+  
+  
+  
+    
+  
+  
+    <label class="md-nav__title" for="__toc">
+      <span class="md-nav__icon md-icon"></span>
+      Table of contents
+    </label>
+    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
+      
+        <li class="md-nav__item">
+  <a href="#text-splitting-policy-properties" class="md-nav__link">
+    <span class="md-ellipsis">
+      Text Splitting Policy Properties
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#alternative-to-philter-splitting-text" class="md-nav__link">
+    <span class="md-ellipsis">
+      Alternative to Philter Splitting Text
+    </span>
+  </a>
+  
+</li>
+      
+    </ul>
+  
+</nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../ignoring_sensitive_information/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Ignoring Sensitive Information
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" >
+        
+          
+          <label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Other Features
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_7">
+            <span class="md-nav__icon md-icon"></span>
+            Other Features
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../other_features/alerts/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Alerts
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../other_features/anonymization/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Anonymization
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../other_features/span_disambiguation/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Span Disambiguation
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+    
+      
+      
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../settings/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Settings
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+    
+  </ul>
+</nav>
+                  </div>
+                </div>
+              </div>
+            
+            
+              
+              <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" >
+                <div class="md-sidebar__scrollwrap">
+                  <div class="md-sidebar__inner">
+                    
+
+<nav class="md-nav md-nav--secondary" aria-label="Table of contents">
+  
+  
+  
+    
+  
+  
+    <label class="md-nav__title" for="__toc">
+      <span class="md-nav__icon md-icon"></span>
+      Table of contents
+    </label>
+    <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix>
+      
+        <li class="md-nav__item">
+  <a href="#text-splitting-policy-properties" class="md-nav__link">
+    <span class="md-ellipsis">
+      Text Splitting Policy Properties
+    </span>
+  </a>
+  
+</li>
+      
+        <li class="md-nav__item">
+  <a href="#alternative-to-philter-splitting-text" class="md-nav__link">
+    <span class="md-ellipsis">
+      Alternative to Philter Splitting Text
+    </span>
+  </a>
+  
+</li>
+      
+    </ul>
+  
+</nav>
+                  </div>
+                </div>
+              </div>
+            
+          
+          
+            <div class="md-content" data-md-component="content">
+              <article class="md-content__inner md-typeset">
+                
+                  
+
+  
+  
+
+
+<h1 id="splitting-input-text">Splitting Input Text</h1>
+<p>On a per-policy basis, Philter can split input text to process each split individually. This can improve performance and allows for handling long input text. Splitting is disabled by default.</p>
+<p>An example split configuration in a policy is shown below:</p>
+<pre><code>{
+  &quot;name&quot;: &quot;default&quot;,
+  &quot;identifiers&quot;: {}, 
+  &quot;config&quot;: {
+    &quot;splitting&quot;: {
+      &quot;enabled&quot;: true,
+      &quot;threshold&quot;: 10000,
+      &quot;method&quot;: &quot;newline&quot;
+    }
+  }
+}
+</code></pre>
+<p>In this example policy, splitting is enabled for inputs greater than or equal to 10,000 characters in length.</p>
+<p>The method of splitting the text will be the <code>newline</code> method. This method will cause Philter to split the text based on the locations of new line characters in the input text. Additional methods of text splitting may be added in future versions.</p>
+<p>Because the newline method splits text based on the locations of new line characters in the text, the text contained in the reassembled filter responses may not be an exact match of the input text. This is due to white space and other characters that may reside near the new line characters that get omitted during processing.</p>
+<h3 id="text-splitting-policy-properties">Text Splitting Policy Properties</h3>
+<table>
+<thead>
+<tr>
+<th>Property</th>
+<th>Description</th>
+<th>Allowed Values</th>
+<th>Default Value</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td><code>enabled</code></td>
+<td>Whether or not input texts are split. When <code>false</code>, requests with text exceeding the threshold generate an <code>HTTP 413 PayloadTooLarge</code> error response.</td>
+<td><code>true</code> or <code>false</code></td>
+<td><code>false</code></td>
+</tr>
+<tr>
+<td><code>threshold</code></td>
+<td>The input text length, in characters, at which the text is split. Set to <code>-1</code> to disable splitting.</td>
+<td>Any integer value.</td>
+<td><code>10000</code></td>
+</tr>
+<tr>
+<td><code>method</code></td>
+<td>How to split the text.</td>
+<td><code>newline</code></td>
+<td><code>newline</code></td>
+</tr>
+</tbody>
+</table>
+<h3 id="alternative-to-philter-splitting-text">Alternative to Philter Splitting Text</h3>
+<p>In some cases it may be best to split your input text client side prior to sending the text to Philter. This gives you full control over how the text will be split and provides more predictable responses from Philter because you know how the text is split.</p>
+<p>An example of splitting text into chunks prior to sending the text to Philter is given in the commands below:</p>
+<pre><code># Given a large file called largefile.txt, split it into 10 KB pieces named /tmp/segmentaa, /tmp/segmentab, ...
+$ split -b 10k largefile.txt /tmp/segment
+
+# Now process the pieces.
+$ curl -s -X POST -k &quot;https://philter:8080/api/filter?d=document1&quot; --data &quot;@/tmp/segmentaa&quot; -H &quot;Content-type: text/plain&quot; &gt; out1
+$ curl -s -X POST -k &quot;https://philter:8080/api/filter?d=document1&quot; --data &quot;@/tmp/segmentab&quot; -H &quot;Content-type: text/plain&quot; &gt; out2
+
+# Now recombine the outputs into a single file.
+$ cat out1 out2 &gt; filtered.txt
+</code></pre>
+
+
+
+
+
+
+
+
+
+
+
+
+                
+              </article>
+            </div>
+          
+          
+<script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script>
+        </div>
+        
+      </main>
+      
+        <footer class="md-footer">
+  
+  <div class="md-footer-meta md-typeset">
+    <div class="md-footer-meta__inner md-grid">
+      <div class="md-copyright">
+  
+    <div class="md-copyright__highlight">
+      Copyright 2024 Philterd, LLC
+    </div>
+  
+  
+    Made with
+    <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener">
+      Material for MkDocs
+    </a>
+  
+</div>
+      
+    </div>
+  </div>
+</footer>
+      
+    </div>
+    <div class="md-dialog" data-md-component="dialog">
+      <div class="md-dialog__inner md-typeset"></div>
+    </div>
+    
+    
+    <script id="__config" type="application/json">{"base": "../..", "features": [], "search": "../../assets/javascripts/workers/search.6ce7567c.min.js", "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}}</script>
+    
+    
+      <script src="../../assets/javascripts/bundle.83f73b43.min.js"></script>
+      
+    
+  </body>
+</html>
\ No newline at end of file
diff --git a/quick_starts/quick_start_aws/index.html b/quick_starts/quick_start_aws/index.html
index 7c15691..b8cedaa 100644
--- a/quick_starts/quick_start_aws/index.html
+++ b/quick_starts/quick_start_aws/index.html
@@ -774,6 +774,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/quick_starts/quick_start_azure/index.html b/quick_starts/quick_start_azure/index.html
index 00d94f7..f442529 100644
--- a/quick_starts/quick_start_azure/index.html
+++ b/quick_starts/quick_start_azure/index.html
@@ -774,6 +774,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/quick_starts/quick_start_gcp/index.html b/quick_starts/quick_start_gcp/index.html
index f9bf3a1..2fa56da 100644
--- a/quick_starts/quick_start_gcp/index.html
+++ b/quick_starts/quick_start_gcp/index.html
@@ -774,6 +774,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/search/search_index.json b/search/search_index.json
index e37773d..2a9fd76 100644
--- a/search/search_index.json
+++ b/search/search_index.json
@@ -1 +1 @@
-{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Philter","text":"<p>Philter is an application that finds, identifies, and removes sensitive information, such as protected health information (PHI) and personally identifiable information (PII), and user-defined sensitive information from natural language text. Philter is ideal for usage in text processing pipelines where sensitive information needs removed, encrypted, or redacted from the text.</p> <p>This documentation applies to Philter 2.4.0. If you are upgrading to this version see Upgrading Philter.</p> <p>To get going fast, jump to the Quick Starts to launch Philter on AWS, Azure, or Google Cloud.</p>"},{"location":"evaluating-performance/","title":"How to Evaluate Phileas' Performance","text":"<p>A common question we receive is how well does Phileas perform? Our answer to this question is probably less than satisfactory because it simply depends. What does it depend on? Phileas' performance is heavily dependent upon your individual data. Sharing to compare metrics of Phileas' performance between different customer datasets is like comparing apples and oranges.</p> <p>If your data is not exactly like another customer's data then the metrics will not be applicable to your data. In terms of the classic information retrieval metrics precision and recall, comparing these values between customers can give false impressions about Phileas' performance, both good and bad.</p> <p>This guide walks you through how to evaluate Phileas' performance. If you are just getting started with Phileas please see the Quick Starts instead. Then you can come back here to learn how to evaluate Phileas' performance.</p>"},{"location":"evaluating-performance/#guide-to-evaluating-performance","title":"Guide to Evaluating Performance","text":"<p>We have created this guide to help guide you in evaluating Phileas' performance on your data. 
The guide involves determining the types of sensitive information you want to redact, configuring those filters, optimizing the configuration, and then capturing the performance metrics.</p> <p>If you are using Philter we will gladly perform these steps for you and provide you a detailed Phileas performance report generated from your data. Please contact us to start the process.</p>"},{"location":"evaluating-performance/#what-you-need","title":"What You Need","text":"<p>To evaluate Phileas' performance you need:</p> <ul> <li>An application using Phileas.</li> <li>A list of the types of sensitive information you want to redact.</li> <li>A data set representative of the text you will be redacting using Phileas. It's important the data set be representative so the evaluation results will transfer to the actual data redaction.</li> <li>The same data set but with annotated sensitive information. These annotations will be used to calculate the precision and recall metrics.</li> </ul>"},{"location":"evaluating-performance/#configuring-phileas","title":"Configuring Phileas","text":"<p>Before we can begin our evaluation we need to create a policy. A policy is a file that defines the types of sensitive information that will be redacted and how it will be redacted. The policies are stored on the Phileas instance under <code>/opt/Phileas/policies</code>. You can edit the policies directly there using a text editor or you can use Phileas' API to upload a policy. In this case we recommend just using a text editor on the Phileas instance to create a policy.</p> <p>When using a text editor to create and edit a policy, be sure to save the policy often. 
Frequent saving can make editing a policy easier.</p> <p>We also recommend considering to place your policy directory under source control to have a history and change log of your policies.</p>"},{"location":"evaluating-performance/#creating-a-policy","title":"Creating a Policy","text":"<p>Make a copy of the default policy, and we will modify the copy for our needs.</p> <p><code>cp /opt/Phileas/policies/default.json /opt/Phileas/policies/evaluation.json</code></p> <p>Now open <code>/opt/Phileas/policies/evaluation.json</code> in a text editor. (The content of <code>evaluation.json</code> will be similar to what's shown below but may have minor differences between different versions of Phileas.)</p> <pre><code>{\n   \"name\": \"default\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre> <p>The first thing we need to do is to set the name of the policy. Replace <code>default</code> with <code>evaluation</code> and save the file.</p>"},{"location":"evaluating-performance/#identifying-the-filters-you-need","title":"Identifying the Filters You Need","text":"<p>The rest of the file contains the filters that are enabled in the default policy. We need to make sure that each type of sensitive information that you want to redact is represented by a filter in this file. 
Look through the rest of the policy and determine which filters are listed that you do not need and also which filters you do need that are not listed.</p>"},{"location":"evaluating-performance/#disabling-filters-we-do-not-need","title":"Disabling Filters We Do Not Need","text":"<p>If a filter is listed in the policy and you do not need the filter you have two options. You can either delete those lines from the policy and save the file, or you can set the filter's <code>enabled</code> property to false. Using the <code>enabled</code> property allows you to keep the filter configuration in the policy in case it is needed later but both options have the same effect.</p>"},{"location":"evaluating-performance/#enabling-filters-not-in-the-default-policy","title":"Enabling Filters Not in the Default Policy","text":"<p>Let's say you want to redact bitcoin addresses. The bitcoin address filter is not in the default policy. To add the bitcoin address filter we will refer to Phileas' documentation on the bitcoin address filter, get the configuration, and copy it into the policy.</p> <p>From the bitcoin address filter documentation we see the configuration for the bitcoin address filter is:</p> <pre><code>      \"bitcoinAddress\": {\n         \"bitcoinAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n</code></pre> <p>We can copy this configuration and paste it into our policy:</p> <pre><code>{\n   \"name\": \"evaluation\",\n   \"identifiers\": {\n      \"bitcoinAddress\": {\n         \"bitcoinAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": 
\"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre> <p>The order of the filters in the policy does not matter and has no impact on performance. We typically place the filters in the policy alphabetically just to improve readability.</p> <p>Repeat these steps until you have added a filter for each of the types of sensitive information you want to redact. Typically, the default redaction <code>strategy</code> and <code>redactionFormat</code> values for each filter should be fine for evaluation.</p> <p>When finished modifying the policy, save the file and close the text editor. Now restart Phileas for the policy changes to be loaded:</p> <pre><code>sudo systemctl restart Phileas\n</code></pre>"},{"location":"evaluating-performance/#submitting-text-for-redaction","title":"Submitting Text for Redaction","text":"<p>With our policy in place we can now send text to Phileas for redaction using that policy:</p> <pre><code>PhileasConfiguration phileasConfiguration = ConfigFactory.create(PhileasConfiguration.class);\n\nFilterService filterService = new PhileasFilterService(phileasConfiguration);\n\nFilterResponse response = filterService.filter(policies, context, documentId, body, MimeType.TEXT_PLAIN);\n</code></pre> <p>The <code>explain</code> API endpoint produces a detailed description of the redaction. The response will include a list of spans that contain the start and stop positions of redacted text and the type of sensitive information that was redacted. 
Using this information we can compare the redacted information to our annotated file to calculate precision and recall metrics.</p>"},{"location":"evaluating-performance/#calculating-precision-and-recall","title":"Calculating Precision and Recall","text":"<p>Now we can calculate the precision and recall metrics.</p> <ul> <li>Precision is the number of true positives divided by the number true positives plus false positives.</li> <li>Recall is the number of true positives divided by the number of false negatives plus true positives.</li> </ul> <p></p> <ul> <li>The F-1 score is the harmonic mean of precision and recall.</li> </ul> <p></p>"},{"location":"settings/","title":"Settings","text":"<p>Phileas has settings to control how it operates. The settings and how to configure each are described below.</p> <p>The configuration for the types of sensitive information that Phileas identifies are defined in filter policies outside of Phileas' configuration properties described on this page.</p>"},{"location":"settings/#configuring-phileas","title":"Configuring Phileas","text":""},{"location":"settings/#the-phileas-settings-file","title":"The Phileas Settings File","text":"<p>Phileas looks for its settings in an <code>application.properties</code> file.</p>"},{"location":"settings/#using-environment-variables","title":"Using Environment Variables","text":"<p>Properties set via environment variables take precedence over properties set in Phileas' settings file.</p> <p>All following properties can also be set as environment variables by prepending <code>PHILTER_</code> to the property name and changing periods to underscores. 
For example, the property <code>filter.profiles.directory</code> can be set using the environment variable <code>PHILTER_FILTER_PROFILES_DIRECTORY</code> by:</p> <pre><code>export PHILTER_FILTER_PROFILES_DIRECTORY=/profiles/\n</code></pre> <p>Using environment variables to configure Phileas instead of using Phileas' settings file can allow for easier configuration management when deploying Phileas.</p>"},{"location":"settings/#policies","title":"Policies","text":"Setting Description Allowed Values Default Value <code>filter.policies.directory</code> The directory in which to look for policies. Any valid directory path. <code>./policies/</code>"},{"location":"settings/#span-disambiguation","title":"Span Disambiguation","text":"<p>These values configure Phileas' span disambiguation feature to determine the most appropriate type of sensitive information when duplicate spans are identified. In a deployment of multiple Phileas instances, you must enable the cache service for span disambiguation to work as expected.</p> Description Allowed Values Default Value <code>span.disambiguation.enabled</code> Whether or not to enable span disambiguation. <code>true</code>, <code>false</code> <code>false</code>"},{"location":"settings/#cache-service","title":"Cache Service","text":"<p>The cache service is required to use consistent anonymization and policies stored in Amazon S3. Phileas supports Redis as the backend cache. When Redis is not used, an in-memory cache is used instead. The in-memory cache is not recommended because all contents will be stored in memory on the local Phileas instance.</p> <p>The cache will contain sensitive information. It is important that you take the necessary precautions to secure the cache itself and all communication between Phileas and the cache.</p> Setting Description Allowed Values Default Value <code>cache.redis.enabled</code> Whether or not to use Redis as the cache. 
<code>true</code>, <code>false</code> <code>false</code> <code>cache.redis.host</code> The hostname or IP address of the Redis cache. Any valid Redis endpoint. None <code>cache.redis.port</code> The Redis cache port. Any valid port. <code>6379</code> <code>cache.redis.auth.token</code> The Redis auth token. Any valid token. None <code>cache.redis.ssl</code> Whether or not to use SSL for communication with the Redis cache. <code>true</code>, <code>false</code> <code>false</code> <p>The following Redis settings are only required when using a self-signed SSL certificate.</p> Setting Description Allowed Values Default Value <code>cache.redis.truststore</code> The path to the trust store. Any valid file path. None <code>cache.redis.truststore.password</code> The trust store password. Any valid file path. None <code>cache.redis.keystore</code> The path to the keystore. Any valid file path. None <code>cache.redis.keystore.password</code> The keystore password. Any valid file path. None"},{"location":"settings/#advanced-settings","title":"Advanced Settings","text":"<p>In most cases the settings below do not need changed. Contact us for more information on any of these settings.</p> Setting Description Allowed Values Default Value <code>ner.timeout.sec</code> Controls the timeout in seconds when performing name entity recognition. Longer text may require longer processing times. An integer value <code>600</code> <code>ner.max.idle.connections</code> The maximum number of idle connections to maintain for the named entity recognition. More connections may improve performance in some cases. An integer value. <code>30</code> <code>ner.keep.alive.duration.ms</code> The amount of time in milliseconds to keep named entity recognition connections alive. Longer text may require longer processing times. An integer value. 
<code>60</code>"},{"location":"system_requirements/","title":"System Requirements","text":"<p>When launched from a cloud marketplace, Philter is pre-configured and contains all required dependencies.</p> <p>Philter requires the following:</p> <ul> <li>2 vCPU (e.g., m5.large instance type on AWS)</li> <li>8 GB of RAM</li> <li>Java 17</li> </ul>"},{"location":"upgrading/","title":"Upgrading Philter","text":"<p>We recommend reviewing the Philter Release Notes prior to upgrading.</p>"},{"location":"upgrading/#upgrading-from-a-2x-version","title":"Upgrading from a 2.x Version","text":"<p>Upgrading Philter to the newest version requires moving Philter's configuration to the new version of Philter. To upgrade Philter from a 2.x version, follow the steps below.</p> <ol> <li>Launch a new instance of the newest version of Philter.</li> <li>Copy your policies from /opt/philter/policies to the new instance.</li> <li>Copy your /opt/philter/philter.properties to the new instance.</li> <li>Copy your /opt/philter/philter-ui.properties to the new instance.</li> <li>Replace the new virtual machine's properties file with your copy from step 1.</li> <li>Copy your policies from /opt/philter/policies to the new instance.</li> <li>If you have configured any SSL certificates for Philter, copy those files over to the new instance.</li> <li>Restart Philter: sudo systemctl restart philter.service &amp;&amp; sudo systemctl restart philter-ui.service &amp;&amp; sudo systemctl restart philter-ner.service</li> <li>Test the new Philter virtual machine to make sure it is behaving as expected.</li> <li>Decommission the old Philter instance.</li> </ol>"},{"location":"upgrading/#upgrading-from-a-1x-version","title":"Upgrading from a 1.x Version","text":"<p>Upgrading Philter to the newest version requires moving Philter's configuration to the new version of Philter. 
To upgrade Philter from a 1.x version, follow the steps below.</p> <ol> <li> <p>Make local copies of your current Philter's properties files.</p> </li> <li> <p><code>/opt/philter/philter.properties</code> (prior to 1.10.1 the filename was /opt/philter/application.properties)</p> </li> <li> <p><code>/opt/philter/philter-ui.properties</code> (not applicable prior to version 1.10)</p> </li> <li> <p>Launch a new instance of the newest version of Philter.</p> </li> <li>Replace the new virtual machine's properties file with your copy from step 1.</li> <li>Restart Philter: sudo systemctl restart philter.service sudo systemctl restart philter-ui.service sudo systemctl restart philter-ner.service</li> <li>Test the new Philter virtual machine to make sure it is behaving appropriately.</li> <li>Decommission the old Philter instance.</li> </ol>"},{"location":"deidentification/bucketing/","title":"Bucketing","text":""},{"location":"deidentification/date-shifting/","title":"Date Shifting","text":""},{"location":"deidentification/deidentification/","title":"De-identification Methods","text":"<p>There are several ways data can be de-identified, and which you use depends on the types of data you want to de-identify and your use-case for de-identifying the data. The terminology around the different methods is often used interchangeably, but there are differences between each method.</p> <p>In this User's Guide, we may use the terms <code>filter</code> and <code>redact</code> interchangeably.</p> <p>In Philter, de-identification methods vary for each type of sensitive information. For example, all types can be replaced or redacted, but only dates can be shifted and only zip codes can be truncated. How a de-identification method is applied by Philter is called a filter strategy. Each type of sensitive information can have one or more filter strategies, and the combination of the filter strategies you select is called a policy. 
A policy determines how a document will be de-identified.</p> <p>The following is a list of de-identification methods that describes how each method works and its applicability to our Philter software. De-identifying a document is likely to require a combination of the following methods. For instance, you may want to redact names, encrypt credit card numbers, and shift appointment dates.</p> De-identification MethodDescriptionReplacementReplaces sensitive information with a defined value. For example, you might want to replace a credit card number with the literal value \"CREDIT_CARD_NUMBER\".Redaction and MaskingRemoves sensitive information. Our Philter software gives you a choice of how to remove the sensitive information, whether it is by replacing it with ***** (masking) or by some other set of characters.EncryptionEncrypts sensitive information.Date ShiftingShifts dates either forward or backward by some interval.BucketingCategorizes data into buckets based on the data. Examples of bucketing is Philter can bucket dates into years, and zip codes by population. <p>A difference between Philter and other services is that Philter does not send your data to a third-party for de-identification. Philter runs in your cloud and your data stays in your cloud.</p>"},{"location":"deidentification/encryption/","title":"Encryption","text":""},{"location":"deidentification/pii_phi_nppi/","title":"PII, PHI, and NPPI","text":"<p>Philter has many predefined types of sensitive information called filters that can be redacted. The individual types are described below.</p> <ul> <li>Personally identifiable information (PII) is any information that could potentially be used to identify a specific person.</li> <li>Protected health information (PHI) is any information about health status, provision of health care, or payment for health care that can be linked to an individual. 
The Health Insurance Portability And Accountability Act (HIPAA) defines 18 types of PHI.</li> </ul>"},{"location":"deidentification/pii_phi_nppi/#predefined-types-of-pii-and-phi","title":"Predefined Types of PII and PHI","text":"<p>The types of sensitive information that Philter will identify is customizable. For example, if you are not interested in VIN numbers you can have Philter ignore them. This configuration is performed through Policies.</p> <p>Because Philter only operates on text, the biometric identifiers and face images outlined in the HIPAA regulations as PHI are not applicable to Philter. The types of sensitive information and how Philter identifies each one is listed in the table below.</p> Type of PHI How Philter Identifies It 1 <p>Names</p><p>Ex: John Smith, Jane Doe</p> <ul><li>Philter identifies names in natural language text using state of the art machine learning algorithms and natural language processing techniques to identify named-person entities.</li><li>Philter also uses common first name and surname dictionaries with spellcheck capability to identify common names per the US census.</li></ul> 2 <p>All geographical identifiers smaller than a state, except for the initial three digits of a zip code if, according to the current publicly available data from the U.S. Bureau of the Census: the geographic unit formed by combining all zip codes with the same three initial digits contains more than 20,000 people; and the initial three digits of a zip code for all such geographic units containing 20,000 or fewer people is changed to 000</p><p>Ex: 85055, 90213-1544</p> <ul><li>Philter can identify many US cities, US counties, and all US states (full names and abbreviations).</li><li>Philter uses a dictionary with spelling correction to identify misspelled locations.</li><li>Filter conditions in policies can be used to apply logic based on zip code population according to the US census. 
(Filter strategies can truncate the zip code.)</li><li>Philter also uses state of the art machine learning algorithms and natural language processing techniques to identify locations.</li><li>Philter includes a dictionary of some hospital locations to quickly identify medical locations.</li></ul> 3 <p>Dates (other than year) directly related to an individual</p><p>Ex: 10-10-2000. 10/10/2000, October 10, 2000</p> <ul><li>Philter can identify dates in many formats such as with hypens (10-10-2000), with slashes (10/10/2000), or spelled out (May 1, 2000).</li><li>Philter can also identify ages, e.g. 57 years, 57yrs.</li></ul> 4 <p>Phone Numbers</p><p>Ex: (304) 555-5555, 304-555-5555, 1-800-123-4567</p> <ul><li>Philter can identify phone numbers in many formats. (Philter is currently limited to US phone numbers.)</li></ul> 5 <p>Fax numbers</p><p>Ex: (304) 555-5555, 304-555-5555, 1-800-123-4567</p> <ul><li>Philter can identify fax numbers in many formats. (Philter is currently limited to US phone numbers.)</li></ul> 6 <p>Email addresses</p><p>Ex: john.fake.address@hotmail.com</p> <ul><li>Philter can identify email addresses per the email standard (summarized on Wikipedia).</li></ul> 7 <p>Social Security numbers</p><p>Ex: 123-45-6789, 123456789</p> <ul><li>Philter can identify social security numbers (SSNs) in multiple formats such as with spaces and hyphens.</li></ul> 8 <p>Medical record numbers</p><p>Ex: 86637729, AB473-6021, 473-6AB021</p> <ul><li>Philter can identify alphanumeric identifiers.</li></ul> 9 <p>Health insurance beneficiary numbers</p><p>Ex: 86637729, AB473-6021, 473-6AB021</p> <ul><li>Philter can identify alphanumeric identifiers.</li></ul> 10 <p>Account numbers</p><p>Ex: 86637729, AB473-6021, 473-6AB021</p> <ul><li>Philter can identify alphanumeric identifiers, as well as credit card numbers from all major types of credit cards.</li></ul> 11 <p>Certificate/license numbers</p><p>Ex: 86637729, AB473-6021, 473-6AB021</p> <ul><li>Philter can identify 
alphanumeric identifiers.</li></ul> 12 <p>Vehicle identifiers and serial numbers, including license plate numbers</p><p>Ex: WBAPM7G50ANL19218, 1GBJC34K3RE176005</p> <ul><li>Philter can identify vehicle serial numbers (17-character VIN numbers). License plates will be identified as alphanumeric identifiers.</li></ul> 13 <p>Device identifiers and serial numbers</p><p>Ex: H3SNPUHYEE7JD3H, 33778376</p> <ul><li>Philter can identify alphanumeric identifiers.</li></ul> 14 <p>Web Uniform Resource Locators (URLs)</p><p>Ex: myhomepage.com, http://myhomepage.com/folder/page.html, www.myhomepage.com/folder/page.html</p> <ul><li>Philter can identify URLs adhering to the URL naming standard.</li></ul> 15 <p>Internet Protocol (IP) address numbers</p><p>Ex: 127.0.0.1, 192.168.3.58, 2001:0db8:85a3:0000:0000:8a2e:0370:7334</p> <ul><li>Philter can identify IPv4 and IPv6 addresses.</li></ul> 16 Biometric identifiers, including finger, retinal and voice prints <ul><li>Not applicable \u2013 Philter only identifies PHI in text.</li></ul> 17 Full face photographic images and any comparable images <ul><li>Not applicable \u2013 Philter only identifies PHI in text.</li></ul> 18 <p>Any other unique identifying number, characteristic, or code except the unique code assigned by the investigator to code the data</p><p>Ex: 86637729, AB473-6021, 473-6AB021</p> <ul><li>Philter can identify alphanumeric identifiers.</li></ul>"},{"location":"deidentification/redaction-and-masking/","title":"Redaction and Masking","text":"<p>Redaction and masking are two methods of de-identification that are often used interchangeably. The term redaction refers to removing a sensitive value from a document. When we hear the term redaction we often think of an image of a document with black bars across pieces of the text.</p> <p>Masking is similar to redaction but allows for configuring how the sensitive value is removed. The most common example is using asterisks (i.e. 
******) in place of a sensitive value.</p>"},{"location":"deidentification/replacement/","title":"Replacement","text":"<p>Replacement is a method of de-identification that simply replaces a sensitive value with another value. Replacement is useful when the sensitive value is not needed once the document has been de-identified. Philter can replace a sensitive value with a preset value or with a random value.</p> <p>In Philter's filter strategies, replacement is achieved by using the strategy to <code>REDACT</code>, <code>STATIC_REPLACE</code> , or <code>RANDOM_REPLACE</code> .</p>"},{"location":"other_features/alerts/","title":"Alerts","text":"<p>Phileas can optionally generate alerts when a particular type of sensitive information is identified.</p>"},{"location":"other_features/alerts/#alert-conditions","title":"Alert Conditions","text":"<p>In a policy, each type of sensitive information can have zero or more filter strategies. Each filter strategy can optionally have a condition associated with it. When a condition is present, the filter strategy will only be satisfied when the condition is satisfied. For example, a condition may be created to only filter phone numbers that start with the digits <code>123</code> or only filter names that start with <code>John</code>. Filter strategy conditions give you granular control over the filtering process.</p> <p>When a filter strategy condition is satisfied, Phileas can optionally generate an alert. This feature allows you to be notified when a particular type of sensitive information is identified.</p>"},{"location":"other_features/alerts/#enabling-alerts","title":"Enabling Alerts","text":"<p>Alerts are enabled on a per-condition basis. For instance, given the following policy to identify email addresses, a condition has been added to only match the email address <code>test@test.com</code>. Because of the property <code>alert</code> set to <code>true</code>, an alert will be generated when this condition is satisfied. 
By default, the alert property is set to <code>false</code> disabling alerts for the condition.</p> <pre><code>{\n  \"name\": \"email-address-alert\",\n  \"identifiers\": {\n    \"emailAddress\": {\n      \"emailAddressFilterStrategies\": [\n        {\n          \"id\": \"my-email-strategy\",\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\",\n          \"condition\": \"token == \\\"test@test.com\\\"\",\n          \"alert\": true\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"other_features/alerts/#structure-of-an-alert","title":"Structure of an Alert","text":"<p>An alert contains the following information:</p> Property Name Description <code>id</code> A unique ID for the alert formatted as an UUID. <code>filterProfile</code> The name of the policy triggering the alert. <code>strategyId</code> The ID of the filter strategy triggering the alert. In the example above the <code>id</code> would be <code>my-email-strategy</code>. <code>context</code> The context. <code>documentId</code> The ID of the document which triggered the alert. <code>filterType</code> The filter type (\"email-address\", \"credit-card\", etc.) triggering the alert. <code>date</code> A timestamp when the alert was generated formatted as <code>yyyy-MM-dd'T'HH:mm:ss.SSS'Z'</code>."},{"location":"other_features/alerts/#retrieving-and-deleting-alerts","title":"Retrieving and Deleting Alerts","text":"<p>The alerts that Phileas has generated are available through Phileas' alerts API. This API allows for retrieving and deleting alerts. Using this API you can build sophisticated notification systems around Phileas' capabilities.</p>"},{"location":"other_features/anonymization/","title":"Consistent Anonymization","text":"<p>Anonymization in the context of Phileas is the process of replacing certain values with random but similar values. 
For example, the identified name of \u201cJohn Smith\u201d may be replaced with \u201cDavid Jones\u201d, or an identified phone number of 123-555-9358 may be replaced by 842-436-2042. A VIN number will be replaced by a 17 character randomly selected VIN number that adheres to the standard for VIN numbers.</p> <p>Anonymization is useful in instances where you want to remove sensitive information from text without changing the meaning of the text. Anonymization can be enabled for each type of sensitive information in the policy by setting the filter strategy to <code>RANDOM_REPLACE</code>. (See Policies for more information.)</p>"},{"location":"other_features/anonymization/#consistent-anonymization_1","title":"Consistent Anonymization","text":"<p>Consistent anonymization refers to the process of always anonymizing the same sensitive information with the same replacement values. For example, if the name \"John Smith\" is randomly replaced with \"Pete Baker\", all other occurrences of \"John Smith\" will also be replaced by \"Pete Baker.\"</p> <p>Consistent anonymization can be done on the document level or on the context level. When enabled on the document level, \"John Smith\" will only be replaced by \"Pete Baker\" in the same document. If \"John Smith\" occurs in a separate document it will be anonymized with a different random name. When enabled on the context level, \"John Smith\" will be replaced by \"Pete Baker\" whenever \"John Smith\" is found in all documents in the same context.</p> <p>Enabling consistent anonymization on the context level requires a cache to store the sensitive information and the corresponding replacement values. If a single instance of Phileas is running, its internal cache service (enabled by default) is the best choice and no additional configuration is required.</p> <p>If multiple instances of Phileas are deployed together, Phileas requires access to a Redis cache service as shown below. 
See Phileas' Settings on how to configure the cache.</p> <p>When Phileas is deployed in a cluster, a Redis cache is required to enable consistent anonymization.</p> <p>The anonymization cache will contain PHI. It is important that you take the necessary precautions to secure the cache and all communication to and from the cache.</p>"},{"location":"other_features/span_disambiguation/","title":"Span Disambiguation","text":"<p>Span disambiguation is an optional feature in Phileas that is disabled by default. Refer to Phileas' Settings to enable and configure span disambiguation.</p> <p>In Phileas, a span is a piece of the input text that Phileas has identified as sensitive information. A span has a start and end positions, a confidence, a type, and other attributes. Ideally, each piece of identified sensitive information will only have a single span associated with it. In this case, the type of sensitive information is unambiguous. The goal of span disambiguation is provide more accurate filtering by removing the potential ambiguities in the types of sensitive information for duplicate spans.</p> <p>However, sometimes a piece of text can be identified by multiple spans, each having a different type of sensitive information. In an example hypothetical scenario, let's say given the input text <code>My SSN is 123456789.</code> , Phileas identifies <code>123456789</code> as an SSN and as a phone number. This type of scenario can be quite common, and its likelihood increases as the number of enabled filters in a policy increase.</p>"},{"location":"other_features/span_disambiguation/#how-phileas-span-disambiguation-works","title":"How Phileas' Span Disambiguation Works","text":"<p>When we read the sentence <code>My SSN is 123456789.</code> we can tell the span in question should be identified as an SSN because we can look at the text surrounding the span. 
We use the surrounding words to deduce the correct type of sensitive information for <code>123456789</code>.</p> <p>That is exactly how Phileas' span disambiguation works. When presented with identical spans differing only by the type of sensitive information, Phileas looks at the text surrounding the span in question in combination with the previous spans it has seen in the same context to determine which type of sensitive information is most likely to be correct. Phileas then removes the ambiguous spans from the results and replaces them with a single span.</p>"},{"location":"other_features/span_disambiguation/#improves-over-time","title":"Improves Over Time","text":"<p>Because Phileas is able to consider previously seen text to make its decision concerning ambiguous spans, Phileas' span disambiguation gets \"smarter\" as more text is filtered. This is because Phileas will have more text to consider in its calculations.</p>"},{"location":"other_features/span_disambiguation/#more-details","title":"More Details","text":""},{"location":"other_features/span_disambiguation/#span-disambiguation-and-confidence-values","title":"Span Disambiguation and Confidence Values","text":"<p>Span disambiguation is only invoked for spans that differ only by the type of sensitive information. This means the span's location (start and end positions), confidence, and all other values must match. If two spans have identical locations but have different confidence values, span disambiguation will not be applied and the span having the highest confidence will be used.</p>"},{"location":"other_features/span_disambiguation/#cache-service","title":"Cache Service","text":"<p>When multiple applications using Phileas are deployed alongside each other behind a load balancer, Phileas' cache service should be configured and enabled. Phileas will store the information needed to disambiguate spans in the cache such that the information is available to each instance of Phileas. 
If only a single instance of Phileas is running then the cache service is not required, however, the information needed to disambiguate spans will be stored in memory and will be lost when Phileas is stopped or restarted. Because of this, we recommend the cache service always be used unless there is a specific reason not to.</p>"},{"location":"other_features/span_disambiguation/#fine-tuning-the-span-disambiguation","title":"Fine-Tuning the Span Disambiguation","text":"<p>There are properties available to fine-tune how the span disambiguation operates. These properties are not documented because improper use of the properties could have a negative impact on performance. We will be glad to walk through these properties upon request.</p>"},{"location":"policies/filter_policies/","title":"Filter Policies","text":"<p>The types of sensitive information identified by Phileas and how that information is de-identified are controlled through policies. A policy is a file stored under Phileas\u2019s <code>policies</code> directory, which by default is located at <code>/opt/Phileas/policies/</code>. You can have an unlimited number of policies.</p> <p>Each policy has a <code>name</code> that is used by Phileas to apply the appropriate de-identification methods. The <code>name</code> is passed to Phileas\u2019s API along with the text to be filtered when submitting text to Phileas. This provides flexibility and allows you to de-identify different types of documents in differing manners with a single instance of Phileas. 
For example, you may have a policy for bankruptcy documents and a separate policy for financial documents.</p> <p>There are sample policies available for immediate use or customization to fit your use-cases.</p>"},{"location":"policies/filter_policies/#the-structure-of-a-policy","title":"The Structure of a Policy","text":"<p>A policy:</p> <ul> <li>Must have a <code>name</code> that uniquely identifies it.</li> <li>Must have a list of <code>identifiers</code> that are filters for sensitive information.<ul> <li>Each <code>identifier</code> , or filter, can have zero or more filter strategies. A filter strategy tells Phileas how to manipulate that type of sensitive information when it is identified.</li> </ul> </li> <li>Can have an optional list of terms or patterns.</li> <li>Can have encryption keys to support encryption of sensitive information.</li> </ul>"},{"location":"policies/filter_policies/#an-example-policy","title":"An Example Policy","text":"<p>The following is an example policy. In the example below you can see the types of sensitive information that are enabled and the strategy for manipulating each type when found. This policy identifies email addresses and phone numbers and redacts each with the format given.</p> <pre><code>{\n   \"name\": \"email-and-phone-numbers\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre> <p>When an email address is identified by this policy, the email address is replaced with the text <code>{{{REDACTED-email-address}}}</code>. The <code>%t</code> gets replaced by the type of the filter. 
Likewise, when a phone number is found it is replaced with the text <code>{{{REDACTED-phone-number}}}</code>. You are free to change the redaction formats to whatever fits your use-case. See Filter Strategies for all replacement options.</p> <p>The name of the policy is <code>email-and-phone-numbers</code>. Policies can be named anything you like but their names must be unique from all other policies. As a best practice, the policy should be saved as <code>[name].json</code>, e.g. <code>email-and-phone-numbers.json</code>.</p>"},{"location":"policies/filter_policies/#applying-a-policy-to-text","title":"Applying a Policy to Text","text":"<p>To use this policy we will save it as <code>/opt/Phileas/policies/email-and-phone-numbers.json</code>. We must restart Phileas for the new policy to be available for use. To apply the policy we will pass the policy's name to Phileas when making a filter request, as shown in the example request below.</p> <pre><code>curl -k -X POST \"https://localhost:8080/api/filter?c=context&amp;p=email-and-phone-numbers\" \\\n  -d @file.txt -H \"Content-Type: text/plain\"\n</code></pre> <p>In this command, we have provided the parameter <code>p</code> along with a value that is the name of the policy we want to use for this request. If we had multiple policies in Phileas we could choose a different policy for this request simply by changing the name given to the parameter <code>p</code>. For more details see Phileas\u2019s API.</p> <p>Phileas will process the contents of <code>file.txt</code> by applying the policy named <code>email-and-phone-numbers</code>. As we saw in the policy above, this policy redacts email addresses and phone numbers. 
Phileas will return the redacted text in response to the API call.</p> <p>To manipulate the sensitive information by methods other than redaction, see the Filter Strategies.</p>"},{"location":"policies/filter_strategies/","title":"Filter Strategies","text":"<p>A filter strategy defines how sensitive information identified by Phileas should be manipulated, whether it is redacted, replaced, encrypted, or manipulated in some other fashion.</p> <p>In a policy, you list the types of sensitive information that should be filtered. How Phileas replaces each type of sensitive information is specific to each type. For instance, zip codes can be truncated based on the leading digits or zip code population while phone numbers are redacted. These replacements are performed by \"filter strategies.\"</p> <p>Each filter can have one or more filter strategies and conditions can be used to determine when to apply each filter strategy.</p> <p>A sample policy containing a filter strategy is shown below. In this example, email addresses will be redacted.</p> <pre><code>{\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre> <p>Most of the filter strategies apply to all types of data, however, some filter strategies only apply to a few types. For example, the <code>TRUNCATE</code> filter strategy only applies to a zip code filter.</p>"},{"location":"policies/filter_strategies/#filter-strategies_1","title":"Filter Strategies","text":"<p>The filter strategies are described below. Each filter type can specify zero or more filter strategies. When no filter strategies are given, Phileas will default to <code>REDACT</code> for that filter type. 
When multiple filter strategies are given for a single filter type, the filter strategies will be applied in order as they are listed in the policy, top to bottom.</p> <ul> <li><code>REDACT</code></li> <li><code>CRYPTO_REPLACE</code>(AES encryption)</li> <li><code>HASH_SHA256_REPLACE</code>(SHA256 hash)</li> <li><code>FPE_ENCRYPT_REPLACE</code>(Format-preserving encryption)</li> <li><code>RANDOM_REPLACE</code></li> <li><code>STATIC_REPLACE</code></li> <li><code>TRUNCATE</code></li> <li><code>ZERO_LEADING</code></li> </ul>"},{"location":"policies/filter_strategies/#the-redact-filter-strategy","title":"The <code>REDACT</code> Filter Strategy","text":"<p>The REDACT filter strategy replaces sensitive information with a given redaction format. You can put variables in the redaction format that Phileas will replace when performing the redaction.</p> <p>The available redaction variables are:</p> Redaction Variable Description <code>%t</code> Will be replaced with the type of sensitive information. This is to allow you to know the type of sensitive information that was identified and redacted. <code>%l</code> Will be replaced by the given classification for the type of sensitive information. <code>%v</code> Will be replaced by the original value of the sensitive text. With <code>%v</code> you can annotate sensitive information instead of masking or removing it. 
<p>To redact sensitive information by replacing it with the type of sensitive information, the redaction format would be <code>REDACTED-%t</code>.</p> <p>An example filter using the <code>REDACT</code> filter strategy:</p> <pre><code>{\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#the-crypto_replace-filter-strategy-idcrypto","title":"The <code>CRYPTO_REPLACE</code> Filter Strategy {id=\"crypto\"}","text":"<p>The <code>CRYPTO_REPLACE</code> filter strategy replaces each identified piece of sensitive information by encrypting it using the AES encryption algorithm. To use this filter strategy, the policy must include the details of the encryption key as shown below:</p> <pre><code>{\n   \"name\":\"sample-profile\",\n   \"crypto\": {\n     \"key\": \"....\",\n     \"iv\": \"....\"\n   },\n   ...\n</code></pre> <p>In the snippet of a policy shown above, a crypto element is defined with a <code>key</code> and an initialization vector (<code>iv</code>). These two items are required to encrypt the sensitive information. To generate a key, run the following command:</p> <pre><code>openssl enc -e -aes-256-cbc -a -salt -P\n</code></pre> <p>You will be prompted to enter an encryption password. Once entered, the values of the <code>key</code> and <code>iv</code> will be shown. 
Copy and paste those values into the policy.</p> <p>An example policy using the <code>CRYPTO_REPLACE</code> filter strategy:</p> <pre><code>{\n   \"name\": \"email-address\",\n   \"crypto\": {\n     \"key\": \"....\",\n     \"iv\": \"....\"\n   },\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"CRYPTO_REPLACE\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#the-hash_sha256_replace-filter-strategy-idhash","title":"The <code>HASH_SHA256_REPLACE</code> Filter Strategy {id=\"hash\"}","text":"<p>The <code>HASH_SHA256_REPLACE</code> filter strategy replaces sensitive information with the SHA256 hash value of the sensitive information. To append a random salt value to each value prior to hashing, set the <code>salt</code> property to <code>true</code>. The salt value used will be returned in the <code>explain</code> response from Phileas' API.</p> <p>An example policy using the <code>HASH_SHA256_REPLACE</code> filter strategy:</p> <pre><code>{\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"HASH_SHA256_REPLACE\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#the-fpe_encrypt_replace-filter-strategy-idfpe","title":"The FPE_ENCRYPT_REPLACE Filter Strategy {id=\"fpe\"}","text":"<p>The <code>FPE_ENCRYPT_REPLACE</code> filter strategy uses format-preserving encryption (FPE) to encrypt the sensitive information. Phileas uses the FF3-1 algorithm for format-preserving encryption. The FPE_ENCRYPT_REPLACE filter strategy requires a <code>key</code> and a <code>tweak</code> value. These values control the format-preserving encryption. 
For more information on these values and format-preserving encryption, refer to the resources below:</p> <ul> <li>https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-38Gr1-draft.pdf</li> <li>https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-38g.pdf</li> </ul> <p>An example policy using the FPE_ENCRYPT_REPLACE filter strategy:</p> <pre><code>{\n   \"name\": \"credit-cards\",\n   \"identifiers\": {\n      \"creditCardNumbers\": {\n         \"creditCardNumbersFilterStrategies\": [\n            {\n               \"strategy\": \"FPE_ENCRYPT_REPLACE\",\n               \"key\": \"...\",\n               \"tweak\": \"...\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#the-random_replace-filter-strategy-idrandom","title":"The <code>RANDOM_REPLACE</code> Filter Strategy {id=\"random\"}","text":"<p>Replaces the identified text with a fake value but of the same type. For example, an SSN will be replaced by a random text having the format <code>###-##-####</code>, such as 123-45-6789. An email address will be replaced with a randomly generated email address. Available to all filter types.</p> <p>An example policy using the <code>RANDOM_REPLACE</code> filter strategy:</p> <pre><code>{\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"RANDOM_REPLACE\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#the-static_replace-filter-strategy-idstatic","title":"The <code>STATIC_REPLACE</code> Filter Strategy {id=\"static\"}","text":"<p>Replaces the identified text with a given static value. 
Available to all filter types.</p> <p>An example policy using the <code>STATIC_REPLACE</code> filter strategy:</p> <pre><code>{\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"STATIC_REPLACE\",\n               \"staticReplacement\": \"some new value\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#the-truncate-filter-strategy-idtruncate","title":"The <code>TRUNCATE</code> Filter Strategy {id=\"truncate\"}","text":"<p>Available only to zip codes, this strategy allows for truncating zip codes to only a select number of digits. Specify <code>truncateDigits</code> to set the desired number of leading digits to leave. For example, if <code>truncateDigits</code> is 2, the zip code 90210 will be truncated to <code>90***</code>. </p> <p>The TRUNCATE filter strategy is available only to the zip code filter. An example policy using the <code>TRUNCATE</code> filter strategy:</p> <pre><code>{\n   \"name\": \"zip-codes\",\n   \"identifiers\": {\n      \"zipCode\": {\n         \"zipCodeFilterStrategies\": [\n            {\n               \"strategy\": \"TRUNCATE\",\n               \"truncateDigits\": 3\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#the-zero_leading-filter-strategy-idzero_leading","title":"The <code>ZERO_LEADING</code> Filter Strategy {id=\"zero_leading\"}","text":"<p>Available only to zip codes, this strategy changes the first 3 digits of a zip code to be 0. For example, the zip code 90210 will be changed to 00010.</p> <p>The <code>ZERO_LEADING</code> filter strategy is only available to zip code filters. 
An example zip code filter using the <code>ZERO_LEADING</code> filter strategy:</p> <pre><code>{\n   \"name\": \"zip-codes\",\n   \"identifiers\": {\n      \"zipCode\": {\n         \"zipCodeFilterStrategies\": [\n            {\n               \"strategy\": \"ZERO_LEADING\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#filter-strategy-conditions","title":"Filter Strategy Conditions","text":"<p>A replacement strategy can be applied based on the sensitive information meeting one or more conditions. For example, you can create a condition such that only dates of <code>11/05/2010</code> are replaced by using the condition <code>token == \"11/05/2010\"</code>. The conditions that can be applied vary based on the type of sensitive information. For instance, zip codes can have conditions based on their population. Refer to each specific filter type for the conditions available.</p> <p>The following is an example policy for credit cards that contains a condition to only redact credit card numbers that start with the digits <code>3000</code>:</p> <pre><code>{\n  \"name\": \"default\",\n  \"identifiers\": {\n    \"creditCard\": {\n      \"creditCardFilterStrategies\": [\n        {\n          \"condition\": \"token startswith \\\"3000\\\"\",\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#combining-conditions","title":"Combining Conditions","text":"<p>Conditions can be joined through the use of the <code>and</code> keyword. When conditions are joined, each condition must be satisfied for the identified text to be filtered. If any of the conditions are not satisfied the identified text will not be filtered. 
Below is an example joined condition:</p> <pre><code>token != \"123-45-6789\" and context == \"my-context\"\n</code></pre> <p>This condition requires that the identified text (the token) not be equal to <code>123-45-6789</code> and the context be equal to <code>my-context</code>. Both of these conditions must be satisfied for the identified text to be filtered.</p> <p>Conversely, conditions can be <code>OR</code>'d through the use of multiple filter strategies. For example, if we want to <code>OR</code> a condition on the token and a condition on the context, we would use two filter strategies:</p> <pre><code>\"ssnFilterStrategies\": [\n  {\n    \"condition\": \"token != \\\"123-45-6789\\\"\",\n    \"strategy\": \"REDACT\",\n    \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n  },\n  {\n    \"condition\": \"context == \\\"my-context\\\"\",\n    \"strategy\": \"REDACT\",\n    \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n  }        \n]\n</code></pre>"},{"location":"policies/filters/","title":"Filters","text":"<p>A \"filter\" corresponds to a type of sensitive information. Phileas has filters for sensitive information such as names, addresses, ages, and lots of others.</p> <p>There are predefined filters that are ready to be used as well as custom filters that let you define your own filters to identify sensitive information outside of what the predefined filters can identify. An example of a custom filter is a filter to identify your patient account numbers, where the structure of an account number is specific to your organization.</p> <p>Each filter is capable of identifying and redacting a specific type of sensitive information. For example, there is a filter for phone numbers, a filter for US social security numbers, and a filter for person's names. You can enable any combination of these filters based on the types of sensitive information you need to redact.</p> <p>This section of the documentation describes the filters available in Phileas. 
The configuration options for each filter can vary due to the type of the sensitive information. For instance, only the zip code filter has a configuration to truncate the zip code.</p> <p>A selection of filters and their configurations is called a policy. A policy describes how to de-identify a document.</p>"},{"location":"policies/filters/#predefined-filters","title":"Predefined Filters","text":""},{"location":"policies/filters/#persons-names","title":"Person's Names","text":"<p>Phileas uses several methods to identify person's names.</p> Type Description First Names Identifies common first names Surnames Identifies common surnames Person's Names (NER) Identifies full names using natural language processing analysis Physician's Names (NER) Identifies physician names using natural language processing analysis"},{"location":"policies/filters/#other-filters","title":"Other Filters","text":"Type Description Ages Identifies ages such as <code>3.5 years old</code> Bank Routing Numbers Identifies bank routing numbers Bitcoin Addresses Identifies Bitcoin addresses such as <code>127NVqnjf8gB9BFAW2dnQeM6wqmy1gbGtv</code> Cities Identifies common cities Counties Identifies common counties Credit Card Numbers Identifies VISA, American Express, MasterCard, and Discover credit card numbers Dates Identifies dates in many formats such as May 22, 1999 Driver's License Numbers Identifies driver's license numbers for all 50 US states Email Addresses Identifies email addresses Hospitals Identifies common hospital names Hospital Abbreviations Identifies common hospitals by their name abbreviations IBAN Codes Identifies international bank account numbers IP Addresses Identifies IPv4 and IPv6 addresses MAC Addresses Identifies network MAC addresses Passport Numbers Identifies US passport numbers Phone Numbers Identifies phone numbers Phone Number Extensions Identifies phone number extensions Sections Identifies sections in text denoted by SSNs and TINs Identifies US SSNs and TINs States Identifies 
US state names State Abbreviations Identifies US state names by their abbreviations Tracking Numbers Identifies UPS, FedEx, and USPS tracking numbers URLs Identifies URLs VINs Identifies vehicle identification numbers Zip Codes Identifies US zip codes"},{"location":"policies/filters/#custom-filter-types-of-sensitive-information","title":"Custom Filter Types of Sensitive Information","text":"<p>In addition to the predefined types of sensitive information listed in the table above, you can also define your own types of sensitive information. Through custom identifiers and dictionaries, Phileas can identify many other types of information that may be sensitive in your use-case. For example, if you have patient identifiers that follow a pattern of <code>AA-00000</code> you can define a custom identifier for this sensitive information.</p> <p>Phileas can be configured to identify sensitive information based on custom dictionaries. When a term in the dictionary is found in the text, Phileas will treat the term as sensitive information and apply the given filter strategy.</p> <p>Custom dictionaries support fuzziness to accommodate for misspellings. The replacement strategy for a custom dictionary has a <code>sensitivityLevel</code> that controls the amount of allowed fuzziness.</p> Type Description Custom Dictionaries Identifies sensitive information based on dictionary values. Custom Identifiers Identifies custom alphanumeric identifiers that may be used for medical record numbers, patient identifiers, account number, or other specific identifier."},{"location":"policies/ignoring_sensitive_information/","title":"Ignoring Sensitive Information","text":"<p>Phileas can optionally ignore a list of terms and prevent those terms from being redacted. For example, if the name <code>John Smith</code> is being redacted and you do not want it to be redacted, you can add <code>John Smith</code> to an ignore list. 
Each time Phileas identifies sensitive information it will check the ignore lists to see if the sensitive information is to be ignored.</p> <p>Phileas can ignore terms and patterns per-policy, meaning each policy can have its own unique list of terms or patterns to ignore.</p>"},{"location":"policies/ignoring_sensitive_information/#ignore-lists","title":"Ignore Lists","text":"<p>Ignore lists can be specified at the policy level and/or for each filter in the policy. When set for the policy, the list of ignored terms will be applied to all filter types. When set for a filter, the list of ignored terms will be applied only to that filter.</p>"},{"location":"policies/ignoring_sensitive_information/#ignore-list-for-a-policy","title":"Ignore List for a Policy","text":"<p>In the policy shown below, an ignore list is set at the level of the policy. The terms specified in the list will be ignored for all filter types enabled in the policy. Only the terms property is required. The <code>name</code> and <code>caseSensitive</code> properties are optional.</p> <pre><code>{\n   \"name\": \"example-policy\",\n   \"ignored\": [\n     {\n       \"name\": \"names to ignore\",\n       \"terms\": [\"john smith\", \"jane doe\"],\n       \"caseSensitive\": false\n     }\n   ],\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre> <p>Terms to be ignored at the policy level can also be read from one or more files located on the local file system. 
The file must be formatted as one term per line.</p> <pre><code>{\n   \"name\": \"example-policy\",\n   \"ignored\": [\n     {\n       \"name\": \"names to ignore\",\n       \"terms\": [\"john smith\", \"jane doe\"],\n       \"files\": [\"/tmp/names.txt\"],\n       \"caseSensitive\": false\n     }\n   ],   \n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/ignoring_sensitive_information/#ignore-list-for-a-filter","title":"Ignore List for a Filter","text":"<p>In the policy shown below, an ignore list is set at the level of a filter. The terms specified in the list will be ignored only for that filter type. Each filter in a policy can have its own list of ignored terms. The terms listed will be ignored case-sensitive, meaning, \"John\" will be ignored if \"John\" is an ignored term but will not be ignored if \"john\" is an ignored term.</p> <pre><code>{\n   \"name\": \"example-filter-profile\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"ignored\": [\"john smith\", \"jane doe\"],\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/ignoring_sensitive_information/#ignoring-patterns","title":"Ignoring Patterns","text":"<p>Phileas can ignore information based on a regular expression pattern. An example use of this feature is to ignore terms that are present in your text but are dynamic, such as logged timestamps. When using the date filter these timestamps may be identified as being sensitive but you do not want them redacted. 
With an ignore pattern we can ignore the logged timestamps.</p>"},{"location":"policies/ignoring_sensitive_information/#ignore-patterns","title":"Ignore Patterns","text":"<p>Ignore patterns can be specified at the policy level and/or at the level of each type of filter. When set at the policy level, the list of ignored patterns will be applied to all filter types. When set for an individual filter, the list of ignored patterns will be applied only to that filter.</p>"},{"location":"policies/ignoring_sensitive_information/#ignore-patterns-for-a-policy","title":"Ignore Patterns for a Policy","text":"<p>In the policy shown below, ignore patterns are set at the level of the policy. The patterns specified in the list will be ignored for all filter types enabled in the policy.</p> <pre><code>{\n   \"name\": \"example-policy\",\n   \"ignoredPatterns\": [\n     {\n       \"name\": \"ignore-room-numbers\",\n       \"pattern\": \"Room [A-Z0-4]{4}\"\n     }\n   ],\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/ignoring_sensitive_information/#ignore-patterns-for-a-filter","title":"Ignore Patterns for a Filter","text":"<p>In the policy shown below, ignore patterns are set at the level of a filter. The patterns specified in the list will be ignored only for that filter type. 
Each filter in a policy can have its own list of ignored patterns.</p> <pre><code>{\n   \"name\": \"example-policy\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"ignoredPatterns\": [\n           {\n             \"name\": \"ignore-room-numbers\",\n             \"pattern\": \"Room [A-Z0-4]{4}\"\n           }\n         ],\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/sample_policies/","title":"Sample Policies","text":"<p>This page lists some sample policies. You can use these policies either as-is or as starting points for customizing them to meet your specific de-identification needs.</p> <p>These policies are examples and not an exhaustive list of all the sensitive information Phileas can identify. Items from each of these policies can be combined to make policies to meet your use-cases.</p>"},{"location":"policies/sample_policies/#email-addresses-and-phone-numbers","title":"Email Addresses and Phone Numbers","text":"<p>This policy finds email addresses and phone numbers and redacts them with <code>{{{REDACTED-email-address}}}</code> and <code>{{{REDACTED-phone-number}}}</code>, respectively.</p> <pre><code>{\n  \"name\": \"email-and-phone-numbers\",\n  \"identifiers\": {\n    \"emailAddress\": {\n      \"emailAddressFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"phoneNumber\": {\n      \"phoneNumberFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/sample_policies/#persons-names-and-ssns","title":"Persons Names and SSNs","text":"<p>This policy finds persons names and SSNs and redacts them with 
<code>{{{REDACTED-entity}}}</code> and <code>{{{REDACTED-ssn}}}</code>, respectively.</p> <pre><code>{\n  \"name\": \"persons-names-ssn\",\n  \"identifiers\": {\n    \"ner\": {\n      \"nerFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"ssn\": {\n      \"ssnFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/sample_policies/#dates-urls-and-vins","title":"Dates, URLs, and VINs","text":"<p>This policy finds dates, URLs, and VINs. Dates and URLs are redacted with <code>{{{REDACTED-date}}}</code> and <code>{{{REDACTED-url}}}</code>, respectively. Each VIN is replaced by a randomly generated VIN.</p> <pre><code>{\n  \"name\": \"dates-urls-vin\",\n  \"identifiers\": {\n    \"date\": {\n      \"dateFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"url\": {\n      \"urlFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"vin\": {\n      \"vinFilterStrategies\": [\n        {\n          \"strategy\": \"RANDOM_REPLACE\"\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/sample_policies/#ip-addresses","title":"IP Addresses","text":"<p>This policy finds IP addresses and replaces each identified IP address with the static text <code>IP_ADDRESS</code> as long as the IP address is not <code>127.0.0.1</code>. 
(A condition on the filter strategy sets the IP address requirement.)</p> <pre><code>{\n  \"name\": \"ip-addresses\",\n  \"identifiers\": {\n    \"ipAddress\": {\n      \"ipAddressFilterStrategies\": [\n        {\n          \"strategy\": \"STATIC_REPLACE\",\n          \"redactionFormat\": \"IP_ADDRESS\",\n          \"condition\": \"token != \\\"127.0.0.1\\\"\"\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/sample_policies/#zip-codes","title":"Zip Codes","text":"<p>This policy finds ZIP codes starting with <code>90</code> and truncates the zip code to just the first two digits.</p> <pre><code>{\n  \"name\": \"zip-codes\",\n  \"identifiers\": {\n    \"zipCode\": {\n      \"zipCodeFilterStrategies\": [\n        {\n          \"condition\": \"token startswith \\\"90\\\"\",\n          \"strategy\": \"TRUNCATE\",\n          \"truncateDigits\": 2\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/sample_policies/#enable-text-splitting","title":"Enable Text Splitting","text":"<p>This policy enables text splitting for input over 10,000 characters.</p> <pre><code>{\n  \"name\": \"default-split-enabled\",\n  \"config\": {\n    \"splitting\": {\n      \"enabled\": true,\n      \"threshold\": 10000,\n      \"method\": \"newline\"\n    }\n  },\n  \"identifiers\": {\n    \"ssn\": {\n      \"ssnFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/sample_policies/#globally-ignored-terms","title":"Globally Ignored Terms","text":"<p>This policy has a list of globally ignored terms.</p> <pre><code>{\n  \"name\": \"default-global-ignore\",\n  \"ignored\": [\n    {\n      \"name\": \"ignored credit cards\",\n      \"terms\": [\"4111111111111111\", \"0000000000000000\"]\n    }\n  ],\n  \"identifiers\": {\n    \"creditCard\": {\n      \"creditCardFilterStrategies\": [\n        {\n          
\"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/sample_policies/#generating-alerts","title":"Generating Alerts","text":"<p>This policy generates an alert when a matching email address is identified.</p> <pre><code>{\n  \"name\": \"email-address-alert\",\n  \"identifiers\": {\n    \"emailAddress\": {\n      \"emailAddressFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\",\n          \"condition\": \"token == \\\"test@test.com\\\"\",\n          \"alert\": true\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/ages/","title":"Ages","text":""},{"location":"policies/filters/common_filters/ages/#filter","title":"Filter","text":"<p>This filter identifies ages such as <code>3.5 years old</code> in text.</p>"},{"location":"policies/filters/common_filters/ages/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/ages/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>ageFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/ages/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. 
<code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/ages/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. The filter will only be applied when the condition is satisfied. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/ages/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"ages-example\",\n   \"identifiers\": {\n      \"age\": {\n         \"ageFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/bank-routing-numbers/","title":"Bank Routing Numbers","text":""},{"location":"policies/filters/common_filters/bank-routing-numbers/#filter","title":"Filter","text":"<p>This filter identifies bank routing numbers (ABA routing transit numbers) such as <code>111000025</code> in text. 
Identified routing numbers must pass checksum validation.</p>"},{"location":"policies/filters/common_filters/bank-routing-numbers/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/bank-routing-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>bankRoutingNumberFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/bank-routing-numbers/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/bank-routing-numbers/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. The filter will only be applied when the condition is satisfied. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. 
<code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/bank-routing-numbers/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"bank-routing-number-example\",\n   \"identifiers\": {\n      \"bankRoutingNumber\": {\n         \"bankRoutingNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/bitcoin-addresses/","title":"Bitcoin Addresses","text":""},{"location":"policies/filters/common_filters/bitcoin-addresses/#filter","title":"Filter","text":"<p>This filter identifies bitcoin addresses such as <code>1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2</code> in text.</p>"},{"location":"policies/filters/common_filters/bitcoin-addresses/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/bitcoin-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>bitcoinAddressFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/bitcoin-addresses/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. 
When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/bitcoin-addresses/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/bitcoin-addresses/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"bitcoin-address-example\",\n   \"identifiers\": {\n      \"bitcoinAddress\": {\n         \"bitcoinAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/creditcards/","title":"Credit Cards","text":""},{"location":"policies/filters/common_filters/creditcards/#filter","title":"Filter","text":"<p>This filter identifies credit cards such as <code>378282246310005</code> in text.</p>"},{"location":"policies/filters/common_filters/creditcards/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/creditcards/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>creditCardFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>onlyValidCreditCardNumbers</code> When set to true, only valid credit card numbers will be filtered. <code>true</code> <code>ignoreWhenInUnixTimestamp</code> When set to true, only credit card numbers that do not match the pattern for a Unix timestamp will be filtered. <code>false</code>"},{"location":"policies/filters/common_filters/creditcards/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. 
When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE) <code>LAST_4</code> Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/creditcards/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/creditcards/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"credit-cards-example\",\n   \"identifiers\": {\n      \"creditCard\": {\n         \"onlyValidCreditCardNumbers\": false,\n         \"creditCardFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/dates/","title":"Dates","text":""},{"location":"policies/filters/common_filters/dates/#filter","title":"Filter","text":"<p>This filter identifies dates such as <code>May 22, 2014</code> in text. The supported date formats are:</p> Format Example yyyy-MM-d 2020-05-10 MM-dd-yyyy 05-10-2020 M-d-y 5-10-2020 MMM dd May 5 or May 05 MMMM dd, yyyy May 5, 2020 or May 5 2020"},{"location":"policies/filters/common_filters/dates/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/dates/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>dateFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>onlyValidDates</code> When set to true, only valid dates will be filtered. <code>false</code>"},{"location":"policies/filters/common_filters/dates/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. 
When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>SHIFT</code> Shift the date by a number of months, days, and/or years. <code>SHIFTRANDOM</code> Shift the date by a random number of months, days, and years. <code>RELATIVE</code> Replace the date with words relative to the date."},{"location":"policies/filters/common_filters/dates/#filter-strategy-options","title":"Filter Strategy Options","text":"<p>The following filter strategy options are available for the <code>RELATIVE</code> filter strategy.</p> Description Default Value <code>futureDates</code> When <code>true</code>, future dates are replaced by relative words. When <code>false</code>, future dates are redacted. <code>false</code> <p>The following filter strategy options are available for the <code>SHIFT</code> filter strategy.</p> Option Description Default Value <code>shiftDays</code> The number of days to shift the date. Can be a negative or positive integer. Defaults to <code>0</code> if not specified. <code>0</code> <code>shiftMonths</code> The number of months to shift the date. Can be a negative or positive integer. Defaults to <code>0</code> if not specified. <code>0</code> <code>shiftYears</code> The number of years to shift the date. Can be a negative or positive integer. Defaults to <code>0</code> if not specified. <code>0</code>"},{"location":"policies/filters/common_filters/dates/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. 
See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>TOKEN</code> Compares the sensitive text to some category, e.g. <code>birthdate</code>. <code>is</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/dates/#differentiating-between-dates-and-birth-dates","title":"Differentiating Between Dates and Birth Dates","text":"<p>In some cases it may be necessary to redact birth dates and dates differently. Using conditions it is possible to determine if an identified date is a birth date. The conditional <code>token is birthdate</code> will determine if the identified date (token) is a birth date by analyzing the content surrounding the date.</p>"},{"location":"policies/filters/common_filters/dates/#example-policy-to-redact-dates","title":"Example Policy to Redact Dates","text":"<p>The following policy redacts dates.</p> <pre><code>{\n   \"name\": \"dates-example\",\n   \"identifiers\": {\n      \"date\": {\n         \"onlyValidDates\": false,\n         \"dateFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/dates/#example-policy-to-shift-dates","title":"Example Policy to Shift Dates","text":"<p>The following policy shifts dates forward by 2 days and 4 months.</p> <pre><code>{\n   \"name\": \"dates-example\",\n   \"identifiers\": {\n      \"date\": {\n         \"onlyValidDates\": false,\n         \"dateFilterStrategies\": [\n            {\n               \"strategy\": 
\"SHIFT\",\n               \"shiftDays\": 2,\n               \"shiftMonths\": 4,\n               \"shiftYears\": 0\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/drivers-license-numbers/","title":"Driver's License Numbers","text":""},{"location":"policies/filters/common_filters/drivers-license-numbers/#filter","title":"Filter","text":"<p>This filter identifies driver's license numbers such as 194784357 in text. Driver's license number formats for all 50 US states are supported.</p>"},{"location":"policies/filters/common_filters/drivers-license-numbers/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/drivers-license-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>driversLicenseFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/drivers-license-numbers/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. 
<code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/drivers-license-numbers/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/drivers-license-numbers/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"drivers-license-example\",\n   \"identifiers\": {\n      \"driversLicense\": {\n         \"driversLicenseFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/email-addresses/","title":"Email Addresses","text":""},{"location":"policies/filters/common_filters/email-addresses/#filter","title":"Filter","text":"<p>This filter identifies email addresses such as <code>john.fake.address@hotmail.com</code> in text.</p>"},{"location":"policies/filters/common_filters/email-addresses/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/email-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>emailAddressFilterStrategies</code> A list of filter strategies. 
None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>onlyStrictMatches</code> When set to false, the pattern for identifying email addresses will be relaxed. Filtered email addresses will have a lower confidence, but filter performance will increase. <code>true</code> <code>onlyValidTLDs</code> When set to true, only email addresses that are for a top-level domain are filtered. <code>false</code>"},{"location":"policies/filters/common_filters/email-addresses/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/email-addresses/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/email-addresses/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"email-address-example\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/iban-codes/","title":"IBAN Codes","text":""},{"location":"policies/filters/common_filters/iban-codes/#filter","title":"Filter","text":"<p>This filter identifies IBAN (international banking account numbers) Codes such as <code>HU4211773016111110180000000</code> in text. Driver's license number formats for all 50 US states are supported.</p>"},{"location":"policies/filters/common_filters/iban-codes/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/iban-codes/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>allowSpaces</code> When <code>true</code>, IBAN codes will be allowed to contain spaces and grouped in sections of 4. Set to <code>false</code> to disallow spaces in IBAN codes. <code>true</code> <code>ibanCodeFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>onlyValidIBANCodes</code> When set to true, only valid IBAN codes will be filtered. <code>true</code>"},{"location":"policies/filters/common_filters/iban-codes/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. 
When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE) <code>LAST_4</code> Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/iban-codes/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/iban-codes/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"iban-example\",\n   \"identifiers\": {\n      \"ibanCode\": {\n         \"onlyValidIBANCodes\": false,\n         \"ibanCodeFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/ip-addresses/","title":"IP Addresses","text":""},{"location":"policies/filters/common_filters/ip-addresses/#filter","title":"Filter","text":"<p>This filter identifies IPv4 and IPv6 addresses <code>127.0.0.1</code>, <code>192.168.3.58</code>, and <code>2001:0db8:85a3:0000:0000:8a2e:0370:7334</code> in text.</p>"},{"location":"policies/filters/common_filters/ip-addresses/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/ip-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>ipAddressFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/ip-addresses/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. 
<code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/ip-addresses/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/ip-addresses/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"ip-address-example\",\n   \"identifiers\": {\n      \"ipAddress\": {\n         \"ipAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/mac-addresses/","title":"MAC Addresses","text":""},{"location":"policies/filters/common_filters/mac-addresses/#filter","title":"Filter","text":"<p>This filter identifies MAC addresses in text.</p>"},{"location":"policies/filters/common_filters/mac-addresses/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/mac-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>macAddressFilterStrategies</code> A list of filter 
strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/mac-addresses/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/mac-addresses/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/mac-addresses/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"mac-address-example\",\n   \"identifiers\": {\n      \"macAddress\": {\n         \"macAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/passport-numbers/","title":"Passport Numbers","text":""},{"location":"policies/filters/common_filters/passport-numbers/#filter","title":"Filter","text":"<p>This filter identifies US passport numbers in text.</p>"},{"location":"policies/filters/common_filters/passport-numbers/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/passport-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>passportNumberFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/passport-numbers/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. 
<code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/passport-numbers/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CLASSIFICATION</code> Compares the issuing country of the passport number. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/passport-numbers/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"passport-number-example\",\n   \"identifiers\": {\n      \"passportNumber\": {\n         \"passportNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/phone-number-extensions/","title":"Phone Number Extensions","text":""},{"location":"policies/filters/common_filters/phone-number-extensions/#filter","title":"Filter","text":"<p>This filter identifies phone number extensions such as \"x100\" in text.</p>"},{"location":"policies/filters/common_filters/phone-number-extensions/#required-parameters","title":"Required Parameters","text":"<p>This filter has no 
required parameters.</p>"},{"location":"policies/filters/common_filters/phone-number-extensions/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>phoneNumberExtensionFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/phone-number-extensions/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/phone-number-extensions/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/phone-number-extensions/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"phone-number-ext-example\",\n   \"identifiers\": {\n      \"phoneNumberExtension\": {\n         \"phoneNumberExtensionFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      } \n   }     \n}\n</code></pre>"},{"location":"policies/filters/common_filters/phone-numbers/","title":"Phone Numbers","text":""},{"location":"policies/filters/common_filters/phone-numbers/#filter","title":"Filter","text":"<p>This filter identifies phone and fax numbers such as (304) 555-5555, 304-555-5555, and 1-800-123-4567 in text.</p>"},{"location":"policies/filters/common_filters/phone-numbers/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/phone-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>phoneNumberFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/phone-numbers/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. 
<code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/phone-numbers/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/phone-numbers/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"phone-number-example\",\n   \"identifiers\": {\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }     \n}\n</code></pre>"},{"location":"policies/filters/common_filters/sections/","title":"Sections","text":""},{"location":"policies/filters/common_filters/sections/#filter","title":"Filter","text":"<p>This filter identifies sections in text between a given start regular expression pattern and a given end regular expression pattern.</p>"},{"location":"policies/filters/common_filters/sections/#required-parameters","title":"Required Parameters","text":"Parameter Description Default Value <code>startPattern</code> A regular expression denoting the start of the section. 
None <code>endPattern</code> A regular expression denoting the end of the section. None"},{"location":"policies/filters/common_filters/sections/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>sectionFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/sections/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/sections/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/sections/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"sections-example\",\n   \"identifiers\": {\n      \"section\": {\n         \"startPattern\": \"START\",\n         \"endPattern\": \"END\",\n         \"sectionFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/ssns-and-tins/","title":"SSNs and TINs","text":""},{"location":"policies/filters/common_filters/ssns-and-tins/#filter","title":"Filter","text":"<p>This filter identifies US SSNs and TINs such as <code>123-45-6789</code> and <code>123456789</code> in text.</p>"},{"location":"policies/filters/common_filters/ssns-and-tins/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/ssns-and-tins/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>ssnFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/ssns-and-tins/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. 
<code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE) <code>LAST_4</code> Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/ssns-and-tins/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/ssns-and-tins/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"ssn-tin-example\",\n   \"identifiers\": {\n      \"ssn\": {\n         \"ssnFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/tracking-numbers/","title":"Tracking Numbers","text":""},{"location":"policies/filters/common_filters/tracking-numbers/#filter","title":"Filter","text":"<p>This filter identifies tracking numbers in text. 
FedEx, UPS, and USPS tracking number formats are supported.</p>"},{"location":"policies/filters/common_filters/tracking-numbers/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/tracking-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>trackingNumberFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/tracking-numbers/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE) <code>LAST_4</code> Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/tracking-numbers/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. 
<code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/tracking-numbers/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"tracking-numbers-example\",\n   \"identifiers\": {\n      \"trackingNumber\": {\n         \"trackingNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/urls/","title":"URLs","text":""},{"location":"policies/filters/common_filters/urls/#filter","title":"Filter","text":"<p>This filter identifies URLs such as <code>myhomepage.com</code>, <code>http://myhomepage.com/folder/page.html</code>, and <code>www.myhomepage.com/folder/page.html</code> in text.</p>"},{"location":"policies/filters/common_filters/urls/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/urls/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>urlFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>requireHttpWwwPrefix</code> When set to true, only URLs that begin with <code>http</code> or <code>www</code> will be filtered. <code>true</code>"},{"location":"policies/filters/common_filters/urls/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. 
When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/urls/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/urls/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"urls-example\",\n   \"identifiers\": {\n      \"url\": {\n         \"requireHttpWwwPrefix\": true,\n         \"urlFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/vins/","title":"VINs","text":""},{"location":"policies/filters/common_filters/vins/#filter","title":"Filter","text":"<p>This filter identifies 17-digit vehicle identification numbers (VINs) such as <code>WBAPM7G50ANL19218</code> and <code>1GBJC34K3RE176005</code> in text.</p>"},{"location":"policies/filters/common_filters/vins/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/vins/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>vinFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/vins/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. 
<code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE) <code>LAST_4</code> Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/vins/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/vins/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"vins-example\",\n   \"identifiers\": {\n      \"vin\": {\n         \"vinFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/zip-codes/","title":"Zip Codes","text":""},{"location":"policies/filters/common_filters/zip-codes/#filter","title":"Filter","text":"<p>This filter identifies zip codes in text.</p>"},{"location":"policies/filters/common_filters/zip-codes/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/zip-codes/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default 
Value <code>zipCodeFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>requireDelimiter</code> When set to false, the filter will not require a dash in 9 digit zip codes, e.g. 12345-6789. Setting to false may increase the number of zip code false positives. <code>true</code>"},{"location":"policies/filters/common_filters/zip-codes/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>TRUNCATE</code> Replace the sensitive text by removing the last <code>x</code> digits. (Set the number of digits using the <code>truncateDigits</code> parameter of the filter strategy.) <code>ZERO_LEADING</code> Replace the sensitive text by zeroing the first 3 digits."},{"location":"policies/filters/common_filters/zip-codes/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. 
<code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code> <code>POPULATION</code> Compares the population of the zip code against the 2010 census values. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/zip-codes/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"zip-code-example\",\n   \"identifiers\": {\n      \"zipCode\": {\n         \"zipCodeFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/custom_filters/dictionary/","title":"Dictionary","text":""},{"location":"policies/filters/custom_filters/dictionary/#filter","title":"Filter","text":"<p>This filter identifies custom text based on a given dictionary.</p>"},{"location":"policies/filters/custom_filters/dictionary/#required-parameters","title":"Required Parameters","text":"<p>At least one of <code>terms</code> or <code>files</code> must be provided.</p> Parameter Description Default Value <code>terms</code> A list of terms in the dictionary. None <code>files</code> A list of files containing terms one per line. None"},{"location":"policies/filters/custom_filters/dictionary/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>fuzzy</code> When set to true, the dictionary will employ fuzzy comparisons. Use the <code>sensitivity</code> parameter to control the level of fuzziness. 
Setting this value to false will disable fuzziness and provide a higher level of performance. <code>false</code> <code>classification</code> Used to apply an arbitrary label to the identifier, such as \"patient-id\", or \"account-number.\" <code>\"custom-identifier\"</code> <code>sensitivity</code> Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are <code>low</code>, <code>medium</code>, and <code>high</code>. Only applies when <code>fuzzy</code> is set to <code>true</code>. <code>medium</code>"},{"location":"policies/filters/custom_filters/dictionary/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/custom_filters/dictionary/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/custom_filters/dictionary/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"dictionary-example\",\n   \"identifiers\": {\n      \"dictionaries\": [\n         {\n            \"terms\": [\"john\", \"jane\", \"doe\"],\n            \"files\": [\"c:\\\\temp\\\\dictionary.txt\"],\n            \"fuzzy\": true,\n            \"sensitivity\": \"medium\",\n            \"sectionFilterStrategies\": [\n               {\n                  \"strategy\": \"REDACT\",\n                  \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n               }\n            ]\n         }\n      ]\n   }\n}\n</code></pre>"},{"location":"policies/filters/custom_filters/identifier/","title":"Identifier","text":""},{"location":"policies/filters/custom_filters/identifier/#filter","title":"Filter","text":"<p>This filter identifies custom text based on a given regular expression. </p> <p>The Identifier filter accepts a list of regular expression-based identifiers. See the policy at the bottom of this page for an example. </p> <p>Note that backslashes in the regular expression will need to be escaped for the policy to be valid JSON.</p>"},{"location":"policies/filters/custom_filters/identifier/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/custom_filters/identifier/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>caseSensitive</code> When set to true, the regular expression will be case sensitive. 
<code>true</code> <code>classification</code> Used to apply an arbitrary label to the identifier, such as \"patient-id\" or \"account-number\". <code>\"custom-identifier\"</code> <code>pattern</code> A regular expression for the identifier. Note that backslashes will need to be escaped. <code>\\b[A-Z0-9_-]{4,}\\b</code>"},{"location":"policies/filters/custom_filters/identifier/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>LAST_4</code> Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/custom_filters/identifier/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code> <code>CLASSIFICATION</code> Compares the classification of the sensitive text. 
<code>==</code> , <code>!=</code>"},{"location":"policies/filters/custom_filters/identifier/#example-policy","title":"Example Policy","text":"<pre><code>{\n  \"name\": \"default\",\n  \"identifiers\": {\n    \"identifiers\": [\n      {\n        \"pattern\": \"[A-Z]{9}\",\n        \"caseSensitive\": false,\n        \"classification\": \"custom-identifier\",\n        \"enabled\": true,\n        \"identifierFilterStrategies\": [\n          {\n            \"strategy\": \"REDACT\",\n            \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n          }\n        ]        \n      }\n    ]\n  }\n}\n</code></pre>"},{"location":"policies/filters/locations/cities/","title":"Cities","text":""},{"location":"policies/filters/locations/cities/#filter","title":"Filter","text":"<p>This filter identifies common US cities as determined by the US census in text.</p>"},{"location":"policies/filters/locations/cities/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/locations/cities/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>cityFilterStrategies</code> A list of filter strategies. None <code>sensitivity</code> Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are <code>low</code>, <code>medium</code>, and <code>high</code>. <code>medium</code>"},{"location":"policies/filters/locations/cities/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. 
<code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/cities/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/locations/cities/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"cities-example\",\n   \"identifiers\": {\n      \"city\": {\n         \"sensitivity\": \"medium\",\n         \"cityFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/locations/counties/","title":"Counties","text":""},{"location":"policies/filters/locations/counties/#filter","title":"Filter","text":"<p>This filter identifies common US counties as determined by the US census in text.</p>"},{"location":"policies/filters/locations/counties/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/locations/counties/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>countyFilterStrategies</code> A list of filter strategies. 
None <code>sensitivity</code> Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are <code>low</code>, <code>medium</code>, and <code>high</code>. <code>medium</code>"},{"location":"policies/filters/locations/counties/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/counties/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/locations/counties/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"counties-example\",\n   \"identifiers\": {\n      \"county\": {\n         \"sensitivity\": \"medium\",\n         \"countyFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/locations/hospital-abbreviations/","title":"Hospital Abbreviations","text":""},{"location":"policies/filters/locations/hospital-abbreviations/#filter","title":"Filter","text":"<p>This filter identifies US hospital abbreviations in text.</p>"},{"location":"policies/filters/locations/hospital-abbreviations/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/locations/hospital-abbreviations/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>hospitalAbbreviationFilterStrategies</code> A list of filter strategies. None <code>sensitivity</code> Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are <code>low</code>, <code>medium</code>, and <code>high</code>. <code>medium</code>"},{"location":"policies/filters/locations/hospital-abbreviations/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. 
<code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/hospital-abbreviations/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/locations/hospital-abbreviations/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"hospital-abbreviations-example\",\n   \"identifiers\": {\n      \"hospitalAbbreviation\": {\n         \"sensitivity\": \"medium\",\n         \"hospitalAbbreviationFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/locations/hospitals/","title":"Hospitals","text":""},{"location":"policies/filters/locations/hospitals/#filter","title":"Filter","text":"<p>This filter identifies US hospitals in text.</p>"},{"location":"policies/filters/locations/hospitals/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/locations/hospitals/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value 
<code>hospitalFilterStrategies</code> A list of filter strategies. None <code>sensitivity</code> Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are <code>low</code>, <code>medium</code>, and <code>high</code>. <code>medium</code>"},{"location":"policies/filters/locations/hospitals/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/hospitals/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/locations/hospitals/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"hospitals-example\",\n   \"identifiers\": {\n      \"hospital\": {\n         \"sensitivity\": \"medium\",\n         \"hospitalFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/locations/state-abbreviations/","title":"State Abbreviations","text":""},{"location":"policies/filters/locations/state-abbreviations/#filter","title":"Filter","text":"<p>This filter identifies US state abbreviations in text.</p>"},{"location":"policies/filters/locations/state-abbreviations/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/locations/state-abbreviations/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>stateAbbreviationsFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/locations/state-abbreviations/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. 
<code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/state-abbreviations/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/locations/state-abbreviations/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"states-abbreviations-example\",\n   \"identifiers\": {\n      \"stateAbbreviation\": {\n         \"stateAbbreviationFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/locations/states/","title":"States","text":""},{"location":"policies/filters/locations/states/#filter","title":"Filter","text":"<p>This filter identifies US states in text.</p>"},{"location":"policies/filters/locations/states/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/locations/states/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>stateFilterStrategies</code> A list of filter strategies. 
None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/locations/states/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/states/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/locations/states/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"states-example\",\n   \"identifiers\": {\n      \"state\": {\n         \"stateFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/persons_names/first-names/","title":"First Names","text":""},{"location":"policies/filters/persons_names/first-names/#filter","title":"Filter","text":"<p>This filter identifies common first names as identified by the US census in text.</p>"},{"location":"policies/filters/persons_names/first-names/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/persons_names/first-names/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>sensitivity</code> Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are <code>low</code>, <code>medium</code>, and <code>high</code>. <code>medium</code> <code>firstNameFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/first-names/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. 
See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/persons_names/first-names/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/persons_names/first-names/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"first-names-example\",\n   \"identifiers\": {\n      \"firstName\": {\n         \"firstNameFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/persons_names/persons-names-ner/","title":"Person's Names (NER)","text":""},{"location":"policies/filters/persons_names/persons-names-ner/#filter","title":"Filter","text":"<p>This filter identifies person's names based on natural language processing (NLP) and named-entity recognition (NER) in text.</p>"},{"location":"policies/filters/persons_names/persons-names-ner/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required 
parameters.</p>"},{"location":"policies/filters/persons_names/persons-names-ner/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>removePunctuation</code> When set to true, punctuation will be removed prior to analysis. <code>false</code> <code>nerFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/persons-names-ner/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>ABBREVIATE</code> Replace the sensitive text with the initials of the text."},{"location":"policies/filters/persons_names/persons-names-ner/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/persons_names/persons-names-ner/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"ner-example\",\n   \"identifiers\": {\n      \"ner\": {\n         \"nerFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/persons_names/physician-names-ner/","title":"Physician Names","text":""},{"location":"policies/filters/persons_names/physician-names-ner/#filter","title":"Filter","text":"<p>This filter identifies physician names (e.g. Dr. John Smith) in text.</p>"},{"location":"policies/filters/persons_names/physician-names-ner/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/persons_names/physician-names-ner/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>physicianNameFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/physician-names-ner/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. 
<code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/persons_names/physician-names-ner/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/persons_names/physician-names-ner/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"physician-names-example\",\n   \"identifiers\": {\n      \"physicianName\": {\n         \"physicianNameFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/persons_names/surnames/","title":"Surnames","text":""},{"location":"policies/filters/persons_names/surnames/#filter","title":"Filter","text":"<p>This filter identifies common surnames as identified by the US census in text.</p>"},{"location":"policies/filters/persons_names/surnames/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/persons_names/surnames/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>sensitivity</code> Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. 
Valid values are <code>low</code>, <code>medium</code>, and <code>high</code>. <code>medium</code> <code>surnameFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/surnames/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/persons_names/surnames/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/persons_names/surnames/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"surnames-example\",\n   \"identifiers\": {\n      \"surname\": {\n         \"surnameFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"quick_starts/quick_start_aws/","title":"Philter Quick Start on AWS","text":"<p>Philter on AWS is a virtual machine-based product. It runs in EC2 on its own EC2 instance. A free trial period is available during which there is no charge for the Philter software but there may be charges for the underlying AWS infrastructure.</p> <p>Cloud virtual machines launched from a cloud marketplace may not be immediately suitable for a HIPAA environment. Refer to your compliance officer for your organization's requirements to ensure compliance with all relevant regulations.</p> <p>Here\u2019s a brief screen cast showing how to launch Philter in AWS.</p>"},{"location":"quick_starts/quick_start_aws/#launch-philter-in-aws","title":"Launch Philter in AWS","text":"<ol> <li>Go to Philter in the AWS Marketplace. On this page you can see the Philter overview, the pricing, and the supported EC2 instance types.</li> <li>Select an instance type. We recommend <code>m5.large</code>. The smaller instance types are intended only for testing and are not well-suited for production usage.</li> <li>Click the Continue to Subscribe button.</li> <li>View and accept Philter\u2019s license agreement. Then click Accept Terms.</li> <li>The subscription will now be created and you will be notified when it is ready! This usually takes less than a minute.</li> <li>Click the Continue to Configuration button to select the AMI, the version, and the region. 
We recommend using the newest version if multiple are available.</li> <li>Click the Continue to Launch button to launch Philter in your AWS account!</li> </ol> <p>AWS will automatically open ports <code>22</code> (SSH) and <code>8080</code> (Philter API) for the Philter instance's security group. These ports are required to be open but you may want to modify the security groups to limit their scope of availability by restricting access to specific CIDR ranges.</p> <p>Congratulations! You have deployed Philter in AWS. You are now ready to filter text!</p>"},{"location":"quick_starts/quick_start_aws/#try-it-out","title":"Try it out!","text":"<p>With Philter now running we can take it for a spin. We will send some text to Philter and inspect at the response we get back. The Philter virtual machine running in your cloud account should have a public IP address (unless you customized the deployment). We will use that public IP address to interact with Philter.</p> <p>Philter, by default, will be configured with an HTTPS listener on port 8080 using a self-signed certificate. It is recommended that prior to use in a production environment the self-signed certificate is replaced by a valid certificate owned by your organization.</p> <p>In the command below, replace <code>&lt;PUBLIC_IP&gt;</code> with the virtual machine\u2019s public IP address or public host name.</p> <pre><code>curl -k -X POST https://&lt;PUBLIC_IP&gt;:8080/api/filter --data \"George Washington was a patient and his SSN is 123-45-6789.\" -H \"Content-type: text/plain\"\n</code></pre> <p>With this command we are sending the text in the command to Philter for filtering. Philter will identify the patient name (George Washington) and the SSN (123-45-6789) and redact those values in the response. 
You can always use curl to send text to Philter as in these examples but there are also SDKs you can use, too, to integrate Philter with your applications.</p>"},{"location":"quick_starts/quick_start_aws/#redacting-sensitive-information-from-text","title":"Redacting Sensitive Information from Text","text":"<p>The types of sensitive information that Philter identifies and removes is controlled by policies. By default, Philter includes a filter profile that includes many of the types of sensitive information, such as names and social security numbers. We can send text to filter to Philter for filtering using this default filter profile with the following command:</p> <pre><code>curl -k -X POST https://localhost:8080/api/filter -d @file.txt -H \"Content-Type: text/plain\"\n</code></pre> <p>This command sends the contents of the file <code>file.txt</code> to Philter. Philter will apply the enabled filters and return a plain-text response consisting of the filtered text. (Replace localhost with the IP address or host name of Philter if you are not running the command where Philter is running.) You can also send text directly in the request instead of sending it as a file:</p> <pre><code>curl -k -X POST https://localhost:8080/api/filter --data \"Your text goes here...\" -H \"Content-type: text/plain\"\n</code></pre>"},{"location":"quick_starts/quick_start_aws/#next-steps","title":"Next Steps","text":"<p>Now that you have Philter running and know how to send text to it you are ready to integrate Philter into your existing workflow and systems. Philter\u2019s API details how to send files to Philter. 
Clients for some languages for Philter\u2019s API are available on GitHub.</p> <p>Be sure to check out Policies to see how you can customize the types of sensitive information Philter redacts!</p>"},{"location":"quick_starts/quick_start_aws/#example-uses","title":"Example Uses","text":"<p>Here's a few examples showing how to use Philter with some common big-data and streaming applications.</p> Description Technologies Remove sensitive information from text in an Apache NiFi dataflow Apache NiFi Remove sensitive information from text using AWS Lambda in an Amazon Kinesis Firehose pipeline Amazon Kinesis, AWS Lambda Removing PII/PHI from OpenAI ChatGPT API Requests ChatGPT Redacting PHI and PII in Apache Kafka Data streams Apache Kafka"},{"location":"quick_starts/quick_start_azure/","title":"Philter Quick Start on Microsoft Azure","text":"<p>Philter on Microsoft Azure is a virtual machine-based product. A free trial period is available during which there is no charge for the Philter software but there may be charges for the underlying Azure infrastructure.</p> <p>Cloud virtual machines launched from a cloud marketplace may not be immediately suitable for a HIPAA environment. 
Refer to your compliance officer for your organization's requirements to ensure compliance with all relevant regulations.</p>"},{"location":"quick_starts/quick_start_azure/#launch-philter-on-microsoft-azure","title":"Launch Philter on Microsoft Azure","text":"<ol> <li>Go to Philter in the Azure Marketplace.</li> <li>Click the Get It Now button.</li> <li>Review the information that is shown on the popup and click Continue when ready.</li> <li>You will now be asked to log in to your Microsoft Azure account if you were not already logged in.</li> <li>Click the Create button to begin making a Philter virtual machine.</li> <li>Enter the required details of the virtual machine and click the Review + create button.</li> <li>Review the virtual machine details and click Create when ready!</li> </ol> <p>Your Philter virtual machine will now be launching.</p> <p>Microsoft Azure will automatically open ports <code>22</code> (SSH) and <code>8080</code> (Philter API). These ports are required to be open but you may want to modify the security groups to limit their scope of availability by restricting access to specific CIDR ranges.</p> <p>Congratulations! You have deployed Philter in Azure. You are now ready to filter text!</p>"},{"location":"quick_starts/quick_start_azure/#try-it-out","title":"Try it out!","text":"<p>With Philter now running we can take it for a spin. We will send some text to Philter and inspect at the response we get back. The Philter virtual machine running in your cloud account should have a public IP address (unless you customized the deployment). We will use that public IP address to interact with Philter.</p> <p>Philter, by default, will be configured with an HTTPS listener on port 8080 using a self-signed certificate. 
It is recommended that prior to use in a production environment the self-signed certificate is replaced by a valid certificate owned by your organization.</p> <p>In the command below, replace <code>&lt;PUBLIC_IP&gt;</code> with the virtual machine\u2019s public IP address or public host name.</p> <pre><code>curl -k -X POST https://&lt;PUBLIC_IP&gt;:8080/api/filter --data \"George Washington was a patient and his SSN is 123-45-6789.\" -H \"Content-type: text/plain\"\n</code></pre> <p>With this command we are sending the text in the command to Philter for filtering. Philter will identify the patient name (George Washington) and the SSN (123-45-6789) and redact those values in the response. You can always use curl to send text to Philter as in these examples but there are also SDKs you can use, too, to integrate Philter with your applications.</p>"},{"location":"quick_starts/quick_start_azure/#redacting-sensitive-information-from-text","title":"Redacting Sensitive Information from Text","text":"<p>The types of sensitive information that Philter identifies and removes is controlled by policies. By default, Philter includes a filter profile that includes many of the types of sensitive information, such as names and social security numbers. We can send text to filter to Philter for filtering using this default filter profile with the following command:</p> <pre><code>curl -k -X POST https://localhost:8080/api/filter -d @file.txt -H \"Content-Type: text/plain\"\n</code></pre> <p>This command sends the contents of the file <code>file.txt</code> to Philter. Philter will apply the enabled filters and return a plain-text response consisting of the filtered text. (Replace localhost with the IP address or host name of Philter if you are not running the command where Philter is running.) 
You can also send text directly in the request instead of sending it as a file:</p> <pre><code>curl -k -X POST https://localhost:8080/api/filter --data \"Your text goes here...\" -H \"Content-type: text/plain\"\n</code></pre>"},{"location":"quick_starts/quick_start_azure/#next-steps","title":"Next Steps","text":"<p>Now that you have Philter running and know how to send text to it you are ready to integrate Philter into your existing workflow and systems. Philter\u2019s API details how to send files to Philter. Clients for some languages for Philter\u2019s API are available on GitHub.</p> <p>Be sure to check out Policies to see how you can customize the types of sensitive information Philter redacts!</p>"},{"location":"quick_starts/quick_start_azure/#example-uses","title":"Example Uses","text":"<p>Here's a few examples showing how to use Philter with some common big-data and streaming applications.</p> Description Technologies Remove sensitive information from text in an Apache NiFi dataflow Apache NiFi Remove sensitive information from text using AWS Lambda in an Amazon Kinesis Firehose pipeline Amazon Kinesis, AWS Lambda Removing PII/PHI from OpenAI ChatGPT API Requests ChatGPT Redacting PHI and PII in Apache Kafka Data streams Apache Kafka <p> </p>"},{"location":"quick_starts/quick_start_gcp/","title":"Philter Quick Start on Google Cloud","text":"<p>Philter on Google Cloud is a virtual machine-based product. A free trial period is available during which there is no charge for the Philter software but there may be charges for the underlying Google Cloud infrastructure.</p> <p>Cloud virtual machines launched from a cloud marketplace may not be immediately suitable for a HIPAA environment. 
Refer to your compliance officer for your organization's requirements to ensure compliance with all relevant regulations.</p>"},{"location":"quick_starts/quick_start_gcp/#launch-philter-in-google-cloud","title":"Launch Philter in Google Cloud","text":"<ol> <li>Go to Philter in the Google Cloud Marketplace.</li> <li>Click the Launch on Compute Engine button.</li> </ol> <p>Virtual Machine Recommendations</p> <p>The general purpose machine type is n2-standard-2 and this machine type should be adequate for most use-cases. We recommend 8 vCPUs and 8-16 GB of RAM for a production deployment.</p> <p>Google Cloud will automatically open ports <code>22</code> (SSH) and <code>8080</code> (Philter API). These ports are required to be open but you may want to modify the security groups to limit their scope of availability by restricting access to specific CIDR ranges.</p> <p>Congratulations! You have deployed Philter in Google Cloud. You are now ready to filter text!</p>"},{"location":"quick_starts/quick_start_gcp/#try-it-out","title":"Try it out!","text":"<p>With Philter now running we can take it for a spin. We will send some text to Philter and inspect at the response we get back. The Philter virtual machine running in your cloud account should have a public IP address (unless you customized the deployment). We will use that public IP address to interact with Philter.</p> <p>Philter, by default, will be configured with an HTTPS listener on port 8080 using a self-signed certificate. 
It is recommended that prior to use in a production environment the self-signed certificate is replaced by a valid certificate owned by your organization.</p> <p>In the command below, replace <code>&lt;PUBLIC_IP&gt;</code> with the virtual machine\u2019s public IP address or public host name.</p> <pre><code>curl -k -X POST https://&lt;PUBLIC_IP&gt;:8080/api/filter --data \"George Washington was a patient and his SSN is 123-45-6789.\" -H \"Content-type: text/plain\"\n</code></pre> <p>With this command we are sending the text in the command to Philter for filtering. Philter will identify the patient name (George Washington) and the SSN (123-45-6789) and redact those values in the response. You can always use curl to send text to Philter as in these examples but there are also SDKs you can use, too, to integrate Philter with your applications.</p>"},{"location":"quick_starts/quick_start_gcp/#redacting-sensitive-information-from-text","title":"Redacting Sensitive Information from Text","text":"<p>The types of sensitive information that Philter identifies and removes is controlled by policies. By default, Philter includes a filter profile that includes many of the types of sensitive information, such as names and social security numbers. We can send text to filter to Philter for filtering using this default filter profile with the following command:</p> <pre><code>curl -k -X POST https://localhost:8080/api/filter -d @file.txt -H \"Content-Type: text/plain\"\n</code></pre> <p>This command sends the contents of the file <code>file.txt</code> to Philter. Philter will apply the enabled filters and return a plain-text response consisting of the filtered text. (Replace localhost with the IP address or host name of Philter if you are not running the command where Philter is running.) 
You can also send text directly in the request instead of sending it as a file:</p> <pre><code>curl -k -X POST https://localhost:8080/api/filter --data \"Your text goes here...\" -H \"Content-type: text/plain\"\n</code></pre>"},{"location":"quick_starts/quick_start_gcp/#next-steps","title":"Next Steps","text":"<p>Now that you have Philter running and know how to send text to it you are ready to integrate Philter into your existing workflow and systems. Philter\u2019s API details how to send files to Philter. Clients for some languages for Philter\u2019s API are available on GitHub.</p> <p>Be sure to check out Policies to see how you can customize the types of sensitive information Philter redacts!</p>"},{"location":"quick_starts/quick_start_gcp/#example-uses","title":"Example Uses","text":"<p>Here's a few examples showing how to use Philter with some common big-data and streaming applications.</p> Description Technologies Remove sensitive information from text in an Apache NiFi dataflow Apache NiFi Remove sensitive information from text using AWS Lambda in an Amazon Kinesis Firehose pipeline Amazon Kinesis, AWS Lambda Removing PII/PHI from OpenAI ChatGPT API Requests ChatGPT Redacting PHI and PII in Apache Kafka Data streams Apache Kafka <p> </p>"}]}
\ No newline at end of file
+{"config":{"lang":["en"],"separator":"[\\s\\-]+","pipeline":["stopWordFilter"]},"docs":[{"location":"","title":"Philter","text":"<p>Philter is an application that finds, identifies, and removes sensitive information, such as protected health information (PHI) and personally identifiable information (PII), and user-defined sensitive information from natural language text. Philter is ideal for usage in text processing pipelines where sensitive information needs to be removed, encrypted, or redacted from the text.</p> <p>This documentation applies to Philter 2.4.0. If you are upgrading to this version see Upgrading Philter.</p> <p>To get going fast, jump to the Quick Starts to launch Philter on AWS, Azure, or Google Cloud.</p>"},{"location":"evaluating-performance/","title":"How to Evaluate Phileas' Performance","text":"<p>A common question we receive is how well does Phileas perform? Our answer to this question is probably less than satisfactory because it simply depends. What does it depend on? Phileas' performance is heavily dependent upon your individual data. Comparing metrics of Phileas' performance across different customer datasets is like comparing apples and oranges.</p> <p>If your data is not exactly like another customer's data then the metrics will not be applicable to your data. In terms of the classic information retrieval metrics precision and recall, comparing these values between customers can give false impressions about Phileas' performance, both good and bad.</p> <p>This guide walks you through how to evaluate Phileas' performance. If you are just getting started with Phileas please see the Quick Starts instead. Then you can come back here to learn how to evaluate Phileas' performance.</p>"},{"location":"evaluating-performance/#guide-to-evaluating-performance","title":"Guide to Evaluating Performance","text":"<p>We have created this guide to help guide you in evaluating Phileas' performance on your data. 
The guide involves determining the types of sensitive information you want to redact, configuring those filters, optimizing the configuration, and then capturing the performance metrics.</p> <p>If you are using Philter we will gladly perform these steps for you and provide you a detailed Phileas performance report generated from your data. Please contact us to start the process.</p>"},{"location":"evaluating-performance/#what-you-need","title":"What You Need","text":"<p>To evaluate Phileas' performance you need:</p> <ul> <li>An application using Phileas.</li> <li>A list of the types of sensitive information you want to redact.</li> <li>A data set representative of the text you will be redacting using Phileas. It's important the data set be representative so the evaluation results will transfer to the actual data redaction.</li> <li>The same data set but with annotated sensitive information. These annotations will be used to calculate the precision and recall metrics.</li> </ul>"},{"location":"evaluating-performance/#configuring-phileas","title":"Configuring Phileas","text":"<p>Before we can begin our evaluation we need to create a policy. A policy is a file that defines the types of sensitive information that will be redacted and how it will be redacted. The policies are stored on the Phileas instance under <code>/opt/Phileas/policies</code>. You can edit the policies directly there using a text editor or you can use Phileas' API to upload a policy. In this case we recommend just using a text editor on the Phileas instance to create a policy.</p> <p>When using a text editor to create and edit a policy, be sure to save the policy often. 
Frequent saving can make editing a policy easier.</p> <p>We also recommend placing your policy directory under source control to have a history and change log of your policies.</p>"},{"location":"evaluating-performance/#creating-a-policy","title":"Creating a Policy","text":"<p>Make a copy of the default policy, and we will modify the copy for our needs.</p> <p><code>cp /opt/Phileas/policies/default.json /opt/Phileas/policies/evaluation.json</code></p> <p>Now open <code>/opt/Phileas/policies/evaluation.json</code> in a text editor. (The content of <code>evaluation.json</code> will be similar to what's shown below but may have minor differences between different versions of Phileas.)</p> <pre><code>{\n   \"name\": \"default\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre> <p>The first thing we need to do is to set the name of the policy. Replace <code>default</code> with <code>evaluation</code> and save the file.</p>"},{"location":"evaluating-performance/#identifying-the-filters-you-need","title":"Identifying the Filters You Need","text":"<p>The rest of the file contains the filters that are enabled in the default policy. We need to make sure that each type of sensitive information that you want to redact is represented by a filter in this file. 
Look through the rest of the policy and determine which filters are listed that you do not need and also which filters you do need that are not listed.</p>"},{"location":"evaluating-performance/#disabling-filters-we-do-not-need","title":"Disabling Filters We Do Not Need","text":"<p>If a filter is listed in the policy and you do not need the filter you have two options. You can either delete those lines from the policy and save the file, or you can set the filter's <code>enabled</code> property to false. Using the <code>enabled</code> property allows you to keep the filter configuration in the policy in case it is needed later but both options have the same effect.</p>"},{"location":"evaluating-performance/#enabling-filters-not-in-the-default-policy","title":"Enabling Filters Not in the Default Policy","text":"<p>Let's say you want to redact bitcoin addresses. The bitcoin address filter is not in the default policy. To add the bitcoin address filter we will refer to Phileas' documentation on the bitcoin address filter, get the configuration, and copy it into the policy.</p> <p>From the bitcoin address filter documentation we see the configuration for the bitcoin address filter is:</p> <pre><code>      \"bitcoinAddress\": {\n         \"bitcoinAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n</code></pre> <p>We can copy this configuration and paste it into our policy:</p> <pre><code>{\n   \"name\": \"evaluation\",\n   \"identifiers\": {\n      \"bitcoinAddress\": {\n         \"bitcoinAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": 
\"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre> <p>The order of the filters in the policy does not matter and has no impact on performance. We typically place the filters in the policy alphabetically just to improve readability.</p> <p>Repeat these steps until you have added a filter for each of the types of sensitive information you want to redact. Typically, the default redaction <code>strategy</code> and <code>redactionFormat</code> values for each filter should be fine for evaluation.</p> <p>When finished modifying the policy, save the file and close the text editor. Now restart Phileas for the policy changes to be loaded:</p> <pre><code>sudo systemctl restart Phileas\n</code></pre>"},{"location":"evaluating-performance/#submitting-text-for-redaction","title":"Submitting Text for Redaction","text":"<p>With our policy in place we can now send text to Phileas for redaction using that policy:</p> <pre><code>PhileasConfiguration phileasConfiguration = ConfigFactory.create(PhileasConfiguration.class);\n\nFilterService filterService = new PhileasFilterService(phileasConfiguration);\n\nFilterResponse response = filterService.filter(policies, context, documentId, body, MimeType.TEXT_PLAIN);\n</code></pre> <p>The <code>explain</code> API endpoint produces a detailed description of the redaction. The response will include a list of spans that contain the start and stop positions of redacted text and the type of sensitive information that was redacted. 
Using this information we can compare the redacted information to our annotated file to calculate precision and recall metrics.</p>"},{"location":"evaluating-performance/#calculating-precision-and-recall","title":"Calculating Precision and Recall","text":"<p>Now we can calculate the precision and recall metrics.</p> <ul> <li>Precision is the number of true positives divided by the number of true positives plus false positives.</li> <li>Recall is the number of true positives divided by the number of false negatives plus true positives.</li> </ul> <p></p> <ul> <li>The F-1 score is the harmonic mean of precision and recall.</li> </ul> <p></p>"},{"location":"settings/","title":"Settings","text":"<p>Phileas has settings to control how it operates. The settings and how to configure each are described below.</p> <p>The configuration for the types of sensitive information that Phileas identifies is defined in filter policies outside of Phileas' configuration properties described on this page.</p>"},{"location":"settings/#configuring-phileas","title":"Configuring Phileas","text":""},{"location":"settings/#the-phileas-settings-file","title":"The Phileas Settings File","text":"<p>Phileas looks for its settings in an <code>application.properties</code> file.</p>"},{"location":"settings/#using-environment-variables","title":"Using Environment Variables","text":"<p>Properties set via environment variables take precedence over properties set in Phileas' settings file.</p> <p>All following properties can also be set as environment variables by prepending <code>PHILTER_</code> to the property name and changing periods to underscores. 
For example, the property <code>filter.profiles.directory</code> can be set using the environment variable <code>PHILTER_FILTER_PROFILES_DIRECTORY</code> by:</p> <pre><code>export PHILTER_FILTER_PROFILES_DIRECTORY=/profiles/\n</code></pre> <p>Using environment variables to configure Phileas instead of using Phileas' settings file can allow for easier configuration management when deploying Phileas.</p>"},{"location":"settings/#policies","title":"Policies","text":"Setting Description Allowed Values Default Value <code>filter.policies.directory</code> The directory in which to look for policies. Any valid directory path. <code>./policies/</code>"},{"location":"settings/#span-disambiguation","title":"Span Disambiguation","text":"<p>These values configure Phileas' span disambiguation feature to determine the most appropriate type of sensitive information when duplicate spans are identified. In a deployment of multiple Phileas instances, you must enable the cache service for span disambiguation to work as expected.</p> Description Allowed Values Default Value <code>span.disambiguation.enabled</code> Whether or not to enable span disambiguation. <code>true</code>, <code>false</code> <code>false</code>"},{"location":"settings/#cache-service","title":"Cache Service","text":"<p>The cache service is required to use consistent anonymization and policies stored in Amazon S3. Phileas supports Redis as the backend cache. When Redis is not used, an in-memory cache is used instead. The in-memory cache is not recommended because all contents will be stored in memory on the local Phileas instance.</p> <p>The cache will contain sensitive information. It is important that you take the necessary precautions to secure the cache itself and all communication between Phileas and the cache.</p> Setting Description Allowed Values Default Value <code>cache.redis.enabled</code> Whether or not to use Redis as the cache. 
<code>true</code>, <code>false</code> <code>false</code> <code>cache.redis.host</code> The hostname or IP address of the Redis cache. Any valid Redis endpoint. None <code>cache.redis.port</code> The Redis cache port. Any valid port. <code>6379</code> <code>cache.redis.auth.token</code> The Redis auth token. Any valid token. None <code>cache.redis.ssl</code> Whether or not to use SSL for communication with the Redis cache. <code>true</code>, <code>false</code> <code>false</code> <p>The following Redis settings are only required when using a self-signed SSL certificate.</p> Setting Description Allowed Values Default Value <code>cache.redis.truststore</code> The path to the trust store. Any valid file path. None <code>cache.redis.truststore.password</code> The trust store password. Any valid password. None <code>cache.redis.keystore</code> The path to the keystore. Any valid file path. None <code>cache.redis.keystore.password</code> The keystore password. Any valid password. None"},{"location":"settings/#advanced-settings","title":"Advanced Settings","text":"<p>In most cases the settings below do not need to be changed. Contact us for more information on any of these settings.</p> Setting Description Allowed Values Default Value <code>ner.timeout.sec</code> Controls the timeout in seconds when performing named entity recognition. Longer text may require longer processing times. An integer value. <code>600</code> <code>ner.max.idle.connections</code> The maximum number of idle connections to maintain for the named entity recognition. More connections may improve performance in some cases. An integer value. <code>30</code> <code>ner.keep.alive.duration.ms</code> The amount of time in milliseconds to keep named entity recognition connections alive. Longer text may require longer processing times. An integer value. 
<code>60</code>"},{"location":"system_requirements/","title":"System Requirements","text":"<p>When launched from a cloud marketplace, Philter is pre-configured and contains all required dependencies.</p> <p>Philter requires the following:</p> <ul> <li>2 vCPU (e.g., m5.large instance type on AWS)</li> <li>8 GB of RAM</li> <li>Java 17</li> </ul>"},{"location":"upgrading/","title":"Upgrading Philter","text":"<p>We recommend reviewing the Philter Release Notes prior to upgrading.</p>"},{"location":"upgrading/#upgrading-from-a-2x-version","title":"Upgrading from a 2.x Version","text":"<p>Upgrading Philter to the newest version requires moving Philter's configuration to the new version of Philter. To upgrade Philter from a 2.x version, follow the steps below.</p> <ol> <li>Launch a new instance of the newest version of Philter.</li> <li>Copy your policies from /opt/philter/policies to the new instance.</li> <li>Copy your /opt/philter/philter.properties to the new instance.</li> <li>Copy your /opt/philter/philter-ui.properties to the new instance.</li> <li>Replace the new virtual machine's properties file with your copy from step 1.</li> <li>Copy your policies from /opt/philter/policies to the new instance.</li> <li>If you have configured any SSL certificates for Philter, copy those files over to the new instance.</li> <li>Restart Philter: sudo systemctl restart philter.service &amp;&amp; sudo systemctl restart philter-ui.service &amp;&amp; sudo systemctl restart philter-ner.service</li> <li>Test the new Philter virtual machine to make sure it is behaving as expected.</li> <li>Decommission the old Philter instance.</li> </ol>"},{"location":"upgrading/#upgrading-from-a-1x-version","title":"Upgrading from a 1.x Version","text":"<p>Upgrading Philter to the newest version requires moving Philter's configuration to the new version of Philter. 
To upgrade Philter from a 1.x version, follow the steps below.</p> <ol> <li> <p>Make local copies of your current Philter's properties files.</p> </li> <li> <p><code>/opt/philter/philter.properties</code> (prior to 1.10.1 the filename was /opt/philter/application.properties)</p> </li> <li> <p><code>/opt/philter/philter-ui.properties</code> (not applicable prior to version 1.10)</p> </li> <li> <p>Launch a new instance of the newest version of Philter.</p> </li> <li>Replace the new virtual machine's properties file with your copy from step 1.</li> <li>Restart Philter: sudo systemctl restart philter.service sudo systemctl restart philter-ui.service sudo systemctl restart philter-ner.service</li> <li>Test the new Philter virtual machine to make sure it is behaving appropriately.</li> <li>Decommission the old Philter instance.</li> </ol>"},{"location":"deidentification/bucketing/","title":"Bucketing","text":""},{"location":"deidentification/date-shifting/","title":"Date Shifting","text":""},{"location":"deidentification/deidentification/","title":"De-identification Methods","text":"<p>There are several ways data can be de-identified, and which you use depends on the types of data you want to de-identify and your use-case for de-identifying the data. The terminology around the different methods is often used interchangeably, but there are differences between each method.</p> <p>In this User's Guide, we may use the terms <code>filter</code> and <code>redact</code> interchangeably.</p> <p>In Philter, de-identification methods vary for each type of sensitive information. For example, all types can be replaced or redacted, but only dates can be shifted and only zip codes can be truncated. How a de-identification method is applied by Philter is called a filter strategy. Each type of sensitive information can have one or more filter strategies, and the combination of the filter strategies you select is called a policy. 
A policy determines how a document will be de-identified.</p> <p>The following is a list of de-identification methods that describes how each method works and its applicability to our Philter software. De-identifying a document is likely to require a combination of the following methods. For instance, you may want to redact names, encrypt credit card numbers, and shift appointment dates.</p> De-identification Method Description Replacement Replaces sensitive information with a defined value. For example, you might want to replace a credit card number with the literal value \"CREDIT_CARD_NUMBER\". Redaction and Masking Removes sensitive information. Our Philter software gives you a choice of how to remove the sensitive information, whether it is by replacing it with ***** (masking) or by some other set of characters. Encryption Encrypts sensitive information. Date Shifting Shifts dates either forward or backward by some interval. Bucketing Categorizes data into buckets based on the data. For example, Philter can bucket dates into years and zip codes by population. <p>A difference between Philter and other services is that Philter does not send your data to a third-party for de-identification. Philter runs in your cloud and your data stays in your cloud.</p>"},{"location":"deidentification/encryption/","title":"Encryption","text":""},{"location":"deidentification/pii_phi_nppi/","title":"PII, PHI, and NPPI","text":"<p>Philter has many predefined types of sensitive information called filters that can be redacted. The individual types are described below.</p> <ul> <li>Personally identifiable information (PII) is any information that could potentially be used to identify a specific person.</li> <li>Protected health information (PHI) is any information about health status, provision of health care, or payment for health care that can be linked to an individual. 
The Health Insurance Portability And Accountability Act (HIPAA) defines 18 types of PHI.</li> </ul>"},{"location":"deidentification/pii_phi_nppi/#predefined-types-of-pii-and-phi","title":"Predefined Types of PII and PHI","text":"<p>The types of sensitive information that Philter will identify are customizable. For example, if you are not interested in VIN numbers you can have Philter ignore them. This configuration is performed through Policies.</p> <p>Because Philter only operates on text, the biometric identifiers and face images outlined in the HIPAA regulations as PHI are not applicable to Philter. The types of sensitive information and how Philter identifies each one are listed in the table below.</p> Type of PHI How Philter Identifies It 1 <p>Names</p><p>Ex: John Smith, Jane Doe</p> <ul><li>Philter identifies names in natural language text using state of the art machine learning algorithms and natural language processing techniques to identify named-person entities.</li><li>Philter also uses common first name and surname dictionaries with spellcheck capability to identify common names per the US census.</li></ul> 2 <p>All geographical identifiers smaller than a state, except for the initial three digits of a zip code if, according to the current publicly available data from the U.S. Bureau of the Census: the geographic unit formed by combining all zip codes with the same three initial digits contains more than 20,000 people; and the initial three digits of a zip code for all such geographic units containing 20,000 or fewer people is changed to 000</p><p>Ex: 85055, 90213-1544</p> <ul><li>Philter can identify many US cities, US counties, and all US states (full names and abbreviations).</li><li>Philter uses a dictionary with spelling correction to identify misspelled locations.</li><li>Filter conditions in policies can be used to apply logic based on zip code population according to the US census. 
(Filter strategies can truncate the zip code.)</li><li>Philter also uses state of the art machine learning algorithms and natural language processing techniques to identify locations.</li><li>Philter includes a dictionary of some hospital locations to quickly identify medical locations.</li></ul> 3 <p>Dates (other than year) directly related to an individual</p><p>Ex: 10-10-2000, 10/10/2000, October 10, 2000</p> <ul><li>Philter can identify dates in many formats such as with hyphens (10-10-2000), with slashes (10/10/2000), or spelled out (May 1, 2000).</li><li>Philter can also identify ages, e.g. 57 years, 57yrs.</li></ul> 4 <p>Phone Numbers</p><p>Ex: (304) 555-5555, 304-555-5555, 1-800-123-4567</p> <ul><li>Philter can identify phone numbers in many formats. (Philter is currently limited to US phone numbers.)</li></ul> 5 <p>Fax numbers</p><p>Ex: (304) 555-5555, 304-555-5555, 1-800-123-4567</p> <ul><li>Philter can identify fax numbers in many formats. (Philter is currently limited to US phone numbers.)</li></ul> 6 <p>Email addresses</p><p>Ex: john.fake.address@hotmail.com</p> <ul><li>Philter can identify email addresses per the email standard (summarized on Wikipedia).</li></ul> 7 <p>Social Security numbers</p><p>Ex: 123-45-6789, 123456789</p> <ul><li>Philter can identify social security numbers (SSNs) in multiple formats such as with spaces and hyphens.</li></ul> 8 <p>Medical record numbers</p><p>Ex: 86637729, AB473-6021, 473-6AB021</p> <ul><li>Philter can identify alphanumeric identifiers.</li></ul> 9 <p>Health insurance beneficiary numbers</p><p>Ex: 86637729, AB473-6021, 473-6AB021</p> <ul><li>Philter can identify alphanumeric identifiers.</li></ul> 10 <p>Account numbers</p><p>Ex: 86637729, AB473-6021, 473-6AB021</p> <ul><li>Philter can identify alphanumeric identifiers, as well as credit card numbers from all major types of credit cards.</li></ul> 11 <p>Certificate/license numbers</p><p>Ex: 86637729, AB473-6021, 473-6AB021</p> <ul><li>Philter can identify 
alphanumeric identifiers.</li></ul> 12 <p>Vehicle identifiers and serial numbers, including license plate numbers</p><p>Ex: WBAPM7G50ANL19218, 1GBJC34K3RE176005</p> <ul><li>Philter can identify vehicle serial numbers (17-character VIN numbers). License plates will be identified as alphanumeric identifiers.</li></ul> 13 <p>Device identifiers and serial numbers</p><p>Ex: H3SNPUHYEE7JD3H, 33778376</p> <ul><li>Philter can identify alphanumeric identifiers.</li></ul> 14 <p>Web Uniform Resource Locators (URLs)</p><p>Ex: myhomepage.com, http://myhomepage.com/folder/page.html, www.myhomepage.com/folder/page.html</p> <ul><li>Philter can identify URLs adhering to the URL naming standard.</li></ul> 15 <p>Internet Protocol (IP) address numbers</p><p>Ex: 127.0.0.1, 192.168.3.58, 2001:0db8:85a3:0000:0000:8a2e:0370:7334</p> <ul><li>Philter can identify IPv4 and IPv6 addresses.</li></ul> 16 Biometric identifiers, including finger, retinal and voice prints <ul><li>Not applicable \u2013 Philter only identifies PHI in text.</li></ul> 17 Full face photographic images and any comparable images <ul><li>Not applicable \u2013 Philter only identifies PHI in text.</li></ul> 18 <p>Any other unique identifying number, characteristic, or code except the unique code assigned by the investigator to code the data</p><p>Ex: 86637729, AB473-6021, 473-6AB021</p> <ul><li>Philter can identify alphanumeric identifiers.</li></ul>"},{"location":"deidentification/redaction-and-masking/","title":"Redaction and Masking","text":"<p>Redaction and masking are two methods of de-identification that are often used interchangeably. The term redaction refers to removing a sensitive value from a document. When we hear the term redaction we often think of an image of a document with black bars across pieces of the text.</p> <p>Masking is similar to redaction but allows for configuring how the sensitive value is removed. The most common example is using asterisks (i.e. 
******) in place of a sensitive value.</p>"},{"location":"deidentification/replacement/","title":"Replacement","text":"<p>Replacement is a method of de-identification that simply replaces a sensitive value with another value. Replacement is useful when the sensitive value is not needed once the document has been de-identified. Philter can replace a sensitive value with a preset value or with a random value.</p> <p>In Philter's filter strategies, replacement is achieved by using the strategy to <code>REDACT</code>, <code>STATIC_REPLACE</code> , or <code>RANDOM_REPLACE</code> .</p>"},{"location":"other_features/alerts/","title":"Alerts","text":"<p>Phileas can optionally generate alerts when a particular type of sensitive information is identified.</p>"},{"location":"other_features/alerts/#alert-conditions","title":"Alert Conditions","text":"<p>In a policy, each type of sensitive information can have zero or more filter strategies. Each filter strategy can optionally have a condition associated with it. When a condition is present, the filter strategy will only be satisfied when the condition is satisfied. For example, a condition may be created to only filter phone numbers that start with the digits <code>123</code> or only filter names that start with <code>John</code>. Filter strategy conditions give you granular control over the filtering process.</p> <p>When a filter strategy condition is satisfied, Phileas can optionally generate an alert. This feature allows you to be notified when a particular type of sensitive information is identified.</p>"},{"location":"other_features/alerts/#enabling-alerts","title":"Enabling Alerts","text":"<p>Alerts are enabled on a per-condition basis. For instance, given the following policy to identify email addresses, a condition has been added to only match the email address <code>test@test.com</code>. Because of the property <code>alert</code> set to <code>true</code>, an alert will be generated when this condition is satisfied. 
By default, the alert property is set to <code>false</code> disabling alerts for the condition.</p> <pre><code>{\n  \"name\": \"email-address-alert\",\n  \"identifiers\": {\n    \"emailAddress\": {\n      \"emailAddressFilterStrategies\": [\n        {\n          \"id\": \"my-email-strategy\",\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\",\n          \"condition\": \"token == \\\"test@test.com\\\"\",\n          \"alert\": true\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"other_features/alerts/#structure-of-an-alert","title":"Structure of an Alert","text":"<p>An alert contains the following information:</p> Property Name Description <code>id</code> A unique ID for the alert formatted as a UUID. <code>filterProfile</code> The name of the policy triggering the alert. <code>strategyId</code> The ID of the filter strategy triggering the alert. In the example above the <code>id</code> would be <code>my-email-strategy</code>. <code>context</code> The context. <code>documentId</code> The ID of the document which triggered the alert. <code>filterType</code> The filter type (\"email-address\", \"credit-card\", etc.) triggering the alert. <code>date</code> A timestamp when the alert was generated formatted as <code>yyyy-MM-dd'T'HH:mm:ss.SSS'Z'</code>."},{"location":"other_features/alerts/#retrieving-and-deleting-alerts","title":"Retrieving and Deleting Alerts","text":"<p>The alerts that Phileas has generated are available through Phileas' alerts API. This API allows for retrieving and deleting alerts. Using this API you can build sophisticated notification systems around Phileas' capabilities.</p>"},{"location":"other_features/anonymization/","title":"Consistent Anonymization","text":"<p>Anonymization in the context of Phileas is the process of replacing certain values with random but similar values. 
For example, the identified name of \u201cJohn Smith\u201d may be replaced with \u201cDavid Jones\u201d, or an identified phone number of 123-555-9358 may be replaced by 842-436-2042. A VIN number will be replaced by a 17 character randomly selected VIN number that adheres to the standard for VIN numbers.</p> <p>Anonymization is useful in instances where you want to remove sensitive information from text without changing the meaning of the text. Anonymization can be enabled for each type of sensitive information in the policy by setting the filter strategy to <code>RANDOM_REPLACE</code>. (See Policies for more information.)</p>"},{"location":"other_features/anonymization/#consistent-anonymization_1","title":"Consistent Anonymization","text":"<p>Consistent anonymization refers to the process of always anonymizing the same sensitive information with the same replacement values. For example, if the name \"John Smith\" is randomly replaced with \"Pete Baker\", all other occurrences of \"John Smith\" will also be replaced by \"Pete Baker.\"</p> <p>Consistent anonymization can be done on the document level or on the context level. When enabled on the document level, \"John Smith\" will only be replaced by \"Pete Baker\" in the same document. If \"John Smith\" occurs in a separate document it will be anonymized with a different random name. When enabled on the context level, \"John Smith\" will be replaced by \"Pete Baker\" whenever \"John Smith\" is found in all documents in the same context.</p> <p>Enabling consistent anonymization on the context level requires a cache to store the sensitive information and the corresponding replacement values. If a single instance of Phileas is running, its internal cache service (enabled by default) is the best choice and no additional configuration is required.</p> <p>If multiple instances of Phileas are deployed together, Phileas requires access to a Redis cache service as shown below. 
See Phileas' Settings on how to configure the cache.</p> <p>When Phileas is deployed in a cluster, a Redis cache is required to enable consistent anonymization.</p> <p>The anonymization cache will contain PHI. It is important that you take the necessary precautions to secure the cache and all communication to and from the cache.</p>"},{"location":"other_features/span_disambiguation/","title":"Span Disambiguation","text":"<p>Span disambiguation is an optional feature in Phileas that is disabled by default. Refer to Phileas' Settings to enable and configure span disambiguation.</p> <p>In Phileas, a span is a piece of the input text that Phileas has identified as sensitive information. A span has start and end positions, a confidence, a type, and other attributes. Ideally, each piece of identified sensitive information will only have a single span associated with it. In this case, the type of sensitive information is unambiguous. The goal of span disambiguation is to provide more accurate filtering by removing the potential ambiguities in the types of sensitive information for duplicate spans.</p> <p>However, sometimes a piece of text can be identified by multiple spans, each having a different type of sensitive information. In an example hypothetical scenario, let's say given the input text <code>My SSN is 123456789.</code> , Phileas identifies <code>123456789</code> as an SSN and as a phone number. This type of scenario can be quite common, and its likelihood increases as the number of enabled filters in a policy increases.</p>"},{"location":"other_features/span_disambiguation/#how-phileas-span-disambiguation-works","title":"How Phileas' Span Disambiguation Works","text":"<p>When we read the sentence <code>My SSN is 123456789.</code> we can tell the span in question should be identified as an SSN because we can look at the text surrounding the span. 
We use the surrounding words to deduce the correct type of sensitive information for <code>123456789</code>.</p> <p>That is exactly how Phileas' span disambiguation works. When presented with identical spans differing only by the type of sensitive information, Phileas looks at the text surrounding the span in question in combination with the previous spans it has seen in the same context to determine which type of sensitive information is most likely to be correct. Phileas then removes the ambiguous spans from the results and replaces them with a single span.</p>"},{"location":"other_features/span_disambiguation/#improves-over-time","title":"Improves Over Time","text":"<p>Because Phileas is able to consider previously seen text to make its decision concerning ambiguous spans, Phileas' span disambiguation gets \"smarter\" as more text is filtered. This is because Phileas will have more text to consider in its calculations.</p>"},{"location":"other_features/span_disambiguation/#more-details","title":"More Details","text":""},{"location":"other_features/span_disambiguation/#span-disambiguation-and-confidence-values","title":"Span Disambiguation and Confidence Values","text":"<p>Span disambiguation is only invoked for spans that differ only by the type of sensitive information. This means the span's location (start and end positions), confidence, and all other values must match. If two spans have identical locations but have different confidence values, span disambiguation will not be applied and the span having the highest confidence will be used.</p>"},{"location":"other_features/span_disambiguation/#cache-service","title":"Cache Service","text":"<p>When multiple applications using Phileas are deployed alongside each other behind a load balancer, Phileas' cache service should be configured and enabled. Phileas will store the information needed to disambiguate spans in the cache such that the information is available to each instance of Phileas. 
If only a single instance of Phileas is running then the cache service is not required, however, the information needed to disambiguate spans will be stored in memory and will be lost when Phileas is stopped or restarted. Because of this, we recommend the cache service always be used unless there is a specific reason not to.</p>"},{"location":"other_features/span_disambiguation/#fine-tuning-the-span-disambiguation","title":"Fine-Tuning the Span Disambiguation","text":"<p>There are properties available to fine-tune how the span disambiguation operates. These properties are not documented because improper use of the properties could have a negative impact on performance. We will be glad to walk through these properties upon request.</p>"},{"location":"policies/document_analysis/","title":"Document Analysis","text":"<p>Philter analyzes received documents prior to redacting the document. This analysis is done to help Philter get a better understanding of the document. The results of the analysis are used to exclude certain document types from redaction and to improve Philter's redaction performance.</p> <p>While not recommended, the automatic document analysis can be disabled in a policy. By default, document analysis is enabled.</p> <p>Disabling document analysis will cause any policy features dependent on the results of the document analysis to not function. 
{style=\"warning\"}</p> <p>An example policy with disabled document analysis is shown below.</p> <pre><code>{\n  \"name\": \"email-and-phone-numbers\",\n  \"config\": {\n    \"analysis\": {\n      \"enabled\": false\n    }\n  },\n  \"identifiers\": {\n    \"emailAddress\": {\n      \"emailAddressFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/excluding_by_document_type/","title":"Excluding by Document Type","text":"<p>Philter can automatically detect certain types of documents and exclude those documents from redaction of certain sensitive information. For example, you want to redact SSN/TINs in all but one type of document.</p> <p>To exclude a document type from a specific filter, set the <code>excludeDocumentTypes</code> value to a list of document types to exclude for a filter strategy. Filter strategies for all filter types support the <code>excludeDocumentTypes</code> property.</p> <p>An example to exclude email addresses from being redacted in a subpoena document is given below:</p> <pre><code>{\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\",\n               \"excludeDocumentTypes\": [\"SUBPOENA\"]\n            }\n         ]\n      }\n   }\n}\n</code></pre> <p>In this example, email addresses are redacted in all document types except documents Philter identifies as being subpoena documents.</p>"},{"location":"policies/excluding_by_document_type/#document-types-supported-by-automatic-detection","title":"Document Types Supported by Automatic Detection","text":"<p>Philter currently supports automatically detecting the following document types.</p> Document Type Document Description Subpoena Form 2540 Federal Bankruptcy - 
SUBPOENA FOR RULE 2004 EXAMINATION Subpoena Form 2550 - Federal Bankruptcy - SUBPOENA TO APPEAR AND TESTIFY Subpoena Form 2560 - Federal Bankruptcy - SUBPOENA TO TESTIFY AT A DEPOSITION Subpoena Form 2570 - Federal Bankruptcy - SUBPOENA TO PRODUCE DOCUMENTS Subpoena AO 88 - SUBPOENA TO APPEAR AND TESTIFY AT A HEARING OR TRIAL IN A CIVIL ACTION Subpoena AO 88A - SUBPOENA TO TESTIFY AT A DEPOSITION IN A CIVIL ACTION Subpoena AO 88B - SUBPOENA TO PRODUCE DOCUMENTS, INFORMATION, OR OBJECTS Subpoena AO 89 - SUBPOENA TO TESTIFY AT A HEARING OR TRIAL IN A CRIMINAL CASE Subpoena AO 90 - SUBPOENA TO TESTIFY AT A DEPOSITION IN A CRIMINAL CASE Subpoena AO 110 - SUBPOENA TO TESTIFY BEFORE A GRAND JURY"},{"location":"policies/filter_policies/","title":"Filter Policies","text":"<p>The types of sensitive information identified by Phileas and how that information is de-identified are controlled through policies. A policy is a file stored under Phileas\u2019s <code>policies</code> directory, which by default is located at <code>/opt/Phileas/policies/</code>. You can have an unlimited number of policies.</p> <p>Each policy has a <code>name</code> that is used by Phileas to apply the appropriate de-identification methods. The <code>name</code> is passed to Phileas\u2019s API along with the text to be filtered when submitting text to Phileas. This provides flexibility and allows you to de-identify different types of documents in differing manners with a single instance of Phileas. 
For example, you may have a policy for bankruptcy documents and a separate policy for financial documents.</p> <p>There are sample policies available for immediate use or customization to fit your use-cases.</p>"},{"location":"policies/filter_policies/#the-structure-of-a-policy","title":"The Structure of a Policy","text":"<p>A policy:</p> <ul> <li>Must have a <code>name</code> that uniquely identifies it.</li> <li>Must have a list of <code>identifiers</code> that are filters for sensitive information.<ul> <li>Each <code>identifier</code> , or filter, can have zero or more filter strategies. A filter strategy tells Phileas how to manipulate that type of sensitive information when it is identified.</li> </ul> </li> <li>Can have an optional list of terms or patterns.</li> <li>Can have encryption keys to support encryption of sensitive information.</li> </ul>"},{"location":"policies/filter_policies/#an-example-policy","title":"An Example Policy","text":"<p>The following is an example policy. In the example below you can see the types of sensitive information that are enabled and the strategy for manipulating each type when found. This policy identifies email addresses and phone numbers and redacts each with the format given.</p> <pre><code>{\n   \"name\": \"email-and-phone-numbers\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      },\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre> <p>When an email address is identified by this policy, the email address is replaced with the text <code>{{{REDACTED-email-address}}}</code>. The <code>%t</code> gets replaced by the type of the filter. 
Likewise, when a phone number is found it is replaced with the text <code>{{{REDACTED-phone-number}}}</code>. You are free to change the redaction formats to whatever fits your use-case. See Filter Strategies for all replacement options.</p> <p>The name of the policy is <code>email-and-phone-numbers</code>. Policies can be named anything you like but their names must be unique from all other policies. As a best practice, the policy should be saved as <code>[name].json</code>, e.g. <code>email-and-phone-numbers.json</code>.</p>"},{"location":"policies/filter_policies/#applying-a-policy-to-text","title":"Applying a Policy to Text","text":"<p>To use this policy we will save it as <code>/opt/Phileas/policies/email-and-phone-numbers.json</code>. We must restart Phileas for the new policy to be available for use. To apply the policy we will pass the policy's name to Phileas when making a filter request, as shown in the example request below.</p> <pre><code>curl -k -X POST \"https://localhost:8080/api/filter?c=context&amp;p=email-and-phone-numbers\" \\\n  -d @file.txt -H \"Content-Type: text/plain\"\n</code></pre> <p>In this command, we have provided the parameter <code>p</code> along with a value that is the name of the policy we want to use for this request. If we had multiple policies in Phileas we could choose a different policy for this request simply by changing the name given to the parameter <code>p</code>. For more details see Phileas\u2019s API.</p> <p>Phileas will process the contents of <code>file.txt</code> by applying the policy named <code>email-and-phone-numbers</code>. As we saw in the policy above, this policy redacts email addresses and phone numbers. 
Phileas will return the redacted text in response to the API call.</p> <p>To manipulate the sensitive information by methods other than redaction, see the Filter Strategies.</p>"},{"location":"policies/filter_strategies/","title":"Filter Strategies","text":"<p>A filter strategy defines how sensitive information identified by Phileas should be manipulated, whether it is redacted, replaced, encrypted, or manipulated in some other fashion.</p> <p>In a policy, you list the types of sensitive information that should be filtered. How Phileas replaces each type of sensitive information is specific to each type. For instance, zip codes can be truncated based on the leading digits or zip code population while phone numbers are redacted. These replacements are performed by \"filter strategies.\"</p> <p>Each filter can have one or more filter strategies and conditions can be used to determine when to apply each filter strategy.</p> <p>A sample policy containing a filter strategy is shown below. In this example, email addresses will be redacted.</p> <pre><code>{\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre> <p>Most of the filter strategies apply to all types of data, however, some filter strategies only apply to a few types. For example, the <code>TRUNCATE</code> filter strategy only applies to a zip code filter.</p>"},{"location":"policies/filter_strategies/#filter-strategies_1","title":"Filter Strategies","text":"<p>The filter strategies are described below. Each filter type can specify zero or more filter strategies. When no filter strategies are given, Phileas will default to <code>REDACT</code> for that filter type. 
When multiple filter strategies are given for a single filter type, the filter strategies will be applied in order as they are listed in the policy, top to bottom.</p> <ul> <li><code>REDACT</code></li> <li><code>CRYPTO_REPLACE</code>(AES encryption)</li> <li><code>HASH_SHA256_REPLACE</code>(SHA256 hashing)</li> <li><code>FPE_ENCRYPT_REPLACE</code>(Format-preserving encryption)</li> <li><code>RANDOM_REPLACE</code></li> <li><code>STATIC_REPLACE</code></li> <li><code>TRUNCATE</code></li> <li><code>ZERO_LEADING</code></li> </ul>"},{"location":"policies/filter_strategies/#the-redact-filter-strategy","title":"The <code>REDACT</code> Filter Strategy","text":"<p>The REDACT filter strategy replaces sensitive information with a given redaction format. You can put variables in the redaction format that Phileas will replace when performing the redaction.</p> <p>The available redaction variables are:</p> Redaction Variable Description <code>%t</code> Will be replaced with the type of sensitive information. This is to allow you to know the type of sensitive information that was identified and redacted. <code>%l</code> Will be replaced by the given classification for the type of sensitive information. <code>%v</code> Will be replaced by the original value of the sensitive text. With <code>%v</code> you can annotate sensitive information instead of masking or removing it. 
<p>To redact sensitive information by replacing it with the type of sensitive information, the redaction format would be <code>REDACTED-%t</code>.</p> <p>An example filter using the <code>REDACT</code> filter strategy:</p> <pre><code>{\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#the-crypto_replace-filter-strategy-idcrypto","title":"The <code>CRYPTO_REPLACE</code> Filter Strategy {id=\"crypto\"}","text":"<p>The <code>CRYPTO_REPLACE</code> filter strategy replaces each identified piece of sensitive information by encrypting it using the AES encryption algorithm. To use this filter strategy, the policy must include the details of the encryption key as shown below:</p> <pre><code>{\n   \"name\":\"sample-profile\",\n   \"crypto\": {\n     \"key\": \"....\",\n     \"iv\": \"....\"\n   },\n   ...\n</code></pre> <p>In the snippet of a policy shown above, a crypto element is defined with a <code>key</code> and an initialization vector (<code>iv</code>). These two items are required to encrypt the sensitive information. To generate a key, run the following command:</p> <pre><code>openssl enc -e -aes-256-cbc -a -salt -P\n</code></pre> <p>You will be prompted to enter an encryption password. Once entered, the values of the <code>key</code> and <code>iv</code> will be shown. 
Copy and paste those values into the policy.</p> <p>An example policy using the <code>CRYPTO_REPLACE</code> filter strategy:</p> <pre><code>{\n   \"name\": \"email-address\",\n   \"crypto\": {\n     \"key\": \"....\",\n     \"iv\": \"....\"\n   },\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"CRYPTO_REPLACE\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#the-hash_sha256_replace-filter-strategy-idhash","title":"The <code>HASH_SHA256_REPLACE</code> Filter Strategy {id=\"hash\"}","text":"<p>The <code>HASH_SHA256_REPLACE</code> filter strategy replaces sensitive information with the SHA256 hash value of the sensitive information. To append a random salt value to each value prior to hashing, set the <code>salt</code> property to <code>true</code>. The salt value used will be returned in the <code>explain</code> response from Phileas' API.</p> <p>An example policy using the <code>HASH_SHA256_REPLACE</code> filter strategy:</p> <pre><code>{\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"HASH_SHA256_REPLACE\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#the-fpe_encrypt_replace-filter-strategy-idfpe","title":"The FPE_ENCRYPT_REPLACE Filter Strategy {id=\"fpe\"}","text":"<p>The <code>FPE_ENCRYPT_REPLACE</code> filter strategy uses format-preserving encryption (FPE) to encrypt the sensitive information. Phileas uses the FF3-1 algorithm for format-preserving encryption. The FPE_ENCRYPT_REPLACE filter strategy requires a <code>key</code> and a <code>tweak</code> value. These values control the format-preserving encryption. 
For more information on these values and format-preserving encryption, refer to the resources below:</p> <ul> <li>https://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-38Gr1-draft.pdf</li> <li>https://nvlpubs.nist.gov/nistpubs/specialpublications/nist.sp.800-38g.pdf</li> </ul> <p>An example policy using the FPE_ENCRYPT_REPLACE filter strategy:</p> <pre><code>{\n   \"name\": \"credit-cards\",\n   \"identifiers\": {\n      \"creditCardNumbers\": {\n         \"creditCardNumbersFilterStrategies\": [\n            {\n               \"strategy\": \"FPE_ENCRYPT_REPLACE\",\n               \"key\": \"...\",\n               \"tweak\": \"...\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#the-random_replace-filter-strategy-idrandom","title":"The <code>RANDOM_REPLACE</code> Filter Strategy {id=\"random\"}","text":"<p>Replaces the identified text with a fake value but of the same type. For example, an SSN will be replaced by a random text having the format <code>###-##-####</code>, such as 123-45-6789. An email address will be replaced with a randomly generated email address. Available to all filter types.</p> <p>An example policy using the <code>RANDOM_REPLACE</code> filter strategy:</p> <pre><code>{\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"RANDOM_REPLACE\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#the-static_replace-filter-strategy-idstatic","title":"The <code>STATIC_REPLACE</code> Filter Strategy {id=\"static\"}","text":"<p>Replaces the identified text with a given static value. 
Available to all filter types.</p> <p>An example policy using the <code>STATIC_REPLACE</code> filter strategy:</p> <pre><code>{\n   \"name\": \"email-address\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"STATIC_REPLACE\",\n               \"staticReplacement\": \"some new value\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#the-truncate-filter-strategy-idtruncate","title":"The <code>TRUNCATE</code> Filter Strategy {id=\"truncate\"}","text":"<p>Available only to zip codes, this strategy allows for truncating zip codes to only a select number of digits. Specify <code>truncateDigits</code> to set the desired number of leading digits to leave. For example, if <code>truncateDigits</code> is 2, the zip code 90210 will be truncated to <code>90***</code>. </p> <p>The TRUNCATE filter strategy is available only to the zip code filter. An example policy using the <code>TRUNCATE</code> filter strategy:</p> <pre><code>{\n   \"name\": \"zip-codes\",\n   \"identifiers\": {\n      \"zipCode\": {\n         \"zipCodeFilterStrategies\": [\n            {\n               \"strategy\": \"TRUNCATE\",\n               \"truncateDigits\": 3\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#the-zero_leading-filter-strategy-idzero_leading","title":"The <code>ZERO_LEADING</code> Filter Strategy {id=\"zero_leading\"}","text":"<p>Available only to zip codes, this strategy changes the first 3 digits of a zip code to be 0. For example, the zip code 90210 will be changed to 00010.</p> <p>The <code>ZERO_LEADING</code> filter strategy is only available to zip code filters. 
An example zip code filter using the <code>ZERO_LEADING</code> filter strategy:</p> <pre><code>{\n   \"name\": \"zip-codes\",\n   \"identifiers\": {\n      \"zipCode\": {\n         \"zipCodeFilterStrategies\": [\n            {\n               \"strategy\": \"ZERO_LEADING\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#filter-strategy-conditions","title":"Filter Strategy Conditions","text":"<p>A replacement strategy can be applied based on the sensitive information meeting one or more conditions. For example, you can create a condition such that only dates of <code>11/05/2010</code> are replaced by using the condition <code>token == \"11/05/2010\"</code>. The conditions that can be applied vary based on the type of sensitive information. For instance, zip codes can have conditions based on their population. Refer to each specific filter type for the conditions available.</p> <p>The following is an example policy for credit cards that contains a condition to only redact credit card numbers that start with the digits <code>3000</code>:</p> <pre><code>{\n  \"name\": \"default\",\n  \"identifiers\": {\n    \"creditCard\": {\n      \"creditCardFilterStrategies\": [\n        {\n          \"condition\": \"token startswith \\\"3000\\\"\",\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/filter_strategies/#combining-conditions","title":"Combining Conditions","text":"<p>Conditions can be joined through the use of the <code>and</code> keyword. When conditions are joined, each condition must be satisfied for the identified text to be filtered. If any of the conditions are not satisfied the identified text will not be filtered. 
Below is an example joined condition:</p> <pre><code>token != \"123-45-6789\" and context == \"my-context\"\n</code></pre> <p>This condition requires that the identified text (the token) not be equal to <code>123-45-6789</code> and the context be equal to <code>my-context</code>. Both of these conditions must be satisfied for the identified text to be filtered.</p> <p>Conversely, conditions can be <code>OR</code>'d through the use of multiple filter strategies. For example, if we want to <code>OR</code> a condition on the token and a condition on the context, we would use two filter strategies:</p> <pre><code>\"ssnFilterStrategies\": [\n  {\n    \"condition\": \"token != \\\"123-45-6789\\\"\",\n    \"strategy\": \"REDACT\",\n    \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n  },\n  {\n    \"condition\": \"context == \\\"my-context\\\"\",\n    \"strategy\": \"REDACT\",\n    \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n  }        \n]\n</code></pre>"},{"location":"policies/filters/","title":"Filters","text":"<p>A \"filter\" corresponds to a type of sensitive information. Phileas has filters for sensitive information such as names, addresses, ages, and lots of others.</p> <p>There are predefined filters that are ready to be used as well as custom filters that let you define your own filters to identify sensitive information outside of what the predefined filters can identify. An example of a custom filter is a filter to identify your patient account numbers, where the structure of an account number is specific to your organization.</p> <p>Each filter is capable of identifying and redacting a specific type of sensitive information. For example, there is a filter for phone numbers, a filter for US social security numbers, and a filter for person's names. You can enable any combination of these filters based on the types of sensitive information you need to redact.</p> <p>This section of the documentation describes the filters available in Phileas. 
The configuration options for each filter can vary due to the type of the sensitive information. For instance, only the zip code filter has a configuration to truncate the zip code.</p> <p>A selection of filters and their configurations is called a policy. A policy describes how to de-identify a document.</p>"},{"location":"policies/filters/#predefined-filters","title":"Predefined Filters","text":""},{"location":"policies/filters/#persons-names","title":"Person's Names","text":"<p>Phileas uses several methods to identify person's names.</p> Type Description First Names Identifies common first names Surnames Identifies common surnames Person's Names (NER) Identifies full names using natural language processing analysis Physician's Names (NER) Identifies physician names using natural language processing analysis"},{"location":"policies/filters/#other-filters","title":"Other Filters","text":"Type Description Ages Identifies ages such as <code>3.5 years old</code> Bank Routing Numbers Identifies bank routing numbers Bitcoin Addresses Identifies Bitcoin addresses such as <code>127NVqnjf8gB9BFAW2dnQeM6wqmy1gbGtv</code> Cities Identifies common cities Counties Identifies common counties Credit Card Numbers Identifies VISA, American Express, MasterCard, and Discover credit card numbers Dates Identifies dates in many formats such as May 22, 1999 Driver's License Numbers Identifies driver's license numbers for all 50 US states Email Addresses Identifies email addresses Hospitals Identifies common hospital names Hospital Abbreviations Identifies common hospitals by their name abbreviations IBAN Codes Identifies international bank account numbers IP Addresses Identifies IPv4 and IPv6 addresses MAC Addresses Identifies network MAC addresses Passport Numbers Identifies US passport numbers Phone Numbers Identifies phone numbers Phone Number Extensions Identifies phone number extensions Sections Identifies sections in text denoted by SSNs and TINs Identifies US SSNs and TINs States Identifies 
US state names State Abbreviations Identifies US state names by their abbreviations Tracking Numbers Identifies UPS, FedEx, and USPS tracking numbers URLs Identifies URLs VINs Identifies vehicle identification numbers Zip Codes Identifies US zip codes"},{"location":"policies/filters/#custom-filter-types-of-sensitive-information","title":"Custom Filter Types of Sensitive Information","text":"<p>In addition to the predefined types of sensitive information listed in the table above, you can also define your own types of sensitive information. Through custom identifiers and dictionaries, Phileas can identify many other types of information that may be sensitive in your use-case. For example, if you have patient identifiers that follow a pattern of <code>AA-00000</code> you can define a custom identifier for this sensitive information.</p> <p>Phileas can be configured to identify sensitive information based on custom dictionaries. When a term in the dictionary is found in the text, Phileas will treat the term as sensitive information and apply the given filter strategy.</p> <p>Custom dictionaries support fuzziness to accommodate for misspellings. The replacement strategy for a custom dictionary has a <code>sensitivityLevel</code> that controls the amount of allowed fuzziness.</p> Type Description Custom Dictionaries Identifies sensitive information based on dictionary values. Custom Identifiers Identifies custom alphanumeric identifiers that may be used for medical record numbers, patient identifiers, account numbers, or other specific identifiers."},{"location":"policies/ignoring_sensitive_information/","title":"Ignoring Sensitive Information","text":"<p>Phileas can optionally ignore a list of terms and prevent those terms from being redacted. For example, if the name <code>John Smith</code> is being redacted and you do not want it to be redacted, you can add <code>John Smith</code> to an ignore list. 
Each time Phileas identifies sensitive information it will check the ignore lists to see if the sensitive information is to be ignored.</p> <p>Phileas can ignore terms and patterns per-policy, meaning each policy can have its own unique list of terms or patterns to ignore.</p>"},{"location":"policies/ignoring_sensitive_information/#ignore-lists","title":"Ignore Lists","text":"<p>Ignore lists can be specified at the policy level and/or for each filter in the policy. When set for the policy, the list of ignored terms will be applied to all filter types. When set for a filter, the list of ignored terms will be applied only to that filter.</p>"},{"location":"policies/ignoring_sensitive_information/#ignore-list-for-a-policy","title":"Ignore List for a Policy","text":"<p>In the policy shown below, an ignore list is set at the level of the policy. The terms specified in the list will be ignored for all filter types enabled in the policy. Only the terms property is required. The <code>name</code> and <code>caseSensitive</code> properties are optional.</p> <pre><code>{\n   \"name\": \"example-policy\",\n   \"ignored\": [\n     {\n       \"name\": \"names to ignore\",\n       \"terms\": [\"john smith\", \"jane doe\"],\n       \"caseSensitive\": false\n     }\n   ],\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre> <p>Terms to be ignored at the policy level can also be read from one or more files located on the local file system. 
The file must be formatted as one term per line.</p> <pre><code>{\n   \"name\": \"example-policy\",\n   \"ignored\": [\n     {\n       \"name\": \"names to ignore\",\n       \"terms\": [\"john smith\", \"jane doe\"],\n       \"files\": [\"/tmp/names.txt\"],\n       \"caseSensitive\": false\n     }\n   ],   \n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/ignoring_sensitive_information/#ignore-list-for-a-filter","title":"Ignore List for a Filter","text":"<p>In the policy shown below, an ignore list is set at the level of a filter. The terms specified in the list will be ignored only for that filter type. Each filter in a policy can have its own list of ignored terms. The ignored terms are case-sensitive, meaning \"John\" will be ignored if \"John\" is an ignored term but will not be ignored if \"john\" is an ignored term.</p> <pre><code>{\n   \"name\": \"example-filter-profile\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"ignored\": [\"john smith\", \"jane doe\"],\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/ignoring_sensitive_information/#ignoring-patterns","title":"Ignoring Patterns","text":"<p>Phileas can ignore information based on a regular expression pattern. An example use of this feature is to ignore terms that are present in your text but are dynamic, such as logged timestamps. When using the date filter these timestamps may be identified as being sensitive but you do not want them redacted. 
With an ignore pattern we can ignore the logged timestamps.</p>"},{"location":"policies/ignoring_sensitive_information/#ignore-patterns","title":"Ignore Patterns","text":"<p>Ignore patterns can be specified at the policy level and/or at the level of each type of filter. When set at the policy level, the list of ignored patterns will be applied to all filter types. When set for an individual filter, the list of ignored patterns will be applied only to that filter.</p>"},{"location":"policies/ignoring_sensitive_information/#ignore-patterns-for-a-policy","title":"Ignore Patterns for a Policy","text":"<p>In the policy shown below, ignore patterns are set at the level of the policy. The patterns specified in the list will be ignored for all filter types enabled in the policy.</p> <pre><code>{\n   \"name\": \"example-policy\",\n   \"ignoredPatterns\": [\n     {\n       \"name\": \"ignore-room-numbers\",\n       \"pattern\": \"Room [A-Z0-4]{4}\"\n     }\n   ],\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/ignoring_sensitive_information/#ignore-patterns-for-a-filter","title":"Ignore Patterns for a Filter","text":"<p>In the policy shown below, ignore patterns are set at the level of a filter. The patterns specified in the list will be ignored only for that filter type. 
Each filter in a policy can have its own list of ignored patterns.</p> <pre><code>{\n   \"name\": \"example-policy\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"ignoredPatterns\": [\n           {\n             \"name\": \"ignore-room-numbers\",\n             \"pattern\": \"Room [A-Z0-4]{4}\"\n           }\n         ],\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/sample_policies/","title":"Sample Policies","text":"<p>This page lists some sample policies. You can use these policies either as-is or as starting points for customizing them to meet your specific de-identification needs.</p> <p>These policies are examples and not an exhaustive list of all the sensitive information Phileas can identify. Items from each of these policies can be combined to make policies to meet your use-cases.</p>"},{"location":"policies/sample_policies/#email-addresses-and-phone-numbers","title":"Email Addresses and Phone Numbers","text":"<p>This policy finds email addresses and phone numbers and redacts them with <code>{{{REDACTED-email-address}}}</code> and <code>{{{REDACTED-phone-number}}}</code>, respectively.</p> <pre><code>{\n  \"name\": \"email-and-phone-numbers\",\n  \"identifiers\": {\n    \"emailAddress\": {\n      \"emailAddressFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"phoneNumber\": {\n      \"phoneNumberFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/sample_policies/#persons-names-and-ssns","title":"Persons Names and SSNs","text":"<p>This policy finds persons names and SSNs and redacts them with 
<code>{{{REDACTED-entity}}}</code> and <code>{{{REDACTED-ssn}}}</code>, respectively.</p> <pre><code>{\n  \"name\": \"persons-names-ssn\",\n  \"identifiers\": {\n    \"ner\": {\n      \"nerFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"ssn\": {\n      \"ssnFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/sample_policies/#dates-urls-and-vins","title":"Dates, URLs, and VINs","text":"<p>This policy finds dates, URLs, and VINs. Dates and URLs are redacted with <code>{{{REDACTED-date}}}</code> and <code>{{{REDACTED-url}}}</code>, respectively. Each VIN is replaced by a randomly generated VIN.</p> <pre><code>{\n  \"name\": \"dates-urls-vin\",\n  \"identifiers\": {\n    \"date\": {\n      \"dateFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"url\": {\n      \"urlFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    },\n    \"vin\": {\n      \"vinFilterStrategies\": [\n        {\n          \"strategy\": \"RANDOM_REPLACE\"\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/sample_policies/#ip-addresses","title":"IP Addresses","text":"<p>This policy finds IP addresses and replaces each identified IP address with the static text <code>IP_ADDRESS</code> as long as the IP address is not <code>127.0.0.1</code>. 
(A condition on the filter strategy sets the IP address requirement.)</p> <pre><code>{\n  \"name\": \"ip-addresses\",\n  \"identifiers\": {\n    \"ipAddress\": {\n      \"ipAddressFilterStrategies\": [\n        {\n          \"strategy\": \"STATIC_REPLACE\",\n          \"staticReplacement\": \"IP_ADDRESS\",\n          \"condition\": \"token != \\\"127.0.0.1\\\"\"\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/sample_policies/#zip-codes","title":"Zip Codes","text":"<p>This policy finds ZIP codes starting with <code>90</code> and truncates the zip code to just the first two digits.</p> <pre><code>{\n  \"name\": \"zip-codes\",\n  \"identifiers\": {\n    \"zipCode\": {\n      \"zipCodeFilterStrategies\": [\n        {\n          \"condition\": \"token startswith \\\"90\\\"\",\n          \"strategy\": \"TRUNCATE\",\n          \"truncateDigits\": 2\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/sample_policies/#enable-text-splitting","title":"Enable Text Splitting","text":"<p>This policy enables text splitting for input over 10,000 characters.</p> <pre><code>{\n  \"name\": \"default-split-enabled\",\n  \"config\": {\n    \"splitting\": {\n      \"enabled\": true,\n      \"threshold\": 10000,\n      \"method\": \"newline\"\n    }\n  },\n  \"identifiers\": {\n    \"ssn\": {\n      \"ssnFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/sample_policies/#globally-ignored-terms","title":"Globally Ignored Terms","text":"<p>This policy has a list of globally ignored terms.</p> <pre><code>{\n  \"name\": \"default-global-ignore\",\n  \"ignored\": [\n    {\n      \"name\": \"ignored credit cards\",\n      \"terms\": [\"4111111111111111\", \"0000000000000000\"]\n    }\n  ],\n  \"identifiers\": {\n    \"creditCard\": {\n      \"creditCardFilterStrategies\": [\n        {\n          
\"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/sample_policies/#generating-alerts","title":"Generating Alerts","text":"<p>This policy generates an alert when a matching email address is identified.</p> <pre><code>{\n  \"name\": \"email-address-alert\",\n  \"identifiers\": {\n    \"emailAddress\": {\n      \"emailAddressFilterStrategies\": [\n        {\n          \"strategy\": \"REDACT\",\n          \"redactionFormat\": \"{{{REDACTED-%t}}}\",\n          \"condition\": \"token == \\\"test@test.com\\\"\",\n          \"alert\": true\n        }\n      ]\n    }\n  }\n}\n</code></pre>"},{"location":"policies/splitting_input_text/","title":"Splitting Input Text","text":"<p>On a per-policy basis, Philter can split input text to process each split individually. This can improve performance and allows for handling long input text. Splitting is disabled by default.</p> <p>An example split configuration in a policy is shown below:</p> <pre><code>{\n  \"name\": \"default\",\n  \"identifiers\": {}, \n  \"config\": {\n    \"splitting\": {\n      \"enabled\": true,\n      \"threshold\": 10000,\n      \"method\": \"newline\"\n    }\n  }\n}\n</code></pre> <p>In this example policy, splitting is enabled for inputs greater than or equal to 10,000 characters in length.</p> <p>The method of splitting the text will be the <code>newline</code> method. This method will cause Philter to split the text based on the locations of new line characters in the input text. Additional methods of text splitting may be added in future versions.</p> <p>Because the newline method splits text based on the locations of new line characters in the text, the text contained in the reassembled filter responses may not be an exact match of the input text. 
This is due to white space and other characters that may reside near the new line characters that get omitted during processing.</p>"},{"location":"policies/splitting_input_text/#text-splitting-policy-properties","title":"Text Splitting Policy Properties","text":"Property Description Allowed Values Default Value <code>enabled</code> Whether or not input texts are split. When <code>false</code>, requests with text exceeding the threshold generate an <code>HTTP 413 PayloadTooLarge</code> error response. <code>true</code> or <code>false</code> <code>false</code> <code>threshold</code> When to split the input text. Set to <code>-1</code> to disable splitting. Any integer value. <code>10000</code> <code>method</code> How to split the text. <code>newline</code> <code>newline</code>"},{"location":"policies/splitting_input_text/#alternative-to-philter-splitting-text","title":"Alternative to Philter Splitting Text","text":"<p>In some cases it may be best to split your input text client side prior to sending the text to Philter. 
This gives you full control over how the text will be split and provides more predictable responses from Philter because you know how the text is split.</p> <p>An example of splitting text into chunks prior to sending the text to Philter is given in the commands below:</p> <pre><code># Given a large file called largefile.txt, split it into 10k pieces.\n$ split -b 10k largefile.txt segment\n\n# Now process the pieces.\n$ curl -s -X POST -k \"https://philter:8080/api/filter?d=document1\" --data \"@/tmp/segmentaa\" -H \"Content-type: text/plain\" &gt; out1\n$ curl -s -X POST -k \"https://philter:8080/api/filter?d=document1\" --data \"@/tmp/segmentab\" -H \"Content-type: text/plain\" &gt; out2\n\n# Now recombine the outputs into a single file.\n$ cat out1 out2 &gt; filtered.txt\n</code></pre>"},{"location":"policies/filters/common_filters/ages/","title":"Ages","text":""},{"location":"policies/filters/common_filters/ages/#filter","title":"Filter","text":"<p>This filter identifies ages such as <code>3.5 years old</code> in text.</p>"},{"location":"policies/filters/common_filters/ages/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/ages/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>ageFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/ages/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. 
See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/ages/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. The filter will only be applied when the condition is satisfied. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/ages/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"ages-example\",\n   \"identifiers\": {\n      \"age\": {\n         \"ageFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/bank-routing-numbers/","title":"Bank Routing Numbers","text":""},{"location":"policies/filters/common_filters/bank-routing-numbers/#filter","title":"Filter","text":"<p>This filter identifies bank routing numbers (ABA routing transit numbers) such as <code>111000025</code> in text. 
Identified routing numbers must pass checksum validation.</p>"},{"location":"policies/filters/common_filters/bank-routing-numbers/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/bank-routing-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>bankRoutingNumberFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/bank-routing-numbers/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/bank-routing-numbers/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. The filter will only be applied when the condition is satisfied. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. 
<code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/bank-routing-numbers/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"bank-routing-number-example\",\n   \"identifiers\": {\n      \"bankRoutingNumber\": {\n         \"bankRoutingNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/bitcoin-addresses/","title":"Bitcoin Addresses","text":""},{"location":"policies/filters/common_filters/bitcoin-addresses/#filter","title":"Filter","text":"<p>This filter identifies bitcoin addresses such as <code>1BvBMSEYstWetqTFn5Au4m4GFg7xJaNVN2</code> in text.</p>"},{"location":"policies/filters/common_filters/bitcoin-addresses/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/bitcoin-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>bitcoinAddressFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/bitcoin-addresses/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. 
When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/bitcoin-addresses/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/bitcoin-addresses/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"bitcoin-address-example\",\n   \"identifiers\": {\n      \"bitcoinAddress\": {\n         \"bitcoinAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/creditcards/","title":"Credit Cards","text":""},{"location":"policies/filters/common_filters/creditcards/#filter","title":"Filter","text":"<p>This filter identifies credit cards such as <code>378282246310005</code> in text.</p>"},{"location":"policies/filters/common_filters/creditcards/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/creditcards/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>creditCardFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>onlyValidCreditCardNumbers</code> When set to true, only valid credit card numbers will be filtered. <code>true</code> <code>ignoreWhenInUnixTimestamp</code> When set to true, only credit card numbers that do not match the pattern for a Unix timestamp will be filtered. <code>false</code>"},{"location":"policies/filters/common_filters/creditcards/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. 
When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE) <code>LAST_4</code> Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/creditcards/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/creditcards/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"credit-cards-example\",\n   \"identifiers\": {\n      \"creditcard\": {\n         \"onlyValidCreditCardNumbers\": false,\n         \"creditCardFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/dates/","title":"Dates","text":""},{"location":"policies/filters/common_filters/dates/#filter","title":"Filter","text":"<p>This filter identifies dates such as <code>May 22, 2014</code> in text. The supported date formats are:</p> Format Example yyyy-MM-d 2020-05-10 MM-dd-yyyy 05-10-2020 M-d-y 5-10-2020 MMM dd May 5 or May 05 MMMM dd, yyyy May 5, 2020 or May 5 2020"},{"location":"policies/filters/common_filters/dates/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/dates/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>dateFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>onlyValidDates</code> When set to true, only valid dates will be filtered. <code>false</code>"},{"location":"policies/filters/common_filters/dates/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. 
When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>SHIFT</code> Shift the date by a number of months, days, and/or years. <code>SHIFTRANDOM</code> Shift the date by a random number of months, days, and years. <code>RELATIVE</code> Replace the date with words relative to the date."},{"location":"policies/filters/common_filters/dates/#filter-strategy-options","title":"Filter Strategy Options","text":"<p>The following filter strategy options are available for the <code>RELATIVE</code> filter strategy.</p> Option Description Default Value <code>futureDates</code> When <code>true</code>, future dates are replaced by relative words. When <code>false</code>, future dates are redacted. <code>false</code> <p>The following filter strategy options are available for the <code>SHIFT</code> filter strategy.</p> Option Description Default Value <code>shiftDays</code> The number of days to shift the date. Can be a negative or positive integer. Defaults to <code>0</code> if not specified. <code>0</code> <code>shiftMonths</code> The number of months to shift the date. Can be a negative or positive integer. Defaults to <code>0</code> if not specified. <code>0</code> <code>shiftYears</code> The number of years to shift the date. Can be a negative or positive integer. Defaults to <code>0</code> if not specified. <code>0</code>"},{"location":"policies/filters/common_filters/dates/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. 
See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>TOKEN</code> Compares the sensitive text to some category, e.g. <code>birthdate</code>. <code>is</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/dates/#differentiating-between-dates-and-birth-dates","title":"Differentiating Between Dates and Birth Dates","text":"<p>In some cases it may be necessary to redact birth dates and dates differently. Using conditions it is possible to determine if an identified date is a birth date. The conditional <code>token is birthdate</code> will determine if the identified date (token) is a birth date by analyzing the content surrounding the date.</p>"},{"location":"policies/filters/common_filters/dates/#example-policy-to-redact-dates","title":"Example Policy to Redact Dates","text":"<p>The following policy redacts dates.</p> <pre><code>{\n   \"name\": \"dates-example\",\n   \"identifiers\": {\n      \"date\": {\n         \"onlyValidDates\": false,\n         \"dateFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/dates/#example-policy-to-shift-dates","title":"Example Policy to Shift Dates","text":"<p>The following policy shifts dates forward by 2 days and 4 months.</p> <pre><code>{\n   \"name\": \"dates-example\",\n   \"identifiers\": {\n      \"date\": {\n         \"onlyValidDates\": false,\n         \"dateFilterStrategies\": [\n            {\n               \"strategy\": 
\"SHIFT\",\n               \"shiftDays\": 2,\n               \"shiftMonths\": 4,\n               \"shiftYears\": 0\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/drivers-license-numbers/","title":"Driver's License Numbers","text":""},{"location":"policies/filters/common_filters/drivers-license-numbers/#filter","title":"Filter","text":"<p>This filter identifies driver's license numbers such as 194784357 in text. Driver's license number formats for all 50 US states are supported.</p>"},{"location":"policies/filters/common_filters/drivers-license-numbers/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/drivers-license-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>driversLicenseFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/drivers-license-numbers/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. 
<code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/drivers-license-numbers/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/drivers-license-numbers/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"drivers-license-example\",\n   \"identifiers\": {\n      \"driversLicense\": {\n         \"driversLicenseFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/email-addresses/","title":"Email Addresses","text":""},{"location":"policies/filters/common_filters/email-addresses/#filter","title":"Filter","text":"<p>This filter identifies email addresses such as <code>john.fake.address@hotmail.com</code> in text.</p>"},{"location":"policies/filters/common_filters/email-addresses/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/email-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>emailAddressFilterStrategies</code> A list of filter strategies. 
None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>onlyStrictMatches</code> When set to false, the pattern for identifying email addresses will be relaxed. Filtered email addresses will have a lower confidence, but filter performance will increase. <code>true</code> <code>onlyValidTLDs</code> When set to true, only email addresses with a valid top-level domain are filtered. <code>false</code>"},{"location":"policies/filters/common_filters/email-addresses/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/email-addresses/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/email-addresses/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"email-address-example\",\n   \"identifiers\": {\n      \"emailAddress\": {\n         \"emailAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/iban-codes/","title":"IBAN Codes","text":""},{"location":"policies/filters/common_filters/iban-codes/#filter","title":"Filter","text":"<p>This filter identifies International Bank Account Number (IBAN) codes such as <code>HU4211773016111110180000000</code> in text. Driver's license number formats for all 50 US states are supported.</p>"},{"location":"policies/filters/common_filters/iban-codes/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/iban-codes/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>allowSpaces</code> When <code>true</code>, IBAN codes will be allowed to contain spaces and grouped in sections of 4. Set to <code>false</code> to disallow spaces in IBAN codes. <code>true</code> <code>ibanCodeFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>onlyValidIBANCodes</code> When set to true, only valid IBAN codes will be filtered. <code>true</code>"},{"location":"policies/filters/common_filters/iban-codes/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. 
When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE) <code>LAST_4</code> Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/iban-codes/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/iban-codes/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"iban-example\",\n   \"identifiers\": {\n      \"ibanCode\": {\n         \"onlyValidIBANCodes\": false,\n         \"ibanCodeFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/ip-addresses/","title":"IP Addresses","text":""},{"location":"policies/filters/common_filters/ip-addresses/#filter","title":"Filter","text":"<p>This filter identifies IPv4 and IPv6 addresses such as <code>127.0.0.1</code>, <code>192.168.3.58</code>, and <code>2001:0db8:85a3:0000:0000:8a2e:0370:7334</code> in text.</p>"},{"location":"policies/filters/common_filters/ip-addresses/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/ip-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>ipAddressFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/ip-addresses/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. 
<code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/ip-addresses/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/ip-addresses/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"ip-address-example\",\n   \"identifiers\": {\n      \"ipAddress\": {\n         \"ipAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/mac-addresses/","title":"MAC Addresses","text":""},{"location":"policies/filters/common_filters/mac-addresses/#filter","title":"Filter","text":"<p>This filter identifies MAC addresses in text.</p>"},{"location":"policies/filters/common_filters/mac-addresses/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/mac-addresses/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>macAddressFilterStrategies</code> A list of filter 
strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/mac-addresses/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/mac-addresses/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/mac-addresses/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"mac-address-example\",\n   \"identifiers\": {\n      \"macAddress\": {\n         \"macAddressFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/passport-numbers/","title":"Passport Numbers","text":""},{"location":"policies/filters/common_filters/passport-numbers/#filter","title":"Filter","text":"<p>This filter identifies US passport numbers in text.</p>"},{"location":"policies/filters/common_filters/passport-numbers/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/passport-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>passportNumberFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/passport-numbers/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. 
<code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE)"},{"location":"policies/filters/common_filters/passport-numbers/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CLASSIFICATION</code> Compares the issuing country of the passport number. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/passport-numbers/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"passport-number-example\",\n   \"identifiers\": {\n      \"passportNumber\": {\n         \"passportNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/phone-number-extensions/","title":"Phone Number Extensions","text":""},{"location":"policies/filters/common_filters/phone-number-extensions/#filter","title":"Filter","text":"<p>This filter identifies phone number extensions such as \"x100\" in text.</p>"},{"location":"policies/filters/common_filters/phone-number-extensions/#required-parameters","title":"Required Parameters","text":"<p>This filter has no 
required parameters.</p>"},{"location":"policies/filters/common_filters/phone-number-extensions/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>phoneNumberExtensionFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/phone-number-extensions/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/phone-number-extensions/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/phone-number-extensions/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"phone-number-ext-example\",\n   \"identifiers\": {\n      \"phoneNumberExtension\": {\n         \"phoneNumberExtensionFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      } \n   }     \n}\n</code></pre>"},{"location":"policies/filters/common_filters/phone-numbers/","title":"Phone Numbers","text":""},{"location":"policies/filters/common_filters/phone-numbers/#filter","title":"Filter","text":"<p>This filter identifies phone and fax numbers such as (304) 555-5555, 304-555-5555, and 1-800-123-4567 in text.</p>"},{"location":"policies/filters/common_filters/phone-numbers/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/phone-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>phoneNumberFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/phone-numbers/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. 
<code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/phone-numbers/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/phone-numbers/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"phone-number-example\",\n   \"identifiers\": {\n      \"phoneNumber\": {\n         \"phoneNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }     \n}\n</code></pre>"},{"location":"policies/filters/common_filters/sections/","title":"Sections","text":""},{"location":"policies/filters/common_filters/sections/#filter","title":"Filter","text":"<p>This filter identifies sections in text between a given start regular expression pattern and a given end regular expression pattern.</p>"},{"location":"policies/filters/common_filters/sections/#required-parameters","title":"Required Parameters","text":"Parameter Description Default Value <code>startPattern</code> A regular expression denoting the start of the section. 
None <code>endPattern</code> A regular expression denoting the end of the section. None"},{"location":"policies/filters/common_filters/sections/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>sectionFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/sections/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/sections/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/sections/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"sections-example\",\n   \"identifiers\": {\n      \"section\": {\n         \"startPattern\": \"START\",\n         \"endPattern\": \"END\",\n         \"sectionFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/ssns-and-tins/","title":"SSNs and TINs","text":""},{"location":"policies/filters/common_filters/ssns-and-tins/#filter","title":"Filter","text":"<p>This filter identifies US SSNs and TINs such as <code>123-45-6789</code> and <code>123456789</code> in text.</p>"},{"location":"policies/filters/common_filters/ssns-and-tins/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/ssns-and-tins/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>ssnFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/ssns-and-tins/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. 
<code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE) <code>LAST_4</code> Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/ssns-and-tins/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/ssns-and-tins/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"ssn-tin-example\",\n   \"identifiers\": {\n      \"ssn\": {\n         \"ssnFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/tracking-numbers/","title":"Tracking Numbers","text":""},{"location":"policies/filters/common_filters/tracking-numbers/#filter","title":"Filter","text":"<p>This filter identifies tracking numbers in text. 
FedEx, UPS, and USPS tracking number formats are supported.</p>"},{"location":"policies/filters/common_filters/tracking-numbers/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/tracking-numbers/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>trackingNumberFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/tracking-numbers/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE) <code>LAST_4</code> Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/tracking-numbers/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. 
<code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/tracking-numbers/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"tracking-numbers-example\",\n   \"identifiers\": {\n      \"trackingNumber\": {\n         \"trackingNumberFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/urls/","title":"URLs","text":""},{"location":"policies/filters/common_filters/urls/#filter","title":"Filter","text":"<p>This filter identifies URLs such as <code>myhomepage.com</code>, <code>http://myhomepage.com/folder/page.html</code>, and <code>www.myhomepage.com/folder/page.html</code> in text.</p>"},{"location":"policies/filters/common_filters/urls/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/urls/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>urlFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>requireHttpWwwPrefix</code> When set to true, only URLs that begin with <code>http</code> or <code>www</code> will be filtered. <code>true</code>"},{"location":"policies/filters/common_filters/urls/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. 
When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/common_filters/urls/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/urls/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"urls-example\",\n   \"identifiers\": {\n      \"url\": {\n         \"requireHttpWwwPrefix\": true,\n         \"urlFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/vins/","title":"VINs","text":""},{"location":"policies/filters/common_filters/vins/#filter","title":"Filter","text":"<p>This filter identifies 17-character vehicle identification numbers (VINs) such as <code>WBAPM7G50ANL19218</code> and <code>1GBJC34K3RE176005</code> in text.</p>"},{"location":"policies/filters/common_filters/vins/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/vins/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>vinFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/common_filters/vins/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. 
<code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>FPE_ENCRYPT_REPLACE</code> Replace the sensitive text with a value generated by format-preserving encryption (FPE) <code>LAST_4</code> Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/common_filters/vins/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/vins/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"vins-example\",\n   \"identifiers\": {\n      \"vin\": {\n         \"vinFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/common_filters/zip-codes/","title":"Zip Codes","text":""},{"location":"policies/filters/common_filters/zip-codes/#filter","title":"Filter","text":"<p>This filter identifies zip codes in text.</p>"},{"location":"policies/filters/common_filters/zip-codes/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/common_filters/zip-codes/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default 
Value <code>zipCodeFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>requireDelimiter</code> When set to false, the filter will not require a dash in 9 digit zip codes, e.g. 12345-6789. Setting to false may increase the number of zip code false positives. <code>true</code>"},{"location":"policies/filters/common_filters/zip-codes/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>TRUNCATE</code> Replace the sensitive text by removing the last <code>x</code> digits. (Set the number of digits using the <code>truncateDigits</code> parameter of the filter strategy.) <code>ZERO_LEADING</code> Replace the sensitive text by zeroing the first 3 digits."},{"location":"policies/filters/common_filters/zip-codes/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. 
<code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code> <code>POPULATION</code> Compares the population of the zip code against the 2010 census values. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/common_filters/zip-codes/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"zip-code-example\",\n   \"identifiers\": {\n      \"zipCode\": {\n         \"zipCodeFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/custom_filters/dictionary/","title":"Dictionary","text":""},{"location":"policies/filters/custom_filters/dictionary/#filter","title":"Filter","text":"<p>This filter identifies custom text based on a given dictionary.</p>"},{"location":"policies/filters/custom_filters/dictionary/#required-parameters","title":"Required Parameters","text":"<p>At least one of <code>terms</code> or <code>files</code> must be provided.</p> Parameter Description Default Value <code>terms</code> A list of terms in the dictionary. None <code>files</code> A list of files containing terms one per line. None"},{"location":"policies/filters/custom_filters/dictionary/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>fuzzy</code> When set to true, the dictionary will employ fuzzy comparisons. Use the <code>sensitivity</code> parameter to control the level of fuzziness. 
Setting this value to false will disable fuzziness and provide a higher level of performance. <code>false</code> <code>classification</code> Used to apply an arbitrary label to the identifier, such as \"patient-id\", or \"account-number.\" <code>\"custom-identifier\"</code> <code>sensitivity</code> Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are <code>low</code>, <code>medium</code>, and <code>high</code>. Only applies when <code>fuzzy</code> is set to <code>true</code>. <code>medium</code>"},{"location":"policies/filters/custom_filters/dictionary/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/custom_filters/dictionary/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/custom_filters/dictionary/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"dictionary-example\",\n   \"identifiers\": {\n      \"dictionaries\": [\n         {\n            \"terms\": [\"john\", \"jane\", \"doe\"],\n            \"files\": [\"c:\\\\temp\\\\dictionary.txt\"],\n            \"fuzzy\": true,\n            \"sensitivity\": \"medium\",\n            \"sectionFilterStrategies\": [\n               {\n                  \"strategy\": \"REDACT\",\n                  \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n               }\n            ]\n         }\n      ]\n   }   \n}\n</code></pre>"},{"location":"policies/filters/custom_filters/identifier/","title":"Identifier","text":""},{"location":"policies/filters/custom_filters/identifier/#filter","title":"Filter","text":"<p>This filter identifies custom text based on a given regular expression. </p> <p>The Identifier filter accepts a list of regular expression-based identifiers. See the policy at the bottom of this page for an example. </p> <p>Note that backslashes in the regular expression will need to be escaped for the policy to be valid JSON.</p>"},{"location":"policies/filters/custom_filters/identifier/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/custom_filters/identifier/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None <code>caseSensitive</code> When set to true, the regular expression will be case sensitive. 
<code>true</code> <code>classification</code> Used to apply an arbitrary label to the identifier, such as \"patient-id\", or \"account-number.\" <code>\"custom-identifier\"</code> <code>pattern</code> A regular expression for the identifier. Note that backslashes will need to be escaped. <code>\\b[A-Z0-9_-]{4,}\\b</code>"},{"location":"policies/filters/custom_filters/identifier/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>LAST_4</code> Replace the sensitive text with just the last four characters of the text."},{"location":"policies/filters/custom_filters/identifier/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code> <code>CLASSIFICATION</code> Compares the classification of the sensitive text. 
<code>==</code> , <code>!=</code>"},{"location":"policies/filters/custom_filters/identifier/#example-policy","title":"Example Policy","text":"<pre><code>{\n  \"name\": \"default\",\n  \"identifiers\": {\n    \"identifiers\": [\n      {\n        \"pattern\": \"[A-Z]{9}\",\n        \"caseSensitive\": false,\n        \"classification\": \"custom-identifier\",\n        \"enabled\": true,\n        \"identifierFilterStrategies\": [\n          {\n            \"strategy\": \"REDACT\",\n            \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n          }\n        ]        \n      }\n    ]\n  }\n}\n</code></pre>"},{"location":"policies/filters/locations/cities/","title":"Cities","text":""},{"location":"policies/filters/locations/cities/#filter","title":"Filter","text":"<p>This filter identifies common US cities as determined by the US census in text.</p>"},{"location":"policies/filters/locations/cities/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/locations/cities/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>cityFilterStrategies</code> A list of filter strategies. None <code>sensitivity</code> Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are <code>low</code>, <code>medium</code>, and <code>high</code>. <code>medium</code>"},{"location":"policies/filters/locations/cities/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. 
<code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/cities/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/locations/cities/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"cities-example\",\n   \"identifiers\": {\n      \"city\": {\n         \"sensitivity\": \"medium\",\n         \"cityFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/locations/counties/","title":"Counties","text":""},{"location":"policies/filters/locations/counties/#filter","title":"Filter","text":"<p>This filter identifies common US counties as determined by the US census in text.</p>"},{"location":"policies/filters/locations/counties/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/locations/counties/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>countyFilterStrategies</code> A list of filter strategies. 
None <code>sensitivity</code> Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are <code>low</code>, <code>medium</code>, and <code>high</code>. <code>medium</code>"},{"location":"policies/filters/locations/counties/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/counties/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/locations/counties/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"counties-example\",\n   \"identifiers\": {\n      \"county\": {\n         \"sensitivity\": \"medium\",\n         \"countyFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/locations/hospital-abbreviations/","title":"Hospital Abbreviations","text":""},{"location":"policies/filters/locations/hospital-abbreviations/#filter","title":"Filter","text":"<p>This filter identifies US hospital abbreviations in text.</p>"},{"location":"policies/filters/locations/hospital-abbreviations/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/locations/hospital-abbreviations/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>hospitalAbbreviationFilterStrategies</code> A list of filter strategies. None <code>sensitivity</code> Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are <code>low</code>, <code>medium</code>, and <code>high</code>. <code>medium</code>"},{"location":"policies/filters/locations/hospital-abbreviations/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in order as they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. 
<code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/hospital-abbreviations/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/locations/hospital-abbreviations/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"hospital-abbreviations-example\",\n   \"identifiers\": {\n      \"hospitalAbbreviation\": {\n         \"sensitivity\": \"medium\",\n         \"hospitalAbbreviationFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/locations/hospitals/","title":"Hospitals","text":""},{"location":"policies/filters/locations/hospitals/#filter","title":"Filter","text":"<p>This filter identifies US hospitals in text.</p>"},{"location":"policies/filters/locations/hospitals/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/locations/hospitals/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value 
<code>hospitalFilterStrategies</code> A list of filter strategies. None <code>sensitivity</code> Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are <code>low</code>, <code>medium</code>, and <code>high</code>. <code>medium</code>"},{"location":"policies/filters/locations/hospitals/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/hospitals/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/locations/hospitals/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"hospitals-example\",\n   \"identifiers\": {\n      \"hospital\": {\n         \"sensitivity\": \"medium\",\n         \"hospitalFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/locations/state-abbreviations/","title":"State Abbreviations","text":""},{"location":"policies/filters/locations/state-abbreviations/#filter","title":"Filter","text":"<p>This filter identifies US state abbreviations in text.</p>"},{"location":"policies/filters/locations/state-abbreviations/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/locations/state-abbreviations/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>stateAbbreviationsFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/locations/state-abbreviations/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. 
<code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/state-abbreviations/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/locations/state-abbreviations/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"states-abbreviations-example\",\n   \"identifiers\": {\n      \"stateAbbreviation\": {\n         \"stateAbbreviationFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/locations/states/","title":"States","text":""},{"location":"policies/filters/locations/states/#filter","title":"Filter","text":"<p>This filter identifies US states in text.</p>"},{"location":"policies/filters/locations/states/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/locations/states/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>stateFilterStrategies</code> A list of filter strategies. 
None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/locations/states/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/locations/states/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/locations/states/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"states-example\",\n   \"identifiers\": {\n      \"state\": {\n         \"stateFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/persons_names/first-names/","title":"First Names","text":""},{"location":"policies/filters/persons_names/first-names/#filter","title":"Filter","text":"<p>This filter identifies common first names as identified by the US census in text.</p>"},{"location":"policies/filters/persons_names/first-names/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/persons_names/first-names/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>sensitivity</code> Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. Valid values are <code>low</code>, <code>medium</code>, and <code>high</code>. <code>medium</code> <code>firstNameFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/first-names/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. 
See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/persons_names/first-names/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/persons_names/first-names/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"first-names-example\",\n   \"identifiers\": {\n      \"firstName\": {\n         \"firstNameFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/persons_names/persons-names-ner/","title":"Person's Names (NER)","text":""},{"location":"policies/filters/persons_names/persons-names-ner/#filter","title":"Filter","text":"<p>This filter identifies person's names based on natural language processing (NLP) and named-entity recognition (NER) in text.</p>"},{"location":"policies/filters/persons_names/persons-names-ner/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required 
parameters.</p>"},{"location":"policies/filters/persons_names/persons-names-ner/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>removePunctuation</code> When set to true, punctuation will be removed prior to analysis. <code>false</code> <code>nerFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/persons-names-ner/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value. <code>ABBREVIATE</code> Replace the sensitive text with the initials of the text."},{"location":"policies/filters/persons_names/persons-names-ner/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/persons_names/persons-names-ner/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"ner-example\",\n   \"identifiers\": {\n      \"ner\": {\n         \"nerFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/persons_names/physician-names-ner/","title":"Physician Names","text":""},{"location":"policies/filters/persons_names/physician-names-ner/#filter","title":"Filter","text":"<p>This filter identifies physician names (e.g. Dr. John Smith) in text.</p>"},{"location":"policies/filters/persons_names/physician-names-ner/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/persons_names/physician-names-ner/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>physicianNameFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/physician-names-ner/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. 
<code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/persons_names/physician-names-ner/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. <code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/persons_names/physician-names-ner/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"physician-names-example\",\n   \"identifiers\": {\n      \"physicianName\": {\n         \"physicianNameFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"policies/filters/persons_names/surnames/","title":"Surnames","text":""},{"location":"policies/filters/persons_names/surnames/#filter","title":"Filter","text":"<p>This filter identifies common surnames as identified by the US census in text.</p>"},{"location":"policies/filters/persons_names/surnames/#required-parameters","title":"Required Parameters","text":"<p>This filter has no required parameters.</p>"},{"location":"policies/filters/persons_names/surnames/#optional-parameters","title":"Optional Parameters","text":"Parameter Description Default Value <code>sensitivity</code> Controls the \"fuzziness\" of allowed values to account for misspellings and derivations. 
Valid values are <code>low</code>, <code>medium</code>, and <code>high</code>. <code>medium</code> <code>surnameFilterStrategies</code> A list of filter strategies. None <code>enabled</code> When set to false, the filter will be disabled and not applied <code>true</code> <code>ignored</code> A list of terms to be ignored by the filter. None"},{"location":"policies/filters/persons_names/surnames/#filter-strategies","title":"Filter Strategies","text":"<p>The filter may have zero or more filter strategies. When no filter strategy is given the default strategy of <code>REDACT</code> is used. When multiple filter strategies are given the filter strategies will be applied in the order they are listed. See Filter Strategies for details.</p> Strategy Description <code>REDACT</code> Replace the sensitive text with a placeholder. <code>RANDOM_REPLACE</code> Replace the sensitive text with a similar, random value. <code>STATIC_REPLACE</code> Replace the sensitive text with a given value. <code>CRYPTO_REPLACE</code> Replace the sensitive text with its encrypted value. <code>HASH_SHA256_REPLACE</code> Replace the sensitive text with its SHA256 hash value."},{"location":"policies/filters/persons_names/surnames/#conditions","title":"Conditions","text":"<p>Each filter strategy may have one condition. See Conditions for details.</p> Conditional Description Operators <code>TOKEN</code> Compares the value of the sensitive text. <code>==</code> , <code>!=</code> <code>CONTEXT</code> Compares the filtering context. <code>==</code> , <code>!=</code> <code>CONFIDENCE</code> Compares the confidence in the sensitive text against a threshold value. 
<code>&lt;</code> , <code>&lt;=</code>, <code>&gt;</code> , <code>&gt;=</code>, <code>==</code>, <code>!=</code>"},{"location":"policies/filters/persons_names/surnames/#example-policy","title":"Example Policy","text":"<pre><code>{\n   \"name\": \"surnames-example\",\n   \"identifiers\": {\n      \"surname\": {\n         \"surnameFilterStrategies\": [\n            {\n               \"strategy\": \"REDACT\",\n               \"redactionFormat\": \"{{{REDACTED-%t}}}\"\n            }\n         ]\n      }\n   }\n}\n</code></pre>"},{"location":"quick_starts/quick_start_aws/","title":"Philter Quick Start on AWS","text":"<p>Philter on AWS is a virtual machine-based product. It runs in EC2 on its own EC2 instance. A free trial period is available during which there is no charge for the Philter software but there may be charges for the underlying AWS infrastructure.</p> <p>Cloud virtual machines launched from a cloud marketplace may not be immediately suitable for a HIPAA environment. Refer to your compliance officer for your organization's requirements to ensure compliance with all relevant regulations.</p> <p>Here\u2019s a brief screen cast showing how to launch Philter in AWS.</p>"},{"location":"quick_starts/quick_start_aws/#launch-philter-in-aws","title":"Launch Philter in AWS","text":"<ol> <li>Go to Philter in the AWS Marketplace. On this page you can see the Philter overview, the pricing, and the supported EC2 instance types.</li> <li>Select an instance type. We recommend <code>m5.large</code>. The smaller instance types are intended only for testing and are not well-suited for production usage.</li> <li>Click the Continue to Subscribe button.</li> <li>View and accept Philter\u2019s license agreement. Then click Accept Terms.</li> <li>The subscription will now be created and you will be notified when it is ready! This usually only takes less than a minute.</li> <li>Click the Continue to Configuration button to select the AMI, the version, and the region. 
We recommend using the newest version if multiple are available.</li> <li>Click the Continue to Launch button to launch Philter in your AWS account!</li> </ol> <p>AWS will automatically open ports <code>22</code> (SSH) and <code>8080</code> (Philter API) for the Philter instance's security group. These ports are required to be open but you may want to modify the security groups to limit their scope of availability by restricting access to specific CIDR ranges.</p> <p>Congratulations! You have deployed Philter in AWS. You are now ready to filter text!</p>"},{"location":"quick_starts/quick_start_aws/#try-it-out","title":"Try it out!","text":"<p>With Philter now running we can take it for a spin. We will send some text to Philter and inspect the response we get back. The Philter virtual machine running in your cloud account should have a public IP address (unless you customized the deployment). We will use that public IP address to interact with Philter.</p> <p>Philter, by default, will be configured with an HTTPS listener on port 8080 using a self-signed certificate. It is recommended that prior to use in a production environment the self-signed certificate is replaced by a valid certificate owned by your organization.</p> <p>In the command below, replace <code>&lt;PUBLIC_IP&gt;</code> with the virtual machine\u2019s public IP address or public host name.</p> <pre><code>curl -k -X POST https://&lt;PUBLIC_IP&gt;:8080/api/filter --data \"George Washington was a patient and his SSN is 123-45-6789.\" -H \"Content-type: text/plain\"\n</code></pre> <p>With this command we are sending the text in the command to Philter for filtering. Philter will identify the patient name (George Washington) and the SSN (123-45-6789) and redact those values in the response. 
You can always use curl to send text to Philter as in these examples but there are also SDKs you can use, too, to integrate Philter with your applications.</p>"},{"location":"quick_starts/quick_start_aws/#redacting-sensitive-information-from-text","title":"Redacting Sensitive Information from Text","text":"<p>The types of sensitive information that Philter identifies and removes are controlled by policies. By default, Philter includes a filter profile that includes many of the types of sensitive information, such as names and social security numbers. We can send text to Philter for filtering using this default filter profile with the following command:</p> <pre><code>curl -k -X POST https://localhost:8080/api/filter -d @file.txt -H \"Content-Type: text/plain\"\n</code></pre> <p>This command sends the contents of the file <code>file.txt</code> to Philter. Philter will apply the enabled filters and return a plain-text response consisting of the filtered text. (Replace localhost with the IP address or host name of Philter if you are not running the command where Philter is running.) You can also send text directly in the request instead of sending it as a file:</p> <pre><code>curl -k -X POST https://localhost:8080/api/filter --data \"Your text goes here...\" -H \"Content-type: text/plain\"\n</code></pre>"},{"location":"quick_starts/quick_start_aws/#next-steps","title":"Next Steps","text":"<p>Now that you have Philter running and know how to send text to it you are ready to integrate Philter into your existing workflow and systems. Philter\u2019s API details how to send files to Philter. 
Clients for some languages for Philter\u2019s API are available on GitHub.</p> <p>Be sure to check out Policies to see how you can customize the types of sensitive information Philter redacts!</p>"},{"location":"quick_starts/quick_start_aws/#example-uses","title":"Example Uses","text":"<p>Here are a few examples showing how to use Philter with some common big-data and streaming applications.</p> Description Technologies Remove sensitive information from text in an Apache NiFi dataflow Apache NiFi Remove sensitive information from text using AWS Lambda in an Amazon Kinesis Firehose pipeline Amazon Kinesis, AWS Lambda Removing PII/PHI from OpenAI ChatGPT API Requests ChatGPT Redacting PHI and PII in Apache Kafka Data streams Apache Kafka"},{"location":"quick_starts/quick_start_azure/","title":"Philter Quick Start on Microsoft Azure","text":"<p>Philter on Microsoft Azure is a virtual machine-based product. A free trial period is available during which there is no charge for the Philter software but there may be charges for the underlying Azure infrastructure.</p> <p>Cloud virtual machines launched from a cloud marketplace may not be immediately suitable for a HIPAA environment. 
Refer to your compliance officer for your organization's requirements to ensure compliance with all relevant regulations.</p>"},{"location":"quick_starts/quick_start_azure/#launch-philter-on-microsoft-azure","title":"Launch Philter on Microsoft Azure","text":"<ol> <li>Go to Philter in the Azure Marketplace.</li> <li>Click the Get It Now button.</li> <li>Review the information that is shown on the popup and click Continue when ready.</li> <li>You will now be asked to log in to your Microsoft Azure account if you were not already logged in.</li> <li>Click the Create button to begin making a Philter virtual machine.</li> <li>Enter the required details of the virtual machine and click the Review + create button.</li> <li>Review the virtual machine details and click Create when ready!</li> </ol> <p>Your Philter virtual machine will now be launching.</p> <p>Microsoft Azure will automatically open ports <code>22</code> (SSH) and <code>8080</code> (Philter API). These ports are required to be open but you may want to modify the security groups to limit their scope of availability by restricting access to specific CIDR ranges.</p> <p>Congratulations! You have deployed Philter in Azure. You are now ready to filter text!</p>"},{"location":"quick_starts/quick_start_azure/#try-it-out","title":"Try it out!","text":"<p>With Philter now running we can take it for a spin. We will send some text to Philter and inspect the response we get back. The Philter virtual machine running in your cloud account should have a public IP address (unless you customized the deployment). We will use that public IP address to interact with Philter.</p> <p>Philter, by default, will be configured with an HTTPS listener on port 8080 using a self-signed certificate. 
It is recommended that prior to use in a production environment the self-signed certificate is replaced by a valid certificate owned by your organization.</p> <p>In the command below, replace <code>&lt;PUBLIC_IP&gt;</code> with the virtual machine\u2019s public IP address or public host name.</p> <pre><code>curl -k -X POST https://&lt;PUBLIC_IP&gt;:8080/api/filter --data \"George Washington was a patient and his SSN is 123-45-6789.\" -H \"Content-type: text/plain\"\n</code></pre> <p>With this command we are sending the text in the command to Philter for filtering. Philter will identify the patient name (George Washington) and the SSN (123-45-6789) and redact those values in the response. You can always use curl to send text to Philter as in these examples but there are also SDKs you can use, too, to integrate Philter with your applications.</p>"},{"location":"quick_starts/quick_start_azure/#redacting-sensitive-information-from-text","title":"Redacting Sensitive Information from Text","text":"<p>The types of sensitive information that Philter identifies and removes are controlled by policies. By default, Philter includes a filter profile that includes many of the types of sensitive information, such as names and social security numbers. We can send text to Philter for filtering using this default filter profile with the following command:</p> <pre><code>curl -k -X POST https://localhost:8080/api/filter -d @file.txt -H \"Content-Type: text/plain\"\n</code></pre> <p>This command sends the contents of the file <code>file.txt</code> to Philter. Philter will apply the enabled filters and return a plain-text response consisting of the filtered text. (Replace localhost with the IP address or host name of Philter if you are not running the command where Philter is running.) 
You can also send text directly in the request instead of sending it as a file:</p> <pre><code>curl -k -X POST https://localhost:8080/api/filter --data \"Your text goes here...\" -H \"Content-type: text/plain\"\n</code></pre>"},{"location":"quick_starts/quick_start_azure/#next-steps","title":"Next Steps","text":"<p>Now that you have Philter running and know how to send text to it you are ready to integrate Philter into your existing workflow and systems. Philter\u2019s API details how to send files to Philter. Clients for some languages for Philter\u2019s API are available on GitHub.</p> <p>Be sure to check out Policies to see how you can customize the types of sensitive information Philter redacts!</p>"},{"location":"quick_starts/quick_start_azure/#example-uses","title":"Example Uses","text":"<p>Here are a few examples showing how to use Philter with some common big-data and streaming applications.</p> Description Technologies Remove sensitive information from text in an Apache NiFi dataflow Apache NiFi Remove sensitive information from text using AWS Lambda in an Amazon Kinesis Firehose pipeline Amazon Kinesis, AWS Lambda Removing PII/PHI from OpenAI ChatGPT API Requests ChatGPT Redacting PHI and PII in Apache Kafka Data streams Apache Kafka <p> </p>"},{"location":"quick_starts/quick_start_gcp/","title":"Philter Quick Start on Google Cloud","text":"<p>Philter on Google Cloud is a virtual machine-based product. A free trial period is available during which there is no charge for the Philter software but there may be charges for the underlying Google Cloud infrastructure.</p> <p>Cloud virtual machines launched from a cloud marketplace may not be immediately suitable for a HIPAA environment. 
Refer to your compliance officer for your organization's requirements to ensure compliance with all relevant regulations.</p>"},{"location":"quick_starts/quick_start_gcp/#launch-philter-in-google-cloud","title":"Launch Philter in Google Cloud","text":"<ol> <li>Go to Philter in the Google Cloud Marketplace.</li> <li>Click the Launch on Compute Engine button.</li> </ol> <p>Virtual Machine Recommendations</p> <p>The general purpose machine type is n2-standard-2 and this machine type should be adequate for most use-cases. We recommend 8 vCPUs and 8-16 GB of RAM for a production deployment.</p> <p>Google Cloud will automatically open ports <code>22</code> (SSH) and <code>8080</code> (Philter API). These ports are required to be open but you may want to modify the security groups to limit their scope of availability by restricting access to specific CIDR ranges.</p> <p>Congratulations! You have deployed Philter in Google Cloud. You are now ready to filter text!</p>"},{"location":"quick_starts/quick_start_gcp/#try-it-out","title":"Try it out!","text":"<p>With Philter now running we can take it for a spin. We will send some text to Philter and inspect the response we get back. The Philter virtual machine running in your cloud account should have a public IP address (unless you customized the deployment). We will use that public IP address to interact with Philter.</p> <p>Philter, by default, will be configured with an HTTPS listener on port 8080 using a self-signed certificate. 
It is recommended that prior to use in a production environment the self-signed certificate is replaced by a valid certificate owned by your organization.</p> <p>In the command below, replace <code>&lt;PUBLIC_IP&gt;</code> with the virtual machine\u2019s public IP address or public host name.</p> <pre><code>curl -k -X POST https://&lt;PUBLIC_IP&gt;:8080/api/filter --data \"George Washington was a patient and his SSN is 123-45-6789.\" -H \"Content-type: text/plain\"\n</code></pre> <p>With this command we are sending the text in the command to Philter for filtering. Philter will identify the patient name (George Washington) and the SSN (123-45-6789) and redact those values in the response. You can always use curl to send text to Philter as in these examples but there are also SDKs you can use to integrate Philter with your applications.</p>"},{"location":"quick_starts/quick_start_gcp/#redacting-sensitive-information-from-text","title":"Redacting Sensitive Information from Text","text":"<p>The types of sensitive information that Philter identifies and removes are controlled by policies. By default, Philter includes a filter profile that includes many of the types of sensitive information, such as names and social security numbers. We can send text to Philter for filtering using this default filter profile with the following command:</p> <pre><code>curl -k -X POST https://localhost:8080/api/filter -d @file.txt -H \"Content-Type: text/plain\"\n</code></pre> <p>This command sends the contents of the file <code>file.txt</code> to Philter. Philter will apply the enabled filters and return a plain-text response consisting of the filtered text. (Replace localhost with the IP address or host name of Philter if you are not running the command where Philter is running.) 
You can also send text directly in the request instead of sending it as a file:</p> <pre><code>curl -k -X POST https://localhost:8080/api/filter --data \"Your text goes here...\" -H \"Content-type: text/plain\"\n</code></pre>"},{"location":"quick_starts/quick_start_gcp/#next-steps","title":"Next Steps","text":"<p>Now that you have Philter running and know how to send text to it you are ready to integrate Philter into your existing workflow and systems. Philter\u2019s API details how to send files to Philter. Clients for some languages for Philter\u2019s API are available on GitHub.</p> <p>Be sure to check out Policies to see how you can customize the types of sensitive information Philter redacts!</p>"},{"location":"quick_starts/quick_start_gcp/#example-uses","title":"Example Uses","text":"<p>Here are a few examples showing how to use Philter with some common big-data and streaming applications.</p> Description Technologies Remove sensitive information from text in an Apache NiFi dataflow Apache NiFi Remove sensitive information from text using AWS Lambda in an Amazon Kinesis Firehose pipeline Amazon Kinesis, AWS Lambda Removing PII/PHI from OpenAI ChatGPT API Requests ChatGPT Redacting PHI and PII in Apache Kafka Data streams Apache Kafka <p> </p>"}]}
\ No newline at end of file
diff --git a/settings/index.html b/settings/index.html
index a7b6630..cfaa491 100644
--- a/settings/index.html
+++ b/settings/index.html
@@ -679,6 +679,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/sitemap.xml b/sitemap.xml
index c9aa42c..0c9b70c 100644
--- a/sitemap.xml
+++ b/sitemap.xml
@@ -60,6 +60,14 @@
          <loc>https://philterd.github.io/philter/other_features/span_disambiguation/</loc>
          <lastmod>2024-10-23</lastmod>
     </url>
+    <url>
+         <loc>https://philterd.github.io/philter/policies/document_analysis/</loc>
+         <lastmod>2024-10-23</lastmod>
+    </url>
+    <url>
+         <loc>https://philterd.github.io/philter/policies/excluding_by_document_type/</loc>
+         <lastmod>2024-10-23</lastmod>
+    </url>
     <url>
          <loc>https://philterd.github.io/philter/policies/filter_policies/</loc>
          <lastmod>2024-10-23</lastmod>
@@ -80,6 +88,10 @@
          <loc>https://philterd.github.io/philter/policies/sample_policies/</loc>
          <lastmod>2024-10-23</lastmod>
     </url>
+    <url>
+         <loc>https://philterd.github.io/philter/policies/splitting_input_text/</loc>
+         <lastmod>2024-10-23</lastmod>
+    </url>
     <url>
          <loc>https://philterd.github.io/philter/policies/filters/common_filters/ages/</loc>
          <lastmod>2024-10-23</lastmod>
diff --git a/sitemap.xml.gz b/sitemap.xml.gz
index 5433a1beeda7af50fbcaa7469fe158fdae9ed0b5..c31547df749c4877a729ed75a4d56e08d2f86b49 100644
GIT binary patch
delta 715
zcmV;+0yO=r1=a<S7k`<}a@!yffbV$<kMFXaw1>9iIJZ7Qdu9+?OFI@nVZn~_^j+jQ
z%{bFbFRTtSAOxRbf6?yg#ZIdeL_+T#*Y}(4dIiq-9^LS`{{8D`^KJdKzuV<F5u_D&
zcJ#O&Qwl%Y_Iy5X2oqQxXyfD1k{P)|?OS8J-F#o~?^b-30e>!~<=Rc&?8_*`!-Ns1
z1hL-?n8w^~&|jO{U0XVSd{0xdzTZD=AHFvC+ved5cX?~OOw_*5^q+IWNl#fPUVtV*
zJosp}Gf)MEm%MOLoylk7T~raCGe)rBQXP&QhNz1?R++sA?D?<b0S(K9cWsx=5t103
zUMFa1#DgHD+kXZTRBYAa9OUo>M8yx43jt*iMd-CDrAE8HVLN^lBT^B<q|IW4ssQ;k
zLR1IPDMz4IPY^3v!&?&P-E8r-o}%3VQP58HNZNKd<a$`aPiT;VdT(-(8>O9|W<siq
z121OE#ha;aR_|<S4p5itu+-IiT>=cH$lh%bf>Rj+2Y=^dksAWX@im@6p<ANU`Z~ti
z9wrbXJQOBe!e(BdkXg6jCCQK<<E`~hU8{PKBG@-zop#42`n=>oF59lU@c#=Glkv#$
zO5d~ZA)(7@IIv<+9ebI4C4~@57O@;nG~A1g9*CAjuo@@fl8CI?jq$xyydZr2tTw_4
zgC+_-CV$bY5=QT?%G|JbE&+{q@>yjP5-dr(P&jh594BxrA(N1E40Dto=#=AkkzgmO
z+xp7RM3Se=l)U{L*x;hGI?f{TuBUI6zQBCK<M;H%#u?6hrR?bkDxaN1uF+G#q&Y0@
zx(FxKf*i$XN^xM6%EuMaYquZ@p$dFsCgrqUT0|pp7`l`N;=R=_fQ^%4>24s$nfN&;
xZK5$L$9ntxoY5Q=>p}S<^P!^jSsKu5jvxzZFya4!mLGz&{{tNDIf_Lh0046xWAp$3

delta 672
zcmV;R0$=^s1*-*+7k`+&a@#NrfcHFw&$}!q?a=mYXUh|`vyLKR31{&q0BOg0`a*J?
z=A7%2MbID-6h(d_0TAHk@x>2|BM@T@+vWXgy<9-B(P8Me%Rj%Lsvpb8-QA|pz>pWx
zIPiAa=bV1k_33n4F=p^4@hVbRv+YIT`mM5FuYN9fcMG}34u3AC>Dmm@?#5nNx=B3_
z8OW_V%zbHB7_U|JrXCx9{LXOp(d{1A58tc%b@lK~n!MFr2CCmj`r?>yG($02CZI|{
z`$*md3$viel#d)d&){d4Rcr{J3lew;c^;0EIx?d^&N8|Kbn@%CM=LrJL){j8fUE|`
znG6;8xL1U9TYsT~O1v7KlOCRg*vQK!q=Y(%2%NEFsWQP;;>QnaREi)C#!gOX7ND4W
zpk@yy7Xq%$05p>~yd?=SoISqIXJ|J-oJ?pOvhgi;Wj?GF2ec?a`$=xzWeqT#N!)g)
z1ZQRl>_Q~*Gz~+Lz!8toV3@+L`ISf1I1QjmdAPRG`+pdkt7tl<V66#<iei~U=}>$-
zyQ=*U6tj&e+n#gc!dU5aTG^ik)R5E3H%6jL$q2^`w~|)kVnDSnO5Tiva!o|B?8-)`
z6)!2DKX0sZ!ep3Jq^#OhQXj+Bm@9b6WB3q*epa1?3{&o>6pjN=S*gSlIteABw+HQk
z&UlKe5`XNdby;6!Gtsou70%KB3v6(*Swm-)c-KRmnGW(l;mLRA;^VC3nOfHL163?R
zBUc|eVKx$$wk^RCjiQa>Go?B(&f^)q>baW`l~5(Q2~!h{pK6IE44ujf@!q5hU=_4@
zx*N#;%<^86Hr1G1XufrRE@%&p*ui+P|3PTXNf{f^Ya!5u)Y<fPpy|PocYgsW?g~AO
GAOHZk1W5q^

diff --git a/system_requirements/index.html b/system_requirements/index.html
index d32367f..c621d11 100644
--- a/system_requirements/index.html
+++ b/system_requirements/index.html
@@ -691,6 +691,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../policies/ignoring_sensitive_information/" class="md-nav__link">
         
diff --git a/upgrading/index.html b/upgrading/index.html
index 80ccb00..6b7076d 100644
--- a/upgrading/index.html
+++ b/upgrading/index.html
@@ -739,6 +739,112 @@
   
   
   
+    
+    
+    
+    
+    <li class="md-nav__item md-nav__item--nested">
+      
+        
+        
+        <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_6_5" >
+        
+          
+          <label class="md-nav__link" for="__nav_6_5" id="__nav_6_5_label" tabindex="0">
+            
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+            <span class="md-nav__icon md-icon"></span>
+          </label>
+        
+        <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_6_5_label" aria-expanded="false">
+          <label class="md-nav__title" for="__nav_6_5">
+            <span class="md-nav__icon md-icon"></span>
+            Document Analysis
+          </label>
+          <ul class="md-nav__list" data-md-scrollfix>
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../policies/document_analysis/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Document Analysis
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../policies/excluding_by_document_type/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Excluding by DocumentType
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+          </ul>
+        </nav>
+      
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
+    <li class="md-nav__item">
+      <a href="../policies/splitting_input_text/" class="md-nav__link">
+        
+  
+  <span class="md-ellipsis">
+    Splitting Input Text
+  </span>
+  
+
+      </a>
+    </li>
+  
+
+              
+            
+              
+                
+  
+  
+  
+  
     <li class="md-nav__item">
       <a href="../policies/ignoring_sensitive_information/" class="md-nav__link">