From 5028c48fef1ea7ab36bce371a75397947dcf37f4 Mon Sep 17 00:00:00 2001 From: JAM <272807+JessieAMorris@users.noreply.github.com> Date: Tue, 17 Dec 2024 16:32:46 -0700 Subject: [PATCH 1/3] Enable outputting the replacement value on PDFs --- .../phileas/services/EndToEndTestsHelper.java | 18 +++++ .../services/PhileasFilterServiceTest.java | 16 ++++- .../phileas/model/policy/config/Pdf.java | 29 +++++++- .../ai/philterd/services/pdf/PdfRedacter.java | 70 ++++++++++++++++--- 4 files changed, 121 insertions(+), 12 deletions(-) diff --git a/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTestsHelper.java b/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTestsHelper.java index 6bccad289..abac0c6d3 100644 --- a/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTestsHelper.java +++ b/phileas-core/src/test/java/ai/philterd/test/phileas/services/EndToEndTestsHelper.java @@ -62,6 +62,11 @@ import ai.philterd.phileas.model.policy.filters.strategies.rules.VinFilterStrategy; import ai.philterd.phileas.model.policy.filters.strategies.rules.ZipCodeFilterStrategy; import org.apache.commons.io.FileUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.pdfbox.Loader; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.text.PDFTextStripper; import java.io.File; import java.io.IOException; @@ -72,6 +77,7 @@ import java.util.Set; public class EndToEndTestsHelper { + private static final Logger LOGGER = LogManager.getLogger(EndToEndTestsHelper.class); public static Policy getPolicyWithSentiment(String policyName) throws IOException { @@ -454,4 +460,16 @@ public static Policy getPolicyJustPhoneNumber(String policyName) { } + public static boolean documentContainsText(byte[] doc, String needle) throws IOException { + try (PDDocument pdDocument = Loader.loadPDF(doc)) { + PDFTextStripper textStripper = new PDFTextStripper(); + String pdfText = 
textStripper.getText(pdDocument); + + if(pdfText.trim().isEmpty()) { + LOGGER.warn("documentContainsText called on a PDF with no text streams"); + } + + return pdfText.contains(needle); + } + } } diff --git a/phileas-core/src/test/java/ai/philterd/test/phileas/services/PhileasFilterServiceTest.java b/phileas-core/src/test/java/ai/philterd/test/phileas/services/PhileasFilterServiceTest.java index 147dbb261..d4482e7db 100644 --- a/phileas-core/src/test/java/ai/philterd/test/phileas/services/PhileasFilterServiceTest.java +++ b/phileas-core/src/test/java/ai/philterd/test/phileas/services/PhileasFilterServiceTest.java @@ -46,6 +46,7 @@ import java.util.List; import java.util.Properties; +import static ai.philterd.test.phileas.services.EndToEndTestsHelper.documentContainsText; import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPdfPolicy; import static ai.philterd.test.phileas.services.EndToEndTestsHelper.getPolicy; @@ -106,6 +107,8 @@ public void pdf1() throws Exception { final byte[] document = IOUtils.toByteArray(is); is.close(); + Assertions.assertTrue(documentContainsText(document, "Wendy")); + final Path temp = Files.createTempDirectory("philter"); final File file1 = Paths.get(temp.toFile().getAbsolutePath(), "pdf.json").toFile(); @@ -131,8 +134,10 @@ public void pdf1() throws Exception { LOGGER.info("Spans: {}", response.getExplanation().appliedSpans().size()); showSpans(response.getExplanation().appliedSpans()); - // TODO: How to assert? MD5 gives a different value each time. 
- + // TODO: This is asserting that it doesn't contain anything as a text stream + // but it's possible that they're in the images, we would need to OCR + // the files for this assertion to be truly valuable + Assertions.assertFalse(documentContainsText(response.getDocument(), "Wendy")); } @Test @@ -142,6 +147,8 @@ public void pdf2() throws Exception { final byte[] document = IOUtils.toByteArray(is); is.close(); + Assertions.assertTrue(documentContainsText(document, "90210")); + final Path temp = Files.createTempDirectory("philter"); final File file1 = Paths.get(temp.toFile().getAbsolutePath(), "pdf.json").toFile(); @@ -170,7 +177,10 @@ public void pdf2() throws Exception { // output: // characterStart: 35; characterEnd: 40; filterType: zip-code; context: context; documentId: documentid; confidence: 0.9; text: 90210; replacement: {{{REDACTED-zip-code}}}; salt: ; ignored: false; classification: null; - // TODO: How to assert? MD5 gives a different value each time. + // TODO: This is asserting that it doesn't contain anything as a text stream + // but it's possible that they're in the images, we would need to OCR + // the files for this assertion to be truly valuable + Assertions.assertFalse(documentContainsText(response.getDocument(), "90210")); } diff --git a/phileas-model/src/main/java/ai/philterd/phileas/model/policy/config/Pdf.java b/phileas-model/src/main/java/ai/philterd/phileas/model/policy/config/Pdf.java index 75b8e8328..4872142d2 100644 --- a/phileas-model/src/main/java/ai/philterd/phileas/model/policy/config/Pdf.java +++ b/phileas-model/src/main/java/ai/philterd/phileas/model/policy/config/Pdf.java @@ -20,10 +20,22 @@ public class Pdf { - @SerializedName("enabled") + @SerializedName("redactionColor") @Expose private String redactionColor = "black"; + @SerializedName("redactionFont") + @Expose + private String redactionFont = "Helvetica"; + + @SerializedName("redactionFontSize") + @Expose + private float redactionFontSize = 12; + + 
@SerializedName("redactionFontColor") + @Expose + private String redactionFontColor; + public String getRedactionColor() { return redactionColor; } @@ -32,4 +44,19 @@ public void setRedactionColor(String redactionColor) { this.redactionColor = redactionColor; } + public String getRedactionFont() { + return redactionFont; + } + + public void setRedactionFont(String redactionFont) { + this.redactionFont = redactionFont; + } + + public float getRedactionFontSize() { + return redactionFontSize; + } + + public String getRedactionFontColor() { + return redactionFontColor; + } } diff --git a/phileas-services/phileas-services-pdf/src/main/java/ai/philterd/services/pdf/PdfRedacter.java b/phileas-services/phileas-services-pdf/src/main/java/ai/philterd/services/pdf/PdfRedacter.java index b5c299eb5..b6c5d96eb 100644 --- a/phileas-services/phileas-services-pdf/src/main/java/ai/philterd/services/pdf/PdfRedacter.java +++ b/phileas-services/phileas-services-pdf/src/main/java/ai/philterd/services/pdf/PdfRedacter.java @@ -29,6 +29,9 @@ import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageContentStream; import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.font.Standard14Fonts; import org.apache.pdfbox.pdmodel.graphics.color.PDColor; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; import org.apache.pdfbox.pdmodel.graphics.image.JPEGFactory; @@ -44,8 +47,19 @@ import javax.imageio.ImageWriteParam; import javax.imageio.ImageWriter; import java.awt.image.BufferedImage; -import java.io.*; -import java.util.*; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.LinkedList; 
+import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.zip.ZipEntry; import java.util.zip.ZipOutputStream; @@ -64,13 +78,23 @@ public class PdfRedacter extends PDFTextStripper implements Redacter { private final List boundingBoxes; private static final Map COLORS = new LinkedHashMap<>(); + private static final Map FONTS = new LinkedHashMap<>(); static { + COLORS.put("white", new PDColor(new float[]{255, 255, 255}, PDDeviceRGB.INSTANCE)); COLORS.put("black", new PDColor(new float[]{0, 0, 0}, PDDeviceRGB.INSTANCE)); COLORS.put("red", new PDColor(new float[]{255, 0, 0}, PDDeviceRGB.INSTANCE)); COLORS.put("yellow", new PDColor(new float[]{1, 1, 100 / 255F}, PDDeviceRGB.INSTANCE)); + + FONTS.put("helvetica", new PDType1Font(Standard14Fonts.FontName.HELVETICA)); + FONTS.put("times", new PDType1Font(Standard14Fonts.FontName.TIMES_ROMAN)); + FONTS.put("courier", new PDType1Font(Standard14Fonts.FontName.COURIER)); } + private final float replacementFontSize; + private final PDFont replacementFont; + private final PDColor replacementFontColor; + public PdfRedacter(Policy policy, Set spans, PdfRedactionOptions pdfRedactionOptions, List boundingBoxes) throws IOException { @@ -79,6 +103,9 @@ public PdfRedacter(Policy policy, this.spans = spans; this.pdfRedactionOptions = pdfRedactionOptions; this.boundingBoxes = boundingBoxes; + replacementFont = FONTS.getOrDefault(policy.getConfig().getPdf().getRedactionFont(), FONTS.get("helvetica")); + replacementFontSize = policy.getConfig().getPdf().getRedactionFontSize(); + replacementFontColor = COLORS.getOrDefault(policy.getConfig().getPdf().getRedactionFontColor(), COLORS.get("white")); } @@ -209,24 +236,51 @@ protected void endDocument(PDDocument doc) throws IOException { for(int pageNumber : rectangles.keySet()) { final PDPage page = document.getPage(pageNumber); - final PDPageContentStream contentStream = new PDPageContentStream(doc, page, PDPageContentStream.AppendMode.APPEND, true, true); + final 
PDPageContentStream rectContentStream = new PDPageContentStream(doc, page, PDPageContentStream.AppendMode.APPEND, true, true); + final PDPageContentStream textContentStream = new PDPageContentStream(doc, page, PDPageContentStream.AppendMode.APPEND, true, true); for(final RedactedRectangle rectangle : rectangles.get(pageNumber)) { - contentStream.addRect( + rectContentStream.addRect( rectangle.getPdRectangle().getLowerLeftX(), rectangle.getPdRectangle().getLowerLeftY() - 3, rectangle.getPdRectangle().getWidth(), rectangle.getPdRectangle().getHeight() + buffer); + var replacementText = rectangle.getSpan().getReplacement(); + var rectangleWidth = rectangle.getPdRectangle().getWidth(); + + var boxFontSize = replacementFontSize; + float textWidth = (replacementFont.getStringWidth(replacementText) / 1000.0f) * boxFontSize; + while(textWidth > rectangleWidth) { + boxFontSize -= 1; + textWidth = (replacementFont.getStringWidth(replacementText) / 1000.0f) * boxFontSize; + } + + var textHeight = ( replacementFont.getFontDescriptor().getCapHeight()) / 1000 * boxFontSize; + + var textXLocation = (rectangle.getPdRectangle().getLowerLeftX() + + ((rectangle.getPdRectangle().getWidth() / 2.0f) - (textWidth / 2.0f))); + + var textYLocation = (rectangle.getPdRectangle().getLowerLeftY() + + ((rectangle.getPdRectangle().getHeight() / 2.0f) - (textHeight / 2.0f))); + + textContentStream.beginText(); + textContentStream.setNonStrokingColor(replacementFontColor); + textContentStream.setFont(replacementFont, boxFontSize); + textContentStream.newLineAtOffset(textXLocation, textYLocation); + textContentStream.showText(replacementText); + textContentStream.endText(); } // Get the color based on the filter. 
final PDColor pdColor = COLORS.getOrDefault(policy.getConfig().getPdf().getRedactionColor(), COLORS.get("black")); - contentStream.setNonStrokingColor(pdColor); - contentStream.setRenderingMode(RenderingMode.FILL); - contentStream.fill(); - contentStream.close(); + rectContentStream.setNonStrokingColor(pdColor); + rectContentStream.setRenderingMode(RenderingMode.FILL); + rectContentStream.fill(); + rectContentStream.close(); + + textContentStream.close(); } From 9ed2e7cba1ecfdb35b53c51a52b337c47e38f459 Mon Sep 17 00:00:00 2001 From: JAM <272807+JessieAMorris@users.noreply.github.com> Date: Wed, 18 Dec 2024 22:12:56 -0700 Subject: [PATCH 2/3] Disable PDF replacement by default, add test, minor refactor --- .../phileas/model/policy/config/Pdf.java | 44 ++++++++----- .../ai/philterd/services/pdf/PdfRedacter.java | 63 +++++++++++-------- .../java/ai/philterd/PdfRedacterTest.java | 49 +++++++++++++++ 3 files changed, 114 insertions(+), 42 deletions(-) diff --git a/phileas-model/src/main/java/ai/philterd/phileas/model/policy/config/Pdf.java b/phileas-model/src/main/java/ai/philterd/phileas/model/policy/config/Pdf.java index 4872142d2..1de28cf05 100644 --- a/phileas-model/src/main/java/ai/philterd/phileas/model/policy/config/Pdf.java +++ b/phileas-model/src/main/java/ai/philterd/phileas/model/policy/config/Pdf.java @@ -24,39 +24,51 @@ public class Pdf { @Expose private String redactionColor = "black"; - @SerializedName("redactionFont") + @SerializedName("showReplacement") @Expose - private String redactionFont = "Helvetica"; + private boolean showReplacement = false; - @SerializedName("redactionFontSize") + @SerializedName("replacementFont") @Expose - private float redactionFontSize = 12; + private String replacementFont = "helvetica"; - @SerializedName("redactionFontColor") + @SerializedName("replacementMaxFontSize") @Expose - private String redactionFontColor; + private float replacementMaxFontSize = 12; + + @SerializedName("replacementFontColor") + @Expose + private 
String replacementFontColor; public String getRedactionColor() { return redactionColor; } - public void setRedactionColor(String redactionColor) { - this.redactionColor = redactionColor; + public void setRedactionColor(String replacementColor) { + this.redactionColor = replacementColor; + } + + public String getReplacementFont() { + return replacementFont; + } + + public void setReplacementFont(String replacementFont) { + this.replacementFont = replacementFont; } - public String getRedactionFont() { - return redactionFont; + public float getReplacementMaxFontSize() { + return replacementMaxFontSize; } - public void setRedactionFont(String redactionFont) { - this.redactionFont = redactionFont; + public String getReplacementFontColor() { + return replacementFontColor; } - public float getRedactionFontSize() { - return redactionFontSize; + public boolean getShowReplacement() { + return showReplacement; } - public String getRedactionFontColor() { - return redactionFontColor; + public void setShowReplacement(boolean showReplacement) { + this.showReplacement = showReplacement; } } diff --git a/phileas-services/phileas-services-pdf/src/main/java/ai/philterd/services/pdf/PdfRedacter.java b/phileas-services/phileas-services-pdf/src/main/java/ai/philterd/services/pdf/PdfRedacter.java index b6c5d96eb..353d68405 100644 --- a/phileas-services/phileas-services-pdf/src/main/java/ai/philterd/services/pdf/PdfRedacter.java +++ b/phileas-services/phileas-services-pdf/src/main/java/ai/philterd/services/pdf/PdfRedacter.java @@ -91,6 +91,7 @@ public class PdfRedacter extends PDFTextStripper implements Redacter { FONTS.put("courier", new PDType1Font(Standard14Fonts.FontName.COURIER)); } + private final boolean showReplacement; private final float replacementFontSize; private final PDFont replacementFont; private final PDColor replacementFontColor; @@ -103,9 +104,10 @@ public PdfRedacter(Policy policy, this.spans = spans; this.pdfRedactionOptions = pdfRedactionOptions; this.boundingBoxes 
= boundingBoxes; - replacementFont = FONTS.getOrDefault(policy.getConfig().getPdf().getRedactionFont(), FONTS.get("helvetica")); - replacementFontSize = policy.getConfig().getPdf().getRedactionFontSize(); - replacementFontColor = COLORS.getOrDefault(policy.getConfig().getPdf().getRedactionFontColor(), COLORS.get("white")); + this.showReplacement = policy.getConfig().getPdf().getShowReplacement(); + this.replacementFont = FONTS.getOrDefault(policy.getConfig().getPdf().getReplacementFont(), FONTS.get("helvetica")); + this.replacementFontSize = policy.getConfig().getPdf().getReplacementMaxFontSize(); + this.replacementFontColor = COLORS.getOrDefault(policy.getConfig().getPdf().getReplacementFontColor(), COLORS.get("white")); } @@ -247,30 +249,9 @@ protected void endDocument(PDDocument doc) throws IOException { rectangle.getPdRectangle().getWidth(), rectangle.getPdRectangle().getHeight() + buffer); - var replacementText = rectangle.getSpan().getReplacement(); - var rectangleWidth = rectangle.getPdRectangle().getWidth(); - - var boxFontSize = replacementFontSize; - float textWidth = (replacementFont.getStringWidth(replacementText) / 1000.0f) * boxFontSize; - while(textWidth > rectangleWidth) { - boxFontSize -= 1; - textWidth = (replacementFont.getStringWidth(replacementText) / 1000.0f) * boxFontSize; + if(showReplacement) { + addReplacementTextToRect(rectangle, textContentStream); } - - var textHeight = ( replacementFont.getFontDescriptor().getCapHeight()) / 1000 * boxFontSize; - - var textXLocation = (rectangle.getPdRectangle().getLowerLeftX() + - ((rectangle.getPdRectangle().getWidth() / 2.0f) - (textWidth / 2.0f))); - - var textYLocation = (rectangle.getPdRectangle().getLowerLeftY() + - ((rectangle.getPdRectangle().getHeight() / 2.0f) - (textHeight / 2.0f))); - - textContentStream.beginText(); - textContentStream.setNonStrokingColor(replacementFontColor); - textContentStream.setFont(replacementFont, boxFontSize); - textContentStream.newLineAtOffset(textXLocation, 
textYLocation); - textContentStream.showText(replacementText); - textContentStream.endText(); } // Get the color based on the filter. @@ -286,6 +267,36 @@ protected void endDocument(PDDocument doc) throws IOException { } + public void addReplacementTextToRect(RedactedRectangle rectangle, PDPageContentStream textContentStream) throws IOException { + var replacementText = rectangle.getSpan().getReplacement(); + var rectangleWidth = rectangle.getPdRectangle().getWidth(); + var rectangleHeight = rectangle.getPdRectangle().getHeight(); + + var boxFontSize = replacementFontSize; + float textWidth = (replacementFont.getStringWidth(replacementText) / 1000.0f) * boxFontSize; + while (textWidth > rectangleWidth) { + boxFontSize -= 1; + textWidth = (replacementFont.getStringWidth(replacementText) / 1000.0f) * boxFontSize; + } + + // Y position is actually based on the font's "baseline", so we use the descent + // (how far the font goes under the baseline) for the height calculation + var textDescent = (replacementFont.getFontDescriptor().getDescent() / 1000.0f) * boxFontSize; + + var textXLocation = (rectangle.getPdRectangle().getLowerLeftX() + + ((rectangleWidth / 2.0f) - (textWidth / 2.0f))); + + var textYLocation = (rectangle.getPdRectangle().getLowerLeftY() + + ((rectangleHeight / 2.0f) + (textDescent / 2.0f))); + + textContentStream.beginText(); + textContentStream.setNonStrokingColor(replacementFontColor); + textContentStream.setFont(replacementFont, boxFontSize); + textContentStream.newLineAtOffset(textXLocation, textYLocation); + textContentStream.showText(replacementText); + textContentStream.endText(); + } + @Override protected void writeString(String text, List textPositions) throws IOException { diff --git a/phileas-services/phileas-services-pdf/src/test/java/ai/philterd/PdfRedacterTest.java b/phileas-services/phileas-services-pdf/src/test/java/ai/philterd/PdfRedacterTest.java index 7875310a2..b6f805eb4 100644 --- 
a/phileas-services/phileas-services-pdf/src/test/java/ai/philterd/PdfRedacterTest.java +++ b/phileas-services/phileas-services-pdf/src/test/java/ai/philterd/PdfRedacterTest.java @@ -23,11 +23,19 @@ import ai.philterd.phileas.model.policy.graphical.BoundingBox; import ai.philterd.phileas.model.services.Redacter; import ai.philterd.services.pdf.PdfRedacter; +import ai.philterd.services.pdf.model.RedactedRectangle; import org.apache.commons.io.FileUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.font.Standard14Fonts; +import org.apache.pdfbox.pdmodel.graphics.color.PDColor; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; import org.junit.jupiter.api.Test; +import org.mockito.Mockito; import java.io.File; import java.io.IOException; @@ -39,6 +47,10 @@ import java.util.List; import java.util.Set; +import static org.mockito.ArgumentMatchers.argThat; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.verify; + public class PdfRedacterTest { private static final Logger LOGGER = LogManager.getLogger(PdfRedacterTest.class); @@ -87,6 +99,7 @@ public void testPDF2() throws IOException { final byte[] document = IOUtils.toByteArray(is); final Policy policy = new Policy(); + policy.getConfig().getPdf().setShowReplacement(true); final PdfRedactionOptions pdfRedactionOptions = new PdfRedactionOptions(); final List boundingBoxes = Collections.emptyList(); @@ -278,6 +291,42 @@ public void testPdfSpansAndBoundingBoxes() throws IOException { } + @Test + public void testAddReplacementTextToRect() throws IOException { + + var contentStream = Mockito.mock(PDPageContentStream.class); + + final Span span1 = Span.make(0, 1, FilterType.AGE, "ctx", "docid", 0.25, "Wendy", "repl", 
null, false, true, null); + final Span span2 = Span.make(0, 1, FilterType.AGE, "ctx", "docid", 0.25, "Bankruptcy", "repl", null, false, true, null); + final Set spans = Set.copyOf(Arrays.asList(span1, span2)); + + final Policy policy = new Policy(); + policy.getConfig().getPdf().setShowReplacement(true); + final PdfRedactionOptions pdfRedactionOptions = new PdfRedactionOptions(); + + final List boundingBoxes = Collections.emptyList(); + + final PdfRedacter pdfRedacter = new PdfRedacter(policy, spans, pdfRedactionOptions, boundingBoxes); + + RedactedRectangle redactedRectangle = new RedactedRectangle(PDRectangle.LETTER, span1); + pdfRedacter.addReplacementTextToRect(redactedRectangle, contentStream); + + verify(contentStream).beginText(); + verify(contentStream).setNonStrokingColor( + argThat((PDColor color) -> { + return ( + Arrays.equals(color.getComponents(), new float[]{255, 255, 255}) + && color.getColorSpace() == PDDeviceRGB.INSTANCE + ); + }) + ); + verify(contentStream).setFont(argThat((PDType1Font font) -> font.getName().equals(Standard14Fonts.FontName.HELVETICA.getName())), eq(12.0f)); + verify(contentStream).newLineAtOffset(295.998f, 394.758f); + verify(contentStream).showText("repl"); + verify(contentStream).endText(); + + } + private void showFileSizes(Path inputFile, Path outputFile) throws IOException { long inputFileBytes = Files.size(inputFile); From fd6cadef95bc2b72c1ee6da4cdb6d02c08009905 Mon Sep 17 00:00:00 2001 From: JAM <272807+JessieAMorris@users.noreply.github.com> Date: Wed, 18 Dec 2024 22:13:22 -0700 Subject: [PATCH 3/3] Add pdf redaction configuration docs. Fixes #181. 
--- docs/docs/filter_policies/pdf.md | 40 ++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 docs/docs/filter_policies/pdf.md diff --git a/docs/docs/filter_policies/pdf.md b/docs/docs/filter_policies/pdf.md new file mode 100644 index 000000000..c263c0cb2 --- /dev/null +++ b/docs/docs/filter_policies/pdf.md @@ -0,0 +1,40 @@ +# PDF Redaction Configuration + +PDF redaction can be configured through the `config.pdf` path of a policy. + +The available options are: + +| Key | Type | Default | Description | +|--------------------------|-----------|-------------|-----------------------------------------------------------------------------------------------------------------------------------| +| `redactionColor` | `string` | `black` | This is the color of the redaction boxes that are drawn over the PII. Available options are `white`, `black`, `red`, and `yellow` | +| `showReplacement` | `boolean` | `false` | If `true`, the replacement value produced by the filter's strategy is drawn on the redaction box in the PDF | +| `replacementFont` | `string` | `helvetica` | The font to use for the replacement output. Available options are `helvetica`, `times`, and `courier` | +| `replacementMaxFontSize` | `float` | `12` | The maximum font size for the replacement text. Best efforts will be made to fit the replacement text within the redaction box | +| `replacementFontColor` | `string` | `white` | The font color for the replacement. Available options match the `redactionColor` options | + +### An Example PDF Configuration Policy + +The following is an example policy setting the PDF redaction options. + +``` +{ + "name": "example-pdf-policy", + "identifiers": { + "emailAddress": { + "emailAddressFilterStrategies": [ + { + "strategy": "REDACT", + "redactionFormat": "{{{REDACTED-%t}}}" + } + ] + } + }, + "config": { + "pdf": { + "redactionColor": "red", + "showReplacement": true, + "replacementFontColor": "yellow" + } + } +} +``` \ No newline at end of file