Skip to content

Commit

Permalink
PHL-308: Finishing sentiment condition and adding tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
jzonthemtn committed Nov 9, 2023
1 parent bc3a9cf commit 27c8f2f
Show file tree
Hide file tree
Showing 52 changed files with 787 additions and 440 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -578,6 +578,39 @@ public void endToEnd17() throws Exception {

}

/**
 * End-to-end test of filtering with a policy that has sentiment analysis enabled.
 *
 * <p>The policy produced by {@code getPolicyWithSentiment} is serialized to JSON in a fresh
 * temporary directory, and the filter service is configured to load policies from that
 * directory. The test then filters a sentence containing an SSN and verifies that exactly one
 * span is applied, that the SSN is redacted, and that the response attributes carry a
 * {@code "sentiment"} entry equal to {@code "1"}.
 *
 * @throws Exception if the policy cannot be written or the filter service fails
 */
@Test
public void endToEndWithSentiment() throws Exception {

    final Path temp = Files.createTempDirectory("philter");
    // Computed once: used for the policy file location and for the policies directory property.
    final String policiesDirectory = temp.toFile().getAbsolutePath();
    final File file = Paths.get(policiesDirectory, "sentiment.json").toFile();
    LOGGER.info("Writing policy to {}", file.getAbsolutePath());
    final String policy = gson.toJson(getPolicyWithSentiment("sentiment"));
    LOGGER.info(policy);
    // Write with an explicit charset; the two-argument overload is deprecated because it
    // silently falls back to the platform default encoding.
    FileUtils.writeStringToFile(file, policy, java.nio.charset.StandardCharsets.UTF_8);

    final Properties properties = new Properties();
    properties.setProperty("indexes.directory", INDEXES_DIRECTORY);
    properties.setProperty("store.enabled", "false");
    properties.setProperty("filter.policies.directory", policiesDirectory);

    final PhileasConfiguration phileasConfiguration = ConfigFactory.create(PhileasConfiguration.class, properties);

    final String input = "his ssn was 123-45-6789";

    final PhileasFilterService service = new PhileasFilterService(phileasConfiguration);
    final FilterResponse response = service.filter(List.of("sentiment"), "context", "documentid", input, MimeType.TEXT_PLAIN);

    LOGGER.info(response.filteredText());

    showSpans(response.explanation().appliedSpans());

    Assertions.assertEquals("documentid", response.documentId());
    Assertions.assertEquals(1, response.explanation().appliedSpans().size());
    Assertions.assertEquals("his ssn was {{{REDACTED-ssn}}}", response.filteredText().trim());
    Assertions.assertEquals("1", response.attributes().get("sentiment"));

}

@Test
public void endToEndUsingCustomDictionary() throws Exception {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,25 @@
public class EndToEndTestsHelper {


/**
 * Builds a policy that enables sentiment analysis and identifies SSNs using a single
 * default-constructed {@code SsnFilterStrategy}.
 *
 * @param policyName the name to assign to the returned policy
 * @return a policy with sentiment analysis enabled and an SSN identifier configured
 * @throws IOException declared by the signature and kept for compatibility with callers
 */
public static Policy getPolicyWithSentiment(String policyName) throws IOException {

    // SSN identifier backed by one default strategy.
    final Ssn ssnIdentifier = new Ssn();
    ssnIdentifier.setSsnFilterStrategies(List.of(new SsnFilterStrategy()));

    final Identifiers policyIdentifiers = new Identifiers();
    policyIdentifiers.setSsn(ssnIdentifier);

    // Switch on sentiment analysis before naming the policy and attaching identifiers.
    final Policy sentimentPolicy = new Policy();
    sentimentPolicy.getConfig().getAnalysis().getSentiment().setEnabled(true);
    sentimentPolicy.setName(policyName);
    sentimentPolicy.setIdentifiers(policyIdentifiers);

    return sentimentPolicy;

}

public static Policy getPolicyZipCodeWithIgnored(String policyName) throws IOException {

Set<String> ignored = new HashSet<>();
Expand All @@ -51,13 +70,13 @@ public static Policy getPolicyZipCodeWithIgnored(String policyName) throws IOExc
SsnFilterStrategy ssnFilterStrategy = new SsnFilterStrategy();

Ssn ssn = new Ssn();
ssn.setSsnFilterStrategies(Arrays.asList(ssnFilterStrategy));
ssn.setSsnFilterStrategies(List.of(ssnFilterStrategy));

ZipCodeFilterStrategy zipCodeFilterStrategy = new ZipCodeFilterStrategy();
zipCodeFilterStrategy.setTruncateDigits(2);

ZipCode zipCode = new ZipCode();
zipCode.setZipCodeFilterStrategies(Arrays.asList(zipCodeFilterStrategy));
zipCode.setZipCodeFilterStrategies(List.of(zipCodeFilterStrategy));
zipCode.setIgnored(ignored);

Identifiers identifiers = new Identifiers();
Expand All @@ -84,13 +103,13 @@ public static Policy getPolicyZipCodeWithIgnoredFromFile(String policyName) thro
SsnFilterStrategy ssnFilterStrategy = new SsnFilterStrategy();

Ssn ssn = new Ssn();
ssn.setSsnFilterStrategies(Arrays.asList(ssnFilterStrategy));
ssn.setSsnFilterStrategies(List.of(ssnFilterStrategy));

ZipCodeFilterStrategy zipCodeFilterStrategy = new ZipCodeFilterStrategy();
zipCodeFilterStrategy.setTruncateDigits(2);

ZipCode zipCode = new ZipCode();
zipCode.setZipCodeFilterStrategies(Arrays.asList(zipCodeFilterStrategy));
zipCode.setZipCodeFilterStrategies(List.of(zipCodeFilterStrategy));
zipCode.setIgnoredFiles(ignoredFiles);

Identifiers identifiers = new Identifiers();
Expand Down Expand Up @@ -135,8 +154,8 @@ public static Policy getPdfPolicy(String policyName) throws IOException {

public static Policy getPdfFilterWithPersonPolicy(String policyName) throws URISyntaxException {

final File model = new File(EndToEndTestsHelper.class.getClassLoader().getResource("ner/model.onnx").toURI());
final File vocab = new File(EndToEndTestsHelper.class.getClassLoader().getResource("ner/vocab.txt").toURI());
final File model = new File(EndToEndTestsHelper.class.getClassLoader().getResource("models/model.onnx").toURI());
final File vocab = new File(EndToEndTestsHelper.class.getClassLoader().getResource("models/vocab.txt").toURI());

final PersonV2 personV2 = new PersonV2();
personV2.setModel(model.getAbsolutePath());
Expand Down Expand Up @@ -222,80 +241,80 @@ public static Policy getPolicy(String policyName) throws IOException, URISyntaxE
PhoneNumberFilterStrategy phoneNumberFilterStrategy = new PhoneNumberFilterStrategy();

PhoneNumber phoneNumber = new PhoneNumber();
phoneNumber.setPhoneNumberFilterStrategies(Arrays.asList(phoneNumberFilterStrategy));
phoneNumber.setPhoneNumberFilterStrategies(List.of(phoneNumberFilterStrategy));

SsnFilterStrategy ssnFilterStrategy = new SsnFilterStrategy();

Ssn ssn = new Ssn();
ssn.setSsnFilterStrategies(Arrays.asList(ssnFilterStrategy));
ssn.setSsnFilterStrategies(List.of(ssnFilterStrategy));

StateAbbreviationFilterStrategy stateAbbreviationFilterStrategy = new StateAbbreviationFilterStrategy();

StateAbbreviation stateAbbreviation = new StateAbbreviation();
stateAbbreviation.setStateAbbreviationsFilterStrategies(Arrays.asList(stateAbbreviationFilterStrategy));
stateAbbreviation.setStateAbbreviationsFilterStrategies(List.of(stateAbbreviationFilterStrategy));

UrlFilterStrategy urlFilterStrategy = new UrlFilterStrategy();

Url url = new Url();
url.setUrlFilterStrategies(Arrays.asList(urlFilterStrategy));
url.setUrlFilterStrategies(List.of(urlFilterStrategy));

VinFilterStrategy vinFilterStrategy = new VinFilterStrategy();

Vin vin = new Vin();
vin.setVinFilterStrategies(Arrays.asList(vinFilterStrategy));
vin.setVinFilterStrategies(List.of(vinFilterStrategy));

ZipCodeFilterStrategy zipCodeFilterStrategy = new ZipCodeFilterStrategy();
zipCodeFilterStrategy.setTruncateDigits(2);

ZipCode zipCode = new ZipCode();
zipCode.setZipCodeFilterStrategies(Arrays.asList(zipCodeFilterStrategy));
zipCode.setZipCodeFilterStrategies(List.of(zipCodeFilterStrategy));

PersonsFilterStrategy personsFilterStrategy = new PersonsFilterStrategy();

final File model = new File(EndToEndTestsHelper.class.getClassLoader().getResource("ner/model.onnx").toURI());
final File vocab = new File(EndToEndTestsHelper.class.getClassLoader().getResource("ner/vocab.txt").toURI());
final File model = new File(EndToEndTestsHelper.class.getClassLoader().getResource("models/model.onnx").toURI());
final File vocab = new File(EndToEndTestsHelper.class.getClassLoader().getResource("models/vocab.txt").toURI());

PersonV2 personV2 = new PersonV2();
personV2.setModel(model.getAbsolutePath());
personV2.setVocab(vocab.getAbsolutePath());
personV2.setPersonFilterStrategies(Arrays.asList(personsFilterStrategy));
personV2.setPersonFilterStrategies(List.of(personsFilterStrategy));

// ----------------------------------------------------------------------------------

CityFilterStrategy cityFilterStrategy = new CityFilterStrategy();

City city = new City();
city.setCityFilterStrategies(Arrays.asList(cityFilterStrategy));
city.setCityFilterStrategies(List.of(cityFilterStrategy));

CountyFilterStrategy countyFilterStrategy = new CountyFilterStrategy();

County county = new County();
county.setCountyFilterStrategies(Arrays.asList(countyFilterStrategy));
county.setCountyFilterStrategies(List.of(countyFilterStrategy));

FirstNameFilterStrategy firstNameFilterStrategy = new FirstNameFilterStrategy();

FirstName firstName = new FirstName();
firstName.setFirstNameFilterStrategies(Arrays.asList(firstNameFilterStrategy));
firstName.setFirstNameFilterStrategies(List.of(firstNameFilterStrategy));

HospitalAbbreviationFilterStrategy hospitalAbbreviationFilterStrategy = new HospitalAbbreviationFilterStrategy();

HospitalAbbreviation hospitalAbbreviation = new HospitalAbbreviation();
hospitalAbbreviation.setHospitalAbbreviationFilterStrategies(Arrays.asList(hospitalAbbreviationFilterStrategy));
hospitalAbbreviation.setHospitalAbbreviationFilterStrategies(List.of(hospitalAbbreviationFilterStrategy));

HospitalFilterStrategy hospitalFilterStrategy = new HospitalFilterStrategy();

Hospital hospital = new Hospital();
hospital.setHospitalFilterStrategies(Arrays.asList(hospitalFilterStrategy));
hospital.setHospitalFilterStrategies(List.of(hospitalFilterStrategy));

StateFilterStrategy stateFilterStrategy = new StateFilterStrategy();

State state = new State();
state.setStateFilterStrategies(Arrays.asList(stateFilterStrategy));
state.setStateFilterStrategies(List.of(stateFilterStrategy));

SurnameFilterStrategy surnameFilterStrategy = new SurnameFilterStrategy();

Surname surname = new Surname();
surname.setSurnameFilterStrategies(Arrays.asList(surnameFilterStrategy));
surname.setSurnameFilterStrategies(List.of(surnameFilterStrategy));

// ----------------------------------------------------------------------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ public void filter1() throws Exception {
.withWindowSize(windowSize)
.build();

final File model = new File(getClass().getClassLoader().getResource("ner/model.onnx").toURI());
final File vocab = new File(getClass().getClassLoader().getResource("ner/vocab.txt").toURI());
final File model = new File(getClass().getClassLoader().getResource("models/model.onnx").toURI());
final File vocab = new File(getClass().getClassLoader().getResource("models/vocab.txt").toURI());
final Map<String, DescriptiveStatistics> stats = new LinkedHashMap<>();
final Map<String, Double> thresholds = new LinkedHashMap<>();

Expand Down Expand Up @@ -91,8 +91,8 @@ public void filter2() throws Exception {
.withWindowSize(windowSize)
.build();

final File model = new File(getClass().getClassLoader().getResource("ner/model.onnx").toURI());
final File vocab = new File(getClass().getClassLoader().getResource("ner/vocab.txt").toURI());
final File model = new File(getClass().getClassLoader().getResource("models/model.onnx").toURI());
final File vocab = new File(getClass().getClassLoader().getResource("models/vocab.txt").toURI());
final Map<String, DescriptiveStatistics> stats = new LinkedHashMap<>();
final Map<String, Double> thresholds = new LinkedHashMap<>();

Expand Down Expand Up @@ -131,8 +131,8 @@ public void filter3() throws Exception {
.withWindowSize(windowSize)
.build();

final File model = new File(getClass().getClassLoader().getResource("ner/model.onnx").toURI());
final File vocab = new File(getClass().getClassLoader().getResource("ner/vocab.txt").toURI());
final File model = new File(getClass().getClassLoader().getResource("models/model.onnx").toURI());
final File vocab = new File(getClass().getClassLoader().getResource("models/vocab.txt").toURI());
final Map<String, DescriptiveStatistics> stats = new LinkedHashMap<>();
final Map<String, Double> thresholds = new LinkedHashMap<>();

Expand Down Expand Up @@ -169,8 +169,8 @@ public void filter4() throws Exception {
.withWindowSize(windowSize)
.build();

final File model = new File(getClass().getClassLoader().getResource("ner/model.onnx").toURI());
final File vocab = new File(getClass().getClassLoader().getResource("ner/vocab.txt").toURI());
final File model = new File(getClass().getClassLoader().getResource("models/model.onnx").toURI());
final File vocab = new File(getClass().getClassLoader().getResource("models/vocab.txt").toURI());
final Map<String, DescriptiveStatistics> stats = new LinkedHashMap<>();
final Map<String, Double> thresholds = new LinkedHashMap<>();

Expand Down Expand Up @@ -203,8 +203,8 @@ public void filter5() throws Exception {
.withWindowSize(windowSize)
.build();

final File model = new File(getClass().getClassLoader().getResource("ner/model.onnx").toURI());
final File vocab = new File(getClass().getClassLoader().getResource("ner/vocab.txt").toURI());
final File model = new File(getClass().getClassLoader().getResource("models/model.onnx").toURI());
final File vocab = new File(getClass().getClassLoader().getResource("models/vocab.txt").toURI());
final Map<String, DescriptiveStatistics> stats = new LinkedHashMap<>();
final Map<String, Double> thresholds = new LinkedHashMap<>();

Expand Down Expand Up @@ -239,8 +239,8 @@ public void filter6() throws Exception {
.withWindowSize(windowSize)
.build();

final File model = new File(getClass().getClassLoader().getResource("ner/model.onnx").toURI());
final File vocab = new File(getClass().getClassLoader().getResource("ner/vocab.txt").toURI());
final File model = new File(getClass().getClassLoader().getResource("models/model.onnx").toURI());
final File vocab = new File(getClass().getClassLoader().getResource("models/vocab.txt").toURI());
final Map<String, DescriptiveStatistics> stats = new LinkedHashMap<>();
final Map<String, Double> thresholds = new LinkedHashMap<>();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public void filter1() throws Exception {
.withWindowSize(windowSize)
.build();

final File model = new File(getClass().getClassLoader().getResource("ner/en-ner-person.bin").toURI());
final File model = new File(getClass().getClassLoader().getResource("models/en-ner-person.bin").toURI());

final Map<String, DescriptiveStatistics> stats = new LinkedHashMap<>();
final Map<String, Double> thresholds = new LinkedHashMap<>();
Expand Down Expand Up @@ -84,7 +84,7 @@ public void filter2() throws Exception {
.withWindowSize(windowSize)
.build();

final File model = new File(getClass().getClassLoader().getResource("ner/en-ner-person.bin").toURI());
final File model = new File(getClass().getClassLoader().getResource("models/en-ner-person.bin").toURI());

final Map<String, DescriptiveStatistics> stats = new LinkedHashMap<>();
final Map<String, Double> thresholds = new LinkedHashMap<>();
Expand Down
Binary file not shown.
3 changes: 0 additions & 3 deletions phileas-core/src/test/resources/ner/en-ner-person.bin

This file was deleted.

2 changes: 1 addition & 1 deletion phileas-model/src/main/antlr4/FilterCondition.g4
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ expression: (
| 'confidence' COMPARATOR NUMBER (AND expression)?
| 'context' COMPARATOR WORD (AND expression)?
| 'classification' COMPARATOR WORD (AND expression)?
| 'sentiment' COMPARATOR WORD (AND expression)?
| 'sentiment' COMPARATOR NUMBER (AND expression)?
);

TYPE: (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ expression


atn:
[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 3, 15, 57, 4, 2, 9, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 5, 2, 11, 10, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 5, 2, 18, 10, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 5, 2, 25, 10, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 5, 2, 32, 10, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 5, 2, 39, 10, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 5, 2, 46, 10, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 5, 2, 53, 10, 2, 5, 2, 55, 10, 2, 3, 2, 2, 2, 3, 2, 2, 2, 2, 69, 2, 54, 3, 2, 2, 2, 4, 55, 3, 2, 2, 2, 5, 6, 7, 3, 2, 2, 6, 7, 7, 12, 2, 2, 7, 10, 7, 13, 2, 2, 8, 9, 7, 11, 2, 2, 9, 11, 5, 2, 2, 2, 10, 8, 3, 2, 2, 2, 10, 11, 3, 2, 2, 2, 11, 55, 3, 2, 2, 2, 12, 13, 7, 4, 2, 2, 13, 14, 7, 12, 2, 2, 14, 17, 7, 14, 2, 2, 15, 16, 7, 11, 2, 2, 16, 18, 5, 2, 2, 2, 17, 15, 3, 2, 2, 2, 17, 18, 3, 2, 2, 2, 18, 55, 3, 2, 2, 2, 19, 20, 7, 5, 2, 2, 20, 21, 7, 12, 2, 2, 21, 24, 7, 10, 2, 2, 22, 23, 7, 11, 2, 2, 23, 25, 5, 2, 2, 2, 24, 22, 3, 2, 2, 2, 24, 25, 3, 2, 2, 2, 25, 55, 3, 2, 2, 2, 26, 27, 7, 6, 2, 2, 27, 28, 7, 12, 2, 2, 28, 31, 7, 13, 2, 2, 29, 30, 7, 11, 2, 2, 30, 32, 5, 2, 2, 2, 31, 29, 3, 2, 2, 2, 31, 32, 3, 2, 2, 2, 32, 55, 3, 2, 2, 2, 33, 34, 7, 7, 2, 2, 34, 35, 7, 12, 2, 2, 35, 38, 7, 14, 2, 2, 36, 37, 7, 11, 2, 2, 37, 39, 5, 2, 2, 2, 38, 36, 3, 2, 2, 2, 38, 39, 3, 2, 2, 2, 39, 55, 3, 2, 2, 2, 40, 41, 7, 8, 2, 2, 41, 42, 7, 12, 2, 2, 42, 45, 7, 14, 2, 2, 43, 44, 7, 11, 2, 2, 44, 46, 5, 2, 2, 2, 45, 43, 3, 2, 2, 2, 45, 46, 3, 2, 2, 2, 46, 55, 3, 2, 2, 2, 47, 48, 7, 9, 2, 2, 48, 49, 7, 12, 2, 2, 49, 52, 7, 14, 2, 2, 50, 51, 7, 11, 2, 2, 51, 53, 5, 2, 2, 2, 52, 50, 3, 2, 2, 2, 52, 53, 3, 2, 2, 2, 53, 55, 3, 2, 2, 2, 54, 4, 3, 2, 2, 2, 54, 5, 3, 2, 2, 2, 54, 12, 3, 2, 2, 2, 54, 19, 3, 2, 2, 2, 54, 26, 3, 2, 2, 2, 54, 33, 3, 2, 2, 2, 54, 40, 3, 2, 2, 2, 54, 47, 3, 2, 2, 2, 55, 3, 3, 2, 2, 2, 10, 10, 17, 24, 31, 38, 45, 52, 54]
[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 3, 15, 57, 4, 2, 9, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 5, 2, 11, 10, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 5, 2, 18, 10, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 5, 2, 25, 10, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 5, 2, 32, 10, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 5, 2, 39, 10, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 5, 2, 46, 10, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 5, 2, 53, 10, 2, 5, 2, 55, 10, 2, 3, 2, 2, 2, 3, 2, 2, 2, 2, 69, 2, 54, 3, 2, 2, 2, 4, 55, 3, 2, 2, 2, 5, 6, 7, 3, 2, 2, 6, 7, 7, 12, 2, 2, 7, 10, 7, 13, 2, 2, 8, 9, 7, 11, 2, 2, 9, 11, 5, 2, 2, 2, 10, 8, 3, 2, 2, 2, 10, 11, 3, 2, 2, 2, 11, 55, 3, 2, 2, 2, 12, 13, 7, 4, 2, 2, 13, 14, 7, 12, 2, 2, 14, 17, 7, 14, 2, 2, 15, 16, 7, 11, 2, 2, 16, 18, 5, 2, 2, 2, 17, 15, 3, 2, 2, 2, 17, 18, 3, 2, 2, 2, 18, 55, 3, 2, 2, 2, 19, 20, 7, 5, 2, 2, 20, 21, 7, 12, 2, 2, 21, 24, 7, 10, 2, 2, 22, 23, 7, 11, 2, 2, 23, 25, 5, 2, 2, 2, 24, 22, 3, 2, 2, 2, 24, 25, 3, 2, 2, 2, 25, 55, 3, 2, 2, 2, 26, 27, 7, 6, 2, 2, 27, 28, 7, 12, 2, 2, 28, 31, 7, 13, 2, 2, 29, 30, 7, 11, 2, 2, 30, 32, 5, 2, 2, 2, 31, 29, 3, 2, 2, 2, 31, 32, 3, 2, 2, 2, 32, 55, 3, 2, 2, 2, 33, 34, 7, 7, 2, 2, 34, 35, 7, 12, 2, 2, 35, 38, 7, 14, 2, 2, 36, 37, 7, 11, 2, 2, 37, 39, 5, 2, 2, 2, 38, 36, 3, 2, 2, 2, 38, 39, 3, 2, 2, 2, 39, 55, 3, 2, 2, 2, 40, 41, 7, 8, 2, 2, 41, 42, 7, 12, 2, 2, 42, 45, 7, 14, 2, 2, 43, 44, 7, 11, 2, 2, 44, 46, 5, 2, 2, 2, 45, 43, 3, 2, 2, 2, 45, 46, 3, 2, 2, 2, 46, 55, 3, 2, 2, 2, 47, 48, 7, 9, 2, 2, 48, 49, 7, 12, 2, 2, 49, 52, 7, 13, 2, 2, 50, 51, 7, 11, 2, 2, 51, 53, 5, 2, 2, 2, 52, 50, 3, 2, 2, 2, 52, 53, 3, 2, 2, 2, 53, 55, 3, 2, 2, 2, 54, 4, 3, 2, 2, 2, 54, 5, 3, 2, 2, 2, 54, 12, 3, 2, 2, 2, 54, 19, 3, 2, 2, 2, 54, 26, 3, 2, 2, 2, 54, 33, 3, 2, 2, 2, 54, 40, 3, 2, 2, 2, 54, 47, 3, 2, 2, 2, 55, 3, 3, 2, 2, 2, 10, 10, 17, 24, 31, 38, 45, 52, 54]
Loading

0 comments on commit 27c8f2f

Please sign in to comment.