Skip to content

Commit

Permalink
Merge branch '#1988_FixRegexTask'
Browse files Browse the repository at this point in the history
  • Loading branch information
lfcnassif committed Nov 23, 2023
2 parents 5ddf1aa + 3b90bb2 commit fd55e3f
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@ public class HashDBDataSource {

private static final String ledFileLength = "fileLength";
private static final String ledFileExt = "fileExt";
private static final String ledMd5_512 = "md5_512";
private static final String ledMd5_64k = "md5_64k";
public static final String ledMd5_512 = "md5_512";
public static final String ledMd5_64k = "md5_64k";

private static final String photoDna = "photoDna";
public static final String photoDna = "photoDna";
private static final int photoDnaBase64Len = 192;

private static final String propertySet = "set";
Expand Down
30 changes: 28 additions & 2 deletions iped-engine/src/main/java/iped/engine/task/regex/RegexTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,14 @@
import iped.engine.config.RegexTaskConfig;
import iped.engine.config.RegexTaskConfig.RegexEntry;
import iped.engine.data.Item;
import iped.engine.hashdb.HashDBDataSource;
import iped.engine.lucene.analysis.FastASCIIFoldingFilter;
import iped.engine.task.AbstractTask;
import iped.engine.task.HashDBLookupTask;
import iped.engine.task.HashTask;
import iped.engine.task.PhotoDNALookup;
import iped.engine.task.index.IndexItem;
import iped.properties.ExtraProperties;

public class RegexTask extends AbstractTask {

Expand All @@ -61,6 +67,26 @@ public class RegexTask extends AbstractTask {

private static FSTConfiguration fastSerializer = FSTConfiguration.createDefaultConfiguration();

/**
 * Names of extra attributes that are left out when an item's properties are
 * read to be searched by this task (issue #1988).
 */
private static final Set<String> ignoredKeys = new HashSet<String>();
static {
    final String hashDbPrefix = HashDBLookupTask.ATTRIBUTES_PREFIX;
    final String[] skippedKeys = {
            // Keys named after the HashTask.HASH algorithms
            HashTask.HASH.EDONKEY.toString(),
            HashTask.HASH.MD5.toString(),
            HashTask.HASH.SHA1.toString(),
            HashTask.HASH.SHA256.toString(),
            HashTask.HASH.SHA512.toString(),
            // Identifier keys declared by IndexItem and ExtraProperties
            IndexItem.TRACK_ID,
            IndexItem.PARENT_TRACK_ID,
            IndexItem.CONTAINER_TRACK_ID,
            IndexItem.EVIDENCE_UUID,
            ExtraProperties.GLOBAL_ID,
            // Keys built from the HashDBLookupTask attribute prefix
            hashDbPrefix + HashDBDataSource.ledMd5_512,
            hashDbPrefix + HashDBDataSource.ledMd5_64k,
            hashDbPrefix + HashDBDataSource.photoDna,
            // Keys declared by PhotoDNALookup; note this one uses the enum name(), not toString()
            PhotoDNALookup.PHOTO_DNA_HIT_PREFIX + HashTask.HASH.MD5.name(),
            PhotoDNALookup.PHOTO_DNA_NEAREAST_HASH
    };
    for (String skippedKey : skippedKeys) {
        ignoredKeys.add(skippedKey);
    }
}

private char[] cbuf = new char[1 << 20];

private static RegexValidator regexValidator;
Expand Down Expand Up @@ -273,9 +299,9 @@ protected void process(IItem item) throws Exception {
private Reader getExtraAttributeReader(IItem item) {
StringBuilder sb = new StringBuilder();
for (String key : item.getExtraAttributeMap().keySet().toArray(new String[0])) {
if (!key.startsWith(REGEX_PREFIX)) {
if (!key.startsWith(REGEX_PREFIX) && !ignoredKeys.contains(key)) {
Object val = item.getExtraAttribute(key);
sb.append(key).append(": ").append(val.toString());
sb.append(key).append(": ").append(val.toString()).append('\n');
}
}
return new StringReader(sb.toString());
Expand Down

0 comments on commit fd55e3f

Please sign in to comment.