Skip to content

Commit

Permalink
Improves the IpedMetadata implementation to avoid Tika's slow original
Browse files Browse the repository at this point in the history
implementation of adding multiple values to a field.
  • Loading branch information
patrickdalla committed Jan 5, 2024
1 parent 56df412 commit dcd61d7
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 9 deletions.
59 changes: 53 additions & 6 deletions iped-utils/src/main/java/iped/utils/tika/IpedMetadata.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,66 @@

import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;

import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.writefilter.MetadataWriteFilter;

public class IpedMetadata extends SyncMetadata {
/**
 * Metadata write filter with two modes.
 *
 * <p><b>Bulk mode</b> is entered by {@link #allocateSpace(int)}: the next
 * {@code count} calls to {@link #add} are written straight into one
 * preallocated array, which is published to the backing map when the last
 * slot is filled. The filter then returns to normal mode on its own.
 *
 * <p><b>Normal mode</b> (the default) appends the value to whatever the field
 * already holds, so repeated {@code add} calls build a multi-valued field.
 *
 * <p>The filter does not remember which field a bulk write was started for;
 * the caller must issue exactly {@code count} adds for a single field right
 * after {@code allocateSpace}.
 */
class IpedMetadataFilter implements MetadataWriteFilter {
    /** Destination array of the pending bulk write; {@code null} when none is pending. */
    String[] arr;

    /** Next free index in {@link #arr}, or -1 when no bulk write is pending. */
    int pos = -1;

    /**
     * Adds one value to {@code field}.
     *
     * @param field name of the metadata field
     * @param value value to add; in normal mode a {@code null} value leaves
     *              already existing values untouched
     * @param data  backing map of the metadata object
     */
    @Override
    public void add(String field, String value, Map<String, String[]> data) {
        if (pos == -1) {
            // Normal mode: no bulk write pending, so append instead of overwriting.
            String[] current = data.get(field);
            if (current == null) {
                set(field, value, data);
            } else if (value != null) {
                String[] grown = new String[current.length + 1];
                System.arraycopy(current, 0, grown, 0, current.length);
                grown[current.length] = value;
                data.put(field, grown);
            }
            return;
        }

        // Bulk mode: fill the preallocated array, publish it once complete.
        arr[pos] = value;
        pos++;
        if (pos == arr.length) {
            data.put(field, arr);
            // Leave bulk mode; otherwise the next add would index past the
            // end of the array that was just published.
            arr = null;
            pos = -1;
        }
    }

    /**
     * Replaces the values of {@code field} with the single {@code value}.
     * Legacy behavior: a {@code null} value removes the field.
     *
     * @param field name of the metadata field
     * @param value new single value, or {@code null} to remove the field
     * @param data  backing map of the metadata object
     */
    @Override
    public void set(String field, String value, Map<String, String[]> data) {
        if (value != null) {
            data.put(field, new String[] { value });
        } else {
            data.remove(field);
        }
    }

    /**
     * Intentionally a no-op: this filter never rejects or rewrites values
     * that are already stored.
     *
     * @param arg0 backing map of the metadata object (left unchanged)
     */
    @Override
    public void filterExisting(Map<String, String[]> arg0) {
        // nothing to filter
    }

    /**
     * Starts a bulk write of {@code count} values. The next {@code count}
     * calls to {@link #add} fill a single array of that size.
     *
     * @param count number of values that will follow; with 0 no add call is
     *              expected, so the filter stays in normal mode
     */
    public void allocateSpace(int count) {
        if (count == 0) {
            // An empty bulk write would never reach the "array full" check
            // and would make the next unrelated add fail.
            arr = null;
            pos = -1;
            return;
        }
        arr = new String[count];
        pos = 0;
    }
}

// Write filter instance registered by the constructor below; the
// set(String, ArrayList) method calls allocateSpace(...) on it before storing a list.
IpedMetadataFilter ipedFilter = new IpedMetadataFilter();

/**
 * Creates the metadata object and registers {@code ipedFilter} through
 * {@code setMetadataWriteFilter}.
 */
public IpedMetadata() {
setMetadataWriteFilter(ipedFilter);
}

public class IpedMetadata extends Metadata{

/**
 * Stores all elements of {@code list} as the values of field {@code arg0}.
 *
 * The list is copied, in iteration order, into a {@code String[]} of the same
 * size; {@code ipedFilter.allocateSpace(list.size())} is called and the array
 * is handed to {@code super.set(arg0, arr)}.
 *
 * NOTE(review): this span comes from a rendered diff, so removed and added
 * lines appear side by side (the array declaration and the super.set call
 * each occur twice). Confirm against the real file which ones are live.
 *
 * @param arg0 name of the metadata field
 * @param list values to store; elements are cast to String
 */
public void set(String arg0, ArrayList<String> list) {
String[] arr= new String[list.size()];
String[] arr = new String[list.size()];
int i=0;
// copy the list elements into the array, preserving iteration order
for (Iterator iterator = list.iterator(); iterator.hasNext();) {
arr[i++]=(String) iterator.next();
}

super.set(arg0, arr);

// presumably lets the filter collect the following per-value adds into one
// preallocated array instead of growing it per value — TODO confirm against
// the superclass set(String, String[]) implementation
ipedFilter.allocateSpace(list.size());
super.set(arg0, arr);
}

}
4 changes: 1 addition & 3 deletions iped-utils/src/main/java/iped/utils/tika/SyncMetadata.java
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
package iped.utils.tika;

import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.Properties;

import org.apache.tika.metadata.Metadata;
Expand All @@ -16,7 +14,7 @@
* @author Nassif
*
*/
public class SyncMetadata extends IpedMetadata {
public class SyncMetadata extends Metadata {

/**
*
Expand Down

0 comments on commit dcd61d7

Please sign in to comment.