Skip to content

Commit

Permalink
'#1859 Change the code to read v3 info and loop through linked entries
Browse files Browse the repository at this point in the history
  • Loading branch information
patrickdalla committed Nov 9, 2023
1 parent 5eac8f2 commit 6f8feee
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -92,38 +92,36 @@ public void parse(InputStream indexFile, ContentHandler handler, Metadata metada
for (CacheEntry ce : lce) {

Map<String, String> httpResponse = ce.getHttpResponse();
String requestUrl = ce.getRequestURL();

try {

if (ce.getRequestURL().contains("unknown")) {
System.out.println();
}

String contentEncoding = httpResponse.get("content-encoding");
InputStream is;
try {
is = ce.getResponseDataSize() > 0
? ce.getResponseDataStream(httpResponse.get("content-encoding"))
? ce.getResponseDataStream(contentEncoding)
: new ByteArrayInputStream(new byte[] {});
} catch (InputStreamNotAvailable e) {
LOGGER.warn("Input Stream for entry not found:" + ce.getRequestURL() + " in item "
LOGGER.warn(
"Input Stream for entry not found:" + requestUrl + " in item "
+ item.getPath());

is = new ByteArrayInputStream(new byte[] {});
}

Metadata entryMeta = new Metadata();
entryMeta.set("URL", ce.getRequestURL());
entryMeta.set("URL", requestUrl);
entryMeta.set(TikaCoreProperties.TITLE,
ce.getRequestURL().substring(ce.getRequestURL().lastIndexOf('/') + 1));
requestUrl.substring(requestUrl.lastIndexOf('/') + 1));
entryMeta.set(TikaCoreProperties.RESOURCE_NAME_KEY,
ce.getRequestURL().substring(ce.getRequestURL().lastIndexOf('/') + 1));
requestUrl.substring(requestUrl.lastIndexOf('/') + 1));
entryMeta.set(BasicProps.HASCHILD, Boolean.TRUE.toString());
entryMeta.set(ExtraProperties.DECODED_DATA, Boolean.TRUE.toString());

entryMeta.set(IS_CACHE_INDEX_ENTRY, Boolean.TRUE.toString());
entryMeta.set(CACHE_ENTRY_NAME, ce.getName());
entryMeta.set(CACHE_URL, ce.getRequestURL());
entryMeta.set(CACHE_URL, requestUrl);
entryMeta.set(TikaCoreProperties.CREATED, ce.getCreationTime());

for (Map.Entry<String, String> entry : httpResponse.entrySet()) {
Expand All @@ -146,9 +144,7 @@ public void parse(InputStream indexFile, ContentHandler handler, Metadata metada
throw exception;
}
} catch (IOException e1) {
e1.printStackTrace();
} catch (ChromeCacheException e) {
e.printStackTrace();
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,4 +149,12 @@ public InputStream getInputStream(List<IItemReader> dataFiles, List<IItemReader>
throw new InputStreamNotAvailable();
}

/**
 * Returns the address value currently stored in this object.
 *
 * @return the value of the {@code address} field, exactly as stored
 */
public long getAddress() {
    return this.address;
}

/**
 * Replaces the address value stored in this object.
 *
 * No validation is performed here; the given value is stored as-is.
 *
 * @param address the new value for the {@code address} field
 */
public void setAddress(long address) {
this.address = address;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -27,25 +27,25 @@ public class CacheEntry {

private static Logger logger = LoggerFactory.getLogger(CacheEntry.class);

private long hash;
private CacheAddr nextEntry;
private CacheAddr rankingsNode;
private int reuseCount;
private int refetchCount;
private int state;
private Date creationTime;
private int keyDataSize;
private CacheAddr longKeyAddressCacheAddress;
private long longKeyAddress;
private String longKey;
private int[] dataStreamSize;
private CacheAddr[] dataStreamAdresses;
private long flags;
private int[] paddings;
private long selfHash;
private byte[] keyData;
private List<IItemReader> dataFiles;
private List<IItemReader> externalFiles;
protected long hash;
protected CacheAddr nextEntry;
protected CacheAddr rankingsNode;
protected int reuseCount;
protected int refetchCount;
protected int state;
protected Date creationTime;
protected int keyDataSize;
protected CacheAddr longKeyAddressCacheAddress;
protected long longKeyAddress;
protected String key;
protected int[] dataStreamSize;
protected CacheAddr[] dataStreamAdresses;
protected long flags;
protected int[] paddings;
protected long selfHash;
protected byte[] keyData;
protected List<IItemReader> dataFiles;
protected List<IItemReader> externalFiles;

public long getHash() {
return hash;
Expand Down Expand Up @@ -116,9 +116,16 @@ public InputStream getResponseInfo() throws IOException {
* @param externalFiles
* @throws IOException
*/
public CacheEntry(InputStream is, List<IItemReader> dataFiles, List<IItemReader> externalFiles) throws IOException {
public CacheEntry(InputStream is, List<IItemReader> dataFiles, List<IItemReader> externalFiles)
throws IOException {
this.dataFiles = dataFiles;
this.externalFiles = externalFiles;

read(is);

}

private void read(InputStream is) throws IOException {
hash = Index.readUnsignedInt(is);
nextEntry = new CacheAddr(Index.readUnsignedInt(is));
rankingsNode = new CacheAddr(Index.readUnsignedInt(is));
Expand Down Expand Up @@ -157,7 +164,6 @@ public CacheEntry(InputStream is, List<IItemReader> dataFiles, List<IItemReader>
} else {
keyData = new byte[0];
}

}

public int getResponseDataSize() {
Expand All @@ -179,19 +185,29 @@ public String getName() {
public String getRequestURL() {
try {

if (longKey == null) {
if (key == null) {
if (keyDataSize < 0) {
return null;
}

if (longKeyAddress > 0) {
longKey = new String(longKeyAddressCacheAddress.getInputStream(dataFiles, externalFiles, null).readNBytes(keyDataSize));
key = new String(longKeyAddressCacheAddress.getInputStream(dataFiles, externalFiles, null)
.readNBytes(keyDataSize));
} else {
return new String(keyData);
key = new String(keyData);
}
}

return longKey;
if (key.contains("messages")) {
System.out.println();
}

if (key.contains("_dk_")) {
String[] parts = key.split(" ");
key = parts[parts.length - 1];
}

return key;

} catch (Exception exe) {
exe.printStackTrace();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ public class Index {
private static Logger logger = LoggerFactory.getLogger(Index.class);

static private final long MAGIC_NUMBER_LE = 0xC103CAC3l; // magic number in little endian
static private final List<Long> supportedVersions = ImmutableList.of(0x00020001l);
static private final List<Long> supportedVersions = ImmutableList.of(0x00020001l, 0x00030000l);

private final long magicNumber;
private final long version;
private final int entriesCont;
private final int bytesCont;
private long bytesCont;
private final int lastFile;
private final int id;
private final CacheAddr stats;
Expand Down Expand Up @@ -71,7 +71,7 @@ public int getEntriesCont() {
return entriesCont;
}

public int getBytesCont() {
public long getBytesCont() {
return bytesCont;
}

Expand Down Expand Up @@ -176,7 +176,11 @@ public Index(InputStream is, String path, List<IItemReader> dataFiles, List<IIte
experiment = read4bytes(is);
createTime = readDate(is);

for (int i = 0; i < 52; i++) {
if(version == 0x00030000l) {
bytesCont = read8bytes(is);
}

for (int i = 0; i < 50; i++) {
padding[i] = (int) readUnsignedInt(is);
}

Expand Down Expand Up @@ -217,15 +221,29 @@ public Index(InputStream is, String path, List<IItemReader> dataFiles, List<IIte
table[i] = new CacheAddr(readUnsignedInt(is));
}

int validEntryCount = 0;
for (CacheAddr ea : table) {
try (InputStream eaIS = ea.getInputStream(dataFiles, externalFiles, null)) {
lst.add(new CacheEntry(eaIS, dataFiles, externalFiles));
validEntryCount++;
CacheEntry ce = new CacheEntry(eaIS, dataFiles, externalFiles);
lst.add(ce);
while (ce.getNextEntry().getAddress() != 0) {
CacheAddr na = ce.getNextEntry();
try (InputStream naIS = na.getInputStream(dataFiles, externalFiles, null)) {
ce = new CacheEntry(naIS, dataFiles, externalFiles);
lst.add(ce);
}
}
} catch (InputStreamNotAvailable e) {
continue;
} catch (IOException e) {
logger.warn("Exception reading CacheEntry of Discord Index " + path, e);
}
}
if (validEntryCount != entriesCont) {
System.out.println();

}
}

public static int read2bytes(InputStream is) throws IOException {
Expand Down

0 comments on commit 6f8feee

Please sign in to comment.