From ade5bff61079d843c0401b28365053c63b6f1182 Mon Sep 17 00:00:00 2001
From: marcus6n
Date: Thu, 18 Jul 2024 15:07:09 -0300
Subject: [PATCH] feature: facebook messenger parser

---
 iped-parsers/iped-parsers-impl/pom.xml        | 10 ++-
 .../facebook/FacebookMessengerJSONParser.java | 97 +++++++++++++++++++
 .../FacebookMessengerJSONParserTest.java      | 72 ++++++++++++++
 .../test-files/test_facebookMessenger.json    | 68 +++++++++++++
 4 files changed, 245 insertions(+), 2 deletions(-)
 create mode 100644 iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/facebook/FacebookMessengerJSONParser.java
 create mode 100644 iped-parsers/iped-parsers-impl/src/test/java/iped/parsers/facebook/FacebookMessengerJSONParserTest.java
 create mode 100644 iped-parsers/iped-parsers-impl/src/test/resources/test-files/test_facebookMessenger.json

diff --git a/iped-parsers/iped-parsers-impl/pom.xml b/iped-parsers/iped-parsers-impl/pom.xml
index 5623bd6120..bff845be33 100644
--- a/iped-parsers/iped-parsers-impl/pom.xml
+++ b/iped-parsers/iped-parsers-impl/pom.xml
@@ -8,7 +8,7 @@ iped-parsers-impl jar - + org.slf4j
@@ -242,8 +242,14 @@ ofx4j 1.36 + + com.google.code.gson + gson + 2.9.0 + compile + - +
diff --git a/iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/facebook/FacebookMessengerJSONParser.java b/iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/facebook/FacebookMessengerJSONParser.java
new file mode 100644
index 0000000000..ccd8f24d19
--- /dev/null
+++ b/iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/facebook/FacebookMessengerJSONParser.java
@@ -0,0 +1,97 @@
+package iped.parsers.facebook;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+import java.util.Set;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+import com.google.gson.JsonArray;
+import com.google.gson.JsonElement;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParseException;
+import com.google.gson.JsonParser;
+
+/**
+ * Tika parser that renders the {@code messages} array of a Facebook Messenger
+ * JSON export as XHTML, one {@code <p>} element per message.
+ */
+public class FacebookMessengerJSONParser implements Parser {
+
+    private static final long serialVersionUID = 1L;
+
+    // NOTE(review): this claims every application/json item, not only Messenger
+    // exports; a dedicated media type is probably wanted - confirm.
+    private static final Set<MediaType> SUPPORTED_TYPES = MediaType.set(MediaType.application("json"));
+
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Writes one paragraph (sender, message text, timestamp) for each object in
+     * the top-level {@code messages} array; missing or non-primitive fields are
+     * replaced by placeholder text.
+     *
+     * @throws TikaException if the input is not valid JSON or has no top-level
+     *                       {@code messages} array
+     */
+    @Override
+    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        try {
+            // Deliberately not closed: Tika leaves closing the stream to the caller.
+            Reader reader = new InputStreamReader(stream, StandardCharsets.UTF_8);
+            for (JsonElement element : readMessages(reader)) {
+                if (!element.isJsonObject()) {
+                    continue; // not a message object, nothing to render
+                }
+                JsonObject message = element.getAsJsonObject();
+                // NOTE(review): the sample export holds mojibake (UTF-8 bytes escaped as
+                // separate characters); text is emitted as stored, not re-decoded - confirm.
+                String sender = getString(message, "sender_name", "Unknown");
+                String content = getString(message, "content", "No Content");
+                String timestamp = getString(message, "timestamp_ms", "No Timestamp");
+
+                xhtml.element("p", String.format("Sender: %s\nMessage: %s\nTimestamp: %s\n", sender, content, timestamp));
+            }
+        } finally {
+            xhtml.endDocument();
+        }
+    }
+
+    /** Parses the JSON document and returns its top-level {@code messages} array. */
+    private static JsonArray readMessages(Reader reader) throws TikaException {
+        JsonElement root;
+        try {
+            root = JsonParser.parseReader(reader);
+        } catch (JsonParseException e) {
+            throw new TikaException("Invalid JSON in Facebook Messenger export", e);
+        }
+        JsonElement messages = root.isJsonObject() ? root.getAsJsonObject().get("messages") : null;
+        if (messages == null || !messages.isJsonArray()) {
+            throw new TikaException("Not a Facebook Messenger export: no top-level \"messages\" array");
+        }
+        return messages.getAsJsonArray();
+    }
+
+    /** Returns the member as a string, or {@code fallback} when absent or not a JSON primitive. */
+    private static String getString(JsonObject object, String key, String fallback) {
+        JsonElement value = object.get(key);
+        return value != null && value.isJsonPrimitive() ? value.getAsString() : fallback;
+    }
+}
diff --git a/iped-parsers/iped-parsers-impl/src/test/java/iped/parsers/facebook/FacebookMessengerJSONParserTest.java b/iped-parsers/iped-parsers-impl/src/test/java/iped/parsers/facebook/FacebookMessengerJSONParserTest.java
new file mode 100644
index 0000000000..279826f4c7
--- /dev/null
+++ b/iped-parsers/iped-parsers-impl/src/test/java/iped/parsers/facebook/FacebookMessengerJSONParserTest.java
@@ -0,0 +1,72 @@
+package iped.parsers.facebook;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.ToTextContentHandler;
+import org.junit.Test;
+
+import junit.framework.TestCase;
+
+public class FacebookMessengerJSONParserTest extends TestCase {
+
+    /** Runs the parser over an in-memory JSON document and returns the extracted text. */
+    private static String parseJson(String json) throws Exception {
+        ToTextContentHandler handler = new ToTextContentHandler();
+        try (InputStream stream = new ByteArrayInputStream(json.getBytes(StandardCharsets.UTF_8))) {
+            new FacebookMessengerJSONParser().parse(stream, handler, new Metadata(), new ParseContext());
+        }
+        return handler.toString();
+    }
+
+    @Test
+    public void testParse() throws Exception {
+
+        FacebookMessengerJSONParser parser = new FacebookMessengerJSONParser();
+        ToTextContentHandler handler = new ToTextContentHandler();
+        Metadata metadata = new Metadata();
+        ParseContext context = new ParseContext();
+
+        try (InputStream stream = getClass().getResourceAsStream("/test-files/test_facebookMessenger.json")) {
+            assertNotNull("Input stream should not be null", stream);
+            parser.parse(stream, handler, metadata, context);
+
+            String parsedContent = handler.toString().trim();
+
+            // Check that the expected contents are present
+            assertTrue(parsedContent.contains("Sender: Guilherme David"));
+            assertTrue(parsedContent.contains("Message: Voc\u00c3\u00aa n\u00c3\u00a3o respondeu a: Boa tarde, deixe seu contato que o responsavel pelo imovel vai te passar todas informa\u00c3\u00a7\u00c3\u00b5es"));
+            assertTrue(parsedContent.contains("Timestamp: 1718225887428"));
+
+            assertTrue(parsedContent.contains("Message: Boa tarde, deixe seu contato que o responsavel pelo imovel vai te passar todas informa\u00c3\u00a7\u00c3\u00b5es"));
+            assertTrue(parsedContent.contains("Timestamp: 1718220406722"));
+
+            assertTrue(parsedContent.contains("Sender: Marcus De Oliveira"));
+            assertTrue(parsedContent.contains("Message: Ol\u00c3\u00a1, esse item ainda est\u00c3\u00a1 dispon\u00c3\u00advel?"));
+            assertTrue(parsedContent.contains("Timestamp: 1717920369981"));
+        }
+    }
+
+    @Test
+    public void testParseFallsBackOnMissingOrNullFields() throws Exception {
+        String parsedContent = parseJson("{\"messages\":[{\"sender_name\":null,\"timestamp_ms\":1}]}");
+
+        assertTrue(parsedContent.contains("Sender: Unknown"));
+        assertTrue(parsedContent.contains("Message: No Content"));
+        assertTrue(parsedContent.contains("Timestamp: 1"));
+    }
+
+    @Test
+    public void testParseRejectsJsonWithoutMessages() throws Exception {
+        try {
+            parseJson("{\"participants\":[]}");
+            fail("Expected TikaException for JSON without a messages array");
+        } catch (TikaException expected) {
+            // the parser must report unsupported input instead of failing with an NPE
+        }
+    }
+}
diff --git a/iped-parsers/iped-parsers-impl/src/test/resources/test-files/test_facebookMessenger.json b/iped-parsers/iped-parsers-impl/src/test/resources/test-files/test_facebookMessenger.json
new file mode 100644
index 0000000000..331f604ab0
--- /dev/null
+++ b/iped-parsers/iped-parsers-impl/src/test/resources/test-files/test_facebookMessenger.json
@@ -0,0 +1,68 @@
+{
+  "participants": [
+    {
+      "name": "Marcus De Oliveira"
+    },
+    {
+      "name": "Guilherme David"
+    }
+  ],
+  "messages": [
+    {
+      "sender_name": "Guilherme David",
+      "timestamp_ms": 1718225887428,
+      "content": "Voc\u00c3\u00aa n\u00c3\u00a3o respondeu a: Boa tarde, deixe seu contato que o responsavel pelo imovel vai te passar todas informa\u00c3\u00a7\u00c3\u00b5es",
+      "is_geoblocked_for_viewer": false
+    },
+    {
+      "sender_name": "Guilherme David",
+      "timestamp_ms": 1718220406722,
+      "content": "Boa tarde, deixe seu contato que o responsavel pelo imovel vai te passar todas informa\u00c3\u00a7\u00c3\u00b5es",
+      "is_geoblocked_for_viewer": false
+    },
+    {
+      "sender_name": "Guilherme David",
+      "timestamp_ms": 1718220404074,
+      "content": "Sim, voc\u00c3\u00aa ainda tem interesse?",
+      "is_geoblocked_for_viewer": false
+    },
+    {
+      "sender_name": "Marcus De Oliveira",
+      "timestamp_ms": 1718049971976,
+      "content": "Classificados semelhantes a \"3 quartos 2 banheiros \u00e2\u0080\u0093 Casa\"",
+      "is_geoblocked_for_viewer": false
+    },
+    {
+      "sender_name": "Marcus De Oliveira",
+      "timestamp_ms": 1717920369981,
+      "content": "Ol\u00c3\u00a1, esse item ainda est\u00c3\u00a1 dispon\u00c3\u00advel?",
+      "is_geoblocked_for_viewer": false
+    },
+    {
+      "sender_name": "Marcus De Oliveira",
+      "timestamp_ms": 1717920367339,
+      "content": "Voc\u00c3\u00aa mudou a foto do grupo.",
+      "is_geoblocked_for_viewer": false
+    },
+    {
+      "sender_name": "Marcus De Oliveira",
+      "timestamp_ms": 1717920366902,
+      "content": "Voc\u00c3\u00aa deu o nome \"Marcus \u00c2\u00b7 3 quartos 2 banheiros \u00e2\u0080\u0093 Casa\" ao grupo.",
+      "is_geoblocked_for_viewer": false
+    }
+  ],
+  "title": "Marcus \u00c2\u00b7 3 quartos 2 banheiros \u00e2\u0080\u0093 Casa",
+  "is_still_participant": true,
+  "thread_path": "inbox/marcus3quartos2banheiroscasa_7442173665892250",
+  "magic_words": [
+
+  ],
+  "image": {
+    "uri": "your_facebook_activity/messages/photos/435917623_1194274758404788_8125811693636917451_n_844277654271524.jpg",
+    "creation_timestamp": 1717920365
+  },
+  "joinable_mode": {
+    "mode": 1,
+    "link": ""
+  }
+}
\ No newline at end of file