-
Notifications
You must be signed in to change notification settings - Fork 222
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 4 changed files with 175 additions and 2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
55 changes: 55 additions & 0 deletions
55
iped-parsers/iped-parsers-impl/src/main/java/iped/parsers/facebook/FacebookMessengerJSONParser.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
package iped.parsers.facebook; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.IOException; | ||
import java.io.InputStream; | ||
import java.io.InputStreamReader; | ||
import java.nio.charset.StandardCharsets; | ||
import java.util.Set; | ||
|
||
import org.apache.tika.exception.TikaException; | ||
import org.apache.tika.metadata.Metadata; | ||
import org.apache.tika.mime.MediaType; | ||
import org.apache.tika.parser.ParseContext; | ||
import org.apache.tika.parser.Parser; | ||
import org.apache.tika.sax.XHTMLContentHandler; | ||
import org.xml.sax.ContentHandler; | ||
import org.xml.sax.SAXException; | ||
|
||
import com.google.gson.JsonArray; | ||
import com.google.gson.JsonObject; | ||
import com.google.gson.JsonParser; | ||
|
||
public class FacebookMessengerJSONParser implements Parser { | ||
|
||
private static final long serialVersionUID = 1L; | ||
private static final Set<MediaType> SUPPORTED_TYPES = MediaType.set(MediaType.application("json")); | ||
|
||
@Override | ||
public Set<MediaType> getSupportedTypes(ParseContext arg0) { | ||
return SUPPORTED_TYPES; | ||
} | ||
|
||
@Override | ||
public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { | ||
|
||
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata); | ||
xhtml.startDocument(); | ||
|
||
try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) { | ||
JsonObject jsonObject = JsonParser.parseReader(reader).getAsJsonObject(); | ||
JsonArray messages = jsonObject.getAsJsonArray("messages"); | ||
|
||
for (int i = 0; i < messages.size(); i++) { | ||
JsonObject message = messages.get(i).getAsJsonObject(); | ||
String sender = message.has("sender_name") ? message.get("sender_name").getAsString() : "Unknown"; | ||
String content = message.has("content") ? message.get("content").getAsString() : "No Content"; | ||
String timestamp = message.has("timestamp_ms") ? message.get("timestamp_ms").getAsString() : "No Timestamp"; | ||
|
||
xhtml.element("p", String.format("Sender: %s\nMessage: %s\nTimestamp: %s\n", sender, content, timestamp)); | ||
} | ||
} finally { | ||
xhtml.endDocument(); | ||
} | ||
} | ||
} |
44 changes: 44 additions & 0 deletions
44
iped-parsers/iped-parsers-impl/src/test/java/iped/parsers/facebook/FacebookMessengerJSONParserTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
package iped.parsers.facebook; | ||
|
||
import java.io.InputStream; | ||
|
||
import org.apache.tika.metadata.Metadata; | ||
import org.apache.tika.parser.ParseContext; | ||
import org.apache.tika.sax.ToTextContentHandler; | ||
import org.junit.Test; | ||
|
||
import junit.framework.TestCase; | ||
|
||
public class FacebookMessengerJSONParserTest extends TestCase { | ||
|
||
@Test | ||
public void testParse() throws Exception { | ||
|
||
FacebookMessengerJSONParser parser = new FacebookMessengerJSONParser(); | ||
ToTextContentHandler handler = new ToTextContentHandler(); | ||
Metadata metadata = new Metadata(); | ||
ParseContext context = new ParseContext(); | ||
|
||
try (InputStream stream = getClass().getResourceAsStream("/test-files/test_facebookMessenger.json")) { | ||
assertNotNull("Input stream should not be null", stream); | ||
parser.parse(stream, handler, metadata, context); | ||
|
||
// Asserts | ||
String parsedContent = handler.toString().trim(); | ||
String[] lines = parsedContent.lines().toArray(String[]::new); | ||
|
||
// Verifique se os conteúdos esperados estão presentes | ||
assertTrue(parsedContent.contains("Sender: Guilherme David")); | ||
assertTrue(parsedContent.contains("Message: Voc\u00c3\u00aa n\u00c3\u00a3o respondeu a: Boa tarde, deixe seu contato que o responsavel pelo imovel vai te passar todas informa\u00c3\u00a7\u00c3\u00b5es")); | ||
assertTrue(parsedContent.contains("Timestamp: 1718225887428")); | ||
|
||
assertTrue(parsedContent.contains("Sender: Guilherme David")); | ||
assertTrue(parsedContent.contains("Message: Boa tarde, deixe seu contato que o responsavel pelo imovel vai te passar todas informa\u00c3\u00a7\u00c3\u00b5es")); | ||
assertTrue(parsedContent.contains("Timestamp: 1718220406722")); | ||
|
||
assertTrue(parsedContent.contains("Sender: Marcus De Oliveira")); | ||
assertTrue(parsedContent.contains("Message: Ol\u00c3\u00a1, esse item ainda est\u00c3\u00a1 dispon\u00c3\u00advel?")); | ||
assertTrue(parsedContent.contains("Timestamp: 1717920369981")); | ||
} | ||
} | ||
} |
68 changes: 68 additions & 0 deletions
68
iped-parsers/iped-parsers-impl/src/test/resources/test-files/test_facebookMessenger.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
{ | ||
"participants": [ | ||
{ | ||
"name": "Marcus De Oliveira" | ||
}, | ||
{ | ||
"name": "Guilherme David" | ||
} | ||
], | ||
"messages": [ | ||
{ | ||
"sender_name": "Guilherme David", | ||
"timestamp_ms": 1718225887428, | ||
"content": "Voc\u00c3\u00aa n\u00c3\u00a3o respondeu a: Boa tarde, deixe seu contato que o responsavel pelo imovel vai te passar todas informa\u00c3\u00a7\u00c3\u00b5es", | ||
"is_geoblocked_for_viewer": false | ||
}, | ||
{ | ||
"sender_name": "Guilherme David", | ||
"timestamp_ms": 1718220406722, | ||
"content": "Boa tarde, deixe seu contato que o responsavel pelo imovel vai te passar todas informa\u00c3\u00a7\u00c3\u00b5es", | ||
"is_geoblocked_for_viewer": false | ||
}, | ||
{ | ||
"sender_name": "Guilherme David", | ||
"timestamp_ms": 1718220404074, | ||
"content": "Sim, voc\u00c3\u00aa ainda tem interesse?", | ||
"is_geoblocked_for_viewer": false | ||
}, | ||
{ | ||
"sender_name": "Marcus De Oliveira", | ||
"timestamp_ms": 1718049971976, | ||
"content": "Classificados semelhantes a \"3 quartos 2 banheiros \u00e2\u0080\u0093 Casa\"", | ||
"is_geoblocked_for_viewer": false | ||
}, | ||
{ | ||
"sender_name": "Marcus De Oliveira", | ||
"timestamp_ms": 1717920369981, | ||
"content": "Ol\u00c3\u00a1, esse item ainda est\u00c3\u00a1 dispon\u00c3\u00advel?", | ||
"is_geoblocked_for_viewer": false | ||
}, | ||
{ | ||
"sender_name": "Marcus De Oliveira", | ||
"timestamp_ms": 1717920367339, | ||
"content": "Voc\u00c3\u00aa mudou a foto do grupo.", | ||
"is_geoblocked_for_viewer": false | ||
}, | ||
{ | ||
"sender_name": "Marcus De Oliveira", | ||
"timestamp_ms": 1717920366902, | ||
"content": "Voc\u00c3\u00aa deu o nome \"Marcus \u00c2\u00b7 3 quartos 2 banheiros \u00e2\u0080\u0093 Casa\" ao grupo.", | ||
"is_geoblocked_for_viewer": false | ||
} | ||
], | ||
"title": "Marcus \u00c2\u00b7 3 quartos 2 banheiros \u00e2\u0080\u0093 Casa", | ||
"is_still_participant": true, | ||
"thread_path": "inbox/marcus3quartos2banheiroscasa_7442173665892250", | ||
"magic_words": [ | ||
|
||
], | ||
"image": { | ||
"uri": "your_facebook_activity/messages/photos/435917623_1194274758404788_8125811693636917451_n_844277654271524.jpg", | ||
"creation_timestamp": 1717920365 | ||
}, | ||
"joinable_mode": { | ||
"mode": 1, | ||
"link": "" | ||
} | ||
} |