Skip to content

Commit

Permalink
Merge branch 'master' into pr-2330
Browse files Browse the repository at this point in the history
  • Loading branch information
lfcnassif committed Oct 8, 2024
2 parents 54efdb6 + c51db60 commit c168a67
Show file tree
Hide file tree
Showing 19 changed files with 747 additions and 146 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,12 @@ WhatsAppReport.FoundInPedoHashDB=Found in Child Porn Alert Hash Database
WhatsAppReport.Owner=Owner
WhatsAppReport.Recovered=Recovered
WhatsAppReport.QuoteNotFound=Quoted message not found
WhatsAppReport.QuoteStaus=Quoted Status
WhatsAppReport.QuotePrivacy=Message quoted privately through the group
WhatsAppReport.QuotePrivacyMessage=This quoted message was created in the group
WhatsAppReport.QuotePrivacyNotFound=Message quoted privately through the group not found
WhatsAppReport.QuoteCatalog=Quoted Catalog
WhatsAppReport.ContactedFindBusinesses=You contacted {0} from find businesses
WhatsAppReport.Document=Document
WhatsAppReport.Photo=Photo
WhatsAppReport.Audio=Audio
Expand Down Expand Up @@ -325,6 +331,8 @@ WhatsAppReport.EditedOn=Edited on
WhatsAppReport.UserJoinedWhatsApp=joined WhatsApp
WhatsAppReport.PinnedMessage=pinned a message
WhatsAppReport.AIThirdParty=This AI is from a third-party developer. Meta receives your AI chats to improve AI quality.
WhatsAppReport.Over256MembersOnlyAdminsCanEdit=This group has over 256 members so now only admins can edit the group settings.
WhatsAppReport.SecurityNotificationsNoLongerAvailable=Security code notifications are no longer available for this chat.
VCardParser.FormattedName=Formatted Name
VCardParser.Name=Name
VCardParser.Nickname=Nickname
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,12 @@ WhatsAppReport.FoundInPedoHashDB=gefunden in KiPo Hash Database
WhatsAppReport.Owner=Besitzer
WhatsAppReport.Recovered=Recovered[TBT]
WhatsAppReport.QuoteNotFound=Quoted message not found[TBT]
WhatsAppReport.QuoteStaus=Quoted Status[TBT]
WhatsAppReport.QuotePrivacy=Message quoted privately through the group[TBT]
WhatsAppReport.QuotePrivacyMessage=This quoted message was created in the group[TBT]
WhatsAppReport.QuotePrivacyNotFound=Message quoted privately through the group not found[TBT]
WhatsAppReport.QuoteCatalog=Quoted Catalog[TBT]
WhatsAppReport.ContactedFindBusinesses=You contacted {0} from find businesses[TBT]
WhatsAppReport.Document=Document[TBT]
WhatsAppReport.Photo=Photo[TBT]
WhatsAppReport.Audio=Audio[TBT]
Expand Down Expand Up @@ -325,6 +331,8 @@ WhatsAppReport.EditedOn=Edited on[TBT]
WhatsAppReport.UserJoinedWhatsApp=joined WhatsApp[TBT]
WhatsAppReport.PinnedMessage=pinned a message[TBT]
WhatsAppReport.AIThirdParty=This AI is from a third-party developer. Meta receives your AI chats to improve AI quality.[TBT]
WhatsAppReport.Over256MembersOnlyAdminsCanEdit=Diese Gruppe hat mehr als 256 Mitglieder. Daher können jetzt nur noch Admins die Gruppeneinstellungen bearbeiten.
WhatsAppReport.SecurityNotificationsNoLongerAvailable=Benachrichtigungen zur Sicherheitsnummer sind für diesen Chat nicht länger verfügbar.
VCardParser.FormattedName=Name formatiert
VCardParser.Name=Name
VCardParser.Nickname=Nickname
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,12 @@ WhatsAppReport.FoundInPedoHashDB=Encontrado en la base de datos Hash de Alerta d
WhatsAppReport.Owner=Propietario
WhatsAppReport.Recovered=Recovered[TBT]
WhatsAppReport.QuoteNotFound=Quoted message not found[TBT]
WhatsAppReport.QuoteStaus=Quoted Status[TBT]
WhatsAppReport.QuotePrivacy=Message quoted privately through the group[TBT]
WhatsAppReport.QuotePrivacyMessage=This quoted message was created in the group[TBT]
WhatsAppReport.QuotePrivacyNotFound=Message quoted privately through the group not found[TBT]
WhatsAppReport.QuoteCatalog=Quoted Catalog[TBT]
WhatsAppReport.ContactedFindBusinesses=You contacted {0} from find businesses[TBT]
WhatsAppReport.Document=Document[TBT]
WhatsAppReport.Photo=Photo[TBT]
WhatsAppReport.Audio=Audio[TBT]
Expand Down Expand Up @@ -325,6 +331,8 @@ WhatsAppReport.EditedOn=Edited on[TBT]
WhatsAppReport.UserJoinedWhatsApp=joined WhatsApp[TBT]
WhatsAppReport.PinnedMessage=pinned a message[TBT]
WhatsAppReport.AIThirdParty=This AI is from a third-party developer. Meta receives your AI chats to improve AI quality.[TBT]
WhatsAppReport.Over256MembersOnlyAdminsCanEdit=This group has over 256 members so now only admins can edit the group settings.[TBT]
WhatsAppReport.SecurityNotificationsNoLongerAvailable=Security code notifications are no longer available for this chat.[TBT]
VCardParser.FormattedName=Nombre con formato
VCardParser.Name=Nombre
VCardParser.Nickname=Sobrenombre
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,12 @@ WhatsAppReport.FoundInPedoHashDB=Contenu pédopornographique détecté via la ba
WhatsAppReport.Owner=Propriétaire
WhatsAppReport.Recovered=Récupéré
WhatsAppReport.QuoteNotFound=Message cité est introuvable
WhatsAppReport.QuoteStaus=Quoted Status[TBT]
WhatsAppReport.QuotePrivacy=Message quoted privately through the group[TBT]
WhatsAppReport.QuotePrivacyMessage=This quoted message was created in the group[TBT]
WhatsAppReport.QuotePrivacyNotFound=Message quoted privately through the group not found[TBT]
WhatsAppReport.QuoteCatalog=Quoted Catalog[TBT]
WhatsAppReport.ContactedFindBusinesses=You contacted {0} from find businesses[TBT]
WhatsAppReport.Document=Document
WhatsAppReport.Photo=Photo
WhatsAppReport.Audio=Audio
Expand Down Expand Up @@ -325,6 +331,8 @@ WhatsAppReport.EditedOn=Modifié en
WhatsAppReport.UserJoinedWhatsApp=a rejoint WhatsApp
WhatsAppReport.PinnedMessage=a épinglé un message
WhatsAppReport.AIThirdParty=Cette IA provient d'un développeur tiers. Meta reçoit vos discussions IA pour améliorer la qualité de l'IA.
WhatsAppReport.Over256MembersOnlyAdminsCanEdit=Comme ce groupe inclut plus de 256 membres, désormais, seulement les admins peuvent modifier les paramètres du groupe.
WhatsAppReport.SecurityNotificationsNoLongerAvailable=Les notifications relatives aux codes de sécurité ne sont plus disponibles pour cette discussion.
VCardParser.FormattedName=Nom formaté
VCardParser.Name=Nom
VCardParser.Nickname=Surnom
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,12 @@ WhatsAppReport.FoundInPedoHashDB=Trovato nel Database di materiale pedopornograf
WhatsAppReport.Owner=Proprietario
WhatsAppReport.Recovered=Recovered[TBT]
WhatsAppReport.QuoteNotFound=Quoted message not found[TBT]
WhatsAppReport.QuoteStaus=Quoted Status[TBT]
WhatsAppReport.QuotePrivacy=Message quoted privately through the group[TBT]
WhatsAppReport.QuotePrivacyMessage=This quoted message was created in the group[TBT]
WhatsAppReport.QuotePrivacyNotFound=Message quoted privately through the group not found[TBT]
WhatsAppReport.QuoteCatalog=Quoted Catalog[TBT]
WhatsAppReport.ContactedFindBusinesses=You contacted {0} from find businesses[TBT]
WhatsAppReport.Document=Document[TBT]
WhatsAppReport.Photo=Photo[TBT]
WhatsAppReport.Audio=Audio[TBT]
Expand Down Expand Up @@ -325,6 +331,8 @@ WhatsAppReport.EditedOn=Edited on[TBT]
WhatsAppReport.UserJoinedWhatsApp=joined WhatsApp[TBT]
WhatsAppReport.PinnedMessage=pinned a message[TBT]
WhatsAppReport.AIThirdParty=This AI is from a third-party developer. Meta receives your AI chats to improve AI quality.[TBT]
WhatsAppReport.Over256MembersOnlyAdminsCanEdit=Dato che questo gruppo ha più di 256 membri, solo gli amministratori potranno modificarne le impostazioni.
WhatsAppReport.SecurityNotificationsNoLongerAvailable=Le notifiche sul codice di sicurezza non sono più disponibili per questa chat.
VCardParser.FormattedName=Nome formattato
VCardParser.Name=Nome
VCardParser.Nickname=Nickname
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,12 @@ WhatsAppReport.FoundInPedoHashDB=Encontrado em base de hashes de pornografia inf
WhatsAppReport.Owner=Proprietário
WhatsAppReport.Recovered=Recuperado
WhatsAppReport.QuoteNotFound=Mensagem citada não localizada
WhatsAppReport.QuoteStaus=Status citado
WhatsAppReport.QuotePrivacy=Mensagem citada em particular através de grupo
WhatsAppReport.QuotePrivacyMessage=Esta mensagem citada foi criada no grupo
WhatsAppReport.QuotePrivacyNotFound=Mensagem citada em particular através de grupo não encontrada
WhatsAppReport.QuoteCatalog=Catálogo citado
WhatsAppReport.ContactedFindBusinesses=Você entrou em contato com a empresa {0} usando o recurso de encontrar empresas
WhatsAppReport.Document=Documento
WhatsAppReport.Photo=Foto
WhatsAppReport.Audio=Áudio
Expand Down Expand Up @@ -325,6 +331,8 @@ WhatsAppReport.EditedOn=Editada em
WhatsAppReport.UserJoinedWhatsApp=entrou no WhatsApp
WhatsAppReport.PinnedMessage=fixou uma mensagem
WhatsAppReport.AIThirdParty=Esta IA pertence a um desenvolvedor terceirizado. A Meta recebe suas conversas com IA para melhorar a qualidade desse recurso.
WhatsAppReport.Over256MembersOnlyAdminsCanEdit=Agora somente admins podem editar as configurações porque o grupo tem mais de 256 membros.
WhatsAppReport.SecurityNotificationsNoLongerAvailable=As notificações sobre o código de segurança não estão mais disponíveis para esta conversa.
VCardParser.FormattedName=Nome Formatado
VCardParser.Name=Nome
VCardParser.Nickname=Apelido
Expand Down
47 changes: 26 additions & 21 deletions iped-app/resources/scripts/tasks/WhisperProcess.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import sys
import sys
import numpy
stdout = sys.stdout
sys.stdout = sys.stderr
Expand All @@ -10,7 +10,6 @@
ping = 'ping'

def main():

modelName = sys.argv[1]
deviceNum = int(sys.argv[2])
threads = int(sys.argv[3])
Expand Down Expand Up @@ -74,38 +73,44 @@ def main():
if line == ping:
print(ping, file=stdout, flush=True)
continue

transcription = ''

files = line.split(",")
transcription = []
logprobs = []
for file in files:
transcription.append("")
logprobs.append([])
try:
if whisperx_found:
audio = whisperx.load_audio(line)
result = model.transcribe(audio, batch_size=batch_size, language=language)
result = model.transcribe(files, batch_size=batch_size, language=language,wav=True)
for segment in result['segments']:
transcription += segment['text']
idx = segment["audio"]
transcription[idx] += segment['text']
if 'avg_logprob' in segment:
logprobs.append(segment['avg_logprob'])
logprobs[idx].append(segment['avg_logprob'])
else:
segments, info = model.transcribe(audio=line, language=language, beam_size=5, vad_filter=True)
for segment in segments:
transcription += segment.text
logprobs.append(segment.avg_logprob)
for idx in range(len(files)):
segments, info = model.transcribe(audio=files[idx], language=language, beam_size=5, vad_filter=True)
for segment in segments:
transcription[idx] += segment.text
logprobs[idx].append(segment.avg_logprob)

except Exception as e:
msg = repr(e).replace('\n', ' ').replace('\r', ' ')
print(msg, file=stdout, flush=True)
continue

text = transcription.replace('\n', ' ').replace('\r', ' ')

if len(logprobs) == 0:
finalScore = 0
else:
finalScore = numpy.mean(numpy.exp(logprobs))

print(finished, file=stdout, flush=True)
print(str(finalScore), file=stdout, flush=True)
print(text, file=stdout, flush=True)

for idx in range(len(files)):
text = transcription[idx].replace('\n', ' ').replace('\r', ' ')

if len(logprobs[idx]) == 0:
finalScore = 0
else:
finalScore = numpy.mean(numpy.exp(logprobs[idx]))
print(str(finalScore), file=stdout, flush=True)
print(text, file=stdout, flush=True)

return

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public abstract class AbstractTranscriptTask extends AbstractTask {
private static final int MAX_WAV_SIZE = 16000 * 2 * MAX_WAV_TIME;

protected AudioTranscriptConfig transcriptConfig;

// Variables to store some statistics
private static final AtomicLong wavTime = new AtomicLong();
private static final AtomicLong transcriptionTime = new AtomicLong();
Expand All @@ -91,8 +91,7 @@ public boolean isEnabled() {

protected boolean isToProcess(IItem evidence) {

if (evidence.getLength() == null || evidence.getLength() == 0 || !evidence.isToAddToCase()
|| evidence.getMetadata().get(ExtraProperties.TRANSCRIPT_ATTR) != null) {
if (evidence.getLength() == null || evidence.getLength() == 0 || !evidence.isToAddToCase() || evidence.getMetadata().get(ExtraProperties.TRANSCRIPT_ATTR) != null) {
return false;
}
if (transcriptConfig.getSkipKnownFiles() && evidence.getExtraAttribute(HashDBLookupTask.STATUS_ATTRIBUTE) != null) {
Expand Down Expand Up @@ -192,8 +191,7 @@ public void init(ConfigurationManager configurationManager) throws Exception {

}

public static TextAndScore transcribeWavBreaking(File tmpFile, String itemPath,
Function<File, TextAndScore> transcribeWavPart) throws Exception {
public static TextAndScore transcribeWavBreaking(File tmpFile, String itemPath, Function<File, TextAndScore> transcribeWavPart) throws Exception {
if (tmpFile.length() <= MAX_WAV_SIZE) {
return transcribeWavPart.apply(tmpFile);
} else {
Expand All @@ -218,6 +216,10 @@ public static TextAndScore transcribeWavBreaking(File tmpFile, String itemPath,
}

/**
 * Splits an audio file using the default maximum part duration.
 * <p>
 * Convenience overload: forwards to {@link #getAudioSplits(File, String, int)}
 * with {@code MAX_WAV_TIME} as the per-part time limit.
 *
 * @param inFile   the audio file to split
 * @param itemPath path of the originating item, passed through unchanged
 * @return whatever the three-argument overload returns for these arguments
 */
protected static Collection<File> getAudioSplits(File inFile, String itemPath) {
    // Name the default explicitly so the delegation reads as "use the class-wide limit".
    final int defaultPartTime = MAX_WAV_TIME;
    return getAudioSplits(inFile, itemPath, defaultPartTime);
}

protected static Collection<File> getAudioSplits(File inFile, String itemPath, int max_wave_time) {
List<File> splitFiles = new ArrayList<File>();
AudioInputStream aIn = null;
AudioInputStream aOut = null;
Expand All @@ -226,7 +228,7 @@ protected static Collection<File> getAudioSplits(File inFile, String itemPath) {
outFile.delete();
aIn = AudioSystem.getAudioInputStream(inFile);
int bytesPerFrame = aIn.getFormat().getFrameSize();
int framesPerPart = Math.round(aIn.getFormat().getFrameRate() * MAX_WAV_TIME);
int framesPerPart = Math.round(aIn.getFormat().getFrameRate() * max_wave_time);
byte[] partBytes = new byte[framesPerPart * bytesPerFrame];
int numBytesRead = 0;
int seq = 0;
Expand Down Expand Up @@ -312,7 +314,7 @@ public void finish() throws Exception {
conn.close();
conn = null;
}

long totWavConversions = wavSuccess.longValue() + wavFail.longValue();
if (totWavConversions != 0) {
LOGGER.info("Total conversions to WAV: " + totWavConversions);
Expand All @@ -336,8 +338,7 @@ public void finish() throws Exception {
}
}

protected File getTempFileToTranscript(IItem evidence, TemporaryResources tmp)
throws IOException, InterruptedException {
protected File getTempFileToTranscript(IItem evidence, TemporaryResources tmp) throws IOException, InterruptedException {
long t = System.currentTimeMillis();
File tempWav = null;
try {
Expand Down Expand Up @@ -369,8 +370,7 @@ protected void process(IItem evidence) throws Exception {
return;
}

if (evidence.getMetadata().get(ExtraProperties.TRANSCRIPT_ATTR) != null
&& evidence.getMetadata().get(ExtraProperties.CONFIDENCE_ATTR) != null)
if (evidence.getMetadata().get(ExtraProperties.TRANSCRIPT_ATTR) != null && evidence.getMetadata().get(ExtraProperties.CONFIDENCE_ATTR) != null)
return;

TextAndScore prevResult = getTextFromDb(evidence.getHash());
Expand Down
Loading

0 comments on commit c168a67

Please sign in to comment.