Skip to content

Commit

Permalink
Move QR code module to a docker container (#2)
Browse files Browse the repository at this point in the history
This resolves the issue with zbar and opencv dependencies
  • Loading branch information
Augustin-FL authored Nov 9, 2023
1 parent 7b1a1b8 commit 471dc61
Show file tree
Hide file tree
Showing 11 changed files with 106 additions and 94 deletions.
1 change: 0 additions & 1 deletion processing/document_preview/docker/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
pdf2image==1.16.0
Pillow==9.3.0
opencv-python==4.8.0.76
6 changes: 1 addition & 5 deletions processing/document_preview/docker/script.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import os
from pdf2image import convert_from_path
import re
import cv2


def pdftoimages(target, max_pages):
Expand Down Expand Up @@ -37,6 +36,7 @@ def libreofficeconversion(args):
print('error: libreoffice could not convert file to PDF')
return False


def main(args):

# if office file, convert to pdf file with libreoffice library
Expand All @@ -48,10 +48,6 @@ def main(args):
pdftoimages(args.target, args.max_pages)
# if another type
# try to convert to pdf with libreoffice
elif args.target_type == 'jpg' or args.target_type == 'png' or args.target_type == 'jpeg':
#cv2.imread(args, cv2.IMREAD_ANYCOLOR)
cv2.imwrite('./output/output_1.jpeg')
pass
else:
print('warning: Unsupported target file')
print('warning: libreoffice will try to convert the file to PDF')
Expand Down
3 changes: 2 additions & 1 deletion processing/document_preview/document_preview.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class DocumentPreview(ProcessingModule):

name = 'document_preview'
description = 'Display pages of pdf and office files'
acts_on = ['pdf', 'word', 'powerpoint', 'excel', 'rtf','png','jpeg']
acts_on = ['pdf', 'word', 'powerpoint', 'excel', 'rtf']

config = [
{
Expand Down Expand Up @@ -52,6 +52,7 @@ def save_images(self, directory):
# extract page number from filename
number = filename.split('_')[-1].split('.')[0]
self.add_support_file('page_#{}'.format(number), os.path.join(directory, filename))
self.register_files('jpeg', os.path.join(directory, filename))
extracted_images = True

return extracted_images
Expand Down
16 changes: 6 additions & 10 deletions processing/qrcode_extractor/details.html
Original file line number Diff line number Diff line change
@@ -1,22 +1,18 @@
<div class="col-md-12">
{% if results %}
<div class="col-md-12">
<div class="card">
<div class="header">
<h4 class="title">QRcode Reader</h4>
<p class="category">Detailed Results</p>
</div>
<div class="content">
{% if results.urls %}
<label>Extracted Links</label>
<label>Extracted Data</label>
<div class="labeled-content">
{% for url in results.urls %}
<div>{{url}}</div>
{% for data in results %}
<div>{{data}}</div>
{% endfor %}
</div>
{% endif %}

{% if results.clean %}
<p>No suspicious object found</p>
{% endif %}
</div>
</div>
</div>
{% endif %}
15 changes: 15 additions & 0 deletions processing/qrcode_extractor/docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Image that runs the QR code extraction script (/app/script.py) against a
# file mounted under /data.
FROM python:3

# libzbar0 provides the zbar shared library needed by pyzbar.
# update + install + cleanup live in one layer so the package index is never
# stale and is not kept in the final image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends libzbar0 \
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt /app/requirements.txt

# --no-cache-dir: the pip download cache is useless inside an image
RUN pip install --no-cache-dir -r /app/requirements.txt

COPY script.py /app/script.py

VOLUME ["/data"]

WORKDIR /data

ENTRYPOINT ["python", "/app/script.py"]
2 changes: 2 additions & 0 deletions processing/qrcode_extractor/docker/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
pyzbar==0.1.9
opencv-python-headless==4.8.0.76
35 changes: 35 additions & 0 deletions processing/qrcode_extractor/docker/script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env python3
import sys
import cv2
from pyzbar.pyzbar import decode


def main(img):
    """Decode the QR codes found in an image and print one payload per line.

    Two decoders are tried in turn: OpenCV's ``QRCodeDetector`` and zbar
    (through ``pyzbar``). Payloads are merged in a set so that a code read by
    both decoders is printed only once. Decoder failures are ignored on
    purpose (best effort): an unreadable image simply prints nothing.

    Args:
        img: path of the image file to analyze.
    """
    qrcode_data = set()

    # attempt QR code decoding via opencv
    image = cv2.imread(img)
    detect = cv2.QRCodeDetector()
    try:
        retval, decoded, _points, _straight_qr = detect.detectAndDecodeMulti(image)
        if retval:
            qrcode_data.update(decoded)
    except cv2.error:
        # best effort: fall through to the zbar attempt
        pass

    # attempt QR code decoding via zbar, on a grayscale version of the image
    image = cv2.imread(img, cv2.IMREAD_GRAYSCALE)
    try:
        # errors="replace": a QR payload is arbitrary bytes, and a strict
        # decode would raise an uncaught UnicodeDecodeError on non-UTF-8 data
        qrcode_data.update(
            symbol.data.decode("utf-8", errors="replace")
            for symbol in decode(image)
        )
    except TypeError:
        # best effort: keep whatever opencv found
        pass

    # Strip before de-duplicating so that "x" and "x " are printed once, and
    # sort so that the output order does not depend on set iteration order.
    payloads = {data.strip() for data in qrcode_data}
    payloads.discard("")
    for data in sorted(payloads):
        print(data)


if __name__ == "__main__":
    # Exit with a short usage message (on stderr, non-zero status) rather
    # than an IndexError traceback when the image path is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: script.py <image file>")
    main(sys.argv[1])
5 changes: 5 additions & 0 deletions processing/qrcode_extractor/install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Build the docker image used by the QR code extractor module from the
# "docker" directory located next to this script.

# Stop at the first failing command instead of building from a wrong path.
set -e

# Quoted so that paths containing spaces are handled correctly.
SCRIPT=$(realpath "$0")
SCRIPTPATH=$(dirname "$SCRIPT")

docker build -t fame/qr_extractor "$SCRIPTPATH/docker"
113 changes: 38 additions & 75 deletions processing/qrcode_extractor/qr_extractor.py
Original file line number Diff line number Diff line change
@@ -1,83 +1,46 @@
import glob
import pathlib
import hashlib
import pyzbar

from fame.core.module import ProcessingModule, ModuleInitializationError, ModuleExecutionError
from fame.common.utils import tempdir

try:
import cv2
HAVE_CV2 = True
except ImportError:
HAVE_CV2 = False

try:
from pyzbar.pyzbar import decode
HAVE_PYZBAR = True
except ImportError:
HAVE_PYZBAR = False
from fame.core.module import ProcessingModule, ModuleInitializationError
from ..docker_utils import HAVE_DOCKER, docker_client, docker
import re


class QrCodeExtractor(ProcessingModule):
name = "qr_extractor"
description = "Analyze files (via docement preview) to find QRcodes and decode them with two different libs."
acts_on = ["png","jpg","jpeg", "pdf", "word", "html", "excel", "powerpoint"]
triggered_by = "document_preview"
config = [
{
"name": "skip_safe_file_review",
"type": "bool",
"default": False,
"description": "Skip file review when no suspicious elements are found."
}
]
description = "find QRcodes in images and decode them"
acts_on = ["png", "jpeg", "bmp", "webp", "avif"]
triggered_by = "*_preview"
config = []

# Check that libraries were loaded correctly

def initialize(self):
if not HAVE_CV2:
raise ModuleInitializationError(self, "Missing dependency: opencv2")
if not HAVE_PYZBAR:
raise ModuleInitializationError(self, "Missing dependency: pyzbar")

# For each, check if QRcode is found and extract potential URL
# - TO-DO : include document preview for pdf to be able to read the qrcode
# Or trigger the qrcode extractor after document preview
# possibly => mutualize Read image target
# decode QRcode

def extract_qr_code_by_opencv(img):
image = cv2.imread(img, 0)
try:
detect = cv2.QRCodeDetector()
value, points, straight_qrcode = detect.detectAndDecode(image)
print(value)
return value
except:
return
# Make sure docker is available
if not HAVE_DOCKER:
raise ModuleInitializationError(self, "Missing dependency: docker")

def parse_output(self, out):
    """Split raw extractor output into IOCs and plain results.

    ``out`` is decoded as UTF-8 (invalid bytes are replaced) and read line by
    line. A line starting with ``http:`` or ``https:`` (case-insensitive) is
    passed to ``self.add_ioc``; every other line is appended to
    ``self.results``, which is first reset to an empty list when it is unset
    or empty.

    Args:
        out: bytes produced by the extractor.
    """
    text = out.decode("utf-8", errors="replace")
    flags = re.UNICODE | re.IGNORECASE
    for entry in text.splitlines():
        if re.match("^https?:", entry, flags):
            self.add_ioc(entry)
            continue
        if not self.results:
            self.results = []
        self.results.append(entry)

def each_with_type(self, target, file_type):
    """Run the QR extractor container on one file and record its output.

    The file is mounted read-only as ``/data/target.file`` in a
    ``fame/qr_extractor`` container; whatever the container prints is handed
    to ``self.parse_output``. Docker errors are logged, not raised.

    Args:
        target: path of the file to analyze on the host.
        file_type: type of ``target``; ``"url"`` targets are skipped.

    Returns:
        True when the container produced some output, False otherwise.
        NOTE(review): the framework is assumed to treat a falsy return as
        "module produced nothing" -- confirm against fame.core.module.
    """
    # a URL is not a file that can be mounted in the container
    if file_type == "url":
        return False

    try:
        output = docker_client.containers.run(
            "fame/qr_extractor",
            "target.file",
            volumes={target: {"bind": "/data/target.file", "mode": "ro"}},
            stderr=True,
            remove=True,
        )
    except (docker.errors.ContainerError, docker.errors.APIError) as e:
        details = None
        if hasattr(e, "stderr"):
            details = e.stderr
        elif hasattr(e, "explanation"):
            details = e.explanation
        if details is not None:
            # container stderr may be raw bytes; log it as readable text
            if isinstance(details, bytes):
                details = details.decode("utf-8", errors="replace")
            self.log("error", details)
        return False

    self.parse_output(output)
    return bool(output.strip())

def extract_qr_code_by_pyzbar(img):
image = cv2.imread(img, 0)
try:
value = decode(image)
print(value)
return value
except:
return

def each(self, target):
self.results = {}

# Get QRcode
self.results[">PYZBAR"] = self.extract_qr_code_by_pyzbar(target)
self.results[">OPENCV"] = self.extract_qr_code_by_opencv(target)
self.add_ioc(results)
#TO-DO add ioc for the url decoded only
#if filetype == "url" and not target.startswith("http"):
# target = "http://{}".format(target)
#if filetype == "url":
# self.add_ioc(target)
return True

#TO-DO Include call to URL preview
3 changes: 1 addition & 2 deletions processing/qrcode_extractor/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
opencv-python==4.8.0.76
pyzbar==0.1.9
docker==6.1.3
1 change: 1 addition & 0 deletions processing/url_preview/url_preview.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def save_preview(self, outdir):

if os.path.exists(filepath) and os.path.isfile(filepath):
self.add_support_file("preview", filepath)
self.register_files('png', filepath)
return True
else:
return False
Expand Down

0 comments on commit 471dc61

Please sign in to comment.