Move QR code module to a docker container (#2)

This resolves the issue with zbar and opencv dependencies
certsocietegenerale · Nov 9, 2023 · 471dc61 · 471dc61
1 parent 7b1a1b8
commit 471dc61
Show file tree

Hide file tree

Showing 11 changed files with 106 additions and 94 deletions.
diff --git a/processing/document_preview/docker/requirements.txt b/processing/document_preview/docker/requirements.txt
@@ -1,3 +1,2 @@
 pdf2image==1.16.0
 Pillow==9.3.0
-opencv-python==4.8.0.76
diff --git a/processing/document_preview/docker/script.py b/processing/document_preview/docker/script.py
@@ -2,7 +2,6 @@
 import os
 from pdf2image import convert_from_path
 import re
-import cv2
 
 
 def pdftoimages(target, max_pages):
@@ -37,6 +36,7 @@ def libreofficeconversion(args):
         print('error: libreoffice could not convert file to PDF')
         return False
 
+
 def main(args):
 
     # if office file, convert to pdf file with libreoffice library
@@ -48,10 +48,6 @@ def main(args):
         pdftoimages(args.target, args.max_pages)
     # if another type
     # try to convert to pdf with libreoffice
-    elif args.target_type == 'jpg' or args.target_type == 'png' or args.target_type == 'jpeg':
-        #cv2.imread(args, cv2.IMREAD_ANYCOLOR)
-        cv2.imwrite('./output/output_1.jpeg')
-        pass
     else:
         print('warning: Unsupported target file')
         print('warning: libreoffice will try to convert the file to PDF')

diff --git a/processing/document_preview/document_preview.py b/processing/document_preview/document_preview.py
@@ -20,7 +20,7 @@ class DocumentPreview(ProcessingModule):
 
     name = 'document_preview'
     description = 'Display pages of pdf and office files'
-    acts_on = ['pdf', 'word', 'powerpoint', 'excel', 'rtf','png','jpeg']
+    acts_on = ['pdf', 'word', 'powerpoint', 'excel', 'rtf']
 
     config = [
         {
@@ -52,6 +52,7 @@ def save_images(self, directory):
                 # extract page number from filename
                 number = filename.split('_')[-1].split('.')[0]
                 self.add_support_file('page_#{}'.format(number), os.path.join(directory, filename))
+                self.register_files('jpeg', os.path.join(directory, filename))
                 extracted_images = True
 
         return extracted_images

diff --git a/processing/qrcode_extractor/details.html b/processing/qrcode_extractor/details.html
@@ -1,22 +1,18 @@
-<div class="col-md-12">
+{% if results %}
+    <div class="col-md-12">
         <div class="card">
             <div class="header">
                 <h4 class="title">QRcode Reader</h4>
                 <p class="category">Detailed Results</p>
             </div>
             <div class="content">
-                {% if results.urls %}
-                    <label>Extracted Links</label>
+                    <label>Extracted Data</label>
                     <div class="labeled-content">
-                        {% for url in results.urls %}
-                            <div>{{url}}</div>
+                        {% for data in results %}
+                            <div>{{data}}</div>
                         {% endfor %}
                     </div>
-                {% endif %}
-
-                {% if results.clean %}
-                    <p>No suspicious object found</p>
-                {% endif %}
             </div>
         </div>
     </div>
+{% endif %}
diff --git a/processing/qrcode_extractor/docker/Dockerfile b/processing/qrcode_extractor/docker/Dockerfile
@@ -0,0 +1,15 @@
+FROM python:3
+
+RUN apt-get update && apt-get install -y libzbar0
+
+COPY requirements.txt /app/requirements.txt
+
+RUN pip install -r /app/requirements.txt
+
+COPY script.py /app/script.py
+
+VOLUME ["/data"]
+
+WORKDIR /data
+
+ENTRYPOINT ["python", "/app/script.py"]
diff --git a/processing/qrcode_extractor/docker/requirements.txt b/processing/qrcode_extractor/docker/requirements.txt
@@ -0,0 +1,2 @@
+pyzbar==0.1.9
+opencv-python-headless==4.8.0.76
diff --git a/processing/qrcode_extractor/docker/script.py b/processing/qrcode_extractor/docker/script.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+import sys
+import cv2
+from pyzbar.pyzbar import decode
+
+
+def main(img):
+    qrcode_data = set()
+
+    # attempt QR code decoding via opencv
+    image = cv2.imread(img)
+    detect = cv2.QRCodeDetector()
+    try:
+        retval, decoded, points, straight_qr = detect.detectAndDecodeMulti(image)
+        if retval:
+            qrcode_data |= set(decoded)
+    except cv2.error as e:
+        pass
+
+    # attempt QR code decoding via zbar
+    image = cv2.imread(img, 0)
+    try:
+        value = decode(image)
+        qrcode_data |= set([v.data.decode() for v in value])
+    except TypeError:
+        pass
+
+    for data in qrcode_data:
+        if data.strip():
+            print(data.strip())
+
+
+if __name__ == "__main__":
+    target_file = sys.argv[1]
+    main(target_file)
diff --git a/processing/qrcode_extractor/install.sh b/processing/qrcode_extractor/install.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+SCRIPT=`realpath $0`
+SCRIPTPATH=`dirname $SCRIPT`
+
+docker build -t fame/qr_extractor $SCRIPTPATH/docker
diff --git a/processing/qrcode_extractor/qr_extractor.py b/processing/qrcode_extractor/qr_extractor.py
@@ -1,83 +1,46 @@
-import glob
-import pathlib
-import hashlib
-import pyzbar
-
-from fame.core.module import ProcessingModule, ModuleInitializationError, ModuleExecutionError
-from fame.common.utils import tempdir
-
-try:
-    import cv2
-    HAVE_CV2 = True
-except ImportError:
-    HAVE_CV2 = False
-
-try:
-    from pyzbar.pyzbar import decode
-    HAVE_PYZBAR = True
-except ImportError:
-    HAVE_PYZBAR = False
+from fame.core.module import ProcessingModule, ModuleInitializationError
+from ..docker_utils import HAVE_DOCKER, docker_client, docker
+import re
 
 
 class QrCodeExtractor(ProcessingModule):
     name = "qr_extractor"
-    description = "Analyze files (via docement preview) to find QRcodes and decode them with two different libs."
-    acts_on = ["png","jpg","jpeg", "pdf", "word", "html", "excel", "powerpoint"]
-    triggered_by = "document_preview"
-    config = [
-        {
-            "name": "skip_safe_file_review",
-            "type": "bool",
-            "default": False,
-            "description": "Skip file review when no suspicious elements are found."
-        }
-    ]
+    description = "find QRcodes in images and decode them"
+    acts_on = ["png", "jpeg", "bmp", "webp", "avif"]
+    triggered_by = "*_preview"
+    config = []
 
-# Check that libraries wer loaded correctly
-
     def initialize(self):
-        if not HAVE_CV2:
-            raise ModuleInitializationError(self, "Missing dependency: opencv2")
-        if not HAVE_PYZBAR:
-            raise ModuleInitializationError(self, "Missing dependency: pyzbar")
-
-# For each, check if QRcode is found and extract potentiel URL
-#   - TO-DO : include document preview for pdf to be able to read the qrcode
-#               Or trigger the qrcode extractor after document preview
-# possibly => mutualize Read image target
-    # decode QRcode
-
-    def extract_qr_code_by_opencv(img):
-        image = cv2.imread(img, 0)
-        try:
-            detect = cv2.QRCodeDetector()
-            value, points, straight_qrcode = detect.detectAndDecode(image)
-            print(value)
-            return value
-        except:
-            return
+        # Make sure docker is available
+        if not HAVE_DOCKER:
+            raise ModuleInitializationError(self, "Missing dependency: docker")
+
+    def parse_output(self, out):
+        out = out.decode("utf-8", errors="replace")
+        for line in out.splitlines():
+            if re.match("^https?:", line, re.UNICODE | re.IGNORECASE):
+                self.add_ioc(line)
+            else:
+                if not self.results:
+                    self.results = []
+                self.results.append(line)
+
+    def each_with_type(self, target, file_type):
+        if file_type != "url":
+            try:
+                self.parse_output(
+                    docker_client.containers.run(
+                        "fame/qr_extractor",
+                        "target.file",
+                        volumes={target: {"bind": "/data/target.file", "mode": "ro"}},
+                        stderr=True,
+                        remove=True,
+                    )
+                )
+            except (docker.errors.ContainerError, docker.errors.APIError) as e:
+                if hasattr(e, "stderr"):
+                    self.log("error", e.stderr)
+                elif hasattr(e, "explanation"):
+                    self.log("error", e.explanation)
 
-    def extract_qr_code_by_pyzbar(img):
-        image = cv2.imread(img, 0)
-        try:
-            value = decode(image)
-            print(value)
-            return value
-        except:
-            return
-
-    def each(self, target):
-        self.results = {}
-
-        # Get QRcode
-        self.results[">PYZBAR"] = self.extract_qr_code_by_pyzbar(target)
-        self.results[">OPENCV"] = self.extract_qr_code_by_opencv(target)	    
-        self.add_ioc(results)
-        #TO-DO add ioc for the url decoded only
-        #if filetype == "url" and not target.startswith("http"):
-        #    target = "http://{}".format(target)
-        #if filetype == "url":
-        #    self.add_ioc(target)
         return True
-
-#TO-DO Include call to URL preview
diff --git a/processing/qrcode_extractor/requirements.txt b/processing/qrcode_extractor/requirements.txt
@@ -1,2 +1 @@
-opencv-python==4.8.0.76
-pyzbar==0.1.9
+docker==6.1.3
diff --git a/processing/url_preview/url_preview.py b/processing/url_preview/url_preview.py
@@ -70,6 +70,7 @@ def save_preview(self, outdir):
 
         if os.path.exists(filepath) and os.path.isfile(filepath):
             self.add_support_file("preview", filepath)
+            self.register_files('png', filepath)
             return True
         else:
             return False
Original file line number	Diff line number	Diff line change
@@ -0,0 +1,2 @@
+pyzbar==0.1.9
+opencv-python-headless==4.8.0.76