Merge branch 'master' into main

fmpfeifer · Feb 14, 2021 · 8d509d2 · 8d509d2
2 parents b092864 + 8e15a4e
commit 8d509d2
Show file tree

Hide file tree

Showing 5 changed files with 317 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,138 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
diff --git a/convert_wav.py b/convert_wav.py
@@ -0,0 +1,66 @@
+import os
+import concurrent.futures
+import subprocess
+import sys
+
+
+# Maximum number of worker threads used to run conversions in parallel.
+WORKERS = 32
+
+# Write handle on the null device, passed as stdout/stderr to ffmpeg so its
+# output is discarded. NOTE(review): opened at import time and never closed.
+devnull = open(os.devnull, 'w')
+
+
+def is_wav_file(file_to_test: str) -> bool:
+    """Tell whether ffprobe reports ``pcm_s16le`` audio for a file.
+
+    Runs ``ffprobe.exe`` on the file and searches what it writes to stderr
+    for the codec name.
+
+    Args:
+        file_to_test: Path of the file to probe.
+
+    Returns:
+        True if ``pcm_s16le`` appears in ffprobe's stderr output.
+    """
+    probe = subprocess.run(['ffprobe.exe', file_to_test], capture_output=True)
+    return b'pcm_s16le' in probe.stderr
+
+
+def convert_wav_to_mp3(wav_file: str) -> bool:
+    """Re-encode a PCM wav file with the MP3 codec, keeping its file name.
+
+    The original is moved aside to ``<wav_file>_oldwav``, ffmpeg writes the
+    re-encoded audio back to ``wav_file``, and the backup is deleted only
+    after ffmpeg reports success.
+
+    Args:
+        wav_file: Path of the file to convert.
+
+    Returns:
+        True if the file was converted. False if it was skipped because
+        ffprobe did not report ``pcm_s16le`` audio, or if ffmpeg exited with
+        a non-zero status (the original file is then put back).
+
+    Raises:
+        OSError: If the file cannot be renamed or ffmpeg cannot be started;
+            in the latter case the original file is restored first.
+    """
+    if not is_wav_file(wav_file):
+        print(f"Arquivo     {wav_file} não é um arquivo wav")
+        return False
+
+    print(f"Convertendo {wav_file} ...")
+    renamed_wav = wav_file + '_oldwav'
+    os.rename(wav_file, renamed_wav)
+    try:
+        returncode = subprocess.call(
+            ['ffmpeg.exe', '-i', renamed_wav, '-codec:a', 'libmp3lame',
+             '-qscale:a', '6', wav_file],
+            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    except OSError:
+        # ffmpeg could not be launched at all: undo the rename, then report.
+        os.replace(renamed_wav, wav_file)
+        raise
+    if returncode != 0:
+        # Never delete the only good copy after a failed conversion.
+        # os.replace also overwrites any partial output ffmpeg left behind.
+        os.replace(renamed_wav, wav_file)
+        print(f"Falha ao converter {wav_file}")
+        return False
+    os.unlink(renamed_wav)
+    print(f"Convertido  {wav_file}")
+    return True
+
+
+def collect_files_to_convert(source_folder):
+    """List every file under ``source_folder`` named ``*_Converted.wav``.
+
+    The folder is walked recursively and the suffix match is case-sensitive.
+
+    Args:
+        source_folder: Root folder to search.
+
+    Returns:
+        A list of paths, each the walked folder joined with the file name.
+    """
+    return [
+        os.path.join(current_folder, fname)
+        for current_folder, _, file_list in os.walk(source_folder)
+        for fname in file_list
+        if fname.endswith('_Converted.wav')
+    ]
+
+
+def process_dir(dir):
+    """Convert every ``*_Converted.wav`` file found under ``dir``.
+
+    The files are collected first and then converted in parallel on a thread
+    pool of ``WORKERS`` threads. A summary of how many files were actually
+    converted is printed at the end.
+
+    Args:
+        dir: Root folder to search for files to convert.
+
+    Raises:
+        SystemExit: With status 0 when no matching file is found.
+    """
+    files_to_convert = collect_files_to_convert(dir)
+    # Test the collected list, not the folder path, which is never empty here.
+    if not files_to_convert:
+        print("Não foram enconrados arquivos para converter")
+        sys.exit(0)
+    print(f"Convertendo {len(files_to_convert)} arquivos...")
+    with concurrent.futures.ThreadPoolExecutor(max_workers=WORKERS) as pool:
+        # Each conversion returns a bool, so the sum counts the successes.
+        count = sum(pool.map(convert_wav_to_mp3, files_to_convert))
+    print(f"Convertidos {count} arquivos de {len(files_to_convert)}")
+
+
+if __name__ == "__main__":
+    # Command-line entry point: convert_wav.py <report folder>
+    # sys.exit is used instead of the builtin exit, which the site module
+    # only injects for interactive use and which may be absent.
+    if len(sys.argv) != 2:
+        print(f"É necessário um argumento (pasta do relatório)."
+              f" Foram fornecidos {len(sys.argv)-1}.")
+        sys.exit(1)
+    folder = sys.argv[1]
+    if not os.path.isdir(folder):
+        print(f"{folder} não é um diretório")
+        sys.exit(1)
+    process_dir(folder)
diff --git a/deduper.py b/deduper.py
@@ -0,0 +1,113 @@
+import os
+import hashlib
+import shutil
+import sys
+
+
+# File-name suffixes (matched case-insensitively) whose files are always
+# copied rather than hard-linked.
+# NOTE(review): "-shm" presumably targets SQLite shared-memory files; confirm.
+EXTENSIONS_NOT_TO_HARDLINK = ["-shm"]
+
+
+def hashfile(filename: str) -> str:
+    """Return the MD5 digest of a file's contents as a hex string.
+
+    The file is read in 8192-byte blocks, so it is never held in memory
+    as a whole.
+
+    Args:
+        filename: Path of the file to hash.
+
+    Returns:
+        The hexadecimal MD5 digest.
+    """
+    digest = hashlib.md5()
+    with open(filename, 'rb') as stream:
+        # iter() with a sentinel stops at the first empty read (end of file).
+        for block in iter(lambda: stream.read(8192), b''):
+            digest.update(block)
+    return digest.hexdigest()
+
+
+def create_hard_link(original, link):
+    """Create ``link`` as a hard link to the existing file ``original``.
+
+    ``os.link`` is used so the process working directory is left untouched
+    and file names never pass through a shell command line. If the link
+    cannot be created, the file is copied instead so that ``link`` still
+    ends up existing with the right content.
+
+    Args:
+        original: Path of the existing file to link to.
+        link: Path of the hard link to create.
+    """
+    try:
+        os.link(original, link)
+    except OSError as error:
+        # Fall back to a plain copy rather than leaving the file missing.
+        print(f"Hardlink failed ({error}); copying {original} to {link} ...")
+        shutil.copy2(original, link)
+
+
+def copy_file(source, destdir, destfile, fsize, hash=None):
+    """Copy ``source`` into ``destdir`` unless an equivalent copy exists.
+
+    An existing ``destfile`` of the same size is kept when no hash is given,
+    or when its MD5 matches ``hash``. On a hash mismatch it is deleted and
+    copied again. In every other case the file is copied and the source's
+    stat information is applied to ``destfile``.
+
+    Args:
+        source: Path of the file to copy.
+        destdir: Folder the file is copied into.
+        destfile: Path the copy is expected to have inside ``destdir``.
+        fsize: Size of ``source`` in bytes.
+        hash: MD5 hex digest of ``source``, or None to compare by size only.
+    """
+    if os.path.isfile(destfile) and os.path.getsize(destfile) == fsize:
+        if hash is None:
+            print(f"Skipping, {destfile} exists..")
+            return
+        if hashfile(destfile) == hash:
+            print(
+                f"Skipping, {destfile} exist and has same hash as original...")
+            return
+        print(f"Deleting {destfile}. Hash mismatch...")
+        os.remove(destfile)
+    print(f"Copying {source} to {destdir} ...")
+    shutil.copy(source, destdir)
+    shutil.copystat(source, destfile)
+
+
+def shoud_try_hardlink(filename, fsize) -> bool:
+    """Decide whether a file is a candidate for hard-linking.
+
+    Args:
+        filename: Name or path of the file.
+        fsize: Size of the file in bytes.
+
+    Returns:
+        False for zero-size files and for names ending, case-insensitively,
+        in one of ``EXTENSIONS_NOT_TO_HARDLINK``; True otherwise.
+    """
+    if fsize == 0:
+        return False  # do not hardlink zero size file
+    lowered = filename.lower()
+    return not any(lowered.endswith(ext) for ext in EXTENSIONS_NOT_TO_HARDLINK)
+
+
+def copy_tree(source_folder, dest_root):
+    """Copy a folder tree, hard-linking files with identical content.
+
+    Every file is hashed. The first file seen with a given (hash, size) is
+    copied; later files with the same key become hard links to that copy.
+    Once a copy's link count exceeds 1000, the next duplicate is copied
+    instead and becomes the new link target. Files rejected by
+    ``shoud_try_hardlink`` are always copied. The number of bytes saved
+    through hard links is printed at the end.
+
+    Args:
+        source_folder: Root of the tree to copy.
+        dest_root: Folder under which the tree is recreated.
+    """
+    first_copies = {}  # (hash, size) -> [path of link target, link count]
+    saved = 0
+    for walk_dir, _, names in os.walk(source_folder):
+        subdir = os.path.relpath(walk_dir, source_folder)
+        target_dir = os.path.join(dest_root, subdir)
+        os.makedirs(target_dir, exist_ok=True)
+        for name in names:
+            src_path = os.path.join(source_folder, subdir, name)
+            dest_file = os.path.join(target_dir, name)
+            size = os.path.getsize(src_path)
+            digest = hashfile(src_path)
+
+            if not shoud_try_hardlink(src_path, size):
+                copy_file(src_path, target_dir, dest_file, size, digest)
+                continue
+
+            identity = (digest, size)
+            entry = first_copies.get(identity)
+            if entry is None:
+                # First occurrence of this content: it becomes the target.
+                first_copies[identity] = [dest_file, 1]
+                copy_file(src_path, target_dir, dest_file, size, digest)
+                continue
+
+            link_source, links_so_far = entry
+            if links_so_far > 1000:
+                # Too many links on the current target: start a fresh one.
+                copy_file(src_path, target_dir, dest_file, size)
+                first_copies[identity] = [dest_file, 1]
+                continue
+
+            if os.path.isfile(dest_file):
+                print(
+                    f"Do not create hardlink, {dest_file} exists")
+            else:
+                print(f"Creating hardlink for "
+                      f"{dest_file} with {link_source} ...")
+                create_hard_link(link_source, dest_file)
+            entry[1] += 1
+            saved += size
+    print(f"Saved {saved} bytes with hardlinks")
+
+
+if __name__ == "__main__":
+    # Command-line entry point: deduper.py <source folder> <dest folder>
+    # sys.exit is used instead of the builtin exit, which the site module
+    # only injects for interactive use and which may be absent.
+    if len(sys.argv) != 3:
+        print(f"São necessários 2 argumentos,"
+              f" mas foram informados {len(sys.argv)-1}")
+        print("Informe a pasta de origem e a de destino da cópia.")
+        sys.exit(1)
+    source = sys.argv[1]
+    dest = sys.argv[2]
+    if dest.endswith(":"):
+        # A bare drive spec such as "D:" gets a trailing backslash.
+        # NOTE(review): presumably so it denotes the drive root; confirm.
+        dest += "\\"
+    if not os.path.isdir(source):
+        print(f"{source} não é um diretório")
+        sys.exit(1)
+    if not os.path.isdir(dest):
+        print(f"{dest} não é um diretório")
+        sys.exit(1)
+    copy_tree(source, dest)
+    sys.exit(0)
diff --git a/ffmpeg.exe b/ffmpeg.exe
diff --git a/ffprobe.exe b/ffprobe.exe