Skip to content

Commit

Permalink
make python-magic optional, rely on builtin mimetypes module first
Browse files Browse the repository at this point in the history
  • Loading branch information
karlicoss committed Nov 9, 2020
1 parent eb7ae12 commit c442081
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 27 deletions.
15 changes: 7 additions & 8 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,15 @@ def main():

python_requires='>=3.6',
install_requires=[
*DEPS_INDEXER,
*DEPS_SERVER,
'appdirs', # for portable user directories detection
'tzlocal',
'more_itertools',
'pytz',
'sqlalchemy', # DB api
'cachew>=0.8.0', # caching with type hints

*DEPS_INDEXER,
*DEPS_SERVER,
],
extras_require={
'testing': [
Expand All @@ -61,17 +64,12 @@ def main():
}
)

# todo might be nice to ensure they are installable in separation?
DEPS_INDEXER = [
'appdirs', # for portable user directories detection

'urlextract',

# TODO could be optional?
'python-magic', # for detecting mime types
]

DEPS_SERVER = [
'tzlocal',
'hug',
]

Expand All @@ -80,6 +78,7 @@ def main():
# although server uses it so not sure...
('optional', 'dependencies that bring some bells & whistles'): [
'logzero', # pretty colored logging
'python-magic', # better mimetype detection
],
('HPI' , 'dependencies for [[https://github.com/karlicoss/HPI][HPI]]'): [
'HPI', # pypi version
Expand Down
33 changes: 29 additions & 4 deletions src/promnesia/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,12 +352,37 @@ def python3() -> str:
# ideally would be nice to fix it properly https://github.com/ahupp/python-magic#windows
@lru_cache(1)
def _magic():
import magic # type: ignore
return magic.Magic(mime=True)
logger = get_logger()
try:
import magic # type: ignore
except ModuleNotFoundError as me:
logger.exception(me)
msg = "python-magic is not detected. It's recommended for better file type detection (pip3 install --user python-magic). See https://github.com/ahupp/python-magic#installation"
logger.warning(msg)
warnings.warn(msg)
return lambda path: None # stub
else:
mm = magic.Magic(mime=True)
return mm.from_file


def mime(path: PathIsh) -> str:
return _magic().from_file(str(path))
@lru_cache(1)
def _mimetypes():
import mimetypes
mimetypes.init()
return mimetypes


def mime(path: PathIsh) -> Optional[str]:
ps = str(path)
mimetypes = _mimetypes()
# first try mimetypes, it's only using the filename without opening the file
pm, _ = mimetypes.guess_type(ps)
if pm is not None:
return pm
# next, libmagic, it might access the file, so a bit slower
magic = _magic()
return magic(ps)


def find_args(root: Path, follow: bool) -> List[str]:
Expand Down
17 changes: 5 additions & 12 deletions src/promnesia/sources/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,25 +252,18 @@ def rit() -> Iterable[Path]:
yield from r


import mimetypes
mimetypes.init()


def by_path(pp: Path):
suf = pp.suffix.lower()
# first check suffixes, it's faster
s = type2idx(suf)
if s is not None:
return s, None
# then try mimetypes, it's only using the filename
pm, _ = mimetypes.guess_type(str(pp))
if pm is not None:
s = type2idx(pm)
if s is not None:
return s, pm
# lastly, use libmagic, it's the slowest
# then try with mime
pm = mime(pp)
return type2idx(pm), pm
if pm is not None:
return type2idx(pm), pm
else:
return None, None


def _index_file(pp: Path, opts: Options) -> Results:
Expand Down
5 changes: 2 additions & 3 deletions src/promnesia/sources/browser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import pytz

from ..common import PathIsh, Results, Visit, Loc, get_logger, Second, _magic
from ..common import PathIsh, Results, Visit, Loc, get_logger, Second, mime
from .. import config

# todo mcachew?
Expand All @@ -16,12 +16,11 @@


def index(p: PathIsh) -> Results:
mime = _magic()
pp = Path(p)
assert pp.exists() # just in case of broken symlinks

# is_file check because it also returns dirs
is_db = lambda x: x.is_file() and mime.from_file(str(x)) in ['application/x-sqlite3']
is_db = lambda x: x.is_file() and mime(x) in ['application/x-sqlite3']

# todo warn if filtered out too many?
# todo wonder how quickly mimes can be computed?
Expand Down

0 comments on commit c442081

Please sign in to comment.