From 40e7b0a28c923b5c23826bc75058f53c978be079 Mon Sep 17 00:00:00 2001 From: tpoisonooo Date: Mon, 15 Apr 2024 18:52:53 +0800 Subject: [PATCH] fix(requirements.txt): use forked textract --- huixiangdou/__init__.py | 1 + huixiangdou/version.py | 2 +- requirements.txt | 5 +- setup.py | 119 +++++----------------------------------- 4 files changed, 20 insertions(+), 107 deletions(-) diff --git a/huixiangdou/__init__.py b/huixiangdou/__init__.py index c2641a70..b7e1dff2 100644 --- a/huixiangdou/__init__.py +++ b/huixiangdou/__init__.py @@ -1,6 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. """import module.""" # only import frontend when needed, not here +from .version import __version__ from .service import ChatClient # noqa E401 from .service import ErrorCode # noqa E401 from .service import FeatureStore # noqa E401 diff --git a/huixiangdou/version.py b/huixiangdou/version.py index e1cccaa3..e6f6fd68 100644 --- a/huixiangdou/version.py +++ b/huixiangdou/version.py @@ -1,7 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. from typing import Tuple -__version__ = '0.1.2' +__version__ = '20240415' short_version = __version__ diff --git a/requirements.txt b/requirements.txt index 96ecca1d..c8ff5368 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ auto-gptq bcembedding beautifulsoup4 einops -faiss-gpu +faiss langchain>=0.1.12 loguru lxml_html_clean @@ -20,7 +20,8 @@ redis requests scikit-learn sentence_transformers==2.2.2 -textract +# See https://github.com/deanmalmgren/textract/issues/461 +textract @ git+https://github.com/tpoisonooo/textract@master tiktoken torch>=2.0.0 transformers>=4.37.0 diff --git a/setup.py b/setup.py index 29585a4f..c097285e 100644 --- a/setup.py +++ b/setup.py @@ -19,108 +19,19 @@ def get_version(): exec(compile(f.read(), version_file, 'exec')) return locals()['__version__'] - -def get_cuda_pkgs(): - arg_name = '--cuda=' - arg_value = None - for arg in sys.argv[1:]: - if arg.startswith(arg_name): - arg_value = arg[len(arg_name):] - sys.argv.remove(arg) - break - - cuda_pkgs = [] - if arg_value == '11': - cuda_pkgs = [ - 'nvidia-nccl-cu11', 'nvidia-cuda-runtime-cu11', - 'nvidia-cublas-cu11' - ] - elif arg_value == '12': - cuda_pkgs = [ - 'nvidia-nccl-cu12', 'nvidia-cuda-runtime-cu12', - 'nvidia-cublas-cu12' - ] - return cuda_pkgs - - -cuda_pkgs = get_cuda_pkgs() - - -def parse_requirements(fname='requirements.txt', with_version=True): - """Parse the package dependencies listed in a file but strips specific - versioning information. - - Args: - fname (str): path to the file - with_version (bool, default=False): if True include version specs - - Returns: - List[str]: list of requirements items - - CommandLine: - python -c "import setup; print(setup.parse_requirements())" - """ - require_fpath = fname - - def parse_line(line): - """Parse information from a line in a requirements text file.""" - if line.startswith('-r '): - # Allow specifying requirements in other files - target = line.split(' ')[1] - for info in parse_require_file(target): - yield info - else: - info = {'line': line} - if line.startswith('-e '): - info['package'] = line.split('#egg=')[1] - elif '@git+' in line: - info['package'] = line - else: - # Remove versioning from the package - pat = '(' + '|'.join(['>=', '==', '>']) + ')' - parts = re.split(pat, line, maxsplit=1) - parts = [p.strip() for p in parts] - - info['package'] = parts[0] - if len(parts) > 1: - op, rest = parts[1:] - if ';' in rest: - # Handle platform specific dependencies - # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies - version, platform_deps = map(str.strip, - rest.split(';')) - info['platform_deps'] = platform_deps - else: - version = rest # NOQA - info['version'] = (op, version) - yield info - - def parse_require_file(fpath): - with open(fpath, 'r') as f: - for line in f.readlines(): - line = line.strip() - if line and not line.startswith('#'): - for info in parse_line(line): - yield info - - def gen_packages_items(): - if os.path.exists(require_fpath): - for info in parse_require_file(require_fpath): - parts = [info['package']] - if with_version and 'version' in info: - parts.extend(info['version']) - if not sys.version.startswith('3.4'): - # apparently package_deps are broken in 3.4 - platform_deps = info.get('platform_deps') - if platform_deps is not None: - parts.append(';' + platform_deps) - item = ''.join(parts) - yield item - - packages = list(gen_packages_items()) - packages += cuda_pkgs - return packages - +def read_requirements(): + lines = [] + with open('requirements.txt', 'r') as f: + for line in f.readlines(): + if line.startswith('#'): + continue + if 'textract' in line: + continue + if len(line) > 0: + lines.append(line) + return lines + +install_packages = read_requirements() if __name__ == '__main__': huixiangdou_package_data = [ @@ -142,8 +53,8 @@ def gen_packages_items(): 'huixiangdou': huixiangdou_package_data, }, include_package_data=True, - setup_requires=parse_requirements('requirements.txt'), - install_requires=parse_requirements('requirements.txt'), + setup_requires=install_packages, + install_requires=install_packages, classifiers=[ 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9',