Skip to content

Commit

Permalink
fix(requirements.txt): use forked textract
Browse files Browse the repository at this point in the history
  • Loading branch information
tpoisonooo committed Apr 15, 2024
1 parent bb382e6 commit 40e7b0a
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 107 deletions.
1 change: 1 addition & 0 deletions huixiangdou/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.
"""import module."""
# only import frontend when needed, not here
from .version import __version__
from .service import ChatClient # noqa E401
from .service import ErrorCode # noqa E401
from .service import FeatureStore # noqa E401
Expand Down
2 changes: 1 addition & 1 deletion huixiangdou/version.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple

__version__ = '0.1.2'
__version__ = '20240415'
short_version = __version__


Expand Down
5 changes: 3 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ auto-gptq
bcembedding
beautifulsoup4
einops
faiss-gpu
faiss
langchain>=0.1.12
loguru
lxml_html_clean
Expand All @@ -20,7 +20,8 @@ redis
requests
scikit-learn
sentence_transformers==2.2.2
textract
# See https://github.com/deanmalmgren/textract/issues/461
textract @ git+https://github.com/tpoisonooo/textract@master
tiktoken
torch>=2.0.0
transformers>=4.37.0
Expand Down
119 changes: 15 additions & 104 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,108 +19,19 @@ def get_version():
exec(compile(f.read(), version_file, 'exec'))
return locals()['__version__']


def get_cuda_pkgs():
    """Return the NVIDIA runtime packages selected by a ``--cuda=`` flag.

    Scans ``sys.argv[1:]`` for the first argument that starts with
    ``--cuda=``. That argument is removed from ``sys.argv`` and its value
    selects the package list.

    Returns:
        List[str]: the ``nvidia-nccl``, ``nvidia-cuda-runtime`` and
            ``nvidia-cublas`` package names with a ``-cu11`` or ``-cu12``
            suffix when the flag value is ``'11'`` or ``'12'``; an empty
            list when the flag is absent or carries any other value.
    """
    arg_name = '--cuda='
    arg_value = None
    for index, arg in enumerate(sys.argv[1:], start=1):
        if arg.startswith(arg_name):
            arg_value = arg[len(arg_name):]
            # Delete by position so exactly the matched argument is removed;
            # iteration stops right after, so the shifted indices are unused.
            del sys.argv[index]
            break

    # Only these CUDA major versions have a known package set.
    if arg_value not in ('11', '12'):
        return []

    suffix = '-cu' + arg_value
    return [
        'nvidia-nccl' + suffix,
        'nvidia-cuda-runtime' + suffix,
        'nvidia-cublas' + suffix,
    ]


# Resolved once when the module is imported; the call also removes the
# matched ``--cuda=`` argument from ``sys.argv``.
cuda_pkgs = get_cuda_pkgs()


def parse_requirements(fname='requirements.txt', with_version=True):
    """Parse the package dependencies listed in a requirements file.

    Version specifiers are kept or dropped depending on ``with_version``.
    The entries of the module-level ``cuda_pkgs`` list are appended to the
    parsed requirements.

    Args:
        fname (str): path to the file
        with_version (bool, default=True): if True include version specs
    Returns:
        List[str]: list of requirements items
    CommandLine:
        python -c "import setup; print(setup.parse_requirements())"
    """
    require_fpath = fname
    # Only these operators are split off as a version spec; a line using any
    # other operator is passed through unchanged as the package name.
    version_pat = '(' + '|'.join(['>=', '==', '>']) + ')'

    def parse_line(line):
        """Parse information from a line in a requirements text file."""
        if line.startswith('-r '):
            # Allow specifying requirements in other files
            target = line.split(' ')[1]
            yield from parse_require_file(target)
            return

        info = {'line': line}
        if line.startswith('-e '):
            info['package'] = line.split('#egg=')[1]
        elif re.search(r'@\s*git\+', line):
            # Direct git reference, written either as ``name@git+...`` or in
            # the PEP 508 form ``name @ git+...``; keep the line verbatim.
            info['package'] = line
        else:
            # Remove versioning from the package
            parts = re.split(version_pat, line, maxsplit=1)
            parts = [p.strip() for p in parts]

            info['package'] = parts[0]
            if len(parts) > 1:
                op, rest = parts[1:]
                if ';' in rest:
                    # Handle platform specific dependencies
                    # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies
                    # Split once so a ';' inside the marker cannot break
                    # the two-value unpacking.
                    version, platform_deps = map(str.strip,
                                                 rest.split(';', 1))
                    info['platform_deps'] = platform_deps
                else:
                    version = rest  # NOQA
                info['version'] = (op, version)
        yield info

    def parse_require_file(fpath):
        """Yield parsed info for each non-blank, non-comment line."""
        with open(fpath, 'r') as f:
            for line in f:
                line = line.strip()
                if line and not line.startswith('#'):
                    yield from parse_line(line)

    def gen_packages_items():
        """Rebuild one requirement string per parsed entry."""
        # A missing requirements file yields no items instead of raising.
        if not os.path.exists(require_fpath):
            return
        for info in parse_require_file(require_fpath):
            parts = [info['package']]
            if with_version and 'version' in info:
                parts.extend(info['version'])
            if not sys.version.startswith('3.4'):
                # apparently package_deps are broken in 3.4
                platform_deps = info.get('platform_deps')
                if platform_deps is not None:
                    parts.append(';' + platform_deps)
            yield ''.join(parts)

    packages = list(gen_packages_items())
    packages += cuda_pkgs
    return packages

def read_requirements():
    """Read ``requirements.txt`` and return its installable entries.

    The file is opened relative to the current working directory. Each line
    is stripped of surrounding whitespace; blank lines, ``#`` comment lines
    and any line containing ``textract`` are skipped.

    Returns:
        List[str]: requirement specifiers without trailing newlines.

    Raises:
        OSError: if ``requirements.txt`` cannot be opened.
    """
    requirements = []
    with open('requirements.txt', 'r') as f:
        for raw_line in f:
            # Lines read from the file keep their newline, so strip before
            # testing for emptiness or a comment marker.
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue
            # NOTE(review): presumably textract is excluded because it is
            # listed as a direct git reference - confirm it is installed
            # separately.
            if 'textract' in line:
                continue
            requirements.append(line)
    return requirements

# Computed when the module is imported; ``read_requirements`` opens
# ``requirements.txt`` relative to the current working directory.
install_packages = read_requirements()

if __name__ == '__main__':
huixiangdou_package_data = [
Expand All @@ -142,8 +53,8 @@ def gen_packages_items():
'huixiangdou': huixiangdou_package_data,
},
include_package_data=True,
setup_requires=parse_requirements('requirements.txt'),
install_requires=parse_requirements('requirements.txt'),
setup_requires=install_packages,
install_requires=install_packages,
classifiers=[
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
Expand Down

0 comments on commit 40e7b0a

Please sign in to comment.