-
Notifications
You must be signed in to change notification settings - Fork 1
/
setup.py
84 lines (76 loc) · 2.83 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
from distutils.core import setup
import setuptools
from setuptools import find_packages
# Check whether NLTK is available; some features of this package need it.
try:
    import nltk  # noqa: F401  (imported only to probe availability)
except ImportError:
    # Record the absence so the flag reflects reality; previously it was
    # set to True up front and never cleared on failure.
    NLTK_INSTALLED = False
    print('NLTK is not installed some functions of this package will not work.')
else:
    NLTK_INSTALLED = True
# Interactive check of the user's Stanford POS tagger installation.
def get_stanford_data():
    """Prompt for the Stanford POS tagger directory and check it is usable.

    Reads a directory from standard input, builds the paths to the English
    ``left3words`` model and to ``stanford-postagger.jar`` inside it, and
    tries to instantiate NLTK's ``StanfordPOSTagger`` with those paths.

    Raises:
        ImportError: if NLTK cannot be imported.
        LookupError: if constructing the tagger fails with ``LookupError``
            (presumably because the model or the jar was not found).
    """
    # Imported locally: neither name is in scope at module level, and NLTK
    # is an optional dependency that may be missing at setup time.
    import os
    from nltk.tag import StanfordPOSTagger

    print('Please insert your full stanford postagger path!')
    stanford_pos_dir = input()
    # os.path.join works whether or not the user typed a trailing separator.
    eng_model_filename = os.path.join(
        stanford_pos_dir, 'models', 'english-left3words-distsim.tagger')
    my_path_to_jar = os.path.join(stanford_pos_dir, 'stanford-postagger.jar')
    print('The evaluation of your data could take some time. Wait please!')
    try:
        StanfordPOSTagger(model_filename=eng_model_filename,
                          path_to_jar=my_path_to_jar)
    except LookupError as err:
        # Raising a bare string is a TypeError in Python 3; raise a real
        # exception and keep the original cause attached.
        raise LookupError(
            "Some preprocessing techniques will not work because one of the "
            "stanford parser or CoreNLP jars doesn't exist"
        ) from err
# Find the packages to ship, leaving out the documentation and notebooks.
packages = setuptools.find_packages(exclude=['docs', 'notebooks'])

# Read the README as UTF-8 explicitly so the result does not depend on the
# platform's default locale encoding.
with open('README.md', encoding='utf-8') as f:
    LONG_DESCRIPTION = f.read()

setup(
    name='preprocess',
    version='0.2.0',
    description='Python library for text preprocessing and normalization.',
    long_description=LONG_DESCRIPTION,
    # The long description comes from README.md, so declare it as Markdown;
    # otherwise package indexes treat it as reStructuredText.
    long_description_content_type='text/markdown',
    url='https://github.com/sorice/preprocess',
    author='Abel Meneses-Abad',
    # NOTE(review): this value looks like a scraper-obfuscated placeholder,
    # not a real address -- confirm against the upstream repository.
    author_email='[email protected]',
    license='BSD 3-Clause License',
    classifiers=[
        # NOTE(review): '1 - Production' is not a registered trove
        # classifier (valid values include '1 - Planning' and
        # '5 - Production/Stable'); pick the intended one before uploading.
        'Development Status :: 1 - Production',
        'Intended Audience :: Developers',
        'Intended Audience :: Education',
        'Intended Audience :: Information Technology',
        'Intended Audience :: Science/Research',
        'License :: OSI Approved :: BSD License',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 3.5',
        'Topic :: Scientific/Engineering',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'Topic :: Scientific/Engineering :: Human Machine Interfaces',
        'Topic :: Scientific/Engineering :: Information Analysis',
        'Topic :: Scientific/Engineering :: Text Data Mining',
        'Topic :: Text Processing',
        'Topic :: Text Processing :: Filters',
        'Topic :: Text Processing :: General',
        'Topic :: Text Processing :: Indexing',
        'Topic :: Text Processing :: Linguistic',
    ],
    packages=packages,
    install_requires=[
        'numpy >= 1.7.0',
        'nltk >= 3.1',
        'nose >= 1.3.7',
        'pdfminer>=20191020',
        'PyPDF2>=1.26.0',
    ],
    setup_requires=[
        'numpy >= 1.7.0',
    ],
    include_package_data=True,
    # Configuration and sample data installed alongside the package.
    data_files=[
        ('config', ['preprocess/data/cfg/stanford.cfg']),
        ('data', [
            'preprocess/data/books/Cultura_Libre.txt',
            'preprocess/data/books/Free_Culture.txt',
            'preprocess/data/abbreviations.en',
        ]),
    ],
    zip_safe=False,
)