Skip to content

Commit

Permalink
Merge pull request htrc#39 from htrc/host-as-arg
Browse files Browse the repository at this point in the history
Added arguments to give Data API host and port in the htrc download c…
  • Loading branch information
samithaliyanage authored May 1, 2018
2 parents be2cb43 + 4c37513 commit 80f6c91
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 16 deletions.
2 changes: 2 additions & 0 deletions htrc/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ def download_parser(parser=None):
parser.add_argument("-c", "--concat", action='store_true',
help="concatenate a volume's pages in to a single file")
parser.add_argument("-t", "--token", help="JWT for volumes download.")
parser.add_argument("-dh", "--datahost", help="Data API host.")
parser.add_argument("-dp", "--dataport", help="Data API port.")
return parser

def add_workset_path(parser=None):
Expand Down
8 changes: 5 additions & 3 deletions htrc/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,13 @@ def _get_value(section, key, path=None):
raise EnvironmentError("Config not set for {} {} in {}".format(
section, key, path))

def get_dataapi_host_port(path=None):
    """
    Return the Data API ``(host, port)`` pair from the ``data`` config section.

    Kept alongside the separate host and port getters so that call sites
    which unpack ``host, port`` from this function keep working.

    :path: Optional config path, passed through to ``_get_value``.
    """
    host = _get_value('data', 'host', path)
    # The port is converted to int so callers get a number, not config text.
    port = int(_get_value('data', 'port', path))
    return (host, port)


def get_dataapi_port(path=None):
    """
    Return the Data API port from the ``data`` config section as an int.

    A value that ``int`` cannot convert raises ``ValueError``.

    :path: Optional config path, passed through to ``_get_value``.
    """
    return int(_get_value('data', 'port', path))

def get_dataapi_host(path=None):
    """
    Return the Data API host from the ``data`` config section.

    :path: Optional config path, passed through to ``_get_value``.
    """
    return _get_value('data', 'host', path)

def get_dataapi_epr(path=None):
    """
    Return the ``url`` entry from the ``data`` config section.

    :path: Optional config path, passed through to ``_get_value``.
    """
    data_api_url = _get_value('data', 'url', path)
    return data_api_url
Expand Down
2 changes: 1 addition & 1 deletion htrc/tools/topicexplorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def main(path, topics, iterations, output_dir='/media/secure_volume/workset'):
subprocess.check_call([
'topicexplorer', 'prep', path,
'-q', '--min-word-len', '3', '--lang', 'en',
'--high-percent', '30', '--low-percent', '10'
'--high', '30', '--low', '10'
])
subprocess.check_call([
'topicexplorer', 'train', path,
Expand Down
23 changes: 15 additions & 8 deletions htrc/volumes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from logging import NullHandler
logging.getLogger(__name__).addHandler(NullHandler())

def get_volumes(token, volume_ids, concat=False):
def get_volumes(token, volume_ids, host, port, concat=False):
"""
Returns volumes from the Data API as a raw zip stream.
Expand All @@ -47,6 +47,8 @@ def get_volumes(token, volume_ids, concat=False):
:volume_ids: A list of volume_ids
:concat: If True, return a single file per volume. If False, return a single
file per page (default).
:host: Data API host
:port: Data API port
"""
if not volume_ids:
raise ValueError("volume_ids is empty.")
Expand All @@ -73,7 +75,6 @@ def get_volumes(token, volume_ids, concat=False):
ctx.verify_mode = ssl.CERT_NONE

# Retrieve the volumes
host, port = htrc.config.get_dataapi_host_port()
httpsConnection = http.client.HTTPSConnection(host, port, context=ctx)
httpsConnection.request("POST", url, urlencode(data), headers)

Expand Down Expand Up @@ -134,7 +135,8 @@ def get_pages(token, page_ids, concat=False):
ctx.verify_mode = ssl.CERT_NONE

# Create connection
host, port = htrc.config.get_dataapi_host_port()
host = htrc.config.get_dataapi_host()
port = htrc.config.get_dataapi_port()
httpsConnection = http.client.HTTPSConnection(host, port, context=ctx)

headers = {"Authorization": "Bearer " + token}
Expand Down Expand Up @@ -218,21 +220,26 @@ def check_error_file(output_dir):


def download_volumes(volume_ids, output_dir, username=None, password=None,
config_path=None, token=None, concat=False):
config_path=None, token=None, concat=False, host=None, port=None):
# create output_dir folder, if nonexistant
if not os.path.isdir(output_dir):
os.makedirs(output_dir)

# get token if not specified
if not token:
import htrc.config
token = htrc.config.get_jwt_token()

if token is not None:
if not host:
host= htrc.config.get_dataapi_host()

if not port:
port = htrc.config.get_dataapi_port()

if any((token, host, port)) is not None:
logging.info("obtained token: %s\n" % token)

try:
data = get_volumes(token, volume_ids, concat)
data = get_volumes(token, volume_ids, host, port, concat)

myzip = ZipFile(BytesIO(data))
myzip.extractall(output_dir)
Expand All @@ -254,5 +261,5 @@ def download(args):

return download_volumes(volumeIDs, args.output,
username=args.username, password=args.password,
token=args.token, concat=args.concat)
token=args.token, concat=args.concat, host=args.datahost, port=args.dataport)

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import tarfile
import wget

__version__ = '0.1.43'
__version__ = '0.1.44'

install_requires = ['PyLD', 'future', 'prov', 'unicodecsv', 'progressbar2',
'requests', 'wget', 'argparse==1.1', 'topicexplorer>=1.0b194']
Expand Down
6 changes: 3 additions & 3 deletions tests/test_htrc_volumes.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def test_get_volumes_and_pages(self, https_mock):
''.encode('utf8')
https_mock.return_value.getresponse.return_value = response_mock

htrc.volumes.get_volumes('1234', self.test_vols)
htrc.volumes.get_volumes('1234', self.test_vols, 'data-host', '443')
htrc.volumes.get_pages('1234', self.test_vols)

@patch('htrc.volumes.http.client.HTTPSConnection')
Expand All @@ -70,14 +70,14 @@ def test_get_volumes_and_pages_error(self, https_mock):
https_mock.return_value.getresponse.return_value = response_mock

with self.assertRaises(EnvironmentError):
htrc.volumes.get_volumes('1234', self.test_vols)
htrc.volumes.get_volumes('1234', self.test_vols, 'data-host', '443')

with self.assertRaises(EnvironmentError):
htrc.volumes.get_pages('1234', self.test_vols)

def test_get_volumes_and_pages_empty(self):
with self.assertRaises(ValueError):
htrc.volumes.get_volumes('1234', [])
htrc.volumes.get_volumes('1234', [], 'data-host', '443')

with self.assertRaises(ValueError):
htrc.volumes.get_pages('1234', [])
Expand Down

0 comments on commit 80f6c91

Please sign in to comment.