diff --git a/htrc/auth.py b/htrc/auth.py index c366717..7d4a9df 100644 --- a/htrc/auth.py +++ b/htrc/auth.py @@ -1,8 +1,13 @@ -import time +#from base64 import b64encode from getpass import getpass +#import http.client +#import ssl +#import time +import subprocess import requests import requests.auth +#import configparser import htrc.config @@ -10,23 +15,29 @@ def get_jwt_token(): # Currently we just store one common jwt token locally at .htrc file for simplicity # Expect to add POST method to query unique jwt token with the combo of username and password - username, password = credential_prompt() + #username, password = credential_prompt() - client_id, client_secret = htrc.config.get_credentials() + #client_id, client_secret = htrc.config.get_credentials() - auth = requests.auth.HTTPBasicAuth(client_id, client_secret) - data = {"grant_type": "password", - "username": username, - "password": password, - "scope": "openid"} + #auth = requests.auth.HTTPBasicAuth(client_id, client_secret) + #data = { "grant_type": "password", + #"username": username, + #"password": password, + #"scope" : "openid"} - url = htrc.config.get_idp_url() - r = requests.post(url, data=data, auth=auth) + url1 = htrc.config.get_idp_url() + capsule_id = htrc.config._get_value("jwt", "capsule_id") + result = subprocess.check_output("hostname -s -I | awk '{print $1}'", shell=True) + result = result.decode('utf-8') + result = result[:-1] + capsule_ip = result.strip() + url = url1 + "/" + capsule_id + "/" + capsule_ip + r = requests.get(url) data = r.json() if 'error' not in data: - expiration = int(time.time()) + data['expires_in'] - return data['id_token'], expiration + #expiration = int(time.time()) + data['expires_in'] + return data['token'] elif data['error'] == 'invalid_grant': print("Invalid username or password. Please try again.\n") return get_jwt_token() @@ -50,5 +61,5 @@ def credential_prompt(): if __name__ == '__main__': - token, expiration = get_jwt_token() - htrc.config.save_jwt_token(token, expiration) + token = get_jwt_token() + htrc.config.save_jwt_token(token) diff --git a/htrc/config.py b/htrc/config.py index e937858..e48c3cf 100644 --- a/htrc/config.py +++ b/htrc/config.py @@ -33,7 +33,7 @@ def __init__(self, key: Optional[str] = None) -> None: super().__init__() - self.token = token or get_jwt_token(save_new_token=False) + self.token = token or get_jwt_token() self.host = host or get_dataapi_host() self.port = port or get_dataapi_port() self.epr = epr or get_dataapi_epr() @@ -103,27 +103,15 @@ def get_idp_url(path=None): # Add jwt credential access methods -def get_jwt_token(path=None, save_new_token=True): - try: - token = _get_value('jwt', 'token', path) - - # check expiration date - expiration = int(_get_value('jwt', 'expiration', path)) - if time.time() > expiration: - import htrc - htrc.config.remove_jwt_token() - raise RuntimeError("JWT token expired.") - except: - # This should run on either a missing or expired token. - import htrc.auth - token, expiration = htrc.auth.get_jwt_token() - if save_new_token: - htrc.config.save_jwt_token(token, expiration, path) +def get_jwt_token(path=None): + + import htrc.auth + token = htrc.auth.get_jwt_token() return token +def save_jwt_token(token, path=None): -def save_jwt_token(token, expiration=None, path=None): """ Saves JWT token in the config file. """ @@ -132,8 +120,8 @@ def save_jwt_token(token, expiration=None, path=None): path = DEFAULT_PATH # Default to expiration of now - force a new token on next request - if expiration is None: - expiration = time.time() + #if expiration is None: + #expiration = time.time() # Open and modify existing config file, if it exists. config = ConfigParser(allow_no_value=True) @@ -144,7 +132,7 @@ def save_jwt_token(token, expiration=None, path=None): # set token and expiration config.set('jwt', 'token', token) - config.set('jwt', 'expiration', expiration) + #config.set('jwt', 'expiration', expiration) with open(path, 'w') as credential_file: config.write(credential_file) @@ -168,7 +156,7 @@ def remove_jwt_token(path=None): config.add_section('jwt') # set token and expiration config.set('jwt', 'token', " ") - config.set('jwt', 'expiration', " ") + #config.set('jwt', 'expiration', " ") with open(path, 'w') as credential_file: config.write(credential_file) diff --git a/htrc/volumes/__init__.py b/htrc/volumes/__init__.py index c4c11b9..6fa2145 100644 --- a/htrc/volumes/__init__.py +++ b/htrc/volumes/__init__.py @@ -13,6 +13,8 @@ standard_library.install_aliases() + +#from builtins import input from htrc.models import HtrcPage import http.client @@ -20,14 +22,23 @@ import json import os.path import progressbar + +#import re import socket import ssl +#import sys +#from time import sleep +#from urllib.request import urlopen +#from urllib.error import HTTPError +from urllib.parse import urlencode +#import xml.etree.ElementTree as ET from urllib.parse import urlencode from zipfile import ZipFile # used to decompress requested zip archives. from tqdm import tqdm from htrc.runningheaders import parse_page_structure from functools import partial import pandas as pd +#from htrc.lib.cli import bool_prompt from htrc.util import split_items import htrc.config import multiprocessing @@ -62,7 +73,8 @@ def get_volumes(data_api_config: htrc.config.HtrcDataApiConfig, volume_ids, conc data = {'volumeIDs': '|'.join( [id.replace('+', ':').replace('=', '/') for id in volume_ids])} - + + if concat: data['concat'] = 'true' @@ -77,7 +89,7 @@ def get_volumes(data_api_config: htrc.config.HtrcDataApiConfig, volume_ids, conc # TODO: Fix SSL cert verification ctx = ssl.create_default_context() ctx.check_hostname = False - ctx.verify_mode = ssl.CERT_NONE + #ctx.verify_mode = ssl.CERT_NONE # Retrieve the volumes httpsConnection = http.client.HTTPSConnection( @@ -157,7 +169,7 @@ def get_pages(data_api_config: htrc.config.HtrcDataApiConfig, page_ids, concat=F # TODO: Fix SSL cert verification ctx = ssl.create_default_context() ctx.check_hostname = False - ctx.verify_mode = ssl.CERT_NONE + #ctx.verify_mode = ssl.CERT_NONE # Retrieve the volumes httpsConnection = http.client.HTTPSConnection( @@ -200,49 +212,52 @@ def get_pages(data_api_config: htrc.config.HtrcDataApiConfig, page_ids, concat=F return data -def get_oauth2_token(username, password): +#def get_oauth2_token(username, password): # make sure to set the request content-type as application/x-www-form-urlencoded - headers = {"Content-type": "application/x-www-form-urlencoded"} - data = {"grant_type": "client_credentials", - "client_secret": password, - "client_id": username} - data = urlencode(data) + #headers = {"Content-type": "application/x-www-form-urlencoded"} + #data = { "grant_type": "client_credentials", + #"client_secret": password, + #"client_id": username } + #data = urlencode(data) # create an SSL context - ctx = ssl.create_default_context() - ctx.check_hostname = False - ctx.verify_mode = ssl.CERT_NONE + #ctx = ssl.create_default_context() + #ctx.check_hostname = False + #ctx.verify_mode = ssl.CERT_NONE # make sure the request method is POST - host, port = htrc.config.get_oauth2_host_port() - oauth2port = htrc.config.get_oauth2_port() - oauth2EPRurl = htrc.config.get_oauth2_url() - httpsConnection = http.client.HTTPSConnection(host, oauth2port, context=ctx) - httpsConnection.request("POST", oauth2EPRurl + "?" + data, "", headers) + #host, port = htrc.config.get_oauth2_host_port() + #oauth2port = htrc.config.get_oauth2_port() + #oauth2EPRurl = htrc.config.get_oauth2_url() + #httpsConnection = http.client.HTTPSConnection(host, oauth2port, context=ctx) + #httpsConnection.request("POST", oauth2EPRurl + "?" + data, "", headers) - response = httpsConnection.getresponse() + #response = httpsConnection.getresponse() # if response status is OK - if response.status == 200: - data = response.read().decode('utf8') + #if response.status == 200: + #data = response.read().decode('utf8') - jsonData = json.loads(data) - logging.info("*** JSON: {}".format(jsonData)) + #jsonData = json.loads(data) + #logging.info("*** JSON: {}".format(jsonData)) - token = jsonData["access_token"] - logging.info("*** parsed token: {}".format(token)) + #token = jsonData["access_token"] + #logging.info("*** parsed token: {}".format(token)) - else: - logging.debug("Unable to get token") - logging.debug("Response Code: {}".format(response.status)) - logging.debug("Response: {}".format(response.reason)) - logging.debug(response.read()) - raise EnvironmentError("Unable to get the token.") - if httpsConnection is not None: - httpsConnection.close() + #else: + #logging.debug("Unable to get token") + #logging.debug("Response Code: {}".format(response.status)) + #logging.debug("Response: {}".format(response.reason)) + #logging.debug(response.read()) + #raise EnvironmentError("Unable to get token.") + + #if httpsConnection is not None: + #httpsConnection.close() + + + #return token - return token def grep_error(file_name, output_dir, pattern, txt_index): @@ -260,6 +275,9 @@ def grep_error(file_name, output_dir, pattern, txt_index): return na_volume + return na_volume + + def _to_htrc_page(page_file, zip): with TextIOWrapper(BytesIO(zip.read(page_file)), encoding='utf-8') as page: return HtrcPage([line.rstrip() for line in page.readlines()]) @@ -416,7 +434,9 @@ def _remove_headers_footers_and_save(vol_data, concat, hf_min_similarity, hf_win for vol_page_path, page_body in zip(sorted_vol_zip_page_paths, pages_body): with open(os.path.join(output_dir, vol_page_path), 'w', encoding='utf-8') as page_file: page_file.write(page_body) - + + + removed_hf = [] for vol_page_path, vol_page in zip(sorted_vol_zip_page_paths, vol_pages): if not (vol_page.has_header or vol_page.has_footer): diff --git a/setup.py b/setup.py index e2d26f3..215fae3 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,9 @@ import atexit import tarfile -__version__ = '0.1.57b0' + +__version__ = '0.1.57' + install_requires = ['PyLD', 'future', 'prov', 'unicodecsv', 'progressbar2', 'pandas', 'requests', 'argparse==1.1', 'topicexplorer==1.0b226', 'numpy==1.16.2', 'tqdm==4.46.0']