Skip to content

Commit

Permalink
checks if files are already downloaded or extracted, and changes 'tmp…
Browse files Browse the repository at this point in the history
…dir' to 'destination'
  • Loading branch information
wrought committed Sep 14, 2014
1 parent d8e90bd commit 5fcedde
Showing 1 changed file with 23 additions and 16 deletions.
39 changes: 23 additions & 16 deletions jats-to-mediawiki.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def main():
'Command-line interface to jats-to-mediawiki.xslt, a script to ' +
'manage conversion of articles (documents) from JATS xml format ' +
'to MediaWiki markup, based on DOI or PMCID')
parser.add_argument('-t', '--tmpdir', default='tmp/',
help='path to temporary directory for purposes of this script')
parser.add_argument('-d', '--destination', default='articles/',
help='path to destination directory for purposes of this script')
parser.add_argument('-x', '--xmlcatalogfiles',
default='dtd/catalog-test-jats-v1.xml',
help='path to xml catalog files for xsltproc')
Expand All @@ -61,7 +61,7 @@ def main():


# Handle and convert input values
tmpdir = args.tmpdir
destination = args.destination
xmlcatalogfiles = args.xmlcatalogfiles
infile = args.infile
outfile = args.outfile
Expand Down Expand Up @@ -90,16 +90,16 @@ def main():
except:
print 'Unable to set XML_CATALOG_FILES environment variable'
sys.exit(-1)
# print "\n" + os.environ.get('XML_CATALOG_FILES') + "\n" #debug

# create temporary directory for zips
tmpdir = cwd + "/" + to_unicode_or_bust(tmpdir)
# create destination directory
destination = cwd + "/" + to_unicode_or_bust(destination)
try:
if not os.path.exists(tmpdir):
os.makedirs(tmpdir)
if not os.path.exists(destination):
os.makedirs(destination)
except:
print 'Unable to find or create temporary directory'
sys.exit(-1)
# print "\n" + os.environ.get('XML_CATALOG_FILES') + "\n" #debug

# separate DOIs and PMCIDs
articledois = [i for i in articleids if re.match('^10*', i)]
Expand Down Expand Up @@ -151,21 +151,28 @@ def main():
format='tgz')['href']

# download the file
print "\nDownloading file..."
archivefilename = wget.filename_from_url(archivefileurl)
urllib.urlretrieve(archivefileurl, archivefilename)

# @TODO For some reason, wget hangs and doesn't finish, using
# urllib.urlretrieve() instead for this for now.
# archivefile = wget.download(archivefileurl, wget.bar_thermometer)
if not os.path.exists(archivefilename):
urllib.urlretrieve(archivefileurl, archivefilename)
print "\nDownloading file..."
else:
print "\nFound local file, skipping download..."

# @TODO For some reason, wget hangs and doesn't finish, using
# urllib.urlretrieve() instead for this for now.
# archivefile = wget.download(archivefileurl, wget.bar_thermometer)

# open the archive
archivedirectoryname, archivefileextension = archivefilename.split(
'.tar.gz')
print archivedirectoryname
tfile = tarfile.open(archivefilename, 'r:gz')
tfile.extractall('.')

if not os.path.exists(archivedirectoryname):
print "\nExtracting " + archivedirectoryname + " ..."
tfile = tarfile.open(archivefilename, 'r:gz')
tfile.extractall('.')
else:
print "\nFound local directory, skipping extraction..."

# run xsltproc
# @TODO use list comprehension instead
Expand Down

0 comments on commit 5fcedde

Please sign in to comment.