Merge pull request #584 from broadinstitute/ct-add-conda-recipe

add conda recipe w/ rendering script, refactor dependency files
broadinstitute · Feb 21, 2017 · 460837f · 460837f
2 parents 9b0af0d + fff652f
commit 460837f
Show file tree

Hide file tree

Showing 21 changed files with 709 additions and 37 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -1,15 +1,14 @@
 language: python
+sudo: false
 
 matrix:
+    fast_finish: true
     include:
         - os: linux
-          sudo: false
           python: 2.7
         - os: linux
-          sudo: false
           python: 3.4
         - os: linux
-          sudo: false
           python: 3.5
 #        - os: osx
 #          language: generic
@@ -30,7 +29,12 @@ env:
   - PIP_DIR="$HOME/virtualenv"
   - GATK_PATH="$CACHE_DIR/GenomeAnalysisTK-3.6"
   - PYTHONIOENCODING=UTF8
+  # $BUNDLE_SECRET for decrypting tarball of third-party tools
   - secure: KX7DwKRD85S7NgspxevgbulTtV+jHQIiM6NBus2/Ur/P0RMdpt0EQQ2wDq79qGN70bvvkw901N7EjSYd+GWCAM7StXtaxnLRrrZ3XI1gX7KMk8E3QzPf0zualLDs7cuQmL6l6WiElUAEqumLc7WGpLZZLdSPzNqFSg+CBKCmTI8=
+  # $ANACONDA_TOKEN for uploading builds to anaconda.org ("broad-viral" channel) 
+  - secure: O+yKZxHthroiSi8KqMEF9qWDNv43iDXpk4rbhaZDlUKBiR5+AeXLR7OodWpX9LvhihpWgDoS5W42K0joPRP/rUJ2Jux9GH84Jhg+uDKN+XDi2sNT1/DsI4BTF0xxO0TeQ6IokbzV7idfW6gbhNoWMky7DnXtK6ruCJkkx4tWlno=
+  # $TRAVIS_ACCESS_TOKEN_FOR_OTHER_REPO (viral-ngs-deploy)
+  - secure: ChB0K3gPr5HknxYA41xCrpgChHDmLkqc79p1NABB/tbqOEnrPzDPqE+FU4/QlmeV96jMYn4uyLVauJpzVXyBIVoOa8guqoF5VdiKlAhaUwh9UQJ75i3SKQtGBrqaTXSDVI1vJARMiGabduCrcNJxVsxV9Bm+YzTq6tuhWyqR4fs=
 
 git:
   depth: 3
@@ -49,3 +53,10 @@ script:
 
 after_success:
   - coveralls
+
+deploy:
+  provider: script
+  script: travis/deploy.sh $TRAVIS_TAG
+  on:
+    tags: true
+    #all_branches: true
diff --git a/packaging/conda-recipe/render-recipe.py b/packaging/conda-recipe/render-recipe.py
@@ -0,0 +1,289 @@
+#!/usr/bin/python
+
+# stdlib
+import os, sys, re
+import glob
+import jinja2
+import json
+import pprint
+import argparse
+import hashlib
+import time
+# since py3 split up urllib
+try:
+    from urllib.request import urlopen
+except ImportError:
+    from urllib2 import urlopen
+
+"""
+Renders Jinja2 templates using variables from dependency files
+
+The behavior is not (yet) recursive.
+"""
+
+# Name of the directory holding the Jinja2 recipe templates; it is joined
+# onto dir_path (below) when the templates are loaded and listed.
+input_directory = "viral-ngs-template"
+# Name of the directory that receives the rendered recipe files.
+output_directory = "viral-ngs"
+# NOTE(review): not referenced anywhere in the visible part of this script.
+source_url = ""
+
+# Absolute directory containing this script, with symlinks resolved.
+dir_path = os.path.dirname(os.path.realpath(__file__))
+
+class VersionString(object):
+    """
+        Class to validate and parse PEP440 version strings (also used by conda)
+        Shortened and derived from: https://github.com/pypa/packaging/blob/16.7/packaging/version.py
+
+        An optional leading "v" (as found on git release tags such as
+        "v1.14.0") is accepted and preserved.
+
+        The constructor only stores the raw value; validation happens when the
+        instance is converted with str(), which raises
+        argparse.ArgumentTypeError for a value that does not match.
+    """
+
+    VERSION_PATTERN = r"""
+    (?P<prefix>v?)
+    (?:
+        (?:(?P<epoch>[0-9]+)!)?                           # epoch
+        (?P<release>[0-9]+(?:\.[0-9]+)*)                  # release segment
+        (?P<pre>                                          # pre-release
+            [-_\.]?
+            (?P<pre_l>(a|b|c|rc|alpha|beta|pre|preview))
+            [-_\.]?
+            (?P<pre_n>[0-9]+)?
+        )?
+        (?P<post>                                         # post release
+            (?:-(?P<post_n1>[0-9]+))
+            |
+            (?:
+                [-_\.]?
+                (?P<post_l>post|rev|r)
+                [-_\.]?
+                (?P<post_n2>[0-9]+)?
+            )
+        )?
+        (?P<dev>                                          # dev release
+            [-_\.]?
+            (?P<dev_l>dev)
+            [-_\.]?
+            (?P<dev_n>[0-9]+)?
+        )?
+    )
+    (?:\+(?P<local>[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
+    """
+    version_re = re.compile(
+        r"^\s*" + VERSION_PATTERN + r"\s*$",
+        re.VERBOSE | re.IGNORECASE,)
+
+    def __init__(self, v):
+        # the raw, unvalidated version text as given by the caller
+        self.v = v
+
+    def __str__(self):
+        """
+        Return the validated version string.
+
+        Surrounding whitespace is dropped and an epoch of zero is omitted;
+        every other segment is emitted exactly as it was written, so the
+        result still matches the tag it was taken from.
+
+        Raises argparse.ArgumentTypeError if the value is not a string or
+        does not match VERSION_PATTERN.
+        """
+        # match once and reuse the result; a non-string value is simply invalid
+        try:
+            m = self.version_re.match(self.v)
+        except TypeError:
+            m = None
+        if m is None:
+            raise argparse.ArgumentTypeError("String '%s' does not match required PEP440 format"%(self.v,))
+
+        # 'v' prefix (an empty string when absent)
+        parts = [m.group("prefix")]
+
+        # Epoch (only emitted when non-zero)
+        if m.group("epoch") and int(m.group("epoch")) != 0:
+            parts.append("{0}!".format(m.group("epoch")))
+
+        # Release segment
+        parts.append(m.group("release"))
+
+        # Pre-, post- and development-release segments. The captured text
+        # already contains its own separator and label (ex. ".post1"), so it
+        # must be appended verbatim rather than prefixed a second time.
+        for segment in ("pre", "post", "dev"):
+            if m.group(segment) is not None:
+                parts.append(m.group(segment))
+
+        # Local version segment
+        if m.group("local") is not None:
+            parts.append("+{0}".format(m.group("local")))
+
+        return "".join(parts)
+
+
+# regex to match comment-only line
+_COMMENT_LINE_RE = re.compile(r"^(?:\s*\#.*)$")
+
+# regex to match package spec line, with support for comments and selectors.
+# This will also capture hash-indicated selectors and comments (ex. "# [osx]")
+# which may, or may not, be useful in their original context.
+_PACKAGE_LINE_RE = re.compile(r"^(?P<package>[a-zA-Z0-9\-\_]+)(?:\s*)(?:(?P<comparator>[\>\<=]?=?)(?:\s*)(?P<version>[^\s\#=]+)(?:=(?P<build>[0-9]*))?(?:\s*))?(?P<selector>\s*\#\s*\[.*\])?(?P<comment>\s*\#.*)?$")
+
+# when we need to specify a different comparator for the recipe
+_COMPARATOR_REPLACEMENTS = {
+    "=": "==",
+}
+
+def reformat_package_line(line):
+    """
+    This function is meant to take a package spec in conda or pip format
+    and return one in conda recipe format: https://conda.io/docs/spec.html
+
+    Blank (or whitespace-only) lines yield an empty string and comment-only
+    lines are returned unchanged, minus the line ending.
+
+    Raises ValueError if the line is neither blank, a comment, nor a
+    recognizable package spec.
+    """
+    # the line should not have a newline
+    line = line.replace("\n","").replace("\r","")
+
+    # nothing to emit for a blank line
+    if not line.strip():
+        return ""
+
+    # if the line is a comment, simply return it
+    if _COMMENT_LINE_RE.match(line):
+        return line
+
+    # otherwise, build a package spec string suitable for a conda recipe
+    m = _PACKAGE_LINE_RE.match(line)
+    if m is None:
+        # fail with a message naming the offending line instead of an
+        # AttributeError on None
+        raise ValueError("Unable to parse package spec line: %r" % (line,))
+
+    comparator = m.group("comparator") or ""
+    return "- {package} {comparator}{version}{build}{selector}{comment}".format(
+        package    = m.group("package").lower(), # conda packages must have lowercase names
+        comparator = _COMPARATOR_REPLACEMENTS.get(comparator, comparator),
+        version    = m.group("version") or "",
+        build      = "" if not m.group("build") else " "+m.group("build")+"*", # Todo: verify build separator character for recipe format
+        selector   = "" if not m.group("selector") else " "+m.group("selector"),
+        comment    = "" if not m.group("comment") else " "+m.group("comment")
+    )
+
+def url_md5(url, max_tries=3, retry_delay=2):
+    """
+    Download the resource at `url` and return the hex MD5 digest of its bytes.
+
+    The download is attempted up to `max_tries` times (at least once),
+    sleeping `retry_delay` seconds between attempts. If the final attempt
+    fails, its exception is re-raised to the caller.
+    """
+    CHUNK_SIZE = 16 * 1024
+    max_tries = max(1, max_tries)
+
+    for try_count in range(1, max_tries + 1):
+        # start from a fresh hash on every attempt: chunks fed in before a
+        # mid-download failure must not leak into the retry's digest
+        hash_md5 = hashlib.md5()
+        try:
+            print("Downloading source package for hash calculation...")
+            response = urlopen(url)
+            try:
+                for chunk in iter(lambda: response.read(CHUNK_SIZE), b""):
+                    hash_md5.update(chunk)
+            finally:
+                # py2 urllib2 responses are not context managers, so close explicitly
+                response.close()
+            return hash_md5.hexdigest()
+        except Exception:
+            # Exception (not a bare except) so Ctrl-C still aborts immediately
+            if try_count >= max_tries:
+                raise
+            print("Download {} failed, sleeping then retrying...".format(try_count))
+            time.sleep(retry_delay)
+
+if __name__ == "__main__":
+    # Command-line entry point: read the version and requirement files,
+    # render every template in input_directory into output_directory, and
+    # add md5 checksums for any source urls found in rendered .yaml files.
+
+    parser = argparse.ArgumentParser(description='Render the conda recipe.')
+    parser.add_argument('version',
+                        type=VersionString,
+                        help='the version number of the package')
+    parser.add_argument('--build-reqs', nargs='*', dest='build_requirements',
+                        type=argparse.FileType('r'),
+                        help='build-time requirements file')
+    parser.add_argument('--run-reqs', nargs='*', dest='run_requirements',
+                        type=argparse.FileType('r'),
+                        help='run-time requirements file')
+    parser.add_argument('--py2-run-reqs', nargs='*', dest='py2_run_requirements',
+                        type=argparse.FileType('r'),
+                        help='python2-only run-time requirements file')
+    parser.add_argument('--py3-run-reqs', nargs='*', dest='py3_run_requirements',
+                        type=argparse.FileType('r'),
+                        help='python3-only run-time requirements file')
+    parser.add_argument('--linux-run-reqs', nargs='*', dest='linux_run_requirements',
+                        type=argparse.FileType('r'),
+                        help='linux-only run-time requirements file')
+    parser.add_argument('--osx-run-reqs', nargs='*', dest='osx_run_requirements',
+                        type=argparse.FileType('r'),
+                        help='osx-only run-time requirements file')
+    parser.add_argument('--test-reqs', nargs='*', dest='test_requirements',
+                        type=argparse.FileType('r'),
+                        help='test-time requirements file')
+
+    # argparse prints usage and exits with a non-zero status on bad
+    # arguments; do not mask that as success
+    args = parser.parse_args()
+    args_dict = vars(args)
+
+    recipe_variables = {}
+
+    # store two separate version strings, one to use for the conda package and one
+    # that should match github tagged releases
+    try:
+        # VersionString validates lazily, when converted to str
+        recipe_variables["PKG_VERSION"] = str(args_dict.pop("version"))
+    except argparse.ArgumentTypeError as e:
+        parser.error(str(e))
+
+    # strip "v" prefix from versions that look like v1.14.0
+    if recipe_variables["PKG_VERSION"].startswith("v"):
+        recipe_variables["PKG_VERSION_CONDA"] = recipe_variables["PKG_VERSION"][1:]
+    else:
+        recipe_variables["PKG_VERSION_CONDA"] = recipe_variables["PKG_VERSION"]
+
+    # after we pop the positional argument(s), the optional ones remaining are all files
+    for var_name, req_files in args_dict.items():
+        if not req_files:
+            continue
+        # accumulate across ALL files given for this option, not just the last
+        package_lines = []
+        for reqs_file in req_files:
+            try:
+                for line in reqs_file:
+                    conda_style_package_line = reformat_package_line(line)
+                    if conda_style_package_line:
+                        package_lines.append(conda_style_package_line)
+            finally:
+                reqs_file.close()
+        recipe_variables[var_name] = package_lines
+    pprint.pprint(recipe_variables)
+
+    # both directories live next to this script, regardless of the
+    # current working directory
+    template_dir = os.path.join(dir_path, input_directory)
+    output_path = os.path.join(dir_path, output_directory)
+
+    j_env = jinja2.Environment(loader=jinja2.FileSystemLoader(template_dir))
+
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+
+    # matches a recipe "url:" line, keeping its indentation and any trailing text
+    url_re = re.compile(r"^(?:(?P<leadingspace>\s*)url:\s*)(?P<url>[\S]*)(?P<extra>.*)$")
+
+    for template_file in os.listdir(template_dir):
+        print("Rendering "+ template_file)
+        # jinja expects the filename to be just that, not a path
+        # it should be relative to the FileSystemLoader() path set above
+        template = j_env.get_template(template_file)
+        output_from_parsed_template = template.render(recipe_variables)
+
+        # save the rendered output (text mode, so this works on py2 and py3)
+        rendered_path = os.path.join(output_path, template_file)
+        with open(rendered_path, "w") as f:
+            f.write(output_from_parsed_template)
+
+        # populate md5 hashes for any source urls present
+        if not template_file.endswith(".yaml"):
+            continue
+
+        # calculate and add md5 hashes to recipe
+        checksummed_path = rendered_path + ".checksumed"
+        with open(rendered_path, "r") as inf, open(checksummed_path, "w") as outf:
+            for line in inf:
+                stripped = line.strip()
+
+                # if this is an md5 line, don't write it out
+                if stripped.startswith("md5"):
+                    continue
+
+                # if this is not an md5 line, write it verbatim
+                outf.write(line)
+
+                # only url lines get a checksum line appended after them
+                if not stripped.startswith("url"):
+                    continue
+                matches = url_re.match(line)
+                if not matches:
+                    continue
+                if not matches.group("url"):
+                    raise ValueError("The yaml file url line does not appear to contain a url")
+
+                # the last line of a rendered template may lack a newline;
+                # make sure the md5 entry starts on its own line
+                if not line.endswith("\n"):
+                    outf.write("\n")
+
+                # download file and calculate md5
+                src_hash = url_md5(matches.group("url"))
+                hash_line = "{leadingspace}md5: {src_hash}{extra}".format(
+                    leadingspace=matches.group("leadingspace") or "",
+                    src_hash=src_hash,
+                    extra=matches.group("extra") or ""
+                )
+                outf.write(hash_line+"\n")
+
+        # move the file with checksums
+        os.rename(checksummed_path, rendered_path)
+
+
+
+
diff --git a/packaging/conda-recipe/viral-ngs-template/build.sh b/packaging/conda-recipe/viral-ngs-template/build.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# conda build script: copies the source tree into
+# $PREFIX/opt/$PKG_NAME-$PKG_VERSION and symlinks its top-level *.py
+# scripts into $PREFIX/bin.
+
+BINARY_HOME="$PREFIX/bin"
+PACKAGE_HOME="$PREFIX/opt/$PKG_NAME-$PKG_VERSION"
+
+# stop if the source dir is missing rather than operating on the wrong directory
+cd "$SRC_DIR" || exit 1
+
+# remove files duplicated by conda packages (-f: fine if already absent)
+rm -f tools/binaries/V-Phaser-2.0/MacOSX/libgomp.1.dylib
+
+find tools/scripts/ -name "*.py" -exec chmod +x {} \;
+find tools/scripts/ -name "*.sh" -exec chmod +x {} \;
+
+# copy source to the package home
+mkdir -p "$BINARY_HOME"
+mkdir -p "$PACKAGE_HOME"
+cp -R "$SRC_DIR"/* "$PACKAGE_HOME/"
+
+# make the top-level scripts executable and link them into bin
+cd "$PACKAGE_HOME" || exit 1
+chmod a+x *.py
+find *.py -type f -exec ln -s "$PACKAGE_HOME"/{} "$BINARY_HOME"/{} \;