Add hab install to download and install required distros #112

Draft: wants to merge 7 commits into `main`

9 changes: 0 additions & 9 deletions .github/workflows/python-static-analysis-and-test.yml
@@ -52,15 +52,6 @@ jobs:
json_ver: ['json', 'json5']
os: ['ubuntu-latest', 'windows-latest']
python: ['3.7', '3.8', '3.9', '3.10', '3.11']
# Works around the depreciation of python 3.6 for ubuntu
# https://github.com/actions/setup-python/issues/544
include:
- json_ver: 'json'
os: 'ubuntu-20.04'
python: '3.6'
- json_ver: 'json5'
os: 'ubuntu-20.04'
python: '3.6'

runs-on: ${{ matrix.os }}

7 changes: 5 additions & 2 deletions README.md
@@ -139,7 +139,7 @@ home directory on other platforms.

## Installing

Hab is installed using pip. It requires python 3.6 or above. It's recommended
Hab is installed using pip. It requires python 3.7 or above. It's recommended
that you add the path to your python's bin or Scripts folder to the `PATH`
environment variable so you can simply run the `hab` command.

@@ -1367,7 +1367,7 @@ most part you can control the output using the `hab -v ...` verbosity option.
However if you need more fine grained control you can create a `.hab_logging_prefs.json`
file next to your user [user prefs](#user-prefs) file. The cli also supports passing
the path to a configuration file using `hab --logging-config [path/to/file.json]`
that is used instead of the default file if pased.
that is used instead of the default file if passed.

# Caveats

@@ -1451,6 +1451,9 @@ hab in batch mode.
Approximate time generated using `time cmd.exe /c "hab -h"` in git bash after
omitting the `%py_exe% -m ...` call.

You can also set the `%TMP%` environment variable to a unique folder, but this
is more of a fix for scripted or unittest workflows.

# Glosary

* **activate:** Update the current process(shell) for a given configuration. Name taken
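The `%TMP%` note added to the README above is aimed at scripted or unittest workflows. A minimal sketch of one way to do that with a pytest fixture follows; the fixture name is illustrative and it assumes hab only consults the standard Windows `TMP`/`TEMP` variables:

```python
import tempfile

import pytest


@pytest.fixture
def unique_tmp(monkeypatch):
    """Point %TMP% (and %TEMP%) at a throwaway folder for each test."""
    with tempfile.TemporaryDirectory() as tmp:
        monkeypatch.setenv("TMP", tmp)
        monkeypatch.setenv("TEMP", tmp)
        yield tmp
```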
4 changes: 2 additions & 2 deletions hab/__init__.py
@@ -1,10 +1,10 @@
__all__ = ["__version__", "NotSet", "Resolver", "Site"]
__all__ = ["__version__", "DistroMode", "NotSet", "Resolver", "Site"]

from .utils import NotSet

# Note: Future imports depend on NotSet so it must be imported first
# isort: split

from .resolver import Resolver
from .resolver import DistroMode, Resolver
from .site import Site
from .version import version as __version__
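With this change `DistroMode` is exported from the package root alongside the existing public names, so downstream code can import it in one line. A minimal sketch using only what the diff above exports:

```python
# The new name sits next to the existing public exports.
from hab import DistroMode

# DistroMode.Installed and DistroMode.Downloaded are the two values the new
# CLI code selects between when building the "downloads" report.
mode = DistroMode.Downloaded
```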
24 changes: 21 additions & 3 deletions hab/cache.py
@@ -111,6 +111,7 @@ def generate_cache(self, resolver, site_file, version=1):
the provided site file. Use this method any time changes are made that
hab needs to be aware of. Caching is enabled by the existence of this file.
"""
from .distro_finders.distro_finder import DistroFinder
from .site import Site

# Indicate the version specification this habcache file conforms to.
@@ -125,6 +126,9 @@ def generate_cache(self, resolver, site_file, version=1):
glob_str, cls = stats
# Process each glob dir defined for this site
for dirname in temp_site.get(key, []):
# Caching is only supported for direct file paths
if isinstance(dirname, DistroFinder):
dirname = dirname.root
cfg_paths = output.setdefault(key, {}).setdefault(
platform_path_key(dirname).as_posix(), {}
)
@@ -152,9 +156,23 @@ def iter_cache_paths(cls, name, paths, cache, glob_str=None, include_path=True):
"""Yields path information stored in the cache falling back to glob if
not cached.

Args:
name (str): The name of the cache being iterated. Often "config_paths"
or "distro_paths".
paths (list): A list of `pathlib.Path` paths to process. If this includes
glob paths they will be processed.
cache (dict): The cached data used if possible for each path. If a
path isn't in the cache, then the path is globbed instead.
glob_str (str, optional): Joined to each path if passed and a glob
is required. Ignored if the path is cached.
include_path (bool, optional): Controls how many items are yielded.
If True then each cached or globbed path is yielded. Otherwise only
each path (dirname) is yielded and path is always None.

Yields:
dirname: Each path stored in paths.
path
dirname: Each path passed in paths.
path: The path to a given resource for this dirname.
cached: Whether the path was found in the cache instead of requiring a glob.
"""
for dirname in paths:
dn_posix = dirname.as_posix()
@@ -166,7 +184,7 @@ def iter_cache_paths(cls, name, paths, cache, glob_str=None, include_path=True):
logger.debug(f"Using glob for {name} dir: {dirname}")
# Fallback to globing the file system
if glob_str:
paths = sorted(glob.glob(str(dirname / glob_str)))
paths = utils.glob_path(dirname / glob_str)
else:
paths = []
if not include_path:
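A rough sketch of how the `iter_cache_paths` contract documented above might be exercised. The `Cache` class name, the shape of the cache dictionary, and the glob pattern are assumptions for illustration only; only the argument names and the three-part yield come from the docstring:

```python
import pathlib

from hab.cache import Cache  # assumed class name hosting iter_cache_paths

# Hypothetical inputs: one distro root, with a cache entry keyed by its posix path.
paths = [pathlib.Path("/studio/distros")]
cache = {"/studio/distros": {"/studio/distros/maya/2024.1/.hab.json": {}}}

for dirname, path, cached in Cache.iter_cache_paths(
    "distro_paths", paths, cache, glob_str="*/*/.hab.json"
):
    # `cached` reports whether the entry came from the cache or a glob was run.
    print(dirname, path, cached)
```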
92 changes: 81 additions & 11 deletions hab/cli.py
@@ -9,7 +9,7 @@
from click.shell_completion import CompletionItem
from colorama import Fore

from . import Resolver, Site, __version__, utils
from . import DistroMode, Resolver, Site, __version__, utils
from .parsers.unfrozen_config import UnfrozenConfig

logger = logging.getLogger(__name__)
@@ -603,7 +603,18 @@ def env(settings, uri, launch):
"--type",
"report_type",
type=click.Choice(
["nice", "site", "s", "uris", "u", "versions", "v", "forest", "f", "all-uris"]
# Note: Put short names on same line as full name
# fmt: off
[
"nice",
"site", "s",
"uris", "u",
"versions", "v",
"downloads",
"forest", "f",
"all-uris",
]
# fmt: on
),
default="nice",
help="Type of report.",
@@ -644,7 +655,7 @@ def dump(settings, uri, env, env_config, report_type, flat, verbosity, format_ty

resolver = settings.resolver

if report_type in ("uris", "versions", "forest"):
if report_type in ("uris", "versions", "downloads", "forest"):
from .parsers.format_parser import FormatParser

formatter = FormatParser(verbosity, color=True)
@@ -659,16 +670,22 @@ def dump(settings, uri, env, env_config, report_type, flat, verbosity, format_ty
resolver.configs, fmt=formatter.format
):
click.echo(line)
if report_type in ("versions", "forest"):
if report_type in ("versions", "downloads", "forest"):
click.echo(f'{Fore.YELLOW}{" Versions ".center(50, "-")}{Fore.RESET}')

for line in resolver.dump_forest(
resolver.distros,
attr="name",
fmt=formatter.format,
truncate=truncate,
):
click.echo(line)
mode = (
DistroMode.Downloaded
if report_type == "downloads"
else DistroMode.Installed
)
with resolver.distro_mode_override(mode):
for line in resolver.dump_forest(
resolver.distros,
attr="name",
fmt=formatter.format,
truncate=truncate,
):
click.echo(line)
elif report_type == "all-uris":
# Combines all non-placeholder URI's into a single json document and display.
# This can be used to compare changes to configs when editing them in bulk.
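The `downloads` report reuses the existing `versions` forest dump, but temporarily flips the resolver to its downloaded distros while rendering. A condensed sketch of that pattern, with the resolver and formatter assumed to already exist:

```python
from hab import DistroMode


def dump_distro_forest(resolver, report_type, fmt=str, truncate=None):
    """Print the distro forest, using downloaded distros for the downloads report."""
    mode = (
        DistroMode.Downloaded if report_type == "downloads" else DistroMode.Installed
    )
    with resolver.distro_mode_override(mode):
        for line in resolver.dump_forest(
            resolver.distros, attr="name", fmt=fmt, truncate=truncate
        ):
            print(line)
```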
@@ -786,6 +803,59 @@ def cache(settings, path):
click.echo(f"Cache took: {e - s}, cache file: {out}")


@_cli.command()
@click.option(
"-u",
"--uri",
"uris",
multiple=True,
help="A URI that is resolved and all required distros are installed. Can "
"be used multiple times and each URI's distros are resolved independently.",
)
@click.option(
"-d",
"--distro",
"distros",
multiple=True,
help="Additional distros to install. Can be used multiple times and each use "
"is resolved independently.",
)
@click.option(
"--dry-run/--no-dry-run",
default=False,
help="Don't actually install anything, just print what would be installed.",
)
@click.option(
"--force-reinstall/--no-force-reinstall",
default=False,
help="Reinstall all resolved distros even if they are already installed.",
)
@click.option(
"--target",
type=click.Path(file_okay=False, resolve_path=True),
help="Install distros into DIRECTORY. Defaults to the sites "
'downloads["install_root"] setting.',
)
@click.pass_obj
def install(settings, uris, distros, dry_run, force_reinstall, target):
"""Install distros for use in hab. At least one uri or distro must be
specified to install. This is intended to install all versions of hab distros
that are required for a collection of hab URIs on this system. This means that,
unlike pip, this may install multiple versions of a hab distro.
"""
distros = list(distros) if distros else None
uris = list(uris) if uris else None
if not distros and not uris:
raise ValueError("You must specify at least one --uri or --distro to install.")
settings.resolver.install(
uris=uris,
additional_distros=distros,
target=target,
dry_run=dry_run,
replace=force_reinstall,
)


def cli(*args, **kwargs):
"""Runs the hab cli. If an exception is raised, only the exception message
is printed and the stack trace is hidden. Use `hab -v ...` to enable showing
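Putting the new command together: after validating its options the CLI simply forwards them to `Resolver.install`, so the same install can be driven from Python. A minimal sketch using only the keyword arguments shown above; the URI and target path are placeholders:

```python
def install_for_uri(resolver, uri, target=None, dry_run=True):
    """Install every distro required by `uri`, mirroring `hab install -u URI`."""
    resolver.install(
        uris=[uri],
        additional_distros=None,  # or e.g. ["some_distro==1.0"] for extras
        target=target,  # None falls back to the site's downloads["install_root"]
        dry_run=dry_run,  # report what would be installed without writing anything
        replace=False,  # True mirrors --force-reinstall
    )
```

From a shell this corresponds to something like `hab install -u project_a/Seq001 --dry-run`, where the URI is again only an example.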
Empty file added hab/distro_finders/__init__.py
Empty file.
133 changes: 133 additions & 0 deletions hab/distro_finders/cloud_zip.py
@@ -0,0 +1,133 @@
import logging
import pathlib
import time
import zipfile
from abc import ABCMeta, abstractmethod

import remotezip
from cloudpathlib import CloudPath

from .df_zip import DistroFinderZip

logger = logging.getLogger(__name__)


class HabRemoteZip(remotezip.RemoteZip):
"""`remotezip.RemoteZip` that doesn't call `close()` when exiting a with context.

Opening a new RemoteZip instance is slow and varies depending on the size
of the .zip file. Cloud-based workflows don't need to close the file pointer
the way you do when working with a local file.
"""

def __exit__(self, type, value, traceback):
pass


class DistroFinderCloudZip(DistroFinderZip, metaclass=ABCMeta):
"""Works with zipped distros stored remotely in Amazon S3 buckets.

Works with zipped distros, extracting the `hab_filename` information from
inside the .zip file. This is useful when you have direct access to the .zip
file.

For `path`, this class uses a .zip `member path`. A member path is the absolute
path to the .zip joined with the member path of files contained inside the .zip
file. So if the archive file path is `c:/temp/dist_a_v0.1.zip` and the member is
`hab_filename`, then the member_path would be `c:/temp/dist_a_v0.1.zip/.hab.json`.

Note:
This class should only be used to install distros in the hab download system.

This expects one file to exist with a specific naming convention:
- `{distro}_v{version}.zip` contains the entire contents of the distro.
This should also contain the top level file `hab_filename`. Once the distro
is installed and hab is used normally, this file will be used. The `hab_filename`
file's contents are extracted from the zip file and used to initialize the
`DistroVersion` returned by `self.distro` without being written to disk.
"""

def __init__(self, root, site=None, safe=False, client=None):
# Only define client if it was passed, otherwise create it lazily.
if client:
self.client = client
super().__init__(root, site=site, safe=safe)
self._archives = {}

def as_posix(self):
"""Returns the root path as a posix style string."""
if isinstance(self.root, CloudPath):
# CloudPath doesn't need as_posix
return str(self.root)
return super().as_posix()

def cast_path(self, path):
"""Return path cast to the `pathlib.Path` like class preferred by this class."""
return CloudPath(path, client=self.client)

@property
@abstractmethod
def client(self):
"""A `cloudpathlib.client.Client` used to create `CloudPath` instances."""

@client.setter
@abstractmethod
def client(self, client):
pass

@abstractmethod
def credentials(self):
"""Returns the credentials needed for requests to connect to the cloud resource.

Generates these credentials using the client object.
"""

def archive(self, zip_path, partial=True):
"""Returns a `zipfile.Zipfile` like instance for zip_path.

Args:
zip_path (cloudpathlib.CloudPath): The path to the zip file to open.
partial (bool, optional): If True then only a small part of the archive
needs to be accessed, and `HabRemoteZip` is used to download only
specific files from the remote archive without caching them to disk.
If False then remote archives will be fully downloaded to disk
(using caching) before returning the open archive.
"""
if not partial or isinstance(zip_path, pathlib.PurePath):
logger.debug(f"Using CloudPath to open(downloading if needed) {zip_path}.")
archive = zipfile.ZipFile(zip_path)
archive.filename = zip_path
return archive

# Creating a RemoteZip instance is very slow compared to local file access.
# Reuse existing objects if already created.
if zip_path in self._archives:
logger.debug(f"Reusing cloud .zip resource: {zip_path}")
return self._archives[zip_path]

logger.debug(f"Connecting to cloud .zip resource: {zip_path}")
s = time.time()
auth, headers = self.credentials()

archive = HabRemoteZip(zip_path.as_url(), auth=auth, headers=headers)
archive.filename = zip_path
e = time.time()
logger.info(f"Connected to cloud .zip resource: {zip_path}, took: {e - s}")
self._archives[zip_path] = archive
return archive

def clear_cache(self, persistent=False):
"""Clear cached data in memory. If `persistent` is True then also remove
cache data from disk if it exists.
"""
if persistent:
self.remove_download_cache()
super().clear_cache(persistent=persistent)

# Ensure all cached archives are closed before clearing the cache.
for archive in self._archives.values():
archive.close()
self._archives = {}
if persistent:
# Clear downloaded temp files
self.client.clear_cache()
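Since `client` and `credentials` are abstract, a concrete subclass has to supply them. A hypothetical S3-backed sketch follows; the subclass name, the lazy `S3Client`, and the placeholder credentials are illustrative and not part of this PR (a real implementation would return whatever `(auth, headers)` pair `remotezip` needs for the bucket in question):

```python
from cloudpathlib import S3Client

from hab.distro_finders.cloud_zip import DistroFinderCloudZip


class DistroFinderS3Zip(DistroFinderCloudZip):
    """Hypothetical finder reading zipped distros from an S3 bucket."""

    @property
    def client(self):
        # Create the cloudpathlib client lazily if one wasn't passed to __init__.
        if not hasattr(self, "_client"):
            self._client = S3Client()
        return self._client

    @client.setter
    def client(self, client):
        self._client = client

    def credentials(self):
        # Placeholder: public buckets need no auth; private ones would derive
        # an auth object or signed headers from self.client's boto3 session.
        return None, {}
```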