diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6925b127..c6211c75 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -130,9 +130,7 @@ the structure of this directory
   # these intentionally are not CACHE variables
   set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${stageDir}/${CMAKE_INSTALL_LIBDIR})
   set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${stageDir}/${CMAKE_INSTALL_LIBDIR})
-
-  # don't currently need the following since grackle doesn't ship an executable
-  #set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${stageDir}/${CMAKE_INSTALL_BINDIR})
+  set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${stageDir}/${CMAKE_INSTALL_BINDIR})

   # the location where the export files go to export from build-tree
   set(GRACKLE_BUILD_EXPORT_PREFIX_PATH ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
@@ -171,10 +169,55 @@ if (UNIX AND NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin")
   set_target_properties(toolchain::m PROPERTIES IMPORTED_LIBNAME "m")
 endif()

+
+# picohash is a vendored, self-contained, header-only library
+# -> it's an internal dependency of Grackle::Grackle (it shouldn't be exposed)
+# -> If it were an `INTERFACE` library, CMake would abort with an error when we
+#    declare rules for installing the "export information" for a static-lib
+#    build of Grackle::Grackle (there aren't problems for a shared-lib build)
+# -> The "export information" includes autogenerated linking logic that gets
+#    evaluated by external CMake projects that consume Grackle::Grackle via
+#    `find_package`. This logic infers a list of any other libraries that
+#    are shipped by this project that need to be linked to use libgrackle.a
+# -> When invoked, the logic goes through ALL of Grackle::Grackle's (public
+#    & private) dependencies that COULD specify such linking requirements.
+# -> CMake will complain if any INTERFACE library used by Grackle::Grackle
+#    isn't publicly exported since it COULD specify this information.
+# -> the BUILD_LOCAL_INTERFACE generator expression (in CMake 3.26+) can
+#    work around this
+# -> this isn't an issue for INTERFACE IMPORTED libraries since IMPORTED
+#    libs should only specify linker requirements of prebuilt external libs,
+#    if there are any.
+#    (i.e. if there are any requirements, CMake expects the developer
+#    to manually add logic to the installed "export info")
+add_library(picohash INTERFACE IMPORTED)
+
+# we use the SYSTEM option to suppress any warnings
+target_include_directories(picohash SYSTEM INTERFACE
+  ${CMAKE_CURRENT_SOURCE_DIR}/external
+)
+
 # Main build targets
 # ------------------
 add_subdirectory(src/clib)

+# configure the grdata cli program
+# -> we are essentially performing some template substitutions on a python
+#    file so that the file can be executed as a standalone cli program
+include(CreateProgram-grdata)
+create_grdata_program(
+  GRACKLE_VERSION "${_GRACKLE_FULL_VERSION}"
+  TARGET_NAME Grackle::grdata
+)
+
+file(GENERATE
+  OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/grackle-buildpaths-$<CONFIG>.txt
+  CONTENT [[
+# This file lists paths where useful build products can be found in the build
+# directory (if/when the products are actually built)
+$<TARGET_FILE:Grackle::grdata>
+]])
+
 # declare build-recipes for examples
 if (GRACKLE_EXAMPLES)
   add_subdirectory(src/example)
diff --git a/cmake/CreateProgram-grdata.cmake b/cmake/CreateProgram-grdata.cmake
new file mode 100644
index 00000000..84eed508
--- /dev/null
+++ b/cmake/CreateProgram-grdata.cmake
@@ -0,0 +1,404 @@
# This is a cmake module that defines the logic for creating the grdata
# "program"
#
# In a vacuum, it would be more idiomatic to take the logic in this file and
# directly embed it in a CMakeLists.txt file located in close proximity to the
# template file.
#
# However the situation is complicated by the following 2 factors:
# 1. grdata.py is used both as a template file and as part of the pygrackle
#    package. The grdata.py file is written in such a way that it can be used
#    as part of pygrackle without any sort of variable substitution
#
# 2. Moreover, the pygrackle package is built with the scikit-build-core
#    backend, which requires a set of CMake files. Since there is no reason to
#    ever directly execute this file's logic as part of building the python
#    package, it makes even more sense to keep the logic separate


# load the file_registry information for the current version of grackle into a
# string held by the variable specified by the outvar argument
function(load_file_registry_string UPDATE_CONFIGURE_DEPENDS outvar)
  set(path
    "${PROJECT_SOURCE_DIR}/src/python/pygrackle/file_registry/file_registry.txt"
  )
  if ("${UPDATE_CONFIGURE_DEPENDS}")
    set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${path}")
  endif()
  file(READ ${path} contents)
  set("${outvar}" "${contents}" PARENT_SCOPE)
endfunction()

# create the grdata program (and represent it as an executable target)
#
# To install the program, and properly expose export information, see the
# grdata_install_export_helper_ command
#
# the program's resulting location is controlled by the global
# CMAKE_RUNTIME_OUTPUT_DIRECTORY variable, if it is set. Otherwise, we put it
# in the CURRENT_BINARY_DIR.
#
# Arguments
# ---------
# GRACKLE_VERSION: the full grackle version number of the grackle version that
#                  the program is associated with (not just the truncated
#                  number understood by cmake)
# TARGET_NAME: specifies the name of the target that is created to represent
#              the program.
#
# Notes
# -----
# The grdata program can be very useful for downstream codes for testing
# purposes (and for the hypothetical scenario where we support downloading
# precompiled copies of Grackle). Due to the nature of the grdata program,
# providing the program details
# (i.e. the path to it) to downstream projects is not straightforward.
# - In more detail, the program is a little weird in a CMake context since we
#   want people to essentially think of it as a generic command line program
#   even though it isn't technically a compiled program (people shouldn't care
#   that it is actually a portable, executable python script).
# - I was originally hesitant to declare it as an IMPORTED executable. I was
#   primarily concerned about the scenario where other CMake-built projects
#   might consume Grackle by embedding it as a part of the build. Since the
#   documentation explains that the IMPORTED executable machinery is intended
#   to represent machinery from outside the CMake project, I was concerned
#   that we could encounter some unforeseen side-effects.
# - Prior to CMake 3.0, we might have instead provided the tool's path in a
#   package variable (like GRACKLE_GRDATA_TOOL_PATH). While the details of how
#   we achieve this in modern CMake differ, we can still provide the path in a
#   manner similar to variable-access
#   - it is still possible to provide package variables, but the *Professional
#     CMake* book, by Craig Scott (a primary CMake developer), makes it clear
#     we should avoid package-variables to make Grackle easily consumable.
#     In the book's 18th edition this advice is in section 40.4.
#   - essentially, we would need to take extra steps for every package
#     variable that we introduce to support downstream projects that employ
#     the newer dependency management machinery introduced in 2022 (cmake 3.24).
#   - the advice is to make this information accessible through a function
#     (that returns values for known keys) or as properties of a target. We
#     experimented with the function approach -- see commits just before this
#     documentation was written. It requires a somewhat involved solution
#     to completely avoid global/package variables.
#
# After giving this some more thought, it became clear that the IMPORTED
# executable approach is superior for 2 reasons:
# 1. Even if there is some unforeseen side-effect of the IMPORTED executable,
#    we will be no worse off than the variable-like approach.
#    - The worst imaginable side-effect is that somebody consuming Grackle
#      in an embedded manner, might find that some of CMake's source-file
#      dependency magic doesn't work right after updating the files that
#      compose the grdata tool.
#    - This scenario seems extremely pathological (and should probably never
#      arise), since it could only occur if people alter their source files
#      based on the output of the grdata tool.
#    - Regardless of the practicality, the variable-like approach definitely
#      wouldn't offer any benefits here (the same issues would still occur)
# 2. The use of targets is generally more idiomatic than variables. While we
#    still require some custom logic to get everything to work right, we need
#    less of it than the alternative. Moreover, the custom-code will be much
#    more directly analogous to standard cmake logic (making it easier to
#    understand)
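#
# For illustration only: with the target-based approach, a downstream project
# that consumes an installed Grackle could locate and run the tool roughly as
# follows (the `fetch-grackle-data` target name here is hypothetical, not
# something Grackle defines):
#
#   find_package(Grackle REQUIRED)
#   add_custom_target(fetch-grackle-data
#     COMMAND $<TARGET_FILE:Grackle::grdata> fetch
#   )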
function(create_grdata_program)

  set(options)
  set(oneValueArgs DESTINATION GRACKLE_VERSION TARGET_NAME)
  set(multiValueArgs)

  cmake_parse_arguments(PARSE_ARGV 0
    CREATE_GRDATA "${options}" "${oneValueArgs}" "${multiValueArgs}")

  # some basic error-handling
  set(_funcname "create_grdata_program")
  if (DEFINED CREATE_GRDATA_UNPARSED_ARGUMENTS)
    message(FATAL_ERROR
      "${_funcname} received invalid arguments: "
      "\"${CREATE_GRDATA_UNPARSED_ARGUMENTS}\"")
  elseif (DEFINED CREATE_GRDATA_KEYWORDS_MISSING_VALUES)
    message(FATAL_ERROR
      "${_funcname} received the ${CREATE_GRDATA_KEYWORDS_MISSING_VALUES} "
      "keyword(s) without any associated arguments.")
  endif()

  if (DEFINED CMAKE_RUNTIME_OUTPUT_DIRECTORY)
    set(output_path "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/grdata")
  else()
    set(output_path "${CMAKE_CURRENT_BINARY_DIR}/grdata")
  endif()

  load_file_registry_string(TRUE "_GRDATA_FILE_REGISTRY_CONTENTS")
  set(_GRDATA_GRACKLE_VERSION "${CREATE_GRDATA_GRACKLE_VERSION}")
  configure_file(
    "${PROJECT_SOURCE_DIR}/src/python/pygrackle/utilities/grdata.py"
    "${output_path}"
    @ONLY
  )

  if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.19")
    file(CHMOD ${output_path} FILE_PERMISSIONS
      OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ
      WORLD_EXECUTE
    )
  else()
    execute_process(COMMAND chmod a+rx ${output_path})
  endif()

  set(_GRACKLEGRDATAPRIVATE_TARGETNAME "${CREATE_GRDATA_TARGET_NAME}"
    CACHE INTERNAL "name of the target representing the grdata tool")

  add_executable(${_GRACKLEGRDATAPRIVATE_TARGETNAME} IMPORTED GLOBAL)
  set_target_properties(${_GRACKLEGRDATAPRIVATE_TARGETNAME} PROPERTIES
    IMPORTED_LOCATION "${output_path}"
  )

endfunction()

# Helper Functions used to define export files
# --------------------------------------------

# note that cmake normalizes paths on all platforms to use forward-slashes
function(_num_path_segments path outCount)
  set(arg_descr "the path argument, of the _num_path_segments command,")
  if (path MATCHES "^/.*")
    message(FATAL_ERROR "${arg_descr} can't start with `/`")
  elseif (path STREQUAL ".")
    message(FATAL_ERROR "${arg_descr} can't currently be `.`")
  elseif ((path MATCHES "^\\./.*$") OR (path MATCHES "^.*/\\./.*$"))
    message(FATAL_ERROR "${arg_descr} can't currently hold a `./` segment")
  elseif (path STREQUAL "..")
    message(FATAL_ERROR "${arg_descr} can't currently be `..`")
  elseif ((path MATCHES "^\\.\\./.*$") OR (path MATCHES "^.*/\\.\\./.*$"))
    message(FATAL_ERROR "${arg_descr} can't currently hold a `../` segment")
  elseif (path STREQUAL "")
    set(${outCount} "0" PARENT_SCOPE)
    return()
  endif()

  set(count 1)
  set(remainder "${path}")

  while(NOT (remainder MATCHES "^[^/]+/*$")) # exit loop if no slashes or all
                                             # slashes are trailing
    math(EXPR count "${count} + 1")

    # remove trailing slash(es)
    if (remainder MATCHES "^(.*/[^/])/+$")
      set(remainder "${CMAKE_MATCH_1}")
    endif()
    get_filename_component(remainder "${remainder}" DIRECTORY)
  endwhile()

  set(${outCount} "${count}" PARENT_SCOPE)
endfunction()

# create a relocatable export-file (to be placed in the installation directory)
# that declares a target representing the grdata tool
#
# Arguments
# ---------
# EXPORT_FILE_DESTINATION_DIR specifies the directory where the export-file
#   will be installed, relative to the root-install path
# TOOL_RELATIVE_INSTALL_PATH specifies the path where the grdata tool will be
#   installed, relative to the root-install path
# TMP_FILE_LOCATION Where to put the file (right after we create it)
function(_grdata_write_installdir_export_file
    EXPORT_FILE_DESTINATION_DIR TOOL_RELATIVE_INSTALL_PATH TMP_FILE_LOCATION
)
  # a sanity check!
  if ((EXPORT_FILE_DESTINATION_DIR MATCHES "^/.*") OR
      (TOOL_RELATIVE_INSTALL_PATH MATCHES "^/.*"))
    message(
      FATAL_ERROR
      "_grdata_write_installdir_export_file can't handle an argument that "
      "starts with a forward slash")
  endif()

  _num_path_segments("${EXPORT_FILE_DESTINATION_DIR}" num_segments)

  if (num_segments EQUAL 0)
    set(REL_PATH_TO_PREFIX "")
  else()
    string(REPEAT "../" "${num_segments}" REL_PATH_TO_PREFIX)
  endif()

  set(template [======================[
# Autogenerated file that stores the location of the grdata tool
# -> this is directly analogous to a file that would be defined with cmake's
#    install(EXPORT ...) command.
# -> since the grdata tool is a weird sort of pseudo target (i.e. it isn't
#    compiled), we store the path to the file
# -> like `install(EXPORT ...)` (and in contrast to `export(EXPORT ...)`),
#    we use a relative path to the grdata tool

set(_IMPORT_PREFIX "${CMAKE_CURRENT_LIST_DIR}/@REL_PATH_TO_PREFIX@")

add_executable(@_GRACKLEGRDATAPRIVATE_TARGETNAME@ IMPORTED)
set_target_properties(@_GRACKLEGRDATAPRIVATE_TARGETNAME@ PROPERTIES
  IMPORTED_LOCATION "${_IMPORT_PREFIX}/@TOOL_RELATIVE_INSTALL_PATH@"
)

unset(_IMPORT_PREFIX)
]======================]
)

  string(CONFIGURE "${template}" contents @ONLY)
  file(WRITE ${TMP_FILE_LOCATION} "${contents}")

endfunction()

# helper function to create an export-file to support find_package using the
# build-directory (analogous to the `export(EXPORT ...)` command)
function(_grdata_write_builddir_export_file IMMEDIATE_EXPORT_FILE_PATH)

  set(template [======================[
# Autogenerated file that stores the location of the grdata tool
# -> this is directly analogous to a file that would be defined with cmake's
#    export(EXPORT ...) command.
# -> since the grdata tool is a weird sort of pseudo target (i.e. it isn't
#    compiled), we store the path to the file
# -> like `export(EXPORT ...)` (and in contrast to `install(EXPORT ...)`),
#    we use an absolute path

set(_GRACKLE_GRDATA_TOOL_PATH "@absolute_tool_path@")
]======================]
)
  get_target_property(absolute_tool_path
    ${_GRACKLEGRDATAPRIVATE_TARGETNAME} IMPORTED_LOCATION)

  string(CONFIGURE "${template}" contents @ONLY)
  file(WRITE ${IMMEDIATE_EXPORT_FILE_PATH} "${contents}")

endfunction()

# due to the "weird" nature of the grdata tool (we treat it like it's an
# executable even though it isn't compiled), this is a command to help with
# standard export/installation options
#
# This command operates in 3 modes:
#
# 1. In ``INSTALL_TOOL`` mode, the following command
#    ```
#    grdata_install_export_helper_(INSTALL_TOOL DESTINATION <dest>
#                                  COMPONENT <component>)
#    ```
#    acts as a substitute for the `install(TARGETS grdata-target ...)` command,
#    if the grdata tool were a typical cmake target (i.e. one that got
#    compiled). In practice, this does some bookkeeping and wraps the following
#    command:
#    ```
#    install(PROGRAMS path/to/grdata DESTINATION <dest> COMPONENT <component>)
#    ```
#
# 2. In ``INSTALL_EXPORT`` mode, the following command
#    ```
#    grdata_install_export_helper_(INSTALL_EXPORT DESTINATION <dest>
#                                  FILE <name>.cmake TMPDIR <tmpdir>)
#    ```
#    acts as an analog to the ``install(EXPORT ...)`` command, but with 1 minor
#    difference:
#    - the ``install(EXPORT ...)`` command doesn't obviously do anything at
#      configuration time (in practice it does create the export files and
#      stores them within <build-dir>/CMakeFiles/Export)
#    - we explicitly generate the export files at configuration time & store
#      them within the directory specified by TMPDIR
#    NOTE: the namespace used when we originally defined the target is reused
#
# 3. In ``EXPORT_EXPORT`` mode, the following command
#    ```
#    grdata_install_export_helper_(EXPORT_EXPORT FILE <path/to/name>.cmake)
#    ```
#    acts as an analog to the ``export(EXPORT ...)`` command
#
macro(grdata_install_export_helper_ mode)
  # we only take 1-value arguments (after the mode argument)

  set(_GRDATA_IEH_name "grdata_install_export_helper_")

  # get the kwargs for the current mode (all kwargs expect a single arg)
  if("INSTALL_TOOL" STREQUAL "${mode}")
    set(_GRDATA_IEH_Args DESTINATION COMPONENT)
  elseif("INSTALL_EXPORT" STREQUAL "${mode}")
    set(_GRDATA_IEH_Args DESTINATION FILE TMPDIR)
  elseif("EXPORT_EXPORT" STREQUAL "${mode}")
    set(_GRDATA_IEH_Args FILE)
  else()
    message(FATAL_ERROR
      "${_GRDATA_IEH_name} command invoked with unexpected mode: \"${mode}\""
    )
  endif()

  # parse the arguments
  cmake_parse_arguments(_GRDATA_IEH "" "${_GRDATA_IEH_Args}" "" ${ARGN})

  # check argument validity
  if (DEFINED _GRDATA_IEH_UNPARSED_ARGUMENTS)
    message(FATAL_ERROR
      "${_GRDATA_IEH_name}(${mode}) received invalid arguments: "
      "\"${_GRDATA_IEH_UNPARSED_ARGUMENTS}\"")
  elseif (DEFINED _GRDATA_IEH_KEYWORDS_MISSING_VALUES)
    message(FATAL_ERROR
      "${_GRDATA_IEH_name}(${mode}) received the "
      "${_GRDATA_IEH_KEYWORDS_MISSING_VALUES} keyword(s) without any "
      "associated arguments.")
  endif()
  foreach(_GRDATA_IEH_ARG IN ITEMS ${_GRDATA_IEH_Args})
    if (NOT DEFINED "_GRDATA_IEH_${_GRDATA_IEH_ARG}")
      message(FATAL_ERROR
        "${_GRDATA_IEH_name}(${mode}) requires the `${_GRDATA_IEH_ARG}` kwarg")
    endif()
  endforeach()

  # check a precondition
  if (NOT DEFINED _GRACKLEGRDATAPRIVATE_TARGETNAME)
    message(FATAL_ERROR
      "${_GRDATA_IEH_name}(${mode}) can only be called AFTER a call to the "
      "create_grdata_program command.")
  endif()

  # now, actually complete the command
  if ("INSTALL_TOOL" STREQUAL "${mode}")
    install(PROGRAMS $<TARGET_FILE:${_GRACKLEGRDATAPRIVATE_TARGETNAME}>
      COMPONENT ${_GRDATA_IEH_COMPONENT}
      DESTINATION ${_GRDATA_IEH_DESTINATION}
    )

    set(_GRACKLEGRDATAPRIVATE_RELATIVE_INSTALL_PATH
      "${_GRDATA_IEH_DESTINATION}/grdata" CACHE INTERNAL
      "install location of the grdata tool relative to base install path")

  elseif("INSTALL_EXPORT" STREQUAL "${mode}")
    if (NOT DEFINED _GRACKLEGRDATAPRIVATE_RELATIVE_INSTALL_PATH)
      message(FATAL_ERROR
        "${_GRDATA_IEH_name}(${mode}) can only be called AFTER a call to the "
        "${_GRDATA_IEH_name}(INSTALL_TOOL) command.")
    endif()

    # create the export file
    _grdata_write_installdir_export_file(
      # where we'll put export file during install (relative to install-prefix)
      ${_GRDATA_IEH_DESTINATION}
      # where we'll put grdata tool during install (relative to install-prefix)
      ${_GRACKLEGRDATAPRIVATE_RELATIVE_INSTALL_PATH}
      # where in the build-directory we'll put the export file immediately
      # after we create it (we will copy it from here when we install)
      "${_GRDATA_IEH_TMPDIR}/${_GRDATA_IEH_FILE}"
    )
    # define the rule to copy the export-file during installation
    install(FILES
      "${_GRDATA_IEH_TMPDIR}/${_GRDATA_IEH_FILE}"
      DESTINATION "${_GRDATA_IEH_DESTINATION}"
    )

  elseif ("EXPORT_EXPORT" STREQUAL "${mode}")
    _grdata_write_builddir_export_file("${_GRDATA_IEH_FILE}")

  else()
    message(FATAL_ERROR
      "something went horribly wrong within ${_GRDATA_IEH_name}(${mode})")
  endif()

  # need to do some cleanup (since we are in a macro):
  foreach(_GRDATA_IEH_ARG IN LISTS _GRDATA_IEH_Args)
    unset("_GRDATA_IEH_${_GRDATA_IEH_ARG}")
  endforeach()

endmacro()
diff --git a/cmake/GrackleConfig.cmake.in b/cmake/GrackleConfig.cmake.in
index 0241836a..7f5b65b9 100644
--- a/cmake/GrackleConfig.cmake.in
+++ b/cmake/GrackleConfig.cmake.in
@@ -23,7 +23,7 @@
 #    -> when the logic in this file is executed, it is intended to define
 #       variables necessary for the external project to make use of grackle.
 #       Historically, this involved defining new variables. But, we adopt the
-#       modern convention of ONLY defining library-targets
+#       modern convention of ONLY defining targets
 #    -> if we include `return()`, anywhere in the top-level scope of this file
 #       (i.e. not in a function definition), the evaluation of this file
 #       abruptly ends. We can also define some specific variables to inform
@@ -355,6 +355,9 @@ set_target_properties(Grackle::Grackle PROPERTIES
   @_GRACKLE_INFO_PROPERTIES@
 )

+# define the Grackle::grdata executable
+include(${CMAKE_CURRENT_LIST_DIR}/Grackle_grdata_pseudotarget.cmake)
+
 # Finally, let's do some cleanup so people don't rely upon these variables
 # (specifically, let's cleanup the record of the build-flags)
 unset(_GRACKLEBUILD_USE_DOUBLE)
diff --git a/cmake/grackle.pc.in b/cmake/grackle.pc.in
index baba0c43..18c3e44b 100644
--- a/cmake/grackle.pc.in
+++ b/cmake/grackle.pc.in
@@ -30,12 +30,15 @@
 # commonly used to make the build relocatable (there's not a ton of docs on it)
 prefix=${pcfiledir}/../..
-libdir=${prefix}/lib
+libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@
 includedir=${prefix}/include

 # define Grackle-specific variables conveying extra information
 @_PC_INFO_PROPERTIES@

+# define other variables conveying extra Grackle-related information
+GRACKLE_GRDATA_TOOL_PATH=${prefix}/@CMAKE_INSTALL_BINDIR@/grdata
+
 Name: grackle
 Description: chemistry and radiative cooling library for astrophysical simulations and models
 Version: @Grackle_VERSION@
diff --git a/cmake/installation_rules.cmake b/cmake/installation_rules.cmake
index f91b5a8b..9bd3f647 100644
--- a/cmake/installation_rules.cmake
+++ b/cmake/installation_rules.cmake
@@ -141,6 +141,18 @@ install(TARGETS Grackle_Grackle
   COMPONENT Grackle_Development
 )

+include(CreateProgram-grdata)
+# define installation rules for the grdata cli tool. This is
+# analogous to install(TARGETS ...), but is needed since grdata isn't a
+# compiled executable
+# -> in the future, maybe we should make this part of the Grackle_Development
+#    "installation component"?
+#    (from my perspective, it's probably better to err on the side of being
+#    a little too atomic here)
+grdata_install_export_helper_(INSTALL_TOOL
+  DESTINATION ${CMAKE_INSTALL_BINDIR}
+  COMPONENT Grackle_Tools
+)
+
 if (BUILD_SHARED_LIBS)
   # (As noted above) Because we renamed the shared library so its called
   # `libgrackle-{VERSION_NUM}.so` (rather than `libgrackle.so`), we need an
@@ -329,6 +341,13 @@ endif()
 # Define the cmake Package Config File
 #-------------------------------------

+# create variable that holds the path to the current directory
+set(LOCAL_CMAKE_MODULE_DIR "${CMAKE_CURRENT_LIST_DIR}")
+
+# create variable storing where copies of the cmake files are stored so that
+# they can be used without a full installation
+set(BUILDTREE_CMAKE_DIR ${GRACKLE_BUILD_EXPORT_PREFIX_PATH}/cmake/Grackle)
+
 include(CMakePackageConfigHelpers)

 # The following function implements standardized logic for determining
@@ -363,7 +382,7 @@ write_basic_package_version_file(
 get_info_properties_export_str(Grackle_Grackle CMAKE_CONFIG
   _GRACKLE_INFO_PROPERTIES)
 configure_file(
-  ${PROJECT_SOURCE_DIR}/cmake/GrackleConfig.cmake.in
+  ${LOCAL_CMAKE_MODULE_DIR}/GrackleConfig.cmake.in
   ${CMAKE_CURRENT_BINARY_DIR}/install-metadata/GrackleConfig.cmake
   @ONLY
 )
@@ -390,11 +409,19 @@ install(EXPORT GrackleTargets
   FILE Grackle_${GRACKLE_CONFIG_FILL_VAL}_targets.cmake
 )

-# generate and configure some cmake files in the build-tree so that external
-# cmake projects can use find_package to directly import Grackle::Grackle from
-# the build-tree (without requiring a full installation)
+# call the analog of install(EXPORT ...) for the grdata tool
+grdata_install_export_helper_(INSTALL_EXPORT
+  DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Grackle
+  FILE Grackle_grdata_pseudotarget.cmake
+  TMPDIR ${CMAKE_CURRENT_BINARY_DIR}/install-metadata
+)
+
+
+# copy some files to the build tree to locations specified by the
+# BUILDTREE_CMAKE_DIR variable so that external cmake projects can use
+# find_package to directly import Grackle::Grackle from the build-tree (without
+# requiring a full installation)

-set(BUILDTREE_CMAKE_DIR ${GRACKLE_BUILD_EXPORT_PREFIX_PATH}/cmake/Grackle)
 file(COPY
   ${CMAKE_CURRENT_BINARY_DIR}/install-metadata/GrackleConfig.cmake
@@ -406,3 +433,8 @@ export(EXPORT GrackleTargets
   FILE ${BUILDTREE_CMAKE_DIR}/Grackle_${GRACKLE_CONFIG_FILL_VAL}_targets.cmake
   NAMESPACE Grackle::
 )
+
+# analog to export(EXPORT ...) for the grdata tool
+grdata_install_export_helper_(EXPORT_EXPORT
+  FILE ${BUILDTREE_CMAKE_DIR}/Grackle_grdata_pseudotarget.cmake
+)
diff --git a/config/configure_file.py b/config/configure_file.py
index 808a48e1..82b5213f 100755
--- a/config/configure_file.py
+++ b/config/configure_file.py
@@ -19,6 +19,9 @@
     "alphanumeric character is an uppercase or lowercase letter (A-Z or a-z), "
     "a digit (0-9) or an underscore (_)")

+# a simple pattern to detect occurrences of python decorator syntax
+_PY_DECORATOR_PATTERN = re.compile(r"^[ \t]*@[^\s@]+[^@]*$")
+
 def is_valid_varname(s, start = None, stop = None):
     return re.fullmatch(_VALID_VARNAME_STR, s[slice(start, stop)]) is not None

@@ -55,12 +58,18 @@ def replace(matchobj):
             line = line[:-1]

         match_count = 0
-        out_f.write(_PATTERN.sub(replace,line))
+        if _PY_DECORATOR_PATTERN.match(line) is not None:
+            # this is a crude workaround to support python decorators.
+ # - if we didn't have this, then our eager error-handling would + # classify this line as an error + out_f.write(line) + else: + out_f.write(_PATTERN.sub(replace,line)) out_f.write('\n') if err_msg is not None: out_f.close() os.remove(out_fname) - raise RuntimeError(rslt) + raise RuntimeError(err_msg) unused_variables = used_variable_set.symmetric_difference(variable_map) @@ -70,7 +79,9 @@ def replace(matchobj): "were unused: {!r}".format(unused_variables)) def _parse_variables(dict_to_update, var_val_assignment_str_l, - val_is_file_path = False): + val_kind = 'literal'): + assert val_kind in ['literal', 'file-path-escaped-contents', + 'file-path-literal-contents'] for var_val_assignment_str in var_val_assignment_str_l: stripped_str = var_val_assignment_str.strip() # for safety @@ -104,7 +115,7 @@ def _parse_variables(dict_to_update, var_val_assignment_str_l, raise RuntimeError( "the {!r} variable is defined more than once".format(var_name)) - if val_is_file_path: + if val_kind != 'literal': # val_kind is some kind of file path path = value if not os.path.isfile(path): raise RuntimeError( @@ -112,11 +123,15 @@ def _parse_variables(dict_to_update, var_val_assignment_str_l, "at {!r} with the {!r} variable: no such file exists" ).format(path, var_name)) with open(value, "r") as f: - # we generally treat the characters in the file as literals - # -> we do need to make a point of properly escaping the - # newline characters - assert os.linesep == '\n' # implicit assumption - value = f.read().replace(os.linesep, r'\n') + if val_kind == 'file-path-escaped-contents': + # we generally treat the characters in the file as literals + # -> we do need to make a point of properly escaping the + # newline characters + assert os.linesep == '\n' # implicit assumption + value = f.read().replace(os.linesep, r'\n') + else: # val_kind == 'file-path-literal-contents' + value = f.read() + dict_to_update[var_name] = value def main(args): @@ -130,9 +145,11 @@ def main(args): # fill variable_map with the specified variables and values variable_map = {} _parse_variables(variable_map, args.variables, - val_is_file_path = False) + val_kind = 'literal') _parse_variables(variable_map, args.variable_use_file_contents, - val_is_file_path = True) + val_kind = 'file-path-escaped-contents') + _parse_variables(variable_map, args.variable_use_literal_file_contents, + val_kind = 'file-path-literal-contents') # use variable_map to actually create the output file with open(args.input, 'r') as f_input: @@ -148,7 +165,16 @@ def main(args): '--variable-use-file-contents', action = 'append', default = [], metavar = 'VAR=path/to/file', help = ("associates the (possibly multi-line) contents contained by the " - "specified file with VAR") + "specified file with VAR. This replaces each newline character " + "with the pair of characters \"\\n\". This is useful if the " + "contents represent a string to be printed") +) + +parser.add_argument( + '--variable-use-literal-file-contents', action = 'append', default = [], + metavar = 'VAR=path/to/file', + help = ("associates the (possibly multi-line) contents contained by the " + "specified file with VAR. This does NOT escape newline characters.") ) parser.add_argument( "variables", nargs = '*', action = 'store', default = [], diff --git a/doc/source/Examples.rst b/doc/source/Examples.rst new file mode 100644 index 00000000..10e42e46 --- /dev/null +++ b/doc/source/Examples.rst @@ -0,0 +1,166 @@ + +.. 
_examples:

Example Executables
===================

The Grackle repository provides example C, C++, and Fortran code for interacting with Grackle.

Example Descriptions
--------------------

The examples are located in the **src/example** directory.
The examples include:

  * **c_example.c** - C example

  * **c_local_example.c** - C example using only :ref:`local_functions`

  * **cxx_example.C** - C++ example

  * **cxx_omp_example.C** - C++ example using OpenMP

  * **fortran_example.F** - Fortran example

.. _how-to-run-example:

Preparing and Executing the Examples
------------------------------------

In this section, we explain how to prepare and execute the example executables.
Running an example is a useful way to quickly check whether Grackle is functioning correctly
(to more rigorously check that Grackle is fully functional, you can try :ref:`running the
test suite `).

The instructions for building and executing the examples vary based on the build-system.
In either case, the examples require that you haven't cleaned up from your build.
If you used the classic build-system, the examples require that Grackle has been fully installed.

1. Fetch datafiles with the ``grdata`` tool.

   * some of the examples require that the :ref:`datafiles have been fetched and managed <manage-data-files>` by the ``grdata`` tool.

   * In a full, standalone Grackle installation (regardless of build-system), the ``grdata`` tool will be :ref:`one of the installed components <install-products>`.
     If you build a standalone copy of Grackle with the CMake build-system, the build-system provides details about where to find a copy of the ``grdata`` tool :ref:`within the build-directory <build-dir-product-locations>`.

     Once you locate ``grdata``, you should invoke:

     .. code-block:: shell-session

        $ ./<path/to/grdata> fetch

     .. tip::

        Even if you don't think it will be necessary, it is always worth fetching the data files with the copy of ``grdata`` that was created alongside Grackle.
        Grackle will only access managed files if you have invoked this command with a copy of ``grdata`` that exactly matches the Grackle version :ref:`(steps are taken to deduplicate files on disk) <grdata-versioning-and-deduplication>`.
        At worst, the command will confirm that nothing needs to be done.

2. Compile the example (if necessary):

   .. tabs::

      .. group-tab:: Classic Build System

         Once you have already installed the grackle library, you can build the examples by typing ``make`` and the name of the file without extension.
         Assuming that you were just in the **src/clib** subdirectory, you would execute the following to build the C++ example:

         .. code-block:: shell-session

            ~/grackle/src/clib $ cd ../example
            ~/grackle/src/example $ make clean
            ~/grackle/src/example $ make

            Compiling cxx_example.C
            Linking
            Success!

      .. group-tab:: CMake Build System

         By default, the examples are automatically built with the rest of Grackle.
         The compiled example binaries can be found within **<build-dir>/example**, where **<build-dir>** is the arbitrary :ref:`build-directory <dir-defs>` that you previously specified while compiling Grackle.

         .. warning::

            It's important that **<build-dir>** is a top-level directory in the grackle repository (e.g. something like **~/grackle/my-build** is fine, but choices like **~/grackle/../my-grackle-build** and **~/grackle/my_builds/my-first-build** are problematic).
            If this isn't the case, then the examples (that don't use automatically managed input data files) won't be able to locate the data files.
   .. important::

      If you're using the Classic build system, make sure to add the path to the directory containing the installed **libgrackle.so** to your LD_LIBRARY_PATH (or DYLD_LIBRARY_PATH on Mac).
      This is **NOT** necessary for the CMake build system.
      :ref:`More information is provided below <how-examples-are-built>`.

3. Now we execute the example

   .. note::

      The examples make certain assumptions about the location of the data files.
      To ensure that the data files can be found, you should execute each example-binary from the same directory where the example binary is produced.

   To execute the example, invoke:

   .. tabs::

      .. group-tab:: Classic Build System

         .. code-block:: shell-session

            ~/grackle/src/example $ ./cxx_example

      .. group-tab:: CMake Build System

         .. code-block:: shell-session

            ~/grackle $ cd <build-dir>/examples
            ~/grackle/<build-dir>/examples $ ./cxx_example

   The output will look like the following:

   .. code-block:: shell-session

      The Grackle Version 2.2
      Mercurial Branch default
      Mercurial Revision b4650914153d

      Initializing grackle data.
      with_radiative_cooling: 1.
      primordial_chemistry: 3.
      metal_cooling: 1.
      UVbackground: 1.
      Initializing Cloudy cooling: Metals.
      cloudy_table_file: ../../input/CloudyData_UVB=HM2012.h5.
      Cloudy cooling grid rank: 3.
      Cloudy cooling grid dimensions: 29 26 161.
      Parameter1: -10 to 4 (29 steps).
      Parameter2: 0 to 14.849 (26 steps).
      Temperature: 1 to 9 (161 steps).
      Reading Cloudy Cooling dataset.
      Reading Cloudy Heating dataset.
      Initializing UV background.
      Reading UV background data from ../../input/CloudyData_UVB=HM2012.h5.
      UV background information:
      Haardt & Madau (2012, ApJ, 746, 125) [Galaxies & Quasars]
      z_min = 0.000
      z_max = 15.130
      Setting UVbackground_redshift_on to 15.130000.
      Setting UVbackground_redshift_off to 0.000000.
      Cooling time = -1.434987e+13 s.
      Temperature = 4.637034e+02 K.
      Pressure = 3.345738e+34.
      gamma = 1.666645e+00.


.. _how-examples-are-built:

More details about how examples are built
-----------------------------------------

In more detail, both build-systems use copies of the grackle-library within the build directory while compiling the examples.

* the Classic build-system **always** links the examples against the shared-library version of Grackle and requires that Grackle is fully installed in a location known by the system (either a standard system location OR a location specified by ``LD_LIBRARY_PATH``/``DYLD_LIBRARY_PATH``).

* In contrast, CMake automatically takes special steps to try to ensure that each example-binary will link to the copy of the Grackle library (whether it is shared or static) that is in the ``<build-dir>``; in fact, Grackle doesn't even need to be installed to run the examples.

* With that said, if you compile Grackle as a shared library in a cmake build, an example-binary **might** try to use a copy of a shared grackle library found in a directory specified by ``LD_LIBRARY_PATH``/``DYLD_LIBRARY_PATH`` if one exists.
  The exact behavior may be platform dependent and also depends on whether CMake instructs the linker to use RPATH or RUNPATH (this is not specified by the cmake docs).
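For instance (a sketch that assumes a shared-library CMake build on Linux, with the hypothetical build-directory name **my-build**), you can check which copy of the library an example binary will actually load:

.. code-block:: shell-session

   ~/grackle $ ldd my-build/examples/cxx_example | grep grackle

On macOS, ``otool -L my-build/examples/cxx_example`` reports the same information.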
diff --git a/doc/source/Installation.rst b/doc/source/Installation.rst
index 2931a7bb..de743773 100644
--- a/doc/source/Installation.rst
+++ b/doc/source/Installation.rst
@@ -17,7 +17,7 @@ There are 3 steps to setting up Grackle on your system
 Given a smooth roll-out of the :ref:`CMake build system `, it is our intention to deprecate and remove the :ref:`classic build system `.
 If you encounter any problems with the CMake system or anticipate any issues with this plan, :doc:`please let us know `.

-We include a :ref:`note on compiler toolchain compatability <compiler_toolchain_compatability>` at the end of this page.
+We include a :ref:`description of the installed products <install-products>` and a :ref:`note on compiler toolchain compatibility <compiler_toolchain_compatability>` at the end of this page.

 .. _install_grackle_dependencies:

@@ -116,7 +116,17 @@ Linux systems, and an unformatted ``Make.mach.unknown``. If you have a make
 file prepared for an Enzo install, it cannot be used straight away, but is a
 very good place to start.

-Once you have chosen the make file to be used, a few variables should be set:
+.. COMMENT BLOCK
+
+   To support cross-referencing of the following block of text with sphinx's
+   `:ref:` construct (while suppressing warnings about referencing plain text),
+   we enclose the text in RST's container directive (it won't impact rendering)
+   https://docutils.sourceforge.io/docs/ref/rst/directives.html#toc-entry-19
+
+.. container::
+   :name: classic-makefile-variable-list
+
+   Once you have chosen the make file to be used, a few variables should be set:

    * ``MACH_LIBTOOL`` - path to ``libtool`` executable. Note, on a Mac, this
      should point to ``glibtool``, which can be installed with macports

@@ -136,6 +146,10 @@ Once you have chosen the make file to be used, a few variables should be set:
    * ``MACH_INSTALL_INCLUDE_DIR`` - path where grackle header files will be
      installed (only set if different from MACH_INSTALL_PREFIX/include).

+   * ``MACH_INSTALL_BIN_DIR`` - path where grackle-related utility executables
+     will be installed (only set if different from MACH_INSTALL_PREFIX/bin).
+
+
 Once the proper variables are set, they are loaded into the build system by
 doing the following:

@@ -261,54 +275,13 @@ Then, to install:
 5. Test your Installation

 Once installed, you can test your installation with the provided example to
-assure it is functioning correctly. If something goes wrong in this process,
+assure it is functioning correctly.
+More details are provided :ref:`here <how-to-run-example>`.
+If something goes wrong in this process,
 check the ``out.compile`` file to see what went wrong during compilation, or
 use ``ldd`` (``otool -L`` on Mac) on your executable to determine what went
 wrong during linking.

-::
-
-    ~/grackle/src/clib $ cd ../example
-    ~/grackle/src/example $ make clean
-    ~/grackle/src/example $ make
-
-    Compiling cxx_example.C
-    Linking
-    Success!
-
-    ~/grackle/src/example $ ./cxx_example
-
-    The Grackle Version 2.2
-    Mercurial Branch default
-    Mercurial Revision b4650914153d
-
-    Initializing grackle data.
-    with_radiative_cooling: 1.
-    primordial_chemistry: 3.
-    metal_cooling: 1.
-    UVbackground: 1.
-    Initializing Cloudy cooling: Metals.
-    cloudy_table_file: ../../input/CloudyData_UVB=HM2012.h5.
-    Cloudy cooling grid rank: 3.
-    Cloudy cooling grid dimensions: 29 26 161.
-    Parameter1: -10 to 4 (29 steps).
-    Parameter2: 0 to 14.849 (26 steps).
-    Temperature: 1 to 9 (161 steps).
-    Reading Cloudy Cooling dataset.
-    Reading Cloudy Heating dataset.
-    Initializing UV background.
-    Reading UV background data from ../../input/CloudyData_UVB=HM2012.h5.
-    UV background information:
-    Haardt & Madau (2012, ApJ, 746, 125) [Galaxies & Quasars]
-    z_min = 0.000
-    z_max = 15.130
-    Setting UVbackground_redshift_on to 15.130000.
-    Setting UVbackground_redshift_off to 0.000000.
-    Cooling time = -1.434987e+13 s.
-    Temperature = 4.637034e+02 K.
-    Pressure = 3.345738e+34.
-    gamma = 1.666645e+00.
-
 In order to verify that Grackle is fully functional, try :ref:`running the
 test suite `.

@@ -324,13 +297,39 @@ An overview of our design philosophy is provided :ref:`here `.
-This current section focuses on installation.
+The current section focuses on building and installation.
+
+Basic Definitions
++++++++++++++++++
+
+For the uninitiated, the CMake build-system performs an *out-of-source* build.
+To introduce what this means we define the terms **source directory** and **build directory** and touch on the idea of an **install destination**.
+For concreteness, we continue to assume that the root of the cloned Grackle repository is located at **~/grackle**.
+
+.. COMMENT: The following is a RST "Definition List" structure (with a label so we can reference it later)
+
+.. _dir-defs:
+
+source directory
+   The root directory holding all of Grackle's source files.
+   We generally consider this to be **~/grackle/src** (a case could be made that it's actually **~/grackle**).
+
+build directory
+   The root directory where we put all build artifacts (auto-generated source/header files, object files, libraries, executables, etc.)

+   * for an *in-source-build* (e.g. a build performed by the classic build system) the build and source directories are commingled (i.e. build artifacts are distributed throughout the source directory hierarchy).
+   * for an *out-of-source* build, this is a location chosen so that no build artifacts are placed within the source directory
+   * for the CMake build system, this is an arbitrary, user-specified location.
+     It is conventionally placed within the root of the grackle repository and called something like **~/grackle/build**.
+     We commonly denote this location as **<build-dir>**
+
+install destination
+   Specifies where the primary products of the build process are copied during a build system's installation phase.
+   Properties of the copied products (e.g. file owners, file permissions, executable/shared library properties) may be altered.
+   More information will be provided :ref:`below <install-products>`.
+
+While cleaning up from an *in-source-build* requires special logic (commonly encoded in a ``make clean`` command), cleaning up from an *out-of-source-build* is much more straightforward.
+To clean up from an *out-of-source-build*, you can simply delete the build directory.

-For the uninitiated, the CMake build-system performs an out-of-source build.
-An out-of-source build places all build artifacts (auto-generated source/header files, object files, etc.) into a "build-directory."
-The build-directory is at a user-specified location that is organized into a hierarchy that resembles the source directory hierarchy.
-Cleaning up from a CMake-build is as simple as deleting this build-directory.
-In contrast, the "classic build system" performs an in-source build (because that type of build distributes build artifacts throughout the source directory hierarchy, clean up requires more complex logic encapsulated by the ``make clean`` command).

 .. warning::

@@ -357,9 +356,8 @@ The remainder of this subsection is primarily intended for readers who are relat

   For now, we make 2 basic decisions:
-   #. Decide on the directory, ``<build-dir>``, where you want to build Grackle. [#f1]_
-      This is referred to as the build-directory and is generally placed at the root level of the grackle repository.
-      A common choice is ``build`` (but this is fairly arbitrary).
+   #. Decide on the :ref:`build-directory <dir-defs>`, ``<build-dir>``, where you want to build Grackle.\ [#f1]_
+      This is generally placed at the root level of the grackle repository and commonly named ``build`` (but this is fairly arbitrary).

    #. Decide on the installation directory prefix, ``<install-prefix>``, where Grackle will be installed.
       This is specified via the ``CMAKE_INSTALL_PREFIX`` cmake configuration variable.

@@ -404,33 +402,9 @@ The remainder of this subsection is primarily intended for readers who are relat
 4. Test your Build.

-   Once you have compiled Grackle, you can run one of the provided examples to test if it functions correctly.
-   These examples are automatically compiled with Grackle.
-
-   .. code-block:: shell-session
-
-      ~/grackle $ cd <build-dir>/examples
-      ~/grackle/<build-dir>/examples $ ./cxx_example
-
-   .. warning::
-
-      The examples make certain assumptions about the location of the input files.
-      The examples are only guaranteed to work if both:
-
-      1. you execute the example-binary from the same-directory where the example-binary is found
-
-      2. ``<build-dir>`` is a top-level directory in the grackle repository (e.g. something like ``my-build`` is fine, but choices like ``../my-grackle-build`` and ``my_builds/my-first-build`` are problematic).
-
-   .. note::
-
-      For reference, the Classic build-system always links Grackle against the shared-library version of Grackle and requires that Grackle is fully installed in a location known by the system (either a standard system location OR a location specified by ``LD_LIBRARY_PATH``/``DYLD_LIBRARY_PATH``).
-      In contrast, cmake automatically takes special-steps to try to ensure that each example-binary will link to the copy of the Grackle library (whether it is shared or static) that is in the ``<build-dir>``; in fact, Grackle doesn't even need to be installed to run the Grackle library.
-
-      With that said, if you compile Grackle as a shared library in a cmake build, an example-binary **might** try to use a copy of a shared grackle library found in a directory specified by ``LD_LIBRARY_PATH``/``DYLD_LIBRARY_PATH`` if one exists.
-      The exact behavior may be platform dependent and also depends on whether CMake instructs the linker to use RPATH or RUNPATH (this is not specified by the cmake docs).
-
-In order to verify that Grackle is fully functional, you can try :ref:`running the test suite `.
+   Once you have compiled Grackle, you can run one of the provided examples to test if it functions correctly.
+   More details are provided :ref:`here <how-to-run-example>`.
+   In order to verify that Grackle is fully functional, you can try :ref:`running the test suite `.

 .. _how_to_configure:

@@ -507,6 +481,7 @@ This second table highlights a subset of standardized CMake options that may als
 .. list-table:: Standard CMake Options
    :widths: 12 30 5
    :header-rows: 1
+   :name: standard-cmake-options

    * - Name
      - Description
      - ````

    * - ``CMAKE_BUILD_TYPE``
-     - Specifies the desired build configuration (for single-configuration generators [#f3]_).
+     - Specifies the desired build configuration (for single-configuration generators\ [#f3]_).
       Grackle currently supports the standard choices ``Debug``, ``Release``, ``RelWithDebInfo`` and ``MinSizeRel``.
     - ````

@@ -539,6 +514,22 @@ This is commonly set by host-files.
     - ````

+
+.. COMMENT BLOCK
+
+   To support cross-referencing the following block of text with sphinx's `ref`
+   construct (while suppressing warnings about referencing plain text), we
+   enclose the text in RST's container directive (it won't impact rendering)
+
+.. container::
+   :name: cmake-granular-install-vars
+
+   In the (unlikely) event you need more control over :ref:`installation locations <install-products>`, the build-system honors values specified for standard variables like ``CMAKE_INSTALL_BINDIR``, ``CMAKE_INSTALL_LIBDIR``, ``CMAKE_INSTALL_INCLUDEDIR``.
+   More information about these cmake variables is provided `here <https://cmake.org/cmake/help/latest/module/GNUInstallDirs.html>`__.
+
+
+
+
 There are also additional standard options for BOTH configuring other aspects of the build and for finding the correct/preferred HDF5 library and configuring the correct openmp library.
 Additionally, CMake will also respect the values of certain environment variables.

@@ -575,6 +566,28 @@ The following code snippet illustrates how you might do this (for concreteness,

    ~ grackle $ cmake --build build-shared
    ~ grackle $ cmake --install build-shared

+.. _build-dir-product-locations:
+
+Build Directory Product Locations
++++++++++++++++++++++++++++++++++
+
+Up until now, we have been pretty vague about how the build-products are organized within the build directory.
+**This is intentional!**
+The paths to files within the build directory are an implementation detail that can and will change at any time in the future (some files could be removed entirely from the build-directory).
+We generally expect consumers to interact with most of Grackle's build products :ref:`once they are installed <install-products>`.
+
+With that said, we recognize that it can be useful to make use of certain build-products from a standalone Grackle build without requiring a full installation.
+We provide 2 mechanisms for doing this:
+
+1. The root of the build directory contains a file called **grackle-buildpaths-<config>.txt** that contains paths specifying where some useful grackle utilities can be found in the build-directory (if/when the products are actually built)
+
+   - if you have been following the above compilation instructions, you can only have 1 file in your build-directory called **grackle-buildpaths-*.txt** at a time.\ [#buildproducts1]_
+
+   - at the moment, this just contains the ``grdata`` command line tool for :ref:`managing grackle data files <manage-data-files>`.
+
+2. We provide an experimental :ref:`approach for integrating grackle ` in a downstream project using the build directory of a standalone Grackle build, and we may add another approach in the future.\ [#buildproducts2]_
+   (Technically, the :ref:`embedded install approach ` also lets you avoid fully installing Grackle, but this is a special case)

 .. _cmake_host-files:

 More About Host-Files
 +++++++++++++++++++++

@@ -632,6 +645,57 @@ While embedded builds currently respect ``GRACKLE_OPTIMIZATION_FLIST_INIT``, tha

   * after we update the minimum required CMake version for compiling Grackle to at least 3.19, we may transition to using these features.

+.. _install-products:
+
+Installation Products
+---------------------
+
+We now give an overview of the products of an installation (e.g. the result of commands like ``make install`` or ``cmake --install <build-dir>``).
+
+We describe these products in terms of the :ref:`installation destination <dir-defs>`.
+Organization of the installed products has a similar description on all major platforms.\ [#installproducts1]_
+Essentially, products are distributed among a standard set of directories contained within a single root directory.
+This root directory is often called the "installation prefix" (or simply the "prefix").
+The **include** subdirectory typically holds headers, the **bin** subdirectory typically holds executables, and the **lib** subdirectory (some platforms use similar names like **lib64**) holds libraries.
+
+.. tabs::
+
+   .. group-tab:: Classic Build System
+
+      Unless overridden, the **lib** subdirectory is *always* called **lib**.
+
+      Overrides are specified with :ref:`Makefile variables <classic-makefile-variable-list>`.
+      ``MACH_INSTALL_PREFIX`` controls the prefix while ``MACH_INSTALL_LIB_DIR``, ``MACH_INSTALL_INCLUDE_DIR``, and ``MACH_INSTALL_BIN_DIR`` give finer-grained control over the individual locations.
+
+   .. group-tab:: CMake Build System
+
+      The default value for the **lib** subdirectory is `platform dependent <https://cmake.org/cmake/help/latest/module/GNUInstallDirs.html>`__ (currently either **lib** or **lib64**).
+
+      The standard ``CMAKE_INSTALL_PREFIX`` option :ref:`controls the prefix <standard-cmake-options>` while the
+      ``CMAKE_INSTALL_LIBDIR``, ``CMAKE_INSTALL_INCLUDEDIR``, and ``CMAKE_INSTALL_BINDIR`` options :ref:`provide finer control <cmake-granular-install-vars>`.
+
+A vanilla, standalone (i.e. :ref:`not an embedded build <Embed_Grackle_in_Sim_Build>`) Grackle installation provides:
+
+- The Grackle library (in the **lib** subdirectory).
+  Depending on the build system (and your choices), installation provides it as a shared library, a static library, or both.
+
+- Header files (in the **include** subdirectory).
+  More details about the header files (e.g. public headers vs. implementation details) are provided :ref:`here <public-header-files>`.
+
+- Utility Executables (in the **bin** subdirectory).
+  At the moment, this just includes the ``grdata`` command line tool for :ref:`managing grackle data files <manage-data-files>`.
+
+  .. note::
+
+     .. COMMENT: (Maybe we should just put a redirect at the root of the build-directory?)
+
+     When you use the CMake build-system, you can reliably find the ``grdata`` command line program within the build directory at **<build-dir>/grackle/bin/grdata** (this assumes that you performed a stand-alone build, with default configuration settings)
+
+     **REMINDER: Unless explicitly noted, the locations of all other installation products (and any other contents) within the build directory are considered implementation details -- they can/will change at ANY time.**
+
+
+- If you used the CMake build system, some metadata files are also included to make it :ref:`easy for other projects to consume Grackle <integration-consuming-grackle>`

 .. _compiler_toolchain_compatability:

@@ -681,5 +745,14 @@ For example, adding GPU-support with the likes of CUDA or HIP would involve link

 .. [#f4] Aside: performing these 2 separate CMake builds compiles the source files the same number of times as the Classic build system.
    Behind the scenes, the classic build system always compiles each source file twice (once with position independent code and once without).

+.. [#buildproducts1] In principle, you can get multiple files if you are using a multi-configuration generator.
+   If you don't know what this means, you really don't need to worry about it.
+
+.. [#buildproducts2] The property common to the supported integration approaches that let you use grackle from the build-directory is that they don't require hardcoding assumptions about Grackle's build-directory into a downstream project's build-system.
+   Instead, they introduce a standardized way for us, the Grackle developers, to communicate Grackle's usage requirements (and any assumptions about paths) to the downstream build system.
+
+.. [#installproducts1] The primary exception is for MacOS software distributed through official Apple channels.
+   For our purposes, we (like most open-source science software) get away with treating MacOS as a generic Unix-like system.
+   Ironically, while Windows (which we definitely don't support) may prefer some alternative organization, it is much less of an exception than MacOS.
diff --git a/doc/source/Integration.rst b/doc/source/Integration.rst
index a84f9c15..93230707 100644
--- a/doc/source/Integration.rst
+++ b/doc/source/Integration.rst
@@ -1,3 +1,6 @@
+
+.. _integration-consuming-grackle:
+
 Integrating Grackle into your Application
 =========================================

@@ -172,13 +175,16 @@ The following snippet shows a sample Makefile for compiling a sample application
 pkg-config also provides additional functionality, like querying version numbers, enforcing version requirements, etc.
 Most of that functionality is described in `this guide `__.

-You can also query Grackle-specific details, such as:
+You can also query Grackle-related details, such as:

 * the full version string (to determine if it's a dev-version or not) via ``pkg-config --variable=GRACKLE_VERSION_STR grackle``

 * whether Grackle was compiled with double precision, via ``pkg-config --variable=GRACKLE_USE_DOUBLE grackle``

-* whether grackle was compiled with openmp, via ``pkg-config --variable=GRACKLE_USE_OPENMP grackle``
+* whether Grackle was compiled with openmp, via ``pkg-config --variable=GRACKLE_USE_OPENMP grackle``
+
+* the path to the :ref:`grdata cli tool <manage-data-files>` associated with this version of Grackle, via ``pkg-config --variable=GRACKLE_GRDATA_TOOL_PATH grackle`` (this might be useful for testing purposes)
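+
+For instance, a downstream test script might locate and run the tool as in the following sketch (this assumes ``grackle.pc`` can already be found via ``PKG_CONFIG_PATH``):
+
+.. code-block:: shell-session
+
+   $ GRDATA="$(pkg-config --variable=GRACKLE_GRDATA_TOOL_PATH grackle)"
+   $ "$GRDATA" fetch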
+

 .. warning::

@@ -240,6 +246,8 @@ These properties include:
 * ``GRACKLE_USE_DOUBLE`` -- stores whether Grackle was compiled with single or double precision
 * ``GRACKLE_USE_OPENMP`` -- stores whether Grackle was compiled with OpenMP

+Information about the :ref:`grdata cli tool <manage-data-files>` that is created and built alongside this version of Grackle is exposed via the ``Grackle::grdata`` executable target.
+This can be useful for testing purposes.

 .. _Embed_Grackle_in_Sim_Build:

@@ -308,6 +316,8 @@ Care has been taken while designing the CMake build-system to ensure that the ``
 In both cases, the target provides the same custom properties to describe information about the build.
 See the :ref:`section ` about ``find_package`` for more details.

+Additionally, information about the :ref:`grdata cli tool <manage-data-files>` that is created and built alongside this version of Grackle is exposed via the ``Grackle::grdata`` executable target.
+
 .. rubric:: Footnotes

 .. [#f1] This is required by CMake.
diff --git a/doc/source/Interaction.rst b/doc/source/Interaction.rst
index 3f93b2ea..3aabf128 100644
--- a/doc/source/Interaction.rst
+++ b/doc/source/Interaction.rst
@@ -15,57 +15,14 @@ The :ref:`Primary API `, manages some of Grackle's data struc
 In contrast, the :ref:`Local API `, requires that the downstream application explicitly manage pointers to these same data-structures and requires that the pointers are provided as arguments to each function.
 The latter API is explicitly thread-safe as it involves no global data.

-.. _examples:
-
 Example Executables
 -------------------

 The grackle source code contains examples for C, C++, and Fortran codes.
-They are located in the **src/example** directory and provide examples
-of calling all of grackle's functions.
-
-  * **c_example.c** - C example
-
-  * **c_local_example.c** - C example using only :ref:`local_functions`
-
-  * **cxx_example.C** - C++ example
-
-  * **cxx_omp_example.C** - C++ example using OpenMP
-
-  * **fortran_example.F** - Fortran example
-
-The instructions for building and executing the examples vary based on the build-system:
-
-.. tabs::
-
-   .. tab:: Classic Build System
-
-      Once you have already installed the grackle library, you can build the examples by typing *make* and the name of the file without extension.
-      For example, to build the C++ example, type:
-
-      .. code-block:: shell-session
-
-         $ make cxx_example
-
-      To run the example, make sure to add the path to the directory containing
-      the installed **libgrackle.so** to your LD_LIBRARY_PATH (or
-      DYLD_LIBRARY_PATH on Mac).
-
-
-   .. tab:: CMake Build System
-
-      By default, the examples are automatically built with the rest of Grackle.
-      The compiled example binaries can be found within *<build-dir>/example*, where *<build-dir>* is the arbitrary build-directory that you need to specify when compiling Grackle.
-
-      It's important that *<build-dir>* is a top-level directory in the grackle repository (e.g. something like *my-build* is fine, but choices like *../my-grackle-build* and *my_builds/my-first-build* are problematic).
-      If this isn't the case, then the examples won't be able to locate the input data files.
-
-      You don't need to worry about using LD_LIBRARY_PATH (or DYLD_LIBRARY_PATH on Mac) to run these examples with this build-system.
-
-.. important::
-
-   The examples make certain assumptions about the location of the input files.
-   To ensure that the input files can be found, you should execute each example-binary from the same directory where the example binary is produced.
+These files illustrate how to call all of grackle's functions.
+More details are provided :ref:`here <examples>`.
+
+.. _public-header-files:

 Header Files
 ------------
diff --git a/doc/source/Parameters.rst b/doc/source/Parameters.rst
index 7905442a..e25a6d2a 100644
--- a/doc/source/Parameters.rst
+++ b/doc/source/Parameters.rst
@@ -147,6 +147,41 @@ For all on/off integer flags, 0 is off and 1 is on.
    Path to the data file containing the metal cooling and
    UV background tables. Default: "".

+.. c:var:: int grackle_data_file_options
+
+   This controls how the string passed to the :c:data:`grackle_data_file` parameter is interpreted.
+   Allowable values are represented by global constants specified in the header file.
+   The primary choices include:
+
+   * :c:macro:`!GR_DFOPT_FULLPATH_NO_CKSUM` indicates that the user wants to use the data file at an arbitrary path specified by :c:data:`grackle_data_file`.
+     This is the legacy behavior.
+     If no value is specified, we fall back to this choice.
+
+   * :c:macro:`!GR_DFOPT_MANAGED` indicates that the caller wants to use one of the standard datafiles shipped with the current version of grackle and that is managed by the :ref:`data management tool <manage-data-files>`.
+
+     * In this case, :c:data:`grackle_data_file` holds a string that **EXACTLY** matches the name of a standard data file.
+       For example, ``"CloudyData_UVB=HM2012.h5"`` is valid but "path/to/CloudyData_UVB=HM2012.h5" is **NOT** valid.
+
+     * Grackle uses the same algorithm as :ref:`the data management tool <manage-data-files>` to infer the path to the data file that is explicitly associated with the current version of Grackle (if a different version of Grackle ever ships a different version of the same data file, this will never use that version).
+
+     * For safety reasons, Grackle will always validate the contents of the file; it will compute the checksum and compare it with its internal expectations.
+       If the checksums don't match, Grackle will report an error.
+       The overhead of the checksum calculation is minimal and it only affects initialization of Grackle (i.e. you just pay the cost once)
+
+     * At the moment, this option is most useful when used with pygrackle (since that is currently the only way to invoke :ref:`the data management tool <manage-data-files>`).
+       In the near future, we expect this to become easier to use.
+
+   .. note::
+
+      The primary reason we validate the checksum is to protect users from the unlikely scenarios where logical bugs get introduced into the core grackle library or the :ref:`data-management-tool <manage-data-files>`.
+      The concern is that a hypothetical bug could cause the logic to silently load the wrong data file (or worse, a partially corrupted datafile) and continue operating without any indication of a problem.
+
+      With that said, we recognize that some parallel filesystems can be very fragile.
+      Thus we introduce :c:macro:`!GR_DFOPT_MANAGED_NO_CKSUM`, which is exactly the same as :c:macro:`!GR_DFOPT_MANAGED`, except that the checksum is not computed and compared against expectations.
+      This should **ONLY** be used in a parallel operation where at least 1 of the processes is using the :c:macro:`!GR_DFOPT_MANAGED` choice.
+      (If you choose to use :c:macro:`!GR_DFOPT_MANAGED_NO_CKSUM`, be aware that you are giving up all safety checks)
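+
+   For illustration, opting into a managed datafile from C might look like the following sketch (it assumes a ``chemistry_data *my_chemistry`` that was already initialized via ``set_default_chemistry_parameters``):
+
+   .. code-block:: c
+
+      /* request the managed copy of a standard datafile: pass ONLY the file
+       * name (no path) and opt into the managed-file lookup */
+      my_chemistry->grackle_data_file = "CloudyData_UVB=HM2012.h5";
+      my_chemistry->grackle_data_file_options = GR_DFOPT_MANAGED;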
+
+     * Grackle uses the same algorithm as :ref:`the data management tool ` to infer the path to the data file that is explicitly associated with the current version of Grackle (if a different version of Grackle ever ships a different version of the same data file, this will never use that version).
+
+     * For safety reasons, Grackle will always validate the contents of the file; it will compute the checksum and compare it with its internal expectations.
+       If the checksums don't match, Grackle will report an error.
+       The overhead of the checksum calculation is minimal and it only affects initialization of Grackle (i.e. you just pay the cost once).
+
+     * At the moment, this option is most useful when used with pygrackle (since that is currently the only way to invoke :ref:`the data management tool `).
+       In the near future, we expect this to become easier to use.
+
+   .. note::
+
+      The primary reason we validate the checksum is to protect users from the unlikely scenarios where logical bugs get introduced into the core Grackle library or the :ref:`data-management-tool `.
+      The concern is that a hypothetical bug could cause the logic to silently load the wrong data file (or worse, a partially corrupted datafile) and continue operating without any indication of a problem.
+
+      With that said, we recognize that some parallel filesystems can be very fragile.
+      Thus we introduce :c:macro:`!GR_DFOPT_MANAGED_NO_CKSUM`, which is exactly the same as :c:macro:`!GR_DFOPT_MANAGED`, except that the checksum is not computed and compared against expectations.
+      This should **ONLY** be used in a parallel operation where at least 1 of the processes is using the :c:macro:`!GR_DFOPT_MANAGED` choice.
+      (If you choose to use :c:macro:`!GR_DFOPT_MANAGED_NO_CKSUM`, be aware that you are giving up all safety checks.)
+
+
 .. c:var:: float Gamma
 
    The ratio of specific heats for an ideal gas. A direct calculation
diff --git a/doc/source/Python.rst b/doc/source/Python.rst
index 3f1938f2..f9defbfa 100644
--- a/doc/source/Python.rst
+++ b/doc/source/Python.rst
@@ -151,6 +151,17 @@ To make sure everything is installed properly, you can try invoking pygrackle fr
 
 If this command executes without raising any errors, then you have successfully installed Pygrackle.
 
+Installing DataFiles
+++++++++++++++++++++
+
+To install the datafiles in a location where they can automatically be found and used by the Pygrackle examples (and tests), we recommend invoking the following command (from any directory):
+
+.. code-block:: shell-session
+
+   $ python -m pygrackle fetch
+
+See :ref:`this section ` for more details about customizing the location where data is stored and about managing datafiles in general.
+
 .. _pygrackle-dev:
 
 Installing Pygrackle Development Requirements
diff --git a/doc/source/Tools.rst b/doc/source/Tools.rst
new file mode 100644
index 00000000..65b66468
--- /dev/null
+++ b/doc/source/Tools.rst
@@ -0,0 +1,128 @@
+
+.. _manage-data-files:
+
+Datafile Management
+===================
+
+We provide a command line tool to optionally manage Grackle's datafiles.
+We call this the ``grdata`` tool.
+
+At a Quick Glance
+-----------------
+
+We provide 2 ways to access this tool:
+
+1. As a standalone command line application installed alongside Grackle.
+2. As a command line tool shipped :ref:`as a part of pygrackle `.
+
+To execute the tool:
+
+.. tabs::
+
+   .. group-tab:: As a Standalone CLI
+
+      In a full, standalone Grackle installation (regardless of build-system), the ``grdata`` tool will be :ref:`one of the installed components `.
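+      For instance, if your installation also shipped Grackle's pkg-config file, one hypothetical way to locate the installed tool is to query the ``GRACKLE_GRDATA_TOOL_PATH`` variable described in :ref:`integration-consuming-grackle` (this sketch assumes ``pkg-config`` can already find ``grackle``):
+
+      .. code-block:: shell-session
+
+         $ GRDATA="$(pkg-config --variable=GRACKLE_GRDATA_TOOL_PATH grackle)"
+         $ "$GRDATA" --help
+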
+      If you build a standalone copy of Grackle with the CMake build-system, the build-system provides details about where to find a copy of the ``grdata`` tool :ref:`within the build-directory `.
+
+      Once you locate the tool, you can invoke it with:
+
+      .. code-block:: shell-session
+
+         $ ./ ...
+
+      .. note::
+
+         In detail, the tool is implemented as an executable python script (it only uses the python standard library) and it relies upon a `shebang `__ to launch the program.
+
+         In the event that your machine finds an invalid python version (at the time of writing, the minimum required version is 3.6.1) or can't find any python interpreter, you have 2 options:
+
+         1. you can modify the shebang path OR
+
+         2. you can directly invoke the tool with python: `` ...``.
+
+
+   .. group-tab:: As a part of Pygrackle
+
+      When Pygrackle is installed, you can invoke
+
+      .. code-block:: shell-session
+
+         $ python -m pygrackle ...
+
+      .. note::
+
+         If you choose to install pygrackle without manually downloading the grackle repository, this tool is the most efficient way to download the files.
+
+
+
+In the sample snippets ``...`` is replaced with one or more command-line arguments.
+For example, ``fetch`` will invoke a subcommand that downloads all associated files (if they aren't already downloaded).
+You can use the ``--help`` option to get a list of all subcommands.
+You can also pass the ``--help`` option after the name of a subcommand (e.g. you can use ``fetch --help``) to get more details about subcommand-specific options.
+
+
+The pygrackle examples and the pygrackle tests all rely upon this functionality.
+The Grackle C library has support for accessing the datafiles managed by this tool.
+Some of the examples may soon rely upon this functionality.
+
+.. _grdata-versioning-and-deduplication:
+
+.. important::
+
+   Instances of the grdata tool are associated with a single version of Grackle (if you are using Pygrackle, the version of the core Grackle C library is the relevant version number).
+
+   Internal (Py)Grackle logic that automatically locates and uses the files managed by this tool will **only** work if the files have been "fetched" by versions of the grdata tool that **exactly** match.
+   In detail, matching version strings have identical major, minor, and micro version numbers.
+   Both version strings must either have an identical suffix (namely "-dev") or they must both lack a suffix.
+   For development versions of Grackle, we do **NOT** require the commit-hashes to exactly match.
+
+   Consider the following hypothetical scenarios:
+
+   * You install version 3.4.0 of Grackle and use the associated version of ``grdata`` to install datafiles.
+
+     * The version of Pygrackle that wraps 3.4.0 will also be able to automatically locate these datafiles
+
+     * If you install a different version of Grackle (say version 3.3.1-dev or 3.4.0-dev or 3.4.1 or 3.4.1-dev) or a copy of Pygrackle that wraps a different version, you will need to use the copy of ``grdata`` associated with that copy to fetch the datafiles.
+
+   * You install the version of Pygrackle that wraps version 3.4.0 of Grackle and use it to install the associated datafiles
+
+     * Any build/installation of Grackle version 3.4.0 (whether or not it is wrapped by Pygrackle) will also be able to locate and use these datafiles
+
+   As we will discuss, the grdata tool takes steps to deduplicate the storage used by the datafiles for different Grackle versions.
+   For example, if you use the ``grdata`` instances associated with 5 distinct Grackle versions to fetch data, only one copy of the **CloudyData_UVB=HM2012_high_density.h5** file's data will be stored on disk (i.e. that data will only take ~6.74 MB of disk space rather than ~33.7 MB).
+
+Description
+-----------
+
+.. include:: ../../src/python/pygrackle/utilities/grdata.py
+   :start-after: [[[BEGIN-SECTION:DESCRIPTION]]]
+   :end-before: [[[END-SECTION:DESCRIPTION]]]
+
+
+
+Motivation
+----------
+
+.. include:: ../../src/python/pygrackle/utilities/grdata.py
+   :start-after: [[[BEGIN-SECTION:MOTIVATION]]]
+   :end-before: [[[END-SECTION:MOTIVATION]]]
+
+
+How it works
+------------
+
+.. include:: ../../src/python/pygrackle/utilities/grdata.py
+   :start-after: [[[BEGIN-SECTION:INTERNALS-OVERVIEW]]]
+   :end-before: [[[END-SECTION:INTERNALS-OVERVIEW]]]
+
+
+Sample Directory Structure
+++++++++++++++++++++++++++
+
+Below, we sketch out what the directory structure might look like:
+
+
+.. literalinclude:: ../../src/python/pygrackle/utilities/grdata.py
+   :language: none
+   :start-after: [[[BEGIN:DIRECTORY-CARTOON]]]
+   :end-before: [[[END:DIRECTORY-CARTOON]]]
diff --git a/doc/source/index.rst b/doc/source/index.rst
index a1c007c3..51f314aa 100644
--- a/doc/source/index.rst
+++ b/doc/source/index.rst
@@ -39,6 +39,7 @@ Contents:
    :maxdepth: 2
 
    Installation.rst
+   Examples.rst
    Testing.rst
    Integration.rst
    Interaction.rst
@@ -47,6 +48,7 @@ Contents:
    Reference.rst
    Versioning.rst
    Python.rst
+   Tools.rst
    Conduct.rst
    Contributing.rst
    Help.rst
diff --git a/external/picohash.h b/external/picohash.h
new file mode 100644
index 00000000..28b37422
--- /dev/null
+++ b/external/picohash.h
@@ -0,0 +1,754 @@
+/*
+ * The code is placed under public domain by Kazuho Oku .
+ *
+ * The MD5 implementation is based on a public domain implementation written by
+ * Solar Designer in 2001, which is used by Dovecot.
+ *
+ * The SHA1 implementation is based on a public domain implementation written
+ * by Wei Dai and other contributors for libcrypt, used also in liboauth.
+ *
+ * The SHA224/SHA256 implementation is based on a public domain implementation
+ * by Sam Hocevar for LibTomCrypt.
+ */ +#ifndef _picohash_h_ +#define _picohash_h_ + +#include +#include +#include + +#ifdef __BIG_ENDIAN__ +#define _PICOHASH_BIG_ENDIAN +#elif defined __LITTLE_ENDIAN__ +/* override */ +#elif defined __BYTE_ORDER +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define _PICOHASH_BIG_ENDIAN +#endif +#elif !defined(_WIN32) +#include // machine/endian.h +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define _PICOHASH_BIG_ENDIAN +#endif +#endif + +#define PICOHASH_MD5_BLOCK_LENGTH 64 +#define PICOHASH_MD5_DIGEST_LENGTH 16 + +typedef struct _picohash_md5_ctx_t { + uint_fast32_t lo, hi; + uint_fast32_t a, b, c, d; + unsigned char buffer[64]; + uint_fast32_t block[PICOHASH_MD5_DIGEST_LENGTH]; + const void *(*_body)(struct _picohash_md5_ctx_t *ctx, const void *data, size_t size); +} _picohash_md5_ctx_t; + +static void _picohash_md5_init(_picohash_md5_ctx_t *ctx); +static void _picohash_md5_update(_picohash_md5_ctx_t *ctx, const void *data, size_t size); +static void _picohash_md5_final(_picohash_md5_ctx_t *ctx, void *digest); + +#define PICOHASH_SHA1_BLOCK_LENGTH 64 +#define PICOHASH_SHA1_DIGEST_LENGTH 20 + +typedef struct { + uint32_t buffer[PICOHASH_SHA1_BLOCK_LENGTH / 4]; + uint32_t state[PICOHASH_SHA1_DIGEST_LENGTH / 4]; + uint64_t byteCount; + uint8_t bufferOffset; +} _picohash_sha1_ctx_t; + +static void _picohash_sha1_init(_picohash_sha1_ctx_t *ctx); +static void _picohash_sha1_update(_picohash_sha1_ctx_t *ctx, const void *input, size_t len); +static void _picohash_sha1_final(_picohash_sha1_ctx_t *ctx, void *digest); + +#define PICOHASH_SHA256_BLOCK_LENGTH 64 +#define PICOHASH_SHA256_DIGEST_LENGTH 32 +#define PICOHASH_SHA224_BLOCK_LENGTH PICOHASH_SHA256_BLOCK_LENGTH +#define PICOHASH_SHA224_DIGEST_LENGTH 28 + +typedef struct { + uint64_t length; + uint32_t state[PICOHASH_SHA256_DIGEST_LENGTH / 4]; + uint32_t curlen; + unsigned char buf[PICOHASH_SHA256_BLOCK_LENGTH]; +} _picohash_sha256_ctx_t; + +static void _picohash_sha256_init(_picohash_sha256_ctx_t *ctx); +static void _picohash_sha256_update(_picohash_sha256_ctx_t *ctx, const void *data, size_t len); +static void _picohash_sha256_final(_picohash_sha256_ctx_t *ctx, void *digest); +static void _picohash_sha224_init(_picohash_sha256_ctx_t *ctx); +static void _picohash_sha224_final(_picohash_sha256_ctx_t *ctx, void *digest); + +#define PICOHASH_MAX_BLOCK_LENGTH 64 +#define PICOHASH_MAX_DIGEST_LENGTH 32 + +typedef struct { + union { + _picohash_md5_ctx_t _md5; + _picohash_sha1_ctx_t _sha1; + _picohash_sha256_ctx_t _sha256; + }; + size_t block_length; + size_t digest_length; + void (*_reset)(void *ctx); + void (*_update)(void *ctx, const void *input, size_t len); + void (*_final)(void *ctx, void *digest); + struct { + unsigned char key[PICOHASH_MAX_BLOCK_LENGTH]; + void (*hash_reset)(void *ctx); + void (*hash_final)(void *ctx, void *digest); + } _hmac; +} picohash_ctx_t; + +static void picohash_init_md5(picohash_ctx_t *ctx); +static void picohash_init_sha1(picohash_ctx_t *ctx); +static void picohash_init_sha224(picohash_ctx_t *ctx); +static void picohash_init_sha256(picohash_ctx_t *ctx); +static void picohash_update(picohash_ctx_t *ctx, const void *input, size_t len); +static void picohash_final(picohash_ctx_t *ctx, void *digest); +static void picohash_reset(picohash_ctx_t *ctx); + +static void picohash_init_hmac(picohash_ctx_t *ctx, void (*initf)(picohash_ctx_t *), const void *key, size_t key_len); + +/* following are private definitions */ + +/* + * The basic MD5 functions. 
+ * + * F is optimized compared to its RFC 1321 definition just like in Colin + * Plumb's implementation. + */ +#define _PICOHASH_MD5_F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define _PICOHASH_MD5_G(x, y, z) ((y) ^ ((z) & ((x) ^ (y)))) +#define _PICOHASH_MD5_H(x, y, z) ((x) ^ (y) ^ (z)) +#define _PICOHASH_MD5_I(x, y, z) ((y) ^ ((x) | ~(z))) + +/* + * The MD5 transformation for all four rounds. + */ +#define _PICOHASH_MD5_STEP(f, a, b, c, d, x, t, s) \ + (a) += f((b), (c), (d)) + (x) + (t); \ + (a) = (((a) << (s)) | (((a)&0xffffffff) >> (32 - (s)))); \ + (a) += (b); + +/* + * SET reads 4 input bytes in little-endian byte order and stores them + * in a properly aligned word in host byte order. + * + * The check for little-endian architectures which tolerate unaligned + * memory accesses is just an optimization. Nothing will break if it + * doesn't work. + */ +#if defined(__i386__) || defined(__x86_64__) || defined(__vax__) +#define _PICOHASH_MD5_SET(n) (*(const uint32_t *)&ptr[(n)*4]) +#define _PICOHASH_MD5_GET(n) _PICOHASH_MD5_SET(n) +#else +#define _PICOHASH_MD5_SET(n) \ + (ctx->block[(n)] = (uint_fast32_t)ptr[(n)*4] | ((uint_fast32_t)ptr[(n)*4 + 1] << 8) | ((uint_fast32_t)ptr[(n)*4 + 2] << 16) | \ + ((uint_fast32_t)ptr[(n)*4 + 3] << 24)) +#define _PICOHASH_MD5_GET(n) (ctx->block[(n)]) +#endif + +/* + * This processes one or more 64-byte data blocks, but does NOT update + * the bit counters. There're no alignment requirements. + */ +static const void *_picohash_md5_body(_picohash_md5_ctx_t *ctx, const void *data, size_t size) +{ + const unsigned char *ptr; + uint_fast32_t a, b, c, d; + uint_fast32_t saved_a, saved_b, saved_c, saved_d; + + ptr = data; + + a = ctx->a; + b = ctx->b; + c = ctx->c; + d = ctx->d; + + do { + saved_a = a; + saved_b = b; + saved_c = c; + saved_d = d; + + /* Round 1 */ + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, a, b, c, d, _PICOHASH_MD5_SET(0), 0xd76aa478, 7) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, d, a, b, c, _PICOHASH_MD5_SET(1), 0xe8c7b756, 12) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, c, d, a, b, _PICOHASH_MD5_SET(2), 0x242070db, 17) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, b, c, d, a, _PICOHASH_MD5_SET(3), 0xc1bdceee, 22) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, a, b, c, d, _PICOHASH_MD5_SET(4), 0xf57c0faf, 7) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, d, a, b, c, _PICOHASH_MD5_SET(5), 0x4787c62a, 12) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, c, d, a, b, _PICOHASH_MD5_SET(6), 0xa8304613, 17) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, b, c, d, a, _PICOHASH_MD5_SET(7), 0xfd469501, 22) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, a, b, c, d, _PICOHASH_MD5_SET(8), 0x698098d8, 7) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, d, a, b, c, _PICOHASH_MD5_SET(9), 0x8b44f7af, 12) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, c, d, a, b, _PICOHASH_MD5_SET(10), 0xffff5bb1, 17) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, b, c, d, a, _PICOHASH_MD5_SET(11), 0x895cd7be, 22) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, a, b, c, d, _PICOHASH_MD5_SET(12), 0x6b901122, 7) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, d, a, b, c, _PICOHASH_MD5_SET(13), 0xfd987193, 12) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, c, d, a, b, _PICOHASH_MD5_SET(14), 0xa679438e, 17) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_F, b, c, d, a, _PICOHASH_MD5_SET(15), 0x49b40821, 22) + + /* Round 2 */ + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, a, b, c, d, _PICOHASH_MD5_GET(1), 0xf61e2562, 5) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, d, a, b, c, _PICOHASH_MD5_GET(6), 0xc040b340, 9) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, c, d, a, b, _PICOHASH_MD5_GET(11), 0x265e5a51, 14) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, b, 
c, d, a, _PICOHASH_MD5_GET(0), 0xe9b6c7aa, 20) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, a, b, c, d, _PICOHASH_MD5_GET(5), 0xd62f105d, 5) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, d, a, b, c, _PICOHASH_MD5_GET(10), 0x02441453, 9) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, c, d, a, b, _PICOHASH_MD5_GET(15), 0xd8a1e681, 14) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, b, c, d, a, _PICOHASH_MD5_GET(4), 0xe7d3fbc8, 20) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, a, b, c, d, _PICOHASH_MD5_GET(9), 0x21e1cde6, 5) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, d, a, b, c, _PICOHASH_MD5_GET(14), 0xc33707d6, 9) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, c, d, a, b, _PICOHASH_MD5_GET(3), 0xf4d50d87, 14) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, b, c, d, a, _PICOHASH_MD5_GET(8), 0x455a14ed, 20) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, a, b, c, d, _PICOHASH_MD5_GET(13), 0xa9e3e905, 5) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, d, a, b, c, _PICOHASH_MD5_GET(2), 0xfcefa3f8, 9) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, c, d, a, b, _PICOHASH_MD5_GET(7), 0x676f02d9, 14) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_G, b, c, d, a, _PICOHASH_MD5_GET(12), 0x8d2a4c8a, 20) + + /* Round 3 */ + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, a, b, c, d, _PICOHASH_MD5_GET(5), 0xfffa3942, 4) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, d, a, b, c, _PICOHASH_MD5_GET(8), 0x8771f681, 11) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, c, d, a, b, _PICOHASH_MD5_GET(11), 0x6d9d6122, 16) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, b, c, d, a, _PICOHASH_MD5_GET(14), 0xfde5380c, 23) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, a, b, c, d, _PICOHASH_MD5_GET(1), 0xa4beea44, 4) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, d, a, b, c, _PICOHASH_MD5_GET(4), 0x4bdecfa9, 11) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, c, d, a, b, _PICOHASH_MD5_GET(7), 0xf6bb4b60, 16) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, b, c, d, a, _PICOHASH_MD5_GET(10), 0xbebfbc70, 23) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, a, b, c, d, _PICOHASH_MD5_GET(13), 0x289b7ec6, 4) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, d, a, b, c, _PICOHASH_MD5_GET(0), 0xeaa127fa, 11) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, c, d, a, b, _PICOHASH_MD5_GET(3), 0xd4ef3085, 16) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, b, c, d, a, _PICOHASH_MD5_GET(6), 0x04881d05, 23) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, a, b, c, d, _PICOHASH_MD5_GET(9), 0xd9d4d039, 4) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, d, a, b, c, _PICOHASH_MD5_GET(12), 0xe6db99e5, 11) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, c, d, a, b, _PICOHASH_MD5_GET(15), 0x1fa27cf8, 16) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_H, b, c, d, a, _PICOHASH_MD5_GET(2), 0xc4ac5665, 23) + + /* Round 4 */ + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, a, b, c, d, _PICOHASH_MD5_GET(0), 0xf4292244, 6) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, d, a, b, c, _PICOHASH_MD5_GET(7), 0x432aff97, 10) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, c, d, a, b, _PICOHASH_MD5_GET(14), 0xab9423a7, 15) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, b, c, d, a, _PICOHASH_MD5_GET(5), 0xfc93a039, 21) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, a, b, c, d, _PICOHASH_MD5_GET(12), 0x655b59c3, 6) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, d, a, b, c, _PICOHASH_MD5_GET(3), 0x8f0ccc92, 10) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, c, d, a, b, _PICOHASH_MD5_GET(10), 0xffeff47d, 15) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, b, c, d, a, _PICOHASH_MD5_GET(1), 0x85845dd1, 21) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, a, b, c, d, _PICOHASH_MD5_GET(8), 0x6fa87e4f, 6) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, d, a, b, c, _PICOHASH_MD5_GET(15), 0xfe2ce6e0, 10) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, c, d, a, b, _PICOHASH_MD5_GET(6), 0xa3014314, 15) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, 
b, c, d, a, _PICOHASH_MD5_GET(13), 0x4e0811a1, 21) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, a, b, c, d, _PICOHASH_MD5_GET(4), 0xf7537e82, 6) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, d, a, b, c, _PICOHASH_MD5_GET(11), 0xbd3af235, 10) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, c, d, a, b, _PICOHASH_MD5_GET(2), 0x2ad7d2bb, 15) + _PICOHASH_MD5_STEP(_PICOHASH_MD5_I, b, c, d, a, _PICOHASH_MD5_GET(9), 0xeb86d391, 21) + + a += saved_a; + b += saved_b; + c += saved_c; + d += saved_d; + + ptr += 64; + } while (size -= 64); + + ctx->a = a; + ctx->b = b; + ctx->c = c; + ctx->d = d; + + return ptr; +} + +inline void _picohash_md5_init(_picohash_md5_ctx_t *ctx) +{ + ctx->a = 0x67452301; + ctx->b = 0xefcdab89; + ctx->c = 0x98badcfe; + ctx->d = 0x10325476; + + ctx->lo = 0; + ctx->hi = 0; + + ctx->_body = _picohash_md5_body; +} + +inline void _picohash_md5_update(_picohash_md5_ctx_t *ctx, const void *data, size_t size) +{ + uint_fast32_t saved_lo; + unsigned long used, free; + + saved_lo = ctx->lo; + if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo) + ctx->hi++; + ctx->hi += size >> 29; + + used = saved_lo & 0x3f; + + if (used) { + free = 64 - used; + + if (size < free) { + memcpy(&ctx->buffer[used], data, size); + return; + } + + memcpy(&ctx->buffer[used], data, free); + data = (const unsigned char *)data + free; + size -= free; + ctx->_body(ctx, ctx->buffer, 64); + } + + if (size >= 64) { + data = ctx->_body(ctx, data, size & ~(unsigned long)0x3f); + size &= 0x3f; + } + + memcpy(ctx->buffer, data, size); +} + +inline void _picohash_md5_final(_picohash_md5_ctx_t *ctx, void *_digest) +{ + unsigned char *digest = _digest; + unsigned long used, free; + + used = ctx->lo & 0x3f; + + ctx->buffer[used++] = 0x80; + + free = 64 - used; + + if (free < 8) { + memset(&ctx->buffer[used], 0, free); + ctx->_body(ctx, ctx->buffer, 64); + used = 0; + free = 64; + } + + memset(&ctx->buffer[used], 0, free - 8); + + ctx->lo <<= 3; + ctx->buffer[56] = ctx->lo; + ctx->buffer[57] = ctx->lo >> 8; + ctx->buffer[58] = ctx->lo >> 16; + ctx->buffer[59] = ctx->lo >> 24; + ctx->buffer[60] = ctx->hi; + ctx->buffer[61] = ctx->hi >> 8; + ctx->buffer[62] = ctx->hi >> 16; + ctx->buffer[63] = ctx->hi >> 24; + + ctx->_body(ctx, ctx->buffer, 64); + + digest[0] = ctx->a; + digest[1] = ctx->a >> 8; + digest[2] = ctx->a >> 16; + digest[3] = ctx->a >> 24; + digest[4] = ctx->b; + digest[5] = ctx->b >> 8; + digest[6] = ctx->b >> 16; + digest[7] = ctx->b >> 24; + digest[8] = ctx->c; + digest[9] = ctx->c >> 8; + digest[10] = ctx->c >> 16; + digest[11] = ctx->c >> 24; + digest[12] = ctx->d; + digest[13] = ctx->d >> 8; + digest[14] = ctx->d >> 16; + digest[15] = ctx->d >> 24; + + memset(ctx, 0, sizeof(*ctx)); +} + +#define _PICOHASH_SHA1_K0 0x5a827999 +#define _PICOHASH_SHA1_K20 0x6ed9eba1 +#define _PICOHASH_SHA1_K40 0x8f1bbcdc +#define _PICOHASH_SHA1_K60 0xca62c1d6 + +static inline uint32_t _picohash_sha1_rol32(uint32_t number, uint8_t bits) +{ + return ((number << bits) | (number >> (32 - bits))); +} + +static inline void _picohash_sha1_hash_block(_picohash_sha1_ctx_t *s) +{ + uint8_t i; + uint32_t a, b, c, d, e, t; + + a = s->state[0]; + b = s->state[1]; + c = s->state[2]; + d = s->state[3]; + e = s->state[4]; + for (i = 0; i < 80; i++) { + if (i >= 16) { + t = s->buffer[(i + 13) & 15] ^ s->buffer[(i + 8) & 15] ^ s->buffer[(i + 2) & 15] ^ s->buffer[i & 15]; + s->buffer[i & 15] = _picohash_sha1_rol32(t, 1); + } + if (i < 20) { + t = (d ^ (b & (c ^ d))) + _PICOHASH_SHA1_K0; + } else if (i < 40) { + t = (b ^ c ^ d) + _PICOHASH_SHA1_K20; + } else if (i < 
60) { + t = ((b & c) | (d & (b | c))) + _PICOHASH_SHA1_K40; + } else { + t = (b ^ c ^ d) + _PICOHASH_SHA1_K60; + } + t += _picohash_sha1_rol32(a, 5) + e + s->buffer[i & 15]; + e = d; + d = c; + c = _picohash_sha1_rol32(b, 30); + b = a; + a = t; + } + s->state[0] += a; + s->state[1] += b; + s->state[2] += c; + s->state[3] += d; + s->state[4] += e; +} + +static inline void _picohash_sha1_add_uncounted(_picohash_sha1_ctx_t *s, uint8_t data) +{ + uint8_t *const b = (uint8_t *)s->buffer; +#ifdef _PICOHASH_BIG_ENDIAN + b[s->bufferOffset] = data; +#else + b[s->bufferOffset ^ 3] = data; +#endif + s->bufferOffset++; + if (s->bufferOffset == PICOHASH_SHA1_BLOCK_LENGTH) { + _picohash_sha1_hash_block(s); + s->bufferOffset = 0; + } +} + +inline void _picohash_sha1_init(_picohash_sha1_ctx_t *s) +{ + s->state[0] = 0x67452301; + s->state[1] = 0xefcdab89; + s->state[2] = 0x98badcfe; + s->state[3] = 0x10325476; + s->state[4] = 0xc3d2e1f0; + s->byteCount = 0; + s->bufferOffset = 0; +} + +inline void _picohash_sha1_update(_picohash_sha1_ctx_t *s, const void *_data, size_t len) +{ + const uint8_t *data = _data; + for (; len != 0; --len) { + ++s->byteCount; + _picohash_sha1_add_uncounted(s, *data++); + } +} + +inline void _picohash_sha1_final(_picohash_sha1_ctx_t *s, void *digest) +{ + // Pad with 0x80 followed by 0x00 until the end of the block + _picohash_sha1_add_uncounted(s, 0x80); + while (s->bufferOffset != 56) + _picohash_sha1_add_uncounted(s, 0x00); + + // Append length in the last 8 bytes + _picohash_sha1_add_uncounted(s, s->byteCount >> 53); // Shifting to multiply by 8 + _picohash_sha1_add_uncounted(s, s->byteCount >> 45); // as SHA-1 supports bitstreams as well as + _picohash_sha1_add_uncounted(s, s->byteCount >> 37); // byte. + _picohash_sha1_add_uncounted(s, s->byteCount >> 29); + _picohash_sha1_add_uncounted(s, s->byteCount >> 21); + _picohash_sha1_add_uncounted(s, s->byteCount >> 13); + _picohash_sha1_add_uncounted(s, s->byteCount >> 5); + _picohash_sha1_add_uncounted(s, s->byteCount << 3); + +#ifndef SHA_BIG_ENDIAN + { // Swap byte order back + int i; + for (i = 0; i < 5; i++) { + s->state[i] = (((s->state[i]) << 24) & 0xff000000) | (((s->state[i]) << 8) & 0x00ff0000) | + (((s->state[i]) >> 8) & 0x0000ff00) | (((s->state[i]) >> 24) & 0x000000ff); + } + } +#endif + + memcpy(digest, s->state, sizeof(s->state)); +} + +#define _picohash_sha256_ch(x, y, z) (z ^ (x & (y ^ z))) +#define _picohash_sha256_maj(x, y, z) (((x | y) & z) | (x & y)) +#define _picohash_sha256_s(x, y) \ + (((((uint32_t)(x)&0xFFFFFFFFUL) >> (uint32_t)((y)&31)) | ((uint32_t)(x) << (uint32_t)(32 - ((y)&31)))) & 0xFFFFFFFFUL) +#define _picohash_sha256_r(x, n) (((x)&0xFFFFFFFFUL) >> (n)) +#define _picohash_sha256_sigma0(x) (_picohash_sha256_s(x, 2) ^ _picohash_sha256_s(x, 13) ^ _picohash_sha256_s(x, 22)) +#define _picohash_sha256_sigma1(x) (_picohash_sha256_s(x, 6) ^ _picohash_sha256_s(x, 11) ^ _picohash_sha256_s(x, 25)) +#define _picohash_sha256_gamma0(x) (_picohash_sha256_s(x, 7) ^ _picohash_sha256_s(x, 18) ^ _picohash_sha256_r(x, 3)) +#define _picohash_sha256_gamma1(x) (_picohash_sha256_s(x, 17) ^ _picohash_sha256_s(x, 19) ^ _picohash_sha256_r(x, 10)) +#define _picohash_sha256_rnd(a, b, c, d, e, f, g, h, i) \ + t0 = h + _picohash_sha256_sigma1(e) + _picohash_sha256_ch(e, f, g) + K[i] + W[i]; \ + t1 = _picohash_sha256_sigma0(a) + _picohash_sha256_maj(a, b, c); \ + d += t0; \ + h = t0 + t1; + +static inline void _picohash_sha256_compress(_picohash_sha256_ctx_t *ctx, unsigned char *buf) +{ + static const uint32_t K[64] = { + 
0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL, 0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL, + 0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL, 0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL, + 0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL, 0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL, + 0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL, 0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL, + 0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL, 0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL, + 0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL, 0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL, + 0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL, 0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL, + 0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL, 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL}; + uint32_t S[8], W[64], t, t0, t1; + int i; + + /* copy state into S */ + for (i = 0; i < 8; i++) + S[i] = ctx->state[i]; + + /* copy the state into 512-bits into W[0..15] */ + for (i = 0; i < 16; i++) + W[i] = + (uint32_t)buf[4 * i] << 24 | (uint32_t)buf[4 * i + 1] << 16 | (uint32_t)buf[4 * i + 2] << 8 | (uint32_t)buf[4 * i + 3]; + + /* fill W[16..63] */ + for (i = 16; i < 64; i++) + W[i] = _picohash_sha256_gamma1(W[i - 2]) + W[i - 7] + _picohash_sha256_gamma0(W[i - 15]) + W[i - 16]; + + /* Compress */ + for (i = 0; i < 64; ++i) { + _picohash_sha256_rnd(S[0], S[1], S[2], S[3], S[4], S[5], S[6], S[7], i); + t = S[7]; + S[7] = S[6]; + S[6] = S[5]; + S[5] = S[4]; + S[4] = S[3]; + S[3] = S[2]; + S[2] = S[1]; + S[1] = S[0]; + S[0] = t; + } + + /* feedback */ + for (i = 0; i < 8; i++) + ctx->state[i] = ctx->state[i] + S[i]; +} + +static inline void _picohash_sha256_do_final(_picohash_sha256_ctx_t *ctx, void *digest, size_t len) +{ + unsigned char *out = digest; + size_t i; + + /* increase the length of the message */ + ctx->length += ctx->curlen * 8; + + /* append the '1' bit */ + ctx->buf[ctx->curlen++] = (unsigned char)0x80; + + /* if the length is currently above 56 bytes we append zeros + * then compress. Then we can fall back to padding zeros and length + * encoding like normal. 
+ */ + if (ctx->curlen > 56) { + while (ctx->curlen < 64) { + ctx->buf[ctx->curlen++] = (unsigned char)0; + } + _picohash_sha256_compress(ctx, ctx->buf); + ctx->curlen = 0; + } + + /* pad upto 56 bytes of zeroes */ + while (ctx->curlen < 56) { + ctx->buf[ctx->curlen++] = (unsigned char)0; + } + + /* store length */ + for (i = 0; i != 8; ++i) + ctx->buf[56 + i] = ctx->length >> (56 - 8 * i); + _picohash_sha256_compress(ctx, ctx->buf); + + /* copy output */ + for (i = 0; i != len / 4; ++i) { + out[i * 4] = ctx->state[i] >> 24; + out[i * 4 + 1] = ctx->state[i] >> 16; + out[i * 4 + 2] = ctx->state[i] >> 8; + out[i * 4 + 3] = ctx->state[i]; + } +} + +inline void _picohash_sha256_init(_picohash_sha256_ctx_t *ctx) +{ + ctx->curlen = 0; + ctx->length = 0; + ctx->state[0] = 0x6A09E667UL; + ctx->state[1] = 0xBB67AE85UL; + ctx->state[2] = 0x3C6EF372UL; + ctx->state[3] = 0xA54FF53AUL; + ctx->state[4] = 0x510E527FUL; + ctx->state[5] = 0x9B05688CUL; + ctx->state[6] = 0x1F83D9ABUL; + ctx->state[7] = 0x5BE0CD19UL; +} + +inline void _picohash_sha256_update(_picohash_sha256_ctx_t *ctx, const void *data, size_t len) +{ + const unsigned char *in = data; + size_t n; + + while (len > 0) { + if (ctx->curlen == 0 && len >= PICOHASH_SHA256_BLOCK_LENGTH) { + _picohash_sha256_compress(ctx, (unsigned char *)in); + ctx->length += PICOHASH_SHA256_BLOCK_LENGTH * 8; + in += PICOHASH_SHA256_BLOCK_LENGTH; + len -= PICOHASH_SHA256_BLOCK_LENGTH; + } else { + n = PICOHASH_SHA256_BLOCK_LENGTH - ctx->curlen; + if (n > len) + n = len; + memcpy(ctx->buf + ctx->curlen, in, (size_t)n); + ctx->curlen += n; + in += n; + len -= n; + if (ctx->curlen == 64) { + _picohash_sha256_compress(ctx, ctx->buf); + ctx->length += 8 * PICOHASH_SHA256_BLOCK_LENGTH; + ctx->curlen = 0; + } + } + } +} + +inline void _picohash_sha256_final(_picohash_sha256_ctx_t *ctx, void *digest) +{ + _picohash_sha256_do_final(ctx, digest, PICOHASH_SHA256_DIGEST_LENGTH); +} + +inline void _picohash_sha224_init(_picohash_sha256_ctx_t *ctx) +{ + ctx->curlen = 0; + ctx->length = 0; + ctx->state[0] = 0xc1059ed8UL; + ctx->state[1] = 0x367cd507UL; + ctx->state[2] = 0x3070dd17UL; + ctx->state[3] = 0xf70e5939UL; + ctx->state[4] = 0xffc00b31UL; + ctx->state[5] = 0x68581511UL; + ctx->state[6] = 0x64f98fa7UL; + ctx->state[7] = 0xbefa4fa4UL; +} + +inline void _picohash_sha224_final(_picohash_sha256_ctx_t *ctx, void *digest) +{ + _picohash_sha256_do_final(ctx, digest, PICOHASH_SHA224_DIGEST_LENGTH); +} + +inline void picohash_init_md5(picohash_ctx_t *ctx) +{ + ctx->block_length = PICOHASH_MD5_BLOCK_LENGTH; + ctx->digest_length = PICOHASH_MD5_DIGEST_LENGTH; + ctx->_reset = (void *)_picohash_md5_init; + ctx->_update = (void *)_picohash_md5_update; + ctx->_final = (void *)_picohash_md5_final; + + _picohash_md5_init(&ctx->_md5); +} + +inline void picohash_init_sha1(picohash_ctx_t *ctx) +{ + ctx->block_length = PICOHASH_SHA1_BLOCK_LENGTH; + ctx->digest_length = PICOHASH_SHA1_DIGEST_LENGTH; + ctx->_reset = (void *)_picohash_sha1_init; + ctx->_update = (void *)_picohash_sha1_update; + ctx->_final = (void *)_picohash_sha1_final; + _picohash_sha1_init(&ctx->_sha1); +} + +inline void picohash_init_sha224(picohash_ctx_t *ctx) +{ + ctx->block_length = PICOHASH_SHA224_BLOCK_LENGTH; + ctx->digest_length = PICOHASH_SHA224_DIGEST_LENGTH; + ctx->_reset = (void *)_picohash_sha224_init; + ctx->_update = (void *)_picohash_sha256_update; + ctx->_final = (void *)_picohash_sha224_final; + _picohash_sha224_init(&ctx->_sha256); +} + +inline void picohash_init_sha256(picohash_ctx_t *ctx) +{ + 
ctx->block_length = PICOHASH_SHA256_BLOCK_LENGTH; + ctx->digest_length = PICOHASH_SHA256_DIGEST_LENGTH; + ctx->_reset = (void *)_picohash_sha256_init; + ctx->_update = (void *)_picohash_sha256_update; + ctx->_final = (void *)_picohash_sha256_final; + _picohash_sha256_init(&ctx->_sha256); +} + +inline void picohash_update(picohash_ctx_t *ctx, const void *input, size_t len) +{ + ctx->_update(ctx, input, len); +} + +inline void picohash_final(picohash_ctx_t *ctx, void *digest) +{ + ctx->_final(ctx, digest); +} + +inline void picohash_reset(picohash_ctx_t *ctx) +{ + ctx->_reset(ctx); +} + +static inline void _picohash_hmac_apply_key(picohash_ctx_t *ctx, unsigned char delta) +{ + size_t i; + for (i = 0; i != ctx->block_length; ++i) + ctx->_hmac.key[i] ^= delta; + picohash_update(ctx, ctx->_hmac.key, ctx->block_length); + for (i = 0; i != ctx->block_length; ++i) + ctx->_hmac.key[i] ^= delta; +} + +static void _picohash_hmac_final(picohash_ctx_t *ctx, void *digest) +{ + unsigned char inner_digest[PICOHASH_MAX_DIGEST_LENGTH]; + + ctx->_hmac.hash_final(ctx, inner_digest); + + ctx->_hmac.hash_reset(ctx); + _picohash_hmac_apply_key(ctx, 0x5c); + picohash_update(ctx, inner_digest, ctx->digest_length); + memset(inner_digest, 0, ctx->digest_length); + + ctx->_hmac.hash_final(ctx, digest); +} + +static inline void _picohash_hmac_reset(picohash_ctx_t *ctx) +{ + ctx->_hmac.hash_reset(ctx); + _picohash_hmac_apply_key(ctx, 0x36); +} + +inline void picohash_init_hmac(picohash_ctx_t *ctx, void (*initf)(picohash_ctx_t *), const void *key, size_t key_len) +{ + initf(ctx); + + memset(ctx->_hmac.key, 0, ctx->block_length); + if (key_len > ctx->block_length) { + /* hash the key if it is too long */ + picohash_update(ctx, key, key_len); + picohash_final(ctx, ctx->_hmac.key); + ctx->_hmac.hash_reset(ctx); + } else { + memcpy(ctx->_hmac.key, key, key_len); + } + + /* replace reset and final function */ + ctx->_hmac.hash_reset = ctx->_reset; + ctx->_hmac.hash_final = ctx->_final; + ctx->_reset = (void *)_picohash_hmac_reset; + ctx->_final = (void *)_picohash_hmac_final; + + /* start calculating the inner hash */ + _picohash_hmac_apply_key(ctx, 0x36); +} + +#endif diff --git a/pyproject.toml b/pyproject.toml index 38fd1fae..ffa08dd1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,8 @@ dependencies = [ 'h5py', 'numpy', 'matplotlib', - 'yt>=4.0.2' + 'yt>=4.0.2', + "importlib_resources;python_version<'3.9'" ] [project.license] diff --git a/src/clib/CMakeLists.txt b/src/clib/CMakeLists.txt index 35aedc94..87c5d324 100644 --- a/src/clib/CMakeLists.txt +++ b/src/clib/CMakeLists.txt @@ -33,6 +33,13 @@ endif() configure_file(../include/grackle_float.h.in ${GRACKLE_GENRATED_PUBLIC_HEADERS}/grackle_float.h @ONLY) +# now, declare recipe for generating file_registry.h: +include(CreateProgram-grdata) +set(GRACKLE_GENERATED_PRIVATE_HEADERS "${CMAKE_CURRENT_BINARY_DIR}") +load_file_registry_string(TRUE FILE_REGISTRY_CONTENTS) +configure_file(file_registry.h.in + ${GRACKLE_GENERATED_PRIVATE_HEADERS}/file_registry.h @ONLY) + # next, declare recipe for generating auto_general.c: # fetch necessary version information via query-version.py script @@ -86,6 +93,7 @@ add_library(Grackle_Grackle calculate_gamma.c calculate_pressure.c calculate_temperature.c + data_file_utils.c dynamic_api.c grackle_units.c index_helper.c @@ -93,6 +101,7 @@ add_library(Grackle_Grackle initialize_cloudy_data.c initialize_rates.c initialize_UVbackground_data.c + os_utils.c rate_functions.c set_default_chemistry_parameters.c solve_chemistry.c @@ 
-135,9 +144,11 @@ add_library(Grackle_Grackle
 
   # C private headers
   cie_thin_cooling_rate_tables.h
+  data_file_utils.h
   grackle_chemistry_data_fields.def # <-- acts as a C header
   grackle_macros.h
   index_helper.h
+  os_utils.h
   phys_constants.h
   utils.h
 
@@ -208,15 +219,18 @@ set_target_typed_info_properties(Grackle_Grackle BOOL_PROPERTIES
 )
 
 target_include_directories(Grackle_Grackle
-  # specify where to search for generated and ordinary headers when building
+  # specify where to search for generated/ordinary private headers (only used
+  # when building grackle)
+  # -> while it may seem unnecessary to specify the ordinary private headers'
+  #    directory, it's necessary to compile auto_general.c
+  PRIVATE ${GRACKLE_GENERATED_PRIVATE_HEADERS} ${CMAKE_CURRENT_SOURCE_DIR}
+
+  # specify where to search for generated/ordinary public headers when building
   # grackle AND when linking against grackle under inclusion approach #1
-  # -> while it may seem unnecessary to specify the ordinary headers' directory
-  #    while building grackle, it's necessary to compile auto_general.c
-  PUBLIC $ # generated hdrs
-         $ # public hdrs
-         $ # private hdrs
+  PUBLIC $
+         $
 
-  # specify where to search for the other headers when linking against grackle
+  # specify where to search for public headers when linking against grackle
   # (for inclusion approach #2)
   INTERFACE $
 )
@@ -224,6 +238,7 @@ target_include_directories(Grackle_Grackle
 target_link_libraries(Grackle_Grackle
   PRIVATE toolchain::m GRACKLE_HDF5_C
+          picohash
           $<$:OpenMP::OpenMP_Fortran>
           $<$:OpenMP::OpenMP_C>
 )
@@ -256,6 +271,25 @@ if ("${CMAKE_SYSTEM_NAME}" MATCHES "^(Linux)|(Darwin)$")
   target_compile_definitions(Grackle_Grackle PRIVATE "LINUX")
 endif()
 
+# define the appropriate platform-specific macro for use within os_utils.c
+# -> this is used to define functionality that specifies the default
+#    location where data is stored (this location is preferred by the OS)
+#    when the GRACKLE_DATA_DIR isn't specified
+if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
+  target_compile_definitions(Grackle_Grackle PRIVATE PLATFORM_MACOS)
+else()
+  # check if we can treat the platform as a generic unix-like OS that provides
+  # a few standard headers
+
+  include(CheckIncludeFiles)
+  unset(HAS_POSIX_HDRS CACHE)
+  CHECK_INCLUDE_FILES("unistd.h;sys/types.h;pwd.h" HAS_POSIX_HDRS)
+  if(UNIX AND "${HAS_POSIX_HDRS}" STREQUAL "1")
+    target_compile_definitions(Grackle_Grackle PRIVATE PLATFORM_GENERIC_UNIX)
+  endif()
+endif()
+
+
 # If we are building a shared library, construct a "configuration Makefile"
 # that is used to build the code examples
 # - we're mostly just doing this to let us run the code examples as part of the
diff --git a/src/clib/Make.config.assemble b/src/clib/Make.config.assemble
index 623cc5ea..519b1fe1 100644
--- a/src/clib/Make.config.assemble
+++ b/src/clib/Make.config.assemble
@@ -161,6 +161,18 @@
     $(error Illegal value '$(CONFIG_OMP)' for $$(CONFIG_OMP))
 endif
 
+#=======================================================================
+# DETERMINE PLATFORM-SPECIFIC DEFINES
+#=======================================================================
+
+  # this is only used within os_utils.c
+  PLATFORM := $(shell uname)
+  ifeq ($(PLATFORM),Darwin)
+    PLATFORM_DEFINES = -DPLATFORM_MACOS
+  else
+    PLATFORM_DEFINES = -DPLATFORM_GENERIC_UNIX
+  endif
+
 #=======================================================================
 # ASSIGN ALL OUTPUT VARIABLES
 #=======================================================================
@@ -193,12 +205,15 @@
     LDOUTPUT_FLAGS = $(ASSEMBLE_LDOUTPUT_FLAGS)
 
     DEFINES = 
$(MACH_DEFINES) \ - $(ASSEMBLE_IO_DEFINES) + $(ASSEMBLE_IO_DEFINES) \ + $(PLATFORM_DEFINES) PUBLIC_HEADER_SRCDIR = $(GRACKLE_DIR)/../include AUTOGEN_DIR = $(GRACKLE_DIR)/autogen - BUILD_INCLUDES = -I$(PUBLIC_HEADER_SRCDIR) -I$(AUTOGEN_DIR) + BUILD_INCLUDES = -I$(PUBLIC_HEADER_SRCDIR) \ + -I$(AUTOGEN_DIR) \ + -I$(GRACKLE_DIR)/../../external INCLUDES = $(MACH_INCLUDES) \ $(MAKEFILE_INCLUDES) \ @@ -214,10 +229,12 @@ INSTALL_LIB_DIR = $(DEFAULT_INSTALL_PREFIX)/lib INSTALL_INCLUDE_DIR = $(DEFAULT_INSTALL_PREFIX)/include + INSTALL_BIN_DIR = $(DEFAULT_INSTALL_PREFIX)/bin ifdef MACH_INSTALL_PREFIX INSTALL_LIB_DIR = $(MACH_INSTALL_PREFIX)/lib INSTALL_INCLUDE_DIR = $(MACH_INSTALL_PREFIX)/include + INSTALL_BIN_DIR = $(MACH_INSTALL_PREFIX)/bin endif ifdef MACH_INSTALL_LIB_DIR @@ -227,3 +244,7 @@ ifdef MACH_INSTALL_INCLUDE_DIR INSTALL_INCLUDE_DIR = $(MACH_INSTALL_INCLUDE_DIR) endif + + ifdef MACH_INSTALL_BIN_DIR + INSTALL_BIN_DIR = $(MACH_INSTALL_BIN_DIR) + endif diff --git a/src/clib/Make.config.objects b/src/clib/Make.config.objects index 7940c11b..806d9fdb 100644 --- a/src/clib/Make.config.objects +++ b/src/clib/Make.config.objects @@ -39,4 +39,6 @@ OBJS_CONFIG_LIB = \ update_UVbackground_rates.lo \ rate_functions.lo \ initialize_rates.lo \ - utils.lo \ No newline at end of file + utils.lo \ + data_file_utils.lo \ + os_utils.lo diff --git a/src/clib/Make.mach.darwin b/src/clib/Make.mach.darwin index 26268727..640cf36b 100644 --- a/src/clib/Make.mach.darwin +++ b/src/clib/Make.mach.darwin @@ -109,4 +109,5 @@ MACH_LIBS = $(LOCAL_LIBS_HDF5) $(LOCAL_LIBS_MACH) MACH_INSTALL_PREFIX = $(HOME)/grackle_install MACH_INSTALL_LIB_DIR = -MACH_INSTALL_INCLUDE_DIR = \ No newline at end of file +MACH_INSTALL_INCLUDE_DIR = +MACH_INSTALL_BIN_DIR = diff --git a/src/clib/Make.mach.linux-gnu b/src/clib/Make.mach.linux-gnu index d21e452b..abf90c0c 100644 --- a/src/clib/Make.mach.linux-gnu +++ b/src/clib/Make.mach.linux-gnu @@ -83,3 +83,4 @@ MACH_LIBS = $(LOCAL_LIBS_HDF5) $(LOCAL_LIBS_MACH) MACH_INSTALL_PREFIX = $(HOME)/local MACH_INSTALL_LIB_DIR = MACH_INSTALL_INCLUDE_DIR = +MACH_INSTALL_BIN_DIR = diff --git a/src/clib/Make.mach.nasa-aitken-rome b/src/clib/Make.mach.nasa-aitken-rome index eab28523..15ca034a 100644 --- a/src/clib/Make.mach.nasa-aitken-rome +++ b/src/clib/Make.mach.nasa-aitken-rome @@ -99,3 +99,4 @@ MACH_LIBS = $(LOCAL_LIBS_HDF5) $(LOCAL_LIBS_MACH) #$(LOCAL_LIBS_PYTHON) MACH_INSTALL_PREFIX = $(HOME)/local MACH_INSTALL_LIB_DIR = MACH_INSTALL_INCLUDE_DIR = +MACH_INSTALL_BIN_DIR = diff --git a/src/clib/Make.mach.nasa-pleiades b/src/clib/Make.mach.nasa-pleiades index 3dcbe2fb..19194bdc 100644 --- a/src/clib/Make.mach.nasa-pleiades +++ b/src/clib/Make.mach.nasa-pleiades @@ -105,3 +105,4 @@ MACH_LIBS = $(LOCAL_LIBS_HDF5) $(LOCAL_LIBS_MACH) #$(LOCAL_LIBS_PYTHON) MACH_INSTALL_PREFIX = $(HOME)/local MACH_INSTALL_LIB_DIR = MACH_INSTALL_INCLUDE_DIR = +MACH_INSTALL_BIN_DIR = diff --git a/src/clib/Make.mach.ncsa-bluewaters-cray b/src/clib/Make.mach.ncsa-bluewaters-cray index f5770934..7528eebf 100644 --- a/src/clib/Make.mach.ncsa-bluewaters-cray +++ b/src/clib/Make.mach.ncsa-bluewaters-cray @@ -95,3 +95,4 @@ MACH_LIBS = $(LOCAL_LIBS_HDF5) $(LOCAL_LIBS_MACH) MACH_INSTALL_PREFIX = $(HOME)/local/cray MACH_INSTALL_LIB_DIR = MACH_INSTALL_INCLUDE_DIR = +MACH_INSTALL_BIN_DIR = diff --git a/src/clib/Make.mach.ncsa-bluewaters-gnu b/src/clib/Make.mach.ncsa-bluewaters-gnu index 68f7c87c..7efe6315 100644 --- a/src/clib/Make.mach.ncsa-bluewaters-gnu +++ b/src/clib/Make.mach.ncsa-bluewaters-gnu @@ -97,3 +97,4 @@ MACH_LIBS = 
$(LOCAL_LIBS_HDF5) $(LOCAL_LIBS_MACH) MACH_INSTALL_PREFIX = $(HOME)/local MACH_INSTALL_LIB_DIR = MACH_INSTALL_INCLUDE_DIR = +MACH_INSTALL_BIN_DIR = diff --git a/src/clib/Make.mach.summit b/src/clib/Make.mach.summit index 8aaf1252..16afb920 100644 --- a/src/clib/Make.mach.summit +++ b/src/clib/Make.mach.summit @@ -81,4 +81,5 @@ MACH_LIBS = $(LOCAL_LIBS_HDF5) $(LOCAL_LIBS_MACH) MACH_INSTALL_PREFIX = $(HOME)/local MACH_INSTALL_LIB_DIR = -MACH_INSTALL_INCLUDE_DIR = \ No newline at end of file +MACH_INSTALL_INCLUDE_DIR = +MACH_INSTALL_BIN_DIR = diff --git a/src/clib/Make.mach.tacc-stampede-gnu b/src/clib/Make.mach.tacc-stampede-gnu index cddf1f5c..7d54e288 100644 --- a/src/clib/Make.mach.tacc-stampede-gnu +++ b/src/clib/Make.mach.tacc-stampede-gnu @@ -86,3 +86,4 @@ MACH_LIBS = $(LOCAL_LIBS_HDF5) $(LOCAL_LIBS_MACH) MACH_INSTALL_PREFIX = $(HOME)/local MACH_INSTALL_LIB_DIR = MACH_INSTALL_INCLUDE_DIR = +MACH_INSTALL_BIN_DIR = diff --git a/src/clib/Make.mach.tacc-stampede-intel b/src/clib/Make.mach.tacc-stampede-intel index 5adb9a16..6c0f5b11 100644 --- a/src/clib/Make.mach.tacc-stampede-intel +++ b/src/clib/Make.mach.tacc-stampede-intel @@ -96,3 +96,4 @@ MACH_LIBS = $(LOCAL_LIBS_HDF5) $(LOCAL_LIBS_MACH) MACH_INSTALL_PREFIX = $(HOME)/local MACH_INSTALL_LIB_DIR = MACH_INSTALL_INCLUDE_DIR = +MACH_INSTALL_BIN_DIR = diff --git a/src/clib/Make.mach.tigercpu b/src/clib/Make.mach.tigercpu index 2979d2ee..fd4e256b 100644 --- a/src/clib/Make.mach.tigercpu +++ b/src/clib/Make.mach.tigercpu @@ -85,3 +85,4 @@ MACH_LIBS = $(LOCAL_LIBS_HDF5) $(LOCAL_LIBS_MACH) MACH_INSTALL_PREFIX = $(HOME)/grackle-build MACH_INSTALL_LIB_DIR = MACH_INSTALL_INCLUDE_DIR = +MACH_INSTALL_BIN_DIR = diff --git a/src/clib/Make.mach.uiuc-campus-gnu b/src/clib/Make.mach.uiuc-campus-gnu index ee17f2d9..8c7e66a4 100644 --- a/src/clib/Make.mach.uiuc-campus-gnu +++ b/src/clib/Make.mach.uiuc-campus-gnu @@ -92,3 +92,4 @@ MACH_LIBS = $(LOCAL_LIBS_HDF5) $(LOCAL_LIBS_MACH) MACH_INSTALL_PREFIX = $(HOME)/local MACH_INSTALL_LIB_DIR = MACH_INSTALL_INCLUDE_DIR = +MACH_INSTALL_BIN_DIR = diff --git a/src/clib/Make.mach.uiuc-campus-intel b/src/clib/Make.mach.uiuc-campus-intel index e18aee9e..cf1e8a8f 100644 --- a/src/clib/Make.mach.uiuc-campus-intel +++ b/src/clib/Make.mach.uiuc-campus-intel @@ -91,3 +91,4 @@ MACH_LIBS = $(LOCAL_LIBS_HDF5) $(LOCAL_LIBS_MACH) MACH_INSTALL_PREFIX = $(HOME)/local MACH_INSTALL_LIB_DIR = MACH_INSTALL_INCLUDE_DIR = +MACH_INSTALL_BIN_DIR = diff --git a/src/clib/Make.mach.uiuc-campus-pgi b/src/clib/Make.mach.uiuc-campus-pgi index a3013c45..fd0da040 100644 --- a/src/clib/Make.mach.uiuc-campus-pgi +++ b/src/clib/Make.mach.uiuc-campus-pgi @@ -92,3 +92,4 @@ MACH_LIBS = $(LOCAL_LIBS_HDF5) $(LOCAL_LIBS_MACH) MACH_INSTALL_PREFIX = $(HOME)/local MACH_INSTALL_LIB_DIR = MACH_INSTALL_INCLUDE_DIR = +MACH_INSTALL_BIN_DIR = diff --git a/src/clib/Make.mach.unknown b/src/clib/Make.mach.unknown index d78dce0d..acbdfa43 100644 --- a/src/clib/Make.mach.unknown +++ b/src/clib/Make.mach.unknown @@ -82,3 +82,4 @@ MACH_LIBS = $(LOCAL_LIBS_HDF5) $(LOCAL_LIBS_MACH) MACH_INSTALL_PREFIX = $(HOME)/local MACH_INSTALL_LIB_DIR = MACH_INSTALL_INCLUDE_DIR = +MACH_INSTALL_BIN_DIR = diff --git a/src/clib/Make.mach.wheeler-intel b/src/clib/Make.mach.wheeler-intel index c301faef..27e23e82 100644 --- a/src/clib/Make.mach.wheeler-intel +++ b/src/clib/Make.mach.wheeler-intel @@ -91,3 +91,4 @@ MACH_LIBS = $(LOCAL_LIBS_HDF5) $(LOCAL_LIBS_MACH) MACH_INSTALL_PREFIX = $(HOME)/local MACH_INSTALL_LIB_DIR = MACH_INSTALL_INCLUDE_DIR = +MACH_INSTALL_BIN_DIR = diff --git 
a/src/clib/Makefile b/src/clib/Makefile index 737b9dbf..dde0406c 100644 --- a/src/clib/Makefile +++ b/src/clib/Makefile @@ -212,7 +212,7 @@ verbose: VERBOSE = 1 # This variable is defined with Make.config.assemble. .PHONY: autogen -autogen: config_type $(AUTOGEN_DIR)/auto_general.c +autogen: config_type $(AUTOGEN_DIR)/auto_general.c $(AUTOGEN_DIR)/file_registry.h $(AUTOGEN_DIR)/grdata # in following recipe, GRACKLE_FLOAT_MACRO is set to either GRACKLE_FLOAT_4 or # GRACKLE_FLOAT_8 @@ -239,9 +239,36 @@ $(AUTOGEN_DIR)/auto_general.c: auto_general.c.in GIT_BRANCH=`$(QUERY_VERSION) git-branch` \ GIT_REVISION=`$(QUERY_VERSION) git-revision` +FILE_REGISTRY_PATH = $(GRACKLE_DIR)/../python/pygrackle/file_registry/file_registry.txt + +# Force update of file_registry.h (an internally used header file) +.PHONY: $(AUTOGEN_DIR)/file_registry.h +$(AUTOGEN_DIR)/file_registry.h: file_registry.h.in + -@(echo "Generating $@") + -@(mkdir -p $(AUTOGEN_DIR)) + @$(CONFIG_DIR)/configure_file.py --clobber \ + --input $< \ + --output $@ \ + --variable-use-literal-file-contents FILE_REGISTRY_CONTENTS=$(FILE_REGISTRY_PATH) + +# Force update of the grdata cli tool +.PHONY: $(AUTOGEN_DIR)/grdata +$(AUTOGEN_DIR)/grdata: $(GRACKLE_DIR)/../python/pygrackle/utilities/grdata.py + -@(echo "Generating $@") + -@(mkdir -p $(AUTOGEN_DIR)) + @$(CONFIG_DIR)/configure_file.py --clobber \ + --input $< \ + --output $@ \ + _GRDATA_GRACKLE_VERSION=$(LIB_RELEASE_VERSION) \ + --variable-use-literal-file-contents _GRDATA_FILE_REGISTRY_CONTENTS=$(FILE_REGISTRY_PATH) + -@(chmod a+rx $@) + # keep this recipe updated so that we always clean up the autogenerated files +# (the second line cleans up autogenerated files that might be left over from +# before we moved the autogenerated files into a subdirectory) .PHONY: clean_autogen clean_autogen: + -@rm -rf $(AUTOGEN_DIR) -@rm -f auto_*.c $(PUBLIC_HEADER_SRCDIR)/grackle_float.h #----------------------------------------------------------------------- @@ -302,6 +329,13 @@ install: fi) @echo "Installing grackle library files to $(INSTALL_LIB_DIR)." $(LIBTOOL) --mode=install install -c libgrackle.la $(INSTALL_LIB_DIR)/libgrackle.la + @echo "Installing grackle tools to $(INSTALL_BIN_DIR)." + @(if [ ! -d $(INSTALL_BIN_DIR) ]; then \ + mkdir $(INSTALL_BIN_DIR); \ + fi) + @echo "-- Copying $(AUTOGEN_DIR)/grdata to $(INSTALL_BIN_DIR)/grdata" + @cp $(AUTOGEN_DIR)/grdata $(INSTALL_BIN_DIR)/grdata + #----------------------------------------------------------------------- diff --git a/src/clib/data_file_utils.c b/src/clib/data_file_utils.c new file mode 100644 index 00000000..d427e0de --- /dev/null +++ b/src/clib/data_file_utils.c @@ -0,0 +1,360 @@ +/*********************************************************************** +/ +/ Implement logic used internally by Grackle to determine data files. +/ +/ +/ Copyright (c) 2013, Enzo/Grackle Development Team. +/ +/ Distributed under the terms of the Enzo Public Licence. +/ +/ The full license is in the file LICENSE, distributed with this +/ software. 
+************************************************************************/
+
+#include // tolower
+#include // CHAR_BIT
+#include 
+#include 
+#include 
+
+#include "picohash.h"
+
+#include "data_file_utils.h"
+#include "file_registry.h"
+#include "os_utils.h"
+
+#include "grackle.h" // get_grackle_version
+
+
+#define CKSUM_ALGORITHM "sha1"
+#define CKSUM_STR_PREFIX CKSUM_ALGORITHM ":"
+#define CKSUM_DIGEST_N_BYTES PICOHASH_SHA1_DIGEST_LENGTH
+//#define CKSUM_DIGEST_N_BYTES 20
+#define CKSUM_DIGEST_N_HEXDIGITS (2*CKSUM_DIGEST_N_BYTES)
+
+// confirm a byte is 8 bits
+// -> the C standard technically allows it to be larger. But on any modern
+//    POSIX system (or even Windows) it must be 8 bits
+// -> this scenario only comes up on highly specialized DSP hardware
+#if CHAR_BIT != 8
+  #error "our assumption that a byte is 8 bits is violated"
+#endif
+
+/// returns whether 2 null-terminated checksum strings are equal
+///
+/// A checksum string consists of 2 parts:
+/// - a prefix that includes the name of the hash algorithm used to compute the
+///   checksum followed by a colon (e.g. `md5:`, `sha1:`, `sha256:`)
+/// - the suffix that specifies the actual values of the checksum as a string
+///   of hexadecimal digits.
+///
+/// @note
+/// We could make this faster by encoding the checksum as an array of bytes
+/// (rather than a string of hexadecimal digits).
+/// - This would involve half the memory and we wouldn't need to worry about
+///   case-insensitivity.
+/// - But it's not worth the effort to do this to perform just a single
+///   checksum comparison. (we need to compute the string-representation
+///   anyway in order to effectively communicate issues with library users)
+static int cksum_str_eq_(const char* lhs, const char* rhs)
+{
+  // locales could theoretically be an issue here... (but we should be fine)
+  // as long as the strings only contain latin letters (without modifiers)
+  // and arabic numerals
+  if ((lhs == NULL) || (rhs == NULL)) return 0;
+
+  size_t len = strlen(lhs); // excludes trailing '\0'
+  if ((len == 0) || (len != strlen(rhs))) return 0;
+
+  int n_matches = 0; // <- counts the number of case-insensitive matches
+  for (size_t i = 0; i < len; i++){
+    n_matches += (tolower(lhs[i]) == tolower(rhs[i]));
+  }
+  return (len == (size_t)n_matches);
+}
+
+/// abort the program with an error message if the checksum string
+/// isn't valid
+///
+/// we abort, rather than return NULL, because an invalid string indicates a
+/// programming error (and people can simply avoid this error by running
+/// their program without any cksum calculations)
+///
+/// behavior is undefined when cksum_str is NULL
+static void assert_valid_cksum_str_(const char* cksum_str,
+                                    const char* cksum_origin_descr,
+                                    const char* extra_fmt_arg) {
+  char* err = NULL;
+
+  const char* hexstr_start = post_prefix_ptr_(cksum_str, CKSUM_STR_PREFIX);
+  const char* colon_pos = strchr(cksum_str, ':');
+
+  // ignore '\0' in length calculation
+  size_t hexstr_len = (hexstr_start == NULL) ? 0 : strlen(hexstr_start);
+
+  if ((hexstr_start == NULL) && (colon_pos == NULL)){
+    err = my_strdup_(
+      "no prefix specifying an algorithm name (e.g. \"" CKSUM_STR_PREFIX "\")"
+    );
+  } else if (hexstr_start == NULL) {
+    err = my_strdup_(
+      "the algorithm name (i.e. characters before the colon), doesn't match"
+      " \"" CKSUM_ALGORITHM "\""
+    );
+  } else if (hexstr_len != CKSUM_DIGEST_N_HEXDIGITS) {
+    const char fmt[] = "it should have %d characters after the prefix, not %d";
+    int sz = snprintf(NULL, 0, fmt, CKSUM_DIGEST_N_HEXDIGITS, (int)hexstr_len);
+    err = malloc(sz+1);
+    // NOTE: the size argument must be sz+1 so that the last character isn't
+    //       truncated (snprintf reserves 1 byte for the nul terminator)
+    snprintf(err, sz+1, fmt, CKSUM_DIGEST_N_HEXDIGITS, (int)hexstr_len);
+
+  } else {
+    const char hexdigits[] = "0123456789abcdefABCDEF";
+    int bad_digits = 0;
+    for (int i = 0; i < CKSUM_DIGEST_N_HEXDIGITS; i++) {
+      bad_digits += (strchr(hexdigits, hexstr_start[i]) == NULL);
+    }
+    if (bad_digits) {
+      err = my_strdup_(
+        "the characters after the prefix include non-hexadecimal digit(s)"
+      );
+    }
+  }
+
+  // report the problem (and abort) if any of the preceding checks failed
+  if (err != NULL) {
+    const char* extra_fmt = (extra_fmt_arg == NULL) ? "" : extra_fmt_arg;
+    fprintf(
+      stderr,
+      ("INTERNAL ERROR: There is a problem with a checksum string\n"
+       "  string value: \"%s\"\n"
+       "  origin: %s %s\n"
+       "  issue: %s\n"),
+      cksum_str, cksum_origin_descr, extra_fmt, err);
+    free(err);
+    abort();
+  }
+}
+
+/// Converts a checksum digest into a hexadecimal string
+///
+/// @param[in]  digest is an array of bytes where each byte has an
+///     arbitrary value from 0 to 255
+/// @param[in]  digest_len is the length of digest
+/// @param[out] str is an empty array of length `2*digest_len + 1`
+///     entries. At the conclusion of this operation,
+///     `str[i*2:i*2+2]` specifies the value of `digest[i]` in
+///     hexadecimal notation. `str[digest_len*2]` will be assigned
+///     the null terminator.
+static void convert_to_hex_(char* digest, int digest_len, char* str) {
+
+  // some important context: the standard does not specify whether `char` is
+  // signed or unsigned and the call to snprintf will only work if we
+  // pass the values of each byte as an unsigned char.
+  //
+  // Thus: we need to explicitly reinterpret the value of each element of
+  // digest as an unsigned char.
+  // - there are rules in the C & C++ standards for this topic. Consider
+  //   an object of type T1. We want to access it through a value of type T2.
+  //   For arbitrary types, this "type punning" is undefined behavior
+  //   (i.e. standards compliant compilers are free to do whatever they
+  //   want when they encounter undefined behavior without any consistency)
+  // - C++ is generally stricter about this topic (e.g. they forbid using
+  //   unions to reinterpret values)
+  // - there are exceptions when it comes to `unsigned char` & `char`
+  // - discussions of these rules for C & C++ are found at
+  //   https://en.cppreference.com/w/c/language/object#Strict_aliasing
+  //   https://en.cppreference.com/w/cpp/language/reinterpret_cast#Type_aliasing
+
+  for (int i = 0; i < digest_len; i++){
+    // while it may seem like there are faster ways to do this, please don't
+    // change this without including a reference or argument explaining why
+    // your approach won't invoke undefined behavior.
+
+    char elem = digest[i];
+#ifdef __cplusplus
+    unsigned char *uchar_ptr = reinterpret_cast<unsigned char*>(&elem);
+#else
+    unsigned char *uchar_ptr = (unsigned char*)(&elem);
+#endif
+    snprintf(str + 2*i, 3, "%02hhx", *uchar_ptr);
+  }
+}
+
+/// calculate the checksum for the specified file
+static char* calc_checksum_str_(const char* fname) {
+
+  FILE* fp = fopen(fname, "rb");
+  if (!fp) {
+    fprintf(stderr,
+            ("ERROR: unable to open `%s` to calculate checksum. Does the file "
+             "actually exist?"),
+            fname);
+    return NULL;
+  }
+
+  picohash_ctx_t ctx;
+  picohash_init_sha1(&ctx);
+
+  const size_t CHUNKSIZE = 4096;
+  char* buffer = malloc(CHUNKSIZE);
+
+  int any_data_read = 0;
+  size_t cur_len;
+  do {
+    cur_len = fread(buffer, 1, CHUNKSIZE, fp);
+    if (cur_len != 0) {
+      picohash_update(&ctx, buffer, cur_len);
+      any_data_read = 1;
+    }
+  } while(cur_len == CHUNKSIZE);
+  free(buffer);
+  fclose(fp);
+
+  if (!any_data_read) {
+    fprintf(stderr,
+            "ERROR: `%s` specifies a path to an empty file (or the initial "
+            "read failed)\n",
+            fname);
+    return NULL;
+  }
+
+  char digest[PICOHASH_SHA1_DIGEST_LENGTH];
+  picohash_final(&ctx, digest);
+
+  // now we just need to convert all of the bytes to a string of hexadecimal
+  // digits for the sake of comparison
+  const char prefix[] = CKSUM_STR_PREFIX;
+  size_t prefix_len = strlen(prefix); // excludes nul character
+  size_t out_length = prefix_len + (2 * sizeof(digest)) + 1; // add 1 for nul
+
+  char* out = malloc(out_length);
+  memcpy(out, prefix, prefix_len);
+  convert_to_hex_(digest, sizeof(digest), out+prefix_len);
+  return out;
+}
+
+
+static struct generic_file_props file_from_data_dir_(
+  const char* grackle_data_file, int grackle_data_file_options,
+  int calculate_checksum
+)
+{
+  // initialize output struct in a format that will denote an error (if it is
+  // never modified)
+  struct generic_file_props out = {NULL, 0, NULL, 0};
+
+  // first, let's check if the specified file name is known to Grackle
+  const char* expected_cksum_str = expected_file_cksum_(grackle_data_file);
+  if (expected_cksum_str == NULL) {
+    // in the future, depending on the value of grackle_data_file_options,
+    // we may want to provide special handling for the case where
+    // grackle_data_file starts with "user-data/..."
+
+    fprintf(stderr,
+            "ERROR: can't load %s from data directory, no such file is in "
+            "the file registry\n",
+            grackle_data_file);
+    return out;
+  }
+
+  // sanity check that the checksum from the file registry was properly
+  // formatted
+  assert_valid_cksum_str_(expected_cksum_str,
+                          "from the file-registry for the file named",
+                          grackle_data_file);
+
+  // now it's time to construct the full path to the file (if it exists)
+  grackle_version version_info = get_grackle_version();
+
+  // get the data_directory
+  char* data_dir_path = get_data_dir_(get_platform_());
+  if (data_dir_path == NULL) return out;
+
+  const char* path_parts[4] = {
+    data_dir_path, "data-store-v1", version_info.version, grackle_data_file
+  };
+  char* full_path = join_parts_('/', path_parts, 4);
+  free(data_dir_path);
+
+  if (calculate_checksum == 0) { // skip the checksum calculation
+    out.path = full_path;
+    out.path_requires_dealloc = 1;
+    return out;
+  }
+
+  char* measured_cksum_str = calc_checksum_str_(full_path);
+
+  if (measured_cksum_str == NULL) {
+    free(full_path); // <- avoid leaking the path on this error-path
+    return out;
+  } else if (cksum_str_eq_(measured_cksum_str, expected_cksum_str) == 0) {
+    fprintf(stderr,
+            "ERROR: the measured checksum doesn't match expectations\n"
+            "  -> measured: \"%s\"\n"
+            "  -> expected: \"%s\"\n"
+            "  -> path: `%s`\n"
+            "  This error is indicative of 1 of 3 scenarios:\n"
+            "    1. There is a bug in the core Grackle library for locating\n"
+            "       the file or computing the checksum\n"
+            "    2. There is a bug in Grackle's data-file management\n"
+            "       tool.\n"
+static struct generic_file_props file_from_data_dir_(
+  const char* grackle_data_file, int grackle_data_file_options,
+  int calculate_checksum
+)
+{
+  // initialize output struct in a format that will denote an error (if it is
+  // never modified)
+  struct generic_file_props out = {NULL, 0, NULL, 0};
+
+  // first, let's check if the specified file name is known to Grackle
+  const char* expected_cksum_str = expected_file_cksum_(grackle_data_file);
+  if (expected_cksum_str == NULL) {
+    // in the future, depending on the value of grackle_data_file_options,
+    // we may want to provide special handling for the case where
+    // grackle_data_file starts with "user-data/..."
+
+    fprintf(stderr,
+            "ERROR: can't load %s from data directory, no such file is in "
+            "the file registry\n",
+            grackle_data_file);
+    return out;
+  }
+
+  // sanity check that the checksum from the file registry is properly formatted
+  assert_valid_cksum_str_(expected_cksum_str,
+                          "from the file-registry for the file named",
+                          grackle_data_file);
+
+  // now it's time to construct the full path to the file (if it exists)
+  grackle_version version_info = get_grackle_version();
+
+  // get the data directory
+  char* data_dir_path = get_data_dir_(get_platform_());
+  if (data_dir_path == NULL) return out;
+
+  const char* path_parts[4] = {
+    data_dir_path, "data-store-v1", version_info.version, grackle_data_file
+  };
+  char* full_path = join_parts_('/', path_parts, 4);
+  free(data_dir_path);
+
+  if (calculate_checksum == 0) { // skip the checksum calculation
+    out.path = full_path;
+    out.path_requires_dealloc = 1;
+    return out;
+  }
+
+  char* measured_cksum_str = calc_checksum_str_(full_path);
+
+  if (measured_cksum_str == NULL) {
+    free(full_path);
+    return out;
+  } else if (cksum_str_eq_(measured_cksum_str, expected_cksum_str) == 0) {
+    fprintf(stderr,
+            "ERROR: the measured checksum doesn't match expectations\n"
+            "   -> measured: \"%s\"\n"
+            "   -> expected: \"%s\"\n"
+            "   -> path: `%s`\n"
+            "  This error is indicative of 1 of 3 scenarios:\n"
+            "    1. There is a bug in the core Grackle library for locating\n"
+            "       the file or computing the checksum\n"
+            "    2. There is a bug in Grackle's data-file management\n"
+            "       tool.\n"
+            "    3. It isn't Grackle's fault. Either the datafile was\n"
+            "       corrupted or it's the fault of the user/some other tool\n"
+            "       that tried to modify the file.\n",
+            measured_cksum_str, expected_cksum_str, full_path);
+    free(measured_cksum_str);
+    free(full_path);
+  } else {
+    out.path = full_path;
+    out.path_requires_dealloc = 1;
+    out.checksum = measured_cksum_str;
+    out.checksum_requires_dealloc = 1;
+  }
+  return out;
+}
+
+
+struct generic_file_props determine_data_file_(const char* grackle_data_file,
+                                               int grackle_data_file_options)
+{
+  // initialize output struct in a format that will denote an error (if it is
+  // never modified)
+  struct generic_file_props out = {NULL, 0, NULL, 0};
+
+  if (grackle_data_file == NULL) {
+    fprintf(stderr, "grackle_data_file must not be NULL\n");
+    return out;
+  }
+
+  if (grackle_data_file_options == -1) {
+    grackle_data_file_options = GR_DFOPT_FULLPATH_NO_CKSUM; // the legacy case
+  }
+
+  switch (grackle_data_file_options) {
+    case GR_DFOPT_FULLPATH_NO_CKSUM: {
+      out.path = grackle_data_file;
+      return out;
+    }
+    case GR_DFOPT_MANAGED: {
+      return file_from_data_dir_(grackle_data_file, grackle_data_file_options,
+                                 1);
+    }
+    case GR_DFOPT_MANAGED_NO_CKSUM: {
+      return file_from_data_dir_(grackle_data_file, grackle_data_file_options,
+                                 0);
+    }
+    default: {
+      fprintf(stderr, "grackle_data_file_options has an unexpected value: %d\n",
+              grackle_data_file_options);
+      return out;
+    }
+  }
+}
+
+void free_generic_file_props_(struct generic_file_props* ptr) {
+  if (ptr != NULL) {
+    if (ptr->path_requires_dealloc){
+      free((char*)ptr->path);
+    }
+    ptr->path = NULL;
+    if (ptr->checksum_requires_dealloc){
+      free((char*)ptr->checksum);
+    }
+    ptr->checksum = NULL;
+  }
+}
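
[Editor's note: from the caller's perspective the three options map onto simple
configuration choices. The diff's own examples exercise this through pygrackle;
a condensed sketch of that usage:

    from pygrackle import chemistry_data, constants

    my_chemistry = chemistry_data()

    # managed mode: grackle_data_file holds just a registry name; the library
    # resolves it inside the data store and verifies its SHA-1 checksum
    my_chemistry.grackle_data_file_options = constants.GR_DFOPT_MANAGED
    my_chemistry.grackle_data_file = "CloudyData_UVB=HM2012.h5"

    # legacy mode (also what the default of -1 maps onto): the value is treated
    # as a full path and no checksum is computed
    # my_chemistry.grackle_data_file_options = constants.GR_DFOPT_FULLPATH_NO_CKSUM
    # my_chemistry.grackle_data_file = "/path/to/CloudyData_UVB=HM2012.h5"
]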
diff --git a/src/clib/data_file_utils.h b/src/clib/data_file_utils.h
new file mode 100644
index 00000000..664e3b3c
--- /dev/null
+++ b/src/clib/data_file_utils.h
@@ -0,0 +1,48 @@
+/***********************************************************************
+/
+/ Declare utility functions used internally by Grackle to encapsulate
+/ logic for determining data files
+/
+/
+/ Copyright (c) 2013, Enzo/Grackle Development Team.
+/
+/ Distributed under the terms of the Enzo Public Licence.
+/
+/ The full license is in the file LICENSE, distributed with this
+/ software.
+************************************************************************/
+
+#ifndef DATA_FILE_UTILS_H
+#define DATA_FILE_UTILS_H
+
+/// used as the return type when determining the location of a data file
+///
+/// if ``path`` is ``NULL``, then there is an error. This struct should NEVER
+/// be exposed as part of the public API
+struct generic_file_props {
+  const char* path;
+  int path_requires_dealloc;
+  const char* checksum;
+  int checksum_requires_dealloc;
+};
+
+
+/// Determines the path to the data file.
+///
+/// @param[in] grackle_data_file specified grackle data file
+/// @param[in] grackle_data_file_options specifies how to interpret the first
+///     argument
+///
+/// @note
+/// If this functionality ever gets exposed as part of the public API, we
+/// should stop using generic_file_props as a return type. We should also
+/// make it possible for the caller to pre-allocate any buffers to hold the
+/// file path and the computed checksum (in that case, we should consider
+/// adopting an interface sort of like snprintf)
+struct generic_file_props determine_data_file_(const char* grackle_data_file,
+                                               int grackle_data_file_options);
+
+/// Deallocates the memory held within a given ``struct generic_file_props``
+void free_generic_file_props_(struct generic_file_props* ptr);
+
+#endif /* DATA_FILE_UTILS_H */
diff --git a/src/clib/file_registry.h.in b/src/clib/file_registry.h.in
new file mode 100644
index 00000000..506c0a67
--- /dev/null
+++ b/src/clib/file_registry.h.in
@@ -0,0 +1,43 @@
+/***********************************************************************
+/
+/ Template header-file that is used to internally specify the file
+/ registry. This is only intended to be included once
+/
+/
+/ Copyright (c) 2013, Enzo/Grackle Development Team.
+/
+/ Distributed under the terms of the Enzo Public Licence.
+/
+/ The full license is in the file LICENSE, distributed with this
+/ software.
+************************************************************************/
+
+#ifndef FILE_REGISTRY_H
+#define FILE_REGISTRY_H
+
+#include <string.h> // strcmp
+
+typedef struct { const char* fname; const char* cksum; } registry_entry;
+
+static registry_entry file_registry[] = {
+@FILE_REGISTRY_CONTENTS@
+};
+
+/// return the full checksum string of the file if it is in the registry
+///
+/// returns NULL if there is no match!
+static inline const char* expected_file_cksum_(const char* fname) {
+  if (fname == NULL) return NULL;
+
+  const size_t n_entries = sizeof(file_registry) / sizeof(registry_entry);
+  for (size_t i = 0; i < n_entries; i++) {
+    if (strcmp(fname, file_registry[i].fname) == 0) {
+      return file_registry[i].cksum;
+    }
+  }
+  return NULL;
+}
+
+
+#endif /* FILE_REGISTRY_H */
diff --git a/src/clib/grackle_chemistry_data_fields.def b/src/clib/grackle_chemistry_data_fields.def
index bcc8aa4a..22df5656 100644
--- a/src/clib/grackle_chemistry_data_fields.def
+++ b/src/clib/grackle_chemistry_data_fields.def
@@ -53,6 +53,9 @@ ENTRY(UVbackground, INT, 0)
 /* data file containing cooling and UV background tables */
 ENTRY(grackle_data_file, STRING, "")
 
+/* specifies handling of grackle_data_file */
+ENTRY(grackle_data_file_options, INT, -1)
+
 /* Use a CMB temperature floor 0) no, 1) yes */
 ENTRY(cmb_temperature_floor, INT, 1)
 
diff --git a/src/clib/initialize_UVbackground_data.c b/src/clib/initialize_UVbackground_data.c
index eff87ba8..9c6e7efa 100644
--- a/src/clib/initialize_UVbackground_data.c
+++ b/src/clib/initialize_UVbackground_data.c
@@ -48,7 +48,8 @@ void initialize_empty_UVBtable_struct(UVBtable *table)
 
 // Initialize UV Background data
-int initialize_UVbackground_data(chemistry_data *my_chemistry,
+int initialize_UVbackground_data(const char* path,
+                                 chemistry_data *my_chemistry,
                                  chemistry_data_storage *my_rates)
 {
   long long Nz;
@@ -71,8 +72,8 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
 
   if (grackle_verbose)
     fprintf(stdout, "Reading UV background data from %s.\n",
-            my_chemistry->grackle_data_file);
-  file_id = H5Fopen(my_chemistry->grackle_data_file,
+            path);
+  file_id = H5Fopen(path,
                     H5F_ACC_RDONLY, H5P_DEFAULT);
 
@@ -81,7 +82,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   dset_id = H5Dopen(file_id, "/UVBRates/Info");
   if (dset_id == h5_error) {
     fprintf(stderr, "Can't open 'Info' dataset in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     return FAIL;
   }
 
@@ -107,21 +108,21 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   dset_id = H5Dopen(file_id, "/UVBRates/z");
   if (dset_id == h5_error) {
     fprintf(stderr, "Can't open redshift dataset ('z') in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     return FAIL;
   }
 
   dspace_id = H5Dget_space(dset_id);
   if (dspace_id == h5_error) {
     fprintf(stderr, "Error opening dataspace for dataset 'z' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     return FAIL;
   }
 
   Nz = H5Sget_simple_extent_npoints(dspace_id);
   if(Nz <= 0) {
     fprintf(stderr, "Redshift dataset ('z') has inappropriate size = %lld in %s.\n",
-            Nz, my_chemistry->grackle_data_file);
+            Nz, path);
     return FAIL;
   }
 
@@ -164,7 +165,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   if(! read_dataset(file_id, "/UVBRates/z",
                     my_rates->UVbackground_table.z) ) {
     fprintf(stderr, "Error reading dataset 'z' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     return FAIL;
   }
 
@@ -172,7 +173,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   if(! read_dataset(file_id, "/UVBRates/Chemistry/k24",
                     my_rates->UVbackground_table.k24) ) {
     fprintf(stderr, "Error reading dataset '/UVBRates/Chemistry/k24' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     return FAIL;
   }
 
@@ -180,7 +181,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   if(! read_dataset(file_id, "/UVBRates/Chemistry/k25",
                     my_rates->UVbackground_table.k25) ) {
     fprintf(stderr, "Error reading dataset '/UVBRates/Chemistry/k25' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     return FAIL;
   }
 
@@ -188,7 +189,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   if(! read_dataset(file_id, "/UVBRates/Chemistry/k26",
                     my_rates->UVbackground_table.k26) ) {
     fprintf(stderr, "Error reading dataset '/UVBRates/Chemistry/k26' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     return FAIL;
   }
 
@@ -198,7 +199,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   if(! read_dataset(file_id, "/UVBRates/Chemistry/k27",
                     my_rates->UVbackground_table.k27) ) {
     fprintf(stderr, "Error reading dataset '/UVBRates/Chemistry/k27' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     return FAIL;
   }
 
@@ -206,7 +207,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   if(! read_dataset(file_id, "/UVBRates/Chemistry/k28",
                     my_rates->UVbackground_table.k28) ) {
     fprintf(stderr, "Error reading dataset '/UVBRates/Chemistry/k28' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     return FAIL;
   }
 
@@ -214,7 +215,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   if(! read_dataset(file_id, "/UVBRates/Chemistry/k29",
                     my_rates->UVbackground_table.k29) ) {
     fprintf(stderr, "Error reading dataset '/UVBRates/Chemistry/k29' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     return FAIL;
   }
 
@@ -222,7 +223,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   if(! read_dataset(file_id, "/UVBRates/Chemistry/k30",
                     my_rates->UVbackground_table.k30) ) {
     fprintf(stderr, "Error reading dataset '/UVBRates/Chemistry/k30' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     return FAIL;
   }
 
@@ -230,7 +231,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   if(! read_dataset(file_id, "/UVBRates/Chemistry/k31",
                     my_rates->UVbackground_table.k31) ) {
     fprintf(stderr, "Error reading dataset '/UVBRates/Chemistry/k31' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     return FAIL;
   }
 
@@ -240,7 +241,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   if(! read_dataset(file_id, "/UVBRates/Photoheating/piHI",
                     my_rates->UVbackground_table.piHI) ) {
     fprintf(stderr, "Error reading dataset '/UVBRates/Photoheating/piHI' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     return FAIL;
   }
 
@@ -248,7 +249,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   if(! read_dataset(file_id, "/UVBRates/Photoheating/piHeII",
                     my_rates->UVbackground_table.piHeII) ) {
     fprintf(stderr, "Error reading dataset '/UVBRates/Photoheating/piHeII' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     return FAIL;
   }
 
@@ -256,7 +257,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   if(! read_dataset(file_id, "/UVBRates/Photoheating/piHeI",
                     my_rates->UVbackground_table.piHeI) ) {
     fprintf(stderr, "Error reading dataset '/UVBRates/Photoheating/piHeI' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     return FAIL;
   }
 
@@ -266,7 +267,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   if(! read_dataset(file_id, "/UVBRates/CrossSections/hi_avg_crs",
                     my_rates->UVbackground_table.crsHI) ) {
     fprintf(stderr, "Error reading dataset '/UVBRates/CrossSections/hi_avg_crs' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     fprintf(stderr, "In order to use self-shielding, you must use the shielding datasets\n");
     return FAIL;
   }
 
@@ -275,7 +276,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   if(! read_dataset(file_id, "/UVBRates/CrossSections/heii_avg_crs",
                     my_rates->UVbackground_table.crsHeII) ) {
     fprintf(stderr, "Error reading dataset '/UVBRates/CrossSections/heii_avg_crs' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     fprintf(stderr, "In order to use self-shielding, you must use the shielding datasets\n");
     return FAIL;
   }
 
@@ -284,7 +285,7 @@ int initialize_UVbackground_data(chemistry_data *my_chemistry,
   if(! read_dataset(file_id, "/UVBRates/CrossSections/hei_avg_crs",
                     my_rates->UVbackground_table.crsHeI) ) {
     fprintf(stderr, "Error reading dataset '/UVBRates/CrossSections/hei_avg_crs' in %s.\n",
-            my_chemistry->grackle_data_file);
+            path);
     fprintf(stderr, "In order to use self-shielding, you must use the shielding datasets\n");
     return FAIL;
   }
 
diff --git a/src/clib/initialize_chemistry_data.c b/src/clib/initialize_chemistry_data.c
index ed51108b..858f344f 100644
--- a/src/clib/initialize_chemistry_data.c
+++ b/src/clib/initialize_chemistry_data.c
@@ -16,6 +16,7 @@
 #include 
 #include 
 #include 
+#include "data_file_utils.h"
 #include "grackle.h"
 #include "grackle_macros.h"
 #include "grackle_types.h"
@@ -40,13 +41,13 @@ grackle_version get_grackle_version();
 void show_parameters(FILE *fp, chemistry_data *my_chemistry);
 int _free_cloudy_data(cloudy_data *my_cloudy, chemistry_data *my_chemistry,
                       int primordial);
-int initialize_cloudy_data(chemistry_data *my_chemistry,
+int initialize_cloudy_data(const char* path, chemistry_data *my_chemistry,
                            chemistry_data_storage *my_rates,
                            cloudy_data *my_cloudy, char *group_name,
                            code_units *my_units, int read_data);
 
-int initialize_UVbackground_data(chemistry_data *my_chemistry,
+int initialize_UVbackground_data(const char* path, chemistry_data *my_chemistry,
                                  chemistry_data_storage *my_rates);
 
 int local_free_chemistry_data(chemistry_data *my_chemistry,
                               chemistry_data_storage *my_rates);
@@ -313,13 +314,22 @@ int local_initialize_chemistry_data(chemistry_data *my_chemistry,
   //* Call initialise_rates to compute rate tables.
   initialize_rates(my_chemistry, my_rates, my_units, co_length_units,
                    co_density_units);
 
+  // prepare to read data from data files
+
+  struct generic_file_props file_props =
+    determine_data_file_(my_chemistry->grackle_data_file,
+                         my_chemistry->grackle_data_file_options);
+  if (file_props.path == NULL) {
+    return GR_FAIL;
+  }
+
   /* Initialize Cloudy cooling. */
   my_rates->cloudy_data_new = 1;
   int read_data;
 
   /* Primordial tables. */
   read_data = my_chemistry->primordial_chemistry == 0;
-  if (initialize_cloudy_data(my_chemistry, my_rates,
+  if (initialize_cloudy_data(file_props.path, my_chemistry, my_rates,
                              &my_rates->cloudy_primordial,
                              "Primordial", my_units, read_data) == GR_FAIL) {
     fprintf(stderr, "Error in initialize_cloudy_data.\n");
@@ -328,7 +338,7 @@ int local_initialize_chemistry_data(chemistry_data *my_chemistry,
 
   /* Metal tables. */
   read_data = my_chemistry->metal_cooling == TRUE;
-  if (initialize_cloudy_data(my_chemistry, my_rates,
+  if (initialize_cloudy_data(file_props.path, my_chemistry, my_rates,
                              &my_rates->cloudy_metal,
                              "Metals", my_units, read_data) == GR_FAIL) {
     fprintf(stderr, "Error in initialize_cloudy_data.\n");
@@ -337,11 +347,16 @@ int local_initialize_chemistry_data(chemistry_data *my_chemistry,
 
   /* Initialize UV Background data. */
   initialize_empty_UVBtable_struct(&(my_rates->UVbackground_table));
-  if (initialize_UVbackground_data(my_chemistry, my_rates) == GR_FAIL) {
+  if (initialize_UVbackground_data(file_props.path, my_chemistry, my_rates)
+      == GR_FAIL) {
     fprintf(stderr, "Error in initialize_UVbackground_data.\n");
     return GR_FAIL;
   }
 
+  // clean up from reading in data files
+  free_generic_file_props_(&file_props);
+
+
   /* store a copy of the initial units */
   my_rates->initial_units = *my_units;
 
diff --git a/src/clib/initialize_cloudy_data.c b/src/clib/initialize_cloudy_data.c
index 26d649e9..9d8acaee 100644
--- a/src/clib/initialize_cloudy_data.c
+++ b/src/clib/initialize_cloudy_data.c
@@ -41,7 +41,7 @@ void initialize_empty_cloudy_data_struct(cloudy_data *my_cloudy)
 }
 
 // Initialize Cloudy cooling data
-int initialize_cloudy_data(chemistry_data *my_chemistry,
+int initialize_cloudy_data(const char* path, chemistry_data *my_chemistry,
                            chemistry_data_storage *my_rates,
                            cloudy_data *my_cloudy, char *group_name,
                            code_units *my_units, int read_data)
@@ -60,7 +60,7 @@ int initialize_cloudy_data(chemistry_data *my_chemistry,
 
   if (grackle_verbose) {
     fprintf(stdout,"Initializing Cloudy cooling: %s.\n", group_name);
-    fprintf(stdout,"cloudy_table_file: %s.\n",my_chemistry->grackle_data_file);
+    fprintf(stdout,"cloudy_table_file: %s.\n",path);
   }
 
   /* Get conversion units. */
@@ -91,8 +91,7 @@ int initialize_cloudy_data(chemistry_data *my_chemistry,
   herr_t status;
   herr_t h5_error = -1;
 
-  file_id = H5Fopen(my_chemistry->grackle_data_file,
-                    H5F_ACC_RDONLY, H5P_DEFAULT);
+  file_id = H5Fopen(path, H5F_ACC_RDONLY, H5P_DEFAULT);
 
   if (H5Aexists(file_id, "old_style")) {
     my_rates->cloudy_data_new = 0;
@@ -105,8 +104,7 @@ int initialize_cloudy_data(chemistry_data *my_chemistry,
   sprintf(parameter_name, "/CoolingRates/%s/Cooling", group_name);
   dset_id = H5Dopen(file_id, parameter_name);
   if (dset_id == h5_error) {
-    fprintf(stderr,"Can't open Cooling in %s.\n",
-            my_chemistry->grackle_data_file);
+    fprintf(stderr,"Can't open Cooling in %s.\n", path);
     return FAIL;
   }
 
@@ -249,8 +247,7 @@ int initialize_cloudy_data(chemistry_data *my_chemistry,
   sprintf(parameter_name, "/CoolingRates/%s/Heating", group_name);
   dset_id = H5Dopen(file_id, parameter_name);
   if (dset_id == h5_error) {
-    fprintf(stderr,"Can't open Heating in %s.\n",
-            my_chemistry->grackle_data_file);
+    fprintf(stderr,"Can't open Heating in %s.\n", path);
     return FAIL;
   }
 
@@ -288,8 +285,7 @@ int initialize_cloudy_data(chemistry_data *my_chemistry,
   sprintf(parameter_name, "/CoolingRates/%s/MMW", group_name);
   dset_id = H5Dopen(file_id, parameter_name);
   if (dset_id == h5_error) {
-    fprintf(stderr,"Can't open MMW in %s.\n",
-            my_chemistry->grackle_data_file);
+    fprintf(stderr,"Can't open MMW in %s.\n", path);
     return FAIL;
   }
 
diff --git a/src/clib/os_utils.c b/src/clib/os_utils.c
new file mode 100644
index 00000000..9fef4460
--- /dev/null
+++ b/src/clib/os_utils.c
@@ -0,0 +1,273 @@
+/***********************************************************************
+/
+/ Implement utility functions used internally by Grackle related to
+/ path manipulation and OS-specific functionality
+/
+/
+/ Copyright (c) 2013, Enzo/Grackle Development Team.
+/
+/ Distributed under the terms of the Enzo Public Licence.
+/
+/ The full license is in the file LICENSE, distributed with this
+/ software.
+************************************************************************/
+
+#include <errno.h>  // ERANGE
+#include <stdio.h>  // fprintf, stderr
+#include <stdlib.h> // malloc, realloc, free, abort
+#include <string.h> // memcpy, strlen
+
+#include "os_utils.h"
+#include "grackle.h" // grackle_verbose
+
+/// Just like getenv, except it returns NULL in place of strings of length 0.
+static const char* getenv_nonempty_(const char* name) {
+  const char* out = getenv(name);
+  return ((out == NULL) || (out[0] == '\0')) ? NULL : out;
+}
+
+char* my_strdup_(const char* src) {
+  size_t len_with_nul = strlen(src) + 1;
+  char* out = malloc(sizeof(char) * len_with_nul);
+  memcpy(out, src, len_with_nul);
+  return out;
+}
+
+const char* post_prefix_ptr_(const char* s, const char* prefix) {
+  if ((s == NULL) || (prefix == NULL)) return NULL;
+
+  // these lengths don't include the null terminator
+  size_t len_s = strlen(s);
+  size_t len_prefix = strlen(prefix);
+  if ((len_s < len_prefix) || (len_prefix == 0)) return NULL;
+
+  if (memcmp(s, prefix, len_prefix) != 0) return NULL;
+
+  return s + len_prefix;
+}
+
+char* join_parts_(char sep, const char** parts, int nparts) {
+  if (nparts < 2) return NULL;
+
+  // in principle, we could give sep == '\0' special significance
+
+  size_t total_len = 0;
+  for (int i = 0; i < nparts; i++) {
+    if (parts[i] == NULL) return NULL;
+    total_len += strlen(parts[i]); // we don't include the nul-terminator
+  }
+  total_len += (nparts - 1); // account for the separator between each part
+  total_len++;               // account for trailing nul-terminator
+
+  char* out = malloc(total_len);
+  size_t cur_offset = 0;
+  for (int i = 0; i < nparts; i++) {
+    if (i > 0) {
+      out[cur_offset] = sep;
+      cur_offset++;
+    }
+    size_t cur_part_len = strlen(parts[i]);
+    memcpy(out + cur_offset, parts[i], cur_part_len);
+    cur_offset += cur_part_len;
+  }
+  out[cur_offset] = '\0';
+  if ((cur_offset+1) != total_len) abort();
+  return out;
+}
+
+
+// Platform-Specific Stuff
+// -----------------------
+
+enum platform_kind get_platform_(void) {
+#if defined(PLATFORM_GENERIC_UNIX) && defined(PLATFORM_MACOS)
+  #error "more than 1 platform macro was defined"
+#elif defined(PLATFORM_GENERIC_UNIX)
+  return platform_kind_generic_unix;
+#elif defined(PLATFORM_MACOS)
+  return platform_kind_macos;
+#else
+  return platform_kind_unknown;
+#endif
+}
+
+// define a function to get the home directory
+
+/// returns the user's home directory
+///
+/// If it is defined with a non-empty value, the function honors the value in
+/// the ``HOME`` environment variable. Otherwise, the function falls back to
+/// fetching the value using platform specific apis.
+///
+/// @return a string pointing to the current user's home directory. ``NULL`` is
+///     returned if there was an error. The caller is always responsible for
+///     deallocating this string.
+static char* get_home_dir(void);
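
[Editor's note: the fallback chain documented above is easy to see in Python,
where the standard library wraps the same POSIX calls. A POSIX-only sketch for
illustration (not part of the patch):

    import os
    import pwd

    def get_home_dir():
        # honor a non-empty HOME environment variable first
        home = os.environ.get("HOME")
        if home:
            return home
        # otherwise fall back to the user database (pwd wraps getpwuid)
        return pwd.getpwuid(os.getuid()).pw_dir
]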
+#if defined(PLATFORM_GENERIC_UNIX) || defined(PLATFORM_MACOS)
+
+// assume a posix-platform, the following headers are all standard
+
+#include <sys/types.h> // uid_t
+#include <unistd.h>    // getuid, sysconf
+#include <pwd.h>       // getpwuid, struct passwd
+
+static char* get_home_dir(void)
+{
+  // first, try to get the value set in the environment
+  const char* env_str = getenv_nonempty_("HOME");
+  if (env_str != NULL) return my_strdup_(env_str);
+
+  // fall back to checking the user database (standard on posix systems)
+
+  // ask the system for an upper limit on the buffersize to hold the results
+  const long initial_bufsize_guess = sysconf(_SC_GETPW_R_SIZE_MAX);
+
+  // If the system can't give a firm answer, we guess.
+  long bufsize = (initial_bufsize_guess == -1) ? 2048 : initial_bufsize_guess;
+  char* buffer = NULL;
+
+  struct passwd pwd, *result;
+  int return_code;
+
+  do {
+    if (buffer == NULL) { // our 1st attempt
+      buffer = malloc(sizeof(char)*bufsize);
+    } else { // our next attempt
+      bufsize *= 2;
+      char* tmp = realloc(buffer, sizeof(char)*bufsize);
+      if (tmp == NULL) break;
+      buffer = tmp;
+    }
+    return_code = getpwuid_r(getuid(), &pwd, buffer, bufsize, &result);
+  } while ((return_code == ERANGE) && (bufsize < 1000000000));
+
+  // (per POSIX, getpwuid_r reports success with a NULL result when there is
+  // simply no matching entry in the user database)
+  if ((return_code != 0) || (result == NULL)) {
+    free(buffer);
+    fprintf(stderr, "ERROR while determining the HOME directory\n");
+    return NULL;
+  }
+
+  char* out = my_strdup_(pwd.pw_dir);
+  free(buffer);
+  return out;
+}
+#else
+static char* get_home_dir(void) {
+  fprintf(stderr,
+          "Don't know how to determine HOME directory on current platform\n");
+  return NULL;
+}
+#endif
+
+/// Returns a string specifying the default data directory
+///
+/// All of these choices are inspired by the API description of the
+/// platformdirs python package
+/// * we only looked at online documentation:
+///   https://platformdirs.readthedocs.io/en/latest/
+/// * we have NOT read any source code
+static char* default_data_dir_(enum platform_kind kind) {
+  const char* appname = "grackle";
+  switch(kind) {
+
+    case platform_kind_unknown: {
+      fprintf(
+        stderr,
+        ("ERROR: can't infer default data dir on unknown platform.\n"
+         " -> can only infer data directories on macOS and unix systems\n")
+      );
+      return NULL;
+    }
+
+    case platform_kind_macos: {
+      // https://developer.apple.com/library/archive/documentation/FileManagement/Conceptual/FileSystemProgrammingGuide/MacOSXDirectories/MacOSXDirectories.html
+      char* home_dir = get_home_dir();
+      const char * parts[3] = {
+        home_dir, "Library/Application Support", appname
+      };
+      char* out = join_parts_('/', parts, 3);
+      free(home_dir);
+      return out;
+    }
+
+    case platform_kind_generic_unix: {
+      // https://specifications.freedesktop.org/basedir-spec/latest/
+      const char* env_str = getenv_nonempty_("XDG_DATA_HOME");
+
+      // check if we need to fall back to the default
+      const char* dflt = "~/.local/share";
+      if (env_str == NULL) {
+        env_str = dflt;
+      } else if ((env_str[0] != '~') && (env_str[0] != '/')) {
+        // this is what the specification tells us to do
+        fprintf(stderr,
+                "WARNING: ignoring XDG_DATA_HOME because it doesn't hold an "
+                "absolute path\n");
+        env_str = dflt;
+      }
+
+      // now actually infer the absolute path
+      if (env_str[0] == '~') {
+        if (post_prefix_ptr_(env_str, "~/") == NULL) {
+          fprintf(stderr,
+                  "ERROR: can't expand env-variable, XDG_DATA_HOME when it "
+                  "starts with `~user/` or just contains `~`\n");
+          return NULL;
+        }
+
+        char* home_dir = get_home_dir();
+        const char* parts[3] = {home_dir, env_str + 1, appname};
+        char* out = join_parts_('/', parts, 3);
+        free(home_dir);
+        return out;
+
+      } else {
+        const char* parts[2] = {env_str, appname};
+        char* out = join_parts_('/', parts, 2);
+        return out;
+
+      }
+    }
+
+  }
+
+  fprintf(stderr,
+          "ERROR: This part of the function should be unreachable! Did you "
+          "add a new platform_kind and forget to update the function?\n");
+  abort();
+}
+
+char* get_data_dir_(enum platform_kind kind) {
+  const char* env_str = getenv_nonempty_("GRACKLE_DATA_DIR");
+  char* out;
+  const char* description;
+  if (env_str != NULL) {
+    out = my_strdup_(env_str);
+    description = "from the `GRACKLE_DATA_DIR` environment variable";
+  } else {
+    if (grackle_verbose) {
+      fprintf(stdout,
+              ("INFO: looking up system-default for the data directory since "
+               "`GRACKLE_DATA_DIR` env variable is empty\n"));
+      fflush(stdout); // flush in case we run into an error in the next call
+    }
+    out = default_data_dir_(kind);
+    if (out == NULL) return NULL; // (the error was already reported)
+    description = "inferred from the system defaults";
+  }
+
+  // confirm we are providing an absolute path
+  if (out[0] != '/') {
+    fprintf(stderr,
+            "ERROR: the data-directory %s, `%s` is not an absolute path\n",
+            description, out);
+    free(out);
+    return NULL;
+  }
+  if (grackle_verbose) {
+    fprintf(stdout, "INFO: the data-directory (%s) is: `%s`\n",
+            description, out);
+  }
+  return out;
+}
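
[Editor's note: putting the pieces together, the managed-mode lookup in
file_from_data_dir_ resolves a registry name to
`<data-dir>/data-store-v1/<grackle-version>/<fname>`. A Python sketch of that
same path construction (the version string is illustrative, taken from the
directory cartoon later in this diff):

    import os

    def managed_data_file_path(data_dir, grackle_version, fname):
        # mirrors the join_parts_ call performed by file_from_data_dir_
        return os.path.join(data_dir, "data-store-v1", grackle_version, fname)

    # -> ~/.local/share/grackle/data-store-v1/3.4.0/CloudyData_UVB=HM2012.h5
    example = managed_data_file_path(
        os.path.expanduser("~/.local/share/grackle"), "3.4.0",
        "CloudyData_UVB=HM2012.h5",
    )
]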
diff --git a/src/clib/os_utils.h b/src/clib/os_utils.h
new file mode 100644
index 00000000..81c1cf25
--- /dev/null
+++ b/src/clib/os_utils.h
@@ -0,0 +1,51 @@
+/***********************************************************************
+/
+/ Declare utility functions used internally by Grackle related to
+/ path manipulation and OS-specific functionality
+/
+/
+/ Copyright (c) 2013, Enzo/Grackle Development Team.
+/
+/ Distributed under the terms of the Enzo Public Licence.
+/
+/ The full license is in the file LICENSE, distributed with this
+/ software.
+************************************************************************/
+
+#ifndef OS_UTILS_H
+#define OS_UTILS_H
+
+/// a portable version of strdup, which is provided on posix and in C23
+char* my_strdup_(const char* src);
+
+/// For a string ``s`` that starts with prefix ``prefix``, this returns
+/// the first character in ``s`` after the prefix. Otherwise, it returns NULL.
+///
+/// If the returned non-NULL ptr points to a '\0' character, then both strings
+/// are identical.
+///
+/// @param s the full string that may begin with the prefix
+/// @param prefix the prefix that the full string may begin with
+///
+/// @return ``NULL`` if either argument was ``NULL`` or if ``s`` does not
+///     start with ``prefix``. Otherwise, this returns ``s + strlen(prefix)``
+const char* post_prefix_ptr_(const char* s, const char* prefix);
+
+/// join together fragments of a string into 1 newly allocated string
+char* join_parts_(char sep, const char** parts, int nparts);
+
+
+/// represents the known platform types (that produce different results)
+enum platform_kind {
+  platform_kind_generic_unix,
+  platform_kind_macos,
+  platform_kind_unknown
+};
+
+/// function that returns the appropriate platform enum
+enum platform_kind get_platform_(void);
+
+/// get the Grackle data directory
+char* get_data_dir_(enum platform_kind kind);
+
+#endif /* OS_UTILS_H */
diff --git a/src/example/cxx_example.C b/src/example/cxx_example.C
index f064f669..baa33961 100644
--- a/src/example/cxx_example.C
+++ b/src/example/cxx_example.C
@@ -65,7 +65,10 @@ int main(int argc, char *argv[])
   grackle_data->dust_chemistry = 1;
   grackle_data->metal_cooling = 1;          // metal cooling on
   grackle_data->UVbackground = 1;           // UV background on
-  grackle_data->grackle_data_file = "../../input/CloudyData_UVB=HM2012.h5"; // data file
+
+  // assume that the grdata tool was previously used to fetch data files
+  grackle_data->grackle_data_file_options = GR_DFOPT_MANAGED;
+  grackle_data->grackle_data_file = "CloudyData_UVB=HM2012.h5"; // data file
 
   // Finally, initialize the chemistry object.
   if (initialize_chemistry_data(&my_units) == 0) {
diff --git a/src/include/grackle.h b/src/include/grackle.h
index 3796e787..6893b4b2 100644
--- a/src/include/grackle.h
+++ b/src/include/grackle.h
@@ -27,6 +27,14 @@ extern "C" {
 
 #define GR_SPECIFY_INITIAL_A_VALUE -1
 
+// here, we define the precise values passed to grackle_data_file_options
+// to specify how data files are handled. The precise values are experimental
+// (passing -1 will always map to the legacy behavior) and may change, but we
+// will do our best to avoid changing them.
+#define GR_DFOPT_FULLPATH_NO_CKSUM 1
+#define GR_DFOPT_MANAGED 2
+#define GR_DFOPT_MANAGED_NO_CKSUM 3
+
 extern int grackle_verbose;
 
 extern chemistry_data *grackle_data;
diff --git a/src/include/grackle_chemistry_data.h b/src/include/grackle_chemistry_data.h
index 22ce5007..fece76f2 100644
--- a/src/include/grackle_chemistry_data.h
+++ b/src/include/grackle_chemistry_data.h
@@ -60,6 +60,9 @@ typedef struct
   /* data file containing cooling and UV background tables */
   const char *grackle_data_file;
 
+  /* specifies handling of grackle_data_file */
+  int grackle_data_file_options;
+
   /* Use a CMB temperature floor 0) no, 1) yes */
   int cmb_temperature_floor;
 
diff --git a/src/include/grackle_fortran_interface.def b/src/include/grackle_fortran_interface.def
index 6629c69e..b3ecb47a 100644
--- a/src/include/grackle_fortran_interface.def
+++ b/src/include/grackle_fortran_interface.def
@@ -89,6 +89,7 @@ c This is the fortran definition of grackle_chemistry_data
         INTEGER(C_INT) :: metal_cooling
         INTEGER(C_INT) :: UVbackground
         TYPE(C_PTR) :: grackle_data_file
+        INTEGER(C_INT) :: grackle_data_file_options
         INTEGER(C_INT) :: cmb_temperature_floor
         REAL(C_DOUBLE) :: Gamma
         INTEGER(C_INT) :: h2_on_dust
@@ -252,4 +253,4 @@ c The following define the fortran interfaces to the C routines
            IMPORT
            TYPE(grackle_field_data), INTENT(INOUT) :: my_fields
          END FUNCTION gr_initialize_field_data
-      END INTERFACE
\ No newline at end of file
+      END INTERFACE
diff --git a/src/python/examples/cooling_cell.py b/src/python/examples/cooling_cell.py
index 0809d086..cd6355f6 100644
--- a/src/python/examples/cooling_cell.py
+++ b/src/python/examples/cooling_cell.py
@@ -22,13 +22,13 @@
 from pygrackle import \
     chemistry_data, \
+    constants, \
     evolve_constant_density, \
     setup_fluid_container
 from pygrackle.utilities.physical_constants import \
     mass_hydrogen_cgs, \
     sec_per_Myr, \
     cm_per_mpc
-from pygrackle.utilities.data_path import grackle_data_dir
 from pygrackle.utilities.model_tests import \
     get_model_set, \
     model_test_format_version
@@ -62,8 +62,10 @@
     my_chemistry.primordial_chemistry = 0
     my_chemistry.metal_cooling = 1
     my_chemistry.UVbackground = 1
-    my_chemistry.grackle_data_file = \
-        os.path.join(grackle_data_dir, "CloudyData_UVB=HM2012.h5")
+    my_chemistry.grackle_data_file = "CloudyData_UVB=HM2012.h5"
+    my_chemistry.grackle_data_file_options = constants.GR_DFOPT_MANAGED
+
+
     density = 0.1 * mass_hydrogen_cgs # g /cm^3
     temperature = 1e6 # K
diff --git a/src/python/examples/cooling_rate.py b/src/python/examples/cooling_rate.py
index f3720d9d..346b9383 100644
--- a/src/python/examples/cooling_rate.py
+++ b/src/python/examples/cooling_rate.py
@@ -19,8 +19,8 @@
 from pygrackle import \
     chemistry_data, \
+    constants, \
     setup_fluid_container
-from pygrackle.utilities.data_path import grackle_data_dir
 from pygrackle.utilities.physical_constants import \
     mass_hydrogen_cgs, \
     sec_per_Myr, \
@@ -63,8 +63,8 @@
     my_chemistry.UVbackground = 1
     my_chemistry.self_shielding_method = 0
     my_chemistry.H2_self_shielding = 0
-    my_chemistry.grackle_data_file = \
-        os.path.join(grackle_data_dir, "CloudyData_UVB=HM2012.h5")
+    my_chemistry.grackle_data_file = "CloudyData_UVB=HM2012.h5"
+    my_chemistry.grackle_data_file_options = constants.GR_DFOPT_MANAGED
 
     my_chemistry.use_specific_heating_rate = 1
     my_chemistry.use_volumetric_heating_rate = 1
diff --git a/src/python/examples/freefall.py b/src/python/examples/freefall.py
index 81035b61..8b2d3806 100644
--- a/src/python/examples/freefall.py
+++ b/src/python/examples/freefall.py
@@ -18,6 +18,7 @@
 from pygrackle import \
     chemistry_data, \
+    constants, \
     evolve_constant_density, \
     evolve_freefall, \
     setup_fluid_container
@@ -25,7 +26,6 @@
     mass_hydrogen_cgs, \
     sec_per_Myr, \
     cm_per_mpc
-from pygrackle.utilities.data_path import grackle_data_dir
 from pygrackle.utilities.model_tests import \
     get_model_set, \
     model_test_format_version
@@ -65,8 +65,8 @@
     my_chemistry.CaseBRecombination = 1
     my_chemistry.cie_cooling = 1
     my_chemistry.h2_optical_depth_approximation = 1
-    my_chemistry.grackle_data_file = os.path.join(
-        grackle_data_dir, "cloudy_metals_2008_3D.h5")
+    my_chemistry.grackle_data_file = "cloudy_metals_2008_3D.h5"
+    my_chemistry.grackle_data_file_options = constants.GR_DFOPT_MANAGED
 
     redshift = 0.
diff --git a/src/python/examples/yt_grackle.py b/src/python/examples/yt_grackle.py
index fabe95e3..4f0a8a58 100644
--- a/src/python/examples/yt_grackle.py
+++ b/src/python/examples/yt_grackle.py
@@ -15,8 +15,7 @@
 import sys
 import yt
 
-from pygrackle import add_grackle_fields
-from pygrackle.utilities.data_path import grackle_data_dir
+from pygrackle import add_grackle_fields, constants
 from pygrackle.utilities.model_tests import model_test_format_version
 
 output_name = os.path.basename(__file__[:-3]) # strip off ".py"
@@ -45,9 +44,11 @@
     ds = yt.load(ds_path)
 
-    grackle_data_file = os.path.join(grackle_data_dir, "CloudyData_UVB=HM2012.h5")
+    grackle_data_file = "CloudyData_UVB=HM2012.h5"
+    grackle_data_file_options = constants.GR_DFOPT_MANAGED
 
     grackle_pars = {'grackle_data_file': grackle_data_file,
+                    'grackle_data_file_options': grackle_data_file_options,
                     'UVbackground': 1,
                     'h2_on_dust': 1}
 
diff --git a/src/python/pygrackle/__init__.py b/src/python/pygrackle/__init__.py
index e4b96935..e57c8778 100644
--- a/src/python/pygrackle/__init__.py
+++ b/src/python/pygrackle/__init__.py
@@ -18,7 +18,9 @@
     FluidContainer
 
 from .grackle_wrapper import \
-    chemistry_data
+    chemistry_data, \
+    constants
+
 from .utilities.convenience import \
     setup_fluid_container
 
diff --git a/src/python/pygrackle/__main__.py b/src/python/pygrackle/__main__.py
new file mode 100644
index 00000000..3cc0675c
--- /dev/null
+++ b/src/python/pygrackle/__main__.py
@@ -0,0 +1,13 @@
+import sys
+
+from .utilities.grdata import main as grdata_main
+from .utilities.data_path import _make_config_pair
+
+def main(args=None):
+    return grdata_main(
+        *_make_config_pair(), prog_name="python -m pygrackle", args=args
+    )
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/src/python/pygrackle/file_registry/__init__.py b/src/python/pygrackle/file_registry/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/src/python/pygrackle/file_registry/file_registry.txt b/src/python/pygrackle/file_registry/file_registry.txt
new file mode 100644
index 00000000..573741f5
--- /dev/null
+++ b/src/python/pygrackle/file_registry/file_registry.txt
@@ -0,0 +1,15 @@
+// This is a file registry generated by the grackle data management tool
+// To overwrite this file with an updated copy (assuming that pygrackle is
+// installed), you might invoke:
+//     python -m pygrackle --hash_name sha1 --output
+// in this sample command, you would substitute:
+// -> ```` with a path to the output file
+// -> ```` with a path to the directory containing all files that are
+//    to be included in the registry
+{"CloudyData_UVB=FG2011.h5", "sha1:5b3423fb5cb96d6f8fae65655e204f1f82a276fa"},
+{"CloudyData_UVB=FG2011_shielded.h5", "sha1:60d13b4632f074fcb295f7adea85843046c0d4ef"},
+{"CloudyData_UVB=HM2012.h5", "sha1:3ae95f71926aa9543964fbd41c5e53a42345c19c"},
+{"CloudyData_UVB=HM2012_high_density.h5", "sha1:6db93abf8cb818975e8d751776328c5dab44d4ee"}, +{"CloudyData_UVB=HM2012_shielded.h5", "sha1:16cab5b5bd0bf5ef87db717dd5e8901be11812c2"}, +{"CloudyData_noUVB.h5", "sha1:55fed7c4bfd10e35d60660ca1adc5ceb411befb2"}, +{"cloudy_metals_2008_3D.h5", "sha1:ade563216d1102e8befab822cbb60c418b130aa1"} diff --git a/src/python/pygrackle/grackle_defs.pxd b/src/python/pygrackle/grackle_defs.pxd index 63bdcb69..c359d5a1 100644 --- a/src/python/pygrackle/grackle_defs.pxd +++ b/src/python/pygrackle/grackle_defs.pxd @@ -175,6 +175,11 @@ cdef extern from "grackle.h": cdef int GRACKLE_FAIL_VALUE "GR_FAIL" cdef int GR_SPECIFY_INITIAL_A_VALUE + # options for grackle_data_field_option + cdef int GR_DFOPT_FULLPATH_NO_CKSUM + cdef int GR_DFOPT_MANAGED + cdef int GR_DFOPT_MANAGED_NO_CKSUM + int local_initialize_chemistry_parameters(c_chemistry_data *my_chemistry) void set_velocity_units(c_code_units *my_units) diff --git a/src/python/pygrackle/grackle_wrapper.pyx b/src/python/pygrackle/grackle_wrapper.pyx index 243d37f2..71b52e42 100644 --- a/src/python/pygrackle/grackle_wrapper.pyx +++ b/src/python/pygrackle/grackle_wrapper.pyx @@ -12,6 +12,7 @@ ######################################################################## import copy +from types import SimpleNamespace from pygrackle.utilities.physical_constants import \ boltzmann_constant_cgs, \ mass_hydrogen_cgs @@ -20,6 +21,18 @@ from libc.limits cimport INT_MAX from .grackle_defs cimport * import numpy as np +# declare a variable that acts as a namespace for all of Grackle's named +# constants. The name of this variable is all lowercase in case we ever want to +# make a module called `constants.pyx` +_constants_contents = { + 'GR_FAIL': GRACKLE_FAIL_VALUE, + 'GR_DFOPT_FULLPATH_NO_CKSUM' : GR_DFOPT_FULLPATH_NO_CKSUM, + 'GR_DFOPT_MANAGED' : GR_DFOPT_MANAGED, + 'GR_DFOPT_MANAGED_NO_CKSUM' : GR_DFOPT_MANAGED_NO_CKSUM +} +constants = SimpleNamespace(**_constants_contents) +del _constants_contents + cdef class chemistry_data: cdef _wrapped_c_chemistry_data data cdef c_chemistry_data_storage rates diff --git a/src/python/pygrackle/utilities/data_path.py b/src/python/pygrackle/utilities/data_path.py index c640258c..557418b1 100644 --- a/src/python/pygrackle/utilities/data_path.py +++ b/src/python/pygrackle/utilities/data_path.py @@ -11,25 +11,66 @@ # software. ######################################################################## +import io import os +import sys from pygrackle.__config__ import _is_editable_installation +from pygrackle.grackle_wrapper import get_grackle_version +from pygrackle.utilities.grdata import ( + make_config_objects, + VersionDataManager, + _parse_file_registry, +) from pygrackle.utilities.misc import dirname -grackle_data_dir = os.environ.get("GRACKLE_DATA_DIR") -if (grackle_data_dir is None) and _is_editable_installation(): - # Note, this only works with an editable install of pygrackle. - _install_dir = dirname(os.path.abspath(__file__), level=5) - grackle_data_dir = os.path.join(_install_dir, "input") -elif (grackle_data_dir is None): - raise RuntimeError( - "in non-editable pygrackle installations, like this one, " - f"grackle_data_dir can only be imported from {__file__} if it is set " - "by the GRACKLE_DATA_DIR environment variable" +# maybe it would be better to export nothing? 
+__all__ = ["grackle_data_dir"]
+
+
+def _get_file_registry_contents():
+    if _is_editable_installation():
+        fname = os.path.join(
+            dirname(os.path.abspath(__file__), 2), "file_registry", "file_registry.txt"
+        )
+        if not os.path.isfile(fname):
+            raise RuntimeError(
+                "could not find the file_registry.txt in an editable install."
+            )
+        return fname
+
+    if (sys.version_info.major, sys.version_info.minor) < (3, 9):
+        import importlib_resources as resources
+    else:
+        from importlib import resources
+    ref = resources.files("pygrackle.file_registry") / "file_registry.txt"
+
+    contents = ref.read_text(encoding="utf-8")
+    return io.StringIO(contents)
+
+
+def _make_config_pair(grackle_version=None):
+    if grackle_version is None:
+        grackle_version = get_grackle_version()["version"]
+    return make_config_objects(
+        grackle_version=grackle_version,
+        file_registry_file=_get_file_registry_contents(),
+    )
+
+
+_CONFIG_PAIR = _make_config_pair()
+_MANAGER = VersionDataManager.create(*_CONFIG_PAIR)
+
+
+def _fnames_in_registry():
+    # used for testing/debugging
+    return tuple(_parse_file_registry(_CONFIG_PAIR[1].file_registry_file).keys())
+
+
+def _download_all_datafiles():
+    """Download all datafiles (if they haven't already been downloaded)."""
+    registry = _parse_file_registry(_CONFIG_PAIR[1].file_registry_file)
+    return _MANAGER.fetch_all(registry)
+
+
+grackle_data_dir = _MANAGER.version_dir
diff --git a/src/python/pygrackle/utilities/grdata.py b/src/python/pygrackle/utilities/grdata.py
new file mode 100644
index 00000000..42036e37
--- /dev/null
+++ b/src/python/pygrackle/utilities/grdata.py
@@ -0,0 +1,1827 @@
+#!/usr/bin/env python3
+
+# A tool for managing grackle data files. (More details provided after imports)
+#
+# This file should be usable as both (i) a part of pygrackle and (ii) a standalone
+# command line tool (when pygrackle IS NOT installed)
+#
+# To support scenario (ii), this file CAN ONLY use python's built-in modules.
+
+import argparse
+import contextlib
+import filecmp
+import hashlib
+import io
+from math import log10
+import os
+import re
+import shutil
+import stat
+import sys
+import traceback
+from typing import IO, NamedTuple, Optional, Union
+import urllib.request
+from urllib.error import URLError, HTTPError
+import warnings
+
+# Down below, we provide a detailed description that serves 3 purposes
+#   1. to act as a description of this file's contents for developers
+#   2. to serve as documentation on the website
+#   3. to serve as queryable documentation via the `help` subcommand
+#
+# The text enclosed in triple square brackets serves 2 purposes:
+# -> it is designed to be anchors used by sphinx to include the documentation.
+# -> while executing the `help` subcommand, anchors of the format
+#    `[[[BEGIN-SECTION:<name>]]]` will be replaced with a section title <name>,
+#    and all other anchors are removed.
+
+_EXTENDED_DESCRIPTION = """\
+[[[BEGIN-SECTION:DESCRIPTION]]]
+This is a system for managing Grackle data files. The command line
+interface provides commands to fetch these data files, list all of the
+available data, and delete the data.
+
+The system stores the data files at a single global location. (Grackle,
+itself, can access files from this location).
+
+The key feature of this system is its support for versioning:
+
+- it is able to support management of sets of datafiles (associated with
+  different grackle versions) where the datafiles have been renamed,
+  modified, or deleted between Grackle versions.
+
+- additionally, the system implements deduplication for the (very common)
+  scenario when the contents of a file are unchanged between grackle
+  versions.
+
+One minor caveat: a given version of this tool is ONLY able to download
+data for the grackle version specified by the ``--version-grackle`` flag
+(i.e. this is the grackle version that the tool ships with). However, it
+does support listing and deleting data associated with other grackle
+versions.
+
+The location of the data is controlled by the ``GRACKLE_DATA_DIR``
+environment variable. When this variable isn't specified, the tool uses
+the operating-system recommendation for user-site-data. This location can
+be queried with the ``getpath`` subcommand.
+[[[END-SECTION:DESCRIPTION]]]
+
+[[[BEGIN-SECTION:MOTIVATION]]]
+Why does this tool exist? Datafiles are required by **ANY** non-trivial
+program (e.g. a simulation-code or python script) that invokes Grackle.
+
+It is instructive to consider the historic experience of an end-user of one of
+these programs. To build Grackle, they would typically clone the git repository
+for Grackle (including the data files). To invoke their program, they would
+manually specify the path to the downloaded data file. Frankly, this doesn't
+seem so bad; the manual intervention is a minor inconvenience, at worst.
+While it would be nice to eliminate the manual intervention, that alone
+doesn't seem to warrant development of a special tool.
+
+Indeed, this is all true. Users who like this workflow can continue using it.
+However, this manual management of datafiles becomes problematic in any
+use-case that is marginally more complex. There are 3 considerations worth
+highlighting:
+
+  1. **Portability:** Currently, there is no out-of-the-box way for a
+     program that invokes Grackle and was configured to run on one computer
+     to run on another machine without manual intervention.
+
+     - If there are differences in how the machines are set up (e.g. where
+       the data files are placed), the paths to the Grackle data file(s) need
+       to be updated. This is relevant if you want to use a Pygrackle script
+       on a different machine or if you want to use a configuration script to
+       rerun a simulation (involving Grackle) on a different machine.
+
+     - This is particularly noteworthy when it comes to automated testing! For
+       example, before this tool existed, Pygrackle made the assumption that
+       it was installed as an editable installation in order to run some
+       examples. The test-suite of Enzo-E is another example where extra
+       book-keeping is required for all test-problems that invoke Grackle.
+
+  2. **If the Grackle repository isn't present:** This includes the case where
+     a user deletes the repository after installing Grackle. It is more
+     important to consider the case where users are installing programs that
+     use Grackle without downloading the repository (or, even if the repository
+     is downloaded, it is done so without the user's knowledge). This latter
+     case will become increasingly common as we make pygrackle easier to
+     install. This is also plausible for cmake-builds of downstream projects
+     that embed Grackle compilation as part of their build.
+
+  3. **Having multiple Grackle Versions Installed:** This is going to be
+     increasingly common as Pygrackle becomes easier to install. Users have 2
+     existing options in this case: (i) they maintain separate repositories of
+     data files for each version or (ii) they assume that they can just use
+     the newest version of the data-file repository. The latter option has
+     historically worked (and will probably continue to work), but it could
+     conceivably lead to cases where people unintentionally use a data-file
+     created for a newer version of grackle. (While this likely won't be a
+     problem, users should probably be explicitly aware that they are doing
+     this on the off-chance that problems do arise).
+
+This tool is a first step toward addressing these cases.
+
+Currently the tool just works for Pygrackle. There is an ongoing effort to add
+functionality for the Grackle library, itself, to access the files managed by
+this tool.
+[[[END-SECTION:MOTIVATION]]]
+
+[[[BEGIN-SECTION:INTERNALS-OVERVIEW]]]
+We now turn our attention to describing how the internals of the
+management system work.
+
+Fundamentally, the data management system manages a **data store**.
+We will return to that in a moment.
+
+Protocol Version
+++++++++++++++++
+
+This internal logic has an associated protocol-version (you can query
+this via the ``--version-protocol`` flag). The logic may change between
+protocol versions. The protocol version will change very rarely (if it
+ever changes at all).
+
+Data Directory
+++++++++++++++
+
+This is simply the data directory that includes all grackle data. This path
+is given by the ``GRACKLE_DATA_DIR`` environment variable, if it exists.
+Otherwise it defaults to the operating-system's recommendation for
+user-site-data.
+
+This contains several entries including:
+
+  - a **user-data** directory. This directory currently isn't used yet, but
+    it is reserved for users to put custom data-files in the future.
+
+  - a **tmp** directory (used by the data-management tool)
+
+  - it sometimes holds a lockfile (used to ensure that multiple instances of
+    this tool aren't running at once)
+
+  - the **data store** directory(ies). This is named
+    ``data-store-v<protocol-version>`` so that earlier versions of this
+    tool will continue to function if we ever change the protocol. (Each of
+    these directories are completely independent of each other).
+
+Outside of the **user-data** directory, users should not modify/create/delete
+any files within the Data Directory (unless the tool instructs them to).
+
+Data Store
+++++++++++
+
+This is where we track the data files managed by this system. This holds a
+directory called **object-store** and 1 or more "version-directories".
+
+The primary representation of each file is tracked within the ``object-store``
+subdirectory.
+
+- The name of each item in this directory is a unique key. This key is the
+  file’s SHA-1 checksum.
+
+- Git internally tracks objects in a very similar way (they have historically
+  used SHA-1 checksums as unique keys). The chance of an accidental collision
+  in the checksum in a large Git repository is extremely tiny. It was only 10
+  or 12 years after Git was created that the developers started worrying about
+  collisions (and they are primarily concerned with intentional collisions
+  from malicious actors).
+
+Each version-directory is named after a Grackle version (**NOT** a Pygrackle
+version).
+
+- a given version directory holds data-file references.
+- the references have the contemporaneous name of each of the data-files that
+  was shipped with the Grackle-version that corresponds to the directory's
+  name.
+- each reference is linked to the corresponding file in the ``object-store``.
+
+When a program outside of this tool accesses a data-file, it will **ONLY**
+access the references in the version-directory that shares its name with the
+version of Grackle that the program is linked against.
+
+This tool makes use of references and the ``object-store`` to effectively
+deduplicate data. Whenever this tool deletes a "data-file" reference it will
+also delete the corresponding file from the ``object-store`` if it had no
+other references. We choose to implement references as "hard links" in order
+to make it easy to determine when a file in ``object-store`` has no reference.
+[[[END-SECTION:INTERNALS-OVERVIEW]]]
+
+Sample Directory Structure
+++++++++++++++++++++++++++
+
+Down below, we sketch out what the directory-structure might look like:
+
+[[[BEGIN:DIRECTORY-CARTOON]]]
+GRACKLE_DATA_DIR/
+ ├── data-store-v1/                 # <- the data-store
+ │   ├── 3.3.1-dev/                 # <- a version-dir
+ │   │   ├── CloudyData_UVB=FG2011.h5
+ │   │   ├── ...
+ │   │   └── cloudy_metals_2008_3D.h5
+ │   ├── 3.4.0/                     # <- another version-dir
+ │   │   ├── CloudyData_UVB=FG2011.h5
+ │   │   ├── ...
+ │   │   └── cloudy_metals_2008_3D.h5
+ │   └── object-store/              # <- the object-store
+ │       ├── ...
+ │       └── ...
+ ├── tmp/                           # <- reserved for scratch-space
+ ├── user-data/                     # <- reserved for user data
+ │   ├── ...
+ │   └── ...
+ └── lockfile                       # <- temporary file
+[[[END:DIRECTORY-CARTOON]]]
+"""
+
+
+# Notes on the file registry
+# --------------------------
+# The file registry refers to a small file that associates a filename with a checksum.
+#
+# In the long term, we plan to support 3 cases involving checksums:
+#
+# 1. have the C layer of Grackle provide the option to directly read in files from the
+#    data-store without being provided a full path
+#    -> this logic is already mostly implemented
+#    -> In this case, we will also need to have access to the file checksum so we can
+#       validate that the correct file is being accessed. (This is mostly to ensure we
+#       don't break other people's results because we make a mistake). The checksum
+#       validation will be performed with a tool like picohash
+#    -> When we do this, we will directly embed the information encoded in the file
+#       registry inside of scikit-build-core (we picked the file format to ensure that
+#       the information is easy to embed in a C file)
+#
+# 2. Continue supporting the functionality (and cli) implemented by this file within
+#    pygrackle
+#
+# 3. Support using this script as a standalone command-line program.
+#    -> See the end of this file for what that entails
+
+
+# define some constants
+# =====================
+
+# default chunksize used for file operations
+_CHUNKSIZE = 4096
+
+# the name of the subdirectory in a data-store where we handle deduplication
+_OBJECT_STORE_SUBDIR = "object-store"
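
[Editor's note: the reference-counting behavior described in the overview above
falls out of ordinary hard-link semantics. A standalone sketch for illustration
(the helper name and paths are hypothetical, not part of this module's API):

    import os

    def drop_reference(ref_path, object_path):
        # delete a named reference held in a version-directory
        os.remove(ref_path)
        # st_nlink counts the remaining hard links; once only the
        # object-store's own entry is left, the object can be evicted
        if os.stat(object_path).st_nlink == 1:
            os.remove(object_path)
]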
+# Alternative to `None` for specifying that a value wasn't specified. This is primarily
+# used as a default value for an optional command line flag that requires an argument.
+# In that case, a value of _UNSPECIFIED means that the flag wasn't specified while a
+# value of `None` means that the flag doesn't have an associated value.
+_UNSPECIFIED = object()
+
+
+# Version check and define some backports
+# =======================================
+
+if sys.version_info < (3, 6, 1):
+    # 3.6.0 doesn't support all NamedTuple features
+    raise RuntimeError("python 3.6.1 or newer is required")
+
+elif sys.version_info < (3, 7, 0):
+
+    class nullcontext:
+        def __init__(self, enter_result=None):
+            self.enter_result = enter_result
+
+        def __enter__(self):
+            return self.enter_result
+
+        def __exit__(self, *args):
+            pass
+
+else:
+    nullcontext = contextlib.nullcontext
+
+
+class GenericToolError(RuntimeError):
+    pass
+
+
+class ToolConfig(NamedTuple):
+    """Tracks basic information about this tool"""
+
+    grackle_version: str
+    protocol_version: str = "1"
+    checksum_kind: str = "sha1"
+
+
+def _ensure_all_removed(fnames):
+    for fname in fnames:
+        try:
+            os.remove(fname)
+        except FileNotFoundError:
+            continue
+
+
+_MAX_BAR = 160 * "="
+
+
+@contextlib.contextmanager
+def _progress_bar(ncols, total_bytes, *, use_dummy=False):
+    """
+    ContextManager that provides a function used for drawing/updating progress bars
+
+    If the program wasn't executed from a shell, or the caller wants to draw
+    too few columns, the returned function does nothing.
+    """
+    # the main template is '[<bar>] <size>/<total> <unit>'
+    # -> <bar> is some fraction of _MAX_BAR and empty space
+    # -> <size>/<total> describes the current/total download size (takes up
+    #    to 6 characters apiece)
+    # -> <unit> is 1 or 2 characters
+    # -> thus, we need 19 characters for everything other than <bar>
+    bar_len = min(len(_MAX_BAR), ncols - 19)
+
+    if use_dummy or (total_bytes <= 0) or (bar_len <= 0) or not sys.stdout.isatty():
+        use_dummy = True
+
+        def _update(size):
+            return None
+    else:
+        power_div_3 = int(log10(total_bytes)) // 3
+        factor, unit = 1000.0**power_div_3, ("B", "KB", "MB", "GB")[power_div_3]
+        fmt = "\r[{bar:{len}.{nfill}}] {size:.2f}" + f"/{total_bytes/factor:.2f} {unit}"
+
+        def _update(size):
+            nfill = int(bar_len * (size / total_bytes))
+            val = fmt.format(bar=_MAX_BAR, len=bar_len, nfill=nfill, size=size / factor)
+            print(val, end="", flush=True)
+
+    try:
+        yield _update
+    finally:
+        # always execute this clause when exiting the context manager. If an exception
+        # caused the exit, it will be re-raised after this clause
+        if not use_dummy:
+            print(flush=True)
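
[Editor's note: a quick usage sketch for the context manager above (the byte
counts are made up; this is illustrative, not part of the patch):

    import shutil

    ncols = shutil.get_terminal_size()[0] - 1
    total = 1_000_000
    with _progress_bar(ncols, total) as update:
        received = 0
        while received < total:
            received = min(received + 4096, total)  # pretend a chunk arrived
            update(received)  # redraws e.g. '[======    ] 0.52/1.00 MB'
]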
+def _retrieve_url(url, dest, fname, *, use_progress_bar=True, chunksize=_CHUNKSIZE):
+    """
+    download the file from url to dest
+
+    Note
+    ----
+    Online discussion about calling `response.read(chunksize)`, where
+    `response` is the context manager object produced by
+    `urllib.request.urlopen`, seems to strongly imply that this limits the
+    amount of data read from the http request into memory at a given
+    point in time. However, the documentation seems vague on this point.
+
+    This is unlikely to ever be a problem (the biggest file we need is
+    currently under 10 Megabytes). However, if it does become a problem,
+    we have 2 options:
+      1. we could conditionally fall back to ``curl`` (cli tool) or
+         ``Requests`` (python package) if they are present on the system
+      2. we could craft custom http requests
+    """
+    ncols = shutil.get_terminal_size()[0] - 1
+    req = urllib.request.Request(url)
+    try:
+        with contextlib.ExitStack() as stack:
+            # enter context managers for http-response, progress-bar, & output-file
+            response = stack.enter_context(urllib.request.urlopen(req))
+            total_bytes = int(response.headers.get("Content-Length", -1))
+            update_progress = stack.enter_context(
+                _progress_bar(ncols, total_bytes, use_dummy=not use_progress_bar)
+            )
+            out_file = stack.enter_context(open(dest, "wb"))
+
+            # write downloaded data to a file
+            downloaded_bytes = 0
+            while True:
+                update_progress(downloaded_bytes)
+                block = response.read(chunksize)
+                if not block:
+                    break
+                downloaded_bytes += len(block)
+                out_file.write(block)
+    except HTTPError as e:
+        raise GenericToolError(
+            f"The server couldn't fulfill the request for retrieving {fname} from "
+            f"{url}.\nError code: {e.code}"
+        )
+    except URLError as e:
+        raise GenericToolError(
+            f"The server couldn't be reached while trying to retrieve {fname} from "
+            f"{url}.\nReason: {e.reason}"
+        )
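
[Editor's note: the Fetcher class below wraps this download in a
temp-file/verify/rename dance. As a standalone sketch of that pattern (the
function name is hypothetical; `matches_checksum` is defined elsewhere in this
file):

    import os

    def fetch_with_verification(url, dest, fname, expected_cksum, cksum_kind):
        # download to a scratch name first, so an interrupted run never
        # leaves a plausible-looking (but corrupt) file at the destination
        tmp_name = dest + ".part"
        _retrieve_url(url, tmp_name, fname)
        if not matches_checksum(tmp_name, cksum_kind, expected_cksum):
            os.remove(tmp_name)
            raise GenericToolError(f"{fname} doesn't have the expected checksum")
        os.rename(tmp_name, dest)  # atomic on POSIX filesystems
]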
+
+
+class Fetcher(NamedTuple):
+    """Encodes information for fetching data files
+
+    Note
+    ----
+    Right now, we always assume that we want to support downloading from
+    GitHub, but in the future, we can also support fetching from a
+    directory
+    """
+
+    base_path: str
+    holds_url: bool
+
+    @classmethod
+    def configure_GitHub_url(cls, data_repository_url, contemporaneous_git_hash):
+        repo_url = data_repository_url
+        repo_version = contemporaneous_git_hash
+        # we could also use the name of a branch (instead of a commit-hash) if we think
+        # that would be better
+        return cls(base_path=f"{repo_url}/raw/{repo_version}/input/", holds_url=True)
+
+    @classmethod
+    def configure_src_dir(cls, dir_path):
+        return cls(base_path=dir_path, holds_url=False)
+
+    def __call__(self, fname, checksum, checksum_kind, dest_dir):
+        """
+        Retrieve the file named ``fname`` to a location dest_dir
+
+        Returns
+        -------
+        full_path: str
+            Upon success, we return the full path of the newly fetched file
+
+        Notes
+        -----
+        We follow the following procedure (inspired by pooch):
+          1. download the file to a temporary location
+          2. verify that the checksum is correct
+          3. move the file to the appropriate destination
+
+        This provides robust behavior if the program is interrupted. In
+        principle, we could combine steps 1 and 2. But, there may be some
+        minor benefits to keeping our procedure like this (theoretically,
+        we may be more likely to catch corruption from hard-drive hardware
+        errors).
+        """
+        src = os.path.join(self.base_path, fname)
+        dst = os.path.join(dest_dir, fname)
+        _pretty_log(f"-> fetching `{fname}`", indent_all=True)
+        tmp_name = os.path.join(dest_dir, "_tempfile")
+        # tmp_name can safely be removed if it exists (it only exists if this logic
+        # previously crashed or was interrupted by SIGKILL)
+        _ensure_all_removed([tmp_name])
+
+        try:
+            if self.holds_url:
+                _retrieve_url(src, tmp_name, fname)
+            else:
+                # copy the file
+                shutil.copyfile(src, tmp_name)
+            if not matches_checksum(tmp_name, checksum_kind, checksum):
+                if matches_checksum(src, checksum_kind, checksum):
+                    raise GenericToolError(
+                        f"while copying from {src}, data may have been corrupted"
+                    )
+                raise GenericToolError(f"{src} doesn't have the expected checksum")
+            os.rename(tmp_name, dst)
+
+        finally:
+            _ensure_all_removed([tmp_name])
+
+
+class DataStoreConfig(NamedTuple):
+    """Track basic configuration information
+
+    In principle, this information is intended to be a little more
+    flexible and might not be known as early as ToolConfig.
+    """
+
+    data_dir: str
+    store_location: str
+    checksum_kind: str
+    default_fetcher: Fetcher
+    file_registry_file: Union[str, bytes, os.PathLike, IO, None]
+
+    @property
+    def tmp_dir(self):
+        """Used for hardlink test and scratch-space"""
+        return os.path.join(self.data_dir, "tmp")
+
+    @property
+    def user_data_dir(self):
+        """Reserved for user data"""
+        return os.path.join(self.data_dir, "user-data")
+
+
+def _get_platform_data_dir(appname="grackle", system_str=None):
+    """Returns a string specifying the default data directory
+
+    All of these choices are inspired by the API description of the platformdirs
+    python package
+    * we only looked at online documentation:
+      https://platformdirs.readthedocs.io/en/latest/
+    * we have NOT read any source code
+    """
+    if system_str is None:
+        system_str = sys.platform
+    if system_str.startswith("win32"):
+        raise RuntimeError("this tool doesn't currently support Windows")
+    elif system_str.startswith("darwin"):
+        return os.path.expanduser(f"~/Library/Application Support/{appname}")
+    else:  # assume linux/unix
+        # https://specifications.freedesktop.org/basedir-spec/latest/
+        dflt = "~/.local/share"
+        env_str = os.getenv("XDG_DATA_HOME", default=dflt)
+        if env_str[:1] not in ["~", "/"]:
+            # this is what the specification tells us to do
+            warnings.warn(
+                "ignoring XDG_DATA_HOME because it doesn't hold an absolute path"
+            )
+            env_str = dflt
+
+        # now actually infer the absolute path
+        if env_str[0] == "~":
+            if env_str[:2] != "~/":  # for parity with C-version of this function
+                raise RuntimeError(
+                    "can't expand the XDG_DATA_HOME env-variable when it "
+                    "starts with `~user/` or just contains `~`"
+                )
+            return os.path.expanduser(f"{env_str}/{appname}")
+        else:
+            return f"{env_str}/{appname}"
+
+
+def _get_data_dir():
+    manual_choice = os.getenv("GRACKLE_DATA_DIR", default=None)
+    if (manual_choice is None) or (len(manual_choice) == 0):
+        return _get_platform_data_dir()
+    elif (manual_choice[0] != "~") and (not os.path.isabs(manual_choice)):
+        raise RuntimeError("GRACKLE_DATA_DIR must specify an absolute path")
+    elif manual_choice[0] == "~":
+        if manual_choice[:2] != "~/":  # for parity with C-version of this function
+            raise RuntimeError(
+                "can't expand the GRACKLE_DATA_DIR env-variable when it "
+                "starts with `~user/` or just contains `~`"
+            )
+        return os.path.expanduser(manual_choice)
+    else:
+        return manual_choice
+
+
+@contextlib.contextmanager
+def _file_openner(f, mode, **kwargs):
+    """Open a file or pass through an already open file"""
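+    # dispatch note: anything accepted by ``os.fspath`` (a str, bytes, or
+    # os.PathLike object) is treated as a path and opened below; anything else
+    # (e.g. an ``io.StringIO`` instance) is assumed to already be an open
+    # file-like object and is yielded unmodified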
+    if (sys.version_info.major, sys.version_info.minor) < (3, 6):
+        if not isinstance(f, io.IOBase):
+            path = f
+        else:
+            path = None
+    else:
+        try:
+            path = os.fspath(f)
+        except TypeError:
+            path = None
+    if path is None:
+        yield f
+    else:
+        with open(path, mode, **kwargs) as fobj:
+            yield fobj
+
+
+def _parse_file_registry(f):
+    """Read the file registry, as a dict, from a text file
+
+    Parameters
+    ----------
+    f : file or str or bytes or ``os.PathLike``
+        Contains the data to be read in
+
+    Notes
+    -----
+    We describe the format below. This format was chosen so that the
+    contents could be injected into a C file to be used as a literal.
+
+    * empty lines and lines that start with ``//`` are ignored
+
+    * all other lines should look like ``{"<fname>", "<cksum>"}``
+      and there is allowed to be a trailing comma
+    """
+
+    with _file_openner(f, "r") as file:
+        file_registry = {}
+        for i, line in enumerate(file):  # iterate over lines
+            if (len(line) == 0) or line.isspace() or line.startswith("//"):
+                continue
+            m = re.match(
+                r'^\s*{\s*"(?P<fname>[^"]+)"\s*,\s*"(?P<cksum>[^"]+)"\s*},?\s*', line
+            )
+            if m is None:
+                raise RuntimeError(
+                    f"Something went wrong with parsing line {i+1} of {f}:\n "
+                    f" `{line}`"
+                )
+            file_registry[m["fname"]] = m["cksum"]
+    return file_registry
+
+
+class LockFileExistsError(FileExistsError):
+    pass
+
+
+class LockFileContext:
+    """Reentrant context manager that creates a "lockfile".
+
+    The context-manager will delete the file when we finish. If the lock
+    already exists, the program will abort with an explanatory error
+    (this ensures that only 1 copy of the program will try to run at a
+    time).
+
+    Examples
+    --------
+    To use this you might invoke:
+
+    >>> dir_lock = LockFileContext("path/to/lockfile")
+    >>> with dir_lock:
+    ...     # do something critical
+
+    This is reentrant in the sense that you can perform something like the
+    following (the real value here is that you can move the internal
+    with-statement inside of functions)
+
+    >>> dir_lock = LockFileContext("path/to/lockfile")
+    >>> with dir_lock:
+    ...     # do something critical
+    ...     with dir_lock:
+    ...         # do something else critical
+    """
+
+    def __init__(self, lock_file_path):
+        self.lock_file_path = lock_file_path
+
+        # the following is always non-negative. It can exceed 1 if the same context
+        # manager is used in nested with-statements
+        self._acquisition_count = 0
+
+    def locked(self):
+        return self._acquisition_count > 0
+
+    def __enter__(self):
+        if self._acquisition_count == 0:
+            # try to acquire the lock (by trying to create the file)
+            try:
+                f = open(self.lock_file_path, "x")
+                f.close()
+            except FileExistsError as err:
+                raise LockFileExistsError(
+                    err.errno,
+                    err.strerror,
+                    err.filename,
+                    getattr(err, "winerror", None),
+                    err.filename2,
+                ) from None
+        else:
+            # this is a nested with-statement, in a process that already owns the lock
+            pass
+
+        self._acquisition_count += 1  # only executed if process owns the lock
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        if exc_type is FileExistsError:
+            return False
+        elif self._acquisition_count <= 0:
+            raise RuntimeError("the contextmanager has a totally invalid state!")
+        elif self._acquisition_count == 1:
+            os.remove(self.lock_file_path)
+        self._acquisition_count -= 1
+        return False  # if an exception triggered the exiting of the context
+        # manager, don't suppress it!
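+
+
+# Typical flow for the lockfile machinery (an illustrative sketch; in this
+# tool the path is normally derived from a DataStoreConfig via
+# ``standard_lockfile``, defined just below):
+#
+#     lock = LockFileContext("path/to/lockfile")
+#     with lock:  # creates the lockfile, or raises LockFileExistsError
+#         pass    # ... do critical filesystem work here ...
+#     # the lockfile is removed once the outermost with-block exits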
+
+
+def standard_lockfile(data_store_config):
+    return LockFileContext(os.path.join(data_store_config.data_dir, "lockfile"))
+
+
+def calc_checksum(fname, alg_name, *, chunksize=_CHUNKSIZE):
+    """Calculate the checksum for a given fname"""
+    # construct the object to track intermediate state of the checksum
+    # calculation as we stream through the data
+    hash_obj = hashlib.new(alg_name)
+    with _file_openner(fname, "rb") as f:
+        if f is fname:
+            f.seek(0, os.SEEK_SET)
+
+        buffer = bytearray(chunksize)
+        while True:
+            nbytes = f.readinto(buffer)
+            if nbytes == chunksize:
+                hash_obj.update(buffer)
+            elif nbytes:  # equivalent to: (nbytes is not None) and (nbytes > 0)
+                hash_obj.update(buffer[:nbytes])
+            else:
+                break
+    return hash_obj.hexdigest()
+
+
+def matches_checksum(fname, alg_name, checksum):
+    return checksum == calc_checksum(fname, alg_name)
+
+
+def _pretty_log(arg, *, indent_all=False):
+    """indent messages so it's clear when multiline messages are a single thought"""
+    lines = arg.splitlines()
+    if len(lines) and not indent_all:
+        formatted = [f"-- {lines[0]}"] + [f"   {e}" for e in lines[1:]]
+    else:
+        formatted = [f"   {e}" for e in lines]
+    print(*formatted, sep="\n")
+
+
+def _ensure_exists(path, content_description):
+    if not os.path.isdir(path):
+        if len(content_description) > 0:
+            _pretty_log(f"creating directory {content_description}\n-> {path}")
+        os.mkdir(path)
+
+
+# to be used with os.chmod to set permissions to prevent mutations of files (you can
+# always delete it if you own it)
+_IMMUTABLE_MODE = stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH
+
+
+class _HardlinkStrat:
+    """
+    Acts as a "namespace" for functions related to our deduplication strategy
+    that uses hardlinks
+    """
+
+    @staticmethod
+    def is_supported(dirname):
+        """returns whether the OS (and filesystem) supports hardlinks"""
+
+        fnames = [os.path.join(dirname, f"linktest_f{i}.txt") for i in [0, 1]]
+        _ensure_all_removed(fnames)
+
+        try:
+            _contents = "THIS IS SOME TEST DATA"
+            with open(fnames[0], "w") as f:
+                f.write(_contents)
+            os.link(fnames[0], fnames[1])
+            os.remove(fnames[0])
+            with open(fnames[1], "r") as f:
+                support_hardlinks = f.read() == _contents
+        except OSError:
+            support_hardlinks = False
+        finally:
+            _ensure_all_removed(fnames)
+        return support_hardlinks
+
+    @staticmethod
+    def are_linked(fname, fname2):
+        """return whether ``fname`` & ``fname2`` specify paths that are hardlinks"""
+        try:
+            statinfo1 = os.stat(fname, follow_symlinks=False)
+            statinfo2 = os.stat(fname2, follow_symlinks=False)
+        except FileNotFoundError:
+            return False
+        return statinfo1.st_ino == statinfo2.st_ino
+
+    @staticmethod
+    def remove_if_norefs(fname):
+        """
+        Removes the specified file if there are no other references to it.
+
+        Parameters
+        ----------
+        fname : str
+            Path to the file that we are operating on
+
+        Returns
+        -------
+        bool
+            Indicates if any file was removed
+        """
+        statinfo = os.stat(fname, follow_symlinks=False)
+
+        # statinfo.st_nlink == 1 means that the only hardlink is the one
+        # associated with fname
+        # -> it should not be possible for ``os.stat(fname).st_nlink`` to
+        #    return ``0`` for an existing file
+        if statinfo.st_nlink == 1:
+            os.remove(fname)
+            return True
+        return False
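+
+    # Illustrative sketch of how the helpers above combine with ``deduplicate``
+    # (defined next); the paths are made up, and the real call site lives in
+    # VersionDataManager._fetch_file:
+    #
+    #     full = "<store>/<version>/foo.h5"        # user-visible file entry
+    #     obj = "<store>/<object-dir>/<sha1-hex>"  # entry named by its checksum
+    #     _HardlinkStrat.deduplicate(full, obj)    # afterwards: one shared inode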
+
+    @staticmethod
+    def deduplicate(full_fname, shared_fname):
+        """
+        Perform logic to ensure that ``full_fname`` and ``shared_fname``
+        are both paths that refer to the same hardlink.
+
+        This handles 3 main cases:
+
+        1. ``full_fname`` and ``shared_fname`` are already hardlinked.
+
+           * Nothing is done.
+
+        2. ``full_fname`` exists and ``shared_fname`` doesn't.
+
+           * A hardlink will be created at ``shared_fname`` that refers
+             to ``full_fname``.
+
+        3. ``full_fname`` and ``shared_fname`` specify existing, distinct
+           copies of the same file.
+
+           * in this case, ``full_fname`` is deleted and then replaced
+             with a hardlink that refers to ``shared_fname``.
+
+        Parameters
+        ----------
+        full_fname : str
+            Specifies a file-path that must already exist
+        shared_fname : str
+            Specifies a file-path that may or may not exist. If it does
+            already exist, it will be preserved (in case it is already in
+            use for deduplicating other existing files)
+        """
+        if not os.path.isfile(full_fname):
+            raise FileNotFoundError(full_fname)
+        elif _HardlinkStrat.are_linked(full_fname, shared_fname):
+            pass  # do nothing!
+        elif os.path.isfile(shared_fname):
+            if not filecmp.cmp(full_fname, shared_fname, shallow=False):
+                raise ValueError(
+                    f"`{full_fname}` and `{shared_fname}` specify files that aren't "
+                    "perfect copies"
+                )
+            os.remove(full_fname)
+            os.link(shared_fname, full_fname)
+        else:
+            os.link(full_fname, shared_fname)
+
+
+def _ensure_data_dir_exists(data_store_config):
+    """Creates the data_dir if it doesn't exist
+
+    the data_dir is a directory that contains:
+    -> the data-store directory for data managed by the current protocol version
+    -> (possibly) data-store directories for data managed by other protocol versions
+    -> (possibly) a directory called `user-data/` where users can put custom data
+    """
+    _ensure_exists(data_store_config.data_dir, "that will hold all Grackle data")
+
+    # even though it isn't used for anything right now, make the directory that is
+    # reserved for user content
+    _ensure_exists(data_store_config.user_data_dir, "reserved for user-defined data")
+
+    # primarily for testing whether hard-links are supported
+    _ensure_exists(data_store_config.tmp_dir, "reserved for scratch-space")
+
+
+def get_version_dir(tool_config, data_store_config):
+    return os.path.join(data_store_config.store_location, tool_config.grackle_version)
+
+
+def get_object_dir(data_store_config):
+    return os.path.join(data_store_config.store_location, _OBJECT_STORE_SUBDIR)
+
+
+class VersionDataManager(NamedTuple):
+    """
+    Actually manages downloads of files to a directory where the
+    directory and files are associated with a single Grackle version.
+
+    Instances of this class support 2 modes of operation:
+      1. The instance manages a data file directory that is part of the
+         larger data-management system. This data-management system may
+         include multiple version directories and data-files are
+         deduplicated.
+      2. The instance manages a data file directory that is completely
+         isolated from any data-management system (i.e. there is no
+         deduplication)
+
+    The first mode is the primary use case of this class. (The second mode
+    is mostly provided as a convenience)
+
+    Warnings
+    --------
+    This should not be considered part of a public API. The names and
+    existence of all attributes and methods are subject to change
+
+    Notes
+    -----
+    A major motivating factor in the design was providing the capacity
+    to create the necessary directories only when absolutely necessary
+    (i.e. when we are about to download data)
+
+    Some future methods that might be worth implementing:
+
+    * a method to download a single file
+
+    * a method to check the validity of a single version's files (i.e. the
+      version directory ONLY contains files listed in the specified
+      registry, all files match the specified checksum, AND they are all
+      properly linked to a file in the object directory)
+    """
+
+    # Path to the output directory, where each file-name matches the name given
+    # in the registry and is known by the associated grackle-version.
+    version_dir: str
+
+    # data_store_config holds a little more information than we actually need
+    # -> we may choose to redefine this in the future
+    data_store_config: Optional[DataStoreConfig]
+
+    # encodes the configuration (and logic) for fetching the files
+    fetcher: Fetcher
+
+    @classmethod
+    def create(
+        cls,
+        tool_config,
+        data_store_config,
+        *,
+        untracked_dest_dir=None,
+        override_fetcher=None,
+    ):
+        """
+        Create a new instance
+
+        Parameters
+        ----------
+        tool_config : ToolConfig
+        data_store_config : DataStoreConfig
+        untracked_dest_dir : str, optional
+            When specified, fetched files are placed in the specified
+            directory and no attempt is made to track the files as part of
+            the data-directory.
+        override_fetcher : Fetcher, optional
+            When specified, this fetcher is used in place of the standard
+            default fetcher provided by data_store_config.
+        """
+
+        fetcher = override_fetcher
+        if fetcher is None:
+            fetcher = data_store_config.default_fetcher
+
+        if untracked_dest_dir is None:
+            version_dir = get_version_dir(tool_config, data_store_config)
+        else:
+            version_dir = untracked_dest_dir
+            data_store_config = None
+
+        return cls(
+            version_dir=version_dir,
+            data_store_config=data_store_config,
+            fetcher=fetcher,
+        )
+
+    def manages_untracked_data(self):
+        return self.data_store_config is None
+
+    def _object_dir(self):
+        """
+        If the instance manages data as part of a larger data management
+        system, then this method returns the path to the object directory.
+        This is the directory where checksums are used as filenames.
+        (Linking with files in this directory is the mechanism used to aid
+        deduplication)
+        """
+        if self.data_store_config is None:
+            return None
+        return get_object_dir(self.data_store_config)
+
+    def _setup_file_system(self):
+        """
+        helper function that ensures that the file system is set up for
+        fetching new files and returns the configured lockfile context
+        manager (it isn't locked yet)
+        """
+        if self.manages_untracked_data():
+            lockfile_ctx = nullcontext()
+        else:
+            _ensure_data_dir_exists(self.data_store_config)
+
+            lockfile_ctx = standard_lockfile(self.data_store_config)
+            with lockfile_ctx:
+                # let's validate we can actually use hardlinks
+                if not hasattr(os, "link"):
+                    raise GenericToolError(
+                        "the operating system doesn't support hardlinks"
+                    )
+                elif not _HardlinkStrat.is_supported(self.data_store_config.tmp_dir):
+                    raise GenericToolError("the file system doesn't support hardlinks")
+
+                # a little more set up
+                _ensure_exists(
+                    self.data_store_config.store_location, "that holds the data-store"
+                )
+                _ensure_exists(self._object_dir(), "")
+
+            assert not lockfile_ctx.locked()  # sanity check!
+
+        with lockfile_ctx:
+            _ensure_exists(
+                self.version_dir, "that holds data for current Grackle version"
+            )
+
+        return lockfile_ctx
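+
+    # The overall fetch flow, roughly (an illustrative sketch; see
+    # ``_fetch_file`` and ``fetch_all`` below for the real logic):
+    #
+    #     man = VersionDataManager.create(tool_config, data_store_config)
+    #     registry = {"foo.h5": "sha1:<hex-digest>"}  # made-up entry
+    #     man.fetch_all(registry)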
+
+    def _fetch_file(self, fname, full_checksum_str, *, lockfile_ctx=None):
+        """
+        Helper method to fetch a single file and provide its full path
+
+        Parameters
+        ----------
+        fname : str
+            The name of the file to be fetched
+        full_checksum_str : str
+            The checksum of the file, formatted as
+            ``"<checksum_kind>:<checksum>"``
+        lockfile_ctx : LockFileContext, optional
+            When this is None, the calling process doesn't already own the
+            lock for the data-directory (and this function will try to
+            acquire the lock)
+
+        Returns
+        -------
+        any_work : bool
+            Indicates whether any work was actually required to fetch the
+            file. ``True`` indicates that we actually needed to go get the
+            file, while ``False`` denotes that the file already existed
+        full_fname : str
+            Specifies the absolute path to the file. When a tracked file
+            is fetched, then this is the path to the file entry where
+            `os.path.basename(full_fname)` is equal to `fname`
+        """
+
+        if lockfile_ctx is None:
+            lockfile_ctx = self._setup_file_system()
+
+        # extract the checksum_kind and string that are stored in the registry
+        if ":" in full_checksum_str:
+            cur_cksum_kind, checksum = full_checksum_str.split(":")
+        else:
+            raise ValueError(
+                f"the checksum for {fname} does not specify the checksum kind"
+            )
+
+        # when tracking files as part of the data file management system, there are
+        # strict requirements on the kind of checksum that is used.
+        # -> This is because the entries in the object directory (used for
+        #    deduplication) use the checksum string (without the algorithm tag) as
+        #    file names.
+        # -> here, we check this requirement
+        if self.manages_untracked_data():
+            req_cksum_kind = None
+        else:
+            req_cksum_kind = self.data_store_config.checksum_kind
+            if cur_cksum_kind != req_cksum_kind:
+                raise ValueError(
+                    "To download a file as part of Grackle's data file management "
+                    "system, we must know the file's checksum computed with the "
+                    f"{req_cksum_kind} checksum algorithm. The provided checksum "
+                    f"for {fname} was computed with the {cur_cksum_kind} algorithm."
+                )
+
+        # now we actually fetch the file (if necessary)
+        with lockfile_ctx:
+            # get the full path to the downloaded file
+            full_fname = os.path.join(self.version_dir, fname)
+
+            # if the file already exists we are done
+            if os.path.exists(full_fname):
+                if not matches_checksum(full_fname, cur_cksum_kind, checksum):
+                    raise RuntimeError(
+                        f"{full_fname} already exists but has the wrong hash"
+                    )
+                # when we're handling tracked data, we could theoretically check
+                # whether this data is properly linked. But I don't think I would
+                # want to take any kind of action if it isn't properly tracked (the
+                # most action I would recommend is providing the user with a warning)
+                return (False, full_fname)
+
+            # download the file
+            fetcher = self.fetcher
+            fetcher(
+                fname,
+                checksum=checksum,
+                checksum_kind=cur_cksum_kind,
+                dest_dir=self.version_dir,
+            )
+            if not self.manages_untracked_data():
+                # by changing permissions, certain platforms (like MacOS) will ask
+                # users "are you sure you want to delete this file" when the ``rm``
+                # command is used:
+                # -> this is desirable for files managed by the grackle data management
+                #    system (we don't want users removing individual files)
+                # -> but, we avoid doing this for untracked data files (because it's
+                #    just an annoyance for the end-user)
+                os.chmod(full_fname, _IMMUTABLE_MODE)
+
+            if not self.manages_untracked_data():
+                # handle deduplication (since the data files are part of the larger
+                # grackle data file management system)
+                cksum_fname = os.path.join(self._object_dir(), checksum)
+
+                try:
+                    _HardlinkStrat.deduplicate(full_fname, cksum_fname)
+
+                    # not strictly necessary, but doing this for safety reasons
+                    os.chmod(cksum_fname, _IMMUTABLE_MODE)
+
+                except Exception as err:
+                    # remove full_fname
+                    # -> we don't want users to use it before resolving the issues
+                    # -> We also want to make the errors as reproducible as possible
+                    #    (ideally, rerunning the command should produce the same error
+                    #    if you haven't changed anything)
+                    os.remove(full_fname)
+                    if os.path.isfile(cksum_fname) and not isinstance(err, ValueError):
+                        raise err
+
+                    # this should only happen when full_fname and cksum_fname both
+                    # exist, but aren't perfect matches of each other. Here, we try to
+                    # provide a more informative error message
+                    if not matches_checksum(cksum_fname, req_cksum_kind, checksum):
+                        raise GenericToolError(f"""\
+A file (used for deduplication) that already existed on disk
+   `{cksum_fname}`
+which is probably a version of `{fname}`,
+doesn't have the appropriate {req_cksum_kind} checksum.
+-> expected: {checksum}
+-> actual: {calc_checksum(cksum_fname, req_cksum_kind)}
+-> This implies that the data was corrupted and it needs to be dealt with.
+   To avoid confusion we have deleted the newly downloaded version of
+   `{fname}`
+-> The safest bet is probably to delete the data directory""")
+                    else:
+                        raise GenericToolError(f"""\
+Something bizarre (& extremely unlikely) happened:
+-> a previous invocation of this tool appears to have installed a data file
+   with the same checksum as {fname}, but with different contents.
+-> we adopt a similar system to git and the odds for this to organically
+   happen for a small collection of files is truly astronomical!
+-> this is probably a sign that something went wrong. We deleted the newly
+   downloaded version of the file""")
+            return (True, full_fname)
+
+    def fetch_all(self, registry, *, fnames=None):
+        """
+        Ensures that files in the specified registry are downloaded
+
+        Parameters
+        ----------
+        registry : dict
+            maps file names to associated checksums
+        fnames : sequence, optional
+            Optionally specifies a list of files, with corresponding
+            registry entries, to fetch. When this is ``None``, all files in
+            the registry are fetched.
+        """
+
+        if fnames is None:
+            fname_cksum_pairs = registry.items()
+        else:
+            for fname in filter(lambda fname: fname not in registry, fnames):
+                raise ValueError(
+                    f"{fname} is not the name of a file with a registry "
+                    "entry. Thus it can't be downloaded.\n\nFiles with "
+                    f"registry entries include: {list(registry.keys())!r}"
+                )
+            fname_cksum_pairs = ((fname, registry[fname]) for fname in fnames)
+
+        # ensure all needed directories exist and fetch the lockfile context manager
+        lockfile_ctx = self._setup_file_system()
+
+        with lockfile_ctx:
+            num_fetched = 0
+            _pretty_log(f"preparing to fetch files from: {self.fetcher.base_path}")
+            for fname, full_checksum_str in fname_cksum_pairs:
+                any_work, _ = self._fetch_file(
+                    fname, full_checksum_str, lockfile_ctx=lockfile_ctx
+                )
+                num_fetched += any_work
+
+            if num_fetched == 0:
+                _pretty_log("-> no files needed to be retrieved", indent_all=True)
+
+
+def fetch_command(args, tool_config, data_store_config):
+    override_fetcher = None
+    if args.from_dir is not None:
+        override_fetcher = Fetcher.configure_src_dir(args.from_dir)
+
+    fnames = None if len(args.fnames) == 0 else args.fnames
+
+    man = VersionDataManager.create(
+        tool_config=tool_config,
+        data_store_config=data_store_config,
+        untracked_dest_dir=args.untracked_dest_dir,
+        override_fetcher=override_fetcher,
+    )
+    registry = _parse_file_registry(data_store_config.file_registry_file)
+    man.fetch_all(registry, fnames=fnames)
+
+
+def _register_fetch_command(subparsers):
+    parser_fetch = subparsers.add_parser(
+        "fetch",
+        help=(
+            "fetch data files if we don't already have the data for the "
+            "associated version of grackle"
+        ),
+    )
+    parser_fetch.add_argument(
+        "fnames",
+        nargs="*",
+        help=(
+            "Optionally specify the names of files that should be fetched. "
+            "Each listed file must have a corresponding entry in the file "
+            "registry used by this tool. If no files are specified, then the "
+            "tool will fetch every known file."
+        ),
+    )
+    parser_fetch.add_argument(
+        "--untracked-dest-dir",
+        default=None,
+        help=(
+            "This flag instructs the tool to download files to an arbitrary "
+            "directory, where the data files will NOT be stored as part of "
+            "the data file management system. The specified directory should "
+            "NOT be located inside the grackle data directory. The tool also "
+            "won't use any kind of lock file (thus, it's the user's "
+            "responsibility to ensure that only a single process is modifying "
+            "the specified directory at any given time). This is provided "
+            "mostly as a convenience; management of the files in this "
+            "directory is outside this tool's scope."
+        ),
+    )
+    parser_fetch.add_argument(
+        "--from-dir",
+        default=None,
+        help=(
+            "optionally specify a path to a directory where we copy the files from "
+            "(instead of downloading them)"
+        ),
+    )
+    parser_fetch.set_defaults(func=fetch_command)
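+
+
+# Example invocations of the fetch subcommand (illustrative; the program name
+# depends on how the tool is installed -- e.g. `python -m pygrackle` or a
+# standalone `grdata` script):
+#
+#     grdata fetch                              # fetch every registered file
+#     grdata fetch CloudyData_UVB=HM2012.h5     # fetch one registered file
+#     grdata fetch --from-dir /path/to/copies   # copy instead of downloading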
+
+
+def direntry_iter(path, *, ftype="file", mismatch="skip", ignore=None):
+    """
+    Iterate over the contents of a single directory, with a focus on entries
+    of a particular file type.
+
+    Parameters
+    ----------
+    path : str
+        path to the directory
+    ftype : {None, 'file', 'dir'}
+        When not ``None``, the iterator only produces entries for the
+        specified file-type
+    mismatch : {'skip', 'lazy_err', 'eager_err'}
+        Specifies the action to take when this generator encounters an
+        entry in ``path`` that doesn't have the specified type.
+          * ``'skip'`` means to simply skip the entry
+          * ``'lazy_err'`` means that we raise an error
+          * ``'eager_err'`` means that we check for any mismatches up front
+            and raise an error if any mismatch is encountered; afterwards,
+            we start yielding elements
+    ignore : container of str, optional
+        Optional container of entry names that are ignored
+
+    Yields
+    ------
+    pair : tuple of two str
+        The first element is the entry's base filename and the second is the
+        full path
+    """
+
+    def always_true(*args):
+        return True
+
+    if ftype is None:
+        has_ftype = always_true
+    elif ftype == "dir":
+        has_ftype = os.path.isdir
+    elif ftype == "file":
+        has_ftype = os.path.isfile
+    else:
+        raise ValueError("ftype must be None, 'file' or 'dir'")
+
+    if ignore is None:
+        ignore = []
+    elif isinstance(ignore, str):
+        raise TypeError("ignore can't be a string")
+
+    it = map(
+        lambda e: (e, os.path.join(path, e)),
+        filter(lambda e: e not in ignore, os.listdir(path)),
+    )
+    if mismatch == "eager_err":
+        for pair in direntry_iter(path, ftype=ftype, mismatch="lazy_err"):
+            pass
+        yield from it
+    elif mismatch in ["lazy_err", "skip"]:
+        for pair in it:
+            if has_ftype(pair[1]):
+                yield pair
+            elif mismatch == "lazy_err":
+                raise RuntimeError(f"{pair[1]} isn't a {ftype}")
+    else:
+        raise ValueError("mismatch must be 'eager_err', 'lazy_err' or 'skip'")
+
+
+def rm_command(args, tool_config, data_store_config):
+    """Logic for removing files"""
+    if args.vdata is _UNSPECIFIED:
+        # this means that we are removing the whole data store
+        if not args.data_store:
+            raise RuntimeError("SOMETHING WENT HORRIBLY, HORRIBLY WRONG")
+
+        _descr = os.path.basename(data_store_config.store_location)
+        target_path = data_store_config.store_location
+        operation_description = (
+            f"deleting ALL files in the data-store associated with this tool, {_descr}"
+        )
+        if not os.path.isdir(target_path):
+            raise GenericToolError(
+                "intended to recursively delete all contents of the associated "
+                "data-store. But no such directory can be found."
+            )
+
+        fn = shutil.rmtree
+
+    else:
+        if args.vdata is None:
+            target = tool_config.grackle_version
+            _descr = f"associated with this tool (`{tool_config.grackle_version}`)"
+        else:
+            target = args.vdata
+            _descr = f"`{target}`"
+        target_path = os.path.join(data_store_config.store_location, target)
+        operation_description = (
+            f"deleting all data file references for the grackle-version {_descr}. "
+            "Any files for which the reference-count drops to zero will also be "
+            "removed."
+        )
+
+        if not os.path.isdir(target_path):
+            raise GenericToolError(
+                "intended to delete all data-file references for the grackle-version "
+                f"{_descr}, but no such data is tracked in the data-store."
+            )
+
+        def fn(path):
+            object_dir = os.path.join(
+                data_store_config.store_location, _OBJECT_STORE_SUBDIR
+            )
+            if not os.path.isdir(object_dir):
+                raise RuntimeError(
+                    f"SOMETHING IS HORRIBLY WRONG!!! THE {object_dir} IS MISSING"
+                )
+
+            # we throw an err if this directory contains some unexpected stuff
+            it = direntry_iter(path, ftype="file", mismatch="eager_err")
+            for name, full_path in it:
+                # get path to corresponding hardlinked file in _OBJECT_STORE_SUBDIR
+                checksum = calc_checksum(full_path, alg_name=tool_config.checksum_kind)
+                cksum_fname = os.path.join(object_dir, checksum)
+                cksum_fname_exists = os.path.isfile(cksum_fname)
+
+                if not cksum_fname_exists:
+                    warnings.warn(
+                        "Something weird has happened. There is no deduplication "
+                        f"file associated with {full_path}"
+                    )
+                os.remove(full_path)
+                if cksum_fname_exists:
+                    _HardlinkStrat.remove_if_norefs(cksum_fname)
+            os.rmdir(path)
+
+    with standard_lockfile(data_store_config):
+        if not args.force:
+            _pretty_log(
+                f"{operation_description}\n"
+                "-> essentially, we are recursively removing\n"
+                f"   `{target_path}`\n"
+                "-> to actually perform this command, pass the --force flag"
+            )
+        else:
+            fn(target_path)
+
+
+def _register_rm_command(subparsers):
+    parser_rm = subparsers.add_parser(
+        "rm", help="remove data associated with a given version"
+    )
+    parser_rm.add_argument(
+        "-f",
+        "--force",
+        action="store_true",
+        help="This option must be present to actually remove things",
+    )
+    rm_spec_grp = parser_rm.add_argument_group(
+        title="Target", description="specifies the target that will be removed"
+    ).add_mutually_exclusive_group(required=True)
+    rm_spec_grp.add_argument(
+        "--data-store", action="store_true", help="remove the full data-store"
+    )
+    rm_spec_grp.add_argument(
+        "--vdata",
+        default=_UNSPECIFIED,
+        nargs="?",
+        help="remove all data associated with the contemporaneous grackle version",
+    )
+    parser_rm.set_defaults(func=rm_command)
+
+
+def lsversions_command(args, tool_config, data_store_config):
+    if not os.path.exists(data_store_config.store_location):
+        # there is nothing to iterate over (and trying would raise an error)
+        print("there is no data")
+        return
+    with standard_lockfile(data_store_config):
+        it = direntry_iter(
+            data_store_config.store_location,
+            ftype="dir",
+            mismatch="lazy_err",
+            ignore=[_OBJECT_STORE_SUBDIR],
+        )
+        print(*sorted(pair[0] for pair in it), sep="\n")
+
+
+def _register_lsversions_command(subparsers):
+    parser_ls = subparsers.add_parser("ls-versions", help="list the versions")
+    parser_ls.set_defaults(func=lsversions_command)
+
+
+def getpath_command(args, tool_config, data_store_config):
+    if args.data_dir:
+        print(data_store_config.data_dir)
+    elif args.data_store:
+        print(data_store_config.store_location)
+    else:
+        assert args.vdata is not _UNSPECIFIED  # sanity check!
+        if args.vdata is None:
+            version = tool_config.grackle_version
+        else:
+            version = args.vdata
+        print(os.path.join(data_store_config.store_location, version))
+
+
+def _register_getpath_command(subparsers):
+    parser_getpath = subparsers.add_parser(
+        "getpath",
+        description=(
+            "Provides the expected filesystem location for data. This command "
+            "doesn't care about whether the filesystem location actually exists."
+        ),
+        help="show expected filesystem location for data.",
+    )
+    getpath_spec_grp = parser_getpath.add_argument_group(
+        title="Target",
+        description="specifies the target that we retrieve the path for.",
+    ).add_mutually_exclusive_group(required=True)
+    getpath_spec_grp.add_argument(
+        "--data-dir", action="store_true", help="get path to the data directory"
+    )
+    getpath_spec_grp.add_argument(
+        "--data-store",
+        action="store_true",
+        help="get path to the data-store (for the protocol version used by this tool)",
+    )
+    getpath_spec_grp.add_argument(
+        "--vdata",
+        default=_UNSPECIFIED,
+        nargs="?",
+        help=(
+            "get path to the directory of file-references associated with the "
+            "specified version. This command assumes that the version-data was "
+            "managed by a version of this tool that uses the same protocol version "
+            "as the version returned by --version-protocol. If no version is "
+            "specified, it uses the version returned by --version-grackle."
+        ),
+    )
+    parser_getpath.set_defaults(func=getpath_command)
+
+
+def showknownreg_command(args, tool_config, data_store_config):
+    f = data_store_config.file_registry_file
+    if isinstance(f, io.IOBase):
+        lines = f.readlines()
+    else:
+        with open(data_store_config.file_registry_file, "r") as f:
+            lines = f.readlines()
+    contents = [
+        line for line in lines if len(line.strip()) > 0 and not line.startswith("//")
+    ]
+    print(*contents, sep="", end="")
+
+
+def _register_showknownreg_command(subparsers):
+    parser_showknownreg = subparsers.add_parser(
+        "showknownreg",
+        help=(
+            "prints the pre-registered file registry expected by the current version "
+            "of Grackle"
+        ),
+    )
+    parser_showknownreg.set_defaults(func=showknownreg_command)
+
+
+def _fmt_registry_lines(fname_cksum_pairs, hash_alg):
+    length, suffix = len(fname_cksum_pairs), (",\n", "\n")
+    return [
+        f'{{"{fname}", "{hash_alg}:{cksum}"}}{suffix[(i+1) == length]}'
+        for i, (fname, cksum) in enumerate(sorted(fname_cksum_pairs))
+    ]
+
+
+def calcreg_command(args, tool_config, data_store_config):
+    # print the file registry information (in the proper format that can be
+    # used to configure newer versions of Grackle)
+
+    # we use os.listdir since we originally targeted python 3.3, but we set
+    # things up so that we could switch to os.scandir
+    try:
+        it = direntry_iter(args.path, ftype="file", mismatch="eager_err")
+    except FileNotFoundError:
+        raise ValueError(f"{args.path!r} doesn't specify a directory or file")
+    except NotADirectoryError:
+        it = [(os.path.basename(args.path), args.path)]
+
+    pairs = [(name, calc_checksum(path, args.hash_name)) for name, path in it]
+
+    with contextlib.ExitStack() as stack:
+        if args.output is None:
+            file = sys.stdout
+        else:
+            file = stack.enter_context(open(args.output, "w"))
+            # when writing to a file, lead with some metadata (as comments) --
+            # this matches the behavior promised by the --output help text. The
+            # angle-bracket placeholders are to be filled in by the reader.
+            file.write(f"""\
+// This is a file registry generated by the grackle data management tool
+// To overwrite this file with an updated copy (assuming that pygrackle is
+// installed), you might invoke:
+//     python -m pygrackle calcreg --hash-name {args.hash_name} -o <output> <dir>
+// in this sample command, you would substitute:
+//    -> `<output>` with a path to the output file
+//    -> `<dir>` with a path to the directory containing all files that are
+//       to be included in the registry
+""")
+        print(*_fmt_registry_lines(pairs, args.hash_name), sep="", end="", file=file)
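+
+
+# For reference, the emitted registry lines look something like the following
+# (file names and hashes here are made up for illustration):
+#
+#     {"fileA.h5", "sha1:0123abcd"},
+#     {"fileB.h5", "sha1:4567ef01"}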
+
+
+def _register_calcreg_command(subparsers):
+    parser_calcregistry = subparsers.add_parser(
+        "calcreg",
+        help=(
+            "prints the file registry (file hash pairs) for a given directory. This "
+            "computed registry can be used to configure future versions of Grackle."
+        ),
+    )
+    parser_calcregistry.add_argument(
+        "-o",
+        "--output",
+        metavar="FILE",
+        help=(
+            "Write the output to a file instead of stdout. The file will include "
+            "extra metadata (as comments)."
+        ),
+    )
+    parser_calcregistry.add_argument(
+        "--hash-name",
+        required=True,
+        metavar="HASH",
+        choices=hashlib.algorithms_guaranteed,
+        help=(
+            "the kind of checksum to compute. Must be one of: "
+            f"{', '.join(sorted(hashlib.algorithms_guaranteed))}"
+        ),
+    )
+    parser_calcregistry.add_argument(
+        "path", help="path to the directory containing the files in the registry"
+    )
+    parser_calcregistry.set_defaults(func=calcreg_command)
+
+
+def help_command(*args, **kwargs):
+    # it might be nice to pipe the output to a pager (e.g. the program named by
+    # the PAGER environment variable, when it is defined)
+
+    # here is some logic to strip anchors:
+    # replace the [[[BEGIN:...]]] & [[[END:...]]] anchors
+    _open, _close = r"\[\[\[", r"\]\]\]"
+    section_start_anchor = re.compile(
+        rf"^{_open}BEGIN-SECTION:([-,_+.! 0-9A-Za-z]+){_close}[ \t]*$"
+    )
+    generic_anchor = re.compile(rf"^{_open}[-:,_+.! 0-9A-Za-z]+{_close}[ \t]*$")
+
+    for line in _EXTENDED_DESCRIPTION.splitlines():
+        m = section_start_anchor.match(line)
+        if m:
+            section_name = m.group(1)
+            print(section_name, len(section_name) * "-", sep="\n")
+        elif generic_anchor.match(line):
+            continue
+        else:
+            print(line)
+
+
+def _register_help_command(subparsers):
+    parser_help = subparsers.add_parser(
+        "help", help="Display detailed help information about this tool"
+    )
+    parser_help.set_defaults(func=help_command)
+
+
+def _add_program_prop_query(parser, flag, value, short_descr):
+    """
+    add a flag to parser to trigger a control flow that:
+      1. shows a fundamental piece of information about the command line program
+         (like a version number or the ``--help`` option)
+      2. then immediately exits the program
+    """
+
+    class _Action(argparse.Action):
+        def __call__(self, *args, **kwargs):
+            print(value)
+            sys.exit(0)
+
+    parser.add_argument(
+        flag,
+        metavar="",
+        action=_Action,
+        nargs=0,
+        help=f"show associated {short_descr} and exit",
+    )
+
+
+def build_parser(tool_config, prog_name):
+    parser = argparse.ArgumentParser(
+        prog=prog_name,
+        description=(
+            "This is a management system for Grackle's data files. Subcommands are "
+            "provided to fetch data files, list all available data, and delete data"
+        ),
+        epilog=f"Invoke `{prog_name} help` to get a detailed overview of the tool",
+    )
+
+    # The following are hidden arguments. They are only used for the sake of
+    # testing (we may remove them at any time in the future)
+    parser.add_argument(
+        "--testing-override-registry-file",
+        help=argparse.SUPPRESS,  # hides the help message
+        default=argparse.SUPPRESS,  # adds no attribute if option wasn't specified
+    )
+    parser.add_argument(
+        "--testing-override-version-grackle",
+        help=argparse.SUPPRESS,  # hides the help message
+        default=argparse.SUPPRESS,  # adds no attribute if option wasn't specified
+    )
+
+    query_l = [
+        ("--version-grackle", "Grackle version", tool_config.grackle_version),
+        (
+            "--version-protocol",
+            "data-store protocol version",
+            tool_config.protocol_version,
+        ),
+        ("--cksum-alg", "name of the checksum algorithm", tool_config.checksum_kind),
+    ]
+    for flag, short_descr, val in query_l:
+        _add_program_prop_query(parser, flag, val, short_descr)
+
+    subparsers = parser.add_subparsers(required=True)
+
+    _register_fetch_command(subparsers)
+    _register_rm_command(subparsers)
+    _register_lsversions_command(subparsers)
+    _register_getpath_command(subparsers)
+    _register_showknownreg_command(subparsers)
+    _register_calcreg_command(subparsers)
+    _register_help_command(subparsers)
+
+    return parser
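+
+
+# A sketch of how an embedder invokes ``main`` (mirrors the __main__ logic at
+# the bottom of this file and pygrackle's own entry point; the version string
+# and registry path below are illustrative):
+#
+#     tool_cfg, store_cfg = make_config_objects(
+#         grackle_version="3.4.0",
+#         file_registry_file="path/to/file_registry.txt",
+#     )
+#     sys.exit(main(tool_cfg, store_cfg, prog_name="grdata"))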
+
+
+def main(tool_config, data_store_config, prog_name, *, args=None):
+    """
+    Launch the command
+
+    Returns
+    -------
+    int
+        Specifies the exit code
+    """
+    parser = build_parser(tool_config, prog_name)
+    args = parser.parse_args(args=args)
+
+    # handle testing overrides
+    if hasattr(args, "testing_override_registry_file"):
+        # _replace makes a copy & in the copy any specified attributes are overridden
+        data_store_config = data_store_config._replace(
+            file_registry_file=args.testing_override_registry_file
+        )
+    if hasattr(args, "testing_override_version_grackle"):
+        # _replace makes a copy & in the copy any specified attributes are overridden
+        tool_config = tool_config._replace(
+            grackle_version=args.testing_override_version_grackle
+        )
+
+    try:
+        args.func(args, tool_config=tool_config, data_store_config=data_store_config)
+    except SystemExit:
+        pass  # this shouldn't come up!
+    except LockFileExistsError as err:
+        lock_file_path = err.filename
+        print(
+            f"""\
+ERROR: The `{lock_file_path}` lock-file already exists.
+-> This probably means that another copy of this tool is currently running.
+-> If you are absolutely sure that's not the case, that probably means that a copy
+   of this tool previously crashed""",
+            file=sys.stderr,
+        )
+        return 78  # https://www.man7.org/linux/man-pages/man3/sysexits.h.3head.html
+    except GenericToolError as err:
+        print(f"ERROR: {err.args[0]}", file=sys.stderr)
+        return 70  # https://www.man7.org/linux/man-pages/man3/sysexits.h.3head.html
+    except BaseException:
+        print("Unexpected error:", file=sys.stderr)
+        traceback.print_exc(file=sys.stderr)
+        return 70  # https://www.man7.org/linux/man-pages/man3/sysexits.h.3head.html
+    else:
+        return 0
+
+
+def _default_data_store_config(tool_config, file_registry_file):
+    """Provides the default data configuration"""
+    _REPO_URL = "https://github.com/grackle-project/grackle_data_files/"
+
+    # this is the hash of the commit that holds the versions of the datafiles from
+    # the time when this version of the file was shipped
+    _CONTEMPORANEOUS_COMMIT_HASH = "9a63dbefeb1410483df0071eefcbff666f40816d"
+
+    # note: the file registry is stored in a format that could be injected into
+    # a C file as a literal
+    data_dir = _get_data_dir()
+    protocol_version = tool_config.protocol_version
+    return DataStoreConfig(
+        data_dir=data_dir,
+        store_location=os.path.join(data_dir, f"data-store-v{protocol_version}"),
+        default_fetcher=Fetcher.configure_GitHub_url(
+            data_repository_url=_REPO_URL,
+            contemporaneous_git_hash=_CONTEMPORANEOUS_COMMIT_HASH,
+        ),
+        checksum_kind=tool_config.checksum_kind,
+        file_registry_file=file_registry_file,
+    )
+
+
+def make_config_objects(grackle_version, file_registry_file):
+    """Construct the pair of configuration objects used for running the calculation
+
+    Parameters
+    ----------
+    grackle_version : str
+        the version of grackle (NOT pygrackle)
+    file_registry_file : file or str or bytes or ``os.PathLike``
+        Contains the file registry
+    """
+    tool_config = ToolConfig(grackle_version=grackle_version)
+    data_store_config = _default_data_store_config(tool_config, file_registry_file)
+    return tool_config, data_store_config
+
+
+# Here, we define machinery employed when used as a standalone program
+# ====================================================================
+
+# to support installing this file as a standalone program, we will need to introduce
+# the following procedure to the build-system:
+#   - treat this file as a template-file and configure it with CMake's
+#     ``configure_file`` command (or invoke ``configure_file.py`` under the classic
+#     build system) in order to substitute the names enclosed by the "at sign" symbol
+#   - make the resulting file executable (and maybe drop the .py suffix)
+#   - install it into the bin directory alongside the grackle libraries
+
+if __name__ == "__main__":
+    _GRACKLE_VERSION = "@_GRDATA_GRACKLE_VERSION@"
+    _FILE_REGISTRY_CONTENTS = """\
+@_GRDATA_FILE_REGISTRY_CONTENTS@
+"""
+
+    def _check_substitution_problems(var_name, var_value):
+        # we use the unicode escape sequence, \u0040, that python automatically
+        # converts to the "at sign" to prevent the configure_file.py script
+        # (used by Grackle's build-system) from falsely reporting an error
+        if (
+            (var_name in var_value)
+            or ("\u0040" in var_value)
+            or (len(var_value) == 0)
+            or (var_value.isspace())
+        ):
+            raise RuntimeError(
+                "something went wrong when the build-system was configuring the "
+                f"{var_name} variable"
+            )
+
+    _check_substitution_problems("GRACKLE_VERSION", _GRACKLE_VERSION)
+    _check_substitution_problems("FILE_REGISTRY_CONTENTS", _FILE_REGISTRY_CONTENTS)
+
+    _CONFIG_PAIR = make_config_objects(
+        grackle_version=_GRACKLE_VERSION,
+        file_registry_file=io.StringIO(_FILE_REGISTRY_CONTENTS),
+    )
+    sys.exit(main(*_CONFIG_PAIR, prog_name="grdata"))
diff --git a/src/python/pygrackle/utilities/testing.py b/src/python/pygrackle/utilities/testing.py
index 71dae01e..8dbaf963 100644
--- a/src/python/pygrackle/utilities/testing.py
+++ b/src/python/pygrackle/utilities/testing.py
@@ -99,3 +99,53 @@ def ensure_dir(path):
         else:
             raise
     return path
+
+
+def _fetch_keys(actual, reference, err_msg=""):
+    # check consistency in dictionary keys
+    refkeys = reference.keys()
+    refkey_set = set(refkeys)
+    mismatch_keys = refkey_set.symmetric_difference(actual.keys())
+
+    if len(mismatch_keys):
+        shared_keys = list(refkey_set.intersection(actual.keys()))
+        extra_ref, extra_actual = [], []
+        for k in mismatch_keys:
+            if k in refkeys:
+                extra_ref.append(k)
+            else:
+                extra_actual.append(k)
+
+        raise AssertionError(
+            "The results are not equal to specified tolerance.\n"
+            f"{err_msg}\n"
+            "There is a mismatch in the keys. Both results share the keys:\n"
+            f"  {shared_keys!r}\n"
+            "Extra Keys:\n"
+            f"  actual: {extra_actual}\n"
+            f"  reference: {extra_ref}"
+        )
+    return list(refkeys)
+
+
+def assert_allequal_arraydict(actual, reference, err_msg=""):
+    """
+    Raises an AssertionError if any contents of the 2 compared mappings of
+    arrays are not EXACTLY equal
+
+    Parameters
+    ----------
+    actual : mapping
+        A mapping of arrays obtained in a calculation
+    reference : mapping
+        A mapping of reference arrays
+    err_msg : str
+        Custom error message to be printed in case of failure.
+
+    Note
+    ----
+    A separate function is proposed as part of PR #195 to do approximate
+    equality checks (like np.testing.assert_allclose).
+    """
+    __tracebackhide__ = True  # control pytest traceback depth
+
+    keys = _fetch_keys(actual, reference, err_msg=err_msg)
+    for key in keys:
+        assert_array_equal(actual[key], reference[key], err_msg=err_msg, strict=True)
diff --git a/src/python/tests/conftest.py b/src/python/tests/conftest.py
new file mode 100644
index 00000000..f3051408
--- /dev/null
+++ b/src/python/tests/conftest.py
@@ -0,0 +1,12 @@
+# define some hook-functions that will customize pytest's behavior
+
+from pygrackle.utilities.data_path import _download_all_datafiles
+
+
+def pytest_sessionstart(session):
+    # this is a hook that is called just before collecting tests and entering
+    # the test loop.
+
+    # All we want to do is make sure that we have all of the data files that we
+    # need downloaded (This might not be the right place to put this logic)
+    _download_all_datafiles()
diff --git a/src/python/tests/test_auto_files.py b/src/python/tests/test_auto_files.py
new file mode 100644
index 00000000..011167f0
--- /dev/null
+++ b/src/python/tests/test_auto_files.py
@@ -0,0 +1,341 @@
+########################################################################
+#
+# Test the machinery for automatically locating (and fetching) data files
+#
+#
+# Copyright (c) 2013, Enzo/Grackle Development Team.
+#
+# Distributed under the terms of the Enzo Public Licence.
+#
+# The full license is in the file LICENSE, distributed with this
+# software.
+########################################################################
+
+import contextlib
+import io
+import os
+import shutil
+
+import numpy as np
+import pytest
+
+
+from pygrackle import setup_fluid_container, constants
+from pygrackle.utilities.data_path import (
+    _make_config_pair,
+    _fnames_in_registry,
+)
+from pygrackle.utilities.grdata import main
+from pygrackle.utilities.physical_constants import sec_per_Myr
+from pygrackle.utilities.testing import assert_allequal_arraydict, ensure_dir
+
+from test_grdata import (
+    _ENV_VARS,  # holds list of environment variables that affect data dir location
+    modified_env,
+)
+from test_query_units import _setup_generic_chemistry_data
+
+
+# we probably don't have to skip everything
+if not hasattr(os, "putenv"):
+    pytest.skip(
+        "several tests need os.putenv to work properly", allow_module_level=True
+    )
+
+
+# it would be nice to replace the following with test_grdata.CLIApp, but that would
+# definitely take some work
+class DataFileManagementHarness:
+    """
+    This is a wrapper around the cli interface provided by pygrackle.
+
+    This mainly exists to make it easier for us to wrap a standalone script
+    in the future that isn't part of pygrackle
+    """
+
+    def __init__(self, config_pair=None):
+        self.config_pair = config_pair
+        self.fnames_in_registry = _fnames_in_registry()
+
+    def __call__(self, args):
+        """pass in cli args. The exit code and the captured stdout are returned"""
+        if (args is None) or isinstance(args, str) or not isinstance(args[0], str):
+            raise RuntimeError("invalid args: sanity check failed!")
+        config_pair = self.config_pair
+        if config_pair is None:
+            config_pair = _make_config_pair()
+        tmp = io.StringIO()
+        with contextlib.redirect_stdout(tmp):
+            exitcode = main(*config_pair, prog_name="python -m pygrackle", args=args)
+        return exitcode, tmp.getvalue().rstrip()
+
+    def version_dir_path(self):
+        rc, current_version_data_path = self(["getpath", "--vdata"])
+        if rc != 0:
+            raise RuntimeError("something went horribly wrong")
+        return current_version_data_path
+
+    def data_dir_path(self):
+        rc, current_version_data_path = self(["getpath", "--data-dir"])
+        if rc != 0:
+            raise RuntimeError("something went horribly wrong")
+        return current_version_data_path
+
+
+# this one is locked to the environment variables as they are set right now
+_static_GRDATA = DataFileManagementHarness(_make_config_pair())
+# this one will be affected by changes in the environment variables
+_flexible_GRDATA = DataFileManagementHarness()
+
+
+@contextlib.contextmanager
+def tmpversiondir_with_file(input_path, env, fname=None, *, cleanup_on_close=False):
+    """
+    A context manager that sets up a temporary directory on disk that appears
+    (to the Grackle library) as if the grdata tool set up a data directory
+    (the location is governed by the environment variables specified by
+    env), containing a single file called ``fname``, which is a copy of
+    the file at ``input_path``.
+
+    In practice, the data-directory structure may not actually be
+    managed by the grdata tool. Consequently, some implementation
+    details (e.g. related to deduplication) may not be defined. But,
+    that's ok since the logic in the Grackle library should only care
+    about whether a file (or link) shows up in the version directory.
+
+    Parameters
+    ----------
+    input_path : str
+        the path to the file we will copy
+    env : dict of strs
+        Dictionary holding the new values that we will use
+    fname : str, optional
+        This is the name of the file as it appears inside of the
+        versiondir. When not specified, this is inferred from
+        ``input_path``
+    """
+    for var, val in env.items():
+        if var not in _ENV_VARS:
+            raise ValueError(f"{var} isn't a known overridable env variable.")
+    if not os.path.isfile(input_path):
+        raise ValueError("input_path must specify a real file")
+
+    with modified_env(env, extra_cleared_variables=_ENV_VARS):
+        try:
+            data_dir = _flexible_GRDATA.data_dir_path()
+            if os.path.isdir(data_dir) and (len(os.listdir(data_dir)) > 0):
+                raise ValueError(
+                    "sanity check: this context manager requires that you specify "
+                    "environment variables that lead to a data directory that doesn't "
+                    "exist yet (or at least is empty)"
+                )
+
+            version_dir = _flexible_GRDATA.version_dir_path()
+            if fname is None:
+                # in the future, we may want to actually invoke GRDATA to make the copy
+                # in this case
+                fname = os.path.basename(input_path)
+
+            ensure_dir(version_dir)
+            full_path = os.path.join(version_dir, fname)
+            shutil.copy(input_path, full_path)
+            yield full_path
+
+        finally:
+            if cleanup_on_close:
+                shutil.rmtree(_flexible_GRDATA.data_dir_path())
+
+
+def _check_valid_datafile_fname(fname):
+    if fname not in _static_GRDATA.fnames_in_registry:
+        pytest.skip(
+            f"test is broken since {fname} is not a datafile distributed "
+            "with the current version of Grackle"
+        )
+
+
+@pytest.fixture(scope="function")
+def managed_datafile(request, tmp_path):
+    """
+    A pytest fixture that ensures that a data-directory (and associated
+    environment variables specifying its location) is correctly configured
+    so that Grackle's internal logic can automatically look up the
+    location of a standard datafile for the duration of a test.
+
+    The standard datafile is "CloudyData_UVB=HM2012.h5".
+
+    For the sake of convenience, this fixture provides the full
+    path of the datafile (in that data directory) to the test.
+
+    This operates in 2 modes:
+      1. when `getattr(request, "param", None)` is `None`, we use the
+         existing data directory (essentially we ignore the tmp_path
+         fixture).
+      2. otherwise, we use the `tmpversiondir_with_file` context manager
+         to temporarily (for the duration of the test) delete any/all
+         environment variables that could control the location of the
+         data-directory and replace them with the environment variable
+         specified by `request.param`.
+         - That environment variable hints at the location of a
+           temporary data directory.
+         - The location of that directory is controlled by the path
+           provided by pytest's `tmp_path` fixture.
+         - We also copy the standard datafile into the appropriate
+           location within the data directory so that the test can
+           actually read in the data file.
+
+    Note
+    ----
+    If we want to parameterize the actual name of the file, then maybe we should
+    return some kind of factory?
+    """
+
+    fname = "CloudyData_UVB=HM2012.h5"
+    _check_valid_datafile_fname(fname)
+
+    existing_fname_path = os.path.join(_static_GRDATA.version_dir_path(), fname)
+
+    if getattr(request, "param", None) is None:
+        full_path = existing_fname_path
+        yield full_path
+    else:
+        env_var = request.param
+        with tmpversiondir_with_file(
+            input_path=existing_fname_path, env={env_var: str(tmp_path)}
+        ) as full_path:
+            yield full_path
+
+
+def setup_generic_problem(parameter_overrides={}):
+    """set up a really simplistic problem"""
+    chem = _setup_generic_chemistry_data(
+        initial_redshift=2.7, parameter_overrides=parameter_overrides
+    )
+    # the precise details don't really matter here...
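+    # (we just need a small grid of fluid states, evolved for one fixed
+    # timestep, so that two runs can be compared element-by-element)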
+    dt = sec_per_Myr / chem.time_units
+    fc = setup_fluid_container(
+        chem,
+        density=1.67e-24,
+        temperature=np.geomspace(1e3, 1e7, num=11),
+        metal_mass_fraction=0.01,  # kinda arbitrary
+        state="ionized",
+        converge=False,
+    )
+    return fc, dt
+
+
+@pytest.mark.parametrize(
+    "managed_datafile",
+    (
+        [pytest.param(None, id="default-datadir")]
+        + [pytest.param(var, id=f"arbitrary-{var}") for var in _ENV_VARS]
+    ),
+    indirect=True,
+)
+def test_autofile_equivalence(managed_datafile):
+    """
+    A parameterized test that confirms that grackle produces the exact
+    same result (for a generic test problem) when:
+      - you pass grackle_data_file a full path to the data file
+      - automatic lookup is used to infer the full path (to the same file)
+
+    This test uses a parameterized fixture that may
+      - use the existing data directory variable,
+      - or use a custom environment variable that specifies the location
+        of the data file (in this case, the variable points to a location in
+        a temporary directory, where the datafile has been copied to)
+
+    Essentially, the use of parametrized fixtures lets us confirm that
+    Grackle's internal logic searches for the data files in the right
+    locations.
+    """
+
+    full_path = managed_datafile
+    fname = os.path.basename(full_path)
+
+    assert os.path.isfile(full_path)  # sanity check
+
+    # generate a simple test problem
+    fc_ref, dt = setup_generic_problem(
+        parameter_overrides={"grackle_data_file": full_path}
+    )
+    fc_ref.solve_chemistry(dt)
+
+    # rerun the same problem, but now don't use the full path
+    fc_other, _ = setup_generic_problem(
+        parameter_overrides={
+            "grackle_data_file": fname,
+            "grackle_data_file_options": constants.GR_DFOPT_MANAGED,
+        }
+    )
+    fc_other.solve_chemistry(dt)
+    assert_allequal_arraydict(fc_ref, fc_other)
+
+
+def test_autofile_fail_unknown_file():
+    # verify that the autofile machinery properly tells Grackle to abort initialization
+    # when we specify an invalid filename
+    chem = _setup_generic_chemistry_data(
+        initial_redshift=0.0,
+        skip_initialize=True,
+        parameter_overrides={
+            "grackle_data_file": "not-a-file.png",
+            "grackle_data_file_options": constants.GR_DFOPT_MANAGED,
+        },
+    )
+    assert chem.initialize() == constants.GR_FAIL
+
+
+def test_autofile_fail_known_missing_file(tmp_path):
+    # verify that the autofile machinery properly tells Grackle to abort initialization
+    # when we specify a filename known to Grackle but that is missing
+
+    fname_to_copy = "CloudyData_UVB=HM2012.h5"
+    alt_fname = "CloudyData_UVB=FG2011.h5"
+    _check_valid_datafile_fname(fname_to_copy)
+    _check_valid_datafile_fname(alt_fname)
+
+    file_to_copy = os.path.join(_static_GRDATA.version_dir_path(), fname_to_copy)
+
+    with tmpversiondir_with_file(
+        input_path=file_to_copy,
+        env={"GRACKLE_DATA_DIR": str(tmp_path)},
+    ):
+        chem = _setup_generic_chemistry_data(
+            initial_redshift=0.0,
+            skip_initialize=True,
+            parameter_overrides={
+                "grackle_data_file": alt_fname,
+                "grackle_data_file_options": constants.GR_DFOPT_MANAGED,
+            },
+        )
+        assert chem.initialize() == constants.GR_FAIL
+
+
+def test_autofile_fail_bad_checksum(tmp_path):
+    # verify that the autofile machinery properly tells Grackle to abort initialization
+    # when we specify a filename known to Grackle, that exists, but has the wrong
+    # checksum value
+
+    fname_to_copy = "CloudyData_UVB=HM2012.h5"
+    alt_fname = "CloudyData_UVB=FG2011.h5"
+    _check_valid_datafile_fname(fname_to_copy)
+    _check_valid_datafile_fname(alt_fname)
+
+
+
+def test_autofile_fail_bad_checksum(tmp_path):
+    # verify that the autofile machinery properly tells Grackle to abort
+    # initialization when we specify a filename known to Grackle, that exists,
+    # but has the wrong checksum value
+
+    fname_to_copy = "CloudyData_UVB=HM2012.h5"
+    alt_fname = "CloudyData_UVB=FG2011.h5"
+    _check_valid_datafile_fname(fname_to_copy)
+    _check_valid_datafile_fname(alt_fname)
+
+    file_to_copy = os.path.join(_static_GRDATA.version_dir_path(), fname_to_copy)
+
+    # for this test, we intentionally copy a file and give it the wrong name
+    with tmpversiondir_with_file(
+        input_path=file_to_copy,
+        env={"GRACKLE_DATA_DIR": str(tmp_path)},
+        fname=alt_fname,
+    ):
+        chem = _setup_generic_chemistry_data(
+            initial_redshift=0.0,
+            skip_initialize=True,
+            parameter_overrides={
+                "grackle_data_file": alt_fname,
+                "grackle_data_file_options": constants.GR_DFOPT_MANAGED,
+            },
+        )
+        assert chem.initialize() == constants.GR_FAIL
diff --git a/src/python/tests/test_grdata.py b/src/python/tests/test_grdata.py
new file mode 100644
index 00000000..01255530
--- /dev/null
+++ b/src/python/tests/test_grdata.py
@@ -0,0 +1,860 @@
+########################################################################
+#
+# Test the command line tool for managing data files
+#
+#
+# Copyright (c) 2013, Enzo/Grackle Development Team.
+#
+# Distributed under the terms of the Enzo Public Licence.
+#
+# The full license is in the file LICENSE, distributed with this
+# software.
+########################################################################
+
+import contextlib
+import hashlib
+import io
+import operator
+import os
+import shutil
+import subprocess
+import sys
+from textwrap import indent
+from typing import Any, NamedTuple
+
+import pytest
+
+# a goal here is to be able to run this test without installing pygrackle!
+# -> in the near future, we will install grdata as a standalone command-line
+#    script and it would be really nice to be able to test the command-line
+#    script without installing pygrackle
+# -> when that time comes, we will modify the logic within the cli_app fixture
+# -> currently, we need to include the following import. But, in the future, we
+#    could add a new subcommand to grdata to make it unnecessary
+from pygrackle.utilities.grdata import _parse_file_registry
+
+
+# _ENV_VARS holds the list of environment variables that could affect the
+# location of the data directory
+if sys.platform.startswith("darwin"):
+    _ENV_VARS = ("HOME", "GRACKLE_DATA_DIR")
+else:
+    _ENV_VARS = ("HOME", "GRACKLE_DATA_DIR", "XDG_DATA_HOME")
+
+
+def _ensure_removed(d, key):
+    try:
+        del d[key]
+    except KeyError:
+        pass
+
+
+@contextlib.contextmanager
+def modified_env(new_env_vals, extra_cleared_variables=None):
+    """
+    Temporarily overwrite environment variables. This is necessary to test C
+    extensions that rely upon the environment variables.
+    """
+    if extra_cleared_variables is None:
+        extra_cleared_variables = ()
+
+    # record the original values for any variable we will overwrite
+    original_vals = {}
+    try:
+        for var in filter(lambda e: e not in new_env_vals, extra_cleared_variables):
+            original_vals[var] = os.environ.get(var, None)
+            _ensure_removed(os.environ, var)
+
+        for var, new_val in new_env_vals.items():
+            original_vals[var] = os.environ.get(var, None)
+            if new_val is None:
+                _ensure_removed(os.environ, var)
+            else:
+                os.environ[var] = new_val
+
+        yield
+
+    finally:
+        # restore to the initial values
+        for var, val in original_vals.items():
+            if val is None:
+                _ensure_removed(os.environ, var)
+            else:
+                os.environ[var] = val
+
+
+@contextlib.contextmanager
+def custom_datadir(path):
+    """
+    A contextmanager used to put the data directory at an arbitrary location.
+    """
+    clear_env = [var for var in _ENV_VARS if var != "GRACKLE_DATA_DIR"]
+    with modified_env({"GRACKLE_DATA_DIR": path}, clear_env):
+        yield
+
+
+class GRDataExecErr(Exception):
+    pass
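To make `modified_env`'s contract concrete: variables in `new_env_vals` are set (or removed, when mapped to `None`), variables in `extra_cleared_variables` are removed, and everything is restored on exit even if the body raises. A usage sketch (the paths are made up):

```python
os.environ["GRACKLE_DATA_DIR"] = "/tmp/original"
with modified_env({"HOME": "/tmp/fake-home", "XDG_DATA_HOME": None},
                  extra_cleared_variables=["GRACKLE_DATA_DIR"]):
    assert os.environ["HOME"] == "/tmp/fake-home"
    assert "GRACKLE_DATA_DIR" not in os.environ
# on exit, all three variables hold their original values again
```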
+
+
+class CLIApp:
+    """Wraps the command-line interface.
+
+    We use this so that we can eventually test the application when it is
+    configured as a standalone script.
+    """
+
+    def __init__(self, common_args, *, use_function_call=None):
+        self.common_args = common_args
+        self.use_function_call = use_function_call
+
+    def __call__(self, subcommand_args, *, expect_success=None):
+        # have pytest hide certain kinds of noisy tracebacks
+        __tracebackhide__ = operator.methodcaller("errisinstance", GRDataExecErr)
+
+        all_args = self.common_args + subcommand_args
+        if self.use_function_call is None:
+            tmp = subprocess.run(all_args, capture_output=True)
+            returncode = tmp.returncode
+            stdout = tmp.stdout.decode("ascii")
+            stderr = tmp.stderr.decode("ascii")
+        else:
+            fn = self.use_function_call
+            with contextlib.ExitStack() as stack:
+                f_out, f_err = [
+                    stack.enter_context(contextlib.redirect_stdout(io.StringIO())),
+                    stack.enter_context(contextlib.redirect_stderr(io.StringIO())),
+                ]
+                try:
+                    returncode = fn(all_args)
+                except SystemExit as err:
+                    returncode = err.code
+                stdout = f_out.getvalue()
+                stderr = f_err.getvalue()
+
+        expected_result = (
+            (expect_success is None)
+            or (returncode == 0 and expect_success)
+            or (returncode != 0 and not expect_success)
+        )
+        if not expected_result:
+            detail_indent = "   >"
+            msg_lines = [
+                "Invocation of grdata produced an unexpected result:\n",
+                f"  expected: {('failure', 'success')[expect_success]}\n",
+                f"  args: {all_args}\n",
+                "  env:\n",
+            ]
+            for var in _ENV_VARS:
+                msg_lines.append(
+                    f"{detail_indent}{var!r}: {os.environ.get(var, '')!r}\n"
+                )
+            msg_lines.append(f"  returncode: {returncode}\n")
+
+            for stream, val in [("stdout", stdout), ("stderr", stderr)]:
+                if val is None or len(val) == 0:
+                    msg_lines.append(f"  {stream}: \n")
+                else:
+                    # stdout/stderr were already decoded to str above
+                    msg_lines += [
+                        f"  {stream}:\n",
+                        indent(val, detail_indent),
+                    ]
+            raise GRDataExecErr("".join(msg_lines))
+        return returncode, stdout.rstrip()
+
+    def fetch(
+        self,
+        src_dir=None,
+        *,
+        file_list=None,
+        untracked_dest_dir=None,
+        expect_success=None,
+    ):
+        # a number of tests care about whether the command fails (in some cases
+        # we actually expect it to fail). We return whether or not it was
+        # successful.
+
+        # have pytest hide certain kinds of noisy tracebacks
+        __tracebackhide__ = operator.methodcaller("errisinstance", GRDataExecErr)
+
+        if src_dir is None:
+            subcommand_args = ["fetch"]
+        else:
+            subcommand_args = ["fetch", "--from-dir", src_dir]
+
+        if file_list is not None:
+            subcommand_args += file_list
+
+        if untracked_dest_dir is not None:
+            subcommand_args += ["--untracked-dest-dir", untracked_dest_dir]
+        return self(subcommand_args, expect_success=expect_success)[0] == 0
+
+    def rm_vdata(self, version, *, omit_force=False, expect_success=None):
+        # remove a whole version-directory. returns whether this was successful
+
+        # have pytest hide certain kinds of noisy tracebacks
+        __tracebackhide__ = operator.methodcaller("errisinstance", GRDataExecErr)
+
+        if version is None:
+            # delete the version associated with the current app
+            subcommand_args = ["rm", "--force", "--vdata"]
+        elif isinstance(version, str):
+            subcommand_args = ["rm", "--force", "--vdata", version]
+        else:
+            # this particular mistake comes up surprisingly often
+            raise TypeError("version must be None or a str")
+        if omit_force:
+            subcommand_args.remove("--force")
+
+        return self(subcommand_args, expect_success=expect_success)[0] == 0
+
+    def rm_datastore(self, *, omit_force=False, expect_success=None):
+        # remove a whole data-store. returns whether this was successful
+
+        # have pytest hide certain kinds of noisy tracebacks
+        __tracebackhide__ = operator.methodcaller("errisinstance", GRDataExecErr)
+        subcommand_args = ["rm", "--force", "--data-store"]
+        if omit_force:
+            subcommand_args.remove("--force")
+        return self(subcommand_args, expect_success=expect_success)[0] == 0
+
+    def showknownreg(self):
+        # have pytest hide certain kinds of noisy tracebacks
+        __tracebackhide__ = operator.methodcaller("errisinstance", GRDataExecErr)
+        return self(["showknownreg"], expect_success=True)[1]
+
+    def calcreg(self, cksum_alg, dir_path):
+        # have pytest hide certain kinds of noisy tracebacks
+        __tracebackhide__ = operator.methodcaller("errisinstance", GRDataExecErr)
+        return self(
+            ["calcreg", "--hash-name", cksum_alg, dir_path], expect_success=True
+        )[1]
+
+    def cksum_alg(self):
+        # have pytest hide certain kinds of noisy tracebacks
+        __tracebackhide__ = operator.methodcaller("errisinstance", GRDataExecErr)
+        return self(["--cksum-alg"], expect_success=True)[1]
+
+    def version_dir_path(self):
+        __tracebackhide__ = operator.methodcaller("errisinstance", GRDataExecErr)
+        return self(["getpath", "--vdata"], expect_success=True)[1]
+
+    def data_dir_path(self):
+        __tracebackhide__ = operator.methodcaller("errisinstance", GRDataExecErr)
+        return self(["getpath", "--data-dir"], expect_success=True)[1]
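A note on the recurring `__tracebackhide__ = operator.methodcaller("errisinstance", GRDataExecErr)` lines: pytest accepts a callable here and invokes it with the `ExceptionInfo` of the failure, hiding the frame only when it returns true. The `methodcaller` is just a compact spelling of:

```python
def __tracebackhide__(excinfo):
    # hide this frame only for our custom wrapper exception
    return excinfo.errisinstance(GRDataExecErr)
```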
+
+
+@pytest.fixture(scope="module")
+def cli_app():
+    # this dead branch is kept on purpose: flip it to True to exercise the CLI
+    # through a subprocess (e.g. once grdata ships as a standalone script).
+    # For now, we call pygrackle's main() directly.
+    if False:
+        assert sys.executable is not None
+        return CLIApp([sys.executable, "-m", "pygrackle"])
+    else:
+        import pygrackle.__main__
+
+        return CLIApp([], use_function_call=pygrackle.__main__.main)
+
+
+_SHASUM_INSTALLED = shutil.which("shasum") is not None
+
+
+def _calc_ref_cksum(contents, cksum_alg):
+    if _SHASUM_INSTALLED:
+        _algs = {"sha1": "1", "sha256": "256"}
+        args = ["shasum", "--algorithm", _algs[cksum_alg], "-"]
+        rslt_str = (
+            subprocess.run(
+                args, input=contents.encode("ascii"), check=True, capture_output=True
+            )
+            .stdout.rstrip()
+            .decode("utf8")
+        )
+        if rslt_str.endswith(" -"):
+            cksum = rslt_str[:-3].lower()
+        else:
+            raise RuntimeError(f"the output of shasum was unexpected: '{rslt_str}'")
+    else:
+        if isinstance(contents, str):
+            contents = contents.encode("ascii")
+        hash_obj = hashlib.new(cksum_alg)
+        hash_obj.update(contents)
+        cksum = hash_obj.hexdigest()
+    return f"{cksum_alg}:{cksum}"
+
+
+class DummyFileSpec(NamedTuple):
+    contents_str: str
+    sha1: str
+    sha256: str
+
+
+def _dummy_file_contents(variant=1, trailing_content=None):
+    assert variant >= 0 and int(variant) == variant
+    newline_str = "\n" * (variant + 1)
+    contents_str = f"I am a test-file.{newline_str}Variant number {variant}\n"
+    if trailing_content is not None:
+        contents_str = contents_str + trailing_content
+    return DummyFileSpec(
+        contents_str,
+        _calc_ref_cksum(contents_str, "sha1"),
+        _calc_ref_cksum(contents_str, "sha256"),
+    )
+
+
+# here we define file-sets. Each fileset is a sequence of pairs specifying a
+# filename and its contents. The idea is to treat these as if they correspond
+# to different grackle versions and make sure we can handle them appropriately
+
+_DUMMY_SET_PRIMARY = (
+    ("file-0.txt", _dummy_file_contents(1)),
+    ("file-1.txt", _dummy_file_contents(2)),
+)
+
+_DUMMY_SET_RENAME = (
+    _DUMMY_SET_PRIMARY[0],
+    ("renamed-file-2.txt", _DUMMY_SET_PRIMARY[1][1]),
+)
+
+# this scenario shouldn't come up in practice (we replaced a file with a
+# different one of the same name), but we should still handle it properly
+_DUMMY_SET_REPLACE = (
+    _DUMMY_SET_PRIMARY[0],
+    ("file-2.txt", _dummy_file_contents(2, trailing_content="version 2 of file\n")),
+)
+
+
+class FileSetTuple(NamedTuple):
+    """Holds an object for each of the filesets"""
+
+    # corresponds to the primary file-set
+    primary: Any
+    # exactly like the primary file-set, but the 2nd file was renamed
+    rename: Any
+    # exactly like the primary file-set, but the contents of the second file
+    # were changed
+    replace: Any
+
+    def get(self, key):
+        if key in self._fields:
+            return getattr(self, key)
+        raise KeyError(key)
+
+
+_DUMMY_SET_TUPLE = FileSetTuple(
+    _DUMMY_SET_PRIMARY, _DUMMY_SET_RENAME, _DUMMY_SET_REPLACE
+)
+
+
+class DummyFileRepository(NamedTuple):
+    test_dir: str  # the path reserved for the test to do stuff in
+
+    # the following fields each hold one entry per dummy fileset
+    registry_path: FileSetTuple
+    src_file_dir: FileSetTuple
+
+    def cli_app_with_overrides(self, ref, kind, version_override=None):
+        new_args = ["--testing-override-registry-file", self.registry_path.get(kind)]
+
+        if version_override is not None:
+            if not isinstance(version_override, str):
+                raise TypeError("version_override must be a str")
+            new_args += ["--testing-override-version-grackle", version_override]
+        return CLIApp(
+            ref.common_args + new_args, use_function_call=ref.use_function_call
+        )
+
+
+@pytest.fixture
+def dummy_file_repo(tmp_path, cli_app):
+    test_dir = os.path.join(tmp_path, "test-dir")
+    os.mkdir(test_dir)
+
+    path = os.path.join(tmp_path, "fixture_dir")
+
+    cksum_kind = cli_app.cksum_alg()
+
+    registry_paths, src_file_dirs = {}, {}
+
+    for kind in _DUMMY_SET_TUPLE._fields:
+        file_set = _DUMMY_SET_TUPLE.get(kind)
+
+        registry_path = os.path.join(path, f"{kind}_file_registry.txt")
+        src_file_dir = os.path.join(path, f"{kind}-ref-files")
+        os.makedirs(src_file_dir)
+
+        src_file_dirs[kind] = src_file_dir
+        registry_paths[kind] = registry_path
+
+        pairs = []
+        for fname, file_spec in file_set:
+            full_path = os.path.join(src_file_dir, fname)
+            with open(full_path, "w") as f:
+                f.write(file_spec.contents_str)
+            pairs.append((fname, getattr(file_spec, cksum_kind)))
+        with open(registry_path, "w") as f:
+            print(
+                *["".join(['{"', p[0], '", "', p[1], '"}']) for p in pairs],
+                sep=",\n",
+                file=f,
+            )
+
+    yield DummyFileRepository(
+        test_dir=test_dir,
+        registry_path=FileSetTuple(**registry_paths),
+        src_file_dir=FileSetTuple(**src_file_dirs),
+    )
+
+
+def test_showknownreg(dummy_file_repo, cli_app):
+    # essentially, we are checking that the testing override works
+    app = dummy_file_repo.cli_app_with_overrides(cli_app, "primary")
+    full_registry_str = app.showknownreg().rstrip()
+
+    with open(dummy_file_repo.registry_path.primary, "r") as f:
+        ref_full_registry_str = f.read().rstrip()
+    assert full_registry_str == ref_full_registry_str
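The fixture writes each registry in the brace-delimited form that `_parse_file_registry` reads back in `test_calcreg` below: one `{"<fname>", "<alg>:<hexdigest>"}` entry per line. For the primary fileset the generated file looks roughly like this (digests elided):

```
{"file-0.txt", "sha256:<hexdigest of file-0.txt>"},
{"file-1.txt", "sha256:<hexdigest of file-1.txt>"}
```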
+
+
+def test_calcreg(dummy_file_repo, cli_app):
+    for alg in ["sha1", "sha256"]:
+        registry_str = cli_app.calcreg(alg, dummy_file_repo.src_file_dir.primary)
+        registry = _parse_file_registry(io.StringIO(registry_str))
+
+        for i, (fname, cksum) in enumerate(sorted(registry.items())):
+            if not hasattr(_DUMMY_SET_PRIMARY[i][1], alg):
+                raise RuntimeError("This should never happen. Unclear what went wrong")
+            ref = getattr(_DUMMY_SET_PRIMARY[i][1], alg)
+            if cksum != ref:
+                raise AssertionError(
+                    f"calculation of the {alg} checksum for the dummy-file, {fname}, "
+                    "may have revealed an issue in the command line tool's "
+                    "internal checksum logic\n"
+                    f"expected: {ref}\nactual: {cksum}"
+                )
+
+
+def _get_lockfile_path(datadir_path):
+    return os.path.join(datadir_path, "lockfile")
+
+
+def _get_datastore_dir(datadir_path):
+    return os.path.join(datadir_path, "data-store-v1")
+
+
+def _get_version_dir(datadir_path, version):
+    return os.path.join(datadir_path, "data-store-v1", version)
+
+
+def _get_managed_file(datadir_path, version, fname):
+    return os.path.join(datadir_path, "data-store-v1", version, fname)
+
+
+def _dummy_errmsg_writer(msg):
+    return AssertionError(msg)
+
+
+def check_version_data_dir_contents(
+    version_dir_path,
+    file_set,
+    *,
+    exhaustive_file_set=True,
+    errmsg_writer=_dummy_errmsg_writer,
+):
+    __tracebackhide__ = True  # suppress noisy pytest tracebacks
+    for fname, file_spec in file_set:
+        full_path = os.path.join(version_dir_path, fname)
+
+        if not os.path.isfile(full_path):
+            raise errmsg_writer(
+                f"file, {full_path}, doesn't exist after the last invocation of grdata"
+            )
+        with open(full_path, "r") as f:
+            contents = f.read()
+        if contents != file_spec.contents_str:
+            raise errmsg_writer(
+                f"the file, {full_path}, doesn't have the correct contents"
+            )
+
+    if exhaustive_file_set and (len(os.listdir(version_dir_path)) != len(file_set)):
+        raise errmsg_writer(
+            f"the directory, {version_dir_path}, doesn't contain the right number of "
+            "entries."
+        )
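The `_get_*` helpers above encode the on-disk layout that `check_version_data_dir` (next) and the remaining tests assert against. Pieced together from those helpers, a populated data directory looks like:

```
<data_dir>/
├── lockfile          # only present while an operation is in flight (or crashed)
├── user-data/
└── data-store-v1/
    └── <version>/    # one "version directory" per simulated grackle version
        ├── file-0.txt
        └── file-1.txt
```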
+
+
+def check_version_data_dir(
+    datadir_path, version, lockfile_should_exist=False, file_set=None, *, err_msg=None
+):
+    __tracebackhide__ = True  # suppress noisy pytest tracebacks
+    required_paths = [
+        ("data directory", datadir_path),
+        ("version data directory", _get_version_dir(datadir_path, version)),
+        ("user-data directory", os.path.join(datadir_path, "user-data")),
+    ]
+
+    def prep_assertion_err(nominal_err):
+        if err_msg is None:
+            return AssertionError(nominal_err)
+        return AssertionError(f"{err_msg}\n\n{nominal_err}")
+
+    for descr, path in required_paths:
+        if not os.path.isdir(path):
+            raise prep_assertion_err(
+                f"the {descr}, {path}, does not exist following an invocation of grdata"
+            )
+
+    lockfile_path = _get_lockfile_path(datadir_path)
+    if lockfile_should_exist and not os.path.isfile(lockfile_path):
+        raise prep_assertion_err(
+            f"the lockfile should exist at {lockfile_path} following "
+            "the invocation of grdata."
+        )
+    elif (not lockfile_should_exist) and os.path.isfile(lockfile_path):
+        raise prep_assertion_err(
+            "a lockfile shouldn't exist after the last invocation of grdata."
+        )
+
+    exhaustive_file_set = True
+    if file_set is None:
+        file_set = []
+        exhaustive_file_set = False
+    check_version_data_dir_contents(
+        version_dir_path=_get_version_dir(datadir_path, version),
+        file_set=file_set,
+        exhaustive_file_set=exhaustive_file_set,
+        errmsg_writer=prep_assertion_err,
+    )
+
+
+def _check_removal(data_dir, version, retains_datastore):
+    version_dir = _get_version_dir(data_dir, version)
+    datastore_dir = _get_datastore_dir(data_dir)
+    if os.path.isdir(version_dir):
+        raise AssertionError(
+            f"after a successful remove operation, the version-dir, {version_dir}, "
+            "shouldn't exist"
+        )
+    elif os.path.isdir(datastore_dir) != retains_datastore:
+        raise AssertionError(
+            f"the data-store directory, {datastore_dir}, should "
+            + ["not ", "still "][retains_datastore]
+            + "exist after the removal operation"
+        )
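The lockfile handling that `check_version_data_dir` verifies (and that several tests below provoke by hand-writing a dummy lockfile) assumes a simple convention: an operation creates `<data_dir>/lockfile` before touching the data store, removes it on the way out, and refuses to run if the file already exists. A minimal sketch of that convention (not Grackle's actual implementation):

```python
import contextlib
import os

@contextlib.contextmanager
def hold_lockfile(lockfile_path):
    # O_EXCL makes creation atomic: os.open fails if the lockfile already exists
    fd = os.open(lockfile_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    os.close(fd)
    try:
        yield
    finally:
        os.remove(lockfile_path)
```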
+
+
+@pytest.mark.parametrize(
+    "rm_approach", ["rm-implicit-vdata", "rm-explicit-vdata", "rm-data-store"]
+)
+def test_fetch_and_remove(dummy_file_repo, rm_approach, cli_app):
+    # in this test, the fetch operation is used to fetch all files in the registry
+    # and we vary the rm approach
+
+    version = "1.0"
+    app = dummy_file_repo.cli_app_with_overrides(cli_app, "primary", version)
+    data_dir = os.path.join(dummy_file_repo.test_dir, "my-data-dir")
+    with custom_datadir(data_dir):
+        # fetch the data
+        app.fetch(dummy_file_repo.src_file_dir.primary, expect_success=True)
+        check_version_data_dir(data_dir, version, file_set=_DUMMY_SET_PRIMARY)
+
+        # confirm that if we call fetch again, we still consider it a success
+        app.fetch(dummy_file_repo.src_file_dir.primary, expect_success=True)
+        check_version_data_dir(data_dir, version, file_set=_DUMMY_SET_PRIMARY)
+
+        # now, let's remove the version-data
+        # -> currently, we explicitly test a handful of scenarios where we expect
+        #    things to fail. It might be nice to split these cases out into
+        #    separate tests
+        if "rm-data-store" == rm_approach:
+            rm_method, rm_args, retains_datastore = app.rm_datastore, (), False
+        elif "rm-implicit-vdata" == rm_approach:
+            rm_method, rm_args, retains_datastore = app.rm_vdata, (None,), True
+        elif "rm-explicit-vdata" == rm_approach:
+            rm_method, rm_args, retains_datastore = app.rm_vdata, (version,), True
+
+            # extra scenario worth testing - trying to remove versions that don't
+            # exist
+            app.rm_vdata(version="9999999999999.0", expect_success=False)
+            check_version_data_dir(data_dir, version, file_set=_DUMMY_SET_PRIMARY)
+        else:
+            raise RuntimeError("unexpected rm_approach")
+
+        # we expect the remove command to report success when we omit the force
+        # flag, but not to actually do anything
+        rm_method(*rm_args, omit_force=True, expect_success=True)
+        check_version_data_dir(data_dir, version, file_set=_DUMMY_SET_PRIMARY)
+
+        # we expect the following case to fail
+        with open(_get_lockfile_path(data_dir), "w") as f:
+            f.write("a dummy lockfile")
+        rm_method(*rm_args, expect_success=False)
+        check_version_data_dir(
+            data_dir, version, lockfile_should_exist=True, file_set=_DUMMY_SET_PRIMARY
+        )
+        os.remove(_get_lockfile_path(data_dir))
+
+        # now we expect it to succeed
+        rm_method(*rm_args, expect_success=True)
+
+        _check_removal(data_dir, version, retains_datastore)
+
+
+@pytest.mark.parametrize(
+    "fetch_subset",
+    [
+        "fetch-single",
+        "fetch-single-then-all",
+        "fetch-all-then-single",
+        "fetch-all-explicit-list",
+    ],
+)
+def test_fetch_subset_and_remove(dummy_file_repo, fetch_subset, cli_app):
+    # in this test, the fetch operation is used to fetch a named subset of files
+    # in the registry and we use a single rm approach
+    version = "1.0"
+    app = dummy_file_repo.cli_app_with_overrides(cli_app, "primary", version)
+    data_dir = os.path.join(dummy_file_repo.test_dir, "my-data-dir")
+    with custom_datadir(data_dir):
+        # fetch the data
+        if fetch_subset == "fetch-single":
+            # call it twice to ensure we consider the operation a success each time
+            for i in range(2):
+                app.fetch(
+                    dummy_file_repo.src_file_dir.primary,
+                    file_list=[_DUMMY_SET_PRIMARY[1][0]],
+                    expect_success=True,
+                )
+                check_version_data_dir(
+                    data_dir, version, file_set=[_DUMMY_SET_PRIMARY[1]]
+                )
+
+        elif fetch_subset == "fetch-single-then-all":
+            app.fetch(
+                dummy_file_repo.src_file_dir.primary,
+                file_list=[_DUMMY_SET_PRIMARY[1][0]],
+                expect_success=True,
+            )
+            check_version_data_dir(data_dir, version, file_set=[_DUMMY_SET_PRIMARY[1]])
+
+            # fetch all data
+            app.fetch(dummy_file_repo.src_file_dir.primary, expect_success=True)
+            check_version_data_dir(data_dir, version, file_set=_DUMMY_SET_PRIMARY)
+
+        elif fetch_subset == "fetch-all-then-single":
+            # fetch all data
+            app.fetch(dummy_file_repo.src_file_dir.primary, expect_success=True)
+            check_version_data_dir(data_dir, version, file_set=_DUMMY_SET_PRIMARY)
+
+            # fetch a single data file
+            app.fetch(
+                dummy_file_repo.src_file_dir.primary,
+                file_list=[_DUMMY_SET_PRIMARY[1][0]],
+                expect_success=True,
+            )
+            check_version_data_dir(data_dir, version, file_set=_DUMMY_SET_PRIMARY)
+
+        elif fetch_subset == "fetch-all-explicit-list":
+            # fetch all data files
+            app.fetch(
+                dummy_file_repo.src_file_dir.primary,
+                file_list=[pair[0] for pair in _DUMMY_SET_PRIMARY],
+                expect_success=True,
+            )
+            check_version_data_dir(data_dir, version, file_set=_DUMMY_SET_PRIMARY)
+
+        else:
+            raise RuntimeError("unexpected fetch_subset")
+
+        app.rm_vdata(None, expect_success=True)
+        _check_removal(data_dir, version, retains_datastore=True)
+
+
+@pytest.mark.parametrize(
+    "src_file_dir_key",
+    [
+        pytest.param("replace", id="cksum-mismatch"),
+        pytest.param("rename", id="no-src-file"),
+    ],
+)
+def test_fetch_fail(src_file_dir_key, dummy_file_repo, cli_app):
+    # here we intentionally use a file registry and a mismatched source-directory
+    # (where files are fetched from). Essentially we want to ensure correct (and
+    # graceful) behavior in 2 failure modes:
+    # 1. somehow the checksum is mismatched. This might happen if a file got
+    #    corrupted in a download OR (more likely) we made an error while creating
+    #    the registry file.
+    # 2. somehow the file can't be fetched. This might happen for a range of
+    #    reasons such as internet connectivity issues or server issues.
+    #    Alternatively it could happen if we make a mistake (e.g. update the
+    #    registry, but forget to upload the data file)
+    nominal_version = "1.0"
+    app = dummy_file_repo.cli_app_with_overrides(cli_app, "primary", nominal_version)
+    data_dir = os.path.join(dummy_file_repo.test_dir, "my-data-dir")
+    with custom_datadir(data_dir):
+        success = app.fetch(dummy_file_repo.src_file_dir.get(src_file_dir_key))
+        assert not success
+
+        # the directories should exist, but the file should not be "downloaded".
+        # We also confirm that there isn't a lockfile (i.e. we exit gracefully)
+        check_version_data_dir(data_dir, nominal_version, lockfile_should_exist=False)
+        assert not os.path.isfile(
+            _get_managed_file(data_dir, nominal_version, _DUMMY_SET_REPLACE[1][0])
+        )
+
+
+def test_fetch_fail_locked(dummy_file_repo, cli_app):
+    # confirm that the fetch command will fail if a lockfile exists
+    nominal_version = "1.0"
+    app = dummy_file_repo.cli_app_with_overrides(cli_app, "primary", nominal_version)
+    data_dir = os.path.join(dummy_file_repo.test_dir, "my-data-dir")
+    # let's create a lockfile
+    os.makedirs(data_dir)
+    with open(_get_lockfile_path(data_dir), "w") as f:
+        f.write("a dummy lockfile")
+
+    with custom_datadir(data_dir):
+        success = app.fetch(dummy_file_repo.src_file_dir.primary)
+        assert not success, "Failure is expected when a lockfile exists"
+        if os.path.isdir(_get_version_dir(data_dir, nominal_version)):
+            raise AssertionError(
+                "the tool should not create a version directory when a lock file "
+                "exists"
+            )
+
+
+def test_fetch_untracked(dummy_file_repo, cli_app):
+    # test that the fetch operation can be used to fetch files to an untracked
+    # directory
+    version = "1.0"
+    app = dummy_file_repo.cli_app_with_overrides(cli_app, "primary", version)
+    data_dir = os.path.join(dummy_file_repo.test_dir, "my-data-dir")
+    with custom_datadir(data_dir):
+        # first we download everything
+        dest_dir_1 = os.path.join(dummy_file_repo.test_dir, "my-untracked-dir-all")
+        for i in range(2):
+            app.fetch(
+                dummy_file_repo.src_file_dir.primary,
+                untracked_dest_dir=dest_dir_1,
+                expect_success=True,
+            )
+            # the tracked data directory should never be created in this mode
+            assert not os.path.isdir(data_dir)
+            check_version_data_dir_contents(dest_dir_1, file_set=_DUMMY_SET_PRIMARY)
+
+        # now confirm that we can download a subset
+        dest_dir_2 = os.path.join(dummy_file_repo.test_dir, "my-untracked-dir-single")
+        for i in range(2):
+            app.fetch(
+                dummy_file_repo.src_file_dir.primary,
+                untracked_dest_dir=dest_dir_2,
+                file_list=[_DUMMY_SET_PRIMARY[1][0]],
+                expect_success=True,
+            )
+            assert not os.path.isdir(data_dir)
+            check_version_data_dir_contents(
+                dest_dir_2, file_set=[_DUMMY_SET_PRIMARY[1]]
+            )
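`is_linked` (defined next) checks the hard-link deduplication that fetch is expected to perform: when two version directories hold an identical file, both directory entries should refer to a single inode, which `os.stat(...).st_ino` exposes. A standalone illustration:

```python
import os
import tempfile

with tempfile.TemporaryDirectory() as d:
    a = os.path.join(d, "a.txt")
    b = os.path.join(d, "b.txt")
    with open(a, "w") as f:
        f.write("shared contents\n")
    os.link(a, b)  # create a hard link: two names, one inode
    assert os.stat(a).st_ino == os.stat(b).st_ino
```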
+
+
+def is_linked(*paths):
+    if len(paths) < 2:
+        raise TypeError("is_linked() must have at least 2 arguments")
+    try:
+        # if ref has a value of 0, then we can't actually make a meaningful
+        # comparison. There isn't an obvious way to handle that scenario
+        ref = os.stat(paths[0], follow_symlinks=True).st_ino
+        return all(
+            ref == os.stat(path, follow_symlinks=True).st_ino for path in paths[1:]
+        )
+    except FileNotFoundError:
+        return False
+
+
+def test_multiversion(dummy_file_repo, cli_app):
+    # test what happens when we fetch multiple sets of files
+    # - in the future, it might be nice to break this test up into smaller pieces
+
+    # from a realism perspective, primary -> rename -> replace may make more
+    # sense, but the current order seems more likely to catch an error
+    version_kind_map = {"1.0": "primary", "2.0": "replace", "3.0": "rename"}
+    versions = tuple(version_kind_map.keys())
+    app_v1, app_v2, app_v3 = [
+        dummy_file_repo.cli_app_with_overrides(cli_app, kind, version)
+        for version, kind in version_kind_map.items()
+    ]
+    data_dir = os.path.join(dummy_file_repo.test_dir, "my-data-dir")
+
+    def _basic_datastore_check(expected_versions, last_op, last_op_version):
+        err_msg = (
+            f"This check is performed right after performing the `{last_op}` "
+            "operation with grdata, specialized for simulated version "
+            f"{last_op_version} of grackle. (That version is associated with the "
+            f"{version_kind_map[last_op_version]!r} dummy fileset)"
+        )
+
+        for ver in expected_versions:
+            check_version_data_dir(
+                data_dir,
+                ver,
+                file_set=_DUMMY_SET_TUPLE.get(version_kind_map[ver]),
+                err_msg=err_msg,
+            )
+        for ver in version_kind_map.keys():
+            if ver not in expected_versions:
+                ver_dir = _get_version_dir(data_dir, ver)
+                if os.path.isdir(ver_dir):
+                    raise AssertionError(
+                        f"{err_msg}\n\n"
+                        f"The version-directory, {ver_dir}, should not exist!"
+                    )
+
+    with custom_datadir(data_dir):
+        # step 1: load data associated with v1 (the `primary` fileset)
+        app_v1.fetch(dummy_file_repo.src_file_dir.primary, expect_success=True)
+        _basic_datastore_check(versions[:1], "fetch", last_op_version=versions[0])
+
+        # step 2: load data associated with v2 (the `replace` fileset)
+        app_v2.fetch(dummy_file_repo.src_file_dir.replace, expect_success=True)
+        _basic_datastore_check(versions[:2], "fetch", last_op_version=versions[1])
+
+        # confirming linking... (it might be better to check disk-usage and be
+        # more agnostic about deduplication)
+        assert is_linked(
+            _get_managed_file(data_dir, versions[0], _DUMMY_SET_TUPLE.primary[0][0]),
+            _get_managed_file(data_dir, versions[1], _DUMMY_SET_TUPLE.replace[0][0]),
+        ), "the file-0.txt files should all be linked"
+        assert not is_linked(
+            _get_managed_file(data_dir, versions[0], _DUMMY_SET_TUPLE.primary[1][0]),
+            _get_managed_file(data_dir, versions[1], _DUMMY_SET_TUPLE.replace[1][0]),
+        ), "the file-1.txt files are expected to hold different contents"
+
+        # step 3: load data associated with v3 (the `rename` fileset)
+        app_v3.fetch(dummy_file_repo.src_file_dir.rename, expect_success=True)
+        _basic_datastore_check(versions, "fetch", last_op_version=versions[2])
+        # checking linking... (it might be better to check disk-usage and be more
+        # agnostic about the fact that we use linking for deduplication)
+        assert is_linked(
+            _get_managed_file(data_dir, versions[0], _DUMMY_SET_TUPLE.primary[0][0]),
+            _get_managed_file(data_dir, versions[1], _DUMMY_SET_TUPLE.replace[0][0]),
+            _get_managed_file(data_dir, versions[2], _DUMMY_SET_TUPLE.rename[0][0]),
+        ), "the file-0.txt files should all be linked"
+        assert is_linked(
+            _get_managed_file(data_dir, versions[0], _DUMMY_SET_TUPLE.primary[1][0]),
+            _get_managed_file(data_dir, versions[2], _DUMMY_SET_TUPLE.rename[1][0]),
+        ), "the file-1.txt files from primary and rename filesets should be linked"
+        assert not is_linked(
+            _get_managed_file(data_dir, versions[0], _DUMMY_SET_TUPLE.primary[1][0]),
+            _get_managed_file(data_dir, versions[1], _DUMMY_SET_TUPLE.replace[1][0]),
+        ), "file-1.txt from the replace fileset should not be linked to anything"
+
+        # step 4: remove data associated with v1 (the `primary` fileset)
+        # -> we EXPLICITLY use a different app version to remove this data
+        app_v3.rm_vdata(versions[0], expect_success=True)
+        _basic_datastore_check(
+            [versions[1], versions[2]], "rm-vdata", last_op_version=versions[0]
+        )
+        assert is_linked(
+            _get_managed_file(data_dir, versions[1], _DUMMY_SET_TUPLE.replace[0][0]),
+            _get_managed_file(data_dir, versions[2], _DUMMY_SET_TUPLE.rename[0][0]),
+        ), "remaining file-0.txt files should remain linked"
+        assert not is_linked(
+            _get_managed_file(data_dir, versions[1], _DUMMY_SET_TUPLE.replace[1][0]),
+            _get_managed_file(data_dir, versions[2], _DUMMY_SET_TUPLE.rename[1][0]),
+        ), "remaining file-1.txt files should remain unlinked"
+
+        # step 5: remove data associated with v3 (the `rename` fileset)
+        # -> we EXPLICITLY use a different app version to remove this data
+        app_v2.rm_vdata(versions[2], expect_success=True)
+        _basic_datastore_check([versions[1]], "rm-vdata", last_op_version=versions[2])
diff --git a/src/python/tests/test_query_units.py b/src/python/tests/test_query_units.py
index df5a8a7a..f68ddfb2 100644
--- a/src/python/tests/test_query_units.py
+++ b/src/python/tests/test_query_units.py
@@ -11,6 +11,7 @@
 # software.
 ########################################################################
 
+from collections import ChainMap
 import os
 import numpy as np
 import pytest
@@ -26,20 +27,43 @@
 from pygrackle.grackle_wrapper import _query_units
 
-_local_dir = os.path.dirname(os.path.abspath(__file__))
-def _setup_generic_chemistry_data(initial_redshift, current_redshift = None):
-    # construct a generic chemistry_data instance
-    # -> it is ONLY set up for comoving coordinates when current_redshift is
-    #    not None
-    data_file_path = os.sep.join([_local_dir, "..", "..", "..", "input",
-                                  "CloudyData_UVB=HM2012.h5"])
+from testing_common import grackle_data_dir
+
+_UNITS_NAMES = ('density_units', 'time_units', 'length_units', 'a_value',
+                'a_units', 'velocity_units', 'temperature_units')
+
+def _setup_generic_chemistry_data(initial_redshift, current_redshift = None, *,
+                                  skip_initialize = False,
+                                  parameter_overrides = None):
+    """
+    construct a generic chemistry_data instance
+
+    It is ONLY set up for comoving coordinates when current_redshift is
+    not None
+    """
+
+    defaults = {
+        "use_grackle" : 1,
+        "with_radiative_cooling" : 0,
+        "primordial_chemistry" : 0,
+        "metal_cooling" : 1,
+        "UVbackground" : 1,
+        "grackle_data_file" : os.path.join(grackle_data_dir,
+                                           "CloudyData_UVB=HM2012.h5")
+    }
+
+    params = ChainMap(
+        {} if parameter_overrides is None else parameter_overrides,
+        defaults
+    )
+
     chem = chemistry_data()
-    chem.use_grackle = 1
-    chem.with_radiative_cooling = 0
-    chem.primordial_chemistry = 0
-    chem.metal_cooling = 1
-    chem.UVbackground = 1
-    chem.grackle_data_file = data_file_path
+    for param_name, value in params.items():
+        if (param_name in _UNITS_NAMES) or (param_name == "comoving_coordinates"):
+            raise ValueError(
+                f"{param_name!r} isn't allowed to be passed as an override "
+                "parameter because this function has special handling for "
+                "initializing unit-related parameters")
+        setattr(chem, param_name, value)
+
     if current_redshift is not None:
         set_cosmology_units(chem, current_redshift=current_redshift,
@@ -53,8 +77,11 @@ def _setup_generic_chemistry_data(initial_redshift, current_redshift = None):
     chem.density_units = mass_hydrogen_cgs  # rho = 1.0 is 1.67e-24 g
     chem.length_units = cm_per_mpc  # 1 Mpc in cm
     chem.time_units = sec_per_Myr  # 1 Myr in s
-    chem.initialize()
-    return chem
+    if skip_initialize:
+        return chem
+    else:
+        chem.initialize()
+        return chem
 
 
 _UNITS_NAMES = ('density_units', 'time_units', 'length_units', 'a_value',
@@ -82,8 +109,11 @@ def test_query_units(comoving_coordinates, initial_redshift):
         current_redshift = initial_redshift
     else:
         current_redshift = None
-    chem = _setup_generic_chemistry_data(initial_redshift = initial_redshift,
-                                         current_redshift = current_redshift)
+    chem = _setup_generic_chemistry_data(
+        initial_redshift = initial_redshift,
+        current_redshift = current_redshift,
+        parameter_overrides = {"with_radiative_cooling" : 0}
+    )
 
     # retrieve the initial units-related quantities
     units_at_init = _prefetch_units_vals(chem)
@@ -120,8 +150,11 @@ def test_query_units(comoving_coordinates, initial_redshift):
         # for the comoving-case, the returned value should match the physical
         # units at the desired cosmological scale_factor
         expected = _prefetch_units_vals(
-            _setup_generic_chemistry_data(initial_redshift = initial_redshift,
-                                          current_redshift = later_redshift)
+            _setup_generic_chemistry_data(
+                initial_redshift = initial_redshift,
+                current_redshift = later_redshift,
+                parameter_overrides = {"with_radiative_cooling" : 0}
+            )
         )
         for name in _UNITS_NAMES:
             if name in ('time_units', 'a_units'):
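The override pattern introduced in this last hunk leans on `collections.ChainMap` resolving each lookup in the first mapping that contains the key, so per-test overrides win without mutating the shared defaults. In isolation:

```python
from collections import ChainMap

defaults = {"use_grackle": 1, "metal_cooling": 1, "UVbackground": 1}
params = ChainMap({"metal_cooling": 0}, defaults)

assert params["metal_cooling"] == 0    # taken from the overrides
assert params["UVbackground"] == 1     # falls back to the defaults
assert defaults["metal_cooling"] == 1  # the defaults themselves are untouched
```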