Skip to content

Commit

Permalink
Minimal parsing, inferred loc type, fewer commands and less bandwidth
Browse files Browse the repository at this point in the history
  • Loading branch information
benfitzpatrick committed Jun 5, 2024
1 parent 64a17e5 commit 5a16399
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 68 deletions.
66 changes: 27 additions & 39 deletions metomi/rose/loc_handlers/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,51 +69,20 @@ def parse(self, loc, conf_tree):
"""Set loc.real_name, loc.scheme, loc.loc_type.
Within Git we have a lot of trouble figuring out remote
loc_type - a clone is required, unfortunately.
Short commit hashes will not work since there is no remote
rev-parse functionality. Long commit hashes will work if the
uploadpack.allowAnySHA1InWant configuration is set on the
remote repo or server.
Filtering requires uploadpack.allowFilter to be set true on
the remote repo or server.
loc_type - a clone is required, unfortunately. There is a
tradeoff between extra Git commands and bandwidth vs
parse failure behaviour. We have decided to short cut the
loc_type calculation to save commands and bandwidth,
catching failures later.
"""
loc.scheme = self.SCHEMES[0]
remote, path, ref = self._parse_name(loc)
with tempfile.TemporaryDirectory() as tmpdirname:
git_dir_opt = f"--git-dir={tmpdirname}/.git"
self.manager.popen.run_ok(
"git", git_dir_opt, "init"
)

# Make sure we configure for minimum fetching.
if self.git_version < (2, 25, 0):
self.manager.popen.run_ok(
"git", git_dir_opt, "config", "extensions.partialClone",
"true"
)

# Extract the commit hash if we don't already have it.
commithash = self._get_commithash(remote, ref)
# Extract the commit hash if we don't already have it.
commithash = self._get_commithash(remote, ref)

# Fetch the ref/commit as efficiently as we can.
ret_code, _, stderr = self.manager.popen.run(
"git", git_dir_opt, "fetch", "--depth=1",
"--filter=blob:none", remote, commithash
)
if ret_code:
raise ValueError(f"source={loc.name}: {stderr}")

# Determine the type of the path object.
ret_code, typetext, stderr = self.manager.popen.run(
"git", git_dir_opt, "cat-file", "-t", f"{commithash}:{path}"
) # N.B. git versions >1.8 can use '-C' to set git dir.
if ret_code:
raise ValueError(f"source={loc.name}: {stderr}")

if typetext.strip() == "tree":
if path.endswith("/"): # This is a short cut, checked later.
loc.loc_type = loc.TYPE_TREE
else:
loc.loc_type = loc.TYPE_BLOB
Expand All @@ -128,6 +97,9 @@ async def pull(self, loc, conf_tree):
git sparse-checkout is not available below Git 2.25, and seems to omit
contents altogether if set to the root of the repo (as of 2.40.1).
Filtering requires uploadpack.allowFilter to be set true on
the remote repo or server.
"""
if not loc.real_name:
self.parse(loc, conf_tree)
Expand Down Expand Up @@ -156,6 +128,14 @@ async def pull(self, loc, conf_tree):
loc.key
)
name = tmpdirname + "/" + path

# Check that we have inferred the right type from the path name.
real_loc_type = loc.TYPE_TREE if os.path.isdir(name) else loc.TYPE_BLOB
if real_loc_type != loc.loc_type:
raise ValueError(
f"Expected path '{path}' to be type '{loc.loc_type}', "
+ f"but it was '{real_loc_type}'. Check trailing slash."
)
dest = loc.cache
if loc.loc_type == "tree":
dest += "/"
Expand All @@ -167,6 +147,14 @@ def _parse_name(self, loc):
return re.split(self.URI_SEPARATOR, nonscheme, maxsplit=3)

def _get_commithash(self, remote, ref):
"""Get the commit hash given a branch, tag, or commit hash.
Short commit hashes will not resolve since there is no remote
rev-parse functionality. Long commit hashes will work if the
uploadpack.allowAnySHA1InWant configuration is set on the
remote repo or server.
"""
ret_code, info, _ = self.manager.popen.run(
"git", "ls-remote", "--exit-code", remote, ref)
if ret_code:
Expand Down
9 changes: 5 additions & 4 deletions sphinx/api/configuration/file-creation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,18 @@ root directory to install file targets with a relative path:
branch, or long commit hash to specify the commit at which you want
to extract. These should follow the same semantics as if you git
cloned ``REPOSITORY_URL``, git checkout'ed ``TREEISH``, and extracted
the path ``PATHSPEC`` within the clone. To extract from the root of
the repository use a ``PATHSPEC`` of ``./`` e.g.
the path ``PATHSPEC`` within the clone. ``PATHSPEC`` must end with a
trailing slash (``/``) if it is a directory. To extract from the root
of the repository use a ``PATHSPEC`` of ``./`` e.g.
``git:[email protected]:metomi/rose::./::2.2.0``. It may help to think
of the parts of the location as git:Where::What::When. Examples:

.. code-block:: rose
# Download the sphinx directory from the master branch of
# the github.com/metomi/rose repo.
# the github.com/metomi/rose repo. Note trailing slash.
[file:rose-docs]
source=git:[email protected]:metomi/rose::sphinx::master
source=git:[email protected]:metomi/rose::sphinx/::master
# Extract the whole contents of version 2.0.1 of the local
# repository at /home/user/some/path/to/my/git/repo.
Expand Down
82 changes: 57 additions & 25 deletions t/rose-app-run/28-git.t
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ GIT_WS_PID=${!}
sleep 10
cd $START_PWD
#-------------------------------------------------------------------------------
tests 51
tests 55
#-------------------------------------------------------------------------------
remote_test_modes=("ssh" "http" "local")
remote_locations=("$HOSTNAME:$TEST_DIR/hellorepo/" "http://localhost:$GIT_WS_PORT/cgi-bin/git" "$TEST_DIR/hellorepo/")
Expand All @@ -87,19 +87,19 @@ for i in 0 1 2; do
default=true
[file:hello/fruit_main]
source=git:$remote::fruit::$MAIN_BRANCH
source=git:$remote::fruit/::$MAIN_BRANCH
[file:hello/fruit_tag1]
source=git:$remote::fruit::v1.0
source=git:$remote::fruit/::v1.0
[file:hello/fruit_tag2]
source=git:$remote::fruit::v2.0
source=git:$remote::fruit/::v2.0
[file:hello/fruit_commit1]
source=git:$remote::fruit::$COMMITHASH1
source=git:$remote::fruit/::$COMMITHASH1
[file:hello/fruit_commit2]
source=git:$remote::fruit::$COMMITHASH2
source=git:$remote::fruit/::$COMMITHASH2
[file:hello/tree_main.txt]
source=git:$remote::tree.txt::$MAIN_BRANCH
Expand All @@ -121,31 +121,31 @@ __CONFIG__
run_pass "$TEST_KEY" rose app-run --config=../config -q
find hello -type f | LANG=C sort >'find-hello.out'
file_cmp "$TEST_KEY.found" "find-hello.out" <<__CONTENT__
hello/fruit_commit1/fruit/raspberry.txt
hello/fruit_commit2/fruit/orange.txt
hello/fruit_commit2/fruit/raspberry.txt
hello/fruit_main/fruit/orange.txt
hello/fruit_main/fruit/raspberry.txt
hello/fruit_tag1/fruit/raspberry.txt
hello/fruit_tag2/fruit/orange.txt
hello/fruit_tag2/fruit/raspberry.txt
hello/fruit_commit1/raspberry.txt
hello/fruit_commit2/orange.txt
hello/fruit_commit2/raspberry.txt
hello/fruit_main/orange.txt
hello/fruit_main/raspberry.txt
hello/fruit_tag1/raspberry.txt
hello/fruit_tag2/orange.txt
hello/fruit_tag2/raspberry.txt
hello/tree_branch1.txt
hello/tree_commit1.txt
hello/tree_main.txt
hello/tree_tag1.txt
hello/tree_tag2.txt
__CONTENT__
file_cmp "$TEST_KEY.found_file0" "hello/fruit_commit1/fruit/raspberry.txt" <<__CONTENT__
file_cmp "$TEST_KEY.found_file0" "hello/fruit_commit1/raspberry.txt" <<__CONTENT__
Octavia
__CONTENT__
file_cmp "$TEST_KEY.found_file1" "hello/fruit_commit1/fruit/raspberry.txt" "hello/fruit_commit2/fruit/raspberry.txt"
file_cmp "$TEST_KEY.found_file2" "hello/fruit_commit1/fruit/raspberry.txt" "hello/fruit_main/fruit/raspberry.txt"
file_cmp "$TEST_KEY.found_file3" "hello/fruit_commit1/fruit/raspberry.txt" "hello/fruit_tag1/fruit/raspberry.txt"
file_cmp "$TEST_KEY.found_file4" "hello/fruit_commit2/fruit/orange.txt" <<__CONTENT__
file_cmp "$TEST_KEY.found_file1" "hello/fruit_commit1/raspberry.txt" "hello/fruit_commit2/raspberry.txt"
file_cmp "$TEST_KEY.found_file2" "hello/fruit_commit1/raspberry.txt" "hello/fruit_main/raspberry.txt"
file_cmp "$TEST_KEY.found_file3" "hello/fruit_commit1/raspberry.txt" "hello/fruit_tag1/raspberry.txt"
file_cmp "$TEST_KEY.found_file4" "hello/fruit_commit2/orange.txt" <<__CONTENT__
Clementine
__CONTENT__
file_cmp "$TEST_KEY.found_file5" "hello/fruit_commit2/fruit/orange.txt" "hello/fruit_main/fruit/orange.txt"
file_cmp "$TEST_KEY.found_file6" "hello/fruit_commit2/fruit/orange.txt" "hello/fruit_tag2/fruit/orange.txt"
file_cmp "$TEST_KEY.found_file5" "hello/fruit_commit2/orange.txt" "hello/fruit_main/orange.txt"
file_cmp "$TEST_KEY.found_file6" "hello/fruit_commit2/orange.txt" "hello/fruit_tag2/orange.txt"
file_cmp "$TEST_KEY.found_file7" "hello/tree_commit1.txt" <<__CONTENT__
Holly
__CONTENT__
Expand Down Expand Up @@ -203,11 +203,11 @@ test_init <<__CONFIG__
default=true
[file:hello/fruit_main]
source=git:$TEST_DIR/hellorepo/::fruit::bad_ref
source=git:$TEST_DIR/hellorepo/::fruit/::bad_ref
__CONFIG__
run_fail "$TEST_KEY" rose app-run --config=../config
file_cmp "$TEST_KEY.err" "$TEST_KEY.err" <<__ERROR__
[FAIL] file:hello/fruit_main=source=git:$TEST_DIR/hellorepo/::fruit::bad_ref: ls-remote: could not find ref 'bad_ref' in '$TEST_DIR/hellorepo/'
[FAIL] file:hello/fruit_main=source=git:$TEST_DIR/hellorepo/::fruit/::bad_ref: ls-remote: could not find ref 'bad_ref' in '$TEST_DIR/hellorepo/'
__ERROR__
test_teardown
#-------------------------------------------------------------------------------
Expand All @@ -218,10 +218,42 @@ test_init <<__CONFIG__
default=true
[file:hello/fruit_main]
source=git:$TEST_DIR/hellorepo/::fruit::${COMMITHASH1::7}
source=git:$TEST_DIR/hellorepo/::fruit/::${COMMITHASH1::7}
__CONFIG__
run_fail "$TEST_KEY" rose app-run --config=../config
file_cmp "$TEST_KEY.err" "$TEST_KEY.err" <<__ERROR__
[FAIL] file:hello/fruit_main=source=git:$TEST_DIR/hellorepo/::fruit::${COMMITHASH1::7}: ls-remote: could not find ref '${COMMITHASH1::7}' in '$TEST_DIR/hellorepo/': you may be using an unsupported short commit hash
[FAIL] file:hello/fruit_main=source=git:$TEST_DIR/hellorepo/::fruit/::${COMMITHASH1::7}: ls-remote: could not find ref '${COMMITHASH1::7}' in '$TEST_DIR/hellorepo/': you may be using an unsupported short commit hash
__ERROR__
test_teardown
#-------------------------------------------------------------------------------
TEST_KEY="$TEST_KEY_BASE-bad-path-blob-should-be-tree"
test_setup
test_init <<__CONFIG__
[command]
default=true
[file:hello/fruit_main]
source=git:$TEST_DIR/hellorepo::fruit::$MAIN_BRANCH
__CONFIG__
run_fail "$TEST_KEY" rose app-run --config=../config
file_cmp "$TEST_KEY.err" "$TEST_KEY.err" <<__ERROR__
[FAIL] Expected path 'fruit' to be type 'blob', but it was 'tree'. Check trailing slash.
[FAIL] source: remote:$TEST_DIR/hellorepo ref:$MAIN_BRANCH commit:$COMMITHASH2 path:fruit (git:$TEST_DIR/hellorepo::fruit::$MAIN_BRANCH)
__ERROR__
test_teardown
#-------------------------------------------------------------------------------
TEST_KEY="$TEST_KEY_BASE-bad-path-tree-should-be-blob"
test_setup
test_init <<__CONFIG__
[command]
default=true
[file:hello/fruit_main/txt]
source=git:$TEST_DIR/hellorepo::fruit/orange.txt/::$MAIN_BRANCH
__CONFIG__
run_fail "$TEST_KEY" rose app-run --config=../config
file_cmp "$TEST_KEY.err" "$TEST_KEY.err" <<__ERROR__
[FAIL] Expected path 'fruit/orange.txt/' to be type 'tree', but it was 'blob'. Check trailing slash.
[FAIL] source: remote:$TEST_DIR/hellorepo ref:$MAIN_BRANCH commit:$COMMITHASH2 path:fruit/orange.txt/ (git:$TEST_DIR/hellorepo::fruit/orange.txt/::$MAIN_BRANCH)
__ERROR__
test_teardown

0 comments on commit 5a16399

Please sign in to comment.