From c9a859aa8211e81bdf6df70672abbf91c55c6b3c Mon Sep 17 00:00:00 2001 From: Marc <7050295+marcleblanc2@users.noreply.github.com> Date: Tue, 20 Feb 2024 15:16:01 +0300 Subject: [PATCH] Got it working in customer environment (#14) * Refactoring docker compose files for easier execution and customer PoC usage of repo-converter * Moved running process check to beginning of loop to skip earlier * Better handling and checking of child processes * Deleting abandoned lockfiles * Starting on batch sizes (not finished) * Removing psutils from docker image build to speed up build time * Updating README to match recent changes * Updating cloud-agent to latest * Showing more output for LOG_LEVEL=INFO --- README.md | 37 +- .../docker-compose-common-services.yaml | 4 +- host-ubuntu/docker-compose-override.yaml | 5 + host-ubuntu/docker-compose.yaml | 31 +- host-wsl/docker-compose-override.yaml | 5 + host-wsl/docker-compose.yaml | 4 + .../wsl2-startup.sh | 5 +- repo-converter/build/Dockerfile | 5 +- .../build/docker-compose-override.yaml | 6 + repo-converter/build/docker-compose.yaml | 11 +- repo-converter/build/pull-build-start.sh | 14 + repo-converter/build/requirements.txt | 1 - repo-converter/build/run.py | 482 ++++++++++++------ repo-converter/docker-compose-override.yaml | 5 + repo-converter/docker-compose.yaml | 11 +- repo-converter/pull-start.sh | 13 + .../get-svn-repo-stats.sh | 0 stats/repos.txt | 204 ++++++++ 18 files changed, 621 insertions(+), 222 deletions(-) rename host-wsl2-ubuntu/docker-compose.yaml => config/docker-compose-common-services.yaml (93%) create mode 100644 host-ubuntu/docker-compose-override.yaml create mode 100644 host-wsl/docker-compose-override.yaml create mode 100644 host-wsl/docker-compose.yaml rename {host-wsl2-ubuntu => host-wsl}/wsl2-startup.sh (77%) create mode 100644 repo-converter/build/docker-compose-override.yaml create mode 100644 repo-converter/build/pull-build-start.sh create mode 100644 repo-converter/docker-compose-override.yaml 
create mode 100644 repo-converter/pull-start.sh rename {svn-repo-stats => stats}/get-svn-repo-stats.sh (100%) create mode 100644 stats/repos.txt diff --git a/README.md b/README.md index 334a080..ac2b267 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Running src serve-git and the agent together on the same Docker network allows t Docker compose also allows for easier upgrades, troubleshooting, monitoring, logging, flexibility of hosting, etc. than running the binaries directly on the OS. ## Setup - Sourcegraph Staff Only -1. Add the needed entries to the sourcegraphConnect targetGroups list in the Cloud instance's config.yaml, get your PR approved and merged +1. Add the needed entries to the sourcegraphConnect targetGroups list in the Cloud instance's config.yaml, and get your PR approved and merged ```yaml - dnsName: src-serve-git-ubuntu.local listeningAddress: 100.100.100.0 @@ -22,20 +22,25 @@ Docker compose also allows for easier upgrades, troubleshooting, monitoring, log ports: - 443 ``` -2. Run the Reload frontend GitHub Action, as this seems to be needed for the frontend pods to start using tunnel connections +2. Run the "Reload Instance for srcconnect config change" GitHub Action, as many containers need to be restarted to pick up tunnel connection config changes 3. Clone this repo to a customer's bridge VM, install Docker and Docker's Compose plugin 4. Copy the `config.yaml` and `service-account-key.json` files using the instructions on the instance's Cloud Ops dashboard - - Paste them into `./config/cloud-agent-config.yaml` and `./config/cloud-agent-service-account-key.json` -5. 
Modify the `./config/cloud-agent-config.yaml` file - - `serviceAccountKeyFile: /sourcegraph/cloud-agent-service-account-key.json` so that the Go binary inside the agent container finds this file in the path that's mapped via the docker-compose.yaml files - - Only include the `- dialAddress` entries that this cloud agent instance can reach, remove the others, so the Cloud instance doesn't try using this agent instance for code hosts it can't reach - - Use extra caution when pasting the config.yaml in Windows, as it may use Windows' line endings or extra spaces, which breaks YAML, as a whitespace-dependent format -6. Clone the customer's repos into the `repos-to-serve` directory at the root of this repo on the bridge VM, or update the volume mount path for the src-serve-git service in the docker-compose.yaml file -7. Run `docker compose up -d` -8. Add a Code Host config to the customer's Cloud instance - - Type: src serve-git - - URL: "http://src-serve-git-ubuntu.local:443" - - or - - URL: "http://src-serve-git-wsl.local:443" - - Note that the :443 port may be required, as this seems to default to port 443, even when used with http:// -9. Use the repo-converter to convert SVN, TFVC, or Git repos, to Git format, which will store them in the `repos-to-serve` directory, or use any other means to get the repos into the directory + - Paste them into `./config/cloud-agent-config.yaml` and `./config/cloud-agent-service-account-key.json` +5. 
Modify the contents of the `./config/cloud-agent-config.yaml` file: + - `serviceAccountKeyFile: /sourcegraph/cloud-agent-service-account-key.json` so that the Go binary inside the agent container finds this file in the path that's mapped via the docker-compose.yaml files + - Only include the `- dialAddress` entries that this cloud agent instance can reach, remove the others, so the Cloud instance doesn't try using this agent instance for code hosts it can't reach + - Use extra caution when pasting the config.yaml in Windows, as it may use Windows' line endings or extra spaces, which breaks YAML, as a whitespace-dependent format +6. Run `docker compose up -d` +7. Add a Code Host config to the customer's Cloud instance + - Type: src serve-git + - `"url": "http://src-serve-git-ubuntu.local:443",` + - or + - `"url": "http://src-serve-git-wsl.local:443",` + - Note the port 443, even when used with http:// +8. Use the repo-converter to convert SVN, TFVC, or Git repos, to Git format, which will store them in the `src-serve-root` directory, or use any other means to get the repos into the directory + - There are docker-compose.yaml and override files in a few different directories in this repo, separated by use case, so that each use case only needs to run `docker compose up -d` in one directory, and not fuss around with `-f` paths. + - The only difference between the docker-compose-override.yaml files in host-ubuntu vs host-wsl is the src-serve-git container's name, which is how we get a separate `dnsName` for each. 
+ - If you're using the repo-converter: + - If you're using the pre-built images, `cd ./repo-converter && docker compose up -d` + - If you're building the Docker images, `cd ./repo-converter/build && docker compose up -d --build` + - Either of these will start all 3 containers: cloud-agent, src-serve-git, and the repo-converter \ No newline at end of file diff --git a/host-wsl2-ubuntu/docker-compose.yaml b/config/docker-compose-common-services.yaml similarity index 93% rename from host-wsl2-ubuntu/docker-compose.yaml rename to config/docker-compose-common-services.yaml index 823abb4..b962235 100644 --- a/host-wsl2-ubuntu/docker-compose.yaml +++ b/config/docker-compose-common-services.yaml @@ -4,7 +4,7 @@ services: cloud-agent: container_name: cloud-agent - image: index.docker.io/sourcegraph/src-tunnel-agent:2024-02-05-22-28-333320ee + image: index.docker.io/sourcegraph/src-tunnel-agent:2024-02-15-04-02-110c2ea9 volumes: - ../config/cloud-agent-service-account-key.json:/sourcegraph/cloud-agent-service-account-key.json:ro - ../config/cloud-agent-config.yaml:/sourcegraph/cloud-agent-config.yaml:ro @@ -15,7 +15,7 @@ services: src-serve-git: # Uses a valid hostname as container_name, to trick the cloud agent and code host config into finding this container on the Docker network - container_name: src-serve-git-wsl.local + container_name: src-serve-git-ubuntu.local image: index.docker.io/sourcegraph/src-cli:latest volumes: - ../src-serve-root/:/sourcegraph/src-serve-root:ro diff --git a/host-ubuntu/docker-compose-override.yaml b/host-ubuntu/docker-compose-override.yaml new file mode 100644 index 0000000..16f493e --- /dev/null +++ b/host-ubuntu/docker-compose-override.yaml @@ -0,0 +1,5 @@ +services: + + src-serve-git: + # Uses a valid hostname as container_name, to trick the cloud agent and code host config into finding this container on the Docker network + container_name: src-serve-git-ubuntu.local diff --git a/host-ubuntu/docker-compose.yaml 
b/host-ubuntu/docker-compose.yaml index c6b0bbb..922be6c 100644 --- a/host-ubuntu/docker-compose.yaml +++ b/host-ubuntu/docker-compose.yaml @@ -1,27 +1,4 @@ -version: '2.4' - -services: - - cloud-agent: - container_name: cloud-agent - image: index.docker.io/sourcegraph/src-tunnel-agent:2024-02-05-22-28-333320ee - volumes: - - ../config/cloud-agent-service-account-key.json:/sourcegraph/cloud-agent-service-account-key.json:ro - - ../config/cloud-agent-config.yaml:/sourcegraph/cloud-agent-config.yaml:ro - command: ["-config=/sourcegraph/cloud-agent-config.yaml"] - restart: always - networks: - - sourcegraph - - src-serve-git: - # Uses a valid hostname as container_name, to trick the cloud agent and code host config into finding this container on the Docker network - container_name: src-serve-git-ubuntu.local - image: index.docker.io/sourcegraph/src-cli:latest - volumes: - - ../src-serve-root/:/sourcegraph/src-serve-root:ro - command: "serve-git -addr :443 /sourcegraph/src-serve-root" - networks: - - sourcegraph - -networks: - sourcegraph: +include: + - path: + - ../config/docker-compose-common-services.yaml + - docker-compose-override.yaml diff --git a/host-wsl/docker-compose-override.yaml b/host-wsl/docker-compose-override.yaml new file mode 100644 index 0000000..91210aa --- /dev/null +++ b/host-wsl/docker-compose-override.yaml @@ -0,0 +1,5 @@ +services: + + src-serve-git: + # Uses a valid hostname as container_name, to trick the cloud agent and code host config into finding this container on the Docker network + container_name: src-serve-git-wsl.local diff --git a/host-wsl/docker-compose.yaml b/host-wsl/docker-compose.yaml new file mode 100644 index 0000000..922be6c --- /dev/null +++ b/host-wsl/docker-compose.yaml @@ -0,0 +1,4 @@ +include: + - path: + - ../config/docker-compose-common-services.yaml + - docker-compose-override.yaml diff --git a/host-wsl2-ubuntu/wsl2-startup.sh b/host-wsl/wsl2-startup.sh similarity index 77% rename from 
host-wsl2-ubuntu/wsl2-startup.sh rename to host-wsl/wsl2-startup.sh index d0cf725..a3be726 100644 --- a/host-wsl2-ubuntu/wsl2-startup.sh +++ b/host-wsl/wsl2-startup.sh @@ -4,11 +4,12 @@ log_file="./log" git_exit_status="" docker_compose_exit_status="" +repo_build_path="/sourcegraph/implementation-bridges/repo-converter/build" echo "Starting $0 $@" >> $log_file # Git pull latest commits to main -if ! git pull +if ! git -C $repo_build_path pull then git_exit_status=$? echo "git pull failed, exit code $git_exit_status" >> $log_file @@ -16,7 +17,7 @@ then fi # Start docker compose services -if ! docker compose up -d +if ! docker compose -f $repo_build_path/docker-compose.yaml up -d --build then docker_compose_exit_status=$? echo "docker compose up failed, exit code $docker_compose_exit_status" >> $log_file diff --git a/repo-converter/build/Dockerfile b/repo-converter/build/Dockerfile index 77be094..eab663c 100644 --- a/repo-converter/build/Dockerfile +++ b/repo-converter/build/Dockerfile @@ -18,11 +18,9 @@ RUN apt-get update && \ apt-get upgrade -y && \ apt-get install --no-install-recommends -y \ cron \ - gcc \ git \ git-svn \ python3 \ - python3-dev \ python3-pip \ python3-wheel \ subversion \ @@ -33,6 +31,9 @@ RUN apt-get update && \ systemctl enable cron && \ systemctl start cron +# python3-dev \ +# gcc \ + # Download dependencies as a separate step to take advantage of Docker's caching # Leverage a cache mount to /root/.cache/pip to speed up subsequent builds # Leverage a bind mount to requirements.txt to avoid having to copy them into diff --git a/repo-converter/build/docker-compose-override.yaml b/repo-converter/build/docker-compose-override.yaml new file mode 100644 index 0000000..f9c7dbd --- /dev/null +++ b/repo-converter/build/docker-compose-override.yaml @@ -0,0 +1,6 @@ +services: + + src-serve-git: + # Uses a valid hostname as container_name, to trick the cloud agent and code host config into finding this container on the Docker network + container_name: 
src-serve-git-wsl.local + command: "-v serve-git -addr :443 /sourcegraph/src-serve-root" \ No newline at end of file diff --git a/repo-converter/build/docker-compose.yaml b/repo-converter/build/docker-compose.yaml index 64d35a5..9555969 100644 --- a/repo-converter/build/docker-compose.yaml +++ b/repo-converter/build/docker-compose.yaml @@ -1,15 +1,22 @@ version: '2.4' +include: + - path: + - ../../config/docker-compose-common-services.yaml + - docker-compose-override.yaml + services: repo-converter: container_name: repo-converter build: context: . + image: sourcegraph/implementation-bridge-repo-converter:build + restart: always volumes: - ../../config/repos-to-convert.yaml:/sourcegraph/repos-to-convert.yaml:ro - ../../config/toprc:/root/.config/procps/toprc - ../../src-serve-root/:/sourcegraph/src-serve-root environment: - - BRIDGE_REPO_CONVERTER_INTERVAL_SECONDS=10 - - LOG_LEVEL=DEBUG # DEBUG INFO WARNING ERROR CRITICAL # Default is INFO + - BRIDGE_REPO_CONVERTER_INTERVAL_SECONDS=60 + - LOG_LEVEL=DEBUG # DEBUG INFO WARNING ERROR CRITICAL # Default is INFO \ No newline at end of file diff --git a/repo-converter/build/pull-build-start.sh b/repo-converter/build/pull-build-start.sh new file mode 100644 index 0000000..9fb23bb --- /dev/null +++ b/repo-converter/build/pull-build-start.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# To be used in a cronjob to always pull and build the latest commit to the current branch +# every 10 minutes +# so that the running container is only 10 minutes behind the latest commit in the branch + +# crontab -e +# */10 * * * * sudo bash /sourcegraph/implementation-bridges/repo-converter/build/pull-build-start.sh >> /sourcegraph/implementation-bridges/repo-converter/build/pull-build-start.log 2>&1 + +repo_converter_build_path="/sourcegraph/implementation-bridges/repo-converter/build" + +git -C $repo_converter_build_path pull + +docker compose -f $repo_converter_build_path/docker-compose.yaml up -d --build diff --git 
a/repo-converter/build/requirements.txt b/repo-converter/build/requirements.txt index 3241865..5500f00 100644 --- a/repo-converter/build/requirements.txt +++ b/repo-converter/build/requirements.txt @@ -1,2 +1 @@ -psutil PyYAML diff --git a/repo-converter/build/run.py b/repo-converter/build/run.py index c259487..8a03e07 100644 --- a/repo-converter/build/run.py +++ b/repo-converter/build/run.py @@ -3,25 +3,30 @@ ### TODO: + # Configure batch size, so we see repos in Sourcegraph update as the fetch jobs progress + + # git svn fetch --revision START:END + # git svn fetch --revision BASE:[number] + # to speed things along - so I recently added that as an answer to my own question (update now taking routinely less than an hour). Still that all seemed really odd. "--revision" isn't documented as an option to "git svn fetch", you need to dig START out of .git/svn/.metadata and END out of the SVN repository. + + # Git SSH clone + # Parallelism - # Check its last line of output - # Try calling the process to - # Also clears out zombie processes - # If not, script starts a new fetch job - # Creates a lock file - # Use the multiprocessing module to fork off a child process, but don't reuse the run_subprocess function, to avoid reference before assignment error of completed_process - # Poll the fetch process - # To see if it's completed, then log it - # Output status update on clone jobs - # Revision x of y completed, time taken, ETA for remaining revisions - # Store subprocess_dict in a file? 
+ # Poll the fetch process + # To see if it's actually doing something, then log it + # Output status update on clone jobs + # Revision x of y completed, time taken, ETA for remaining revisions - # Configure batch size, so we see repos in Sourcegraph update as the fetch jobs progress - # May be able to use - # git config svn-remote.svn.branches-maxRev 590500 + #.git ignore files + # git svn create-ignore + # git svn show-ignore + # https://git-scm.com/docs/git-svn#Documentation/git-svn.txt-emcreate-ignoreem + # Performance + # --log-window-size # Test layout tags and branches as lists / arrays + # Atlassian's Java binary to tidy up branches and tags # Delete repos from disk no longer in scope for the script? @@ -65,19 +70,21 @@ ## Import libraries # Standard libraries -from multiprocessing import Process # https://docs.python.org/3/library/multiprocessing.html from pathlib import Path # https://docs.python.org/3/library/pathlib.html import argparse # https://docs.python.org/3/library/argparse.html import json # https://docs.python.org/3/library/json.html import logging # https://docs.python.org/3/library/logging.html +import multiprocessing # https://docs.python.org/3/library/multiprocessing.html import os # https://docs.python.org/3/library/os.html import shutil # https://docs.python.org/3/library/shutil.html +import signal # https://docs.python.org/3/library/signal.html import subprocess # https://docs.python.org/3/library/subprocess.html import sys # https://docs.python.org/3/library/sys.html import time # https://docs.python.org/3/library/time.html # Third party libraries -# psutil requires adding gcc to the Docker image build, which adds about 4 minutes to the build time -import psutil # https://pypi.org/project/psutil/ +# psutil requires adding gcc to the Docker image build, which adds about 4 minutes to the build time, and doubled the size of the image +# If there's a way to remove it, that may be handy +# import psutil # https://pypi.org/project/psutil/ 
import yaml # https://pyyaml.org/wiki/PyYAMLDocumentation @@ -85,6 +92,13 @@ script_name = os.path.basename(__file__) args_dict = {} repos_dict = {} +running_processes = [] + + +def signal_handler(signal, frame): + signal_name = signal.Signals(signal).name + logging.debug(f"Received signal {signal_name}: {signal} frame: {frame}") +signal.signal(signal.SIGINT, signal_handler) def parse_args(): @@ -190,15 +204,20 @@ def set_logging(): def parse_repos_to_convert_file_into_repos_dict(): + # Clear the dict for this execution to remove repos which have been removed from the yaml file + repos_dict.clear() + # Parse the repos-to-convert.yaml file try: # Open the file with open(args_dict["repos_to_convert_file"], "r") as repos_to_convert_file: - # Returns a list, not a dict + # This should return a dict code_hosts_list_temp = yaml.safe_load(repos_to_convert_file) + # Weird thing we have to do + # Reading directory into repos_dict doesn't persist the dict outside the function for repo_dict_key in code_hosts_list_temp.keys(): # Store the repo_dict_key in the repos_dict @@ -213,7 +232,7 @@ def parse_repos_to_convert_file_into_repos_dict(): except (AttributeError, yaml.scanner.ScannerError) as e: - logging.error(f"Invalid YAML file format in {args_dict['repos_to_convert_file']}, please check the structure matches the format in the README.md. {type(e)}, {e.args}, {e}") + logging.error(f"Invalid YAML file format in {args_dict['repos_to_convert_file']}, please check the structure matches the format in the README.md. 
Exception: {type(e)}, {e.args}, {e}") sys.exit(2) @@ -226,49 +245,106 @@ def clone_svn_repos(): if repos_dict[repo_key].get('type','').lower() != 'svn': continue - # Get config parameters read from repos-to-clone.yaml - svn_repo_code_root = repos_dict[repo_key].get('svn-repo-code-root','') - username = repos_dict[repo_key].get('username','') - password = repos_dict[repo_key].get('password','') - code_host_name = repos_dict[repo_key].get('code-host-name','') - git_org_name = repos_dict[repo_key].get('git-org-name','') - git_repo_name = repos_dict[repo_key].get('git-repo-name','') + # Get config parameters read from repos-to-clone.yaml, and set defaults if they're not provided + git_repo_name = repo_key + svn_repo_code_root = repos_dict[repo_key].get('svn-repo-code-root', None) + username = repos_dict[repo_key].get('username', None) + password = repos_dict[repo_key].get('password', None) + code_host_name = repos_dict[repo_key].get('code-host-name', None) + git_org_name = repos_dict[repo_key].get('git-org-name', None) git_default_branch = repos_dict[repo_key].get('git-default-branch','main') - authors_file_path = repos_dict[repo_key].get('authors-file-path','') - authors_prog_path = repos_dict[repo_key].get('authors-prog-path','') - git_ignore_file_path = repos_dict[repo_key].get('git-ignore-file-path','') - layout = repos_dict[repo_key].get('layout','') - trunk = repos_dict[repo_key].get('trunk','') - tags = repos_dict[repo_key].get('tags','') - branches = repos_dict[repo_key].get('branches','') + fetch_batch_size = repos_dict[repo_key].get('fetch-batch-size', None) + repo_total_revisions = repos_dict[repo_key].get('repo-total-revisions', None) + authors_file_path = repos_dict[repo_key].get('authors-file-path', None) + authors_prog_path = repos_dict[repo_key].get('authors-prog-path', None) + git_ignore_file_path = repos_dict[repo_key].get('git-ignore-file-path', None) + layout = repos_dict[repo_key].get('layout', None) + trunk = repos_dict[repo_key].get('trunk', 
None) + tags = repos_dict[repo_key].get('tags', None) + branches = repos_dict[repo_key].get('branches', None) ## Parse config parameters into command args # TODO: Interpret code_host_name, git_org_name, and git_repo_name if not given + # ex. https://svn.apache.org/repos/asf/parquet/site + # code_host_name = svn.apache.org # can get by removing url scheme, if any, till the first / + # arbitrary path on server = repos # optional, can either be a directory, or may actually be the repo + # git_org_name = asf + # git_repo_name = parquet + # git repo root = site # arbitrary path inside the repo where contributors decided to start storing /trunk /branches /tags and other files to be included in the repo repo_path = str(args_dict["repo_share_path"]+"/"+code_host_name+"/"+git_org_name+"/"+git_repo_name) + ## Define common command args + arg_svn_non_interactive = [ "--non-interactive" ] # Do not prompt, just fail if the command doesn't work, only used for direct `svn` command + arg_svn_username = [ "--username", username ] + arg_svn_password = [ "--password", password ] # Only used for direct `svn` command + # arg_svn_echo_password = [ "echo", f"\"{password}\"", "|" ] # Used for git svn commands # Breaks getting the correct process exit code + arg_svn_echo_password = None + arg_svn_repo_code_root = [ svn_repo_code_root ] + arg_git_cfg = [ "git", "-C", repo_path, "config" ] + arg_git_svn = [ "git", "-C", repo_path, "svn" ] + + ## Define commands + cmd_run_svn_info = [ "svn", "info" ] + arg_svn_repo_code_root + arg_svn_non_interactive + cmd_run_svn_log = [ "svn", "log", "--xml" ] + arg_svn_repo_code_root + arg_svn_non_interactive + cmd_cfg_git_default_branch = arg_git_cfg + [ "--global", "init.defaultBranch", git_default_branch ] # Possibility of collisions if multiple of these are run overlapping, make sure it's quick between reading and using this + cmd_run_git_svn_init = arg_git_svn + [ "init" ] + arg_svn_repo_code_root + cmd_cfg_git_bare_clone = arg_git_cfg + [ 
"core.bare", "true" ] + cmd_cfg_git_authors_file = arg_git_cfg + [ "svn.authorsfile", authors_file_path ] + cmd_cfg_git_authors_prog = arg_git_cfg + [ "svn.authorsProg", authors_prog_path ] + cmd_run_git_svn_fetch = arg_git_svn + [ "fetch" ] + + # Used to check if this command is already running in another process, without the password + cmd_run_git_svn_fetch_without_password = ' '.join(cmd_run_git_svn_fetch) + # States - # repo_state = "create" # Create: - # First time - Create new path / repo / fetch job - # First run of the script - # New repo was added to the repos-to-convert.yaml file - # Repo was deleted from disk - # repo_state = "update" + # State: + # The directory doesn't already exist + # The repo doesn't already exist + # How did we get here: + # First time - Create new path / repo / fetch job + # First run of the script + # New repo was added to the repos-to-convert.yaml file + # Repo was deleted from disk + # Approach: + # Harder to test for the negative, so assume we're in the Create state, unless we find we're in the Running or Update states + repo_state = "create" + # Running: + # State: + # An svn fetch process is still running + # How did we get here: + # Fetch process is still running from a previous run of the script + # Approach: + # Check first if the process is running, then continue this outer loop # Update: - # Not the first time - # Repo already exists - # A fetch job was previously started, and may or may not still be running + # State: + # Repo already exists, with a valid configuration + # How did we get here: + # A fetch job was previously run, but is not currently running + # Approach: + # Check if we're in the update state, then set repo_state = "update" + # repo_state = "update" + - # Assumptions - # If the folder or repo don't already exist, then we're in the first time state + ## Check if we're in the Running state + # Check if a fetch process is currently running for this repo + try: - # Check - # If the git repo exists and has 
the correct settings in the config file, then it's not the first time + ps_command = ["ps", "-e", "--format", "%a"] + + completed_ps_command = subprocess.run(ps_command, check=True, capture_output=True, text=True) + + if cmd_run_git_svn_fetch_without_password in completed_ps_command.stdout: + + logging.info(f"Fetching process already running for {repo_key}") + continue + + except Exception as e: + logging.warning(f"Failed to check if {cmd_run_git_svn_fetch_without_password} is already running, will try to start it. Exception: {type(e)}, {e.args}, {e}") - # Assume we're in the Create state, unless the repo's git config file contains the svn repo url - repo_state = "create" - # If git config file exists for this repo, check if it contains the svn_repo_code_root value + ## Check if we're in the Update state + # Check if the git repo already exists and has the correct settings in the config file repo_git_config_file_path = repo_path + "/.git/config" if os.path.exists(repo_git_config_file_path): @@ -284,162 +360,209 @@ def clone_svn_repos(): # fetch = ambari/trunk:refs/remotes/origin/trunk # branches = ambari/branches/*:refs/remotes/origin/* # tags = ambari/tags/*:refs/remotes/origin/tags/* + # So the url value is likely a substring, or a match of the svn_repo_code_root variable value # So we need to extract the url line, then check if it's in the svn_repo_code_root variable value - for line in repo_git_config_file_contents.splitlines(): if "url =" in line: - # Get the URL value + + # Get the URL value from the line url_value = line.split("url = ")[1] + if url_value in svn_repo_code_root: repo_state = "update" logging.info(f"Found existing repo for {repo_key}, updating it") - break - ## Define common command args - arg_svn_non_interactive = [ "--non-interactive" ] # Do not prompt, just fail if the command doesn't work, not supported by all commands - arg_svn_username = [ "--username", username ] - arg_svn_password = [ "--password", password ] # Only used for direct 
`svn` command - arg_svn_echo_password = [ "echo", password, "|" ] # Used for git svn commands - arg_svn_repo_code_root = [ svn_repo_code_root ] - arg_git_cfg = [ "git", "-C", repo_path, "config" ] - arg_git_svn = [ "git", "-C", repo_path, "svn" ] - - ## Define commands - cmd_svn_run_login = [ "svn", "info" ] + arg_svn_repo_code_root + arg_svn_non_interactive - cmd_git_cfg_default_branch = arg_git_cfg + [ "--global", "init.defaultBranch", git_default_branch ] # Possibility of collisions if multiple of these are run overlapping, make sure it's quick between reading and using this - cmd_git_run_svn_init = arg_git_svn + [ "init" ] + arg_svn_repo_code_root - cmd_git_cfg_bare_clone = arg_git_cfg + [ "core.bare", "true" ] - cmd_git_cfg_authors_file = arg_git_cfg + [ "svn.authorsfile", authors_file_path ] - cmd_git_cfg_authors_prog = arg_git_cfg + [ "svn.authorsProg", authors_prog_path ] - cmd_git_run_svn_fetch = arg_git_svn + [ "fetch" ] - - # Used to check if this command is already running in another process, without the password - cmd_git_run_svn_fetch_without_password = ' '.join(cmd_git_run_svn_fetch) + # Break out of the inner for loop + break ## Modify commands based on config parameters if username: - cmd_git_run_svn_init += arg_svn_username + cmd_run_svn_info += arg_svn_username + cmd_run_svn_log += arg_svn_username + cmd_run_git_svn_init += arg_svn_username + cmd_run_git_svn_fetch += arg_svn_username if password: - cmd_git_run_svn_init = arg_svn_echo_password + cmd_git_run_svn_init - cmd_git_run_svn_fetch = arg_svn_echo_password + cmd_git_run_svn_fetch + arg_svn_echo_password = True + cmd_run_svn_info += arg_svn_password + cmd_run_svn_log += arg_svn_password + + ## Run commands + # Run the svn info command to test logging in to the SVN server, for network connectivity and credentials + # Capture the output so we know the max revision in this repo's history + svn_info = subprocess_run(cmd_run_svn_info, password, arg_svn_echo_password) - if username and password: - 
cmd_svn_run_login += arg_svn_username + arg_svn_password + if repo_state == "create": - if layout: - cmd_git_run_svn_init += ["--stdlayout"] + logging.info(f"Didn't find a repo on disk for {repo_key}, creating it") - # Warn the user if they provided an invalid value for the layout, only standard is supported - if "standard" not in layout and "std" not in layout: - logging.warning(f"Layout {layout} provided for repo {repo_key}, only standard is supported, continuing assuming standard") + # # If the user didn't provide a batch size, try and determine one from repo stats + # if not fetch_batch_size and not repo_total_revisions: - if trunk: - cmd_git_run_svn_init += ["--trunk", trunk] - if tags: - cmd_git_run_svn_init += ["--tags", tags] - if branches: - cmd_git_run_svn_init += ["--branches", branches] + # # Get the rev number for the last rev this repo was changed from the svn info output + # # Default to not specifying a --revision + # if "Last Changed Rev:" in svn_info: - ## Run commands - # Log in to the SVN server to test if credentials are needed / provided / valid - subprocess_run(cmd_svn_run_login, password) + # last_changed_rev = int(svn_info.split("Last Changed Rev: ")[1].split(" ")[0]) + # logging.debug(f"Last Changed Rev for {repo_key}: {last_changed_rev}") + + # cmd_run_svn_log += ["--revision", "BASE:"+str(last_changed_rev)] + + # # Get the number of revisions in this repo's history, to know how many batches to fetch in the initial clone + # # Note this could be a slow process + # svn_log = subprocess_run(cmd_run_svn_log, password) + + # repo_rev_count = int(svn_info.split("Revision: ")[1].split(" ")[0]) + + # if repo_rev_count < 10000: + # fetch_batch_size = last_changed_rev + # else: + # fetch_batch_size = f"BASE:{last_changed_rev}" + + # # TODO: Find a way to set batch size for initial fetch vs update fetches + # if fetch_batch_size and not fetch_batch_size == "HEAD": + # cmd_run_git_svn_fetch += ["--revision", fetch_batch_size] - if repo_state == 
"create": # Create the repo path if it doesn't exist if not os.path.exists(repo_path): os.makedirs(repo_path) # Set the default branch before init - subprocess_run(cmd_git_cfg_default_branch) + subprocess_run(cmd_cfg_git_default_branch) + + if layout: + cmd_run_git_svn_init += ["--stdlayout"] + + # Warn the user if they provided an invalid value for the layout, only standard is supported + if "standard" not in layout and "std" not in layout: + logging.warning(f"Layout {layout} provided for repo {repo_key}, only standard is supported, continuing assuming standard") + + if trunk: + cmd_run_git_svn_init += ["--trunk", trunk] + if tags: + cmd_run_git_svn_init += ["--tags", tags] + if branches: + cmd_run_git_svn_init += ["--branches", branches] # Initialize the repo - subprocess_run(cmd_git_run_svn_init, password) + subprocess_run(cmd_run_git_svn_init, password) # Configure the bare clone - subprocess_run(cmd_git_cfg_bare_clone) - - # Configure the authors file, if provided - if authors_file_path: - if os.path.exists(authors_file_path): - subprocess_run(cmd_git_cfg_authors_file) - else: - logging.warning(f"Authors file not found at {authors_file_path}, skipping") - - # Configure the authors program, if provided - if authors_prog_path: - if os.path.exists(authors_prog_path): - subprocess_run(cmd_git_cfg_authors_prog) - else: - logging.warning(f"Authors prog not found at {authors_prog_path}, skipping") - - # Configure the .gitignore file, if provided - if git_ignore_file_path: - if os.path.exists(git_ignore_file_path): - logging.debug(f"Copying .gitignore file from {git_ignore_file_path} to {repo_path}") - shutil.copy2(git_ignore_file_path, repo_path) - else: - logging.warning(f".gitignore file not found at {git_ignore_file_path}, skipping") + subprocess_run(cmd_cfg_git_bare_clone) - try: - # Check if any running process has the git svn fetch command in it - running_processes = {} - for process in psutil.process_iter(): - process_command = ' '.join(process.cmdline()) - 
running_processes[process_command] = process.pid + ## Back to steps we do for both Create and Update states, so users can update the below parameters without having to restart the clone from scratch - # If yes, continue - # It'd be much easier to run this check directly in the above loop, but then the continue would just break out of the inner loop, and not skip the repo - if cmd_git_run_svn_fetch_without_password in running_processes.keys(): - pid = running_processes[cmd_git_run_svn_fetch_without_password] - process = psutil.Process(pid) - process_command = ' '.join(process.cmdline()) - logging.debug(f"Found pid {pid} running, skipping git svn fetch. Process: {process}, Command: {process_command}") - continue + # Configure the authors file, if provided + if authors_file_path: + if os.path.exists(authors_file_path): + subprocess_run(cmd_cfg_git_authors_file) + else: + logging.warning(f"Authors file not found at {authors_file_path}, skipping") - except Exception as e: - logging.warning(f"Failed to check if {cmd_git_run_svn_fetch_without_password} is already running, will try to start it. 
Exception: {e}") + # Configure the authors program, if provided + if authors_prog_path: + if os.path.exists(authors_prog_path): + subprocess_run(cmd_cfg_git_authors_prog) + else: + logging.warning(f"Authors prog not found at {authors_prog_path}, skipping") + + # Configure the .gitignore file, if provided + if git_ignore_file_path: + if os.path.exists(git_ignore_file_path): + logging.info(f"Copying .gitignore file from {git_ignore_file_path} to {repo_path}") + shutil.copy2(git_ignore_file_path, repo_path) + else: + logging.warning(f".gitignore file not found at {git_ignore_file_path}, skipping") # Start a fetch - logging.info(f"Fetching SVN repo {repo_key} with {cmd_git_run_svn_fetch_without_password}") - git_svn_fetch(cmd_git_run_svn_fetch, password) + logging.info(f"Fetching SVN repo {repo_key} with {cmd_run_git_svn_fetch_without_password}") + + process = multiprocessing.Process(target=subprocess_run, name="git svn fetch "+git_repo_name, args=(cmd_run_git_svn_fetch, password, arg_svn_echo_password)) + process.start() + # process.join() # join prevents zombies, but it also blocks parallel processing + running_processes.append(process) + + +def redact_password_from_list(args, password=None): + + args_without_password = [] + + if password: + + for arg in args: + + if password in arg: + arg = arg.replace(password, "REDACTED-PASSWORD") -def git_svn_fetch(cmd_git_run_svn_fetch, password): + args_without_password.append(arg) - fetch_process = Process(target=subprocess_run, args=(cmd_git_run_svn_fetch, password)) - fetch_process.start() + else: + args_without_password = args.copy() - return fetch_process.pid + return args_without_password -def subprocess_run(args, password=False): +def subprocess_run(args, password=None, echo_password=None): # Using the subprocess module # https://docs.python.org/3/library/subprocess.html#module-subprocess # Waits for the process to complete # Redact passwords for logging - args_without_password = args.copy() - if password: - 
args_without_password[args_without_password.index(password)] = "REDACTED-PASSWORD" + # Convert to string because that's all we're using it for anyway + args_without_password_string = ' '.join(redact_password_from_list(args, password)) + std_out_without_password = None try: - logging.debug(f"Starting subprocess: {' '.join(args_without_password)}") + logging.debug(f"Starting subprocess: {args_without_password_string}") - completed_process = subprocess.run(args, check=True, capture_output=True, text=True) + # If password is provided to this function, feed it into the subprocess' stdin pipe + # Otherwise the input keyword arg is still set to the None type + if echo_password: + finished_process = subprocess.run(args, capture_output=True, check=True, text=True, input=password) + else: + finished_process = subprocess.run(args, capture_output=True, check=True, text=True) - if completed_process.returncode == 0: - logging.debug(f"Subprocess succeeded: {' '.join(args_without_password)} with output: {completed_process.stdout}") + # If the subprocess didn't raise an exception, then it succeeded + std_out_without_password = ' '.join(redact_password_from_list(finished_process.stdout.splitlines(), password)) + logging.info(f"Subprocess succeeded: {args_without_password_string} with output: {std_out_without_password}") except subprocess.CalledProcessError as error: - logging.error(f"Subprocess failed: {' '.join(args_without_password)} with error: {error}, and stderr: {error.stderr}") + std_err_without_password = ' '.join(redact_password_from_list(error.stderr.splitlines(), password)) + logging.error(f"Subprocess failed: {args_without_password_string} with error: {error}, and stderr: {std_err_without_password}") + + # Handle the case of abandoned git svn lock files blocking fetch processes + # We already know that no other git svn fetch processes are running, because we checked for that before spawning this fetch process + # fatal: Unable to create 
'/sourcegraph/src-serve-root/svn.apache.org/wsl/zest/.git/svn/refs/remotes/git-svn/index.lock': File exists. Another git process seems to be running in this repository, e.g. an editor opened by 'git commit'. Please make sure all processes are terminated then try again. If it still fails, a git process may have crashed in this repository earlier: remove the file manually to continue. write-tree: command returned error: 128 + lock_file_error_strings = ["Unable to create", "index.lock", "File exists"] + lock_file_error_conditions = (lock_file_error_string in std_err_without_password for lock_file_error_string in lock_file_error_strings) + if all(lock_file_error_conditions): + + try: + + # Get the index.lock file path from std_err_without_password + lock_file_path = std_err_without_password.split("Unable to create '")[1].split("': File exists.")[0] + + logging.warning(f"Fetch failed to start due to finding a lockfile in repo at {lock_file_path}. Deleting the lockfile so it'll try again on the next run.") + + # Careful with recursive function call, don't create infinite recursion and fork bomb the container + if subprocess_run(["rm", "-f", lock_file_path]): + logging.info(f"Successfully deleted {lock_file_path}") + + except subprocess.CalledProcessError as error: + logging.error(f"Failed to rm -f lockfile at {lock_file_path} with error: {error}") + + except ValueError as error: + logging.error(f"Failed to find git execution path in command args while trying to delete {lock_file_path} with error: {error}") + + return std_out_without_password def clone_tfs_repos(): @@ -457,23 +580,40 @@ def clone_tfs_repos(): tfs_repos_dict[repo_key] = repos_dict[repo_key] - logging.debug("Cloning TFS repos" + str(tfs_repos_dict)) + logging.info("Cloning TFS repos" + str(tfs_repos_dict)) -def cleanup_zombie_processes(): +def status_update_and_cleanup_zombie_processes(): - logging.debug("Checking for zombie processes") + count_processes_still_running = 0 + count_processes_finished = 0 - # 
Get a list of all the running processes - pid_list = psutil.pids() - for pid in pid_list: - try: - if psutil.Process(pid).status() == psutil.STATUS_ZOMBIE: - logging.debug(f"Found zombie process {pid}, trying to flush it from the proc table") - psutil.Process(pid).wait(0) + try: - except Exception as e: - logging.debug(f"Failed while checking for zombie processes, exception: {type(e)}, {e.args}, {e}") + for process in running_processes: + + if process.is_alive(): + + count_processes_still_running += 1 + logging.info(f"pid {process.pid} still running: {process.name}") + + else: + + count_processes_finished += 1 + logging.info(f"Process finished with exit code {process.exitcode}: {process.name}") + running_processes.remove(process) + + except Exception as e: + + logging.error(f"Failed while checking for zombie processes, Exception: {type(e)}, {e.args}, {e}") + + logging.info(f"Count of repo fetch processes still running: {count_processes_still_running}") + logging.info(f"Count of repo fetch processes finished: {count_processes_finished}") + logging.info("Cleaning up zombie processes") + + # Returns a list of all child processes still running + # Also joins all completed (zombie) processes to clear them + multiprocessing.active_children() def main(): @@ -482,24 +622,32 @@ def main(): run_interval_seconds = os.environ.get('BRIDGE_REPO_CONVERTER_INTERVAL_SECONDS', 3600) run_number = 0 - while True: + parse_args() + set_logging() + cmd_cfg_git_safe_directory = ["git", "config", "--system", "--add", "safe.directory", "\"*\""] + subprocess_run(cmd_cfg_git_safe_directory) + + logging.debug("Multiprocessing module using start method: " + multiprocessing.get_start_method()) + + while True: - parse_args() - set_logging() - logging.debug(f"Starting {script_name} run {run_number} with args: " + str(args_dict)) + logging.info(f"Starting {script_name} run {run_number} with args: " + str(args_dict)) - cleanup_zombie_processes() + status_update_and_cleanup_zombie_processes() 
parse_repos_to_convert_file_into_repos_dict() clone_svn_repos() # clone_tfs_repos() - logging.debug(f"Finishing {script_name} run {run_number} with args: " + str(args_dict)) - logging.debug(f"Sleeping for BRIDGE_REPO_CONVERTER_INTERVAL_SECONDS={run_interval_seconds} seconds") + logging.info(f"Finishing {script_name} run {run_number} with args: " + str(args_dict)) run_number += 1 + status_update_and_cleanup_zombie_processes() # Sleep the configured interval + # Wait 1 second for the last repo sub process to get kicked off before logging this message, otherwise it gets logged out of order + time.sleep(1) + logging.info(f"Sleeping for BRIDGE_REPO_CONVERTER_INTERVAL_SECONDS={run_interval_seconds} seconds") time.sleep(int(run_interval_seconds)) diff --git a/repo-converter/docker-compose-override.yaml b/repo-converter/docker-compose-override.yaml new file mode 100644 index 0000000..16f493e --- /dev/null +++ b/repo-converter/docker-compose-override.yaml @@ -0,0 +1,5 @@ +services: + + src-serve-git: + # Uses a valid hostname as container_name, to trick the cloud agent and code host config into finding this container on the Docker network + container_name: src-serve-git-ubuntu.local diff --git a/repo-converter/docker-compose.yaml b/repo-converter/docker-compose.yaml index 6c045b6..210877a 100644 --- a/repo-converter/docker-compose.yaml +++ b/repo-converter/docker-compose.yaml @@ -1,5 +1,10 @@ version: '2.4' +include: + - path: + - ../config/docker-compose-common-services.yaml + - docker-compose-override.yaml + services: repo-converter: @@ -7,8 +12,8 @@ services: image: ghcr.io/sourcegraph/implementation-bridge-repo-converter:insiders volumes: - ../config/repos-to-convert.yaml:/sourcegraph/repos-to-convert.yaml:ro - - ../src-serve-root/:/sourcegraph/src-serve-root" + - ../src-serve-root/:/sourcegraph/src-serve-root restart: always environment: - - BRIDGE_REPO_CONVERTER_INTERVAL_SECONDS=3600 - - LOG_LEVEL=INFO # DEBUG INFO WARNING ERROR CRITICAL # Default is INFO + - 
BRIDGE_REPO_CONVERTER_INTERVAL_SECONDS=300 + # - LOG_LEVEL=INFO # DEBUG INFO WARNING ERROR CRITICAL # Default is INFO if unspecified diff --git a/repo-converter/pull-start.sh b/repo-converter/pull-start.sh new file mode 100644 index 0000000..a242b33 --- /dev/null +++ b/repo-converter/pull-start.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +# To be used in a cronjob to always pull and use the latest image +# so that the running container is only x minutes/hours behind the latest version of the docker-compose.yaml file, and the Docker image tagged latest in GitHub packages + +# crontab -e +# */10 * * * * sudo bash /sourcegraph/implementation-bridges/repo-converter/pull-start.sh >> /sourcegraph/implementation-bridges/repo-converter/pull-start.log 2>&1 + +repo_converter_path="/sourcegraph/implementation-bridges/repo-converter" + +git -C $repo_converter_path pull + +docker compose -f $repo_converter_path/docker-compose.yaml up -d diff --git a/svn-repo-stats/get-svn-repo-stats.sh b/stats/get-svn-repo-stats.sh similarity index 100% rename from svn-repo-stats/get-svn-repo-stats.sh rename to stats/get-svn-repo-stats.sh diff --git a/stats/repos.txt b/stats/repos.txt new file mode 100644 index 0000000..4b7021d --- /dev/null +++ b/stats/repos.txt @@ -0,0 +1,204 @@ +https://svn.apache.org/repos/asf/ace +https://svn.apache.org/repos/asf/activemq +https://svn.apache.org/repos/asf/airavata +https://svn.apache.org/repos/asf/allura +https://svn.apache.org/repos/asf/ambari +https://svn.apache.org/repos/asf/ant +https://svn.apache.org/repos/asf/any23 +https://svn.apache.org/repos/asf/apr +https://svn.apache.org/repos/asf/archiva +https://svn.apache.org/repos/asf/aries +https://svn.apache.org/repos/asf/attic +https://svn.apache.org/repos/asf/aurora +https://svn.apache.org/repos/asf/avalon +https://svn.apache.org/repos/asf/avro +https://svn.apache.org/repos/asf/axis +https://svn.apache.org/repos/asf/beam +https://svn.apache.org/repos/asf/beehive +https://svn.apache.org/repos/asf/bigtop 
+https://svn.apache.org/repos/asf/bloodhound +https://svn.apache.org/repos/asf/board +https://svn.apache.org/repos/asf/bookkeeper +https://svn.apache.org/repos/asf/brooklyn +https://svn.apache.org/repos/asf/bugs +https://svn.apache.org/repos/asf/buildr +https://svn.apache.org/repos/asf/bval +https://svn.apache.org/repos/asf/calcite +https://svn.apache.org/repos/asf/camel +https://svn.apache.org/repos/asf/cassandra +https://svn.apache.org/repos/asf/cayenne +https://svn.apache.org/repos/asf/celix +https://svn.apache.org/repos/asf/chemistry +https://svn.apache.org/repos/asf/chukwa +https://svn.apache.org/repos/asf/clerezza +https://svn.apache.org/repos/asf/click +https://svn.apache.org/repos/asf/climate +https://svn.apache.org/repos/asf/cloudstack +https://svn.apache.org/repos/asf/cocoon +https://svn.apache.org/repos/asf/comdev +https://svn.apache.org/repos/asf/commons +https://svn.apache.org/repos/asf/concom +https://svn.apache.org/repos/asf/continuum +https://svn.apache.org/repos/asf/cordova +https://svn.apache.org/repos/asf/couchdb +https://svn.apache.org/repos/asf/creadur +https://svn.apache.org/repos/asf/crunch +https://svn.apache.org/repos/asf/ctakes +https://svn.apache.org/repos/asf/curator +https://svn.apache.org/repos/asf/cxf +https://svn.apache.org/repos/asf/datafu +https://svn.apache.org/repos/asf/db +https://svn.apache.org/repos/asf/deltacloud +https://svn.apache.org/repos/asf/deltaspike +https://svn.apache.org/repos/asf/devicemap +https://svn.apache.org/repos/asf/directmemory +https://svn.apache.org/repos/asf/directory +https://svn.apache.org/repos/asf/drill +https://svn.apache.org/repos/asf/eagle +https://svn.apache.org/repos/asf/empire-db +https://svn.apache.org/repos/asf/esme +https://svn.apache.org/repos/asf/etch +https://svn.apache.org/repos/asf/excalibur +https://svn.apache.org/repos/asf/falcon +https://svn.apache.org/repos/asf/felix +https://svn.apache.org/repos/asf/flex +https://svn.apache.org/repos/asf/flink 
+https://svn.apache.org/repos/asf/flume +https://svn.apache.org/repos/asf/forrest +https://svn.apache.org/repos/asf/fundraising +https://svn.apache.org/repos/asf/geode +https://svn.apache.org/repos/asf/geronimo +https://svn.apache.org/repos/asf/giraph +https://svn.apache.org/repos/asf/gora +https://svn.apache.org/repos/asf/gump +https://svn.apache.org/repos/asf/hadoop +https://svn.apache.org/repos/asf/hama +https://svn.apache.org/repos/asf/harmony +https://svn.apache.org/repos/asf/hbase +https://svn.apache.org/repos/asf/helix +https://svn.apache.org/repos/asf/hive +https://svn.apache.org/repos/asf/hivemind +https://svn.apache.org/repos/asf/httpcomponents +https://svn.apache.org/repos/asf/httpd +https://svn.apache.org/repos/asf/ibatis +https://svn.apache.org/repos/asf/ignite +https://svn.apache.org/repos/asf/infrastructure +https://svn.apache.org/repos/asf/isis +https://svn.apache.org/repos/asf/jackrabbit +https://svn.apache.org/repos/asf/jakarta +https://svn.apache.org/repos/asf/james +https://svn.apache.org/repos/asf/jclouds +https://svn.apache.org/repos/asf/jena +https://svn.apache.org/repos/asf/jmeter +https://svn.apache.org/repos/asf/johnzon +https://svn.apache.org/repos/asf/jspwiki +https://svn.apache.org/repos/asf/juddi +https://svn.apache.org/repos/asf/kafka +https://svn.apache.org/repos/asf/karaf +https://svn.apache.org/repos/asf/knox +https://svn.apache.org/repos/asf/kylin +https://svn.apache.org/repos/asf/labs +https://svn.apache.org/repos/asf/lens +https://svn.apache.org/repos/asf/lenya +https://svn.apache.org/repos/asf/libcloud +https://svn.apache.org/repos/asf/logging +https://svn.apache.org/repos/asf/lucene +https://svn.apache.org/repos/asf/lucene.net +https://svn.apache.org/repos/asf/lucy +https://svn.apache.org/repos/asf/mahout +https://svn.apache.org/repos/asf/manifoldcf +https://svn.apache.org/repos/asf/marmotta +https://svn.apache.org/repos/asf/maven +https://svn.apache.org/repos/asf/mesos +https://svn.apache.org/repos/asf/metamodel 
+https://svn.apache.org/repos/asf/mina +https://svn.apache.org/repos/asf/mrunit +https://svn.apache.org/repos/asf/myfaces +https://svn.apache.org/repos/asf/nifi +https://svn.apache.org/repos/asf/nutch +https://svn.apache.org/repos/asf/ode +https://svn.apache.org/repos/asf/ofbiz +https://svn.apache.org/repos/asf/olingo +https://svn.apache.org/repos/asf/oltu +https://svn.apache.org/repos/asf/onami +https://svn.apache.org/repos/asf/oodt +https://svn.apache.org/repos/asf/oozie +https://svn.apache.org/repos/asf/openjpa +https://svn.apache.org/repos/asf/openmeetings +https://svn.apache.org/repos/asf/opennlp +https://svn.apache.org/repos/asf/openoffice +https://svn.apache.org/repos/asf/openwebbeans +https://svn.apache.org/repos/asf/parquet +https://svn.apache.org/repos/asf/pdfbox +https://svn.apache.org/repos/asf/perl +https://svn.apache.org/repos/asf/phoenix +https://svn.apache.org/repos/asf/pig +https://svn.apache.org/repos/asf/pivot +https://svn.apache.org/repos/asf/planet +https://svn.apache.org/repos/asf/poi +https://svn.apache.org/repos/asf/portals +https://svn.apache.org/repos/asf/qpid +https://svn.apache.org/repos/asf/quetzalcoatl +https://svn.apache.org/repos/asf/ranger +https://svn.apache.org/repos/asf/rave +https://svn.apache.org/repos/asf/reef +https://svn.apache.org/repos/asf/river +https://svn.apache.org/repos/asf/roller +https://svn.apache.org/repos/asf/samza +https://svn.apache.org/repos/asf/santuario +https://svn.apache.org/repos/asf/sentry +https://svn.apache.org/repos/asf/serf +https://svn.apache.org/repos/asf/servicemix +https://svn.apache.org/repos/asf/shale +https://svn.apache.org/repos/asf/shindig +https://svn.apache.org/repos/asf/shiro +https://svn.apache.org/repos/asf/singa +https://svn.apache.org/repos/asf/sis +https://svn.apache.org/repos/asf/sling +https://svn.apache.org/repos/asf/spamassassin +https://svn.apache.org/repos/asf/spark +https://svn.apache.org/repos/asf/sqoop +https://svn.apache.org/repos/asf/stanbol 
+https://svn.apache.org/repos/asf/stdcxx +https://svn.apache.org/repos/asf/steve +https://svn.apache.org/repos/asf/storm +https://svn.apache.org/repos/asf/stratos +https://svn.apache.org/repos/asf/struts +https://svn.apache.org/repos/asf/subversion +https://svn.apache.org/repos/asf/synapse +https://svn.apache.org/repos/asf/syncope +https://svn.apache.org/repos/asf/systemds +https://svn.apache.org/repos/asf/tajo +https://svn.apache.org/repos/asf/tapestry +https://svn.apache.org/repos/asf/tcl +https://svn.apache.org/repos/asf/tez +https://svn.apache.org/repos/asf/thrift +https://svn.apache.org/repos/asf/tika +https://svn.apache.org/repos/asf/tiles +https://svn.apache.org/repos/asf/tinkerpop +https://svn.apache.org/repos/asf/tomcat +https://svn.apache.org/repos/asf/tomee +https://svn.apache.org/repos/asf/trafficserver +https://svn.apache.org/repos/asf/turbine +https://svn.apache.org/repos/asf/tuscany +https://svn.apache.org/repos/asf/twill +https://svn.apache.org/repos/asf/uima +https://svn.apache.org/repos/asf/unomi +https://svn.apache.org/repos/asf/usergrid +https://svn.apache.org/repos/asf/vcl +https://svn.apache.org/repos/asf/velocity +https://svn.apache.org/repos/asf/vxquery +https://svn.apache.org/repos/asf/webservices +https://svn.apache.org/repos/asf/whirr +https://svn.apache.org/repos/asf/wicket +https://svn.apache.org/repos/asf/wink +https://svn.apache.org/repos/asf/wookie +https://svn.apache.org/repos/asf/xalan +https://svn.apache.org/repos/asf/xerces +https://svn.apache.org/repos/asf/xml +https://svn.apache.org/repos/asf/xmlbeans +https://svn.apache.org/repos/asf/xmlgraphics +https://svn.apache.org/repos/asf/zeppelin +https://svn.apache.org/repos/asf/zest +https://svn.apache.org/repos/asf/zookeeper +https://svn.apache.org/repos/asf/incubator