Commit

Merge branch 'master' into dependabot/npm_and_yarn/packages/web-components/bootstrap-5.0.0
chejennifer authored Dec 27, 2024
2 parents 7bbcbf7 + 553cf2d commit 29323b6
Showing 722 changed files with 97,129 additions and 106,413 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/codeql-analysis.yml
@@ -35,11 +35,11 @@ jobs:

steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v4

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v2
uses: github/codeql-action/init@v3
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -50,7 +50,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v2
uses: github/codeql-action/autobuild@v3

# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -64,4 +64,4 @@ jobs:
# make release

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v2
uses: github/codeql-action/analyze@v3
50 changes: 50 additions & 0 deletions .github/workflows/release-branch-checks.yml
@@ -0,0 +1,50 @@
name: Release branch checks

on:
pull_request:
branches: [ "customdc_stable" ]
# Required for merge queue to work: https://docs.github.com/en/repositories/configuring-branches-and-merges-in-your-repository/configuring-pull-request-merges/managing-a-merge-queue#triggering-merge-group-checks-with-github-actions
merge_group:
branches: [ "customdc_stable" ]

jobs:
verify_all_commits_are_already_in_master:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
# Fetch all history for accurate comparison
fetch-depth: 0
# Check out the PR branch
ref: ${{ github.event.pull_request.head.ref }}
repository: ${{ github.event.pull_request.head.repo.full_name }}

- name: Verify that all commits are already in the master branch
run: |
git remote add dc https://github.com/datacommonsorg/website.git
git fetch dc
MASTER_BRANCH="dc/master"
# Get the list of commits in the source branch that are not in the master branch.
# Exclude merge commits only if this is the final run in the merge queue.
# This way the only merge commits that end up in the final commit history
# are the ones added by GitHub when merging PRs.
if [[ ${{ github.event_name }} == 'merge_group' ]]; then
MISSING_COMMITS=$(git log --pretty="%H - %s" --no-merges $MASTER_BRANCH..HEAD --)
else
MISSING_COMMITS=$(git log --pretty="%H - %s" $MASTER_BRANCH..HEAD --)
fi
if [[ -n "$MISSING_COMMITS" ]]; then
echo ""
echo "ERROR: The following commits are not present in $MASTER_BRANCH:"
echo ""
echo "$MISSING_COMMITS"
echo ""
echo "PRs to release branches should only contain commits that are already in master."
echo "To fix this PR, reset its branch locally to a commit at or behind https://github.com/datacommonsorg/website/commits/master/ and then force-push it."
echo "Note that a release branch PR should be based on master and not the previous version of the release branch, which contains merge commits."
exit 1
fi
echo "All commits are present in $MASTER_BRANCH"
1 change: 1 addition & 0 deletions .gitignore
@@ -71,6 +71,7 @@ experimental/sdg-static/datacommons/nl_interface.min.css

# Custom DC data
dc-data/
custom_dc/env.list

# Topic cache
gen_ordered_list_for_topics.mcf
13 changes: 8 additions & 5 deletions .run_cdc_dev.env
@@ -14,11 +14,14 @@

# Env variables for running run_cdc_dev.sh

# The 3 keys / passwords below are not checked in.
# Either uncomment these lines and set them here or set them separately as environment variables.
# DC_API_KEY=
# MAPS_API_KEY=
# DB_PASS=
# The 3 keys / passwords below must be specified before the script can be run.
# Either specify them here or, better still,
# copy this file, specify the values in the copy, and set the env variable RUN_CDC_DEV_ENV_FILE to that file's path before running the script.
# e.g. if the copied file is at tmp/cdc.env, you can run the script as follows:
# RUN_CDC_DEV_ENV_FILE=tmp/cdc.env ./run_cdc_dev.sh
DC_API_KEY=
MAPS_API_KEY=
DB_PASS=

# If your DC_API_KEY is for autopush, change this to https://autopush.api.datacommons.org
DC_API_ROOT=https://api.datacommons.org
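
As a concrete (hypothetical) example of the copy-the-file approach described in the new comments, a filled-in copy at tmp/cdc.env would look like this, with placeholder values rather than real keys:

    # tmp/cdc.env -- a copy of .run_cdc_dev.env with secrets filled in (values are placeholders).
    DC_API_KEY=your-dc-api-key
    MAPS_API_KEY=your-maps-api-key
    DB_PASS=your-db-password

    # Then point the script at the copy:
    # RUN_CDC_DEV_ENV_FILE=tmp/cdc.env ./run_cdc_dev.sh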
79 changes: 31 additions & 48 deletions build/cdc_data/Dockerfile
@@ -12,43 +12,47 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# #### Stage 1: Build env for data importer. ####
FROM python:3.11.4-slim as data-importer

ARG PIP_DISABLE_PIP_VERSION_CHECK=1
ARG PIP_NO_CACHE_DIR=1

WORKDIR /workspace
# #### Stage 1: Download base dc model from GCS. ####
FROM google/cloud-sdk:slim AS model-downloader

# Copy requirements.
COPY import/simple/requirements.txt ./import/simple/requirements.txt
# Copy model.
RUN mkdir -p /tmp/datcom-nl-models \
&& gsutil -m cp -R gs://datcom-nl-models/ft_final_v20230717230459.all-MiniLM-L6-v2/ /tmp/datcom-nl-models/

# Create a virtual env and install requirements.
RUN python -m venv /workspace/venv
ENV PATH="/workspace/venv/bin:$PATH"
RUN pip3 install -r ./import/simple/requirements.txt

# Copy simple importer.
COPY import/simple/ ./import/simple/
# #### Stage 2: Python runtime. ####
FROM python:3.11.4-slim AS runner

ARG ENV
ENV ENV=${ENV}

# #### Stage 2: Build env for embeddings builder. ####
FROM python:3.11.4-slim as embeddings-builder
WORKDIR /workspace

ARG PIP_DISABLE_PIP_VERSION_CHECK=1
ARG PIP_NO_CACHE_DIR=1
# Copy models
COPY --from=model-downloader /tmp/datcom-nl-models /tmp/datcom-nl-models

WORKDIR /workspace
# Copy simple importer requirements.
COPY import/simple/requirements.txt ./import/simple/requirements.txt

# Copy requirements.
# Copy embeddings builder requirements.
# Copy nl_requirements.txt since it is referenced by embeddings requirements.txt
COPY tools/nl/embeddings/requirements.txt ./tools/nl/embeddings/requirements.txt
COPY nl_requirements.txt ./nl_requirements.txt

# Create a virtual env and install requirements.
# Remove lancedb - it is not used by custom dc.
RUN python -m venv ./venv
ARG PIP_DISABLE_PIP_VERSION_CHECK=1
ARG PIP_NO_CACHE_DIR=1

# Create a virtual env, add it to path, and install all requirements.
RUN python -m venv /workspace/venv
ENV PATH="/workspace/venv/bin:$PATH"

# TODO: Install requirements for embeddings importer and data importer in separate virtual envs.
# Install embeddings importer requirements.
RUN pip3 install -r ./import/simple/requirements.txt

# Install data requirements.
# Remove lancedb - it is not used by custom dc.
RUN sed -i'' '/lancedb/d' /workspace/nl_requirements.txt \
&& pip3 install torch==2.2.2 --extra-index-url https://download.pytorch.org/whl/cpu \
&& pip3 install -r ./tools/nl/embeddings/requirements.txt
@@ -61,38 +61,17 @@ COPY shared/. ./shared/
COPY nl_server/. /workspace/nl_server/
# Copy yaml files used by the embeddings builder.
COPY deploy/nl/. /datacommons/nl/


# #### Stage 3: Download base dc model from GCS. ####
FROM google/cloud-sdk:slim as model-downloader

# Copy model.
RUN mkdir -p /tmp/datcom-nl-models \
&& gsutil -m cp -R gs://datcom-nl-models/ft_final_v20230717230459.all-MiniLM-L6-v2/ /tmp/datcom-nl-models/


# #### Stage 4: Runtime env. ####
FROM python:3.11.4-slim as runner

ARG ENV
ENV ENV=${ENV}

WORKDIR /workspace

# Copy scripts, dependencies and files from the build stages.
COPY --from=data-importer /workspace/ .
COPY --from=embeddings-builder /workspace/ .
COPY --from=embeddings-builder /datacommons/ /datacommons
COPY --from=model-downloader /tmp/datcom-nl-models /tmp/datcom-nl-models
# Copy simple importer.
COPY import/simple/ ./import/simple/

# Copy executable script.
COPY build/cdc_data/run.sh .

# Make script executable.
RUN chmod +x run.sh

# Add virtual env to the path.
# Activate the virtual env.
ENV PATH="/workspace/venv/bin:$PATH"

# Set the default command to run the script.
CMD ./run.sh
CMD ["./run.sh"]
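
Two changes in this Dockerfile are worth calling out: the model is now fetched in a gcloud-equipped downloader stage and only the model directory is copied into the slim Python runtime, and CMD moves from shell form to exec form, so run.sh runs as PID 1 and receives signals (e.g. from docker stop) directly rather than through a /bin/sh wrapper. A hedged build sketch; the tag and build-arg value are illustrative, not from the repo:

    # Build the data management image from the repo root.
    docker build -f build/cdc_data/Dockerfile --build-arg ENV=local -t cdc-data:local .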
29 changes: 22 additions & 7 deletions build/cdc_data/run.sh
@@ -32,6 +32,16 @@ if [[ $OUTPUT_DIR == "" ]]; then
exit 1
fi

if [[ $DATA_RUN_MODE != "" ]]; then
if [[ $DATA_RUN_MODE != "schemaupdate" ]]; then
echo "DATA_RUN_MODE must be either empty or 'schemaupdate'"
exit 1
fi
echo "DATA_RUN_MODE=$DATA_RUN_MODE"
else
DATA_RUN_MODE="customdc"
fi

echo "INPUT_DIR=$INPUT_DIR"
echo "OUTPUT_DIR=$OUTPUT_DIR"

@@ -51,7 +61,7 @@ ADDITIONAL_CATALOG_PATH=$DC_NL_EMBEDDINGS_DIR/custom_catalog.yaml
CUSTOM_EMBEDDINGS_INDEX=user_all_minilm_mem

# Set IS_CUSTOM_DC var to true.
# This is used by the embeddings builder to set up a custom dc env.
export IS_CUSTOM_DC=true

if [[ $USE_SQLITE == "true" ]]; then
@@ -67,15 +77,20 @@ cd $WORKSPACE_DIR/import/simple
# Run importer.
python3 -m stats.main \
--input_dir=$INPUT_DIR \
--output_dir=$DC_OUTPUT_DIR
--output_dir=$DC_OUTPUT_DIR \
--mode=$DATA_RUN_MODE

# cd back to workspace dir to run the embeddings builder.
cd $WORKSPACE_DIR

# Run embeddings builder.
python3 -m tools.nl.embeddings.build_embeddings \
--embeddings_name=$CUSTOM_EMBEDDINGS_INDEX \
if [[ $DATA_RUN_MODE == "schemaupdate" ]]; then
echo "Skipping embeddings builder because run mode is 'schemaupdate'."
echo "Schema update complete."
else
# Run embeddings builder.
python3 -m tools.nl.embeddings.build_embeddings \
--embeddings_name=$CUSTOM_EMBEDDINGS_INDEX \
--output_dir=$DC_NL_EMBEDDINGS_DIR \
--additional_catalog_path=$ADDITIONAL_CATALOG_PATH

echo "Data loading completed."
echo "Data loading complete."
fi
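
The net effect is that one script now serves both run modes. A hedged invocation sketch; paths and values are illustrative, and the script normally runs inside the data management container:

    # Default mode ("customdc"): import data, then build embeddings.
    INPUT_DIR=/data/in OUTPUT_DIR=/data/out ./run.sh

    # Schema update only: runs the importer with --mode=schemaupdate and skips the embeddings builder.
    DATA_RUN_MODE=schemaupdate INPUT_DIR=/data/in OUTPUT_DIR=/data/out ./run.sh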
4 changes: 2 additions & 2 deletions build/cdc_services/run.sh
@@ -88,7 +88,7 @@ else
fi

# Wait for any process to exit
wait
wait -n

# Exit with status of process that exited first
exit $?
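
The switch from wait to wait -n changes failure behavior: plain wait blocks until every background process exits and then returns 0, while wait -n (bash 4.3+) returns as soon as the first process exits, with that process's status, so the container dies promptly if any one service crashes. A minimal sketch of the pattern:

    #!/bin/bash
    # Two long-running "services"; the second fails after 2 seconds.
    sleep 100 &
    { sleep 2; exit 1; } &

    # Returns after ~2s with status 1; plain `wait` would block ~100s and return 0.
    wait -n
    exit $?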
79 changes: 79 additions & 0 deletions build/ci/cloudbuild.push_cdc_stable.yaml
@@ -0,0 +1,79 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Updates stable-tagged Docker images for custom DC.
# Assumes the stable branch is already checked out, which it should be
# if this build is triggered by a push to the stable branch.

################################################################################

# NOTE: Logs-based metrics for this build are dependent on step numbers.
# For this reason, please either add new steps at the end of the file OR
# update ALL metrics when adding/removing steps.

################################################################################

steps:
# Step 0: Initialize submods
- id: init-submods
name: gcr.io/cloud-builders/git
entrypoint: bash
args:
- -c
- |
set -e
git submodule update --init --recursive
waitFor: ["-"]

# Step 1: Get a label that combines commit hashes.
- id: get-label
name: gcr.io/cloud-builders/git
entrypoint: bash
args:
- -c
- |
set -e
set -o pipefail
./scripts/get_commits_label.sh | tail -1 >"$_IMAGE_LABEL_PATH"
waitFor: ["init-submods"]

# Step 2: Services container
- id: build-and-tag-stable-services
name: gcr.io/datcom-ci/deploy-tool
entrypoint: bash
args:
- -c
- |
set -e
image_label=$(cat "$_IMAGE_LABEL_PATH")
./scripts/build_cdc_services_and_tag_stable.sh $image_label
waitFor: ["get-label"]

# Step 3: Data management container
- id: build-and-tag-stable-data
name: gcr.io/datcom-ci/deploy-tool
entrypoint: bash
args:
- -c
- |
set -e
image_label=$(cat "$_IMAGE_LABEL_PATH")
./scripts/build_cdc_data_and_tag_stable.sh $image_label
waitFor: ["get-label"]

substitutions:
_IMAGE_LABEL_PATH: "/workspace/tmp_cdc_stable_image_label.txt"

options:
machineType: "E2_HIGHCPU_32"
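
Although this config is meant to run on push to the stable branch, the same build can be submitted by hand from a checkout of that branch. A hedged sketch, assuming gcloud is authenticated against the appropriate project (the default _IMAGE_LABEL_PATH substitution is used as-is):

    # Submit the stable-push build manually from the repo root.
    gcloud builds submit --config=build/ci/cloudbuild.push_cdc_stable.yaml .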
11 changes: 11 additions & 0 deletions build/ci/cloudbuild.py.yaml
@@ -14,9 +14,20 @@

# Run Python tests, lint, etc.
steps:
# Download the files needed for nl embeddings tests.
- id: download_nl_files
name: python:3.11.3
entrypoint: /bin/sh
args:
- -c
- |
cd tools/nl/download_nl_files
./run.sh
- id: flask_test
name: python:3.11.3
entrypoint: /bin/sh
waitFor: ["download_nl_files"]
args:
- -c
- |
2 changes: 1 addition & 1 deletion build/ci/cloudbuild.screenshot.yaml
@@ -15,7 +15,7 @@
steps:
# Build the static files
- id: package_js
name: gcr.io/datcom-ci/node:2024-06-11
name: gcr.io/datcom-ci/node:2024-11-19
entrypoint: /bin/bash
waitFor: ["-"]
args: