From a61f41f5b4c719a74643b217af860ee0cccc8e1e Mon Sep 17 00:00:00 2001 From: Jose Luis Franco Arza Date: Fri, 15 Nov 2024 06:55:18 +0100 Subject: [PATCH] Add support for the RBAC feature. This PR adds two new options RBAC (bool) and RBAC_CONFIG (string) which allow configuring RBAC in Weaviate. It also improves the help information displayed when no option is passed or when local-k8s.sh --help is executed. Last, it improves the busy loops that wait for services to be ready which were kind of broken before and adds an automatic way to pass authentication if required. --- .github/workflows/main.yml | 32 +++++- README.md | 38 +++++++ action.yml | 12 ++- local-k8s.sh | 24 +++-- rbac.yaml.example | 22 ++++ utilities/helpers.sh | 204 ++++++++++++++++++++++++++++++++----- 6 files changed, 299 insertions(+), 33 deletions(-) create mode 100644 rbac.yaml.example diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2eac89f..42d6af2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -57,6 +57,8 @@ jobs: WEAVIATE_GRPC_PORT: '50052' HELM_BRANCH: 'master' ENABLE_BACKUP: 'true' + RBAC: 'true' + RBAC_CONFIG: './rbac-config.yaml' VALUES_OVERRIDE: | storage: size: 50Gi @@ -67,6 +69,32 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v2 + - name: Create RBAC config file + run: | + cat > rbac-config.yaml << 'EOF' + authentication: + anonymous_access: + enabled: false + apikey: + enabled: true + allowed_keys: + - admin-key + - reader-key + users: + - admin + - reader + roles: + - admin + - reader + oidc: + enabled: false + authorization: + admin_list: + enabled: true + users: + - admin + - reader + EOF - name: Deploy weaviate-local-k8s from current branch. 
id: invoke-local-k8s uses: ./ @@ -81,6 +109,8 @@ jobs: enable-backup: ${{ env.ENABLE_BACKUP }} values-inline: ${{ env.VALUES_INLINE }} observability: 'true' + rbac: ${{ env.RBAC }} + rbac-config: ${{ env.RBAC_CONFIG }} - name: Check the configured values run: | replicas=$(kubectl get sts weaviate -n weaviate -o=jsonpath="{.spec.replicas}") @@ -93,7 +123,7 @@ jobs: echo "Error: Workers count is not equal to ${{ env.WORKERS }}. Found $workers" exit 1 fi - versions=$(curl -s http://127.0.0.1:${{ env.WEAVIATE_PORT }}/v1/nodes | jq '.nodes[] | .version' | tr -d '"') + versions=$(curl -H "Authorization: Bearer admin-key" -s http://127.0.0.1:${{ env.WEAVIATE_PORT }}/v1/nodes | jq '.nodes[] | .version' | tr -d '"') for version in `echo $versions | tr '\n' ' '`; do if [[ "$version" != "1.25.0" ]]; then echo "Error: Version is not equal to 1.25.0. Found $version" diff --git a/README.md b/README.md index 38dd5f0..585cad5 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,8 @@ This GitHub composite action allows you to deploy Weaviate to a local Kubernetes - **enable-backup**: When set to true it configures Weaviate to support S3 backups using MinIO. Refer to the [backup and restore](https://weaviate.io/developers/weaviate/configuration/backups#) documentation for more information. - **s3-offload**: When set to true it configures Weaviate to support S3 tenant offloading using MinIO. This functionality is only supported in Weaviate 1.26 - **values-override**: Override values for the Helm chart in YAML string. (Optional, default: '') +- **rbac**: When set to true it creates an admin user with the admin role whose API key is `admin-key`. (Optional, default: 'false') +- **rbac-config**: File location containing the RBAC configuration in YAML format. (Optional, default: '') ### Usage To use this action in your GitHub Actions workflow, you can add the following step: @@ -67,6 +69,8 @@ You can also execute the local-k8s.sh script locally. 
Ensure you have the requir Then, you can execute the script with the desired option: ```bash +# Setup Weaviate instance with RBAC enabled (default admin user only) +WEAVIATE_VERSION="1.28.0" RBAC=true REPLICAS=3 ./local-k8s.sh setup # Setup Weaviate on local Kubernetes WEAVIATE_VERSION="1.24.4" REPLICAS=3 ./local-k8s.sh setup @@ -90,6 +94,8 @@ The environment variables that can be passed are: - **REPLICAS** - **HELM_BRANCH** - **MODULES** +- **RBAC** +- **RBAC_CONFIG** Example, running preview version of Weaviate, using the `raft-configuration` weaviate-helm branch: ```bash @@ -139,3 +145,35 @@ Make sure your images are present in your environment, as otherwise the script w This action is invoked from a GitHub Actions workflow using the uses keyword followed by the action's repository and version. Input values can be provided using the with keyword within the workflow YAML file. For local execution of the local-k8s.sh script, ensure you have the necessary dependencies installed and then execute the script with one of the supported options: setup, upgrade, or clean. + + +### RBAC + +Role-Based Access Control (RBAC) is integrated into this repository to manage and secure access to Weaviate. To facilitate configuration, a test example is provided in the `rbac.yaml.example` file. + +You have two options to configure RBAC: + +1. **Enable RBAC with Default Settings:** + + Simply set the `RBAC` environment variable to `true` when running the setup script. This enables RBAC using the default configuration, which creates an admin user with the admin role whose API key is `admin-key`. + + ```bash + RBAC=true WEAVIATE_VERSION="1.28.6" REPLICAS=1 WORKERS=1 ./local-k8s.sh setup + ``` + +2. **Use a Custom RBAC Configuration:** + + For a customized RBAC setup, set `RBAC=true` and specify the path to your YAML configuration file using the `RBAC_CONFIG` environment variable (the file is ignored unless RBAC is enabled). This allows you to define specific roles, users, and permissions as needed. 
+ + ```bash + RBAC=true RBAC_CONFIG="./custom-rbac.yaml" WEAVIATE_VERSION="1.28.2" REPLICAS=4 WORKERS=3 ./local-k8s.sh setup + ``` + + Make sure to create and configure your `custom-rbac.yaml` based on the structure provided in `rbac.yaml.example`. + +By leveraging RBAC, you can ensure that access to Weaviate is managed securely and tailored to your specific requirements. + + + + + diff --git a/action.yml b/action.yml index c115ced..ad094c0 100644 --- a/action.yml +++ b/action.yml @@ -58,6 +58,14 @@ inputs: description: 'Enable observability stack' required: false default: 'false' + rbac: + description: 'Enable RBAC. By default it creates an admin user with the admin role whose API key is `admin-key`' + required: false + default: 'false' + rbac-config: + description: 'File location containing the RBAC configuration in YAML format' + required: false + default: '' runs: using: 'composite' @@ -68,7 +76,7 @@ runs: shell: bash if: ${{ inputs.values-override != '' }} run: | - echo "${{ inputs.values-override }}" > ${{ github.action_path }}/values-override.yaml + echo '${{ inputs.values-override }}' | yq eval '.' 
- > ${{ github.action_path }}/values-override.yaml - name: Deploy local kubernetes cluster shell: bash env: @@ -85,6 +93,8 @@ runs: DELETE_STS: ${{ inputs.delete-sts }} VALUES_INLINE: ${{ inputs.values-inline }} OBSERVABILITY: ${{ inputs.observability }} + RBAC: ${{ inputs.rbac }} + RBAC_CONFIG: ${{ inputs.rbac-config }} run: ${{ github.action_path }}/local-k8s.sh $OPERATION - name: Retrieve weaviate logs shell: bash diff --git a/local-k8s.sh b/local-k8s.sh index a3cddf5..63eaf17 100755 --- a/local-k8s.sh +++ b/local-k8s.sh @@ -35,6 +35,8 @@ OBSERVABILITY=${OBSERVABILITY:-"true"} PROMETHEUS_PORT=9091 GRAFANA_PORT=3000 TARGET="" +RBAC=${RBAC:-"false"} +RBAC_CONFIG=${RBAC_CONFIG:-""} function get_timeout() { @@ -188,6 +190,14 @@ EOF wait_for_all_healthy_nodes $REPLICAS echo_green "setup # Success" echo_green "setup # Weaviate is up and running on http://localhost:$WEAVIATE_PORT" + if [[ $RBAC == "true" && -n "$RBAC_CONFIG" ]]; then + echo_green "setup # RBAC is enabled" + fi + auth_enabled=$(is_auth_enabled) + if [[ "$auth_enabled" == "true" ]]; then + bearer_token=$(get_bearer_token) + echo_green "setup # You can now access the Weaviate API with the following API key: $bearer_token" + fi if [[ $OBSERVABILITY == "true" ]]; then echo_green "setup # Grafana is accessible on http://localhost:$GRAFANA_PORT (admin/admin)" echo_green "setup # Prometheus is accessible on http://localhost:$PROMETHEUS_PORT" @@ -232,16 +242,16 @@ function clean() { # Check if any options are passed if [ $# -eq 0 ]; then - echo "Usage: $0 " - echo "options:" - echo " setup" - echo " clean" - echo " upgrade" - echo "flags:" - echo " --local-images (optional) [Upload local images to the cluster]" + show_help exit 1 fi +# Show help if requested +if [ "$1" == "--help" ] || [ "$1" == "-h" ]; then + show_help + exit 0 +fi + # Check if all requirements are installed for requirement in "${REQUIREMENTS[@]}"; do if ! 
command -v $requirement &> /dev/null; then diff --git a/rbac.yaml.example b/rbac.yaml.example new file mode 100644 index 0000000..be3d3cc --- /dev/null +++ b/rbac.yaml.example @@ -0,0 +1,22 @@ +authentication: + anonymous_access: + enabled: false + apikey: + enabled: true + allowed_keys: + - admin-key + - reader-key + users: + - admin + - reader + roles: + - admin + - reader + oidc: + enabled: false +authorization: + admin_list: + enabled: true + users: + - admin + - reader diff --git a/utilities/helpers.sh b/utilities/helpers.sh index 90c4911..090f611 100644 --- a/utilities/helpers.sh +++ b/utilities/helpers.sh @@ -21,6 +21,70 @@ function startup_minio() { kubectl apply -f "$(dirname "$0")/manifests/minio-dev.yaml" } +function show_help() { + cat << EOF +Usage: $0 [flags] [ENV_VARS] + +Commands: + setup Create and configure a local Kubernetes cluster with Weaviate + upgrade Upgrade an existing Weaviate installation + clean Remove the local Kubernetes cluster and all resources + +Flags: + --local-images Upload local Docker images to the cluster instead of pulling from registry + +Environment Variables: + Cluster Configuration: + WORKERS Number of worker nodes in the Kind cluster (default: 0) + REPLICAS Number of Weaviate replicas to deploy (default: 1) + WEAVIATE_VERSION Specific Weaviate version to deploy (required) + + Network Configuration: + WEAVIATE_PORT HTTP API port (default: 8080) + WEAVIATE_GRPC_PORT gRPC API port (default: 50051) + WEAVIATE_METRICS Metrics port (default: 2112) + + Feature Flags: + OBSERVABILITY Enable Prometheus and Grafana monitoring (default: true) + RBAC Enable Role-Based Access Control (default: false) + RBAC_CONFIG Path to custom RBAC configuration file (optional) + ENABLE_BACKUP Enable backup functionality with MinIO (default: false) + S3_OFFLOAD Enable S3 data offloading with MinIO (default: false) + + Deployment Options: + MODULES Comma-separated list of Weaviate modules to enable (default: "") + Available modules: 
https://weaviate.io/developers/weaviate/model-providers + HELM_BRANCH Specific branch of weaviate-helm to use (default: "") + VALUES_INLINE Additional Helm values to pass inline (default: "") + DELETE_STS Delete StatefulSet during upgrade (default: false) + +Examples: + # Basic setup with single node + WEAVIATE_VERSION="1.28.0" ./local-k8s.sh setup + + # Multi-node setup with monitoring disabled + WORKERS=1 REPLICAS=3 WEAVIATE_VERSION="1.28.0" OBSERVABILITY=false ./local-k8s.sh setup + + # Setup with RBAC enabled + WEAVIATE_VERSION="1.28.0" RBAC=true ./local-k8s.sh setup + + # Setup with custom RBAC configuration + WEAVIATE_VERSION="1.28.0" RBAC=true RBAC_CONFIG="./rbac-config.yaml" ./local-k8s.sh setup + + # Setup with modules and backup enabled + WEAVIATE_VERSION="1.28.0" MODULES="text2vec-transformers" ENABLE_BACKUP=true ./local-k8s.sh setup + + # Clean up all resources + ./local-k8s.sh clean + +Notes: + - When using Mac with Docker, use 'host.docker.internal' instead of 'localhost' + for container connectivity + - RBAC default configuration creates a single admin user with 'admin-key' + - Monitoring (when enabled) provides Grafana (port 3000) and Prometheus (port 9091) +EOF +} + function wait_for_minio() { kubectl wait pod/minio -n weaviate --for=condition=Ready --timeout=300s echo_green "Minio is ready" @@ -57,19 +121,6 @@ function shutdown_minio() { kubectl delete -f "$(dirname "$0")/manifests/minio-dev.yaml" || true } -function wait_weaviate() { - echo_green "Wait for Weaviate to be ready" - for _ in {1..120}; do - if curl -sf -o /dev/null localhost:${WEAVIATE_PORT}; then - echo_green "Weaviate is ready" - break - fi - - echo_yellow "Weaviate is not ready, trying again in 1s" - sleep 1 - done -} - function wait_for_other_services() { # Wait for minio service to be ready if S3 offload or backup is enabled @@ -83,12 +134,28 @@ function wait_for_other_services() { fi } +function curl_with_auth() { + local url=$1 + local extra_args=${2:-} # Optional additional 
curl arguments + + auth_enabled=$(is_auth_enabled) + curl_cmd="curl -sf ${extra_args}" + + if [[ "$auth_enabled" == "true" ]]; then + bearer_token=$(get_bearer_token) + curl_cmd="$curl_cmd -H 'Authorization: Bearer $bearer_token'" + fi + + curl_cmd="$curl_cmd $url" + eval "$curl_cmd" +} + function wait_cluster_join() { node=$1 echo_green "Wait for node ${node} to join the cluster" for _ in {1..120}; do - if curl -sf localhost:${WEAVIATE_PORT}/v1/nodes | jq ".nodes[] | select(.name == \"${node}\" ) | select (.status == \"HEALTHY\" )" | grep -q $node; then + if curl_with_auth "localhost:${WEAVIATE_PORT}/v1/nodes" | jq ".nodes[] | select(.name == \"${node}\" ) | select (.status == \"HEALTHY\" )" | grep -q $node; then echo_green "Node ${node} has joined the cluster" break fi @@ -98,9 +165,66 @@ function wait_cluster_join() { done } +function is_auth_enabled() { + env_auth_enabled=$(kubectl get sts weaviate -n weaviate -o jsonpath='{.spec.template.spec.containers[*].env[?(@.name=="AUTHENTICATION_APIKEY_ENABLED")].value}') + if [[ "$env_auth_enabled" == "true" ]]; then + echo "true" + else + # Check configmap as fallback + config=$(kubectl get configmap -n weaviate weaviate-config -o jsonpath='{.data.conf\.yaml}') + if [[ -n "$config" ]] && [[ $(echo "$config" | yq -r '.authentication.apikey.enabled') == "true" ]]; then + echo "true" + else + echo "false" + fi + fi +} + +function get_bearer_token() { + # Check if auth is enabled via env var first (simpler case) + + # if AUTHENTICATION_APIKEY_ALLOWED_KEYS is set, use the first one + env_bearer_tokens=$(kubectl get sts weaviate -n weaviate -o jsonpath='{.spec.template.spec.containers[*].env[?(@.name=="AUTHENTICATION_APIKEY_ALLOWED_KEYS")].value}') + IFS=',' read -r bearer_token _ <<< "$env_bearer_tokens" + if [[ -n "$bearer_token" ]]; then + echo "$bearer_token" + return + fi + + + # Check configmap as fallback + if kubectl get configmap -n weaviate weaviate-config &>/dev/null; then + config=$(kubectl get configmap -n 
weaviate weaviate-config -o jsonpath='{.data.conf\.yaml}') + if [[ -n "$config" ]]; then + bearer_token=$(echo "$config" | yq -r '.authentication.apikey.allowed_keys[0]') + echo "$bearer_token" + return + fi + fi + +} + +function wait_weaviate() { + auth_enabled=$(is_auth_enabled) + + echo_green "Wait for Weaviate to be ready" + for _ in {1..120}; do + if curl_with_auth "localhost:${WEAVIATE_PORT}" "-o /dev/null"; then + echo_green "Weaviate is ready" + return + fi + + echo_yellow "Weaviate is not ready, trying again in 1s" + sleep 1 + done + echo_red "Weaviate is not ready" + exit 1 +} + function is_node_healthy() { node=$1 - if curl -sf localhost:${WEAVIATE_PORT}/v1/nodes | jq ".nodes[] | select(.name == \"${node}\" ) | select (.status == \"HEALTHY\" )" | grep -q $node; then + response=$(curl_with_auth "localhost:${WEAVIATE_PORT}/v1/nodes") + if echo "$response" | jq ".nodes[] | select(.name == \"${node}\" ) | select (.status == \"HEALTHY\" )" | grep -q "$node"; then echo "true" else echo "false" @@ -111,45 +235,48 @@ function wait_for_all_healthy_nodes() { replicas=$1 echo_green "Wait for all Weaviate $replicas nodes in cluster" for _ in {1..120}; do - healty_nodes=0 + healthy_nodes=0 for i in $(seq 0 $((replicas-1))); do node="weaviate-$i" - is_healthy=$(is_node_healthy $node) - if [ "$is_healthy" == "true" ]; then - healty_nodes=$((healty_nodes+1)) + if [ "$(is_node_healthy "$node")" == "true" ]; then + healthy_nodes=$((healthy_nodes+1)) else echo_yellow "Weaviate node $node is not healthy" fi done - if [ "$healty_nodes" == "$replicas" ]; then + if [ "$healthy_nodes" == "$replicas" ]; then echo_green "All Weaviate $replicas nodes in cluster are healthy" - break + return fi echo_yellow "Not all Weaviate nodes in cluster are healthy, trying again in 2s" sleep 2 done + echo_red "Weaviate $replicas nodes in cluster are not healthy" + exit 1 } function wait_for_raft_sync() { nodes_count=$1 - if [ "$(curl -s -o /dev/null -w "%{http_code}" 
localhost:${WEAVIATE_PORT}/v1/cluster/statistics)" == "200" ]; then + if curl_with_auth "localhost:${WEAVIATE_PORT}/v1/cluster/statistics" "-o /dev/null -w '%{http_code}'" | grep -q "200"; then echo_green "Wait for Weaviate Raft schema to be in sync" for _ in {1..1200}; do - statistics=$(curl -sf http://localhost:${WEAVIATE_PORT}/v1/cluster/statistics) + statistics=$(curl_with_auth "localhost:${WEAVIATE_PORT}/v1/cluster/statistics") count=$(echo $statistics | jq '.statistics | length') synchronized=$(echo $statistics | jq '.synchronized') if [ "$count" == "$nodes_count" ] && [ "$synchronized" == "true" ]; then echo_green "Weaviate $count nodes out of $nodes_count are synchronized: $synchronized." echo_green "Weaviate Raft cluster is in sync" - break + return fi echo_yellow "Weaviate $count nodes out of $nodes_count are synchronized: $synchronized..." echo_yellow "Raft schema is out of sync, trying again to query Weaviate $nodes_count nodes cluster in 2s" sleep 2 done fi + echo_red "Weaviate Raft schema is not in sync" + exit 1 } function port_forward_to_weaviate() { @@ -252,6 +379,35 @@ function generate_helm_values() { helm_values="${helm_values} --set serviceMonitor.enabled=true" fi + # RBAC configuration. + # If RBAC is enabled, always enable RBAC in environment + # also an RBAC_CONFIG can be provided to override the default authentication and authorization configuration. + if [[ $RBAC == "true" ]]; then + # Always enable RBAC in environment + helm_values="${helm_values} --set env.AUTHORIZATION_ENABLE_RBAC=true" + + # Check if RBAC_CONFIG is provided + if [[ -n "${RBAC_CONFIG:-}" ]]; then + if [[ ! 
-f "$RBAC_CONFIG" ]]; then + echo_red "RBAC config file not found at $RBAC_CONFIG" + exit 1 + fi + + # Pass the RBAC config file directly to helm + helm_values="${helm_values} -f $RBAC_CONFIG" + else + # Use default RBAC configuration + helm_values="${helm_values} \ + --set authentication.anonymous_access.enabled=false \ + --set authentication.apikey.enabled=true \ + --set authentication.apikey.allowed_keys={admin-key} \ + --set authentication.apikey.users={admin} \ + --set authentication.apikey.roles={admin} \ + --set authorization.admin_list.enabled=true \ + --set authorization.admin_list.users={admin}" + fi + fi + # Check if VALUES_INLINE variable is not empty if [ "$VALUES_INLINE" != "" ]; then helm_values="$helm_values $VALUES_INLINE"