diff --git a/configuration/index.html b/configuration/index.html index 83221aa..6fda341 100644 --- a/configuration/index.html +++ b/configuration/index.html @@ -561,8 +561,14 @@

Webhook Server

Graph Updater

1
-2
GITHUB_TOKEN=changeme
+2
+3
+4
+5
GITHUB_TOKEN=changeme
 NATS_URL=localhost:4222
+
+# GRAPHQL_URL=http://localhost:4000/graphql
+GRAPHQL_URL=http://api:4000/graphql
 

Endpoint Scanners

Github Cloned Repo Checks

@@ -573,11 +579,17 @@

Github Octokit Checks

2 3 4 -5
GITHUB_TOKEN=changeme
+5
+6
+7
+8
GITHUB_TOKEN=changeme
 NATS_URL=localhost:4222
 
 # NATS sub stream
 NATS_SUB_STREAM="EventsScanner.githubEndpoints"
+
+# GRAPHQL_URL=http://localhost:4000/graphql
+GRAPHQL_URL=http://api:4000/graphql
 

Web Endpoint Checks

1
diff --git a/graphql-api/index.html b/graphql-api/index.html
index b70f311..4ecd2ca 100644
--- a/graphql-api/index.html
+++ b/graphql-api/index.html
@@ -782,8 +782,7 @@ 

Mutations

Source code in api/src/graphql_types/mutation.py -
 8
- 9
+              
 9
 10
 11
 12
@@ -861,7 +860,8 @@ 

Mutations

84 85 86 -87
@strawberry.type
+87
+88
@strawberry.type
 class Mutation:
     @strawberry.mutation
     def endpoint(self, url: str) -> str:
@@ -917,7 +917,7 @@ 

Mutations

``` """ client = GraphDB() - client.upsert_github_endpoint(endpoint) + client.upsert_scanner_endpoint(endpoint) client.close() return endpoint.url @@ -975,8 +975,7 @@

Source code in api/src/graphql_types/mutation.py -
10
-11
+            
11
 12
 13
 14
@@ -984,7 +983,8 @@ 

16 17 18 -19

@strawberry.mutation
+19
+20
@strawberry.mutation
 def endpoint(self, url: str) -> str:
     """
     Insert an endpoint with no additional metadata. This mutation should only
@@ -1019,8 +1019,7 @@ 

Source code in api/src/graphql_types/mutation.py -
68
-69
+            
69
 70
 71
 72
@@ -1028,7 +1027,8 @@ 

74 75 76 -77

@strawberry.mutation
+77
+78
@strawberry.mutation
 def endpoints(self, urls: List[str]) -> List[str]:
     """
     Writes a list of URLs to the graph. Each URL will be associated with
@@ -1122,8 +1122,7 @@ 

Source code in api/src/graphql_types/mutation.py -
21
-22
+            
22
 23
 24
 25
@@ -1167,7 +1166,8 @@ 

63 64 65 -66

@strawberry.mutation
+66
+67
@strawberry.mutation
 def githubEndpoint(self, endpoint: GithubEndpoint) -> str:
     """
     # Update/Insert Github Endpoint
@@ -1210,7 +1210,7 @@ 

``` """ client = GraphDB() - client.upsert_github_endpoint(endpoint) + client.upsert_scanner_endpoint(endpoint) client.close() return endpoint.url

@@ -1237,15 +1237,15 @@

Source code in api/src/graphql_types/mutation.py -
79
-80
+            
80
 81
 82
 83
 84
 85
 86
-87
@strawberry.mutation
+87
+88
@strawberry.mutation
 def product(self, name: str, urls: List[str]) -> str:
     """
     Attaches a product label to a list of URLs.
diff --git a/search/search_index.json b/search/search_index.json
index f9be3d2..e58e8b4 100644
--- a/search/search_index.json
+++ b/search/search_index.json
@@ -1 +1 @@
-{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-\\.]+"},"docs":[{"location":"","text":"Observatory (R U OK?) Observatory is an automated real-time scanning framework for various endpoints containing relevant products (e.g. GitHub URLs, Container Registry URLs, Webapp URLs, and so on). The purpose of Observatory is to ensure continuous compliance with requirements around concerns such as security and accessibility by performing various automated scans on these endpoints.","title":"Observatory (R U OK?)"},{"location":"#observatory-r-u-ok","text":"Observatory is an automated real-time scanning framework for various endpoints containing relevant products (e.g. GitHub URLs, Container Registry URLs, Webapp URLs, and so on). The purpose of Observatory is to ensure continuous compliance with requirements around concerns such as security and accessibility by performing various automated scans on these endpoints.","title":"Observatory (R U OK?)"},{"location":"architecture/","text":"RUOK Architecture The core architecture uses an event-driven workflow based on GitHub Webhooks for repository events. Note that nothing about this architecture relies on event updates coming exclusively from webhooks, although this provides a convenient way to receive push notifications for the time being. A webhook server listens for various webhook events. Two primary sources of events are considered, although the event sources are highly extensible. Github repository events : any GitHub repository that creates webhooks registered with the webhook server URL will automatically send event notifications when selected repository events occur. Repositories can optionally include a .product.yaml file with links that can be used to make associations between the GitHub repository and other endpoint nodes on the graph. DNS repository events : all DNS A-records for projects in PHACDataHub are provisioned using the dns repository. 
Annotation metadata from these Config Connector manifests can be parsed to make associations between the DNS A-record URL and other endpoints such as the associated Github repository. Depending on the type of endpoint being updated, the webhook server adds an event to the appropriate queue group in NATS (e.g. RepoEventsUpdate , WebEventsUpdate , etc.). Graph Updater components subscribe to *EventUpdate queue groups. Each kind of graph updater component performs a few tasks. If a metadata file (e.g. .product.yaml ) is present, parse the metadata file and construct the graph that associates the current endpoint with the endpoints it's related to. Note that if there is no metadata file, the graph is trivially a single node containing the current endpoint. Traverse the graph from (1) and query the GraphQL API for each node on the graph to see if any entrypoint to the graph already exists in the database. Merge the graphs from (1) and (2), where nodes from (1) take precedence over nodes from (2). Write each node from the merged graph in (3) to the database using the appropriate mutation queries in the GraphQL API. Traverse the graph from (3) and add endpoint nodes to the appropraite scanner queue groups. Each kind of Endpoint Scanner subscribes to the appropriate queue groups, listening for endpoint nodes added by the appropriate Graph Updater component. Each Endpoint Scanner performs a series of type-specific endpoint scans, largely reusing open source scanning tools such as Trivy , gitleaks , and axe-core (accessibility engine). Endpoint Scanners write the updated endpoint nodes back to the GraphQL API via the appropriate mutation query. Consumers of the GraphQL API (such as the web application) are able to read data about product subgraphs, using any valid entrypoint into the subgraph. 
A special kind of Product label can be added with pointers to one or more endpoints in a subgraph, which allows clients such as the web application to attach a meaningful label to a subgraph of connected endpoints. Importantly, note that Graph Updater components are aware of graph structure, but have no knowledge of node attributes added by Endpoint Scanner. Conversely, Endpoint Scanners are aware of attributes for the type of endpoint node they scan, but have no knowledge of the graph structure maintained by the Graph Updater components. In this way, there is a clean separation of concerns between the Graph Updater components and the Endpoint Scanner components. Detailed Architecture Diagram","title":"Architecture"},{"location":"architecture/#ruok-architecture","text":"The core architecture uses an event-driven workflow based on GitHub Webhooks for repository events. Note that nothing about this architecture relies on event updates coming exclusively from webhooks, although this provides a convenient way to receive push notifications for the time being. A webhook server listens for various webhook events. Two primary sources of events are considered, although the event sources are highly extensible. Github repository events : any GitHub repository that creates webhooks registered with the webhook server URL will automatically send event notifications when selected repository events occur. Repositories can optionally include a .product.yaml file with links that can be used to make associations between the GitHub repository and other endpoint nodes on the graph. DNS repository events : all DNS A-records for projects in PHACDataHub are provisioned using the dns repository. Annotation metadata from these Config Connector manifests can be parsed to make associations between the DNS A-record URL and other endpoints such as the associated Github repository. 
Depending on the type of endpoint being updated, the webhook server adds an event to the appropriate queue group in NATS (e.g. RepoEventsUpdate , WebEventsUpdate , etc.). Graph Updater components subscribe to *EventUpdate queue groups. Each kind of graph updater component performs a few tasks. If a metadata file (e.g. .product.yaml ) is present, parse the metadata file and construct the graph that associates the current endpoint with the endpoints it's related to. Note that if there is no metadata file, the graph is trivially a single node containing the current endpoint. Traverse the graph from (1) and query the GraphQL API for each node on the graph to see if any entrypoint to the graph already exists in the database. Merge the graphs from (1) and (2), where nodes from (1) take precedence over nodes from (2). Write each node from the merged graph in (3) to the database using the appropriate mutation queries in the GraphQL API. Traverse the graph from (3) and add endpoint nodes to the appropraite scanner queue groups. Each kind of Endpoint Scanner subscribes to the appropriate queue groups, listening for endpoint nodes added by the appropriate Graph Updater component. Each Endpoint Scanner performs a series of type-specific endpoint scans, largely reusing open source scanning tools such as Trivy , gitleaks , and axe-core (accessibility engine). Endpoint Scanners write the updated endpoint nodes back to the GraphQL API via the appropriate mutation query. Consumers of the GraphQL API (such as the web application) are able to read data about product subgraphs, using any valid entrypoint into the subgraph. A special kind of Product label can be added with pointers to one or more endpoints in a subgraph, which allows clients such as the web application to attach a meaningful label to a subgraph of connected endpoints. Importantly, note that Graph Updater components are aware of graph structure, but have no knowledge of node attributes added by Endpoint Scanner. 
Conversely, Endpoint Scanners are aware of attributes for the type of endpoint node they scan, but have no knowledge of the graph structure maintained by the Graph Updater components. In this way, there is a clean separation of concerns between the Graph Updater components and the Endpoint Scanner components.","title":"RUOK Architecture"},{"location":"architecture/#detailed-architecture-diagram","text":"","title":"Detailed Architecture Diagram"},{"location":"configuration/","text":"Configuration Since this repository contains multiple deployable services, each service needs to be configured with environment variables. This page outlines the necessary configuration for all services in this repository. Webhook Server 1 Graph Updater 1 2 GITHUB_TOKEN = changeme NATS_URL = localhost:4222 Endpoint Scanners Github Cloned Repo Checks 1 NATS_URL = localhost:4222 Github Octokit Checks 1 2 3 4 5 GITHUB_TOKEN = changeme NATS_URL = localhost:4222 # NATS sub stream NATS_SUB_STREAM = \"EventsScanner.githubEndpoints\" Web Endpoint Checks 1 2 NATS_URL = localhost:4222 API_URL = localhost:4000 Container Checks TODO GraphQL API 1 2 3 4 5 6 7 8 9 10 11 GRAPHQL_HOST = \"127.0.0.1\" GRAPHQL_PORT = \"4000\" DB_HOST = \"http://example-simple-single-ea:8529\" DB_NAME = \"ruok\" USERNAME = \"changeme\" PASSWORD = \"changeme\" GRAPH_NAME = \"endpoints\" VERTEX_COLLECTION = \"endpointNodes\" EDGE_COLLECTION = \"endpointEdges\" Web UI TODO","title":"Configuration"},{"location":"configuration/#configuration","text":"Since this repository contains multiple deployable services, each service needs to be configured with environment variables. 
This page outlines the necessary configuration for all services in this repository.","title":"Configuration"},{"location":"configuration/#webhook-server","text":"1","title":"Webhook Server"},{"location":"configuration/#graph-updater","text":"1 2 GITHUB_TOKEN = changeme NATS_URL = localhost:4222","title":"Graph Updater"},{"location":"configuration/#endpoint-scanners","text":"","title":"Endpoint Scanners"},{"location":"configuration/#github-cloned-repo-checks","text":"1 NATS_URL = localhost:4222","title":"Github Cloned Repo Checks"},{"location":"configuration/#github-octokit-checks","text":"1 2 3 4 5 GITHUB_TOKEN = changeme NATS_URL = localhost:4222 # NATS sub stream NATS_SUB_STREAM = \"EventsScanner.githubEndpoints\"","title":"Github Octokit Checks"},{"location":"configuration/#web-endpoint-checks","text":"1 2 NATS_URL = localhost:4222 API_URL = localhost:4000","title":"Web Endpoint Checks"},{"location":"configuration/#container-checks","text":"TODO","title":"Container Checks"},{"location":"configuration/#graphql-api","text":"1 2 3 4 5 6 7 8 9 10 11 GRAPHQL_HOST = \"127.0.0.1\" GRAPHQL_PORT = \"4000\" DB_HOST = \"http://example-simple-single-ea:8529\" DB_NAME = \"ruok\" USERNAME = \"changeme\" PASSWORD = \"changeme\" GRAPH_NAME = \"endpoints\" VERTEX_COLLECTION = \"endpointNodes\" EDGE_COLLECTION = \"endpointEdges\"","title":"GraphQL API"},{"location":"configuration/#web-ui","text":"TODO","title":"Web UI"},{"location":"data-contracts/","text":"Data Contracts This document highlights the data contracts that implicitly exist between various services. Event Collectors and Graph Updater Currently, the event collectors write a message to the EventsUpdate NATS queue with the following payload schema. 1 2 3 { \"endpoint\": \"https://\" } The only job of the event collectors is to determine which events are valid and relevant (i.e. corresponding to meaningful updates to an endpoint), and pass the endpoint URL along to the Graph Updater component. 
Graph Updater and GraphQL API Each kind of endpoint has zero or more ways to attach metadata about related endpoints (e.g. GitHub repository endpoints can have a .product.yaml file in the project root containing URLs related to that GitHub repository). After parsing this optional metadata, the Graph Updater component has one or more endpoint URLs, which form a graph of related endpoints. Currently, the Graph Updater makes the following mutation to the GraphQL API: 1 2 3 4 5 mutation { endpoints ( urls : $URLs ) { url } } In this case, $URLs is an array of one or more endpoint URLs. Once the mutation above has been written to the GraphQL API, Graph Updater makes the following GraphQL query: 1 2 3 4 5 query { endpoints ( urls : $URLs ) { url } } The rationale here is that there may be existing vertices in the graph database that need to be re-scanned. For example, if the current URL was associated with https://endpoint1 and https://endpoint2 , and the graph of https://endpoint2 and https://endpoint3 already exists in the database, then we want to update each of https://endpoint1 , https://endpoint2 , and https://endpoint3 . At this point, each endpoint is dispatched to the appropriate EndpointScanner queue (e.g. EndpointScanner.githubEndpoints ) with the following payload. 1 2 3 { \"endpoint\": \"https://\" } Graph Updater and Scanners TODO Scanners and GraphQL API TODO","title":"Data Contracts"},{"location":"data-contracts/#data-contracts","text":"This document highlights the data contracts that implicitly exist between various services.","title":"Data Contracts"},{"location":"data-contracts/#event-collectors-and-graph-updater","text":"Currently, the event collectors write a message to the EventsUpdate NATS queue with the following payload schema. 1 2 3 { \"endpoint\": \"https://\" } The only job of the event collectors is to determine which events are valid and relevant (i.e. 
corresponding to meaningful updates to an endpoint), and pass the endpoint URL along to the Graph Updater component.","title":"Event Collectors and Graph Updater"},{"location":"data-contracts/#graph-updater-and-graphql-api","text":"Each kind of endpoint has zero or more ways to attach metadata about related endpoints (e.g. GitHub repository endpoints can have a .product.yaml file in the project root containing URLs related to that GitHub repository). After parsing this optional metadata, the Graph Updater component has one or more endpoint URLs, which form a graph of related endpoints. Currently, the Graph Updater makes the following mutation to the GraphQL API: 1 2 3 4 5 mutation { endpoints ( urls : $URLs ) { url } } In this case, $URLs is an array of one or more endpoint URLs. Once the mutation above has been written to the GraphQL API, Graph Updater makes the following GraphQL query: 1 2 3 4 5 query { endpoints ( urls : $URLs ) { url } } The rationale here is that there may be existing vertices in the graph database that need to be re-scanned. For example, if the current URL was associated with https://endpoint1 and https://endpoint2 , and the graph of https://endpoint2 and https://endpoint3 already exists in the database, then we want to update each of https://endpoint1 , https://endpoint2 , and https://endpoint3 . At this point, each endpoint is dispatched to the appropriate EndpointScanner queue (e.g. EndpointScanner.githubEndpoints ) with the following payload. 1 2 3 { \"endpoint\": \"https://\" }","title":"Graph Updater and GraphQL API"},{"location":"data-contracts/#graph-updater-and-scanners","text":"TODO","title":"Graph Updater and Scanners"},{"location":"data-contracts/#scanners-and-graphql-api","text":"TODO","title":"Scanners and GraphQL API"},{"location":"deployment/","text":"Deploying on Kubernetes This document outlines how to deploy the Kubernetes application for ruok-service-autochecker . 
Deploying on a Local Kubernetes Cluster To deploy the ruok-service-autochecker application onto a local Kubernetes environment, ensure your current context is set to your local cluster (i.e. kubectl config set-context  ). You can verify your Kubernetes context by running kubectl config get-contexts ; your current context will be indicated with * . Before deploying the application, it is necessary to first build and tag all of the images in this repository. You can build and tag all of the images by running make build . Note that you may need to perform an extra step of loading your locally built images into your local cluster's image registry (see Loading an image into the KinD Cluster , for example). Once connected to your local cluster, run make k8s to deploy the various manifests and kustomization.yaml files associated with the application. Continuous Deployment onto GKE TODO","title":"Deployment"},{"location":"deployment/#deploying-on-kubernetes","text":"This document outlines how to deploy the Kubernetes application for ruok-service-autochecker .","title":"Deploying on Kubernetes"},{"location":"deployment/#deploying-on-a-local-kubernetes-cluster","text":"To deploy the ruok-service-autochecker application onto a local Kubernetes environment, ensure your current context is set to your local cluster (i.e. kubectl config set-context  ). You can verify your Kubernetes context by running kubectl config get-contexts ; your current context will be indicated with * . Before deploying the application, it is necessary to first build and tag all of the images in this repository. You can build and tag all of the images by running make build . Note that you may need to perform an extra step of loading your locally built images into your local cluster's image registry (see Loading an image into the KinD Cluster , for example). 
Once connected to your local cluster, run make k8s to deploy the various manifests and kustomization.yaml files associated with the application.","title":"Deploying on a Local Kubernetes Cluster"},{"location":"deployment/#continuous-deployment-onto-gke","text":"TODO","title":"Continuous Deployment onto GKE"},{"location":"development-environment/","text":"Development Environment This page outlines how to set up a development environment for this project. This document is meant to serve as a high-level mental model for how to set up a development environment. There are many ways to substitute certain components if a developer prefers one tool over another. Overview The diagram below shows a high-level overview of the development environment. VSCode is used as the integrated development environment. VSCode is run in client-server mode: The desktop VSCode application is downloaded for the operating system of choice, and a project-specific VSCode Dev Container is used to run VSCode server as a dev container. The VSCode Dev Container is attached to the host network, so the development container can access ports exposed on 127.0.0.1 , for example. K9s is used as a kubernetes dashboard, which provides a user interface for the developer to interact with the Kubernetes cluster. Podman is a daemonless and rootless OCI-compliant runtime and container manager that can be used to build OCI images and run containers on your development machine. Kubernetes in Docker (KinD) is a tool for running local kubernetes clusters entirely in OCI containers (i.e. OCI containers are used to run Kubernetes nodes). The sections below outline how to set up each component of this environment. VSCode Development Containers TLDR : The .devcontainer/devcontainer.json file contains the dev container configuration for this project. If you install the VSCode Dev Container extension and build/run the dev container, the dev container will be setup automatically. 
The section below highlights specific issues that you might encounter along with helpful resources to troubleshoot potential issues. Starting Dev Container as non-root user I added the \"containerUser\": \"node\" key to start the dev container as the default non-root node user for the dev container. Since I am running my dev container on Ubuntu Linux, I also needed to add the following line to my devcontainer.json file: 1 2 3 4 5 ... , \"runArgs\" : [ \"--userns=keep-id\" ], ... This line of configuration is necessary because, on Linux, podman maps the user ID (UID) that launched the container process to the root user of the container. By default, this means that my current user ID (usually 1000 in most cases) maps to the UID 1 (i.e. root user of the container user namespace). You can run podman unshare cat /proc/self/uid_map on the host machine to see how host UIDs map to UIDs in the container process namespaces. This caused problems as the files/folders in the repo are mounted to the container filesystem with root as the owner, so the node user didn't have permission to write to these files. Setting --userns=keep-id keeps the UID of 1000 in the container, so the repo files/folders that get mounted to the container filesystem are correctly owned by UID 1000 (i.e. node user), and it is possible to write to files in the container as the non-root user. See this stackoverflow answer for a more detailed explanation of how this works. Attach Development Container to Host Network As per this thread answer , add the following key in devcontainer.json . 1 2 3 4 5 ... , \"runArgs\" : [ \"--network=host\" ], ... VSCode Development Tools VSCode Integrated Debugger Debug configurations can be found in the .vscode/launch.json file in the project root. For information on how to use VSCode's integrated debugger, see the VSCode Debugging documentation . 
Environment Variable Management with direnv In order to run or debug a given application in a dev container, it may be necessary load a specific set of environment variables to configure that application. direnv is a tool that automatically loads environment variables into your shell when you cd into a folder. You may need to run direnv allow in a directory upon making changes to its .envrc file. Podman Instructions for installing the Podman runtime on all platforms can be found at this link . Additionally (and optionally), you can install Podman Desktop , which provides a graphical tool to facilitate working with podman. Using Podman with KinD It might be necessary to follow the steps in Kind - Rootless . After following these instructions, I had to run systemd-run --user --scope --property=Delegate=yes kind create cluster to create my kind cluster. This Linkedin Arcticle is also a good resource that reviews step-by-step setup of a KIND cluster using podman on Ubuntu Linux. Loading an image into the KinD Cluster KinD doesn't spin up a local registry out of the box, so it's necessary to run kind load docker-image  to load a locally build container image into the KinD cluster. If you're using Podman Desktop, there is a UI convenience for this by navigating to the Images tab, then for the image(s) you want to load into the KinD cluster, click \"Push image to Kind cluster\" (see screenshot below). 
Useful Code Snippets NATS CLI Connect to nats on localhost 1 nats context add local --description \"Localhost\" --select Subscribe to a queue group 1 nats sub EventsUpdate Publish to a queue group 1 nats pub \"ClonedRepoEvent.>\" '{\"webEndpoints\": [\"https://www.canada.ca/en/public-health.html\"]}' ` Related Issues KinD - Running with rootless podman doesn't work as documented KinD - Podman creation fails KinD - How I Wasted a Day Loading Local Docker Images","title":"Development Environment"},{"location":"development-environment/#development-environment","text":"This page outlines how to set up a development environment for this project. This document is meant to serve as a high-level mental model for how to set up a development environment. There are many ways to substitute certain components if a developer prefers one tool over another.","title":"Development Environment"},{"location":"development-environment/#overview","text":"The diagram below shows a high-level overview of the development environment. VSCode is used as the integrated development environment. VSCode is run in client-server mode: The desktop VSCode application is downloaded for the operating system of choice, and a project-specific VSCode Dev Container is used to run VSCode server as a dev container. The VSCode Dev Container is attached to the host network, so the development container can access ports exposed on 127.0.0.1 , for example. K9s is used as a kubernetes dashboard, which provides a user interface for the developer to interact with the Kubernetes cluster. Podman is a daemonless and rootless OCI-compliant runtime and container manager that can be used to build OCI images and run containers on your development machine. Kubernetes in Docker (KinD) is a tool for running local kubernetes clusters entirely in OCI containers (i.e. OCI containers are used to run Kubernetes nodes). 
The sections below outline how to set up each component of this environment.","title":"Overview"},{"location":"development-environment/#vscode-development-containers","text":"TLDR : The .devcontainer/devcontainer.json file contains the dev container configuration for this project. If you install the VSCode Dev Container extension and build/run the dev container, the dev container will be setup automatically. The section below highlights specific issues that you might encounter along with helpful resources to troubleshoot potential issues.","title":"VSCode Development Containers"},{"location":"development-environment/#starting-dev-container-as-non-root-user","text":"I added the \"containerUser\": \"node\" key to start the dev container as the default non-root node user for the dev container. Since I am running my dev container on Ubuntu Linux, I also needed to add the following line to my devcontainer.json file: 1 2 3 4 5 ... , \"runArgs\" : [ \"--userns=keep-id\" ], ... This line of configuration is necessary because, on Linux, podman maps the user ID (UID) that launched the container process to the root user of the container. By default, this means that my current user ID (usually 1000 in most cases) maps to the UID 1 (i.e. root user of the container user namespace). You can run podman unshare cat /proc/self/uid_map on the host machine to see how host UIDs map to UIDs in the container process namespaces. This caused problems as the files/folders in the repo are mounted to the container filesystem with root as the owner, so the node user didn't have permission to write to these files. Setting --userns=keep-id keeps the UID of 1000 in the container, so the repo files/folders that get mounted to the container filesystem are correctly owned by UID 1000 (i.e. node user), and it is possible to write to files in the container as the non-root user. 
See this stackoverflow answer for a more detailed explanation of how this works.","title":"Starting Dev Container as non-root user"},{"location":"development-environment/#attach-development-container-to-host-network","text":"As per this thread answer , add the following key in devcontainer.json . 1 2 3 4 5 ... , \"runArgs\" : [ \"--network=host\" ], ...","title":"Attach Development Container to Host Network"},{"location":"development-environment/#vscode-development-tools","text":"","title":"VSCode Development Tools"},{"location":"development-environment/#vscode-integrated-debugger","text":"Debug configurations can be found in the .vscode/launch.json file in the project root. For information on how to use VSCode's integrated debugger, see the VSCode Debugging documentation .","title":"VSCode Integrated Debugger"},{"location":"development-environment/#environment-variable-management-with-direnv","text":"In order to run or debug a given application in a dev container, it may be necessary load a specific set of environment variables to configure that application. direnv is a tool that automatically loads environment variables into your shell when you cd into a folder. You may need to run direnv allow in a directory upon making changes to its .envrc file.","title":"Environment Variable Management with direnv"},{"location":"development-environment/#podman","text":"Instructions for installing the Podman runtime on all platforms can be found at this link . Additionally (and optionally), you can install Podman Desktop , which provides a graphical tool to facilitate working with podman.","title":"Podman"},{"location":"development-environment/#using-podman-with-kind","text":"It might be necessary to follow the steps in Kind - Rootless . After following these instructions, I had to run systemd-run --user --scope --property=Delegate=yes kind create cluster to create my kind cluster. 
This Linkedin Arcticle is also a good resource that reviews step-by-step setup of a KIND cluster using podman on Ubuntu Linux.","title":"Using Podman with KinD"},{"location":"development-environment/#loading-an-image-into-the-kind-cluster","text":"KinD doesn't spin up a local registry out of the box, so it's necessary to run kind load docker-image  to load a locally build container image into the KinD cluster. If you're using Podman Desktop, there is a UI convenience for this by navigating to the Images tab, then for the image(s) you want to load into the KinD cluster, click \"Push image to Kind cluster\" (see screenshot below).","title":"Loading an image into the KinD Cluster"},{"location":"development-environment/#useful-code-snippets","text":"","title":"Useful Code Snippets"},{"location":"development-environment/#nats-cli","text":"Connect to nats on localhost 1 nats context add local --description \"Localhost\" --select Subscribe to a queue group 1 nats sub EventsUpdate Publish to a queue group 1 nats pub \"ClonedRepoEvent.>\" '{\"webEndpoints\": [\"https://www.canada.ca/en/public-health.html\"]}' `","title":"NATS CLI"},{"location":"development-environment/#related-issues","text":"KinD - Running with rootless podman doesn't work as documented KinD - Podman creation fails KinD - How I Wasted a Day Loading Local Docker Images","title":"Related Issues"},{"location":"github-webhooks/","text":"GitHub Webhooks webhook-server/ contains the implementation of the GitHub webhook server portion of this project. The purpose of this server is to listen for events triggered by certain events of interest on GitHub resources. Validating GitHub Webhook Deliveries TODO Local Development with GitHub Webhooks In order to test webhook-server locally, it is necessary to use a webhook proxy URL to forward webhooks from GitHub to your computer. Instructions for how to do this are as follows: In your browser, nagivate to https://smee.io/ and click Start a new channel . 
Copy the full URL under Webhook Proxy URL . Install the corresponding smee-client package from npm as a dev dependency: npm i sme-client --save-dev . Start the smee-client as follows: smee --url  --path  --port  . Go to a repository of interest in the PHACDataHub Github organization, go to Settings --> Code and automation --> Webhooks --> Add new webhook and paste the Webhook Proxy URL from step 1. Choose application/json for the content type. You can also choose which repo events get forwarded, or select \"sent me everything\" to receive all events. Start up the webhook-server . Trigger an event on the GitHub repo that you registered the webhook with. If everything is set up correctly, you should receive a request to webhook-server where req.body contains the JSON payload of the GitHub webhook event. Helpful Resources testing webhooks redelivering webhooks about webhooks","title":"GitHub Webhooks"},{"location":"github-webhooks/#github-webhooks","text":"webhook-server/ contains the implementation of the GitHub webhook server portion of this project. The purpose of this server is to listen for events triggered by certain events of interest on GitHub resources.","title":"GitHub Webhooks"},{"location":"github-webhooks/#validating-github-webhook-deliveries","text":"TODO","title":"Validating GitHub Webhook Deliveries"},{"location":"github-webhooks/#local-development-with-github-webhooks","text":"In order to test webhook-server locally, it is necessary to use a webhook proxy URL to forward webhooks from GitHub to your computer. Instructions for how to do this are as follows: In your browser, nagivate to https://smee.io/ and click Start a new channel . Copy the full URL under Webhook Proxy URL . Install the corresponding smee-client package from npm as a dev dependency: npm i sme-client --save-dev . Start the smee-client as follows: smee --url  --path  --port  . 
Go to a repository of interest in the PHACDataHub Github organization, go to Settings --> Code and automation --> Webhooks --> Add new webhook and paste the Webhook Proxy URL from step 1. Choose application/json for the content type. You can also choose which repo events get forwarded, or select \"sent me everything\" to receive all events. Start up the webhook-server . Trigger an event on the GitHub repo that you registered the webhook with. If everything is set up correctly, you should receive a request to webhook-server where req.body contains the JSON payload of the GitHub webhook event.","title":"Local Development with GitHub Webhooks"},{"location":"github-webhooks/#helpful-resources","text":"testing webhooks redelivering webhooks about webhooks","title":"Helpful Resources"},{"location":"graphql-api/","text":"GraphQL API Due to the graph nature of the underlying data, Observatory exposes a GraphQL API that is oriented around the concept of Endpoint s. The following two sections explain the motivation behind our endpoint-oriented data model and the actual GraphQL Schema that the GraphQL API exposes. Motivation (Why Endpoints?) Ultimately, Observatory cares about monitoring products . Modern products tend to be associated with a variety of URLs, such as URLs for source code repositories (e.g. Github.com, Gitlab.com), URLs for container registries, URLs for APIs, URLs for web applications, and so on. It can be difficult to provide a stable and authoritative definition of a product without imposing a rigid definition that must be imposed and agreed upon by humans. An approach such as agreeing to and adopting a standard way of defining a product UID may work for a small coordinated group of individuals, but is difficult to scale to large groups of distributed teams without imposing significant administrative burden. Furthermore, it is often not realistic to assume that a product will always be associated with a single URL in a way that is stable over time. 
As a product evolves, it may rename its source code repository or move under a different organization; as a product graduates in its maturity model, it may be promoted from an *.alpha.* to a *.prod.* domain name. In Observatory, the assumption we make is that products are a related graph of Endpoints that evolves over time (e.g. new endpoints are added and old endpoints are removed). The graph of endpoints has the property that viewing any of the endpoint nodes allows all of the endpoint nodes attached to it to be discovered. Additionally, we add the ability to define Product s, which point to one or more Endpoint s, allowing for discovery of the subgraph of endpoints by querying a named Product . Or, conversely, given a URL of any endpoint on the graph, the associated Product node can be discovered. Alternatively, if users wish to monitor individual URLs directly rather than create a product graph, this use case can be accommodated as well. GraphQL API Currently, the GraphQL API exposes the following queries and mutations. 1 -- 8 <-- \"api/src/main.py:18:66 Examples The semantics of queries is that a query for any endpoint on a subgraph returns the entire subgraph. For example, suppose we make the following mutation: 1 2 3 4 5 6 7 mutation { endpoints ( urls : [ \"https://github.com/someorg/somerepo2\" , \"https://another-site.phac.gc.ca\" , \"https://some-other-api.phac-aspc.gc.ca\" ]) } This mutation creates 3 connected enpoints: https://github.com/someorg/somerepo2 , https://another-site.phac.gc.ca , https://some-other-api.phac-aspc.gc.ca . 
Suppose at a later date, we make some additional associations and attach these endpoints to a product with another mutation: 1 2 3 4 5 6 7 8 9 10 mutation { product ( name : \"myproduct\" urls : [ \"https://github.com/someorg/somerepo2\" , \"https://some-other-api.phac-aspc.gc.ca\" , \"https://some-third-webapp.phac.alpha.gc.ca\" ] ) } This mutation adds two additional nodes to the subgraph: https://some-third-webapp.phac.alpha.gc.ca , and a product label called myproduct . At this point in time, the subgraph looks like the following. This graph now has the property that a search for any endpoint on the graph will return all endpoints on the graph. For example, the following graphql query returns the following result: 1 2 3 4 5 query { endpoint ( url : \"myproduct\" ) { url } } 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 { \"data\" : { \"endpoint\" : [ { \"url\" : \"myproduct\" }, { \"url\" : \"https://github.com/someorg/somerepo2\" }, { \"url\" : \"https://some-other-api.phac-aspc.gc.ca\" }, { \"url\" : \"https://some-third-webapp.phac.alpha.gc.ca\" }, { \"url\" : \"https://another-site.phac.gc.ca\" } ] } } Similarly, a GraphQL query for a different vertex on the graph also returns the entire subgraph (although in a different order since the graph traversal started from a different vertex as last time). 
1 2 3 4 5 query { endpoint ( url : \"https://another-site.phac.gc.ca\" ) { url } } 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 { \"data\" : { \"endpoint\" : [ { \"url\" : \"https://another-site.phac.gc.ca\" }, { \"url\" : \"https://github.com/someorg/somerepo2\" }, { \"url\" : \"https://some-other-api.phac-aspc.gc.ca\" }, { \"url\" : \"myproduct\" }, { \"url\" : \"https://some-third-webapp.phac.alpha.gc.ca\" } ] } } Full GraphQL API Specification Mutations Source code in api/src/graphql_types/mutation.py 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 @strawberry . type class Mutation : @strawberry . mutation def endpoint ( self , url : str ) -> str : \"\"\" Insert an endpoint with no additional metadata. This mutation should only be used by the graph updater component to update the graph structure. \"\"\" client = GraphDB () client . insert_endpoint ( url ) client . close () return url @strawberry . mutation def githubEndpoint ( self , endpoint : GithubEndpoint ) -> str : \"\"\" # Update/Insert Github Endpoint Insert a Github Endpoint with \"upsert\" semantics. If the endpoint doesn't already exist, the endpoint document will be created. If the endpoint already exists, its fields will be updated with the values provided in the mutation. # Example of how to use this as an input type: ```graphql mutation { githubEndpoint( endpoint: { url:\"https://github.com/someOrg/someRepo\" owner: \"someOrg\" repo:\"someRepo\" license: \"MIT\" visibility:\"Public\" programmingLanguage:[\"Python\", \"JavaScript\", \"Bash\", \"Dockerfile\"] automatedSecurityFixes: { checkPasses: true metadata: {} } vulnerabilityAlerts: { checkPasses: false metadata: { key: \"value\" } } branchProtection:{ checkPasses:true, metadata:{ key:\"value\" } } } ) } ``` \"\"\" client = GraphDB () client . 
upsert_github_endpoint ( endpoint ) client . close () return endpoint . url @strawberry . mutation def endpoints ( self , urls : List [ str ]) -> List [ str ]: \"\"\" Writes a list of URLs to the graph. Each URL will be associated with every other URL in the list. \"\"\" client = GraphDB () client . insert_endpoints ( urls ) client . close () return urls @strawberry . mutation def product ( self , name : str , urls : List [ str ]) -> str : \"\"\" Attaches a product label to a list of URLs. \"\"\" client = GraphDB () client . insert_product ( name , urls ) client . close () return name endpoint ( url ) Insert an endpoint with no additional metadata. This mutation should only be used by the graph updater component to update the graph structure. Source code in api/src/graphql_types/mutation.py 10 11 12 13 14 15 16 17 18 19 @strawberry . mutation def endpoint ( self , url : str ) -> str : \"\"\" Insert an endpoint with no additional metadata. This mutation should only be used by the graph updater component to update the graph structure. \"\"\" client = GraphDB () client . insert_endpoint ( url ) client . close () return url endpoints ( urls ) Writes a list of URLs to the graph. Each URL will be associated with every other URL in the list. Source code in api/src/graphql_types/mutation.py 68 69 70 71 72 73 74 75 76 77 @strawberry . mutation def endpoints ( self , urls : List [ str ]) -> List [ str ]: \"\"\" Writes a list of URLs to the graph. Each URL will be associated with every other URL in the list. \"\"\" client = GraphDB () client . insert_endpoints ( urls ) client . close () return urls githubEndpoint ( endpoint ) Update/Insert Github Endpoint Insert a Github Endpoint with \"upsert\" semantics. If the endpoint doesn't already exist, the endpoint document will be created. If the endpoint already exists, its fields will be updated with the values provided in the mutation. 
Example of how to use this as an input type: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 mutation { githubEndpoint ( endpoint : { url : \"https://github.com/someOrg/someRepo\" owner : \"someOrg\" repo : \"someRepo\" license : \"MIT\" visibility : \"Public\" programmingLanguage :[ \"Python\" , \"JavaScript\" , \"Bash\" , \"Dockerfile\" ] automatedSecurityFixes : { checkPasses : true metadata : {} } vulnerabilityAlerts : { checkPasses : false metadata : { key : \"value\" } } branchProtection :{ checkPasses : true , metadata :{ key : \"value\" } } } ) } Source code in api/src/graphql_types/mutation.py 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 @strawberry . mutation def githubEndpoint ( self , endpoint : GithubEndpoint ) -> str : \"\"\" # Update/Insert Github Endpoint Insert a Github Endpoint with \"upsert\" semantics. If the endpoint doesn't already exist, the endpoint document will be created. If the endpoint already exists, its fields will be updated with the values provided in the mutation. # Example of how to use this as an input type: ```graphql mutation { githubEndpoint( endpoint: { url:\"https://github.com/someOrg/someRepo\" owner: \"someOrg\" repo:\"someRepo\" license: \"MIT\" visibility:\"Public\" programmingLanguage:[\"Python\", \"JavaScript\", \"Bash\", \"Dockerfile\"] automatedSecurityFixes: { checkPasses: true metadata: {} } vulnerabilityAlerts: { checkPasses: false metadata: { key: \"value\" } } branchProtection:{ checkPasses:true, metadata:{ key:\"value\" } } } ) } ``` \"\"\" client = GraphDB () client . upsert_github_endpoint ( endpoint ) client . close () return endpoint . url product ( name , urls ) Attaches a product label to a list of URLs. Source code in api/src/graphql_types/mutation.py 79 80 81 82 83 84 85 86 87 @strawberry . 
mutation def product ( self , name : str , urls : List [ str ]) -> str : \"\"\" Attaches a product label to a list of URLs. \"\"\" client = GraphDB () client . insert_product ( name , urls ) client . close () return name","title":"GraphQL API"},{"location":"graphql-api/#graphql-api","text":"Due to the graph nature of the underlying data, Observatory exposes a GraphQL API that is oriented around the concept of Endpoint s. The following two sections explain the motivation behind our endpoint-oriented data model and the actual GraphQL Schema that the GraphQL API exposes.","title":"GraphQL API"},{"location":"graphql-api/#motivation-why-endpoints","text":"Ultimately, Observatory cares about monitoring products . Modern products tend to be associated with a variety of URLs, such as URLs for source code repositories (e.g. Github.com, Gitlab.com), URLs for container registries, URLs for APIs, URLs for web applications, and so on. It can be difficult to provide a stable and authoritative definition of a product without imposing a rigid definition that must be imposed and agreed upon by humans. An approach such as agreeing to and adopting a standard way of defining a product UID may work for a small coordinated group of individuals, but is difficult to scale to large groups of distributed teams without imposing significant administrative burden. Furthermore, it is often not realistic to assume that a product will always be associated with a single URL in a way that is stable over time. As a product evolves, it may rename its source code repository or move under a different organization; as a product graduates in its maturity model, it may be promoted from an *.alpha.* to a *.prod.* domain name. In Observatory, the assumption we make is that products are a related graph of Endpoints that evolves over time (e.g. new endpoints are added and old endpoints are removed). 
The graph of endpoints has the property that viewing any of the endpoint nodes allows all of the endpoint nodes attached to it to be discovered. Additionally, we add the ability to define Product s, which point to one or more Endpoint s, allowing for discovery of the subgraph of endpoints by querying a named Product . Or, conversely, given a URL of any endpoint on the graph, the associated Product node can be discovered. Alternatively, if users wish to monitor individual URLs directly rather than create a product graph, this use case can be accommodated as well.","title":"Motivation (Why Endpoints?)"},{"location":"graphql-api/#graphql-api_1","text":"Currently, the GraphQL API exposes the following queries and mutations. 1 -- 8 <-- \"api/src/main.py:18:66","title":"GraphQL API"},{"location":"graphql-api/#examples","text":"The semantics of queries is that a query for any endpoint on a subgraph returns the entire subgraph. For example, suppose we make the following mutation: 1 2 3 4 5 6 7 mutation { endpoints ( urls : [ \"https://github.com/someorg/somerepo2\" , \"https://another-site.phac.gc.ca\" , \"https://some-other-api.phac-aspc.gc.ca\" ]) } This mutation creates 3 connected enpoints: https://github.com/someorg/somerepo2 , https://another-site.phac.gc.ca , https://some-other-api.phac-aspc.gc.ca . Suppose at a later date, we make some additional associations and attach these endpoints to a product with another mutation: 1 2 3 4 5 6 7 8 9 10 mutation { product ( name : \"myproduct\" urls : [ \"https://github.com/someorg/somerepo2\" , \"https://some-other-api.phac-aspc.gc.ca\" , \"https://some-third-webapp.phac.alpha.gc.ca\" ] ) } This mutation adds two additional nodes to the subgraph: https://some-third-webapp.phac.alpha.gc.ca , and a product label called myproduct . At this point in time, the subgraph looks like the following. This graph now has the property that a search for any endpoint on the graph will return all endpoints on the graph. 
For example, the following graphql query returns the following result: 1 2 3 4 5 query { endpoint ( url : \"myproduct\" ) { url } } 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 { \"data\" : { \"endpoint\" : [ { \"url\" : \"myproduct\" }, { \"url\" : \"https://github.com/someorg/somerepo2\" }, { \"url\" : \"https://some-other-api.phac-aspc.gc.ca\" }, { \"url\" : \"https://some-third-webapp.phac.alpha.gc.ca\" }, { \"url\" : \"https://another-site.phac.gc.ca\" } ] } } Similarly, a GraphQL query for a different vertex on the graph also returns the entire subgraph (although in a different order since the graph traversal started from a different vertex as last time). 1 2 3 4 5 query { endpoint ( url : \"https://another-site.phac.gc.ca\" ) { url } } 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 { \"data\" : { \"endpoint\" : [ { \"url\" : \"https://another-site.phac.gc.ca\" }, { \"url\" : \"https://github.com/someorg/somerepo2\" }, { \"url\" : \"https://some-other-api.phac-aspc.gc.ca\" }, { \"url\" : \"myproduct\" }, { \"url\" : \"https://some-third-webapp.phac.alpha.gc.ca\" } ] } }","title":"Examples"},{"location":"graphql-api/#full-graphql-api-specification","text":"","title":"Full GraphQL API Specification"},{"location":"graphql-api/#mutations","text":"Source code in api/src/graphql_types/mutation.py 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 @strawberry . type class Mutation : @strawberry . mutation def endpoint ( self , url : str ) -> str : \"\"\" Insert an endpoint with no additional metadata. This mutation should only be used by the graph updater component to update the graph structure. \"\"\" client = GraphDB () client . insert_endpoint ( url ) client . close () return url @strawberry . 
mutation def githubEndpoint ( self , endpoint : GithubEndpoint ) -> str : \"\"\" # Update/Insert Github Endpoint Insert a Github Endpoint with \"upsert\" semantics. If the endpoint doesn't already exist, the endpoint document will be created. If the endpoint already exists, its fields will be updated with the values provided in the mutation. # Example of how to use this as an input type: ```graphql mutation { githubEndpoint( endpoint: { url:\"https://github.com/someOrg/someRepo\" owner: \"someOrg\" repo:\"someRepo\" license: \"MIT\" visibility:\"Public\" programmingLanguage:[\"Python\", \"JavaScript\", \"Bash\", \"Dockerfile\"] automatedSecurityFixes: { checkPasses: true metadata: {} } vulnerabilityAlerts: { checkPasses: false metadata: { key: \"value\" } } branchProtection:{ checkPasses:true, metadata:{ key:\"value\" } } } ) } ``` \"\"\" client = GraphDB () client . upsert_github_endpoint ( endpoint ) client . close () return endpoint . url @strawberry . mutation def endpoints ( self , urls : List [ str ]) -> List [ str ]: \"\"\" Writes a list of URLs to the graph. Each URL will be associated with every other URL in the list. \"\"\" client = GraphDB () client . insert_endpoints ( urls ) client . close () return urls @strawberry . mutation def product ( self , name : str , urls : List [ str ]) -> str : \"\"\" Attaches a product label to a list of URLs. \"\"\" client = GraphDB () client . insert_product ( name , urls ) client . close () return name","title":"Mutations"},{"location":"graphql-api/#graphql_types.mutation.Mutation.endpoint","text":"Insert an endpoint with no additional metadata. This mutation should only be used by the graph updater component to update the graph structure. Source code in api/src/graphql_types/mutation.py 10 11 12 13 14 15 16 17 18 19 @strawberry . mutation def endpoint ( self , url : str ) -> str : \"\"\" Insert an endpoint with no additional metadata. 
This mutation should only be used by the graph updater component to update the graph structure. \"\"\" client = GraphDB () client . insert_endpoint ( url ) client . close () return url","title":"endpoint()"},{"location":"graphql-api/#graphql_types.mutation.Mutation.endpoints","text":"Writes a list of URLs to the graph. Each URL will be associated with every other URL in the list. Source code in api/src/graphql_types/mutation.py 68 69 70 71 72 73 74 75 76 77 @strawberry . mutation def endpoints ( self , urls : List [ str ]) -> List [ str ]: \"\"\" Writes a list of URLs to the graph. Each URL will be associated with every other URL in the list. \"\"\" client = GraphDB () client . insert_endpoints ( urls ) client . close () return urls","title":"endpoints()"},{"location":"graphql-api/#graphql_types.mutation.Mutation.githubEndpoint","text":"","title":"githubEndpoint()"},{"location":"graphql-api/#graphql_types.mutation.Mutation.githubEndpoint--updateinsert-github-endpoint","text":"Insert a Github Endpoint with \"upsert\" semantics. If the endpoint doesn't already exist, the endpoint document will be created. 
If the endpoint already exists, its fields will be updated with the values provided in the mutation.","title":"Update/Insert Github Endpoint"},{"location":"graphql-api/#graphql_types.mutation.Mutation.githubEndpoint--example-of-how-to-use-this-as-an-input-type","text":"1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 mutation { githubEndpoint ( endpoint : { url : \"https://github.com/someOrg/someRepo\" owner : \"someOrg\" repo : \"someRepo\" license : \"MIT\" visibility : \"Public\" programmingLanguage :[ \"Python\" , \"JavaScript\" , \"Bash\" , \"Dockerfile\" ] automatedSecurityFixes : { checkPasses : true metadata : {} } vulnerabilityAlerts : { checkPasses : false metadata : { key : \"value\" } } branchProtection :{ checkPasses : true , metadata :{ key : \"value\" } } } ) } Source code in api/src/graphql_types/mutation.py 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 @strawberry . mutation def githubEndpoint ( self , endpoint : GithubEndpoint ) -> str : \"\"\" # Update/Insert Github Endpoint Insert a Github Endpoint with \"upsert\" semantics. If the endpoint doesn't already exist, the endpoint document will be created. If the endpoint already exists, its fields will be updated with the values provided in the mutation. # Example of how to use this as an input type: ```graphql mutation { githubEndpoint( endpoint: { url:\"https://github.com/someOrg/someRepo\" owner: \"someOrg\" repo:\"someRepo\" license: \"MIT\" visibility:\"Public\" programmingLanguage:[\"Python\", \"JavaScript\", \"Bash\", \"Dockerfile\"] automatedSecurityFixes: { checkPasses: true metadata: {} } vulnerabilityAlerts: { checkPasses: false metadata: { key: \"value\" } } branchProtection:{ checkPasses:true, metadata:{ key:\"value\" } } } ) } ``` \"\"\" client = GraphDB () client . upsert_github_endpoint ( endpoint ) client . close () return endpoint . 
url","title":"Example of how to use this as an input type:"},{"location":"graphql-api/#graphql_types.mutation.Mutation.product","text":"Attaches a product label to a list of URLs. Source code in api/src/graphql_types/mutation.py 79 80 81 82 83 84 85 86 87 @strawberry . mutation def product ( self , name : str , urls : List [ str ]) -> str : \"\"\" Attaches a product label to a list of URLs. \"\"\" client = GraphDB () client . insert_product ( name , urls ) client . close () return name","title":"product()"},{"location":"register-repository/","text":"Register a Repository with Observatory Registering new repositories with Observatory requires a few simple steps. Create a .product.yaml File Create a .product.yaml file at the repository root with the following fields. 1 2 3 4 5 6 7 8 9 10 productName : Your Product Name webappUrls : - https://product-url-1.phac.alpha.canada.ca - https://product-url-2.phac.alpha.canada.ca apiUrls : - https://api-url-1.phac.alpha.canada.ca containerRegistryUrls : - northamerica-northeast1-docker.pkg.dev/product-container-1@sha256:abcxyz - northamerica-northeast1-docker.pkg.dev/product-container-2@sha256:abc123 - northamerica-northeast1-docker.pkg.dev/product-container-3@sha256:xyz123","title":"Register a Repository"},{"location":"register-repository/#register-a-repository-with-observatory","text":"Registering new repositories with Observatory requires a few simple steps.","title":"Register a Repository with Observatory"},{"location":"register-repository/#create-a-productyaml-file","text":"Create a .product.yaml file at the repository root with the following fields. 
1 2 3 4 5 6 7 8 9 10 productName : Your Product Name webappUrls : - https://product-url-1.phac.alpha.canada.ca - https://product-url-2.phac.alpha.canada.ca apiUrls : - https://api-url-1.phac.alpha.canada.ca containerRegistryUrls : - northamerica-northeast1-docker.pkg.dev/product-container-1@sha256:abcxyz - northamerica-northeast1-docker.pkg.dev/product-container-2@sha256:abc123 - northamerica-northeast1-docker.pkg.dev/product-container-3@sha256:xyz123","title":"Create a .product.yaml File"},{"location":"scanners/","text":"Checks When a repository event webhook is received, Observatory performs a series of automated scans. The Endpoint Scanner components of the Observatory system perform the actual scans on given endpoints. Broadly speaking, these checsk can be broken into the following categories. Check Type Purpose Strategy Remote Repository Checks Verify compliance with source code repositories on remotes such as GitHub . GitHub Octokit API Repository Content Checks Perform scans on the contents of the repository. Deep clone the repository and use automated scanning tools (e.g. Gitleaks ). URL Scanning Checks Perform security and compliance checks against the live instance(s) of the product Various automated scanning tools that interact with a public URL (e.g. axe-core for accessibility scanning). Container Image Checks Gather container image data from GCP. GCP API Observatory uses GraphQL as a layer to unify the data model for reporting on ITSG-33 and related compliance requirements. Roughly speaking, Observatory's \"scanners\" can be thought of as writing many pieces of security information about a given product. Similarly, the same data model exposed by the GraphQL API can be queried to report on the status of various compliance requirements. Note our assumption that one repository may deploy services behind multiple URLs and each repository may build more than one OCI image. 
The sections below expand on each Check Type in greater detail, and also show the parts of our GraphQL schema that expose these Check Types. Remote Repository Checks There are many idioms, best practices, and security requirements for remote source code repositories. Observatory automatically performs a number of these checks using information retrievable from the GitHub Octokit API . Vulnerability Alerts Enabled TODO Automated Security Fixes Enabled TODO Branch Protection Enabled TODO Repository Content Checks A number of checks are performed by scanning a deep clone of the repository's contents. The purpose of these checks is to perform scanning on all of the source code, configuration, etc. contained in the repository. Has Security.md A best practice with any open source code repository is to provide instructions via a Security.md file at the project root for how security vulnerabilities should be reported (see GitHub's code security documentation for more information). This check verifies whether a repository contains a Security.md file. Remediation Include a file called Security.md at the root of your repository explaining how security vulnerabilities should be reported to the repository owner. For example, there may be an email address where vulnerability reports should be sent, and explicit instructions to not document security vulnerabilities in the repository issues. Data Example 1 2 3 4 5 6 7 8 9 { // ... \"hasSecurityMd\":{ \"checkPasses\": true, \"metadata\": null, \"lastUpdated\": 1698174245826 } // ... } Gitleaks Report TODO File Size Check TODO Hadolint Dockerfile hadolint performs a series of lint checks on each Dockerfile found in the repository. TODO URL (Service) Scanning Checks Some products have one or more services exposed through URLs. URL compliance checks perform a series of automated accessibility and security compliance checks using information that can be retrieved via these public URLs. 
Container Image Checks Any products that build and deploy OCI images perform a series of checks on the built image artifact(s).","title":"Scanners"},{"location":"scanners/#checks","text":"When a repository event webhook is received, Observatory performs a series of automated scans. The Endpoint Scanner components of the Observatory system perform the actual scans on given endpoints. Broadly speaking, these checsk can be broken into the following categories. Check Type Purpose Strategy Remote Repository Checks Verify compliance with source code repositories on remotes such as GitHub . GitHub Octokit API Repository Content Checks Perform scans on the contents of the repository. Deep clone the repository and use automated scanning tools (e.g. Gitleaks ). URL Scanning Checks Perform security and compliance checks against the live instance(s) of the product Various automated scanning tools that interact with a public URL (e.g. axe-core for accessibility scanning). Container Image Checks Gather container image data from GCP. GCP API Observatory uses GraphQL as a layer to unify the data model for reporting on ITSG-33 and related compliance requirements. Roughly speaking, Observatory's \"scanners\" can be thought of as writing many pieces of security information about a given product. Similarly, the same data model exposed by the GraphQL API can be queried to report on the status of various compliance requirements. Note our assumption that one repository may deploy services behind multiple URLs and each repository may build more than one OCI image. The sections below expand on each Check Type in greater detail, and also show the parts of our GraphQL schema that expose these Check Types.","title":"Checks"},{"location":"scanners/#remote-repository-checks","text":"There are many idioms, best practices, and security requirements for remote source code repositories. 
Observatory automatically performs a number of these checks using information retrievable from the GitHub Octokit API .","title":"Remote Repository Checks"},{"location":"scanners/#vulnerability-alerts-enabled","text":"TODO","title":"Vulnerability Alerts Enabled"},{"location":"scanners/#automated-security-fixes-enabled","text":"TODO","title":"Automated Security Fixes Enabled"},{"location":"scanners/#branch-protection-enabled","text":"TODO","title":"Branch Protection Enabled"},{"location":"scanners/#repository-content-checks","text":"A number of checks are performed by scanning a deep clone of the repository's contents. The purpose of these checks is to perform scanning on all of the source code, configuration, etc. contained in the repository.","title":"Repository Content Checks"},{"location":"scanners/#has-securitymd","text":"A best practice with any open source code repository is to provide instructions via a Security.md file at the project root for how security vulnerabilities should be reported (see GitHub's code security documentation for more information). This check verifies whether a repository contains a Security.md file. Remediation Include a file called Security.md at the root of your repository explaining how security vulnerabilities should be reported to the repository owner. For example, there may be an email address where vulnerability reports should be sent, and explicit instructions to not document security vulnerabilities in the repository issues. Data Example 1 2 3 4 5 6 7 8 9 { // ... \"hasSecurityMd\":{ \"checkPasses\": true, \"metadata\": null, \"lastUpdated\": 1698174245826 } // ... }","title":"Has Security.md"},{"location":"scanners/#gitleaks-report","text":"TODO","title":"Gitleaks Report"},{"location":"scanners/#file-size-check","text":"TODO","title":"File Size Check"},{"location":"scanners/#hadolint-dockerfile","text":"hadolint performs a series of lint checks on each Dockerfile found in the repository. 
TODO","title":"Hadolint Dockerfile"},{"location":"scanners/#url-service-scanning-checks","text":"Some products have one or more services exposed through URLs. URL compliance checks perform a series of automated accessibility and security compliance checks using information that can be retrieved via these public URLs.","title":"URL (Service) Scanning Checks"},{"location":"scanners/#container-image-checks","text":"Any products that build and deploy OCI images perform a series of checks on the built image artifact(s).","title":"Container Image Checks"}]}
\ No newline at end of file
+{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-\\.]+"},"docs":[{"location":"","text":"Observatory (R U OK?) Observatory is an automated real-time scanning framework for various endpoints containing relevant products (e.g. GitHub URLs, Container Registry URLs, Webapp URLs, and so on). The purpose of Observatory is to ensure continuous compliance with requirements around concerns such as security and accessibility by performing various automated scans on these endpoints.","title":"Observatory (R U OK?)"},{"location":"#observatory-r-u-ok","text":"Observatory is an automated real-time scanning framework for various endpoints containing relevant products (e.g. GitHub URLs, Container Registry URLs, Webapp URLs, and so on). The purpose of Observatory is to ensure continuous compliance with requirements around concerns such as security and accessibility by performing various automated scans on these endpoints.","title":"Observatory (R U OK?)"},{"location":"architecture/","text":"RUOK Architecture The core architecture uses an event-driven workflow based on GitHub Webhooks for repository events. Note that nothing about this architecture relies on event updates coming exclusively from webhooks, although this provides a convenient way to receive push notifications for the time being. A webhook server listens for various webhook events. Two primary sources of events are considered, although the event sources are highly extensible. Github repository events : any GitHub repository that creates webhooks registered with the webhook server URL will automatically send event notifications when selected repository events occur. Repositories can optionally include a .product.yaml file with links that can be used to make associations between the GitHub repository and other endpoint nodes on the graph. DNS repository events : all DNS A-records for projects in PHACDataHub are provisioned using the dns repository. 
Annotation metadata from these Config Connector manifests can be parsed to make associations between the DNS A-record URL and other endpoints such as the associated Github repository. Depending on the type of endpoint being updated, the webhook server adds an event to the appropriate queue group in NATS (e.g. RepoEventsUpdate , WebEventsUpdate , etc.). Graph Updater components subscribe to *EventUpdate queue groups. Each kind of graph updater component performs a few tasks. If a metadata file (e.g. .product.yaml ) is present, parse the metadata file and construct the graph that associates the current endpoint with the endpoints it's related to. Note that if there is no metadata file, the graph is trivially a single node containing the current endpoint. Traverse the graph from (1) and query the GraphQL API for each node on the graph to see if any entrypoint to the graph already exists in the database. Merge the graphs from (1) and (2), where nodes from (1) take precedence over nodes from (2). Write each node from the merged graph in (3) to the database using the appropriate mutation queries in the GraphQL API. Traverse the graph from (3) and add endpoint nodes to the appropraite scanner queue groups. Each kind of Endpoint Scanner subscribes to the appropriate queue groups, listening for endpoint nodes added by the appropriate Graph Updater component. Each Endpoint Scanner performs a series of type-specific endpoint scans, largely reusing open source scanning tools such as Trivy , gitleaks , and axe-core (accessibility engine). Endpoint Scanners write the updated endpoint nodes back to the GraphQL API via the appropriate mutation query. Consumers of the GraphQL API (such as the web application) are able to read data about product subgraphs, using any valid entrypoint into the subgraph. 
A special kind of Product label can be added with pointers to one or more endpoints in a subgraph, which allows clients such as the web application to attach a meaningful label to a subgraph of connected endpoints. Importantly, note that Graph Updater components are aware of graph structure, but have no knowledge of node attributes added by Endpoint Scanner. Conversely, Endpoint Scanners are aware of attributes for the type of endpoint node they scan, but have no knowledge of the graph structure maintained by the Graph Updater components. In this way, there is a clean separation of concerns between the Graph Updater components and the Endpoint Scanner components. Detailed Architecture Diagram","title":"Architecture"},{"location":"architecture/#ruok-architecture","text":"The core architecture uses an event-driven workflow based on GitHub Webhooks for repository events. Note that nothing about this architecture relies on event updates coming exclusively from webhooks, although this provides a convenient way to receive push notifications for the time being. A webhook server listens for various webhook events. Two primary sources of events are considered, although the event sources are highly extensible. Github repository events : any GitHub repository that creates webhooks registered with the webhook server URL will automatically send event notifications when selected repository events occur. Repositories can optionally include a .product.yaml file with links that can be used to make associations between the GitHub repository and other endpoint nodes on the graph. DNS repository events : all DNS A-records for projects in PHACDataHub are provisioned using the dns repository. Annotation metadata from these Config Connector manifests can be parsed to make associations between the DNS A-record URL and other endpoints such as the associated Github repository. 
Depending on the type of endpoint being updated, the webhook server adds an event to the appropriate queue group in NATS (e.g. RepoEventsUpdate , WebEventsUpdate , etc.). Graph Updater components subscribe to *EventUpdate queue groups. Each kind of graph updater component performs a few tasks. If a metadata file (e.g. .product.yaml ) is present, parse the metadata file and construct the graph that associates the current endpoint with the endpoints it's related to. Note that if there is no metadata file, the graph is trivially a single node containing the current endpoint. Traverse the graph from (1) and query the GraphQL API for each node on the graph to see if any entrypoint to the graph already exists in the database. Merge the graphs from (1) and (2), where nodes from (1) take precedence over nodes from (2). Write each node from the merged graph in (3) to the database using the appropriate mutation queries in the GraphQL API. Traverse the graph from (3) and add endpoint nodes to the appropraite scanner queue groups. Each kind of Endpoint Scanner subscribes to the appropriate queue groups, listening for endpoint nodes added by the appropriate Graph Updater component. Each Endpoint Scanner performs a series of type-specific endpoint scans, largely reusing open source scanning tools such as Trivy , gitleaks , and axe-core (accessibility engine). Endpoint Scanners write the updated endpoint nodes back to the GraphQL API via the appropriate mutation query. Consumers of the GraphQL API (such as the web application) are able to read data about product subgraphs, using any valid entrypoint into the subgraph. A special kind of Product label can be added with pointers to one or more endpoints in a subgraph, which allows clients such as the web application to attach a meaningful label to a subgraph of connected endpoints. Importantly, note that Graph Updater components are aware of graph structure, but have no knowledge of node attributes added by Endpoint Scanner. 
Conversely, Endpoint Scanners are aware of attributes for the type of endpoint node they scan, but have no knowledge of the graph structure maintained by the Graph Updater components. In this way, there is a clean separation of concerns between the Graph Updater components and the Endpoint Scanner components.","title":"RUOK Architecture"},{"location":"architecture/#detailed-architecture-diagram","text":"","title":"Detailed Architecture Diagram"},{"location":"configuration/","text":"Configuration Since this repository contains multiple deployable services, each service needs to be configured with environment variables. This page outlines the necessary configuration for all services in this repository. Webhook Server 1 Graph Updater 1 2 3 4 5 GITHUB_TOKEN = changeme NATS_URL = localhost:4222 # GRAPHQL_URL=http://localhost:4000/graphql GRAPHQL_URL = http://api:4000/graphql Endpoint Scanners Github Cloned Repo Checks 1 NATS_URL = localhost:4222 Github Octokit Checks 1 2 3 4 5 6 7 8 GITHUB_TOKEN = changeme NATS_URL = localhost:4222 # NATS sub stream NATS_SUB_STREAM = \"EventsScanner.githubEndpoints\" # GRAPHQL_URL=http://localhost:4000/graphql GRAPHQL_URL = http://api:4000/graphql Web Endpoint Checks 1 2 NATS_URL = localhost:4222 API_URL = localhost:4000 Container Checks TODO GraphQL API 1 2 3 4 5 6 7 8 9 10 11 GRAPHQL_HOST = \"127.0.0.1\" GRAPHQL_PORT = \"4000\" DB_HOST = \"http://example-simple-single-ea:8529\" DB_NAME = \"ruok\" USERNAME = \"changeme\" PASSWORD = \"changeme\" GRAPH_NAME = \"endpoints\" VERTEX_COLLECTION = \"endpointNodes\" EDGE_COLLECTION = \"endpointEdges\" Web UI TODO","title":"Configuration"},{"location":"configuration/#configuration","text":"Since this repository contains multiple deployable services, each service needs to be configured with environment variables. 
This page outlines the necessary configuration for all services in this repository.","title":"Configuration"},{"location":"configuration/#webhook-server","text":"1","title":"Webhook Server"},{"location":"configuration/#graph-updater","text":"1 2 3 4 5 GITHUB_TOKEN = changeme NATS_URL = localhost:4222 # GRAPHQL_URL=http://localhost:4000/graphql GRAPHQL_URL = http://api:4000/graphql","title":"Graph Updater"},{"location":"configuration/#endpoint-scanners","text":"","title":"Endpoint Scanners"},{"location":"configuration/#github-cloned-repo-checks","text":"1 NATS_URL = localhost:4222","title":"Github Cloned Repo Checks"},{"location":"configuration/#github-octokit-checks","text":"1 2 3 4 5 6 7 8 GITHUB_TOKEN = changeme NATS_URL = localhost:4222 # NATS sub stream NATS_SUB_STREAM = \"EventsScanner.githubEndpoints\" # GRAPHQL_URL=http://localhost:4000/graphql GRAPHQL_URL = http://api:4000/graphql","title":"Github Octokit Checks"},{"location":"configuration/#web-endpoint-checks","text":"1 2 NATS_URL = localhost:4222 API_URL = localhost:4000","title":"Web Endpoint Checks"},{"location":"configuration/#container-checks","text":"TODO","title":"Container Checks"},{"location":"configuration/#graphql-api","text":"1 2 3 4 5 6 7 8 9 10 11 GRAPHQL_HOST = \"127.0.0.1\" GRAPHQL_PORT = \"4000\" DB_HOST = \"http://example-simple-single-ea:8529\" DB_NAME = \"ruok\" USERNAME = \"changeme\" PASSWORD = \"changeme\" GRAPH_NAME = \"endpoints\" VERTEX_COLLECTION = \"endpointNodes\" EDGE_COLLECTION = \"endpointEdges\"","title":"GraphQL API"},{"location":"configuration/#web-ui","text":"TODO","title":"Web UI"},{"location":"data-contracts/","text":"Data Contracts This document highlights the data contracts that implicitly exist between various services. Event Collectors and Graph Updater Currently, the event collectors write a message to the EventsUpdate NATS queue with the following payload schema. 
1 2 3 { \"endpoint\": \"https://\" } The only job of the event collectors is to determine which events are valid and relevant (i.e. corresponding to meaningful updates to an endpoint), and pass the endpoint URL along to the Graph Updater component. Graph Updater and GraphQL API Each kind of endpoint has zero or more ways to attach metadata about related endpoints (e.g. GitHub repository endpoints can have a .product.yaml file in the project root containing URLs related to that GitHub repository). After parsing this optional metadata, the Graph Updater component has one or more endpoint URLs, which form a graph of related endpoints. Currently, the Graph Updater makes the following mutation to the GraphQL API: 1 2 3 4 5 mutation { endpoints ( urls : $URLs ) { url } } In this case, $URLs is an array of one or more endpoint URLs. Once the mutation above has been written to the GraphQL API, Graph Updater makes the following GraphQL query: 1 2 3 4 5 query { endpoints ( urls : $URLs ) { url } } The rationale here is that there may be existing vertices in the graph database that need to be re-scanned. For example, if the current URL was associated with https://endpoint1 and https://endpoint2 , and the graph of https://endpoint2 and https://endpoint3 already exists in the database, then we want to update each of https://endpoint1 , https://endpoint2 , and https://endpoint3 . At this point, each endpoint is dispatched to the appropriate EndpointScanner queue (e.g. EndpointScanner.githubEndpoints ) with the following payload. 
1 2 3 { \"endpoint\": \"https://\" } Graph Updater and Scanners TODO Scanners and GraphQL API TODO","title":"Data Contracts"},{"location":"data-contracts/#data-contracts","text":"This document highlights the data contracts that implicitly exist between various services.","title":"Data Contracts"},{"location":"data-contracts/#event-collectors-and-graph-updater","text":"Currently, the event collectors write a message to the EventsUpdate NATS queue with the following payload schema. 1 2 3 { \"endpoint\": \"https://\" } The only job of the event collectors is to determine which events are valid and relevant (i.e. corresponding to meaningful updates to an endpoint), and pass the endpoint URL along to the Graph Updater component.","title":"Event Collectors and Graph Updater"},{"location":"data-contracts/#graph-updater-and-graphql-api","text":"Each kind of endpoint has zero or more ways to attach metadata about related endpoints (e.g. GitHub repository endpoints can have a .product.yaml file in the project root containing URLs related to that GitHub repository). After parsing this optional metadata, the Graph Updater component has one or more endpoint URLs, which form a graph of related endpoints. Currently, the Graph Updater makes the following mutation to the GraphQL API: 1 2 3 4 5 mutation { endpoints ( urls : $URLs ) { url } } In this case, $URLs is an array of one or more endpoint URLs. Once the mutation above has been written to the GraphQL API, Graph Updater makes the following GraphQL query: 1 2 3 4 5 query { endpoints ( urls : $URLs ) { url } } The rationale here is that there may be existing vertices in the graph database that need to be re-scanned. For example, if the current URL was associated with https://endpoint1 and https://endpoint2 , and the graph of https://endpoint2 and https://endpoint3 already exists in the database, then we want to update each of https://endpoint1 , https://endpoint2 , and https://endpoint3 . 
At this point, each endpoint is dispatched to the appropriate EndpointScanner queue (e.g. EndpointScanner.githubEndpoints ) with the following payload. 1 2 3 { \"endpoint\": \"https://\" }","title":"Graph Updater and GraphQL API"},{"location":"data-contracts/#graph-updater-and-scanners","text":"TODO","title":"Graph Updater and Scanners"},{"location":"data-contracts/#scanners-and-graphql-api","text":"TODO","title":"Scanners and GraphQL API"},{"location":"deployment/","text":"Deploying on Kubernetes This document outlines how to deploy the Kubernetes application for ruok-service-autochecker . Deploying on a Local Kubernetes Cluster To deploy the ruok-service-autochecker application onto a local Kubernetes environment, ensure your current context is set to your local cluster (i.e. kubectl config set-context  ). You can verify your Kubernetes context by running kubectl config get-contexts ; your current context will be indicated with * . Before deploying the application, it is necessary to first build and tag all of the images in this repository. You can build and tag all of the images by running make build . Note that you may need to perform an extra step of loading your locally built images into your local cluster's image registry (see Loading an image into the KinD Cluster , for example). Once connected to your local cluster, run make k8s to deploy the various manifests and kustomization.yaml files associated with the application. Continuous Deployment onto GKE TODO","title":"Deployment"},{"location":"deployment/#deploying-on-kubernetes","text":"This document outlines how to deploy the Kubernetes application for ruok-service-autochecker .","title":"Deploying on Kubernetes"},{"location":"deployment/#deploying-on-a-local-kubernetes-cluster","text":"To deploy the ruok-service-autochecker application onto a local Kubernetes environment, ensure your current context is set to your local cluster (i.e. kubectl config set-context  ). 
You can verify your Kubernetes context by running kubectl config get-contexts ; your current context will be indicated with * . Before deploying the application, it is necessary to first build and tag all of the images in this repository. You can build and tag all of the images by running make build . Note that you may need to perform an extra step of loading your locally built images into your local cluster's image registry (see Loading an image into the KinD Cluster , for example). Once connected to your local cluster, run make k8s to deploy the various manifests and kustomization.yaml files associated with the application.","title":"Deploying on a Local Kubernetes Cluster"},{"location":"deployment/#continuous-deployment-onto-gke","text":"TODO","title":"Continuous Deployment onto GKE"},{"location":"development-environment/","text":"Development Environment This page outlines how to set up a development environment for this project. This document is meant to serve as a high-level mental model for how to set up a development environment. There are many ways to substitute certain components if a developer prefers one tool over another. Overview The diagram below shows a high-level overview of the development environment. VSCode is used as the integrated development environment. VSCode is run in client-server mode: The desktop VSCode application is downloaded for the operating system of choice, and a project-specific VSCode Dev Container is used to run VSCode server as a dev container. The VSCode Dev Container is attached to the host network, so the development container can access ports exposed on 127.0.0.1 , for example. K9s is used as a kubernetes dashboard, which provides a user interface for the developer to interact with the Kubernetes cluster. Podman is a daemonless and rootless OCI-compliant runtime and container manager that can be used to build OCI images and run containers on your development machine. 
Kubernetes in Docker (KinD) is a tool for running local kubernetes clusters entirely in OCI containers (i.e. OCI containers are used to run Kubernetes nodes). The sections below outline how to set up each component of this environment. VSCode Development Containers TLDR : The .devcontainer/devcontainer.json file contains the dev container configuration for this project. If you install the VSCode Dev Container extension and build/run the dev container, the dev container will be setup automatically. The section below highlights specific issues that you might encounter along with helpful resources to troubleshoot potential issues. Starting Dev Container as non-root user I added the \"containerUser\": \"node\" key to start the dev container as the default non-root node user for the dev container. Since I am running my dev container on Ubuntu Linux, I also needed to add the following line to my devcontainer.json file: 1 2 3 4 5 ... , \"runArgs\" : [ \"--userns=keep-id\" ], ... This line of configuration is necessary because, on Linux, podman maps the user ID (UID) that launched the container process to the root user of the container. By default, this means that my current user ID (usually 1000 in most cases) maps to the UID 1 (i.e. root user of the container user namespace). You can run podman unshare cat /proc/self/uid_map on the host machine to see how host UIDs map to UIDs in the container process namespaces. This caused problems as the files/folders in the repo are mounted to the container filesystem with root as the owner, so the node user didn't have permission to write to these files. Setting --userns=keep-id keeps the UID of 1000 in the container, so the repo files/folders that get mounted to the container filesystem are correctly owned by UID 1000 (i.e. node user), and it is possible to write to files in the container as the non-root user. See this stackoverflow answer for a more detailed explanation of how this works. 
Attach Development Container to Host Network As per this thread answer , add the following key in devcontainer.json . 1 2 3 4 5 ... , \"runArgs\" : [ \"--network=host\" ], ... VSCode Development Tools VSCode Integrated Debugger Debug configurations can be found in the .vscode/launch.json file in the project root. For information on how to use VSCode's integrated debugger, see the VSCode Debugging documentation . Environment Variable Management with direnv In order to run or debug a given application in a dev container, it may be necessary load a specific set of environment variables to configure that application. direnv is a tool that automatically loads environment variables into your shell when you cd into a folder. You may need to run direnv allow in a directory upon making changes to its .envrc file. Podman Instructions for installing the Podman runtime on all platforms can be found at this link . Additionally (and optionally), you can install Podman Desktop , which provides a graphical tool to facilitate working with podman. Using Podman with KinD It might be necessary to follow the steps in Kind - Rootless . After following these instructions, I had to run systemd-run --user --scope --property=Delegate=yes kind create cluster to create my kind cluster. This Linkedin Arcticle is also a good resource that reviews step-by-step setup of a KIND cluster using podman on Ubuntu Linux. Loading an image into the KinD Cluster KinD doesn't spin up a local registry out of the box, so it's necessary to run kind load docker-image  to load a locally build container image into the KinD cluster. If you're using Podman Desktop, there is a UI convenience for this by navigating to the Images tab, then for the image(s) you want to load into the KinD cluster, click \"Push image to Kind cluster\" (see screenshot below). 
Useful Code Snippets NATS CLI Connect to nats on localhost 1 nats context add local --description \"Localhost\" --select Subscribe to a queue group 1 nats sub EventsUpdate Publish to a queue group 1 nats pub \"ClonedRepoEvent.>\" '{\"webEndpoints\": [\"https://www.canada.ca/en/public-health.html\"]}' ` Related Issues KinD - Running with rootless podman doesn't work as documented KinD - Podman creation fails KinD - How I Wasted a Day Loading Local Docker Images","title":"Development Environment"},{"location":"development-environment/#development-environment","text":"This page outlines how to set up a development environment for this project. This document is meant to serve as a high-level mental model for how to set up a development environment. There are many ways to substitute certain components if a developer prefers one tool over another.","title":"Development Environment"},{"location":"development-environment/#overview","text":"The diagram below shows a high-level overview of the development environment. VSCode is used as the integrated development environment. VSCode is run in client-server mode: The desktop VSCode application is downloaded for the operating system of choice, and a project-specific VSCode Dev Container is used to run VSCode server as a dev container. The VSCode Dev Container is attached to the host network, so the development container can access ports exposed on 127.0.0.1 , for example. K9s is used as a kubernetes dashboard, which provides a user interface for the developer to interact with the Kubernetes cluster. Podman is a daemonless and rootless OCI-compliant runtime and container manager that can be used to build OCI images and run containers on your development machine. Kubernetes in Docker (KinD) is a tool for running local kubernetes clusters entirely in OCI containers (i.e. OCI containers are used to run Kubernetes nodes). 
The sections below outline how to set up each component of this environment.","title":"Overview"},{"location":"development-environment/#vscode-development-containers","text":"TLDR : The .devcontainer/devcontainer.json file contains the dev container configuration for this project. If you install the VSCode Dev Container extension and build/run the dev container, the dev container will be setup automatically. The section below highlights specific issues that you might encounter along with helpful resources to troubleshoot potential issues.","title":"VSCode Development Containers"},{"location":"development-environment/#starting-dev-container-as-non-root-user","text":"I added the \"containerUser\": \"node\" key to start the dev container as the default non-root node user for the dev container. Since I am running my dev container on Ubuntu Linux, I also needed to add the following line to my devcontainer.json file: 1 2 3 4 5 ... , \"runArgs\" : [ \"--userns=keep-id\" ], ... This line of configuration is necessary because, on Linux, podman maps the user ID (UID) that launched the container process to the root user of the container. By default, this means that my current user ID (usually 1000 in most cases) maps to the UID 1 (i.e. root user of the container user namespace). You can run podman unshare cat /proc/self/uid_map on the host machine to see how host UIDs map to UIDs in the container process namespaces. This caused problems as the files/folders in the repo are mounted to the container filesystem with root as the owner, so the node user didn't have permission to write to these files. Setting --userns=keep-id keeps the UID of 1000 in the container, so the repo files/folders that get mounted to the container filesystem are correctly owned by UID 1000 (i.e. node user), and it is possible to write to files in the container as the non-root user. 
See this stackoverflow answer for a more detailed explanation of how this works.","title":"Starting Dev Container as non-root user"},{"location":"development-environment/#attach-development-container-to-host-network","text":"As per this thread answer , add the following key in devcontainer.json . 1 2 3 4 5 ... , \"runArgs\" : [ \"--network=host\" ], ...","title":"Attach Development Container to Host Network"},{"location":"development-environment/#vscode-development-tools","text":"","title":"VSCode Development Tools"},{"location":"development-environment/#vscode-integrated-debugger","text":"Debug configurations can be found in the .vscode/launch.json file in the project root. For information on how to use VSCode's integrated debugger, see the VSCode Debugging documentation .","title":"VSCode Integrated Debugger"},{"location":"development-environment/#environment-variable-management-with-direnv","text":"In order to run or debug a given application in a dev container, it may be necessary load a specific set of environment variables to configure that application. direnv is a tool that automatically loads environment variables into your shell when you cd into a folder. You may need to run direnv allow in a directory upon making changes to its .envrc file.","title":"Environment Variable Management with direnv"},{"location":"development-environment/#podman","text":"Instructions for installing the Podman runtime on all platforms can be found at this link . Additionally (and optionally), you can install Podman Desktop , which provides a graphical tool to facilitate working with podman.","title":"Podman"},{"location":"development-environment/#using-podman-with-kind","text":"It might be necessary to follow the steps in Kind - Rootless . After following these instructions, I had to run systemd-run --user --scope --property=Delegate=yes kind create cluster to create my kind cluster. 
This Linkedin Arcticle is also a good resource that reviews step-by-step setup of a KIND cluster using podman on Ubuntu Linux.","title":"Using Podman with KinD"},{"location":"development-environment/#loading-an-image-into-the-kind-cluster","text":"KinD doesn't spin up a local registry out of the box, so it's necessary to run kind load docker-image  to load a locally build container image into the KinD cluster. If you're using Podman Desktop, there is a UI convenience for this by navigating to the Images tab, then for the image(s) you want to load into the KinD cluster, click \"Push image to Kind cluster\" (see screenshot below).","title":"Loading an image into the KinD Cluster"},{"location":"development-environment/#useful-code-snippets","text":"","title":"Useful Code Snippets"},{"location":"development-environment/#nats-cli","text":"Connect to nats on localhost 1 nats context add local --description \"Localhost\" --select Subscribe to a queue group 1 nats sub EventsUpdate Publish to a queue group 1 nats pub \"ClonedRepoEvent.>\" '{\"webEndpoints\": [\"https://www.canada.ca/en/public-health.html\"]}' `","title":"NATS CLI"},{"location":"development-environment/#related-issues","text":"KinD - Running with rootless podman doesn't work as documented KinD - Podman creation fails KinD - How I Wasted a Day Loading Local Docker Images","title":"Related Issues"},{"location":"github-webhooks/","text":"GitHub Webhooks webhook-server/ contains the implementation of the GitHub webhook server portion of this project. The purpose of this server is to listen for events triggered by certain events of interest on GitHub resources. Validating GitHub Webhook Deliveries TODO Local Development with GitHub Webhooks In order to test webhook-server locally, it is necessary to use a webhook proxy URL to forward webhooks from GitHub to your computer. Instructions for how to do this are as follows: In your browser, nagivate to https://smee.io/ and click Start a new channel . 
Copy the full URL under Webhook Proxy URL . Install the corresponding smee-client package from npm as a dev dependency: npm i sme-client --save-dev . Start the smee-client as follows: smee --url  --path  --port  . Go to a repository of interest in the PHACDataHub Github organization, go to Settings --> Code and automation --> Webhooks --> Add new webhook and paste the Webhook Proxy URL from step 1. Choose application/json for the content type. You can also choose which repo events get forwarded, or select \"sent me everything\" to receive all events. Start up the webhook-server . Trigger an event on the GitHub repo that you registered the webhook with. If everything is set up correctly, you should receive a request to webhook-server where req.body contains the JSON payload of the GitHub webhook event. Helpful Resources testing webhooks redelivering webhooks about webhooks","title":"GitHub Webhooks"},{"location":"github-webhooks/#github-webhooks","text":"webhook-server/ contains the implementation of the GitHub webhook server portion of this project. The purpose of this server is to listen for events triggered by certain events of interest on GitHub resources.","title":"GitHub Webhooks"},{"location":"github-webhooks/#validating-github-webhook-deliveries","text":"TODO","title":"Validating GitHub Webhook Deliveries"},{"location":"github-webhooks/#local-development-with-github-webhooks","text":"In order to test webhook-server locally, it is necessary to use a webhook proxy URL to forward webhooks from GitHub to your computer. Instructions for how to do this are as follows: In your browser, nagivate to https://smee.io/ and click Start a new channel . Copy the full URL under Webhook Proxy URL . Install the corresponding smee-client package from npm as a dev dependency: npm i sme-client --save-dev . Start the smee-client as follows: smee --url  --path  --port  . 
Go to a repository of interest in the PHACDataHub GitHub organization, go to Settings --> Code and automation --> Webhooks --> Add new webhook and paste the Webhook Proxy URL from step 1. Choose application/json for the content type. You can also choose which repo events get forwarded, or select \"Send me everything\" to receive all events. Start up the webhook-server . Trigger an event on the GitHub repo that you registered the webhook with. If everything is set up correctly, you should receive a request to webhook-server where req.body contains the JSON payload of the GitHub webhook event.","title":"Local Development with GitHub Webhooks"},{"location":"github-webhooks/#helpful-resources","text":"testing webhooks redelivering webhooks about webhooks","title":"Helpful Resources"},{"location":"graphql-api/","text":"GraphQL API Due to the graph nature of the underlying data, Observatory exposes a GraphQL API that is oriented around the concept of Endpoint s. The following two sections explain the motivation behind our endpoint-oriented data model and the actual GraphQL Schema that the GraphQL API exposes. Motivation (Why Endpoints?) Ultimately, Observatory cares about monitoring products . Modern products tend to be associated with a variety of URLs, such as URLs for source code repositories (e.g. Github.com, Gitlab.com), URLs for container registries, URLs for APIs, URLs for web applications, and so on. It can be difficult to provide a stable and authoritative definition of a product without imposing a rigid definition that must be imposed and agreed upon by humans. An approach such as agreeing to and adopting a standard way of defining a product UID may work for a small coordinated group of individuals, but is difficult to scale to large groups of distributed teams without imposing significant administrative burden. Furthermore, it is often not realistic to assume that a product will always be associated with a single URL in a way that is stable over time. 
As a product evolves, it may rename its source code repository or move under a different organization; as a product graduates in its maturity model, it may be promoted from an *.alpha.* to a *.prod.* domain name. In Observatory, the assumption we make is that products are a related graph of Endpoints that evolves over time (e.g. new endpoints are added and old endpoints are removed). The graph of endpoints has the property that viewing any of the endpoint nodes allows all of the endpoint nodes attached to it to be discovered. Additionally, we add the ability to define Product s, which point to one or more Endpoint s, allowing for discovery of the subgraph of endpoints by querying a named Product . Or, conversely, given a URL of any endpoint on the graph, the associated Product node can be discovered. Alternatively, if users wish to monitor individual URLs directly rather than create a product graph, this use case can be accommodated as well. GraphQL API Currently, the GraphQL API exposes the following queries and mutations. 1 -- 8 <-- \"api/src/main.py:18:66 Examples The semantics of queries is that a query for any endpoint on a subgraph returns the entire subgraph. For example, suppose we make the following mutation: 1 2 3 4 5 6 7 mutation { endpoints ( urls : [ \"https://github.com/someorg/somerepo2\" , \"https://another-site.phac.gc.ca\" , \"https://some-other-api.phac-aspc.gc.ca\" ]) } This mutation creates 3 connected endpoints: https://github.com/someorg/somerepo2 , https://another-site.phac.gc.ca , https://some-other-api.phac-aspc.gc.ca . 
Suppose at a later date, we make some additional associations and attach these endpoints to a product with another mutation: 1 2 3 4 5 6 7 8 9 10 mutation { product ( name : \"myproduct\" urls : [ \"https://github.com/someorg/somerepo2\" , \"https://some-other-api.phac-aspc.gc.ca\" , \"https://some-third-webapp.phac.alpha.gc.ca\" ] ) } This mutation adds two additional nodes to the subgraph: https://some-third-webapp.phac.alpha.gc.ca , and a product label called myproduct . At this point in time, the subgraph looks like the following. This graph now has the property that a search for any endpoint on the graph will return all endpoints on the graph. For example, the following graphql query returns the following result: 1 2 3 4 5 query { endpoint ( url : \"myproduct\" ) { url } } 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 { \"data\" : { \"endpoint\" : [ { \"url\" : \"myproduct\" }, { \"url\" : \"https://github.com/someorg/somerepo2\" }, { \"url\" : \"https://some-other-api.phac-aspc.gc.ca\" }, { \"url\" : \"https://some-third-webapp.phac.alpha.gc.ca\" }, { \"url\" : \"https://another-site.phac.gc.ca\" } ] } } Similarly, a GraphQL query for a different vertex on the graph also returns the entire subgraph (although in a different order since the graph traversal started from a different vertex as last time). 
1 2 3 4 5 query { endpoint ( url : \"https://another-site.phac.gc.ca\" ) { url } } 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 { \"data\" : { \"endpoint\" : [ { \"url\" : \"https://another-site.phac.gc.ca\" }, { \"url\" : \"https://github.com/someorg/somerepo2\" }, { \"url\" : \"https://some-other-api.phac-aspc.gc.ca\" }, { \"url\" : \"myproduct\" }, { \"url\" : \"https://some-third-webapp.phac.alpha.gc.ca\" } ] } } Full GraphQL API Specification Mutations Source code in api/src/graphql_types/mutation.py 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 @strawberry . type class Mutation : @strawberry . mutation def endpoint ( self , url : str ) -> str : \"\"\" Insert an endpoint with no additional metadata. This mutation should only be used by the graph updater component to update the graph structure. \"\"\" client = GraphDB () client . insert_endpoint ( url ) client . close () return url @strawberry . mutation def githubEndpoint ( self , endpoint : GithubEndpoint ) -> str : \"\"\" # Update/Insert Github Endpoint Insert a Github Endpoint with \"upsert\" semantics. If the endpoint doesn't already exist, the endpoint document will be created. If the endpoint already exists, its fields will be updated with the values provided in the mutation. # Example of how to use this as an input type: ```graphql mutation { githubEndpoint( endpoint: { url:\"https://github.com/someOrg/someRepo\" owner: \"someOrg\" repo:\"someRepo\" license: \"MIT\" visibility:\"Public\" programmingLanguage:[\"Python\", \"JavaScript\", \"Bash\", \"Dockerfile\"] automatedSecurityFixes: { checkPasses: true metadata: {} } vulnerabilityAlerts: { checkPasses: false metadata: { key: \"value\" } } branchProtection:{ checkPasses:true, metadata:{ key:\"value\" } } } ) } ``` \"\"\" client = GraphDB () client . 
upsert_scanner_endpoint ( endpoint ) client . close () return endpoint . url @strawberry . mutation def endpoints ( self , urls : List [ str ]) -> List [ str ]: \"\"\" Writes a list of URLs to the graph. Each URL will be associated with every other URL in the list. \"\"\" client = GraphDB () client . insert_endpoints ( urls ) client . close () return urls @strawberry . mutation def product ( self , name : str , urls : List [ str ]) -> str : \"\"\" Attaches a product label to a list of URLs. \"\"\" client = GraphDB () client . insert_product ( name , urls ) client . close () return name endpoint ( url ) Insert an endpoint with no additional metadata. This mutation should only be used by the graph updater component to update the graph structure. Source code in api/src/graphql_types/mutation.py 11 12 13 14 15 16 17 18 19 20 @strawberry . mutation def endpoint ( self , url : str ) -> str : \"\"\" Insert an endpoint with no additional metadata. This mutation should only be used by the graph updater component to update the graph structure. \"\"\" client = GraphDB () client . insert_endpoint ( url ) client . close () return url endpoints ( urls ) Writes a list of URLs to the graph. Each URL will be associated with every other URL in the list. Source code in api/src/graphql_types/mutation.py 69 70 71 72 73 74 75 76 77 78 @strawberry . mutation def endpoints ( self , urls : List [ str ]) -> List [ str ]: \"\"\" Writes a list of URLs to the graph. Each URL will be associated with every other URL in the list. \"\"\" client = GraphDB () client . insert_endpoints ( urls ) client . close () return urls githubEndpoint ( endpoint ) Update/Insert Github Endpoint Insert a Github Endpoint with \"upsert\" semantics. If the endpoint doesn't already exist, the endpoint document will be created. If the endpoint already exists, its fields will be updated with the values provided in the mutation. 
Example of how to use this as an input type: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 mutation { githubEndpoint ( endpoint : { url : \"https://github.com/someOrg/someRepo\" owner : \"someOrg\" repo : \"someRepo\" license : \"MIT\" visibility : \"Public\" programmingLanguage :[ \"Python\" , \"JavaScript\" , \"Bash\" , \"Dockerfile\" ] automatedSecurityFixes : { checkPasses : true metadata : {} } vulnerabilityAlerts : { checkPasses : false metadata : { key : \"value\" } } branchProtection :{ checkPasses : true , metadata :{ key : \"value\" } } } ) } Source code in api/src/graphql_types/mutation.py 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 @strawberry . mutation def githubEndpoint ( self , endpoint : GithubEndpoint ) -> str : \"\"\" # Update/Insert Github Endpoint Insert a Github Endpoint with \"upsert\" semantics. If the endpoint doesn't already exist, the endpoint document will be created. If the endpoint already exists, its fields will be updated with the values provided in the mutation. # Example of how to use this as an input type: ```graphql mutation { githubEndpoint( endpoint: { url:\"https://github.com/someOrg/someRepo\" owner: \"someOrg\" repo:\"someRepo\" license: \"MIT\" visibility:\"Public\" programmingLanguage:[\"Python\", \"JavaScript\", \"Bash\", \"Dockerfile\"] automatedSecurityFixes: { checkPasses: true metadata: {} } vulnerabilityAlerts: { checkPasses: false metadata: { key: \"value\" } } branchProtection:{ checkPasses:true, metadata:{ key:\"value\" } } } ) } ``` \"\"\" client = GraphDB () client . upsert_scanner_endpoint ( endpoint ) client . close () return endpoint . url product ( name , urls ) Attaches a product label to a list of URLs. Source code in api/src/graphql_types/mutation.py 80 81 82 83 84 85 86 87 88 @strawberry . 
mutation def product ( self , name : str , urls : List [ str ]) -> str : \"\"\" Attaches a product label to a list of URLs. \"\"\" client = GraphDB () client . insert_product ( name , urls ) client . close () return name","title":"GraphQL API"},{"location":"graphql-api/#graphql-api","text":"Due to the graph nature of the underlying data, Observatory exposes a GraphQL API that is oriented around the concept of Endpoint s. The following two sections explain the motivation behind our endpoint-oriented data model and the actual GraphQL Schema that the GraphQL API exposes.","title":"GraphQL API"},{"location":"graphql-api/#motivation-why-endpoints","text":"Ultimately, Observatory cares about monitoring products . Modern products tend to be associated with a variety of URLs, such as URLs for source code repositories (e.g. Github.com, Gitlab.com), URLs for container registries, URLs for APIs, URLs for web applications, and so on. It can be difficult to provide a stable and authoritative definition of a product without imposing a rigid definition that must be imposed and agreed upon by humans. An approach such as agreeing to and adopting a standard way of defining a product UID may work for a small coordinated group of individuals, but is difficult to scale to large groups of distributed teams without imposing significant administrative burden. Furthermore, it is often not realistic to assume that a product will always be associated with a single URL in a way that is stable over time. As a product evolves, it may rename its source code repository or move under a different organization; as a product graduates in its maturity model, it may be promoted from an *.alpha.* to a *.prod.* domain name. In Observatory, the assumption we make is that products are a related graph of Endpoints that evolves over time (e.g. new endpoints are added and old endpoints are removed). 
The graph of endpoints has the property that viewing any of the endpoint nodes allows all of the endpoint nodes attached to it to be discovered. Additionally, we add the ability to define Product s, which point to one or more Endpoint s, allowing for discovery of the subgraph of endpoints by querying a named Product . Or, conversely, given a URL of any endpoint on the graph, the associated Product node can be discovered. Alternatively, if users wish to monitor individual URLs directly rather than create a product graph, this use case can be accommodated as well.","title":"Motivation (Why Endpoints?)"},{"location":"graphql-api/#graphql-api_1","text":"Currently, the GraphQL API exposes the following queries and mutations. 1 -- 8 <-- \"api/src/main.py:18:66","title":"GraphQL API"},{"location":"graphql-api/#examples","text":"The semantics of queries is that a query for any endpoint on a subgraph returns the entire subgraph. For example, suppose we make the following mutation: 1 2 3 4 5 6 7 mutation { endpoints ( urls : [ \"https://github.com/someorg/somerepo2\" , \"https://another-site.phac.gc.ca\" , \"https://some-other-api.phac-aspc.gc.ca\" ]) } This mutation creates 3 connected endpoints: https://github.com/someorg/somerepo2 , https://another-site.phac.gc.ca , https://some-other-api.phac-aspc.gc.ca . Suppose at a later date, we make some additional associations and attach these endpoints to a product with another mutation: 1 2 3 4 5 6 7 8 9 10 mutation { product ( name : \"myproduct\" urls : [ \"https://github.com/someorg/somerepo2\" , \"https://some-other-api.phac-aspc.gc.ca\" , \"https://some-third-webapp.phac.alpha.gc.ca\" ] ) } This mutation adds two additional nodes to the subgraph: https://some-third-webapp.phac.alpha.gc.ca , and a product label called myproduct . At this point in time, the subgraph looks like the following. This graph now has the property that a search for any endpoint on the graph will return all endpoints on the graph. 
For example, the following graphql query returns the following result: 1 2 3 4 5 query { endpoint ( url : \"myproduct\" ) { url } } 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 { \"data\" : { \"endpoint\" : [ { \"url\" : \"myproduct\" }, { \"url\" : \"https://github.com/someorg/somerepo2\" }, { \"url\" : \"https://some-other-api.phac-aspc.gc.ca\" }, { \"url\" : \"https://some-third-webapp.phac.alpha.gc.ca\" }, { \"url\" : \"https://another-site.phac.gc.ca\" } ] } } Similarly, a GraphQL query for a different vertex on the graph also returns the entire subgraph (although in a different order since the graph traversal started from a different vertex as last time). 1 2 3 4 5 query { endpoint ( url : \"https://another-site.phac.gc.ca\" ) { url } } 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 { \"data\" : { \"endpoint\" : [ { \"url\" : \"https://another-site.phac.gc.ca\" }, { \"url\" : \"https://github.com/someorg/somerepo2\" }, { \"url\" : \"https://some-other-api.phac-aspc.gc.ca\" }, { \"url\" : \"myproduct\" }, { \"url\" : \"https://some-third-webapp.phac.alpha.gc.ca\" } ] } }","title":"Examples"},{"location":"graphql-api/#full-graphql-api-specification","text":"","title":"Full GraphQL API Specification"},{"location":"graphql-api/#mutations","text":"Source code in api/src/graphql_types/mutation.py 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 @strawberry . type class Mutation : @strawberry . mutation def endpoint ( self , url : str ) -> str : \"\"\" Insert an endpoint with no additional metadata. This mutation should only be used by the graph updater component to update the graph structure. \"\"\" client = GraphDB () client . insert_endpoint ( url ) client . close () return url @strawberry . 
mutation def githubEndpoint ( self , endpoint : GithubEndpoint ) -> str : \"\"\" # Update/Insert Github Endpoint Insert a Github Endpoint with \"upsert\" semantics. If the endpoint doesn't already exist, the endpoint document will be created. If the endpoint already exists, its fields will be updated with the values provided in the mutation. # Example of how to use this as an input type: ```graphql mutation { githubEndpoint( endpoint: { url:\"https://github.com/someOrg/someRepo\" owner: \"someOrg\" repo:\"someRepo\" license: \"MIT\" visibility:\"Public\" programmingLanguage:[\"Python\", \"JavaScript\", \"Bash\", \"Dockerfile\"] automatedSecurityFixes: { checkPasses: true metadata: {} } vulnerabilityAlerts: { checkPasses: false metadata: { key: \"value\" } } branchProtection:{ checkPasses:true, metadata:{ key:\"value\" } } } ) } ``` \"\"\" client = GraphDB () client . upsert_scanner_endpoint ( endpoint ) client . close () return endpoint . url @strawberry . mutation def endpoints ( self , urls : List [ str ]) -> List [ str ]: \"\"\" Writes a list of URLs to the graph. Each URL will be associated with every other URL in the list. \"\"\" client = GraphDB () client . insert_endpoints ( urls ) client . close () return urls @strawberry . mutation def product ( self , name : str , urls : List [ str ]) -> str : \"\"\" Attaches a product label to a list of URLs. \"\"\" client = GraphDB () client . insert_product ( name , urls ) client . close () return name","title":"Mutations"},{"location":"graphql-api/#graphql_types.mutation.Mutation.endpoint","text":"Insert an endpoint with no additional metadata. This mutation should only be used by the graph updater component to update the graph structure. Source code in api/src/graphql_types/mutation.py 11 12 13 14 15 16 17 18 19 20 @strawberry . mutation def endpoint ( self , url : str ) -> str : \"\"\" Insert an endpoint with no additional metadata. 
This mutation should only be used by the graph updater component to update the graph structure. \"\"\" client = GraphDB () client . insert_endpoint ( url ) client . close () return url","title":"endpoint()"},{"location":"graphql-api/#graphql_types.mutation.Mutation.endpoints","text":"Writes a list of URLs to the graph. Each URL will be associated with every other URL in the list. Source code in api/src/graphql_types/mutation.py 69 70 71 72 73 74 75 76 77 78 @strawberry . mutation def endpoints ( self , urls : List [ str ]) -> List [ str ]: \"\"\" Writes a list of URLs to the graph. Each URL will be associated with every other URL in the list. \"\"\" client = GraphDB () client . insert_endpoints ( urls ) client . close () return urls","title":"endpoints()"},{"location":"graphql-api/#graphql_types.mutation.Mutation.githubEndpoint","text":"","title":"githubEndpoint()"},{"location":"graphql-api/#graphql_types.mutation.Mutation.githubEndpoint--updateinsert-github-endpoint","text":"Insert a Github Endpoint with \"upsert\" semantics. If the endpoint doesn't already exist, the endpoint document will be created. 
If the endpoint already exists, its fields will be updated with the values provided in the mutation.","title":"Update/Insert Github Endpoint"},{"location":"graphql-api/#graphql_types.mutation.Mutation.githubEndpoint--example-of-how-to-use-this-as-an-input-type","text":"1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 mutation { githubEndpoint ( endpoint : { url : \"https://github.com/someOrg/someRepo\" owner : \"someOrg\" repo : \"someRepo\" license : \"MIT\" visibility : \"Public\" programmingLanguage :[ \"Python\" , \"JavaScript\" , \"Bash\" , \"Dockerfile\" ] automatedSecurityFixes : { checkPasses : true metadata : {} } vulnerabilityAlerts : { checkPasses : false metadata : { key : \"value\" } } branchProtection :{ checkPasses : true , metadata :{ key : \"value\" } } } ) } Source code in api/src/graphql_types/mutation.py 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 @strawberry . mutation def githubEndpoint ( self , endpoint : GithubEndpoint ) -> str : \"\"\" # Update/Insert Github Endpoint Insert a Github Endpoint with \"upsert\" semantics. If the endpoint doesn't already exist, the endpoint document will be created. If the endpoint already exists, its fields will be updated with the values provided in the mutation. # Example of how to use this as an input type: ```graphql mutation { githubEndpoint( endpoint: { url:\"https://github.com/someOrg/someRepo\" owner: \"someOrg\" repo:\"someRepo\" license: \"MIT\" visibility:\"Public\" programmingLanguage:[\"Python\", \"JavaScript\", \"Bash\", \"Dockerfile\"] automatedSecurityFixes: { checkPasses: true metadata: {} } vulnerabilityAlerts: { checkPasses: false metadata: { key: \"value\" } } branchProtection:{ checkPasses:true, metadata:{ key:\"value\" } } } ) } ``` \"\"\" client = GraphDB () client . upsert_scanner_endpoint ( endpoint ) client . close () return endpoint . 
url","title":"Example of how to use this as an input type:"},{"location":"graphql-api/#graphql_types.mutation.Mutation.product","text":"Attaches a product label to a list of URLs. Source code in api/src/graphql_types/mutation.py 80 81 82 83 84 85 86 87 88 @strawberry . mutation def product ( self , name : str , urls : List [ str ]) -> str : \"\"\" Attaches a product label to a list of URLs. \"\"\" client = GraphDB () client . insert_product ( name , urls ) client . close () return name","title":"product()"},{"location":"register-repository/","text":"Register a Repository with Observatory Registering new repositories with Observatory requires a few simple steps. Create a .product.yaml File Create a .product.yaml file at the repository root with the following fields. 1 2 3 4 5 6 7 8 9 10 productName : Your Product Name webappUrls : - https://product-url-1.phac.alpha.canada.ca - https://product-url-2.phac.alpha.canada.ca apiUrls : - https://api-url-1.phac.alpha.canada.ca containerRegistryUrls : - northamerica-northeast1-docker.pkg.dev/product-container-1@sha256:abcxyz - northamerica-northeast1-docker.pkg.dev/product-container-2@sha256:abc123 - northamerica-northeast1-docker.pkg.dev/product-container-3@sha256:xyz123","title":"Register a Repository"},{"location":"register-repository/#register-a-repository-with-observatory","text":"Registering new repositories with Observatory requires a few simple steps.","title":"Register a Repository with Observatory"},{"location":"register-repository/#create-a-productyaml-file","text":"Create a .product.yaml file at the repository root with the following fields. 
1 2 3 4 5 6 7 8 9 10 productName : Your Product Name webappUrls : - https://product-url-1.phac.alpha.canada.ca - https://product-url-2.phac.alpha.canada.ca apiUrls : - https://api-url-1.phac.alpha.canada.ca containerRegistryUrls : - northamerica-northeast1-docker.pkg.dev/product-container-1@sha256:abcxyz - northamerica-northeast1-docker.pkg.dev/product-container-2@sha256:abc123 - northamerica-northeast1-docker.pkg.dev/product-container-3@sha256:xyz123","title":"Create a .product.yaml File"},{"location":"scanners/","text":"Checks When a repository event webhook is received, Observatory performs a series of automated scans. The Endpoint Scanner components of the Observatory system perform the actual scans on given endpoints. Broadly speaking, these checks can be broken into the following categories. Check Type Purpose Strategy Remote Repository Checks Verify compliance with source code repositories on remotes such as GitHub . GitHub Octokit API Repository Content Checks Perform scans on the contents of the repository. Deep clone the repository and use automated scanning tools (e.g. Gitleaks ). URL Scanning Checks Perform security and compliance checks against the live instance(s) of the product Various automated scanning tools that interact with a public URL (e.g. axe-core for accessibility scanning). Container Image Checks Gather container image data from GCP. GCP API Observatory uses GraphQL as a layer to unify the data model for reporting on ITSG-33 and related compliance requirements. Roughly speaking, Observatory's \"scanners\" can be thought of as writing many pieces of security information about a given product. Similarly, the same data model exposed by the GraphQL API can be queried to report on the status of various compliance requirements. Note our assumption that one repository may deploy services behind multiple URLs and each repository may build more than one OCI image. 
The sections below expand on each Check Type in greater detail, and also show the parts of our GraphQL schema that expose these Check Types. Remote Repository Checks There are many idioms, best practices, and security requirements for remote source code repositories. Observatory automatically performs a number of these checks using information retrievable from the GitHub Octokit API . Vulnerability Alerts Enabled TODO Automated Security Fixes Enabled TODO Branch Protection Enabled TODO Repository Content Checks A number of checks are performed by scanning a deep clone of the repository's contents. The purpose of these checks is to perform scanning on all of the source code, configuration, etc. contained in the repository. Has Security.md A best practice with any open source code repository is to provide instructions via a Security.md file at the project root for how security vulnerabilities should be reported (see GitHub's code security documentation for more information). This check verifies whether a repository contains a Security.md file. Remediation Include a file called Security.md at the root of your repository explaining how security vulnerabilities should be reported to the repository owner. For example, there may be an email address where vulnerability reports should be sent, and explicit instructions to not document security vulnerabilities in the repository issues. Data Example 1 2 3 4 5 6 7 8 9 { // ... \"hasSecurityMd\":{ \"checkPasses\": true, \"metadata\": null, \"lastUpdated\": 1698174245826 } // ... } Gitleaks Report TODO File Size Check TODO Hadolint Dockerfile hadolint performs a series of lint checks on each Dockerfile found in the repository. TODO URL (Service) Scanning Checks Some products have one or more services exposed through URLs. URL compliance checks perform a series of automated accessibility and security compliance checks using information that can be retrieved via these public URLs. 
Container Image Checks Any products that build and deploy OCI images perform a series of checks on the built image artifact(s).","title":"Scanners"},{"location":"scanners/#checks","text":"When a repository event webhook is received, Observatory performs a series of automated scans. The Endpoint Scanner components of the Observatory system perform the actual scans on given endpoints. Broadly speaking, these checks can be broken into the following categories. Check Type Purpose Strategy Remote Repository Checks Verify compliance with source code repositories on remotes such as GitHub . GitHub Octokit API Repository Content Checks Perform scans on the contents of the repository. Deep clone the repository and use automated scanning tools (e.g. Gitleaks ). URL Scanning Checks Perform security and compliance checks against the live instance(s) of the product Various automated scanning tools that interact with a public URL (e.g. axe-core for accessibility scanning). Container Image Checks Gather container image data from GCP. GCP API Observatory uses GraphQL as a layer to unify the data model for reporting on ITSG-33 and related compliance requirements. Roughly speaking, Observatory's \"scanners\" can be thought of as writing many pieces of security information about a given product. Similarly, the same data model exposed by the GraphQL API can be queried to report on the status of various compliance requirements. Note our assumption that one repository may deploy services behind multiple URLs and each repository may build more than one OCI image. The sections below expand on each Check Type in greater detail, and also show the parts of our GraphQL schema that expose these Check Types.","title":"Checks"},{"location":"scanners/#remote-repository-checks","text":"There are many idioms, best practices, and security requirements for remote source code repositories. 
Observatory automatically performs a number of these checks using information retrievable from the GitHub Octokit API .","title":"Remote Repository Checks"},{"location":"scanners/#vulnerability-alerts-enabled","text":"TODO","title":"Vulnerability Alerts Enabled"},{"location":"scanners/#automated-security-fixes-enabled","text":"TODO","title":"Automated Security Fixes Enabled"},{"location":"scanners/#branch-protection-enabled","text":"TODO","title":"Branch Protection Enabled"},{"location":"scanners/#repository-content-checks","text":"A number of checks are performed by scanning a deep clone of the repository's contents. The purpose of these checks is to perform scanning on all of the source code, configuration, etc. contained in the repository.","title":"Repository Content Checks"},{"location":"scanners/#has-securitymd","text":"A best practice with any open source code repository is to provide instructions via a Security.md file at the project root for how security vulnerabilities should be reported (see GitHub's code security documentation for more information). This check verifies whether a repository contains a Security.md file. Remediation Include a file called Security.md at the root of your repository explaining how security vulnerabilities should be reported to the repository owner. For example, there may be an email address where vulnerability reports should be sent, and explicit instructions to not document security vulnerabilities in the repository issues. Data Example 1 2 3 4 5 6 7 8 9 { // ... \"hasSecurityMd\":{ \"checkPasses\": true, \"metadata\": null, \"lastUpdated\": 1698174245826 } // ... }","title":"Has Security.md"},{"location":"scanners/#gitleaks-report","text":"TODO","title":"Gitleaks Report"},{"location":"scanners/#file-size-check","text":"TODO","title":"File Size Check"},{"location":"scanners/#hadolint-dockerfile","text":"hadolint performs a series of lint checks on each Dockerfile found in the repository. 
TODO","title":"Hadolint Dockerfile"},{"location":"scanners/#url-service-scanning-checks","text":"Some products have one or more services exposed through URLs. URL compliance checks perform a series of automated accessibility and security compliance checks using information that can be retrieved via these public URLs.","title":"URL (Service) Scanning Checks"},{"location":"scanners/#container-image-checks","text":"Any products that build and deploy OCI images perform a series of checks on the built image artifact(s).","title":"Container Image Checks"}]}
\ No newline at end of file
diff --git a/sitemap.xml.gz b/sitemap.xml.gz
index c886d2f..bfb25af 100644
Binary files a/sitemap.xml.gz and b/sitemap.xml.gz differ