From 3750a16adf023873bd40ca377d866c1256ec09af Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Mon, 4 Sep 2023 16:57:00 +0200 Subject: [PATCH] feat: fetch datasets from resp. dataset repo branch; enable it on vercel --- .env.example | 4 ++ .../config/next/lib/getEnvVars.ts | 2 + .../nextclade-web/config/next/next.config.ts | 2 + packages_rs/nextclade-web/src/constants.ts | 2 + .../nextclade-web/src/io/fetchDatasets.ts | 71 ++++++++++++++++--- packages_rs/nextclade-web/src/pages/_app.tsx | 5 +- .../nextclade-web/src/state/dataset.state.ts | 15 ---- scripts/build_on_vercel.sh | 2 + 8 files changed, 77 insertions(+), 26 deletions(-) diff --git a/.env.example b/.env.example index 4989826883..0e6d51d581 100644 --- a/.env.example +++ b/.env.example @@ -32,6 +32,10 @@ SYNC_DESTINATION=123.456.789.123:~/nextclade DATA_FULL_DOMAIN=https://data.master.clades.nextstrain.org/v3 # DATA_FULL_DOMAIN=http://localhost:27722 +# If enabled, Nextclade Web will first attempt to fetch datasets from the corresponding GitHub branch. If this attempt +# fails, it will use `DATA_FULL_DOMAIN` as usual. +DATA_TRY_GITHUB_BRANCH=0 + # Directory path (relative to the root of the project) from which local data server takes the data. # Useful for local testing on new datasets. See: https://github.com/neherlab/nextclade_data # It is recommended to keep the `nextclade_data` git repo in a sibling directory of `nextclade` git repo. 
diff --git a/packages_rs/nextclade-web/config/next/lib/getEnvVars.ts b/packages_rs/nextclade-web/config/next/lib/getEnvVars.ts index b040b9fd33..ef8a0b4f3c 100644 --- a/packages_rs/nextclade-web/config/next/lib/getEnvVars.ts +++ b/packages_rs/nextclade-web/config/next/lib/getEnvVars.ts @@ -10,6 +10,7 @@ export function getEnvVars() { const DOMAIN = getDomain() const DOMAIN_STRIPPED = DOMAIN.replace('https://', '').replace('http://', '') const DATA_FULL_DOMAIN = getenv('DATA_FULL_DOMAIN') + const DATA_TRY_GITHUB_BRANCH = getenv('DATA_TRY_GITHUB_BRANCH') const common = { BABEL_ENV, @@ -20,6 +21,7 @@ export function getEnvVars() { DOMAIN, DOMAIN_STRIPPED, DATA_FULL_DOMAIN, + DATA_TRY_GITHUB_BRANCH, } if (PRODUCTION) { diff --git a/packages_rs/nextclade-web/config/next/next.config.ts b/packages_rs/nextclade-web/config/next/next.config.ts index f388584867..22cd557fab 100644 --- a/packages_rs/nextclade-web/config/next/next.config.ts +++ b/packages_rs/nextclade-web/config/next/next.config.ts @@ -46,6 +46,7 @@ const { DOMAIN, DOMAIN_STRIPPED, DATA_FULL_DOMAIN, + DATA_TRY_GITHUB_BRANCH, } = getEnvVars() const BRANCH_NAME = getGitBranch() @@ -61,6 +62,7 @@ const clientEnv = { DOMAIN, DOMAIN_STRIPPED, DATA_FULL_DOMAIN, + DATA_TRY_GITHUB_BRANCH, BLOCK_SEARCH_INDEXING: DOMAIN === RELEASE_URL ? 
'0' : '1', } diff --git a/packages_rs/nextclade-web/src/constants.ts b/packages_rs/nextclade-web/src/constants.ts index 1b9d7841ef..e8eb6b34d4 100644 --- a/packages_rs/nextclade-web/src/constants.ts +++ b/packages_rs/nextclade-web/src/constants.ts @@ -31,6 +31,8 @@ export const URL_GITHUB_COMMITS = 'https://github.com/nextstrain/nextclade/commi export const URL_CLADE_SCHEMA_REPO = 'https://github.com/nextstrain/ncov-clades-schema/' export const URL_CLADE_SCHEMA_SVG = 'https://raw.githubusercontent.com/nextstrain/ncov-clades-schema/master/clades.svg' +export const URL_GITHUB_DATA_RAW = 'https://raw.githubusercontent.com/nextstrain/nextclade_data' as const + export const SUPPORT_EMAIL = 'hello@nextstrain.org' export const TWITTER_USERNAME_RAW = 'nextstrain' as const diff --git a/packages_rs/nextclade-web/src/io/fetchDatasets.ts b/packages_rs/nextclade-web/src/io/fetchDatasets.ts index d29a504c97..2f178afd70 100644 --- a/packages_rs/nextclade-web/src/io/fetchDatasets.ts +++ b/packages_rs/nextclade-web/src/io/fetchDatasets.ts @@ -1,5 +1,8 @@ +/* eslint-disable prefer-destructuring */ import type { ParsedUrlQuery } from 'querystring' import { findSimilarStrings } from 'src/helpers/string' +import { axiosFetchMaybe } from 'src/io/axiosFetch' +import { isGithubUrlOrShortcut, parseGitHubRepoUrlOrShortcut } from 'src/io/fetchSingleDatasetFromGithub' import { Dataset } from 'src/types' import { @@ -10,9 +13,11 @@ import { } from 'src/io/fetchDatasetsIndex' import { getQueryParamMaybe } from 'src/io/getQueryParamMaybe' import { useRecoilValue, useSetRecoilState } from 'recoil' -import { datasetCurrentAtom, datasetsAtom, datasetServerUrlAtom, datasetUpdatedAtom } from 'src/state/dataset.state' +import { datasetCurrentAtom, datasetsAtom, datasetUpdatedAtom } from 'src/state/dataset.state' import { useQuery } from 'react-query' import { isNil } from 'lodash' +import urljoin from 'url-join' +import { URL_GITHUB_DATA_RAW } from 'src/constants' export async function 
getDatasetFromUrlParams(urlQuery: ParsedUrlQuery, datasets: Dataset[]) { // Retrieve dataset-related URL params and try to find a dataset based on these params @@ -41,8 +46,60 @@ export async function getDatasetFromUrlParams(urlQuery: ParsedUrlQuery, datasets return dataset } -export async function initializeDatasets(urlQuery: ParsedUrlQuery, datasetServerUrlDefault: string) { - const datasetServerUrl = getQueryParamMaybe(urlQuery, 'dataset-server') ?? datasetServerUrlDefault +export async function getGithubDatasetServerUrl(): Promise<string | undefined> { + const BRANCH_NAME = process.env.BRANCH_NAME + if (!BRANCH_NAME) { + return undefined + } + + const githubDatasetServerUrl = urljoin(URL_GITHUB_DATA_RAW, BRANCH_NAME) + const githubIndexJsonUrl = urljoin(githubDatasetServerUrl, 'data_output', 'index.json') + + if (await axiosFetchMaybe(githubIndexJsonUrl)) { + return githubIndexJsonUrl // NOTE(review): this is the index.json URL, not the server root - confirm that callers expect it + } + + return undefined +} + +export function toAbsoluteUrl(url: string): string { + if (typeof window !== 'undefined' && url.startsWith('/')) { + return urljoin(window.location.origin, url) + } + return url +} + +export async function getDatasetServerUrl(urlQuery: ParsedUrlQuery) { + // Get dataset URL from query URL params. + let datasetServerUrl = getQueryParamMaybe(urlQuery, 'dataset-server') + + // If the URL is formatted as a GitHub URL or as a GitHub URL shortcut, use it without any checking + if (datasetServerUrl && isGithubUrlOrShortcut(datasetServerUrl)) { + const { owner, repo, branch, path } = await parseGitHubRepoUrlOrShortcut(datasetServerUrl) + return urljoin('https://raw.githubusercontent.com', owner, repo, branch, path) + } + + // If requested to try GitHub-hosted datasets either using `DATA_TRY_GITHUB_BRANCH` env var (e.g. from + // `.env` file), or using `&dataset-server=gh` or `&dataset-server=github` URL parameters, then check if the + // corresponding branch in the default data repo on GitHub contains an `index.json` file. And if yes, use it. 
+ const datasetServerTryGithubBranch = + process.env.DATA_TRY_GITHUB_BRANCH === '1' || (datasetServerUrl && ['gh', 'github'].includes(datasetServerUrl)) + if (datasetServerTryGithubBranch) { + const githubDatasetServerUrl = await getGithubDatasetServerUrl() + // If the lookup failed, discard the `gh`/`github` keyword (it is not a URL), so that the default below applies + const fallbackUrl = datasetServerUrl && ['gh', 'github'].includes(datasetServerUrl) ? undefined : datasetServerUrl + datasetServerUrl = githubDatasetServerUrl ?? fallbackUrl + } + + // If none of the above, use hardcoded default URL (from `.env` file) + datasetServerUrl = datasetServerUrl ?? process.env.DATA_FULL_DOMAIN ?? '/' + + // If the URL happens to be a relative path, then convert to absolute URL (on the app's current host) + return toAbsoluteUrl(datasetServerUrl) +} + +export async function initializeDatasets(urlQuery: ParsedUrlQuery) { + const datasetServerUrl = await getDatasetServerUrl(urlQuery) const datasetsIndexJson = await fetchDatasetsIndex(datasetServerUrl) @@ -57,11 +114,10 @@ export async function initializeDatasets(urlQuery: ParsedUrlQuery, datasetServer /** Refetch dataset index periodically and update the local copy of if */ export function useUpdatedDatasetIndex() { const setDatasetsState = useSetRecoilState(datasetsAtom) - const datasetServerUrl = useRecoilValue(datasetServerUrlAtom) useQuery( 'refetchDatasetIndex', async () => { - const { currentDataset: _, ...datasetsState } = await initializeDatasets({}, datasetServerUrl) + const { currentDataset: _, ...datasetsState } = await initializeDatasets({}) setDatasetsState(datasetsState) }, { @@ -89,10 +145,9 @@ export function useUpdatedDataset() { 'currentDatasetState', async () => { const path = datasetCurrent?.path - const refAccession = datasetCurrent?.attributes.reference.value const updatedAt = datasetCurrent?.version?.updatedAt - if (!isNil(refAccession) && !isNil(updatedAt)) { - const candidateDatasets = filterDatasets(datasets, path, refAccession) + if (!isNil(updatedAt)) { + const candidateDatasets = filterDatasets(datasets, path) const updatedDataset = candidateDatasets.find((candidate) => { const candidateTag = 
candidate.version?.updatedAt return candidateTag && candidateTag > updatedAt diff --git a/packages_rs/nextclade-web/src/pages/_app.tsx b/packages_rs/nextclade-web/src/pages/_app.tsx index 6a4f53a32b..20868096e7 100644 --- a/packages_rs/nextclade-web/src/pages/_app.tsx +++ b/packages_rs/nextclade-web/src/pages/_app.tsx @@ -48,7 +48,7 @@ import { SEO } from 'src/components/Common/SEO' import { Plausible } from 'src/components/Common/Plausible' import i18n, { changeLocale, getLocaleWithKey } from 'src/i18n/i18n' import { theme } from 'src/theme' -import { datasetCurrentAtom, datasetsAtom, datasetServerUrlAtom } from 'src/state/dataset.state' +import { datasetCurrentAtom, datasetsAtom } from 'src/state/dataset.state' import { ErrorBoundary } from 'src/components/Error/ErrorBoundary' import { PreviewWarning } from 'src/components/Common/PreviewWarning' @@ -99,8 +99,7 @@ export function RecoilStateInitializer() { return datasetInfo } - const datasetServerUrlDefault = await getPromise(datasetServerUrlAtom) - return initializeDatasets(urlQuery, datasetServerUrlDefault) + return initializeDatasets(urlQuery) }) .catch((error) => { // Dataset error is fatal and we want error to be handled in the ErrorBoundary diff --git a/packages_rs/nextclade-web/src/state/dataset.state.ts b/packages_rs/nextclade-web/src/state/dataset.state.ts index 0da894fe4d..b0c7cb6a44 100644 --- a/packages_rs/nextclade-web/src/state/dataset.state.ts +++ b/packages_rs/nextclade-web/src/state/dataset.state.ts @@ -1,6 +1,5 @@ import { isNil } from 'lodash' import { atom, DefaultValue, selector } from 'recoil' -import urljoin from 'url-join' import type { Dataset } from 'src/types' // import { GENE_OPTION_NUC_SEQUENCE } from 'src/constants' @@ -10,20 +9,6 @@ import { persistAtom } from 'src/state/persist/localStorage' import { isDefaultValue } from 'src/state/utils/isDefaultValue' import { areDatasetsEqual } from 'src/types' -export function getDefaultDatasetServer(): string { - let datasetServerUrl = 
process.env.DATA_FULL_DOMAIN ?? '/' - // Add HTTP Origin if datasetServerUrl is a relative path (start with '/') - if (typeof window !== 'undefined' && datasetServerUrl.slice(0) === '/') { - datasetServerUrl = urljoin(window.location.origin, datasetServerUrl) - } - return datasetServerUrl -} - -export const datasetServerUrlAtom = atom({ - key: 'datasetServerUrl', - default: getDefaultDatasetServer(), -}) - export interface Datasets { datasets: Dataset[] } diff --git a/scripts/build_on_vercel.sh b/scripts/build_on_vercel.sh index 11a877f2ed..e28133150f 100755 --- a/scripts/build_on_vercel.sh +++ b/scripts/build_on_vercel.sh @@ -115,6 +115,8 @@ sed -i'' "s|PROD_ENABLE_TYPE_CHECKS=1|PROD_ENABLE_TYPE_CHECKS=0|g" .env sed -i'' "s|PROD_ENABLE_ESLINT=1|PROD_ENABLE_ESLINT=0|g" .env sed -i'' "s|PROD_ENABLE_STYLELINT=1|PROD_ENABLE_STYLELINT=0|g" .env +sed -i'' "s|DATA_TRY_GITHUB_BRANCH=0|DATA_TRY_GITHUB_BRANCH=1|g" .env + cd packages_rs/nextclade-web yarn install --frozen-lockfile