diff --git a/.github/workflows/publish-helm-chart.yaml b/.github/workflows/publish-helm-chart.yaml index 5c4a7d692c..acd16db5a0 100644 --- a/.github/workflows/publish-helm-chart.yaml +++ b/.github/workflows/publish-helm-chart.yaml @@ -23,6 +23,7 @@ jobs: run: | mkdir .chart-out helm package chart/ --destination .chart-out + helm package chart/proxies/ --destination .chart-out - name: Get Version run: | @@ -49,7 +50,9 @@ jobs: See [the development guide](https://docs.browsertrix.com/deploy/) for more info how to deploy Browsertrix. - files: .chart-out/browsertrix-v${{ env.version }}.tgz + files: | + .chart-out/browsertrix-v${{ env.version }}.tgz + .chart-out/btrix-proxies-0.1.0.tgz tag_name: v${{ env.version }} draft: true fail_on_unmatched_files: true diff --git a/backend/btrixcloud/crawlconfigs.py b/backend/btrixcloud/crawlconfigs.py index d9846fd2ea..5a0551f141 100644 --- a/backend/btrixcloud/crawlconfigs.py +++ b/backend/btrixcloud/crawlconfigs.py @@ -10,6 +10,7 @@ import json import re import os +import traceback from datetime import datetime from uuid import UUID, uuid4 import urllib.parse @@ -39,6 +40,8 @@ CrawlConfigSearchValues, CrawlConfigUpdateResponse, CrawlConfigDeletedResponse, + CrawlerProxy, + CrawlerProxies, ) from .utils import dt_now, slug_from_name @@ -63,6 +66,8 @@ "name", ) +DEFAULT_PROXY_ID: str | None = os.environ.get("DEFAULT_PROXY_ID") + # ============================================================================ class CrawlConfigOps: @@ -125,6 +130,14 @@ def __init__( if "default" not in self.crawler_images_map: raise TypeError("The channel list must include a 'default' channel") + self._crawler_proxies_last_updated = None + self._crawler_proxies_map = None + + if DEFAULT_PROXY_ID and DEFAULT_PROXY_ID not in self.get_crawler_proxies_map(): + raise ValueError( + f"Configured proxies must include DEFAULT_PROXY_ID: {DEFAULT_PROXY_ID}" + ) + def set_crawl_ops(self, ops): """set crawl ops reference""" self.crawl_ops = ops @@ -168,7 +181,9 @@ async def get_profile_filename( if not profileid: return "" - profile_filename = await self.profiles.get_profile_storage_path(profileid, org) + profile_filename, _ = await self.profiles.get_profile_storage_path_and_proxy( + profileid, org + ) if not profile_filename: raise HTTPException(status_code=400, detail="invalid_profile_id") @@ -195,6 +210,11 @@ async def add_crawl_config( if profileid: await self.profiles.get_profile(profileid, org) + # ensure proxyId is valid and available for org + if config_in.proxyId: + if not self.can_org_use_proxy(org, config_in.proxyId): + raise HTTPException(status_code=404, detail="proxy_not_found") + now = dt_now() crawlconfig = CrawlConfig( id=uuid4(), @@ -218,6 +238,7 @@ async def add_crawl_config( profileid=profileid, crawlerChannel=config_in.crawlerChannel, crawlFilenameTemplate=config_in.crawlFilenameTemplate, + proxyId=config_in.proxyId, ) if config_in.runNow: @@ -331,6 +352,8 @@ async def update_crawl_config( and ((not update.profileid) != (not orig_crawl_config.profileid)) ) + changed = changed or (orig_crawl_config.proxyId != update.proxyId) + metadata_changed = self.check_attr_changed(orig_crawl_config, update, "name") metadata_changed = metadata_changed or self.check_attr_changed( orig_crawl_config, update, "description" @@ -829,6 +852,9 @@ async def run_now_internal( if await self.get_running_crawl(crawlconfig.id): raise HTTPException(status_code=400, detail="crawl_already_running") + if crawlconfig.proxyId and not self.can_org_use_proxy(org, crawlconfig.proxyId): + raise 
HTTPException(status_code=404, detail="proxy_not_found") + profile_filename = await self.get_profile_filename(crawlconfig.profileid, org) storage_filename = ( crawlconfig.crawlFilenameTemplate or self.default_filename_template @@ -848,6 +874,7 @@ async def run_now_internal( except Exception as exc: # pylint: disable=raise-missing-from + print(traceback.format_exc()) raise HTTPException(status_code=500, detail=f"Error starting crawl: {exc}") async def set_config_current_crawl_info( @@ -897,6 +924,68 @@ def get_channel_crawler_image( """Get crawler image name by id""" return self.crawler_images_map.get(crawler_channel or "") + def get_crawler_proxies_map(self) -> dict[str, CrawlerProxy]: + """Load CrawlerProxy mapping from config""" + proxies_last_update_path = os.environ["CRAWLER_PROXIES_LAST_UPDATE"] + + if not os.path.isfile(proxies_last_update_path): + return {} + + # return cached data, when last_update timestamp hasn't changed + if self._crawler_proxies_last_updated and self._crawler_proxies_map: + with open(proxies_last_update_path, encoding="utf-8") as fh: + proxies_last_update = int(fh.read().strip()) + if proxies_last_update == self._crawler_proxies_last_updated: + return self._crawler_proxies_map + self._crawler_proxies_last_updated = proxies_last_update + + crawler_proxies_map: dict[str, CrawlerProxy] = {} + with open(os.environ["CRAWLER_PROXIES_JSON"], encoding="utf-8") as fh: + proxy_list = json.loads(fh.read()) + for proxy_data in proxy_list: + proxy = CrawlerProxy( + id=proxy_data["id"], + label=proxy_data["label"], + description=proxy_data.get("description", ""), + country_code=proxy_data.get("country_code", ""), + url=proxy_data["url"], + has_host_public_key=bool(proxy_data.get("ssh_host_public_key")), + has_private_key=bool(proxy_data.get("ssh_private_key")), + shared=proxy_data.get("shared", False) + or proxy_data["id"] == DEFAULT_PROXY_ID, + ) + + crawler_proxies_map[proxy.id] = proxy + + self._crawler_proxies_map = crawler_proxies_map + return self._crawler_proxies_map + + def get_crawler_proxies(self): + """Get CrawlerProxy configuration""" + return CrawlerProxies( + default_proxy_id=DEFAULT_PROXY_ID, + servers=list(self.get_crawler_proxies_map().values()), + ) + + def get_crawler_proxy(self, proxy_id: str) -> Optional[CrawlerProxy]: + """Get crawlerProxy by id""" + return self.get_crawler_proxies_map().get(proxy_id) + + def can_org_use_proxy(self, org: Organization, proxy: CrawlerProxy | str) -> bool: + """Checks if org is able to use proxy""" + + if isinstance(proxy, str): + _proxy = self.get_crawler_proxy(proxy) + else: + _proxy = proxy + + if _proxy is None: + return False + + return ( + _proxy.shared and org.allowSharedProxies + ) or _proxy.id in org.allowedProxies + def get_warc_prefix(self, org: Organization, crawlconfig: CrawlConfig) -> str: """Generate WARC prefix slug from org slug, name or url if no name is provided, hostname is used from url, otherwise @@ -983,6 +1072,7 @@ async def stats_recompute_all(crawl_configs, crawls, cid: UUID): # ============================================================================ # pylint: disable=redefined-builtin,invalid-name,too-many-locals,too-many-arguments def init_crawl_config_api( + app, dbclient, mdb, user_dep, @@ -1060,6 +1150,28 @@ async def get_crawler_channels( ): return ops.crawler_channels + @router.get("/crawler-proxies", response_model=CrawlerProxies) + async def get_crawler_proxies( + org: Organization = Depends(org_crawl_dep), + ): + return CrawlerProxies( + default_proxy_id=DEFAULT_PROXY_ID, + servers=[ 
+ proxy + for proxy in ops.get_crawler_proxies_map().values() + if ops.can_org_use_proxy(org, proxy) + ], + ) + + @app.get("/orgs/all/crawlconfigs/crawler-proxies", response_model=CrawlerProxies) + async def get_all_crawler_proxies( + user: User = Depends(user_dep), + ): + if not user.is_superuser: + raise HTTPException(status_code=403, detail="Not Allowed") + + return ops.get_crawler_proxies() + @router.get("/{cid}/seeds", response_model=PaginatedSeedResponse) async def get_crawl_config_seeds( cid: UUID, diff --git a/backend/btrixcloud/crawlmanager.py b/backend/btrixcloud/crawlmanager.py index dd2ef3d116..37d392ffad 100644 --- a/backend/btrixcloud/crawlmanager.py +++ b/backend/btrixcloud/crawlmanager.py @@ -1,7 +1,6 @@ """ shared crawl manager implementation """ import os -import asyncio import secrets from typing import Optional, Dict @@ -16,13 +15,12 @@ # ============================================================================ -class CrawlManager(K8sAPI): - """abstract crawl manager""" +DEFAULT_PROXY_ID: str = os.environ.get("DEFAULT_PROXY_ID", "") - def __init__(self): - super().__init__() - self.loop = asyncio.get_running_loop() +# ============================================================================ +class CrawlManager(K8sAPI): + """abstract crawl manager""" # pylint: disable=too-many-arguments async def run_profile_browser( @@ -34,6 +32,7 @@ async def run_profile_browser( crawler_image: str, baseprofile: str = "", profile_filename: str = "", + proxy_id: str = "", ) -> str: """run browser for profile creation""" @@ -55,6 +54,7 @@ async def run_profile_browser( "vnc_password": secrets.token_hex(16), "expire_time": date_to_str(dt_now() + timedelta(seconds=30)), "crawler_image": crawler_image, + "proxy_id": proxy_id or DEFAULT_PROXY_ID, } data = self.templates.env.get_template("profile_job.yaml").render(params) @@ -138,6 +138,7 @@ async def create_crawl_job( warc_prefix=warc_prefix, storage_filename=storage_filename, profile_filename=profile_filename, + proxy_id=crawlconfig.proxyId or DEFAULT_PROXY_ID, ) async def create_qa_crawl_job( diff --git a/backend/btrixcloud/crawls.py b/backend/btrixcloud/crawls.py index 1b9c6e8015..81cf731e87 100644 --- a/backend/btrixcloud/crawls.py +++ b/backend/btrixcloud/crawls.py @@ -379,6 +379,7 @@ async def add_new_crawl( tags=crawlconfig.tags, name=crawlconfig.name, crawlerChannel=crawlconfig.crawlerChannel, + proxyId=crawlconfig.proxyId, image=image, ) diff --git a/backend/btrixcloud/k8sapi.py b/backend/btrixcloud/k8sapi.py index dc9175cc46..238155d212 100644 --- a/backend/btrixcloud/k8sapi.py +++ b/backend/btrixcloud/k8sapi.py @@ -2,8 +2,8 @@ import os import traceback - from typing import Optional + import yaml from kubernetes_asyncio import client, config @@ -93,6 +93,7 @@ def new_crawl_job_yaml( storage_filename: str = "", profile_filename: str = "", qa_source: str = "", + proxy_id: str = "", ): """load job template from yaml""" if not crawl_id: @@ -115,6 +116,7 @@ def new_crawl_job_yaml( "storage_filename": storage_filename, "profile_filename": profile_filename, "qa_source": qa_source, + "proxy_id": proxy_id, } data = self.templates.env.get_template("crawl_job.yaml").render(params) @@ -136,6 +138,7 @@ async def new_crawl_job( storage_filename: str = "", profile_filename: str = "", qa_source: str = "", + proxy_id: str = "", ) -> str: """load and init crawl job via k8s api""" crawl_id, data = self.new_crawl_job_yaml( @@ -153,6 +156,7 @@ async def new_crawl_job( storage_filename=storage_filename, profile_filename=profile_filename, 
qa_source=qa_source, + proxy_id=proxy_id, ) # create job directly diff --git a/backend/btrixcloud/main.py b/backend/btrixcloud/main.py index cb1610a98a..0bc3e48982 100644 --- a/backend/btrixcloud/main.py +++ b/backend/btrixcloud/main.py @@ -205,6 +205,7 @@ def main() -> None: ) crawl_config_ops = init_crawl_config_api( + app, dbclient, mdb, current_active_user, diff --git a/backend/btrixcloud/models.py b/backend/btrixcloud/models.py index a011cbf186..e5d2e45941 100644 --- a/backend/btrixcloud/models.py +++ b/backend/btrixcloud/models.py @@ -350,6 +350,7 @@ class CrawlConfigIn(BaseModel): profileid: Union[UUID, EmptyStr, None] = None crawlerChannel: str = "default" + proxyId: Optional[str] = None autoAddCollections: Optional[List[UUID]] = [] tags: Optional[List[str]] = [] @@ -373,6 +374,7 @@ class ConfigRevision(BaseMongoModel): profileid: Optional[UUID] = None crawlerChannel: Optional[str] = None + proxyId: Optional[str] = None crawlTimeout: Optional[int] = 0 maxCrawlSize: Optional[int] = 0 @@ -403,6 +405,7 @@ class CrawlConfigCore(BaseMongoModel): profileid: Optional[UUID] = None crawlerChannel: Optional[str] = None + proxyId: Optional[str] = None # ============================================================================ @@ -500,6 +503,7 @@ class UpdateCrawlConfig(BaseModel): schedule: Optional[str] = None profileid: Union[UUID, EmptyStr, None] = None crawlerChannel: Optional[str] = None + proxyId: Optional[str] = None crawlTimeout: Optional[int] = None maxCrawlSize: Optional[int] = None scale: Scale = 1 @@ -523,6 +527,7 @@ class CrawlConfigDefaults(BaseModel): profileid: Optional[UUID] = None crawlerChannel: Optional[str] = None + proxyId: Optional[str] = None lang: Optional[str] = None @@ -601,6 +606,40 @@ class CrawlerChannels(BaseModel): channels: List[CrawlerChannel] = [] +# ============================================================================ + +### PROXIES ### + + +class CrawlerProxy(BaseModel): + """proxy definition""" + + id: str + url: str + label: str + description: str = "" + country_code: str = "" + has_host_public_key: bool = False + has_private_key: bool = False + shared: bool = False + + +# ============================================================================ +class CrawlerProxies(BaseModel): + """List of CrawlerProxy instances for API""" + + default_proxy_id: Optional[str] = None + servers: List[CrawlerProxy] = [] + + +# ============================================================================ +class OrgProxies(BaseModel): + """Org proxy settings for API""" + + allowSharedProxies: bool + allowedProxies: list[str] + + # ============================================================================ ### BASE CRAWLS ### @@ -794,6 +833,7 @@ class CrawlOut(BaseMongoModel): execMinutesQuotaReached: Optional[bool] = False crawlerChannel: str = "default" + proxyId: Optional[str] = None image: Optional[str] = None reviewStatus: ReviewStatus = None @@ -1388,6 +1428,8 @@ class OrgOut(BaseMongoModel): subscription: Optional[Subscription] = None + allowSharedProxies: bool = False + allowedProxies: list[str] = [] crawlingDefaults: Optional[CrawlConfigDefaults] = None @@ -1441,6 +1483,8 @@ class Organization(BaseMongoModel): subscription: Optional[Subscription] = None + allowSharedProxies: bool = False + allowedProxies: list[str] = [] crawlingDefaults: Optional[CrawlConfigDefaults] = None def is_owner(self, user): @@ -1665,6 +1709,7 @@ class Profile(BaseMongoModel): baseid: Optional[UUID] = None crawlerChannel: Optional[str] = None + proxyId: Optional[str] = None # 
============================================================================ @@ -1687,6 +1732,7 @@ class ProfileLaunchBrowserIn(UrlIn): profileId: Optional[UUID] = None crawlerChannel: str = "default" + proxyId: Optional[str] = None # ============================================================================ @@ -1704,6 +1750,7 @@ class ProfileCreate(BaseModel): name: str description: Optional[str] = "" crawlerChannel: str = "default" + proxyId: Optional[str] = None # ============================================================================ diff --git a/backend/btrixcloud/operator/crawls.py b/backend/btrixcloud/operator/crawls.py index b974fe5058..d170dd8c5b 100644 --- a/backend/btrixcloud/operator/crawls.py +++ b/backend/btrixcloud/operator/crawls.py @@ -130,7 +130,7 @@ async def sync_crawls(self, data: MCSyncData): status = CrawlStatus(**data.parent.get("status", {})) status.last_state = status.state - spec = data.parent.get("spec", {}) + spec = data.parent.get("spec", {}) # spec is the data from crawl_job.yaml crawl_id = spec["id"] cid = spec["cid"] oid = spec["oid"] @@ -152,6 +152,7 @@ async def sync_crawls(self, data: MCSyncData): oid=oid, storage=StorageRef(spec["storageName"]), crawler_channel=spec.get("crawlerChannel"), + proxy_id=spec.get("proxyId"), scale=spec.get("scale", 1), started=data.parent["metadata"]["creationTimestamp"], stopping=spec.get("stopping", False), @@ -283,6 +284,14 @@ async def sync_crawls(self, data: MCSyncData): params["crawler_image"] = status.crawlerImage + if crawl.proxy_id and not crawl.is_qa: + proxy = self.crawl_config_ops.get_crawler_proxy(crawl.proxy_id) + if proxy: + params["proxy_id"] = crawl.proxy_id + params["proxy_url"] = proxy.url + params["proxy_ssh_private_key"] = proxy.has_private_key + params["proxy_ssh_host_public_key"] = proxy.has_host_public_key + params["storage_filename"] = spec["storage_filename"] params["restart_time"] = spec.get("restartTime") diff --git a/backend/btrixcloud/operator/cronjobs.py b/backend/btrixcloud/operator/cronjobs.py index 74a43b0b44..9d0367aec3 100644 --- a/backend/btrixcloud/operator/cronjobs.py +++ b/backend/btrixcloud/operator/cronjobs.py @@ -93,11 +93,20 @@ async def make_new_crawljob( if org.readOnly: print( - f"org {org.id} set to read-only. skipping scheduled crawl for workflow {cid}" + f'org "{org.slug}" set to read-only. 
skipping scheduled crawl for workflow {cid}' ) return self.get_finished_response(metadata) - # if no db state, crawl crawl in the db + if crawlconfig.proxyId and not self.crawl_config_ops.get_crawler_proxy( + crawlconfig.proxyId + ): + print( + f"proxy {crawlconfig.proxyId} missing, skipping scheduled crawl for " + + f'workflow {cid} in "{org.slug}"' + ) + return self.get_finished_response(metadata) + + # if no db state, add crawl in the db if not state: await self.crawl_config_ops.add_new_crawl( crawl_id, @@ -125,6 +134,7 @@ async def make_new_crawljob( warc_prefix=warc_prefix, storage_filename=self.crawl_config_ops.default_filename_template, profile_filename=profile_filename or "", + proxy_id=crawlconfig.proxyId or "", ) return MCDecoratorSyncResponse(attachments=list(yaml.safe_load_all(crawljob))) diff --git a/backend/btrixcloud/operator/models.py b/backend/btrixcloud/operator/models.py index 0a31fc793d..067814b7fe 100644 --- a/backend/btrixcloud/operator/models.py +++ b/backend/btrixcloud/operator/models.py @@ -79,6 +79,7 @@ class CrawlSpec(BaseModel): timeout: int = 0 max_crawl_size: int = 0 qa_source_crawl_id: Optional[str] = "" + proxy_id: Optional[str] = None @property def db_crawl_id(self) -> str: @@ -207,6 +208,7 @@ class CrawlStatus(BaseModel): stopReason: Optional[StopReason] = None initRedis: bool = False crawlerImage: Optional[str] = None + lastActiveTime: str = "" podStatus: DefaultDict[str, Annotated[PodInfo, Field(default_factory=PodInfo)]] = ( defaultdict(lambda: PodInfo()) # pylint: disable=unnecessary-lambda diff --git a/backend/btrixcloud/operator/profiles.py b/backend/btrixcloud/operator/profiles.py index 922b49d453..11b528a3f6 100644 --- a/backend/btrixcloud/operator/profiles.py +++ b/backend/btrixcloud/operator/profiles.py @@ -46,6 +46,15 @@ async def sync_profile_browsers(self, data: MCSyncData): params["profile_filename"] = spec.get("profileFilename", "") params["crawler_image"] = spec["crawlerImage"] + proxy_id = spec.get("proxyId") + if proxy_id: + proxy = self.crawl_config_ops.get_crawler_proxy(proxy_id) + if proxy: + params["proxy_id"] = proxy_id + params["proxy_url"] = proxy.url + params["proxy_ssh_private_key"] = proxy.has_private_key + params["proxy_ssh_host_public_key"] = proxy.has_host_public_key + params["url"] = spec.get("startUrl", "about:blank") params["vnc_password"] = spec.get("vncPassword") diff --git a/backend/btrixcloud/orgs.py b/backend/btrixcloud/orgs.py index 2b278caf77..3889c42b94 100644 --- a/backend/btrixcloud/orgs.py +++ b/backend/btrixcloud/orgs.py @@ -38,6 +38,7 @@ OrgMetrics, OrgWebhookUrls, OrgCreate, + OrgProxies, Subscription, SubscriptionUpdate, SubscriptionCancel, @@ -515,6 +516,18 @@ async def update_custom_storages(self, org: Organization) -> bool: res = await self.orgs.find_one_and_update({"_id": org.id}, {"$set": set_dict}) return res is not None + async def update_proxies(self, org: Organization, proxies: OrgProxies) -> None: + """Update org proxy settings""" + await self.orgs.find_one_and_update( + {"_id": org.id}, + { + "$set": { + "allowSharedProxies": proxies.allowSharedProxies, + "allowedProxies": proxies.allowedProxies, + } + }, + ) + async def update_quotas(self, org: Organization, quotas: OrgQuotasIn) -> None: """update organization quotas""" @@ -1483,6 +1496,19 @@ async def update_quotas( return {"updated": True} + @router.post("/proxies", tags=["organizations"], response_model=UpdatedResponse) + async def update_proxies( + proxies: OrgProxies, + org: Organization = Depends(org_owner_dep), + user: User = 
Depends(user_dep), + ): + if not user.is_superuser: + raise HTTPException(status_code=403, detail="Not Allowed") + + await ops.update_proxies(org, proxies) + + return {"updated": True} + @router.post("/read-only", tags=["organizations"], response_model=UpdatedResponse) async def update_read_only( update: OrgReadOnlyUpdate, diff --git a/backend/btrixcloud/profiles.py b/backend/btrixcloud/profiles.py index fe16e80b9d..ab72422472 100644 --- a/backend/btrixcloud/profiles.py +++ b/backend/btrixcloud/profiles.py @@ -91,9 +91,12 @@ async def create_new_browser( """Create new profile""" prev_profile_path = "" prev_profile_id = "" + prev_proxy_id = "" if profile_launch.profileId: - prev_profile_path = await self.get_profile_storage_path( - profile_launch.profileId, org + prev_profile_path, prev_proxy_id = ( + await self.get_profile_storage_path_and_proxy( + profile_launch.profileId, org + ) ) if not prev_profile_path: @@ -107,6 +110,12 @@ async def create_new_browser( if not crawler_image: raise HTTPException(status_code=404, detail="crawler_not_found") + # use either specified proxyId or if none, use proxyId from existing profile + proxy_id = profile_launch.proxyId or prev_proxy_id + + if proxy_id and not self.crawlconfigs.can_org_use_proxy(org, proxy_id): + raise HTTPException(status_code=404, detail="proxy_not_found") + browserid = await self.crawl_manager.run_profile_browser( str(user.id), str(org.id), @@ -115,6 +124,7 @@ async def create_new_browser( crawler_image=crawler_image, baseprofile=prev_profile_id, profile_filename=prev_profile_path, + proxy_id=proxy_id, ) if not browserid: @@ -238,6 +248,7 @@ async def commit_to_profile( oid=org.id, baseid=baseid, crawlerChannel=browser_commit.crawlerChannel, + proxyId=browser_commit.proxyId, ) await self.profiles.find_one_and_update( @@ -362,18 +373,19 @@ async def get_profile_with_configs( return ProfileWithCrawlConfigs(crawlconfigs=crawlconfigs, **profile.dict()) - async def get_profile_storage_path( + async def get_profile_storage_path_and_proxy( self, profileid: UUID, org: Optional[Organization] = None - ) -> str: + ) -> tuple[str, str]: """return profile path filename (relative path) for given profile id and org""" try: profile = await self.get_profile(profileid, org) - return profile.resource.filename if profile.resource else "" + storage_path = profile.resource.filename if profile.resource else "" + return storage_path, profile.proxyId or "" # pylint: disable=bare-except except: pass - return "" + return "", "" async def get_profile_name( self, profileid: UUID, org: Optional[Organization] = None @@ -561,6 +573,7 @@ async def commit_browser_to_existing( name=browser_commit.name, description=browser_commit.description or profile.description, crawlerChannel=profile.crawlerChannel, + proxyId=profile.proxyId, ), org=org, user=user, diff --git a/backend/btrixcloud/users.py b/backend/btrixcloud/users.py index 6519d1cc9c..3e5b6fcc16 100644 --- a/backend/btrixcloud/users.py +++ b/backend/btrixcloud/users.py @@ -276,7 +276,7 @@ async def create_super_user(self) -> None: superuser = await self.get_superuser() if superuser: if str(superuser.email) != email: - await self.update_email_name(superuser, EmailStr(email), name) + await self.update_email_name(superuser, cast(EmailStr, email), name) print("Superuser email updated") if not await self.check_password(superuser, password): diff --git a/backend/requirements.txt b/backend/requirements.txt index b29d24d492..fc49b9506e 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,8 +1,7 @@ 
gunicorn uvicorn[standard] fastapi==0.103.2 -motor==3.3.1 -pymongo==4.8.0 +motor passlib PyJWT==2.8.0 pydantic==2.8.2 diff --git a/chart/Chart.lock b/chart/Chart.lock index 840747e9d1..fec463cfa0 100644 --- a/chart/Chart.lock +++ b/chart/Chart.lock @@ -8,5 +8,8 @@ dependencies: - name: metacontroller-helm repository: oci://ghcr.io/metacontroller version: 4.11.11 -digest: sha256:ae000dbd876ade6de33de1b8740f73683e2a783847f7e73e2cac4a0c2ee4d797 -generated: "2024-03-26T21:24:31.761944-07:00" +- name: btrix-proxies + repository: file://./proxies/ + version: 0.1.0 +digest: sha256:2fd9472f857e9e3eacdcc616a3cffac5bb2951411cc2d34aea84253092225ecf +generated: "2024-08-15T11:19:17.884682494+02:00" diff --git a/chart/Chart.yaml b/chart/Chart.yaml index 700ec2863b..e76d43f28c 100644 --- a/chart/Chart.yaml +++ b/chart/Chart.yaml @@ -18,3 +18,7 @@ dependencies: - name: metacontroller-helm version: 4.11.11 repository: "oci://ghcr.io/metacontroller" + - name: btrix-proxies + version: 0.1.0 + condition: btrix-proxies.enabled + repository: file://./proxies/ diff --git a/chart/app-templates/crawl_job.yaml b/chart/app-templates/crawl_job.yaml index 16c19dbc86..002372c65d 100644 --- a/chart/app-templates/crawl_job.yaml +++ b/chart/app-templates/crawl_job.yaml @@ -34,3 +34,4 @@ spec: storageName: "{{ storage_name }}" + proxyId: "{{ proxy_id }}" diff --git a/chart/app-templates/crawler.yaml b/chart/app-templates/crawler.yaml index 81836be54b..a00d4af332 100644 --- a/chart/app-templates/crawler.yaml +++ b/chart/app-templates/crawler.yaml @@ -52,8 +52,8 @@ spec: securityContext: runAsNonRoot: true - runAsUser: {{ crawler_uid}} - runAsGroup: {{ crawler_gid}} + runAsUser: {{ crawler_uid }} + runAsGroup: {{ crawler_gid }} fsGroup: {{ crawler_fsgroup }} allowPrivilegeEscalation: false readOnlyRootFilesystem: true @@ -71,7 +71,16 @@ spec: - name: crawl-data persistentVolumeClaim: claimName: {{ name }} - + {% if proxy_id %} + - name: proxies + secret: + secretName: proxies + defaultMode: 0600 + - name: force-user-and-group-name + secret: + secretName: force-user-and-group-name + defaultMode: 0600 + {% endif %} affinity: {% if crawler_node_type %} @@ -130,6 +139,18 @@ spec: - --profile - "@{{ profile_filename }}" {% endif %} + {% if proxy_id %} + - --proxyServer + - "{{ proxy_url }}" + {% if proxy_ssh_private_key %} + - --sshProxyPrivateKeyFile + - /tmp/ssh-proxy/private-key + {% endif %} + {% if proxy_ssh_host_public_key %} + - --sshProxyKnownHostsFile + - /tmp/ssh-proxy/known-hosts + {% endif %} + {% endif %} volumeMounts: - name: crawl-config mountPath: /tmp/crawl-config.json @@ -141,7 +162,28 @@ spec: mountPath: /tmp/qa/ readOnly: True {% endif %} - + {% if proxy_id %} + {% if proxy_ssh_private_key %} + - name: proxies + mountPath: /tmp/ssh-proxy/private-key + subPath: {{ proxy_id }}-private-key + readOnly: true + {% endif %} + {% if proxy_ssh_host_public_key %} + - name: proxies + mountPath: /tmp/ssh-proxy/known-hosts + subPath: {{ proxy_id }}-known-hosts + readOnly: true + {% endif %} + - name: force-user-and-group-name + mountPath: /etc/passwd + subPath: passwd + readOnly: true + - name: force-user-and-group-name + mountPath: /etc/group + subPath: group + readOnly: true + {% endif %} - name: crawl-data mountPath: /crawls envFrom: @@ -178,15 +220,6 @@ spec: - name: WARC_PREFIX value: "{{ warc_prefix }}" - {% if crawler_socks_proxy_host %} - - name: SOCKS_HOST - value: "{{ crawler_socks_proxy_host }}" - {% if crawler_socks_proxy_port %} - - name: SOCKS_PORT - value: "{{ crawler_socks_proxy_port }}" - {% endif %} - {% 
endif %} - resources: limits: memory: "{{ memory_limit }}" diff --git a/chart/app-templates/profile_job.yaml b/chart/app-templates/profile_job.yaml index 2f3f6f8865..fc6f61fbfc 100644 --- a/chart/app-templates/profile_job.yaml +++ b/chart/app-templates/profile_job.yaml @@ -28,6 +28,8 @@ spec: profileFilename: "{{ profile_filename }}" vncPassword: "{{ vnc_password }}" + proxyId: "{{ proxy_id }}" + {% if expire_time %} expireTime: "{{ expire_time }}" {% endif %} diff --git a/chart/app-templates/profilebrowser.yaml b/chart/app-templates/profilebrowser.yaml index 8eda40c615..662fa5a17b 100644 --- a/chart/app-templates/profilebrowser.yaml +++ b/chart/app-templates/profilebrowser.yaml @@ -26,6 +26,17 @@ spec: emptyDir: sizeLimit: {{ profile_browser_workdir_size }} + {% if proxy_id %} + - name: proxies + secret: + secretName: proxies + defaultMode: 0600 + - name: force-user-and-group-name + secret: + secretName: force-user-and-group-name + defaultMode: 0600 + {% endif %} + {% if priorityClassName %} priorityClassName: {{ priorityClassName }} {% endif %} @@ -73,10 +84,44 @@ spec: - --profile - "@{{ profile_filename }}" {%- endif %} + {% if proxy_id %} + - --proxyServer + - "{{ proxy_url }}" + {% if proxy_ssh_private_key %} + - --sshProxyPrivateKeyFile + - /tmp/ssh-proxy/private-key + {% endif %} + {% if proxy_ssh_host_public_key %} + - --sshProxyKnownHostsFile + - /tmp/ssh-proxy/known-hosts + {% endif %} + {% endif %} volumeMounts: - name: crawler-workdir mountPath: /tmp/home + {% if proxy_id %} + {% if proxy_ssh_private_key %} + - name: proxies + mountPath: /tmp/ssh-proxy/private-key + subPath: {{ proxy_id }}-private-key + readOnly: true + {% endif %} + {% if proxy_ssh_host_public_key %} + - name: proxies + mountPath: /tmp/ssh-proxy/known-hosts + subPath: {{ proxy_id }}-known-hosts + readOnly: true + {% endif %} + - name: force-user-and-group-name + mountPath: /etc/passwd + subPath: passwd + readOnly: true + - name: force-user-and-group-name + mountPath: /etc/group + subPath: group + readOnly: true + {% endif %} envFrom: - secretRef: diff --git a/chart/charts/btrix-proxies-0.1.0.tgz b/chart/charts/btrix-proxies-0.1.0.tgz new file mode 100644 index 0000000000..33dca51c7d Binary files /dev/null and b/chart/charts/btrix-proxies-0.1.0.tgz differ diff --git a/chart/proxies/Chart.yaml b/chart/proxies/Chart.yaml new file mode 100644 index 0000000000..4632adeb69 --- /dev/null +++ b/chart/proxies/Chart.yaml @@ -0,0 +1,15 @@ +apiVersion: v2 +name: btrix-proxies +description: A chart deploying the configmap and secrets required for using proxies with Browsertrix +type: application +icon: https://webrecorder.net/assets/icon.png + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. 
+appVersion: 0.1.0 diff --git a/chart/proxies/templates/proxies.yaml b/chart/proxies/templates/proxies.yaml new file mode 100644 index 0000000000..33003c34fc --- /dev/null +++ b/chart/proxies/templates/proxies.yaml @@ -0,0 +1,34 @@ +{{- if .Values.proxies }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: proxies + namespace: {{ .Values.crawler_namespace | default "crawlers" }} +type: Opaque +stringData: +{{- range .Values.proxies }} + +{{- if .ssh_private_key }} + {{ .id }}-private-key: | +{{ .ssh_private_key | indent 4 }} +{{- end }} + +{{- if .ssh_host_public_key }} + {{ .id }}-known-hosts: | +{{ .ssh_host_public_key | indent 4 }} +{{- end }} + +{{- end }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: ops-proxy-configs + namespace: {{ .Release.Namespace }} + +type: Opaque +data: + crawler_proxies_last_update: {{ now | unixEpoch | toString | b64enc | quote }} + crawler_proxies.json: {{ .Values.proxies | toJson | b64enc | quote }} +{{- end }} diff --git a/chart/proxies/values.yaml b/chart/proxies/values.yaml new file mode 100644 index 0000000000..f0284e6c16 --- /dev/null +++ b/chart/proxies/values.yaml @@ -0,0 +1,2 @@ +proxies: [] # see proxies description in main helm chart +crawler_namespace: crawlers # namespace to deploy ssh keys to diff --git a/chart/templates/backend.yaml b/chart/templates/backend.yaml index a1577288ed..8f96fdd24f 100644 --- a/chart/templates/backend.yaml +++ b/chart/templates/backend.yaml @@ -43,6 +43,11 @@ spec: secret: secretName: ops-configs + - name: ops-proxy-configs + secret: + secretName: ops-proxy-configs + optional: true + - name: app-templates configMap: name: app-templates @@ -97,6 +102,9 @@ spec: - name: ops-configs mountPath: /ops-configs/ + - name: ops-proxy-configs + mountPath: /ops-proxy-configs/ + - name: app-templates mountPath: /app/btrixcloud/templates/ @@ -171,6 +179,9 @@ spec: - name: ops-configs mountPath: /ops-configs/ + - name: ops-proxy-configs + mountPath: /ops-proxy-configs/ + - name: app-templates mountPath: /app/btrixcloud/templates/ diff --git a/chart/templates/configmap.yaml b/chart/templates/configmap.yaml index 265850e3d4..f876ae745c 100644 --- a/chart/templates/configmap.yaml +++ b/chart/templates/configmap.yaml @@ -54,6 +54,11 @@ data: CRAWLER_CHANNELS_JSON: "/ops-configs/crawler_channels.json" + CRAWLER_PROXIES_LAST_UPDATE: "/ops-proxy-configs/crawler_proxies_last_update" + CRAWLER_PROXIES_JSON: "/ops-proxy-configs/crawler_proxies.json" + + DEFAULT_PROXY_ID: "{{ .Values.default_proxy }}" + MIN_QA_CRAWLER_IMAGE: "{{ .Values.min_qa_crawler_image }}" NUM_BROWSERS: "{{ .Values.crawler_browser_instances }}" @@ -140,9 +145,9 @@ data: crawler_socks_proxy_host: "{{ .Values.crawler_socks_proxy_host }}" crawler_socks_proxy_port: "{{ .Values.crawler_socks_proxy_port }}" - crawler_uid: "{{ .Values.crawler_uid | default 201400007 }}" - crawler_gid: "{{ .Values.crawler_gid | default 201400007 }}" - crawler_fsgroup: "{{ .Values.crawler_fsgroup | default 201400007 }}" + crawler_uid: "{{ .Values.crawler_uid | default 201407 }}" + crawler_gid: "{{ .Values.crawler_gid | default 201407 }}" + crawler_fsgroup: "{{ .Values.crawler_fsgroup | default 201407 }}" profile_browser_workdir_size: "{{ .Values.profile_browser_workdir_size | default "4Gi" }}" diff --git a/chart/templates/secrets.yaml b/chart/templates/secrets.yaml index 4ee89e9f50..7c972b1a46 100644 --- a/chart/templates/secrets.yaml +++ b/chart/templates/secrets.yaml @@ -33,7 +33,6 @@ data: storages.json: {{ .Values.storages | toJson | b64enc | quote }} crawler_channels.json: {{ 
.Values.crawler_channels | toJson | b64enc | quote }} - {{- range $storage := .Values.storages }} --- apiVersion: v1 @@ -60,3 +59,22 @@ stringData: STORE_S3_PROVIDER: {{ $storage.s3_provider | default "Other" }} {{- end }} + +--- +apiVersion: v1 +kind: Secret +metadata: + name: force-user-and-group-name + namespace: {{ .Values.crawler_namespace }} +type: Opaque +stringData: + + # slightly hacky: override /etc/passwd and /etc/group in crawler + # this is needed to be able to use ssh proxies + passwd: | + root:x:0:0:root:/root:/bin/bash + btrix:btrix:{{ .Values.crawler_uid | default 201407 }}:{{ .Values.crawler_gid | default 201407 }}::/tmp/btrix:/bin/sh + + group: | + root:x:0: + btrix:x:{{ .Values.crawler_gid | default 201407 }}: diff --git a/chart/values.yaml b/chart/values.yaml index 5a092736e4..7d07cd05a8 100644 --- a/chart/values.yaml +++ b/chart/values.yaml @@ -310,14 +310,37 @@ crawler_session_time_limit_seconds: 18000 crawler_liveness_port: 6065 -# optional: use socks5 proxy for crawler and profilebrowser -# crawler_socks_proxy_host: 192.0.2.1 -# crawler_socks_proxy_port: 9050 +# optional: use this proxy by default, when no other proxy is set for the crawl +# must match one of the proxy ids in the 'btrix-proxies.proxies' list +# this will also mark the proxy as shared +# default_proxy: "proxy-id" + +# optional: enable the proxies subchart and configure a list of ssh servers to be used as crawler proxies +btrix-proxies: + enabled: false # enable to deploy proxies configmap and secret + crawler_namespace: "crawlers" + proxies: [] + # - id: proxy-id # name of the proxy, shown in the dropdown; must be lowercase alphanumeric and may contain dashes + # url: # proxy connection string, must be an ssh://, socks:// or http:// URL + # label: "US Proxy" # label to show in dropdown + # country_code: US # ISO 3166-1 alpha-2 country code, https://www.iso.org/obp/ui/#search + # description: "Proxy" # optional: description to show for the proxy + # shared: false # optional: set to true to make the proxy available for all orgs + # ssh_private_key: | # required for ssh:// proxies + # # ssh key needed to connect to the SSH server + # + # + # ssh_host_public_key: | # optional, for ssh:// proxies only + # # ssh public keys of the SSH server + # # use output of `ssh-keyscan $hostname -p $port` for best results + # example.invalid:22 SSH-2.0-OpenSSH_9.6p1 Ubuntu-3ubuntu13 + # example.invalid ssh-rsa AAA[..] # optional: set the uid, gid and fsgroup for the crawler and profilebrowser pods -# crawler_uid: 201400007 -# crawler_gid: 201400007 -# crawler_fsgroup: 201400007 +# the following values are used by default: +# crawler_uid: 201407 +# crawler_gid: 201407 +# crawler_fsgroup: 201407 # optional: enable/disable crawler network policy diff --git a/docs/deploy/customization.md b/docs/deploy/customization.md index e48f9e84f9..8fc717a2bd 100644 --- a/docs/deploy/customization.md +++ b/docs/deploy/customization.md @@ -149,4 +149,4 @@ Browsertrix has the ability to cryptographically sign WACZ files with [Authsign] ## Enable Open Registration -You can enable sign-ups by setting `registration_enabled` to `"1"`. Once enabled, your users can register by visiting `/sign-up`. \ No newline at end of file +You can enable sign-ups by setting `registration_enabled` to `"1"`. Once enabled, your users can register by visiting `/sign-up`. 
diff --git a/docs/deploy/index.md b/docs/deploy/index.md index 3ea0077a74..e0693e4042 100644 --- a/docs/deploy/index.md +++ b/docs/deploy/index.md @@ -13,6 +13,6 @@ The main requirements for Browsertrix are: - A Kubernetes Cluster - [Helm 3](https://helm.sh/) (package manager for Kubernetes) -We have prepared a [Local Deployment Guide](local.md) which covers several options for testing Browsertrix locally on a single machine, as well as a [Production (Self-Hosted and Cloud) Deployment](remote.md) guide to help with setting up Browsertrix in different production scenarios. Information about configuring storage, crawler channels, and other details in local or production deployments is in the [Customizing Browsertrix Deployment Guide](customization.md). +We have prepared a [Local Deployment Guide](local.md) which covers several options for testing Browsertrix locally on a single machine, as well as a [Production (Self-Hosted and Cloud) Deployment](remote.md) guide to help with setting up Browsertrix in different production scenarios. Information about configuring storage, crawler channels, and other details in local or production deployments is in the [Customizing Browsertrix Deployment Guide](customization.md). Information about configuring proxies to use with Browsertrix can be found in the [Configuring Proxies](proxies.md) guide. Details on managing org export and import for existing clusters can be found in the [Org Import & Export](admin/org-import-export.md) guide. diff --git a/docs/deploy/proxies.md b/docs/deploy/proxies.md new file mode 100644 index 0000000000..fe12045318 --- /dev/null +++ b/docs/deploy/proxies.md @@ -0,0 +1,157 @@ +# Configuring Proxies + +Browsertrix can be configured to direct crawling traffic through dedicated proxy servers, so that websites can be crawled from a specific geographic location regardless of where Browsertrix itself is deployed. + +The Browsertrix superadmin can configure which proxy servers are available to which organizations (or if they are shared for all organizations) and users can choose from one of the available proxies in each crawl workflow. Users can also configure the default crawling proxy that will be used for the organization in organization-wide [Crawling Defaults](/user-guide/org-settings/#crawling-defaults). + +This guide covers how to set up proxy servers for use with Browsertrix, as well as how to configure Browsertrix to make those proxies available. + +## Proxy Configuration + +Browsertrix supports crawling through HTTP and SOCKS5 proxies, including through a SOCKS5 proxy over an SSH tunnel. For more information on what is supported in the underlying Browsertrix Crawler, see the [Browsertrix Crawler documentation](https://crawler.docs.browsertrix.com/user-guide/proxies/). + +### Obtain an SSH Key-pair + +To set up a proxy server to use with Browsertrix as SOCKS5 over SSH, you will need an SSH public key-pair and: +- The SSH public key configured on the remote machine +- The SSH private key configured in Browsertrix +- The public host key of the remote machine configured in Browsertrix (optional) + +We recommend creating a dedicated SSH key-pair (we recommend an ECDSA key-pair) for use with Browsertrix, as well as a dedicated user, eg. `proxy-user`, and not reusing existing keys or users. 
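The recommended key-pair and dedicated user can be prepared with standard OpenSSH tooling; a minimal sketch, assuming the `proxy-user` account already exists on the proxy host and using an illustrative key path and comment:

```sh
# generate a dedicated ECDSA key-pair for Browsertrix (illustrative path, empty passphrase)
ssh-keygen -t ecdsa -f ~/.ssh/btrix-proxy-key -N "" -C "browsertrix-proxy"

# install the public key for the dedicated proxy user on the proxy host
ssh-copy-id -i ~/.ssh/btrix-proxy-key.pub proxy-user@ssh-proxy-host
```

The resulting private key (`~/.ssh/btrix-proxy-key`) is what goes into the `ssh_private_key` field described below.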
+ +For more background on creating a key-pair with `ssh-keygen`, see existing guides such as [this one from DigitalOcean](https://www.digitalocean.com/community/tutorials/how-to-configure-ssh-key-based-authentication-on-a-linux-server) or [this one from ssh.com](https://www.ssh.com/academy/ssh/keygen). + +We recommend restricting the SSH configuration for the proxy user to the following settings. This can be done by adding a file +such as `/etc/ssh/sshd_config.d/99-ssh-proxy.conf`, where `proxy-user` is the user connecting to the machine. + + +``` +Match User proxy-user + AllowTcpForwarding yes + X11Forwarding no + AllowAgentForwarding no + ForceCommand /bin/false + PubkeyAuthentication yes + PasswordAuthentication no +``` + +## Browsertrix Configuration + +Proxies are made available to Browsertrix through a separate subchart, and can be configured in the `btrix-proxies` section of the main Helm chart (or a local override file) for the Browsertrix deployment. Alternatively, they can be [configured as a separate subchart](#deploying-with-proxies-via-subchart). + +The proxy configuration will look like this, containing one or more proxy declarations. + +```yaml +#default_proxy: + +btrix-proxies: + enabled: true + proxies: + - id: proxy-id-1 + shared: true + label: My Proxy + description: Proxy hosted in for Browsertrix + country_code: US + url: ssh://proxy-user@ssh-proxy-host + ssh_host_public_key: + ssh_private_key: + + - id: proxy-id-2 + shared: false + label: My SOCKS5 proxy + country_code: DE + url: socks5://username:password@proxy-host + ... +``` + + +First, set `enabled` to `true`, which enables these proxies in Browsertrix. + +Next, provide the details of each proxy server that you want available within Browsertrix in the `proxies` list. Minimally, the `id`, `url` connection string, `label` name, and `country_code` two-letter country code must be set for each proxy. + +### SSH Proxies + +For SSH proxy servers, the `url` should be of the form `ssh://proxy-user@ssh-proxy-host`. + +The `ssh_private_key` is required and is the private key of the key-pair created above. + +The `ssh_host_public_key` is recommended to help ensure a secure connection and can often be obtained by running `ssh-keyscan proxy-host -p 22` against the remote machine, assuming a default SSH setup and a hostname of `proxy-host`. + +Only key-based authentication is supported for SSH proxies; password-based authentication is not supported. + +### SOCKS5 Proxies + +For SOCKS5 proxies, the `url` should be of the form `socks5://username:password@socks-proxy-host`. + +This method is to be used with dedicated SOCKS5 proxies (not over SSH), such as existing services that provide this feature. + +### Shared Proxies + +The `shared` field on each proxy object defines whether this proxy should be accessible to all organizations in a Browsertrix deployment +that are allowed to access shared proxies. If false, the proxy must be added directly to each organization that will have access to the proxy. + +The proxy settings can be configured in the super-admin UI by clicking 'Edit Proxies...' next to each organization. + +### Default Proxy + +The `default_proxy` field in the root of the Helm values file can optionally be set to the id of one of the available proxies. If set, the default proxy will be used for all crawls that do not have an alternate proxy set in the workflow configuration. This can be useful if Browsertrix is deployed on a private network and requires a proxy to access the outside world. 
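As a sketch of how this fits together, assuming the `proxy-id-1` example above has been configured and that a local override file is in use, the default proxy could be set directly on the Helm command line (setting it in the override file works equally well):

```sh
# route all crawls that don't specify a workflow proxy through proxy-id-1
helm upgrade --install -f ./chart/values.yaml -f ./chart/local.yaml \
  --set default_proxy=proxy-id-1 \
  btrix ./chart/
```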
+ +This is a deployment-wide setting that is not shown to users; it is designed for admins to route all traffic through a designated proxy. Browsertrix will fail to start if the default proxy is not listed in the available proxies. + +## Deployment + +If `btrix-proxies` has been set in the main Helm chart or a local override file for your Browsertrix deployment, proxies will be enabled on the next deploy of the Browsertrix Helm chart. For instance, if the proxy configuration is located in a local override file `local.yaml`, you can use the following Helm command to redeploy Browsertrix with the proxy configuration: + +```sh +helm upgrade --install -f ./chart/values.yaml -f ./chart/local.yaml btrix ./chart/ +``` + +### Deploying with Proxies via Subchart + +Alternatively, the proxies can be configured with a separate proxies sub-chart. + +This allows for updating proxies without having to redeploy all of Browsertrix. + +A separate proxies YAML file should contain just the `proxies` key: + +```yaml +proxies: + - id: proxy-id-1 + shared: true + label: My Proxy + description: Proxy hosted in for Browsertrix + country_code: US + url: ssh://proxy-user@ssh-proxy-host + ssh_host_public_key: + ssh_private_key: + + - id: proxy-id-2 + shared: false + label: My SOCKS5 proxy + country_code: DE + url: socks5://username:password@proxy-host +``` + + +If the above YAML is placed in `proxies.yaml`, the subchart can be deployed with: + +```sh +helm upgrade --install -f ./chart/proxies.yaml proxies ./chart/proxies/ +``` + +The proxies can be updated without redeploying all of Browsertrix, and Browsertrix will pick up the updated proxies. + +### GitHub Release for Subchart + +The above layout assumes a local copy of the Browsertrix repo. + +The proxies subchart can also be deployed from the latest GitHub release via: + +```sh +helm upgrade --install proxies https://github.com/webrecorder/browsertrix/releases/download/RELEASE/btrix-proxies-VERSION.tgz +``` + +where `RELEASE` is the Browsertrix release and `VERSION` is the version of the proxies chart. + +See the [Browsertrix releases page](https://github.com/webrecorder/browsertrix/releases) for the latest available versions. + diff --git a/docs/user-guide/workflow-setup.md b/docs/user-guide/workflow-setup.md index a3fb93d8e0..3472a45d48 100644 --- a/docs/user-guide/workflow-setup.md +++ b/docs/user-guide/workflow-setup.md @@ -213,6 +213,10 @@ Sets the browser's [user agent](https://developer.mozilla.org/en-US/docs/Web/HTT Sets the browser's language setting. Useful for crawling websites that detect the browser's language setting and serve content accordingly. +### Proxy + +Sets the proxy server that [Browsertrix Crawler](https://github.com/webrecorder/browsertrix-crawler) will direct traffic through while crawling. When a proxy is selected, crawled websites will see traffic as coming from the IP address of the proxy rather than where the Browsertrix Crawler node is deployed. + ## Scheduling Automatically start crawls periodically on a daily, weekly, or monthly schedule. diff --git a/frontend/src/__generated__/locale-codes.ts b/frontend/src/__generated__/locale-codes.ts index 28be186a43..1a0d48c560 100644 --- a/frontend/src/__generated__/locale-codes.ts +++ b/frontend/src/__generated__/locale-codes.ts @@ -10,9 +10,14 @@ export const sourceLocale = `en`; * The other locale codes that this application is localized into. Sorted * lexicographically. 
*/ -export const targetLocales = [] as const; +export const targetLocales = [ + `es`, +] as const; /** * All valid project locale codes. Sorted lexicographically. */ -export const allLocales = [`en`] as const; +export const allLocales = [ + `en`, + `es`, +] as const; diff --git a/frontend/src/components/orgs-list.ts b/frontend/src/components/orgs-list.ts index e10d37f2be..faa421204a 100644 --- a/frontend/src/components/orgs-list.ts +++ b/frontend/src/components/orgs-list.ts @@ -2,20 +2,24 @@ import { localized, msg, str } from "@lit/localize"; import type { SlButton, SlChangeEvent, + SlCheckbox, SlInput, + SlMenuItem, } from "@shoelace-style/shoelace"; import { serialize } from "@shoelace-style/shoelace/dist/utilities/form.js"; import { css, html, nothing } from "lit"; -import { customElement, property, query } from "lit/decorators.js"; +import { customElement, property, query, state } from "lit/decorators.js"; import { when } from "lit/directives/when.js"; import { BtrixElement } from "@/classes/BtrixElement"; import type { Dialog } from "@/components/ui/dialog"; +import type { ProxiesAPIResponse, Proxy } from "@/types/crawler"; import { formatNumber, getLocale } from "@/utils/localization"; import type { OrgData } from "@/utils/orgs"; /** * @fires update-quotas + * @fires update-proxies */ @localized() @customElement("btrix-orgs-list") @@ -35,9 +39,15 @@ export class OrgsList extends BtrixElement { @property({ type: Object }) currOrg?: OrgData | null = null; + @state() + private allProxies?: Proxy[]; + @query("#orgQuotaDialog") private readonly orgQuotaDialog?: Dialog | null; + @query("#orgProxiesDialog") + private readonly orgProxiesDialog?: Dialog | null; + @query("#orgReadOnlyDialog") private readonly orgReadOnlyDialog?: Dialog | null; @@ -79,8 +89,8 @@ export class OrgsList extends BtrixElement { - ${this.renderOrgQuotas()} ${this.renderOrgReadOnly()} - ${this.renderOrgDelete()} + ${this.renderOrgQuotas()} ${this.renderOrgProxies()} + ${this.renderOrgReadOnly()} ${this.renderOrgDelete()} `; } @@ -139,6 +149,69 @@ export class OrgsList extends BtrixElement { `; } + private renderOrgProxies() { + return html` + (this.currOrg = null)} + @sl-show=${() => { + void this.fetchAllProxies(); + }} + > + ${msg("Enable all shared proxies")} + + + Enable selected shared proxies + ${this.allProxies + ?.filter((server) => server.shared) + .map( + (server) => + html` + ${server.id}: ${server.label} + `, + )} + + Enable selected private proxies + + ${this.allProxies + ?.filter((server) => !server.shared) + .map( + (server) => + html` + ${server.id}: ${server.label} + `, + )} + + +
+ ${msg("Update Proxy Settings")} + +
+
+ `; + } + private renderOrgReadOnly() { return html` ( + `/orgs/all/crawlconfigs/crawler-proxies`, + ); + this.allProxies = data.servers; + } catch (e) { + console.debug(e); + + this.notify.toast({ + message: msg("Sorry, couldn't get all proxies at this time."), + variant: "danger", + icon: "exclamation-octagon", + }); + } + } private async deleteOrg(org: OrgData) { try { await this.api.fetch(`/orgs/${org.id}`, { @@ -537,6 +657,15 @@ export class OrgsList extends BtrixElement { ${msg("Edit Quotas")} + { + this.currOrg = org; + void this.orgProxiesDialog?.show(); + }} + > + + ${msg("Edit Proxies")} + ${org.readOnly ? html`
diff --git a/frontend/src/components/ui/index.ts b/frontend/src/components/ui/index.ts index efc9a61e41..4ee7958ca1 100644 --- a/frontend/src/components/ui/index.ts +++ b/frontend/src/components/ui/index.ts @@ -32,6 +32,7 @@ import("./relative-duration"); import("./search-combobox"); import("./section-heading"); import("./select-crawler"); +import("./select-crawler-proxy"); import("./table"); import("./tag-input"); import("./tag"); diff --git a/frontend/src/components/ui/select-crawler-proxy.ts b/frontend/src/components/ui/select-crawler-proxy.ts new file mode 100644 index 0000000000..5490c9594e --- /dev/null +++ b/frontend/src/components/ui/select-crawler-proxy.ts @@ -0,0 +1,207 @@ +import { localized, msg } from "@lit/localize"; +import { type SlSelect } from "@shoelace-style/shoelace"; +import { html } from "lit"; +import { customElement, property, state } from "lit/decorators.js"; + +import type { ProxiesAPIResponse, Proxy } from "@/pages/org/types"; +import LiteElement from "@/utils/LiteElement"; + +type SelectCrawlerProxyChangeDetail = { + value: string | null; +}; + +export type SelectCrawlerProxyChangeEvent = + CustomEvent; + +type SelectCrawlerProxyUpdateDetail = { + show: boolean; +}; + +export type SelectCrawlerProxyUpdateEvent = + CustomEvent; + +/** + * Crawler proxy select dropdown + * + * Usage example: + * ```ts + * selectedcrawlerProxy = value} + * > + * ``` + * + * @event on-change + */ +@customElement("btrix-select-crawler-proxy") +@localized() +export class SelectCrawlerProxy extends LiteElement { + @property({ type: String }) + proxyId: string | null = null; + + @state() + private selectedProxy?: Proxy; + + @state() + private defaultProxy?: Proxy; + + @state() + private allProxies?: Proxy[]; + + protected firstUpdated() { + void this.fetchOrgProxies(); + } + // credit: https://dev.to/jorik/country-code-to-flag-emoji-a21 + private countryCodeToFlagEmoji(countryCode: String): String { + return countryCode + .toUpperCase() + .split("") + .map((char) => String.fromCodePoint(char.charCodeAt(0) + 127397)) + .join(""); + } + + render() { + /*if (this.crawlerProxys && this.crawlerProxys.length < 2) { + return html``; + }*/ + + return html` + { + // Refetch to keep list up to date + void this.fetchOrgProxies(); + }} + @sl-hide=${this.stopProp} + @sl-after-hide=${this.stopProp} + > + ${this.allProxies?.map( + (server) => + html` + ${server.country_code + ? html` + ${this.countryCodeToFlagEmoji(server.country_code)} + ` + : ""} + ${server.label} + `, + )} + ${this.selectedProxy + ? html` +
+ ${msg("Description:")} + ${this.selectedProxy.description || ""} +
+ ` + : ``} + ${!this.selectedProxy && this.defaultProxy + ? html` +
+ ${msg("Description:")} + ${this.defaultProxy.description || ""} +
+ ` + : ``} +
+ `; + } + + private onChange(e: Event) { + this.stopProp(e); + + this.selectedProxy = this.allProxies?.find( + ({ id }) => id === (e.target as SlSelect).value, + ); + + if (!this.selectedProxy) { + this.proxyId = null; + } + + this.dispatchEvent( + new CustomEvent("on-change", { + detail: { + value: this.selectedProxy ? this.selectedProxy.id : null, + }, + }), + ); + } + + /** + * Fetch crawler proxies and update internal state + */ + private async fetchOrgProxies(): Promise { + try { + const data = await this.getOrgProxies(); + const defaultProxyId = data.default_proxy_id; + + this.allProxies = data.servers; + + if (!this.defaultProxy) { + this.defaultProxy = this.allProxies.find( + ({ id }) => id === defaultProxyId, + ); + } + + if (this.proxyId && !this.selectedProxy?.id) { + this.selectedProxy = this.allProxies.find( + ({ id }) => id === this.proxyId, + ); + } + + if (!this.selectedProxy) { + this.proxyId = null; + this.dispatchEvent( + new CustomEvent("on-change", { + detail: { + value: null, + }, + }), + ); + this.selectedProxy = this.allProxies.find( + ({ id }) => id === this.proxyId, + ); + } + + this.dispatchEvent( + new CustomEvent("on-update", { + detail: { + show: this.allProxies.length > 1, + }, + }), + ); + } catch (e) { + this.notify({ + message: msg("Sorry, couldn't retrieve proxies at this time."), + variant: "danger", + icon: "exclamation-octagon", + }); + } + } + + private async getOrgProxies(): Promise { + return this.apiFetch( + `/orgs/${this.orgId}/crawlconfigs/crawler-proxies`, + ); + } + + /** + * Stop propgation of sl-select events. + * Prevents bug where sl-dialog closes when dropdown closes + * https://github.com/shoelace-style/shoelace/issues/170 + */ + private stopProp(e: Event) { + e.stopPropagation(); + } +} diff --git a/frontend/src/components/ui/select-crawler.ts b/frontend/src/components/ui/select-crawler.ts index cce6a27383..4026f9cb73 100644 --- a/frontend/src/components/ui/select-crawler.ts +++ b/frontend/src/components/ui/select-crawler.ts @@ -30,7 +30,6 @@ type CrawlerChannelsAPIResponse = { * Usage example: * ```ts * selectedCrawler = value} * > * ``` diff --git a/frontend/src/features/browser-profiles/new-browser-profile-dialog.ts b/frontend/src/features/browser-profiles/new-browser-profile-dialog.ts index c1ac73f40f..62ef3844bb 100644 --- a/frontend/src/features/browser-profiles/new-browser-profile-dialog.ts +++ b/frontend/src/features/browser-profiles/new-browser-profile-dialog.ts @@ -11,6 +11,7 @@ import queryString from "query-string"; import type { Dialog } from "@/components/ui/dialog"; import { type SelectCrawlerChangeEvent } from "@/components/ui/select-crawler"; +import { type SelectCrawlerProxyChangeEvent } from "@/components/ui/select-crawler-proxy"; import LiteElement, { html } from "@/utils/LiteElement"; @localized() @@ -25,6 +26,9 @@ export class NewBrowserProfileDialog extends LiteElement { @state() private crawlerChannel = "default"; + @state() + private proxyId: string | null = null; + @query("btrix-dialog") private readonly dialog?: Dialog; @@ -79,6 +83,14 @@ export class NewBrowserProfileDialog extends LiteElement { (this.crawlerChannel = e.detail.value!)} > +
+ + (this.proxyId = e.detail.value!)} + > +
@@ -126,6 +138,7 @@ export class NewBrowserProfileDialog extends LiteElement { const data = await this.createBrowser({ url: url, crawlerChannel: this.crawlerChannel, + proxyId: this.proxyId, }); this.notify({ @@ -141,6 +154,7 @@ export class NewBrowserProfileDialog extends LiteElement { url, name: msg("My Profile"), crawlerChannel: this.crawlerChannel, + proxyId: this.proxyId, })}`, ); } catch (e) { @@ -156,13 +170,16 @@ export class NewBrowserProfileDialog extends LiteElement { private async createBrowser({ url, crawlerChannel, + proxyId, }: { url: string; crawlerChannel: string; + proxyId: string | null; }) { const params = { url, crawlerChannel, + proxyId, }; return this.apiFetch<{ browserid: string }>( diff --git a/frontend/src/features/browser-profiles/profile-browser.ts b/frontend/src/features/browser-profiles/profile-browser.ts index e3b06723b8..24f755117b 100644 --- a/frontend/src/features/browser-profiles/profile-browser.ts +++ b/frontend/src/features/browser-profiles/profile-browser.ts @@ -27,7 +27,6 @@ export type BrowserConnectionChange = { * Usage example: * ```ts * selectedProfile = value} * > * ``` @@ -115,6 +114,12 @@ export class SelectBrowserProfile extends LiteElement { minute="2-digit" > + ${this.selectedProfile.proxyId + ? html` + ${msg("Using proxy: ")} + ${this.selectedProfile.proxyId} + ` + : ``} { }, }; }; + function getLocalizedWeekDays() { const now = new Date(); // TODO accept locale from locale-picker @@ -1320,6 +1321,17 @@ https://archiveweb.page/images/${"logo.svg"}`} > `)} ${this.renderHelpTextCol(infoTextStrings["browserProfile"])} + ${inputCol(html` + + this.updateFormState({ + proxyId: e.detail.value, + })} + > + `)} + ${this.renderHelpTextCol(infoTextStrings["proxyId"])} ${inputCol(html`
@@ -412,6 +413,18 @@ export class Home extends LiteElement { }); } + async onUpdateOrgProxies(e: CustomEvent) { + const org = e.detail as OrgData; + + await this.apiFetch(`/orgs/${org.id}/proxies`, { + method: "POST", + body: JSON.stringify({ + allowSharedProxies: org.allowSharedProxies, + allowedProxies: org.allowedProxies, + }), + }); + } + async checkFormValidity(formEl: HTMLFormElement) { await this.updateComplete; return !formEl.querySelector("[data-invalid]"); diff --git a/frontend/src/pages/org/browser-profiles-detail.ts b/frontend/src/pages/org/browser-profiles-detail.ts index ce84f4bf50..045e0552d2 100644 --- a/frontend/src/pages/org/browser-profiles-detail.ts +++ b/frontend/src/pages/org/browser-profiles-detail.ts @@ -24,7 +24,6 @@ const DESCRIPTION_MAXLENGTH = 500; * Usage: * ```ts * * ``` @@ -586,6 +585,7 @@ export class BrowserProfilesDetail extends BtrixElement { description: this.profile.description.slice(0, DESCRIPTION_MAXLENGTH), profileId: this.profile.id, crawlerChannel: this.profile.crawlerChannel, + proxyId: this.profile.proxyId, })}`, ); } catch (e) { diff --git a/frontend/src/pages/org/browser-profiles-list.ts b/frontend/src/pages/org/browser-profiles-list.ts index af2471782e..b86ac37c7f 100644 --- a/frontend/src/pages/org/browser-profiles-list.ts +++ b/frontend/src/pages/org/browser-profiles-list.ts @@ -33,7 +33,6 @@ const INITIAL_PAGE_SIZE = 20; * Usage: * ```ts * * ``` */ diff --git a/frontend/src/pages/org/browser-profiles-new.ts b/frontend/src/pages/org/browser-profiles-new.ts index f6f1c4bcc0..fed4dc744f 100644 --- a/frontend/src/pages/org/browser-profiles-new.ts +++ b/frontend/src/pages/org/browser-profiles-new.ts @@ -14,7 +14,6 @@ import { isApiError } from "@/utils/api"; * Usage: * ```ts * * ``` @@ -33,9 +32,11 @@ export class BrowserProfilesNew extends BtrixElement { crawlerChannel?: string; profileId?: string | null; navigateUrl?: string; + proxyId: string | null; } = { name: "", url: "", + proxyId: null, }; @state() @@ -279,9 +280,11 @@ export class BrowserProfilesNew extends BtrixElement { } const crawlerChannel = this.browserParams.crawlerChannel || "default"; + const proxyId = this.browserParams.proxyId; const data = await this.createBrowser({ url, crawlerChannel, + proxyId, }); this.navigate.to( @@ -291,6 +294,7 @@ export class BrowserProfilesNew extends BtrixElement { url, name: this.browserParams.name || msg("My Profile"), crawlerChannel, + proxyId, })}`, ); } @@ -305,6 +309,7 @@ export class BrowserProfilesNew extends BtrixElement { name: formData.get("name"), description: formData.get("description"), crawlerChannel: this.browserParams.crawlerChannel, + proxyId: this.browserParams.proxyId, }; try { @@ -352,13 +357,16 @@ export class BrowserProfilesNew extends BtrixElement { private async createBrowser({ url, crawlerChannel, + proxyId, }: { url: string; crawlerChannel: string; + proxyId: string | null; }) { const params = { url, crawlerChannel, + proxyId, }; return this.api.fetch<{ browserid: string }>( diff --git a/frontend/src/pages/org/index.ts b/frontend/src/pages/org/index.ts index e701aec0df..d9defdb7d3 100644 --- a/frontend/src/pages/org/index.ts +++ b/frontend/src/pages/org/index.ts @@ -72,6 +72,7 @@ export type OrgParams = { crawlerChannel?: string; profileId?: string; navigateUrl?: string; + proxyId?: string; }; collections: ArchivedItemPageParams & { collectionTab?: string; @@ -565,6 +566,7 @@ export class Org extends LiteElement { crawlerChannel: params.crawlerChannel, profileId: params.profileId, navigateUrl: params.navigateUrl, + 
proxyId: params.proxyId, }} >`; } diff --git a/frontend/src/pages/org/settings/components/crawling-defaults.ts b/frontend/src/pages/org/settings/components/crawling-defaults.ts index ada0e91075..7000230a79 100644 --- a/frontend/src/pages/org/settings/components/crawling-defaults.ts +++ b/frontend/src/pages/org/settings/components/crawling-defaults.ts @@ -195,6 +195,10 @@ export class OrgSettingsCrawlWorkflows extends BtrixElement { size="small" > `, + proxyId: html` `, crawlerChannel: html` v) || [], diff --git a/frontend/src/pages/org/workflow-detail.ts b/frontend/src/pages/org/workflow-detail.ts index c14b54428e..81e52c07b9 100644 --- a/frontend/src/pages/org/workflow-detail.ts +++ b/frontend/src/pages/org/workflow-detail.ts @@ -1607,6 +1607,10 @@ export class WorkflowDetail extends LiteElement { } else { message = msg("You do not have permission to run crawls."); } + } else if (isApiError(e) && e.details == "proxy_not_found") { + message = msg( + "Your org doesn't have permission to use the proxy configured for this crawl.", + ); } this.notify({ message: message, diff --git a/frontend/src/pages/org/workflows-list.ts b/frontend/src/pages/org/workflows-list.ts index ffc015e779..f9d2891dc1 100644 --- a/frontend/src/pages/org/workflows-list.ts +++ b/frontend/src/pages/org/workflows-list.ts @@ -842,6 +842,10 @@ export class WorkflowsList extends LiteElement { } else { message = msg("You do not have permission to run crawls."); } + } else if (isApiError(e) && e.details == "proxy_not_found") { + message = msg( + "Your org doesn't have permission to use the proxy configured for this crawl.", + ); } this.notify({ message: message, diff --git a/frontend/src/pages/org/workflows-new.ts b/frontend/src/pages/org/workflows-new.ts index c19544c62d..5bf98627e8 100644 --- a/frontend/src/pages/org/workflows-new.ts +++ b/frontend/src/pages/org/workflows-new.ts @@ -35,6 +35,7 @@ const defaultValue = { scale: 1, autoAddCollections: [], crawlerChannel: "default", + proxyId: null, } as WorkflowParams; /** @@ -118,6 +119,7 @@ export class WorkflowsNew extends LiteElement { crawlTimeout: org.crawlingDefaults?.crawlTimeout, maxCrawlSize: org.crawlingDefaults?.maxCrawlSize, crawlerChannel: org.crawlingDefaults?.crawlerChannel, + proxyId: org.crawlingDefaults?.proxyId, }, this.initialWorkflow || {}, ); diff --git a/frontend/src/strings/crawl-workflows/infoText.ts b/frontend/src/strings/crawl-workflows/infoText.ts index 28c59eb200..bcc0721389 100644 --- a/frontend/src/strings/crawl-workflows/infoText.ts +++ b/frontend/src/strings/crawl-workflows/infoText.ts @@ -58,6 +58,7 @@ const infoText: Partial> = { ), lang: msg(`Websites that observe the browser’s language setting may serve content in that language if available.`), + proxyId: msg(`Choose a proxy to crawl through`), }; export default infoText; diff --git a/frontend/src/types/crawler.ts b/frontend/src/types/crawler.ts index 128157114b..5ec383204c 100644 --- a/frontend/src/types/crawler.ts +++ b/frontend/src/types/crawler.ts @@ -54,6 +54,7 @@ export type WorkflowParams = { description: string | null; autoAddCollections: string[]; crawlerChannel: string; + proxyId: string | null; }; export type CrawlConfig = WorkflowParams & { @@ -124,6 +125,7 @@ export type Profile = { replicas: ProfileReplica[] | null; }; crawlerChannel?: string; + proxyId?: string; }; // TODO maybe convert this to an enum? 
@@ -196,6 +198,19 @@ export type CrawlerChannel = {
 image: string;
 };
+export type Proxy = {
+ id: string;
+ label: string;
+ country_code: string | null;
+ description: string | null;
+ shared: boolean;
+};
+
+export type ProxiesAPIResponse = {
+ default_proxy_id: string | null;
+ servers: Proxy[];
+};
+
 export type ArchivedItem = Crawl | Upload;
 export type ArchivedItemPageComment = {
diff --git a/frontend/src/types/org.ts b/frontend/src/types/org.ts
index c2db5f8b5b..44e134e8b3 100644
--- a/frontend/src/types/org.ts
+++ b/frontend/src/types/org.ts
@@ -42,6 +42,7 @@ export const crawlingDefaultsSchema = z.object({
 blockAds: z.boolean().optional(),
 profileid: z.string().optional(),
 crawlerChannel: z.string().optional(),
+ proxyId: z.string().optional(),
 lang: z.string().optional(),
 userAgent: z.string().optional(),
 exclude: z.array(z.string()),
@@ -90,6 +91,8 @@ export const orgDataSchema = z.object({
 readOnlyOnCancel: z.boolean(),
 subscription: subscriptionSchema.nullable(),
 crawlingDefaults: crawlingDefaultsSchema.nullable(),
+ allowSharedProxies: z.boolean(),
+ allowedProxies: z.array(z.string()),
 });
 export type OrgData = z.infer<typeof orgDataSchema>;
diff --git a/frontend/src/utils/workflow.ts b/frontend/src/utils/workflow.ts
index b24763fa09..47340d106d 100644
--- a/frontend/src/utils/workflow.ts
+++ b/frontend/src/utils/workflow.ts
@@ -86,6 +86,7 @@ export type FormState = {
 autoscrollBehavior: boolean;
 userAgent: string | null;
 crawlerChannel: string;
+ proxyId: string | null;
 };
 export type FormStateField = keyof FormState;
@@ -139,6 +140,7 @@ export const getDefaultFormState = (): FormState => ({
 autoscrollBehavior: true,
 userAgent: null,
 crawlerChannel: "default",
+ proxyId: null,
 });
 export const mapSeedToUrl = (arr: Seed[]) =>
@@ -288,6 +290,7 @@ export function getInitialFormState(params: {
 params.initialWorkflow.config.userAgent ?? defaultFormState.userAgent,
 crawlerChannel:
 params.initialWorkflow.crawlerChannel || defaultFormState.crawlerChannel,
+ proxyId: params.initialWorkflow.proxyId || defaultFormState.proxyId,
 ...formState,
 };
 }
diff --git a/frontend/xliff/es.xlf b/frontend/xliff/es.xlf
index 49673990ef..5b94f86f55 100644
--- a/frontend/xliff/es.xlf
+++ b/frontend/xliff/es.xlf
@@ -3738,6 +3738,48 @@
 In-Page Links
+
+ Choose a proxy to crawl through
+
+
+ Your org doesn't have permission to use the proxy configured for this crawl.
+
+
+ Proxy
+
+
+ Crawler Proxy Server
+
+
+ Default Proxy:
+
+
+ No Proxy
+
+
+ Description:
+
+
+ Sorry, couldn't retrieve proxies at this time.
+
+
+ Proxy Settings for:
+
+
+ Enable all shared proxies
+
+
+ Update Proxy Settings
+
+
+ Sorry, couldn't get all proxies at this time.
+
+
+ Edit Proxies
+
+
+ Using proxy:
+
diff --git a/mkdocs.yml b/mkdocs.yml
index d5c2d30910..a930b276a0 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -77,6 +77,7 @@ nav:
 - deploy/local.md
 - deploy/remote.md
 - deploy/customization.md
+ - deploy/proxies.md
 - Ansible:
 - deploy/ansible/digitalocean.md
 - deploy/ansible/microk8s.md
diff --git a/scripts/minikube-build-and-deploy.sh b/scripts/minikube-build-and-deploy.sh
new file mode 100644
index 0000000000..259b05c6e0
--- /dev/null
+++ b/scripts/minikube-build-and-deploy.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+CURR=$(dirname "${BASH_SOURCE[0]}")
+
+eval $(minikube docker-env)
+for img in backend frontend;
+do
+ sh "${CURR}/build-${img}.sh"
+done
+
+echo "Deploying helm chart..."
+helm upgrade --wait --install -f ./chart/values.yaml -f ./chart/local.yaml btrix ./chart/
+
+until kubectl port-forward service/browsertrix-cloud-frontend 8000:80; do
+ echo "Unable to forward service/browsertrix-cloud-frontend. Retrying..." >&2
+ sleep 1
+done
diff --git a/scripts/minikube-reset.sh b/scripts/minikube-reset.sh
new file mode 100644
index 0000000000..0bad515fa7
--- /dev/null
+++ b/scripts/minikube-reset.sh
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+if [ "$(minikube status | grep -o Running | wc -l)" -lt 3 ]; then
+ echo "Error: Less than 3 components are running in Minikube"
+ exit 1
+fi
+
+if kubectl config get-contexts | grep -q minikube; then
+ kubectl config use-context minikube
+ # ~~~ DANGER ZONE ~~~
+ echo "Uninstalling helm deployment and deleting PVCs"
+ helm uninstall btrix
+ minikube kubectl -- delete pvc minio-storage-pvc
+ minikube kubectl -- delete pvc data-db-local-mongo-0
+fi