Skip to content

Commit

Permalink
update ranker & retrieval
Browse files Browse the repository at this point in the history
  • Loading branch information
tottenjordan committed Sep 22, 2023
1 parent 7b2eb09 commit 20018ea
Show file tree
Hide file tree
Showing 33 changed files with 3,432 additions and 9,431 deletions.
83 changes: 66 additions & 17 deletions 00-env-setup.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "c9bd35d9-6c0b-40b9-873c-5b137c62db33",
"metadata": {},
"outputs": [],
Expand All @@ -117,7 +117,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "b2833324-082c-470d-916a-db133c2ce53d",
"metadata": {},
"outputs": [
Expand All @@ -137,8 +137,8 @@
"VALID_DIR_PREFIX : valid\n",
"VPC_NETWORK_FULL : projects/934903580331/global/networks/ucaip-haystack-vpc-network\n",
"BQ_DATASET : spotify_e2e_test\n",
"BQ_TABLE_TRAIN : v2_train_flatten_last_5\n",
"BQ_TABLE_VALID : v2_train_flatten_valid_last_5\n",
"BQ_TABLE_TRAIN : train_flatten_last_5\n",
"BQ_TABLE_VALID : train_flatten_valid_last_5\n",
"BQ_TABLE_CANDIDATES : candidates\n",
"REPO_SRC : src\n",
"PIPELINES_SUB_DIR : feature_pipes\n",
Expand Down Expand Up @@ -171,8 +171,8 @@
"\n",
"# BigQuery parameters\n",
"BQ_DATASET = 'spotify_e2e_test'\n",
"BQ_TABLE_TRAIN = 'v2_train_flatten_last_5'\n",
"BQ_TABLE_VALID = 'v2_train_flatten_valid_last_5'\n",
"BQ_TABLE_TRAIN = 'train_flatten_last_5'\n",
"BQ_TABLE_VALID = 'train_flatten_valid_last_5'\n",
"BQ_TABLE_CANDIDATES = 'candidates'\n",
"\n",
"# repo\n",
Expand Down Expand Up @@ -243,7 +243,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 6,
"id": "3c69c1a3-ee9a-4b94-b67a-f38b05b60eef",
"metadata": {},
"outputs": [],
Expand All @@ -263,7 +263,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 7,
"id": "01b46ec0-998e-42f2-8aa2-ec907ec2838c",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -307,8 +307,8 @@
"VPC_NETWORK_FULL = \"projects/934903580331/global/networks/ucaip-haystack-vpc-network\"\n",
"\n",
"BQ_DATASET = \"spotify_e2e_test\"\n",
"BQ_TABLE_TRAIN = \"v2_train_flatten_last_5\"\n",
"BQ_TABLE_VALID = \"v2_train_flatten_valid_last_5\"\n",
"BQ_TABLE_TRAIN = \"train_flatten_last_5\"\n",
"BQ_TABLE_VALID = \"train_flatten_valid_last_5\"\n",
"BQ_TABLE_CANDIDATES = \"candidates\"\n",
"\n",
"REPO_SRC = \"src\"\n",
Expand Down Expand Up @@ -382,7 +382,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 8,
"id": "87901338-bccd-41f0-8aeb-edad2b0a91f7",
"metadata": {},
"outputs": [
Expand All @@ -402,7 +402,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 9,
"id": "cad49778-eb36-4b20-9201-3a266a97dbc8",
"metadata": {},
"outputs": [
Expand All @@ -412,6 +412,7 @@
"text": [
"gs://ndr-v1-hybrid-vertex-bucket/config/\n",
"gs://ndr-v1-hybrid-vertex-bucket/data/\n",
"gs://ndr-v1-hybrid-vertex-bucket/endpoint-tests/\n",
"gs://ndr-v1-hybrid-vertex-bucket/local-train-v1/\n",
"gs://ndr-v1-hybrid-vertex-bucket/scale-training-v1/\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/\n"
Expand Down Expand Up @@ -474,7 +475,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 1,
"id": "de5ce181-7f8a-43c2-b292-7d4a52e22adc",
"metadata": {},
"outputs": [
Expand All @@ -499,11 +500,59 @@
"*ARCHIVED*\n",
"# .gcloudignore\n",
"# .git\n",
"# .github\n",
".github\n",
"*__pycache__\n",
"# *cpython-37.pyc\n",
"# .gitignore\n",
"# .DS_Store"
"*cpython-37.pyc\n",
".gitignore\n",
".DS_Store\n",
"\n",
"# Python byte-compiled / optimized files\n",
"__pycache__/\n",
"*.py[cod]\n",
"*$py.class"
]
},
{
"cell_type": "markdown",
"id": "e1c9896e-62c4-470f-8cad-8ed4b646eac3",
"metadata": {},
"source": [
"### Delete `__pycache__` directories"
]
},
{
"cell_type": "markdown",
"id": "2d69e358-638b-4e79-9c0f-51ae7a65d5a8",
"metadata": {},
"source": [
"First run `LIST_CMD` to preview which paths match, then run `DELETE_CMD` to remove them"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "e28457c2-43a9-4e38-ac59-9766e816f45b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"copy these commands into terminal:\n",
"\n",
"find . | grep -E \"(/__pycache__$|\\.pyc$|\\.pyo$)\"\n",
"find . | grep -E \"(/__pycache__$|\\.pyc$|\\.pyo$)\" | xargs rm -rf\n"
]
}
],
"source": [
"LIST_CMD = 'find . | grep -E \"(/__pycache__$|\\.pyc$|\\.pyo$)\"'\n",
"DELETE_CMD = 'find . | grep -E \"(/__pycache__$|\\.pyc$|\\.pyo$)\" | xargs rm -rf'\n",
"\n",
"# print the commands so they can be copied into a terminal\n",
"print(\"copy these commands into terminal:\\n\")\n",
"print(f\"{LIST_CMD}\")\n",
"print(f\"{DELETE_CMD}\")"
]
},
{
Expand Down
Loading

0 comments on commit 20018ea

Please sign in to comment.