Skip to content

Commit

Permalink
update name
Browse files Browse the repository at this point in the history
  • Loading branch information
tottenjordan committed Sep 25, 2023
1 parent 0009283 commit ff60f36
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 62 deletions.
32 changes: 7 additions & 25 deletions 04-custom-train-retrieval.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@
"# =================================================\n",
"# trainconfig: Data sources\n",
"# =================================================\n",
"TRAIN_DIR_PREFIX = f'data/{DATA_VERSION}/train' # train\n",
"TRAIN_DIR_PREFIX = f'data/{DATA_VERSION}/valid' # train\n",
"VALID_DIR_PREFIX = f'data/{DATA_VERSION}/valid' \n",
"CANDIDATE_PREFIX = f'data/{DATA_VERSION}/candidates' "
]
Expand Down Expand Up @@ -768,15 +768,15 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 21,
"id": "33a58184-e0c4-4403-ad80-da5fdb7dbaed",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TB_LOGS_PATH: gs://ndr-v1-hybrid-vertex-bucket/scale-training-v1/run-20230925-145451/logs\n"
"TB_LOGS_PATH: gs://ndr-v1-hybrid-vertex-bucket/scale-training-v1/run-20230925-203116/logs\n"
]
}
],
Expand All @@ -785,15 +785,14 @@
"\n",
"import tensorflow as tf\n",
"\n",
"# TB_LOGS_PATH = f'{BASE_OUTPUT_DIR}/logs'\n",
"TB_LOGS_PATH = \"gs://ndr-v1-hybrid-vertex-bucket/scale-training-v1/run-20230925-145451/logs\"\n",
"TB_LOGS_PATH = f'{BASE_OUTPUT_DIR}/logs'\n",
"\n",
"print(f\"TB_LOGS_PATH: {TB_LOGS_PATH}\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 24,
"id": "d432d552-4416-4f4c-964f-bcca57b60a07",
"metadata": {},
"outputs": [],
Expand All @@ -804,31 +803,14 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 25,
"id": "171227db-d5f9-47e4-8b67-a528950233ab",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
" <iframe id=\"tensorboard-frame-ac1e82f204c46248\" width=\"100%\" height=\"800\" frameborder=\"0\">\n",
" </iframe>\n",
" <script>\n",
" (function() {\n",
" const frame = document.getElementById(\"tensorboard-frame-ac1e82f204c46248\");\n",
" const url = new URL(\"/proxy/6006/\", window.location);\n",
" const port = 0;\n",
" if (port) {\n",
" url.port = port;\n",
" }\n",
" frame.src = url;\n",
" })();\n",
" </script>\n",
" "
],
"text/plain": [
"<IPython.core.display.HTML object>"
"ERROR: Timed out waiting for TensorBoard to start. It may still be running as pid 23612."
]
},
"metadata": {},
Expand Down
72 changes: 35 additions & 37 deletions 07-train-pipeline.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"MODEL_ROOT_NAME: sp-2tower-tfrs-v1-pipev1\n"
"MODEL_ROOT_NAME: sp-2tower-tfrs-v1-pipe_v2\n"
]
}
],
Expand Down Expand Up @@ -2212,15 +2212,14 @@
"output_type": "stream",
"text": [
"EXPERIMENT_NAME: tfrs-pipe-v1\n",
"RUN_NAME: run-20230922-202528\n"
"RUN_NAME: run-20230925-203808\n"
]
}
],
"source": [
"EXPERIMENT_PREFIX = 'tfrs-pipe' # custom identifier for organizing experiments\n",
"EXPERIMENT_NAME=f'{EXPERIMENT_PREFIX}-{VERSION}'\n",
"# RUN_NAME = f'run-{time.strftime(\"%Y%m%d-%H%M%S\")}'\n",
"RUN_NAME = f'run-20230922-202528'\n",
"RUN_NAME = f'run-{time.strftime(\"%Y%m%d-%H%M%S\")}'\n",
"\n",
"print(f\"EXPERIMENT_NAME: {EXPERIMENT_NAME}\")\n",
"print(f\"RUN_NAME: {RUN_NAME}\")"
Expand Down Expand Up @@ -2248,7 +2247,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"PIPELINE_ROOT_PATH: gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root\n"
"PIPELINE_ROOT_PATH: gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root\n"
]
}
],
Expand Down Expand Up @@ -2282,7 +2281,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"FEATURES_PREFIX: tfrs-pipe-v1/run-20230922-202528/features\n"
"FEATURES_PREFIX: tfrs-pipe-v1/run-20230925-203808/features\n"
]
}
],
Expand Down Expand Up @@ -2512,7 +2511,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"VALID_FREQUENCY : 35\n",
"VALID_FREQUENCY : 23\n",
"VALID_STEPS : 20\n",
"EPOCH_STEPS : 2003\n",
"EMBED_FREQUENCY : 1\n",
Expand Down Expand Up @@ -2602,7 +2601,7 @@
"output_type": "stream",
"text": [
"CANDIDATE_PREFIX: data/v1/candidates\n",
"TRAIN_DIR_PREFIX: data/v1/valid\n",
"TRAIN_DIR_PREFIX: data/v1/train\n",
"VALID_DIR_PREFIX: data/v1/valid\n"
]
}
Expand All @@ -2611,7 +2610,7 @@
"# =================================================\n",
"# trainconfig: Data sources\n",
"# =================================================\n",
"TRAIN_DIR_PREFIX = f'data/{DATA_VERSION}/valid' # train\n",
"TRAIN_DIR_PREFIX = f'data/{DATA_VERSION}/train' # train\n",
"VALID_DIR_PREFIX = f'data/{DATA_VERSION}/valid' \n",
"CANDIDATE_PREFIX = f'data/{DATA_VERSION}/candidates' \n",
"\n",
Expand Down Expand Up @@ -2642,40 +2641,40 @@
"[{'container_spec': {'args': ['--project=hybrid-vertex',\n",
" '--train_output_gcs_bucket=ndr-v1-hybrid-vertex-bucket',\n",
" '--train_dir=ndr-v1-hybrid-vertex-bucket',\n",
" '--train_dir_prefix=data/v1/valid',\n",
" '--train_dir_prefix=data/v1/train',\n",
" '--valid_dir=ndr-v1-hybrid-vertex-bucket',\n",
" '--valid_dir_prefix=data/v1/valid',\n",
" '--candidate_file_dir=ndr-v1-hybrid-vertex-bucket',\n",
" '--candidate_files_prefix=data/v1/candidates',\n",
" '--experiment_name=tfrs-pipe-v1',\n",
" '--experiment_run=run-20230922-202528',\n",
" '--experiment_run=run-20230925-203808',\n",
" '--num_epochs=70',\n",
" '--batch_size=4096',\n",
" '--embedding_dim=128',\n",
" '--projection_dim=32',\n",
" '--layer_sizes=[512,256,128]',\n",
" '--learning_rate=0.01',\n",
" '--valid_frequency=35',\n",
" '--valid_frequency=23',\n",
" '--valid_steps=20',\n",
" '--epoch_steps=2003',\n",
" '--distribute=single',\n",
" '--model_version=v1',\n",
" '--pipeline_version=pipev1',\n",
" '--pipeline_version=pipe_v2',\n",
" '--seed=1234',\n",
" '--max_tokens=20000',\n",
" '--embed_frequency=1',\n",
" '--update_frequency=500',\n",
" '--hist_frequency=0',\n",
" '--tf_gpu_thread_count=8',\n",
" '--block_length=64',\n",
" '--num_data_shards=4',\n",
" '--chkpt_freq=500',\n",
" '--dropout_rate=0.33',\n",
" '--cache_train',\n",
" '--compute_batch_metrics',\n",
" '--use_cross_layer',\n",
" '--use_dropout'],\n",
" 'command': ['python', '-m', 'src.two_tower_jt.task'],\n",
" 'image_uri': 'us-central1-docker.pkg.dev/hybrid-vertex/ndr-v1-spotify/train-v1'},\n",
" 'image_uri': 'us-central1-docker.pkg.dev/hybrid-vertex/ndr-v1-spotify/train-v1:latest'},\n",
" 'machine_spec': {'accelerator_count': 1,\n",
" 'accelerator_type': 'NVIDIA_TESLA_T4',\n",
" 'machine_type': 'n1-highmem-16'},\n",
Expand Down Expand Up @@ -2724,7 +2723,7 @@
" f'--chkpt_freq={CHECKPOINT_FREQ}',\n",
" f'--dropout_rate={DROPOUT_RATE}',\n",
" # uncomment these to pass value of True (bool)\n",
" f'--cache_train', # caches train_dataset\n",
" # f'--cache_train', # caches train_dataset\n",
" # f'--evaluate_model', # runs model.eval()\n",
" # f'--write_embeddings', # writes embeddings index in train job\n",
" # f'--profiler', # runs TB profiler\n",
Expand All @@ -2735,7 +2734,7 @@
"]\n",
"\n",
"WORKER_POOL_SPECS = workerpool_specs.prepare_worker_pool_specs(\n",
" image_uri=REMOTE_IMAGE_NAME,\n",
" image_uri=f\"{REMOTE_IMAGE_NAME}:latest\",\n",
" args=WORKER_ARGS,\n",
" cmd=WORKER_CMD,\n",
" replica_count=REPLICA_COUNT,\n",
Expand All @@ -2761,7 +2760,7 @@
"output_type": "stream",
"text": [
"/home/jupyter/jw-repo2/spotify_mpd_two_tower\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root\n",
"src\n"
]
}
Expand Down Expand Up @@ -2793,7 +2792,7 @@
{
"data": {
"text/plain": [
"'gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528'"
"'gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808'"
]
},
"execution_count": 40,
Expand All @@ -2819,7 +2818,7 @@
"output_type": "stream",
"text": [
"\n",
" Copied training package and Dockerfile to gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root\n",
" Copied training package and Dockerfile to gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root\n",
"\n"
]
}
Expand Down Expand Up @@ -2849,16 +2848,16 @@
"name": "stdout",
"output_type": "stream",
"text": [
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root/trainer/__init__.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root/trainer/feature_sets.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root/trainer/interactive_train.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root/trainer/requirements.txt\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root/trainer/task.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root/trainer/test_instances.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root/trainer/train_config.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root/trainer/train_utils.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root/trainer/two_tower.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root/trainer/__pycache__/\n"
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root/trainer/__init__.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root/trainer/feature_sets.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root/trainer/interactive_train.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root/trainer/requirements.txt\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root/trainer/task.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root/trainer/test_instances.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root/trainer/train_config.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root/trainer/train_utils.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root/trainer/two_tower.py\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root/trainer/__pycache__/\n"
]
}
],
Expand All @@ -2884,8 +2883,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"PIPELINE_TAG: 2tower-pipev1\n",
"PIPELINE_NAME: tfrs-v1-2tower-pipev1\n"
"PIPELINE_TAG: 2tower-pipe_v2\n",
"PIPELINE_NAME: tfrs-v1-2tower-pipe-v2\n"
]
}
],
Expand Down Expand Up @@ -3326,7 +3325,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"PIPELINES_FILEPATH: gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root/pipeline_spec.json\n"
"PIPELINES_FILEPATH: gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root/pipeline_spec.json\n"
]
}
],
Expand All @@ -3349,10 +3348,9 @@
"name": "stdout",
"output_type": "stream",
"text": [
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root/Dockerfile_tfrs\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root/pipeline_spec.json\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root/934903580331/\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230922-202528/pipeline_root/trainer/\n"
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root/Dockerfile_tfrs\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root/pipeline_spec.json\n",
"gs://ndr-v1-hybrid-vertex-bucket/tfrs-pipe-v1/run-20230925-203808/pipeline_root/trainer/\n"
]
}
],
Expand Down

0 comments on commit ff60f36

Please sign in to comment.