diff --git a/04-custom-train-retrieval.ipynb b/04-custom-train-retrieval.ipynb index 676c426..80e6b9d 100644 --- a/04-custom-train-retrieval.ipynb +++ b/04-custom-train-retrieval.ipynb @@ -18,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "id": "03855d9e-afe2-4edc-8b78-6b1d44b109ce", "metadata": {}, "outputs": [ @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "id": "7322bf4e-0bb0-486c-b041-6882ecee7239", "metadata": {}, "outputs": [ @@ -118,7 +118,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "id": "319b0b73-d1bc-4a90-a4aa-864d2215d646", "metadata": {}, "outputs": [], @@ -146,7 +146,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "id": "2e4b6da8-8246-410c-8718-6a0dea4ea269", "metadata": {}, "outputs": [], @@ -527,7 +527,7 @@ " f'--max_tokens={MAX_TOKENS}',\n", " f'--tb_resource_name={TB_RESOURCE_NAME}',\n", " f'--embed_frequency={EMBED_FREQUENCY}',\n", - " # f'--update_frequency={UPDATE_FREQ}', # TODO - turn on\n", + " # f'--update_frequency={UPDATE_FREQ}', # TODO - turn on\n", " f'--hist_frequency={HIST_FREQUENCY}', \n", " f'--tf_gpu_thread_count={TF_GPU_THREAD_COUNT}',\n", " f'--block_length={BLOCK_LENGTH}',\n", @@ -535,7 +535,7 @@ " f'--chkpt_freq={CHECKPOINT_FREQ}',\n", " f'--dropout_rate={DROPOUT_RATE}',\n", " # uncomment these to pass value of True (bool)\n", - " # f'--cache_train', # caches train_dataset\n", + " # f'--cache_train', # caches train_dataset\n", " # f'--evaluate_model', # runs model.eval()\n", " # f'--write_embeddings', # writes embeddings index in train job\n", " f'--profiler', # runs TB profiler\n", @@ -648,7 +648,7 @@ "BASE_OUTPUT_DIR = f'gs://{BUCKET_NAME}/{EXPERIMENT_NAME}/{RUN_NAME}'\n", "\n", "# copy training Dockerfile\n", - "# !gsutil -q cp $REPO_SRC/cloudbuild.yaml $BASE_OUTPUT_DIR/cloudbuild.yaml\n", + "!gsutil -q cp $REPO_SRC/cloudbuild.yaml $BASE_OUTPUT_DIR/cloudbuild.yaml\n", "!gsutil -q cp $REPO_SRC/Dockerfile_tfrs $BASE_OUTPUT_DIR/Dockerfile_tfrs\n", "!gsutil -q cp vocab_dict.pkl $BASE_OUTPUT_DIR/vocab_dict.pkl\n", "\n", @@ -766,7 +766,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 5, "id": "33a58184-e0c4-4403-ad80-da5fdb7dbaed", "metadata": {}, "outputs": [ @@ -783,13 +783,15 @@ "\n", "import tensorflow as tf\n", "\n", - "TB_LOGS_PATH = f'{BASE_OUTPUT_DIR}/logs' # \n", + "# TB_LOGS_PATH = f'{BASE_OUTPUT_DIR}/logs'\n", + "TB_LOGS_PATH = \"gs://ndr-v1-hybrid-vertex-bucket/scale-training-v1/run-20230925-145451/logs\"\n", + "\n", "print(f\"TB_LOGS_PATH: {TB_LOGS_PATH}\")" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 6, "id": "d432d552-4416-4f4c-964f-bcca57b60a07", "metadata": {}, "outputs": [], @@ -800,7 +802,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 7, "id": "171227db-d5f9-47e4-8b67-a528950233ab", "metadata": {}, "outputs": [ @@ -808,12 +810,12 @@ "data": { "text/html": [ "\n", - " \n", "