diff --git a/05-candidate-generation.ipynb b/05-candidate-generation.ipynb index 1c673f5..787b82e 100644 --- a/05-candidate-generation.ipynb +++ b/05-candidate-generation.ipynb @@ -59,15 +59,15 @@ "output_type": "stream", "text": [ "\n", - "PROJECT_ID = \"hybrid-vertex\"\n", - "PROJECT_NUM = \"934903580331\"\n", + "PROJECT_ID = \"cpg-cdp\"\n", + "PROJECT_NUM = \"939655404703\"\n", "LOCATION = \"us-central1\"\n", "\n", "REGION = \"us-central1\"\n", "BQ_LOCATION = \"US\"\n", - "VPC_NETWORK_NAME = \"ucaip-haystack-vpc-network\"\n", + "VPC_NETWORK_NAME = \"genai-haystack-vpc\"\n", "\n", - "VERTEX_SA = \"934903580331-compute@developer.gserviceaccount.com\"\n", + "VERTEX_SA = \"939655404703-compute@developer.gserviceaccount.com\"\n", "\n", "PREFIX = \"ndr-v1\"\n", "VERSION = \"v1\"\n", @@ -78,12 +78,12 @@ "DATA_VERSION = \"v1\"\n", "TRACK_HISTORY = \"5\"\n", "\n", - "BUCKET_NAME = \"ndr-v1-hybrid-vertex-bucket\"\n", - "BUCKET_URI = \"gs://ndr-v1-hybrid-vertex-bucket\"\n", + "BUCKET_NAME = \"ndr-v1-cpg-cdp-bucket\"\n", + "BUCKET_URI = \"gs://ndr-v1-cpg-cdp-bucket\"\n", "SOURCE_BUCKET = \"spotify-million-playlist-dataset\"\n", "\n", "DATA_GCS_PREFIX = \"data\"\n", - "DATA_PATH = \"gs://ndr-v1-hybrid-vertex-bucket/data\"\n", + "DATA_PATH = \"gs://ndr-v1-cpg-cdp-bucket/data\"\n", "VOCAB_SUBDIR = \"vocabs\"\n", "VOCAB_FILENAME = \"vocab_dict.pkl\"\n", "\n", @@ -91,16 +91,19 @@ "TRAIN_DIR_PREFIX = \"train\"\n", "VALID_DIR_PREFIX = \"valid\"\n", "\n", - "VPC_NETWORK_FULL = \"projects/934903580331/global/networks/ucaip-haystack-vpc-network\"\n", + "VPC_NETWORK_FULL = \"projects/939655404703/global/networks/genai-haystack-vpc\"\n", "\n", "BQ_DATASET = \"spotify_e2e_test\"\n", + "BQ_TABLE_TRAIN = \"train_flatten_last_5\"\n", + "BQ_TABLE_VALID = \"train_flatten_valid_last_5\"\n", + "BQ_TABLE_CANDIDATES = \"candidates\"\n", "\n", "REPO_SRC = \"src\"\n", "PIPELINES_SUB_DIR = \"feature_pipes\"\n", "\n", "REPOSITORY = \"ndr-v1-spotify\"\n", "IMAGE_NAME = \"train-v1\"\n", - "REMOTE_IMAGE_NAME = \"us-central1-docker.pkg.dev/hybrid-vertex/ndr-v1-spotify/train-v1\"\n", + "REMOTE_IMAGE_NAME = \"us-central1-docker.pkg.dev/cpg-cdp/ndr-v1-spotify/train-v1\"\n", "DOCKERNAME = \"tfrs\"\n", "\n", "SERVING_IMAGE_URI_CPU = \"us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-11:latest\"\n", @@ -174,15 +177,11 @@ "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'),\n", - " PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "Num GPUs Available: 1\n" + ] } ], "source": [ @@ -195,10 +194,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "5f692228-502d-4a6b-97c7-a83aa8b20649", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "22" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "device = cuda.get_current_device()\n", "device.reset()\n", @@ -223,7 +233,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "PATH_TO_INDEX_DIR: local-train-v1/run-20230919-135222\n" + "PATH_TO_INDEX_DIR: local-train-v2/run-20230925-234505\n" ] } ], @@ -231,8 +241,8 @@ "# TODO - grab from saved candiate_embedding.json URI\n", "# local-train-v1/run-20230919-135222/candidates/candidate_embeddings.json\n", "\n", - "EXPERIMENT_NAME = \"local-train-v1\" # TODO\n", - "RUN_NAME = \"run-20230919-135222\" # TODO\n", + "EXPERIMENT_NAME = \"local-train-v2\" # TODO\n", + "RUN_NAME = \"run-20230925-234505\" # TODO\n", "\n", "PATH_TO_INDEX_DIR = f'{EXPERIMENT_NAME}/{RUN_NAME}'\n", "print(f\"PATH_TO_INDEX_DIR: {PATH_TO_INDEX_DIR}\")" @@ -240,7 +250,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 7, "id": "6d905d16-b1c5-4d66-add2-dadd541a38fe", "metadata": {}, "outputs": [ @@ -248,11 +258,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "gs://ndr-v1-hybrid-vertex-bucket/local-train-v1/run-20230919-135222/\n", - "gs://ndr-v1-hybrid-vertex-bucket/local-train-v1/run-20230919-135222/candidate_model/\n", - "gs://ndr-v1-hybrid-vertex-bucket/local-train-v1/run-20230919-135222/candidates/\n", - "gs://ndr-v1-hybrid-vertex-bucket/local-train-v1/run-20230919-135222/query_model/\n", - "gs://ndr-v1-hybrid-vertex-bucket/local-train-v1/run-20230919-135222/tb-logs/\n" + "gs://ndr-v1-cpg-cdp-bucket/local-train-v2/run-20230925-234505/\n", + "gs://ndr-v1-cpg-cdp-bucket/local-train-v2/run-20230925-234505/model-dir/\n" ] } ], @@ -262,7 +269,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 8, "id": "485028ff-e7a1-463a-ae3a-9ea72c39ed07", "metadata": {}, "outputs": [ @@ -270,7 +277,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "CANDIDATE_MODEL_DIR: gs://ndr-v1-hybrid-vertex-bucket/local-train-v1/run-20230919-135222/candidate_model\n" + "CANDIDATE_MODEL_DIR: gs://ndr-v1-cpg-cdp-bucket/local-train-v2/run-20230925-234505/model-dir/candidate_model\n" ] } ], @@ -285,7 +292,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 9, "id": "4761374c-ec81-4be2-aa38-ba5751609aa5", "metadata": {}, "outputs": [ @@ -293,11 +300,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "gs://ndr-v1-hybrid-vertex-bucket/local-train-v1/run-20230919-135222/candidate_model/\n", - "gs://ndr-v1-hybrid-vertex-bucket/local-train-v1/run-20230919-135222/candidate_model/fingerprint.pb\n", - "gs://ndr-v1-hybrid-vertex-bucket/local-train-v1/run-20230919-135222/candidate_model/saved_model.pb\n", - "gs://ndr-v1-hybrid-vertex-bucket/local-train-v1/run-20230919-135222/candidate_model/assets/\n", - "gs://ndr-v1-hybrid-vertex-bucket/local-train-v1/run-20230919-135222/candidate_model/variables/\n" + "gs://ndr-v1-cpg-cdp-bucket/local-train-v2/run-20230925-234505/model-dir/candidate_model/\n", + "gs://ndr-v1-cpg-cdp-bucket/local-train-v2/run-20230925-234505/model-dir/candidate_model/fingerprint.pb\n", + "gs://ndr-v1-cpg-cdp-bucket/local-train-v2/run-20230925-234505/model-dir/candidate_model/saved_model.pb\n", + "gs://ndr-v1-cpg-cdp-bucket/local-train-v2/run-20230925-234505/model-dir/candidate_model/assets/\n", + "gs://ndr-v1-cpg-cdp-bucket/local-train-v2/run-20230925-234505/model-dir/candidate_model/variables/\n" ] } ], @@ -315,17 +322,17 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 10, "id": "3886b062-b146-4be1-8884-7621de96e02d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "_SignatureMap({'serving_default': })" + "_SignatureMap({'serving_default': })" ] }, - "execution_count": 17, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -339,7 +346,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 11, "id": "bef25c58-eb42-4f4b-a4d2-5304d371aa1b", "metadata": {}, "outputs": [ @@ -357,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 12, "id": "a416aead-8da1-4845-bff9-56f5dedf93ad", "metadata": {}, "outputs": [ @@ -376,7 +383,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 13, "id": "e2de03c0-4fea-448e-822e-be686ca53617", "metadata": {}, "outputs": [ @@ -386,7 +393,7 @@ "{'output_1': TensorShape([None, 128])}" ] }, - "execution_count": 20, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -413,7 +420,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 14, "id": "302f2b08-86e4-4519-a267-9b9783142b83", "metadata": {}, "outputs": [], @@ -426,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 15, "id": "40a571dd-c26c-4d06-81ca-6eea6713b3bf", "metadata": {}, "outputs": [ @@ -458,7 +465,7 @@ " 'track_time_signature_can': FixedLenFeature(shape=(), dtype=tf.string, default_value=None)}" ] }, - "execution_count": 22, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -478,17 +485,17 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 16, "id": "b6fd698b-f06e-40f1-81c8-7b1f266ce297", "metadata": {}, "outputs": [], "source": [ - "CANDIDATE_PREFIX = f'data/{DATA_VERSION}/candidates' " + "CANDIDATE_PREFIX = f'candidates' " ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 17, "id": "871fdb33-cdd7-490d-91f0-2fcd33819b43", "metadata": {}, "outputs": [], @@ -513,12 +520,12 @@ " options\n", ")\n", "\n", - "parsed_candidate_dataset = parsed_candidate_dataset.cache() #400 MB on machine mem" + "# parsed_candidate_dataset = parsed_candidate_dataset.cache() #400 MB on machine mem" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 18, "id": "4948d822-f53b-4d7b-806a-716f092ea220", "metadata": {}, "outputs": [ @@ -526,29 +533,29 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'album_name_can': ,\n", - " 'album_uri_can': ,\n", - " 'artist_followers_can': ,\n", - " 'artist_genres_can': ,\n", - " 'artist_name_can': ,\n", - " 'artist_pop_can': ,\n", - " 'artist_uri_can': ,\n", - " 'duration_ms_can': ,\n", - " 'track_acousticness_can': ,\n", - " 'track_danceability_can': ,\n", - " 'track_energy_can': ,\n", - " 'track_instrumentalness_can': ,\n", - " 'track_key_can': ,\n", - " 'track_liveness_can': ,\n", - " 'track_loudness_can': ,\n", + "{'album_name_can': ,\n", + " 'album_uri_can': ,\n", + " 'artist_followers_can': ,\n", + " 'artist_genres_can': ,\n", + " 'artist_name_can': ,\n", + " 'artist_pop_can': ,\n", + " 'artist_uri_can': ,\n", + " 'duration_ms_can': ,\n", + " 'track_acousticness_can': ,\n", + " 'track_danceability_can': ,\n", + " 'track_energy_can': ,\n", + " 'track_instrumentalness_can': ,\n", + " 'track_key_can': ,\n", + " 'track_liveness_can': ,\n", + " 'track_loudness_can': ,\n", " 'track_mode_can': ,\n", - " 'track_name_can': ,\n", - " 'track_pop_can': ,\n", - " 'track_speechiness_can': ,\n", - " 'track_tempo_can': ,\n", + " 'track_name_can': ,\n", + " 'track_pop_can': ,\n", + " 'track_speechiness_can': ,\n", + " 'track_tempo_can': ,\n", " 'track_time_signature_can': ,\n", - " 'track_uri_can': ,\n", - " 'track_valence_can': }\n", + " 'track_uri_can': ,\n", + " 'track_valence_can': }\n", "_______________\n" ] } @@ -561,17 +568,17 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 19, "id": "699ca0eb-efe5-42fd-9190-a9ce644f06eb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "<_OptionsDataset element_spec={'album_name_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'album_uri_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_followers_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'artist_genres_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_name_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'artist_pop_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'artist_uri_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'duration_ms_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'track_acousticness_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'track_danceability_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'track_energy_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'track_instrumentalness_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'track_key_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'track_liveness_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'track_loudness_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'track_mode_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'track_name_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'track_pop_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'track_speechiness_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'track_tempo_can': TensorSpec(shape=(), dtype=tf.float32, name=None), 'track_time_signature_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'track_uri_can': TensorSpec(shape=(), dtype=tf.string, name=None), 'track_valence_can': TensorSpec(shape=(), dtype=tf.float32, name=None)}>" ] }, - "execution_count": 26, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -595,20 +602,10 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 26, "id": "328b63a3-ddf5-4c31-9fc5-25137c3270f3", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Copying gs://jt-tfrs-central/pipe-dev-2tower-tfrs-jtv10/run-20221228-210041/candidates/candidate_embeddings.json...\n", - "- [1 files][882.4 MiB/882.4 MiB] 68.6 MiB/s \n", - "Operation completed over 1 objects/882.4 MiB. \n" - ] - } - ], + "outputs": [], "source": [ "# previously created embedding output\n", "# !gsutil cp gs://jt-tfrs-central/pipe-dev-2tower-tfrs-jtv10/run-20221228-210041/candidates/candidate_embeddings.json candidate_embs_20221228_210041.json" @@ -624,87 +621,28 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 64, "id": "52540283-930f-4d04-b26b-abba3df5184e", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "WARNING:tensorflow:From /home/jupyter/.local/lib/python3.7/site-packages/tensorflow/python/autograph/pyct/static_analysis/liveness.py:83: Analyzer.lamba_check (from tensorflow.python.autograph.pyct.static_analysis.liveness) is deprecated and will be removed after 2023-09-23.\n", - "Instructions for updating:\n", - "Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089\n", - "elapsed_time: 141\n", - "Length of embs: 2243885\n" + "elapsed_time: 4\n", + "Length of embs: 225\n" ] - }, - { - "data": { - "text/plain": [ - "{'output_1': }" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ "start_time = time.time()\n", "\n", - "embs_iter = parsed_candidate_dataset.batch(1).map(\n", - " lambda data: candidate_predictor(\n", - " track_uri_can = data[\"track_uri_can\"],\n", - " track_name_can = data['track_name_can'],\n", - " artist_uri_can = data['artist_uri_can'],\n", - " artist_name_can = data['artist_name_can'],\n", - " album_uri_can = data['album_uri_can'],\n", - " album_name_can = data['album_name_can'],\n", - " duration_ms_can = data['duration_ms_can'],\n", - " track_pop_can = data['track_pop_can'],\n", - " artist_pop_can = data['artist_pop_can'],\n", - " artist_genres_can = data['artist_genres_can'],\n", - " artist_followers_can = data['artist_followers_can'],\n", - " track_danceability_can = data['track_danceability_can'],\n", - " track_energy_can = data['track_energy_can'],\n", - " track_key_can = data['track_key_can'],\n", - " track_loudness_can = data['track_loudness_can'],\n", - " track_mode_can = data['track_mode_can'],\n", - " track_speechiness_can = data['track_speechiness_can'],\n", - " track_acousticness_can = data['track_acousticness_can'],\n", - " track_instrumentalness_can = data['track_instrumentalness_can'],\n", - " track_liveness_can = data['track_liveness_can'],\n", - " track_valence_can = data['track_valence_can'],\n", - " track_tempo_can = data['track_tempo_can'],\n", - " track_time_signature_can = data['track_time_signature_can']\n", - " )\n", + "embs_iter = parsed_candidate_dataset.batch(10000).map(\n", + " lambda data: ( data[\"track_uri_can\"]\n", + " ,loaded_candidate_model(data)\n", + " )\n", ")\n", "\n", "embs = []\n", @@ -715,23 +653,22 @@ "elapsed_time = int((end_time - start_time) / 60)\n", "print(f\"elapsed_time: {elapsed_time}\")\n", "\n", - "print(f\"Length of embs: {len(embs)}\")\n", - "embs[0]" + "print(f\"Length of embs: {len(embs)}\")" ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 65, "id": "c7d2a511-25bb-4a8d-8406-6cdb5e3c7e1b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "2243885" + "225" ] }, - "execution_count": 36, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" } @@ -750,7 +687,51 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 67, + "id": "56e9ff30-4b80-497e-ad2c-dc37e6aa5217", + "metadata": {}, + "outputs": [], + "source": [ + "x,y = embs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "99d25704-b99e-4af1-bb7b-aa41cd2835d7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y" + ] + }, + { + "cell_type": "code", + "execution_count": 79, "id": "b6969996-dcb3-4db0-ab51-915b6b985e7c", "metadata": {}, "outputs": [ @@ -765,7 +746,11 @@ "source": [ "start_time = time.time()\n", "\n", - "cleaned_embs = [x['output_1'].numpy()[0] for x in embs] #clean up the output\n", + "cleaned_embs = [] #clean up the output\n", + "track_uris = []\n", + "for ids , embedding in embs:\n", + " cleaned_embs.extend(embedding.numpy())\n", + " track_uris.extend(ids.numpy())\n", "\n", "end_time = time.time()\n", "elapsed_time = int((end_time - start_time) / 60)\n", @@ -774,7 +759,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 82, "id": "0dc79e64-aca5-4058-8656-99eb11477854", "metadata": {}, "outputs": [ @@ -788,35 +773,35 @@ { "data": { "text/plain": [ - "array([ 1.4558885 , 0.6574247 , -1.6407717 , -1.5887606 , -0.8956078 ,\n", - " -0.99810946, -0.40141183, 0.24247266, 0.6559755 , -0.0870877 ,\n", - " 0.4873863 , -1.192624 , 0.05810613, 0.65198994, -1.1014075 ,\n", - " 0.4844147 , 1.9804701 , -0.15204096, 1.7292233 , 0.60755455,\n", - " 0.52051014, 0.4429888 , 0.29365444, -0.4539186 , 0.42860448,\n", - " -0.54497916, -0.4762391 , -0.6574926 , -0.365402 , 0.8710664 ,\n", - " -0.6119961 , -0.8461934 , -0.4577797 , -0.01408611, -0.78435844,\n", - " 0.28907436, 0.43767878, -0.13469195, 0.28730276, -0.57131904,\n", - " -0.9392587 , -0.44394577, -0.53406644, 0.8327801 , 0.51560533,\n", - " 1.273033 , 0.32302907, -0.7242595 , 1.4502484 , -1.918178 ,\n", - " 0.18540913, 0.02203418, 1.2570223 , 0.779727 , 0.64376956,\n", - " -0.98304224, 1.9677243 , 0.3062334 , 1.4187527 , 0.7121415 ,\n", - " -0.24123847, 0.06877614, -0.9489165 , -1.7127383 , -0.68183535,\n", - " 1.1871426 , -0.05245163, -0.889681 , -0.10291553, -0.48856598,\n", - " -0.42148122, -0.2811011 , 1.124782 , -0.16637069, -0.23592445,\n", - " 0.17452964, -0.9590089 , -0.239284 , -0.7332347 , -0.5400953 ,\n", - " -0.7519743 , 1.5516475 , -0.68347657, 0.4984031 , 1.5010489 ,\n", - " 0.6671566 , -0.83567864, 0.66306823, -0.58128446, 0.49536583,\n", - " 0.46563637, -1.2337325 , -0.0851446 , 1.5076903 , 0.5815354 ,\n", - " 0.27275312, -1.040551 , -1.2742573 , 0.86381525, -1.6726958 ,\n", - " 0.33581898, -0.30517903, 0.74605113, -0.12091698, 0.51321703,\n", - " 0.06733688, 0.643067 , -0.8515033 , 0.17619704, -0.07051513,\n", - " 1.6417184 , 0.43914312, 1.6886785 , 0.64881957, -1.0398052 ,\n", - " 0.5104308 , -0.54082274, -1.3020104 , -0.38667956, -0.8198859 ,\n", - " -0.5548473 , -0.2751418 , 0.20881599, -0.27896985, 0.24578105,\n", - " 0.22008903, -0.5134009 , -0.41392204], dtype=float32)" + "array([-0.00875475, 0.4296594 , -1.0579184 , 0.33055162, 0.75509065,\n", + " -1.2682275 , 0.40074062, 1.2643234 , 1.4407821 , -0.81765157,\n", + " 1.0556922 , -0.38197348, -0.50490004, 0.16666569, -0.8488423 ,\n", + " 0.6206088 , 1.0948801 , -1.4514679 , 1.6594619 , 1.393434 ,\n", + " 1.3103865 , 0.5871945 , -0.77169776, -0.77867335, 0.30685037,\n", + " -1.091643 , -0.02446085, -0.44908383, 0.5722308 , 1.1759523 ,\n", + " -0.8426078 , 0.14493963, 0.90350306, -0.5260758 , -0.45274788,\n", + " -0.93923473, 0.75078434, -1.1178484 , 0.5522108 , 0.16904452,\n", + " -0.47102752, -1.2308649 , -0.39195207, 1.0723114 , -0.15880573,\n", + " 0.36832523, -1.0887698 , -1.3357592 , 0.8934939 , -0.79218334,\n", + " -0.21843508, 0.17024872, 1.3688272 , 0.8668576 , -0.1617419 ,\n", + " -0.48149663, 0.46772346, 0.99565965, -0.7618725 , 0.1475042 ,\n", + " -0.6342002 , 0.33067462, 0.15164517, -1.3497765 , -1.3240782 ,\n", + " 0.76521456, -1.2219937 , -0.7931432 , 0.97855496, 0.7467413 ,\n", + " -0.02284818, -1.7751491 , 0.56792647, -0.12896144, -0.0579375 ,\n", + " -0.5185563 , -0.7565005 , -0.80555063, -1.1301348 , -0.11285232,\n", + " 0.3331379 , 0.14850451, -0.83839 , 1.4484423 , 0.28467232,\n", + " 0.9029052 , -0.7156413 , 0.93304956, -0.7816973 , -1.4596479 ,\n", + " 1.1668905 , -0.6932967 , -0.12366682, 0.7515199 , 0.35072482,\n", + " -0.8035156 , -0.19282989, -1.4523159 , 0.5372954 , -0.8343574 ,\n", + " 0.6229277 , -0.91178715, 1.2217942 , 0.09963001, 0.10299594,\n", + " 0.6492107 , -0.36633113, -1.0306698 , 1.1886839 , 0.43315023,\n", + " 0.8821403 , 1.0549035 , 0.78654313, 0.4184367 , 1.3331281 ,\n", + " 1.3401752 , -0.62402946, -0.45836085, -0.8325017 , -1.1248122 ,\n", + " -0.8744741 , -0.8500382 , 1.6569259 , -0.35547775, 0.10615328,\n", + " 1.6157944 , 0.43617558, -0.27654117], dtype=float32)" ] }, - "execution_count": 38, + "execution_count": 82, "metadata": {}, "output_type": "execute_result" } @@ -836,40 +821,20 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 83, "id": "2fde9926-d898-49a9-bcde-1a1854c8fbd0", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Length of track_uris: 2243885\n" - ] - }, - { - "data": { - "text/plain": [ - "b'spotify:track:2XZ3bL3ROk605SPpy6Dn9C'" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# clean product IDs\n", - "track_uris = [x['track_uri_can'].numpy() for x in parsed_candidate_dataset]\n", - "\n", - "print(f\"Length of track_uris: {len(track_uris)}\")\n", + "# track_uris = [ids.numpy() for ids , _ in embs]\n", "\n", - "track_uris[0]" + "# print(f\"Length of track_uris: {len(track_uris)}\")" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 84, "id": "fb203cb7-1254-4257-b6be-3624885eb19d", "metadata": {}, "outputs": [ @@ -883,10 +848,10 @@ { "data": { "text/plain": [ - "'spotify:track:2XZ3bL3ROk605SPpy6Dn9C'" + "'spotify:track:4yHc5LZQBLi0H3PpvcM0S8'" ] }, - "execution_count": 40, + "execution_count": 84, "metadata": {}, "output_type": "execute_result" } @@ -902,7 +867,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 85, "id": "36a10d13-4a0a-4550-a78d-d92489be78ef", "metadata": {}, "outputs": [ @@ -920,6 +885,16 @@ "print(f\"Length of track_uris_cleaned: {len(track_uris_decoded)}\")" ] }, + { + "cell_type": "code", + "execution_count": 86, + "id": "42e2db67-7b96-4f31-8056-b3f3fa4876b7", + "metadata": {}, + "outputs": [], + "source": [ + "# Length of track_uris_cleaned: 2243885\n" + ] + }, { "cell_type": "markdown", "id": "270376ce-3d4a-452d-97d6-d54592aebe05", @@ -930,42 +905,42 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 87, "id": "758258ee-6a11-4fbd-8a48-b0fffc7b6b3d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([ 1.4558885 , 0.6574247 , -1.6407717 , -1.5887606 , -0.8956078 ,\n", - " -0.99810946, -0.40141183, 0.24247266, 0.6559755 , -0.0870877 ,\n", - " 0.4873863 , -1.192624 , 0.05810613, 0.65198994, -1.1014075 ,\n", - " 0.4844147 , 1.9804701 , -0.15204096, 1.7292233 , 0.60755455,\n", - " 0.52051014, 0.4429888 , 0.29365444, -0.4539186 , 0.42860448,\n", - " -0.54497916, -0.4762391 , -0.6574926 , -0.365402 , 0.8710664 ,\n", - " -0.6119961 , -0.8461934 , -0.4577797 , -0.01408611, -0.78435844,\n", - " 0.28907436, 0.43767878, -0.13469195, 0.28730276, -0.57131904,\n", - " -0.9392587 , -0.44394577, -0.53406644, 0.8327801 , 0.51560533,\n", - " 1.273033 , 0.32302907, -0.7242595 , 1.4502484 , -1.918178 ,\n", - " 0.18540913, 0.02203418, 1.2570223 , 0.779727 , 0.64376956,\n", - " -0.98304224, 1.9677243 , 0.3062334 , 1.4187527 , 0.7121415 ,\n", - " -0.24123847, 0.06877614, -0.9489165 , -1.7127383 , -0.68183535,\n", - " 1.1871426 , -0.05245163, -0.889681 , -0.10291553, -0.48856598,\n", - " -0.42148122, -0.2811011 , 1.124782 , -0.16637069, -0.23592445,\n", - " 0.17452964, -0.9590089 , -0.239284 , -0.7332347 , -0.5400953 ,\n", - " -0.7519743 , 1.5516475 , -0.68347657, 0.4984031 , 1.5010489 ,\n", - " 0.6671566 , -0.83567864, 0.66306823, -0.58128446, 0.49536583,\n", - " 0.46563637, -1.2337325 , -0.0851446 , 1.5076903 , 0.5815354 ,\n", - " 0.27275312, -1.040551 , -1.2742573 , 0.86381525, -1.6726958 ,\n", - " 0.33581898, -0.30517903, 0.74605113, -0.12091698, 0.51321703,\n", - " 0.06733688, 0.643067 , -0.8515033 , 0.17619704, -0.07051513,\n", - " 1.6417184 , 0.43914312, 1.6886785 , 0.64881957, -1.0398052 ,\n", - " 0.5104308 , -0.54082274, -1.3020104 , -0.38667956, -0.8198859 ,\n", - " -0.5548473 , -0.2751418 , 0.20881599, -0.27896985, 0.24578105,\n", - " 0.22008903, -0.5134009 , -0.41392204], dtype=float32)" + "array([-0.00875475, 0.4296594 , -1.0579184 , 0.33055162, 0.75509065,\n", + " -1.2682275 , 0.40074062, 1.2643234 , 1.4407821 , -0.81765157,\n", + " 1.0556922 , -0.38197348, -0.50490004, 0.16666569, -0.8488423 ,\n", + " 0.6206088 , 1.0948801 , -1.4514679 , 1.6594619 , 1.393434 ,\n", + " 1.3103865 , 0.5871945 , -0.77169776, -0.77867335, 0.30685037,\n", + " -1.091643 , -0.02446085, -0.44908383, 0.5722308 , 1.1759523 ,\n", + " -0.8426078 , 0.14493963, 0.90350306, -0.5260758 , -0.45274788,\n", + " -0.93923473, 0.75078434, -1.1178484 , 0.5522108 , 0.16904452,\n", + " -0.47102752, -1.2308649 , -0.39195207, 1.0723114 , -0.15880573,\n", + " 0.36832523, -1.0887698 , -1.3357592 , 0.8934939 , -0.79218334,\n", + " -0.21843508, 0.17024872, 1.3688272 , 0.8668576 , -0.1617419 ,\n", + " -0.48149663, 0.46772346, 0.99565965, -0.7618725 , 0.1475042 ,\n", + " -0.6342002 , 0.33067462, 0.15164517, -1.3497765 , -1.3240782 ,\n", + " 0.76521456, -1.2219937 , -0.7931432 , 0.97855496, 0.7467413 ,\n", + " -0.02284818, -1.7751491 , 0.56792647, -0.12896144, -0.0579375 ,\n", + " -0.5185563 , -0.7565005 , -0.80555063, -1.1301348 , -0.11285232,\n", + " 0.3331379 , 0.14850451, -0.83839 , 1.4484423 , 0.28467232,\n", + " 0.9029052 , -0.7156413 , 0.93304956, -0.7816973 , -1.4596479 ,\n", + " 1.1668905 , -0.6932967 , -0.12366682, 0.7515199 , 0.35072482,\n", + " -0.8035156 , -0.19282989, -1.4523159 , 0.5372954 , -0.8343574 ,\n", + " 0.6229277 , -0.91178715, 1.2217942 , 0.09963001, 0.10299594,\n", + " 0.6492107 , -0.36633113, -1.0306698 , 1.1886839 , 0.43315023,\n", + " 0.8821403 , 1.0549035 , 0.78654313, 0.4184367 , 1.3331281 ,\n", + " 1.3401752 , -0.62402946, -0.45836085, -0.8325017 , -1.1248122 ,\n", + " -0.8744741 , -0.8500382 , 1.6569259 , -0.35547775, 0.10615328,\n", + " 1.6157944 , 0.43617558, -0.27654117], dtype=float32)" ] }, - "execution_count": 42, + "execution_count": 87, "metadata": {}, "output_type": "execute_result" } @@ -976,7 +951,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 88, "id": "5fe1f6f2-d35e-4823-b4cd-035805c30ad0", "metadata": {}, "outputs": [ @@ -984,8 +959,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "bad_records: 56192\n", - "bad_record_filter: 439\n" + "bad_records: 0\n", + "bad_record_filter: 0\n" ] } ], @@ -1006,7 +981,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 89, "id": "21e81ff0-7ce5-45d6-a197-5f7f4cccb2a2", "metadata": {}, "outputs": [], @@ -1016,7 +991,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 90, "id": "5f44d09d-09a7-4acf-a0e2-3cca80e49c9d", "metadata": {}, "outputs": [], @@ -1035,42 +1010,42 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 91, "id": "d0536b04-8337-49e2-8489-0463a3b95896", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([ 1.4558885 , 0.6574247 , -1.6407717 , -1.5887606 , -0.8956078 ,\n", - " -0.99810946, -0.40141183, 0.24247266, 0.6559755 , -0.0870877 ,\n", - " 0.4873863 , -1.192624 , 0.05810613, 0.65198994, -1.1014075 ,\n", - " 0.4844147 , 1.9804701 , -0.15204096, 1.7292233 , 0.60755455,\n", - " 0.52051014, 0.4429888 , 0.29365444, -0.4539186 , 0.42860448,\n", - " -0.54497916, -0.4762391 , -0.6574926 , -0.365402 , 0.8710664 ,\n", - " -0.6119961 , -0.8461934 , -0.4577797 , -0.01408611, -0.78435844,\n", - " 0.28907436, 0.43767878, -0.13469195, 0.28730276, -0.57131904,\n", - " -0.9392587 , -0.44394577, -0.53406644, 0.8327801 , 0.51560533,\n", - " 1.273033 , 0.32302907, -0.7242595 , 1.4502484 , -1.918178 ,\n", - " 0.18540913, 0.02203418, 1.2570223 , 0.779727 , 0.64376956,\n", - " -0.98304224, 1.9677243 , 0.3062334 , 1.4187527 , 0.7121415 ,\n", - " -0.24123847, 0.06877614, -0.9489165 , -1.7127383 , -0.68183535,\n", - " 1.1871426 , -0.05245163, -0.889681 , -0.10291553, -0.48856598,\n", - " -0.42148122, -0.2811011 , 1.124782 , -0.16637069, -0.23592445,\n", - " 0.17452964, -0.9590089 , -0.239284 , -0.7332347 , -0.5400953 ,\n", - " -0.7519743 , 1.5516475 , -0.68347657, 0.4984031 , 1.5010489 ,\n", - " 0.6671566 , -0.83567864, 0.66306823, -0.58128446, 0.49536583,\n", - " 0.46563637, -1.2337325 , -0.0851446 , 1.5076903 , 0.5815354 ,\n", - " 0.27275312, -1.040551 , -1.2742573 , 0.86381525, -1.6726958 ,\n", - " 0.33581898, -0.30517903, 0.74605113, -0.12091698, 0.51321703,\n", - " 0.06733688, 0.643067 , -0.8515033 , 0.17619704, -0.07051513,\n", - " 1.6417184 , 0.43914312, 1.6886785 , 0.64881957, -1.0398052 ,\n", - " 0.5104308 , -0.54082274, -1.3020104 , -0.38667956, -0.8198859 ,\n", - " -0.5548473 , -0.2751418 , 0.20881599, -0.27896985, 0.24578105,\n", - " 0.22008903, -0.5134009 , -0.41392204], dtype=float32)" + "array([-0.00875475, 0.4296594 , -1.0579184 , 0.33055162, 0.75509065,\n", + " -1.2682275 , 0.40074062, 1.2643234 , 1.4407821 , -0.81765157,\n", + " 1.0556922 , -0.38197348, -0.50490004, 0.16666569, -0.8488423 ,\n", + " 0.6206088 , 1.0948801 , -1.4514679 , 1.6594619 , 1.393434 ,\n", + " 1.3103865 , 0.5871945 , -0.77169776, -0.77867335, 0.30685037,\n", + " -1.091643 , -0.02446085, -0.44908383, 0.5722308 , 1.1759523 ,\n", + " -0.8426078 , 0.14493963, 0.90350306, -0.5260758 , -0.45274788,\n", + " -0.93923473, 0.75078434, -1.1178484 , 0.5522108 , 0.16904452,\n", + " -0.47102752, -1.2308649 , -0.39195207, 1.0723114 , -0.15880573,\n", + " 0.36832523, -1.0887698 , -1.3357592 , 0.8934939 , -0.79218334,\n", + " -0.21843508, 0.17024872, 1.3688272 , 0.8668576 , -0.1617419 ,\n", + " -0.48149663, 0.46772346, 0.99565965, -0.7618725 , 0.1475042 ,\n", + " -0.6342002 , 0.33067462, 0.15164517, -1.3497765 , -1.3240782 ,\n", + " 0.76521456, -1.2219937 , -0.7931432 , 0.97855496, 0.7467413 ,\n", + " -0.02284818, -1.7751491 , 0.56792647, -0.12896144, -0.0579375 ,\n", + " -0.5185563 , -0.7565005 , -0.80555063, -1.1301348 , -0.11285232,\n", + " 0.3331379 , 0.14850451, -0.83839 , 1.4484423 , 0.28467232,\n", + " 0.9029052 , -0.7156413 , 0.93304956, -0.7816973 , -1.4596479 ,\n", + " 1.1668905 , -0.6932967 , -0.12366682, 0.7515199 , 0.35072482,\n", + " -0.8035156 , -0.19282989, -1.4523159 , 0.5372954 , -0.8343574 ,\n", + " 0.6229277 , -0.91178715, 1.2217942 , 0.09963001, 0.10299594,\n", + " 0.6492107 , -0.36633113, -1.0306698 , 1.1886839 , 0.43315023,\n", + " 0.8821403 , 1.0549035 , 0.78654313, 0.4184367 , 1.3331281 ,\n", + " 1.3401752 , -0.62402946, -0.45836085, -0.8325017 , -1.1248122 ,\n", + " -0.8744741 , -0.8500382 , 1.6569259 , -0.35547775, 0.10615328,\n", + " 1.6157944 , 0.43617558, -0.27654117], dtype=float32)" ] }, - "execution_count": 46, + "execution_count": 91, "metadata": {}, "output_type": "execute_result" } @@ -1081,17 +1056,17 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 92, "id": "ed856315-b86b-48e1-a72b-2f0c812b891c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "2243446" + "2243885" ] }, - "execution_count": 47, + "execution_count": 92, "metadata": {}, "output_type": "execute_result" } @@ -1102,17 +1077,17 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 93, "id": "d75969dd-b96c-4c3e-9e7d-ee793dcd4a14", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'spotify:track:2XZ3bL3ROk605SPpy6Dn9C'" + "'spotify:track:4yHc5LZQBLi0H3PpvcM0S8'" ] }, - "execution_count": 48, + "execution_count": 93, "metadata": {}, "output_type": "execute_result" } @@ -1123,17 +1098,17 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 94, "id": "1ded397c-197d-42ad-873c-8ad452f09cb2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "2243446" + "2243885" ] }, - "execution_count": 49, + "execution_count": 94, "metadata": {}, "output_type": "execute_result" } @@ -1160,7 +1135,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 95, "id": "692c6aa5-5e50-4283-bea5-924120c7ca84", "metadata": {}, "outputs": [], @@ -1187,7 +1162,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 96, "id": "173ec51a-a6c2-4a30-848e-788a854e484c", "metadata": {}, "outputs": [ @@ -1195,9 +1170,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "INDEX_GCS_URI: gs://jt-tfrs-central-v2/8m-tfrs-v100-jtv15/run-20230125-205451/candidates-index-local\n", - "DESTINATION_BLOB_NAME: candidate_embs_local_20230130-180710.json\n", - "SOURCE_FILE_NAME: candidate_embs_local_20230130-180710.json\n" + "INDEX_GCS_URI: gs://ndr-v1-cpg-cdp-bucket/local-train-v2/run-20230925-234505/candidates-index-local\n", + "DESTINATION_BLOB_NAME: candidate_embs_local_20230926-021921.json\n", + "SOURCE_FILE_NAME: candidate_embs_local_20230926-021921.json\n" ] } ], @@ -1214,7 +1189,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 97, "id": "0340e8e7-c29f-4d91-b1d2-6a42e57f0c14", "metadata": {}, "outputs": [], @@ -1238,7 +1213,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 98, "id": "4eab8276-512d-4d93-93e7-dd160106548e", "metadata": {}, "outputs": [ @@ -1246,9 +1221,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "count of track_uris_valid: 2262292\n", + "count of track_uris_valid: 2243885\n", "\n", - "ex: track_uris_valid[0]: spotify:track:6Nx4UYbpHuU4x5mozUDaQQ\n", + "ex: track_uris_valid[0]: spotify:track:4yHc5LZQBLi0H3PpvcM0S8\n", "\n", "length of a track_uris_valid: 36\n", "\n" @@ -1265,7 +1240,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 99, "id": "98deb670-cc10-418c-b77a-08ea5548bc7d", "metadata": {}, "outputs": [ @@ -1274,7 +1249,7 @@ "output_type": "stream", "text": [ "short: 0\n", - "normal: 2262292\n", + "normal: 2243885\n", "long: 0\n" ] } @@ -1308,15 +1283,15 @@ ], "metadata": { "environment": { - "kernel": "python3", - "name": "tf2-gpu.2-10.m103", + "kernel": "conda-env-tensorflow-tensorflow", + "name": "workbench-notebooks.m111", "type": "gcloud", - "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-10:m103" + "uri": "gcr.io/deeplearning-platform-release/workbench-notebooks:m111" }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "TensorFlow 2-11", "language": "python", - "name": "python3" + "name": "conda-env-tensorflow-tensorflow" }, "language_info": { "codemirror_mode": { @@ -1328,7 +1303,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.12" + "version": "3.10.12" } }, "nbformat": 4,