diff --git a/22-05-02-ObjectDetection/car-object-localization.ipynb b/22-05-02-ObjectDetection/car-object-localization.ipynb new file mode 100644 index 0000000..2611ac2 --- /dev/null +++ b/22-05-02-ObjectDetection/car-object-localization.ipynb @@ -0,0 +1,473 @@ +{ + "metadata": { + "kernelspec": { + "language": "python", + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.7.12", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "colab": { + "name": "car-object-localization.ipynb", + "provenance": [] + } + }, + "nbformat_minor": 0, + "nbformat": 4, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "Dataset at https://www.kaggle.com/datasets/sshikamaru/car-object-detection" + ], + "metadata": { + "id": "Ivr4vvvVMBjL" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "\n", + "import numpy as np \n", + "import pandas as pd \n", + "import tensorflow as tf\n", + "from tensorflow import keras\n", + "import matplotlib.pyplot as plt\n", + "import math\n", + "import cv2\n", + "from PIL import Image\n", + " \n", + "\n", + "print('Tensorflow Version:', tf.__version__)\n", + "print('Keras Version:', keras.__version__)" + ], + "metadata": { + "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", + "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", + "execution": { + "iopub.status.busy": "2022-05-02T16:41:07.442743Z", + "iopub.execute_input": "2022-05-02T16:41:07.443565Z", + "iopub.status.idle": "2022-05-02T16:41:12.557199Z", + "shell.execute_reply.started": "2022-05-02T16:41:07.443471Z", + "shell.execute_reply": "2022-05-02T16:41:12.556353Z" + }, + "trusted": true, + "id": "IV1MHcP9KZQC" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "tf.config.list_physical_devices()" + ], + "metadata": { + "execution": { + "iopub.status.busy": "2022-05-02T16:41:12.558879Z", + "iopub.execute_input": "2022-05-02T16:41:12.559080Z", + "iopub.status.idle": "2022-05-02T16:41:12.760391Z", + "shell.execute_reply.started": "2022-05-02T16:41:12.559051Z", + "shell.execute_reply": "2022-05-02T16:41:12.758182Z" + }, + "trusted": true, + "id": "p6BqhdxHKZQJ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "image_root = '../input/car-object-detection/data/training_images'\n", + "BATCH_SIZE = 16\n", + "IMAGE_SIZE = (380, 676)" + ], + "metadata": { + "execution": { + "iopub.status.busy": "2022-05-02T16:41:12.762052Z", + "iopub.execute_input": "2022-05-02T16:41:12.762684Z", + "iopub.status.idle": "2022-05-02T16:41:12.771559Z", + "shell.execute_reply.started": "2022-05-02T16:41:12.762646Z", + "shell.execute_reply": "2022-05-02T16:41:12.770828Z" + }, + "trusted": true, + "id": "WaVnuUW5KZQK" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "df = pd.read_csv('../input/car-object-detection/data/train_solution_bounding_boxes (1).csv')\n", + "df.sample(4)" + ], + "metadata": { + "execution": { + "iopub.status.busy": "2022-05-02T16:41:12.775356Z", + "iopub.execute_input": "2022-05-02T16:41:12.775560Z", + "iopub.status.idle": "2022-05-02T16:41:12.811257Z", + "shell.execute_reply.started": "2022-05-02T16:41:12.775531Z", + "shell.execute_reply": "2022-05-02T16:41:12.810380Z" + }, + "trusted": true, + "id": "uUD52dPCKZQL" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "print('Path to an image ', os.path.join(image_root,'vid_4_6440.jpg'))" + ], + "metadata": { + "execution": { + "iopub.status.busy": "2022-05-02T16:41:12.812562Z", + "iopub.execute_input": "2022-05-02T16:41:12.813344Z", + "iopub.status.idle": "2022-05-02T16:41:12.819478Z", + "shell.execute_reply.started": "2022-05-02T16:41:12.813306Z", + "shell.execute_reply": "2022-05-02T16:41:12.818597Z" + }, + "trusted": true, + "id": "cyv2x2hiKZQM" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def draw_box(img, xy):\n", + " x1, y1, x2, y2 = xy.astype('int16')\n", + " cv2.rectangle(img, (x1, y1), (x2, y2), (0, 0, 255), 5)\n", + " return img" + ], + "metadata": { + "execution": { + "iopub.status.busy": "2022-05-02T16:41:12.821204Z", + "iopub.execute_input": "2022-05-02T16:41:12.821930Z", + "iopub.status.idle": "2022-05-02T16:41:12.828740Z", + "shell.execute_reply.started": "2022-05-02T16:41:12.821887Z", + "shell.execute_reply": "2022-05-02T16:41:12.827825Z" + }, + "trusted": true, + "id": "5m71F9nfKZQN" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "plt.figure(figsize=(20, 12))\n", + "\n", + "for i, (index, row) in enumerate(df.head(4).iterrows()):\n", + " plt.subplot(2, 2, i+1)\n", + " \n", + " img = Image.open(os.path.join(image_root,row['image']))\n", + " img = np.asarray(img)\n", + " img = img/255\n", + " print(img.shape)\n", + " \n", + " img = draw_box(img, np.array([row['xmin'], row['ymin'], row['xmax'], row['ymax']]))\n", + " plt.imshow(img)" + ], + "metadata": { + "execution": { + "iopub.status.busy": "2022-05-02T16:41:12.830139Z", + "iopub.execute_input": "2022-05-02T16:41:12.830658Z", + "iopub.status.idle": "2022-05-02T16:41:14.268294Z", + "shell.execute_reply.started": "2022-05-02T16:41:12.830599Z", + "shell.execute_reply": "2022-05-02T16:41:14.266671Z" + }, + "trusted": true, + "id": "02hoIaNWKZQO" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Creating DataSet" + ], + "metadata": { + "id": "kFe_w-KcKZQQ" + } + }, + { + "cell_type": "code", + "source": [ + "# step 1\n", + "filenames = tf.constant(df['image'].map(lambda x: os.path.join(image_root,x)))\n", + "labels = tf.constant(df[['xmin', 'ymin', 'xmax', 'ymax']])" + ], + "metadata": { + "execution": { + "iopub.status.busy": "2022-05-02T16:41:14.269264Z", + "iopub.execute_input": "2022-05-02T16:41:14.269473Z", + "iopub.status.idle": "2022-05-02T16:41:16.611974Z", + "shell.execute_reply.started": "2022-05-02T16:41:14.269445Z", + "shell.execute_reply": "2022-05-02T16:41:16.611265Z" + }, + "trusted": true, + "id": "3jPRAWSkKZQT" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "#step 2\n", + "dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))" + ], + "metadata": { + "execution": { + "iopub.status.busy": "2022-05-02T16:41:16.613219Z", + "iopub.execute_input": "2022-05-02T16:41:16.613637Z", + "iopub.status.idle": "2022-05-02T16:41:16.626967Z", + "shell.execute_reply.started": "2022-05-02T16:41:16.613588Z", + "shell.execute_reply": "2022-05-02T16:41:16.626110Z" + }, + "trusted": true, + "id": "UB1wSaPGKZQV" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def load_img(filename, label):\n", + " image_string = tf.io.read_file(filename)\n", + " image_decoded = tf.image.decode_jpeg(image_string, channels=3)\n", + " image = tf.cast(image_decoded, tf.float32)\n", + "\n", + " # Do preprocessing here\n", + " image = keras.applications.xception.preprocess_input(image)\n", + "\n", + " return image, label\n", + "\n", + "dataset = dataset.map(load_img).batch(BATCH_SIZE)" + ], + "metadata": { + "execution": { + "iopub.status.busy": "2022-05-02T16:41:16.670598Z", + "iopub.execute_input": "2022-05-02T16:41:16.671117Z", + "iopub.status.idle": "2022-05-02T16:41:16.761690Z", + "shell.execute_reply.started": "2022-05-02T16:41:16.671076Z", + "shell.execute_reply": "2022-05-02T16:41:16.760963Z" + }, + "trusted": true, + "id": "Ms6saBEfKZQX" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "plt.figure(figsize=(20, 12))\n", + "\n", + "\n", + "for x_batch, y_batch in dataset.take(1):\n", + " height = int(math.sqrt(BATCH_SIZE))\n", + " for i, (image, label) in enumerate(zip(x_batch, y_batch)):\n", + " plt.subplot(height, height, i+1)\n", + " \n", + " img = image.numpy() # [-1, 1]\n", + " img = img+1 # [0, 2]\n", + " img = img/2 # [0, 1]\n", + " \n", + " img = draw_box(img, np.array([*label]))\n", + " \n", + " plt.imshow(img)\n", + " plt.title(img.shape)\n", + "\n", + "plt.show()" + ], + "metadata": { + "execution": { + "iopub.status.busy": "2022-05-02T16:41:16.764568Z", + "iopub.execute_input": "2022-05-02T16:41:16.764788Z", + "iopub.status.idle": "2022-05-02T16:41:19.695653Z", + "shell.execute_reply.started": "2022-05-02T16:41:16.764764Z", + "shell.execute_reply": "2022-05-02T16:41:19.694576Z" + }, + "trusted": true, + "id": "IQ2XeB9OKZQY" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Make and Train Model" + ], + "metadata": { + "id": "mhTV42XALeR4" + } + }, + { + "cell_type": "code", + "source": [ + "# for both classification and localization\n", + "\n", + "model = ???\n", + "\n", + "model.compile(loss=[\"sparse_categorical_crossentropy\", \"mse\"],loss_weights=[0.5, 0.5],\n", + " optimizer='adam', metrics=[tf.keras.metrics.MeanIoU(num_classes=n_classes)])" + ], + "metadata": { + "execution": { + "iopub.status.busy": "2022-05-02T16:41:19.697140Z", + "iopub.execute_input": "2022-05-02T16:41:19.697539Z", + "iopub.status.idle": "2022-05-02T16:41:21.746627Z", + "shell.execute_reply.started": "2022-05-02T16:41:19.697491Z", + "shell.execute_reply": "2022-05-02T16:41:21.745680Z" + }, + "trusted": true, + "id": "Pg4gKFDLKZQZ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# for both only localization\n", + "\n", + "model = ???\n", + "\n", + "model.compile(loss=[\"mse\"],\n", + " optimizer='adam', metrics=[tf.keras.metrics.MeanIoU(num_classes=n_classes))" + ], + "metadata": { + "execution": { + "iopub.status.busy": "2022-05-02T16:41:21.748312Z", + "iopub.execute_input": "2022-05-02T16:41:21.748637Z", + "iopub.status.idle": "2022-05-02T16:41:22.874629Z", + "shell.execute_reply.started": "2022-05-02T16:41:21.748577Z", + "shell.execute_reply": "2022-05-02T16:41:22.873900Z" + }, + "trusted": true, + "id": "NY-oRnG6KZQa" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "val_dataset = dataset.take(4)\n", + "train_dataset = dataset.skip(4)" + ], + "metadata": { + "execution": { + "iopub.status.busy": "2022-05-02T16:42:00.115569Z", + "iopub.execute_input": "2022-05-02T16:42:00.116104Z", + "iopub.status.idle": "2022-05-02T16:42:00.123299Z", + "shell.execute_reply.started": "2022-05-02T16:42:00.116072Z", + "shell.execute_reply": "2022-05-02T16:42:00.122460Z" + }, + "trusted": true, + "id": "L4zqXW8LKZQc" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "" + ], + "metadata": { + "execution": { + "iopub.status.busy": "2022-05-02T16:42:00.692573Z", + "iopub.execute_input": "2022-05-02T16:42:00.693045Z", + "iopub.status.idle": "2022-05-02T16:43:30.255583Z", + "shell.execute_reply.started": "2022-05-02T16:42:00.693008Z", + "shell.execute_reply": "2022-05-02T16:43:30.254457Z" + }, + "trusted": true, + "id": "tLOuAYRAKZQc" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# See Evaluation Result" + ], + "metadata": { + "id": "AeJnoiw9LlqC" + } + }, + { + "cell_type": "code", + "source": [ + "plt.figure(figsize=(15, 8))\n", + "\n", + "for x_batch, y_batch in val_dataset.take(3):\n", + " for i in range(6):\n", + " plt.subplot(2, 3, i+1)\n", + " \n", + " image = x_batch[i]\n", + " img = image.numpy()\n", + " label = y_batch[i]\n", + " \n", + " img = img+1\n", + " img = img/2\n", + " xy = model(image[tf.newaxis, :]).numpy()[0]\n", + " plt.title(\"{}\".format(xy))\n", + " \n", + " img = draw_box(img, xy)\n", + " \n", + " plt.imshow(img)\n", + " \n", + "\n", + "plt.show()" + ], + "metadata": { + "execution": { + "iopub.status.busy": "2022-05-02T16:44:38.794047Z", + "iopub.execute_input": "2022-05-02T16:44:38.794298Z", + "iopub.status.idle": "2022-05-02T16:44:41.276729Z", + "shell.execute_reply.started": "2022-05-02T16:44:38.794271Z", + "shell.execute_reply": "2022-05-02T16:44:41.275959Z" + }, + "trusted": true, + "id": "hEJih5kOKZQe" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "" + ], + "metadata": { + "id": "dA_saASoKZQf" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file