From b90d74e0bb8aa9b03062bbc1baba31f03ced9b60 Mon Sep 17 00:00:00 2001 From: Dev Aggarwal Date: Mon, 16 Oct 2023 20:07:44 +0530 Subject: [PATCH] use python-user-agents to parse user agent string if https://iplist.cc/api/ fails, the redirect will fail too, hence do this call in a celery task. set enable_analytics - default=True add visitor summary to shortened url admin --- bots/admin.py | 65 ++-------- gooeysite/custom_filters.py | 84 +++++++++++++ poetry.lock | 53 +++++--- pyproject.toml | 3 + url_shortener/admin.py | 114 ++++++++++-------- ...edurl_enable_analytics_visitorclickinfo.py | 67 ++++++++++ ...hortenedurl_use_analytics_clickanalytic.py | 110 ----------------- url_shortener/models.py | 48 ++------ url_shortener/routers.py | 69 +---------- url_shortener/tasks.py | 35 ++++++ 10 files changed, 315 insertions(+), 333 deletions(-) create mode 100644 gooeysite/custom_filters.py create mode 100644 url_shortener/migrations/0003_shortenedurl_enable_analytics_visitorclickinfo.py delete mode 100644 url_shortener/migrations/0003_shortenedurl_use_analytics_clickanalytic.py create mode 100644 url_shortener/tasks.py diff --git a/bots/admin.py b/bots/admin.py index 4872b7bd1..32bac880b 100644 --- a/bots/admin.py +++ b/bots/admin.py @@ -1,19 +1,15 @@ import datetime -import json import django.db.models from django import forms from django.conf import settings from django.contrib import admin -from django.db import DataError -from django.db.models import Max, Count, F, Func +from django.db.models import Max, Count, F from django.http import HttpResponse from django.template import loader -from django.urls import reverse from django.utils import dateformat from django.utils.safestring import mark_safe from django.utils.timesince import timesince -from furl import furl from bots.admin_links import list_related_html_url, open_in_new_tab, change_obj_url from bots.models import ( @@ -27,6 +23,9 @@ BotIntegration, ) from bots.tasks import create_personal_channels_for_all_members +from gooeysite.custom_filters import ( + related_json_field_summary, +) from gooeysite.custom_widgets import JSONEditorWidget @@ -221,55 +220,13 @@ def view_analysis_results(self, bi: BotIntegration): ).exclude( analysis_result={}, ) - max_depth = 3 - field = "analysis_result" - nested_keys = [field] - for i in range(max_depth): - next_keys = [] - for parent in nested_keys: - try: - next_keys.extend( - f"{parent}__{child}" - for child in ( - msgs.values(parent) - .annotate( - keys=Func(F(parent), function="jsonb_object_keys") - ) - .order_by() - .distinct() - .values_list("keys", flat=True) - ) - ) - except DataError: - next_keys.append(parent) - nested_keys = next_keys - results = { - key.split(field + "__")[-1]: [ - ( - json.dumps(val).strip('"'), - count, - furl( - reverse( - f"admin:{Message._meta.app_label}_{Message.__name__.lower()}_changelist" - ), - query_params={ - f"conversation__bot_integration__id__exact": bi.id, - key: val, - }, - ), - ) - for val, count in ( - msgs.values(key) - .annotate(count=Count("id")) - .order_by("-count") - .values_list(key, "count") - ) - if val is not None - ] - for key in nested_keys - } - if not results: - raise Message.DoesNotExist + results = related_json_field_summary( + Message.objects, + "analysis_result", + qs=msgs, + query_param="conversation__bot_integration__id__exact", + instance_id=bi.id, + ) html = loader.render_to_string( "anaylsis_result.html", context=dict(results=results) ) diff --git a/gooeysite/custom_filters.py b/gooeysite/custom_filters.py new file mode 100644 index 000000000..8a14090a9 --- /dev/null +++ b/gooeysite/custom_filters.py @@ -0,0 +1,84 @@ +import json + +from django.db import DataError +from django.db.models import F, Func, QuerySet, Count +from django.urls import reverse +from furl import furl + + +def json_field_nested_lookup_keys( + qs: QuerySet, field: str, max_depth: int = 3 +) -> list[str]: + nested_keys = [field] + for _ in range(max_depth): + next_keys = [] + for parent in nested_keys: + try: + next_keys.extend( + f"{parent}__{child}" + for child in ( + qs.values(parent) + .annotate(keys=Func(F(parent), function="jsonb_object_keys")) + .order_by() + .distinct() + .values_list("keys", flat=True) + ) + ) + except DataError: + next_keys.append(parent) + nested_keys = next_keys + return nested_keys + + +def related_json_field_summary( + manager, field, qs=None, query_param=None, instance_id=None +): + if query_param is None: + try: + query_field_name = manager.field.name + except AttributeError: + query_field_name = manager.query_field_name + query_param = f"{query_field_name}__id__exact" + + model = manager.model + meta = model._meta + + if instance_id is None: + instance_id = manager.instance.id + if instance_id is None: + raise model.DoesNotExist + instance_id = str(instance_id) + + if qs is None: + qs = manager.all() + + nested_keys = json_field_nested_lookup_keys(qs, field) + + results = { + key.split(field + "__")[-1]: [ + ( + json.dumps(val).strip('"'), + count, + furl( + reverse( + f"admin:{meta.app_label}_{model.__name__.lower()}_changelist" + ), + query_params={ + query_param: instance_id, + field: json.dumps([key, val]), + }, + ), + ) + for val, count in ( + qs.values(key) + .annotate(count=Count("id")) + .order_by("-count") + .values_list(key, "count") + ) + if val is not None + ] + for key in nested_keys + } + if not results: + raise model.DoesNotExist + return results diff --git a/poetry.lock b/poetry.lock index 32a70bdde..b6f230fcc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. [[package]] name = "absl-py" @@ -1483,11 +1483,8 @@ files = [ [package.dependencies] google-auth = ">=2.14.1,<3.0.dev0" googleapis-common-protos = ">=1.56.2,<2.0.dev0" -grpcio = [ - {version = ">=1.33.2,<2.0dev", optional = true, markers = "extra == \"grpc\""}, - {version = ">=1.49.1,<2.0dev", optional = true, markers = "python_version >= \"3.11\" and extra == \"grpc\""}, -] -grpcio-status = {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "extra == \"grpc\""} +grpcio = {version = ">=1.33.2,<2.0dev", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""} +grpcio-status = {version = ">=1.33.2,<2.0.dev0", optional = true, markers = "python_version < \"3.11\" and extra == \"grpc\""} protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0.dev0" requests = ">=2.18.0,<3.0.0.dev0" @@ -1604,8 +1601,8 @@ files = [ google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} google-cloud-core = ">=1.4.1,<3.0.0dev" proto-plus = [ - {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, {version = ">=1.22.2,<2.0.0dev", markers = "python_version >= \"3.11\""}, + {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" @@ -1623,8 +1620,8 @@ files = [ [package.dependencies] google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} proto-plus = [ - {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, {version = ">=1.22.2,<2.0.0dev", markers = "python_version >= \"3.11\""}, + {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" @@ -1663,8 +1660,8 @@ files = [ [package.dependencies] google-api-core = {version = ">=1.34.0,<2.0.dev0 || >=2.11.dev0,<3.0.0dev", extras = ["grpc"]} proto-plus = [ - {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, {version = ">=1.22.2,<2.0.0dev", markers = "python_version >= \"3.11\""}, + {version = ">=1.22.0,<2.0.0dev", markers = "python_version < \"3.11\""}, ] protobuf = ">=3.19.5,<3.20.0 || >3.20.0,<3.20.1 || >3.20.1,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4.21.4 || >4.21.4,<4.21.5 || >4.21.5,<5.0.0dev" @@ -3395,12 +3392,9 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.21.2", markers = "python_version >= \"3.10\""}, - {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\""}, {version = ">=1.23.5", markers = "python_version >= \"3.11\""}, - {version = ">=1.19.3", markers = "python_version >= \"3.6\" and platform_system == \"Linux\" and platform_machine == \"aarch64\" or python_version >= \"3.9\""}, - {version = ">=1.17.0", markers = "python_version >= \"3.7\""}, - {version = ">=1.17.3", markers = "python_version >= \"3.8\""}, + {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\" and python_version < \"3.11\""}, + {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\" and python_version < \"3.11\""}, ] [[package]] @@ -3458,8 +3452,8 @@ files = [ [package.dependencies] numpy = [ - {version = ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, + {version = ">=1.22.4", markers = "python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -5204,7 +5198,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\")"} +greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} [package.extras] aiomysql = ["aiomysql", "greenlet (!=0.4.17)"] @@ -5672,6 +5666,17 @@ tzdata = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] devenv = ["black", "check-manifest", "flake8", "pyroma", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"] +[[package]] +name = "ua-parser" +version = "0.18.0" +description = "Python port of Browserscope's user agent parser" +optional = false +python-versions = "*" +files = [ + {file = "ua-parser-0.18.0.tar.gz", hash = "sha256:db51f1b59bfaa82ed9e2a1d99a54d3e4153dddf99ac1435d51828165422e624e"}, + {file = "ua_parser-0.18.0-py2.py3-none-any.whl", hash = "sha256:9d94ac3a80bcb0166823956a779186c746b50ea4c9fd9bf30fdb758553c38950"}, +] + [[package]] name = "uritemplate" version = "4.1.1" @@ -5699,6 +5704,20 @@ brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] +[[package]] +name = "user-agents" +version = "2.2.0" +description = "A library to identify devices (phones, tablets) and their capabilities by parsing browser user agent strings." +optional = false +python-versions = "*" +files = [ + {file = "user-agents-2.2.0.tar.gz", hash = "sha256:d36d25178db65308d1458c5fa4ab39c9b2619377010130329f3955e7626ead26"}, + {file = "user_agents-2.2.0-py3-none-any.whl", hash = "sha256:a98c4dc72ecbc64812c4534108806fb0a0b3a11ec3fd1eafe807cee5b0a942e7"}, +] + +[package.dependencies] +ua-parser = ">=0.10.0" + [[package]] name = "uvicorn" version = "0.18.3" @@ -6129,4 +6148,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "e1aa198ba112e95195815327669b1e7687428ab0510c5485f057442a207b982e" +content-hash = "8e391502b9e6c55c8a277dda67cc455784d1bc85c7bfbeaeb9f90b69129a86b1" diff --git a/pyproject.toml b/pyproject.toml index 3ea6240be..69f346e34 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,6 +75,9 @@ tabulate = "^0.9.0" deepgram-sdk = "^2.11.0" scipy = "^1.11.2" rank-bm25 = "^0.2.2" +pyyaml = "^6.0.1" +ua-parser = "^0.18.0" +user-agents = "^2.2.0" [tool.poetry.group.dev.dependencies] watchdog = "^2.1.9" diff --git a/url_shortener/admin.py b/url_shortener/admin.py index f5dccf338..e51ec46d4 100644 --- a/url_shortener/admin.py +++ b/url_shortener/admin.py @@ -1,11 +1,18 @@ +import json + from django.contrib import admin +from django.template import loader +from django.utils.safestring import mark_safe +from app_users.admin import AppUserAdmin from bots.admin import SavedRunAdmin, export_to_csv, export_to_excel from bots.admin_links import list_related_html_url +from gooeysite.custom_filters import ( + json_field_nested_lookup_keys, + related_json_field_summary, +) from url_shortener import models -from app_users.admin import AppUserAdmin - @admin.register(models.ShortenedURL) class ShortenedURLAdmin(admin.ModelAdmin): @@ -28,7 +35,6 @@ class ShortenedURLAdmin(admin.ModelAdmin): "disabled", "created_at", "updated_at", - "use_analytics", ] readonly_fields = [ "clicks", @@ -36,7 +42,8 @@ class ShortenedURLAdmin(admin.ModelAdmin): "updated_at", "shortened_url", "get_saved_runs", - "get_click_analytics", + "view_visitors", + "view_visitor_summary", ] exclude = ["saved_runs"] ordering = ["created_at"] @@ -51,60 +58,67 @@ def get_max_clicks(self, obj): def get_saved_runs(self, obj: models.ShortenedURL): return list_related_html_url(obj.saved_runs, show_add=False) - @admin.display(description="Analytic Clicks") - def get_click_analytics(self, obj: models.ShortenedURL): - if not obj.use_analytics: - return [] - return list_related_html_url( - models.ClickAnalytic.objects.filter(shortened_url__pk=obj.pk), - query_param="shortened_url__id__exact", - instance_id=obj.pk, - show_add=False, - ) + @admin.display(description="Visitors") + def view_visitors(self, obj: models.ShortenedURL): + return list_related_html_url(obj.visitors, instance_id=obj.pk) + + @admin.display(description="Visitor Summary") + def view_visitor_summary(self, surl: models.ShortenedURL): + html = "" + for field in ["browser", "device", "os", "location_data"]: + results = related_json_field_summary(surl.visitors, field) + html += "

" + field.replace("_", " ").capitalize() + "

" + html += loader.render_to_string( + "anaylsis_result.html", context=dict(results=results) + ) + html = mark_safe(html) + return html + +def jsonfieldlistfilter(field: str): + class JSONFieldListFilter(admin.SimpleListFilter): + title = field + parameter_name = field -@admin.register(models.ClickAnalytic) -class ClickAnalyticAdmin(admin.ModelAdmin): + def lookups(self, request, model_admin): + qs = model_admin.model.objects.all() + lookups = json_field_nested_lookup_keys(qs, field) + return [ + ( + json.dumps([k, v]), + f'{k.split(field + "__")[-1]} = {v}', + ) + for k in lookups + for v in qs.values_list(k, flat=True).distinct() + ] + + def queryset(self, request, queryset): + val = self.value() + if val is None: + return queryset + k, v = json.loads(val) + return queryset.filter(**{k: v}) + + return JSONFieldListFilter + + +@admin.register(models.VisitorClickInfo) +class VisitorClickInfoAdmin(admin.ModelAdmin): list_filter = [ - "shortened_url__url", + jsonfieldlistfilter("browser"), + jsonfieldlistfilter("device"), + jsonfieldlistfilter("os"), + jsonfieldlistfilter("location_data"), "created_at", ] - search_fields = ( - ["ip_address"] - + [ - f"shortened_url__saved_runs__{field}" - for field in SavedRunAdmin.search_fields - ] - + [f"shortened_url__user__{field}" for field in AppUserAdmin.search_fields] - + [f"shortened_url__{field}" for field in ShortenedURLAdmin.search_fields] - ) + search_fields = ["ip_address", "user_agent", "location_data"] + [ + f"shortened_url__{field}" for field in ShortenedURLAdmin.search_fields + ] list_display = [ - "ip_address", - "platform", - "operating_system", - "device_model", - "country_name", - "city_name", - "created_at", - "location_data", + "__str__", "user_agent", - "get_saved_runs", - ] - readonly_fields = [ - "ip_address", - "platform", - "operating_system", - "device_model", - "country_name", - "city_name", - "created_at", "location_data", - "user_agent", + "created_at", ] - exclude = ["saved_runs"] ordering = ["created_at"] actions = [export_to_csv, export_to_excel] - - @admin.display(description="Saved Runs") - def get_saved_runs(self, obj: models.ClickAnalytic): - return list_related_html_url(obj.shortened_url.saved_runs, show_add=False) diff --git a/url_shortener/migrations/0003_shortenedurl_enable_analytics_visitorclickinfo.py b/url_shortener/migrations/0003_shortenedurl_enable_analytics_visitorclickinfo.py new file mode 100644 index 000000000..95d1331f9 --- /dev/null +++ b/url_shortener/migrations/0003_shortenedurl_enable_analytics_visitorclickinfo.py @@ -0,0 +1,67 @@ +# Generated by Django 4.2.5 on 2023-10-16 13:36 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + dependencies = [ + ("url_shortener", "0002_alter_shortenedurl_url"), + ] + + operations = [ + migrations.AddField( + model_name="shortenedurl", + name="enable_analytics", + field=models.BooleanField( + default=True, + help_text="Collect detailed analytics for this shortened url", + ), + ), + migrations.CreateModel( + name="VisitorClickInfo", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "ip_address", + models.GenericIPAddressField( + help_text="The IP address of the user who clicked the shortened url" + ), + ), + ( + "user_agent", + models.TextField( + blank=True, + help_text="The user agent of the user who clicked the shortened url", + ), + ), + ("browser", models.JSONField(blank=True)), + ("device", models.JSONField(blank=True)), + ("os", models.JSONField(blank=True)), + ( + "location_data", + models.JSONField( + blank=True, + help_text="The location data of the user who clicked the shortened url", + ), + ), + ("created_at", models.DateTimeField(auto_now_add=True)), + ( + "shortened_url", + models.ForeignKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="visitors", + to="url_shortener.shortenedurl", + ), + ), + ], + ), + ] diff --git a/url_shortener/migrations/0003_shortenedurl_use_analytics_clickanalytic.py b/url_shortener/migrations/0003_shortenedurl_use_analytics_clickanalytic.py deleted file mode 100644 index ff101c606..000000000 --- a/url_shortener/migrations/0003_shortenedurl_use_analytics_clickanalytic.py +++ /dev/null @@ -1,110 +0,0 @@ -# Generated by Django 4.2.5 on 2023-10-02 08:20 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - dependencies = [ - ("url_shortener", "0002_alter_shortenedurl_url"), - ] - - operations = [ - migrations.AddField( - model_name="shortenedurl", - name="use_analytics", - field=models.BooleanField( - default=False, - help_text="Collect detailed analytics for this shortened url", - ), - ), - migrations.CreateModel( - name="ClickAnalytic", - fields=[ - ( - "id", - models.BigAutoField( - auto_created=True, - primary_key=True, - serialize=False, - verbose_name="ID", - ), - ), - ( - "ip_address", - models.GenericIPAddressField( - help_text="The IP address of the user who clicked the shortened url" - ), - ), - ( - "user_agent", - models.CharField( - blank=True, - help_text="The user agent of the user who clicked the shortened url", - max_length=512, - ), - ), - ( - "platform", - models.CharField( - blank=True, - help_text="The platform of the user who clicked the shortened url (mobile vs. desktop)", - max_length=128, - ), - ), - ( - "operating_system", - models.CharField( - blank=True, - help_text="The operating system of the user who clicked the shortened url", - max_length=128, - ), - ), - ( - "device_model", - models.CharField( - blank=True, - help_text="The device model of the user who clicked the shortened url", - max_length=128, - ), - ), - ( - "location_data", - models.JSONField( - blank=True, - help_text="The location data of the user who clicked the shortened url", - ), - ), - ( - "country_name", - models.CharField( - blank=True, - help_text="The country name of the user who clicked the shortened url", - max_length=128, - ), - ), - ( - "city_name", - models.CharField( - blank=True, - help_text="The city name of the user who clicked the shortened url", - max_length=128, - ), - ), - ("created_at", models.DateTimeField(auto_now_add=True)), - ( - "shortened_url", - models.ForeignKey( - on_delete=django.db.models.deletion.DO_NOTHING, - related_name="click_analytic", - to="url_shortener.shortenedurl", - ), - ), - ], - options={ - "verbose_name": "Click Analytic", - "ordering": ("-created_at",), - "get_latest_by": "created_at", - }, - ), - ] diff --git a/url_shortener/models.py b/url_shortener/models.py index 166aad968..46653baba 100644 --- a/url_shortener/models.py +++ b/url_shortener/models.py @@ -119,8 +119,8 @@ class ShortenedURL(models.Model): disabled = models.BooleanField( default=False, help_text="Disable this shortened url" ) - use_analytics = models.BooleanField( - default=False, help_text="Collect detailed analytics for this shortened url" + enable_analytics = models.BooleanField( + default=True, help_text="Collect detailed analytics for this shortened url" ) objects = ShortenedURLQuerySet.as_manager() @@ -139,56 +139,28 @@ def __str__(self): return self.shortened_url() + " -> " + self.url -class ClickAnalytic(models.Model): +class VisitorClickInfo(models.Model): shortened_url = models.ForeignKey( "url_shortener.ShortenedURL", - on_delete=models.DO_NOTHING, - related_name="click_analytic", + on_delete=models.CASCADE, + related_name="visitors", ) ip_address = models.GenericIPAddressField( help_text="The IP address of the user who clicked the shortened url" ) - user_agent = models.CharField( - max_length=512, + user_agent = models.TextField( blank=True, help_text="The user agent of the user who clicked the shortened url", ) - platform = models.CharField( - max_length=128, - blank=True, - help_text="The platform of the user who clicked the shortened url (mobile vs. desktop)", - ) - operating_system = models.CharField( - max_length=128, - blank=True, - help_text="The operating system of the user who clicked the shortened url", - ) - device_model = models.CharField( - max_length=128, - blank=True, - help_text="The device model of the user who clicked the shortened url", - ) + browser = models.JSONField(blank=True) + device = models.JSONField(blank=True) + os = models.JSONField(blank=True) location_data = models.JSONField( blank=True, help_text="The location data of the user who clicked the shortened url", ) - country_name = models.CharField( - max_length=128, - blank=True, - help_text="The country name of the user who clicked the shortened url", - ) - city_name = models.CharField( - max_length=128, - blank=True, - help_text="The city name of the user who clicked the shortened url", - ) created_at = models.DateTimeField(auto_now_add=True) - class Meta: - ordering = ("-created_at",) - get_latest_by = "created_at" - verbose_name = "Click Analytic" - def __str__(self): - return f"{self.ip_address} clicked on {self.shortened_url.shortened_url()} -> {self.shortened_url.url}" + return f"{self.ip_address} at {self.shortened_url.shortened_url()}" diff --git a/url_shortener/routers.py b/url_shortener/routers.py index d0baecef9..1513af65e 100644 --- a/url_shortener/routers.py +++ b/url_shortener/routers.py @@ -2,10 +2,9 @@ from fastapi import APIRouter, Request from fastapi.responses import RedirectResponse from fastapi.responses import Response -import re -import requests -from url_shortener.models import ShortenedURL, ClickAnalytic +from url_shortener.models import ShortenedURL +from url_shortener.tasks import save_click_info app = APIRouter() @@ -22,68 +21,10 @@ def url_shortener(hashid: str, request: Request): return Response(status_code=410, content="This link has expired") # increment the click count ShortenedURL.objects.filter(id=surl.id).update(clicks=F("clicks") + 1) - if surl.use_analytics: - ip_address = request.client.host # does not work for localhost or with nginx - user_agent = request.headers.get( - "user-agent", "" - ) # note all user agent info can be spoofed - platform = getPlatform(user_agent) - operating_system = getOperatingSystem(user_agent) - device_model = getAndroidDeviceModel(user_agent) - res = requests.get(f"https://iplist.cc/api/{ip_address}") - location_data = res.json() if res.ok else {} - # not all location data will always be available - country_name = location_data.get("countryname", "") - city_name = location_data.get("city", "") - ClickAnalytic.objects.create( - shortened_url=surl, - ip_address=ip_address, - user_agent=user_agent, - platform=platform, - operating_system=operating_system, - device_model=device_model, - location_data=location_data, - country_name=country_name, - city_name=city_name, + if surl.enable_analytics: + save_click_info.delay( + surl.id, request.client.host, request.headers.get("user-agent", "") ) return RedirectResponse( url=surl.url, status_code=303 # because youtu.be redirects are 303 ) - - -def getPlatform(user_agent: str): - devices = [ - "Android", - "webOS", - "iPhone", - "iPad", - "iPod", - "BlackBerry", - "IEMobile", - "Opera Mini", - ] - return "mobile" if any(device in user_agent for device in devices) else "desktop" - - -def getOperatingSystem(user_agent: str): - if "Win" in user_agent: - return "Windows" - elif "Mac" in user_agent: - return "MacOS" - elif "Linux" in user_agent: - return "Linux" - elif "Android" in user_agent: - return "Android" - elif "like Mac" in user_agent: - return "iOS" - else: - return "Other" - - -def getAndroidDeviceModel(user_agent: str): - regex = r"Android (\d+(?:\.\d+)*);" - matches = re.search(regex, user_agent) - if matches: - return matches.group(1) - else: - return "Other" diff --git a/url_shortener/tasks.py b/url_shortener/tasks.py new file mode 100644 index 000000000..f6c29a31b --- /dev/null +++ b/url_shortener/tasks.py @@ -0,0 +1,35 @@ +import requests +import user_agents +from furl import furl + +from celeryapp import app +from url_shortener.models import VisitorClickInfo + + +@app.task +def save_click_info(surl_id: int, ip_address: str, user_agent: str): + if user_agent: + ua_data = user_agents.parse(user_agent) + browser = ua_data.browser._asdict() + device = ua_data.device._asdict() + os = ua_data.os._asdict() + else: + browser = None + device = None + os = None + + res = requests.get(str(furl("https://iplist.cc/api/") / ip_address)) + if res.ok: + location_data = res.json() + else: + location_data = {} + + VisitorClickInfo.objects.create( + shortened_url_id=surl_id, + ip_address=ip_address, + user_agent=user_agent, + browser=browser, + device=device, + os=os, + location_data=location_data, + )