Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix slow query caused by GET /articles #414

Merged
merged 3 commits into from
Nov 16, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions apps/core/migrations/0057_alter_article_name_type_and_more.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Generated by Django 4.2.3 on 2023-09-21 08:27

from django.db import migrations, models

import apps.core.models.board


class Migration(migrations.Migration):
dependencies = [
("core", "0056_alter_article_comment_count_alter_article_hit_count_and_more"),
]

operations = [
migrations.AlterField(
model_name="article",
name="name_type",
field=models.PositiveSmallIntegerField(
db_index=True,
default=apps.core.models.board.NameType["REGULAR"],
verbose_name="익명 혹은 실명 여부",
),
),
migrations.AddIndex(
model_name="article",
index=models.Index(
fields=["created_at", "parent_board_id"],
name="created_at_parent_board_id_idx",
),
),
]
10 changes: 9 additions & 1 deletion apps/core/models/article.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
)

from .block import Block
from .board import NameType, BoardAccessPermissionType
from .board import BoardAccessPermissionType, NameType
from .comment import Comment
from .communication_article import SchoolResponseStatus
from .report import Report
Expand Down Expand Up @@ -52,6 +52,7 @@ class Article(MetaDataModel):
name_type = models.PositiveSmallIntegerField(
verbose_name="익명 혹은 실명 여부",
default=NameType.REGULAR,
db_index=True,
)
is_content_sexual = models.BooleanField(
verbose_name="성인/음란성 내용",
Expand Down Expand Up @@ -157,6 +158,13 @@ class Meta(MetaDataModel.Meta):
verbose_name = "게시물"
verbose_name_plural = "게시물 목록"

indexes = [
models.Index(
fields=["created_at", "parent_board_id"],
name="created_at_parent_board_id_idx",
)
]

Comment on lines +161 to +167
Copy link
Contributor Author

@retroinspect retroinspect Nov 2, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Composite index of (created_at, parent_board_id) is needed
MySQL order by optimization doc

In this query, key_part1 is constant, so all rows accessed through the index are in key_part2 order, and an index on (key_part1, key_part2) avoids sorting if the WHERE clause is selective enough to make an index range scan cheaper than a table scan:

SELECT * FROM t1
  WHERE key_part1 = constant
  ORDER BY key_part2;

def __str__(self):
return self.title

Expand Down
86 changes: 59 additions & 27 deletions apps/core/views/viewsets/article.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import time

from django.core.paginator import Paginator as DjangoPaginator
from django.db import models
from django.http import Http404
from django.utils.functional import cached_property
from django.utils.translation import gettext
from rest_framework import (
decorators,
Expand Down Expand Up @@ -78,40 +80,70 @@ class ArticleViewSet(viewsets.ModelViewSet, ActionAPIViewSet):
),
}

# Override list action for further optimization
def list(self, request):
queryset = self.filter_queryset(self.get_queryset())

created_by = self.request.query_params.get("created_by")
if created_by and int(created_by) != self.request.user.id:
# exclude someone's anonymous or realname article in one's profile
exclude_list = [NameType.ANONYMOUS, NameType.REALNAME]
queryset = queryset.exclude(name_type__in=exclude_list)

count = (
queryset.count()
- queryset.filter(
created_by__id__in=self.request.user.block_set.values("user"),
name_type=NameType.ANONYMOUS,
).count()
)

# exclude article written by blocked users in anonymous board
queryset = queryset.exclude(
created_by__id__in=self.request.user.block_set.values("user"),
name_type=NameType.ANONYMOUS,
)
Comment on lines +94 to +106
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Blocked user filtering을 한 번만 하게 바꾸는 게 어떤가요?

Suggested change
count = (
queryset.count()
- queryset.filter(
created_by__id__in=self.request.user.block_set.values("user"),
name_type=NameType.ANONYMOUS,
).count()
)
# exclude article written by blocked users in anonymous board
queryset = queryset.exclude(
created_by__id__in=self.request.user.block_set.values("user"),
name_type=NameType.ANONYMOUS,
)
# exclude article written by blocked users in anonymous board
queryset = queryset.exclude(
created_by__id__in=self.request.user.block_set.values("user"),
name_type=NameType.ANONYMOUS,
)
count = queryset.count()

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • count = queryset.count() 로 하면 현재와 동일한 쿼리를 사용하게 됨 (다음 문제의 쿼리)
SELECT COUNT(*) AS `__count`
FROM `core_article`
WHERE (
  `core_article`.`deleted_at` = '0001-01-01 00:00:00.000000' AND NOT (`core_article`.`created_by_id` IN (
    SELECT U0.`user_id`
    FROM `core_block` U0
    WHERE (
      U0.`deleted_at` = '0001-01-01 00:00:00.000000' AND U0.`blocked_by_id` = %s
    )
  ) AND `core_article`.`name_type` = %s)
)
  • 위 쿼리가 느린 이유는 NOT IN 절로 예상됨
  • 위 쿼리를 유지한 상태에서 속도를 개선하는 방법은 찾지 못했으며, 코드상의 쿼리처럼 둘로 나누어 처리하면 쿼리가 Slow Query로 잡히지 않을 정도로 빨라지는 것을 확인함


queryset = queryset.prefetch_related(
"attachments",
"communication_article",
)

# optimizing queryset for list action
queryset = queryset.select_related(
"created_by",
"created_by__profile",
"parent_topic",
"parent_board",
"parent_board__group",
).prefetch_related(
ArticleReadLog.prefetch_my_article_read_log(self.request.user),
)

class Paginator(DjangoPaginator):
@cached_property
def count(self):
return count

# Originated from list function of rest_framework.mixins.ListModelMixin
page = self.paginator.paginate_queryset(
queryset, request, paginator_class=Paginator
)
if page is not None:
serializer = self.get_serializer(page, many=True)
return self.get_paginated_response(serializer.data)

serializer = self.get_serializer(queryset, many=True)
return Response(serializer.data)

def filter_queryset(self, queryset):
queryset = super().filter_queryset(queryset)

if self.action == "destroy":
pass

elif self.action == "list":
created_by = self.request.query_params.get("created_by")
if created_by and int(created_by) != self.request.user.id:
# exclude someone's anonymous or realname article in one's profile
exclude_list = [NameType.ANONYMOUS, NameType.REALNAME]
queryset = queryset.exclude(name_type__in=exclude_list)

# exclude article written by blocked users in anonymous board
queryset = queryset.exclude(
created_by__id__in=self.request.user.block_set.values("user"),
name_type=NameType.ANONYMOUS,
)

queryset = queryset.prefetch_related(
"attachments",
"communication_article",
)

# optimizing queryset for list action
queryset = queryset.select_related(
"created_by",
"created_by__profile",
"parent_topic",
"parent_board",
"parent_board__group",
).prefetch_related(
ArticleReadLog.prefetch_my_article_read_log(self.request.user),
)
pass

# optimizing queryset for create, update, retrieve actions
else:
Expand Down
29 changes: 29 additions & 0 deletions ara/classes/pagination.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,40 @@
from collections import OrderedDict

from django.core.paginator import InvalidPage
from rest_framework import pagination, response
from rest_framework.exceptions import NotFound


class PageNumberPagination(pagination.PageNumberPagination):
page_size_query_param = "page_size"

# Originated from rest_framework.pagination.PageNumberPagination
def paginate_queryset(self, queryset, request, view=None, paginator_class=None):
page_size = self.get_page_size(request)
if not page_size:
return None

paginator_class = (
paginator_class if paginator_class else self.django_paginator_class
)
paginator = paginator_class(queryset, page_size)
page_number = self.get_page_number(request, paginator)

try:
self.page = paginator.page(page_number)
except InvalidPage as exc:
msg = self.invalid_page_message.format(
page_number=page_number, message=str(exc)
)
raise NotFound(msg)

if paginator.num_pages > 1 and self.template is not None:
# The browsable API should display pagination controls.
self.display_page_controls = True

self.request = request
return list(self.page)

def get_paginated_response(self, data):
return response.Response(
OrderedDict(
Expand Down
Loading