Skip to content

Commit

Permalink
[batch] make batches query go brrrrrrr (#14629)
Browse files Browse the repository at this point in the history
This PR optimises the V2 batches query along with its associated
subqueries.
Listing batches for the CI user now executes in 1s, where previously it
had taken ~14s.
Resolves #14599

---------

Co-authored-by: Chris Llanwarne <[email protected]>
  • Loading branch information
ehigham and cjllanwarne authored Jul 31, 2024
1 parent 50310c9 commit 21ad23a
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 38 deletions.
37 changes: 25 additions & 12 deletions batch/batch/front_end/query/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,10 +455,14 @@ def __init__(self, term: str):
self.term = term

def query(self) -> Tuple[str, List[str]]:
sql = """
((job_groups.batch_id, job_groups.job_group_id) IN
(SELECT batch_id, job_group_id FROM job_group_attributes
WHERE `key` = %s OR `value` = %s))
sql = """\
EXISTS (
SELECT NULL
FROM job_group_attributes AS attrs
WHERE attrs.batch_id = job_groups.batch_id
AND attrs.job_group_id = job_groups.job_group_id
AND (attrs.`key` = %s OR attrs.`value` = %s)
)
"""
return (sql, [self.term, self.term])

Expand All @@ -478,10 +482,14 @@ def __init__(self, term: str):
self.term = term

def query(self) -> Tuple[str, List[str]]:
sql = """
((job_groups.batch_id, job_groups.job_group_id) IN
(SELECT batch_id, job_group_id FROM job_group_attributes
WHERE `key` LIKE %s OR `value` LIKE %s))
sql = """\
EXISTS (
SELECT NULL
FROM job_group_attributes AS attrs
WHERE attrs.batch_id = job_groups.batch_id
AND attrs.job_group_id = job_groups.job_group_id
AND (attrs.`key` LIKE %s OR attrs.`value` LIKE %s)
)
"""
escaped_term = f'%{self.term}%'
return (sql, [escaped_term, escaped_term])
Expand All @@ -505,10 +513,15 @@ def query(self) -> Tuple[str, List[str]]:
value = self.value
if isinstance(self.operator, PartialMatchOperator):
value = f'%{value}%'
sql = f"""
((job_groups.batch_id, job_groups.job_group_id) IN
(SELECT batch_id, job_group_id FROM job_group_attributes
WHERE `key` = %s AND `value` {op} %s))
sql = f"""\
EXISTS (
SELECT NULL
FROM job_group_attributes AS attrs
WHERE attrs.batch_id = job_groups.batch_id
AND attrs.job_group_id = job_groups.job_group_id
AND attrs.`key` = %s
AND attrs.`value` {op} %s
)
"""
return (sql, [self.key, value])

Expand Down
58 changes: 32 additions & 26 deletions batch/batch/front_end/query/query_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,40 +125,46 @@ def parse_list_batches_query_v2(user: str, q: str, last_batch_id: Optional[int])
where_conditions.append(f'({cond})')
where_args += args

sql = f"""
SELECT batches.*,
cancelled_t.cancelled IS NOT NULL AS cancelled,
sql = f"""\
SELECT
batches.*,
cancelled_t.cancelled <=> 1 AS cancelled,
job_groups_n_jobs_in_complete_states.n_completed,
job_groups_n_jobs_in_complete_states.n_succeeded,
job_groups_n_jobs_in_complete_states.n_failed,
job_groups_n_jobs_in_complete_states.n_cancelled,
cost_t.cost, cost_t.cost_breakdown
FROM job_groups
LEFT JOIN batches ON batches.id = job_groups.batch_id
LEFT JOIN billing_projects ON batches.billing_project = billing_projects.name
LEFT JOIN job_groups_n_jobs_in_complete_states ON job_groups.batch_id = job_groups_n_jobs_in_complete_states.id AND job_groups.job_group_id = job_groups_n_jobs_in_complete_states.job_group_id
LEFT JOIN LATERAL (
SELECT 1 AS cancelled
FROM job_group_self_and_ancestors
INNER JOIN job_groups_cancelled
ON job_group_self_and_ancestors.batch_id = job_groups_cancelled.id AND
job_group_self_and_ancestors.ancestor_id = job_groups_cancelled.job_group_id
WHERE job_groups.batch_id = job_group_self_and_ancestors.batch_id AND
job_groups.job_group_id = job_group_self_and_ancestors.job_group_id
) AS cancelled_t ON TRUE
STRAIGHT_JOIN billing_project_users ON batches.billing_project = billing_project_users.billing_project
LEFT JOIN LATERAL (
SELECT COALESCE(SUM(`usage` * rate), 0) AS cost, JSON_OBJECTAGG(resources.resource, COALESCE(`usage` * rate, 0)) AS cost_breakdown
FROM (
SELECT resource_id, CAST(COALESCE(SUM(`usage`), 0) AS SIGNED) AS `usage`
cost_t.cost,
cost_t.cost_breakdown
FROM batches
INNER JOIN job_groups
ON batches.id = job_groups.batch_id
INNER JOIN billing_projects
ON batches.billing_project = billing_projects.name
INNER JOIN billing_project_users
ON batches.billing_project = billing_project_users.billing_project
INNER JOIN job_groups_n_jobs_in_complete_states
ON batches.id = job_groups_n_jobs_in_complete_states.id
AND job_groups.job_group_id = job_groups_n_jobs_in_complete_states.job_group_id
LEFT JOIN (SELECT *, 1 AS cancelled FROM job_groups_cancelled) AS cancelled_t
ON batches.id = cancelled_t.id
AND job_groups.job_group_id = cancelled_t.job_group_id
INNER JOIN LATERAL (
WITH resource_costs AS (
SELECT
resource_id,
CAST(COALESCE(SUM(`usage`), 0) AS SIGNED) AS `usage`
FROM aggregated_job_group_resources_v3
WHERE job_groups.batch_id = aggregated_job_group_resources_v3.batch_id AND job_groups.job_group_id = aggregated_job_group_resources_v3.job_group_id
WHERE batch_id = batches.id
GROUP BY resource_id
) AS usage_t
LEFT JOIN resources ON usage_t.resource_id = resources.resource_id
)
SELECT
COALESCE(SUM(`usage` * rate), 0) AS cost,
JSON_OBJECTAGG(resource, COALESCE(`usage` * rate, 0)) AS cost_breakdown
FROM resource_costs
INNER JOIN resources USING (resource_id)
) AS cost_t ON TRUE
WHERE {' AND '.join(where_conditions)}
ORDER BY job_groups.batch_id DESC
ORDER BY batches.id DESC
LIMIT 51;
"""

Expand Down

0 comments on commit 21ad23a

Please sign in to comment.