Skip to content

Commit

Permalink
improve sql query for tag denylist
Browse files Browse the repository at this point in the history
Unfortunately, excludeTags defined in the _config.yml
cannot be used by the SQL queries defined on a per-page
basis: the glob-scoped defaults are applied dynamically
by Jekyll, so they are not picked up by our
current jekyll-sqlite implementation when generating
the query bind params.

For now, we hardcode the sql deny list, but will file
an issue on the jekyll-sqlite side to see if I can
improve it there.
  • Loading branch information
captn3m0 committed Sep 18, 2024
1 parent 58310f8 commit ec692f6
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 126 deletions.
145 changes: 29 additions & 116 deletions _config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,11 @@ defaults:
layout: events
permalink: /:path/:basename/
subscribe: true
# While excludeTags is available as a page variable,
# defaults are applied dynamically, so it is not available
# for use in sql queries, nor copied to external layouts
# TODO: Fix jekyll-sqlite and multi-page layouts to support this
excludeTags: '["BUSINESS", "LOW-QUALITY", "NOTINBLR", "DANDIYA", "WOOWOO"]'
layouts:
- layout: icalendar.ics
permalink: /:path/:basename/cal.ics
Expand All @@ -43,65 +48,37 @@ defaults:
- data: events
file: events.db
query: |
SELECT *
FROM events
WHERE EXISTS (
SELECT 1
FROM json_each(:tags) AS tags
WHERE tags.value IN (
SELECT value
FROM json_each(event_json->'$.keywords')
)
AND tags.value NOT IN ("WOOWOO", "LOW-QUALITY", "BUSINESS", "NOTINBLR", "DANDIYA")
WITH
allowlist_tags AS (
SELECT value FROM json_each(:tags)
),
denylist_tags AS (
SELECT value FROM json_each('["BUSINESS", "LOW-QUALITY", "NOTINBLR", "DANDIYA", "WOOWOO"]')
)
ORDER BY event_json -> '$.startDate'
;
- data: today
file: events.db
query: |
SELECT event_json, url, date(json_extract(event_json,'$.startDate')) as startDate, date(json_extract(event_json,'$.endDate')) as endDate
SELECT *
FROM events
WHERE EXISTS (
WHERE
-- Check if no denylist tags are present
NOT EXISTS (
SELECT 1
FROM json_each(:tags) AS tags
WHERE tags.value IN (
SELECT value
FROM json_each(event_json->'$.keywords')
FROM denylist_tags
WHERE denylist_tags.value IN (
SELECT value
FROM json_each(event_json->'$.keywords')
)
) AND (
(startDate >= date('now', 'localtime') AND endDate <= date('now', 'localtime'))
OR
(startDate <= date('now', 'localtime') AND endDate >= date('now', '+1 day', 'localtime'))
)
ORDER by startDate;
- data: tomorrow
file: events.db
query: |
SELECT event_json, url, date(json_extract(event_json,'$.startDate')) as startDate, date(json_extract(event_json,'$.endDate')) as endDate
FROM events
WHERE EXISTS (
)
AND
-- Check if at least one allowlist tag is present
EXISTS (
SELECT 1
FROM json_each(:tags) AS tags
WHERE tags.value IN (
SELECT value
FROM json_each(event_json->'$.keywords')
FROM allowlist_tags
WHERE allowlist_tags.value IN (
SELECT value
FROM json_each(event_json->'$.keywords')
)
-- Find all events either starting and ending tomorrow
-- or starting today or earlier, and ending after tomorrow
) AND (
(startDate >= date('now', '+1 day', 'localtime') AND endDate <= date('now', '+1 day', 'localtime'))
OR
(startDate <= date('now', '+1 day', 'localtime') AND endDate >= date('now', '+2 day', 'localtime'))
)
ORDER by startDate
)
ORDER BY event_json -> '$.startDate';
sqlite:
- data: tags.events
file: events.db
query: |
SELECT event_json, event_json -> '$.keywords' as keywords from events
WHERE EXISTS (SELECT 1 FROM json_each(keywords) WHERE value = :id)
- data: geo_events
file: events.db
query: |
Expand All @@ -110,67 +87,3 @@ sqlite:
event_json->'$.location.geo.longitude' as lng,
event_json-> '$.location.name' as name
from events WHERE event_json -> '$.location.geo' IS NOT NULL
- data: events
file: events.db
query: |
SELECT substr(url, 9, instr(substr(url, 9), '/') - 1) AS domain,
url,
json_extract(event_json, '$.name') AS name,
json_extract(event_json, '$.description') AS description,
json_extract(event_json, '$.keywords') AS keywords,
json_extract(event_json, '$.startDate') AS startDate,
json_extract(event_json, '$.eventStatus') AS eventStatus,
json_extract(event_json, '$.location.name') AS location,
json_extract(event_json, '$.organizer.name') AS organizer
FROM events WHERE
(
(
keywords NOT LIKE '%karaoke%' AND name not like '%karaoke%' AND -- MUSIC
keywords NOT LIKE '%dj night%' AND name not like '%dj night%' AND -- MUSIC
keywords NOT LIKE '%bollywood night%' AND name not like '%bollywood night%' AND -- MUSIC
keywords NOT LIKE '%ladies night%' AND name not like '%ladies night%' AND -- MUSIC
keywords NOT LIKE '%ft. %' AND name not like '%ft. %' AND -- MUSIC
keywords NOT LIKE '%camping%' AND name not like '%camping%' AND -- TRIP
keywords NOT LIKE '%trek%' AND name not like '%trek%' AND -- TRIP
keywords NOT LIKE '%techno%' AND name not like '%techno%' AND -- MUSIC
description NOT LIKE '%karaoke%' AND -- MUSIC
keywords NOT LIKE '%tipsy%' AND name not like '%tipsy%' AND -- MUSIC
keywords NOT LIKE '%summer camp%' AND name not like '%summer camp%' AND -- KIDS
description NOT LIKE '%get sloshed%' AND -- DRINKING
description NOT LIKE '%magic mocktails%' AND -- DRINKING
name NOT LIKE '%jollywood%' AND -- ADS
name NOT LIKE '%thrifty x%' AND -- DATING
name NOT LIKE '% vs %' AND -- SPORTS
name NOT LIKE '% admissions %' AND -- ADS
keywords NOT LIKE '%parties%' AND -- MUSIC
name NOT LIKE '%deck gigs%' AND -- MUSIC
name NOT LIKE '%5 day program%' AND -- LONG
name NOT GLOB '*IPL*' AND -- SPORTS
name NOT GLOB '*GP*' AND -- SPORTS
name NOT GLOB '*F1*' AND -- SPORTS
name NOT GLOB '*T20*' AND -- SPORTS
name NOT GLOB '*RCB*' AND -- SPORTS
name NOT GLOB '*MI*' AND -- SPORTS
name NOT LIKE '%pet paw%' AND -- PETS
keywords NOT LIKE '%summer vacation%' AND -- KIDS
(organizer NOT LIKE 'Odyssey vibes' OR organizer IS NULL) AND -- TRIP
(organizer NOT LIKE '%Medicine%' OR organizer IS NULL) AND -- CONFERENCE
(location NOT LIKE 'Small World' OR location IS NULL) AND -- BADRATING
(organizer NOT LIKE '%sheena - banjara%' OR organizer IS NULL) AND -- TRIP
(location NOT LIKE '%HeyBrewty Wellness Studio%' OR location is NULL) AND -- WOOWOO
(location NOT LIKE '%blue butterfly%' OR location is NULL) AND -- MUSIC
(description NOT LIKE '%techno%') AND -- MUSIC
(name NOT LIKE '%DnBIndia presents%') AND -- MUSIC
-- Not sure about these events: https://insider.in/search?q=burgerman
-- (organizer NOT LIKE '%burgerman%' OR organizer IS NULL) AND
-- See https://insider.in/search?q=boche%20club
(organizer NOT LIKE '%brewcraft hospitality%' OR organizer IS NULL) AND -- MUSIC
description not LIKE '%happy hour%' -- MUSIC
)
OR (keywords IS NULL OR description IS NULL OR url LIKE '%sumukha%')
)
ORDER BY startDate ASC
3 changes: 3 additions & 0 deletions _includes/head.html
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@
<link rel="icon" type="image/png" sizes="32x32" href="/img/favicon-32x32.png" />
<link rel="icon" type="image/png" sizes="16x16" href="/img/favicon-16x16.png" />
<link rel="manifest" href="/assets/site.webmanifest" />
{% if page.noindex %}
<meta name="robots" content="noindex, nofollow" />
{% endif %}
1 change: 1 addition & 0 deletions _layouts/events.html
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
</script>

<noscript>
<h3>{{eventCount}} Events</h3>
<ul>
{% for e in page.events %} {% assign E =e.event_json | parse_json %}
<li>
Expand Down
8 changes: 7 additions & 1 deletion _plugins/icalendar.rb
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,13 @@ module Jekyll
module ToIcal
def to_ical(event_json)
ics = Icalendar::Event.new
event = JSON.parse event_json
begin
event = JSON.parse event_json
rescue StandardError => e
print("Could not parse")
print(event_json)
return ""
end
ics.color = COLOR_MAP[event['@type']]
ics.description = event['description']
# set geolocation if available in schema.org/Place
Expand Down
5 changes: 5 additions & 0 deletions _plugins/multi_layout.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ def create_layout_views(page)
dir = File.dirname(page.relative_path)
Page.new(page.site, page.site.source, dir, page.name).tap do |new_page|
new_page.data = page.data.clone
# Default configuration is not copied, but dynamically
# applied. We need to copy this somehow as well.
# new_page.data.default_proc = proc do |_, key|
# site.frontmatter_defaults.find(relative_path, type, key)
# end
new_page.data["layout"] = config["layout"]
new_page.data["permalink"] = config["permalink"]
end
Expand Down
22 changes: 13 additions & 9 deletions cal/unwanted.md
Original file line number Diff line number Diff line change
@@ -1,23 +1,27 @@
---
title: Unwanted Events
tags: '["LOW-QUALITY", "NOTINBLR", "WOOWOO", "DANDIYA", "BUSINESS"]'
subscribe: false
sqlite:
- data: events
file: events.db
query: |
WITH
allowlist_tags AS (
SELECT value FROM json_each(:tags)
)
SELECT *
FROM events
WHERE EXISTS (
SELECT 1
FROM json_each(:tags) AS tags
WHERE tags.value IN (
SELECT value
FROM json_each(event_json->'$.keywords')
)
)
ORDER BY event_json -> '$.startDate'
;
FROM allowlist_tags
WHERE allowlist_tags.value IN (
SELECT value
FROM json_each(event_json->'$.keywords')
))
ORDER BY event_json -> '$.startDate';
subscribe: false
# This is such a trash event page, that we don't want links here to impact our rankings
noindex: true
---
This is a meta calendar that lists events that have been excluded
from other calendars. This is primarily meant for debugging.
Expand Down

0 comments on commit ec692f6

Please sign in to comment.