-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Performance improvements to duplicate_ip checking
When a survey response is submitted, we store a hash of the request IP (if available) to detect repeated entries and potential abuse. IP addresses are hashed to protect privacy, but checking hashes is slow and becomes slower as the number of records grows. This is a fundamental limitation: because each hash is stored with its own unique salt, an incoming IP must be re-hashed and compared against every stored record individually. To improve performance, we now keep a maximum of 1000 hash records and age out any record older than 1 week.
- Loading branch information
1 parent
29aa0d7
commit 1d9eef5
Showing
3 changed files
with
107 additions
and
69 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
--! Previous: sha1:6e14c22c9b93c63557203d4e25e5627d0ccf58f6 | ||
--! Hash: sha1:6084ad8c19dffb844ed6a49cab24caac6dc51841 | ||
|
||
-- Enter migration here | ||
-- Drop and re-add updated_at so the column is freshly defined each time this
-- migration runs (any previously stored values are discarded).
alter table survey_response_network_addresses
  drop column if exists updated_at;

-- updated_at is a Unix epoch in seconds, rounded down to a 5-minute boundary:
-- the epoch of the current hour plus floor(minute / 5) * 300 seconds.
alter table survey_response_network_addresses
  add column if not exists updated_at integer not null
    default extract(epoch from date_trunc('hour', now()))
      + (extract(minute from now())::int / 5) * 60 * 5;
|
||
-- Trigger function (per its name, meant to fire before a survey response row
-- is inserted) that flags responses coming from an IP address already seen
-- for the same survey.
--
-- Input:   the `session.request_ip` setting; when it is unset or empty the
--          row is returned untouched and no bookkeeping happens.
-- Effects: prunes old rows from survey_response_network_addresses, then either
--          bumps the matching row (and sets NEW.is_duplicate_ip = true) or
--          inserts a freshly salted hash for this (IP, survey) pair.
-- Returns: NEW, possibly with is_duplicate_ip set to true.
--
-- NOTE(review): the function is SECURITY DEFINER but does not pin search_path,
-- and crypt()/gen_salt() are called unqualified -- confirm this is intended.
CREATE OR REPLACE FUNCTION public.before_survey_response_insert() RETURNS trigger
    LANGUAGE plpgsql SECURITY DEFINER
    AS $$
  declare
    -- A custom setting that was set earlier in the session can read back as ''
    -- instead of NULL, so treat an empty string as "no IP available".
    request_ip text := nullif(current_setting('session.request_ip', true), '');
    -- Current time as a Unix epoch (seconds), rounded down to a 5-minute
    -- boundary; same expression as the updated_at column default.
    current_bucket integer := extract(epoch from date_trunc('hour', now()))
      + (extract(minute from now())::int / 5) * 60 * 5;
  begin
    if request_ip is null then
      return NEW;
    end if;

    -- First, delete the oldest hashes of request IPs so that the per-row
    -- crypt() comparison below doesn't become too inefficient.
    delete from
      survey_response_network_addresses
    where
      updated_at <= greatest(
        -- One week ago (60 * 60 * 24 * 7 seconds)...
        current_bucket - 60 * 60 * 24 * 7,
        -- ...or the updated_at of the 1001st most recent entry, so that at
        -- most 1000 rows survive. The subquery yields NULL when there are
        -- 1000 rows or fewer; greatest() ignores NULL arguments, leaving only
        -- the one-week cutoff in effect.
        -- NOTE(review): updated_at is bucketed to 5 minutes, so ties are
        -- deleted together and fewer than 1000 rows may remain.
        (
          select
            updated_at
          from
            survey_response_network_addresses
          order by
            updated_at desc
          limit 1 offset 1000
        )
      );

    -- Look for an existing hash of this IP for this survey. Each row has its
    -- own salt, so the candidate must be re-hashed with every row's ip_hash.
    -- The survey_id filter is a cheap pre-check that keeps crypt() from having
    -- to be evaluated for other surveys' rows; the hashed text already
    -- includes the survey id, so it does not change which rows match.
    update
      survey_response_network_addresses
    set
      num_responses = num_responses + 1,
      updated_at = current_bucket
    where
      survey_id = NEW.survey_id
      and ip_hash = crypt(request_ip || NEW.survey_id::text, ip_hash);

    -- FOUND is true when the UPDATE touched at least one row. (Testing a
    -- row variable with IS NOT NULL would be true only if *every* column of
    -- the row were non-null, which can miss a genuine match.)
    if found then
      NEW.is_duplicate_ip = true;
    else
      insert into survey_response_network_addresses (
        survey_id,
        ip_hash
      ) values (
        NEW.survey_id,
        crypt(request_ip || NEW.survey_id::text, gen_salt('md5'))
      );
    end if;

    return NEW;
  end;
$$;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,67 +1 @@ | ||
-- Enter migration here | ||
-- Drop and re-add updated_at so the column is freshly defined each time this
-- migration runs (any previously stored values are discarded).
alter table survey_response_network_addresses
  drop column if exists updated_at;

-- updated_at is a Unix epoch in seconds, rounded down to a 5-minute boundary:
-- the epoch of the current hour plus floor(minute / 5) * 300 seconds.
alter table survey_response_network_addresses
  add column if not exists updated_at integer not null
    default extract(epoch from date_trunc('hour', now()))
      + (extract(minute from now())::int / 5) * 60 * 5;
|
||
-- Trigger function (per its name, meant to fire before a survey response row
-- is inserted) that flags responses coming from an IP address already seen
-- for the same survey.
--
-- Input:   the `session.request_ip` setting; when it is unset or empty the
--          row is returned untouched and no bookkeeping happens.
-- Effects: prunes old rows from survey_response_network_addresses, then either
--          bumps the matching row (and sets NEW.is_duplicate_ip = true) or
--          inserts a freshly salted hash for this (IP, survey) pair.
-- Returns: NEW, possibly with is_duplicate_ip set to true.
--
-- NOTE(review): the function is SECURITY DEFINER but does not pin search_path,
-- and crypt()/gen_salt() are called unqualified -- confirm this is intended.
CREATE OR REPLACE FUNCTION public.before_survey_response_insert() RETURNS trigger
    LANGUAGE plpgsql SECURITY DEFINER
    AS $$
  declare
    -- A custom setting that was set earlier in the session can read back as ''
    -- instead of NULL, so treat an empty string as "no IP available".
    request_ip text := nullif(current_setting('session.request_ip', true), '');
    -- Current time as a Unix epoch (seconds), rounded down to a 5-minute
    -- boundary; same expression as the updated_at column default.
    current_bucket integer := extract(epoch from date_trunc('hour', now()))
      + (extract(minute from now())::int / 5) * 60 * 5;
  begin
    if request_ip is null then
      return NEW;
    end if;

    -- First, delete the oldest hashes of request IPs so that the per-row
    -- crypt() comparison below doesn't become too inefficient.
    delete from
      survey_response_network_addresses
    where
      updated_at <= greatest(
        -- One week ago (60 * 60 * 24 * 7 seconds)...
        current_bucket - 60 * 60 * 24 * 7,
        -- ...or the updated_at of the 1001st most recent entry, so that at
        -- most 1000 rows survive. The subquery yields NULL when there are
        -- 1000 rows or fewer; greatest() ignores NULL arguments, leaving only
        -- the one-week cutoff in effect.
        -- NOTE(review): updated_at is bucketed to 5 minutes, so ties are
        -- deleted together and fewer than 1000 rows may remain.
        (
          select
            updated_at
          from
            survey_response_network_addresses
          order by
            updated_at desc
          limit 1 offset 1000
        )
      );

    -- Look for an existing hash of this IP for this survey. Each row has its
    -- own salt, so the candidate must be re-hashed with every row's ip_hash.
    -- The survey_id filter is a cheap pre-check that keeps crypt() from having
    -- to be evaluated for other surveys' rows; the hashed text already
    -- includes the survey id, so it does not change which rows match.
    update
      survey_response_network_addresses
    set
      num_responses = num_responses + 1,
      updated_at = current_bucket
    where
      survey_id = NEW.survey_id
      and ip_hash = crypt(request_ip || NEW.survey_id::text, ip_hash);

    -- FOUND is true when the UPDATE touched at least one row. (Testing a
    -- row variable with IS NOT NULL would be true only if *every* column of
    -- the row were non-null, which can miss a genuine match.)
    if found then
      NEW.is_duplicate_ip = true;
    else
      insert into survey_response_network_addresses (
        survey_id,
        ip_hash
      ) values (
        NEW.survey_id,
        crypt(request_ip || NEW.survey_id::text, gen_salt('md5'))
      );
    end if;

    return NEW;
  end;
$$;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters