From 68b6e184c1592524f3b577ecfc633d8f46e983c7 Mon Sep 17 00:00:00 2001
From: moveson
Date: Wed, 27 Nov 2024 08:17:59 -0700
Subject: [PATCH] Better de-duplication of names in the ultrasignup historical facts transformer

Skip a previous name when it is a junk value, when it repeats the other
previous-name field, or when it matches the entrant's own first and last
name. All comparisons ignore case and surrounding whitespace.

The comparison between the two previous-name fields goes through to_s so
that a record with a missing (nil) field does not raise NoMethodError.

---
 .../ultrasignup_historical_facts_strategy.rb      | 19 +++++++++++++++++--
 .../ultrasignup_historical_facts_strategy_spec.rb |  6 +++---
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/lib/etl/transformers/async/ultrasignup_historical_facts_strategy.rb b/lib/etl/transformers/async/ultrasignup_historical_facts_strategy.rb
index 9a0d1d31e..717a5fc85 100644
--- a/lib/etl/transformers/async/ultrasignup_historical_facts_strategy.rb
+++ b/lib/etl/transformers/async/ultrasignup_historical_facts_strategy.rb
@@ -96,11 +96,16 @@ def record_emergency_contact(struct)
         end
 
         def record_previous_names(struct)
-          previous_names_array = [struct[:Previous_names_1], struct[:Previous_names_2]]
+          names_1 = struct[:Previous_names_1]
+          names_2 = struct[:Previous_names_2]
+          # Drop the second field when it only repeats the first (ignoring case and
+          # surrounding whitespace); to_s keeps a nil field from raising NoMethodError.
+          names_2 = nil if names_2.to_s.downcase.strip == names_1.to_s.downcase.strip
+          previous_names_array = [names_1, names_2]
 
           previous_names_array.each do |previous_names|
             if previous_names.present?
-              next if previous_names.downcase.strip.in? JUNK_VALUES
+              next if name_is_junk(previous_names) || name_is_identical(previous_names, struct)
 
               proto_record = base_proto_record.deep_dup
               proto_record[:kind] = :previous_name
@@ -111,6 +116,16 @@ def record_previous_names(struct)
           end
         end
 
+        # True when the name, lowercased and stripped, is one of JUNK_VALUES.
+        def name_is_junk(previous_names)
+          previous_names.downcase.strip.in? JUNK_VALUES
+        end
+
+        # True when the name equals "First_Name Last_Name" from the struct, ignoring case and outer whitespace.
+        def name_is_identical(previous_names, struct)
+          previous_names.downcase.strip == "#{struct[:First_Name]} #{struct[:Last_Name]}".downcase.strip
+        end
+
         def record_ever_finished(struct)
           reported_ever_finished = struct[:Ever_finished]
 
diff --git a/spec/lib/etl/transformers/async/ultrasignup_historical_facts_strategy_spec.rb b/spec/lib/etl/transformers/async/ultrasignup_historical_facts_strategy_spec.rb
index 2f023d790..4d6b1863d 100644
--- a/spec/lib/etl/transformers/async/ultrasignup_historical_facts_strategy_spec.rb
+++ b/spec/lib/etl/transformers/async/ultrasignup_historical_facts_strategy_spec.rb
@@ -59,7 +59,7 @@
         :Volunteer_description => "",
         :Ever_finished => "No",
         :Previous_names_1 => "David Conroy",
-        :Previous_names_2 => "",
+        :Previous_names_2 => "david conroy",
         :DNS_since_finish => 1,
         :Qualifier => "2023 AUG: Bigfoot 200",
         :Years_volunteered => 0
@@ -80,7 +80,7 @@
         :emergency_phone => 33682847631,
         :Volunteer_description => "",
         :Ever_finished => "No",
-        :Previous_names_1 => "N/A",
+        :Previous_names_1 => "louis benoit",
         :Previous_names_2 => "",
         :DNS_since_finish => 0,
         :Qualifier => "2023 SEPT: Tor de Geants (Italy)",
@@ -155,7 +155,7 @@
       expect(emergency_contact_proto_records.map { |pr| pr[:comments] }).to match_array(["Carleen Paucek", "Françoise Benoit, 33682847631", "Promo, 27724373177"])
     end
 
-    it "returns one proto_record for each provided previous name" do
+    it "returns one proto_record for each provided previous name, ignoring junk and identical names" do
       previous_name_proto_records = proto_records.select { |proto_record| proto_record.attributes[:kind] == :previous_name }
       expect(previous_name_proto_records.count).to eq(3)
       expect(previous_name_proto_records.map { |pr| pr[:comments] }).to match_array(["David Conroy", "Marie Antoinette", "Maria Sanjust"])