clean code and refactor chemical import and export classes and related spec files
ComPlat · Sep 11, 2023 · 49f9c9a · 49f9c9a
1 parent 86a44f2
commit 49f9c9a
Show file tree

Hide file tree

Showing 6 changed files with 99 additions and 125 deletions.
diff --git a/app/api/helpers/report_helpers.rb b/app/api/helpers/report_helpers.rb
@@ -322,34 +322,6 @@ def build_sql_sample_chemicals(columns, c_id, ids, checked_all)
     SQL
   end
 
-  # inner join samples s on s_dl.s_id = s.id #{collection_join}
-  # order by #{order};
-  # ,#{columns[1][0]}
-  # c.chemical_data
-  #c."cas",
-  #  c."chemical_data"->0->'status' AS "status"
-  # SELECT json_build_object(c."id", c."sample_id", #{columns[1].join(',')
-  # FROM chemicals c
-  # WHERE c.sample_id = s.id
-  # ORDER BY c.id
-  # LIMIT 1
-
-  # SELECT
-  # s_id, ts, co_id, scu_id, shared_sync, pl, dl_s,
-  # s.molfile_version, s.decoupled, s.molecular_mass as "molecular mass (decoupled)", s.sum_formula as "sum formula (decoupled)"
-  # ,#{columns[0].join(',')},
-  # (
-  # SELECT array_to_json(array_agg(row_to_json(chemical)))
-  #   FROM (
-  #     SELECT #{columns[1].join(',')}
-  #     FROM (#{s_subquery}) AS s_dl
-  #     INNER JOIN chemicals c ON c.sample_id = s_dl.s_id
-  #   ) AS chemical
-  # ) AS chemicals
-  # FROM (#{s_subquery}) AS s_dl
-  # INNER JOIN samples s ON s_dl.s_id = s.id #{collection_join}
-  # ORDER BY #{order};
-
   def build_sql_sample_analyses(columns, c_id, ids, checkedAll = false)
     s_ids = [ids].flatten.join(',')
     u_ids = [user_ids].flatten.join(',')

diff --git a/lib/export/export_chemicals.rb b/lib/export/export_chemicals.rb
@@ -45,55 +45,60 @@ class ExportChemicals
       important_notes: ['c."chemical_data"->0->\'important_notes\'', '"important_notes"', nil],
     }.freeze
 
-    def self.format_chemical_amount(value)
-      amount_value_unit = JSON.parse(value).values
-      sorted = amount_value_unit.sort_by { |element| [element.is_a?(Integer) || element.is_a?(Float) ? 0 : 1, element] }
-      sorted.join
-    end
-
-    def self.format_columns_name(result, *indexes)
-      indexes.sort.reverse_each do |index|
-        result.columns[index] = result.columns[index].sub(/\s+\S+\z/, '')
+    def self.build_chemical_column_query(selection, sel)
+      chemical_selections = []
+      sel[:chemicals].each do |col|
+        query = CHEMICAL_QUERIES[col.to_sym]
+        chemical_selections << ("#{query[2]} as #{query[1]}") if SAFETY_SHEET_INFO.include?(col)
+        chemical_selections << ("#{query[0]} as #{query[1]}")
       end
+      gathered_selections = []
+      gathered_selections << selection
+      gathered_selections << chemical_selections
     end
 
-    def self.delete_columns(result, *indexes)
-      indexes.sort.reverse_each do |index|
-        result.columns.delete_at(index)
-        result.rows.each { |row| row.delete_at(index) }
+    def self.format_chemical_results(result)
+      columns_index = { 'safety_sheet_link' => [], 'product_link' => [] }
+      result.columns.map.with_index do |column_name, index|
+        column_name, columns_index = construct_column_name(column_name, index, columns_index)
+        result.columns[index] = column_name # Replace the value in the array
       end
+      format_chemical_results_row(result, columns_index)
     end
 
-    def self.process_merged_columns(result, columns_index)
-      format_columns_name(result, columns_index['safety_sheet_link'][0], columns_index['product_link'][0])
-      delete_columns(result, columns_index['safety_sheet_link'][1], columns_index['product_link'][1])
-    end
-
-    def self.process_to_delete_indexes(result, indexes_to_delete)
-      indexes_to_delete.sort.reverse_each do |index|
-        result.columns.delete_at(index)
-        result.rows.each { |row| row.delete_at(index) }
-        format_columns_name(result, index - 1)
+    def self.construct_column_name(column_name, index, columns_index)
+      format_chemical_column = ['p statements', 'h statements', 'amount', 'safety sheet link thermofischer',
+                                'safety sheet link merck', 'product link thermofischer', 'product link merck'].freeze
+      if column_name.is_a?(String) && CHEMICAL_FIELDS.include?(column_name)
+        column_name = column_name.tr('_', ' ')
+        construct_column_name_hash(columns_index, column_name, index) if format_chemical_column.include?(column_name)
+      else
+        column_name
       end
+      [column_name, columns_index]
     end
 
-    def self.merge_safety_sheets_columns_rows(result, indexes_to_delete, columns_index)
-      process_to_delete_indexes(result, indexes_to_delete)
-      process_merged_columns(result, columns_index) if indexes_to_delete.empty?
-      result
-    end
-
-    def self.format_p_and_h_statements(value)
-      keys = JSON.parse(value).keys
-      keys.join('-')
+    def self.construct_column_name_hash(columns_index, column_name, index)
+      case column_name
+      when 'p statements'
+        columns_index['p_statements'] = index
+      when 'h statements'
+        columns_index['h_statements'] = index
+      when 'amount'
+        columns_index['amount'] = index
+      when 'safety sheet link merck', 'safety sheet link thermofischer'
+        columns_index['safety_sheet_link'].push(index)
+      when 'product link merck', 'product link thermofischer'
+        columns_index['product_link'].push(index)
+      end
     end
 
-    def self.format_link(value, row, next_index, indexes_to_delete)
-      if next_index && row[next_index]
-        value += "-#{row[next_index]}"
-        indexes_to_delete.push(next_index)
+    def self.format_chemical_results_row(result, columns_index)
+      indexes_to_delete = []
+      result.rows.map! do |row|
+        format_row(row, columns_index, indexes_to_delete)
       end
-      value
+      merge_safety_sheets_columns_rows(result, indexes_to_delete, columns_index)
     end
 
     def self.format_row(row, columns_index, indexes_to_delete)
@@ -102,73 +107,68 @@ def self.format_row(row, columns_index, indexes_to_delete)
 
         case index
         when columns_index['p_statements'], columns_index['h_statements']
-          format_p_and_h_statements(value)
+          value = format_p_and_h_statements(value)
         when columns_index['amount']
-          format_chemical_amount(value)
+          value = format_chemical_amount(value)
         when columns_index['safety_sheet_link'][0]
-          format_link(value, row, columns_index['safety_sheet_link'][1], indexes_to_delete)
+          value = format_link(value, row, columns_index['safety_sheet_link'][1], indexes_to_delete)
         when columns_index['product_link'][0]
-          format_link(value, row, columns_index['product_link'][1], indexes_to_delete)
-        else
-          value.gsub(/[\[\]"]/, '')
+          value = format_link(value, row, columns_index['product_link'][1], indexes_to_delete)
         end
+        value.gsub(/[\[\]"]/, '')
       end
     end
 
-    def self.format_chemical_results_row(result, columns_index)
-      indexes_to_delete = []
-      result.rows.map! do |row|
-        format_row(row, columns_index, indexes_to_delete)
-      end
-      merge_safety_sheets_columns_rows(result, indexes_to_delete, columns_index)
+    def self.format_p_and_h_statements(value)
+      keys = JSON.parse(value).keys
+      keys.join('-')
     end
 
-    def self.construct_column_name_hash(columns_index, column_name, index)
-      case column_name
-      when 'p statements'
-        columns_index['p_statements'] = index
-      when 'h statements'
-        columns_index['h_statements'] = index
-      when 'amount'
-        columns_index['amount'] = index
-      when 'safety sheet link merck', 'safety sheet link thermofischer'
-        columns_index['safety_sheet_link'].push(index)
-      when 'product link merck', 'product link thermofischer'
-        columns_index['product_link'].push(index)
+    def self.format_chemical_amount(value)
+      amount_value_unit = JSON.parse(value).values
+      sorted = amount_value_unit.sort_by { |element| [element.is_a?(Integer) || element.is_a?(Float) ? 0 : 1, element] }
+      sorted.join
+    end
+
+    def self.format_link(value, row, next_index, indexes_to_delete)
+      # binding.pry
+      if next_index && row[next_index].present?
+        value += "-#{row[next_index]}"
+        indexes_to_delete.push(next_index)
       end
+      value
     end
 
-    def self.construct_column_name(column_name, index, columns_index)
-      format_chemical_column = ['p statements', 'h statements', 'amount', 'safety sheet link thermofischer',
-                                'safety sheet link merck', 'product link thermofischer', 'product link merck'].freeze
-      if column_name.is_a?(String) && CHEMICAL_FIELDS.include?(column_name)
-        column_name = column_name.tr('_', ' ')
-        construct_column_name_hash(columns_index, column_name, index) if format_chemical_column.include?(column_name)
-      else
-        column_name
+    def self.merge_safety_sheets_columns_rows(result, indexes_to_delete, columns_index)
+      process_to_delete_indexes(result, indexes_to_delete)
+      process_merged_columns(result, columns_index) if indexes_to_delete.empty?
+      result
+    end
+
+    def self.process_to_delete_indexes(result, indexes_to_delete)
+      indexes_to_delete.sort.reverse_each do |index|
+        result.columns.delete_at(index)
+        result.rows.each { |row| row.delete_at(index) }
+        format_columns_name(result, index - 1)
       end
-      [column_name, columns_index]
     end
 
-    def self.format_chemical_results(result)
-      columns_index = { 'safety_sheet_link' => [], 'product_link' => [] }
-      result.columns.map.with_index do |column_name, index|
-        column_name, columns_index = construct_column_name(column_name, index, columns_index)
-        result.columns[index] = column_name # Replace the value in the array
+    def self.process_merged_columns(result, columns_index)
+      format_columns_name(result, columns_index['safety_sheet_link'][0], columns_index['product_link'][0])
+      delete_columns(result, columns_index['safety_sheet_link'][1], columns_index['product_link'][1])
+    end
+
+    def self.format_columns_name(result, *indexes)
+      indexes.sort.reverse_each do |index|
+        result.columns[index] = result.columns[index].sub(/\s+\S+\z/, '')
       end
-      format_chemical_results_row(result, columns_index)
     end
 
-    def self.build_chemical_column_query(selection, sel)
-      chemical_selections = []
-      sel[:chemicals].each do |col|
-        query = CHEMICAL_QUERIES[col.to_sym]
-        chemical_selections << ("#{query[2]} as #{query[1]}") if SAFETY_SHEET_INFO.include?(col)
-        chemical_selections << ("#{query[0]} as #{query[1]}")
+    def self.delete_columns(result, *indexes)
+      indexes.sort.reverse_each do |index|
+        result.columns.delete_at(index)
+        result.rows.each { |row| row.delete_at(index) }
       end
-      gathered_selections = []
-      gathered_selections << selection
-      gathered_selections << chemical_selections
     end
   end
 end
diff --git a/lib/export/export_table.rb b/lib/export/export_table.rb
@@ -49,10 +49,6 @@ class ExportTable
     HEADERS_DATASET = ["dataset name", "instrument", "dataset description"].freeze
     HEADERS_ATTACHMENT_0 = [].freeze
     HEADERS_ATTACHMENT = ["filename", "checksum"].freeze
-    # HEADERS_CHEMICAL = %w[status vendor order_number amount price person required_date ordered_date required_by
-    #                       safety_sheet_link product_link pictograms h_statements p_statements host_building
-    #                       host_room host_cabinet host_group owner current_building current_room current_cabinet
-    #                       borrowed_by disposal_info important_notes].freeze
 
     def extract_label_from_solvent_column(sample_column)
       return unless sample_column.is_a?(String) && !sample_column.empty?
@@ -145,7 +141,6 @@ def add_analyses_header(selected_headers)
       @headers100 << 'analyses'
     end
 
-
     def quill_to_html_to_string(delta)
       html_content = Chemotion::QuillToHtml.new.convert(delta)
       Nokogiri::HTML( html_content).text

diff --git a/lib/import/import_chemicals.rb b/lib/import/import_chemicals.rb
@@ -88,11 +88,6 @@ def self.detect_vendor(value)
       nil
     end
 
-    def self.extract_product_number(url)
-      match = url.match(/productNumber=(\d+)/) || url.match(/sku=(\w+)/)
-      match[1] if match
-    end
-
     def self.handle_safety_sheet(key, vendor, value, chemical)
       case key
       when 'safety_sheet_link'
@@ -104,6 +99,16 @@ def self.handle_safety_sheet(key, vendor, value, chemical)
       end
     end
 
+    def self.extract_product_number(url)
+      match = url.match(/productNumber=(\d+)/) || url.match(/sku=(\w+)/)
+      if match
+        match[1]
+      else
+        path = url.split('/')
+        path.last
+      end
+    end
+
     def self.create_safety_sheet_path(vendor, value, product_number, chemical)
       file_path = "#{product_number}_#{vendor.capitalize}.pdf"
       chemical['chemical_data'][0]['safetySheetPath'] ||= []

diff --git a/spec/lib/export/export_chemicals_spec.rb b/spec/lib/export/export_chemicals_spec.rb
@@ -154,7 +154,7 @@
       it 'constructs column name (p statements)' do
         columns_index = { 'safety_sheet_link' => [], 'product_link' => [] }
         result = described_class.construct_column_name('p_statements', 2, columns_index)
-        resulting_columns_index = ['p statements', {'p_statements' => 2, 'safety_sheet_link' => [],
+        resulting_columns_index = ['p statements', { 'p_statements' => 2, 'safety_sheet_link' => [],
                                                     'product_link' => [] }]
         expect(result).to eq(resulting_columns_index)
       end
@@ -169,7 +169,7 @@
 
         result = described_class.format_chemical_results(result)
 
-        expect(result.columns).to eq(['safety sheet link', 'product link merck', 'product link thermofischer'])
+        expect(result.columns).to eq(['safety sheet link', 'product link'])
       end
     end
 

diff --git a/spec/lib/import/import_chemicals_spec.rb b/spec/lib/import/import_chemicals_spec.rb
@@ -7,6 +7,8 @@
     it 'extracts a product number from a Sigma-Aldrich URL' do
       url = 'http://www.sigmaaldrich.com/MSDS/MSDS/DisplayMSDSPage.do?country=DE&language=DE&productNumber=131377&brand=ALDRICH'
       expect(described_class.extract_product_number(url)).to eq('131377')
+      url_2 = 'https://www.sigmaaldrich.com/US/en/product/sigma/a5376'
+      expect(described_class.extract_product_number(url_2)).to eq('a5376')
     end
   end