From a84bf4b82cc28e478329375e86bae13aaf28fcdc Mon Sep 17 00:00:00 2001 From: adambasha0 Date: Tue, 26 Nov 2024 15:02:39 +0000 Subject: [PATCH 1/5] feat: Refactor and improve Molecule API's SMILES endpoint for better maintainability and performance --- app/api/chemotion/molecule_api.rb | 56 ++------------------- lib/chemotion/molecule_fetcher.rb | 83 +++++++++++++++++++++++++++++++ lib/chemotion/smiles_processor.rb | 39 +++++++++++++++ lib/svg/processor.rb | 60 ++++++++++++++++++++-- 4 files changed, 181 insertions(+), 57 deletions(-) create mode 100644 lib/chemotion/molecule_fetcher.rb create mode 100644 lib/chemotion/smiles_processor.rb diff --git a/app/api/chemotion/molecule_api.rb b/app/api/chemotion/molecule_api.rb index 9f2e8b0f74..66e53d8409 100644 --- a/app/api/chemotion/molecule_api.rb +++ b/app/api/chemotion/molecule_api.rb @@ -30,57 +30,9 @@ class MoleculeAPI < Grape::API optional :editor, type: String, desc: 'SVGProcessor', default: 'ketcher' end post do - smiles = params[:smiles] - svg = params[:svg_file] - - babel_info = OpenBabelService.molecule_info_from_structure(smiles, 'smi') - inchikey = babel_info[:inchikey] - return {} if inchikey.blank? - - molecule = Molecule.find_by(inchikey: inchikey, is_partial: false) - unless molecule - molfile = babel_info[:molfile] if babel_info - begin - rw_mol = RDKitChem::RWMol.mol_from_smiles(smiles) - rd_mol = rw_mol.mol_to_mol_block unless rw_mol.nil? - rescue StandardError => e - Rails.logger.error ["with smiles: #{smiles}", e.message, *e.backtrace].join($INPUT_RECORD_SEPARATOR) - rd_mol = rw_mol.mol_to_mol_block(true, -1, false) unless rw_mol.nil? - end - if rd_mol.nil? - begin - pc_mol = Chemotion::PubchemService.molfile_from_smiles(smiles) - pc_mol = Chemotion::OpenBabelService.molfile_clear_hydrogens(pc_mol) unless pc_mol.nil? - molfile = pc_mol unless pc_mol.nil? - rescue StandardError => e - Rails.logger.error ["with smiles: #{smiles}", e.message, *e.backtrace].join($INPUT_RECORD_SEPARATOR) - end - else - molfile = rd_mol - end - return {} unless molfile - molecule = Molecule.find_or_create_by_molfile(molfile, babel_info) - molecule = Molecule.find_or_create_dummy if molecule.blank? - end - return unless molecule + molecule = Chemotion::SmilesProcessor.new(params).process + return {} unless molecule - svg_digest = "#{molecule.inchikey}#{Time.now}" - if svg.present? - svg_process = SVG::Processor.new.structure_svg(params[:editor], svg, svg_digest) - else - svg_process = SVG::Processor.new.generate_svg_info('samples', svg_digest) - svg_file_src = Rails.public_path.join('images', 'molecules', molecule.molecule_svg_file) - if File.exist?(svg_file_src) - mol = molecule.molfile.lines[0..1] - if svg.nil? || svg&.include?('Open Babel') - svg = Molecule.svg_reprocess(svg, molecule.molfile) - svg_process = SVG::Processor.new.structure_svg('ketcher', svg, svg_digest, true) - else - FileUtils.cp(svg_file_src, svg_process[:svg_file_path]) - end - end - end - molecule.attributes.merge(temp_svg: File.exist?(svg_process[:svg_file_path]) && svg_process[:svg_file_name], ob_log: babel_info[:ob_log]) present molecule, with: Entities::MoleculeEntity end @@ -196,9 +148,9 @@ class MoleculeAPI < Grape::API mol = molecule.molfile.lines.first(2) if mol[1]&.strip&.match?('OpenBabel') svg = File.read(svg_file_src) - svg_process = SVG::Processor.new.structure_svg('openbabel', svg, molfile) + svg_process = SVG::Processor.new(svg, params[:editor], molecule).structure_svg('openbabel', svg, molfile) else - svg_process = SVG::Processor.new.generate_svg_info('samples', molfile) + svg_process = SVG::Processor.new(svg, params[:editor], molecule).generate_svg_info('samples', molfile) FileUtils.cp(svg_file_src, svg_process[:svg_file_path]) end end diff --git a/lib/chemotion/molecule_fetcher.rb b/lib/chemotion/molecule_fetcher.rb new file mode 100644 index 0000000000..31d85423d9 --- /dev/null +++ b/lib/chemotion/molecule_fetcher.rb @@ -0,0 +1,83 @@ +# frozen_string_literal: true + +module Chemotion + class MoleculeFetcher + def initialize(smiles, babel_info) + @smiles = smiles + @babel_info = babel_info + end + + def fetch_or_create + find_existing || create_molecule + end + + private + + def find_existing + Molecule.find_by(inchikey: molecule_inchikey, is_partial: false) + end + + def create_molecule + molfile = fetch_molfile + return unless molfile + + Molecule.find_or_create_by_molfile(molfile, @babel_info) || Molecule.find_or_create_dummy + end + + def fetch_molfile + @babel_info&.dig(:molfile) || rdkit_molfile || pubchem_molfile + end + + def rdkit_molfile + rw_mol = RDKitChem::RWMol.mol_from_smiles(@smiles) + begin + rw_mol&.mol_to_mol_block + rescue StandardError => e + handle_rdkit_error(e, rw_mol) + end + end + + def handle_rdkit_error(error, rw_mol) + log_error(error) + rw_mol&.mol_to_mol_block(true, -1, false) + end + + def pubchem_molfile + pc_mol = Chemotion::PubchemService.molfile_from_smiles(@smiles) + validate_and_clear_molfile(pc_mol) + rescue StandardError => e + log_error(e) + nil + end + + def validate_and_clear_molfile(pc_mol) + return unless validate_molfile(pc_mol) + + Chemotion::OpenBabelService.molfile_clear_hydrogens(pc_mol) + end + + def validate_molfile(molfile) + parsed = parse_to_hash(molfile) + parsed['Status'] != '400' + end + + def parse_to_hash(input) + # Split the string into key-value pairs based on known patterns + input.each_line.with_object({}) do |line, hash| + next unless line =~ /^(.*?):\s*(.*)$/ + + key = Regexp.last_match(1).strip + value = Regexp.last_match(2).strip + hash[key] = value + end + end + + def molecule_inchikey + @babel_info[:inchikey] + end + + def log_error(error) + Rails.logger.error ["with smiles: #{@smiles}", error.message, *error.backtrace].join($INPUT_RECORD_SEPARATOR) + end + end +end diff --git a/lib/chemotion/smiles_processor.rb b/lib/chemotion/smiles_processor.rb new file mode 100644 index 0000000000..84de50f412 --- /dev/null +++ b/lib/chemotion/smiles_processor.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +module Chemotion + class SmilesProcessor + def initialize(params) + @smiles = params[:smiles] + @svg = params[:svg_file] + @editor = params[:editor] + end + + def process + return unless valid_smiles? && babel_info.present? + + molecule = Chemotion::MoleculeFetcher.new(@smiles, babel_info).fetch_or_create + return unless molecule + + svg_result = SVG::Processor.new(@svg, @editor, molecule).process + build_result(molecule, svg_result) + end + + private + + def valid_smiles? + @smiles.present? + end + + def babel_info + @babel_info ||= Chemotion::OpenBabelService.molecule_info_from_structure(@smiles, 'smi') + end + + def build_result(molecule, svg_result) + molecule.attributes.merge( + temp_svg: svg_result[:svg_file_name], + ob_log: babel_info[:ob_log], + ) + molecule + end + end +end diff --git a/lib/svg/processor.rb b/lib/svg/processor.rb index 6729d44e47..cfb0029090 100644 --- a/lib/svg/processor.rb +++ b/lib/svg/processor.rb @@ -5,6 +5,23 @@ module SVG # SVG Processor class Processor + def initialize(svg, editor, molecule) + @svg = svg + @editor = editor + @molecule = molecule + end + + def process + svg_digest = generate_svg_digest(@molecule) + if @svg.present? + structure_svg(@editor, @svg, svg_digest) + else + regenerate_or_copy_svg(svg_digest) + end + end + + private + def structure_svg(editor, svg, hexdigest, is_centered = false) processor = case editor when /marvinjs/i @@ -18,11 +35,7 @@ def structure_svg(editor, svg, hexdigest, is_centered = false) Chemotion::OpenBabelSvgProcessor.new(svg) end svg = processor.centered_and_scaled_svg unless is_centered == true - info = generate_svg_info('samples', hexdigest) - svg_file = File.new(info[:svg_file_path], 'w+') - svg_file.write(svg) - svg_file.close - { svg_file_path: info[:svg_file_path], svg_file_name: info[:svg_file_name] } + save_svg_to_file(svg, generate_svg_info('samples', hexdigest)) end def generate_svg_info(type, hexdigest) @@ -32,5 +45,42 @@ def generate_svg_info(type, hexdigest) svg_file_path = File.join('public', 'images', type, svg_file_name) { svg_file_path: svg_file_path, svg_file_name: svg_file_name } end + + def regenerate_or_copy_svg(hexdigest) + svg_file_src = Rails.public_path.join('images', 'molecules', @molecule.molecule_svg_file) + if svg_file_exists?(svg_file_src) + info = generate_svg_info('samples', hexdigest) + needs_reprocessing? ? regenerate_svg(hexdigest) : copy_svg_file(svg_file_src, info) + else + generate_svg_info('samples', hexdigest) + end + end + + def svg_file_exists?(svg_file_src) + File.exist?(svg_file_src) + end + + def needs_reprocessing? + @svg.nil? || @svg.include?('Open Babel') + end + + def save_svg_to_file(svg, info) + File.write(info[:svg_file_path], svg) + { svg_file_path: info[:svg_file_path], svg_file_name: info[:svg_file_name] } + end + + def regenerate_svg(hexdigest) + svg = Molecule.svg_reprocess(@svg, @molecule.molfile) + structure_svg(@editor, svg, hexdigest, true) + end + + def copy_svg_file(src, info) + FileUtils.cp(src, info[:svg_file_path]) + info + end + + def generate_svg_digest(molecule) + "#{molecule.inchikey}#{Time.current}" + end end end From 71a02f640bd9bfa075b6bf8ab4e1372666284d6e Mon Sep 17 00:00:00 2001 From: adambasha0 Date: Fri, 29 Nov 2024 13:01:50 +0000 Subject: [PATCH 2/5] fix: refactor spec tests for Molecule api --- app/api/chemotion/molecule_api.rb | 28 ++++---- lib/svg/processor.rb | 13 +++- spec/api/molecule_api_spec.rb | 113 ++++++++++++++++-------------- 3 files changed, 85 insertions(+), 69 deletions(-) diff --git a/app/api/chemotion/molecule_api.rb b/app/api/chemotion/molecule_api.rb index 66e53d8409..f1dd74906d 100644 --- a/app/api/chemotion/molecule_api.rb +++ b/app/api/chemotion/molecule_api.rb @@ -137,23 +137,21 @@ class MoleculeAPI < Grape::API svg = params[:svg_file] molfile = params[:molfile] decoupled = params[:decoupled] - molecule = decoupled ? Molecule.find_or_create_dummy : Molecule.find_or_create_by_molfile(molfile) - molecule = Molecule.find_or_create_dummy if molecule.blank? + molecule = if decoupled + Molecule.find_or_create_dummy + else + Molecule.find_or_create_by_molfile(molfile) || Molecule.find_or_create_dummy + end + if molfile.present? + molecule.molfile = molfile + molecule.save! if molecule.changed? + end ob = molecule&.ob_log - if svg.present? - svg_process = SVG::Processor.new.structure_svg(params[:editor], svg, molfile) + mol = molecule.molfile.lines.first(2) + if mol[1]&.strip&.match?('OpenBabel') + svg_process = SVG::Processor.new(svg, 'openbabel', molecule).process else - svg_file_src = Rails.public_path.join('images', 'molecules', molecule.molecule_svg_file) - if File.exist?(svg_file_src) - mol = molecule.molfile.lines.first(2) - if mol[1]&.strip&.match?('OpenBabel') - svg = File.read(svg_file_src) - svg_process = SVG::Processor.new(svg, params[:editor], molecule).structure_svg('openbabel', svg, molfile) - else - svg_process = SVG::Processor.new(svg, params[:editor], molecule).generate_svg_info('samples', molfile) - FileUtils.cp(svg_file_src, svg_process[:svg_file_path]) - end - end + svg_process = SVG::Processor.new(svg, params[:editor], molecule).process end molecule&.attributes&.merge(temp_svg: svg_process[:svg_file_name], ob_log: ob) Entities::MoleculeEntity.represent(molecule, temp_svg: svg_process[:svg_file_name], ob_log: ob) diff --git a/lib/svg/processor.rb b/lib/svg/processor.rb index cfb0029090..3c80a80bab 100644 --- a/lib/svg/processor.rb +++ b/lib/svg/processor.rb @@ -20,7 +20,6 @@ def process end end - private def structure_svg(editor, svg, hexdigest, is_centered = false) processor = case editor @@ -38,6 +37,8 @@ def structure_svg(editor, svg, hexdigest, is_centered = false) save_svg_to_file(svg, generate_svg_info('samples', hexdigest)) end + private + def generate_svg_info(type, hexdigest) digest = Digest::SHA256.hexdigest hexdigest digest = Digest::SHA256.hexdigest digest @@ -47,8 +48,8 @@ def generate_svg_info(type, hexdigest) end def regenerate_or_copy_svg(hexdigest) - svg_file_src = Rails.public_path.join('images', 'molecules', @molecule.molecule_svg_file) - if svg_file_exists?(svg_file_src) + svg_file_src = generate_svg_file_src + if svg_file_src && svg_file_exists?(svg_file_src) info = generate_svg_info('samples', hexdigest) needs_reprocessing? ? regenerate_svg(hexdigest) : copy_svg_file(svg_file_src, info) else @@ -56,6 +57,12 @@ def regenerate_or_copy_svg(hexdigest) end end + def generate_svg_file_src + return if @molecule.molecule_svg_file.blank? + + Rails.public_path.join('images', 'molecules', @molecule.molecule_svg_file) + end + def svg_file_exists?(svg_file_src) File.exist?(svg_file_src) end diff --git a/spec/api/molecule_api_spec.rb b/spec/api/molecule_api_spec.rb index 85db62c9c1..5454e8ec27 100644 --- a/spec/api/molecule_api_spec.rb +++ b/spec/api/molecule_api_spec.rb @@ -13,61 +13,72 @@ end describe 'POST /api/v1/molecules' do + let(:molfile) do + " + Ketcher 09231514282D 1 1.00000 0.00000 0 + + 8 12 0 0 0 999 V2000 + -4.3500 1.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.8750 2.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.3500 0.3750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.3250 1.3750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.6000 -0.2000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.1500 -1.0250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.0000 -2.6500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1.5250 -1.5250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 1 3 1 0 0 0 + 1 2 1 0 0 0 + 2 4 1 0 0 0 + 4 3 1 0 0 0 + 3 7 1 0 0 0 + 6 7 1 0 0 0 + 6 1 1 0 0 0 + 6 5 1 0 0 0 + 5 2 1 0 0 0 + 5 8 1 0 0 0 + 8 4 1 0 0 0 + 8 7 1 0 0 0 + M END" + end + let(:svg_file) { 'TXWRERCHRDBNLG-UHFFFAOYSA-N.svg' } + let(:editor) { 'openbabel' } + let(:valid_params) do + { + molfile: molfile, + svg_file: svg_file, + editor: editor, + decoupled: false, + } + end + context 'with valid parameters' do - let!(:molfile) do - " - Ketcher 09231514282D 1 1.00000 0.00000 0 - - 8 12 0 0 0 999 V2000 - -4.3500 1.8250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.8750 2.5000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.3500 0.3750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 1.3250 1.3750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.6000 -0.2000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - -4.1500 -1.0250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - -1.0000 -2.6500 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 1.5250 -1.5250 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 - 1 3 1 0 0 0 - 1 2 1 0 0 0 - 2 4 1 0 0 0 - 4 3 1 0 0 0 - 3 7 1 0 0 0 - 6 7 1 0 0 0 - 6 1 1 0 0 0 - 6 5 1 0 0 0 - 5 2 1 0 0 0 - 5 8 1 0 0 0 - 8 4 1 0 0 0 - 8 7 1 0 0 0 -M END" + it 'returns the molecule entity with expected attributes' do + post '/api/v1/molecules', params: valid_params + expect(response).to have_http_status(:created) + molecule = JSON.parse(response.body) + expect(molecule['molfile']).to eq(molfile) end + end - let!(:params) do - { - inchistring: 'InChI=1S/C8H8/c1-2-5-3(1)7-4(1)6(2)8(5)7/h1-8H', - # molecule_svg_file: "TXWRERCHRDBNLG-UHFFFAOYSA-N.svg", - inchikey: 'TXWRERCHRDBNLG-UHFFFAOYSA-N', - molecular_weight: 104.14912, - sum_formular: 'C8H8', - iupac_name: 'cubane', - names: ['cubane'] - } + context 'with decoupled true' do + let(:decoupled) { true } + + it 'creates a dummy molecule' do + post '/api/v1/molecules', params: valid_params + + expect(response).to have_http_status(:created) + molecule = JSON.parse(response.body) + expect(molecule['inchikey']).to eq('DUMMY') end - let!(:decoupled) { false } - - it 'is able to find or create a molecule by molfile' do - m = Molecule.find_by(molfile: molfile) - expect(m).to be_nil - post '/api/v1/molecules', params: { molfile: molfile, decoupled: false } - m = Molecule.find_by(molfile: molfile) - expect(m).not_to be_nil - mw = params.delete(:molecular_weight) - expect(m.attributes['molecular_weight'].round(5)).to eq(mw) - params.each do |k, v| - expect(m.attributes.symbolize_keys[k]).to eq(v) unless m.attributes.symbolize_keys[k].is_a?(Float) - expect(m.attributes.symbolize_keys[k].round(5)).to eq(v.round(5)) if m.attributes.symbolize_keys[k].is_a?(Float) - end - expect(m.molecule_svg_file).to match(/\w{128}\.svg/) + end + + context 'with missing SVG file' do + it 'processes and generates SVG from the molfile' do + post '/api/v1/molecules', params: valid_params.except(:svg_file) + + expect(response).to have_http_status(:created) + molecule = JSON.parse(response.body) + expect(molecule).to include('temp_svg') end end end From ab9722ad67083f3406f5baf7bccdcedca8022b0c Mon Sep 17 00:00:00 2001 From: adambasha0 Date: Fri, 29 Nov 2024 21:19:20 +0000 Subject: [PATCH 3/5] feat: add spec tests for SmilesProcessor class --- app/api/chemotion/molecule_api.rb | 5 +-- lib/chemotion/smiles_processor.rb | 1 - spec/lib/chemotion/smiles_processor_spec.rb | 41 +++++++++++++++++++++ 3 files changed, 43 insertions(+), 4 deletions(-) create mode 100644 spec/lib/chemotion/smiles_processor_spec.rb diff --git a/app/api/chemotion/molecule_api.rb b/app/api/chemotion/molecule_api.rb index f1dd74906d..aa15a40ae9 100644 --- a/app/api/chemotion/molecule_api.rb +++ b/app/api/chemotion/molecule_api.rb @@ -30,11 +30,10 @@ class MoleculeAPI < Grape::API optional :editor, type: String, desc: 'SVGProcessor', default: 'ketcher' end post do - molecule = Chemotion::SmilesProcessor.new(params).process + molecule, temp_svg, ob_log = Chemotion::SmilesProcessor.new(params).process return {} unless molecule - - present molecule, with: Entities::MoleculeEntity + Entities::MoleculeEntity.represent(molecule, temp_svg: temp_svg, ob_log: ob_log) end end diff --git a/lib/chemotion/smiles_processor.rb b/lib/chemotion/smiles_processor.rb index 84de50f412..a377b934d5 100644 --- a/lib/chemotion/smiles_processor.rb +++ b/lib/chemotion/smiles_processor.rb @@ -33,7 +33,6 @@ def build_result(molecule, svg_result) temp_svg: svg_result[:svg_file_name], ob_log: babel_info[:ob_log], ) - molecule end end end diff --git a/spec/lib/chemotion/smiles_processor_spec.rb b/spec/lib/chemotion/smiles_processor_spec.rb new file mode 100644 index 0000000000..93c4f9b9f6 --- /dev/null +++ b/spec/lib/chemotion/smiles_processor_spec.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +require 'rails_helper' + +describe Chemotion::SmilesProcessor do + let(:valid_smiles) { 'CCO' } + let(:invalid_smiles) { 'invalid_smiles' } + let(:editor) { 'ketcher' } + let(:svg) { '...' } + let(:babel_info) { { ob_log: 'Some log data' } } + let(:fetcher_instance) { instance_double(Chemotion::MoleculeFetcher) } + + before do + molecule = create(:molecule) + allow(Chemotion::MoleculeFetcher).to receive(:new).and_return(fetcher_instance) + allow(fetcher_instance).to receive(:fetch_or_create).and_return(molecule) + allow(Chemotion::MoleculeFetcher).to receive(:new).and_call_original + end + + context 'with valid SMILES' do + let(:params) { { smiles: valid_smiles, editor: editor, svg_file: svg } } + + it 'returns a molecule instance with valid attributes' do + processor_result = described_class.new(params).process + binding.pry + expect(processor_result.keys).to include(:temp_svg, :ob_log) + end + end + + context 'with invalid SMILES' do + let(:params) { { smiles: invalid_smiles, editor: editor, svg_file: svg } } + + it 'returns a molecule instance error in ob_log key' do + processor = described_class.new(params) + processor_result = processor.process + expect(processor_result).not_to be_nil + expect(processor_result.keys).to include(:temp_svg, :ob_log) + expect(processor_result[:ob_log][:error]).to be_present + end + end +end From d8f2c9675a41ffa4321e975c03d46baaf434d7f1 Mon Sep 17 00:00:00 2001 From: adambasha0 Date: Mon, 2 Dec 2024 15:06:15 +0000 Subject: [PATCH 4/5] feat: add spec tests for MoleculeFetcher model --- app/api/chemotion/molecule_api.rb | 5 +- app/packs/src/models/Sample.js | 2 +- lib/chemotion/smiles_processor.rb | 5 +- lib/svg/processor.rb | 1 - spec/lib/chemotion/molecule_fetcher_spec.rb | 160 ++++++++++++++++++++ spec/lib/chemotion/smiles_processor_spec.rb | 6 +- 6 files changed, 167 insertions(+), 12 deletions(-) create mode 100644 spec/lib/chemotion/molecule_fetcher_spec.rb diff --git a/app/api/chemotion/molecule_api.rb b/app/api/chemotion/molecule_api.rb index aa15a40ae9..19035e1b7c 100644 --- a/app/api/chemotion/molecule_api.rb +++ b/app/api/chemotion/molecule_api.rb @@ -30,10 +30,11 @@ class MoleculeAPI < Grape::API optional :editor, type: String, desc: 'SVGProcessor', default: 'ketcher' end post do - molecule, temp_svg, ob_log = Chemotion::SmilesProcessor.new(params).process + result = Chemotion::SmilesProcessor.new(params).process + molecule = result[:molecule] return {} unless molecule - Entities::MoleculeEntity.represent(molecule, temp_svg: temp_svg, ob_log: ob_log) + Entities::MoleculeEntity.represent(molecule, temp_svg: result[:temp_svg], ob_log: result[:ob_log]) end end diff --git a/app/packs/src/models/Sample.js b/app/packs/src/models/Sample.js index 5e3947ec76..31c0b34970 100644 --- a/app/packs/src/models/Sample.js +++ b/app/packs/src/models/Sample.js @@ -930,7 +930,7 @@ export default class Sample extends Element { set molecule(molecule) { this._molecule = new Molecule(molecule); - if (molecule.temp_svg) { this.sample_svg_file = molecule.temp_svg; } + if (molecule.temp_svg && !molecule.molecule_svg_file) { this.sample_svg_file = molecule.temp_svg; } } get polymer_formula() { diff --git a/lib/chemotion/smiles_processor.rb b/lib/chemotion/smiles_processor.rb index a377b934d5..e94c876474 100644 --- a/lib/chemotion/smiles_processor.rb +++ b/lib/chemotion/smiles_processor.rb @@ -29,10 +29,7 @@ def babel_info end def build_result(molecule, svg_result) - molecule.attributes.merge( - temp_svg: svg_result[:svg_file_name], - ob_log: babel_info[:ob_log], - ) + { molecule: molecule, temp_svg: svg_result[:svg_file_name], ob_log: babel_info[:ob_log] } end end end diff --git a/lib/svg/processor.rb b/lib/svg/processor.rb index 3c80a80bab..ff599774de 100644 --- a/lib/svg/processor.rb +++ b/lib/svg/processor.rb @@ -20,7 +20,6 @@ def process end end - def structure_svg(editor, svg, hexdigest, is_centered = false) processor = case editor when /marvinjs/i diff --git a/spec/lib/chemotion/molecule_fetcher_spec.rb b/spec/lib/chemotion/molecule_fetcher_spec.rb new file mode 100644 index 0000000000..c50fe4402b --- /dev/null +++ b/spec/lib/chemotion/molecule_fetcher_spec.rb @@ -0,0 +1,160 @@ +# frozen_string_literal: true + +require 'rails_helper' + +RSpec.describe Chemotion::MoleculeFetcher do + let(:smiles) { 'CCO' } + let(:babel_info) do + { + inchikey: 'InChI=1S/CH2O/c1-2/h1H2', + molfile: + "OpenBabel11292408462D + + 2 1 0 0 0 0 0 0 0 0999 V2000 + 1.0000 -0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.0000 -0.0000 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 2 0 0 0 0 + M END", + } + end + let(:fetcher) { described_class.new(smiles, babel_info) } + let(:dummy_molecule) { create(:molecule, inchikey: 'DUMMY', is_partial: true) } + + describe '#fetch_or_create' do + context 'when molecule exists' do + let!(:existing_molecule) { create(:molecule, inchikey: babel_info[:inchikey], is_partial: false) } + + it 'returns the existing molecule' do + expect(fetcher.fetch_or_create).to eq(existing_molecule) + end + end + + context 'when molecule does not exist but molfile is available' do + before do + allow(fetcher).to receive(:fetch_molfile).and_return(babel_info[:molfile]) + allow(Molecule).to receive(:find_or_create_by_molfile).and_return(dummy_molecule) + end + + it 'creates a new molecule from the molfile' do + expect(fetcher.fetch_or_create).to eq(dummy_molecule) + expect(Molecule).to have_received(:find_or_create_by_molfile).with(babel_info[:molfile], babel_info) + end + end + + context 'when molecule and molfile are unavailable' do + before { allow(fetcher).to receive(:fetch_molfile).and_return(nil) } + + it 'returns nil' do + expect(fetcher.fetch_or_create).to be_nil + end + end + end + + describe '#fetch_molfile' do + it 'fetches molfile from babel_info if available' do + expect(fetcher.send(:fetch_molfile)).to eq(babel_info[:molfile]) + end + + context 'when not in babel_info but available from RDKit or PubChem' do + before { allow(fetcher).to receive_messages(rdkit_molfile: nil, pubchem_molfile: babel_info[:molfile]) } + + it 'fetches molfile from PubChem' do + expect(fetcher.send(:fetch_molfile)).to eq(babel_info[:molfile]) + end + end + end + + describe '#rdkit_molfile' do + let(:rw_mol) { instance_double(RDKitChem::RWMol) } + let(:error) { StandardError.new('Error message') } + + context 'when RDKit returns a valid molfile' do + before do + allow(RDKitChem::RWMol).to receive(:mol_from_smiles).and_return(rw_mol) + allow(rw_mol).to receive(:mol_to_mol_block).and_return(babel_info[:molfile]) + end + + it 'returns the molfile' do + expect(fetcher.send(:rdkit_molfile)).to eq(babel_info[:molfile]) + end + end + + context 'when RDKit raises an error' do + before do + allow(RDKitChem::RWMol).to receive(:mol_from_smiles).and_return(rw_mol) + allow(rw_mol).to receive(:mol_to_mol_block).and_raise(error) + allow(fetcher).to receive(:handle_rdkit_error).and_return(nil) + end + + it 'handles the error gracefully' do + fetcher.send(:rdkit_molfile) + expect(fetcher).to have_received(:handle_rdkit_error).with(error, rw_mol) + end + end + end + + describe '#pubchem_molfile' do + let(:pubchem_service) { class_double(Chemotion::PubchemService) } + + before { stub_const('Chemotion::PubchemService', pubchem_service) } + + context 'when PubChem returns a valid molfile' do + before do + allow(pubchem_service).to receive(:molfile_from_smiles).and_return(babel_info[:molfile]) + allow(fetcher).to receive(:validate_and_clear_molfile).and_return(babel_info[:molfile]) + end + + it 'returns the validated molfile' do + expect(fetcher.send(:pubchem_molfile)).to eq(babel_info[:molfile]) + end + end + + context 'when PubChem raises an error' do + before do + allow(pubchem_service).to receive(:molfile_from_smiles).and_raise(StandardError) + allow(fetcher).to receive(:log_error) + end + + it 'logs the error and returns nil' do + expect(fetcher.send(:pubchem_molfile)).to be_nil + expect(fetcher).to have_received(:log_error) + end + end + end + + describe '#handle_rdkit_error' do + let(:rw_mol) { instance_double(RDKitChem::RWMol) } + let(:error) { StandardError.new('RDKit Error') } + + before do + allow(rw_mol).to receive(:mol_to_mol_block) + fetcher.send(:handle_rdkit_error, error, rw_mol) + end + + it 'logs the error and attempts fallback molfile processing' do + expect(rw_mol).to have_received(:mol_to_mol_block).with(true, -1, false) + end + end + + describe '#validate_and_clear_molfile' do + let(:molfile) { "MOLFILE\nStatus: 200\nEND" } + + context 'when molfile is valid' do + before do + allow(Chemotion::OpenBabelService).to receive(:molfile_clear_hydrogens).and_return(molfile) + end + + it 'validates and clears the molfile' do + expect(fetcher.send(:validate_and_clear_molfile, molfile)).to eq(molfile) + end + end + + context 'when molfile is invalid' do + let(:invalid_molfile) { "MOLFILE\nStatus: 400\nEND" } + + it 'returns nil' do + expect(fetcher.send(:validate_and_clear_molfile, invalid_molfile)).to be_nil + end + end + end +end diff --git a/spec/lib/chemotion/smiles_processor_spec.rb b/spec/lib/chemotion/smiles_processor_spec.rb index 93c4f9b9f6..6226459f76 100644 --- a/spec/lib/chemotion/smiles_processor_spec.rb +++ b/spec/lib/chemotion/smiles_processor_spec.rb @@ -14,7 +14,6 @@ molecule = create(:molecule) allow(Chemotion::MoleculeFetcher).to receive(:new).and_return(fetcher_instance) allow(fetcher_instance).to receive(:fetch_or_create).and_return(molecule) - allow(Chemotion::MoleculeFetcher).to receive(:new).and_call_original end context 'with valid SMILES' do @@ -22,13 +21,12 @@ it 'returns a molecule instance with valid attributes' do processor_result = described_class.new(params).process - binding.pry - expect(processor_result.keys).to include(:temp_svg, :ob_log) + expect(processor_result.keys).to include(:molecule, :temp_svg, :ob_log) end end context 'with invalid SMILES' do - let(:params) { { smiles: invalid_smiles, editor: editor, svg_file: svg } } + let(:params) { { smiles: invalid_smiles, editor: editor } } it 'returns a molecule instance error in ob_log key' do processor = described_class.new(params) From 9b4ac9681758e9b2583680647dd072149a180395 Mon Sep 17 00:00:00 2001 From: PiTrem Date: Wed, 13 Nov 2024 17:03:32 +0100 Subject: [PATCH 5/5] test: WIP molecule creation --- spec/factories/molecule_structures.rb | 44 ++++++++++++++++++++ spec/fixtures/structures/01.mol | 45 +++++++++++++++++++++ spec/fixtures/structures/smiles.json | 12 ++++++ spec/fixtures/structures/smiles_faulty.json | 4 ++ spec/models/molecule_spec.rb | 41 +++++++++++++++++++ 5 files changed, 146 insertions(+) create mode 100644 spec/factories/molecule_structures.rb create mode 100644 spec/fixtures/structures/01.mol create mode 100644 spec/fixtures/structures/smiles.json create mode 100644 spec/fixtures/structures/smiles_faulty.json diff --git a/spec/factories/molecule_structures.rb b/spec/factories/molecule_structures.rb new file mode 100644 index 0000000000..77899a4928 --- /dev/null +++ b/spec/factories/molecule_structures.rb @@ -0,0 +1,44 @@ +# frozen_string_literal: true + +# those require statements are uneccessary in test environment but +# enable the single factory to be loaded in development environment +require 'faker' +require 'factory_bot' + +FactoryBot.define do + trait :smiles_from do + transient do + from { 'smiles' } + end + end + + trait :smiles_set_dict do + transient do + root_dictionary { JSON.parse(Rails.root.join("spec/fixtures/structures/#{from}.json").read) } + dictionary { root_dictionary } + end + end + + factory :smiles_set, class: Hash do + smiles_from + smiles_set_dict + initialize_with { dictionary } + end + + trait :smiles_set_random_key do + transient do + key { dictionary.keys.sample } + end + end + + factory :smiles, parent: :smiles_set, class: String do + smiles_set_random_key + initialize_with { dictionary[key] } + end + + factory :faulty_smiles, parent: :smiles, class: String do + transient do + from { 'smiles_faulty' } + end + end +end diff --git a/spec/fixtures/structures/01.mol b/spec/fixtures/structures/01.mol new file mode 100644 index 0000000000..6f8cfbb024 --- /dev/null +++ b/spec/fixtures/structures/01.mol @@ -0,0 +1,45 @@ + + Ketcher 11202414492D 1 1.00000 0.00000 0 + + 18 19 0 0 0 999 V2000 + -5.0933 2.6161 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -4.7013 1.8466 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + -5.1717 1.1223 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -6.0341 1.1675 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -6.4262 1.9370 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -5.9558 2.6613 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.8515 1.9408 0.0000 Al 0 0 0 0 0 0 0 0 0 0 0 0 + -0.7500 1.9500 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 + -4.3411 0.8296 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3182 1.2021 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.5454 1.2020 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.9773 1.9501 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + 0.5455 2.6979 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -0.3182 2.6979 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -1.0055 0.6271 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 + -2.5022 2.8612 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + -3.7053 2.8973 0.0000 I 0 0 0 0 0 0 0 0 0 0 0 0 + -2.7360 0.3845 0.0000 H 0 0 0 0 0 0 0 0 0 0 0 0 + 1 2 1 0 0 0 + 2 3 1 0 0 0 + 3 4 1 0 0 0 + 4 5 1 0 0 0 + 5 6 1 0 0 0 + 6 1 1 0 0 0 + 2 7 1 0 0 0 + 7 8 1 0 0 0 + 2 9 1 0 0 0 + 8 10 1 0 0 0 + 10 11 1 0 0 0 + 11 12 1 0 0 0 + 12 13 1 0 0 0 + 13 14 1 0 0 0 + 14 8 1 0 0 0 + 8 15 1 0 0 0 + 7 16 1 0 0 0 + 7 17 1 0 0 0 + 7 18 1 0 0 0 +M END +$$$$ + + diff --git a/spec/fixtures/structures/smiles.json b/spec/fixtures/structures/smiles.json new file mode 100644 index 0000000000..5128d3c9dd --- /dev/null +++ b/spec/fixtures/structures/smiles.json @@ -0,0 +1,12 @@ +{ + "Li": "[Li]", + "Li+": "[Li+]", + "LiH": "[LiH]", + "Na": "[Na]", + "Na+": "[Na+]", + "NaH": "[NaH]", + "K": "[K]", + "K+": "[K+]", + "KH": "[KH]", + "002": "CC1=C(CC)C(C)=[N+]2C1=C(C)C3=CC(/C=C\\B4NC5=C6C(C=CC=C6N4)=CC=C5)=CN3[B-]2(F)F" +} diff --git a/spec/fixtures/structures/smiles_faulty.json b/spec/fixtures/structures/smiles_faulty.json new file mode 100644 index 0000000000..9b1365ce1f --- /dev/null +++ b/spec/fixtures/structures/smiles_faulty.json @@ -0,0 +1,4 @@ +{ + "001": "C1CCCCN1(C)[Al]([H])(I)(I)N1(C)CCCCC1", + "002": "CC1=C(CC)C(C)=[N]2C1=C(C)C3=CC(/C=C\\B4NC5=C6C(C=CC=C6N4)=CC=C5)=CN3B2(F)F" +} diff --git a/spec/models/molecule_spec.rb b/spec/models/molecule_spec.rb index c85197ec0f..685f4fa261 100644 --- a/spec/models/molecule_spec.rb +++ b/spec/models/molecule_spec.rb @@ -68,4 +68,45 @@ expect(persisted_molecule.tag.taggable_data['pubchem_lcss']).not_to be_nil end end + describe '.find_or_create_by_inchikey' do + let(:smiles_bad) { build(:faulty_smiles, key: '002') } + let(:smiles_good) { build(:smiles, key: '002') } + let(:good_info) { Chemotion::OpenBabelService.molecule_info_from_structure(smiles_good, 'smi') } + let(:bad_info) { Chemotion::OpenBabelService.molecule_info_from_structure(smiles_bad, 'smi') } + + # mock Chemotion::PubchemService.molecule_info_from_inchikey to return empty hash + before do + allow(Chemotion::PubchemService).to receive(:molecule_info_from_inchikey).and_return({}) + end + + it 'has different inchikeys' do + + expect(good_info[:inchikey]).not_to eq(bad_info[:inchikey]) + end + + it 'creates a new bad molecule if it does not exist' do + + expect(described_class.find_by(inchikey: bad_info[:inchikey], is_partial: false)) + molecule = described_class.find_or_create_by_molfile(bad_info[:molfile]) + expect(molecule).to be_persisted + expect(molecule.inchikey).to eq(bad_info[:inchikey]) + end + + it 'does not create a good molecule after a bad one' do + molecule = described_class.find_or_create_by_molfile(bad_info[:molfile]) + molecule_good = described_class.find_or_create_by_molfile(good_info[:molfile]) + expect(molecule_good).to be_persisted + expect(molecule_good.inchikey).to eq(good_info[:inchikey]) + end + it 'does not transform a bad smiles into molfile' do + + rw_mol = begin + RDKitChem::RWMol.mol_from_smiles(smiles_bad) + rescue StandardError => e + e.message + end + expect(rw_mol).to be_a(String) + expect(rw_mol).to eq('') + end + end end