Skip to content

Commit

Permalink
Merge pull request #906 from NFDI4Chem/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
NishaSharma14 authored Nov 16, 2023
2 parents 7a57d68 + bb3d701 commit 480278a
Show file tree
Hide file tree
Showing 34 changed files with 2,321 additions and 190 deletions.
10 changes: 5 additions & 5 deletions app/Console/Commands/SanitizeMolecules.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,17 @@ class SanitizeMolecules extends Command
*/
public function handle()
{
$molecules = Molecule::whereNull('CANONICAL_SMILES')->get();
$molecules = Molecule::whereNull('canonical_smiles')->get();

foreach ($molecules as $molecule) {
echo $molecule->id;
echo "\r\n";
$molecule->MOL = '
'.$molecule->MOL;
$standardisedMOL = $this->standardizeMolecule($molecule->MOL);
$molecule->CANONICAL_SMILES = $standardisedMOL['canonical_smiles'];
$molecule->STANDARD_INCHI = $standardisedMOL['inchi'];
$molecule->INCHI_KEY = $standardisedMOL['inchikey'];
$standardisedMOL = $this->standardizeMolecule($molecule->sdf);
$molecule->canonical_smiles = $standardisedMOL['canonical_smiles'];
$molecule->standard_inchi = $standardisedMOL['inchi'];
$molecule->inchi_key = $standardisedMOL['inchikey'];
$molecule->save();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ public function getMolecules($sample)
{
$molecules = [];
foreach ($sample->molecules as &$molecule) {
$inchiKey = $molecule->INCHI_KEY;
$inchiKey = $molecule->inchi_key;
$pubchemLink = 'https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/inchikey/'.$inchiKey.'/property/IUPACName/JSON';
$json = json_decode(Http::get($pubchemLink)->body(), true);
// $cid = $json['PropertyTable']['Properties'][0]['CID'];
Expand All @@ -240,14 +240,14 @@ public function getMolecules($sample)
// $moleculeSchema['@id'] = $inchiKey;
// $moleculeSchema['dct:conformsTo'] = $this->conformsTo(['https://bioschemas.org/profiles/MolecularEntity/0.5-RELEASE']);
// $moleculeSchema['identifier'] = $inchiKey;
// $moleculeSchema->name($molecule->CAS_NUMBER);
// $moleculeSchema->name($molecule->cas);
// $moleculeSchema->url('https://pubchem.ncbi.nlm.nih.gov/compound/'.$cid);
// $moleculeSchema->inChI('InChI='.$molecule->STANDARD_INCHI);
// $moleculeSchema->inChI('InChI='.$molecule->standard_inchi);
// $moleculeSchema->inChIKey($inchiKey);
// $moleculeSchema->iupacName($iupacName);
// $moleculeSchema->molecularFormula($molecule->FORMULA);
// $moleculeSchema->molecularWeight($molecule->MOLECULAR_WEIGHT);
// $moleculeSchema->smiles([$molecule->SMILES, $molecule->SMILES_CHIRAL, $molecule->CANONICAL_SMILES]);
// $moleculeSchema->molecularFormula($molecule->molecular_formula);
// $moleculeSchema->molecularWeight($molecule->molecular_weight);
// $moleculeSchema->smiles([$molecule->SMILES, $molecule->absolute_smiles, $molecule->canonical_smiles]);
// $moleculeSchema->hasRepresentation($molecule->MOL);
// $moleculeSchema->description('Percentage composition: '.$molecule->pivot->percentage_composition.'%');
// array_push($molecules, $moleculeSchema);
Expand Down
336 changes: 336 additions & 0 deletions app/Http/Controllers/API/SearchController.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,336 @@
<?php

namespace App\Http\Controllers\API;

use App\Http\Controllers\Controller;
use App\Models\Molecule;
use Illuminate\Database\QueryException;
use Illuminate\Http\Request;
use Illuminate\Pagination\LengthAwarePaginator;
use Illuminate\Support\Facades\DB;

class SearchController extends Controller
{
/**
* Search DB based on the query and filter parameters
*
* @return \Illuminate\Http\Response
*/

/**
* @OA\Post(
* path="/api/v1/search",
* summary="Get compound details by Compound name, SMILES, InChi & InChiKey.",
* description="Get compound details by Compound name, SMILES, InChi & InChiKey.",
* operationId="search",
* tags={"search"},
*
* @OA\RequestBody(
* required=true,
* description="Pass search query and type such as InChiKey, InChi, SMILES & text",
*
* @OA\JsonContent(
* required={"query","type"},
*
* @OA\Property(property="query", type="string", format="query", example="AAAAWQOPBUPWEV-UHFFFAOYSA-N"),
* @OA\Property(property="type", type="string", format="type", example="InChiKey"),
*
* ),
*
* ),
*
* @OA\Response(
* response=200,
* description="Successful Operation"
* ),
* @OA\Response(
* response=404,
* description="Not Found"
* )
* )
*/
public function search(Request $request)
{
try {
set_time_limit(300);

$queryType = 'text';
$results = [];

//dd($request);

$limit = $request->query('limit');
$sort = $request->query('sort');
$limit = $limit ? $limit : 24;
$page = $request->query('page');
$tagType = $request->get('tagType') ? $request->get('tagType') : null;

$offset =
(($page != null && $page != 'null' && $page != 0 ? $page : 1) -
1) *
$limit;

$query = $request->get('query');

$type = $request->query('type')
? $request->query('type')
: $request->get('type');

if ($type) {
$queryType = $type;
} else {
//inchi
$re =
'/^((InChI=)?[^J][0-9BCOHNSOPrIFla+\-\(\)\\\\\/,pqbtmsih]{6,})$/i';
preg_match_all($re, $query, $imatches, PREG_SET_ORDER, 0);

if (count($imatches) > 0 && substr($query, 0, 6) == 'InChI=') {
$queryType = 'inchi';
}

//inchikey
$re = '/^([0-9A-Z\-]+)$/i';
preg_match_all($re, $query, $ikmatches, PREG_SET_ORDER, 0);
if (
count($ikmatches) > 0 &&
substr($query, 14, 1) == '-' &&
strlen($query) == 27
) {
$queryType = 'inchikey';
}

// smiles
$re = '/^([^J][0-9BCOHNSOPrIFla@+\-\[\]\(\)\\\\\/%=#$]{6,})$/i';
preg_match_all($re, $query, $matches, PREG_SET_ORDER, 0);

if (count($matches) > 0 && substr($query, 14, 1) != '-') {
$queryType = 'smiles';
}
}

$filterMap = [
'mf' => 'molecular_formula',

'mw' => 'molecular_weight',
'hac' => 'heavy_atom_count',
'tac' => 'total_atom_count',

'arc' => 'aromatic_ring_count',
'rbc' => 'rotatable_bond_count',
'mrc' => 'minimal_number_of_rings',
'fc' => 'formal_charge',
'cs' => 'contains_sugar',
'crs' => 'contains_ring_sugars',
'cls' => 'contains_linear_sugars',

'npl' => 'np_likeness_score',
'alogp' => 'alogp',
'topopsa' => 'topo_psa',
'fsp3' => 'fsp3',
'hba' => 'h_bond_acceptor_count',
'hbd' => 'h_bond_donor_count',
'ro5v' => 'rule_of_5_violations',
'lhba' => 'lipinski_h_bond_acceptor_count',
'lhbd' => 'lipinski_h_bond_donor_count',
'lro5v' => 'lipinski_rule_of_5_violations',
'ds' => 'found_in_databases',

'class' => 'chemical_class',
'subclass' => 'chemical_sub_class',
'superclass' => 'chemical_super_class',
'parent' => 'direct_parent_classification',

];

$queryType = strtolower($queryType);

$statement = null;

if ($queryType == 'smiles' || $queryType == 'substructure') {
$statement =
"select id, COUNT(*) OVER () from mols where m@>'".
$query.
"' limit ".
$limit.
' offset '.
$offset;
} elseif ($queryType == 'inchi') {
$statement =
"select id, COUNT(*) OVER () from molecules WHERE identifier NOTNULL AND standard_inchi LIKE '%".
$query.
"%' limit ".
$limit.
' offset '.
$offset;
} elseif ($queryType == 'inchikey') {
$statement =
"select id, COUNT(*) OVER () from molecules WHERE identifier NOTNULL AND standard_inchi_key LIKE '%".
$query.
"%' limit ".
$limit.
' offset '.
$offset;
} elseif ($queryType == 'exact') {
$statement =
"select id, COUNT(*) OVER () from mols where m@='".
$query.
"' limit ".
$limit.
' offset '.
$offset;
} elseif ($queryType == 'similarity') {
$statement =
"select id, COUNT(*) OVER () from fps where mfp2%morganbv_fp('".
$query.
"') limit ".
$limit.
' offset '.
$offset;
} elseif ($queryType == 'tags') {
$results = Molecule::withAnyTags([$query], $tagType)->paginate($limit)->items();
$count = Molecule::withAnyTags([$query], $tagType)->count();
} elseif ($queryType == 'filters') {
$orConditions = explode('OR', $query);
$isORInitial = true;
$statement =
'select molecule_id as id, COUNT(*) OVER () from properties where ';
foreach ($orConditions as $orCondition) {
if ($isORInitial === false) {
$statement = $statement.' OR ';
}
$isORInitial = false;
$statement = $statement.'(';
$andConditions = explode(' ', trim($orCondition, ' '));
$isANDInitial = true;
foreach ($andConditions as $andCondition) {
if ($isANDInitial === false) {
$statement = $statement.' AND ';
}
$isANDInitial = false;
$_filter = explode(':', $andCondition);
if (str_contains($_filter[1], '..')) {
$range = array_values(explode('..', $_filter[1]));
$statement =
$statement.
'('.
$filterMap[$_filter[0]].
' between '.
$range[0].
' and '.
$range[1].
')';
} elseif (
$_filter[1] === 'true' ||
$_filter[1] === 'false'
) {
$statement =
$statement.
'('.
$filterMap[$_filter[0]].
' = '.
$_filter[1].
')';
} elseif (str_contains($_filter[1], '|')) {
$dbFilters = explode('|', $_filter[1]);
$dbs = explode('+', $dbFilters[0]);
$statement =
$statement.
'('.
$filterMap[$_filter[0]].
" @> '[\"".
implode('","', $dbs).
"\"]')";
} else {
if (str_contains($_filter[1], '+')) {
$_filter[1] = str_replace('+', ' ', $_filter[1]);
}
$statement =
$statement.
'('.$filterMap[$_filter[0]].'::TEXT ILIKE \'%'.$_filter[1].'%\')';
}
}
$statement = $statement.')';
}
$statement = $statement.' LIMIT '.$limit;
// dd($statement );
} else {
if ($query) {
$query = str_replace("'", "''", $query);
$statement =
"select id, COUNT(*) OVER () from molecules WHERE identifier NOTNULL AND (\"name\"::TEXT ILIKE '%".
$query.
"%') OR (\"synonyms\"::TEXT ILIKE '%".
$query.
"%') OR (\"identifier\"::TEXT ILIKE '%".
$query.
"%') limit ".
$limit.
' offset '.
$offset;
} else {
$statement =
'select id, COUNT(*) OVER () from mols limit '.
$limit.
' offset '.
$offset;
}
}
if ($statement) {
$expression = DB::raw($statement);
$qString = $expression->getValue(
DB::connection()->getQueryGrammar()
);

$hits = DB::select($qString);

$count = count($hits) > 0 ? $hits[0]->count : 0;

$ids = implode(
',',
collect($hits)
->pluck('id')
->toArray()
);

if ($ids != '') {
$statement =
'SELECT * FROM molecules WHERE identifier NOTNULL AND ID IN ('.
implode(
',',
collect($hits)
->pluck('id')
->toArray()
).
')';
if ($sort == 'recent') {
$statement = $statement.' ORDER BY created_at DESC';
}
$expression = DB::raw($statement);
$string = $expression->getValue(
DB::connection()->getQueryGrammar()
);
$results = DB::select($string);
} else {
$results = [];
$count = 0;
}
}
$pagination = new LengthAwarePaginator(
$results,
$count,
$limit,
$page
);

//dd($pagination);
return $pagination;
} catch (QueryException $exception) {
return response()->json(
[
'message' => $exception->getMessage(),
],
500
);
}
}
}
Loading

0 comments on commit 480278a

Please sign in to comment.