diff --git a/packages/nightingale-structure/src/position-mapping.ts b/packages/nightingale-structure/src/position-mapping.ts index 0bcf7ba9..d5d75bd1 100644 --- a/packages/nightingale-structure/src/position-mapping.ts +++ b/packages/nightingale-structure/src/position-mapping.ts @@ -1,5 +1,3 @@ -import groupBy from "lodash-es/groupBy"; - export type Direction = "UP_PDB" | "PDB_UP"; /* eslint-disable camelcase */ @@ -34,65 +32,76 @@ export type TranslatedPosition = { /** * Translate between UniProt and PDBe positions using SIFTs mappings * @function translatePositions - * @param {Number} start The start index for the sequence (1-based) - * @param {Number} end The end index for the sequence (1-based) - * @param {Mapping[]} mappings The array of mapping objects - * @param {String} mappingDirection Indicates direction of maping: UniProt to PDB or PDB to UniProt - * @return {Translated} Object with: mapped entity ID; mapped chain ID; translated start & end positions + * @param {number} start The start index for the sequence (1-based) + * @param {number} end The end index for the sequence (1-based) + * @param {Direction} mappingDirection Indicates direction of mapping: UniProt to PDB or PDB to UniProt + * @param {Mapping[]} mappings The array of mapping objects + * @return {TranslatedPosition[]} Array of translated positions */ const translatePositions = ( start: number, end: number, mappingDirection: Direction, - mappings?: Mapping[], + mappings?: Mapping[] ): TranslatedPosition[] => { // Return if mappings not ready if (!mappings) { return []; } - // return if they have been set to 'undefined' + + // Validate start and end inputs if (!start || !end || Number.isNaN(start) || Number.isNaN(end)) { throw new PositionMappingError("Invalid start, end coordinates"); } - // return a translation separately for each chain (if it exists) - const translations: TranslatedPosition[] = Object.values( - groupBy(mappings, (mapping) => mapping.chain_id), - ) - .map((chainMappings) => { - let startMapping = null; - let endMapping = null; - for (const mapping of chainMappings) { - if ( - mapping.unp_end - mapping.unp_start !== - mapping.end.residue_number - mapping.start.residue_number - ) { - throw new PositionMappingError( - "Mismatch between protein sequence and structure residues", - ); - } - const regionStart = - mappingDirection === "UP_PDB" - ? mapping.unp_start - : mapping.start.residue_number; - const regionEnd = - mappingDirection === "UP_PDB" - ? mapping.unp_end - : mapping.end.residue_number; - - if (start >= regionStart && start <= regionEnd) { - startMapping = mapping; - } - if (end >= regionStart && end <= regionEnd) { - endMapping = mapping; - } + + const chainMappingsMap = new Map(); + + // Group mappings by chain_id + for (const mapping of mappings) { + const chainMapping = chainMappingsMap.get(mapping.chain_id) || []; + chainMapping.push(mapping); + chainMappingsMap.set(mapping.chain_id, chainMapping); + } + + const translations: TranslatedPosition[] = []; + + for (const chainMappings of chainMappingsMap.values()) { + let startMapping: Mapping | null = null; + let endMapping: Mapping | null = null; + + for (const mapping of chainMappings) { + if ( + mapping.unp_end - mapping.unp_start !== + mapping.end.residue_number - mapping.start.residue_number + ) { + throw new PositionMappingError( + "Mismatch between protein sequence and structure residues" + ); } - if (startMapping === null || endMapping === null) { - // If we didn't find a mapping for this chain, - // return null, it will be filtered out later - return null; + + const [regionStart, regionEnd] = + mappingDirection === "UP_PDB" + ? [mapping.unp_start, mapping.unp_end] + : [mapping.start.residue_number, mapping.end.residue_number]; + + // Set startMapping if the start coordinate is within the region + if (!startMapping && start >= regionStart && start <= regionEnd) { + startMapping = mapping; + } + // Set endMapping if the end coordinate is within the region + if (!endMapping && end >= regionStart && end <= regionEnd) { + endMapping = mapping; + } + + // Early exit if both start and end mappings are found + if (startMapping && endMapping) { + break; } + } + + if (startMapping && endMapping) { const direction = mappingDirection === "UP_PDB" ? 1 : -1; - return { + translations.push({ entity: startMapping.entity_id, // Note that we iterate through mappings from one chain at a time, // so all mappings are guaranteed to come from the same chain @@ -104,15 +113,13 @@ const translatePositions = ( end: end + direction * (endMapping.start.residue_number - endMapping.unp_start), - }; - }) - .filter((tp: TranslatedPosition | null): tp is TranslatedPosition => - Boolean(tp), - ); + }); + } + } if (!translations.length) { throw new PositionMappingError( - "Start or end coordinate outside of mapping range", + "Start or end coordinate outside of mapping range" ); }