diff --git a/Dockerfile b/Dockerfile index 2b57dd5..52d1bd7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ LABEL MAINTAINER="Pradeep Bashyal" WORKDIR /app -ARG PY_ARD_VERSION=1.5.1 +ARG PY_ARD_VERSION=1.5.2 COPY requirements.txt /app RUN pip install --no-cache-dir --upgrade pip && \ diff --git a/api-spec.yaml b/api-spec.yaml index 5604fca..83ddb6f 100644 --- a/api-spec.yaml +++ b/api-spec.yaml @@ -2,7 +2,7 @@ openapi: 3.0.3 info: title: ARD Reduction description: Reduce to ARD Level - version: "1.5.1" + version: "1.5.2" servers: - url: 'http://localhost:8080' tags: diff --git a/pyard/__init__.py b/pyard/__init__.py index a4e65fa..a2cc703 100644 --- a/pyard/__init__.py +++ b/pyard/__init__.py @@ -26,7 +26,7 @@ from .misc import get_imgt_db_versions as db_versions __author__ = """NMDP Bioinformatics""" -__version__ = "1.5.1" +__version__ = "1.5.2" def init( diff --git a/pyard/ard.py b/pyard/ard.py index 61033e4..88d8e7b 100644 --- a/pyard/ard.py +++ b/pyard/ard.py @@ -189,6 +189,8 @@ def _redux_allele( hla, allele_name = allele.split("-") redux_allele = self._redux_allele(allele_name, redux_type) if redux_allele: + if "/" in redux_allele: + return "/".join(["HLA-" + ra for ra in redux_allele.split("/")]) return "HLA-" + redux_allele else: return redux_allele @@ -246,9 +248,7 @@ def _redux_allele( elif redux_type == "P" and allele in self.ars_mappings.p_group: return self.ars_mappings.p_group[allele] elif redux_type in ["lgx", "lg"]: - if allele in self.ars_mappings.dup_lgx: - redux_allele = self.ars_mappings.dup_lgx[allele] - elif allele in self.ars_mappings.lgx_group: + if allele in self.ars_mappings.lgx_group: redux_allele = self.ars_mappings.lgx_group[allele] else: # for 'lgx' or 'lg' mode when allele is not in G group, diff --git a/pyard/data_repository.py b/pyard/data_repository.py index e21ecf7..6ad0e94 100644 --- a/pyard/data_repository.py +++ b/pyard/data_repository.py @@ -50,6 +50,7 @@ get_1field_allele, ) from .serology import broad_splits_dna_mapping, SerologyMapping +from .smart_sort import smart_sort_comparator def expression_reduce(df): @@ -154,6 +155,24 @@ def generate_ard_mapping(db_connection: sqlite3.Connection, imgt_version) -> ARS ) lgx_group = df_lgx.set_index("A")["lgx"].to_dict() + # Find the alleles that have more than 1 mapping + dup_lgx = ( + df_g_group[df_g_group["2d"].isin(multiple_lgx_list)][["lgx", "2d"]] + .drop_duplicates() + .groupby("2d", as_index=True) + .agg(list) + .to_dict()["lgx"] + ) + # Do not keep duplicate alleles for lgx. Issue #333 + # DPA1*02:02/DPA1*02:07 ==> DPA1*02:02 + # + lowest_numbered_dup_lgx = { + k: sorted(v, key=functools.cmp_to_key(smart_sort_comparator))[0] + for k, v in dup_lgx.items() + } + # Update the lgx_group with the allele with the lowest number + lgx_group.update(lowest_numbered_dup_lgx) + # Extract exon mapping df_exon = pd.concat( [ @@ -164,7 +183,6 @@ def generate_ard_mapping(db_connection: sqlite3.Connection, imgt_version) -> ARS ars_mapping = ARSMapping( dup_g=dup_g, - dup_lgx=dup_lgx, g_group=g_group, p_group=p_group, lgx_group=lgx_group, diff --git a/pyard/db.py b/pyard/db.py index 65f6a52..45ce425 100644 --- a/pyard/db.py +++ b/pyard/db.py @@ -461,9 +461,6 @@ def set_user_version(connection: sqlite3.Connection, version: int): def load_ars_mappings(db_connection): dup_g = load_dict(db_connection, table_name="dup_g", columns=("allele", "g_group")) - dup_lgx = load_dict( - db_connection, table_name="dup_lgx", columns=("allele", "lgx_group") - ) g_group = load_dict(db_connection, table_name="g_group", columns=("allele", "g")) p_group = load_dict(db_connection, table_name="p_group", columns=("allele", "p")) lgx_group = load_dict( @@ -475,7 +472,6 @@ def load_ars_mappings(db_connection): p_not_g = load_dict(db_connection, table_name="p_not_g", columns=("allele", "lgx")) return ARSMapping( dup_g=dup_g, - dup_lgx=dup_lgx, g_group=g_group, p_group=p_group, lgx_group=lgx_group, @@ -497,12 +493,6 @@ def save_ars_mappings(db_connection: sqlite3.Connection, ars_mapping: ARSMapping dictionary=ars_mapping.dup_g, columns=("allele", "g_group"), ) - save_dict( - db_connection, - table_name="dup_lgx", - dictionary=ars_mapping.dup_lgx, - columns=("allele", "lgx_group"), - ) save_dict( db_connection, table_name="g_group", diff --git a/pyard/mappings.py b/pyard/mappings.py index 07b4f2f..9a42a13 100644 --- a/pyard/mappings.py +++ b/pyard/mappings.py @@ -23,7 +23,6 @@ ars_mapping_tables = [ "dup_g", - "dup_lgx", "g_group", "p_group", "lgx_group", diff --git a/setup.cfg b/setup.cfg index 9d7b2e8..038a518 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.5.1 +current_version = 1.5.2 commit = True tag = True diff --git a/setup.py b/setup.py index af6b9c6..65ed2c2 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ setup( name="py-ard", - version="1.5.1", + version="1.5.2", description="ARD reduction for HLA with Python", long_description=readme, long_description_content_type="text/markdown", diff --git a/tests/features/p_g_group.feature b/tests/features/p_g_group.feature index 3c5b877..8f8e75d 100644 --- a/tests/features/p_g_group.feature +++ b/tests/features/p_g_group.feature @@ -90,10 +90,10 @@ Feature: P and G Groups | C*02:10 | lg | C*02:02g | | C*02:10 | lgx | C*02:02 | - Examples: lgx with duplicates - | Allele | Level | Redux Allele | - | DPA1*02:12 | lgx | DPA1*02:02/DPA1*02:07 | - | DPA1*02:12 | lg | DPA1*02:02g/DPA1*02:07g | - | DQA1*03:03 | lgx | DQA1*03:01 | - | DQA1*03:03 | lg | DQA1*03:01g | - | DQA1*03:03:09 | lg | DQA1*03:03g | + Examples: lgx redux with duplicate G groups + | Allele | Level | Redux Allele | + | DPA1*02:12 | lgx | DPA1*02:02 | + | DPA1*02:12 | lg | DPA1*02:02g | + | DQA1*03:03 | lgx | DQA1*03:01 | + | DQA1*03:03 | lg | DQA1*03:01g | + | DQA1*03:03:09 | lg | DQA1*03:03g |