Skip to content

Commit

Permalink
Merge pull request #302 from labgem/readFastSpot
Browse files (browse the repository at this point in the history)
Speed up spot reading
  • Loading branch information
axbazin authored Nov 26, 2024
2 parents 4ec681f + 0086fe3 commit d5df368
Showing 1 changed file with 9 additions and 7 deletions.
16 changes: 9 additions & 7 deletions ppanggolin/formats/readBinaries.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,28 +979,31 @@ def read_rgp(pangenome: Pangenome, h5f: tables.File, disable_bar: bool = False):

def read_spots(pangenome: Pangenome, h5f: tables.File, disable_bar: bool = False):
    """
    Read hotspots in the pangenome HDF5 file and add them to the pangenome object.

    Each row of the ``spots`` table links one RGP (``RGP`` field, stored as bytes)
    to one spot (``spot`` field). Rows are grouped into :class:`Spot` objects,
    then every spot is finalised and registered in the pangenome.

    :param pangenome: Pangenome object without spot
    :param h5f: Pangenome HDF5 file with spot computed
    :param disable_bar: Disable the progress bar
    """
    table = h5f.root.spots
    spots = {}
    # Remember the spot of the previous row so that consecutive rows of the same
    # spot skip the dictionary lookup.
    # NOTE(review): this only pays off if rows of one spot are stored
    # contiguously -- presumably the case, confirm against the writer.
    curr_spot_id = None
    curr_spot = None
    for row in tqdm(
        read_chunks(table, chunk=20000),
        total=table.nrows,
        unit="spot",
        disable=disable_bar,
    ):
        # Convert once: the same plain-int key is used to compare, look up,
        # create and store the spot.
        spot_id = int(row["spot"])
        if curr_spot_id != spot_id:
            curr_spot_id = spot_id
            curr_spot = spots.get(spot_id)
            if curr_spot is None:
                curr_spot = Spot(spot_id)
                spots[spot_id] = curr_spot
        region = pangenome.get_region(row["RGP"].decode())
        curr_spot.add(region)
    # Finalise each spot once, after all of its regions have been added,
    # instead of once per table row.
    for spot in spots.values():
        spot.spot_2_families()
        pangenome.add_spot(spot)
    pangenome.status["spots"] = "Loaded"

Expand Down Expand Up @@ -1548,7 +1551,6 @@ def read_pangenome(
f"The pangenome in file '{filename}' does not have spots information, "
f"or has been improperly filled"
)

if modules:
if h5f.root.status._v_attrs.modules:
logging.getLogger("PPanGGOLiN").info("Reading the modules...")
Expand Down

0 comments on commit d5df368

Please sign in to comment.