ATOMScience-org · mcloughlin2 · May 7, 2024 · May 22, 2024 · May 31, 2024 · May 31, 2024
diff --git a/atomsci/ddm/pipeline/GeneticAlgorithm.py b/atomsci/ddm/pipeline/GeneticAlgorithm.py
@@ -49,29 +49,52 @@ def __init__(self,
         self.fitness_func = fitness_func
         self.crossover_func = crossover_func
         self.mutate_func = mutate_func
-        self.parallel_grade_population()
+        self.serial_grade_population()
+        #self.parallel_grade_population()
 
-    def parallel_grade_population(self):
-        """ Grade the population and save the scores
 
-        Updates the order of self.pop and self.pop_scores
+    def serial_grade_population(self):
+        """ Scores the chromosomes in the current population and sorts them in decreasing score order.
+        Saves the sorted scores in self.pop_scores. Runs in a single process; surprisingly, this runs
+        faster than the multiprocess function parallel_grade_population.
 
         Parameters
         ----------
         None
 
         Returns
         -------
-        Nothing
+        None. As a side effect, sorts the chromosomes in self.pop and updates the scores in self.pop_scores.
+        """
+        fitnesses = [self.fitness_func(chrom) for chrom in self.pop]
+        # Sort on the score alone: comparing whole (score, chromosome) tuples would fall back to
+        # comparing chromosomes whenever two scores tie, and chromosomes may not support ordering.
+        pairs = sorted(zip(fitnesses, self.pop), key=lambda pair: pair[0], reverse=True)
+        self.pop = [chrome for fitness, chrome in pairs]
+        self.pop_scores = [fitness for fitness, chrome in pairs]
+
+
+    def parallel_grade_population(self):
+        """ Scores the chromosomes in the current population and sorts them in decreasing score order.
+        Saves the sorted scores in self.pop_scores.
+
+        Does the same job as serial_grade_population, but in a pool of N_PROCS worker processes. It
+        seems to run much slower, at least for multitask scaffold splits with 100 chromosomes.
+
+        Parameters
+        ----------
+        None
+
+        Returns
+        -------
+        None. As a side effect, sorts the chromosomes in self.pop and updates the scores in self.pop_scores.
         """
         pool = multiprocessing.Pool(processes=N_PROCS)
         fitnesses = pool.map(self.fitness_func, self.pop)
         pool.close()
         pool.join()
-        pairs = list(zip(fitnesses, self.pop))
-
-        pairs.sort(key=lambda x: x[0], reverse=True)
-
+        # Sort on score alone so tied scores never fall back to comparing chromosomes.
+        pairs = sorted(zip(fitnesses, self.pop), key=lambda pair: pair[0], reverse=True)
         self.pop = [chrome for fitness, chrome in pairs]
         self.pop_scores = [fitness for fitness, chrome in pairs]
 
@@ -90,10 +113,7 @@ def select_parents(self) -> List[List[Any]]:
         parents: List[List[Any]]
             A list of chromosomes that will be parents for the next generation
         """
-        self.parallel_grade_population()
-
-        parents = [chrome for chrome in self.pop[:self.num_parents]]
-        return parents
+        return self.pop[:self.num_parents]
 
     def iterate(self, num_generations: int):
         """ Iterates the genetic algorithm num_generations
@@ -130,19 +150,23 @@ def step(self, print_timings: bool = False):
 
         start = timeit.default_timer()
         i = timeit.default_timer()
+        # select parents using rank selection
         parents = self.select_parents()
         if print_timings:
             print('\tfind parents %0.2f min'%((timeit.default_timer()-i)/60))
 
-        # select parents using rank selection
         i = timeit.default_timer()
+        # Generate new population by crossing parent chromosomes
         new_pop = self.crossover_func(parents, self.num_pop)
         if print_timings:
             print('\tcrossover %0.2f min'%((timeit.default_timer()-i)/60))
 
         # mutate population
         i = timeit.default_timer()
         self.pop = self.mutate_func(new_pop)
+        # Compute scores for new chromosomes and sort population by score
+        self.serial_grade_population()
+        #self.parallel_grade_population()
         if print_timings:
             print('\tmutate %0.2f min'%((timeit.default_timer()-i)/60))
             print('total %0.2f min'%((timeit.default_timer()-start)/60))