Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes to control model sparsity and MTSS improvements #331

Open
wants to merge 34 commits into
base: 1.7.0
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
b73b98a
(1) Added code to DCNNModelWrapper to track absolute sum of 1st hidde…
mcloughlin2 May 7, 2024
46a70ae
Merge branch '1.6.1' into weight_decay_test
mcloughlin2 May 22, 2024
5d3c897
Added two new parameters, wdp and wdt, to allow hyperopt domain searc…
mcloughlin2 May 31, 2024
512d8eb
Added sparsity-related parameters weight_decay_penalty, weight_decay_…
mcloughlin2 May 31, 2024
b84098a
Fixed MTSS bug where population was not sorted by score after calling…
mcloughlin2 Jun 12, 2024
e4a5f62
Changed to use single-threaded function to grade chromosomes, after s…
mcloughlin2 Jun 13, 2024
c524a7b
Implemented a new fitness function to more robustly enforce dissimila…
mcloughlin2 Jun 13, 2024
be48c58
Normalize fitness scores to the range [0,1]. Add the total fitness sc…
mcloughlin2 Jun 18, 2024
da5a121
Added early stopping to terminate GA if no fitness improvement after …
mcloughlin2 Jun 21, 2024
035d283
Implemented enhancement request from AMPL issue #318: Random forest a…
mcloughlin2 Jun 21, 2024
b31769e
Save split_uuid in self.params at end of split_dataset().
mcloughlin2 Jun 28, 2024
03b8642
Added code to show valid & train Wasserstein distances in plot titles.
mcloughlin2 Jun 28, 2024
39dc2c1
Added functions to generate multi-plot displays to assess split quality.
mcloughlin2 Jun 28, 2024
8ea481a
Fixed plot_split_fractions so that bars always appear in order train,…
mcloughlin2 Jul 1, 2024
35a8180
Merge branch 'master' into sparsity
mcloughlin2 Jul 2, 2024
bc43bb5
Added initial version of function to draw line plot of NN feature wei…
mcloughlin2 Jul 12, 2024
f6c7a44
Removed old PRC plot function.
mcloughlin2 Jul 12, 2024
48a53cb
(1) Added code to DCNNModelWrapper to track absolute sum of 1st hidde…
mcloughlin2 May 7, 2024
0f0eded
Added two new parameters, wdp and wdt, to allow hyperopt domain searc…
mcloughlin2 May 31, 2024
7b5e99d
Added sparsity-related parameters weight_decay_penalty, weight_decay_…
mcloughlin2 May 31, 2024
812e13f
Fixed MTSS bug where population was not sorted by score after calling…
mcloughlin2 Jun 12, 2024
120c6a1
Changed to use single-threaded function to grade chromosomes, after s…
mcloughlin2 Jun 13, 2024
0741178
use the copy from sparsity branch
mauvais2 Jul 19, 2024
8d1e759
Implemented enhancement request from AMPL issue #318: Random forest a…
mcloughlin2 Jun 21, 2024
9297ffa
Save split_uuid in self.params at end of split_dataset().
mcloughlin2 Jun 28, 2024
f6d220d
Added code to show valid & train Wasserstein distances in plot titles.
mcloughlin2 Jun 28, 2024
a431188
Added functions to generate multi-plot displays to assess split quality.
mcloughlin2 Jun 28, 2024
2d91eb7
Fixed plot_split_fractions so that bars always appear in order train,…
mcloughlin2 Jul 1, 2024
4f48c6d
Added initial version of function to draw line plot of NN feature wei…
mcloughlin2 Jul 12, 2024
c7f4938
Removed old PRC plot function.
mcloughlin2 Jul 12, 2024
a266200
Merge branch 'sparsity' of github.com:ATOMScience-org/AMPL into sparsity
mauvais2 Jul 19, 2024
d9df1f2
Merge branch '1.7.0' of github.com:ATOMScience-org/AMPL into sparsity
mauvais2 Jul 22, 2024
e39fbd1
Merge branch 'master' of github.com:ATOMScience-org/AMPL into sparsity
mauvais2 Jul 23, 2024
6196638
Merge branch '1.7.0' of github.com:ATOMScience-org/AMPL into sparsity
mauvais2 Aug 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 38 additions & 14 deletions atomsci/ddm/pipeline/GeneticAlgorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,29 +49,52 @@ def __init__(self,
self.fitness_func = fitness_func
self.crossover_func = crossover_func
self.mutate_func = mutate_func
self.parallel_grade_population()
self.serial_grade_population()
#self.parallel_grade_population()

def parallel_grade_population(self):
""" Grade the population and save the scores

Updates the order of self.pop and self.pop_scores
def serial_grade_population(self):
""" Scores the chromosomes in the current population and sorts them in decreasing score order.
Saves the sorted scores in self.pop_scores. Runs in a single process; surprisingly, this runs faster
than the multiprocess function parallel_grade_population.

Parameters
----------
None

Returns
-------
Nothing
None. As a side effect, sorts the chromosomes in self.pop and updates the scores in self.pop_scores.
"""
fitnesses = []
for chrom in self.pop:
fitnesses.append(self.fitness_func(chrom))
pairs = sorted(zip(fitnesses, self.pop), reverse=True)
self.pop = [chrome for fitness, chrome in pairs]
self.pop_scores = [fitness for fitness, chrome in pairs]


def parallel_grade_population(self):
""" Scores the chromosomes in the current population and sorts them in decreasing score order.
Saves the sorted scores in self.pop_scores.

Although this does the same thing in multiple processes (via multiprocessing.Pool) as the single-threaded
function serial_grade_population, it seems to run much slower, at least for multitask scaffold splits
with 100 chromosomes.

Parameters
----------
None

Returns
-------
None
"""
pool = multiprocessing.Pool(processes=N_PROCS)
fitnesses = pool.map(self.fitness_func, self.pop)
pool.close()
pool.join()
pairs = list(zip(fitnesses, self.pop))

pairs.sort(key=lambda x: x[0], reverse=True)

pairs = sorted(zip(fitnesses, self.pop), reverse=True)
self.pop = [chrome for fitness, chrome in pairs]
self.pop_scores = [fitness for fitness, chrome in pairs]

Expand All @@ -90,10 +113,7 @@ def select_parents(self) -> List[List[Any]]:
parents: List[List[Any]]
A list of chromosomes that will be parents for the next generation
"""
self.parallel_grade_population()

parents = [chrome for chrome in self.pop[:self.num_parents]]
return parents
return self.pop[:self.num_parents]

def iterate(self, num_generations: int):
""" Iterates the genetic algorithm num_generations
Expand Down Expand Up @@ -130,19 +150,23 @@ def step(self, print_timings: bool = False):

start = timeit.default_timer()
i = timeit.default_timer()
# select parents using rank selection
parents = self.select_parents()
if print_timings:
print('\tfind parents %0.2f min'%((timeit.default_timer()-i)/60))

# select parents using rank selection
i = timeit.default_timer()
# Generate new population by crossing parent chromosomes
new_pop = self.crossover_func(parents, self.num_pop)
if print_timings:
print('\tcrossover %0.2f min'%((timeit.default_timer()-i)/60))

# mutate population
i = timeit.default_timer()
self.pop = self.mutate_func(new_pop)
# Compute scores for new chromosomes and sort population by score
self.serial_grade_population()
#self.parallel_grade_population()
if print_timings:
print('\tmutate %0.2f min'%((timeit.default_timer()-i)/60))
print('total %0.2f min'%((timeit.default_timer()-start)/60))
Expand Down
Loading