-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1582 from sanger/1390-dpl-916-pbmc-donor-pooling-…
…create-plate DPL-916 PBMC Donor Pooling create plate
- Loading branch information
Showing
18 changed files
with
1,621 additions
and
4 deletions.
There are no files selected for viewing
162 changes: 162 additions & 0 deletions
162
app/models/concerns/labware_creators/donor_pooling_calculator.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
# frozen_string_literal: true | ||
|
||
# This module contains algorithms to allocate source wells into a target number of pools. | ||
module LabwareCreators::DonorPoolingCalculator | ||
extend ActiveSupport::Concern | ||
|
||
# Splits wells into groups by study and project. Wells are grouped based on the | ||
# study and project of the first aliquot in each well (only one aliquot is | ||
# expected per well). Returns an array of groups, where each group is an array | ||
# of wells with the same study and project. | ||
# | ||
# If the input group is [w1, w2, w3, w4, w5, w6, w7, w8, w9] | ||
# where w1, w2, w3, w4, w5, w6, w7, w8, and w9 are wells with (study_id, project_id), | ||
# | ||
# w1(1,1) | ||
# w2(1,2) | ||
# w3(1,3) | ||
# w4(1,1) | ||
# w5(1,2) | ||
# w6(1,3) | ||
# w7(1,1) | ||
# w8(2,1) | ||
# w9(2,2) | ||
# | ||
# the result will be: | ||
# [[w1, w4, w7], [w2, w5], [w3, w6], [w8], [w9]] | ||
# | ||
# @param group [Array<Well>] The group of wells to be split. | ||
# @return [Array<Array<Well>>] An array of well groups. | ||
def split_single_group_by_study_and_project(group)
  # Key every well on the [study id, project id] pair of its first aliquot.
  wells_by_study_and_project =
    group.group_by do |well|
      aliquot = well.aliquots.first
      [aliquot.study.id, aliquot.project.id]
    end

  # Hash#values keeps insertion order, so groups come out in the order in
  # which each study/project pair was first seen.
  wells_by_study_and_project.values
end
|
||
# Splits groups ensuring unique donor_ids within each group. Iterates over | ||
# each group, creating subgroups with wells from a unique donor. The first | ||
# occurrences of unique donor_ids are grouped, then the second occurrences, | ||
# and so on. This prevents combining samples with the same donor_id. The | ||
# result is flattened to a single array of subgroups. | ||
# | ||
# If the input groups are [[w1, w2, w3, w4], [w5, w6, w7], [w8, w9]] | ||
# where w1, w2, w3, w4, w5, w6, w7, w8, and w9 are wells with (donor_id), | ||
# | ||
# w1(1) | ||
# w2(2) | ||
# w3(3) | ||
# w4(1) | ||
# w5(4) | ||
# w6(4) | ||
# w7(5) | ||
# w8(6) | ||
# w9(7) | ||
# | ||
# the result will be: | ||
# [[w1, w2, w3], [w4], [w5, w7], [w6], [w8, w9]] | ||
# | ||
# Note that the input groups are not mixed. donor_ids are unique within each | ||
# result subgroup. | ||
# | ||
# @param groups [Array<Array<Well>>] Array of well groups to be split. | ||
# @return [Array<Array<Well>>] Array of subgroups split by donor ID. | ||
def split_groups_by_unique_donor_ids(groups)
  # Each input group is split on its own, so wells never cross the boundary
  # of the group they arrived in; the subgroups are collected one level deep.
  groups.each_with_object([]) do |group, subgroups|
    subgroups.concat(split_single_group_by_unique_donor_ids(group))
  end
end
|
||
# Splits a single group of wells by donor_ids. This method is used by the | ||
# 'split_groups_by_unique_donor_ids' method. It iteratively segregates wells with | ||
# the first encountered instance of each unique donor_id into a separate | ||
# subgroup. This process continues until there are no wells left in the | ||
# original group. The result is a collection of subgroups, each containing | ||
# wells from distinct donors. | ||
# | ||
# If the input group is [w1, w2, w3, w4, w5, w6, w7, w8, w9] | ||
# where w1, w2, w3, w4, w5, w6, w7, w8, and w9 are wells with (donor_id), | ||
# | ||
# w1(1) | ||
# w2(2) | ||
# w3(3) | ||
# w4(1) | ||
# w5(2) | ||
# w6(4) | ||
# w7(5) | ||
# w8(5) | ||
# w9(5) | ||
# | ||
# the result will be: | ||
# [[w1, w2, w3, w6, w7], [w4, w5, w8], [w9]] | ||
# | ||
# @param group [Array<Well>] The group of wells to split. | ||
# @return [Array<Array<Well>>] An array of subgroups, each containing wells | ||
# from different donors. | ||
def split_single_group_by_unique_donor_ids(group)
  # Number of wells already placed for each donor_id. The nth well seen for a
  # donor is placed in the nth subgroup, so a donor never repeats within one
  # subgroup. This yields the same subgroups as repeatedly pulling out the
  # first remaining well of every donor, but in a single pass over the wells.
  placed_per_donor = Hash.new(0)

  group.each_with_object([]) do |well, subgroups|
    donor_id = well.aliquots.first.sample.sample_metadata.donor_id
    position = placed_per_donor[donor_id]
    placed_per_donor[donor_id] += 1

    # position is never more than one past the last subgroup, so no gaps can
    # appear. Wells are visited in input order, which each subgroup preserves.
    (subgroups[position] ||= []) << well
  end
end
|
||
# Returns the unique donor_ids from a group of wells. Used by the | ||
# 'split_single_group_by_unique_donor_ids' method. | ||
# | ||
# If the input group is [w1, w2, w3, w4, w5, w6, w7, w8, w9] | ||
# where w1, w2, w3, w4, w5, w6, w7, w8, and w9 are wells with (donor_id), | ||
# | ||
# w1(1) | ||
# w2(2) | ||
# w3(3) | ||
# w4(1) | ||
# w5(2) | ||
# w6(4) | ||
# w7(5) | ||
# w8(5) | ||
# w9(5) | ||
# | ||
# the result will be: | ||
# [1, 2, 3, 4, 5] | ||
# | ||
# @param group [Array<Well>] The group of wells from which to retrieve donor_ids. | ||
# @return [Array<String>] An array of unique donor_ids. | ||
def unique_donor_ids(group)
  # Read the donor_id from the sample metadata of each well's first aliquot.
  donor_ids =
    group.map do |well|
      metadata = well.aliquots.first.sample.sample_metadata
      metadata.donor_id
    end

  # Array#uniq keeps the first occurrence, so order of first appearance is kept.
  donor_ids.uniq
end
|
||
# Distributes samples across pools based on group sizes. It sorts the groups | ||
# by size and splits the largest group into two until the number of groups | ||
# equals the number of pools or until all groups have a size of 1. The input | ||
# groups are the result of applying conditions, hence they cannot be mixed. | ||
# | ||
# If the requested number of pools is 6 and the input groups are | ||
# [[1, 2, 3], [4, 5], [6, 7, 8, 9]] where the numbers denote wells, | ||
# | ||
# the result will be: | ||
# [[3], [1], [2], [4, 5], [6, 7], [8, 9]] | ||
# | ||
# for which the steps are: | ||
# [[1, 2, 3], [4, 5], [6, 7, 8, 9]] -> 3 pools (input) | ||
# [[4, 5], [6, 7], [8, 9], [1, 2, 3]] -> 4 pools | ||
# [[3], [4, 5], [6, 7], [8, 9], [1, 2]] -> 5 pools | ||
# [[3], [1], [2], [4, 5], [6, 7], [8, 9]] -> 6 pools (output) | ||
# | ||
# @param groups [Array<Array<Well>>] Array of well groups to be distributed. | ||
# @param number_of_pools [Integer] The target number of pools. | ||
# @return [Array<Array<Well>>] Array of distributed groups. | ||
def distribute_groups_across_pools(groups, number_of_pools)
  # Work on a sorted copy so the caller's array is left untouched.
  distributed = groups.sort_by(&:size)

  # Stop when nothing is left, when the largest group (always last after the
  # sort) can no longer be split, or when there are enough groups.
  until distributed.empty? || distributed.last.size <= 1 || distributed.size >= number_of_pools
    largest = distributed.pop
    # Slicing by the rounded-up half yields two parts; the first part gets the
    # extra well when the size is odd.
    slice_size = (largest.size / 2.0).ceil
    distributed.concat(largest.each_slice(slice_size).to_a)
    distributed.sort_by!(&:size)
  end

  distributed
end
end |
130 changes: 130 additions & 0 deletions
130
app/models/concerns/labware_creators/donor_pooling_validator.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
# frozen_string_literal: true | ||
|
||
# This module contains validations for donor pooling. | ||
module LabwareCreators::DonorPoolingValidator | ||
extend ActiveSupport::Concern | ||
|
||
included do
  # Register the validation methods of this module, in this order.
  %i[
    source_barcodes_must_be_entered
    source_barcodes_must_be_different
    source_plates_must_exist
    wells_with_aliquots_must_have_donor_id
    number_of_pools_must_not_exceed_configured
  ].each { |validation| validate validation }
end
|
||
# Error shown when no source barcode has been entered.
SOURCE_BARCODES_MUST_BE_ENTERED = 'At least one source plate must be scanned.'

# Error shown when the same barcode has been entered more than once.
SOURCE_BARCODES_MUST_BE_DIFFERENT = 'You must not scan the same barcode more than once.'

# Format string; %s receives the barcodes for which no plate was found.
SOURCE_PLATES_MUST_EXIST =
  'Source plates not found: %s. Please check you scanned ' \
    'the correct source plates. '

# Format string; the first %s receives the calculated number of pools and the
# second %s the configured number of pools.
NUMBER_OF_POOLS_MUST_NOT_EXCEED_CONFIGURED =
  'The calculated number of pools (%s) is higher than ' \
    'the number of pools (%s) configured. ' \
    'This is due to constraints such as: ' \
    '* samples with different Studies or Projects cannot be combined ' \
    '* multiple samples from the same donor cannot be combined. ' \
    'Please check you have scanned the correct set of source plates.'

# Format string; %s receives the plate barcodes and well locations that are
# missing a donor_id.
WELLS_WITH_ALIQUOTS_MUST_HAVE_DONOR_ID =
  'All samples must have the donor_id specified. Wells missing ' \
    'donor_id (on sample metadata): %s'
|
||
# Validates that at least one source barcode has been entered. If no barcodes | ||
# are entered, an error is added to the :source_barcodes attribute. | ||
# | ||
# @return [void] | ||
def source_barcodes_must_be_entered
  # Nothing to report as soon as there is at least one barcode.
  errors.add(:source_barcodes, SOURCE_BARCODES_MUST_BE_ENTERED) if minimal_barcodes.size < 1
end
|
||
# Validates that all source barcodes are unique. If any barcodes are | ||
# duplicated, an error is added to the :source_barcodes attribute. | ||
# | ||
# @return [void] | ||
def source_barcodes_must_be_different
  barcodes = minimal_barcodes
  # Removing duplicates shrinks the list only when a barcode was repeated.
  errors.add(:source_barcodes, SOURCE_BARCODES_MUST_BE_DIFFERENT) if barcodes.uniq.size != barcodes.size
end
|
||
# Validates that all source plates corresponding to the minimal barcodes exist. | ||
# If the number of source plates does not match the number of minimal barcodes, | ||
# an error is added to the :source_plates attribute. | ||
# | ||
# @return [void] | ||
def source_plates_must_exist
  plates = source_plates
  barcodes = minimal_barcodes
  # One plate per barcode means every barcode was resolved.
  return if plates.size == barcodes.size

  # Report the barcodes that do not match the human barcode of any found plate.
  unmatched = barcodes - plates.map(&:human_barcode)
  message = format(SOURCE_PLATES_MUST_EXIST, unmatched.join(', '))
  errors.add(:source_plates, message)
end
|
||
# Validates that the number of calculated pools does not exceed the configured | ||
# number of pools. If the number of calculated pools is greater, an error is | ||
# added to the :source_plates attribute. | ||
# | ||
# @return [void] | ||
def number_of_pools_must_not_exceed_configured
  # Don't add this error if any wells are missing a donor_id.
  return if locations_with_missing_donor_id.any?

  calculated = pools.size
  configured = number_of_pools
  return unless calculated > configured

  message = format(NUMBER_OF_POOLS_MUST_NOT_EXCEED_CONFIGURED, calculated, configured)
  errors.add(:source_plates, message)
end
|
||
# Validates that all wells with aliquots must have a donor_id. | ||
# It uses the locations_with_missing_donor_id method to find any wells that are | ||
# missing a donor_id. If any such wells are found, it adds an error message to | ||
# the source_plates attribute, formatted with the barcodes of the plates and | ||
# the wells that are missing a donor_id. | ||
# | ||
# @return [void] | ||
def wells_with_aliquots_must_have_donor_id
  locations_by_barcode = locations_with_missing_donor_id
  return if locations_by_barcode.empty?

  # One "<barcode>: <loc>, <loc>" entry per plate, entries separated by a space.
  plate_summaries =
    locations_by_barcode.map do |barcode, locations|
      "#{barcode}: #{locations.join(', ')}"
    end
  message = format(WELLS_WITH_ALIQUOTS_MUST_HAVE_DONOR_ID, plate_summaries.join(' '))
  errors.add(:source_plates, message)
end
|
||
private | ||
|
||
# Checks each source well for pooling for missing donor_id. Returns a hash | ||
# with keys as the barcodes of source plates and values as arrays of well | ||
# locations with missing donor_id. If a plate has no wells with missing | ||
# donor_id, it is not included in the returned hash. This method is used by | ||
# the wells_with_aliquots_must_have_donor_id method to generate an error | ||
# message. | ||
# | ||
# @return [Hash] A hash mapping source plate barcodes to arrays of invalid | ||
# well locations. | ||
def locations_with_missing_donor_id
  # source_wells_for_pooling contains filtered wells from source plates
  wells_missing_donor = source_wells_for_pooling.select { |well| missing_donor_id?(well) }

  # Bucket the offending wells under the barcode of the plate they sit on,
  # then keep only each well's location.
  wells_by_barcode = wells_missing_donor.group_by { |well| source_wells_to_plates[well].human_barcode }
  wells_by_barcode.transform_values { |wells| wells.map(&:location) }
end
|
||
# Checks if a well is missing a donor_id. If there is an aliquot, it checks | ||
# if the associated sample_metadata has a donor_id. If the donor_id is | ||
# missing, it returns true. Otherwise, it returns false. | ||
# | ||
# @param well [Well] The well to check. | ||
# @return [Boolean] True if the well is missing a donor_id, false otherwise. | ||
def missing_donor_id?(well)
  first_aliquot = well.aliquots&.first

  if first_aliquot
    donor_id = first_aliquot.sample.sample_metadata.donor_id
    # nil, empty and whitespace-only values all count as missing.
    (donor_id || '').to_s.strip.blank?
  else
    # A well without an aliquot is not reported as missing a donor_id.
    false
  end
end
end |
Oops, something went wrong.