Skip to content

Commit

Permalink
Merge pull request #1582 from sanger/1390-dpl-916-pbmc-donor-pooling-…
Browse files Browse the repository at this point in the history
…create-plate

DPL-916 PBMC Donor Pooling create plate
  • Loading branch information
yoldas authored Mar 11, 2024
2 parents 37c46c1 + d4b8573 commit 8f38db2
Show file tree
Hide file tree
Showing 18 changed files with 1,621 additions and 4 deletions.
162 changes: 162 additions & 0 deletions app/models/concerns/labware_creators/donor_pooling_calculator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# frozen_string_literal: true

# This module contains algorithms to allocate source wells into a target number of pools.
module LabwareCreators::DonorPoolingCalculator
extend ActiveSupport::Concern

# Splits wells into groups by study and project. Wells are grouped based on the
# study and project of the first aliquot in each well (only one aliquot is
# expected per well). Returns an array of groups, where each group is an array
# of wells with the same study and project.
#
# If the input group is [w1, w2, w3, w4, w5, w6, w7, w8, w9]
# where w1, w2, w3, w4, w5, w6, w7, w8, and w9 are wells with (study_id, project_id),
#
# w1(1,1)
# w2(1,2)
# w3(1,3)
# w4(1,1)
# w5(1,2)
# w6(1,3)
# w7(1,1)
# w8(2,1)
# w9(2,2)
#
# the result will be:
# [[w1, w4, w7], [w2, w5], [w3, w6], [w8], [w9]]
#
# @param group [Array<Well>] The group of wells to be split.
# @return [Array<Array<Well>>] An array of well groups.
def split_single_group_by_study_and_project(group)
  # Key every well by the [study id, project id] pair of its first aliquot;
  # wells that share a key end up in the same subgroup.
  wells_by_study_and_project =
    group.group_by do |well|
      first_aliquot = well.aliquots.first
      [first_aliquot.study.id, first_aliquot.project.id]
    end

  # Only the grouped wells are returned; the keys are discarded.
  wells_by_study_and_project.values
end

# Splits groups ensuring unique donor_ids within each group. Iterates over
# each group, creating subgroups with wells from a unique donor. The first
# occurrences of unique donor_ids are grouped, then the second occurrences,
# and so on. This prevents combining samples with the same donor_id. The
# result is flattened to a single array of subgroups.
#
# If the input groups are [[w1, w2, w3, w4], [w5, w6, w7], [w8, w9]]
# where w1, w2, w3, w4, w5, w6, w7, w8, and w9 are wells with (donor_id),
#
# w1(1)
# w2(2)
# w3(3)
# w4(1)
# w5(4)
# w6(4)
# w7(5)
# w8(6)
# w9(7)
#
# the result will be:
# [[w1, w2, w3], [w4], [w5, w7], [w6], [w8, w9]]
#
# Note that the input groups are not mixed. donor_ids are unique within each
# result subgroup.
#
# @param groups [Array<Array<Well>>] Array of well groups to be split.
# @return [Array<Array<Well>>] Array of subgroups split by donor ID.
def split_groups_by_unique_donor_ids(groups)
  # Every input group is split on its own, so wells from different input
  # groups are never mixed; the resulting subgroups are collected into one
  # flat list in input order.
  groups.each_with_object([]) do |group, subgroups|
    subgroups.concat(split_single_group_by_unique_donor_ids(group))
  end
end

# Splits a single group of wells by donor_ids. This method is used by the
# 'split_groups_by_unique_donor_ids' method. It iteratively segregates wells with
# the first encountered instance of each unique donor_id into a separate
# subgroup. This process continues until there are no wells left in the
# original group. The result is a collection of subgroups, each containing
# wells from distinct donors.
#
# If the input group is [w1, w2, w3, w4, w5, w6, w7, w8, w9]
# where w1, w2, w3, w4, w5, w6, w7, w8, and w9 are wells with (donor_id),
#
# w1(1)
# w2(2)
# w3(3)
# w4(1)
# w5(2)
# w6(4)
# w7(5)
# w8(5)
# w9(5)
#
# the result will be:
# [[w1, w2, w3, w6, w7], [w4, w5, w8], [w9]]
#
# @param group [Array<Well>] The group of wells to split.
# @return [Array<Array<Well>>] An array of subgroups, each containing wells
# from different donors.
def split_single_group_by_unique_donor_ids(group)
  # Work on a copy so that the caller's array is not modified.
  remaining = group.dup
  subgroups = []

  # Each pass takes, for every donor_id still present, the first remaining
  # well of that donor. Further wells of the same donor stay behind and are
  # picked up by a later pass, so no subgroup repeats a donor_id.
  until remaining.empty?
    donor_ids = unique_donor_ids(remaining)
    picked = donor_ids.map do |donor_id|
      position = remaining.index { |well| well.aliquots.first.sample.sample_metadata.donor_id == donor_id }
      remaining.delete_at(position)
    end
    subgroups << picked
  end

  subgroups
end

# Returns the unique donor_ids from a group of wells. Used by the
# 'split_single_group_by_unique_donor_ids' method.
#
# If the input group is [w1, w2, w3, w4, w5, w6, w7, w8, w9]
# where w1, w2, w3, w4, w5, w6, w7, w8, and w9 are wells with (donor_id),
#
# w1(1)
# w2(2)
# w3(3)
# w4(1)
# w5(2)
# w6(4)
# w7(5)
# w8(5)
# w9(5)
#
# the result will be:
# [1, 2, 3, 4, 5]
#
# @param group [Array<Well>] The group of wells from which to retrieve donor_ids.
# @return [Array<String>] An array of unique donor_ids.
def unique_donor_ids(group)
  # Read the donor_id from the sample metadata of each well's first aliquot.
  donor_ids =
    group.map do |well|
      metadata = well.aliquots.first.sample.sample_metadata
      metadata.donor_id
    end

  # Drop repeats, keeping the order in which each donor_id first appears.
  donor_ids.uniq
end

# Distributes samples across pools based on group sizes. It sorts the groups
# by size and splits the largest group into two until the number of groups
# equals the number of pools or until all groups have a size of 1. The input
# groups are the result of applying conditions, hence they cannot be mixed.
#
# If the requested number of pools is 6 and the input groups are
# [[1, 2, 3], [4, 5], [6, 7, 8, 9]] where the numbers denote wells,
#
# the result will be:
# [[3], [1], [2], [4, 5], [6, 7], [8, 9]]
#
# for which the steps are:
# [[1, 2, 3], [4, 5], [6, 7, 8, 9]] -> 3 pools (input)
# [[4, 5], [6, 7], [8, 9], [1, 2, 3]] -> 4 pools
# [[3], [4, 5], [6, 7], [8, 9], [1, 2]] -> 5 pools
# [[3], [1], [2], [4, 5], [6, 7], [8, 9]] -> 6 pools (output)
#
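# @param groups [Array<Array<Well>>] Array of well groups to be distributed.
# @param number_of_pools [Integer] The target number of pools; splitting
#   stops once the number of groups reaches this value.
# @return [Array<Array<Well>>] Array of distributed groups.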
def distribute_groups_across_pools(groups, number_of_pools)
  # sort_by builds a new array, so the caller's array is left untouched.
  pools = groups.sort_by(&:size)

  # Stop when there is nothing left to split (no groups, or the largest group
  # holds a single well) or when the target number of pools has been reached.
  until pools.empty? || pools.last.size <= 1 || pools.size >= number_of_pools
    biggest = pools.pop
    # Halve the largest group; for odd sizes the first half gets the extra well.
    halves = biggest.each_slice((biggest.size + 1) / 2).to_a
    # Re-sort so that the largest group is at the end again for the next pass.
    pools.concat(halves).sort_by!(&:size)
  end

  pools
end
end
130 changes: 130 additions & 0 deletions app/models/concerns/labware_creators/donor_pooling_validator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# frozen_string_literal: true

# This module contains validations for donor pooling.
module LabwareCreators::DonorPoolingValidator
extend ActiveSupport::Concern

# Runs when this concern is included into a class: registers each of the
# validation methods named below with `validate` on the including class.
# NOTE(review): `validate` is presumably ActiveModel's class-level macro and
# the validations are expected to run in the order declared here — confirm.
included do
validate :source_barcodes_must_be_entered
validate :source_barcodes_must_be_different
validate :source_plates_must_exist
validate :wells_with_aliquots_must_have_donor_id
validate :number_of_pools_must_not_exceed_configured
end

# Error message added by source_barcodes_must_be_entered when no source
# barcode has been entered.
SOURCE_BARCODES_MUST_BE_ENTERED = 'At least one source plate must be scanned.'

# Error message added by source_barcodes_must_be_different when the entered
# barcodes contain a duplicate.
SOURCE_BARCODES_MUST_BE_DIFFERENT = 'You must not scan the same barcode more than once.'

# Error message template used by source_plates_must_exist. The %s placeholder
# is filled with a comma-separated list of the entered barcodes that are not
# among the human barcodes of the source plates.
SOURCE_PLATES_MUST_EXIST =
'Source plates not found: %s. ' \
'Please check you scanned the correct source plates. '

# Error message template used by number_of_pools_must_not_exceed_configured.
# The first %s is the calculated number of pools (pools.size) and the second
# %s is the configured number_of_pools.
NUMBER_OF_POOLS_MUST_NOT_EXCEED_CONFIGURED =
'The calculated number of pools (%s) is higher than the number of pools ' \
'(%s) configured. This is due to constraints such as: ' \
'* samples with different Studies or Projects cannot be combined ' \
'* multiple samples from the same donor cannot be combined. ' \
'Please check you have scanned the correct set of source plates.'

# Error message template used by wells_with_aliquots_must_have_donor_id. The
# %s placeholder is filled with space-separated "barcode: locations" entries,
# one per source plate that has wells without a donor_id.
WELLS_WITH_ALIQUOTS_MUST_HAVE_DONOR_ID =
'All samples must have the donor_id specified. ' \
'Wells missing donor_id (on sample metadata): %s'

# Validates that at least one source barcode has been entered. If no barcodes
# are entered, an error is added to the :source_barcodes attribute.
#
# @return [void]
def source_barcodes_must_be_entered
  # An error is recorded only when no barcode at all has been entered.
  errors.add(:source_barcodes, SOURCE_BARCODES_MUST_BE_ENTERED) if minimal_barcodes.empty?
end

# Validates that all source barcodes are unique. If any barcodes are
# duplicated, an error is added to the :source_barcodes attribute.
#
# @return [void]
def source_barcodes_must_be_different
  barcodes = minimal_barcodes
  # Removing duplicates shortens the list only if a barcode was repeated.
  duplicates_present = barcodes.uniq.size != barcodes.size

  errors.add(:source_barcodes, SOURCE_BARCODES_MUST_BE_DIFFERENT) if duplicates_present
end

# Validates that all source plates corresponding to the minimal barcodes exist.
# If the number of source plates does not match the number of minimal barcodes,
# an error is added to the :source_plates attribute.
#
# @return [void]
def source_plates_must_exist
  plates = source_plates
  barcodes = minimal_barcodes
  # Equal counts are taken to mean that every barcode resolved to a plate.
  return if plates.size == barcodes.size

  # Report the entered barcodes that do not match any found plate's human barcode.
  found_barcodes = plates.map(&:human_barcode)
  missing_barcodes = barcodes - found_barcodes
  errors.add(:source_plates, format(SOURCE_PLATES_MUST_EXIST, missing_barcodes.join(', ')))
end

# Validates that the number of calculated pools does not exceed the configured
# number of pools. If the number of calculated pools is greater, an error is
# added to the :source_plates attribute.
#
# @return [void]
def number_of_pools_must_not_exceed_configured
  # Skip this check while any well is missing a donor_id, so that this error
  # is not added on top of the missing donor_id error.
  return unless locations_with_missing_donor_id.empty?

  calculated = pools.size
  configured = number_of_pools
  return if calculated <= configured

  errors.add(:source_plates, format(NUMBER_OF_POOLS_MUST_NOT_EXCEED_CONFIGURED, calculated, configured))
end

# Validates that all wells with aliquots must have a donor_id.
# It uses the locations_with_missing_donor_id method to find any wells that are
# missing a donor_id. If any such wells are found, it adds an error message to
# the source_plates attribute, formatted with the barcodes of the plates and
# the wells that are missing a donor_id.
#
# @return [void]
def wells_with_aliquots_must_have_donor_id
  missing_by_plate = locations_with_missing_donor_id
  return if missing_by_plate.empty?

  # One "barcode: loc, loc" entry per plate; entries are separated by a space.
  plate_summaries =
    missing_by_plate.map do |plate_barcode, well_locations|
      "#{plate_barcode}: #{well_locations.join(', ')}"
    end

  errors.add(:source_plates, format(WELLS_WITH_ALIQUOTS_MUST_HAVE_DONOR_ID, plate_summaries.join(' ')))
end

private

# Checks each well in source_wells_for_pooling for a missing donor_id. Returns a hash
# with keys as the barcodes of source plates and values as arrays of well
# locations with missing donor_id. If a plate has no wells with missing
# donor_id, it is not included in the returned hash. This method is used by
# the wells_with_aliquots_must_have_donor_id method to generate an error
# message.
#
# @return [Hash] A hash mapping source plate barcodes to arrays of invalid
# well locations.
def locations_with_missing_donor_id
  # source_wells_for_pooling contains filtered wells from source plates
  wells_without_donor = source_wells_for_pooling.select { |well| missing_donor_id?(well) }

  # Group the offending wells under the human barcode of the plate they
  # belong to, then keep only each well's location. Plates without any
  # offending well never become a key.
  wells_by_plate = wells_without_donor.group_by { |well| source_wells_to_plates[well].human_barcode }
  wells_by_plate.transform_values { |wells| wells.map(&:location) }
end

# Checks if a well is missing a donor_id. If there is an aliquot, it checks
# if the associated sample_metadata has a donor_id. If the donor_id is
# missing, it returns true. Otherwise, it returns false.
#
# @param well [Well] The well to check.
# @return [Boolean] True if the well is missing a donor_id, false otherwise.
def missing_donor_id?(well)
  # A well without any aliquot is not reported as missing a donor_id.
  first_aliquot = well.aliquots&.first
  return false unless first_aliquot

  donor_id = first_aliquot.sample.sample_metadata.donor_id
  # A nil donor_id is treated like an empty string; values made up only of
  # whitespace count as missing too.
  normalized = (donor_id || '').to_s.strip
  normalized.blank?
end
end
Loading

0 comments on commit 8f38db2

Please sign in to comment.