Skip to content

Commit

Permalink
Merge branch 'hotfix-1.29.2'
Browse files Browse the repository at this point in the history
  • Loading branch information
arteymix committed Dec 12, 2022
2 parents 1b7fc73 + 3ce66c5 commit 3bc9450
Show file tree
Hide file tree
Showing 111 changed files with 1,349 additions and 753 deletions.
2 changes: 1 addition & 1 deletion gemma-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<artifactId>gemma</artifactId>
<groupId>gemma</groupId>
<version>1.29.1</version>
<version>1.29.2</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>gemma-cli</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion gemma-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<artifactId>gemma</artifactId>
<groupId>gemma</groupId>
<version>1.29.1</version>
<version>1.29.2</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>gemma-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,14 @@
import ubic.gemma.model.genome.gene.GeneValueObject;
import ubic.gemma.persistence.util.Settings;

import java.io.Serializable;
import java.util.*;

/**
* @author luke
*/
@SuppressWarnings({ "unused", "WeakerAccess" }) // Frontend use
public class CoexpressionMetaValueObject {
public class CoexpressionMetaValueObject implements Serializable {

/**
* The default maximum number of edges to send to the client.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,27 @@
package ubic.gemma.core.analysis.expression.coexpression;

import ubic.gemma.model.association.coexpression.GeneCoexpressionNodeDegreeValueObject;
import ubic.gemma.persistence.service.association.coexpression.CoexpressionValueObject;

import java.io.Serializable;

/**
* @author luke
*/
@SuppressWarnings({ "unused", "WeakerAccess" }) // Used in frontend
public class CoexpressionSummaryValueObject {
public class CoexpressionSummaryValueObject implements Serializable {

private final long geneId;
private long geneId;
// node degree info for this gene, genome wide.
private GeneCoexpressionNodeDegreeValueObject coexpNodeDegree = null;
private int datasetsAvailable;
private int datasetsTested;
private int linksFound;

public CoexpressionSummaryValueObject() {
super();
}

/**
 * Creates a summary for the given gene.
 *
 * @param geneId ID of the gene; it is unboxed into a primitive field, so passing {@code null} raises a
 *               {@link NullPointerException}
 */
public CoexpressionSummaryValueObject( Long geneId ) {
    this.geneId = geneId.longValue();
}
Expand Down Expand Up @@ -71,6 +78,10 @@ public long getGeneId() {
return geneId;
}

/**
* Replaces the gene ID stored on this summary.
*
* @param geneId the new gene ID
*/
public void setGeneId( long geneId ) {
this.geneId = geneId;
}

/**
* @return the current value of the linksFound field
*/
public int getLinksFound() {
return linksFound;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.genome.gene.GeneValueObject;

import java.io.Serializable;
import java.util.Collection;

/**
Expand All @@ -26,7 +27,7 @@
* @author keshav
*/
@SuppressWarnings({ "WeakerAccess", "unused" }) // Frontend use
public class DifferentialExpressionMetaAnalysisValueObject {
public class DifferentialExpressionMetaAnalysisValueObject implements Serializable {

private GeneValueObject gene = null;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,244 +14,77 @@
*/
package ubic.gemma.core.analysis.preprocess.batcheffects;

import cern.colt.list.DoubleArrayList;
import cern.colt.list.IntArrayList;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.commons.math3.distribution.ChiSquaredDistribution;
import org.apache.commons.math3.stat.inference.ChiSquareTest;
import org.geneontology.util.CollectionUtil;
import ubic.basecode.math.KruskalWallis;
import ubic.gemma.core.analysis.preprocess.svd.SVDServiceHelperImpl;
import ubic.gemma.core.analysis.util.ExperimentalDesignUtils;
import ubic.gemma.model.expression.bioAssay.BioAssay;
import ubic.gemma.model.expression.biomaterial.BioMaterial;
import ubic.gemma.model.expression.experiment.BioAssaySet;
import ubic.gemma.model.expression.experiment.ExperimentalFactor;
import ubic.gemma.model.expression.experiment.FactorValue;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.expression.experiment.ExpressionExperimentSubSet;

import java.util.*;
import java.io.Serializable;

/**
* Test if an experimental design is confounded with batches.
* Represents a summary of a batch effect confound.
*
* @author paul
*/
public class BatchConfound {
@SuppressWarnings({ "unused", "WeakerAccess" }) // Used in frontend
public class BatchConfound implements Serializable {

private static final Log log = LogFactory.getLog( BatchConfound.class.getName() );
private double chiSquare;
private int df;
private BioAssaySet ee;
private ExperimentalFactor ef;
private double p;
private int numBatches;

/**
 * Checks an experiment (or experiment subset) for batch confounds: builds the per-factor biomaterial map for its
 * bioassays and hands it to the factor-versus-batch test.
 *
 * @param ee experiment or experiment subset
 * @return collection of confounds (one for each confounded factor)
 */
public static Collection<BatchConfoundValueObject> test( BioAssaySet ee ) {
    return factorBatchConfoundTest( ee, getBioMaterialFactorMap( ee ) );
}

/**
*
* @param ee experiment or experiment subset
* @return map of each factor to a map of biomaterial ID -> factor value indicator (stored as a double)
*/
private static Map<ExperimentalFactor, Map<Long, Double>> getBioMaterialFactorMap( BioAssaySet ee ) {
Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap = new HashMap<>();
public BatchConfound() {

for ( BioAssay bioAssay : ee.getBioAssays() ) {
BioMaterial bm = bioAssay.getSampleUsed();
SVDServiceHelperImpl.populateBMFMap( bioMaterialFactorMap, bm );
}
return bioMaterialFactorMap;
}

/**
*
* @param ee experiment or subset
* @param bioMaterialFactorMap as per getBioMaterialFactorMap()
* @return collection of BatchConfoundValueObjects
*/
private static Collection<BatchConfoundValueObject> factorBatchConfoundTest( BioAssaySet ee,
Map<ExperimentalFactor, Map<Long, Double>> bioMaterialFactorMap ) throws IllegalArgumentException {

Map<Long, Long> batchMembership = new HashMap<>();
ExperimentalFactor batchFactor = null;
Map<Long, Integer> batchIndexes = new HashMap<>();
Collection<Long> usedBatches = new HashSet<>(); // track batches these samples actually occupy
for ( ExperimentalFactor ef : bioMaterialFactorMap.keySet() ) {
if ( ExperimentalDesignUtils.isBatch( ef ) ) {
batchFactor = ef;

Map<Long, Double> bmToFv = bioMaterialFactorMap.get( batchFactor );

if ( bmToFv == null ) {
log.warn( "No biomaterial --> factor value map for batch factor: " + batchFactor );
continue;
}

int index = 0;
for ( FactorValue fv : batchFactor.getFactorValues() ) {
batchIndexes.put( fv.getId(), index++ );
}

for ( Long bmId : bmToFv.keySet() ) {
batchMembership.put( bmId, bmToFv.get( bmId ).longValue() ); // not perfectly safe cast
usedBatches.add( bmToFv.get( bmId ).longValue() );
}
break;
}
}

Set<BatchConfoundValueObject> result = new HashSet<>();

// note that a batch can be "used" but irrelevant in a subset for some factors if they are only applied to some samples
// so we have to do more checking later.
if ( batchFactor == null || usedBatches.size() < 2 ) {
return result; // there can be no confound if there is no batch info or only one batch
}

/*
* Compare other factors to batches to look for confounds.
*/

for ( ExperimentalFactor ef : bioMaterialFactorMap.keySet() ) {

if ( ef.equals( batchFactor ) )
continue;

// ignore factors that we add with the aim of resolving confounds.
if ( ef.getCategory() != null && ef.getCategory().getValue().equalsIgnoreCase( "collection of material" ) )
continue;

Map<Long, Double> bmToFv = bioMaterialFactorMap.get( ef );
Collection<Double> usedFactorValues = new HashSet<>( bmToFv.values() );
int numBioMaterials = bmToFv.keySet().size();

assert numBioMaterials > 0 : "No biomaterials for " + ef;

double p = Double.NaN;
double chiSquare;
int df;

int numBatches = batchFactor.getFactorValues().size();
if ( ExperimentalDesignUtils.isContinuous( ef ) ) {

DoubleArrayList factorValues = new DoubleArrayList( numBioMaterials );
factorValues.setSize( numBioMaterials );

IntArrayList batches = new IntArrayList( numBioMaterials );
batches.setSize( numBioMaterials );

int j = 0;
for ( Long bmId : bmToFv.keySet() ) {

assert factorValues.size() > 0 : "Biomaterial to factorValue is empty for " + ef;

factorValues.set( j, bmToFv.get( bmId ) ); // ensures we only look at actually used factorvalues.
long batch = batchMembership.get( bmId );
batches.set( j, batchIndexes.get( batch ) );
j++;
}

p = KruskalWallis.test( factorValues, batches );
df = KruskalWallis.dof( factorValues, batches );
chiSquare = KruskalWallis.kwStatistic( factorValues, batches );

// log.debug( "KWallis\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName()
// + "\t" + String.format( "%.2f", chiSquare ) + "\t" + df + "\t" + String.format( "%.2g", p )
// + "\t" + numBatches );
} else {

Map<Long, Integer> factorValueIndexes = new HashMap<>();
int index = 0;
for ( FactorValue fv : ef.getFactorValues() ) {
// only use the used factorvalues
if ( !usedFactorValues.contains( fv.getId().doubleValue() ) ) {
continue;
}

factorValueIndexes.put( fv.getId(), index++ );
}
Map<Long, Long> factorValueMembership = new HashMap<>();

for ( Long bmId : bmToFv.keySet() ) {
factorValueMembership.put( bmId, bmToFv.get( bmId ).longValue() );
}

// numbatches could still be incorrect, so we have to clean this up later.
long[][] counts = new long[numBatches][usedFactorValues.size()];

for ( int i = 0; i < batchIndexes.size(); i++ ) {
for ( int j = 0; j < factorValueIndexes.size(); j++ ) {
counts[i][j] = 0;
}
}

for ( Long bm : bmToFv.keySet() ) {
long fv = factorValueMembership.get( bm );
Long batch = batchMembership.get( bm );
if ( batch == null ) {
log.warn( "No batch membership for : " + bm );
continue;
}
int batchIndex = batchIndexes.get( batch );
int factorIndex = factorValueIndexes.get( fv );
counts[batchIndex][factorIndex]++;
}

// check for unused batches
List<Integer> usedBatchesForFactor = new ArrayList<>();
int i = 0;
for ( long[] c : counts ) {
long total = 0;
for ( long f : c ) {
total += f;
}
if ( total == 0 ) {
log.debug( "Batch " + i + " not used by " + ef + " in " + ee );
} else {
usedBatchesForFactor.add( i );
}
i++;
}

// trim down again
long[][] finalCounts = new long[usedBatchesForFactor.size()][];
int j = 0;
for ( int b : usedBatchesForFactor ) {
finalCounts[j++] = counts[b];
}
if ( finalCounts.length < 2 ) {
continue; // to the next factor
}
/**
 * Creates a fully populated confound summary.
 *
 * @param ee         the experiment or experiment subset the confound was found in
 * @param ef         the factor the summary is about
 * @param chiSquare  value stored as the chi-square statistic
 * @param df         value stored as df (presumably the degrees of freedom of the test; confirm with the caller)
 * @param p          value stored as the p-value
 * @param numBatches value stored as the number of batches
 */
public BatchConfound( BioAssaySet ee, ExperimentalFactor ef, double chiSquare, int df, double p,
        int numBatches ) {
    // what was tested
    this.ee = ee;
    this.ef = ef;
    this.numBatches = numBatches;

    // outcome of the test
    this.chiSquare = chiSquare;
    this.df = df;
    this.p = p;
}

ChiSquareTest cst = new ChiSquareTest();
/**
* @return the chi-square statistic stored for this confound
*/
public double getChiSquare() {
return chiSquare;
}

try {
chiSquare = cst.chiSquare( finalCounts );
} catch ( IllegalArgumentException e ) {
log.warn( "IllegalArgumentException exception computing ChiSq for : " + ef + "; Error was: " + e
.getMessage() );
chiSquare = Double.NaN;
}
/**
* @return the stored df value (presumably the degrees of freedom of the test; confirm with the producer)
*/
public int getDf() {
return df;
}

df = ( finalCounts.length - 1 ) * ( finalCounts[0].length - 1 );
ChiSquaredDistribution distribution = new ChiSquaredDistribution( df );
/**
* @return the bioassay set this summary refers to; null if it was never set
*/
public BioAssaySet getEe() {
return ee;
}

if ( !Double.isNaN( chiSquare ) ) {
p = 1.0 - distribution.cumulativeProbability( chiSquare );
}
/**
* @return the experimental factor this summary refers to; null if it was never set
*/
public ExperimentalFactor getEf() {
return ef;
}

// log.debug( "ChiSq\t" + ee.getId() + "\t" + ee.getShortName() + "\t" + ef.getId() + "\t" + ef.getName()
// + "\t" + String.format( "%.2f", chiSquare ) + "\t" + df + "\t" + String.format( "%.2g", p )
// + "\t" + numBatches );
}
/**
* @return the number of batches recorded for this confound
*/
public int getNumBatches() {
return numBatches;
}

BatchConfoundValueObject summary = new BatchConfoundValueObject( ee, ef, chiSquare, df, p, numBatches );
/**
* @return the p-value stored for this confound
*/
public double getP() {
return p;
}

result.add( summary );
/**
 * Renders this confound as one tab-separated line: bioassay set ID, display name, factor ID, factor category (or
 * the factor name when it has no category), chi-square (2 decimals), df, p-value (2 significant digits) and the
 * number of batches.
 * <p>
 * The display name is " Subset &lt;name&gt; of &lt;source short name&gt;" when the bioassay set is an
 * {@link ExpressionExperimentSubSet}, and the experiment's short name otherwise. The two cases were previously
 * swapped, so whole experiments were reported as subsets and subsets lost their own name.
 * <p>
 * Both {@code ee} and {@code ef} must be set: they are dereferenced here without a null check.
 *
 * @return tab-separated summary line
 */
@Override
public String toString() {
    String name;
    if ( ee instanceof ExpressionExperimentSubSet ) {
        // a subset is identified by its own name plus the short name of the experiment it was taken from
        name = " Subset " + ee.getName() + " of " + ( ( ExpressionExperimentSubSet ) ee ).getSourceExperiment()
                .getShortName();
    } else {
        name = ( ( ExpressionExperiment ) ee ).getShortName();
    }
    return ee.getId() + "\t" + name + "\t" + ef.getId() + "\t" + ( ef.getCategory() != null ?
            ef.getCategory().getCategory() :
            ef.getName() ) + "\t" + String.format( "%.2f", chiSquare ) + "\t" + df + "\t" + String
            .format( "%.2g", p ) + "\t" + numBatches;
}

}
Loading

0 comments on commit 3bc9450

Please sign in to comment.