Skip to content

Commit

Permalink
Merge branch 'hotfix-1.27.15'
Browse files Browse the repository at this point in the history
  • Loading branch information
arteymix committed Jun 7, 2022
2 parents f67dc22 + 7b6ea0c commit 16f2f7d
Show file tree
Hide file tree
Showing 9 changed files with 139 additions and 12 deletions.
2 changes: 1 addition & 1 deletion gemma-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<artifactId>gemma</artifactId>
<groupId>gemma</groupId>
<version>1.27.14</version>
<version>1.27.15</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>gemma-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.util.Collection;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -120,6 +121,20 @@ List<DifferentialExpressionAnalysisResult> analysisResultSetToString( Expression
File writeDataFile( ExpressionExperiment ee, boolean filtered, String fileName, boolean compress )
throws IOException;

/**
* Write the raw expression data of an experiment to a given writer.
* <p>
* Note: the data of the preferred quantitation type is used.
* <p>
* Note: For compression, wrap a {@link java.util.zip.GZIPOutputStream} with a {@link java.io.OutputStreamWriter}.
* To write to a string, consider using {@link java.io.StringWriter}.
*
* @param ee the expression experiment
* @param writer the destination for the raw expression data
* @throws IOException if an operation on the writer fails
* @throws IllegalArgumentException if the expression experiment cannot be found anymore
*/
void writeRawExpressionData( ExpressionExperiment ee, Writer writer ) throws IOException;

/**
* Write or locate the coexpression data file for a given experiment
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.hibernate.Hibernate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;
import ubic.basecode.util.FileTools;
import ubic.basecode.util.StringUtil;
import ubic.gemma.core.analysis.expression.diff.DifferentialExpressionAnalysisConfig;
Expand All @@ -42,6 +44,7 @@
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.bioAssayData.DesignElementDataVector;
import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector;
import ubic.gemma.model.expression.designElement.CompositeSequence;
import ubic.gemma.model.expression.experiment.*;
import ubic.gemma.model.genome.Taxon;
Expand All @@ -57,6 +60,7 @@
import java.io.*;
import java.util.*;
import java.util.Map.Entry;
import java.util.stream.Collectors;
import java.util.zip.GZIPOutputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
Expand Down Expand Up @@ -347,6 +351,23 @@ public void writeDiffExArchiveFile( BioAssaySet experimentAnalyzed, Differential
}
}

@Override
@Transactional(readOnly = true)
public void writeRawExpressionData( ExpressionExperiment ee, Writer writer ) throws IOException {
    // look the experiment up again through the service; a null result means it no longer exists
    ExpressionExperiment experiment = expressionExperimentService.find( ee );
    if ( experiment == null ) {
        throw new IllegalArgumentException( "ExpressionExperiment has been removed." );
    }

    // initialize the vectors up front so that they are fetched in a single query, without a join on the EE
    Hibernate.initialize( experiment.getRawExpressionDataVectors() );

    ExpressionDataDoubleMatrix rawMatrix = expressionDataMatrixService.getRawExpressionDataMatrix( experiment );

    // gather every platform referenced by the design elements of the raw vectors
    Set<ArrayDesign> platforms = new HashSet<>();
    for ( RawExpressionDataVector vector : experiment.getRawExpressionDataVectors() ) {
        platforms.add( vector.getDesignElement().getArrayDesign() );
    }

    MatrixWriter matrixWriter = new MatrixWriter();
    matrixWriter.writeWithStringifiedGeneAnnotations( writer, rawMatrix, getGeneAnnotationsAsStringsByProbe( platforms ), true );
}

@Override
public File writeOrLocateCoexpressionDataFile( ExpressionExperiment ee, boolean forceWrite ) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
*/
package ubic.gemma.core.analysis.service;

import org.springframework.transaction.annotation.Transactional;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
import ubic.gemma.core.analysis.preprocess.filter.FilterConfig;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
Expand Down Expand Up @@ -61,6 +62,13 @@ ExpressionDataDoubleMatrix getFilteredMatrix( String arrayDesignName, FilterConf
*/
ExpressionDataDoubleMatrix getProcessedExpressionDataMatrix( ExpressionExperiment ee );

/**
* Obtain a matrix of the raw expression data of an experiment.
* <p>
* Only the raw vectors of a preferred quantitation type are used. If more than one preferred quantitation type is
* found, the one with the highest ID is picked.
*
* @param ee the expression experiment whose raw expression data vectors are used
* @return a matrix built from the raw expression data vectors of the picked quantitation type
* @throws IllegalArgumentException if the expression experiment has no preferred raw quantitation types
*/
ExpressionDataDoubleMatrix getRawExpressionDataMatrix( ExpressionExperiment ee );

DoubleMatrix<Gene, ExpressionExperiment> getRankMatrix( Collection<Gene> genes,
Collection<ExpressionExperiment> ees, ProcessedExpressionDataVectorDao.RankMethod method );

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,35 +19,39 @@
package ubic.gemma.core.analysis.service;

import cern.colt.list.DoubleArrayList;
import lombok.extern.apachecommons.CommonsLog;
import org.apache.commons.lang3.ArrayUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Transactional;
import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix;
import ubic.basecode.dataStructure.matrix.DoubleMatrix;
import ubic.basecode.math.DescriptiveWithMissing;
import ubic.gemma.core.analysis.preprocess.filter.ExpressionExperimentFilter;
import ubic.gemma.core.analysis.preprocess.filter.FilterConfig;
import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix;
import ubic.gemma.model.common.quantitationtype.QuantitationType;
import ubic.gemma.model.expression.arrayDesign.ArrayDesign;
import ubic.gemma.model.expression.bioAssayData.ProcessedExpressionDataVector;
import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;
import ubic.gemma.model.genome.Gene;
import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService;
import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorDao;
import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService;
import ubic.gemma.persistence.service.expression.bioAssayData.RawExpressionDataVectorService;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.*;
import java.util.stream.Collectors;

/**
* Tools for easily getting data matrices for analysis in a consistent way.
*
* @author keshav
*/
@Component
@CommonsLog
public class ExpressionDataMatrixServiceImpl implements ExpressionDataMatrixService {

@Autowired
Expand All @@ -56,6 +60,9 @@ public class ExpressionDataMatrixServiceImpl implements ExpressionDataMatrixServ
@Autowired
private ProcessedExpressionDataVectorService processedExpressionDataVectorService;

@Autowired
private RawExpressionDataVectorService rawExpressionDataVectorService;

@Autowired
private ArrayDesignService arrayDesignService;

Expand Down Expand Up @@ -97,6 +104,32 @@ public ExpressionDataDoubleMatrix getProcessedExpressionDataMatrix( ExpressionEx
return new ExpressionDataDoubleMatrix( dataVectors );
}

/**
 * Build the raw expression data matrix of an experiment from the vectors of its preferred quantitation type.
 * <p>
 * The raw vectors are grouped by quantitation type. When several preferred quantitation types are present, a
 * warning is logged and the one with the highest ID is used.
 *
 * @param ee the expression experiment whose raw expression data vectors are read
 * @return a matrix built from the raw vectors of the picked quantitation type
 * @throws IllegalArgumentException if none of the raw vectors has a preferred quantitation type, which includes
 *                                  the case where the experiment has no raw vectors at all
 */
@Override
@Transactional(readOnly = true)
public ExpressionDataDoubleMatrix getRawExpressionDataMatrix( ExpressionExperiment ee ) {
    Map<QuantitationType, List<RawExpressionDataVector>> rawVectorsByQt = ee.getRawExpressionDataVectors().stream()
            .collect( Collectors.groupingBy( RawExpressionDataVector::getQuantitationType ) );

    Set<QuantitationType> preferredQuantitationTypes = rawVectorsByQt.keySet().stream()
            .filter( QuantitationType::getIsPreferred )
            .collect( Collectors.toSet() );

    if ( preferredQuantitationTypes.size() > 1 ) {
        log.warn( "There are more than one preferred quantitation type for " + ee + " raw expression vectors." );
    }

    // pick the QT with the maximum ID, which should be the latest one created; an empty set means that no
    // raw vector is usable, so fail here instead of passing a null key to the map lookup below
    QuantitationType pickedQuantitationType = preferredQuantitationTypes.stream()
            .max( Comparator.comparing( QuantitationType::getId ) )
            .orElseThrow( () -> new IllegalArgumentException(
                    "There are no RawExpressionDataVectors with a preferred quantitation type for " + ee
                            + ", they must be created first." ) );

    return new ExpressionDataDoubleMatrix( rawVectorsByQt.get( pickedQuantitationType ) );
}

@Override
public DoubleMatrix<Gene, ExpressionExperiment> getRankMatrix( Collection<Gene> genes,
Collection<ExpressionExperiment> ees, ProcessedExpressionDataVectorDao.RankMethod method ) {
Expand Down
2 changes: 1 addition & 1 deletion gemma-web/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<artifactId>gemma</artifactId>
<groupId>gemma</groupId>
<version>1.27.14</version>
<version>1.27.15</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<artifactId>gemma-web</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
package ubic.gemma.web.services.rest;

import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.media.Content;
import io.swagger.v3.oas.annotations.media.Schema;
import io.swagger.v3.oas.annotations.responses.ApiResponse;
import lombok.extern.apachecommons.CommonsLog;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
Expand All @@ -40,19 +43,22 @@
import ubic.gemma.persistence.service.expression.bioAssayData.ProcessedExpressionDataVectorService;
import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService;
import ubic.gemma.persistence.util.Filters;
import ubic.gemma.web.services.rest.util.ArgUtils;
import ubic.gemma.web.services.rest.util.PaginatedResponseDataObject;
import ubic.gemma.web.services.rest.util.Responder;
import ubic.gemma.web.services.rest.util.ResponseDataObject;
import ubic.gemma.web.services.rest.annotations.GZIP;
import ubic.gemma.web.services.rest.util.*;
import ubic.gemma.web.services.rest.util.args.*;

import javax.servlet.http.HttpServletResponse;
import javax.ws.rs.*;
import javax.ws.rs.core.Context;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
import javax.ws.rs.core.StreamingOutput;
import java.io.File;
import java.util.*;
import java.io.OutputStreamWriter;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
* RESTful interface for datasets.
Expand Down Expand Up @@ -249,6 +255,29 @@ public Response datasetData( // Params:
return this.outputDataFile( ee, filterData.getValue() );
}

/**
 * Retrieve raw expression data.
 *
 * The payload is transparently compressed via a <code>Content-Encoding</code> header and streamed to avoid dumping
 * the whole payload in memory.
 *
 * The text is always encoded in UTF-8, as advertised by the produced media type, regardless of the default
 * charset of the platform.
 *
 * @param datasetArg the identifier of the dataset to retrieve the raw expression data for
 * @return a response whose entity streams the tab-separated raw expression data as an attachment
 */
@GZIP
@GET
@Path("/{dataset}/data/raw")
@Produces("text/tab-separated-values; charset=UTF-8")
@Operation(summary = "Retrieve raw expression data of a dataset", responses = {
        @ApiResponse(responseCode = "200", content = @Content(mediaType = "text/tab-separated-values; charset=UTF-8",
                schema = @Schema(type = "string", format = "binary"))),
        @ApiResponse(responseCode = "404", description = "The dataset does not exist.",
                content = @Content(mediaType = MediaType.APPLICATION_JSON, schema = @Schema(implementation = ResponseErrorObject.class))) })
public Response getDatasetRawExpression( @PathParam("dataset") DatasetArg<?> datasetArg ) {
    ExpressionExperiment ee = datasetArg.getEntity( expressionExperimentService );
    StreamingOutput stream = ( output ) -> {
        // the charset must match the one declared in @Produces; the no-arg constructor uses the platform default
        OutputStreamWriter writer = new OutputStreamWriter( output, java.nio.charset.StandardCharsets.UTF_8 );
        expressionDataFileService.writeRawExpressionData( ee, writer );
        // push out what is still buffered in the encoder; the container owns the stream, so it is not closed here
        writer.flush();
    };
    return Response.ok( stream )
            .header( "Content-Disposition", String.format( "attachment; filename=%d_%s_expmat.unfilt.raw.data.txt", ee.getId(), ee.getShortName() ) )
            .build();
}

/**
* Retrieves the design for the given dataset.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
import ubic.gemma.web.services.rest.util.args.*;
import ubic.gemma.web.util.BaseSpringWebTest;

import javax.ws.rs.core.Response;
import javax.ws.rs.core.StreamingOutput;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
Expand Down Expand Up @@ -239,4 +243,21 @@ public void testFilterByGeeqPublicationScore() {
SortArg.valueOf( "+id" ),
new MockHttpServletResponse() );
}

@Test
public void testGetDatasetRawExpression() throws IOException {
    ExpressionExperiment dataset = ees.get( 0 );
    Response response = datasetsWebService.getDatasetRawExpression( DatasetArg.valueOf( String.valueOf( dataset.getId() ) ) );

    // drain the streamed entity into memory so that it can be inspected as text
    StreamingOutput entity = ( StreamingOutput ) response.getEntity();
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    entity.write( buffer );
    String text = new String( buffer.toByteArray(), StandardCharsets.UTF_8 );

    // 7 comment lines and 1 header line, followed by one line per raw vector; the fixture has two platforms,
    // each holding the default number of test elements
    int expectedLineCount = 8 + 2 * testHelper.getTestElementCollectionSize();
    assertThat( text )
            .isNotEmpty()
            .contains( dataset.getShortName() )
            .hasLineCount( expectedLineCount );
}
}
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<name>Gemma</name>
<groupId>gemma</groupId>
<artifactId>gemma</artifactId>
<version>1.27.14</version>
<version>1.27.15</version>
<inceptionYear>2005</inceptionYear>
<description>The Gemma Project for meta-analysis of genomics data</description>
<url>https://gemma.msl.ubc.ca</url>
Expand Down

0 comments on commit 16f2f7d

Please sign in to comment.