From ca3769a3652e85760c9c5cf80e40ece8d71bb568 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 12 Mar 2024 16:53:50 -0700 Subject: [PATCH 001/105] Update versions for hotfix --- gemma-cli/pom.xml | 2 +- gemma-core/pom.xml | 2 +- gemma-groovy-support/pom.xml | 2 +- gemma-rest/pom.xml | 2 +- gemma-web/pom.xml | 2 +- pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gemma-cli/pom.xml b/gemma-cli/pom.xml index eba8b0470c..6668939e41 100644 --- a/gemma-cli/pom.xml +++ b/gemma-cli/pom.xml @@ -3,7 +3,7 @@ gemma gemma - 1.31.2 + 1.31.3-SNAPSHOT 4.0.0 gemma-cli diff --git a/gemma-core/pom.xml b/gemma-core/pom.xml index 31a8933710..ee4bffab79 100644 --- a/gemma-core/pom.xml +++ b/gemma-core/pom.xml @@ -3,7 +3,7 @@ gemma gemma - 1.31.2 + 1.31.3-SNAPSHOT 4.0.0 gemma-core diff --git a/gemma-groovy-support/pom.xml b/gemma-groovy-support/pom.xml index 7be89bd33e..4e833c2f79 100644 --- a/gemma-groovy-support/pom.xml +++ b/gemma-groovy-support/pom.xml @@ -6,7 +6,7 @@ gemma gemma - 1.31.2 + 1.31.3-SNAPSHOT gemma-groovy-support diff --git a/gemma-rest/pom.xml b/gemma-rest/pom.xml index 6a4cf4e2a0..215975dadb 100644 --- a/gemma-rest/pom.xml +++ b/gemma-rest/pom.xml @@ -5,7 +5,7 @@ gemma gemma - 1.31.2 + 1.31.3-SNAPSHOT 4.0.0 diff --git a/gemma-web/pom.xml b/gemma-web/pom.xml index c04e695de2..2a0e13a172 100644 --- a/gemma-web/pom.xml +++ b/gemma-web/pom.xml @@ -3,7 +3,7 @@ gemma gemma - 1.31.2 + 1.31.3-SNAPSHOT 4.0.0 gemma-web diff --git a/pom.xml b/pom.xml index a3614752e3..6e17522767 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ Gemma gemma gemma - 1.31.2 + 1.31.3-SNAPSHOT 2005 The Gemma Project for meta-analysis of genomics data https://gemma.msl.ubc.ca From 4b0531302fffec0f7a13a69e442008195736bdbf Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 12 Mar 2024 16:54:12 -0700 Subject: [PATCH 002/105] Add a CLI for updating the EE2C table Add an option for truncating the table before updating it, which is handy if the query has changed. 
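As a usage sketch (the launcher script name below is an assumption; only the command name updateEe2c and the truncate option are defined by this patch), the table can be rebuilt from scratch with something like:

    gemma-cli updateEe2c -truncate

Passing -truncate clears EXPRESSION_EXPERIMENT2CHARACTERISTIC before it is repopulated, so rows produced by an older version of the query are not left behind.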
--- .../ubic/gemma/core/apps/UpdateEE2CCli.java | 52 +++++++++++++++++++ .../service/TableMaintenanceUtil.java | 4 +- .../service/TableMaintenanceUtilImpl.java | 21 ++++++-- .../search/SearchServiceIntegrationTest.java | 2 +- .../CharacteristicServiceTest.java | 2 +- ...ssionExperimentServiceIntegrationTest.java | 5 +- .../TableMaintenanceUtilIntegrationTest.java | 12 ++--- .../CharacteristicDaoImplTest.java | 6 +-- 8 files changed, 84 insertions(+), 20 deletions(-) create mode 100644 gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEE2CCli.java diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEE2CCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEE2CCli.java new file mode 100644 index 0000000000..9a457755c3 --- /dev/null +++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/UpdateEE2CCli.java @@ -0,0 +1,52 @@ +package ubic.gemma.core.apps; + +import org.apache.commons.cli.CommandLine; +import org.apache.commons.cli.Options; +import org.apache.commons.cli.ParseException; +import org.springframework.beans.factory.annotation.Autowired; +import ubic.gemma.core.util.AbstractAuthenticatedCLI; +import ubic.gemma.persistence.service.TableMaintenanceUtil; + +import javax.annotation.Nullable; + +public class UpdateEE2CCli extends AbstractAuthenticatedCLI { + + private static final String TRUNCATE_OPTION = "truncate"; + + @Autowired + private TableMaintenanceUtil tableMaintenanceUtil; + + private boolean truncate; + + @Override + protected void buildOptions( Options options ) { + options.addOption( TRUNCATE_OPTION, "truncate", false, "Truncate the table before updating it" ); + } + + @Override + protected void processOptions( CommandLine commandLine ) throws ParseException { + truncate = commandLine.hasOption( TRUNCATE_OPTION ); + } + + @Override + protected void doWork() throws Exception { + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( truncate ); + } + + @Nullable + @Override + public String getCommandName() { + return "updateEe2c"; + } + + @Nullable + @Override + public String getShortDesc() { + return "Update the EXPRESSION_EXPERIMENT2CHARACTERISTIC table"; + } + + @Override + public GemmaCLI.CommandGroup getCommandGroup() { + return GemmaCLI.CommandGroup.EXPERIMENT; + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtil.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtil.java index e940364a1d..5cfb0407f0 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtil.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtil.java @@ -63,7 +63,7 @@ public interface TableMaintenanceUtil { * @return the number of records that were created or updated */ @Secured({ "GROUP_AGENT" }) - int updateExpressionExperiment2CharacteristicEntries(); + int updateExpressionExperiment2CharacteristicEntries( boolean truncate ); /** * Update a specific level of the {@code EXPRESSION_EXPERIMENT2CHARACTERISTIC} table. @@ -72,7 +72,7 @@ public interface TableMaintenanceUtil { * @return the number of records that were created or updated */ @Secured({ "GROUP_AGENT" }) - int updateExpressionExperiment2CharacteristicEntries( Class level ); + int updateExpressionExperiment2CharacteristicEntries( Class level, boolean truncate ); /** * Update the {@code EXPRESSION_EXPERIMENT2_ARRAY_DESIGN} table. 
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtilImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtilImpl.java index b8f0fe281a..238ea9b190 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtilImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtilImpl.java @@ -91,7 +91,7 @@ public class TableMaintenanceUtilImpl implements TableMaintenanceUtil { + "group by AOI.ID), 0)"; private static final String EE2C_EE_QUERY = - "select MIN(C.ID), C.NAME, C.DESCRIPTION, C.CATEGORY, C.CATEGORY_URI, C.`VALUE`, C.VALUE_URI, C.ORIGINAL_VALUE, C.EVIDENCE_CODE, I.ID, (" + SELECT_ANONYMOUS_MASK + "), cast(? as char(256)) " + "select MIN(C.ID), C.NAME, C.DESCRIPTION, C.CATEGORY, C.CATEGORY_URI, C.`VALUE`, C.VALUE_URI, C.ORIGINAL_VALUE, C.EVIDENCE_CODE, I.ID, (" + SELECT_ANONYMOUS_MASK + "), cast(? as char(255)) " + "from INVESTIGATION I " + "join CHARACTERISTIC C on I.ID = C.INVESTIGATION_FK " + "where I.class = 'ExpressionExperiment' " @@ -223,8 +223,14 @@ public void updateGene2CsEntries() { @Override @Transactional @Timed - public int updateExpressionExperiment2CharacteristicEntries() { + public int updateExpressionExperiment2CharacteristicEntries( boolean truncate ) { log.info( "Updating the EXPRESSION_EXPERIMENT2CHARACTERISTIC table..." ); + if ( truncate ) { + log.info( "Truncating EXPRESSION_EXPERIMENT2CHARACTERISTIC..." ); + sessionFactory.getCurrentSession() + .createSQLQuery( "delete from EXPRESSION_EXPERIMENT2CHARACTERISTIC" ) + .executeUpdate(); + } int updated = sessionFactory.getCurrentSession() .createSQLQuery( "insert into EXPRESSION_EXPERIMENT2CHARACTERISTIC (ID, NAME, DESCRIPTION, CATEGORY, CATEGORY_URI, `VALUE`, VALUE_URI, ORIGINAL_VALUE, EVIDENCE_CODE, EXPRESSION_EXPERIMENT_FK, ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK, LEVEL) " @@ -246,7 +252,7 @@ public int updateExpressionExperiment2CharacteristicEntries() { @Override @Timed @Transactional - public int updateExpressionExperiment2CharacteristicEntries( Class level ) { + public int updateExpressionExperiment2CharacteristicEntries( Class level, boolean truncate ) { String query; if ( level.equals( ExpressionExperiment.class ) ) { query = EE2C_EE_QUERY; @@ -257,7 +263,14 @@ public int updateExpressionExperiment2CharacteristicEntries( Class level ) { } else { throw new IllegalArgumentException( "Level must be one of ExpressionExperiment.class, BioMaterial.class or ExperimentalDesign.class." ); } - log.info( "Updating the EXPRESSION_EXPERIMENT2CHARACTERISTIC table at level " + level + "..." ); + log.info( "Updating the EXPRESSION_EXPERIMENT2CHARACTERISTIC table at " + level.getSimpleName() + " level..." ); + if ( truncate ) { + log.info( "Truncating EXPRESSION_EXPERIMENT2CHARACTERISTIC at " + level.getSimpleName() + " level..." 
); + sessionFactory.getCurrentSession() + .createSQLQuery( "delete from EXPRESSION_EXPERIMENT2CHARACTERISTIC where LEVEL = :level" ) + .setParameter( "level", level ) + .executeUpdate(); + } int updated = sessionFactory.getCurrentSession() .createSQLQuery( "insert into EXPRESSION_EXPERIMENT2CHARACTERISTIC (ID, NAME, DESCRIPTION, CATEGORY, CATEGORY_URI, `VALUE`, VALUE_URI, ORIGINAL_VALUE, EVIDENCE_CODE, EXPRESSION_EXPERIMENT_FK, ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK, LEVEL) " diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceIntegrationTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceIntegrationTest.java index a09eddeac2..7ff836de6a 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceIntegrationTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceIntegrationTest.java @@ -129,7 +129,7 @@ public void setUp() throws Exception { gene.setNcbiGeneId( new Integer( geneNcbiId ) ); geneService.update( gene ); - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); } @After diff --git a/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicServiceTest.java b/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicServiceTest.java index ee875a66ed..66f8ae450a 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicServiceTest.java @@ -91,7 +91,7 @@ public void setUp() throws Exception { fv.setCharacteristics( this.getTestPersistentStatements( 1 ) ); fvService.update( fv ); - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); } @Test diff --git a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java index 8f495236ae..197d1396d0 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java +++ b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java @@ -31,7 +31,6 @@ import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.description.DatabaseEntry; import ubic.gemma.model.common.quantitationtype.QuantitationType; -import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.bioAssayData.DesignElementDataVector; import ubic.gemma.model.expression.bioAssayData.RawExpressionDataVector; @@ -430,7 +429,7 @@ public void testCacheInvalidationWhenACharacteristicIsDeleted() { assertThat( c2.getNumberOfExpressionExperiments() ).isEqualTo( 1L ); }; - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, 0, 0, null, null, null, null ) ) .noneSatisfy( consumer ); @@ -444,7 +443,7 @@ public void testCacheInvalidationWhenACharacteristicIsDeleted() { .noneSatisfy( consumer ); // update the pivot table - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); + 
tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, 0, 0, null, null, null, null ) ) .satisfiesOnlyOnce( consumer ); diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilIntegrationTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilIntegrationTest.java index 6a5525418e..d81d6884bb 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilIntegrationTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilIntegrationTest.java @@ -60,19 +60,19 @@ public void testWhenUserIsAnonymous() { @Test @WithMockUser(authorities = "GROUP_AGENT") public void testUpdateExpressionExperiment2CharacteristicEntries() { - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( ExpressionExperiment.class ); - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( BioMaterial.class ); - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( ExperimentalDesign.class ); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( ExpressionExperiment.class, false ); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( BioMaterial.class, false ); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( ExperimentalDesign.class, false ); assertThatThrownBy( () -> { - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( FactorValue.class ); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( FactorValue.class, false ); } ).isInstanceOf( IllegalArgumentException.class ); } @Test(expected = AccessDeniedException.class) public void testUpdateEE2CAsUser() { this.runAsAnonymous(); - tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); + tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); } @Test diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImplTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImplTest.java index 50d19bcc2d..93fa6fb9d1 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImplTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImplTest.java @@ -174,7 +174,7 @@ public void testFindExperimentsByUris() { acl.insertAce( 0, BasePermission.READ, new AclPrincipalSid( "bob" ), false ); aclService.updateAcl( acl ); - int updated = tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); + int updated = tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); assertThat( updated ).isEqualTo( 1 ); sessionFactory.getCurrentSession().flush(); // ranking by level uses the order by field() which is not supported @@ -201,7 +201,7 @@ public void testFindExperimentsByUrisAsAnonymous() { aclService.updateAcl( acl ); sessionFactory.getCurrentSession().flush(); - int updated = tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); + int updated = tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); assertThat( updated ).isEqualTo( 1 ); 
sessionFactory.getCurrentSession().flush(); @@ -233,7 +233,7 @@ public void testFindExperimentsByUrisAsAdmin() { sessionFactory.getCurrentSession().persist( ee ); sessionFactory.getCurrentSession().flush(); aclService.createAcl( new AclObjectIdentity( ExpressionExperiment.class, ee.getId() ) ); - int updated = tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries(); + int updated = tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); assertThat( updated ).isEqualTo( 1 ); sessionFactory.getCurrentSession().flush(); // ranking by level uses the order by field() which is not supported From 981689e9b400304ca15515be3a1a175b1a614ed6 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 13 Mar 2024 10:09:32 -0700 Subject: [PATCH 003/105] Add missing truncate option in scheduled job details --- .../resources/ubic/gemma/applicationContext-schedule.xml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml b/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml index 094e1d6eb5..ba44d18cc1 100644 --- a/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml +++ b/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml @@ -112,8 +112,8 @@ - ubic.gemma.model.expression.experiment.ExpressionExperiment - + ubic.gemma.model.expression.experiment.ExpressionExperiment + false @@ -130,6 +130,7 @@ ubic.gemma.model.expression.biomaterial.BioMaterial + false @@ -147,6 +148,7 @@ ubic.gemma.model.expression.experiment.ExperimentalDesign + false From c21175552723c01de214ca86f33739c456f8871a Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 13 Mar 2024 10:27:23 -0700 Subject: [PATCH 004/105] rest: Add a parameter to limit the number of category results --- .../experiment/ExpressionExperimentDao.java | 4 +-- .../ExpressionExperimentDaoImpl.java | 5 ++-- .../ExpressionExperimentService.java | 28 +++++++++++++------ .../ExpressionExperimentServiceImpl.java | 6 ++-- ...ssionExperimentServiceIntegrationTest.java | 8 +++--- .../ExpressionExperimentServiceTest.java | 4 +-- .../ExpressionExperimentDaoTest.java | 2 +- .../ubic/gemma/rest/DatasetsWebService.java | 14 +++++----- .../gemma/rest/DatasetsWebServiceTest.java | 12 ++++---- 9 files changed, 47 insertions(+), 36 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java index f3d637b7c4..6f782f8fc0 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java @@ -271,8 +271,6 @@ Map> getSampleRemovalEvents( */ List getExperimentalDesignAnnotations( ExpressionExperiment expressionExperiment ); - Map getCategoriesUsageFrequency( @Nullable Collection eeIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ); - /** * Special indicator for free-text terms. *

@@ -287,6 +285,8 @@ Map> getSampleRemovalEvents( */ String UNCATEGORIZED = "[uncategorized_" + RandomStringUtils.randomAlphanumeric( 10 ) + "]"; + Map getCategoriesUsageFrequency( @Nullable Collection eeIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ); + /** * Obtain annotations usage frequency for a set of given {@link ExpressionExperiment} IDs. *

diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index a7f95558a7..a4769483e6 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -676,7 +676,7 @@ private List getAnnotationsByLevel( ExpressionExperiment express } @Override - public Map getCategoriesUsageFrequency( @Nullable Collection eeIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ) { + public Map getCategoriesUsageFrequency( @Nullable Collection eeIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ) { if ( eeIds != null && eeIds.isEmpty() ) { return Collections.emptyMap(); } @@ -721,7 +721,8 @@ public Map getCategoriesUsageFrequency( @Nullable Collecti .addSynchronizedQuerySpace( EE2C_QUERY_SPACE ) .addSynchronizedEntityClass( ExpressionExperiment.class ) .addSynchronizedEntityClass( Characteristic.class ) - .setCacheable( true ); + .setCacheable( true ) + .setMaxResults( maxResults ); if ( eeIds != null ) { q.setParameterList( "eeIds", new HashSet<>( eeIds ) ); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java index 89d4e01f61..7bb7a3bc8b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java @@ -263,13 +263,11 @@ ExpressionExperiment addRawVectors( ExpressionExperiment eeToUpdate, /** * Apply ontological inference to augment a filter with additional terms. - * @param mentionedTermUris if non-null, all the terms explicitly mentioned in the filters are added to the - * collection. The returned filter might contain terms that have been inferred. + * @param mentionedTerms if non-null, all the terms explicitly mentioned in the filters are added to the collection. + * The returned filter might contain terms that have been inferred. */ Filters getFiltersWithInferredAnnotations( Filters f, @Nullable Collection mentionedTerms ); - Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ); - @Value class CharacteristicWithUsageStatisticsAndOntologyTerm { /** @@ -302,6 +300,18 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm { */ String UNCATEGORIZED = ExpressionExperimentDao.UNCATEGORIZED; + /** + * Obtain category usage frequency for datasets matching the given filter. + * + * @param filters filters restricting the terms to a given set of datasets + * @param excludedCategoryUris ensure that the given category URIs are excluded + * @param excludedTermUris ensure that the given term URIs and their sub-terms (as per {@code subClassOf} relation) + * are excluded; this requires relevant ontologies to be loaded in {@link ubic.gemma.core.ontology.OntologyService}. 
+ * @param retainedTermUris ensure that the given terms are retained (overrides any exclusion from minFrequency and excludedTermUris) + * @param maxResults maximum number of results to return + */ + Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ); + /** * Obtain annotation usage frequency for datasets matching the given filters. *

@@ -311,18 +321,18 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm { * if new terms are attached. * * @param filters filters restricting the terms to a given set of datasets - * @param maxResults maximum number of results to return - * @param minFrequency minimum occurrences of a term to be included in the results * @param category a category to restrict annotations to, or null to include all categories - * @param excludedCategoryUris ensure that the given categories are excluded - * @param excludedTermUris ensure that the given terms and their sub-terms (as per {@code subClassOf} relation) + * @param excludedCategoryUris ensure that the given category URIs are excluded + * @param excludedTermUris ensure that the given term URIs and their sub-terms (as per {@code subClassOf} relation) * are excluded; this requires relevant ontologies to be loaded in {@link ubic.gemma.core.ontology.OntologyService}. + * @param minFrequency minimum occurrences of a term to be included in the results * @param retainedTermUris ensure that the given terms are retained (overrides any exclusion from minFrequency and excludedTermUris) + * @param maxResults maximum number of results to return * @return mapping annotations grouped by category and term (URI or value if null) to their number of occurrences in * the matched datasets * @see ExpressionExperimentDao#getAnnotationsUsageFrequency(Collection, Class, int, int, String, Collection, Collection, Collection) */ - List getAnnotationsUsageFrequency( @Nullable Filters filters, int maxResults, int minFrequency, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ); + List getAnnotationsUsageFrequency( @Nullable Filters filters, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, int minFrequency, @Nullable Collection retainedTermUris, int maxResults ); /** * @param expressionExperiment experiment diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index 7bd86362a5..3338a111ed 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -727,7 +727,7 @@ private static class SubClauseKey { @Override @Transactional(readOnly = true) - public Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ) { + public Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ) { List eeIds; if ( filters == null || filters.isEmpty() ) { eeIds = null; @@ -737,7 +737,7 @@ public Map getCategoriesUsageFrequency( @Nullable Filters if ( excludedTermUris != null ) { excludedTermUris = inferTermsUris( excludedTermUris ); } - return expressionExperimentDao.getCategoriesUsageFrequency( eeIds, excludedCategoryUris, excludedTermUris, retainedTermUris ); + return expressionExperimentDao.getCategoriesUsageFrequency( eeIds, excludedCategoryUris, excludedTermUris, retainedTermUris, maxResults ); } /** @@ 
-746,7 +746,7 @@ public Map getCategoriesUsageFrequency( @Nullable Filters */ @Override @Transactional(readOnly = true) - public List getAnnotationsUsageFrequency( @Nullable Filters filters, int maxResults, int minFrequency, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ) { + public List getAnnotationsUsageFrequency( @Nullable Filters filters, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, int minFrequency, @Nullable Collection retainedTermUris, int maxResults ) { if ( excludedTermUris != null ) { excludedTermUris = inferTermsUris( excludedTermUris ); } diff --git a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java index 197d1396d0..8434cf16b3 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java +++ b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java @@ -430,7 +430,7 @@ public void testCacheInvalidationWhenACharacteristicIsDeleted() { }; tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, 0, 0, null, null, null, null ) ) + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, 0, null, 0 ) ) .noneSatisfy( consumer ); // add the term to the dataset and update the pivot table @@ -439,12 +439,12 @@ public void testCacheInvalidationWhenACharacteristicIsDeleted() { assertThat( c.getId() ).isNotNull(); // the table is out-of-date - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, 0, 0, null, null, null, null ) ) + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, 0, null, 0 ) ) .noneSatisfy( consumer ); // update the pivot table tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, 0, 0, null, null, null, null ) ) + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, 0, null, 0 ) ) .satisfiesOnlyOnce( consumer ); // remove the term, which must evict the query cache @@ -457,7 +457,7 @@ public void testCacheInvalidationWhenACharacteristicIsDeleted() { } ); // since deletions are cascaded, the change will be reflected immediatly - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, 0, 0, null, null, null, null ) ) + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, 0, null, 0 ) ) .noneSatisfy( consumer ); } diff --git a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java index b9354d4db5..c789a34518 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java @@ -182,7 +182,7 @@ public void testGetFiltersWithCategories() { @Test public void testGetAnnotationsUsageFrequency() { - 
expressionExperimentService.getAnnotationsUsageFrequency( Filters.empty(), -1, 0, null, null, null, null ); + expressionExperimentService.getAnnotationsUsageFrequency( Filters.empty(), null, null, null, 0, null, -1 ); verify( expressionExperimentDao ).getAnnotationsUsageFrequency( null, null, -1, 0, null, null, null, null ); verifyNoMoreInteractions( expressionExperimentDao ); } @@ -190,7 +190,7 @@ public void testGetAnnotationsUsageFrequency() { @Test public void testGetAnnotationsUsageFrequencyWithFilters() { Filters f = Filters.by( "c", "valueUri", String.class, Filter.Operator.eq, "http://example.com/T00001", "characteristics.valueUri" ); - expressionExperimentService.getAnnotationsUsageFrequency( f, -1, 0, null, null, null, null ); + expressionExperimentService.getAnnotationsUsageFrequency( f, null, null, null, 0, null, -1 ); verify( expressionExperimentDao ).loadIdsWithCache( f, null ); verify( expressionExperimentDao ).getAnnotationsUsageFrequency( Collections.emptyList(), null, -1, 0, null, null, null, null ); verifyNoMoreInteractions( expressionExperimentDao ); diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java index db387a4607..f01bb16fdf 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java @@ -173,7 +173,7 @@ public void testGetOriginalPlatformUsageFrequency() { @WithMockUser(authorities = "GROUP_ADMIN") public void testGetCategoriesWithUsageFrequency() { Characteristic c = createCharacteristic( "foo", "foo", "bar", "bar" ); - Assertions.assertThat( expressionExperimentDao.getCategoriesUsageFrequency( null, null, null, null ) ) + Assertions.assertThat( expressionExperimentDao.getCategoriesUsageFrequency( null, null, null, null, -1 ) ) .containsEntry( c, 1L ); } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 6cbc923844..b2b2eed4ce 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -102,6 +102,7 @@ public class DatasetsWebService { private static final String ERROR_DATA_FILE_NOT_AVAILABLE = "Data file for experiment %s can not be created."; private static final String ERROR_DESIGN_FILE_NOT_AVAILABLE = "Design file for experiment %s can not be created."; + private static final int MAX_DATASETS_CATEGORIES = 200; private static final int MAX_DATASETS_ANNOTATIONS = 5000; @Autowired @@ -300,6 +301,7 @@ public static class CategoryWithUsageStatisticsValueObject implements UsageStati public QueriedAndFilteredResponseDataObject getDatasetsCategoriesUsageStatistics( @QueryParam("query") String query, @QueryParam("filter") @DefaultValue("") FilterArg filter, + @QueryParam("limit") @DefaultValue("20") LimitArg limit, @Parameter(description = "Excluded category URIs.", hidden = true) @QueryParam("excludedCategories") StringArrayArg excludedCategoryUris, @Parameter(description = "Exclude free-text categories (i.e. 
those with null URIs).", hidden = true) @QueryParam("excludeFreeTextCategories") @DefaultValue("false") Boolean excludeFreeTextCategories, @Parameter(description = "Excluded term URIs; this list is expanded with subClassOf inference.", hidden = true) @QueryParam("excludedTerms") StringArrayArg excludedTermUris, @@ -316,11 +318,13 @@ public QueriedAndFilteredResponseDataObject results = expressionExperimentService.getCategoriesUsageFrequency( filtersWithQuery, datasetArgService.getExcludedUris( excludedCategoryUris, excludeFreeTextCategories, excludeUncategorizedTerms ), datasetArgService.getExcludedUris( excludedTermUris, excludeFreeTextTerms, excludeUncategorizedTerms ), - mentionedTerms != null ? mentionedTerms.stream().map( OntologyTerm::getUri ).collect( Collectors.toSet() ) : null ) + mentionedTerms != null ? mentionedTerms.stream().map( OntologyTerm::getUri ).collect( Collectors.toSet() ) : null, + maxResults ) .entrySet() .stream() .map( e -> new CategoryWithUsageStatisticsValueObject( e.getKey().getCategoryUri(), e.getKey().getCategory(), e.getValue() ) ) @@ -388,12 +392,8 @@ public LimitedResponseDataObject getDa Map> visited = new HashMap<>(); List initialResults = expressionExperimentService.getAnnotationsUsageFrequency( filtersWithQuery, - limit, - minFrequency != null ? minFrequency : 0, - category, - datasetArgService.getExcludedUris( excludedCategoryUris, excludeFreeTextCategories, excludeUncategorizedTerms ), - datasetArgService.getExcludedUris( excludedTermUris, excludeFreeTextTerms, excludeUncategorizedTerms ), - mentionedTerms != null ? mentionedTerms.stream().map( OntologyTerm::getUri ).collect( Collectors.toSet() ) : null ); + category, datasetArgService.getExcludedUris( excludedCategoryUris, excludeFreeTextCategories, excludeUncategorizedTerms ), datasetArgService.getExcludedUris( excludedTermUris, excludeFreeTextTerms, excludeUncategorizedTerms ), minFrequency != null ? minFrequency : 0, mentionedTerms != null ? mentionedTerms.stream().map( OntologyTerm::getUri ).collect( Collectors.toSet() ) : null, limit + ); List results = initialResults .stream() .map( e -> new AnnotationWithUsageStatisticsValueObject( e.getCharacteristic(), e.getNumberOfExpressionExperiments(), !excludeParentTerms && e.getTerm() != null ? 
getParentTerms( e.getTerm(), visited ) : null ) ) diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index b160160c72..6192bc5991 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -294,7 +294,7 @@ public void testGetDatasetsAnnotationsWithRetainMentionedTerms() { .extracting( "groupBy", InstanceOfAssertFactories.list( String.class ) ) .containsExactly( "classUri", "className", "termUri", "termName" ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.empty(), Collections.emptySet() ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), 100, 0, null, null, null, Collections.emptySet() ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, 0, Collections.emptySet(), 100 ); } @Test @@ -310,7 +310,7 @@ public void testGetDatasetsAnnotations() { .extracting( "groupBy", InstanceOfAssertFactories.list( String.class ) ) .containsExactly( "classUri", "className", "termUri", "termName" ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.empty(), null ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), 100, 0, null, null, null, null ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, 0, null, 100 ); } @Test @@ -329,7 +329,7 @@ public void testGetDatasetsAnnotationsWhenMaxFrequencyIsSuppliedLimitMustUseMaxi .entity() .hasFieldOrPropertyWithValue( "limit", 5000 ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.empty(), null ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), 5000, 10, null, null, null, null ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, 10, null, 5000 ); } @Test @@ -341,7 +341,7 @@ public void testGetDatasetsAnnotationsWithLimitIsSupplied() { .hasFieldOrPropertyWithValue( "limit", 50 ) .extracting( "groupBy", InstanceOfAssertFactories.list( String.class ) ) .containsExactly( "classUri", "className", "termUri", "termName" ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), 50, 0, null, null, null, null ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, 0, null, 50 ); } @Test @@ -349,7 +349,7 @@ public void testGetDatasetsAnnotationsForUncategorizedTerms() { assertThat( target( "/datasets/annotations" ).queryParam( "category", "" ).request().get() ) .hasStatus( Response.Status.OK ) .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), 100, 0, ExpressionExperimentService.UNCATEGORIZED, null, null, null ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), ExpressionExperimentService.UNCATEGORIZED, null, null, 0, null, 100 ); } @Test @@ -357,7 +357,7 @@ public void testGetDatasetsCategories() { assertThat( target( "/datasets/categories" ).request().get() ) .hasStatus( Response.Status.OK ) .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ); - verify( expressionExperimentService ).getCategoriesUsageFrequency( Filters.empty(), null, null, null ); + verify( 
expressionExperimentService ).getCategoriesUsageFrequency( Filters.empty(), null, null, null, -1 ); } @Test From 7c8da1c5ecc62d68e0d8646605f6c6b3374d6f21 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 13 Mar 2024 10:27:23 -0700 Subject: [PATCH 005/105] rest: Add a parameter to limit the number of category results --- .../experiment/ExpressionExperimentDao.java | 4 +-- .../ExpressionExperimentDaoImpl.java | 5 ++-- .../ExpressionExperimentService.java | 28 +++++++++++++------ .../ExpressionExperimentServiceImpl.java | 6 ++-- ...ssionExperimentServiceIntegrationTest.java | 8 +++--- .../ExpressionExperimentServiceTest.java | 4 +-- .../ExpressionExperimentDaoTest.java | 2 +- .../ubic/gemma/rest/DatasetsWebService.java | 14 +++++----- .../gemma/rest/DatasetsWebServiceTest.java | 12 ++++---- 9 files changed, 47 insertions(+), 36 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java index f3d637b7c4..6f782f8fc0 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java @@ -271,8 +271,6 @@ Map> getSampleRemovalEvents( */ List getExperimentalDesignAnnotations( ExpressionExperiment expressionExperiment ); - Map getCategoriesUsageFrequency( @Nullable Collection eeIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ); - /** * Special indicator for free-text terms. *

@@ -287,6 +285,8 @@ Map> getSampleRemovalEvents( */ String UNCATEGORIZED = "[uncategorized_" + RandomStringUtils.randomAlphanumeric( 10 ) + "]"; + Map getCategoriesUsageFrequency( @Nullable Collection eeIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ); + /** * Obtain annotations usage frequency for a set of given {@link ExpressionExperiment} IDs. *

diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index a7f95558a7..a4769483e6 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -676,7 +676,7 @@ private List getAnnotationsByLevel( ExpressionExperiment express } @Override - public Map getCategoriesUsageFrequency( @Nullable Collection eeIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ) { + public Map getCategoriesUsageFrequency( @Nullable Collection eeIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ) { if ( eeIds != null && eeIds.isEmpty() ) { return Collections.emptyMap(); } @@ -721,7 +721,8 @@ public Map getCategoriesUsageFrequency( @Nullable Collecti .addSynchronizedQuerySpace( EE2C_QUERY_SPACE ) .addSynchronizedEntityClass( ExpressionExperiment.class ) .addSynchronizedEntityClass( Characteristic.class ) - .setCacheable( true ); + .setCacheable( true ) + .setMaxResults( maxResults ); if ( eeIds != null ) { q.setParameterList( "eeIds", new HashSet<>( eeIds ) ); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java index 89d4e01f61..7bb7a3bc8b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java @@ -263,13 +263,11 @@ ExpressionExperiment addRawVectors( ExpressionExperiment eeToUpdate, /** * Apply ontological inference to augment a filter with additional terms. - * @param mentionedTermUris if non-null, all the terms explicitly mentioned in the filters are added to the - * collection. The returned filter might contain terms that have been inferred. + * @param mentionedTerms if non-null, all the terms explicitly mentioned in the filters are added to the collection. + * The returned filter might contain terms that have been inferred. */ Filters getFiltersWithInferredAnnotations( Filters f, @Nullable Collection mentionedTerms ); - Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ); - @Value class CharacteristicWithUsageStatisticsAndOntologyTerm { /** @@ -302,6 +300,18 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm { */ String UNCATEGORIZED = ExpressionExperimentDao.UNCATEGORIZED; + /** + * Obtain category usage frequency for datasets matching the given filter. + * + * @param filters filters restricting the terms to a given set of datasets + * @param excludedCategoryUris ensure that the given category URIs are excluded + * @param excludedTermUris ensure that the given term URIs and their sub-terms (as per {@code subClassOf} relation) + * are excluded; this requires relevant ontologies to be loaded in {@link ubic.gemma.core.ontology.OntologyService}. 
+ * @param retainedTermUris ensure that the given terms are retained (overrides any exclusion from minFrequency and excludedTermUris) + * @param maxResults maximum number of results to return + */ + Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ); + /** * Obtain annotation usage frequency for datasets matching the given filters. *

@@ -311,18 +321,18 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm { * if new terms are attached. * * @param filters filters restricting the terms to a given set of datasets - * @param maxResults maximum number of results to return - * @param minFrequency minimum occurrences of a term to be included in the results * @param category a category to restrict annotations to, or null to include all categories - * @param excludedCategoryUris ensure that the given categories are excluded - * @param excludedTermUris ensure that the given terms and their sub-terms (as per {@code subClassOf} relation) + * @param excludedCategoryUris ensure that the given category URIs are excluded + * @param excludedTermUris ensure that the given term URIs and their sub-terms (as per {@code subClassOf} relation) * are excluded; this requires relevant ontologies to be loaded in {@link ubic.gemma.core.ontology.OntologyService}. + * @param minFrequency minimum occurrences of a term to be included in the results * @param retainedTermUris ensure that the given terms are retained (overrides any exclusion from minFrequency and excludedTermUris) + * @param maxResults maximum number of results to return * @return mapping annotations grouped by category and term (URI or value if null) to their number of occurrences in * the matched datasets * @see ExpressionExperimentDao#getAnnotationsUsageFrequency(Collection, Class, int, int, String, Collection, Collection, Collection) */ - List getAnnotationsUsageFrequency( @Nullable Filters filters, int maxResults, int minFrequency, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ); + List getAnnotationsUsageFrequency( @Nullable Filters filters, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, int minFrequency, @Nullable Collection retainedTermUris, int maxResults ); /** * @param expressionExperiment experiment diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index 7bd86362a5..3338a111ed 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -727,7 +727,7 @@ private static class SubClauseKey { @Override @Transactional(readOnly = true) - public Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ) { + public Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ) { List eeIds; if ( filters == null || filters.isEmpty() ) { eeIds = null; @@ -737,7 +737,7 @@ public Map getCategoriesUsageFrequency( @Nullable Filters if ( excludedTermUris != null ) { excludedTermUris = inferTermsUris( excludedTermUris ); } - return expressionExperimentDao.getCategoriesUsageFrequency( eeIds, excludedCategoryUris, excludedTermUris, retainedTermUris ); + return expressionExperimentDao.getCategoriesUsageFrequency( eeIds, excludedCategoryUris, excludedTermUris, retainedTermUris, maxResults ); } /** @@ 
-746,7 +746,7 @@ public Map getCategoriesUsageFrequency( @Nullable Filters */ @Override @Transactional(readOnly = true) - public List getAnnotationsUsageFrequency( @Nullable Filters filters, int maxResults, int minFrequency, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ) { + public List getAnnotationsUsageFrequency( @Nullable Filters filters, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, int minFrequency, @Nullable Collection retainedTermUris, int maxResults ) { if ( excludedTermUris != null ) { excludedTermUris = inferTermsUris( excludedTermUris ); } diff --git a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java index 197d1396d0..8434cf16b3 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java +++ b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java @@ -430,7 +430,7 @@ public void testCacheInvalidationWhenACharacteristicIsDeleted() { }; tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, 0, 0, null, null, null, null ) ) + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, 0, null, 0 ) ) .noneSatisfy( consumer ); // add the term to the dataset and update the pivot table @@ -439,12 +439,12 @@ public void testCacheInvalidationWhenACharacteristicIsDeleted() { assertThat( c.getId() ).isNotNull(); // the table is out-of-date - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, 0, 0, null, null, null, null ) ) + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, 0, null, 0 ) ) .noneSatisfy( consumer ); // update the pivot table tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, 0, 0, null, null, null, null ) ) + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, 0, null, 0 ) ) .satisfiesOnlyOnce( consumer ); // remove the term, which must evict the query cache @@ -457,7 +457,7 @@ public void testCacheInvalidationWhenACharacteristicIsDeleted() { } ); // since deletions are cascaded, the change will be reflected immediatly - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, 0, 0, null, null, null, null ) ) + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, 0, null, 0 ) ) .noneSatisfy( consumer ); } diff --git a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java index b9354d4db5..c789a34518 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java @@ -182,7 +182,7 @@ public void testGetFiltersWithCategories() { @Test public void testGetAnnotationsUsageFrequency() { - 
expressionExperimentService.getAnnotationsUsageFrequency( Filters.empty(), -1, 0, null, null, null, null ); + expressionExperimentService.getAnnotationsUsageFrequency( Filters.empty(), null, null, null, 0, null, -1 ); verify( expressionExperimentDao ).getAnnotationsUsageFrequency( null, null, -1, 0, null, null, null, null ); verifyNoMoreInteractions( expressionExperimentDao ); } @@ -190,7 +190,7 @@ public void testGetAnnotationsUsageFrequency() { @Test public void testGetAnnotationsUsageFrequencyWithFilters() { Filters f = Filters.by( "c", "valueUri", String.class, Filter.Operator.eq, "http://example.com/T00001", "characteristics.valueUri" ); - expressionExperimentService.getAnnotationsUsageFrequency( f, -1, 0, null, null, null, null ); + expressionExperimentService.getAnnotationsUsageFrequency( f, null, null, null, 0, null, -1 ); verify( expressionExperimentDao ).loadIdsWithCache( f, null ); verify( expressionExperimentDao ).getAnnotationsUsageFrequency( Collections.emptyList(), null, -1, 0, null, null, null, null ); verifyNoMoreInteractions( expressionExperimentDao ); diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java index db387a4607..f01bb16fdf 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java @@ -173,7 +173,7 @@ public void testGetOriginalPlatformUsageFrequency() { @WithMockUser(authorities = "GROUP_ADMIN") public void testGetCategoriesWithUsageFrequency() { Characteristic c = createCharacteristic( "foo", "foo", "bar", "bar" ); - Assertions.assertThat( expressionExperimentDao.getCategoriesUsageFrequency( null, null, null, null ) ) + Assertions.assertThat( expressionExperimentDao.getCategoriesUsageFrequency( null, null, null, null, -1 ) ) .containsEntry( c, 1L ); } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 6cbc923844..b2b2eed4ce 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -102,6 +102,7 @@ public class DatasetsWebService { private static final String ERROR_DATA_FILE_NOT_AVAILABLE = "Data file for experiment %s can not be created."; private static final String ERROR_DESIGN_FILE_NOT_AVAILABLE = "Design file for experiment %s can not be created."; + private static final int MAX_DATASETS_CATEGORIES = 200; private static final int MAX_DATASETS_ANNOTATIONS = 5000; @Autowired @@ -300,6 +301,7 @@ public static class CategoryWithUsageStatisticsValueObject implements UsageStati public QueriedAndFilteredResponseDataObject getDatasetsCategoriesUsageStatistics( @QueryParam("query") String query, @QueryParam("filter") @DefaultValue("") FilterArg filter, + @QueryParam("limit") @DefaultValue("20") LimitArg limit, @Parameter(description = "Excluded category URIs.", hidden = true) @QueryParam("excludedCategories") StringArrayArg excludedCategoryUris, @Parameter(description = "Exclude free-text categories (i.e. 
those with null URIs).", hidden = true) @QueryParam("excludeFreeTextCategories") @DefaultValue("false") Boolean excludeFreeTextCategories, @Parameter(description = "Excluded term URIs; this list is expanded with subClassOf inference.", hidden = true) @QueryParam("excludedTerms") StringArrayArg excludedTermUris, @@ -316,11 +318,13 @@ public QueriedAndFilteredResponseDataObject results = expressionExperimentService.getCategoriesUsageFrequency( filtersWithQuery, datasetArgService.getExcludedUris( excludedCategoryUris, excludeFreeTextCategories, excludeUncategorizedTerms ), datasetArgService.getExcludedUris( excludedTermUris, excludeFreeTextTerms, excludeUncategorizedTerms ), - mentionedTerms != null ? mentionedTerms.stream().map( OntologyTerm::getUri ).collect( Collectors.toSet() ) : null ) + mentionedTerms != null ? mentionedTerms.stream().map( OntologyTerm::getUri ).collect( Collectors.toSet() ) : null, + maxResults ) .entrySet() .stream() .map( e -> new CategoryWithUsageStatisticsValueObject( e.getKey().getCategoryUri(), e.getKey().getCategory(), e.getValue() ) ) @@ -388,12 +392,8 @@ public LimitedResponseDataObject getDa Map> visited = new HashMap<>(); List initialResults = expressionExperimentService.getAnnotationsUsageFrequency( filtersWithQuery, - limit, - minFrequency != null ? minFrequency : 0, - category, - datasetArgService.getExcludedUris( excludedCategoryUris, excludeFreeTextCategories, excludeUncategorizedTerms ), - datasetArgService.getExcludedUris( excludedTermUris, excludeFreeTextTerms, excludeUncategorizedTerms ), - mentionedTerms != null ? mentionedTerms.stream().map( OntologyTerm::getUri ).collect( Collectors.toSet() ) : null ); + category, datasetArgService.getExcludedUris( excludedCategoryUris, excludeFreeTextCategories, excludeUncategorizedTerms ), datasetArgService.getExcludedUris( excludedTermUris, excludeFreeTextTerms, excludeUncategorizedTerms ), minFrequency != null ? minFrequency : 0, mentionedTerms != null ? mentionedTerms.stream().map( OntologyTerm::getUri ).collect( Collectors.toSet() ) : null, limit + ); List results = initialResults .stream() .map( e -> new AnnotationWithUsageStatisticsValueObject( e.getCharacteristic(), e.getNumberOfExpressionExperiments(), !excludeParentTerms && e.getTerm() != null ? 
getParentTerms( e.getTerm(), visited ) : null ) ) diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index b160160c72..dbbb6cc1e0 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -294,7 +294,7 @@ public void testGetDatasetsAnnotationsWithRetainMentionedTerms() { .extracting( "groupBy", InstanceOfAssertFactories.list( String.class ) ) .containsExactly( "classUri", "className", "termUri", "termName" ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.empty(), Collections.emptySet() ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), 100, 0, null, null, null, Collections.emptySet() ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, 0, Collections.emptySet(), 100 ); } @Test @@ -310,7 +310,7 @@ public void testGetDatasetsAnnotations() { .extracting( "groupBy", InstanceOfAssertFactories.list( String.class ) ) .containsExactly( "classUri", "className", "termUri", "termName" ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.empty(), null ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), 100, 0, null, null, null, null ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, 0, null, 100 ); } @Test @@ -329,7 +329,7 @@ public void testGetDatasetsAnnotationsWhenMaxFrequencyIsSuppliedLimitMustUseMaxi .entity() .hasFieldOrPropertyWithValue( "limit", 5000 ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.empty(), null ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), 5000, 10, null, null, null, null ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, 10, null, 5000 ); } @Test @@ -341,7 +341,7 @@ public void testGetDatasetsAnnotationsWithLimitIsSupplied() { .hasFieldOrPropertyWithValue( "limit", 50 ) .extracting( "groupBy", InstanceOfAssertFactories.list( String.class ) ) .containsExactly( "classUri", "className", "termUri", "termName" ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), 50, 0, null, null, null, null ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, 0, null, 50 ); } @Test @@ -349,7 +349,7 @@ public void testGetDatasetsAnnotationsForUncategorizedTerms() { assertThat( target( "/datasets/annotations" ).queryParam( "category", "" ).request().get() ) .hasStatus( Response.Status.OK ) .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), 100, 0, ExpressionExperimentService.UNCATEGORIZED, null, null, null ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), ExpressionExperimentService.UNCATEGORIZED, null, null, 0, null, 100 ); } @Test @@ -357,7 +357,7 @@ public void testGetDatasetsCategories() { assertThat( target( "/datasets/categories" ).request().get() ) .hasStatus( Response.Status.OK ) .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ); - verify( expressionExperimentService ).getCategoriesUsageFrequency( Filters.empty(), null, null, null ); + verify( 
expressionExperimentService ).getCategoriesUsageFrequency( Filters.empty(), null, null, null, 20 ); } @Test From cd3dcfa414635c8ee9cb1a236ae25922a39dbcc3 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 14 Mar 2024 12:32:57 -0700 Subject: [PATCH 006/105] Cleanup support, admin and noreply email addresses Message sent to users should always have a Reply-To set to the support email. All messages originate from the noreply email address. --- .../ubic/gemma/core/util/MailUtilsImpl.java | 13 +-- .../service/TableMaintenanceUtilImpl.java | 17 +--- .../gemma/persistence/util/MailEngine.java | 21 +++-- .../persistence/util/MailEngineImpl.java | 80 +++++++++++++------ .../ubic/gemma/persistence/util/Settings.java | 4 - .../src/main/resources/default.properties | 4 +- .../service/TableMaintenanceUtilTest.java | 4 +- .../CharacteristicDaoImplTest.java | 2 +- .../persistence/util/MailEngineTest.java | 22 +++-- .../gemma/web/controller/BaseController.java | 10 +-- .../web/controller/BaseFormController.java | 30 ------- .../SecurityControllerImpl.java | 15 +--- .../ArrayDesignControllerImpl.java | 3 +- 13 files changed, 105 insertions(+), 120 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/MailUtilsImpl.java b/gemma-core/src/main/java/ubic/gemma/core/util/MailUtilsImpl.java index 2c8381fa4c..0ea5501a52 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/util/MailUtilsImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/util/MailUtilsImpl.java @@ -25,7 +25,6 @@ import ubic.gemma.core.security.authentication.UserService; import ubic.gemma.model.common.auditAndSecurity.User; import ubic.gemma.persistence.util.MailEngine; -import ubic.gemma.persistence.util.Settings; /** * @author anton @@ -58,21 +57,13 @@ public void sendTaskCompletedNotificationEmail( EmailNotificationContext emailNo if ( emailAddress != null ) { MailUtilsImpl.log.info( "Sending email notification to " + emailAddress ); SimpleMailMessage msg = new SimpleMailMessage(); - msg.setTo( emailAddress ); - msg.setFrom( Settings.getAdminEmailAddress() ); - msg.setSubject( "Gemma task completed" ); - String logs = ""; if ( taskResult.getException() != null ) { logs += "Task failed with :\n"; logs += taskResult.getException().getMessage(); } - - msg.setText( - "A job you started on Gemma is completed (taskId=" + taskId + ", " + taskName + ")\n\n" + logs - + "\n" ); - - mailEngine.send( msg ); + String body = "A job you started on Gemma is completed (taskId=" + taskId + ", " + taskName + ")\n\n" + logs + "\n"; + mailEngine.sendMessage( emailAddress, "Gemma task completed", body ); } } } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtilImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtilImpl.java index 238ea9b190..4f57c9003c 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtilImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtilImpl.java @@ -21,13 +21,11 @@ import io.micrometer.core.annotation.Timed; import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.hibernate.SessionFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; -import org.springframework.mail.SimpleMailMessage; import org.springframework.stereotype.Service; import 
org.springframework.transaction.annotation.Transactional; import ubic.gemma.model.common.Auditable; @@ -148,9 +146,6 @@ public class TableMaintenanceUtilImpl implements TableMaintenanceUtil { @Value("${gemma.gene2cs.path}") private Path gene2CsInfoPath; - @Value("${gemma.admin.email}") - private String adminEmailAddress; - private boolean sendEmail = true; @Override @@ -337,17 +332,7 @@ private Gene2CsStatus getLastGene2CsUpdateStatus() { private void sendEmail( Gene2CsStatus results ) { if ( !sendEmail ) return; - SimpleMailMessage msg = new SimpleMailMessage(); - if ( StringUtils.isBlank( adminEmailAddress ) ) { - TableMaintenanceUtilImpl.log - .warn( "No administrator email address could be found, so gene2cs status email will not be sent." ); - return; - } - msg.setTo( adminEmailAddress ); - msg.setSubject( "Gene2Cs update status." ); - msg.setText( "Gene2Cs updating was run.\n" + results.getAnnotation() ); - mailEngine.send( msg ); - TableMaintenanceUtilImpl.log.info( "Email notification sent to " + adminEmailAddress ); + mailEngine.sendAdminMessage( "Gene2Cs update status.", "Gene2Cs updating was run.\n" + results.getAnnotation() ); } /** diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngine.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngine.java index e68501361e..8e8e5f1061 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngine.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngine.java @@ -14,8 +14,6 @@ */ package ubic.gemma.persistence.util; -import org.springframework.mail.SimpleMailMessage; - import java.util.Map; /** @@ -23,10 +21,23 @@ */ public interface MailEngine { - void sendAdminMessage( String bodyText, String subject ); + /** + * Return the admin email address used for {@link #sendAdminMessage(String, String)} + */ + String getAdminEmailAddress(); - void send( SimpleMailMessage msg ); + /** + * Send an email message to the administrator. + */ + void sendAdminMessage( String subject, String bodyText ); - void sendMessage( SimpleMailMessage msg, String templateName, Map model ); + /** + * Send a text email message. + */ + void sendMessage( String to, String subject, String body ); + /** + * Send a templated email message. 
+ */ + void sendMessage( String to, String subject, String templateName, Map model ); } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngineImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngineImpl.java index c69908b154..b045dcc44d 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngineImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/MailEngineImpl.java @@ -18,18 +18,21 @@ */ package ubic.gemma.persistence.util; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.velocity.app.VelocityEngine; import org.apache.velocity.exception.VelocityException; import org.apache.velocity.runtime.RuntimeConstants; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; import org.springframework.mail.MailException; import org.springframework.mail.MailSender; import org.springframework.mail.SimpleMailMessage; import org.springframework.stereotype.Component; import org.springframework.ui.velocity.VelocityEngineUtils; +import java.util.Arrays; import java.util.Map; /** @@ -46,49 +49,78 @@ public class MailEngineImpl implements MailEngine { @Autowired private VelocityEngine velocityEngine; + @Value("${gemma.noreply.email}") + private String noreplyEmailAddress; + + @Value("${gemma.admin.email}") + private String adminEmailAddress; + + @Value("${gemma.support.email}") + private String supportEmailAddress; + + @Override + public String getAdminEmailAddress() { + return adminEmailAddress; + } + /** * Sends a message to the gemma administrator as defined in the Gemma.properties file */ @Override - public void sendAdminMessage( String bodyText, String subject ) { - - if ( ( bodyText == null ) && ( subject == null ) ) { - MailEngineImpl.log.warn( "Not sending empty email, both subject and body are null" ); + public void sendAdminMessage( String subject, String bodyText ) { + if ( StringUtils.isBlank( adminEmailAddress ) ) { + MailEngineImpl.log.warn( "Not sending email, no admin email is configured." 
); return; } - MailEngineImpl.log.info( "Sending email notification to administrator regarding: " + subject ); SimpleMailMessage msg = new SimpleMailMessage(); - msg.setTo( Settings.getAdminEmailAddress() ); - msg.setFrom( Settings.getAdminEmailAddress() ); + msg.setFrom( noreplyEmailAddress ); + msg.setTo( adminEmailAddress ); + // no need to set the reply to support, it's meant for a Gemma admin msg.setSubject( subject ); msg.setText( bodyText ); - this.send( msg ); + send( msg ); + MailEngineImpl.log.info( "Email notification sent to " + Arrays.toString( msg.getTo() ) ); } @Override - public void send( SimpleMailMessage msg ) { - try { - mailSender.send( msg ); - } catch ( MailException ex ) { - // log it and go on - MailEngineImpl.log.error( ex.getMessage(), ex ); - MailEngineImpl.log.debug( ex, ex ); - } + public void sendMessage( String to, String subject, String body ) { + SimpleMailMessage msg = new SimpleMailMessage(); + msg.setTo( to ); + msg.setFrom( noreplyEmailAddress ); + msg.setReplyTo( supportEmailAddress ); + msg.setSubject( subject ); + msg.setText( body ); + send( msg ); } @Override - public void sendMessage( SimpleMailMessage msg, String templateName, Map model ) { - String result = null; - + public void sendMessage( String to, String subject, String templateName, Map model ) { + SimpleMailMessage msg = new SimpleMailMessage(); + msg.setTo( to ); + msg.setFrom( noreplyEmailAddress ); + msg.setReplyTo( supportEmailAddress ); + msg.setSubject( subject ); try { - result = VelocityEngineUtils - .mergeTemplateIntoString( velocityEngine, templateName, RuntimeConstants.ENCODING_DEFAULT, model ); + msg.setText( VelocityEngineUtils.mergeTemplateIntoString( velocityEngine, templateName, + RuntimeConstants.ENCODING_DEFAULT, model ) ); } catch ( VelocityException e ) { - e.printStackTrace(); + MailEngineImpl.log.error( e.getMessage(), e ); + return; } + send( msg ); + } - msg.setText( result ); - this.send( msg ); + private void send( SimpleMailMessage msg ) { + if ( StringUtils.isBlank( msg.getSubject() ) || StringUtils.isBlank( msg.getText() ) ) { + MailEngineImpl.log.warn( "Not sending empty email, both subject and body are blank" ); + return; + } + try { + mailSender.send( msg ); + } catch ( MailException ex ) { + // log it and go on + MailEngineImpl.log.error( ex.getMessage(), ex ); + } } } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/Settings.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/Settings.java index 403c563889..f3cf70808a 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/Settings.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/Settings.java @@ -172,10 +172,6 @@ public class Settings { } - public static String getAdminEmailAddress() { - return Settings.getString( "gemma.admin.email" ); - } - /** * @return The local directory where files generated by analyses are stored. It will end in a file separator ("/" on * unix). 
diff --git a/gemma-core/src/main/resources/default.properties b/gemma-core/src/main/resources/default.properties index 43705db5ee..5992c3996e 100755 --- a/gemma-core/src/main/resources/default.properties +++ b/gemma-core/src/main/resources/default.properties @@ -27,7 +27,9 @@ gemma.gene2cs.path=${gemma.appdata.home}/DBReports/gene2cs.info # base url for the system, used in formed URLs gemma.hosturl=https://gemma.msl.ubc.ca # 'From' address for system notifications and SMTP server settings -gemma.admin.email=gemma +gemma.admin.email=gemma@chibi.msl.ubc.ca +gemma.noreply.email=noreply@gemma.msl.ubc.ca +gemma.support.email=pavlab-support@msl.ubc.ca mail.host=localhost mail.username=XXXXXX mail.password= diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilTest.java index 9b17dc9166..90a9bbecd8 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/TableMaintenanceUtilTest.java @@ -44,7 +44,7 @@ static class TableMaintenanceUtilTestContextConfiguration { @Bean public static TestPropertyPlaceholderConfigurer propertyPlaceholderConfigurer() throws IOException { Path gene2csInfoPath = Files.createTempDirectory( "DBReport" ).resolve( "gene2cs.info" ); - return new TestPropertyPlaceholderConfigurer( "gemma.gene2cs.path=" + gene2csInfoPath, "gemma.admin.email=gemma" ); + return new TestPropertyPlaceholderConfigurer( "gemma.gene2cs.path=" + gene2csInfoPath ); } /** @@ -133,7 +133,7 @@ public void test() { verify( query ).executeUpdate(); verify( externalDatabaseService ).findByNameWithAuditTrail( "gene2cs" ); verify( externalDatabaseService ).updateReleaseLastUpdated( eq( gene2csDatabaseEntry ), eq( "" ), any() ); - verify( mailEngine ).send( any() ); + verify( mailEngine ).sendAdminMessage( any(), any() ); } @Test diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImplTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImplTest.java index 93fa6fb9d1..460a6ba3f4 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImplTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImplTest.java @@ -61,7 +61,7 @@ static class CharacteristicDaoImplContextConfiguration extends BaseDatabaseTestC @Bean public static TestPropertyPlaceholderConfigurer propertyPlaceholderConfigurer() throws IOException { Path gene2csInfoPath = Files.createTempDirectory( "DBReport" ).resolve( "gene2cs.info" ); - return new TestPropertyPlaceholderConfigurer( "gemma.gene2cs.path=" + gene2csInfoPath, "gemma.admin.email=gemma" ); + return new TestPropertyPlaceholderConfigurer( "gemma.gene2cs.path=" + gene2csInfoPath ); } /** diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/util/MailEngineTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/util/MailEngineTest.java index d867f29c04..fb845878c0 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/util/MailEngineTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/util/MailEngineTest.java @@ -6,21 +6,21 @@ import org.junit.Test; import org.mockito.ArgumentCaptor; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.config.PropertyPlaceholderConfigurer; import 
org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.mail.MailSender; import org.springframework.mail.SimpleMailMessage; import org.springframework.test.context.ContextConfiguration; import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests; +import ubic.gemma.core.util.test.TestPropertyPlaceholderConfigurer; import java.util.HashMap; import java.util.Map; import java.util.Properties; import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.reset; +import static org.mockito.Mockito.*; @ContextConfiguration public class MailEngineTest extends AbstractJUnit4SpringContextTests { @@ -29,6 +29,14 @@ public class MailEngineTest extends AbstractJUnit4SpringContextTests { @TestComponent static class MailEngineTestContextConfiguration { + @Bean + public static PropertyPlaceholderConfigurer propertyPlaceholderConfigurer() { + return new TestPropertyPlaceholderConfigurer( + "gemma.admin.email=gemma@chibi.msl.ubc.ca", + "gemma.noreply.email=noreply@gemma.pavlab.msl.ubc.ca", + "gemma.support.email=pavlab-support@msl.ubc.ca" ); + } + @Bean public MailEngine mailEngine() { return new MailEngineImpl(); @@ -61,13 +69,13 @@ public void tearDown() { @Test public void test() { - mailEngine.sendAdminMessage( "test", "test subject" ); + mailEngine.sendAdminMessage( "test subject", "test" ); ArgumentCaptor captor = ArgumentCaptor.forClass( SimpleMailMessage.class ); verify( mailSender ).send( captor.capture() ); assertThat( captor.getValue() ) .isNotNull().satisfies( m -> { - assertThat( m.getTo() ).containsExactly( Settings.getAdminEmailAddress() ); - assertThat( m.getFrom() ).isEqualTo( Settings.getAdminEmailAddress() ); + assertThat( m.getTo() ).containsExactly( "gemma@chibi.msl.ubc.ca" ); + assertThat( m.getFrom() ).isEqualTo( "noreply@gemma.pavlab.msl.ubc.ca" ); assertThat( m.getSubject() ).isEqualTo( "test subject" ); assertThat( m.getText() ).isEqualTo( "test" ); } ); @@ -79,7 +87,7 @@ public void testSendMessageWithVelocityTemplate() { vars.put( "username", "foo" ); vars.put( "siteurl", "http://example.com/" ); vars.put( "confirmLink", "http://example.com/confirm?token=12ijdqwer9283" ); - mailEngine.sendMessage( new SimpleMailMessage(), "accountCreated.vm", vars ); + mailEngine.sendMessage( "test", "subject", "accountCreated.vm", vars ); ArgumentCaptor captor = ArgumentCaptor.forClass( SimpleMailMessage.class ); verify( mailSender ).send( captor.capture() ); assertThat( captor.getValue() ) diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/BaseController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/BaseController.java index b088a24bbe..98e33806e8 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/BaseController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/BaseController.java @@ -1,8 +1,8 @@ /* * The Gemma project - * + * * Copyright (c) 2006 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -116,11 +116,7 @@ protected void sendConfirmationEmail( HttpServletRequest request, String token, model.put( "confirmLink", Settings.getHostUrl() + servletContext.getContextPath() + "/confirmRegistration.html?key=" + token + "&username=" + username ); - SimpleMailMessage mailMessage = new SimpleMailMessage(); - mailMessage.setFrom( Settings.getAdminEmailAddress() ); - mailMessage.setSubject( getText( "signup.email.subject", request.getLocale() ) ); - mailMessage.setTo( username + "<" + email + ">" ); - mailEngine.sendMessage( mailMessage, templateName, model ); + mailEngine.sendMessage( username + "<" + email + ">", getText( "signup.email.subject", request.getLocale() ), templateName, model ); } catch ( Exception e ) { log.error( "Couldn't send email to " + email, e ); diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/BaseFormController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/BaseFormController.java index 180cccef7d..7e9bfc38db 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/BaseFormController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/BaseFormController.java @@ -23,7 +23,6 @@ import org.apache.commons.logging.LogFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.propertyeditors.CustomNumberEditor; -import org.springframework.mail.SimpleMailMessage; import org.springframework.validation.BindException; import org.springframework.validation.ObjectError; import org.springframework.web.bind.WebDataBinder; @@ -31,7 +30,6 @@ import org.springframework.web.multipart.support.ByteArrayMultipartFileEditor; import org.springframework.web.servlet.ModelAndView; import org.springframework.web.servlet.mvc.SimpleFormController; -import ubic.gemma.model.common.auditAndSecurity.User; import ubic.gemma.persistence.util.MailEngine; import ubic.gemma.web.util.MessageUtil; @@ -40,7 +38,6 @@ import javax.servlet.http.HttpSession; import java.text.NumberFormat; import java.util.Locale; -import java.util.Map; /** * Implementation of SimpleFormController that contains convenience methods for subclasses. For @@ -177,31 +174,4 @@ protected ModelAndView processFormSubmission( HttpServletRequest request, HttpSe return super.processFormSubmission( request, response, command, errors ); } - - /** - * Convenience message to send messages to users - */ - protected void sendEmail( User user, String msg ) { - if ( StringUtils.isBlank( user.getEmail() ) ) { - log.warn( "Could not send email to " + user + ", no email address" ); - } - log.debug( "sending e-mail to user [" + user.getEmail() + "]..." 
); - SimpleMailMessage message = new SimpleMailMessage(); - message.setTo( user.getFullName() + "<" + user.getEmail() + ">" ); - - mailEngine.send( message ); - } - - /** - * Convenience message to send messages to users - */ - protected void sendEmail( User user, String templateName, Map model ) { - if ( StringUtils.isBlank( user.getEmail() ) ) { - log.warn( "Could not send email to " + user + ", no email address" ); - } - SimpleMailMessage message = new SimpleMailMessage(); - message.setTo( user.getFullName() + "<" + user.getEmail() + ">" ); - mailEngine.sendMessage( message, templateName, model ); - } - } \ No newline at end of file diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/common/auditAndSecurity/SecurityControllerImpl.java b/gemma-web/src/main/java/ubic/gemma/web/controller/common/auditAndSecurity/SecurityControllerImpl.java index 9889b3eb02..967fbfa4db 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/common/auditAndSecurity/SecurityControllerImpl.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/common/auditAndSecurity/SecurityControllerImpl.java @@ -27,7 +27,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.mail.SimpleMailMessage; import org.springframework.security.access.AccessDeniedException; import org.springframework.security.acls.model.Sid; import org.springframework.security.core.userdetails.UserDetails; @@ -118,18 +117,12 @@ public boolean addUserToGroup( String userName, String groupName ) { String emailAddress = u.getEmail(); if ( StringUtils.isNotBlank( emailAddress ) ) { SecurityControllerImpl.log.debug( "Sending email notification to " + emailAddress ); - SimpleMailMessage msg = new SimpleMailMessage(); - msg.setTo( emailAddress ); - msg.setFrom( Settings.getAdminEmailAddress() ); - msg.setSubject( "You have been added to a group on Gemma" ); - String manageGroupsUrl = Settings.getHostUrl() + servletContext.getContextPath() + "/manageGroups.html"; - msg.setText( userTakingAction.getUserName() + " has added you to the group '" + groupName + String body = userTakingAction.getUserName() + " has added you to the group '" + groupName + "'.\nTo view groups you belong to, visit " + manageGroupsUrl - + "\n\nIf you believe you received this email in error, contact " + Settings.getAdminEmailAddress() - + "." 
); - - mailEngine.send( msg ); + + "\n\nIf you believe you received this email in error, contact " + mailEngine.getAdminEmailAddress() + + "."; + mailEngine.sendMessage( emailAddress, "You have been added to a group on Gemma", body ); } return true; diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/arrayDesign/ArrayDesignControllerImpl.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/arrayDesign/ArrayDesignControllerImpl.java index f3a11edaa7..bb1c75ace9 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/arrayDesign/ArrayDesignControllerImpl.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/arrayDesign/ArrayDesignControllerImpl.java @@ -57,6 +57,7 @@ import ubic.gemma.persistence.util.EntityUtils; import ubic.gemma.persistence.util.Filter; import ubic.gemma.persistence.util.Filters; +import ubic.gemma.persistence.util.Settings; import ubic.gemma.web.remote.EntityDelegator; import ubic.gemma.web.remote.JsonReaderResponse; import ubic.gemma.web.remote.ListBatchCommand; @@ -78,7 +79,7 @@ @RequestMapping("/arrays") public class ArrayDesignControllerImpl implements ArrayDesignController { - private static final String SUPPORT_EMAIL = "pavlab-support@msl.ubc.ca"; // FIXME factor out as config + private static final String SUPPORT_EMAIL = Settings.getString( "gemma.support.email" ); private static final Log log = LogFactory.getLog( ArrayDesignControllerImpl.class.getName() ); From 2a4af2662070b19a9c1ae6317d31e2bf9a021002 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 14 Mar 2024 15:35:41 -0700 Subject: [PATCH 007/105] rest: Fix double-gzipping of platform annotations --- .../java/ubic/gemma/rest/PlatformsWebService.java | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/PlatformsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/PlatformsWebService.java index f51386170a..ab76523dbf 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/PlatformsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/PlatformsWebService.java @@ -40,8 +40,10 @@ import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.util.regex.Pattern; +import java.util.zip.GZIPInputStream; /** * RESTful interface for platforms. @@ -268,7 +270,11 @@ public FilteredAndPaginatedResponseDataObject getPlatformElemen public Response getPlatformAnnotations( // Params: @PathParam("platform") PlatformArg platformArg // Optional, default null ) { - return outputAnnotationFile( arrayDesignArgService.getEntity( platformArg ) ); + try { + return outputAnnotationFile( arrayDesignArgService.getEntity( platformArg ) ); + } catch ( IOException e ) { + throw new InternalServerErrorException( e ); + } } /** @@ -277,7 +283,7 @@ public Response getPlatformAnnotations( // Params: * @param arrayDesign the platform to fetch and output the annotation file for. * @return a Response object containing the annotation file. 
*/ - private Response outputAnnotationFile( ArrayDesign arrayDesign ) { + private Response outputAnnotationFile( ArrayDesign arrayDesign ) throws IOException { String fileName = arrayDesign.getShortName().replaceAll( Pattern.quote( "/" ), "_" ) + ArrayDesignAnnotationService.STANDARD_FILE_SUFFIX + ArrayDesignAnnotationService.ANNOTATION_FILE_SUFFIX; @@ -293,8 +299,7 @@ private Response outputAnnotationFile( ArrayDesign arrayDesign ) { throw new NotFoundException( String.format( ERROR_ANNOTATION_FILE_NOT_AVAILABLE, arrayDesign.getShortName() ) ); } } - - return Response.ok( file ) + return Response.ok( new GZIPInputStream( new FileInputStream( file ) ) ) .header( "Content-Encoding", "gzip" ) .header( "Content-Disposition", "attachment; filename=" + FilenameUtils.removeExtension( file.getName() ) ) .build(); From 60c2c1a483b15ce180a28f720bd179493492b39a Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 14 Mar 2024 16:04:32 -0700 Subject: [PATCH 008/105] Fix test for retrieving dataset categories --- .../src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index 6192bc5991..dbbb6cc1e0 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -357,7 +357,7 @@ public void testGetDatasetsCategories() { assertThat( target( "/datasets/categories" ).request().get() ) .hasStatus( Response.Status.OK ) .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ); - verify( expressionExperimentService ).getCategoriesUsageFrequency( Filters.empty(), null, null, null, -1 ); + verify( expressionExperimentService ).getCategoriesUsageFrequency( Filters.empty(), null, null, null, 20 ); } @Test From a21c2709319501f4ab4db9b2b4079b9520fa2c8f Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 14 Mar 2024 16:35:33 -0700 Subject: [PATCH 009/105] Add mail.protocl to configure SMTPs --- gemma-core/src/main/resources/default.properties | 1 + .../main/resources/ubic/gemma/applicationContext-dataSource.xml | 1 + 2 files changed, 2 insertions(+) diff --git a/gemma-core/src/main/resources/default.properties b/gemma-core/src/main/resources/default.properties index 5992c3996e..f21f0abb41 100755 --- a/gemma-core/src/main/resources/default.properties +++ b/gemma-core/src/main/resources/default.properties @@ -31,6 +31,7 @@ gemma.admin.email=gemma@chibi.msl.ubc.ca gemma.noreply.email=noreply@gemma.msl.ubc.ca gemma.support.email=pavlab-support@msl.ubc.ca mail.host=localhost +mail.protocol=smtp mail.username=XXXXXX mail.password= # CORS diff --git a/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml b/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml index 049453c23b..4dcdc5710d 100644 --- a/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml +++ b/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml @@ -39,6 +39,7 @@ + From 06beae7a4291968aa82b88579356d43ef4c003df Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 14 Mar 2024 16:40:26 -0700 Subject: [PATCH 010/105] Set the noreply email to pavlab-apps@msl.ubc.ca for now --- gemma-core/src/main/resources/default.properties | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/gemma-core/src/main/resources/default.properties b/gemma-core/src/main/resources/default.properties index f21f0abb41..e1e247f4f7 100755 --- a/gemma-core/src/main/resources/default.properties +++ b/gemma-core/src/main/resources/default.properties @@ -28,7 +28,7 @@ gemma.gene2cs.path=${gemma.appdata.home}/DBReports/gene2cs.info gemma.hosturl=https://gemma.msl.ubc.ca # 'From' address for system notifications and SMTP server settings gemma.admin.email=gemma@chibi.msl.ubc.ca -gemma.noreply.email=noreply@gemma.msl.ubc.ca +gemma.noreply.email=pavlab-apps@msl.ubc.ca gemma.support.email=pavlab-support@msl.ubc.ca mail.host=localhost mail.protocol=smtp From 26e74112636c2927d0d362891bf87f03274dc3f5 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 15 Mar 2024 09:41:17 -0700 Subject: [PATCH 011/105] Simplify the database configuration --- .../src/main/resources/default.properties | 13 ++------ .../gemma/applicationContext-dataSource.xml | 30 +++++++++---------- 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/gemma-core/src/main/resources/default.properties b/gemma-core/src/main/resources/default.properties index e1e247f4f7..573f6a13f7 100755 --- a/gemma-core/src/main/resources/default.properties +++ b/gemma-core/src/main/resources/default.properties @@ -46,15 +46,10 @@ ga.debug=false gemma.db.host=localhost gemma.db.port=3306 gemma.db.name=gemd -gemma.db.driver=com.mysql.cj.jdbc.Driver -gemma.db.url=jdbc:mysql://${gemma.db.host}:${gemma.db.port}/${gemma.db.name}?useSSL=false&rewriteBatchedStatements=true +gemma.db.url=jdbc:mysql://${gemma.db.host}:${gemma.db.port}/${gemma.db.name}?useSSL=false # You must override these settings: gemma.db.user=gemmauser gemma.db.password=XXXXXX -# This ensure some basic behaviors of our database -gemma.db.sqlMode=STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION -# Default timezone for storage of DATETIME that are mapped to exact moments (i.e. 
java.util.Date) -gemma.db.timezone=America/Vancouver # Maximum size for the connections pool gemma.db.maximumPoolSize=10 ############################################################ @@ -194,12 +189,10 @@ gemma.localTasks.corePoolSize=16 gemma.testdb.host=localhost gemma.testdb.port=3307 gemma.testdb.name=gemdtest -gemma.testdb.url=jdbc:mysql://${gemma.testdb.host}:${gemma.testdb.port}/${gemma.testdb.name}?useSSL=false&rewriteBatchedStatements=true -gemma.testdb.driver=${gemma.db.driver} +gemma.testdb.url=jdbc:mysql://${gemma.testdb.host}:${gemma.testdb.port}/${gemma.testdb.name}?useSSL=false gemma.testdb.user=gemmatest gemma.testdb.password=1234 -gemma.testdb.timezone=${gemma.db.timezone} -gemma.testdb.sqlMode=${gemma.db.sqlMode} +gemma.testdb.maximumPoolSize=10 #the external database id to exclude by default in phenocarta gemma.neurocarta.exluded_database_id=85 # Featured external databases in Gemma Web About page and Gemma REST main endpoint diff --git a/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml b/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml index 4dcdc5710d..1fd0bf6c58 100644 --- a/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml +++ b/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml @@ -1,38 +1,38 @@ + http://www.springframework.org/schema/beans/spring-beans-3.2.xsd http://www.springframework.org/schema/util http://www.springframework.org/schema/util/spring-util.xsd"> + + + true + + America/Vancouver + + sql_mode='STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION' + - + - - - ${gemma.db.timezone} - sql_mode='${gemma.db.sqlMode}' - - + - + - - - ${gemma.testdb.timezone} - sql_mode='${gemma.testdb.sqlMode}' - - + + From a9819231acbb2ab4ae2b277dceb30069726444f2 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 15 Mar 2024 10:33:43 -0700 Subject: [PATCH 012/105] Include additional properties in getDatasetsAnnotationsUsageFrequency() Include addditional properties when querying parent terms in the getDatasetsAnnotationsUsageFrequency() endpoint. Improve cycle detection when traversing parents. Fix a bug when querying direct parents: the requery fix was essentially triggering an indirect search. It's not actually necessary to requery for direct terms. 
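The cycle handling added here roughly amounts to a depth-first walk over direct parents that skips any term already on the current traversal path and memoizes terms whose ancestors were already resolved. As an illustration of that pattern only, here is a small standalone sketch over plain string identifiers; it does not use Gemma's OntologyTerm or OntologyService types, and all names in it are made up for the example:

    import java.util.*;

    // Sketch: collect all ancestors of a node while tolerating cycles in the parent graph.
    public class ParentTraversalSketch {

        // node -> direct parents (placeholder data, not the ontology model)
        private final Map<String, Set<String>> directParents;

        public ParentTraversalSketch( Map<String, Set<String>> directParents ) {
            this.directParents = directParents;
        }

        public Set<String> getAncestors( String node ) {
            // the starting node goes on the stack so a cycle back to it is also skipped
            return getAncestors( node, new LinkedHashSet<>( Collections.singleton( node ) ), new HashMap<>() );
        }

        private Set<String> getAncestors( String node, LinkedHashSet<String> stack, Map<String, Set<String>> memo ) {
            Set<String> result = new HashSet<>();
            for ( String parent : directParents.getOrDefault( node, Collections.emptySet() ) ) {
                if ( stack.contains( parent ) ) {
                    continue; // this edge closes a cycle on the current path, skip it
                }
                Set<String> ancestorsOfParent = memo.get( parent );
                if ( ancestorsOfParent == null ) {
                    stack.add( parent );
                    ancestorsOfParent = getAncestors( parent, stack, memo );
                    stack.remove( parent );
                    memo.put( parent, ancestorsOfParent );
                }
                result.add( parent );
                result.addAll( ancestorsOfParent );
            }
            return result;
        }

        public static void main( String[] args ) {
            Map<String, Set<String>> g = new HashMap<>();
            g.put( "a", Collections.singleton( "b" ) );
            g.put( "b", Collections.singleton( "c" ) );
            g.put( "c", Collections.singleton( "a" ) ); // a -> b -> c -> a forms a cycle
            // prints the two reachable ancestors of "a" (order unspecified), without looping forever
            System.out.println( new ParentTraversalSketch( g ).getAncestors( "a" ) );
        }
    }

The memo map plays the same role as the visited cache shared across terms, while the stack only tracks the current path, so legitimate diamond-shaped ancestry is still fully reported.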
--- .../core/ontology/OntologyServiceImpl.java | 159 +++++++++++------- .../ExpressionExperimentService.java | 2 +- .../ExpressionExperimentServiceImpl.java | 8 +- .../ubic/gemma/rest/DatasetsWebService.java | 58 ++++--- 4 files changed, 136 insertions(+), 91 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java index 953f5ab519..4992535bcd 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java @@ -63,6 +63,7 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorCompletionService; import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; import java.util.function.Function; import java.util.stream.Collectors; @@ -388,29 +389,47 @@ public Collection findTermsInexact( String givenQuery @Override public Set getParents( Collection terms, boolean direct, boolean includeAdditionalProperties ) { - Set toQuery = new HashSet<>( terms ); - Set results = new HashSet<>(); - while ( !toQuery.isEmpty() ) { - Set newResults = combineInThreads( os -> ontologyCache.getParents( os, toQuery, direct, includeAdditionalProperties ) ); - results.addAll( newResults ); - // toQuery = newResults - toQuery - newResults.removeAll( toQuery ); - toQuery.clear(); - toQuery.addAll( newResults ); - } - return results; + return getParentsOrChildren( terms, direct, includeAdditionalProperties, true ); } @Override public Set getChildren( Collection terms, boolean direct, boolean includeAdditionalProperties ) { + return getParentsOrChildren( terms, direct, includeAdditionalProperties, false ); + } + + private Set getParentsOrChildren( Collection terms, boolean direct, boolean includeAdditionalProperties, boolean parents ) { + if ( terms.isEmpty() ) { + return Collections.emptySet(); + } Set toQuery = new HashSet<>( terms ); Set results = new HashSet<>(); while ( !toQuery.isEmpty() ) { - Set newResults = combineInThreads( os -> ontologyCache.getChildren( os, toQuery, direct, includeAdditionalProperties ) ); - results.addAll( newResults ); - newResults.removeAll( toQuery ); - toQuery.clear(); - toQuery.addAll( newResults ); + Set newResults = combineInThreads( os -> { + StopWatch timer = StopWatch.createStarted(); + try { + return parents ? ontologyCache.getParents( os, toQuery, direct, includeAdditionalProperties ) + : ontologyCache.getChildren( os, toQuery, direct, includeAdditionalProperties ); + } finally { + if ( timer.getTime() > 10L * terms.size() ) { + log.warn( String.format( "Obtaining %s from %s for %s took %s", + parents ? "parents" : "children", + os, + terms.size() == 1 ? terms.iterator().next() : terms.size() + " terms", + timer ) ); + } + } + } ); + if ( results.addAll( newResults ) && !direct ) { + // there are new results (i.e. a term was inferred from a different ontology), we need to requery them + // if they were not in the query + newResults.removeAll( toQuery ); + toQuery.clear(); + toQuery.addAll( newResults ); + log.info( String.format( "Found %d new %s terms, will requery them.", newResults.size(), + parents ? "parents" : "children" ) ); + } else { + toQuery.clear(); + } } return results; } @@ -887,53 +906,18 @@ private T findFirst( Function - * The functions are evaluated using Gemma's short-lived task executor. 
- */ - private Set combineInThreads( Function> work, List ontologyServices ) { - List>> futures = new ArrayList<>( ontologyServices.size() ); - ExecutorCompletionService> completionService = new ExecutorCompletionService<>( taskExecutor ); - for ( ubic.basecode.ontology.providers.OntologyService os : ontologyServices ) { - if ( os.isOntologyLoaded() ) { - futures.add( completionService.submit( () -> work.apply( os ) ) ); - } - } - Set children = new HashSet<>(); - try { - for ( int i = 0; i < futures.size(); i++ ) { - children.addAll( completionService.take().get() ); - } - } catch ( InterruptedException e ) { - log.warn( "Current thread was interrupted while waiting, will only return results collected so far.", e ); - Thread.currentThread().interrupt(); - return children; - } catch ( ExecutionException e ) { - if ( e.getCause() instanceof RuntimeException ) { - throw ( RuntimeException ) e.getCause(); - } else { - throw new RuntimeException( e.getCause() ); - } - } finally { - // cancel all the remaining futures, this way if an exception occur, we don't needlessly occupy threads - // in the pool - for ( Future> future : futures ) { - future.cancel( true ); - } - } - return children; - } - - private Set combineInThreads( Function> work ) { - return combineInThreads( work, ontologyServices ); - } - @FunctionalInterface private interface SearchFunction { Collection apply( ubic.basecode.ontology.providers.OntologyService service ) throws OntologySearchException; } + /** + * Similar to {@link #combineInThreads(Function)}, but also handles {@link OntologySearchException}. + */ + private Set searchInThreads( SearchFunction function ) throws BaseCodeOntologySearchException { + return searchInThreads( function, ontologyServices ); + } + private Set searchInThreads( SearchFunction function, List ontologyServices ) throws BaseCodeOntologySearchException { try { return combineInThreads( os -> { @@ -948,13 +932,6 @@ private Set searchInThreads( SearchFunction function, List Set searchInThreads( SearchFunction function ) throws BaseCodeOntologySearchException { - return searchInThreads( function, ontologyServices ); - } - private static class OntologySearchExceptionWrapper extends RuntimeException { private final OntologySearchException cause; @@ -970,5 +947,57 @@ public synchronized OntologySearchException getCause() { } } + private Set combineInThreads( Function> work ) { + return combineInThreads( work, ontologyServices ); + } + /** + * Apply a given function to all the loaded ontology service and combine the results in a set. + *

+ * The functions are evaluated using Gemma's short-lived task executor. + */ + private Set combineInThreads( Function> work, List ontologyServices ) { + List>> futures = new ArrayList<>( ontologyServices.size() ); + ExecutorCompletionService> completionService = new ExecutorCompletionService<>( taskExecutor ); + for ( ubic.basecode.ontology.providers.OntologyService os : ontologyServices ) { + if ( os.isOntologyLoaded() ) { + futures.add( completionService.submit( () -> work.apply( os ) ) ); + } + } + Set children = new HashSet<>(); + try { + for ( int i = 0; i < futures.size(); i++ ) { + Future> future; + while ( ( future = completionService.poll( 1, TimeUnit.SECONDS ) ) == null ) { + log.warn( String.format( "Ontology query is taking too long (%d/%d completed so far).", i, futures.size() ) ); + } + children.addAll( future.get() ); + } + } catch ( InterruptedException e ) { + log.warn( "Current thread was interrupted while waiting, will only return results collected so far.", e ); + Thread.currentThread().interrupt(); + return children; + } catch ( ExecutionException e ) { + if ( e.getCause() instanceof RuntimeException ) { + throw ( RuntimeException ) e.getCause(); + } else { + throw new RuntimeException( e.getCause() ); + } + } finally { + // cancel all the remaining futures, this way if an exception occur, we don't needlessly occupy threads + // in the pool + List incompleteTasks = new ArrayList<>( futures.size() ); + for ( Future> future : futures ) { + if ( !future.isDone() ) { + incompleteTasks.add( ontologyServices.get( futures.indexOf( future ) ).toString() ); + future.cancel( true ); + } + } + if ( !incompleteTasks.isEmpty() ) { + log.warn( "The following ontology services did not have time to reply:\n\t" + + String.join( "\n\t", incompleteTasks ) ); + } + } + return children; + } } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java index 7bb7a3bc8b..e1b9741103 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java @@ -329,7 +329,7 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm { * @param retainedTermUris ensure that the given terms are retained (overrides any exclusion from minFrequency and excludedTermUris) * @param maxResults maximum number of results to return * @return mapping annotations grouped by category and term (URI or value if null) to their number of occurrences in - * the matched datasets + * the matched datasets and ordered in descending number of associated experiments * @see ExpressionExperimentDao#getAnnotationsUsageFrequency(Collection, Class, int, int, String, Collection, Collection, Collection) */ List getAnnotationsUsageFrequency( @Nullable Filters filters, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, int minFrequency, @Nullable Collection retainedTermUris, int maxResults ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index 3338a111ed..608acda2f1 100755 --- 
a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -766,14 +766,15 @@ public List getAnnotationsUsag .flatMap( c -> Stream.of( c.getValueUri(), c.getCategoryUri() ) ) .filter( Objects::nonNull ) .collect( Collectors.toSet() ); - // TODO: handle more than one term per URI Map> termByUri = ontologyService.getTerms( uris ).stream() + .filter( t -> t.getUri() != null ) // should never occur, but better be safe than sorry .collect( Collectors.groupingBy( OntologyTerm::getUri, Collectors.toSet() ) ); for ( Map.Entry entry : result.entrySet() ) { Characteristic c = entry.getKey(); OntologyTerm term; if ( c.getValueUri() != null && termByUri.containsKey( c.getValueUri() ) ) { + // TODO: handle more than one term per URI term = termByUri.get( c.getValueUri() ).iterator().next(); } else if ( c.getCategoryUri() != null && termByUri.containsKey( c.getCategoryUri() ) ) { term = new OntologyTermSimpleWithCategory( c.getValueUri(), c.getValue(), termByUri.get( c.getCategoryUri() ).iterator().next() ); @@ -784,6 +785,9 @@ public List getAnnotationsUsag resultWithParents.add( new CharacteristicWithUsageStatisticsAndOntologyTerm( entry.getKey(), entry.getValue(), term ) ); } + // sort in descending order + resultWithParents.sort( Comparator.comparing( CharacteristicWithUsageStatisticsAndOntologyTerm::getNumberOfExpressionExperiments, Comparator.reverseOrder() ) ); + return resultWithParents; } @@ -833,7 +837,7 @@ public Collection getParents( boolean direct, boolean includeAddit return Collections.singleton( categoryTerm ); } else { // combine the direct parents + all the parents from the parents - return Stream.concat( Stream.of( categoryTerm ), Stream.of( categoryTerm ).flatMap( t -> getParents( false, includeAdditionalProperties, keepObsoletes ).stream() ) ) + return Stream.concat( Stream.of( categoryTerm ), Stream.of( categoryTerm ).flatMap( t -> t.getParents( false, includeAdditionalProperties, keepObsoletes ).stream() ) ) .collect( Collectors.toSet() ); } } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index b2b2eed4ce..9e3e0c964e 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -39,6 +39,7 @@ import ubic.gemma.core.analysis.preprocess.svd.SVDService; import ubic.gemma.core.analysis.preprocess.svd.SVDValueObject; import ubic.gemma.core.analysis.service.ExpressionDataFileService; +import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.DefaultHighlighter; import ubic.gemma.core.search.SearchResult; import ubic.gemma.core.search.lucene.SimpleMarkdownFormatter; @@ -125,6 +126,8 @@ public class DatasetsWebService { private GeneArgService geneArgService; @Autowired private QuantitationTypeArgService quantitationTypeArgService; + @Autowired + private OntologyService ontologyService; @Autowired private HttpServletRequest request; @@ -388,16 +391,19 @@ public LimitedResponseDataObject getDa if ( category != null && category.isEmpty() ) { category = ExpressionExperimentService.UNCATEGORIZED; } - // cache for visited parents (if two term share the same parent, we can save significant time generating the ancestors) - Map> visited = new HashMap<>(); List initialResults = 
expressionExperimentService.getAnnotationsUsageFrequency( filtersWithQuery, - category, datasetArgService.getExcludedUris( excludedCategoryUris, excludeFreeTextCategories, excludeUncategorizedTerms ), datasetArgService.getExcludedUris( excludedTermUris, excludeFreeTextTerms, excludeUncategorizedTerms ), minFrequency != null ? minFrequency : 0, mentionedTerms != null ? mentionedTerms.stream().map( OntologyTerm::getUri ).collect( Collectors.toSet() ) : null, limit - ); + category, + datasetArgService.getExcludedUris( excludedCategoryUris, excludeFreeTextCategories, excludeUncategorizedTerms ), + datasetArgService.getExcludedUris( excludedTermUris, excludeFreeTextTerms, excludeUncategorizedTerms ), + minFrequency != null ? minFrequency : 0, + mentionedTerms != null ? mentionedTerms.stream().map( OntologyTerm::getUri ).collect( Collectors.toSet() ) : null, + limit ); + // cache for visited parents (if two term share the same parent, we can save significant time generating the ancestors) + Map> visited = new HashMap<>(); List results = initialResults .stream() .map( e -> new AnnotationWithUsageStatisticsValueObject( e.getCharacteristic(), e.getNumberOfExpressionExperiments(), !excludeParentTerms && e.getTerm() != null ? getParentTerms( e.getTerm(), visited ) : null ) ) - .sorted( Comparator.comparing( UsageStatistics::getNumberOfExpressionExperiments, Comparator.reverseOrder() ) ) .collect( Collectors.toList() ); return Responder.limit( results, query, filters, new String[] { "classUri", "className", "termUri", "termName" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ), @@ -415,25 +421,31 @@ private Set getExcludedFields( @Nullable ExcludeArg( exclude.getValue() ); } - - private static Set getParentTerms( OntologyTerm c, Map> visited ) { - return c.getParents( true, false ).stream() - .map( t -> toTermVo( t, visited ) ) - .collect( Collectors.toSet() ); + private Set getParentTerms( OntologyTerm c, Map> visited ) { + return getParentTerms( c, new LinkedHashSet<>(), visited ); } - private static OntologyTermValueObject toTermVo( OntologyTerm ontologyTerm, Map> visited ) { - Set parentVos; - if ( visited.containsKey( ontologyTerm ) ) { - parentVos = visited.get( ontologyTerm ); - } else { - visited.put( ontologyTerm, Collections.emptySet() ); - parentVos = ontologyTerm.getParents( true, false ).stream() - .map( t -> toTermVo( t, visited ) ) - .collect( Collectors.toSet() ); - visited.put( ontologyTerm, parentVos ); - } - return new OntologyTermValueObject( ontologyTerm, parentVos ); + private Set getParentTerms( OntologyTerm c, LinkedHashSet stack, Map> visited ) { + return ontologyService.getParents( Collections.singleton( c ), true, true ).stream() + .map( t -> { + Set parentVos; + if ( stack.contains( t ) ) { + log.debug( "Detected a cycle when visiting " + t + ": " + stack.stream() + .map( ot -> ot.equals( t ) ? ot + "*" : ot.toString() ) + .collect( Collectors.joining( " -> " ) ) + " -> " + t + "*" ); + return null; + } else if ( visited.containsKey( t ) ) { + parentVos = visited.get( t ); + } else { + stack.add( t ); + parentVos = getParentTerms( t, stack, visited ); + stack.remove( t ); + visited.put( t, parentVos ); + } + return new OntologyTermValueObject( t, parentVos ); + } ) + .filter( Objects::nonNull ) + .collect( Collectors.toSet() ); } @Value @@ -465,7 +477,7 @@ public static class AnnotationWithUsageStatisticsValueObject extends AnnotationV Long numberOfExpressionExperiments; /** - * URIs of parent terms. 
+ * URIs of parent terms, or null if excluded. */ @Nullable @JsonInclude(JsonInclude.Include.NON_NULL) From a48554a48c3a288752c2c4d4899ab557eeab862c Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 15 Mar 2024 11:16:04 -0700 Subject: [PATCH 013/105] Use HikariCP for goldenpath datasource Fix missing driver class for HikariCP. --- .../gemma/core/externalDb/GoldenPath.java | 37 +++++-------------- .../src/main/resources/default.properties | 6 ++- 2 files changed, 14 insertions(+), 29 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java b/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java index 36191a53a8..ecd4fca886 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java +++ b/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java @@ -18,19 +18,16 @@ */ package ubic.gemma.core.externalDb; +import com.zaxxer.hikari.HikariDataSource; import lombok.Getter; -import org.apache.commons.lang3.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.springframework.jdbc.core.JdbcTemplate; -import org.springframework.jdbc.datasource.SimpleDriverDataSource; import ubic.gemma.model.common.description.DatabaseType; import ubic.gemma.model.common.description.ExternalDatabase; import ubic.gemma.model.genome.Taxon; import ubic.gemma.persistence.util.Settings; -import java.sql.Driver; - /** * Perform useful queries against GoldenPath (UCSC) databases. * @@ -57,36 +54,22 @@ public GoldenPath( Taxon taxon ) { private static JdbcTemplate createJdbcTemplateFromConfig( Taxon taxon ) { String host; - int port; - String user; - String password; String databaseName = getDbNameForTaxon( taxon ); - host = Settings.getString( "gemma.goldenpath.db.host" ); - port = Settings.getInt( "gemma.goldenpath.db.port", 3306 ); - - user = Settings.getString( "gemma.goldenpath.db.user" ); - password = Settings.getString( "gemma.goldenpath.db.password" ); - - SimpleDriverDataSource dataSource = new SimpleDriverDataSource(); - String url = "jdbc:mysql://" + host + ":" + port + "/" + databaseName + "?relaxAutoCommit=true&useSSL=false"; + // SimpleDriverDataSource dataSource = new SimpleDriverDataSource(); + HikariDataSource dataSource = new HikariDataSource(); + String driverClassName = Settings.getString( "gemma.goldenpath.db.driver" ); + String url = Settings.getString( "gemma.goldenpath.db.url" ); + String user = Settings.getString( "gemma.goldenpath.db.user" ); + String password = Settings.getString( "gemma.goldenpath.db.password" ); GoldenPath.log.info( "Connecting to " + databaseName ); GoldenPath.log.debug( "Connecting to Golden Path : " + url + " as " + user ); - String driver = Settings.getString( "gemma.goldenpath.db.driver" ); - if ( StringUtils.isBlank( driver ) ) { - driver = Settings.getString( "gemma.db.driver" ); - GoldenPath.log.warn( "No DB driver configured for GoldenPath, falling back on gemma.db.driver=" + driver ); - } - try { - //noinspection unchecked - dataSource.setDriverClass( ( Class ) Class.forName( driver ) ); - } catch ( ClassNotFoundException e ) { - throw new RuntimeException( e ); - } - dataSource.setUrl( url ); + dataSource.setDriverClassName( driverClassName ); + dataSource.setJdbcUrl( url ); dataSource.setUsername( user ); dataSource.setPassword( password ); + dataSource.setMaximumPoolSize( Settings.getInt( "gemma.goldenpath.db.maximumPoolSize" ) ); JdbcTemplate jdbcTemplate = new JdbcTemplate( dataSource ); 
jdbcTemplate.setFetchSize( 50 ); diff --git a/gemma-core/src/main/resources/default.properties b/gemma-core/src/main/resources/default.properties index 573f6a13f7..9e00870cc1 100755 --- a/gemma-core/src/main/resources/default.properties +++ b/gemma-core/src/main/resources/default.properties @@ -106,11 +106,13 @@ gemma.linearmodels.useR=false ########################################################### # Sequence annotation databases # Parameters for GoldenPath database installations. -gemma.goldenpath.db.driver=${gemma.db.driver} +gemma.goldenpath.db.driver=com.mysql.cj.jdbc.Driver gemma.goldenpath.db.host=${gemma.db.host} +gemma.goldenpath.db.port=${gemma.db.port} +gemma.goldenpath.db.url=jdbc:mysql://${gemma.goldenpath.db.host}:${gemma.goldenpath.db.port}?relaxAutoCommit=true&useSSL=false gemma.goldenpath.db.user=${gemma.db.user} gemma.goldenpath.db.password=${gemma.db.password} -gemma.goldenpath.db.port=${gemma.db.port} +gemma.goldenpath.db.maximumPoolSize=10 gemma.goldenpath.db.human=hg38 gemma.goldenpath.db.mouse=mm39 gemma.goldenpath.db.rat=rn7 From a0425a34b431d802afc62543f93c95324341440a Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 15 Mar 2024 12:33:26 -0700 Subject: [PATCH 014/105] Update baseCode to 1.1.21-SNAPSHOT --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 6e17522767..71fd92716d 100644 --- a/pom.xml +++ b/pom.xml @@ -140,7 +140,7 @@ baseCode baseCode - 1.1.20 + 1.1.21-SNAPSHOT From 7b6d7eb61c2d9ad309b911d494549c5c34705661 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 15 Mar 2024 13:01:59 -0700 Subject: [PATCH 015/105] Add missing use statement for Goldenpath --- .../src/main/java/ubic/gemma/core/externalDb/GoldenPath.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java b/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java index ecd4fca886..c59e4e8350 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java +++ b/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java @@ -74,6 +74,8 @@ private static JdbcTemplate createJdbcTemplateFromConfig( Taxon taxon ) { JdbcTemplate jdbcTemplate = new JdbcTemplate( dataSource ); jdbcTemplate.setFetchSize( 50 ); + jdbcTemplate.execute( "use " + databaseName ); + return jdbcTemplate; } From 38bc279ea0602d2227eed7c32f160aed3e8a6197 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Sat, 16 Mar 2024 16:01:52 -0700 Subject: [PATCH 016/105] Add missing ee2ad trigger --- .../main/resources/ubic/gemma/applicationContext-schedule.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml b/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml index ba44d18cc1..5e3288ad8c 100644 --- a/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml +++ b/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml @@ -26,6 +26,7 @@ + From d8cae470a34dbaf5c7a3fb4aa70faccd5228ff38 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Sun, 17 Mar 2024 12:47:42 -0700 Subject: [PATCH 017/105] Move relaxAutoCommit option in the datasource declaration --- .../src/main/java/ubic/gemma/core/externalDb/GoldenPath.java | 1 + gemma-core/src/main/resources/default.properties | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java 
b/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java index c59e4e8350..8fd74aa307 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java +++ b/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java @@ -70,6 +70,7 @@ private static JdbcTemplate createJdbcTemplateFromConfig( Taxon taxon ) { dataSource.setUsername( user ); dataSource.setPassword( password ); dataSource.setMaximumPoolSize( Settings.getInt( "gemma.goldenpath.db.maximumPoolSize" ) ); + dataSource.addDataSourceProperty( "relaxAutoCommit", "true" ); JdbcTemplate jdbcTemplate = new JdbcTemplate( dataSource ); jdbcTemplate.setFetchSize( 50 ); diff --git a/gemma-core/src/main/resources/default.properties b/gemma-core/src/main/resources/default.properties index 9e00870cc1..0e95d0939d 100755 --- a/gemma-core/src/main/resources/default.properties +++ b/gemma-core/src/main/resources/default.properties @@ -109,7 +109,7 @@ gemma.linearmodels.useR=false gemma.goldenpath.db.driver=com.mysql.cj.jdbc.Driver gemma.goldenpath.db.host=${gemma.db.host} gemma.goldenpath.db.port=${gemma.db.port} -gemma.goldenpath.db.url=jdbc:mysql://${gemma.goldenpath.db.host}:${gemma.goldenpath.db.port}?relaxAutoCommit=true&useSSL=false +gemma.goldenpath.db.url=jdbc:mysql://${gemma.goldenpath.db.host}:${gemma.goldenpath.db.port}?useSSL=false gemma.goldenpath.db.user=${gemma.db.user} gemma.goldenpath.db.password=${gemma.db.password} gemma.goldenpath.db.maximumPoolSize=10 From 861e048d5c2ee9fefc7b5157d6bd22cef07e7a76 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Sun, 17 Mar 2024 12:49:02 -0700 Subject: [PATCH 018/105] Fix missing OntologyService mock in DatasetsWebServiceTest --- .../test/java/ubic/gemma/rest/DatasetsWebServiceTest.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index dbbb6cc1e0..3cedb11bc4 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -14,6 +14,7 @@ import ubic.gemma.core.analysis.preprocess.OutlierDetectionService; import ubic.gemma.core.analysis.preprocess.svd.SVDService; import ubic.gemma.core.analysis.service.ExpressionDataFileService; +import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; import ubic.gemma.core.search.SearchService; @@ -122,6 +123,11 @@ public AnalyticsProvider analyticsProvider() { public AccessDecisionManager accessDecisionManager() { return mock( AccessDecisionManager.class ); } + + @Bean + public OntologyService ontologyService() { + return mock(); + } } @Autowired From b95b18dbd3c6574bd978d24832c6d1e35e277ce3 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Sun, 17 Mar 2024 14:59:44 -0700 Subject: [PATCH 019/105] Remove unused GenericValueObjectConverter This has been replaced with ServiceBasedValueObjectConverter. 
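Taken together, the GoldenPath changes in patches 013, 015 and 017 leave the connection assembled roughly as sketched below. The helper class and the Properties-based settings lookup are purely illustrative (they are not part of the patch series); the HikariCP and JdbcTemplate calls, however, mirror the ones added to GoldenPath.java above.

import com.zaxxer.hikari.HikariDataSource;
import org.springframework.jdbc.core.JdbcTemplate;

import java.util.Properties;

public class GoldenPathWiringSketch {
    /** Illustrative only: build a pooled GoldenPath JdbcTemplate from externally supplied settings. */
    static JdbcTemplate createTemplate( Properties settings, String databaseName ) {
        HikariDataSource dataSource = new HikariDataSource();
        dataSource.setDriverClassName( settings.getProperty( "gemma.goldenpath.db.driver" ) );
        dataSource.setJdbcUrl( settings.getProperty( "gemma.goldenpath.db.url" ) );
        dataSource.setUsername( settings.getProperty( "gemma.goldenpath.db.user" ) );
        dataSource.setPassword( settings.getProperty( "gemma.goldenpath.db.password" ) );
        dataSource.setMaximumPoolSize( Integer.parseInt( settings.getProperty( "gemma.goldenpath.db.maximumPoolSize", "10" ) ) );
        // patch 017: relaxAutoCommit becomes a driver property instead of being baked into the JDBC URL
        dataSource.addDataSourceProperty( "relaxAutoCommit", "true" );
        JdbcTemplate jdbcTemplate = new JdbcTemplate( dataSource );
        jdbcTemplate.setFetchSize( 50 );
        // patch 015: the per-taxon assembly (hg38, mm39, rn7, ...) is selected with a plain "use" statement
        jdbcTemplate.execute( "use " + databaseName );
        return jdbcTemplate;
    }
}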
--- .../util/GenericValueObjectConverter.java | 67 ----------------- .../util/GenericValueObjectConverterTest.java | 72 ------------------- 2 files changed, 139 deletions(-) delete mode 100644 gemma-core/src/main/java/ubic/gemma/persistence/util/GenericValueObjectConverter.java delete mode 100644 gemma-core/src/test/java/ubic/gemma/persistence/util/GenericValueObjectConverterTest.java diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/GenericValueObjectConverter.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/GenericValueObjectConverter.java deleted file mode 100644 index ca4df6e9e8..0000000000 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/GenericValueObjectConverter.java +++ /dev/null @@ -1,67 +0,0 @@ -package ubic.gemma.persistence.util; - -import org.springframework.core.convert.ConverterNotFoundException; -import org.springframework.core.convert.TypeDescriptor; -import org.springframework.core.convert.converter.ConditionalGenericConverter; -import org.springframework.core.convert.converter.Converter; -import ubic.gemma.model.IdentifiableValueObject; -import ubic.gemma.model.common.Identifiable; - -import javax.annotation.Nullable; -import java.util.*; -import java.util.stream.Collectors; - -/** - * Generic value object converter. - *
<p>
- * Performs conversion from entity to value object using a provided {@link Converter}. - * - * @author poirigui - */ -public class GenericValueObjectConverter> implements ConditionalGenericConverter { - - private final Converter converter; - private final Set convertibleTypes; - private final TypeDescriptor sourceType; - private final TypeDescriptor sourceCollectionType; - private final TypeDescriptor targetType; - private final TypeDescriptor targetListType; - - public GenericValueObjectConverter( Converter converter, Class fromClazz, Class clazz ) { - this.converter = converter; - Set convertibleTypes = new HashSet<>(); - convertibleTypes.add( new ConvertiblePair( Identifiable.class, IdentifiableValueObject.class ) ); - convertibleTypes.add( new ConvertiblePair( Collection.class, Collection.class ) ); - this.convertibleTypes = Collections.unmodifiableSet( convertibleTypes ); - this.sourceType = TypeDescriptor.valueOf( fromClazz ); - this.sourceCollectionType = TypeDescriptor.collection( Collection.class, this.sourceType ); - this.targetType = TypeDescriptor.valueOf( clazz ); - this.targetListType = TypeDescriptor.collection( List.class, this.targetType ); - } - - @Override - public Set getConvertibleTypes() { - return convertibleTypes; - } - - @Override - public boolean matches( TypeDescriptor sourceType, TypeDescriptor targetType ) { - return sourceType.isAssignableTo( this.sourceType ) && this.targetType.isAssignableTo( targetType ) || - sourceType.isAssignableTo( this.sourceCollectionType ) && this.targetListType.isAssignableTo( targetType ); - } - - @Override - public Object convert( @Nullable Object source, TypeDescriptor sourceType, TypeDescriptor targetType ) { - if ( sourceType.isAssignableTo( this.sourceType ) && this.targetType.isAssignableTo( targetType ) ) { - //noinspection unchecked - return source != null ? converter.convert( ( O ) source ) : null; - } - if ( sourceType.isAssignableTo( sourceCollectionType ) && this.targetListType.isAssignableTo( targetType ) ) { - //noinspection unchecked - return source != null ? 
( ( Collection ) source ).stream() - .map( converter::convert ) - .collect( Collectors.toList() ) : null; - } - throw new ConverterNotFoundException( sourceType, targetType ); - } -} diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/util/GenericValueObjectConverterTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/util/GenericValueObjectConverterTest.java deleted file mode 100644 index b5add26e12..0000000000 --- a/gemma-core/src/test/java/ubic/gemma/persistence/util/GenericValueObjectConverterTest.java +++ /dev/null @@ -1,72 +0,0 @@ -package ubic.gemma.persistence.util; - -import org.junit.Before; -import org.junit.Test; -import org.springframework.core.convert.ConverterNotFoundException; -import org.springframework.core.convert.support.ConfigurableConversionService; -import org.springframework.core.convert.support.GenericConversionService; -import ubic.gemma.model.IdentifiableValueObject; -import ubic.gemma.model.common.description.DatabaseEntry; -import ubic.gemma.model.common.description.DatabaseEntryValueObject; -import ubic.gemma.model.expression.arrayDesign.ArrayDesign; -import ubic.gemma.model.expression.arrayDesign.ArrayDesignValueObject; - -import java.util.Collection; -import java.util.Collections; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; - -public class GenericValueObjectConverterTest { - - private final ConfigurableConversionService converter = new GenericConversionService(); - - @Before - public void setUp() { - converter.addConverter( new GenericValueObjectConverter<>( DatabaseEntryValueObject::new, DatabaseEntry.class, DatabaseEntryValueObject.class ) ); - } - - @Test - public void test() { - Object converted = converter.convert( new DatabaseEntry(), DatabaseEntryValueObject.class ); - assertThat( converted ).isInstanceOf( DatabaseEntryValueObject.class ); - } - - @Test - public void testConvertToSuperClass() { - Object converted = converter.convert( new DatabaseEntry(), IdentifiableValueObject.class ); - assertThat( converted ).isInstanceOf( DatabaseEntryValueObject.class ); - } - - @Test - public void testConvertFromSubClass() { - Object converted = converter.convert( new SpecificDatabaseEntry(), DatabaseEntryValueObject.class ); - assertThat( converted ).isInstanceOf( DatabaseEntryValueObject.class ); - } - - private static class SpecificDatabaseEntry extends DatabaseEntry { - - } - - @Test - public void testConvertCollection() { - Object converted = converter.convert( Collections.singleton( new DatabaseEntry() ), List.class ); - assertThat( converted ).isInstanceOf( List.class ); - } - - @Test - public void testConvertCollectionToListSuperType() { - Object converted = converter.convert( Collections.singleton( new DatabaseEntry() ), Collection.class ); - assertThat( converted ).isInstanceOf( List.class ); - } - - @Test(expected = ConverterNotFoundException.class) - public void testConvertUnsupportedType() { - converter.convert( new ArrayDesign(), ArrayDesignValueObject.class ); - } - - @Test - public void testConvertNull() { - assertThat( converter.convert( null, DatabaseEntryValueObject.class ) ).isNull(); - } -} \ No newline at end of file From ce798c701d5eeec04bc042b4ae642f9e029a932e Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 18 Mar 2024 11:35:51 -0700 Subject: [PATCH 020/105] Add a -DredirectTestOutputToFile Maven option --- pom.xml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index 71fd92716d..ae3bd4ad82 100644 --- a/pom.xml +++ 
b/pom.xml @@ -496,7 +496,7 @@ 2.22.2 -Dlog4j1.compatibility=true -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager - true + ${redirectTestOutputToFile} **/*Test.java @@ -511,7 +511,7 @@ 2.22.2 -Dlog4j1.compatibility=true -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager - true + ${redirectTestOutputToFile} **/*Test.java @@ -646,5 +646,6 @@ ${skipTests} ${skipTests} + true From a5427196b0f25d79bab310c1bf667a57c48a1bbc Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Sat, 16 Mar 2024 19:38:16 -0700 Subject: [PATCH 021/105] Ensure that all setParameterList() use padded collections Add a batch iterator implementation that uses the same batch size even for the last one. Honor the batch size setting. --- .../java/ubic/gemma/core/util/ListUtils.java | 34 ++- .../persistence/service/AbstractDao.java | 26 +- .../service/TableMaintenanceUtil.java | 5 + .../ExpressionExperimentSetDaoImpl.java | 14 +- .../CoexpressionAnalysisDaoImpl.java | 4 +- ...DifferentialExpressionAnalysisDaoImpl.java | 117 ++++---- .../DifferentialExpressionResultDaoImpl.java | 30 +-- .../diff/GeneDiffExMetaAnalysisDaoImpl.java | 6 +- .../Gene2GOAssociationDaoImpl.java | 67 ++--- .../auditAndSecurity/AuditEventDaoImpl.java | 8 +- .../auditAndSecurity/AuditTrailDaoImpl.java | 8 +- .../BibliographicReferenceDaoImpl.java | 20 +- .../description/CharacteristicDaoImpl.java | 15 +- .../QuantitationTypeDaoImpl.java | 4 +- .../arrayDesign/ArrayDesignDaoImpl.java | 31 +-- .../AbstractDesignElementDataVectorDao.java | 7 +- .../ProcessedExpressionDataVectorDaoImpl.java | 23 +- .../RawExpressionDataVectorDaoImpl.java | 7 +- .../CompositeSequenceDaoImpl.java | 84 ++---- .../experiment/BlacklistedEntityDaoImpl.java | 4 +- .../ExpressionExperimentDaoImpl.java | 253 +++++++----------- .../experiment/FactorValueDaoImpl.java | 4 +- .../service/genome/GeneDaoImpl.java | 70 ++--- .../biosequence/BioSequenceDaoImpl.java | 83 ++---- .../service/genome/gene/GeneSetDaoImpl.java | 6 +- .../AnnotationAssociationDaoImpl.java | 4 +- .../BlatAssociationDaoImpl.java | 8 +- .../sequenceAnalysis/BlatResultDaoImpl.java | 8 +- .../gemma/persistence/util/CommonQueries.java | 102 +++---- .../gemma/persistence/util/EntityUtils.java | 4 +- .../persistence/util/FilterCriteriaUtils.java | 8 +- .../persistence/util/FilterQueryUtils.java | 9 +- .../persistence/util/HibernateUtils.java | 34 +++ .../gemma/persistence/util/QueryUtils.java | 121 +++++++++ .../src/main/resources/default.properties | 3 +- .../core/util/test/BaseDatabaseTest.java | 2 +- .../core/util/test/HibernateConfigTest.java | 2 +- .../persistence/service/AbstractDaoTest.java | 47 +++- .../gemma/persistence/util/ListUtilsTest.java | 9 + .../persistence/util/QueryUtilsTest.java | 46 ++++ 40 files changed, 749 insertions(+), 588 deletions(-) create mode 100644 gemma-core/src/main/java/ubic/gemma/persistence/util/HibernateUtils.java create mode 100644 gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java create mode 100644 gemma-core/src/test/java/ubic/gemma/persistence/util/QueryUtilsTest.java diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java b/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java index c574fb1ea4..d18699dcef 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java @@ -1,9 +1,8 @@ package ubic.gemma.core.util; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; 
+import org.springframework.util.Assert; + +import java.util.*; /** * Utilities and algorithms for {@link List}. @@ -44,4 +43,31 @@ private static void fillMap( Map element2position, List list } } } + + /** + * Pad a collection to the next power of 2 with the given element. + */ + public static Collection padToNextPowerOfTwo( Collection list, T elementForPadding ) { + int k = Integer.highestOneBit( list.size() ); + if ( list.size() == k ) { + return list; // already a power of 2 + } + return pad( list, elementForPadding, k << 1 ); + } + + /** + * Pad a collection with the given element. + */ + public static Collection pad( Collection list, T elementForPadding, int size ) { + Assert.isTrue( size >= list.size(), "Target size must be greater or equal to the collection size." ); + if ( list.size() == size ) { + return list; + } + List paddedList = new ArrayList<>( size ); + paddedList.addAll( list ); + for ( int j = list.size(); j < size; j++ ) { + paddedList.add( elementForPadding ); + } + return paddedList; + } } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/AbstractDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/AbstractDao.java index e14091d257..a0683ee5aa 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/AbstractDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/AbstractDao.java @@ -21,7 +21,6 @@ import org.apache.commons.lang3.time.StopWatch; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.hibernate.FlushMode; import org.hibernate.Hibernate; import org.hibernate.SessionFactory; import org.hibernate.criterion.Projections; @@ -29,6 +28,7 @@ import org.hibernate.metadata.ClassMetadata; import org.springframework.util.Assert; import ubic.gemma.model.common.Identifiable; +import ubic.gemma.persistence.util.HibernateUtils; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -38,6 +38,8 @@ import java.util.stream.Collectors; import static java.util.Objects.requireNonNull; +import static ubic.gemma.persistence.util.QueryUtils.batchParameterList; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; /** * AbstractDao can find the generic type at runtime and simplify the code implementation of the BaseDao interface @@ -51,6 +53,7 @@ public abstract class AbstractDao implements BaseDao protected final Class elementClass; private final SessionFactory sessionFactory; private final ClassMetadata classMetadata; + private final int batchSize; protected AbstractDao( Class elementClass, SessionFactory sessionFactory ) { this( elementClass, sessionFactory, requireNonNull( sessionFactory.getClassMetadata( elementClass ), @@ -67,6 +70,7 @@ protected AbstractDao( Class elementClass, SessionFactory sessionFa this.elementClass = elementClass; this.sessionFactory = sessionFactory; this.classMetadata = classMetadata; + this.batchSize = HibernateUtils.getBatchSize( sessionFactory, classMetadata ); } @Override @@ -124,6 +128,10 @@ public T save( T entity ) { } } + /** + * This implementation is temporary and attempts to best replicate the behaviour of loading entities by multiple IDs + * introduced in Hibernate 5. Read more about this. 
+ */ @Override public Collection load( Collection ids ) { if ( ids.isEmpty() ) { @@ -148,11 +156,19 @@ public Collection load( Collection ids ) { } } - if ( !unloadedIds.isEmpty() ) { + if ( batchSize != -1 && unloadedIds.size() > batchSize ) { + for ( Collection batch : batchParameterList( unloadedIds, batchSize ) ) { + //noinspection unchecked + results.addAll( sessionFactory.getCurrentSession() + .createCriteria( elementClass ) + .add( Restrictions.in( idPropertyName, batch ) ) + .list() ); + } + } else if ( !unloadedIds.isEmpty() ) { //noinspection unchecked results.addAll( sessionFactory.getCurrentSession() .createCriteria( elementClass ) - .add( Restrictions.in( idPropertyName, new HashSet<>( unloadedIds ) ) ) + .add( Restrictions.in( idPropertyName, optimizeParameterList( unloadedIds ) ) ) .list() ); } @@ -278,6 +294,10 @@ protected final SessionFactory getSessionFactory() { return sessionFactory; } + protected final int getBatchSize() { + return batchSize; + } + /** * Retrieve one entity whose given property matches the given value. *
<p>
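With the change above, AbstractDao.load(Collection) only ever issues its IN restriction over fixed-size batches. QueryUtils is introduced by this same patch but its source is not shown in this hunk, so the snippet below is only a rough sketch of the idea (class and method names are illustrative, not the actual implementation): batches keep a constant size, and the last one is padded by repeating an element, because every distinct IN-list length renders to a different SQL string and therefore a different cached statement.

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.TreeSet;

public class BatchingSketch {
    /** Split IDs into sorted, equally sized batches, repeating the last ID to fill the final batch. */
    static <T extends Comparable<T>> List<Collection<T>> batch( Collection<T> ids, int batchSize ) {
        List<T> sorted = new ArrayList<>( new TreeSet<>( ids ) ); // dedupe and sort for stable SQL
        List<Collection<T>> batches = new ArrayList<>();
        for ( int i = 0; i < sorted.size(); i += batchSize ) {
            List<T> b = new ArrayList<>( sorted.subList( i, Math.min( i + batchSize, sorted.size() ) ) );
            while ( b.size() < batchSize ) {
                b.add( b.get( b.size() - 1 ) ); // duplicates are harmless in an SQL IN clause
            }
            batches.add( b );
        }
        return batches;
    }
}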
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtil.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtil.java index 5cfb0407f0..c0b944df2d 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtil.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/TableMaintenanceUtil.java @@ -34,6 +34,11 @@ public interface TableMaintenanceUtil { */ String GENE2CS_QUERY_SPACE = "GENE2CS"; + /** + * Recommended batch size to use when retrieving entries from the GENE2CS table either by gene or design element. + */ + int GENE2CS_BATCH_SIZE = 2048; + /** * Query space used by the {@code EXPRESSION_EXPERIMENT2CHARACTERISTIC} table. *
<p>
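Before moving on to the DAO changes, the ListUtils helpers added earlier in this patch behave as follows. The example program is not part of the patch; the expected output follows from the implementation shown above.

import ubic.gemma.core.util.ListUtils;

import java.util.Arrays;

public class PaddingExample {
    public static void main( String[] args ) {
        // three elements are padded to four (the next power of two) by repeating the filler element
        System.out.println( ListUtils.padToNextPowerOfTwo( Arrays.asList( 1L, 2L, 3L ), 3L ) ); // [1, 2, 3, 3]
        // a size that is already a power of two is returned unchanged
        System.out.println( ListUtils.padToNextPowerOfTwo( Arrays.asList( 1L, 2L, 3L, 4L ), 4L ) ); // [1, 2, 3, 4]
    }
}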
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoImpl.java index 58fc0e3c0c..66d1f6b3b3 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/ExpressionExperimentSetDaoImpl.java @@ -20,14 +20,16 @@ import org.apache.commons.lang3.time.StopWatch; import org.hibernate.Hibernate; -import org.hibernate.LockOptions; import org.hibernate.Query; import org.hibernate.SessionFactory; import org.hibernate.criterion.Restrictions; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; -import ubic.gemma.model.expression.experiment.*; +import ubic.gemma.model.expression.experiment.BioAssaySet; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; +import ubic.gemma.model.expression.experiment.ExpressionExperimentDetailsValueObject; +import ubic.gemma.model.expression.experiment.ExpressionExperimentSetValueObject; import ubic.gemma.persistence.service.AbstractDao; import ubic.gemma.persistence.service.AbstractVoEnabledDao; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentDao; @@ -36,6 +38,8 @@ import javax.annotation.Nullable; import java.util.*; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * @author paul */ @@ -147,7 +151,7 @@ private void populateAnalysisInformation( Collection withCoexp = this.getSessionFactory().getCurrentSession().createQuery( "select e.id, count(an) from ExpressionExperimentSet e, CoexpressionAnalysis an join e.experiments ea " + "where an.experimentAnalyzed = ea and e.id in (:ids) group by e.id" ) - .setParameterList( "ids", idMap.keySet() ).list(); + .setParameterList( "ids", optimizeParameterList( idMap.keySet() ) ).list(); for ( Object[] oa : withCoexp ) { Long id = ( Long ) oa[0]; @@ -164,7 +168,7 @@ private void populateAnalysisInformation( Collection ids ) { Query queryObject = this.getSessionFactory().getCurrentSession().createQuery( queryString ); if ( ids != null ) - queryObject.setParameterList( "ids", ids ); + queryObject.setParameterList( "ids", optimizeParameterList( ids ) ); return queryObject; } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/coexpression/CoexpressionAnalysisDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/coexpression/CoexpressionAnalysisDaoImpl.java index 0053ad878a..98fc4cfee6 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/coexpression/CoexpressionAnalysisDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/coexpression/CoexpressionAnalysisDaoImpl.java @@ -28,6 +28,8 @@ import java.util.Collection; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** *
<p>
* Base Spring DAO Class: is able to create, update, remove, load, and find objects of type @@ -59,7 +61,7 @@ public Collection getExperimentsWithAnalysis( Collection idsToFilter //noinspection unchecked return this.getSessionFactory().getCurrentSession().createQuery( "select experimentAnalyzed.id from CoexpressionAnalysis where experimentAnalyzed.id in (:ids)" ) - .setParameterList( "ids", idsToFilter ).list(); + .setParameterList( "ids", optimizeParameterList( idsToFilter ) ).list(); } @Override diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java index aa49bfdc75..c5fe9b3d0f 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionAnalysisDaoImpl.java @@ -21,13 +21,17 @@ import org.apache.commons.collections4.ListUtils; import org.apache.commons.lang3.time.StopWatch; import org.apache.commons.lang3.tuple.Pair; -import org.hibernate.*; +import org.hibernate.Hibernate; +import org.hibernate.HibernateException; +import org.hibernate.Session; +import org.hibernate.SessionFactory; import org.hibernate.engine.jdbc.spi.SqlStatementLogger; import org.hibernate.engine.spi.SessionImplementor; import org.hibernate.id.IdentifierGeneratorHelper; import org.hibernate.internal.SessionFactoryImpl; import org.hibernate.jdbc.Expectations; import org.hibernate.persister.entity.EntityPersister; +import org.hibernate.type.StandardBasicTypes; import org.hibernate.type.Type; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; @@ -40,9 +44,9 @@ import ubic.gemma.persistence.service.analysis.SingleExperimentAnalysisDaoBase; import ubic.gemma.persistence.util.CommonQueries; import ubic.gemma.persistence.util.EntityUtils; +import ubic.gemma.persistence.util.HibernateUtils; import java.io.Serializable; -import java.math.BigInteger; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; @@ -50,6 +54,8 @@ import java.util.*; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.*; + /** * @author paul * @see DifferentialExpressionAnalysis @@ -71,6 +77,8 @@ class DifferentialExpressionAnalysisDaoImpl extends SingleExperimentAnalysisDaoB private final EntityPersister resultPersister, contrastPersister; + private final int bioAssaySetBatchSize; + @Autowired public DifferentialExpressionAnalysisDaoImpl( SessionFactory sessionFactory ) { super( DifferentialExpressionAnalysis.class, sessionFactory ); @@ -78,6 +86,7 @@ public DifferentialExpressionAnalysisDaoImpl( SessionFactory sessionFactory ) { .getEntityPersister( DifferentialExpressionAnalysisResult.class.getName() ); contrastPersister = ( ( SessionFactoryImpl ) sessionFactory ) .getEntityPersister( ContrastResult.class.getName() ); + bioAssaySetBatchSize = HibernateUtils.getBatchSize( sessionFactory, sessionFactory.getClassMetadata( BioAssaySet.class ) ); } /** @@ -328,7 +337,7 @@ public Collection findByFactors( Collection findByFactors( Collection> findByExperimentIds + " inner join a.experimentAnalyzed e where e.id in (:eeIds)"; List qresult = this.getSessionFactory().getCurrentSession() .createQuery( queryString ) - .setParameterList( 
"eeIds", experimentIds ) + .setParameterList( "eeIds", optimizeParameterList( experimentIds ) ) .list(); for ( Object o : qresult ) { Object[] oa = ( Object[] ) o; @@ -378,7 +387,7 @@ public Collection findExperimentsWithAnalyses( Gene gene ) { Collection probes = CommonQueries .getCompositeSequences( gene, this.getSessionFactory().getCurrentSession() ); Collection result = new HashSet<>(); - if ( probes.size() == 0 ) { + if ( probes.isEmpty() ) { return result; } @@ -388,31 +397,38 @@ public Collection findExperimentsWithAnalyses( Gene gene ) { timer.reset(); timer.start(); - /* - * Note: this query misses ExpressionExperimentSubSets. The native query was implemented because HQL was always - * constructing a constraint on SubSets. See bug 2173. - */ - final String queryToUse = "select e.ID from ANALYSIS a inner join INVESTIGATION e ON a.EXPERIMENT_ANALYZED_FK = e.ID " - + "inner join BIO_ASSAY ba ON ba.EXPRESSION_EXPERIMENT_FK=e.ID " - + " inner join BIO_MATERIAL bm ON bm.ID=ba.SAMPLE_USED_FK inner join TAXON t ON bm.SOURCE_TAXON_FK=t.ID " - + " inner join COMPOSITE_SEQUENCE cs ON ba.ARRAY_DESIGN_USED_FK =cs.ARRAY_DESIGN_FK where cs.ID in " - + " (:probes) and t.ID = :taxon"; + // Note: this query misses ExpressionExperimentSubSets. The native query was implemented because HQL was always + // constructing a constraint on SubSets. See bug 2173. + // final String queryToUse = "select e.ID from ANALYSIS a inner join INVESTIGATION e ON a.EXPERIMENT_ANALYZED_FK = e.ID " + // + "inner join BIO_ASSAY ba ON ba.EXPRESSION_EXPERIMENT_FK=e.ID " + // + " inner join BIO_MATERIAL bm ON bm.ID=ba.SAMPLE_USED_FK inner join TAXON t ON bm.SOURCE_TAXON_FK=t.ID " + // + " inner join COMPOSITE_SEQUENCE cs ON ba.ARRAY_DESIGN_USED_FK =cs.ARRAY_DESIGN_FK where cs.ID in " + // + " (:probes) and t.ID = :taxon"; Taxon taxon = gene.getTaxon(); - int batchSize = 1000; - Collection batch = new HashSet<>(); - for ( CompositeSequence probe : probes ) { - batch.add( probe ); - - if ( batch.size() == batchSize ) { - this.fetchExperimentsTestingGeneNativeQuery( batch, result, queryToUse, taxon ); - batch.clear(); - } + Set ids = new HashSet<>(); + for ( Collection batch : batchParameterList( EntityUtils.getIds( probes ), 1024 ) ) { + //noinspection unchecked + ids.addAll( this.getSessionFactory().getCurrentSession() + .createSQLQuery( "select a.EXPERIMENT_ANALYZED_FK from ANALYSIS a " + + "join BIO_ASSAY ba ON ba.EXPRESSION_EXPERIMENT_FK = a.EXPERIMENT_ANALYZED_FK " + + "join BIO_MATERIAL bm ON bm.ID = ba.SAMPLE_USED_FK " + + "join TAXON t ON bm.SOURCE_TAXON_FK = t.ID " + + "join COMPOSITE_SEQUENCE cs ON ba.ARRAY_DESIGN_USED_FK = cs.ARRAY_DESIGN_FK " + + "where cs.ID in (:probes) and t.ID = :taxon" ) + .addScalar( "ID", StandardBasicTypes.LONG ) + .setParameterList( "probes", batch ) + .setParameter( "taxon", taxon ) + .list() ); } - if ( !batch.isEmpty() ) { - this.fetchExperimentsTestingGeneNativeQuery( batch, result, queryToUse, taxon ); + for ( Collection batch : batchParameterList( ids, bioAssaySetBatchSize ) ) { + //noinspection unchecked + result.addAll( this.getSessionFactory().getCurrentSession() + .createQuery( "from BioAssaySet ba where ba.id in (:ids)" ) + .setParameterList( "ids", batch ) + .list() ); } if ( timer.getTime() > 1000 ) { @@ -432,12 +448,12 @@ public Map> get final String query = "select distinct a from DifferentialExpressionAnalysis a inner join fetch a.resultSets res " + " inner join fetch res.baselineGroup" + " inner join fetch res.experimentalFactors facs inner join fetch facs.factorValues " - 
+ " inner join fetch res.hitListSizes where a.experimentAnalyzed.id in (:ees) "; + + " inner join fetch res.hitListSizes where a.experimentAnalyzed in (:ees) "; //noinspection unchecked List r1 = this.getSessionFactory().getCurrentSession() .createQuery( query ) - .setParameterList( "ees", EntityUtils.getIds( experiments ) ) + .setParameterList( "ees", optimizeIdentifiableParameterList( experiments ) ) .list(); int count = 0; for ( DifferentialExpressionAnalysis a : r1 ) { @@ -466,11 +482,11 @@ public Map> get + " inner join fetch a.resultSets res inner join fetch res.baselineGroup " + " inner join fetch res.experimentalFactors facs inner join fetch facs.factorValues" + " inner join fetch res.hitListSizes " - + " join eess.sourceExperiment see join a.experimentAnalyzed ee where eess=ee and see.id in (:ees) "; + + " join eess.sourceExperiment see join a.experimentAnalyzed ee where eess=ee and see in (:ees) "; //noinspection unchecked List r2 = this.getSessionFactory().getCurrentSession() .createQuery( q2 ) - .setParameterList( "ees", EntityUtils.getIds( experiments ) ) + .setParameterList( "ees", optimizeIdentifiableParameterList( experiments ) ) .list(); if ( !r2.isEmpty() ) { @@ -510,7 +526,7 @@ public Collection getExperimentsWithAnalysis( Collection idsToFilter //noinspection unchecked return this.getSessionFactory().getCurrentSession() .createQuery( queryString ) - .setParameterList( "eeIds", idsToFilter ) + .setParameterList( "eeIds", optimizeParameterList( idsToFilter ) ) .list(); } @@ -548,7 +564,7 @@ public Map> getAnalysesByE "select distinct a from DifferentialExpressionAnalysis a " + "join fetch a.experimentAnalyzed e " + "where e.id in (:eeIds)" ) - .setParameterList( "eeIds", expressionExperimentIds ) + .setParameterList( "eeIds", optimizeParameterList( expressionExperimentIds ) ) .setFirstResult( offset ) .setMaxResults( limit ) .list(); @@ -571,7 +587,7 @@ public Map> getAnalysesByE fvs = this.getSessionFactory().getCurrentSession().createQuery( "select distinct ee.id, fv from " + "ExpressionExperiment" + " ee join ee.bioAssays ba join ba.sampleUsed bm join bm.factorValues fv where ee.id in (:ees)" ) - .setParameterList( "ees", expressionExperimentIds ).list(); + .setParameterList( "ees", optimizeParameterList( expressionExperimentIds ) ).list(); this.addFactorValues( ee2fv, fvs ); // also get factor values for subsets - those not found yet. 
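One detail worth noting in the findExperimentsWithAnalyses rewrite above: registering a typed scalar on the native query makes Hibernate return Long IDs directly, which is what allows the old BigInteger-unboxing helper (fetchExperimentsTestingGeneNativeQuery) to be deleted further down. A minimal sketch of that pattern, using a hypothetical table name rather than the real schema:

import org.hibernate.SQLQuery;
import org.hibernate.Session;
import org.hibernate.type.StandardBasicTypes;

import java.util.List;

public class TypedScalarSketch {
    @SuppressWarnings("unchecked")
    static List<Long> loadIds( Session session ) {
        SQLQuery query = session.createSQLQuery( "select ID from SOME_TABLE" ); // hypothetical table
        // without addScalar, MySQL reports BIGINT columns as BigInteger and each value needs manual conversion
        query.addScalar( "ID", StandardBasicTypes.LONG );
        return ( List<Long> ) query.list();
    }
}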
@@ -580,13 +596,13 @@ public Map> getAnalysesByE used.add( a.getExperimentAnalyzed().getId() ); } - List probableSubSetIds = ListUtils.removeAll( used, ee2fv.keySet() ); + List probableSubSetIds = ListUtils.removeAll( used, ee2fv.keySet() ); if ( !probableSubSetIds.isEmpty() ) { //noinspection unchecked fvs = this.getSessionFactory().getCurrentSession().createQuery( "select distinct ee.id, fv from " + "ExpressionExperimentSubSet" + " ee join ee.bioAssays ba join ba.sampleUsed bm join bm.factorValues fv where ee.id in (:ees)" ) - .setParameterList( "ees", probableSubSetIds ).list(); + .setParameterList( "ees", optimizeParameterList( probableSubSetIds ) ).list(); this.addFactorValues( ee2fv, fvs ); } @@ -600,7 +616,7 @@ public Map> getAnalysesByE .createQuery( "select distinct a from " + "ExpressionExperimentSubSet" + " ee, DifferentialExpressionAnalysis a" + " join ee.sourceExperiment see " + " join fetch a.experimentAnalyzed eeanalyzed where see.id in (:eeids) and ee=eeanalyzed" ) - .setParameterList( "eeids", expressionExperimentIds ).list(); + .setParameterList( "eeids", optimizeParameterList( expressionExperimentIds ) ).list(); if ( !analysesOfSubsets.isEmpty() ) { hits.addAll( analysesOfSubsets ); @@ -617,7 +633,7 @@ public Map> getAnalysesByE fvs = this.getSessionFactory().getCurrentSession().createQuery( "select distinct ee.id, fv from " + "ExpressionExperimentSubSet" + " ee join ee.bioAssays ba join ba.sampleUsed bm join bm.factorValues fv where ee.id in (:ees)" ) - .setParameterList( "ees", experimentSubsetIds ).list(); + .setParameterList( "ees", optimizeParameterList( experimentSubsetIds ) ).list(); this.addFactorValues( ee2fv, fvs ); } @@ -705,7 +721,7 @@ public Map> findByExperi results.addAll( this.getSessionFactory().getCurrentSession().createQuery( "select distinct a from DifferentialExpressionAnalysis a " + "where a.experimentAnalyzed in :ees" ) - .setParameterList( "ees", experiments ).list() ); + .setParameterList( "ees", optimizeIdentifiableParameterList( experiments ) ).list() ); /* * Deal with the analyses of subsets of the investigation. User has to know this is possible. 
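The optimizeParameterList(...) wrappers that appear throughout these hunks address the same concern from the caller's side: rather than passing a raw collection to setParameterList(), the collection is padded to a bounded set of sizes so that repeated queries reuse a small number of SQL shapes. The QueryUtils source is not included in this excerpt, so the following is only a sketch of the calling pattern, reusing the ListUtils helper from this patch and guarding the empty case; it is not the actual implementation.

import org.hibernate.Session;
import ubic.gemma.core.util.ListUtils;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;

import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.TreeSet;

public class InClausePaddingSketch {
    @SuppressWarnings("unchecked")
    static List<ExpressionExperiment> loadByIds( Session session, Collection<Long> ids ) {
        if ( ids.isEmpty() ) {
            return Collections.emptyList();
        }
        TreeSet<Long> sorted = new TreeSet<>( ids ); // dedupe and sort so equivalent calls render identical SQL
        Collection<Long> padded = ListUtils.padToNextPowerOfTwo( sorted, sorted.last() );
        return session.createQuery( "from ExpressionExperiment e where e.id in (:ids)" )
                .setParameterList( "ids", padded )
                .list();
    }
}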
@@ -715,7 +731,7 @@ public Map> findByExperi "select distinct a from ExpressionExperimentSubSet eess, DifferentialExpressionAnalysis a " + "join eess.sourceExperiment see " + "join a.experimentAnalyzed eeanalyzed where see in :ees and eess=eeanalyzed" ) - .setParameterList( "ees", experiments ).list() ); + .setParameterList( "ees", optimizeIdentifiableParameterList( experiments ) ).list() ); return results.stream() .collect( Collectors.groupingBy( DifferentialExpressionAnalysis::getExperimentAnalyzed, Collectors.toCollection( ArrayList::new ) ) ); @@ -778,29 +794,6 @@ private Collection convertToValueObje return summaries; } - private void fetchExperimentsTestingGeneNativeQuery( Collection probes, - Collection result, final String nativeQuery, Taxon taxon ) { - - if ( probes.isEmpty() ) - return; - - SQLQuery nativeQ = this.getSessionFactory().getCurrentSession().createSQLQuery( nativeQuery ); - nativeQ.setParameterList( "probes", EntityUtils.getIds( probes ) ); - nativeQ.setParameter( "taxon", taxon ); - List list = nativeQ.list(); - Set ids = new HashSet<>(); - for ( Object o : list ) { - ids.add( ( ( BigInteger ) o ).longValue() ); - } - if ( !ids.isEmpty() ) { - //noinspection unchecked - result.addAll( this.getSessionFactory().getCurrentSession() - .createQuery( "from ExpressionExperiment e where e.id in (:ids)" ) - .setParameterList( "ids", ids ) - .list() ); - } - } - /** * Figure out which factorValues were used for each of the experimental factors (excluding the subset factor) */ diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java index 209562d31f..f6c6ba7fd5 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/DifferentialExpressionResultDaoImpl.java @@ -26,7 +26,6 @@ import org.springframework.util.Assert; import ubic.basecode.io.ByteArrayConverter; import ubic.basecode.math.distribution.Histogram; -import ubic.basecode.util.BatchIterator; import ubic.basecode.util.SQLUtils; import ubic.gemma.model.analysis.expression.diff.*; import ubic.gemma.model.expression.experiment.BioAssaySet; @@ -43,6 +42,9 @@ import java.util.*; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.batchParameterList; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * This is a key class for queries to retrieve differential expression results (as well as standard CRUD aspects of * working with DifferentialExpressionResults). @@ -95,7 +97,7 @@ public Map 0 ? " order by r.correctedPvalue" : "" ) ) .setParameter( "gene", gene ) - .setParameterList( "experimentsAnalyzed", experimentsAnalyzed ) + .setParameterList( "experimentsAnalyzed", optimizeParameterList( experimentsAnalyzed ) ) .setParameter( "threshold", threshold ) .setMaxResults( limit ) .setCacheable( true ) @@ -127,7 +129,7 @@ public Map 0 ? 
" order by r.correctedPvalue" : "" ) ) - .setParameterList( "experimentsAnalyzed", experiments ) + .setParameterList( "experimentsAnalyzed", optimizeParameterList( experiments ) ) .setParameter( "threshold", qvalueThreshold ) .setMaxResults( limit ) .setCacheable( true ) @@ -173,7 +175,7 @@ public Map qResult = this.getSessionFactory().getCurrentSession() .createQuery( DIFF_EX_RESULTS_BY_GENE_QUERY + " and e.id in (:experimentsAnalyzed)" ) .setParameter( "gene", gene ) - .setParameterList( "experimentsAnalyzed", experimentsAnalyzed ) + .setParameterList( "experimentsAnalyzed", optimizeParameterList( experimentsAnalyzed ) ) .list(); try { return groupDiffExResultVos( qResult ); @@ -269,7 +271,7 @@ public Map> findDiffExAnalysisResultId int numResultSetBatchesDone = 0; // Iterate over batches of resultSets - for ( Collection resultSetIdBatch : new BatchIterator<>( resultSetsNeeded, resultSetBatchSize ) ) { + for ( Collection resultSetIdBatch : batchParameterList( resultSetsNeeded, resultSetBatchSize ) ) { if ( AbstractDao.log.isDebugEnabled() ) AbstractDao.log.debug( "Starting batch of resultsets: " + StringUtils @@ -290,17 +292,13 @@ public Map> findDiffExAnalysisResultId StopWatch innerQt = new StopWatch(); // iterate over batches of probes (genes) - for ( Collection probeBatch : new BatchIterator<>( cs2GeneIdMap.keySet(), geneBatchSize ) ) { + for ( Collection probeBatch : batchParameterList( cs2GeneIdMap.keySet(), geneBatchSize ) ) { if ( AbstractDao.log.isDebugEnabled() ) AbstractDao.log.debug( "Starting batch of probes: " + StringUtils .abbreviate( StringUtils.join( probeBatch, "," ), 100 ) ); - // would it help to sort the probeBatch/ - List pbL = new Vector<>( probeBatch ); - Collections.sort( pbL ); - - queryObject.setParameterList( "probe_ids", pbL ); + queryObject.setParameterList( "probe_ids", probeBatch ); innerQt.start(); List queryResult = queryObject.list(); @@ -341,7 +339,7 @@ public Map> findDiffExAnalysisResultId if ( DifferentialExpressionResultDaoImpl.CORRECTED_PVALUE_THRESHOLD_TO_BE_CONSIDERED_DIFF_EX < 1.0 ) { timeForFillingNonSig += this - .fillNonSignificant( pbL, resultSetIdsMap, resultsFromDb, resultSetIdBatch, cs2GeneIdMap, + .fillNonSignificant( probeBatch, resultSetIdsMap, resultsFromDb, resultSetIdBatch, cs2GeneIdMap, session ); } } // over probes. @@ -458,7 +456,7 @@ public Map loadContrastDetailsForResults( Collection int BATCH_SIZE = 2000; // previously: 500, then 1000. New optimized query is plenty fast. 
StopWatch timer = new StopWatch(); - for ( Collection batch : new BatchIterator<>( ids, BATCH_SIZE ) ) { + for ( Collection batch : batchParameterList( ids, BATCH_SIZE ) ) { timer.reset(); timer.start(); @@ -572,7 +570,7 @@ public Map ) session .createQuery( "select id,name from CompositeSequence where id in (:ids)" ) - .setParameterList( "ids", probeIds ).list() ) { + .setParameterList( "ids", optimizeParameterList( probeIds ) ).list() ) { probeNames.put( ( Long ) rec[0], ( String ) rec[1] ); } } @@ -588,7 +586,7 @@ public Map> fillFromCache( Map pbL, Map resultSetIds, + private long fillNonSignificant( Collection pbL, Map resultSetIds, Map> resultsFromDb, Collection resultSetIdBatch, Map> cs2GeneIdMap, Session session ) { diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/GeneDiffExMetaAnalysisDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/GeneDiffExMetaAnalysisDaoImpl.java index 939bdeba56..453919b6fe 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/GeneDiffExMetaAnalysisDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/analysis/expression/diff/GeneDiffExMetaAnalysisDaoImpl.java @@ -33,6 +33,8 @@ import java.util.*; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * @author Paul */ @@ -88,7 +90,7 @@ public Collection find //noinspection unchecked List queryResults = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "aIds", metaAnalysisIds ).list(); + .setParameterList( "aIds", optimizeParameterList( metaAnalysisIds ) ).list(); for ( Object[] queryResult : queryResults ) { GeneDifferentialExpressionMetaAnalysisSummaryValueObject myMetaAnalysis = new GeneDifferentialExpressionMetaAnalysisSummaryValueObject(); @@ -140,7 +142,7 @@ public Collection getExperimentsWithAnalysis( Collection idsToFilter return this.getSessionFactory().getCurrentSession().createQuery( "select distinct a from GeneDifferentialExpressionMetaAnalysis a" + " inner join a.resultSetsIncluded rs inner join rs.analysis ra where ra.experimentAnalyzed.id in (:ids)" ) - .setParameterList( "ids", idsToFilter ).list(); + .setParameterList( "ids", optimizeParameterList( idsToFilter ) ).list(); } /** diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/association/Gene2GOAssociationDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/association/Gene2GOAssociationDaoImpl.java index b7f1d97786..53134f666a 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/association/Gene2GOAssociationDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/association/Gene2GOAssociationDaoImpl.java @@ -20,7 +20,6 @@ import org.apache.commons.lang3.time.StopWatch; import org.hibernate.Criteria; -import org.hibernate.Query; import org.hibernate.SessionFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; @@ -31,10 +30,14 @@ import ubic.gemma.persistence.service.AbstractDao; import ubic.gemma.persistence.util.BusinessKey; import ubic.gemma.persistence.util.EntityUtils; +import ubic.gemma.persistence.util.HibernateUtils; import javax.annotation.Nullable; import java.util.*; +import static ubic.gemma.persistence.util.QueryUtils.batchIdentifiableParameterList; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * @author pavlidis * @see 
ubic.gemma.model.association.Gene2GOAssociation @@ -42,9 +45,12 @@ @Repository public class Gene2GOAssociationDaoImpl extends AbstractDao implements Gene2GOAssociationDao { + private final int geneBatchSize; + @Autowired protected Gene2GOAssociationDaoImpl( SessionFactory sessionFactory ) { super( Gene2GOAssociation.class, sessionFactory ); + this.geneBatchSize = HibernateUtils.getBatchSize( sessionFactory, sessionFactory.getClassMetadata( Gene.class ) ); } @Override @@ -79,22 +85,25 @@ public Map> findByGenes( Collection needT Map> result = new HashMap<>(); StopWatch timer = new StopWatch(); timer.start(); - int batchSize = 200; - Set batch = new HashSet<>(); int i = 0; - for ( Gene gene : needToFind ) { - batch.add( gene ); - if ( batch.size() == batchSize ) { - result.putAll( this.fetchBatch( batch ) ); - batch.clear(); + for ( Collection batch : batchIdentifiableParameterList( needToFind, geneBatchSize ) ) { + Map giMap = EntityUtils.getIdMap( batch ); + //noinspection unchecked + List o = this.getSessionFactory().getCurrentSession() + .createQuery( "select g.id, geneAss.ontologyEntry from Gene2GOAssociation as geneAss join geneAss.gene g where g.id in (:genes)" ) + .setParameterList( "genes", giMap.keySet() ) + .list(); + for ( Object[] object : o ) { + Long g = ( Long ) object[0]; + Characteristic vc = ( Characteristic ) object[1]; + Gene gene = giMap.get( g ); + assert gene != null; + result.computeIfAbsent( gene, k -> new HashSet<>() ).add( vc ); } if ( ++i % 1000 == 0 ) { AbstractDao.log.info( "Fetched GO associations for " + i + "/" + needToFind.size() + " genes" ); } } - if ( !batch.isEmpty() ) - result.putAll( this.fetchBatch( batch ) ); - if ( timer.getTime() > 1000 ) { AbstractDao.log .info( "Fetched GO annotations for " + needToFind.size() + " genes in " + timer.getTime() + " ms" ); @@ -131,7 +140,7 @@ public Collection getGenes( Collection ids ) { return this.getSessionFactory().getCurrentSession().createQuery( "select distinct geneAss.gene from Gene2GOAssociation as geneAss " + "where geneAss.ontologyEntry.value in ( :goIDs)" ) - .setParameterList( "goIDs", ids ).list(); + .setParameterList( "goIDs", optimizeParameterList( ids ) ).list(); } @Override @@ -141,9 +150,11 @@ public Collection getGenes( Collection ids, @Nullable Taxon taxon //noinspection unchecked return this.getSessionFactory().getCurrentSession().createQuery( - "select distinct " + " gene from Gene2GOAssociation as geneAss join geneAss.gene as gene " + "select distinct gene from Gene2GOAssociation as geneAss join geneAss.gene as gene " + "where geneAss.ontologyEntry.value in ( :goIDs) and gene.taxon = :tax" ) - .setParameterList( "goIDs", ids ).setParameter( "tax", taxon ).list(); + .setParameterList( "goIDs", optimizeParameterList( ids ) ) + .setParameter( "tax", taxon ) + .list(); } @Override @@ -159,7 +170,7 @@ public int removeAll() { if ( !cIds.isEmpty() ) { removedCharacteristics = getSessionFactory().getCurrentSession() .createQuery( "delete from Characteristic where id in :cIds" ) - .setParameterList( "cIds", cIds ) + .setParameterList( "cIds", optimizeParameterList( cIds ) ) .executeUpdate(); } else { removedCharacteristics = 0; @@ -168,30 +179,4 @@ public int removeAll() { removedAssociations, removedCharacteristics ) ); return removedAssociations; } - - private Map> fetchBatch( Set batch ) { - Map giMap = EntityUtils.getIdMap( batch ); - //language=HQL - final String queryString = "select g.id, geneAss.ontologyEntry from Gene2GOAssociation as geneAss join geneAss.gene g where g.id in (:genes)"; - 
Map> results = new HashMap<>(); - Query query = this.getSessionFactory().getCurrentSession().createQuery( queryString ); - query.setFetchSize( batch.size() ); - query.setParameterList( "genes", giMap.keySet() ); - List o = query.list(); - - for ( Object object : o ) { - Object[] oa = ( Object[] ) object; - Long g = ( Long ) oa[0]; - Characteristic vc = ( Characteristic ) oa[1]; - Gene gene = giMap.get( g ); - assert gene != null; - if ( !results.containsKey( gene ) ) { - results.put( gene, new HashSet() ); - } - results.get( gene ).add( vc ); - } - - return results; - } - } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditEventDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditEventDaoImpl.java index 6bbe63666c..69c38a1606 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditEventDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditEventDaoImpl.java @@ -37,6 +37,8 @@ import java.util.function.Function; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * @author pavlidis * @see ubic.gemma.model.common.auditAndSecurity.AuditEvent @@ -174,7 +176,7 @@ public Map getCreateEvents( final Collection qr = queryObject.list(); for ( Object o : qr ) { Object[] ar = ( Object[] ) o; @@ -220,8 +222,8 @@ private Map getLastEvents( final Collection qr = queryObject.list(); for ( Object o : qr ) { diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditTrailDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditTrailDaoImpl.java index 266cf83fa1..35ea164a5b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditTrailDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/AuditTrailDaoImpl.java @@ -27,6 +27,8 @@ import java.util.Collection; import java.util.List; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * @author pavlidis * @see AuditTrailDao @@ -54,18 +56,18 @@ public int removeByIds( Collection ids ) { if ( !aeIds.isEmpty() ) { getSessionFactory().getCurrentSession() .createQuery( "delete from AuditEvent ae where ae.id in :aeIds" ) - .setParameterList( "aeIds", aeIds ) + .setParameterList( "aeIds", optimizeParameterList( aeIds ) ) .executeUpdate(); } if ( !aetIds.isEmpty() ) { getSessionFactory().getCurrentSession() .createQuery( "delete from AuditEventType aet where aet.id in :aetIds" ) - .setParameterList( "aetIds", aetIds ) + .setParameterList( "aetIds", optimizeParameterList( aetIds ) ) .executeUpdate(); } return getSessionFactory().getCurrentSession() .createQuery( "delete from AuditTrail at where at.id in :atIds" ) - .setParameterList( "atIds", ids ) + .setParameterList( "atIds", optimizeParameterList( ids ) ) .executeUpdate(); } } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/BibliographicReferenceDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/BibliographicReferenceDaoImpl.java index fd27cfb838..b63a264b2b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/BibliographicReferenceDaoImpl.java +++ 
b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/BibliographicReferenceDaoImpl.java @@ -19,17 +19,19 @@ import org.hibernate.criterion.Restrictions; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; -import ubic.basecode.util.BatchIterator; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.description.BibliographicReferenceValueObject; import ubic.gemma.model.common.description.DatabaseEntry; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.AbstractVoEnabledDao; import ubic.gemma.persistence.util.BusinessKey; -import ubic.gemma.persistence.util.EntityUtils; +import ubic.gemma.persistence.util.HibernateUtils; import java.util.*; +import static ubic.gemma.persistence.util.QueryUtils.batchIdentifiableParameterList; +import static ubic.gemma.persistence.util.QueryUtils.optimizeIdentifiableParameterList; + /** * @author pavlidis * @see BibliographicReference @@ -39,9 +41,12 @@ public class BibliographicReferenceDaoImpl extends AbstractVoEnabledDao implements BibliographicReferenceDao { + private final int eeBatchSize; + @Autowired public BibliographicReferenceDaoImpl( SessionFactory sessionFactory ) { super( BibliographicReference.class, sessionFactory ); + this.eeBatchSize = HibernateUtils.getBatchSize( sessionFactory, sessionFactory.getClassMetadata( ExpressionExperiment.class ) ); } @Override @@ -89,8 +94,8 @@ public Collection thaw( Collection> getRelatedE Map> result = new HashMap<>(); - for ( Collection batch : BatchIterator.batches( records, 200 ) ) { + for ( Collection batch : batchIdentifiableParameterList( records, eeBatchSize ) ) { //noinspection unchecked List os = this.getSessionFactory().getCurrentSession().createQuery( query ) .setParameterList( "recs", batch ).list(); for ( Object[] o : os ) { ExpressionExperiment e = ( ExpressionExperiment ) o[0]; BibliographicReference b = ( BibliographicReference ) o[1]; - if ( !result.containsKey( b ) ) { - result.put( b, new HashSet() ); - } - result.get( b ).add( e ); + result.computeIfAbsent( b, k -> new HashSet<>() ).add( e ); } } return result; diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java index b1d7b5933c..dff2441f64 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java @@ -18,7 +18,6 @@ */ package ubic.gemma.persistence.service.common.description; -import org.apache.commons.collections4.ListUtils; import org.apache.commons.lang3.StringUtils; import org.hibernate.Hibernate; import org.hibernate.Query; @@ -50,6 +49,8 @@ import java.util.stream.Collectors; import static ubic.gemma.persistence.service.TableMaintenanceUtil.EE2C_QUERY_SPACE; +import static ubic.gemma.persistence.util.QueryUtils.batchParameterList; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; /** * @author Luke @@ -173,7 +174,7 @@ private Query prepareExperimentsByUrisQuery( Collection uris, @Nullable query.setParameter( "bmClass", BioMaterial.class ); } - query.setParameterList( "uris", uris ); + query.setParameterList( "uris", optimizeParameterList( uris ) ); if ( taxon != null ) { query.setParameter( "taxonId", 
taxon.getId() ); @@ -198,7 +199,7 @@ public Collection findByUri( Collection uris ) { .sorted() .collect( Collectors.toList() ); - for ( List batch : ListUtils.partition( uniqueUris, 100 ) ) { + for ( Collection batch : batchParameterList( uniqueUris, getBatchSize() ) ) { //noinspection unchecked results.addAll( this.getSessionFactory().getCurrentSession() .createQuery( "from Characteristic where valueUri in (:uris)" ) @@ -242,7 +243,7 @@ public Map countCharacteristicsByValueUriGroupedByNormalizedValue( .createQuery( "select lower(coalesce(char.valueUri, char.value)), count(char) from Characteristic char " + "where char.valueUri in :uris " + "group by coalesce(char.valueUri, char.value)" ) - .setParameterList( "uris", uniqueUris ) + .setParameterList( "uris", optimizeParameterList( uniqueUris ) ) .list() ) .stream() .collect( Collectors.toMap( row -> ( String ) row[0], row -> ( Long ) row[1] ) ); @@ -326,7 +327,7 @@ public Map getParents( Collection + "where C.ID in :ids " + "and (I.class is NULL or I.class = 'ExpressionExperiment') " // for investigations, only retrieve EEs + extraClause ) - .setParameterList( "ids", characteristicIds ) + .setParameterList( "ids", optimizeParameterList( characteristicIds ) ) .setMaxResults( maxResults ) .list(); Set characteristicsNotFound = new HashSet<>(); @@ -377,9 +378,11 @@ public Map getParents( Collection // } if ( efOK ) { + //noinspection unchecked List efResults = getSessionFactory().getCurrentSession() .createQuery( "select ef, ef.category from ExperimentalFactor ef where ef.category in :characteristics" ) - .setParameterList( "characteristics", characteristicsNotFound ).list(); + .setParameterList( "characteristics", optimizeParameterList( characteristicsNotFound ) ) + .list(); for ( Object[] row : efResults ) { charToParent.put( ( Characteristic ) row[1], ( Identifiable ) row[0] ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoImpl.java index 028f05f9d8..60b24916e9 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/quantitationtype/QuantitationTypeDaoImpl.java @@ -40,6 +40,8 @@ import java.util.Set; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** *

* Base Spring DAO Class: is able to create, update, remove, load, and find objects of type @@ -180,7 +182,7 @@ private void populateVectorType( Collection quantit List qtIds = getSessionFactory().getCurrentSession() .createQuery( "select distinct v.quantitationType.id from " + vectorType.getName() + " v where v.expressionExperiment = :ee and v.quantitationType.id in :ids" ) .setParameter( "ee", ee ) - .setParameterList( "ids", ids ) + .setParameterList( "ids", optimizeParameterList( ids ) ) .list(); qtIds.forEach( id -> vectorTypeById.add( id, vectorType ) ); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/arrayDesign/ArrayDesignDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/arrayDesign/ArrayDesignDaoImpl.java index 85a43136cb..dc068ddba2 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/arrayDesign/ArrayDesignDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/arrayDesign/ArrayDesignDaoImpl.java @@ -53,6 +53,7 @@ import java.util.stream.Collectors; import static ubic.gemma.persistence.service.TableMaintenanceUtil.GENE2CS_QUERY_SPACE; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; /** * @author pavlidis @@ -294,7 +295,7 @@ public Map> getAuditEvents( Collection ids ) //noinspection unchecked List list = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).list(); + .setParameterList( "ids", optimizeParameterList( ids ) ).list(); Map> eventMap = new HashMap<>(); for ( Object[] o : list ) { Long id = ( Long ) o[0]; @@ -439,12 +440,11 @@ public Map isMerged( Collection ids ) { if ( ids.isEmpty() ) { return Collections.emptyMap(); } - Set distinctIds = new HashSet<>( ids ); //noinspection unchecked,rawtypes Set mergedIds = new HashSet<>( this.getSessionFactory().getCurrentSession() .createQuery( "select ad.id from ArrayDesign as ad join ad.mergees subs where ad.id in (:ids) group by ad" ) - .setParameterList( "ids", distinctIds ).list() ); - return distinctIds.stream().collect( Collectors.toMap( id -> id, mergedIds::contains ) ); + .setParameterList( "ids", optimizeParameterList( ids ) ).list() ); + return ids.stream().distinct().collect( Collectors.toMap( id -> id, mergedIds::contains ) ); } @Override @@ -452,12 +452,11 @@ public Map isMergee( final Collection ids ) { if ( ids.isEmpty() ) { return Collections.emptyMap(); } - Set distinctIds = new HashSet<>( ids ); //noinspection unchecked,rawtypes Set mergeeIds = new HashSet<>( this.getSessionFactory().getCurrentSession() .createQuery( "select ad.id from ArrayDesign as ad where ad.mergedInto.id is not null and ad.id in (:ids)" ) - .setParameterList( "ids", distinctIds ).list() ); - return distinctIds.stream().collect( Collectors.toMap( id -> id, mergeeIds::contains ) ); + .setParameterList( "ids", optimizeParameterList( ids ) ).list() ); + return ids.stream().distinct().collect( Collectors.toMap( id -> id, mergeeIds::contains ) ); } @Override @@ -465,12 +464,11 @@ public Map isSubsumed( final Collection ids ) { if ( ids.isEmpty() ) { return Collections.emptyMap(); } - Set distinctIds = new HashSet<>( ids ); //noinspection unchecked,rawtypes Set subsumedIds = new HashSet<>( this.getSessionFactory().getCurrentSession() .createQuery( "select ad.id from ArrayDesign as ad where ad.subsumingArrayDesign.id is not null and ad.id in (:ids)" ) - .setParameterList( "ids", distinctIds ).list() ); - return distinctIds.stream().collect( 
Collectors.toMap( id -> id, subsumedIds::contains ) ); + .setParameterList( "ids", optimizeParameterList( ids ) ).list() ); + return ids.stream().distinct().collect( Collectors.toMap( id -> id, subsumedIds::contains ) ); } @Override @@ -478,12 +476,11 @@ public Map isSubsumer( Collection ids ) { if ( ids.isEmpty() ) { return Collections.emptyMap(); } - Set distinctIds = new HashSet<>( ids ); //noinspection unchecked,rawtypes Set subsumerIds = new HashSet<>( this.getSessionFactory().getCurrentSession() .createQuery( "select ad.id from ArrayDesign as ad join ad.subsumedArrayDesigns subs where ad.id in (:ids) group by ad" ) - .setParameterList( "ids", distinctIds ).list() ); - return distinctIds.stream().collect( Collectors.toMap( id -> id, subsumerIds::contains ) ); + .setParameterList( "ids", optimizeParameterList( ids ) ).list() ); + return ids.stream().distinct().collect( Collectors.toMap( id -> id, subsumerIds::contains ) ); } @Override @@ -572,7 +569,7 @@ public long numAllCompositeSequenceWithBioSequences( Collection ids ) { "select count (distinct cs) from CompositeSequence as cs inner join cs.arrayDesign as ar " + " where ar.id in (:ids) and cs.biologicalCharacteristic.sequence is not null"; return ( Long ) this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).uniqueResult(); + .setParameterList( "ids", optimizeParameterList( ids ) ).uniqueResult(); } @Override @@ -594,7 +591,7 @@ public long numAllCompositeSequenceWithBlatResults( Collection ids ) { "select count (distinct cs) from CompositeSequence as cs inner join cs.arrayDesign as ar " + ", BlatResult as blat where blat.querySequence != null and ar.id in (:ids)"; return ( Long ) this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).uniqueResult(); + .setParameterList( "ids", optimizeParameterList( ids ) ).uniqueResult(); } @Override @@ -619,7 +616,7 @@ public long numAllCompositeSequenceWithGenes( Collection ids ) { + "where bs2gp.bioSequence=cs.biologicalCharacteristic and " + "bs2gp.geneProduct=gp and ar.id in (:ids)"; return ( Long ) this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).uniqueResult(); + .setParameterList( "ids", optimizeParameterList( ids ) ).uniqueResult(); } @Override @@ -644,7 +641,7 @@ public long numAllGenes( Collection ids ) { + "where bs2gp.bioSequence=cs.biologicalCharacteristic and " + "bs2gp.geneProduct=gp and ar.id in (:ids)"; return ( Long ) this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).uniqueResult(); + .setParameterList( "ids", optimizeParameterList( ids ) ).uniqueResult(); } @Override diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/AbstractDesignElementDataVectorDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/AbstractDesignElementDataVectorDao.java index 51d61c3a92..9f56829005 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/AbstractDesignElementDataVectorDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/AbstractDesignElementDataVectorDao.java @@ -26,7 +26,6 @@ import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.bioAssayData.BioAssayDimension; import ubic.gemma.model.expression.bioAssayData.DesignElementDataVector; -import 
ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.AbstractDao; @@ -34,6 +33,8 @@ import java.util.HashSet; import java.util.Set; +import static ubic.gemma.persistence.util.QueryUtils.optimizeIdentifiableParameterList; + /** * @author pavlidis * @see ubic.gemma.model.expression.bioAssayData.DesignElementDataVector @@ -84,7 +85,7 @@ public void thaw( Collection designElementDataVectors ) { eeTimer.start(); this.getSessionFactory().getCurrentSession() .createQuery( "select ee from ExpressionExperiment ee where ee in :ees" ) - .setParameterList( "ees", ees ) + .setParameterList( "ees", optimizeIdentifiableParameterList( ees ) ) .list(); eeTimer.stop(); } @@ -101,7 +102,7 @@ public void thaw( Collection designElementDataVectors ) { + "left join fetch fv.experimentalFactor " + "fetch all properties " + "where bad in :dims" ) - .setParameterList( "dims", dims ) + .setParameterList( "dims", optimizeIdentifiableParameterList( dims ) ) .list(); dimTimer.stop(); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorDaoImpl.java index 31eebd0d09..943ecde17b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorDaoImpl.java @@ -27,7 +27,6 @@ import org.springframework.stereotype.Repository; import ubic.basecode.dataStructure.matrix.DenseDoubleMatrix; import ubic.basecode.dataStructure.matrix.DoubleMatrix; -import ubic.basecode.util.BatchIterator; import ubic.gemma.core.analysis.preprocess.normalize.QuantileNormalizer; import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix; import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrixUtil; @@ -55,6 +54,8 @@ import java.util.*; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.*; + /** * @author Paul */ @@ -302,9 +303,7 @@ public Map>> getRanks( } Map>> result = new HashMap<>(); - BatchIterator batchIterator = new BatchIterator<>( cs2gene.keySet(), 500 ); - - for ( Collection batch : batchIterator ) { + for ( Collection batch : batchIdentifiableParameterList( cs2gene.keySet(), 512 ) ) { //language=HQL //noinspection unchecked @@ -313,7 +312,7 @@ public Map>> getRanks( + "where dedv.designElement in ( :cs ) and dedv.expressionExperiment in (:ees) " + "group by dedv.designElement, dedv.expressionExperiment" ) .setParameter( "cs", batch ) - .setParameterList( "ees", expressionExperiments ) + .setParameterList( "ees", optimizeIdentifiableParameterList( expressionExperiments ) ) .list(); for ( Object[] o : qr ) { @@ -352,7 +351,7 @@ public Map> getRanks( ExpressionExperiment expressionEx "select dedv.designElement, dedv.rankByMean, dedv.rankByMax from ProcessedExpressionDataVector dedv " + "where dedv.designElement in (:cs) and dedv.expressionExperiment = :ee " + "group by dedv.designElement, dedv.expressionExperiment" ) - .setParameterList( "cs", cs2gene.keySet() ) + .setParameterList( "cs", optimizeIdentifiableParameterList( cs2gene.keySet() ) ) .setParameter( "ee", expressionExperiment ) .list(); @@ -416,7 +415,7 @@ public Map>> ge + "from ProcessedExpressionDataVector dedv " + "where 
dedv.designElement in (:cs) and dedv.expressionExperiment in (:ees) " + "group by dedv.designElement, dedv.expressionExperiment" ) - .setParameterList( "cs", cs2gene.keySet() ) + .setParameterList( "cs", optimizeIdentifiableParameterList( cs2gene.keySet() ) ) .setParameterList( "ees", expressionExperiments ) .list(); @@ -493,8 +492,8 @@ public void removeProcessedDataVectors( ExpressionExperiment expressionExperimen qtsToRemove.forEach( expressionExperiment.getQuantitationTypes()::remove ); this.getSessionFactory().getCurrentSession().update( expressionExperiment ); this.getSessionFactory().getCurrentSession() - .createQuery( "delete from QuantitationType where id in (:ids)" ) - .setParameterList( "ids", EntityUtils.getIds( qtsToRemove ) ); + .createQuery( "delete from QuantitationType qt where qt in (:qts)" ) + .setParameterList( "qts", optimizeIdentifiableParameterList( qtsToRemove ) ); } } @@ -726,7 +725,7 @@ private Map> getBioAssayDimensions( + "inner join bad.bioAssays badba " + "where e in (:ees) and b in (badba) " + "group by e, bad" ) - .setParameterList( "ees", ees ) + .setParameterList( "ees", optimizeIdentifiableParameterList( ees ) ) .list(); for ( Object[] o : r ) { @@ -1002,9 +1001,9 @@ private Map> getProcessedVectors "select dedv, dedv.designElement.id from ProcessedExpressionDataVector dedv fetch all properties" + " where dedv.designElement.id in ( :cs ) " + ( ees != null ? " and dedv.expressionExperiment in :ees" : "" ) ) - .setParameterList( "cs", cs2gene.keySet() ); + .setParameterList( "cs", optimizeParameterList( cs2gene.keySet() ) ); if ( ees != null ) { - queryObject.setParameterList( "ees", ees ); + queryObject.setParameterList( "ees", optimizeIdentifiableParameterList( ees ) ); } Map> dedv2genes = new HashMap<>(); //noinspection unchecked diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/RawExpressionDataVectorDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/RawExpressionDataVectorDaoImpl.java index a981915865..4757ec8eb0 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/RawExpressionDataVectorDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/RawExpressionDataVectorDaoImpl.java @@ -24,6 +24,8 @@ import java.util.Collection; import java.util.HashSet; +import static ubic.gemma.persistence.util.QueryUtils.optimizeIdentifiableParameterList; + /** * @author paul */ @@ -58,10 +60,9 @@ public Collection find( Collection d //noinspection unchecked return this.getSessionFactory().getCurrentSession().createQuery( "select dev from RawExpressionDataVector as dev " - + "join dev.designElement as de " // no need for the fetch jointures since the design elements and biological characteristics are already in the session - + "where de in (:des) and dev.quantitationType = :qt" ) - .setParameterList( "des", designElements ) + + "where dev.designElement in (:des) and dev.quantitationType = :qt" ) + .setParameterList( "des", optimizeIdentifiableParameterList( designElements ) ) .setParameter( "qt", quantitationType ) .list(); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/designElement/CompositeSequenceDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/designElement/CompositeSequenceDaoImpl.java index 6e57a336e9..5ec7076ecb 100644 --- 
a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/designElement/CompositeSequenceDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/designElement/CompositeSequenceDaoImpl.java @@ -21,12 +21,14 @@ import gemma.gsec.util.SecurityUtil; import org.apache.commons.lang3.time.StopWatch; -import org.hibernate.*; +import org.hibernate.Criteria; +import org.hibernate.Hibernate; +import org.hibernate.Query; +import org.hibernate.SessionFactory; import org.hibernate.criterion.Restrictions; import org.hibernate.type.StandardBasicTypes; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; -import ubic.basecode.util.BatchIterator; import ubic.gemma.model.association.BioSequence2GeneProduct; import ubic.gemma.model.common.description.DatabaseEntry; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; @@ -45,7 +47,10 @@ import javax.annotation.Nullable; import java.util.*; +import static ubic.gemma.persistence.service.TableMaintenanceUtil.GENE2CS_BATCH_SIZE; import static ubic.gemma.persistence.service.TableMaintenanceUtil.GENE2CS_QUERY_SPACE; +import static ubic.gemma.persistence.util.QueryUtils.batchIdentifiableParameterList; +import static ubic.gemma.persistence.util.QueryUtils.batchParameterList; /** * @author pavlidis @@ -54,7 +59,8 @@ public class CompositeSequenceDaoImpl extends AbstractQueryFilteringVoEnabledDao implements CompositeSequenceDao { - private static final int PROBE_TO_GENE_MAP_BATCH_SIZE = 2000; + private static final int PROBE_TO_GENE_MAP_BATCH_SIZE = 2048; + /** * Absolute maximum number of records to return when fetching raw summaries. This is necessary to avoid retrieving * millions of records (some sequences are repeats and can have >200,000 records. 
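The QueryUtils.batchParameterList / batchIdentifiableParameterList helpers that these converted call sites rely on are not shown in this hunk. Purely as a rough sketch of the batching idiom the call sites assume (the method name is taken from the call sites above; the body is an assumption, not the project's actual implementation):

    // Illustrative sketch only -- the real ubic.gemma.persistence.util.QueryUtils may differ.
    import java.util.ArrayList;
    import java.util.Collection;
    import java.util.List;
    import java.util.TreeSet;

    final class BatchingSketch {
        private BatchingSketch() {
        }

        /**
         * Split the values into sorted, de-duplicated batches of at most batchSize elements,
         * each suitable for binding to a single "in (:param)" clause.
         */
        static <T extends Comparable<T>> List<Collection<T>> batchParameterList( Collection<T> values, int batchSize ) {
            List<T> sorted = new ArrayList<>( new TreeSet<>( values ) ); // sort + de-duplicate
            List<Collection<T>> batches = new ArrayList<>();
            for ( int i = 0; i < sorted.size(); i += batchSize ) {
                batches.add( sorted.subList( i, Math.min( i + batchSize, sorted.size() ) ) );
            }
            return batches;
        }
    }

Sorting and de-duplicating keeps the batch contents deterministic, and a fixed, power-of-two batch size (2048 above) bounds how many distinct IN-clause lengths the database and the query-plan cache ever see.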
@@ -236,42 +242,27 @@ public CompositeSequence findByName( ArrayDesign arrayDesign, final String name public Map> getGenes( Collection compositeSequences ) { Map> returnVal = new HashMap<>(); - int BATCH_SIZE = 2000; - - if ( compositeSequences.size() == 0 ) + if ( compositeSequences.isEmpty() ) return returnVal; - /* - * Get the cs->gene mapping - */ - final String nativeQuery = "SELECT CS, GENE FROM GENE2CS WHERE CS IN (:csids) "; - for ( CompositeSequence cs : compositeSequences ) { - returnVal.put( cs, new HashSet() ); + returnVal.put( cs, new HashSet<>() ); } + /* + * Get the cs->gene mapping + */ List csGene = new ArrayList<>(); - Session session = this.getSessionFactory().getCurrentSession(); - org.hibernate.SQLQuery queryObject = session.createSQLQuery( nativeQuery ); - queryObject.addScalar( "cs", StandardBasicTypes.LONG ); - queryObject.addScalar( "gene", StandardBasicTypes.LONG ); - queryObject.addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ); - queryObject.addSynchronizedEntityClass( ArrayDesign.class ); - queryObject.addSynchronizedEntityClass( CompositeSequence.class ); - queryObject.addSynchronizedEntityClass( Gene.class ); - - Collection csIdBatch = new HashSet<>(); - for ( CompositeSequence cs : compositeSequences ) { - csIdBatch.add( cs.getId() ); - - if ( csIdBatch.size() == BATCH_SIZE ) { - queryObject.setParameterList( "csids", csIdBatch ); - csGene.addAll( queryObject.list() ); - csIdBatch.clear(); - } - } - - if ( csIdBatch.size() > 0 ) { + Query queryObject = this.getSessionFactory().getCurrentSession() + .createSQLQuery( "SELECT CS, GENE FROM GENE2CS WHERE CS IN (:csids)" ) + .addScalar( "cs", StandardBasicTypes.LONG ) + .addScalar( "gene", StandardBasicTypes.LONG ) + .addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .addSynchronizedEntityClass( CompositeSequence.class ) + .addSynchronizedEntityClass( Gene.class ); + + for ( Collection csIdBatch : batchParameterList( EntityUtils.getIds( compositeSequences ), GENE2CS_BATCH_SIZE ) ) { queryObject.setParameterList( "csids", csIdBatch ); csGene.addAll( queryObject.list() ); } @@ -305,25 +296,12 @@ public Map> getGenes( Collection batch = new HashSet<>(); Collection genes = new HashSet<>(); String geneQuery = "from Gene g where g.id in ( :gs )"; - - org.hibernate.Query geneQueryObject = this.getSessionFactory().getCurrentSession().createQuery( geneQuery ) - .setFetchSize( 1000 ); - - for ( Long gene : genesToFetch ) { - batch.add( gene ); - if ( batch.size() == BATCH_SIZE ) { - AbstractDao.log.debug( "Processing batch ... " ); - geneQueryObject.setParameterList( "gs", batch ); - //noinspection unchecked - genes.addAll( geneQueryObject.list() ); - batch.clear(); - } - } - - if ( batch.size() > 0 ) { + org.hibernate.Query geneQueryObject = this.getSessionFactory().getCurrentSession() + .createQuery( geneQuery ); + for ( Collection batch : batchParameterList( genesToFetch, GENE2CS_BATCH_SIZE ) ) { + AbstractDao.log.debug( "Processing batch ... 
" ); geneQueryObject.setParameterList( "gs", batch ); //noinspection unchecked genes.addAll( geneQueryObject.list() ); @@ -397,14 +375,10 @@ public Map> getGenesWithS + " composite sequences" ); Map> results = new HashMap<>(); - BatchIterator it = BatchIterator - .batches( compositeSequences, CompositeSequenceDaoImpl.PROBE_TO_GENE_MAP_BATCH_SIZE ); - StopWatch timer = new StopWatch(); timer.start(); int total = 0; - for ( ; it.hasNext(); ) { - Collection batch = it.next(); + for ( Collection batch : batchIdentifiableParameterList( compositeSequences, CompositeSequenceDaoImpl.PROBE_TO_GENE_MAP_BATCH_SIZE ) ) { this.batchGetGenesWithSpecificity( batch, results ); total += batch.size(); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/BlacklistedEntityDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/BlacklistedEntityDaoImpl.java index c50b11c44d..ec05035ee3 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/BlacklistedEntityDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/BlacklistedEntityDaoImpl.java @@ -39,6 +39,8 @@ import java.util.Set; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * * @author paul @@ -142,7 +144,7 @@ public int removeAll() { if ( !deIds.isEmpty() ) { removedDe = getSessionFactory().getCurrentSession() .createQuery( "delete from DatabaseEntry where id in :deIds" ) - .setParameterList( "deIds", deIds ) + .setParameterList( "deIds", optimizeParameterList( deIds ) ) .executeUpdate(); } else { removedDe = 0; diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index a4769483e6..b10d03a01b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -67,6 +67,8 @@ import static java.util.stream.Collectors.summingLong; import static ubic.gemma.persistence.service.TableMaintenanceUtil.EE2AD_QUERY_SPACE; import static ubic.gemma.persistence.service.TableMaintenanceUtil.EE2C_QUERY_SPACE; +import static ubic.gemma.persistence.util.QueryUtils.optimizeIdentifiableParameterList; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; /** * @author pavlidis @@ -133,7 +135,7 @@ public Collection filterByTaxon( @Nullable Collection ids, Taxon tax //noinspection unchecked return this.getSessionFactory().getCurrentSession().createQuery( queryString ).setParameter( "taxon", taxon ) - .setParameterList( "ids", ids ).list(); + .setParameterList( "ids", optimizeParameterList( ids ) ).list(); } @Override @@ -247,44 +249,21 @@ public ExpressionExperiment findByBioMaterial( BioMaterial bm ) { @Override public Map findByBioMaterials( Collection bms ) { - if ( bms.size() == 0 ) { + if ( bms.isEmpty() ) { return new HashMap<>(); } - //language=HQL - final String queryString = "select ee, sample from ExpressionExperiment as ee " - + "inner join ee.bioAssays as ba inner join ba.sampleUsed as sample where sample in (:bms) group by ee, sample"; - + //noinspection unchecked + List r = this.getSessionFactory().getCurrentSession() + .createQuery( "select ee, sample from 
ExpressionExperiment as ee " + + "inner join ee.bioAssays as ba inner join ba.sampleUsed as sample where sample in (:bms) group by ee, sample" ) + .setParameterList( "bms", optimizeIdentifiableParameterList( bms ) ) + .list(); Map results = new HashMap<>(); - Collection batch = new HashSet<>(); - - for ( BioMaterial o : bms ) { - batch.add( o ); - if ( batch.size() == ExpressionExperimentDaoImpl.BATCH_SIZE ) { - - //noinspection unchecked - List r = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "bms", batch ).list(); - for ( Object a : r ) { - ExpressionExperiment e = ( ExpressionExperiment ) ( ( Object[] ) a )[0]; - BioMaterial b = ( BioMaterial ) ( ( Object[] ) a )[1]; // representative, there may have been multiple used as inputs - results.put( e, b ); - } - batch.clear(); - } - } - - if ( batch.size() > 0 ) { - - //noinspection unchecked - List r = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "bms", batch ).list(); - for ( Object a : r ) { - ExpressionExperiment e = ( ExpressionExperiment ) ( ( Object[] ) a )[0]; - BioMaterial b = ( BioMaterial ) ( ( Object[] ) a )[1]; // representative, there may have been multiple used as inputs - results.put( e, b ); - } + for ( Object[] a : r ) { + ExpressionExperiment e = ( ExpressionExperiment ) a[0]; + BioMaterial b = ( BioMaterial ) a[1]; // representative, there may have been multiple used as inputs + results.put( e, b ); } - return results; } @@ -321,19 +300,13 @@ public ExpressionExperiment findByDesign( ExperimentalDesign ed ) { @Override public ExpressionExperiment findByFactor( ExperimentalFactor ef ) { - //language=HQL - final String queryString = - "select ee from ExpressionExperiment as ee inner join ee.experimentalDesign ed " - + "inner join ed.experimentalFactors ef where ef = :ef group by ee"; - - List results = this.getSessionFactory().getCurrentSession().createQuery( queryString ).setParameter( "ef", ef ) - .list(); - - if ( results.size() == 0 ) { - AbstractDao.log.info( "There is no expression experiment that has factor = " + ef ); - return null; - } - return ( ExpressionExperiment ) results.iterator().next(); + return ( ExpressionExperiment ) this.getSessionFactory().getCurrentSession() + .createQuery( "select distinct ee from ExpressionExperiment as ee " + + "join ee.experimentalDesign ed " + + "join ed.experimentalFactors ef " + + "where ef = :ef" ) + .setParameter( "ef", ef ) + .uniqueResult(); } @Override @@ -343,64 +316,32 @@ public ExpressionExperiment findByFactorValue( FactorValue fv ) { @Override public ExpressionExperiment findByFactorValue( Long factorValueId ) { - //language=HQL - final String queryString = - "select ee from ExpressionExperiment as ee inner join ee.experimentalDesign ed " - + "inner join ed.experimentalFactors ef inner join ef.factorValues fv where fv.id = :fvId group by ee"; - - //noinspection unchecked - List results = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameter( "fvId", factorValueId ).list(); - - if ( results.size() == 0 ) { - return null; - } - - return results.get( 0 ); + return ( ExpressionExperiment ) this.getSessionFactory().getCurrentSession() + .createQuery( "select distinct ee from ExpressionExperiment as ee " + + "join ee.experimentalDesign ed " + + "join ed.experimentalFactors ef " + + "join ef.factorValues fv " + + "where fv.id = :fvId" ) + .setParameter( "fvId", factorValueId ) + .uniqueResult(); } @Override public Map findByFactorValues( 
Collection fvs ) { - if ( fvs.isEmpty() ) return new HashMap<>(); - - //language=HQL - final String queryString = "select ee, f from ExpressionExperiment ee " - + " join ee.experimentalDesign ed join ed.experimentalFactors ef join ef.factorValues f" - + " where f in (:fvs) group by ee, f"; Map results = new HashMap<>(); - Collection batch = new HashSet<>(); - for ( FactorValue o : fvs ) { - batch.add( o ); - if ( batch.size() == ExpressionExperimentDaoImpl.BATCH_SIZE ) { - - //noinspection unchecked - List r2 = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "fvs", batch ).list(); - for ( Object o1 : r2 ) { - Object[] a = ( Object[] ) o1; - results.put( ( ExpressionExperiment ) a[0], ( FactorValue ) a[1] ); - } - - batch.clear(); - } - } - - if ( batch.size() > 0 ) { - - //noinspection unchecked - List r2 = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "fvs", batch ).list(); - for ( Object o1 : r2 ) { - Object[] a = ( Object[] ) o1; - results.put( ( ExpressionExperiment ) a[0], ( FactorValue ) a[1] ); - } - + //noinspection unchecked + List r2 = this.getSessionFactory().getCurrentSession() + .createQuery( "select ee, f from ExpressionExperiment ee " + + "join ee.experimentalDesign ed join ed.experimentalFactors ef join ef.factorValues f " + + "where f in (:fvs) group by ee, f" ) + .setParameterList( "fvs", optimizeIdentifiableParameterList( fvs ) ) + .list(); + for ( Object[] row : r2 ) { + results.put( ( ExpressionExperiment ) row[0], ( FactorValue ) row[1] ); } - return results; - } @Override @@ -470,7 +411,7 @@ public List findByUpdatedLimit( Collection ids, int String queryString = "select e from ExpressionExperiment e join e.curationDetails s where e.id in (:ids) order by s.lastUpdated desc "; Query q = s.createQuery( queryString ); - q.setParameterList( "ids", ids ); + q.setParameterList( "ids", optimizeParameterList( ids ) ); q.setMaxResults( limit ); //noinspection unchecked @@ -513,9 +454,14 @@ public Map getAnnotationCounts( Collection ids ) { if ( ids.size() == 0 ) { return results; } - String queryString = "select e.id,count(c.id) from ExpressionExperiment e inner join e.characteristics c where e.id in (:ids) group by e.id"; - List res = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).list(); + //noinspection unchecked + List res = this.getSessionFactory().getCurrentSession() + .createQuery( "select e.id, count(c.id) from ExpressionExperiment e " + + "join e.characteristics c " + + "where e.id in (:ids) " + + "group by e" ) + .setParameterList( "ids", optimizeParameterList( ids ) ) + .list(); for ( Object[] ro : res ) { Long id = ( Long ) ro[0]; @@ -724,16 +670,16 @@ public Map getCategoriesUsageFrequency( @Nullable Collecti .setCacheable( true ) .setMaxResults( maxResults ); if ( eeIds != null ) { - q.setParameterList( "eeIds", new HashSet<>( eeIds ) ); + q.setParameterList( "eeIds", optimizeParameterList( eeIds ) ); } if ( excludedCategoryUris != null && !excludedCategoryUris.isEmpty() ) { - q.setParameterList( "excludedCategoryUris", excludedCategoryUris ); + q.setParameterList( "excludedCategoryUris", optimizeParameterList( excludedCategoryUris ) ); } if ( excludedTermUris != null && !excludedTermUris.isEmpty() ) { - q.setParameterList( "excludedTermUris", excludedTermUris ); + q.setParameterList( "excludedTermUris", optimizeParameterList( excludedTermUris ) ); } if ( retainedTermUris != null && !retainedTermUris.isEmpty() ) { 
- q.setParameterList( "retainedTermUris", retainedTermUris ); + q.setParameterList( "retainedTermUris", optimizeParameterList( retainedTermUris ) ); } EE2CAclQueryUtils.addAclParameters( q, ExpressionExperiment.class ); //noinspection unchecked @@ -829,19 +775,19 @@ else if ( category.startsWith( "http://" ) ) { .setCacheable( true ) .setMaxResults( maxResults ); if ( eeIds != null ) { - q.setParameterList( "eeIds", new HashSet<>( eeIds ) ); + q.setParameterList( "eeIds", optimizeParameterList( eeIds ) ); } if ( category != null && !category.equals( UNCATEGORIZED ) ) { q.setParameter( "category", category ); } if ( excludedCategoryUris != null && !excludedCategoryUris.isEmpty() ) { - q.setParameterList( "excludedCategoryUris", excludedCategoryUris ); + q.setParameterList( "excludedCategoryUris", optimizeParameterList( excludedCategoryUris ) ); } if ( excludedTermUris != null && !excludedTermUris.isEmpty() ) { - q.setParameterList( "excludedTermUris", excludedTermUris ); + q.setParameterList( "excludedTermUris", optimizeParameterList( excludedTermUris ) ); } if ( retainedTermUris != null && !retainedTermUris.isEmpty() ) { - q.setParameterList( "retainedTermUris", retainedTermUris ); + q.setParameterList( "retainedTermUris", optimizeParameterList( retainedTermUris ) ); } if ( level != null ) { q.setParameter( "level", level ); @@ -993,7 +939,7 @@ public Map getTechnologyTypeUsageFrequency( Collection getPlatformsUsageFrequency( @Nullable Collection< .addSynchronizedEntityClass( ArrayDesign.class ); query.setParameter( "original", original ); if ( eeIds != null ) { - query.setParameterList( "ids", eeIds ); + query.setParameterList( "ids", optimizeParameterList( eeIds ) ); } EE2CAclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); EE2CAclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); @@ -1075,22 +1021,19 @@ private Map getPlatformsUsageFrequency( @Nullable Collection< @Override public Map> getAuditEvents( Collection ids ) { - //language=HQL - final String queryString = - "select ee.id, auditEvent from ExpressionExperiment ee inner join ee.auditTrail as auditTrail inner join auditTrail.events as auditEvent " - + " where ee.id in (:ids) "; - - List result = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).list(); + //noinspection unchecked + List result = this.getSessionFactory().getCurrentSession() + .createQuery( "select ee.id, auditEvent from ExpressionExperiment ee " + + "join ee.auditTrail as auditTrail " + + "join auditTrail.events as auditEvent " + + "where ee.id in (:ids) " ) + .setParameterList( "ids", optimizeParameterList( ids ) ) + .list(); Map> eventMap = new HashMap<>(); - for ( Object o : result ) { - Object[] row = ( Object[] ) o; - Long id = ( Long ) row[0]; - AuditEvent event = ( AuditEvent ) row[1]; - - this.addEventsToMap( eventMap, id, event ); + for ( Object[] row : result ) { + this.addEventsToMap( eventMap, ( Long ) row[0], ( AuditEvent ) row[1] ); } // add in expression experiment ids that do not have events. Set // their values to null. 
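Most of the conversions in this file simply wrap IN-clause arguments in QueryUtils.optimizeParameterList. Its implementation is not part of this hunk; one common way such a helper is written, padding the sorted, de-duplicated list up to the next power of two so that fewer distinct IN-clause lengths reach the query-plan cache, is sketched below as an assumption rather than a description of the actual code:

    // Illustrative sketch only -- not necessarily what ubic.gemma.persistence.util.QueryUtils does.
    import java.util.ArrayList;
    import java.util.Collection;
    import java.util.List;
    import java.util.TreeSet;

    final class PaddingSketch {
        private PaddingSketch() {
        }

        static <T extends Comparable<T>> List<T> optimizeParameterList( Collection<T> values ) {
            List<T> sorted = new ArrayList<>( new TreeSet<>( values ) ); // sort + de-duplicate
            if ( sorted.isEmpty() ) {
                return sorted;
            }
            int padded = Integer.highestOneBit( sorted.size() );
            if ( padded < sorted.size() ) {
                padded <<= 1; // round up to the next power of two
            }
            T last = sorted.get( sorted.size() - 1 );
            while ( sorted.size() < padded ) {
                sorted.add( last ); // repeating a value in an IN (...) clause does not change the result
            }
            return sorted;
        }
    }

The padding trick is essentially the same idea as Hibernate's in_clause_parameter_padding setting: a duplicated bind value is harmless, but a bounded set of IN-clause sizes is much kinder to plan caches.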
@@ -1151,39 +1094,35 @@ public Collection getExperimentsWithOutliers() { @Override public Map getLastArrayDesignUpdate( Collection expressionExperiments ) { - //language=HQL - final String queryString = "select ee.id, max(s.lastUpdated) from ExpressionExperiment as ee inner join " - + "ee.bioAssays b inner join b.arrayDesignUsed a join a.curationDetails s " - + " where ee in (:ees) group by ee.id "; - - List res = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ees", expressionExperiments ).list(); - + if ( expressionExperiments.isEmpty() ) { + return Collections.emptyMap(); + } + //noinspection unchecked + List res = this.getSessionFactory().getCurrentSession() + .createQuery( "select ee.id, max(s.lastUpdated) from ExpressionExperiment as ee " + + "inner join ee.bioAssays b " + + "join b.arrayDesignUsed a " + + "join a.curationDetails s " + + "where ee in (:ees) " + + "group by ee.id" ) + .setParameterList( "ees", optimizeIdentifiableParameterList( expressionExperiments ) ) + .list(); assert ( !res.isEmpty() ); - Map result = new HashMap<>(); - for ( Object o : res ) { - Object[] oa = ( Object[] ) o; - Long id = ( Long ) oa[0]; - Date d = ( Date ) oa[1]; - result.put( id, d ); + for ( Object[] row : res ) { + result.put( ( Long ) row[0], ( Date ) row[1] ); } return result; } @Override public Date getLastArrayDesignUpdate( ExpressionExperiment ee ) { - - //language=HQL - final String queryString = "select max(s.lastUpdated) from ExpressionExperiment as ee inner join " - + "ee.bioAssays b inner join b.arrayDesignUsed a join a.curationDetails s " + " where ee = :ee "; - - List res = this.getSessionFactory().getCurrentSession().createQuery( queryString ).setParameter( "ee", ee ) - .list(); - - assert ( !res.isEmpty() ); - - return ( Date ) res.iterator().next(); + return ( Date ) this.getSessionFactory().getCurrentSession() + .createQuery( "select max(s.lastUpdated) from ExpressionExperiment as ee " + + "join ee.bioAssays b join b.arrayDesignUsed a join a.curationDetails s " + + "where ee = :ee" ) + .setParameter( "ee", ee ) + .uniqueResult(); } @Override @@ -1219,7 +1158,7 @@ public Map getPerTaxonCount( List ids ) { + "where ee.id in :eeIds " + "group by ee.taxon " + "order by EE_COUNT desc" ) - .setParameterList( "eeIds", ids ) + .setParameterList( "eeIds", optimizeParameterList( ids ) ) .list(); return list.stream() .collect( Collectors.toMap( row -> ( Taxon ) row[0], row -> ( Long ) row[1] ) ); @@ -1240,7 +1179,7 @@ public Map getPopulatedFactorCounts( Collection ids ) { + "ef where e.id in (:ids) group by e.id"; //noinspection unchecked List res = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ).list(); + .setParameterList( "ids", optimizeParameterList( ids ) ).list(); for ( Object[] ro : res ) { Long id = ( Long ) ro[0]; @@ -1267,7 +1206,7 @@ public Map getPopulatedFactorCountsExcludeBatch( Collection id //noinspection unchecked List res = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ids", ids ) // Set ids + .setParameterList( "ids", optimizeParameterList( ids ) ) // Set ids .setParameter( "category", ExperimentalFactorService.BATCH_FACTOR_CATEGORY_NAME ) // Set batch category .setParameter( "name", ExperimentalFactorService.BATCH_FACTOR_NAME ) // set batch name .list(); @@ -1350,7 +1289,7 @@ public Map> getSampleRemovalEvents( Map> result = new HashMap<>(); List r = this.getSessionFactory().getCurrentSession().createQuery( queryString 
) - .setParameterList( "ees", expressionExperiments ).list(); + .setParameterList( "ees", optimizeIdentifiableParameterList( expressionExperiments ) ).list(); for ( Object o : r ) { Object[] ol = ( Object[] ) o; @@ -1401,7 +1340,7 @@ public Map getTaxa( Collection bioAssaySets // FIXME: this query cannot be made cacheable because the taxon is not initialized when retrieved from the cache, defeating the purpose of caching altogether //noinspection unchecked List list = this.getSessionFactory().getCurrentSession().createQuery( queryString ) - .setParameterList( "ees", bioAssaySets ) + .setParameterList( "ees", optimizeIdentifiableParameterList( bioAssaySets ) ) .list(); //noinspection unchecked @@ -1467,7 +1406,7 @@ private Map> getExpressionExperimentDetai + "where ee.id in :eeIds " // FIXME: apply ACLs, other parts or platform might be private + "group by ee, ad, op, oe" ) - .setParameterList( "eeIds", expressionExperimentIds ) + .setParameterList( "eeIds", optimizeParameterList( expressionExperimentIds ) ) .setCacheable( cacheable ) .list(); return results.stream().collect( @@ -1491,7 +1430,7 @@ public List loadWithRelationsAndCache( List ids ) { + "left join s.lastTroubledEvent as eTrbl " + "left join ee.geeq as geeq " + "where ee.id in :ids" ) - .setParameterList( "ids", ids ) + .setParameterList( "ids", optimizeParameterList( ids ) ) .setCacheable( true ) // this transformer performs initialization of cached results .setResultTransformer( getEntityTransformer() ) @@ -2208,7 +2147,7 @@ private void populateArrayDesignCount( Collection results = getSessionFactory().getCurrentSession() .createQuery( "select ee.id, count(distinct ba.arrayDesignUsed) from ExpressionExperiment ee left join ee.bioAssays as ba where ee.id in (:ids) group by ee" ) - .setParameterList( "ids", EntityUtils.getIds( eevos ) ) + .setParameterList( "ids", optimizeParameterList( EntityUtils.getIds( eevos ) ) ) .list(); Map adCountById = results.stream().collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Long ) row[1] ) ); for ( ExpressionExperimentValueObject eevo : eevos ) { diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/FactorValueDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/FactorValueDaoImpl.java index 0bac3c746d..6c87900630 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/FactorValueDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/FactorValueDaoImpl.java @@ -37,6 +37,8 @@ import java.util.Set; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** *

* Base Spring DAO Class: is able to create, update, remove, load, and find objects of type @@ -88,7 +90,7 @@ public Map loadIdsWithNumberOfOldStyleCharacteristics( Set //noinspection unchecked result = ( List ) this.getSessionFactory().getCurrentSession() .createQuery( "select fv.id, size(fv.oldStyleCharacteristics) from FactorValue fv where fv.id not in :ids group by fv order by id" ) - .setParameterList( "ids", excludedIds ) + .setParameterList( "ids", optimizeParameterList( excludedIds ) ) .list(); } return result.stream().collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Integer ) row[1] ) ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDaoImpl.java index 60037a9108..98610cad04 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/GeneDaoImpl.java @@ -27,7 +27,6 @@ import org.springframework.beans.factory.InitializingBean; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; -import ubic.basecode.util.BatchIterator; import ubic.gemma.model.common.Describable; import ubic.gemma.model.common.Identifiable; import ubic.gemma.model.common.description.DatabaseEntry; @@ -48,6 +47,9 @@ import java.util.*; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.batchParameterList; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * Base Spring DAO Class: is able to create, update, remove, load, and find objects of type Gene. * @@ -177,7 +179,7 @@ public Map findByOfficialSymbols( Collection query, Long t //language=HQL final String queryString = "select g from Gene as g join fetch g.taxon t where g.officialSymbol in (:symbols) and t.id = :taxonId"; - for ( Collection batch : new BatchIterator<>( query, GeneDaoImpl.BATCH_SIZE ) ) { + for ( Collection batch : batchParameterList( query, getBatchSize() ) ) { //noinspection unchecked List results = this.getSessionFactory().getCurrentSession() .createQuery( queryString ) @@ -197,7 +199,7 @@ public Map findByNcbiIds( Collection ncbiIds ) { //language=HQL final String queryString = "from Gene g where g.ncbiGeneId in (:ncbi)"; - for ( Collection batch : new BatchIterator<>( ncbiIds, GeneDaoImpl.BATCH_SIZE ) ) { + for ( Collection batch : batchParameterList( ncbiIds, getBatchSize() ) ) { //noinspection unchecked List results = this.getSessionFactory().getCurrentSession() .createQuery( queryString ) @@ -332,8 +334,14 @@ public List loadThawed( Collection ids ) { return result; StopWatch timer = new StopWatch(); timer.start(); - for ( Collection batch : new BatchIterator<>( ids, GeneDaoImpl.BATCH_SIZE ) ) { - result.addAll( this.doLoadThawedLite( batch ) ); + for ( Collection batch : batchParameterList( ids, getBatchSize() ) ) { + //noinspection unchecked + result.addAll( this.getSessionFactory().getCurrentSession().createQuery( + "select distinct g from Gene g left join fetch g.aliases left join fetch g.accessions acc " + + "join fetch g.taxon t left join fetch g.products gp left join fetch g.multifunctionality " + + "where g.id in (:gIds)" ) + .setParameterList( "gIds", batch ) + .list() ); } if ( timer.getTime() > 1000 ) { AbstractDao.log.debug( "Load+thawRawAndProcessed " + result.size() + " genes: " + timer.getTime() + "ms" ); @@ -349,8 +357,12 @@ public Collection loadThawedLiter( Collection ids 
) { return result; StopWatch timer = new StopWatch(); timer.start(); - for ( Collection batch : new BatchIterator<>( ids, GeneDaoImpl.BATCH_SIZE ) ) { - result.addAll( this.doLoadThawedLiter( batch ) ); + for ( Collection batch : batchParameterList( ids, getBatchSize() ) ) { + //noinspection unchecked + result.addAll( this.getSessionFactory().getCurrentSession() + .createQuery( "select g from Gene g join fetch g.taxon t where g.id in (:gIds)" ) + .setParameterList( "gIds", batch ) + .list() ); } if ( timer.getTime() > 1000 ) { AbstractDao.log.debug( "Load+thawRawAndProcessed " + result.size() + " genes: " + timer.getTime() + "ms" ); @@ -391,22 +403,10 @@ public Gene thawAliases( final Gene gene ) { public Collection thawLite( final Collection genes ) { if ( genes.isEmpty() ) return new HashSet<>(); - Collection result = new HashSet<>(); - Collection batch = new HashSet<>(); - - for ( Gene g : genes ) { - batch.add( g ); - if ( batch.size() == GeneDaoImpl.BATCH_SIZE ) { - result.addAll( this.loadThawed( EntityUtils.getIds( batch ) ) ); - batch.clear(); - } - } - - if ( !batch.isEmpty() ) { - result.addAll( this.loadThawed( EntityUtils.getIds( batch ) ) ); + for ( Collection batch : batchParameterList( EntityUtils.getIds( genes ), getBatchSize() ) ) { + result.addAll( this.loadThawed( batch ) ); } - return result; } @@ -456,7 +456,7 @@ public int removeAll() { if ( !gpIds.isEmpty() ) { removedGeneProductsAccessions = getSessionFactory().getCurrentSession() .createSQLQuery( "delete from DATABASE_ENTRY where GENE_PRODUCT_FK in :gpIds" ) - .setParameterList( "gpIds", gpIds ) + .setParameterList( "gpIds", optimizeParameterList( gpIds ) ) .executeUpdate(); } else { removedGeneProductsAccessions = 0; @@ -472,7 +472,7 @@ public int removeAll() { if ( !gaIds.isEmpty() ) { removedAliases = getSessionFactory().getCurrentSession() .createQuery( "delete from GeneAlias ga where ga.id in :gaIds" ) - .setParameterList( "gaIds", gaIds ) + .setParameterList( "gaIds", optimizeParameterList( gaIds ) ) .executeUpdate(); } else { removedAliases = 0; @@ -488,7 +488,7 @@ public int removeAll() { if ( !plIds.isEmpty() ) { removedPhysicalLocations = getSessionFactory().getCurrentSession() .createQuery( "delete from PhysicalLocation pl where pl.id in :plIds" ) - .setParameterList( "plIds", plIds ) + .setParameterList( "plIds", optimizeParameterList( plIds ) ) .executeUpdate(); } else { removedPhysicalLocations = 0; @@ -658,22 +658,6 @@ protected void postProcessValueObjects( List geneValueObjects ) fillMultifunctionalityRank( geneValueObjects ); } - private Collection doLoadThawedLite( Collection ids ) { - //noinspection unchecked - return this.getSessionFactory().getCurrentSession().createQuery( - "select distinct g from Gene g left join fetch g.aliases left join fetch g.accessions acc " - + "join fetch g.taxon t left join fetch g.products gp left join fetch g.multifunctionality " - + "where g.id in (:gIds)" ).setParameterList( "gIds", ids ).list(); - } - - private Collection doLoadThawedLiter( Collection ids ) { - //noinspection unchecked - return this.getSessionFactory().getCurrentSession() - .createQuery( "select g from Gene g join fetch g.taxon t where g.id in (:gIds)" ) - .setParameterList( "gIds", ids ) - .list(); - } - /** * Returns genes in the region. 
*/ @@ -726,7 +710,7 @@ private void fillAliases( List geneValueObjects ) { //noinspection unchecked List results = getSessionFactory().getCurrentSession() .createQuery( "select g.id, a.alias from Gene g join g.aliases a where g.id in :ids" ) - .setParameterList( "ids", geneValueObjects.stream().map( GeneValueObject::getId ).collect( Collectors.toSet() ) ) + .setParameterList( "ids", optimizeParameterList( EntityUtils.getIds( geneValueObjects ) ) ) .list(); Map> aliasByGeneId = results.stream() .collect( Collectors.groupingBy( @@ -749,7 +733,7 @@ private void fillAccessions( List geneValueObjects ) { //noinspection unchecked List results = getSessionFactory().getCurrentSession() .createQuery( "select g.id, a from Gene g join g.accessions a where g.id in :ids" ) - .setParameterList( "ids", geneValueObjects.stream().map( GeneValueObject::getId ).collect( Collectors.toSet() ) ) + .setParameterList( "ids", optimizeParameterList( EntityUtils.getIds( geneValueObjects ) ) ) .list(); Map> accessionsByGeneId = results.stream() .collect( Collectors.groupingBy( @@ -786,7 +770,7 @@ private void fillMultifunctionalityRank( List geneValueObjects //noinspection unchecked List results = getSessionFactory().getCurrentSession() .createQuery( "select g.id, g.multifunctionality.rank from Gene g where g.id in :ids" ) - .setParameterList( "ids", ids ) + .setParameterList( "ids", optimizeParameterList( ids ) ) .list(); Map result = results.stream() .collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Double ) row[1] ) ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/biosequence/BioSequenceDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/biosequence/BioSequenceDaoImpl.java index f7db7346e3..eb5847f549 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/biosequence/BioSequenceDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/biosequence/BioSequenceDaoImpl.java @@ -32,11 +32,12 @@ import ubic.gemma.persistence.service.AbstractDao; import ubic.gemma.persistence.service.AbstractVoEnabledDao; import ubic.gemma.persistence.util.BusinessKey; -import ubic.gemma.persistence.util.EntityUtils; import javax.annotation.Nullable; import java.util.*; +import static ubic.gemma.persistence.util.QueryUtils.batchIdentifiableParameterList; + /** * @author pavlidis * @see ubic.gemma.model.genome.biosequence.BioSequence @@ -97,30 +98,19 @@ public BioSequence findByAccession( DatabaseEntry databaseEntry ) { public Map> findByGenes( Collection genes ) { if ( genes == null || genes.isEmpty() ) return new HashMap<>(); - Map> results = new HashMap<>(); - - int batchSize = 500; - - if ( genes.size() <= batchSize ) { - this.findByGenesBatch( genes, results ); - return results; - } - - Collection batch = new HashSet<>(); - - for ( Gene gene : genes ) { - batch.add( gene ); - if ( batch.size() == batchSize ) { - this.findByGenesBatch( genes, results ); - batch.clear(); + for ( Collection batch : batchIdentifiableParameterList( genes, 500 ) ) { + //noinspection unchecked + List qr = this.getSessionFactory().getCurrentSession().createQuery( + "select distinct gene, bs from Gene gene " + + "join fetch gene.products ggp, BioSequence bs " + + "join bs.bioSequence2GeneProduct bs2gp join bs2gp.geneProduct bsgp " + + "where ggp = bsgp and gene in (:genes)" ) + .setParameterList( "genes", batch ).list(); + for ( Object[] row : qr ) { + results.computeIfAbsent( ( Gene ) row[0], k -> new HashSet<>() ).add( ( BioSequence ) row[1] ); } 
} - - if ( !batch.isEmpty() ) { - this.findByGenesBatch( genes, results ); - } - return results; } @@ -155,18 +145,15 @@ public Collection thaw( final Collection bioSequences return new HashSet<>(); Collection result = new HashSet<>(); - Collection batch = new HashSet<>(); - - for ( BioSequence g : bioSequences ) { - batch.add( g ); - if ( batch.size() == 100 ) { - result.addAll( this.doThawBatch( batch ) ); - batch.clear(); - } - } - - if ( !batch.isEmpty() ) { - result.addAll( this.doThawBatch( batch ) ); + for ( Collection batch : batchIdentifiableParameterList( bioSequences, 100 ) ) { + //noinspection unchecked + result.addAll( this.getSessionFactory().getCurrentSession().createQuery( "select b from BioSequence b " + + "left join fetch b.taxon tax left join fetch tax.externalDatabase left join fetch b.sequenceDatabaseEntry s " + + "left join fetch s.externalDatabase" + " left join fetch b.bioSequence2GeneProduct bs2gp " + + "left join fetch bs2gp.geneProduct gp left join fetch gp.gene g " + + "left join fetch g.aliases left join fetch g.accessions where b in (:bs)" ) + .setParameterList( "bs", batch ) + .list() ); } return result; @@ -243,34 +230,6 @@ public BioSequence find( BioSequence bioSequence ) { return ( BioSequence ) result; } - private Collection doThawBatch( Collection batch ) { - //noinspection unchecked - return this.getSessionFactory().getCurrentSession().createQuery( "select b from BioSequence b " - + " left join fetch b.taxon tax left join fetch tax.externalDatabase left join fetch b.sequenceDatabaseEntry s " - + " left join fetch s.externalDatabase" + " left join fetch b.bioSequence2GeneProduct bs2gp " - + " left join fetch bs2gp.geneProduct gp left join fetch gp.gene g" - + " left join fetch g.aliases left join fetch g.accessions where b.id in (:bids)" ) - .setParameterList( "bids", EntityUtils.getIds( batch ) ) - .list(); - } - - private void findByGenesBatch( Collection genes, Map> results ) { - //noinspection unchecked - List qr = this.getSessionFactory().getCurrentSession().createQuery( - "select distinct gene,bs from Gene gene inner join fetch gene.products ggp," - + " BioSequence bs inner join bs.bioSequence2GeneProduct bs2gp inner join bs2gp.geneProduct bsgp" - + " where ggp=bsgp and gene in (:genes)" ) - .setParameterList( "genes", genes ).list(); - for ( Object[] oa : qr ) { - Gene g = ( Gene ) oa[0]; - BioSequence b = ( BioSequence ) oa[1]; - if ( !results.containsKey( g ) ) { - results.put( g, new HashSet() ); - } - results.get( g ).add( b ); - } - } - private void debug( @Nullable BioSequence query, List results ) { StringBuilder sb = new StringBuilder(); sb.append( "\nMultiple BioSequences found matching query:\n" ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSetDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSetDaoImpl.java index 0c2ea712a4..b7e490a1dc 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSetDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/gene/GeneSetDaoImpl.java @@ -39,6 +39,8 @@ import java.util.*; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * Base Spring DAO Class: is able to create, update, remove, load, and find objects of type * ubic.gemma.model.genome.gene.GeneSet. 
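The same batch-and-merge shape recurs in the DAOs converted above (BibliographicReferenceDaoImpl, BioSequenceDaoImpl, CompositeSequenceDaoImpl, GeneDaoImpl, ...): iterate over batches, run one query per batch, and fold the Object[] rows into a map. A hypothetical, self-contained illustration of that idiom, using placeholder entity names rather than Gemma classes and the batching helper sketched earlier:

    // Illustrative sketch only; "Parent"/"children" are placeholder mappings, not Gemma entities.
    import java.util.Collection;
    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Map;
    import java.util.Set;

    import org.hibernate.Session;

    final class BatchQuerySketch {
        private BatchQuerySketch() {
        }

        static Map<Long, Set<Long>> groupChildrenByParent( Session session, Collection<Long> childIds ) {
            Map<Long, Set<Long>> result = new HashMap<>();
            for ( Collection<Long> batch : BatchingSketch.batchParameterList( childIds, 1024 ) ) {
                //noinspection unchecked
                List<Object[]> rows = session
                        .createQuery( "select p.id, c.id from Parent p join p.children c where c.id in (:ids)" )
                        .setParameterList( "ids", batch )
                        .list();
                for ( Object[] row : rows ) {
                    // group child ids under their parent, avoiding the containsKey/put dance
                    result.computeIfAbsent( ( Long ) row[0], k -> new HashSet<>() ).add( ( Long ) row[1] );
                }
            }
            return result;
        }
    }

Compared with the hand-rolled loops being removed, this keeps query construction out of the loop body and drops the trailing "leftover batch" block that each DAO previously had to repeat.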
@@ -134,7 +136,7 @@ public List loadValueObjectsByIdsLite( Collect + "left join m.gene.taxon t " + "where g.id in :ids " + "group by g.id" ) - .setParameterList( "ids", ids ) + .setParameterList( "ids", optimizeParameterList( ids ) ) .list(); return fillValueObjects( result ); } @@ -280,7 +282,7 @@ private void fillGeneIds( List result ) { //noinspection unchecked List r = getSessionFactory().getCurrentSession() .createQuery( "select g.id, genes.id from GeneSet g join g.members m join m.gene genes where g.id in :ids" ) - .setParameterList( "ids", ids ) + .setParameterList( "ids", optimizeParameterList( ids ) ) .list(); Map> geneIdsByGeneSetId = r.stream() .collect( Collectors.groupingBy( row -> ( Long ) row[0], Collectors.mapping( row -> ( Long ) row[1], Collectors.toSet() ) ) ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/AnnotationAssociationDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/AnnotationAssociationDaoImpl.java index 1a38872791..8c94f1e54b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/AnnotationAssociationDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/AnnotationAssociationDaoImpl.java @@ -32,6 +32,8 @@ import java.util.Collections; import java.util.HashSet; +import static ubic.gemma.persistence.util.QueryUtils.optimizeIdentifiableParameterList; + /** * @author paul */ @@ -117,6 +119,6 @@ public Collection find( Collection gps ) { //noinspection unchecked return this.getSessionFactory().getCurrentSession() .createQuery( "select b from AnnotationAssociation b join b.geneProduct gp where gp in (:gps)" ) - .setParameterList( "gps", gps ).list(); + .setParameterList( "gps", optimizeIdentifiableParameterList( gps ) ).list(); } } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatAssociationDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatAssociationDaoImpl.java index 2adf92a1dd..7019d86d89 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatAssociationDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatAssociationDaoImpl.java @@ -19,7 +19,9 @@ package ubic.gemma.persistence.service.genome.sequenceAnalysis; import org.apache.commons.lang3.StringUtils; -import org.hibernate.*; +import org.hibernate.Criteria; +import org.hibernate.Hibernate; +import org.hibernate.SessionFactory; import org.hibernate.criterion.Restrictions; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; @@ -34,6 +36,8 @@ import java.util.Collections; import java.util.HashSet; +import static ubic.gemma.persistence.util.QueryUtils.optimizeIdentifiableParameterList; + /** *

* Base Spring DAO Class: is able to create, update, remove, load, and find objects of type @@ -112,7 +116,7 @@ public Collection find( Collection gps ) { Collections.emptySet() : this.getSessionFactory().getCurrentSession() .createQuery( "select b from BlatAssociation b join b.geneProduct gp where gp in (:gps)" ) - .setParameterList( "gps", gps ).list(); + .setParameterList( "gps", optimizeIdentifiableParameterList( gps ) ).list(); } } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatResultDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatResultDaoImpl.java index 35c4333b9d..d17571b5bf 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatResultDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/genome/sequenceAnalysis/BlatResultDaoImpl.java @@ -23,19 +23,17 @@ import org.hibernate.SessionFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Repository; -import org.springframework.transaction.annotation.Transactional; import ubic.gemma.model.genome.biosequence.BioSequence; import ubic.gemma.model.genome.sequenceAnalysis.BlatResult; import ubic.gemma.model.genome.sequenceAnalysis.BlatResultValueObject; import ubic.gemma.persistence.service.AbstractVoEnabledDao; import ubic.gemma.persistence.util.BusinessKey; -import ubic.gemma.persistence.util.EntityUtils; -import java.sql.Connection; import java.util.Collection; -import java.util.LinkedHashSet; import java.util.List; +import static ubic.gemma.persistence.util.QueryUtils.optimizeIdentifiableParameterList; + /** *

* Base Spring DAO Class: is able to create, update, remove, load, and find objects of type @@ -79,7 +77,7 @@ public Collection thaw( Collection blatResults ) { + " left join fetch t.externalDatabase left join fetch qs.sequenceDatabaseEntry s " + " left join fetch s.externalDatabase" + " where b in :blatResults" ) - .setParameterList( "blatResults", blatResults ) + .setParameterList( "blatResults", optimizeIdentifiableParameterList( blatResults ) ) .list(); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/CommonQueries.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/CommonQueries.java index 2050865c40..153133ed4b 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/CommonQueries.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/CommonQueries.java @@ -31,7 +31,9 @@ import java.util.*; +import static ubic.gemma.persistence.service.TableMaintenanceUtil.GENE2CS_BATCH_SIZE; import static ubic.gemma.persistence.service.TableMaintenanceUtil.GENE2CS_QUERY_SPACE; +import static ubic.gemma.persistence.util.QueryUtils.*; /** * Contains methods to perform 'common' queries that are needed across DAOs. @@ -58,7 +60,7 @@ public static Map> getArrayDesignsUsed( Collection + "ee.bioAssays b inner join b.arrayDesignUsed ad fetch all properties where ee.id in (:ees)"; org.hibernate.Query queryObject = session.createQuery( eeAdQuery ); - queryObject.setParameterList( "ees", ees ); + queryObject.setParameterList( "ees", optimizeParameterList( ees ) ); queryObject.setReadOnly( true ); queryObject.setFlushMode( FlushMode.MANUAL ); @@ -93,7 +95,7 @@ public static Map> getArrayDesignsUsedEEMap( Collection> getArrayDesignsUsedEEMap( Collection possibleEEsubsets = ListUtils.removeAll( ees, ee2ads.keySet() ); // note: CollectionUtils.removeAll has a bug. 
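The hunks above all follow the same call-site pattern: the raw collection handed to setParameterList is first passed through QueryUtils.optimizeParameterList, which sorts it, drops duplicates and pads it to the next power of two so the generated SQL shape, and therefore its cached plan, can be reused across calls. A minimal sketch of that pattern, assuming a plain Hibernate Session; the HQL string and entity name are placeholders rather than actual Gemma DAO code:

import org.hibernate.Session;

import java.util.Collection;
import java.util.List;

import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList;

class OptimizedInClauseSketch {

    @SuppressWarnings("unchecked")
    static List<Object> loadByIds( Session session, Collection<Long> ids ) {
        // the sorted, de-duplicated, padded list keeps the number of distinct
        // "in (?, ?, ...)" shapes small; duplicated trailing values do not change the result
        return session
                .createQuery( "select g from Gene g where g.id in :ids" )
                .setParameterList( "ids", optimizeParameterList( ids ) )
                .list();
    }
}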
- qr = session.createQuery( subsetQuery ).setParameterList( "ees", possibleEEsubsets ).list(); + qr = session.createQuery( subsetQuery ).setParameterList( "ees", optimizeParameterList( possibleEEsubsets ) ).list(); CommonQueries.addAllAds( ee2ads, qr ); } @@ -233,25 +235,25 @@ private static void addGeneIds( Map> cs2genes, Query quer */ public static Map> getCs2GeneIdMap( Collection genes, Collection arrayDesigns, Session session ) { + if ( genes.isEmpty() || arrayDesigns.isEmpty() ) { + return Collections.emptyMap(); + } - Map> cs2genes = new HashMap<>(); - - String queryString = "SELECT CS AS csid, GENE AS geneId FROM GENE2CS g WHERE g.GENE IN (:geneIds) AND g.AD IN (:ads)"; - SQLQuery queryObject = session.createSQLQuery( queryString ); - queryObject.addScalar( "csid", LongType.INSTANCE ); - queryObject.addScalar( "geneId", LongType.INSTANCE ); - queryObject.setParameterList( "ads", arrayDesigns ); - queryObject.setParameterList( "geneIds", genes ); - queryObject.setReadOnly( true ); - queryObject.addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ); - queryObject.addSynchronizedEntityClass( ArrayDesign.class ); - queryObject.addSynchronizedEntityClass( CompositeSequence.class ); - queryObject.addSynchronizedEntityClass( Gene.class ); - - CommonQueries.addGeneIds( cs2genes, queryObject ); + Query queryObject = session.createSQLQuery( "SELECT CS AS csid, GENE AS geneId FROM GENE2CS g WHERE g.GENE IN (:geneIds) AND g.AD IN (:ads)" ) + .addScalar( "csid", LongType.INSTANCE ) + .addScalar( "geneId", LongType.INSTANCE ) + .addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .addSynchronizedEntityClass( CompositeSequence.class ) + .addSynchronizedEntityClass( Gene.class ) + .setParameterList( "ads", optimizeParameterList( arrayDesigns ) ) + .setReadOnly( true ); + Map> cs2genes = new HashMap<>(); + for ( Collection batch : batchParameterList( genes, GENE2CS_BATCH_SIZE ) ) { + CommonQueries.addGeneIds( cs2genes, queryObject.setParameterList( "geneIds", batch ) ); + } return cs2genes; - } public static Map> getCs2GeneMap( Collection genes, @@ -267,8 +269,8 @@ public static Map> getCs2GeneMap( Collection Map> cs2gene = new HashMap<>(); Query queryObject = session.createQuery( csQueryString ); queryObject.setCacheable( true ); - queryObject.setParameterList( "genes", genes ); - queryObject.setParameterList( "ads", arrayDesigns ); + queryObject.setParameterList( "genes", optimizeIdentifiableParameterList( genes ) ); + queryObject.setParameterList( "ads", optimizeIdentifiableParameterList( arrayDesigns ) ); queryObject.setReadOnly( true ); queryObject.setFlushMode( FlushMode.MANUAL ); @@ -297,7 +299,7 @@ public static Map> getCs2GeneMap( Collection Map> cs2gene = new HashMap<>(); org.hibernate.Query queryObject = session.createQuery( csQueryString ); queryObject.setCacheable( true ); - queryObject.setParameterList( "genes", genes ); + queryObject.setParameterList( "genes", optimizeIdentifiableParameterList( genes ) ); queryObject.setReadOnly( true ); queryObject.setFlushMode( FlushMode.MANUAL ); @@ -334,39 +336,39 @@ public static Map> getCs2GeneMapForProbes( Collection(); - Map> cs2genes = new HashMap<>(); - - String queryString = "SELECT CS AS csid, GENE AS geneId FROM GENE2CS g WHERE g.CS IN (:probes) "; - org.hibernate.SQLQuery queryObject = session.createSQLQuery( queryString ); - queryObject.addScalar( "csid", LongType.INSTANCE ); - queryObject.addScalar( "geneId", LongType.INSTANCE ); - queryObject.setParameterList( "probes", probes, 
LongType.INSTANCE ); - queryObject.setReadOnly( true ); - queryObject.addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ); - queryObject.addSynchronizedEntityClass( ArrayDesign.class ); - queryObject.addSynchronizedEntityClass( CompositeSequence.class ); - queryObject.addSynchronizedEntityClass( Gene.class ); - - CommonQueries.addGeneIds( cs2genes, queryObject ); + Query queryObject = session.createSQLQuery( "SELECT CS AS csid, GENE AS geneId FROM GENE2CS g WHERE g.CS IN (:probes) " ) + .addScalar( "csid", LongType.INSTANCE ) + .addScalar( "geneId", LongType.INSTANCE ) + .addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .addSynchronizedEntityClass( CompositeSequence.class ) + .addSynchronizedEntityClass( Gene.class ) + .setReadOnly( true ); + Map> cs2genes = new HashMap<>(); + for ( Collection batch : batchParameterList( probes, GENE2CS_BATCH_SIZE ) ) { + CommonQueries.addGeneIds( cs2genes, queryObject.setParameterList( "probes", batch ) ); + } return cs2genes; } public static Collection filterProbesByPlatform( Collection probes, Collection arrayDesignIds, Session session ) { - assert probes != null && !probes.isEmpty(); - assert arrayDesignIds != null && !arrayDesignIds.isEmpty(); - String queryString = "SELECT CS AS csid FROM GENE2CS WHERE AD IN (:adids) AND CS IN (:probes)"; - org.hibernate.SQLQuery queryObject = session.createSQLQuery( queryString ); - queryObject.addScalar( "csid", LongType.INSTANCE ); - queryObject.setParameterList( "probes", probes, LongType.INSTANCE ); - queryObject.setParameterList( "adids", arrayDesignIds, LongType.INSTANCE ); - queryObject.addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ); - queryObject.addSynchronizedEntityClass( ArrayDesign.class ); - queryObject.addSynchronizedEntityClass( CompositeSequence.class ); - queryObject.addSynchronizedEntityClass( Gene.class ); - //noinspection unchecked - return queryObject.list(); + if ( probes.isEmpty() || arrayDesignIds.isEmpty() ) { + return Collections.emptyList(); + } + Query queryObject = session.createSQLQuery( "SELECT CS AS csid FROM GENE2CS WHERE AD IN (:adids) AND CS IN (:probes)" ) + .addScalar( "csid", LongType.INSTANCE ) + .addSynchronizedQuerySpace( GENE2CS_QUERY_SPACE ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .addSynchronizedEntityClass( CompositeSequence.class ) + .addSynchronizedEntityClass( Gene.class ) + .setParameterList( "adids", optimizeParameterList( arrayDesignIds ), LongType.INSTANCE ); + List results = new ArrayList<>(); + for ( Collection batch : batchParameterList( probes, GENE2CS_BATCH_SIZE ) ) { + //noinspection unchecked + results.addAll( queryObject.setParameterList( "probes", batch, LongType.INSTANCE ).list() ); + } + return results; } - } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/EntityUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/EntityUtils.java index b37888600e..cd538f7520 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/EntityUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/EntityUtils.java @@ -36,6 +36,8 @@ import java.util.*; import java.util.stream.Collectors; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; + /** * @author paul */ @@ -192,7 +194,7 @@ public static void addUserAndGroupParameters( SQLQuery queryObject, SessionFacto Collection groups = sessionFactory.getCurrentSession().createQuery( "select ug.name from UserGroup ug inner join ug.groupMembers memb where memb.userName = :user" ) 
.setParameter( "user", userName ).list(); - queryObject.setParameterList( "groups", groups ); + queryObject.setParameterList( "groups", optimizeParameterList( groups ) ); } if ( sqlQuery.contains( ":userName" ) ) { diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterCriteriaUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterCriteriaUtils.java index 1df9fb163a..b25a6d6f3d 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterCriteriaUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterCriteriaUtils.java @@ -9,6 +9,7 @@ import java.util.Objects; import static ubic.gemma.persistence.util.PropertyMappingUtils.formProperty; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; /** * Utilities for integrating {@link Filter} with Hibernate {@link Criteria} API. @@ -87,8 +88,11 @@ private static Criterion formRestrictionClause( Filter filter ) { case greaterOrEq: return Restrictions.ge( property, filter.getRequiredValue() ); case in: - return Restrictions.in( property, ( Collection ) Objects.requireNonNull( filter.getRequiredValue(), - "Required value cannot be null for a collection." ) ); + if ( !( filter.getRequiredValue() instanceof Collection ) ) { + throw new IllegalArgumentException( "Required value must be a non-null collection for the 'in' operator." ); + } + //noinspection rawtypes,unchecked + return Restrictions.in( property, optimizeParameterList( ( Collection ) filter.getRequiredValue() ) ); default: throw new IllegalStateException( "Unexpected operator for filter: " + filter.getOperator() ); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterQueryUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterQueryUtils.java index 94084aada7..0355f323c4 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterQueryUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/FilterQueryUtils.java @@ -6,10 +6,10 @@ import javax.annotation.Nullable; import java.util.Collection; import java.util.List; -import java.util.stream.Collectors; import static java.util.Objects.requireNonNull; import static ubic.gemma.persistence.util.PropertyMappingUtils.formProperty; +import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; /** * Utilities for integrating {@link Filter} into {@link org.hibernate.Query}. @@ -211,9 +211,12 @@ private static void addRestrictionParameters( Query query, @Nullable Filters fil Subquery s = ( Subquery ) requireNonNull( subClause.getRequiredValue() ); addRestrictionParameters( query, Filters.by( s.getFilter() ), i - 1 ); } else if ( subClause.getOperator().equals( Filter.Operator.in ) ) { + if ( !( subClause.getRequiredValue() instanceof Collection ) ) { + throw new IllegalArgumentException( "Required value must be a non-null collection for the 'in' operator." ); + } // order is unimportant for this operation, so we can ensure that it is consistent and therefore cacheable - query.setParameterList( paramName, requireNonNull( ( Collection ) subClause.getRequiredValue(), "Required value cannot be null for the 'in' operator." 
) - .stream().sorted().distinct().collect( Collectors.toList() ) ); + //noinspection rawtypes,unchecked + query.setParameterList( paramName, optimizeParameterList( ( Collection ) subClause.getRequiredValue() ) ); } else if ( subClause.getOperator().equals( Filter.Operator.like ) ) { query.setParameter( paramName, escapeLike( ( String ) requireNonNull( subClause.getRequiredValue(), "Required value cannot be null for the 'like' operator." ) ) + "%" ); } else { diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/HibernateUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/HibernateUtils.java new file mode 100644 index 0000000000..4fad0586b7 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/HibernateUtils.java @@ -0,0 +1,34 @@ +package ubic.gemma.persistence.util; + +import lombok.extern.apachecommons.CommonsLog; +import org.hibernate.SessionFactory; +import org.hibernate.engine.spi.SessionFactoryImplementor; +import org.hibernate.metadata.ClassMetadata; +import org.hibernate.persister.entity.AbstractEntityPersister; +import org.springframework.util.ReflectionUtils; + +import java.lang.reflect.Field; + +@CommonsLog +public class HibernateUtils { + + private static final String BATCH_FETCH_SIZE_SETTING = "gemma.hibernate.default_batch_fetch_size"; + + /** + * Obtain the batch fetch size for the given class. + */ + public static int getBatchSize( SessionFactory sessionFactory, ClassMetadata classMetadata ) { + if ( classMetadata instanceof AbstractEntityPersister ) { + Field field = ReflectionUtils.findField( AbstractEntityPersister.class, "batchSize" ); + ReflectionUtils.makeAccessible( field ); + return ( int ) ReflectionUtils.getField( field, classMetadata ); + } else if ( sessionFactory instanceof SessionFactoryImplementor ) { + return ( ( SessionFactoryImplementor ) sessionFactory ).getSettings() + .getDefaultBatchFetchSize(); + } else { + log.warn( String.format( "Could not determine batch size for %s, will fallback to the %s setting.", + classMetadata.getEntityName(), BATCH_FETCH_SIZE_SETTING ) ); + return Settings.getInt( BATCH_FETCH_SIZE_SETTING, -1 ); + } + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java new file mode 100644 index 0000000000..400877ee13 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java @@ -0,0 +1,121 @@ +package ubic.gemma.persistence.util; + +import lombok.extern.apachecommons.CommonsLog; +import org.springframework.util.Assert; +import ubic.gemma.core.util.ListUtils; +import ubic.gemma.model.common.Identifiable; + +import java.util.*; +import java.util.stream.Collectors; + +/** + * Utilities for {@link org.hibernate.Query}. + * @author poirigui + */ +@CommonsLog +public class QueryUtils { + + /** + * Largest parameter list size for which {@link #optimizeParameterList(Collection)} should be used. Past this size, + * no padding will be performed. + */ + private static final int MAX_PARAMETER_LIST_SIZE = 2048; + + /** + * Optimize a given parameter list by sorting, removing duplicates and padding to the next power of two. + *

+ * This is a temporary solution until we update to Hibernate 5.2.18 which introduced {@code hibernate.query.in_clause_parameter_padding}. + * Read more about this topic. + */ + public static > Collection optimizeParameterList( Collection list ) { + if ( list.size() < 2 ) { + return list; + } + List sortedList = list.stream() + .sorted( Comparator.nullsLast( Comparator.naturalOrder() ) ) + .distinct() + .collect( Collectors.toList() ); + if ( sortedList.size() > MAX_PARAMETER_LIST_SIZE ) { + log.warn( String.format( "Optimizing a large parameter list of size %d may have a negative impact on performance, use batchParameterList() instead.", + sortedList.size() ) ); + return list; + } + return ListUtils.padToNextPowerOfTwo( sortedList, sortedList.get( sortedList.size() - 1 ) ); + } + + /** + * Optimize a collection of {@link Identifiable} entities. + * @see #optimizeParameterList(Collection) + */ + public static Collection optimizeIdentifiableParameterList( Collection list ) { + if ( list.size() < 2 ) { + return list; + } + List sortedList = list.stream() + .sorted( Comparator.comparing( Identifiable::getId, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) + .distinct() + .collect( Collectors.toList() ); + if ( sortedList.size() > MAX_PARAMETER_LIST_SIZE ) { + log.warn( String.format( "Optimizing a large parameter list of size %d may have a negative impact on performance, use batchIdentifiableParameterList() instead.", + sortedList.size() ) ); + return list; + } + return ListUtils.padToNextPowerOfTwo( sortedList, sortedList.get( sortedList.size() - 1 ) ); + } + + /** + * Partition a parameter list into a collection of batches of a given size. + *

+ * It is recommended to use a power of two in case the same query is also prepared via + * {@link #optimizeParameterList(Collection)}. This will make it so that the execution plan can be reused. + */ + public static > Iterable> batchParameterList( Collection list, int batchSize ) { + Assert.isTrue( batchSize == -1 || batchSize > 0, "Batch size must be strictly positive or equal to -1." ); + if ( list.isEmpty() ) { + return Collections.emptyList(); + } + List sortedList = list.stream() + .sorted( Comparator.nullsLast( Comparator.naturalOrder() ) ) + .distinct() + .collect( Collectors.toList() ); + return batch( sortedList, batchSize ); + } + + public static Iterable> batchIdentifiableParameterList( Collection list, int batchSize ) { + Assert.isTrue( batchSize == -1 || batchSize > 0, "Batch size must be strictly positive or equal to -1." ); + if ( list.isEmpty() ) { + return Collections.emptyList(); + } + List sortedList = list.stream() + .sorted( Comparator.comparing( Identifiable::getId, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) + .distinct() + .collect( Collectors.toList() ); + return batch( sortedList, batchSize ); + } + + private static Iterable> batch( List list, int batchSize ) { + if ( batchSize == -1 ) { + return Collections.singletonList( list ); + } + int numberOfBatches = ( list.size() / batchSize ) + ( list.size() % batchSize > 0 ? 1 : 0 ); + int size = numberOfBatches * batchSize; + List paddedList = ( List ) ListUtils.pad( list, list.get( list.size() - 1 ), size ); + return () -> new Iterator>() { + private int i = 0; + + @Override + public boolean hasNext() { + return i < numberOfBatches; + } + + @Override + public List next() { + try { + return paddedList.subList( i * batchSize, ( i + 1 ) * batchSize ); + } finally { + i += 1; + } + } + }; + } +} diff --git a/gemma-core/src/main/resources/default.properties b/gemma-core/src/main/resources/default.properties index 0e95d0939d..f424733211 100755 --- a/gemma-core/src/main/resources/default.properties +++ b/gemma-core/src/main/resources/default.properties @@ -167,7 +167,8 @@ gemma.hibernate.show_sql=false gemma.hibernate.jdbc_fetch_size=128 gemma.hibernate.jdbc_batch_size=32 # Default size for batch-fetching data (adjust as needed, requires more memory!) 
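For parameter lists that can be arbitrarily large (such as the GENE2CS probe and gene id sets above), the new batching helpers are used instead of a single padded IN list: the collection is sorted and split into fixed-size batches, with the last batch padded using its final element so every execution binds the same number of parameters. A minimal usage sketch, assuming a plain Hibernate Session; the SQL string and the literal batch size stand in for the real query and TableMaintenanceUtil.GENE2CS_BATCH_SIZE:

import org.hibernate.SQLQuery;
import org.hibernate.Session;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import static ubic.gemma.persistence.util.QueryUtils.batchParameterList;

class BatchedNativeQuerySketch {

    static List<Object[]> fetchProbeGenePairs( Session session, Collection<Long> probeIds ) {
        SQLQuery query = session.createSQLQuery( "SELECT CS, GENE FROM GENE2CS WHERE CS IN (:probes)" );
        List<Object[]> rows = new ArrayList<>();
        // every batch has exactly the same size, so only one statement shape is ever prepared
        for ( List<Long> batch : batchParameterList( probeIds, 2048 ) ) {
            //noinspection unchecked
            rows.addAll( query.setParameterList( "probes", batch ).list() );
        }
        return rows;
    }
}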
-gemma.hibernate.default_batch_fetch_size=100 +# It's beneficial to use a power of two because the query plan can be shared with other queries +gemma.hibernate.default_batch_fetch_size=128 #coexpression vis/grid properties #controls how many results will be returned per query gene: gemma.coexpressionSearch.maxResultsPerQueryGene=200 diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java index c26027600d..8cd4e08cae 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java @@ -62,7 +62,7 @@ public FactoryBean sessionFactory( DataSource dataSource ) { props.setProperty( "hibernate.dialect", H2Dialect.class.getName() ); props.setProperty( "hibernate.cache.use_second_level_cache", "false" ); props.setProperty( "hibernate.max_fetch_depth", "3" ); - props.setProperty( "hibernate.default_batch_fetch_size", "100" ); + props.setProperty( "hibernate.default_batch_fetch_size", "128" ); props.setProperty( "hibernate.jdbc.fetch_size", "128" ); props.setProperty( "hibernate.jdbc.batch_size", "32" ); props.setProperty( "hibernate.jdbc.batch_versioned_data", "true" ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/HibernateConfigTest.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/HibernateConfigTest.java index e38599fed2..f2097d704f 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/HibernateConfigTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/HibernateConfigTest.java @@ -76,7 +76,7 @@ public FactoryBean sessionFactory( DataSource dataSource ) { public void test() { Settings settings = ( ( SessionFactoryImpl ) sessionFactory ).getSettings(); assertEquals( 3, settings.getMaximumFetchDepth().intValue() ); - assertEquals( 100, settings.getDefaultBatchFetchSize() ); + assertEquals( 128, settings.getDefaultBatchFetchSize() ); assertEquals( 128, settings.getJdbcFetchSize().intValue() ); assertEquals( 32, settings.getJdbcBatchSize() ); assertTrue( settings.isJdbcBatchVersionedData() ); diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/AbstractDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/AbstractDaoTest.java index 4483de2c45..c59b808f7e 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/AbstractDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/AbstractDaoTest.java @@ -4,19 +4,20 @@ import org.hibernate.FlushMode; import org.hibernate.Session; import org.hibernate.SessionFactory; +import org.hibernate.cfg.Settings; import org.hibernate.criterion.Restrictions; +import org.hibernate.engine.spi.SessionFactoryImplementor; import org.hibernate.metadata.ClassMetadata; import org.hibernate.proxy.HibernateProxy; import org.hibernate.proxy.LazyInitializer; import org.junit.Before; import org.junit.Test; -import org.mockito.internal.verification.VerificationModeFactory; import ubic.gemma.model.common.Identifiable; -import java.util.ArrayList; import java.util.Arrays; -import java.util.Collection; import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.LongStream; import static org.mockito.Mockito.*; @@ -43,19 +44,22 @@ public MyDao( SessionFactory sessionFactory ) { } } - private SessionFactory sessionFactory; + private SessionFactoryImplementor sessionFactory; + private Settings settings; private Session session; - private MyDao myDao; 
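The switch from 100 to 128 in the settings and tests above follows the same reasoning as the parameter-list padding: when every IN-clause length is a power of two, the number of distinct statement shapes the database and the query plan cache ever see stays small. A short self-contained illustration of that effect (purely illustrative, not part of the patch):

import java.util.Set;
import java.util.TreeSet;

class PaddingPlanCountSketch {

    // smallest power of two that is greater than or equal to n
    static int nextPowerOfTwo( int n ) {
        int p = 1;
        while ( p < n ) {
            p <<= 1;
        }
        return p;
    }

    public static void main( String[] args ) {
        Set<Integer> rawSizes = new TreeSet<>();
        Set<Integer> paddedSizes = new TreeSet<>();
        for ( int size = 1; size <= 1000; size++ ) {
            rawSizes.add( size );                      // one statement shape per request size
            paddedSizes.add( nextPowerOfTwo( size ) ); // collapses to 1, 2, 4, ..., 1024
        }
        System.out.println( "shapes without padding: " + rawSizes.size() ); // 1000
        System.out.println( "shapes with padding:    " + paddedSizes.size() ); // 11
    }
}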
@Before public void setUp() { session = mock( Session.class ); - sessionFactory = mock( SessionFactory.class ); + sessionFactory = mock( SessionFactoryImplementor.class ); ClassMetadata myEntityClassMetadata = mock( ClassMetadata.class ); when( myEntityClassMetadata.getIdentifierPropertyName() ).thenReturn( "id" ); when( myEntityClassMetadata.getMappedClass() ).thenReturn( MyEntity.class ); + settings = mock( Settings.class ); + when( settings.getDefaultBatchFetchSize() ).thenReturn( -1 ); when( sessionFactory.getClassMetadata( MyEntity.class ) ).thenReturn( myEntityClassMetadata ); when( sessionFactory.getCurrentSession() ).thenReturn( session ); + when( sessionFactory.getSettings() ).thenReturn( settings ); when( session.getFlushMode() ).thenReturn( FlushMode.AUTO ); } @@ -64,8 +68,8 @@ private static abstract class MyEntityProxy extends MyEntity implements Hibernat } @Test - public void testLoadByCollection() { - myDao = new MyDao( sessionFactory ); + public void testLoadByIds() { + MyDao myDao = new MyDao( sessionFactory ); Criteria mockCriteria = mock( Criteria.class ); when( mockCriteria.add( any() ) ).thenReturn( mockCriteria ); when( session.createCriteria( MyEntity.class ) ).thenReturn( mockCriteria ); @@ -86,7 +90,32 @@ public void testLoadByCollection() { verify( session ).load( MyEntity.class, 5L ); verify( session ).createCriteria( MyEntity.class ); verifyNoMoreInteractions( session ); - verify( mockCriteria ).add( argThat( criterion -> criterion.toString().equals( Restrictions.in( "id", ids ).toString() ) ) ); + verify( mockCriteria ).add( argThat( criterion -> criterion.toString().equals( Restrictions.in( "id", Arrays.asList( 1L, 2L, 3L, 4L, 5L, 5L, 5L, 5L ) ).toString() ) ) ); verify( mockCriteria ).list(); } + + @Test + public void testBatchLoadingByIds() { + when( settings.getDefaultBatchFetchSize() ).thenReturn( 128 ); + MyDao myDao = new MyDao( sessionFactory ); + Criteria mockCriteria = mock( Criteria.class ); + when( mockCriteria.add( any() ) ).thenReturn( mockCriteria ); + when( session.createCriteria( MyEntity.class ) ).thenReturn( mockCriteria ); + when( session.load( any( Class.class ), any() ) ).thenAnswer( a -> { + MyEntityProxy entity = mock( MyEntityProxy.class ); + LazyInitializer lazyInitializer = mock( LazyInitializer.class ); + when( lazyInitializer.isUninitialized() ).thenReturn( true ); + when( entity.getId() ).thenReturn( a.getArgument( 1 ) ); + when( entity.getHibernateLazyInitializer() ).thenReturn( lazyInitializer ); + return entity; + } ); + List ids = LongStream.range( 0, 1200 ).boxed().collect( Collectors.toList() ); + myDao.load( ids ); + verify( session, times( 1200 ) ).load( eq( MyEntity.class ), any() ); + verify( session, times( 10 ) ).createCriteria( MyEntity.class ); + verifyNoMoreInteractions( session ); + verify( mockCriteria, times( 10 ) ).add( any() ); + verify( mockCriteria, times( 10 ) ).list(); + verifyNoMoreInteractions( mockCriteria ); + } } diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/util/ListUtilsTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/util/ListUtilsTest.java index 0745b433b4..44e2d5cc03 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/util/ListUtilsTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/util/ListUtilsTest.java @@ -4,6 +4,7 @@ import ubic.gemma.core.util.ListUtils; import java.util.Arrays; +import java.util.Collections; import java.util.Map; import static org.assertj.core.api.Assertions.assertThat; @@ -33,4 +34,12 @@ public void 
testIndexOfCaseInsensitiveStringElements() { assertThat( str2position.get( "A" ) ).isEqualTo( 0 ); assertThat( str2position.get( "baBa" ) ).isEqualTo( 2 ); } + + @Test + public void testPadToNextPowerOfTwo() { + assertThat( ListUtils.padToNextPowerOfTwo( Collections.emptyList(), null ) ).hasSize( 0 ); + assertThat( ListUtils.padToNextPowerOfTwo( Arrays.asList( 1L, 2L, 3L ), null ) ).hasSize( 4 ); + assertThat( ListUtils.padToNextPowerOfTwo( Arrays.asList( 1L, 2L, 3L, 4L ), null ) ).hasSize( 4 ); + assertThat( ListUtils.padToNextPowerOfTwo( Arrays.asList( 1L, 2L, 3L, 4L, 5L ), null ) ).hasSize( 8 ); + } } \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/util/QueryUtilsTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/util/QueryUtilsTest.java new file mode 100644 index 0000000000..e86ee42c37 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/persistence/util/QueryUtilsTest.java @@ -0,0 +1,46 @@ +package ubic.gemma.persistence.util; + +import org.junit.Test; +import ubic.gemma.model.expression.arrayDesign.ArrayDesign; + +import javax.annotation.Nullable; +import java.util.ArrayList; +import java.util.Arrays; + +import static org.assertj.core.api.Assertions.assertThat; +import static ubic.gemma.persistence.util.QueryUtils.*; + +public class QueryUtilsTest { + + @Test + public void test() { + assertThat( optimizeParameterList( Arrays.asList( 1L, 2L, null, 0L ) ) ) + .containsExactly( 0L, 1L, 2L, null ); + } + + @Test + public void testIdentifiable() { + assertThat( optimizeIdentifiableParameterList( Arrays.asList( createArrayDesign( 2L ), + createArrayDesign( 1L ), createArrayDesign( 1L ), createArrayDesign( null ) ) ) ) + .extracting( ArrayDesign::getId ) + .containsExactly( 1L, 2L, null, null ); + } + + @Test + public void testBatchParameterList() { + assertThat( batchParameterList( new ArrayList(), 4 ) ) + .isEmpty(); + assertThat( batchParameterList( Arrays.asList( 1, 2, 3 ), 4 ) ) + .containsExactly( Arrays.asList( 1, 2, 3, 3 ) ); + assertThat( batchParameterList( Arrays.asList( 1, 2, 3, 4 ), 4 ) ) + .containsExactly( Arrays.asList( 1, 2, 3, 4 ) ); + assertThat( batchParameterList( Arrays.asList( 1, 2, 3, null, 4, 14, 23, 1 ), 4 ) ) + .containsExactly( Arrays.asList( 1, 2, 3, 4 ), Arrays.asList( 14, 23, null, null ) ); + } + + private ArrayDesign createArrayDesign( @Nullable Long id ) { + ArrayDesign ad = new ArrayDesign(); + ad.setId( id ); + return ad; + } +} \ No newline at end of file From 12cd5ba0db3668f7c649e4580e79b32fd61759f2 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 18 Mar 2024 12:32:06 -0700 Subject: [PATCH 022/105] Make homepage paragraph occupy 1/3 of the available space --- gemma-web/src/main/webapp/pages/home.jsp | 9 ++++----- .../src/main/webapp/styles/antisense/responsive.css | 4 ++++ 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/gemma-web/src/main/webapp/pages/home.jsp b/gemma-web/src/main/webapp/pages/home.jsp index e68914cba3..f892341d2e 100755 --- a/gemma-web/src/main/webapp/pages/home.jsp +++ b/gemma-web/src/main/webapp/pages/home.jsp @@ -39,7 +39,7 @@ Ext.onReady( function() {

-
+
Gemma provides data, experimental design annotations, and differential expression analysis results for thousands of microarray and RNA-seq experiments. We re-analyze raw data from public sources (primarily NCBI --%> -
- + Example of a dataset view overlaid with a heatmap of top differentially expressed probes. <%-- <%--
--%> -
+
diff --git a/gemma-web/src/main/webapp/styles/antisense/responsive.css b/gemma-web/src/main/webapp/styles/antisense/responsive.css index df202e2790..71b744e00c 100644 --- a/gemma-web/src/main/webapp/styles/antisense/responsive.css +++ b/gemma-web/src/main/webapp/styles/antisense/responsive.css @@ -38,6 +38,10 @@ display: none; } +.w-100 { + width: 100%; +} + /* small screens */ @media (min-width: 576px) { .container { From a72338582cf91dd4c6970fcea90b994ab9c5e870 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 18 Mar 2024 13:18:42 -0700 Subject: [PATCH 023/105] Use specific names for connection pools --- .../src/main/java/ubic/gemma/core/externalDb/GoldenPath.java | 1 + .../main/resources/ubic/gemma/applicationContext-dataSource.xml | 1 + 2 files changed, 2 insertions(+) diff --git a/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java b/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java index 8fd74aa307..06239e13dc 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java +++ b/gemma-core/src/main/java/ubic/gemma/core/externalDb/GoldenPath.java @@ -58,6 +58,7 @@ private static JdbcTemplate createJdbcTemplateFromConfig( Taxon taxon ) { // SimpleDriverDataSource dataSource = new SimpleDriverDataSource(); HikariDataSource dataSource = new HikariDataSource(); + dataSource.setPoolName( "goldenpath" ); String driverClassName = Settings.getString( "gemma.goldenpath.db.driver" ); String url = Settings.getString( "gemma.goldenpath.db.url" ); String user = Settings.getString( "gemma.goldenpath.db.user" ); diff --git a/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml b/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml index 1fd0bf6c58..5c50319099 100644 --- a/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml +++ b/gemma-core/src/main/resources/ubic/gemma/applicationContext-dataSource.xml @@ -16,6 +16,7 @@ + From 5ce3e587f24ffb2170de6a8f8322c7f4824e276b Mon Sep 17 00:00:00 2001 From: Paul Pavlidis Date: Tue, 19 Mar 2024 13:42:46 -0700 Subject: [PATCH 024/105] mentioned FIXME to be addressed in baseCode next release --- .../expression/bioAssayData/DoubleVectorValueObject.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/DoubleVectorValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/DoubleVectorValueObject.java index 4f92824366..c2f69d097a 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/DoubleVectorValueObject.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/bioAssayData/DoubleVectorValueObject.java @@ -245,10 +245,6 @@ public boolean isSliced() { */ public double[] standardize() { - /* - * FIXME If the values are all equal, variance == 0 and we get nothing back. So we should fill in zeros instead. - */ - /* * DoubleArrayList constructor does not make a copy, so we have to make one. 
*/ @@ -257,7 +253,6 @@ public double[] standardize() { DescriptiveWithMissing.standardize( new DoubleArrayList( copy ) ); return copy; - } /** From 1fdb0fc30a92819cda0fda88ef66184717e7ead2 Mon Sep 17 00:00:00 2001 From: Paul Pavlidis Date: Tue, 19 Mar 2024 14:53:20 -0700 Subject: [PATCH 025/105] remove side-effect filtering This means all-missing data can be returned but side effect was too blunt, and changes to basecode mean that all-missing data will crop up less often in visualizations https://github.com/PavlidisLab/Gemma/issues/90 --- .../controller/visualization/VisualizationValueObject.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/visualization/VisualizationValueObject.java b/gemma-web/src/main/java/ubic/gemma/web/controller/visualization/VisualizationValueObject.java index 36367055f5..a5c3406c81 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/visualization/VisualizationValueObject.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/visualization/VisualizationValueObject.java @@ -176,8 +176,8 @@ public VisualizationValueObject( Collection vectors, Li GeneExpressionProfile profile = new GeneExpressionProfile( vector, vectorGenes, color, valid, vector.getPvalue() ); - if ( !profile.isAllMissing() ) - profiles.add( profile ); + // if ( !profile.isAllMissing() ) // this might not be a desirable side-effect. + profiles.add( profile ); } } From cae5925120a90969fb54d6abef2b1be8e6a5db33 Mon Sep 17 00:00:00 2001 From: Paul Pavlidis Date: Tue, 19 Mar 2024 14:54:09 -0700 Subject: [PATCH 026/105] addressing https://github.com/PavlidisLab/Gemma/issues/90 - require higher expression levels for "random" vectors - Also randomization change --- .../ProcessedExpressionDataVectorDaoImpl.java | 41 ++++++++++++------- .../expression/experiment/DEDVController.java | 12 +++--- 2 files changed, 34 insertions(+), 19 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorDaoImpl.java index 943ecde17b..0890641df9 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/bioAssayData/ProcessedExpressionDataVectorDaoImpl.java @@ -1033,8 +1033,8 @@ private Map> getProcessedVectors } /** - * @param limit if non-null and positive, you will get a random set of vectors for the experiment * @param ee ee + * @param limit if >0, you will get a "random" set of vectors for the experiment * @return processed data vectors */ private Collection getProcessedVectors( ExpressionExperiment ee, int limit ) { @@ -1045,7 +1045,7 @@ private Collection getProcessedVectors( Expressio StopWatch timer = new StopWatch(); timer.start(); - List result; + Collection result = new HashSet<>(); Integer availableVectorCount = ee.getNumberOfDataVectors(); if ( availableVectorCount == null || availableVectorCount == 0 ) { @@ -1053,26 +1053,39 @@ private Collection getProcessedVectors( Expressio // cannot fix this here, because we're read-only. } + /* + * To help ensure we get a good random set of items, we can do several queries with different random offsets. 
+ */ + // int numSegments = 2; + // int segmentSize = ( int ) Math.ceil( limit / numSegments ); + int segmentSize = limit; +// if ( limit < numSegments ) { +// segmentSize = limit; +// } + Query q = this.getSessionFactory().getCurrentSession() .createQuery( " from ProcessedExpressionDataVector dedv " - + "where dedv.expressionExperiment = :ee" ); + + "where dedv.expressionExperiment = :ee and dedv.rankByMean > 0.5 order by RAND()" ); // order by rand() works? q.setParameter( "ee", ee ); - q.setMaxResults( limit ); - if ( availableVectorCount != null && availableVectorCount > limit ) { - q.setFirstResult( new Random().nextInt( availableVectorCount - limit ) ); - } + q.setMaxResults( segmentSize ); - // we should already be read-only, so this is probably pointless. - q.setReadOnly( true ); + int k = 0; + while ( result.size() < limit ) { + // int firstResult = new Random().nextInt( availableVectorCount - segmentSize ); + // q.setFirstResult( firstResult ); + List list = q.list(); + // log.info( list.size() + " retrieved this time firstResult=" + 0 ); + result.addAll( list ); + k++; + } - // and so this probably doesn't do anything useful. - q.setFlushMode( FlushMode.MANUAL ); + if ( result.size() > limit ) { + result = result.stream().limit( limit ).collect( Collectors.toSet() ); + } - //noinspection unchecked - result = q.list(); if ( timer.getTime() > 1000 ) AbstractDao.log - .info( "Fetch " + limit + " vectors from " + ee.getShortName() + ": " + timer.getTime() + "ms" ); + .info( "Fetch " + result.size() + " vectors from " + ee.getShortName() + ": " + timer.getTime() + "ms, " + k + " queries were run." ); if ( result.isEmpty() ) { AbstractDao.log.warn( "Experiment does not have any processed data vectors" ); diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/DEDVController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/DEDVController.java index ccf62ba1cc..2d0b57bf86 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/DEDVController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/DEDVController.java @@ -479,9 +479,11 @@ public VisualizationValueObject[] getDEDVForVisualization( Collection eeId Collection dedvs; if ( geneIds == null || geneIds.isEmpty() ) { - dedvs = processedExpressionDataVectorService.getProcessedDataArrays( ees.iterator().next(), SAMPLE_SIZE ); + dedvs = processedExpressionDataVectorService.getProcessedDataArrays( ees.iterator().next(), SAMPLE_SIZE ); + if ( dedvs.size() > SAMPLE_SIZE ) { + dedvs = new ArrayList<>( dedvs ).subList( 0, SAMPLE_SIZE ); + } } else { - if ( geneIds.size() > MAX_RESULTS_TO_RETURN ) { log.warn( geneIds.size() + " genes for visualization. Too many. Only using first " + MAX_RESULTS_TO_RETURN + " genes. " ); @@ -512,9 +514,9 @@ public VisualizationValueObject[] getDEDVForVisualization( Collection eeId time = watch.getTime(); watch.reset(); watch.start(); - if ( time > 100 ) { + if ( time > 500 ) { log.info( "Ran sortVectorDataByDesign on " + dedvs.size() + " DEDVs for " + eeIds.size() + " EEs" + " in " - + time + " ms (times <100ms not reported)." ); + + time + " ms (times <500ms not reported)." 
); } watch.stop(); @@ -1235,7 +1237,7 @@ private VisualizationValueObject[] makeVisCollection( Collection 1000 ) { - log.info( "Created vis value objects in: " + time ); + log.info( "Created " + result.length + " vis value objects in: " + time ); } return result; From 40925a8f095ebc9af25f6b1050ae1f3802db7e76 Mon Sep 17 00:00:00 2001 From: Paul Pavlidis Date: Wed, 20 Mar 2024 12:13:06 -0700 Subject: [PATCH 027/105] cleanup --- .../model/expression/experiment/Geeq.java | 40 ------------------- 1 file changed, 40 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/Geeq.java b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/Geeq.java index 7c9768baa0..8cd024a0ff 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/Geeq.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/Geeq.java @@ -38,14 +38,6 @@ public class Geeq implements Identifiable, Serializable { private static final long serialVersionUID = 4783171234360698630L; private Long id; - - /* - * FIXME: ideally we would get rid of these direct associations as these are events in the experiments audit trail. - */ - // private AuditEvent lastRun; -// private AuditEvent lastManualOverride; -// private AuditEvent lastBatchEffectChange; -// private AuditEvent lastBatchConfoundChange; private double detectedQualityScore; private double manualQualityScore; @@ -497,38 +489,6 @@ public void setqScoreSampleCorrelationVariance( double qScoreSampleCorrelationVa this.qScoreSampleCorrelationVariance = qScoreSampleCorrelationVariance; } -// public AuditEvent getLastRun() { -// return lastRun; -// } -// -// public void setLastRun( AuditEvent lastRun ) { -// this.lastRun = lastRun; -// } - -// public AuditEvent getLastManualOverride() { -// return lastManualOverride; -// } -// -// public void setLastManualOverride( AuditEvent lastManualOverride ) { -// this.lastManualOverride = lastManualOverride; -// } -// -// public AuditEvent getLastBatchEffectChange() { -// return lastBatchEffectChange; -// } -// -// public void setLastBatchEffectChange( AuditEvent lastBatchEffectChange ) { -// this.lastBatchEffectChange = lastBatchEffectChange; -// } - -// public AuditEvent getLastBatchConfoundChange() { -// return lastBatchConfoundChange; -// } -// -// public void setLastBatchConfoundChange( AuditEvent lastBatchConfoundChange ) { -// this.lastBatchConfoundChange = lastBatchConfoundChange; -// } - public boolean isNoVectors() { return noVectors; } From 139453ad1e35720ff279c5f7f1d601cd74e13bf0 Mon Sep 17 00:00:00 2001 From: Paul Pavlidis Date: Wed, 20 Mar 2024 12:14:09 -0700 Subject: [PATCH 028/105] address bug caused by geeq elements being serialized with a capital Q ... 
--- .../experiment/ExpressionExperimentTools.js | 2650 +++++++++-------- 1 file changed, 1330 insertions(+), 1320 deletions(-) diff --git a/gemma-web/src/main/webapp/scripts/api/entities/experiment/ExpressionExperimentTools.js b/gemma-web/src/main/webapp/scripts/api/entities/experiment/ExpressionExperimentTools.js index f26c4e8252..893bd2e7d9 100755 --- a/gemma-web/src/main/webapp/scripts/api/entities/experiment/ExpressionExperimentTools.js +++ b/gemma-web/src/main/webapp/scripts/api/entities/experiment/ExpressionExperimentTools.js @@ -1,4 +1,4 @@ -Ext.namespace('Gemma'); +Ext.namespace( 'Gemma' ); Ext.BLANK_IMAGE_URL = ctxBasePath + '/images/default/s.gif'; /** @@ -11,938 +11,938 @@ Ext.BLANK_IMAGE_URL = ctxBasePath + '/images/default/s.gif'; * @extends Gemma.CurationTools * */ -Gemma.ExpressionExperimentTools = Ext.extend(Gemma.CurationTools, { - - allowScoreOverride: false, - experimentDetails: null, - tbar: new Ext.Toolbar(), - bconfFolded: true, - beffFolded: true, - qualFolded: true, - suitFolded: true, - - /** - * @memberOf Gemma.ExpressionExperimentTools - */ - initComponent: function () { - this.curatable = this.experimentDetails; - this.auditable = { - id: this.experimentDetails.id, - classDelegatingFor: "ubic.gemma.model.expression.experiment.ExpressionExperiment" - }; - Gemma.ExpressionExperimentTools.superclass.initComponent.call(this); - var manager = new Gemma.EEManager({ - editable: this.editable - }); - manager.on('reportUpdated', function () { - this.fireEvent('reloadNeeded'); - }, this); - - var self = this; - - var eeRow = new Ext.Panel({ - cls: 'ee-tool-row', - defaults: { - width: '100%', - border: false, - padding: 2 - } - }); - - eeRow.add({ - html: '
' - }); - - var refreshButton = new Ext.Button({ - text: '', - cls: 'btn-refresh nobreak', - tooltip: 'Refresh preprocessing statistics', - handler: function () { - manager.updateEEReport(this.experimentDetails.id); - }, - scope: this - }); - - var leftPanel = new Ext.Panel({ - cls: 'ee-tool-left', - defaults: { - border: false, - padding: 2 - } - }); +Gemma.ExpressionExperimentTools = Ext.extend( Gemma.CurationTools, { + + allowScoreOverride : false, + experimentDetails : null, + tbar : new Ext.Toolbar(), + bconfFolded : true, + beffFolded : true, + qualFolded : true, + suitFolded : true, + + /** + * @memberOf Gemma.ExpressionExperimentTools + */ + initComponent : function() { + this.curatable = this.experimentDetails; + this.auditable = { + id : this.experimentDetails.id, + classDelegatingFor : "ubic.gemma.model.expression.experiment.ExpressionExperiment" + }; + Gemma.ExpressionExperimentTools.superclass.initComponent.call( this ); + var manager = new Gemma.EEManager( { + editable : this.editable + } ); + manager.on( 'reportUpdated', function() { + this.fireEvent( 'reloadNeeded' ); + }, this ); + + var self = this; + + var eeRow = new Ext.Panel( { + cls : 'ee-tool-row', + defaults : { + width : '100%', + border : false, + padding : 2 + } + } ); + + eeRow.add( { + html : '
' + } ); + + var refreshButton = new Ext.Button( { + text : '', + cls : 'btn-refresh nobreak', + tooltip : 'Refresh preprocessing statistics', + handler : function() { + manager.updateEEReport( this.experimentDetails.id ); + }, + scope : this + } ); + + var leftPanel = new Ext.Panel( { + cls : 'ee-tool-left', + defaults : { + border : false, + padding : 2 + } + } ); + + leftPanel.add( {cls : 'nobreak', html : '

Preprocessing:

'} ); + leftPanel.add( refreshButton ); + + /* This does all preprocessing */ + leftPanel.add( this.processedVectorCreatePanelRenderer( this.experimentDetails, manager ) ); + + /* This is no longer needed as a separate step */ + // leftPanel.add(this.missingValueAnalysisPanelRenderer(this.experimentDetails, manager)); + + leftPanel.add( this.diagnosticsPanelRenderer( this.experimentDetails, manager ) ); + leftPanel.add( this.batchPanelRenderer( this.experimentDetails, manager ) ); + + // var batchInfoMissingPanel = this.batchInfoMissingRenderer(this.experimentDetails, manager); + var batchConfoundPanel = this.batchConfoundRenderer( this.experimentDetails, manager ); + var batchEffectPanel = this.batchEffectRenderer( this.experimentDetails, manager ); + if ( batchConfoundPanel !== null || batchEffectPanel !== null /*|| batchInfoMissingPanel !== null*/ ) { + leftPanel.add( {html : "

Batch info quality:

"} ); + // if (batchInfoMissingPanel !== null) leftPanel.add(batchInfoMissingPanel); + if ( batchConfoundPanel !== null ) leftPanel.add( batchConfoundPanel ); + if ( batchEffectPanel !== null ) leftPanel.add( batchEffectPanel ); + } + + leftPanel.add( {html : "

Analyses:

"} ); + leftPanel.add( this.differentialAnalysisPanelRenderer( this.experimentDetails, manager ) ); - leftPanel.add({cls: 'nobreak', html: '

Preprocessing:

'}); - leftPanel.add(refreshButton); - - /* This does all preprocessing */ - leftPanel.add(this.processedVectorCreatePanelRenderer(this.experimentDetails, manager)); - - /* This is no longer needed as a separate step */ - // leftPanel.add(this.missingValueAnalysisPanelRenderer(this.experimentDetails, manager)); - - leftPanel.add(this.diagnosticsPanelRenderer(this.experimentDetails, manager)); - leftPanel.add(this.batchPanelRenderer(this.experimentDetails, manager)); - - // var batchInfoMissingPanel = this.batchInfoMissingRenderer(this.experimentDetails, manager); - var batchConfoundPanel = this.batchConfoundRenderer(this.experimentDetails, manager); - var batchEffectPanel = this.batchEffectRenderer(this.experimentDetails, manager); - if (batchConfoundPanel !== null || batchEffectPanel !== null /*|| batchInfoMissingPanel !== null*/) { - leftPanel.add({html: "

Batch info quality:

"}); - // if (batchInfoMissingPanel !== null) leftPanel.add(batchInfoMissingPanel); - if (batchConfoundPanel !== null) leftPanel.add(batchConfoundPanel); - if (batchEffectPanel !== null) leftPanel.add(batchEffectPanel); - } - - leftPanel.add({html: "

Analyses:

"}); - leftPanel.add(this.differentialAnalysisPanelRenderer(this.experimentDetails, manager)); - // leftPanel.add(this.linkAnalysisPanelRenderer(this.experimentDetails, manager)); - eeRow.add(leftPanel); - - var rightPanel = new Ext.Panel({ - cls: 'ee-tool-right', - defaults: { - border: false, - padding: 2 - } - }); - - if (this.experimentDetails.geeq) { - if (this.experimentDetails.geeq.otherIssues && this.experimentDetails.geeq.otherIssues.trim()) { - rightPanel.add({ - html: - "
" + - "" + - "" + - "

There were some issues while scoring this experiment:

" + - "
" + this.experimentDetails.geeq.otherIssues + "
" + - "
" + - "
" + - "
" - }) - } - rightPanel.add(this.qualityRenderer(this.experimentDetails, manager)); - rightPanel.add(this.suitabilityRenderer(this.experimentDetails, manager)); - } else { - rightPanel.add({ - html: - '

Quality / Suitability

' + - '
Quality and Suitability not calculated for this experiment
' - }) - } - - var gqRecalcButton = new Ext.Button({ - text: "Recalculate score and refresh page (takes a minute)", - tooltip: + eeRow.add( leftPanel ); + + var rightPanel = new Ext.Panel( { + cls : 'ee-tool-right', + defaults : { + border : false, + padding : 2 + } + } ); + + if ( this.experimentDetails.geeq ) { + if ( this.experimentDetails.geeq.otherIssues && this.experimentDetails.geeq.otherIssues.trim() ) { + rightPanel.add( { + html : + "
" + + "" + + "" + + "

There were some issues while scoring this experiment:

" + + "
" + this.experimentDetails.geeq.otherIssues + "
" + + "
" + + "
" + + "
" + } ) + } + rightPanel.add( this.qualityRenderer( this.experimentDetails, manager ) ); + rightPanel.add( this.suitabilityRenderer( this.experimentDetails, manager ) ); + } else { + rightPanel.add( { + html : + '

Quality / Suitability

' + + '
Quality and Suitability not calculated for this experiment
' + } ) + } + + var gqRecalcButton = new Ext.Button( { + text : "Recalculate score and refresh page (takes a minute)", + tooltip : 'Runs full scoring. This usually takes around 1 minute to complete, but can take up to several minutes for large experiments.\n' + 'Page will refresh after this task has been finished', - cls: 'gq-btn btn-refresh gq-btn-recalc-all', - handler: function (b, e) { - b.setText("Recalculate score and refresh page (takes a minute)"); - b.setDisabled(true); - ExpressionExperimentController.runGeeq(self.experimentDetails.id, "all", { - callback: function () { - window.location.reload(); - } - }); - }, - scope: this - }); - - var recalcButtonWrap = new Ext.Panel({ - cls: 'extjs-sucks', - defaults: { - border: false, - padding: 0 - } - }); - - recalcButtonWrap.add(gqRecalcButton); - rightPanel.add(recalcButtonWrap); - - eeRow.add(rightPanel); - - this.add(eeRow); - }, - - suitabilityRenderer: function (ee, mgr) { - var panel = new Ext.Panel({ - defaults: { - border: false, - padding: 0 - }, - items: [{ - html: '

Suitability

' - }] - }); - - var sHead = new Ext.Panel({ - cls: 'gq-head', - defaults: { - border: false, - padding: 0 - } - }); - - var suitExtra = this.suitExtraRendeder(ee); - sHead.add(this.geeqRowRenderer("Public suitability score", ee.geeq.publicSuitabilityScore, - "This is the suitability score that is currently publicly displayed.", "", 2, null, suitExtra, true)); - if (this.allowScoreOverride) sHead.add(suitExtra); - this.allowSuitInput(ee.geeq.manualSuitabilityOverride); - - - panel.add(sHead); - - var sBody = new Ext.Panel({ - cls: 'gq-body', - defaults: { - border: false, - padding: 0 - } - }); - - var detailsButtonWrap = new Ext.Panel({ - cls: 'extjs-sucks', - defaults: { - border: false, - padding: 0 - } - }); - - var detailsButton = this.detailsButtonRenderer(sBody); - - detailsButtonWrap.add(detailsButton); - panel.add(detailsButtonWrap); - - var sPubDesc = - Number(ee.geeq.sScorePublication) === -1 ? "Experiment has no publication, try filling it in." : - "Experiment does have a publication filled in properly."; - - var sPlatfAmntDesc = - Number(ee.geeq.sScorePlatformAmount) === -1 ? "Experiment is on more than 2 platforms. Consider splitting the experiment." : - Number(ee.geeq.sScorePlatformAmount) === -0.5 ? "Experiment has 2 platforms. Consider splitting the experiment." : - "Experiment is on a single platform."; - - var sPlatfTechDesc = - Number(ee.geeq.sScorePlatformsTechMulti) === -1 ? "Experiment has two or more platforms that use different technologies. Experiment should be split." : "" + - "All used platforms use the same technology."; - - var sPlatfPopDesc = - Number(ee.geeq.sScoreAvgPlatformPopularity) === -1 ? "Platform(s) used (on average) by less than 10 experiments." : - Number(ee.geeq.sScoreAvgPlatformPopularity) === -0.5 ? "Platform(s) used (on average) by less than 20 experiments." : - Number(ee.geeq.sScoreAvgPlatformPopularity) === 0.0 ? "Platform(s) used (on average) by less than 50 experiments." : - Number(ee.geeq.sScoreAvgPlatformPopularity) === 0.5 ? "Platform(s) used (on average) by less than 100 experiments." : - "Platform(s) used (on average) by at least 100 experiments."; - - var sPlatfSizeDesc = - Number(ee.geeq.sScoreAvgPlatformSize) === -1 ? "Platform has (or all platforms have on average) very low gene covrage." : - Number(ee.geeq.sScoreAvgPlatformSize) === -0.5 ? "Platform has (or all platforms have on average) low gene coverage." : - Number(ee.geeq.sScoreAvgPlatformSize) === 0.0 ? "Platform has (or all platforms have on average) moderate gene coverage." : - Number(ee.geeq.sScoreAvgPlatformSize) === 0.5 ? "Platform has (or all platforms have on average) good gene coverage." : - "Platform has (or all paltforms have on average) excellent gene coverage."; - - var sSizeDesc = - Number(ee.geeq.sScoreSampleSize) === -1 ? "The experiment has less than 6 samples or more than 500 samples" : - Number(ee.geeq.sScoreSampleSize) === -0.3 ? "The experiment has less than 10 samples." : - Number(ee.geeq.sScoreSampleSize) === 0.0 ? "The experiment has less than 20 samples." : "The experiment has at least 20 samples."; - - var sRawDesc = - Number(ee.geeq.sScoreRawData) === -1 ? "Experiment has no raw data available (data are from external source). Try obtaining the raw data." - : "We do have raw data available for this experiment."; - - var sMissErr = - ee.geeq.noVectors === true ? "Experiment has no computed vectors, run the vector computation!" : ""; - var sMissDesc = - ee.geeq.noVectors === true ? "There are no computed vectors." 
: - Number(ee.geeq.sScoreMissingValues) === -1 ? "Experiment has missing values. Try filling them in, ideally by obtaining raw data." : - "There are no missing values."; - - sBody.add(this.geeqRowRenderer('Publication', ee.geeq.sScorePublication, - "Checks whether the experiment has a publication.", sPubDesc)); - - sBody.add(this.geeqRowRenderer('Platforms used', ee.geeq.sScorePlatformAmount, - "The amount of platforms the experiment uses.", sPlatfAmntDesc)); - - sBody.add(this.geeqRowRenderer('Platforms tech consistency', ee.geeq.sScorePlatformsTechMulti, - "Punishes technology inconsistency of multi-platform experiments.", sPlatfTechDesc)); - - sBody.add(this.geeqRowRenderer('Platforms usage', ee.geeq.sScoreAvgPlatformPopularity, - "Depends on the popularity (experiments that use the platform) of the used platform. If there are multiple platforms," + - "the popularity is averaged.", sPlatfPopDesc)); - - sBody.add(this.geeqRowRenderer('Platforms size', ee.geeq.sScoreAvgPlatformSize, - "Depends on the size (the number of elements) of the used platform. If there are multiple platforms, the" + - "size is averaged.", sPlatfSizeDesc)); - - sBody.add(this.geeqRowRenderer('Sample size', ee.geeq.sScoreSampleSize, - "Depends on the experiments size (number of samples).", sSizeDesc)); - - sBody.add(this.geeqRowRenderer('Raw data state', ee.geeq.sScoreRawData, - "Checks whether there was raw data available for this experiment.", sRawDesc)); - - sBody.add(this.geeqRowRenderer('Missing values', ee.geeq.sScoreMissingValues, - "Checks whether the experiment has any missing values.", sMissDesc, 1, sMissErr)); - - panel.add(sBody); - if (!sMissErr) { - sBody.hide(); - } - return panel; - }, - - qualityRenderer: function (ee, mgr) { - var panel = new Ext.Panel({ - defaults: { - border: false, - padding: 0 - }, - items: [{ - html: '

Quality

' - }] - }); - - var qHead = new Ext.Panel({ - cls: 'gq-head', - defaults: { - border: false, - padding: 0 - } - }); - - var qualExtra = this.qualExtraRendeder(ee); - qHead.add(this.geeqRowRenderer("Public quality score", ee.geeq.publicQualityScore, - "This is the quality score that is currently publicly displayed.", "", 2, null, qualExtra, true)); - if (this.allowScoreOverride) qHead.add(qualExtra); - this.allowQualInput(ee.geeq.manualQualityOverride); - - panel.add(qHead); - - var qBody = new Ext.Panel({ - cls: 'gq-body', - defaults: { - border: false, - padding: 0 - } - }); - - var detailsButtonWrap = new Ext.Panel({ - cls: 'extjs-sucks', - defaults: { - border: false, - padding: 0 - } - }); - - var detailsButton = this.detailsButtonRenderer(qBody); - - detailsButtonWrap.add(detailsButton); - panel.add(detailsButtonWrap); - - var qOutlErr = - Number(ee.geeq.corrMatIssues) === 1 ? "The correlation matrix is empty!" : - Number(ee.geeq.corrMatIssues) === 2 ? "There are NaN values in the correlation matrix." : - ""; - - var qOutlierDesc = - Number(ee.geeq.qScoreOutliers) === -1 ? "There are detected, non-removed outliers. Removing detected outliers will improve the score." : - "No outliers were detected."; - - var qPlatfTechMultiDesc = - Number(ee.geeq.qScorePlatformsTech) === -1 ? "The experiment is on a two-color platform." : "" + - "The experiment is NOT on a two-color platform."; - - var qReplErr = - Number(ee.geeq.replicatesIssues) === 1 ? "There is no experimental design for this experiment" : - Number(ee.geeq.replicatesIssues) === 2 ? "There are no factor values" : - Number(ee.geeq.replicatesIssues) === 3 ? "All factor-value combinations have no replicates." : - Number(ee.geeq.replicatesIssues) === 4 ? "The lowest replicate amount was 0 - this should be impossible, please report" : - ""; - - // These thresholds are defined - var qReplDesc = - Number(ee.geeq.qScoreReplicates) === -1 ? "There is a factor-value combination that has very few or no replicates." : - Number(ee.geeq.qScoreReplicates) === 0.0 ? "There is a factor-value combination that has moderately few replicates. " : - "All factor-value combinations have a good number of replicates"; - - var qBatchInfoDesc = - Number(ee.geeq.qScoreBatchInfo) === -1 ? "The experiment has no batch info. Try filling it in." : "" + - "Batch information provided."; - - var qBatchEffErr = - Number(ee.geeq.qScoreBatchInfo) === -1 ? "There is no batch information" : - Number(ee.geeq.qScoreBatchEffect) === 0.0 && Number(ee.geeq.qScoreBatchConfound) < 1 ? "Batch confound detected, batch effect detection skipped." : - ee.geeq.batchCorrected === true ? "Data was batch-corrected." : ""; - - var qBatchEffDesc = - ee.geeq.manualBatchEffectActive === true ? "Manually set value, detected score was: " + ee.geeq.qScoreBatchEffect : - Number(ee.geeq.qScoreBatchInfo) === -1 ? "There were problems when checking for batch effect." : - Number(ee.geeq.qScoreBatchEffect) === -1 ? "Experiment has a strong batch effect: the batch p-value is less than 0.0001. Try to batch-correct." : - Number(ee.geeq.qScoreBatchEffect) === 0.0 && Number(ee.geeq.qScoreBatchConfound) < 1 ? "Batch effect score defaults to 0 when data is confounded with batches." : - Number(ee.geeq.qScoreBatchEffect) === 0.0 ? "The experiment has some batch effect: the batch p-value is within [0.1, 0.0001]. Try to batch-correct." : - "The experiment has no or very weak batch effect: the batch p-value is more than 0.1."; - - var qBatchConfErr = - Number(ee.geeq.qScoreBatchInfo) === -1 ? 
"There is no batch information" : - ""; - - var qBatchConfDesc = - ee.geeq.manualBatchConfoundActive === true ? "Manually set value, detected score was: " + ee.geeq.qScoreBatchConfound : - Number(ee.geeq.qScoreBatchConfound) === -1 ? "Batch confound has been detected." : - Number(ee.geeq.qScoreBatchConfound) === 0.0 ? "There were problems when checking for batch confound." : - "The experiment does not seem to be confounded with the batches."; - - var bconfExtra = this.bconfExtraRendeder(ee); - var beffExtra = this.beffExtraRendeder(ee); - - this.allowBconfRadios(ee.geeq.manualBatchConfoundActive); - this.allowBeffRadios(ee.geeq.manualBatchEffectActive); - - qBody.add(this.geeqRowRenderer('Mean sample corr.', ee.geeq.qScoreSampleMeanCorrelation, - "[Not included in final score] The actual mean correlation of samples.", "Not included in final score", 4, qOutlErr)); - - qBody.add(this.geeqRowRenderer('Sample corr. variance', ee.geeq.qScoreSampleCorrelationVariance, - "[Not included in final score] The actual variance of sample correlation.", "Not included in final score", 4, qOutlErr)); - - qBody.add(this.geeqRowRenderer('Median sample corr.', ee.geeq.qScoreSampleMedianCorrelation, - "The actual median correlation of samples.", "Included in the final score. Can be somewhat improved by removing outliers.", 4, qOutlErr)); - - qBody.add(this.geeqRowRenderer('Outliers', ee.geeq.qScoreOutliers, - "Depends on the presence of detected (non-removed) outliers. If there are any outliers, the score will be low.", qOutlierDesc, 1, qOutlErr)); - - qBody.add(this.geeqRowRenderer('Platform technology', ee.geeq.qScorePlatformsTech, - "Checks whether the experiments platform (any one, if there are multiple) is two-color.", qPlatfTechMultiDesc)); - - qBody.add(this.geeqRowRenderer('Replicates', ee.geeq.qScoreReplicates, - "Checks the replicate amount of all factor-value combinations, and takes the lowest one.", qReplDesc, 1, qReplErr)); - - qBody.add(this.geeqRowRenderer('Batch info', ee.geeq.qScoreBatchInfo, - "Checks whether the experiment has batch info available.", qBatchInfoDesc)); - - qBody.add(this.geeqRowRenderer('Batch confound', ee.geeq.qScorePublicBatchConfound, - "Checks whether the experimental data are confounded with batches. This value is the currently publicly displayed information.", - qBatchConfDesc, 1, qBatchConfErr, bconfExtra)); - qBody.add(bconfExtra); - - qBody.add(this.geeqRowRenderer('Batch effect', ee.geeq.qScorePublicBatchEffect, - "Checks the experimental data for a batch effect. This value is the currently publicly displayed information.", - qBatchEffDesc, 1, qBatchEffErr, beffExtra)); - qBody.add(beffExtra); - - panel.add(qBody); - if (!qReplErr && !qOutlErr && !qBatchConfErr) { - qBody.hide(); - } - return panel; - }, - - detailsButtonRenderer: function (panel) { - return new Ext.Button({ - text: ' Show score breakdown and details', - cls: 'gq-btn gq-btn-details', - handler: function () { - this.showPanel(panel, !panel.isVisible()) - }, - scope: this - }); - }, - - bconfExtraRendeder: function (ee) { - - this.bconfFolded = !ee.geeq.manualBatchConfoundActive; - - var bconfExtra = new Ext.Panel({ - cls: 'gq-extra' + (this.bconfFolded ? 
' folded' : ''), - defaults: { - border: false, - padding: 0 - } - }); - - var self = this; - var foldButton = new Ext.Button({ - text: '', - cls: 'gq-btn', - handler: function () { - this.foldPanel(bconfExtra, self.bconfFolded = !self.bconfFolded); - }, - scope: this - }); - - bconfExtra.add(foldButton); - - bconfExtra.add(new Ext.Button({ - text: 'Re-score batch info', - tooltip: 'Run geeq only for the batch info related scores (refreshes page).', - handler: function (b, e) { - b.setText("Re-score batch info"); - b.setDisabled(true); - ExpressionExperimentController.runGeeq(self.experimentDetails.id, "batch", { - callback: function () { - window.location.reload(); - } - }); - }, - scope: this, - cls: 'btn-refresh gq-subscore-refresh-btn' - })); - - bconfExtra.add(new Ext.form.Checkbox({ - xtype: 'checkbox', - id: 'gq-bconf-override', - boxLabel: 'Override:', - hideLabel: false, - checked: ee.geeq.manualBatchConfoundActive, - handler: function (el, value) { - self.allowBconfRadios(value); - ee.geeq.manualBatchConfoundActive = value; - document.getElementById('bconf-notification').removeAttribute("hidden"); - ExpressionExperimentController.setGeeqManualSettings(ee.id, ee.geeq, { - callback: self.bconfNotifySaved - }); - } - })); - - bconfExtra.add(new Ext.form.Radio({ - xtype: 'radio', - id: 'gq-bconf-override-value-true', - name: 'gq-bconf-override-value', - boxLabel: 'Confounded', - hideLabel: false, - checked: ee.geeq.manualHasBatchConfound, - handler: function (el, value) { - ee.geeq.manualHasBatchConfound = value; - document.getElementById('bconf-notification').removeAttribute("hidden"); - ExpressionExperimentController.setGeeqManualSettings(ee.id, ee.geeq, { - callback: self.bconfNotifySaved - }); - } - })); - - bconfExtra.add(new Ext.form.Radio({ - xtype: 'radio', - id: 'gq-bconf-override-value-false', - name: 'gq-bconf-override-value', - boxLabel: 'Not confounded', - hideLabel: false, - checked: !ee.geeq.manualHasBatchConfound - })); - - bconfExtra.add({cls: 'gq-notif hidden', html: ''}); - - return bconfExtra; - }, - - bconfNotifySaved: function () { - var nr = document.getElementById('bconf-notification'); - if (nr) { - nr.setAttribute("hidden", "true"); - } - }, - - beffExtraRendeder: function (ee) { - - this.beffFolded = !ee.geeq.manualBatchEffectActive; - - var beffExtra = new Ext.Panel({ - cls: 'gq-extra' + (this.beffFolded ? 
' folded' : ''), - defaults: { - border: false, - padding: 0 - } - }); - - var self = this; - var foldButton = new Ext.Button({ - text: '', - cls: 'gq-btn', - handler: function () { - this.foldPanel(beffExtra, self.beffFolded = !self.beffFolded); - }, - scope: this - }); - - beffExtra.add(foldButton); - - beffExtra.add(new Ext.Button({ - text: 'Re-score batch info', - tooltip: 'Run geeq only for the batch info related scores (refreshes page).', - handler: function (b, e) { - b.setText("Re-score batch info"); - b.setDisabled(true); - ExpressionExperimentController.runGeeq(self.experimentDetails.id, "batch", { - callback: function () { - window.location.reload(); - } - }); - }, - scope: this, - cls: 'btn-refresh gq-subscore-refresh-btn' - })); - - beffExtra.add(new Ext.form.Checkbox({ - xtype: 'checkbox', - id: 'gq-beff-override', - boxLabel: 'Override:', - hideLabel: false, - checked: ee.geeq.manualBatchEffectActive, - handler: function (el, value) { - self.allowBeffRadios(value); - self.experimentDetails.geeq.manualBatchEffectActive = value; - document.getElementById('beff-notification').removeAttribute("hidden"); - ExpressionExperimentController.setGeeqManualSettings(self.experimentDetails.id, self.experimentDetails.geeq, { - callback: self.beffNotifySaved - }); - } - })); - - beffExtra.add(new Ext.form.Radio({ - xtype: 'radio', - id: 'gq-beff-override-value-strong', - name: 'gq-beff-override-value', - boxLabel: 'Strong', - hideLabel: false, - checked: ee.geeq.manualHasStrongBatchEffect, - handler: function (el, value) { - if (!value) return; // since we have 3 radios, we wil only process the one that was selected - ee.geeq.manualHasStrongBatchEffect = value; - ee.geeq.manualHasNoBatchEffect = !value; - document.getElementById('beff-notification').removeAttribute("hidden"); - ExpressionExperimentController.setGeeqManualSettings(ee.id, ee.geeq, { - callback: self.beffNotifySaved - }); - } - })); - - beffExtra.add(new Ext.form.Radio({ - xtype: 'radio', - id: 'gq-beff-override-value-weak', - name: 'gq-beff-override-value', - boxLabel: 'Weak', - hideLabel: false, - checked: !ee.geeq.manualHasStrongBatchEffect && !ee.geeq.manualHasNoBatchEffect, - handler: function (el, value) { - if (!value) return; // since we have 3 radios, we wil only process the one that was selected - ee.geeq.manualHasStrongBatchEffect = !value; - ee.geeq.manualHasNoBatchEffect = !value; - document.getElementById('beff-notification').removeAttribute("hidden"); - ExpressionExperimentController.setGeeqManualSettings(ee.id, ee.geeq, { - callback: self.beffNotifySaved - }); - } - })); - - beffExtra.add(new Ext.form.Radio({ - xtype: 'radio', - id: 'gq-beff-override-value-none', - name: 'gq-beff-override-value', - boxLabel: 'No batch effect', - hideLabel: false, - checked: ee.geeq.manualHasNoBatchEffect, - handler: function (el, value) { - if (!value) return; // since we have 3 radios, we wil only process the one that was selected - ee.geeq.manualHasStrongBatchEffect = !value; - ee.geeq.manualHasNoBatchEffect = value; - document.getElementById('beff-notification').removeAttribute("hidden"); - ExpressionExperimentController.setGeeqManualSettings(ee.id, ee.geeq, { - callback: self.beffNotifySaved - }); - } - })); - - beffExtra.add({cls: 'gq-notif hidden', html: ''}); - - return beffExtra; - }, - - beffNotifySaved: function () { - var nr = document.getElementById('beff-notification'); - if (nr) { - nr.setAttribute("hidden", "true"); - } - }, - - qualExtraRendeder: function (ee) { - - this.qualFolded = 
!ee.geeq.manualQualityOverride; - - var qualExtra = new Ext.Panel({ - cls: 'gq-extra' + (this.qualFolded ? ' folded' : ''), - defaults: { - border: false, - padding: 0 - } - }); - - var self = this; - var foldButton = new Ext.Button({ - text: '', - cls: 'gq-btn', - handler: function () { - this.foldPanel(qualExtra, self.qualFolded = !self.qualFolded); + cls : 'gq-btn btn-refresh gq-btn-recalc-all', + handler : function( b, e ) { + b.setText( "Recalculate score and refresh page (takes a minute)" ); + b.setDisabled( true ); + ExpressionExperimentController.runGeeq( self.experimentDetails.id, "all", { + callback : function() { + window.location.reload(); + } + } ); + }, + scope : this + } ); + + var recalcButtonWrap = new Ext.Panel( { + cls : 'extjs-sucks', + defaults : { + border : false, + padding : 0 + } + } ); + + recalcButtonWrap.add( gqRecalcButton ); + rightPanel.add( recalcButtonWrap ); + + eeRow.add( rightPanel ); + + this.add( eeRow ); + }, + + suitabilityRenderer : function( ee, mgr ) { + var panel = new Ext.Panel( { + defaults : { + border : false, + padding : 0 + }, + items : [ { + html : '

Suitability

' + } ] + } ); + + var sHead = new Ext.Panel( { + cls : 'gq-head', + defaults : { + border : false, + padding : 0 + } + } ); + + var suitExtra = this.suitExtraRendeder( ee ); + sHead.add( this.geeqRowRenderer( "Public suitability score", ee.geeq.publicSuitabilityScore, + "This is the suitability score that is currently publicly displayed.", "", 2, null, suitExtra, true ) ); + if ( this.allowScoreOverride ) sHead.add( suitExtra ); + this.allowSuitInput( ee.geeq.manualSuitabilityOverride ); + + + panel.add( sHead ); + + var sBody = new Ext.Panel( { + cls : 'gq-body', + defaults : { + border : false, + padding : 0 + } + } ); + + var detailsButtonWrap = new Ext.Panel( { + cls : 'extjs-sucks', + defaults : { + border : false, + padding : 0 + } + } ); + + var detailsButton = this.detailsButtonRenderer( sBody ); + + detailsButtonWrap.add( detailsButton ); + panel.add( detailsButtonWrap ); + + var sPubDesc = + Number( ee.geeq.sScorePublication ) === -1 ? "Experiment has no publication, try filling it in." : + "Experiment does have a publication filled in properly."; + + var sPlatfAmntDesc = + Number( ee.geeq.sScorePlatformAmount ) === -1 ? "Experiment is on more than 2 platforms. Consider splitting the experiment." : + Number( ee.geeq.sScorePlatformAmount ) === -0.5 ? "Experiment has 2 platforms. Consider splitting the experiment." : + "Experiment is on a single platform."; + + var sPlatfTechDesc = + Number( ee.geeq.sScorePlatformsTechMulti ) === -1 ? "Experiment has two or more platforms that use different technologies. Experiment should be split." : "" + + "All used platforms use the same technology."; + + var sPlatfPopDesc = + Number( ee.geeq.sScoreAvgPlatformPopularity ) === -1 ? "Platform(s) used (on average) by less than 10 experiments." : + Number( ee.geeq.sScoreAvgPlatformPopularity ) === -0.5 ? "Platform(s) used (on average) by less than 20 experiments." : + Number( ee.geeq.sScoreAvgPlatformPopularity ) === 0.0 ? "Platform(s) used (on average) by less than 50 experiments." : + Number( ee.geeq.sScoreAvgPlatformPopularity ) === 0.5 ? "Platform(s) used (on average) by less than 100 experiments." : + "Platform(s) used (on average) by at least 100 experiments."; + + var sPlatfSizeDesc = + Number( ee.geeq.sScoreAvgPlatformSize ) === -1 ? "Platform has (or all platforms have on average) very low gene covrage." : + Number( ee.geeq.sScoreAvgPlatformSize ) === -0.5 ? "Platform has (or all platforms have on average) low gene coverage." : + Number( ee.geeq.sScoreAvgPlatformSize ) === 0.0 ? "Platform has (or all platforms have on average) moderate gene coverage." : + Number( ee.geeq.sScoreAvgPlatformSize ) === 0.5 ? "Platform has (or all platforms have on average) good gene coverage." : + "Platform has (or all paltforms have on average) excellent gene coverage."; + + var sSizeDesc = + Number( ee.geeq.sScoreSampleSize ) === -1 ? "The experiment has less than 6 samples or more than 500 samples" : + Number( ee.geeq.sScoreSampleSize ) === -0.3 ? "The experiment has less than 10 samples." : + Number( ee.geeq.sScoreSampleSize ) === 0.0 ? "The experiment has less than 20 samples." : "The experiment has at least 20 samples."; + + var sRawDesc = + Number( ee.geeq.sScoreRawData ) === -1 ? "Experiment has no raw data available (data are from external source). Try obtaining the raw data." + : "We do have raw data available for this experiment."; + + var sMissErr = + ee.geeq.noVectors === true ? "Experiment has no computed vectors, run the vector computation!" : ""; + var sMissDesc = + ee.geeq.noVectors === true ? 
"There are no computed vectors." : + Number( ee.geeq.sScoreMissingValues ) === -1 ? "Experiment has missing values. Try filling them in, ideally by obtaining raw data." : + "There are no missing values."; + + sBody.add( this.geeqRowRenderer( 'Publication', ee.geeq.sScorePublication, + "Checks whether the experiment has a publication.", sPubDesc ) ); + + sBody.add( this.geeqRowRenderer( 'Platforms used', ee.geeq.sScorePlatformAmount, + "The amount of platforms the experiment uses.", sPlatfAmntDesc ) ); + + sBody.add( this.geeqRowRenderer( 'Platforms tech consistency', ee.geeq.sScorePlatformsTechMulti, + "Punishes technology inconsistency of multi-platform experiments.", sPlatfTechDesc ) ); + + sBody.add( this.geeqRowRenderer( 'Platforms usage', ee.geeq.sScoreAvgPlatformPopularity, + "Depends on the popularity (experiments that use the platform) of the used platform. If there are multiple platforms," + + "the popularity is averaged.", sPlatfPopDesc ) ); + + sBody.add( this.geeqRowRenderer( 'Platforms size', ee.geeq.sScoreAvgPlatformSize, + "Depends on the size (the number of elements) of the used platform. If there are multiple platforms, the" + + "size is averaged.", sPlatfSizeDesc ) ); + + sBody.add( this.geeqRowRenderer( 'Sample size', ee.geeq.sScoreSampleSize, + "Depends on the experiments size (number of samples).", sSizeDesc ) ); + + sBody.add( this.geeqRowRenderer( 'Raw data state', ee.geeq.sScoreRawData, + "Checks whether there was raw data available for this experiment.", sRawDesc ) ); + + sBody.add( this.geeqRowRenderer( 'Missing values', ee.geeq.sScoreMissingValues, + "Checks whether the experiment has any missing values.", sMissDesc, 1, sMissErr ) ); + + panel.add( sBody ); + if ( !sMissErr ) { + sBody.hide(); + } + return panel; + }, + + qualityRenderer : function( ee, mgr ) { + var panel = new Ext.Panel( { + defaults : { + border : false, + padding : 0 + }, + items : [ { + html : '

Quality

' + } ] + } ); + + var qHead = new Ext.Panel( { + cls : 'gq-head', + defaults : { + border : false, + padding : 0 + } + } ); + + var qualExtra = this.qualExtraRendeder( ee ); + qHead.add( this.geeqRowRenderer( "Public quality score", ee.geeq.publicQualityScore, + "This is the quality score that is currently publicly displayed.", "", 2, null, qualExtra, true ) ); + if ( this.allowScoreOverride ) qHead.add( qualExtra ); + this.allowQualInput( ee.geeq.manualQualityOverride ); + + panel.add( qHead ); + + var qBody = new Ext.Panel( { + cls : 'gq-body', + defaults : { + border : false, + padding : 0 + } + } ); + + var detailsButtonWrap = new Ext.Panel( { + cls : 'extjs-sucks', + defaults : { + border : false, + padding : 0 + } + } ); + + var detailsButton = this.detailsButtonRenderer( qBody ); + + detailsButtonWrap.add( detailsButton ); + panel.add( detailsButtonWrap ); + + var qOutlErr = + Number( ee.geeq.corrMatIssues ) === 1 ? "The correlation matrix is empty!" : + Number( ee.geeq.corrMatIssues ) === 2 ? "There are NaN values in the correlation matrix." : + ""; + + var qOutlierDesc = + Number( ee.geeq.qScoreOutliers ) === -1 ? "There are detected, non-removed outliers. Removing detected outliers will improve the score." : + "No outliers were detected."; + + var qPlatfTechMultiDesc = + Number( ee.geeq.qScorePlatformsTech ) === -1 ? "The experiment is on a two-color platform." : "" + + "The experiment is NOT on a two-color platform."; + + var qReplErr = + Number( ee.geeq.replicatesIssues ) === 1 ? "There is no experimental design for this experiment" : + Number( ee.geeq.replicatesIssues ) === 2 ? "There are no factor values" : + Number( ee.geeq.replicatesIssues ) === 3 ? "All factor-value combinations have no replicates." : + Number( ee.geeq.replicatesIssues ) === 4 ? "The lowest replicate amount was 0 - this should be impossible, please report" : + ""; + + // These thresholds are defined + var qReplDesc = + Number( ee.geeq.qScoreReplicates ) === -1 ? "There is a factor-value combination that has very few or no replicates." : + Number( ee.geeq.qScoreReplicates ) === 0.0 ? "There is a factor-value combination that has moderately few replicates. " : + "All factor-value combinations have a good number of replicates"; + + var qBatchInfoDesc = + Number( ee.geeq.qScoreBatchInfo ) === -1 ? "The experiment has no batch info. Try filling it in." : "" + + "Batch information provided."; + + var qBatchEffErr = + Number( ee.geeq.qScoreBatchInfo ) === -1 ? "There is no batch information" : + Number( ee.geeq.QScoreBatchEffect ) === 0.0 && Number( ee.geeq.QScoreBatchConfound ) < 1 ? "Batch confound detected, batch effect detection skipped." : + ee.geeq.batchCorrected === true ? "Data was batch-corrected." : ""; + + var qBatchEffDesc = + ee.geeq.manualBatchEffectActive === true ? "Manually set value, detected score was: " + ee.geeq.QScoreBatchEffect : + Number( ee.geeq.qScoreBatchInfo ) === -1 ? "There were problems when checking for batch effect." : + Number( ee.geeq.QScoreBatchEffect ) === -1 ? "Experiment has a batch effect; Try to batch-correct." : + Number( ee.geeq.QScoreBatchEffect ) === 0.0 && Number( ee.geeq.QScoreBatchConfound ) < 1 ? "Batch effect score defaults to 0 when data is confounded with batches." : + Number( ee.geeq.QScoreBatchEffect ) === 0.0 ? "The experiment has some evidence for a batch effect. Try to batch-correct." : + "Batch effect considered negligible"; // FIXME: this seems to not be working right when there is a confound; ee.geeq.qStoreBatchConfound is not defined? 
+ + var qBatchConfErr = + Number( ee.geeq.qScoreBatchInfo ) === -1 ? "There is no batch information" : + ""; + + var qBatchConfDesc = + ee.geeq.manualBatchConfoundActive === true ? "Manually set value, detected score was: " + ee.geeq.QScoreBatchConfound : + Number( ee.geeq.QScoreBatchConfound ) === -1 ? "Batch confound has been detected." : + Number( ee.geeq.QScoreBatchConfound ) === 0.0 ? "There were problems when checking for batch confound." : + "The experiment does not seem to be confounded with the batches."; + + var bconfExtra = this.bconfExtraRendeder( ee ); + var beffExtra = this.beffExtraRendeder( ee ); + + this.allowBconfRadios( ee.geeq.manualBatchConfoundActive ); + this.allowBeffRadios( ee.geeq.manualBatchEffectActive ); + + qBody.add( this.geeqRowRenderer( 'Mean sample corr.', ee.geeq.qScoreSampleMeanCorrelation, + "[Not included in final score] The actual mean correlation of samples.", "Not included in final score", 4, qOutlErr ) ); + + qBody.add( this.geeqRowRenderer( 'Sample corr. variance', ee.geeq.qScoreSampleCorrelationVariance, + "[Not included in final score] The actual variance of sample correlation.", "Not included in final score", 4, qOutlErr ) ); + + qBody.add( this.geeqRowRenderer( 'Median sample corr.', ee.geeq.qScoreSampleMedianCorrelation, + "The actual median correlation of samples.", "Included in the final score. Can be somewhat improved by removing outliers.", 4, qOutlErr ) ); + + qBody.add( this.geeqRowRenderer( 'Outliers', ee.geeq.qScoreOutliers, + "Depends on the presence of detected (non-removed) outliers. If there are any outliers, the score will be low.", qOutlierDesc, 1, qOutlErr ) ); + + qBody.add( this.geeqRowRenderer( 'Platform technology', ee.geeq.qScorePlatformsTech, + "Checks whether the experiments platform (any one, if there are multiple) is two-color.", qPlatfTechMultiDesc ) ); + + qBody.add( this.geeqRowRenderer( 'Replicates', ee.geeq.qScoreReplicates, + "Checks the replicate amount of all factor-value combinations, and takes the lowest one.", qReplDesc, 1, qReplErr ) ); + + qBody.add( this.geeqRowRenderer( 'Batch info', ee.geeq.qScoreBatchInfo, + "Checks whether the experiment has batch info available.", qBatchInfoDesc ) ); + + qBody.add( this.geeqRowRenderer( 'Batch confound', ee.geeq.qScorePublicBatchConfound, + "Checks whether the experimental data are confounded with batches. This value is the currently publicly displayed information.", + qBatchConfDesc, 1, qBatchConfErr, bconfExtra ) ); + qBody.add( bconfExtra ); + + qBody.add( this.geeqRowRenderer( 'Batch effect', ee.geeq.qScorePublicBatchEffect, + "Checks the experimental data for a batch effect. This value is the currently publicly displayed information.", + qBatchEffDesc, 1, qBatchEffErr, beffExtra ) ); + qBody.add( beffExtra ); + + panel.add( qBody ); + if ( !qReplErr && !qOutlErr && !qBatchConfErr ) { + qBody.hide(); + } + return panel; + }, + + detailsButtonRenderer : function( panel ) { + return new Ext.Button( { + text : ' Show score breakdown and details', + cls : 'gq-btn gq-btn-details', + handler : function() { + this.showPanel( panel, !panel.isVisible() ) + }, + scope : this + } ); + }, + + bconfExtraRendeder : function( ee ) { + + this.bconfFolded = !ee.geeq.manualBatchConfoundActive; + + var bconfExtra = new Ext.Panel( { + cls : 'gq-extra' + (this.bconfFolded ? 
' folded' : ''), + defaults : { + border : false, + padding : 0 + } + } ); + + var self = this; + var foldButton = new Ext.Button( { + text : '', + cls : 'gq-btn', + handler : function() { + this.foldPanel( bconfExtra, self.bconfFolded = !self.bconfFolded ); + }, + scope : this + } ); + + bconfExtra.add( foldButton ); + + bconfExtra.add( new Ext.Button( { + text : 'Re-score batch info', + tooltip : 'Run geeq only for the batch info related scores (refreshes page).', + handler : function( b, e ) { + b.setText( "Re-score batch info" ); + b.setDisabled( true ); + ExpressionExperimentController.runGeeq( self.experimentDetails.id, "batch", { + callback : function() { + window.location.reload(); + } + } ); + }, + scope : this, + cls : 'btn-refresh gq-subscore-refresh-btn' + } ) ); + + bconfExtra.add( new Ext.form.Checkbox( { + xtype : 'checkbox', + id : 'gq-bconf-override', + boxLabel : 'Override:', + hideLabel : false, + checked : ee.geeq.manualBatchConfoundActive, + handler : function( el, value ) { + self.allowBconfRadios( value ); + ee.geeq.manualBatchConfoundActive = value; + document.getElementById( 'bconf-notification' ).removeAttribute( "hidden" ); + ExpressionExperimentController.setGeeqManualSettings( ee.id, ee.geeq, { + callback : self.bconfNotifySaved + } ); + } + } ) ); + + bconfExtra.add( new Ext.form.Radio( { + xtype : 'radio', + id : 'gq-bconf-override-value-true', + name : 'gq-bconf-override-value', + boxLabel : 'Confounded', + hideLabel : false, + checked : ee.geeq.manualHasBatchConfound, + handler : function( el, value ) { + ee.geeq.manualHasBatchConfound = value; + document.getElementById( 'bconf-notification' ).removeAttribute( "hidden" ); + ExpressionExperimentController.setGeeqManualSettings( ee.id, ee.geeq, { + callback : self.bconfNotifySaved + } ); + } + } ) ); + + bconfExtra.add( new Ext.form.Radio( { + xtype : 'radio', + id : 'gq-bconf-override-value-false', + name : 'gq-bconf-override-value', + boxLabel : 'Not confounded', + hideLabel : false, + checked : !ee.geeq.manualHasBatchConfound + } ) ); + + bconfExtra.add( {cls : 'gq-notif hidden', html : ''} ); + + return bconfExtra; + }, + + bconfNotifySaved : function() { + var nr = document.getElementById( 'bconf-notification' ); + if ( nr ) { + nr.setAttribute( "hidden", "true" ); + } + }, + + beffExtraRendeder : function( ee ) { + + this.beffFolded = !ee.geeq.manualBatchEffectActive; + + var beffExtra = new Ext.Panel( { + cls : 'gq-extra' + (this.beffFolded ? 
' folded' : ''), + defaults : { + border : false, + padding : 0 + } + } ); + + var self = this; + var foldButton = new Ext.Button( { + text : '', + cls : 'gq-btn', + handler : function() { + this.foldPanel( beffExtra, self.beffFolded = !self.beffFolded ); + }, + scope : this + } ); + + beffExtra.add( foldButton ); + + beffExtra.add( new Ext.Button( { + text : 'Re-score batch info', + tooltip : 'Run geeq only for the batch info related scores (refreshes page).', + handler : function( b, e ) { + b.setText( "Re-score batch info" ); + b.setDisabled( true ); + ExpressionExperimentController.runGeeq( self.experimentDetails.id, "batch", { + callback : function() { + window.location.reload(); + } + } ); + }, + scope : this, + cls : 'btn-refresh gq-subscore-refresh-btn' + } ) ); + + beffExtra.add( new Ext.form.Checkbox( { + xtype : 'checkbox', + id : 'gq-beff-override', + boxLabel : 'Override:', + hideLabel : false, + checked : ee.geeq.manualBatchEffectActive, + handler : function( el, value ) { + self.allowBeffRadios( value ); + self.experimentDetails.geeq.manualBatchEffectActive = value; + document.getElementById( 'beff-notification' ).removeAttribute( "hidden" ); + ExpressionExperimentController.setGeeqManualSettings( self.experimentDetails.id, self.experimentDetails.geeq, { + callback : self.beffNotifySaved + } ); + } + } ) ); + + beffExtra.add( new Ext.form.Radio( { + xtype : 'radio', + id : 'gq-beff-override-value-strong', + name : 'gq-beff-override-value', + boxLabel : 'Strong', + hideLabel : false, + checked : ee.geeq.manualHasStrongBatchEffect, + handler : function( el, value ) { + if ( !value ) return; // since we have 3 radios, we wil only process the one that was selected + ee.geeq.manualHasStrongBatchEffect = value; + ee.geeq.manualHasNoBatchEffect = !value; + document.getElementById( 'beff-notification' ).removeAttribute( "hidden" ); + ExpressionExperimentController.setGeeqManualSettings( ee.id, ee.geeq, { + callback : self.beffNotifySaved + } ); + } + } ) ); + + beffExtra.add( new Ext.form.Radio( { + xtype : 'radio', + id : 'gq-beff-override-value-weak', + name : 'gq-beff-override-value', + boxLabel : 'Weak', + hideLabel : false, + checked : !ee.geeq.manualHasStrongBatchEffect && !ee.geeq.manualHasNoBatchEffect, + handler : function( el, value ) { + if ( !value ) return; // since we have 3 radios, we wil only process the one that was selected + ee.geeq.manualHasStrongBatchEffect = !value; + ee.geeq.manualHasNoBatchEffect = !value; + document.getElementById( 'beff-notification' ).removeAttribute( "hidden" ); + ExpressionExperimentController.setGeeqManualSettings( ee.id, ee.geeq, { + callback : self.beffNotifySaved + } ); + } + } ) ); + + beffExtra.add( new Ext.form.Radio( { + xtype : 'radio', + id : 'gq-beff-override-value-none', + name : 'gq-beff-override-value', + boxLabel : 'No batch effect', + hideLabel : false, + checked : ee.geeq.manualHasNoBatchEffect, + handler : function( el, value ) { + if ( !value ) return; // since we have 3 radios, we wil only process the one that was selected + ee.geeq.manualHasStrongBatchEffect = !value; + ee.geeq.manualHasNoBatchEffect = value; + document.getElementById( 'beff-notification' ).removeAttribute( "hidden" ); + ExpressionExperimentController.setGeeqManualSettings( ee.id, ee.geeq, { + callback : self.beffNotifySaved + } ); + } + } ) ); + + beffExtra.add( {cls : 'gq-notif hidden', html : ''} ); + + return beffExtra; + }, + + beffNotifySaved : function() { + var nr = document.getElementById( 'beff-notification' ); + if ( nr ) { + 
nr.setAttribute( "hidden", "true" ); + } + }, + + qualExtraRendeder : function( ee ) { + + this.qualFolded = !ee.geeq.manualQualityOverride; + + var qualExtra = new Ext.Panel( { + cls : 'gq-extra' + (this.qualFolded ? ' folded' : ''), + defaults : { + border : false, + padding : 0 + } + } ); + + var self = this; + var foldButton = new Ext.Button( { + text : '', + cls : 'gq-btn', + handler : function() { + this.foldPanel( qualExtra, self.qualFolded = !self.qualFolded ); + }, + scope : this + } ); + + qualExtra.add( foldButton ); + + qualExtra.add( new Ext.Panel( { + cls : 'gq-qual-warning', + defaults : { + border : false, + padding : 0 }, - scope: this - }); - - qualExtra.add(foldButton); - - qualExtra.add(new Ext.Panel({ - cls: 'gq-qual-warning', - defaults: { - border: false, - padding: 0 - }, - items: [ - { - html: - "
" + - "" + - "" + - "

Changing the score manually is a last resort measure, that should not be used on regular basis.

" + - "

Please consult this step with your supervisor.

" + - "
" + - "
" + - "
" - } - ] - }) - ); - - qualExtra.add(new Ext.form.Checkbox({ - xtype: 'checkbox', - id: 'gq-qual-override', - boxLabel: 'Override public score?', - hideLabel: false, - checked: ee.geeq.manualQualityOverride, - handler: function (el, value) { - self.allowQualInput(value); - ee.geeq.manualQualityOverride = value; - if (value) ee.geeq.manualQualityScore = Number(document.getElementById('gq-qual-override-value').value); - } - })); - - var qval = (ee.geeq.manualQualityScore ? ee.geeq.manualQualityScore : ee.geeq.detectedQualityScore); - qualExtra.add({ - cls: "gq-override-value-wrap", - html: + items : [ + { + html : + "
" + + "" + + "" + + "

Changing the score manually is a last-resort measure that should not be used on a regular basis.

" + + "

Please consult your supervisor about this step.

" + + "
" + + "
" + + "
" + } + ] + } ) + ); + + qualExtra.add( new Ext.form.Checkbox( { + xtype : 'checkbox', + id : 'gq-qual-override', + boxLabel : 'Override public score?', + hideLabel : false, + checked : ee.geeq.manualQualityOverride, + handler : function( el, value ) { + self.allowQualInput( value ); + ee.geeq.manualQualityOverride = value; + if ( value ) ee.geeq.manualQualityScore = Number( document.getElementById( 'gq-qual-override-value' ).value ); + } + } ) ); + + var qval = (ee.geeq.manualQualityScore ? ee.geeq.manualQualityScore : ee.geeq.detectedQualityScore); + qualExtra.add( { + cls : "gq-override-value-wrap", + html : " " - }); - - qualExtra.add(new Ext.slider.SingleSlider({ - id: 'gq-qual-override-value-slider', - cls: 'gq-override-value-slider', - name: 'gq-qual-override-value-slider', - width: 200, - value: ((ee.geeq.manualQualityScore ? ee.geeq.manualQualityScore : ee.geeq.detectedQualityScore) + 1) * 10, - increment: 1, - minValue: 0, - maxValue: 20, - hideLabel: true, - clickToChange: true, - listeners: { - change: function (el, val) { - var nr = document.getElementById('gq-qual-override-value'); - nr.value = (Math.round(val) / 10 - 1).toFixed(1); - nr.style.background = scoreToColor(Number(nr.value)); - ee.geeq.manualQualityScore = nr.value; - } + } ); + + qualExtra.add( new Ext.slider.SingleSlider( { + id : 'gq-qual-override-value-slider', + cls : 'gq-override-value-slider', + name : 'gq-qual-override-value-slider', + width : 200, + value : ((ee.geeq.manualQualityScore ? ee.geeq.manualQualityScore : ee.geeq.detectedQualityScore) + 1) * 10, + increment : 1, + minValue : 0, + maxValue : 20, + hideLabel : true, + clickToChange : true, + listeners : { + change : function( el, val ) { + var nr = document.getElementById( 'gq-qual-override-value' ); + nr.value = (Math.round( val ) / 10 - 1).toFixed( 1 ); + nr.style.background = scoreToColor( Number( nr.value ) ); + ee.geeq.manualQualityScore = nr.value; } - })); - - var saveButton = new Ext.Button({ - text: ' Save changes', - cls: 'gq-btn-save', - handler: function (el, value) { - ExpressionExperimentController.setGeeqManualSettings(ee.id, ee.geeq, { - callback: function () { - window.location.reload(); - } - }); + } + } ) ); + + var saveButton = new Ext.Button( { + text : ' Save changes', + cls : 'gq-btn-save', + handler : function( el, value ) { + ExpressionExperimentController.setGeeqManualSettings( ee.id, ee.geeq, { + callback : function() { + window.location.reload(); + } + } ); + }, + scope : this + } ); + qualExtra.add( saveButton ); + + return qualExtra; + }, + + suitExtraRendeder : function( ee ) { + + this.suitFolded = !ee.geeq.manualSuitabilityOverride; + + var suitExtra = new Ext.Panel( { + cls : 'gq-extra' + (this.suitFolded ? ' folded' : ''), + defaults : { + border : false, + padding : 0 + } + } ); + + var self = this; + var foldButton = new Ext.Button( { + text : '', + cls : 'gq-btn', + handler : function() { + this.foldPanel( suitExtra, self.suitFolded = !self.suitFolded ); + }, + scope : this + } ); + + suitExtra.add( foldButton ); + + suitExtra.add( new Ext.Panel( { + cls : 'gq-suit-warning', + defaults : { + border : false, + padding : 0 }, - scope: this - }); - qualExtra.add(saveButton); - - return qualExtra; - }, - - suitExtraRendeder: function (ee) { - - this.suitFolded = !ee.geeq.manualSuitabilityOverride; - - var suitExtra = new Ext.Panel({ - cls: 'gq-extra' + (this.suitFolded ? 
' folded' : ''), - defaults: { - border: false, - padding: 0 - } - }); - - var self = this; - var foldButton = new Ext.Button({ - text: '', - cls: 'gq-btn', - handler: function () { - this.foldPanel(suitExtra, self.suitFolded = !self.suitFolded); - }, - scope: this - }); - - suitExtra.add(foldButton); - - suitExtra.add(new Ext.Panel({ - cls: 'gq-suit-warning', - defaults: { - border: false, - padding: 0 - }, - items: [ - { - html: - "
" + - "" + - "" + - "

Changing the score manually is a last resort measure, that should not be used on regular basis.

" + - "

Please consult this step with your supervisor.

" + - "
" + - "
" + - "
" - } - ] - }) - ); - - suitExtra.add(new Ext.form.Checkbox({ - xtype: 'checkbox', - id: 'gq-suit-override', - boxLabel: 'Override public score?', - hideLabel: false, - checked: ee.geeq.manualSuitabilityOverride, - handler: function (el, value) { - self.allowSuitInput(value); - ee.geeq.manualSuitabilityOverride = value; - if (value) ee.geeq.manualSuitabilityScore = Number(document.getElementById('gq-suit-override-value').value); - } - })); - - var sval = (ee.geeq.manualSuitabilityScore ? ee.geeq.manualSuitabilityScore : ee.geeq.detectedSuitabilityScore); - suitExtra.add({ - cls: "gq-override-value-wrap", - html: + items : [ + { + html : + "
" + + "" + + "" + + "

Changing the score manually is a last-resort measure that should not be used on a regular basis.

" + + "

Please consult your supervisor about this step.

" + + "
" + + "
" + + "
" + } + ] + } ) + ); + + suitExtra.add( new Ext.form.Checkbox( { + xtype : 'checkbox', + id : 'gq-suit-override', + boxLabel : 'Override public score?', + hideLabel : false, + checked : ee.geeq.manualSuitabilityOverride, + handler : function( el, value ) { + self.allowSuitInput( value ); + ee.geeq.manualSuitabilityOverride = value; + if ( value ) ee.geeq.manualSuitabilityScore = Number( document.getElementById( 'gq-suit-override-value' ).value ); + } + } ) ); + + var sval = (ee.geeq.manualSuitabilityScore ? ee.geeq.manualSuitabilityScore : ee.geeq.detectedSuitabilityScore); + suitExtra.add( { + cls : "gq-override-value-wrap", + html : " " - }); - - suitExtra.add(new Ext.slider.SingleSlider({ - id: 'gq-suit-override-value-slider', - cls: 'gq-override-value-slider', - name: 'gq-suit-override-value-slider', - width: 200, - value: ((ee.geeq.manualSuitabilityScore ? ee.geeq.manualSuitabilityScore : ee.geeq.detectedSuitabilityScore) + 1) * 10, - increment: 1, - minValue: 0, - maxValue: 20, - hideLabel: true, - clickToChange: true, - listeners: { - change: function (el, val) { - var nr = document.getElementById('gq-suit-override-value'); - nr.value = (Math.round(val) / 10 - 1).toFixed(1); - nr.style.background = scoreToColor(Number(nr.value)); - ee.geeq.manualSuitabilityScore = nr.value; - } + } ); + + suitExtra.add( new Ext.slider.SingleSlider( { + id : 'gq-suit-override-value-slider', + cls : 'gq-override-value-slider', + name : 'gq-suit-override-value-slider', + width : 200, + value : ((ee.geeq.manualSuitabilityScore ? ee.geeq.manualSuitabilityScore : ee.geeq.detectedSuitabilityScore) + 1) * 10, + increment : 1, + minValue : 0, + maxValue : 20, + hideLabel : true, + clickToChange : true, + listeners : { + change : function( el, val ) { + var nr = document.getElementById( 'gq-suit-override-value' ); + nr.value = (Math.round( val ) / 10 - 1).toFixed( 1 ); + nr.style.background = scoreToColor( Number( nr.value ) ); + ee.geeq.manualSuitabilityScore = nr.value; } - })); - - var saveButton = new Ext.Button({ - text: ' Save changes', - cls: 'gq-btn-save', - handler: function () { - ExpressionExperimentController.setGeeqManualSettings(ee.id, ee.geeq, { - callback: function () { - window.location.reload(); - } - }); - }, - scope: this - }); - suitExtra.add(saveButton); - - return suitExtra; - }, - - allowBeffRadios: function (allow) { - Ext.getCmp('gq-beff-override-value-strong').setDisabled(!allow); - Ext.getCmp('gq-beff-override-value-weak').setDisabled(!allow); - Ext.getCmp('gq-beff-override-value-none').setDisabled(!allow); - }, - - allowBconfRadios: function (allow) { - Ext.getCmp('gq-bconf-override-value-true').setDisabled(!allow); - Ext.getCmp('gq-bconf-override-value-false').setDisabled(!allow); - }, - - allowQualInput: function (allow) { - Ext.getCmp('gq-qual-override-value-slider').setDisabled(!allow); - var nr = document.getElementById('gq-qual-override-value'); - if (nr && !allow) nr.setAttribute("disabled", "true"); - if (nr && allow) nr.removeAttribute("disabled"); - }, - - allowSuitInput: function (allow) { - Ext.getCmp('gq-suit-override-value-slider').setDisabled(!allow); - var nr = document.getElementById('gq-suit-override-value'); - if (nr && !allow) nr.setAttribute("disabled", "true"); - if (nr && allow) nr.removeAttribute("disabled"); - }, - - geeqRowRenderer: function (label, value, labelDesc, valueDesc, valDecimals, warning, extra, normalizeColor) { - if (valDecimals === undefined) valDecimals = 1; - var valColor = normalizeColor ? 
scoreToColorNormalized(Number(value)) : scoreToColor(Number(value)); - var valNumber = roundScore(value, valDecimals); - var cls = valNumber < 0 ? "negative" : "positive"; - var html = - '
' + - ' ' + - '' + - ' ' + label + '' + - ' ' + valNumber + ''; - if (valueDesc) { - html += '' - } - if (warning) { - html += '' - } - html += '
'; - - return { - html: html - }; - }, - - showPanel: function (panel, show) { - if (show) { - panel.show(); - } else { - panel.hide(); - } - }, - - foldPanel: function (panel, fold) { - if (fold) { - panel.addClass("folded"); - } else { - panel.removeClass("folded"); - } - }, + } + } ) ); + + var saveButton = new Ext.Button( { + text : ' Save changes', + cls : 'gq-btn-save', + handler : function() { + ExpressionExperimentController.setGeeqManualSettings( ee.id, ee.geeq, { + callback : function() { + window.location.reload(); + } + } ); + }, + scope : this + } ); + suitExtra.add( saveButton ); + + return suitExtra; + }, + + allowBeffRadios : function( allow ) { + Ext.getCmp( 'gq-beff-override-value-strong' ).setDisabled( !allow ); + Ext.getCmp( 'gq-beff-override-value-weak' ).setDisabled( !allow ); + Ext.getCmp( 'gq-beff-override-value-none' ).setDisabled( !allow ); + }, + + allowBconfRadios : function( allow ) { + Ext.getCmp( 'gq-bconf-override-value-true' ).setDisabled( !allow ); + Ext.getCmp( 'gq-bconf-override-value-false' ).setDisabled( !allow ); + }, + + allowQualInput : function( allow ) { + Ext.getCmp( 'gq-qual-override-value-slider' ).setDisabled( !allow ); + var nr = document.getElementById( 'gq-qual-override-value' ); + if ( nr && !allow ) nr.setAttribute( "disabled", "true" ); + if ( nr && allow ) nr.removeAttribute( "disabled" ); + }, + + allowSuitInput : function( allow ) { + Ext.getCmp( 'gq-suit-override-value-slider' ).setDisabled( !allow ); + var nr = document.getElementById( 'gq-suit-override-value' ); + if ( nr && !allow ) nr.setAttribute( "disabled", "true" ); + if ( nr && allow ) nr.removeAttribute( "disabled" ); + }, + + geeqRowRenderer : function( label, value, labelDesc, valueDesc, valDecimals, warning, extra, normalizeColor ) { + if ( valDecimals === undefined ) valDecimals = 1; + var valColor = normalizeColor ? scoreToColorNormalized( Number( value ) ) : scoreToColor( Number( value ) ); + var valNumber = roundScore( value, valDecimals ); + var cls = valNumber < 0 ? "negative" : "positive"; + var html = + '
' + + ' ' + + '' + + ' ' + label + '' + + ' ' + valNumber + ''; + if ( valueDesc ) { + html += '' + } + if ( warning ) { + html += '' + } + html += '
'; + + return { + html : html + }; + }, + + showPanel : function( panel, show ) { + if ( show ) { + panel.show(); + } else { + panel.hide(); + } + }, + + foldPanel : function( panel, fold ) { + if ( fold ) { + panel.addClass( "folded" ); + } else { + panel.removeClass( "folded" ); + } + }, /* batchInfoMissingRenderer: function (ee, mgr) { @@ -969,369 +969,379 @@ Gemma.ExpressionExperimentTools = Ext.extend(Gemma.CurationTools, { return panelBC; },*/ - batchEffectRenderer: function (ee, mgr) { + batchEffectRenderer : function( ee, mgr ) { - var panelBC = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [] - }); + var panelBC = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [] + } ); - var be = (ee.batchEffect !== null && ee.batchEffect !== "") - ? { - html: ' ' - + ee.batchEffect - } - : { - html: ' ' + - (ee.hasBatchInformation === false ? 'No batch info, can not check for batch effect' : 'Batch effect not detected') - }; - - panelBC.add(be); + var hasBatchConfound = ee.batchConfound !== null && ee.batchConfound !== ""; - var recalculateBCBtn = new Ext.Button({ - text: '', - tooltip: "Recalculate batch effect (refreshes page)", - handler: function (b, e) { - ExpressionExperimentController.recalculateBatchEffect(ee.id, { - callback: function () { - window.location.reload(); - } - }); - b.setText(''); - b.setDisabled(true); - }, - scope: this, - cls: 'btn-refresh' - }); - - panelBC.add(recalculateBCBtn); - return panelBC; - }, - - batchConfoundRenderer: function (ee, mgr) { - - var panelBC = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [] - }); - - var be = (ee.batchConfound !== null && ee.batchConfound !== "") + if ( hasBatchConfound ) { + var be = { + html : ' ' + + "Batch effect not determined due to confound." + }; + panelBC.add( be ); + } else { + var be = (ee.batchEffect !== null && ee.batchEffect !== "") ? { - html: ' ' - + ee.batchConfound + html : ' ' + + ee.batchEffect } : { - html: ' Batch confound not detected' + html : ' ' + + (ee.hasBatchInformation === false ? 
'No batch info, can not check for batch effect' : 'Batch effect not detected') }; - panelBC.add(be); - var recalculateBCBtn = new Ext.Button({ - text: '', - tooltip: 'Recalculate batch confound (refreshes page)', - handler: function (b, e) { - ExpressionExperimentController.recalculateBatchConfound(ee.id, { - callback: function () { - window.location.reload(); - } - }); - b.setText(''); - b.setDisabled(true); - }, - scope: this, - cls: 'btn-refresh' - }); - - panelBC.add(recalculateBCBtn); - return panelBC; - }, - - linkAnalysisPanelRenderer: function (ee, manager) { - var panel = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [{ - html: 'Link Analysis: ' - }] - }); - var id = ee.id; - var runBtn = new Ext.Button({ - text: '', - tooltip: 'Missing value computation (popup, refreshes page)', - handler: manager.doLinks.createDelegate(this, [id]), - scope: this, - cls: 'btn-refresh' - }); - if (ee.dateLinkAnalysis) { - var type = ee.linkAnalysisEventType; - var color = "#000"; - var suggestRun = true; - var qtip = 'ext:qtip="Analysis was OK"'; - if (type == 'FailedLinkAnalysisEvent') { - color = 'red'; - qtip = 'ext:qtip="Analysis failed"'; - } else if (type == 'TooSmallDatasetLinkAnalysisEvent') { - color = '#CCC'; - qtip = 'ext:qtip="Dataset is too small"'; - suggestRun = false; - } - panel.add({ - html: '' - + Gemma.Renderers.dateRenderer(ee.dateLinkAnalysis) - }); - // disable through gui + panelBC.add( be ); + } + + var recalculateBCBtn = new Ext.Button( { + text : '', + tooltip : "Recalculate batch effect (refreshes page)", + handler : function( b, e ) { + ExpressionExperimentController.recalculateBatchEffect( ee.id, { + callback : function() { + window.location.reload(); + } + } ); + b.setText( '' ); + b.setDisabled( true ); + }, + scope : this, + cls : 'btn-refresh' + } ); + + panelBC.add( recalculateBCBtn ); + return panelBC; + }, + + batchConfoundRenderer : function( ee, mgr ) { + + var panelBC = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [] + } ); + + var be = (ee.batchConfound !== null && ee.batchConfound !== "") + ? 
{ + html : ' ' + + ee.batchConfound + } + : { + html : ' Batch confound not detected' + }; + + panelBC.add( be ); + var recalculateBCBtn = new Ext.Button( { + text : '', + tooltip : 'Recalculate batch confound (refreshes page)', + handler : function( b, e ) { + ExpressionExperimentController.recalculateBatchConfound( ee.id, { + callback : function() { + window.location.reload(); + } + } ); + b.setText( '' ); + b.setDisabled( true ); + }, + scope : this, + cls : 'btn-refresh' + } ); + + panelBC.add( recalculateBCBtn ); + return panelBC; + }, + + linkAnalysisPanelRenderer : function( ee, manager ) { + var panel = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [ { + html : 'Link Analysis: ' + } ] + } ); + var id = ee.id; + var runBtn = new Ext.Button( { + text : '', + tooltip : 'Missing value computation (popup, refreshes page)', + handler : manager.doLinks.createDelegate( this, [ id ] ), + scope : this, + cls : 'btn-refresh' + } ); + if ( ee.dateLinkAnalysis ) { + var type = ee.linkAnalysisEventType; + var color = "#000"; + var suggestRun = true; + var qtip = 'ext:qtip="Analysis was OK"'; + if ( type == 'FailedLinkAnalysisEvent' ) { + color = 'red'; + qtip = 'ext:qtip="Analysis failed"'; + } else if ( type == 'TooSmallDatasetLinkAnalysisEvent' ) { + color = '#CCC'; + qtip = 'ext:qtip="Dataset is too small"'; + suggestRun = false; + } + panel.add( { + html : '' + + Gemma.Renderers.dateRenderer( ee.dateLinkAnalysis ) + } ); + // disable through gui // if (suggestRun) { // panel.add(runBtn); // } - return panel; - } else { - panel.add({ - html: 'May be eligible; perform via CLI ' - }); - // disable through gui - // panel.add(runBtn); - return panel; - } - - }, - - missingValueAnalysisPanelRenderer: function (ee, manager) { - var panel = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [{ - html: 'Missing values: ' - }] - }); - var id = ee.id; - var runBtn = new Ext.Button({ - text: '', - tooltip: 'Missing value computation (popup, refreshes page)', - handler: manager.doMissingValues.createDelegate(this, [id]), - scope: this, - cls: 'btn-refresh' - }); - /* - * Offer missing value analysis if it's possible (this might need tweaking). 
- */ - if (ee.technologyType != 'ONECOLOR' && ee.technologyType != 'SEQUENCING' && ee.technologyType != 'GENELIST' && ee.hasEitherIntensity) { - - if (ee.dateMissingValueAnalysis) { - var type = ee.missingValueAnalysisEventType; - var color = "#000"; - var suggestRun = true; - var qtip = 'ext:qtip="OK"'; - if (type == 'FailedMissingValueAnalysisEvent') { - color = 'red'; - qtip = 'ext:qtip="Failed"'; - } - - panel.add({ - html: '' - + Gemma.Renderers.dateRenderer(ee.dateMissingValueAnalysis) + ' ' - }); - if (suggestRun) { - panel.add(runBtn); - } - return panel; - } else { - panel.add({ - html: 'Needed ' - }); - // panel.add(runBtn); - return panel; - } - - } else { - - panel - .add({ - html: 'NA' - }); - return panel; - } - }, - - processedVectorCreatePanelRenderer: function (ee, manager) { - var panel = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [{ - html: 'Preprocessing: ' - }] - }); - var id = ee.id; - var runBtn = new Ext.Button({ - text: '', - tooltip: 'Preprocess including PCA, correlation matrix and M-V (popup, refreshes page)', - handler: manager.doProcessedVectors.createDelegate(this, [id]), - scope: this, - cls: 'btn-refresh' - }); - if (ee.dateProcessedDataVectorComputation) { - var type = ee.processedDataVectorComputationEventType; + return panel; + } else { + panel.add( { + html : 'May be eligible; perform via CLI ' + } ); + // disable through gui + // panel.add(runBtn); + return panel; + } + + }, + + missingValueAnalysisPanelRenderer : function( ee, manager ) { + var panel = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [ { + html : 'Missing values: ' + } ] + } ); + var id = ee.id; + var runBtn = new Ext.Button( { + text : '', + tooltip : 'Missing value computation (popup, refreshes page)', + handler : manager.doMissingValues.createDelegate( this, [ id ] ), + scope : this, + cls : 'btn-refresh' + } ); + /* + * Offer missing value analysis if it's possible (this might need tweaking). 
+ */ + if ( ee.technologyType != 'ONECOLOR' && ee.technologyType != 'SEQUENCING' && ee.technologyType != 'GENELIST' && ee.hasEitherIntensity ) { + + if ( ee.dateMissingValueAnalysis ) { + var type = ee.missingValueAnalysisEventType; var color = "#000"; - var suggestRun = true; var qtip = 'ext:qtip="OK"'; - if (type == 'FailedProcessedVectorComputationEvent') { - color = 'red'; - qtip = 'ext:qtip="Failed"'; + if ( type == 'FailedMissingValueAnalysisEvent' ) { + color = 'red'; + qtip = 'ext:qtip="Failed"'; } - panel.add({ - html: '' - + Gemma.Renderers.dateRenderer(ee.dateProcessedDataVectorComputation) + ' ' - }); - if (suggestRun) { - panel.add(runBtn); + + panel.add( { + html : '' + + Gemma.Renderers.dateRenderer( ee.dateMissingValueAnalysis ) + ' ' + } ); + if ( suggestRun ) { + panel.add( runBtn ); } return panel; - } else { - panel.add({ - html: 'Needed ' - }); - panel.add(runBtn); + } else { + panel.add( { + html : 'Needed ' + } ); + // panel.add(runBtn); return panel; - } - }, + } + + } else { + + panel + .add( { + html : 'NA' + } ); + return panel; + } + }, + + processedVectorCreatePanelRenderer : function( ee, manager ) { + var panel = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [ { + html : 'Preprocessing: ' + } ] + } ); + var id = ee.id; + var runBtn = new Ext.Button( { + text : '', + tooltip : 'Preprocess including PCA, correlation matrix and M-V (popup, refreshes page)', + handler : manager.doProcessedVectors.createDelegate( this, [ id ] ), + scope : this, + cls : 'btn-refresh' + } ); + if ( ee.dateProcessedDataVectorComputation ) { + var type = ee.processedDataVectorComputationEventType; + var color = "#000"; + + var suggestRun = true; + var qtip = 'ext:qtip="OK"'; + if ( type == 'FailedProcessedVectorComputationEvent' ) { + color = 'red'; + qtip = 'ext:qtip="Failed"'; + } + panel.add( { + html : '' + + Gemma.Renderers.dateRenderer( ee.dateProcessedDataVectorComputation ) + ' ' + } ); + if ( suggestRun ) { + panel.add( runBtn ); + } + return panel; + } else { + panel.add( { + html : 'Needed ' + } ); + panel.add( runBtn ); + return panel; + } + }, + + differentialAnalysisPanelRenderer : function( ee, manager ) { + var panel = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [ { + html : 'Differential Expression Analysis: ' + } ] + } ); + + if ( !ee.suitableForDEA ) { + var color = "#000"; + panel.add( { + html : '' + + 'Not suitable' + ' ' + } ); + return panel; + } - differentialAnalysisPanelRenderer: function (ee, manager) { - var panel = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [{ - html: 'Differential Expression Analysis: ' - }] - }); + var id = ee.id; + var runBtn = new Ext.Button( { + text : '', + tooltip : 'Differential expression analysis (popup, refreshes page)', + handler : manager.doDifferential.createDelegate( this, [ id ] ), + scope : this, + cls : 'btn-refresh' + } ); - if (!ee.suitableForDEA) { - var color = "#000"; - panel.add({ - html: '' - + 'Not suitable' + ' ' - }); - return panel; - } - - var id = ee.id; - var runBtn = new Ext.Button({ - text: '', - tooltip: 'Differential expression analysis (popup, refreshes page)', - handler: manager.doDifferential.createDelegate(this, [id]), - scope: this, - cls: 'btn-refresh' - }); + if ( ee.numPopulatedFactors > 0 ) { + if ( ee.dateDifferentialAnalysis ) { + var type = ee.differentialAnalysisEventType; - if (ee.numPopulatedFactors > 0) { - if 
(ee.dateDifferentialAnalysis) { - var type = ee.differentialAnalysisEventType; - - var color = "#000"; - var suggestRun = true; - var qtip = 'ext:qtip="OK"'; - if (type == 'FailedDifferentialExpressionAnalysisEvent') { - color = 'red'; - qtip = 'ext:qtip="Failed"'; - } - panel.add({ - html: '' - + Gemma.Renderers.dateRenderer(ee.dateDifferentialAnalysis) + ' ' - }); - if (suggestRun) { - panel.add(runBtn); - } - return panel; - } else { - - panel.add({ - html: 'Needed ' - }); - panel.add(runBtn); - return panel; + var color = "#000"; + var suggestRun = true; + var qtip = 'ext:qtip="OK"'; + if ( type == 'FailedDifferentialExpressionAnalysisEvent' ) { + color = 'red'; + qtip = 'ext:qtip="Failed"'; } - } else { + panel.add( { + html : '' + + Gemma.Renderers.dateRenderer( ee.dateDifferentialAnalysis ) + ' ' + } ); + if ( suggestRun ) { + panel.add( runBtn ); + } + return panel; + } else { - panel.add({ - html: 'NA' - }); + panel.add( { + html : 'Needed ' + } ); + panel.add( runBtn ); return panel; - } - }, - - renderProcessedExpressionVectorCount: function (e) { - return e.processedExpressionVectorCount ? e.processedExpressionVectorCount : ' [count not available] '; - }, - - /* - * This really replaces the PCA panel - allows for refresh of the diagnostics (PCA, sample correlation and MV) - */ - diagnosticsPanelRenderer: function (ee, manager) { - var panel = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [{ - html: 'Diagnostics (PCA, MV, Sample Corr, GEEQ): ' - }] - }); + } + } else { + + panel.add( { + html : 'NA' + } ); + return panel; + } + }, + + renderProcessedExpressionVectorCount : function( e ) { + return e.processedExpressionVectorCount ? e.processedExpressionVectorCount : ' [count not available] '; + }, + + /* + * This really replaces the PCA panel - allows for refresh of the diagnostics (PCA, sample correlation and MV) + */ + diagnosticsPanelRenderer : function( ee, manager ) { + var panel = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [ { + html : 'Diagnostics (PCA, MV, Sample Corr, GEEQ): ' + } ] + } ); var id = ee.id; - var runBtn = new Ext.Button({ - text: '', - tooltip: 'Update diagnostics (popup, refreshes page)', - handler: manager.doDiagnostics.createDelegate(this, [id, true]), - scope: this, - cls: 'btn-refresh' - }); + var runBtn = new Ext.Button( { + text : '', + tooltip : 'Update diagnostics (popup, refreshes page)', + handler : manager.doDiagnostics.createDelegate( this, [ id, true ] ), + scope : this, + cls : 'btn-refresh' + } ); // Get date and info. Note that we don't have a date for the diagnostics all together, so this can be improved. 
- if (ee.datePcaAnalysis) { - var type = ee.pcaAnalysisEventType; - - var color = "#000"; - var qtip = 'ext:qtip="OK"'; - var suggestRun = true; - - if (type == 'FailedPCAAnalysisEvent') { - color = 'red'; - qtip = 'ext:qtip="Failed"'; - } - panel.add({ - html: '' - + Gemma.Renderers.dateRenderer(ee.datePcaAnalysis) + ' ' - }); + if ( ee.datePcaAnalysis ) { + var type = ee.pcaAnalysisEventType; + + var color = "#000"; + var qtip = 'ext:qtip="OK"'; + var suggestRun = true; + + if ( type == 'FailedPCAAnalysisEvent' ) { + color = 'red'; + qtip = 'ext:qtip="Failed"'; + } + panel.add( { + html : '' + + Gemma.Renderers.dateRenderer( ee.datePcaAnalysis ) + ' ' + } ); } else - panel.add({ - html: 'Needed ' - }); + panel.add( { + html : 'Needed ' + } ); - panel.add(runBtn); + panel.add( runBtn ); return panel; - }, + }, - // removed in place of general diagnostics one. + // removed in place of general diagnostics one. // /* // * Get the last date PCA was run, add a button to run PCA // */ @@ -1382,77 +1392,77 @@ Gemma.ExpressionExperimentTools = Ext.extend(Gemma.CurationTools, { // // }, - /* - * Get the last date batch info was downloaded, add a button to download - */ - batchPanelRenderer: function (ee, manager) { - var panel = new Ext.Panel({ - layout: 'hbox', - defaults: { - border: false, - padding: 2 - }, - items: [{ - html: 'Batch Information: ' - }] - }); - var id = ee.id; - var hasBatchInformation = ee.hasBatchInformation; - var technologyType = ee.technologyType; - var runBtn = new Ext.Button({ - text: '', - tooltip: 'Batch information (popup, refreshes page)', - // See EEManager.js doBatchInfoFetch(id) - handler: manager.doBatchInfoFetch.createDelegate(this, [id]), - scope: this, - cls: 'btn-refresh' - }); - - // Batch info fetching not allowed for RNA seq and other non-microarray data - if (technologyType == 'NONE') { - panel.add({ - html: '' + 'NA' + ' ' - }); - return panel; - } - - // If present, display the date and info. If batch information exists without date, display 'Provided'. - // If no batch information, display 'Needed' with button for GEO and ArrayExpress data. Otherwise, NA. - if (ee.dateBatchFetch) { - var type = ee.batchFetchEventType; - - var color = "#000"; - var qtip = 'ext:qtip="OK"'; - - if (type == 'FailedBatchInformationFetchingEvent') { - color = 'red'; - qtip = 'ext:qtip="Failed"'; - } else if (type == 'FailedBatchInformationMissingEvent') { - color = '#CCC'; - qtip = 'ext:qtip="Raw data files not available from source"'; - } + /* + * Get the last date batch info was downloaded, add a button to download + */ + batchPanelRenderer : function( ee, manager ) { + var panel = new Ext.Panel( { + layout : 'hbox', + defaults : { + border : false, + padding : 2 + }, + items : [ { + html : 'Batch Information: ' + } ] + } ); + var id = ee.id; + var hasBatchInformation = ee.hasBatchInformation; + var technologyType = ee.technologyType; + var runBtn = new Ext.Button( { + text : '', + tooltip : 'Batch information (popup, refreshes page)', + // See EEManager.js doBatchInfoFetch(id) + handler : manager.doBatchInfoFetch.createDelegate( this, [ id ] ), + scope : this, + cls : 'btn-refresh' + } ); + + // Batch info fetching not allowed for RNA seq and other non-microarray data + if ( technologyType == 'NONE' ) { + panel.add( { + html : '' + 'NA' + ' ' + } ); + return panel; + } + + // If present, display the date and info. If batch information exists without date, display 'Provided'. + // If no batch information, display 'Needed' with button for GEO and ArrayExpress data. 
Otherwise, NA. + if ( ee.dateBatchFetch ) { + var type = ee.batchFetchEventType; + + var color = "#000"; + var qtip = 'ext:qtip="OK"'; + + if ( type == 'FailedBatchInformationFetchingEvent' ) { + color = 'red'; + qtip = 'ext:qtip="Failed"'; + } else if ( type == 'FailedBatchInformationMissingEvent' ) { + color = '#CCC'; + qtip = 'ext:qtip="Raw data files not available from source"'; + } + + panel.add( { + html : '' + + Gemma.Renderers.dateRenderer( ee.dateBatchFetch ) + ' ' + } ); + panel.add( runBtn ); + } else if ( hasBatchInformation ) { + panel.add( { + html : 'Provided' + } ); + } else if ( ee.externalDatabase == "GEO" || ee.externalDatabase == "ArrayExpress" ) { + panel.add( { + html : 'Needed ' + } ); + panel.add( runBtn ); + } else + panel.add( { + html : '' + 'NA' + + ' ' + } ); - panel.add({ - html: '' - + Gemma.Renderers.dateRenderer(ee.dateBatchFetch) + ' ' - }); - panel.add(runBtn); - } else if (hasBatchInformation) { - panel.add({ - html: 'Provided' - }); - } else if (ee.externalDatabase == "GEO" || ee.externalDatabase == "ArrayExpress") { - panel.add({ - html: 'Needed ' - }); - panel.add(runBtn); - } else - panel.add({ - html: '' + 'NA' - + ' ' - }); - - return panel; - } -}); + return panel; + } +} ); From 8bf6ff295f2c92a8539c33da9e0a4e1b5ac608fc Mon Sep 17 00:00:00 2001 From: Paul Pavlidis Date: Wed, 20 Mar 2024 12:14:47 -0700 Subject: [PATCH 029/105] some refactoring --- .../ExpressionExperimentServiceImpl.java | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index 608acda2f1..ba922389af 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -910,6 +910,7 @@ public String getBatchConfound( ExpressionExperiment ee ) { ee = this.thawBioAssays( ee ); if ( !this.checkHasBatchInfo( ee ) ) { + log.info( "Experiment has no batch information, cannot check for confound: " + ee ); return null; } @@ -1045,6 +1046,8 @@ public BatchEffectDetails getBatchEffectDetails( ExpressionExperiment ee ) { @Transactional(readOnly = true) public BatchEffectType getBatchEffect( ExpressionExperiment ee ) { BatchEffectDetails beDetails = this.getBatchEffectDetails( ee ); + BatchEffectDetails.BatchEffectStatistics batchEffectStatistics = beDetails.getBatchEffectStatistics(); + if ( !beDetails.hasBatchInformation() ) { return BatchEffectType.NO_BATCH_INFO; } else if ( beDetails.getHasSingletonBatches() ) { @@ -1059,13 +1062,15 @@ public BatchEffectType getBatchEffect( ExpressionExperiment ee ) { } else if ( beDetails.hasProblematicBatchInformation() ) { // sort of generic return BatchEffectType.PROBLEMATIC_BATCH_INFO_FAILURE; - } else if ( beDetails.getBatchEffectStatistics() == null ) { - return BatchEffectType.BATCH_EFFECT_UNDETERMINED_FAILURE; - } else if ( beDetails.getBatchEffectStatistics().getPvalue() < ExpressionExperimentServiceImpl.BATCH_EFFECT_THRESHOLD ) { - // this means there was a batch effect but we couldn't correct it - return BatchEffectType.BATCH_EFFECT_FAILURE; } else { - return BatchEffectType.NO_BATCH_EFFECT_SUCCESS; + if ( batchEffectStatistics == null ) { + return BatchEffectType.BATCH_EFFECT_UNDETERMINED_FAILURE; + } 
else if ( batchEffectStatistics.getPvalue() < ExpressionExperimentServiceImpl.BATCH_EFFECT_THRESHOLD ) { + // this means there was a batch effect but we couldn't correct it + return BatchEffectType.BATCH_EFFECT_FAILURE; + } else { + return BatchEffectType.NO_BATCH_EFFECT_SUCCESS; + } } } From bc06bacd75fac10ebeb51cbc6d47e03811a4317d Mon Sep 17 00:00:00 2001 From: Paul Pavlidis Date: Wed, 20 Mar 2024 12:22:21 -0700 Subject: [PATCH 030/105] Improve our ability to detect batch confounds in small data sets --- .../batcheffects/BatchConfoundUtils.java | 62 ++++++++++++++++--- 1 file changed, 53 insertions(+), 9 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java index 618a8dbe83..2a12d8489e 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java @@ -133,8 +133,8 @@ private static Collection factorBatchConfoundTest( BioAssaySet ee assert numBioMaterials > 0 : "No biomaterials for " + ef; double p = Double.NaN; - double chiSquare; - int df; + double chiSquare = Double.NaN; + int df = 0; int numBatches = batchFactor.getFactorValues().size(); if ( ExperimentalDesignUtils.isContinuous( ef ) ) { @@ -238,20 +238,64 @@ private static Collection factorBatchConfoundTest( BioAssaySet ee continue; // to the next factor } + /* + * The problem with chi-square test is it is underpowered and we don't detect perfect confounds + * when the sample size is small e.g. 3 + 3. + * So for small sample sizes we apply some special cases 1) when we have a 2x2 table and 3) when we have a small number of batches and observations. + * Otherwise we use the chisquare test. + */ ChiSquareTest cst = new ChiSquareTest(); + if ( finalCounts.length == 2 && finalCounts[0].length == 2 ) { // treat as odds ratio computation + double numerator = ( double ) finalCounts[0][0] * finalCounts[1][1]; + double denominator = ( double ) finalCounts[0][1] * finalCounts[1][0]; - try { - chiSquare = cst.chiSquare( finalCounts ); - } catch ( IllegalArgumentException e ) { - log.warn( "IllegalArgumentException exception computing ChiSq for : " + ef + "; Error was: " + e - .getMessage() ); - chiSquare = Double.NaN; + // if either value is zero, we have a perfect confound + if ( numerator == 0 || denominator == 0 ) { + chiSquare = Double.POSITIVE_INFINITY; // effectively we shift to fisher's exact test here. + } else { + chiSquare = cst.chiSquare( finalCounts ); + } + } else if ( numBioMaterials <= 10 && finalCounts.length <= 4 ) { // number of batches and number of samples is small + // look for pairs of rows and columns where there is only one non-zero value in each, which would be a confound. 
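To make the 3 + 3 case described in the comment above concrete: for a two-level factor perfectly confounded with two batches, the plain Pearson chi-square statistic is only 6.0 on 1 degree of freedom (p of roughly 0.014), which a conservative cut-off will not flag, whereas the zero cross-product check catches the confound regardless of sample size. A self-contained sketch using the same commons-math3 ChiSquareTest; the class name and the specific counts are illustrative only:

    import org.apache.commons.math3.stat.inference.ChiSquareTest;

    public class SmallConfoundExample {
        public static void main( String[] args ) {
            // 3 + 3 design where the factor is perfectly confounded with batch:
            // rows = factor levels, columns = batches.
            long[][] counts = { { 3, 0 }, { 0, 3 } };

            ChiSquareTest cst = new ChiSquareTest();
            double chiSquare = cst.chiSquare( counts );   // 6.0, df = 1
            double p = cst.chiSquareTest( counts );       // ~0.014, easy to miss with a strict threshold

            // The 2x2 shortcut from the patch: a zero cross-product flags the table as confounded.
            double numerator = ( double ) counts[0][0] * counts[1][1];   // 9
            double denominator = ( double ) counts[0][1] * counts[1][0]; // 0
            boolean flaggedAsConfound = numerator == 0 || denominator == 0;

            System.out.printf( "chi2=%.1f, p=%.3f, flaggedAsConfound=%b%n", chiSquare, p, flaggedAsConfound );
        }
    }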
+ for ( int r = 0; r < finalCounts.length; r++ ) { + int numNonzero = 0; + int nonZeroIndex = -1; + for ( int c = 0; c < finalCounts[0].length; c++ ) { + if ( finalCounts[r][c] != 0 ) { + numNonzero++; + nonZeroIndex = c; + } + } + // inspect the column + if ( numNonzero == 1 ) { + int numNonzeroColumnVals = 0; + for ( int r2 = 0; r2 < finalCounts.length; r2++ ) { + if ( finalCounts[r2][nonZeroIndex] != 0 ) { + numNonzeroColumnVals++; + } + } + if ( numNonzeroColumnVals == 1 ) { + chiSquare = Double.POSITIVE_INFINITY; + break; + } + } + } + } else { + try { + chiSquare = cst.chiSquare( finalCounts ); + } catch ( IllegalArgumentException e ) { + log.warn( "IllegalArgumentException exception computing ChiSq for : " + ef + "; Error was: " + e + .getMessage() ); + chiSquare = Double.NaN; + } } df = ( finalCounts.length - 1 ) * ( finalCounts[0].length - 1 ); ChiSquaredDistribution distribution = new ChiSquaredDistribution( df ); - if ( !Double.isNaN( chiSquare ) ) { + if ( chiSquare == Double.POSITIVE_INFINITY ) { + p = 0.0; + } else if ( !Double.isNaN( chiSquare ) ) { p = 1.0 - distribution.cumulativeProbability( chiSquare ); } From 613369acb3440393d87735e3cf9dae7e5c141d0c Mon Sep 17 00:00:00 2001 From: Paul Pavlidis Date: Wed, 20 Mar 2024 12:27:37 -0700 Subject: [PATCH 031/105] ensure we fall back on the regular test if the special cases aren't triggered --- .../batcheffects/BatchConfoundUtils.java | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java index 2a12d8489e..2f06737a19 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/core/analysis/preprocess/batcheffects/BatchConfoundUtils.java @@ -245,6 +245,15 @@ private static Collection factorBatchConfoundTest( BioAssaySet ee * Otherwise we use the chisquare test. */ ChiSquareTest cst = new ChiSquareTest(); + // initialize this value; we'll use it when my special test doesn't turn up anything. + try { + chiSquare = cst.chiSquare( finalCounts ); + } catch ( IllegalArgumentException e ) { + log.warn( "IllegalArgumentException exception computing ChiSq for : " + ef + "; Error was: " + e + .getMessage() ); + chiSquare = Double.NaN; + } + if ( finalCounts.length == 2 && finalCounts[0].length == 2 ) { // treat as odds ratio computation double numerator = ( double ) finalCounts[0][0] * finalCounts[1][1]; double denominator = ( double ) finalCounts[0][1] * finalCounts[1][0]; @@ -252,10 +261,10 @@ private static Collection factorBatchConfoundTest( BioAssaySet ee // if either value is zero, we have a perfect confound if ( numerator == 0 || denominator == 0 ) { chiSquare = Double.POSITIVE_INFINITY; // effectively we shift to fisher's exact test here. - } else { - chiSquare = cst.chiSquare( finalCounts ); } + } else if ( numBioMaterials <= 10 && finalCounts.length <= 4 ) { // number of batches and number of samples is small + // look for pairs of rows and columns where there is only one non-zero value in each, which would be a confound. 
for ( int r = 0; r < finalCounts.length; r++ ) { int numNonzero = 0; @@ -280,14 +289,6 @@ private static Collection factorBatchConfoundTest( BioAssaySet ee } } } - } else { - try { - chiSquare = cst.chiSquare( finalCounts ); - } catch ( IllegalArgumentException e ) { - log.warn( "IllegalArgumentException exception computing ChiSq for : " + ef + "; Error was: " + e - .getMessage() ); - chiSquare = Double.NaN; - } } df = ( finalCounts.length - 1 ) * ( finalCounts[0].length - 1 ); From 3542b455971cdeca1f72007f9d68f9fc409971c4 Mon Sep 17 00:00:00 2001 From: Paul Pavlidis Date: Fri, 22 Mar 2024 16:55:32 -0700 Subject: [PATCH 032/105] address https://github.com/PavlidisLab/Gemma/issues/1072 --- .../expression/experiment/ExperimentalDesignController.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExperimentalDesignController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExperimentalDesignController.java index d7d59648ad..c2a2ed5942 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExperimentalDesignController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExperimentalDesignController.java @@ -637,7 +637,7 @@ public void updateBioMaterials( BioMaterialValueObject[] bmvos ) { Collection biomaterials = bioMaterialService.updateBioMaterials( Arrays.asList( bmvos ) ); - log.info( String.format( "Updating biomaterials took %.2f seconds", w.getTime() / 1000 ) ); + log.info( String.format( "Updating biomaterials took %.2f seconds", (double)w.getTime() / 1000.0 ) ); if ( biomaterials.isEmpty() ) return; From 2266dfc0f5cb878c1f1e65f9ede4d0c26595f3a7 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 18 Mar 2024 13:32:37 -0700 Subject: [PATCH 033/105] Include a stacktrace when warning about lengthy parameters --- .../src/main/java/ubic/gemma/persistence/util/QueryUtils.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java index 400877ee13..73a0d2e612 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java @@ -37,7 +37,7 @@ public static > Collection optimizeParameterList( Col .collect( Collectors.toList() ); if ( sortedList.size() > MAX_PARAMETER_LIST_SIZE ) { log.warn( String.format( "Optimizing a large parameter list of size %d may have a negative impact on performance, use batchParameterList() instead.", - sortedList.size() ) ); + sortedList.size() ), new Throwable() ); return list; } return ListUtils.padToNextPowerOfTwo( sortedList, sortedList.get( sortedList.size() - 1 ) ); @@ -57,7 +57,7 @@ public static Collection optimizeIdentifiableParamet .collect( Collectors.toList() ); if ( sortedList.size() > MAX_PARAMETER_LIST_SIZE ) { log.warn( String.format( "Optimizing a large parameter list of size %d may have a negative impact on performance, use batchIdentifiableParameterList() instead.", - sortedList.size() ) ); + sortedList.size() ), new Throwable() ); return list; } return ListUtils.padToNextPowerOfTwo( sortedList, sortedList.get( sortedList.size() - 1 ) ); From f2caf81029a49668307bf43f3a67abfea022195d Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 18 Mar 2024 14:56:12 -0700 Subject: [PATCH 034/105] Retrieve 
results from EE2AD and EE2C by batch --- .../java/ubic/gemma/core/util/ListUtils.java | 14 +- .../ExpressionExperimentDaoImpl.java | 150 ++++++++++++------ .../gemma/persistence/util/QueryUtils.java | 60 +++---- .../ExpressionExperimentDaoTest.java | 16 +- 4 files changed, 157 insertions(+), 83 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java b/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java index d18699dcef..0065a8f8a0 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/core/util/ListUtils.java @@ -47,7 +47,7 @@ private static void fillMap( Map element2position, List list /** * Pad a collection to the next power of 2 with the given element. */ - public static Collection padToNextPowerOfTwo( Collection list, T elementForPadding ) { + public static List padToNextPowerOfTwo( List list, T elementForPadding ) { int k = Integer.highestOneBit( list.size() ); if ( list.size() == k ) { return list; // already a power of 2 @@ -58,7 +58,7 @@ public static Collection padToNextPowerOfTwo( Collection list, T eleme /** * Pad a collection with the given element. */ - public static Collection pad( Collection list, T elementForPadding, int size ) { + public static List pad( List list, T elementForPadding, int size ) { Assert.isTrue( size >= list.size(), "Target size must be greater or equal to the collection size." ); if ( list.size() == size ) { return list; @@ -70,4 +70,14 @@ public static Collection pad( Collection list, T elementForPadding, in } return paddedList; } + + public static List> batch( List list, int batchSize ) { + if ( batchSize == -1 ) { + return Collections.singletonList( list ); + } + int numberOfBatches = ( list.size() / batchSize ) + ( list.size() % batchSize > 0 ? 
1 : 0 ); + int size = numberOfBatches * batchSize; + List paddedList = pad( list, list.get( list.size() - 1 ), size ); + return org.apache.commons.collections4.ListUtils.partition( paddedList, batchSize ); + } } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index b10d03a01b..5ef95fe8c5 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -67,8 +67,7 @@ import static java.util.stream.Collectors.summingLong; import static ubic.gemma.persistence.service.TableMaintenanceUtil.EE2AD_QUERY_SPACE; import static ubic.gemma.persistence.service.TableMaintenanceUtil.EE2C_QUERY_SPACE; -import static ubic.gemma.persistence.util.QueryUtils.optimizeIdentifiableParameterList; -import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; +import static ubic.gemma.persistence.util.QueryUtils.*; /** * @author pavlidis @@ -79,8 +78,6 @@ public class ExpressionExperimentDaoImpl extends AbstractCuratableDao implements ExpressionExperimentDao { - private static final int BATCH_SIZE = 1000; - private static final String CHARACTERISTIC_ALIAS = CharacteristicDao.OBJECT_ALIAS, BIO_MATERIAL_CHARACTERISTIC_ALIAS = "bmc", @@ -666,12 +663,7 @@ public Map getCategoriesUsageFrequency( @Nullable Collecti .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) .addSynchronizedQuerySpace( EE2C_QUERY_SPACE ) .addSynchronizedEntityClass( ExpressionExperiment.class ) - .addSynchronizedEntityClass( Characteristic.class ) - .setCacheable( true ) - .setMaxResults( maxResults ); - if ( eeIds != null ) { - q.setParameterList( "eeIds", optimizeParameterList( eeIds ) ); - } + .addSynchronizedEntityClass( Characteristic.class ); if ( excludedCategoryUris != null && !excludedCategoryUris.isEmpty() ) { q.setParameterList( "excludedCategoryUris", optimizeParameterList( excludedCategoryUris ) ); } @@ -682,12 +674,36 @@ public Map getCategoriesUsageFrequency( @Nullable Collecti q.setParameterList( "retainedTermUris", optimizeParameterList( retainedTermUris ) ); } EE2CAclQueryUtils.addAclParameters( q, ExpressionExperiment.class ); - //noinspection unchecked - List result = q.list(); + q.setCacheable( true ); + List result; + if ( eeIds != null ) { + if ( eeIds.size() > MAX_PARAMETER_LIST_SIZE ) { + result = listByBatch( q, "eeIds", eeIds, 2048 ); + if ( maxResults > 0 ) { + return aggregateC( result ).entrySet().stream() + .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) + .limit( maxResults ) + .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue, ( a, b ) -> b, () -> new TreeMap<>( Characteristic.getByCategoryAndValueComparator() ) ) ); + } + } else { + //noinspection unchecked + result = q + .setParameterList( "eeIds", optimizeParameterList( eeIds ) ) + .setMaxResults( maxResults ) + .list(); + } + } else { + //noinspection unchecked + result = q.setMaxResults( maxResults ).list(); + } + return aggregateC( result ); + } + + private Map aggregateC( List result ) { TreeMap byC = new TreeMap<>( Characteristic.getByCategoryComparator() ); for ( Object[] row : result ) { - Characteristic c = Characteristic.Factory.newInstance( null, null, null, null, ( String ) row[0], ( String ) row[1], null ); - byC.put( c, ( Long ) row[2] ); 
+ byC.compute( Characteristic.Factory.newInstance( null, null, null, null, ( String ) row[0], ( String ) row[1], null ), + ( k, v ) -> v != null ? v + ( Long ) row[2] : ( Long ) row[2] ); } return byC; } @@ -756,7 +772,8 @@ else if ( category.startsWith( "http://" ) ) { // no need to group by category if a specific one is requested + ( category == null ? "COALESCE(T.CATEGORY_URI, T.CATEGORY), " : "" ) + "COALESCE(T.VALUE_URI, T.`VALUE`) " - + ( minFrequency > 0 ? "having EE_COUNT >= :minFrequency " : "" ); + // if there are too many EE IDs, they will be retrieved by batch and filtered in-memory + + ( minFrequency > 1 && ( eeIds == null || eeIds.size() <= MAX_PARAMETER_LIST_SIZE ) ? "having EE_COUNT >= :minFrequency " : "" ); if ( retainedTermUris != null && !retainedTermUris.isEmpty() ) { query += " or VALUE_URI in (:retainedTermUris)"; } @@ -771,12 +788,7 @@ else if ( category.startsWith( "http://" ) ) { .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) .addSynchronizedQuerySpace( EE2C_QUERY_SPACE ) .addSynchronizedEntityClass( ExpressionExperiment.class ) - .addSynchronizedEntityClass( Characteristic.class ) // ensures that the cache is invalidated if characteristics are added or removed - .setCacheable( true ) - .setMaxResults( maxResults ); - if ( eeIds != null ) { - q.setParameterList( "eeIds", optimizeParameterList( eeIds ) ); - } + .addSynchronizedEntityClass( Characteristic.class ); // ensures that the cache is invalidated if characteristics are added or removed if ( category != null && !category.equals( UNCATEGORIZED ) ) { q.setParameter( "category", category ); } @@ -792,15 +804,39 @@ else if ( category.startsWith( "http://" ) ) { if ( level != null ) { q.setParameter( "level", level ); } - if ( minFrequency > 0 ) { + if ( minFrequency > 1 && ( eeIds == null || eeIds.size() <= MAX_PARAMETER_LIST_SIZE ) ) { q.setParameter( "minFrequency", minFrequency ); } EE2CAclQueryUtils.addAclParameters( q, ExpressionExperiment.class ); - //noinspection unchecked - List result = q.list(); + q.setCacheable( true ); + List result; + if ( eeIds != null ) { + if ( eeIds.size() > MAX_PARAMETER_LIST_SIZE ) { + result = listByBatch( q, "eeIds", eeIds, 2048 ); + if ( minFrequency > 1 || maxResults > 0 ) { + return aggregate( result ).entrySet().stream() + .filter( e -> e.getValue() >= minFrequency ) + .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) + .limit( maxResults > 0 ? maxResults : Long.MAX_VALUE ) + .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue, ( a, b ) -> b, () -> new TreeMap<>( Characteristic.getByCategoryAndValueComparator() ) ) ); + } + } else { + //noinspection unchecked + result = q.setParameterList( "eeIds", optimizeParameterList( eeIds ) ) + .setMaxResults( maxResults ) + .list(); + } + } else { + //noinspection unchecked + result = q.setMaxResults( maxResults ).list(); + } + return aggregate( result ); + } + + private Map aggregate( List result ) { TreeMap byC = new TreeMap<>( Characteristic.getByCategoryAndValueComparator() ); for ( Object[] row : result ) { - byC.put( convertRowToCharacteristic( row ), ( Long ) row[5] ); + byC.compute( convertRowToCharacteristic( row ), ( k, v ) -> v != null ? 
v + ( Long ) row[5] : ( Long ) row[5] ); } return byC; } @@ -812,8 +848,7 @@ private Characteristic convertRowToCharacteristic( Object[] row ) { } catch ( IllegalArgumentException e ) { evidenceCode = null; } - Characteristic c = Characteristic.Factory.newInstance( null, null, ( String ) row[0], ( String ) row[1], ( String ) row[2], ( String ) row[3], evidenceCode ); - return c; + return Characteristic.Factory.newInstance( null, null, ( String ) row[0], ( String ) row[1], ( String ) row[2], ( String ) row[3], evidenceCode ); } /** @@ -928,8 +963,7 @@ public Map getTechnologyTypeUsageFrequency( Collection result = getSessionFactory().getCurrentSession() + Query q = getSessionFactory().getCurrentSession() .createQuery( "select a.technologyType, oa.technologyType, count(distinct ee) from ExpressionExperiment ee " + "join ee.bioAssays ba " + "join ba.arrayDesignUsed a " @@ -939,10 +973,8 @@ public Map getTechnologyTypeUsageFrequency( Collection aggregateTechnologyTypeCounts( List result ) { @@ -1006,16 +1038,35 @@ private Map getPlatformsUsageFrequency( @Nullable Collection< .addSynchronizedEntityClass( ExpressionExperiment.class ) .addSynchronizedEntityClass( ArrayDesign.class ); query.setParameter( "original", original ); - if ( eeIds != null ) { - query.setParameterList( "ids", optimizeParameterList( eeIds ) ); - } EE2CAclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); EE2CAclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); - //noinspection unchecked - List result = query - .setCacheable( true ) - .setMaxResults( maxResults ) - .list(); + query.setCacheable( true ); + List result; + if ( eeIds != null ) { + if ( eeIds.size() > MAX_PARAMETER_LIST_SIZE ) { + result = listByBatch( query, "ids", eeIds, 2048 ); + if ( maxResults > 0 ) { + // results need to be aggregated and limited + return result.stream() + .collect( groupingBy( row -> ( ArrayDesign ) row[0], summingLong( row -> ( Long ) row[1] ) ) ) + .entrySet().stream() + .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) + .limit( maxResults ) + .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue ) ); + } + } else { + //noinspection unchecked + result = query + .setParameterList( "ids", optimizeParameterList( eeIds ) ) + .setMaxResults( maxResults ) + .list(); + } + } else { + //noinspection unchecked + result = query + .setMaxResults( maxResults ) + .list(); + } return result.stream().collect( groupingBy( row -> ( ArrayDesign ) row[0], summingLong( row -> ( Long ) row[1] ) ) ); } @@ -1152,16 +1203,15 @@ public Map getPerTaxonCount( List ids ) { if ( ids.isEmpty() ) { return Collections.emptyMap(); } - //noinspection unchecked - List list = this.getSessionFactory().getCurrentSession().createQuery( - "select ee.taxon, count(distinct ee) as EE_COUNT from ExpressionExperiment ee " - + "where ee.id in :eeIds " - + "group by ee.taxon " - + "order by EE_COUNT desc" ) - .setParameterList( "eeIds", optimizeParameterList( ids ) ) - .list(); + Query query = this.getSessionFactory().getCurrentSession() + .createQuery( "select ee.taxon, count(distinct ee) as EE_COUNT from ExpressionExperiment ee " + + "where ee.id in :eeIds " + + "group by ee.taxon " + + "order by EE_COUNT desc" ) + .setCacheable( true ); + List list = listByBatch( query, "eeIds", ids, getBatchSize() ); return list.stream() - .collect( Collectors.toMap( row -> ( Taxon ) row[0], row -> ( Long ) row[1] ) ); + .collect( Collectors.groupingBy( row -> ( Taxon ) row[0], Collectors.summingLong( row -> ( Long ) row[1] ) ) ); } 
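Because the IDs are now queried in batches (via the listByBatch helper added to QueryUtils later in this patch), the same Taxon can come back from more than one batch, so the partial counts must be merged rather than collected with Collectors.toMap, which throws on duplicate keys. A small stand-alone illustration of that aggregation; the taxon names and counts are made up:

    import java.util.*;
    import java.util.stream.Collectors;

    public class BatchAggregationExample {
        public static void main( String[] args ) {
            // Each Object[] stands for one (taxon, count) row returned by one batch of the query.
            List<Object[]> rowsFromAllBatches = Arrays.asList(
                    new Object[] { "Homo sapiens", 120L },  // batch 1
                    new Object[] { "Homo sapiens", 85L },   // batch 2, same taxon again
                    new Object[] { "Mus musculus", 40L } );

            // Collectors.toMap would throw IllegalStateException on the duplicate "Homo sapiens" key;
            // groupingBy + summingLong merges the per-batch partial counts instead.
            Map<String, Long> perTaxon = rowsFromAllBatches.stream()
                    .collect( Collectors.groupingBy( row -> ( String ) row[0],
                            Collectors.summingLong( row -> ( Long ) row[1] ) ) );

            System.out.println( perTaxon ); // {Homo sapiens=205, Mus musculus=40} (order may vary)
        }
    }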
public Map getPopulatedFactorCounts( Collection ids ) { diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java index 73a0d2e612..51a360afa2 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java @@ -1,6 +1,7 @@ package ubic.gemma.persistence.util; import lombok.extern.apachecommons.CommonsLog; +import org.hibernate.Query; import org.springframework.util.Assert; import ubic.gemma.core.util.ListUtils; import ubic.gemma.model.common.Identifiable; @@ -17,9 +18,9 @@ public class QueryUtils { /** * Largest parameter list size for which {@link #optimizeParameterList(Collection)} should be used. Past this size, - * no padding will be performed. + * no padding will be performed and a warning will be emitted. */ - private static final int MAX_PARAMETER_LIST_SIZE = 2048; + public static final int MAX_PARAMETER_LIST_SIZE = 2048; /** * Optimize a given parameter list by sorting, removing duplicates and padding to the next power of two. @@ -69,7 +70,7 @@ public static Collection optimizeIdentifiableParamet * It is recommended to use a power of two in case the same query is also prepared via * {@link #optimizeParameterList(Collection)}. This will make it so that the execution plan can be reused. */ - public static > Iterable> batchParameterList( Collection list, int batchSize ) { + public static > List> batchParameterList( Collection list, int batchSize ) { Assert.isTrue( batchSize == -1 || batchSize > 0, "Batch size must be strictly positive or equal to -1." ); if ( list.isEmpty() ) { return Collections.emptyList(); @@ -78,10 +79,10 @@ public static > Iterable> batchParameterLi .sorted( Comparator.nullsLast( Comparator.naturalOrder() ) ) .distinct() .collect( Collectors.toList() ); - return batch( sortedList, batchSize ); + return ListUtils.batch( sortedList, batchSize ); } - public static Iterable> batchIdentifiableParameterList( Collection list, int batchSize ) { + public static List> batchIdentifiableParameterList( Collection list, int batchSize ) { Assert.isTrue( batchSize == -1 || batchSize > 0, "Batch size must be strictly positive or equal to -1." ); if ( list.isEmpty() ) { return Collections.emptyList(); @@ -90,32 +91,37 @@ public static Iterable> batchIdentifiable .sorted( Comparator.comparing( Identifiable::getId, Comparator.nullsLast( Comparator.naturalOrder() ) ) ) .distinct() .collect( Collectors.toList() ); - return batch( sortedList, batchSize ); + return ListUtils.batch( sortedList, batchSize ); } - private static Iterable> batch( List list, int batchSize ) { - if ( batchSize == -1 ) { - return Collections.singletonList( list ); - } - int numberOfBatches = ( list.size() / batchSize ) + ( list.size() % batchSize > 0 ? 
1 : 0 ); - int size = numberOfBatches * batchSize; - List paddedList = ( List ) ListUtils.pad( list, list.get( list.size() - 1 ), size ); - return () -> new Iterator>() { - private int i = 0; - - @Override - public boolean hasNext() { - return i < numberOfBatches; - } + /** + * @see #listByBatch(Query, String, Collection, int, int) + */ + public static , T> List listByBatch( Query query, String batchParam, Collection list, int batchSize ) { + return listByBatch( query, batchParam, list, batchSize, -1 ); + } - @Override - public List next() { - try { - return paddedList.subList( i * batchSize, ( i + 1 ) * batchSize ); - } finally { - i += 1; + /** + * List the results of a query by fixed batch size. + */ + public static , T> List listByBatch( Query query, String batchParam, Collection list, int batchSize, int maxResults ) { + List result = new ArrayList<>(); + for ( List batch : batchParameterList( list, batchSize ) ) { + int remainingToFetch; + if ( maxResults > 0 ) { + if ( result.size() < maxResults ) { + remainingToFetch = maxResults - result.size(); + } else { + break; } + } else { + remainingToFetch = -1; } - }; + query.setParameterList( batchParam, batch ); + query.setMaxResults( remainingToFetch ); + //noinspection unchecked + result.addAll( query.list() ); + } + return result; } } diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java index f01bb16fdf..f0a68c9446 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java @@ -30,10 +30,9 @@ import ubic.gemma.persistence.util.*; import javax.annotation.Nullable; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.Map; +import java.util.*; +import java.util.stream.Collectors; +import java.util.stream.LongStream; import static org.junit.Assert.*; @@ -185,6 +184,15 @@ public void testGetAnnotationUsageFrequency() { .containsEntry( c, 1L ); } + @Test + @WithMockUser(authorities = "GROUP_ADMIN") + public void testGetAnnotationUsageFrequencyWithLargeBatch() { + Characteristic c = createCharacteristic( "foo", "foo", "bar", "bar" ); + List ees = LongStream.range( 0, 10000 ).boxed().collect( Collectors.toList() ); + Assertions.assertThat( expressionExperimentDao.getAnnotationsUsageFrequency( ees, null, 10, 1, null, null, null, null ) ) + .containsEntry( c, 1L ); + } + @Test @WithMockUser(authorities = "GROUP_ADMIN") public void testGetAnnotationUsageFrequencyRetainMentionedTerm() { From 4a88e4ff1ef00367e95f0e6a0c9ff83597e0bdac Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 19 Mar 2024 09:34:50 -0700 Subject: [PATCH 035/105] More test cleanups --- .../diff/BaseAnalyzerConfigurationTest.java | 2 + .../diff/OneWayAnovaAnalyzerTest.java | 11 +-- .../expression/diff/TTestAnalyzerTest.java | 12 +-- ...oWayAnovaWithInteractionsAnalyzerTest.java | 6 +- ...yAnovaWithoutInteractionsAnalyzerTest.java | 6 +- .../preprocess/ExpressionDataSVDTest.java | 8 +- ...nExperimentBatchCorrectionServiceTest.java | 4 +- .../RNASeqBatchInfoPopulationTest.java | 8 +- .../preprocess/svd/SVDServiceImplTest.java | 1 + ...ExpressionExperimentReportServiceTest.java | 3 - .../ExpressionExperimentBibRefFinderTest.java | 73 ++++--------------- 
.../entrez/pubmed/PubMedXMLFetcherTest.java | 3 - .../entrez/pubmed/PubMedXMLParserTest.java | 2 +- .../expression/geo/GeoConverterTest.java | 3 - .../util/test/suite/FastIntegrationTests.java | 8 +- .../core/util/test/suite/FastUnitTests.java | 2 +- .../util/test/suite/IntegrationTests.java | 3 + .../gemma/core/util/test/suite/UnitTests.java | 3 + 18 files changed, 51 insertions(+), 107 deletions(-) diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/BaseAnalyzerConfigurationTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/BaseAnalyzerConfigurationTest.java index 484c342053..05cfe53007 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/BaseAnalyzerConfigurationTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/BaseAnalyzerConfigurationTest.java @@ -21,6 +21,7 @@ import org.apache.commons.lang3.RandomStringUtils; import org.junit.After; import org.junit.Before; +import org.junit.experimental.categories.Category; import org.springframework.beans.factory.annotation.Autowired; import ubic.basecode.dataStructure.matrix.DoubleMatrix; import ubic.basecode.io.ByteArrayConverter; @@ -31,6 +32,7 @@ import ubic.gemma.core.analysis.service.ExpressionDataMatrixService; import ubic.gemma.core.datastructure.matrix.ExpressionDataDoubleMatrix; import ubic.gemma.core.util.test.BaseSpringContextTest; +import ubic.gemma.core.util.test.category.SlowTest; import ubic.gemma.model.common.quantitationtype.*; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.arrayDesign.TechnologyType; diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/OneWayAnovaAnalyzerTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/OneWayAnovaAnalyzerTest.java index 4fc5680b1c..8590ee6655 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/OneWayAnovaAnalyzerTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/OneWayAnovaAnalyzerTest.java @@ -35,6 +35,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assume.assumeTrue; /** * Tests the one way anova analyzer. See test/data/stat-tests/README.txt for R code. @@ -52,10 +53,7 @@ public class OneWayAnovaAnalyzerTest extends BaseAnalyzerConfigurationTest { @Test public void testOneWayAnova() throws Exception { - if ( !connected ) { - log.warn( "Could not establish R connection. Skipping test ..." ); - return; - } + assumeTrue( "Could not establish R connection. Skipping test ...", connected ); super.configureTestDataForOneWayAnova(); @@ -86,10 +84,7 @@ public void testOneWayAnova() throws Exception { @Test public void testOnewayAnovaB() throws Exception { - if ( !connected ) { - log.warn( "Could not establish R connection. Skipping test ..." ); - return; - } + assumeTrue( "Could not establish R connection. 
Skipping test ...", connected ); super.configureTestDataForOneWayAnova(); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TTestAnalyzerTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TTestAnalyzerTest.java index e9a52ddb06..c02086ec29 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TTestAnalyzerTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TTestAnalyzerTest.java @@ -20,7 +20,6 @@ import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; -import ubic.gemma.model.analysis.expression.diff.ExpressionAnalysisResultSet; import ubic.gemma.model.analysis.expression.diff.*; import ubic.gemma.model.common.quantitationtype.ScaleType; import ubic.gemma.model.expression.biomaterial.BioMaterial; @@ -35,6 +34,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assume.assumeTrue; /** * See test/data/stat-tests/README.txt for R code. @@ -49,10 +49,7 @@ public class TTestAnalyzerTest extends BaseAnalyzerConfigurationTest { @Test public void testOneSampleTtest() throws Exception { - if ( !connected ) { - log.warn( "Could not establish R connection. Skipping test ..." ); - return; - } + assumeTrue( "Could not establish R connection. Skipping test ...", connected ); this.configureVectors( super.biomaterials, "/data/stat-tests/onesample-ttest-data.txt" ); @@ -127,10 +124,7 @@ public void testOneSampleTtest() throws Exception { @Test public void testTTestWithExpressionExperiment() { - if ( !connected ) { - log.warn( "Could not establish R connection. Skipping test ..." ); - return; - } + assumeTrue( "Could not establish R connection. Skipping test ...", connected ); this.configureMocks(); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithInteractionsAnalyzerTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithInteractionsAnalyzerTest.java index cf0cc383d6..ab017fe9ce 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithInteractionsAnalyzerTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithInteractionsAnalyzerTest.java @@ -31,6 +31,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; +import static org.junit.Assume.assumeTrue; /** * Tests the two way anova analyzer with interactions. See test/data/stat-tests/README.txt for R code. @@ -47,10 +48,7 @@ public void testTwoWayAnova() { log.debug( "Testing TwoWayAnova method in " + DiffExAnalyzer.class.getName() ); - if ( !connected ) { - log.warn( "Could not establish R connection. Skipping test ..." ); - return; - } + assumeTrue( "Could not establish R connection. 
Skipping test ...", connected ); this.configureMocks(); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithoutInteractionsAnalyzerTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithoutInteractionsAnalyzerTest.java index e71ecb2982..1a260706ee 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithoutInteractionsAnalyzerTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/expression/diff/TwoWayAnovaWithoutInteractionsAnalyzerTest.java @@ -32,6 +32,7 @@ import java.util.List; import static org.junit.Assert.*; +import static org.junit.Assume.assumeTrue; /** * Tests the two way anova analyzer. See test/data/stat-tests/README.txt for R code. @@ -51,10 +52,7 @@ public void testTwoWayAnova() { log.debug( "Testing getPValues method in " + DiffExAnalyzer.class.getName() ); - if ( !connected ) { - log.warn( "Could not establish R connection. Skipping test ..." ); - return; - } + assumeTrue( "Could not establish R connection. Skipping test ...", connected ); this.configureMocks(); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ExpressionDataSVDTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ExpressionDataSVDTest.java index 3b244fced9..99ecd07c47 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ExpressionDataSVDTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/ExpressionDataSVDTest.java @@ -1,8 +1,8 @@ /* * The Gemma project - * + * * Copyright (c) 2008 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at @@ -86,7 +86,7 @@ public void testMatrixReconstruct() { * header=T, row.names=1) * testdata.s <- testdata * for(i in 1:5) { - * testdata.s <- t(scale(t(scale(testdata.s)))); + * testdata.s <- t(scale(t(scale(testdata.s)))); * } * s<-svd(testdata.s) * s$d @@ -132,7 +132,7 @@ public void testEigenvalues() throws SVDException { /* * See testEigenvalues - * + * *
      * cat( signif( p$sdev ^ 2 / sum( p$sdev ^ 2 ), 3 ), sep = ",\n" )
      * 
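The R snippet above records how this test's expected values were produced: each component's share of the variance is its squared singular value (equivalently, squared prcomp standard deviation) divided by the sum of squares. A stand-alone illustration with made-up values:

    import java.util.Arrays;

    public class VarianceFractionExample {
        public static void main( String[] args ) {
            // Hypothetical component scales, playing the role of p$sdev in the R snippet above.
            double[] d = { 4.0, 2.0, 1.0 };
            double total = Arrays.stream( d ).map( x -> x * x ).sum(); // 21.0
            // Fraction of variance per component: d[i]^2 / sum(d^2)
            double[] fraction = Arrays.stream( d ).map( x -> x * x / total ).toArray();
            System.out.println( Arrays.toString( fraction ) ); // ~[0.762, 0.190, 0.048]
        }
    }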
diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceTest.java index f663d63fd1..71b0caffb7 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/ExpressionExperimentBatchCorrectionServiceTest.java @@ -89,8 +89,8 @@ public void testComBatOnEE() throws Exception { assertNotNull( newee ); newee = expressionExperimentService.thawLite( newee ); processedExpressionDataVectorService.computeProcessedExpressionData( newee ); - try (InputStream deis = this.getClass() - .getResourceAsStream( "/data/loader/expression/geo/gse18162Short/design.txt" )) { + try ( InputStream deis = this.getClass() + .getResourceAsStream( "/data/loader/expression/geo/gse18162Short/design.txt" ) ) { experimentalDesignImporter.importDesign( newee, deis ); } ExpressionDataDoubleMatrix comBat = correctionService.comBat( newee ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/RNASeqBatchInfoPopulationTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/RNASeqBatchInfoPopulationTest.java index 961a72c616..f0382cdf36 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/RNASeqBatchInfoPopulationTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/batcheffects/RNASeqBatchInfoPopulationTest.java @@ -19,15 +19,11 @@ package ubic.gemma.core.analysis.preprocess.batcheffects; -import java.util.Collection; -import java.util.Map; - import org.junit.After; import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; import org.springframework.beans.factory.annotation.Autowired; - import org.springframework.core.io.ClassPathResource; import ubic.basecode.util.FileTools; import ubic.gemma.core.loader.expression.geo.AbstractGeoServiceTest; @@ -44,6 +40,9 @@ import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.util.Settings; +import java.util.Collection; +import java.util.Map; + import static org.junit.Assert.*; /** @@ -169,6 +168,7 @@ public void testGSE14285OneBatch() throws Exception { * batch info. 
*/ @Test + @Category(SlowTest.class) public void testGSE156689NoBatchinfo() throws Exception { geoService.setGeoDomainObjectGenerator( new GeoDomainObjectGeneratorLocal( FileTools.resourceToPath( "/data/analysis/preprocess/batcheffects/" ) ) ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/svd/SVDServiceImplTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/svd/SVDServiceImplTest.java index 0f9f03de0a..861fafa329 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/svd/SVDServiceImplTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/preprocess/svd/SVDServiceImplTest.java @@ -37,6 +37,7 @@ /** * @author paul */ +@Category(SlowTest.class) public class SVDServiceImplTest extends AbstractGeoServiceTest { @Autowired diff --git a/gemma-core/src/test/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceTest.java index e413e4edba..e2f73ec1f4 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/analysis/report/ExpressionExperimentReportServiceTest.java @@ -2,12 +2,10 @@ import org.junit.After; import org.junit.Test; -import org.junit.experimental.categories.Category; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.security.core.context.SecurityContext; import org.springframework.security.core.context.SecurityContextHolder; import ubic.gemma.core.util.test.BaseSpringContextTest; -import ubic.gemma.core.util.test.category.SlowTest; import ubic.gemma.model.expression.experiment.BatchEffectType; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; @@ -32,7 +30,6 @@ public void tearDown() { } @Test - @Category(SlowTest.class) public void testRecalculateBatchInfo() { ee = getTestPersistentBasicExpressionExperiment(); assertNull( ee.getBatchEffect() ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/ExpressionExperimentBibRefFinderTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/ExpressionExperimentBibRefFinderTest.java index d3800e70b0..5b1607cb6f 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/ExpressionExperimentBibRefFinderTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/ExpressionExperimentBibRefFinderTest.java @@ -1,8 +1,8 @@ /* * The Gemma project - * + * * Copyright (c) 2007 University of British Columbia - * + * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at @@ -18,34 +18,26 @@ */ package ubic.gemma.core.loader.entrez.pubmed; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.junit.Test; import org.junit.experimental.categories.Category; -import ubic.gemma.core.util.test.category.SlowTest; +import ubic.gemma.core.util.test.category.GeoTest; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.description.DatabaseEntry; import ubic.gemma.model.common.description.ExternalDatabase; import ubic.gemma.model.expression.experiment.ExpressionExperiment; -import javax.net.ssl.SSLException; -import java.io.IOException; -import java.net.UnknownHostException; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assume.assumeNoException; +import static org.junit.Assert.*; +import static ubic.gemma.core.util.test.Assumptions.assumeThatResourceIsAvailable; /** * @author pavlidis */ +@Category(GeoTest.class) public class ExpressionExperimentBibRefFinderTest { - private static final Log log = LogFactory.getLog( ExpressionExperimentBibRefFinderTest.class.getName() ); - @Test - @Category(SlowTest.class) public void testLocatePrimaryReference() throws Exception { + assumeThatResourceIsAvailable( "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi" ); ExpressionExperimentBibRefFinder finder = new ExpressionExperimentBibRefFinder(); ExpressionExperiment ee = ExpressionExperiment.Factory.newInstance(); DatabaseEntry de = DatabaseEntry.Factory.newInstance(); @@ -54,25 +46,15 @@ public void testLocatePrimaryReference() throws Exception { de.setAccession( "GSE3023" ); de.setExternalDatabase( ed ); ee.setAccession( de ); - try { - BibliographicReference bibref = null; - for ( int i = 0; i < 3; i++ ) { - bibref = finder.locatePrimaryReference( ee ); - if ( bibref != null ) - break; - Thread.sleep( 1000 ); - } - assertNotNull( bibref ); - assertEquals( "Differential gene expression in anatomical compartments of the human eye.", - bibref.getTitle() ); - } catch ( Exception e ) { - checkCause( e ); - } - + BibliographicReference bibref = finder.locatePrimaryReference( ee ); + assertNotNull( bibref ); + assertEquals( "Differential gene expression in anatomical compartments of the human eye.", + bibref.getTitle() ); } @Test public void testLocatePrimaryReferenceInvalidGSE() throws Exception { + assumeThatResourceIsAvailable( "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi" ); ExpressionExperimentBibRefFinder finder = new ExpressionExperimentBibRefFinder(); ExpressionExperiment ee = ExpressionExperiment.Factory.newInstance(); DatabaseEntry de = DatabaseEntry.Factory.newInstance(); @@ -81,34 +63,7 @@ public void testLocatePrimaryReferenceInvalidGSE() throws Exception { de.setAccession( "GSE30231111111111111" ); de.setExternalDatabase( ed ); ee.setAccession( de ); - try { - BibliographicReference bibref = finder.locatePrimaryReference( ee ); - assert ( bibref == null ); - } catch ( Exception e ) { - checkCause( e ); - } - } - - private void checkCause( Exception e ) throws Exception { - IOException k; - if ( e instanceof IOException ) { - k = ( IOException ) e; - } else if ( e.getCause() instanceof IOException ) { - k = ( IOException ) e.getCause(); - } else { - throw e; - } - if ( k instanceof UnknownHostException || k instanceof SSLException ) { - assumeNoException( e ); - } else if ( k.getMessage().contains( "503" ) ) { - assumeNoException( "Test skipped due to a 503 error from 
NCBI", e ); - } else if ( k.getMessage().contains( "502" ) ) { - log.warn( "Test skipped due to a 502 error from NCBI" ); - } else if ( k.getMessage().contains( "500" ) ) { - log.warn( "Test skipped due to a 500 error from NCBI" ); - } else { - throw e; - } + BibliographicReference bibref = finder.locatePrimaryReference( ee ); + assertTrue( bibref == null ); } - } diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLFetcherTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLFetcherTest.java index f4ff4477d1..349cb47e56 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLFetcherTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLFetcherTest.java @@ -61,7 +61,6 @@ public final void testRetrieveByHTTP() { } @Test - @Category(SlowTest.class) public final void testRetrieveByHTTP2() { try { BibliographicReference br = pmf.retrieveByHTTP( 24850731 ); @@ -83,7 +82,6 @@ public final void testRetrieveByHTTP2() { * 23865096 is a NCBI bookshelf article, not a paper */ @Test - @Category({ SlowTest.class, PubMedTest.class }) public final void testRetrieveByHTTPBookshelf() { try { BibliographicReference br = pmf.retrieveByHTTP( 23865096 ); @@ -103,7 +101,6 @@ public final void testRetrieveByHTTPBookshelf() { } @Test - @Category(SlowTest.class) public final void testRetrieveByHTTPNotFound() { try { BibliographicReference br = pmf.retrieveByHTTP( 1517311444 ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLParserTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLParserTest.java index 37ee30a997..ebc71cbf39 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLParserTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/entrez/pubmed/PubMedXMLParserTest.java @@ -45,7 +45,6 @@ /** * @author pavlidis */ -@Category(SlowTest.class) public class PubMedXMLParserTest { private static final Log log = LogFactory.getLog( PubMedXMLParserTest.class.getName() ); @@ -175,6 +174,7 @@ public void testParseMesh() throws Exception { * This uses a medline-format file, instead of the pubmed xml files we get from the eutils. 
*/ @Test + @Category(SlowTest.class) public void testParseMulti() throws Exception { try { testStream = new GZIPInputStream( new ClassPathResource( "/data/loader/medline.multi.xml.gz" ).getInputStream() ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/GeoConverterTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/GeoConverterTest.java index 0b3ae5a0ba..b9959c3105 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/GeoConverterTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/geo/GeoConverterTest.java @@ -150,7 +150,6 @@ public void testConvertGSE106() throws Exception { * */ @Test - @Category(SlowTest.class) public void testConvertGSE18Stress() throws Exception { InputStream is = new GZIPInputStream( new ClassPathResource( "/data/loader/expression/geo/gse18short/GSE18.soft.gz" ).getInputStream() ); @@ -410,7 +409,6 @@ public void testConvertGse59() throws Exception { */ @SuppressWarnings("unchecked") @Test - @Category(SlowTest.class) public void testConvertGSE60() throws Exception { InputStream is = new GZIPInputStream( new ClassPathResource( "/data/loader/expression/geo/gse60Short/GSE60_family.soft.gz" ).getInputStream() ); @@ -624,7 +622,6 @@ public final void testGSE44903() throws Exception { */ @SuppressWarnings("unchecked") @Test - @Category(SlowTest.class) public final void testGSE8872() throws Exception { InputStream is = new GZIPInputStream( new ClassPathResource( "/data/loader/expression/geo/gse8872short/GSE8872_family.soft.gz" ).getInputStream() ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastIntegrationTests.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastIntegrationTests.java index 0ba3d6fc1d..e155cb8b23 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastIntegrationTests.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastIntegrationTests.java @@ -3,11 +3,15 @@ import org.junit.experimental.categories.Categories; import org.junit.runner.RunWith; import org.junit.runners.Suite; +import ubic.gemma.core.util.test.category.IntegrationTest; import ubic.gemma.core.util.test.category.SlowTest; +/** + * Fast integration tests. 
+ */ @RunWith(Categories.class) -@Categories.IncludeCategory(IntegrationTests.class) +@Categories.IncludeCategory(IntegrationTest.class) @Categories.ExcludeCategory(SlowTest.class) -@Suite.SuiteClasses(AllTests.class) +@Suite.SuiteClasses({ AllTests.class }) public class FastIntegrationTests { } diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastUnitTests.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastUnitTests.java index a4b6e7fdf1..91f87e7bf9 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastUnitTests.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/FastUnitTests.java @@ -12,5 +12,5 @@ @RunWith(Categories.class) @Categories.ExcludeCategory({ IntegrationTest.class, SlowTest.class }) @Suite.SuiteClasses(AllTests.class) -public class FastUnitTests extends UnitTests { +public class FastUnitTests { } diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/IntegrationTests.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/IntegrationTests.java index b9308a1cb4..f741e7d04a 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/IntegrationTests.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/IntegrationTests.java @@ -5,6 +5,9 @@ import org.junit.runners.Suite; import ubic.gemma.core.util.test.category.IntegrationTest; +/** + * Integration tests. + */ @RunWith(Categories.class) @Categories.IncludeCategory(IntegrationTest.class) @Suite.SuiteClasses({ AllTests.class }) diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/UnitTests.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/UnitTests.java index 1b65bd1edb..3b99da4f3d 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/UnitTests.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/suite/UnitTests.java @@ -5,6 +5,9 @@ import org.junit.runners.Suite; import ubic.gemma.core.util.test.category.IntegrationTest; +/** + * Unit tests. 
+ */ @RunWith(Categories.class) @Categories.ExcludeCategory(IntegrationTest.class) @Suite.SuiteClasses(AllTests.class) From 980f2194ef6209e5ccbc3914cc6810d872185f2b Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 19 Mar 2024 13:47:20 -0700 Subject: [PATCH 036/105] Improve logging for OntologyService Use debug logs when requerying ontologies --- .../core/ontology/OntologyServiceImpl.java | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java index 4992535bcd..0069321380 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java @@ -267,7 +267,7 @@ public Collection findTerms( String search ) throws BaseCodeOntolo } else { return Collections.emptySet(); } - } ); + }, "terms matching " + search ); } Collection results = new HashSet<>(); @@ -281,7 +281,7 @@ public Collection findTerms( String search ) throws BaseCodeOntolo return results; } - results = searchInThreads( ontology -> ontology.findTerm( query ) ); + results = searchInThreads( ontology -> ontology.findTerm( query ), query ); if ( geneOntologyService.isOntologyLoaded() ) { try { @@ -331,7 +331,7 @@ public Collection findTermsInexact( String givenQuery if ( results2.isEmpty() ) return Collections.emptySet(); return CharacteristicValueObject.characteristic2CharacteristicVO( this.termsToCharacteristics( results2 ) ); - } ) ); + }, queryString ) ); // get GO terms, if we don't already have a lot of possibilities. (might have to adjust this) StopWatch findGoTerms = StopWatch.createStarted(); @@ -410,7 +410,7 @@ private Set getParentsOrChildren( Collection terms, return parents ? ontologyCache.getParents( os, toQuery, direct, includeAdditionalProperties ) : ontologyCache.getChildren( os, toQuery, direct, includeAdditionalProperties ); } finally { - if ( timer.getTime() > 10L * terms.size() ) { + if ( timer.getTime() > Math.max( 10L * terms.size(), 1000L ) ) { log.warn( String.format( "Obtaining %s from %s for %s took %s", parents ? "parents" : "children", os, @@ -418,14 +418,14 @@ private Set getParentsOrChildren( Collection terms, timer ) ); } } - } ); + }, String.format( "%s %s of %d terms", direct ? "direct" : "all", parents ? "parents" : "children", terms.size() ) ); if ( results.addAll( newResults ) && !direct ) { // there are new results (i.e. a term was inferred from a different ontology), we need to requery them // if they were not in the query newResults.removeAll( toQuery ); toQuery.clear(); toQuery.addAll( newResults ); - log.info( String.format( "Found %d new %s terms, will requery them.", newResults.size(), + log.debug( String.format( "Found %d new %s terms, will requery them.", newResults.size(), parents ? "parents" : "children" ) ); } else { toQuery.clear(); @@ -495,7 +495,8 @@ public OntologyTerm getTerm( String uri ) { @Override public Set getTerms( Collection uris ) { Set distinctUris = uris instanceof Set ? 
( Set ) uris : new HashSet<>( uris ); - return combineInThreads( os -> distinctUris.stream().map( os::getTerm ).filter( Objects::nonNull ).collect( Collectors.toSet() ) ); + return combineInThreads( os -> distinctUris.stream().map( os::getTerm ).filter( Objects::nonNull ).collect( Collectors.toSet() ), + String.format( "terms for %d URIs", uris.size() ) ); } /** @@ -713,7 +714,7 @@ private Collection findCharacteristicsFromOntology( S } } return characteristicsFromOntology; - }, ontologyServicesToUse ); + }, ontologyServicesToUse, "terms matching " + searchQuery ); } private String foundValueKey( Characteristic c ) { @@ -912,13 +913,13 @@ private interface SearchFunction { } /** - * Similar to {@link #combineInThreads(Function)}, but also handles {@link OntologySearchException}. + * Similar to {@link #combineInThreads(Function, String)}, but also handles {@link OntologySearchException}. */ - private Set searchInThreads( SearchFunction function ) throws BaseCodeOntologySearchException { - return searchInThreads( function, ontologyServices ); + private Set searchInThreads( SearchFunction function, String query ) throws BaseCodeOntologySearchException { + return searchInThreads( function, ontologyServices, query ); } - private Set searchInThreads( SearchFunction function, List ontologyServices ) throws BaseCodeOntologySearchException { + private Set searchInThreads( SearchFunction function, List ontologyServices, String query ) throws BaseCodeOntologySearchException { try { return combineInThreads( os -> { try { @@ -926,7 +927,7 @@ private Set searchInThreads( SearchFunction function, List Set combineInThreads( Function> work ) { - return combineInThreads( work, ontologyServices ); + private Set combineInThreads( Function> work, String query ) { + return combineInThreads( work, ontologyServices, query ); } /** @@ -956,7 +957,8 @@ private Set combineInThreads( Function * The functions are evaluated using Gemma's short-lived task executor. 
*/ - private Set combineInThreads( Function> work, List ontologyServices ) { + private Set combineInThreads( Function> work, List ontologyServices, String query ) { + StopWatch timer = StopWatch.createStarted(); List>> futures = new ArrayList<>( ontologyServices.size() ); ExecutorCompletionService> completionService = new ExecutorCompletionService<>( taskExecutor ); for ( ubic.basecode.ontology.providers.OntologyService os : ontologyServices ) { @@ -969,7 +971,7 @@ private Set combineInThreads( Function> future; while ( ( future = completionService.poll( 1, TimeUnit.SECONDS ) ) == null ) { - log.warn( String.format( "Ontology query is taking too long (%d/%d completed so far).", i, futures.size() ) ); + log.warn( String.format( "Ontology query for %s is taking too long (%d/%d completed so far, %s elapsed).", query, i, futures.size(), timer ) ); } children.addAll( future.get() ); } From 36b4d5f5540c8d340a5475c7ae31317eaa50ffff Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 19 Mar 2024 15:18:40 -0700 Subject: [PATCH 037/105] Fetch EEs by URIs in batch --- .../description/CharacteristicDaoImpl.java | 53 +++++++++++++------ 1 file changed, 38 insertions(+), 15 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java index dff2441f64..d2fa824fbd 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java @@ -49,8 +49,7 @@ import java.util.stream.Collectors; import static ubic.gemma.persistence.service.TableMaintenanceUtil.EE2C_QUERY_SPACE; -import static ubic.gemma.persistence.util.QueryUtils.batchParameterList; -import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; +import static ubic.gemma.persistence.util.QueryUtils.*; /** * @author Luke @@ -99,10 +98,8 @@ public Map, Map> if ( uris.isEmpty() ) { return Collections.emptyMap(); } - //noinspection unchecked - List result = prepareExperimentsByUrisQuery( uris, taxon, limit > 0 && rankByLevel ) - .setMaxResults( limit ) - .list(); + // no need to rank if there is no limit since we're collecting in a mapping + List result = findExperimentsByUrisInternal( uris, taxon, limit > 0 && rankByLevel, limit ); if ( result.isEmpty() ) { return Collections.emptyMap(); } @@ -135,11 +132,7 @@ public Map, Map> return Collections.emptyMap(); } //noinspection unchecked - List result = prepareExperimentsByUrisQuery( uris, taxon, limit > 0 && rankByLevel ) - .setMaxResults( limit ) - .list(); - //noinspection unchecked - return result.stream().collect( Collectors.groupingBy( + return findExperimentsByUrisInternal( uris, taxon, limit > 0 && rankByLevel, limit ).stream().collect( Collectors.groupingBy( row -> ( Class ) row[0], Collectors.groupingBy( row -> ( String ) row[1], @@ -148,7 +141,7 @@ public Map, Map> Collectors.toCollection( () -> new TreeSet<>( Comparator.comparing( ExpressionExperiment::getId ) ) ) ) ) ) ); } - private Query prepareExperimentsByUrisQuery( Collection uris, @Nullable Taxon taxon, boolean rankByLevel ) { + private List findExperimentsByUrisInternal( Collection uris, @Nullable Taxon taxon, boolean rankByLevel, int limit ) { String qs = "select T.`LEVEL`, T.VALUE_URI, T.EXPRESSION_EXPERIMENT_FK from EXPRESSION_EXPERIMENT2CHARACTERISTIC T" + ( taxon != null ? 
" join INVESTIGATION I on T.EXPRESSION_EXPERIMENT_FK = I.ID " : "" ) + EE2CAclQueryUtils.formNativeAclJoinClause( "T.EXPRESSION_EXPERIMENT_FK" ) + " " @@ -174,8 +167,6 @@ private Query prepareExperimentsByUrisQuery( Collection uris, @Nullable query.setParameter( "bmClass", BioMaterial.class ); } - query.setParameterList( "uris", optimizeParameterList( uris ) ); - if ( taxon != null ) { query.setParameter( "taxonId", taxon.getId() ); } @@ -184,7 +175,39 @@ private Query prepareExperimentsByUrisQuery( Collection uris, @Nullable query.setCacheable( true ); - return query; + List result; + if ( uris.size() > MAX_PARAMETER_LIST_SIZE ) { + if ( limit > 0 && rankByLevel ) { + // query is limited and order is important, we have to sort the results in memory + result = listByBatch( query, "uris", uris, 2048 ); + result = result.stream() + .sorted( Comparator.comparing( row -> rankClass( ( Class ) row[0] ) ) ) + .limit( limit ) + .collect( Collectors.toList() ); + } else { + // query is limited, but there is no ordering, so we can just fetch the first few results + result = listByBatch( query, "uris", uris, 2048, limit ); + } + } else { + //noinspection unchecked + result = query + .setParameterList( "uris", optimizeParameterList( uris ) ) + .list(); + } + + return result; + } + + private int rankClass( Class clazz ) { + if ( clazz == ExpressionExperiment.class ) { + return 0; + } else if ( clazz == ExperimentalDesign.class ) { + return 1; + } else if ( clazz == BioMaterial.class ) { + return 2; + } else { + return 3; + } } @Override From 147589cefb37755a5b323a15b16881b033f25479 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Sat, 23 Mar 2024 00:35:49 -0700 Subject: [PATCH 038/105] Clarify comment that listByBatch() doesn't affect the query output --- .../service/common/description/CharacteristicDaoImpl.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java index d2fa824fbd..697e2cecf8 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java @@ -185,7 +185,7 @@ private List findExperimentsByUrisInternal( Collection uris, @ .limit( limit ) .collect( Collectors.toList() ); } else { - // query is limited, but there is no ordering, so we can just fetch the first few results + // query is either unlimited or there is no ordering, batching will not affect the output result = listByBatch( query, "uris", uris, 2048, limit ); } } else { From 55a66090f340097a3812ef03cf3a20a6584fde7e Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 19 Mar 2024 16:16:57 -0700 Subject: [PATCH 039/105] Do not pass IDs resulting from search to the filters This collection can be large sometimes and since we generally just retrieve IDs, it's more efficient to compute the intersection. 
--- .../experiment/ExpressionExperimentDao.java | 2 +- .../ExpressionExperimentDaoImpl.java | 2 +- .../ExpressionExperimentService.java | 13 ++-- .../ExpressionExperimentServiceImpl.java | 70 ++++++++++++++----- ...ssionExperimentServiceIntegrationTest.java | 8 +-- .../ExpressionExperimentServiceTest.java | 4 +- .../ubic/gemma/rest/DatasetsWebService.java | 49 +++++++------ .../rest/util/args/DatasetArgService.java | 9 +-- .../gemma/rest/DatasetsWebServiceTest.java | 17 +++-- .../ExpressionExperimentController.java | 2 +- 10 files changed, 106 insertions(+), 70 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java index 6f782f8fc0..8ae858cd49 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDao.java @@ -170,7 +170,7 @@ public interface ExpressionExperimentDao *

* Experiments are not filtered by ACLs and toubled experiments are only visible to administrators. */ - Map getPerTaxonCount( List ids ); + Map getPerTaxonCount( Collection ids ); Map getPopulatedFactorCounts( Collection ids ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index 5ef95fe8c5..f531ec318e 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -1199,7 +1199,7 @@ public Map getPerTaxonCount() { } @Override - public Map getPerTaxonCount( List ids ) { + public Map getPerTaxonCount( Collection ids ) { if ( ids.isEmpty() ) { return Collections.emptyMap(); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java index e1b9741103..82a67557a4 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentService.java @@ -146,7 +146,7 @@ ExpressionExperiment addRawVectors( ExpressionExperiment eeToUpdate, List loadIdsWithCache( @Nullable Filters filters, @Nullable Sort sort ); - long countWithCache( @Nullable Filters filters ); + long countWithCache( @Nullable Filters filters, @Nullable Set extraIds ); @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "AFTER_ACL_VALUE_OBJECT_COLLECTION_READ" }) Slice loadValueObjectsWithCache( @Nullable Filters filters, @Nullable Sort sort, int offset, int limit ); @@ -310,7 +310,7 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm { * @param retainedTermUris ensure that the given terms are retained (overrides any exclusion from minFrequency and excludedTermUris) * @param maxResults maximum number of results to return */ - Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ); + Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ); /** * Obtain annotation usage frequency for datasets matching the given filters. 
@@ -332,7 +332,7 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm { * the matched datasets and ordered in descending number of associated experiments * @see ExpressionExperimentDao#getAnnotationsUsageFrequency(Collection, Class, int, int, String, Collection, Collection, Collection) */ - List getAnnotationsUsageFrequency( @Nullable Filters filters, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, int minFrequency, @Nullable Collection retainedTermUris, int maxResults ); + List getAnnotationsUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, int minFrequency, @Nullable Collection retainedTermUris, int maxResults ); /** * @param expressionExperiment experiment @@ -343,15 +343,16 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm { @Secured({ "IS_AUTHENTICATED_ANONYMOUSLY", "ACL_SECURABLE_READ" }) Collection getArrayDesignsUsed( BioAssaySet expressionExperiment ); - Map getTechnologyTypeUsageFrequency( @Nullable Filters filters ); + Map getTechnologyTypeUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds ); /** * Calculate the usage frequency of platforms by the datasets matching the provided filters. * * @param filters a set of filters to be applied as per {@link #load(Filters, Sort, int, int)} + * @param extraIds * @param maxResults the maximum of results, or unlimited if less than 1 */ - Map getArrayDesignUsedOrOriginalPlatformUsageFrequency( @Nullable Filters filters, int maxResults ); + Map getArrayDesignUsedOrOriginalPlatformUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds, int maxResults ); /** * Calculate the usage frequency of taxa by the datasets matching the provided filters. @@ -360,7 +361,7 @@ class CharacteristicWithUsageStatisticsAndOntologyTerm { * * @see #getPerTaxonCount() */ - Map getTaxaUsageFrequency( @Nullable Filters filters ); + Map getTaxaUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds ); /** * Checks the experiment for a batch confound. 
diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index ba922389af..5f0f0c5c95 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -705,7 +705,12 @@ public List loadIdsWithCache( @Nullable Filters filters, @Nullable Sort so @Override @Transactional(readOnly = true) - public long countWithCache( @Nullable Filters filters ) { + public long countWithCache( @Nullable Filters filters, @Nullable Set extraIds ) { + if ( extraIds != null ) { + List eeIds = loadIdsWithCache( filters, null ); + eeIds.retainAll( extraIds ); + return eeIds.size(); + } return expressionExperimentDao.countWithCache( filters ); } @@ -727,12 +732,15 @@ private static class SubClauseKey { @Override @Transactional(readOnly = true) - public Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ) { - List eeIds; + public Map getCategoriesUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ) { + Collection eeIds; if ( filters == null || filters.isEmpty() ) { - eeIds = null; + eeIds = extraIds; } else { eeIds = expressionExperimentDao.loadIdsWithCache( filters, null ); + if ( extraIds != null ) { + eeIds.retainAll( extraIds ); + } } if ( excludedTermUris != null ) { excludedTermUris = inferTermsUris( excludedTermUris ); @@ -746,19 +754,23 @@ public Map getCategoriesUsageFrequency( @Nullable Filters */ @Override @Transactional(readOnly = true) - public List getAnnotationsUsageFrequency( @Nullable Filters filters, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, int minFrequency, @Nullable Collection retainedTermUris, int maxResults ) { + public List getAnnotationsUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, int minFrequency, @Nullable Collection retainedTermUris, int maxResults ) { if ( excludedTermUris != null ) { excludedTermUris = inferTermsUris( excludedTermUris ); } - Map result; + Collection eeIds; if ( filters == null || filters.isEmpty() ) { - result = expressionExperimentDao.getAnnotationsUsageFrequency( null, null, maxResults, minFrequency, category, excludedCategoryUris, excludedTermUris, retainedTermUris ); + eeIds = extraIds; } else { - List eeIds = expressionExperimentDao.loadIdsWithCache( filters, null ); - result = expressionExperimentDao.getAnnotationsUsageFrequency( eeIds, null, maxResults, minFrequency, category, excludedCategoryUris, excludedTermUris, retainedTermUris ); + eeIds = expressionExperimentDao.loadIdsWithCache( filters, null ); + if ( extraIds != null ) { + eeIds.retainAll( extraIds ); + } } + Map result = expressionExperimentDao.getAnnotationsUsageFrequency( eeIds, null, maxResults, minFrequency, category, excludedCategoryUris, excludedTermUris, retainedTermUris ); + List resultWithParents = new ArrayList<>( result.size() ); // gather 
all the values and categories @@ -856,26 +868,43 @@ public Collection getArrayDesignsUsed( final BioAssaySet expression @Override @Transactional(readOnly = true) - public Map getTechnologyTypeUsageFrequency( @Nullable Filters filters ) { + public Map getTechnologyTypeUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds ) { if ( filters == null || filters.isEmpty() ) { - return expressionExperimentDao.getTechnologyTypeUsageFrequency(); + if ( extraIds != null ) { + return expressionExperimentDao.getTechnologyTypeUsageFrequency( extraIds ); + } else { + return expressionExperimentDao.getTechnologyTypeUsageFrequency(); + } } else { List ids = this.expressionExperimentDao.loadIdsWithCache( filters, null ); + if ( extraIds != null ) { + ids.retainAll( extraIds ); + } return expressionExperimentDao.getTechnologyTypeUsageFrequency( ids ); } } @Override @Transactional(readOnly = true) - public Map getArrayDesignUsedOrOriginalPlatformUsageFrequency( @Nullable Filters filters, int maxResults ) { + public Map getArrayDesignUsedOrOriginalPlatformUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds, int maxResults ) { Map result; if ( filters == null || filters.isEmpty() ) { - result = new HashMap<>( expressionExperimentDao.getArrayDesignsUsageFrequency( maxResults ) ); - for ( Map.Entry e : expressionExperimentDao.getOriginalPlatformsUsageFrequency( maxResults ).entrySet() ) { - result.compute( e.getKey(), ( k, v ) -> ( v != null ? v : 0L ) + e.getValue() ); + if ( extraIds != null ) { + result = new HashMap<>( expressionExperimentDao.getArrayDesignsUsageFrequency( extraIds, maxResults ) ); + for ( Map.Entry e : expressionExperimentDao.getOriginalPlatformsUsageFrequency( extraIds, maxResults ).entrySet() ) { + result.compute( e.getKey(), ( k, v ) -> ( v != null ? v : 0L ) + e.getValue() ); + } + } else { + result = new HashMap<>( expressionExperimentDao.getArrayDesignsUsageFrequency( maxResults ) ); + for ( Map.Entry e : expressionExperimentDao.getOriginalPlatformsUsageFrequency( maxResults ).entrySet() ) { + result.compute( e.getKey(), ( k, v ) -> ( v != null ? v : 0L ) + e.getValue() ); + } } } else { List ids = this.expressionExperimentDao.loadIdsWithCache( filters, null ); + if ( extraIds != null ) { + ids.retainAll( extraIds ); + } result = new HashMap<>( expressionExperimentDao.getArrayDesignsUsageFrequency( ids, maxResults ) ); for ( Map.Entry e : expressionExperimentDao.getOriginalPlatformsUsageFrequency( ids, maxResults ).entrySet() ) { result.compute( e.getKey(), ( k, v ) -> ( v != null ? 
v : 0L ) + e.getValue() ); @@ -895,11 +924,18 @@ public Map getArrayDesignUsedOrOriginalPlatformUsageFrequency @Override @Transactional(readOnly = true) - public Map getTaxaUsageFrequency( @Nullable Filters filters ) { + public Map getTaxaUsageFrequency( @Nullable Filters filters, @Nullable Set extraIds ) { if ( filters == null || filters.isEmpty() ) { - return expressionExperimentDao.getPerTaxonCount(); + if ( extraIds != null ) { + return expressionExperimentDao.getPerTaxonCount( extraIds ); + } else { + return expressionExperimentDao.getPerTaxonCount(); + } } else { List ids = this.expressionExperimentDao.loadIdsWithCache( filters, null ); + if ( extraIds != null ) { + ids.retainAll( extraIds ); + } return expressionExperimentDao.getPerTaxonCount( ids ); } } diff --git a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java index 8434cf16b3..19021f478d 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java +++ b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceIntegrationTest.java @@ -430,7 +430,7 @@ public void testCacheInvalidationWhenACharacteristicIsDeleted() { }; tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, 0, null, 0 ) ) + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, null, 0, null, 0 ) ) .noneSatisfy( consumer ); // add the term to the dataset and update the pivot table @@ -439,12 +439,12 @@ public void testCacheInvalidationWhenACharacteristicIsDeleted() { assertThat( c.getId() ).isNotNull(); // the table is out-of-date - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, 0, null, 0 ) ) + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, null, 0, null, 0 ) ) .noneSatisfy( consumer ); // update the pivot table tableMaintenanceUtil.updateExpressionExperiment2CharacteristicEntries( false ); - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, 0, null, 0 ) ) + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, null, 0, null, 0 ) ) .satisfiesOnlyOnce( consumer ); // remove the term, which must evict the query cache @@ -457,7 +457,7 @@ public void testCacheInvalidationWhenACharacteristicIsDeleted() { } ); // since deletions are cascaded, the change will be reflected immediatly - assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, 0, null, 0 ) ) + assertThat( expressionExperimentService.getAnnotationsUsageFrequency( null, null, null, null, null, 0, null, 0 ) ) .noneSatisfy( consumer ); } diff --git a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java index c789a34518..b23be9635f 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/model/expression/experiment/ExpressionExperimentServiceTest.java @@ -182,7 +182,7 @@ public void testGetFiltersWithCategories() { @Test public 
void testGetAnnotationsUsageFrequency() { - expressionExperimentService.getAnnotationsUsageFrequency( Filters.empty(), null, null, null, 0, null, -1 ); + expressionExperimentService.getAnnotationsUsageFrequency( Filters.empty(), null, null, null, null, 0, null, -1 ); verify( expressionExperimentDao ).getAnnotationsUsageFrequency( null, null, -1, 0, null, null, null, null ); verifyNoMoreInteractions( expressionExperimentDao ); } @@ -190,7 +190,7 @@ public void testGetAnnotationsUsageFrequency() { @Test public void testGetAnnotationsUsageFrequencyWithFilters() { Filters f = Filters.by( "c", "valueUri", String.class, Filter.Operator.eq, "http://example.com/T00001", "characteristics.valueUri" ); - expressionExperimentService.getAnnotationsUsageFrequency( f, null, null, null, 0, null, -1 ); + expressionExperimentService.getAnnotationsUsageFrequency( f, null, null, null, null, 0, null, -1 ); verify( expressionExperimentDao ).loadIdsWithCache( f, null ); verify( expressionExperimentDao ).getAnnotationsUsageFrequency( Collections.emptyList(), null, -1, 0, null, null, null, null ); verifyNoMoreInteractions( expressionExperimentDao ); diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 9e3e0c964e..7e024013ea 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -189,9 +189,9 @@ public QueriedAndFilteredAndPaginatedResponseDataObject ids = new ArrayList<>( expressionExperimentService.loadIdsWithCache( filters, sort ) ); Map scoreById = new HashMap<>(); - Filters filtersWithQuery = Filters.by( filters ).and( datasetArgService.getFilterForSearchQuery( query, scoreById ) ); - List ids = new ArrayList<>( expressionExperimentService.loadIdsWithCache( filtersWithQuery, sort ) ); + ids.retainAll( datasetArgService.getIdsForSearchQuery( query, scoreById ) ); // sort is stable, so the order of IDs with the same score is preserved ids.sort( Comparator.comparingDouble( i -> -scoreById.get( i ) ) ); @@ -240,15 +240,18 @@ public ExpressionExperimentWithSearchResultValueObject( ExpressionExperimentValu @GET @Path("/count") @Produces(MediaType.APPLICATION_JSON) - @Operation(summary = "Count datasets matching the provided query and filter") + @Operation(summary = "Count datasets matching the provided query and filter") public ResponseDataObject getNumberOfDatasets( @QueryParam("query") String query, @QueryParam("filter") @DefaultValue("") FilterArg filter ) { Filters filters = datasetArgService.getFilters( filter ); + Set extraIds; if ( query != null ) { - filters.and( datasetArgService.getFilterForSearchQuery( query, null ) ); + extraIds = datasetArgService.getIdsForSearchQuery( query, null ); + } else { + extraIds = null; } - return Responder.respond( expressionExperimentService.countWithCache( filters ) ); + return Responder.respond( expressionExperimentService.countWithCache( filters, extraIds ) ); } public interface UsageStatistics { @@ -268,15 +271,15 @@ public LimitedResponseDataObject getD @QueryParam("filter") @DefaultValue("") FilterArg filter, @QueryParam("limit") @DefaultValue("50") LimitArg limit ) { Filters filters = datasetArgService.getFilters( filter ); - Filters filtersWithQuery; + Set extraIds; if ( query != null ) { - filtersWithQuery = Filters.by( filters ).and( datasetArgService.getFilterForSearchQuery( query, null ) ); + extraIds = datasetArgService.getIdsForSearchQuery( query, null ); } else { - 
filtersWithQuery = filters; + extraIds = null; } Integer l = limit.getValueNoMaximum(); - Map tts = expressionExperimentService.getTechnologyTypeUsageFrequency( filtersWithQuery ); - Map ads = expressionExperimentService.getArrayDesignUsedOrOriginalPlatformUsageFrequency( filtersWithQuery, l ); + Map tts = expressionExperimentService.getTechnologyTypeUsageFrequency( filters, extraIds ); + Map ads = expressionExperimentService.getArrayDesignUsedOrOriginalPlatformUsageFrequency( filters, extraIds, l ); List adsVos = arrayDesignService.loadValueObjects( ads.keySet() ); Map countsById = ads.entrySet().stream().collect( Collectors.toMap( e -> e.getKey().getId(), Map.Entry::getValue ) ); List results = @@ -315,15 +318,16 @@ public QueriedAndFilteredResponseDataObject mentionedTerms = retainMentionedTerms ? new HashSet<>() : null; Filters filters = datasetArgService.getFilters( filter, mentionedTerms ); - Filters filtersWithQuery; + Set extraIds; if ( query != null ) { - filtersWithQuery = Filters.by( filters ).and( datasetArgService.getFilterForSearchQuery( query, null ) ); + extraIds = datasetArgService.getIdsForSearchQuery( query, null ); } else { - filtersWithQuery = filters; + extraIds = null; } int maxResults = limit.getValue( MAX_DATASETS_CATEGORIES ); List results = expressionExperimentService.getCategoriesUsageFrequency( - filtersWithQuery, + filters, + extraIds, datasetArgService.getExcludedUris( excludedCategoryUris, excludeFreeTextCategories, excludeUncategorizedTerms ), datasetArgService.getExcludedUris( excludedTermUris, excludeFreeTextTerms, excludeUncategorizedTerms ), mentionedTerms != null ? mentionedTerms.stream().map( OntologyTerm::getUri ).collect( Collectors.toSet() ) : null, @@ -382,17 +386,18 @@ public LimitedResponseDataObject getDa // ensure that implied terms are retained in the usage frequency Collection mentionedTerms = retainMentionedTerms ? 
new HashSet<>() : null; Filters filters = datasetArgService.getFilters( filter, mentionedTerms ); - Filters filtersWithQuery; + Set extraIds; if ( query != null ) { - filtersWithQuery = Filters.by( filters ).and( datasetArgService.getFilterForSearchQuery( query, null ) ); + extraIds = datasetArgService.getIdsForSearchQuery( query, null ); } else { - filtersWithQuery = filters; + extraIds = null; } if ( category != null && category.isEmpty() ) { category = ExpressionExperimentService.UNCATEGORIZED; } List initialResults = expressionExperimentService.getAnnotationsUsageFrequency( - filtersWithQuery, + filters, + extraIds, category, datasetArgService.getExcludedUris( excludedCategoryUris, excludeFreeTextCategories, excludeUncategorizedTerms ), datasetArgService.getExcludedUris( excludedTermUris, excludeFreeTextTerms, excludeUncategorizedTerms ), @@ -499,13 +504,13 @@ public AnnotationWithUsageStatisticsValueObject( Characteristic c, Long numberOf public QueriedAndFilteredResponseDataObject getDatasetsTaxaUsageStatistics( @QueryParam("query") String query, @QueryParam("filter") @DefaultValue("") FilterArg filterArg ) { Filters filters = datasetArgService.getFilters( filterArg ); - Filters filtersWithQuery; + Set extraIds; if ( query != null ) { - filtersWithQuery = Filters.by( filters ).and( datasetArgService.getFilterForSearchQuery( query, null ) ); + extraIds = datasetArgService.getIdsForSearchQuery( query, null ); } else { - filtersWithQuery = filters; + extraIds = null; } - return Responder.queryAndFilter( expressionExperimentService.getTaxaUsageFrequency( filtersWithQuery ) + return Responder.queryAndFilter( expressionExperimentService.getTaxaUsageFrequency( filters, extraIds ) .entrySet().stream() .map( e -> new TaxonWithUsageStatisticsValueObject( e.getKey(), e.getValue() ) ) .collect( Collectors.toList() ), query, filters, new String[] { "id" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ) ); diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java index 0cad88745d..e6537b9a9f 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java @@ -131,19 +131,14 @@ public List> getResultsForSearchQuery( String * @param scoreById if non-null, a destination for storing the scores by result ID * @throws BadRequestException if the query is empty */ - public Filter getFilterForSearchQuery( String query, @Nullable Map scoreById ) throws BadRequestException { + public Set getIdsForSearchQuery( String query, @Nullable Map scoreById ) throws BadRequestException { List> _results = getResultsForSearchQuery( query, null ); if ( scoreById != null ) { for ( SearchResult result : _results ) { scoreById.put( result.getResultId(), result.getScore() ); } } - Set ids = _results.stream().map( SearchResult::getResultId ).collect( Collectors.toSet() ); - if ( ids.isEmpty() ) { - return service.getFilter( "id", Long.class, Filter.Operator.eq, -1L ); - } else { - return service.getFilter( "id", Long.class, Filter.Operator.in, ids ); - } + return _results.stream().map( SearchResult::getResultId ).collect( Collectors.toSet() ); } /** diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index 3cedb11bc4..ac922c4ced 100644 --- 
a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -220,8 +220,7 @@ public void testGetDatasetsWithQuery() throws SearchException { assertThat( s.isFillResults() ).isFalse(); assertThat( s.getHighlighter() ).isNotNull(); } ); - verify( expressionExperimentService ).getFilter( "id", Long.class, Filter.Operator.in, new HashSet<>( ids ) ); - verify( expressionExperimentService ).loadIdsWithCache( Filters.by( "ee", "id", Long.class, Filter.Operator.in, new HashSet<>( ids ) ), Sort.by( "ee", "id", Sort.Direction.ASC ) ); + verify( expressionExperimentService ).loadIdsWithCache( Filters.empty(), Sort.by( "ee", "id", Sort.Direction.ASC ) ); verify( expressionExperimentService ).loadValueObjectsByIdsWithRelationsAndCache( ids ); } @@ -284,7 +283,7 @@ public void testGetDatasetsPlatformsUsageStatistics() { .hasEncoding( "gzip" ); verify( expressionExperimentService ).getFilter( "id", Filter.Operator.lessThan, "10" ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.by( f ), null ); - verify( expressionExperimentService ).getArrayDesignUsedOrOriginalPlatformUsageFrequency( Filters.by( f ), 50 ); + verify( expressionExperimentService ).getArrayDesignUsedOrOriginalPlatformUsageFrequency( Filters.by( f ), null, 50 ); } @Test @@ -300,7 +299,7 @@ public void testGetDatasetsAnnotationsWithRetainMentionedTerms() { .extracting( "groupBy", InstanceOfAssertFactories.list( String.class ) ) .containsExactly( "classUri", "className", "termUri", "termName" ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.empty(), Collections.emptySet() ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, 0, Collections.emptySet(), 100 ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, null, 0, Collections.emptySet(), 100 ); } @Test @@ -316,7 +315,7 @@ public void testGetDatasetsAnnotations() { .extracting( "groupBy", InstanceOfAssertFactories.list( String.class ) ) .containsExactly( "classUri", "className", "termUri", "termName" ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.empty(), null ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, 0, null, 100 ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, null, 0, null, 100 ); } @Test @@ -335,7 +334,7 @@ public void testGetDatasetsAnnotationsWhenMaxFrequencyIsSuppliedLimitMustUseMaxi .entity() .hasFieldOrPropertyWithValue( "limit", 5000 ); verify( expressionExperimentService ).getFiltersWithInferredAnnotations( Filters.empty(), null ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, 10, null, 5000 ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, null, 10, null, 5000 ); } @Test @@ -347,7 +346,7 @@ public void testGetDatasetsAnnotationsWithLimitIsSupplied() { .hasFieldOrPropertyWithValue( "limit", 50 ) .extracting( "groupBy", InstanceOfAssertFactories.list( String.class ) ) .containsExactly( "classUri", "className", "termUri", "termName" ); - verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, null, null, 0, null, 50 ); + verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, 
null, null, null, 0, null, 50 );
     }

     @Test
@@ -355,7 +354,7 @@ public void testGetDatasetsAnnotationsForUncategorizedTerms() {
         assertThat( target( "/datasets/annotations" ).queryParam( "category", "" ).request().get() )
                 .hasStatus( Response.Status.OK )
                 .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE );
-        verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), ExpressionExperimentService.UNCATEGORIZED, null, null, 0, null, 100 );
+        verify( expressionExperimentService ).getAnnotationsUsageFrequency( Filters.empty(), null, ExpressionExperimentService.UNCATEGORIZED, null, null, 0, null, 100 );
     }

     @Test
@@ -363,7 +362,7 @@ public void testGetDatasetsCategories() {
         assertThat( target( "/datasets/categories" ).request().get() )
                 .hasStatus( Response.Status.OK )
                 .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE );
-        verify( expressionExperimentService ).getCategoriesUsageFrequency( Filters.empty(), null, null, null, 20 );
+        verify( expressionExperimentService ).getCategoriesUsageFrequency( Filters.empty(), null, null, null, null, 20 );
     }

     @Test
diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java
index f253745fb5..2d89fc2d44 100644
--- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java
+++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExpressionExperimentController.java
@@ -474,7 +474,7 @@ public Map loadCountsForDataSummaryTable() {
         countTimer.start();
         long bioMaterialCount = expressionExperimentService.countBioMaterials( null );
         long arrayDesignCount = arrayDesignService.countWithCache( null );
-        long expressionExperimentCount = expressionExperimentService.countWithCache( null );
+        long expressionExperimentCount = expressionExperimentService.countWithCache( null, null );
         Map eesPerTaxon = expressionExperimentService.getPerTaxonCount();
         countTimer.stop();

From e162c973f4c9c754745a32b4110ff5561b871c71 Mon Sep 17 00:00:00 2001
From: Guillaume Poirier-Morency
Date: Wed, 20 Mar 2024 10:09:10 -0700
Subject: [PATCH 040/105] rest: Produce a complete URI for GEO datasets and add an externalDatabaseUri field

---
 .../gemma/model/common/description/ExternalDatabases.java | 1 +
 .../experiment/ExpressionExperimentValueObject.java       | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/description/ExternalDatabases.java b/gemma-core/src/main/java/ubic/gemma/model/common/description/ExternalDatabases.java
index cf946349d9..85c01385ab 100644
--- a/gemma-core/src/main/java/ubic/gemma/model/common/description/ExternalDatabases.java
+++ b/gemma-core/src/main/java/ubic/gemma/model/common/description/ExternalDatabases.java
@@ -7,6 +7,7 @@
 public final class ExternalDatabases {

     public static final String
+            GEO = "GEO",
             GENE = "gene",
             GO = "go",
             MULTIFUNCTIONALITY = "multifunctionality",
diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java
index e0f0c646fb..34265683d7 100644
--- a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java
+++ b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java
@@ 
-13,6 +13,7 @@ import org.hibernate.Hibernate; import ubic.gemma.model.annotations.GemmaWebOnly; import ubic.gemma.model.common.auditAndSecurity.curation.AbstractCuratableValueObject; +import ubic.gemma.model.common.description.ExternalDatabases; import ubic.gemma.model.genome.TaxonValueObject; import ubic.gemma.persistence.util.EntityUtils; @@ -52,6 +53,7 @@ public class ExpressionExperimentValueObject extends AbstractCuratableValueObjec @JsonIgnore private Long experimentalDesign; private String externalDatabase; + private String externalDatabaseUri; private String externalUri; private GeeqValueObject geeq; @JsonIgnore @@ -103,7 +105,10 @@ public ExpressionExperimentValueObject( ExpressionExperiment ee, boolean ignoreD if ( !ignoreAccession && ee.getAccession() != null && Hibernate.isInitialized( ee.getAccession() ) ) { this.accession = ee.getAccession().getAccession(); this.externalDatabase = ee.getAccession().getExternalDatabase().getName(); - this.externalUri = ee.getAccession().getExternalDatabase().getWebUri(); + this.externalDatabaseUri = ee.getAccession().getExternalDatabase().getWebUri(); + if ( ee.getAccession().getExternalDatabase().getName().equals( ExternalDatabases.GEO ) ) { + this.externalUri = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=" + ee.getAccession().getAccession(); + } } // EE From 70f473484491dc85ba573b15feee2703b4f797ec Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 20 Mar 2024 11:10:52 -0700 Subject: [PATCH 041/105] Fix missing field in EE VO copy constructor --- .../expression/experiment/ExpressionExperimentValueObject.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java index 34265683d7..98318d526b 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperimentValueObject.java @@ -180,7 +180,9 @@ protected ExpressionExperimentValueObject( ExpressionExperimentValueObject vo ) this.accession = vo.getAccession(); this.batchConfound = vo.getBatchConfound(); this.batchEffect = vo.getBatchEffect(); + this.batchEffectStatistics = vo.getBatchEffectStatistics(); this.externalDatabase = vo.getExternalDatabase(); + this.externalDatabaseUri = vo.getExternalDatabaseUri(); this.externalUri = vo.getExternalUri(); this.metadata = vo.getMetadata(); this.shortName = vo.getShortName(); From 0062611206f1c73042658d2c42927da5f21f44c9 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 20 Mar 2024 14:29:16 -0700 Subject: [PATCH 042/105] Limit the number of inferred terms appearing in a filter --- .../experiment/ExpressionExperimentServiceImpl.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java index 5f0f0c5c95..9bc4858c92 100755 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentServiceImpl.java @@ -623,11 +623,21 @@ public Filters getFiltersWithInferredAnnotations( 
Filters f, @Nullable Collection mentionedTerms ) {
         }
         // recreate a clause with inferred terms
         for ( Map.Entry> e : termUrisBySubClause.entrySet() ) {
-            Collection termAndChildrenUris = new HashSet<>( e.getValue() );
             Set terms = ontologyService.getTerms( e.getValue() );
+            Set termAndChildrenUris = new TreeSet<>( String.CASE_INSENSITIVE_ORDER );
+            termAndChildrenUris.addAll( e.getValue() );
             termAndChildrenUris.addAll( ontologyService.getChildren( terms, false, true ).stream()
                     .map( OntologyTerm::getUri )
                     .collect( Collectors.toList() ) );
+            if ( termAndChildrenUris.size() > QueryUtils.MAX_PARAMETER_LIST_SIZE ) {
+                log.warn( String.format( "There are too many terms for the clause %s, will pick the top %d terms.",
+                        e.getKey().getOriginalProperty(), QueryUtils.MAX_PARAMETER_LIST_SIZE ) );
+                termAndChildrenUris = termAndChildrenUris.stream()
+                        // favour terms that are mentioned in the filter
+                        .sorted( Comparator.comparing( e.getValue()::contains, Comparator.reverseOrder() ) )
+                        .limit( QueryUtils.MAX_PARAMETER_LIST_SIZE )
+                        .collect( Collectors.toSet() );
+            }
             if ( mentionedTerms != null ) {
                 mentionedTerms.addAll( terms );
             }

From f8ced42ccb1fed4ac912e12b22c8454ce86f9094 Mon Sep 17 00:00:00 2001
From: Guillaume Poirier-Morency
Date: Wed, 20 Mar 2024 15:23:48 -0700
Subject: [PATCH 043/105] Few more rounds of optimization for ontology caches

Use a read-only lock when accessing the ontology cache initially and
then a read-write lock to compute.

Eliminate the k-1,k-2 subset strategy; it's completely inefficient in
practice, and the caches are so small that it's more performant to just
enumerate the keys.

Make parents/children cache eternal. These relationships are immutable
once the ontology is loaded.
---
 .../gemma/core/ontology/OntologyCache.java    | 227 +++++++++---------
 .../core/ontology/OntologyServiceImpl.java    |  52 ++--
 .../gemma/persistence/util/CacheUtils.java    | 105 ++++++--
 gemma-core/src/main/resources/ehcache.xml     |   4 +-
 .../core/ontology/OntologyCacheTest.java      |  14 +-
 5 files changed, 232 insertions(+), 170 deletions(-)

diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java
index 6932d204c9..1a99e7b4a6 100644
--- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java
+++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java
@@ -2,8 +2,10 @@
 import lombok.EqualsAndHashCode;
 import lombok.Value;
-import org.apache.commons.math3.util.Combinations;
+import lombok.extern.apachecommons.CommonsLog;
+import org.apache.commons.lang3.time.StopWatch;
 import org.springframework.cache.Cache;
+import org.springframework.util.Assert;
 import ubic.basecode.ontology.model.OntologyTerm;
 import ubic.basecode.ontology.providers.OntologyService;
 import ubic.gemma.persistence.util.CacheUtils;
@@ -14,19 +16,39 @@
 /**
  * High-level cache abstraction for retrieving parents and children of a set of terms.
  *

- * The main approach here for caching is to lookup all the possible {@code k-1} subsets (then {@code k - 2}, - * {@code k - 3}, ...) of a given query and only retrieve the difference from the {@link OntologyService}. + * The main approach here for caching is to enumerate cache keys to find subsets of a given query and only retrieve the + * difference from the {@link OntologyService}. * @author poirigui */ +@CommonsLog class OntologyCache { private final Cache parentsCache, childrenCache; + private long lockTimeoutMillis = 5000; + private int minSubsetSize = 1; + OntologyCache( Cache parentsCache, Cache childrenCache ) { this.parentsCache = parentsCache; this.childrenCache = childrenCache; } + /** + * Maximum amount of time in milliseconds to wait for a cache entry to be computed by another thread. If the timeout + * is exceeded, no results will be returned. + */ + public void setLockTimeoutMillis( long lockTimeoutMillis ) { + this.lockTimeoutMillis = lockTimeoutMillis; + } + + /** + * Minimum size of subsets to consider when enumerating cache keys. + */ + void setMinSubsetSize( int minSubsetSize ) { + Assert.isTrue( minSubsetSize > 0 ); + this.minSubsetSize = minSubsetSize; + } + /** * Obtain the parents of a given set of terms. */ @@ -53,140 +75,102 @@ private Set getParentsOrChildren( OntologyService os, Collection termsSet = new HashSet<>( terms ); - Object key = new ParentsOrChildrenCacheKey( os, termsSet, direct, includeAdditionalProperties ); - Cache.ValueWrapper value = cache.get( key ); - if ( value != null ) { - //noinspection unchecked - return ( Set ) value.get(); - } else { - if ( termsSet.size() > 1 ) { + ParentsOrChildrenCacheKey key = new ParentsOrChildrenCacheKey( os, termsSet, direct, includeAdditionalProperties ); + + // there might be a thread computing this cache entry + long initialLockAcquisitionMs = timer.getTime(); + try ( CacheUtils.Lock ignored = CacheUtils.acquireReadLock( cache, key ) ) { + initialLockAcquisitionMs = timer.getTime() - initialLockAcquisitionMs; + Cache.ValueWrapper value = cache.get( key ); + if ( value != null ) { //noinspection unchecked - HashSet keys = new HashSet<>( ( Collection ) CacheUtils.getKeys( cache ) ); - - // try looking for k-1 or k-2 subsets - ParentsOrChildrenCacheKey keyForSubset = lookupMaximalSubsetByCombination( keys, os, termsSet, direct, includeAdditionalProperties ); - - // try enumerating keys (initially fast, but gets slower as the cache grows) - if ( keyForSubset == null ) { - keyForSubset = lookupMaximalSubsetByEnumeratingKeys( keys, os, termsSet, direct, includeAdditionalProperties ); - } - - if ( keyForSubset != null ) { - Cache.ValueWrapper valueForSubset = cache.get( keyForSubset ); - if ( valueForSubset != null ) { - //noinspection unchecked - Set resultsForSubset = ( Set ) valueForSubset.get(); - // only query the difference - Set remainingTerms = new HashSet<>( termsSet ); - remainingTerms.removeAll( keyForSubset.terms ); - Set remainingResults = getParentsOrChildren( os, remainingTerms, direct, includeAdditionalProperties, cache, ancestors ); - // recombine the results - Set results = new HashSet<>( resultsForSubset ); - results.addAll( remainingResults ); - cache.put( key, results ); - return results; - } - } - } - - // no subsets are of any use, so directly query - try ( CacheUtils.Lock ignored = CacheUtils.acquireWriteLock( cache, key ) ) { - // check if the entry have been computed by another thread - value = cache.get( key ); - if ( value != null ) { - //noinspection unchecked - return ( Set ) 
value.get(); - } - Set newVal = ancestors ? - os.getParents( termsSet, direct, includeAdditionalProperties ) : - os.getChildren( termsSet, direct, includeAdditionalProperties ); - cache.put( key, newVal ); - return newVal; + return ( Set ) value.get(); } } - } - /** - * A HashSet implementation with a cheap hashCode() operation. - */ - private static class IncrementalHashSet extends HashSet { - - private int hashCode = 0; - - public IncrementalHashSet( Set terms ) { - super( terms ); - } - - @Override - public boolean add( T o ) { - if ( !super.add( o ) ) { - hashCode += o.hashCode(); - return true; + long lookupSubsetMs = 0; + ParentsOrChildrenCacheKey keyForSubset; + // enough terms to make it worth looking for subsets... + if ( termsSet.size() >= minSubsetSize + 1 ) { + lookupSubsetMs = timer.getTime(); + keyForSubset = lookupMaximalSubsetByEnumeratingKeys( cache, os, termsSet, direct, includeAdditionalProperties ); + lookupSubsetMs = timer.getTime() - lookupSubsetMs; + if ( lookupSubsetMs > 100 ) { + log.warn( String.format( "Enumerating cache keys for finding a maximal subset for %s of %s took %d ms and %s", + ancestors ? "parents" : "children", key, lookupSubsetMs, keyForSubset != null ? "succeeded with " + keyForSubset + " terms" : "failed" ) ); } - return false; + } else { + // we used to enumerate all possible k-1, k-2 subsets, but that's just too slow compared to enumerating + // cache keys, other strategies can be implemented here if necessary + keyForSubset = null; } - @Override - public boolean remove( Object o ) { - if ( !super.remove( o ) ) { - hashCode -= o.hashCode(); - return true; + if ( keyForSubset != null ) { + Cache.ValueWrapper valueForSubset = cache.get( keyForSubset ); + if ( valueForSubset != null ) { + //noinspection unchecked + Set resultsForSubset = ( Set ) valueForSubset.get(); + // only query the difference + Set remainingTerms = new HashSet<>( termsSet ); + remainingTerms.removeAll( keyForSubset.terms ); + Set remainingResults = getParentsOrChildren( os, remainingTerms, direct, includeAdditionalProperties, cache, ancestors ); + // recombine the results + Set results = new HashSet<>( resultsForSubset ); + results.addAll( remainingResults ); + cache.put( key, results ); + return results; + } else { + log.warn( "Missing expected key from the " + ( ancestors ? "parents" : "children" ) + " cache: " + keyForSubset ); } - return false; } - @Override - public int hashCode() { - return hashCode; - } - } - - /** - * Check if a k-1 (or k-2) subset of a given set of terms is in the given cache and query the difference. - *

- * Because the number of subset is exponential in the number of terms, we only try subsets of size 1 and 2 if - * {@code n < 100}. - */ - @Nullable - private ParentsOrChildrenCacheKey lookupMaximalSubsetByCombination( Set keys, OntologyService os, Set terms, boolean direct, boolean includeAdditionalProperties ) { - // we will be generating subsets from this - List orderedTerms = new ArrayList<>( terms ); - // we will be mutating this - Set termsForSubset = new IncrementalHashSet<>( terms ); - // successively try removing k-subsets (k = 1 up to 3); it grows exponentially so careful here! - int n = orderedTerms.size(); - // n = 100 has ~5000 2-combinations - int maxN = n < 100 ? 2 : 1; - // if n = k, there's only one subset, and it's the same case as if no subsets were found - for ( int k = 1; k <= Math.min( n - 1, maxN ); k++ ) { - for ( int[] is : new Combinations( n, k ) ) { - for ( int i : is ) { - termsForSubset.remove( orderedTerms.get( i ) ); - } - // note: ParentsOrChildrenCacheKey is immutable so that the hashCode can be efficiently computed - ParentsOrChildrenCacheKey keyForSubset = new ParentsOrChildrenCacheKey( os, termsForSubset, direct, includeAdditionalProperties ); - if ( keys.contains( keyForSubset ) ) { - return keyForSubset; - } - for ( int i : is ) { - termsForSubset.add( orderedTerms.get( i ) ); - } + long acquireMs = timer.getTime(); + long computingMs = 0; + try ( CacheUtils.Lock ignored = CacheUtils.acquireWriteLock( cache, key ) ) { + acquireMs = timer.getTime() - acquireMs; + // lookup the cache in case another thread computed the result while we were enumerating subsets + Cache.ValueWrapper value = cache.get( key ); + if ( value != null ) { + //noinspection unchecked + return ( Set ) value.get(); + } + computingMs = timer.getTime(); + // no subset found in the cache, just compute it from scratch + Set newVal = ancestors ? + os.getParents( termsSet, direct, includeAdditionalProperties ) : + os.getChildren( termsSet, direct, includeAdditionalProperties ); + computingMs = timer.getTime() - computingMs; + // ignore empty newVal, it might just be that the ontology is not initialized yet + if ( !newVal.isEmpty() && computingMs < lookupSubsetMs ) { + log.warn( String.format( "Computing %d %s terms for %s took less time than looking up subsets, increasing the minSubsetSize might be beneficial", + newVal.size(), + ancestors ? "parents" : "children", + key ) ); + } + cache.put( key, newVal ); + return newVal; + } finally { + if ( timer.getTime() > 500 ) { + log.warn( String.format( "Retrieving %s for %s took %d ms (acquiring locks: %d ms, enumerating subsets: %d ms, computing: %d ms)", + ancestors ? "parents" : "children", key, timer.getTime(), initialLockAcquisitionMs + acquireMs, lookupSubsetMs, computingMs ) ); } } - return null; } /** * Enumerate the cache's keys to find the maximal subset. - *

- * This is less efficient than {@link #lookupMaximalSubsetByCombination(Set, OntologyService, Set, boolean, boolean)} - * because we to verify if a subset exist for each key of the cache. */ @Nullable - private ParentsOrChildrenCacheKey lookupMaximalSubsetByEnumeratingKeys( Collection keys, OntologyService os, Set terms, boolean direct, boolean includeAdditionalProperties ) { - return keys.stream() - .filter( k -> k.ontologyService.equals( os ) && k.direct == direct && k.includeAdditionalProperties == includeAdditionalProperties && terms.containsAll( k.terms ) ) + private ParentsOrChildrenCacheKey lookupMaximalSubsetByEnumeratingKeys( Cache cache, OntologyService os, Set terms, boolean direct, boolean includeAdditionalProperties ) { + return CacheUtils.getKeys( cache ).stream() + .map( o -> ( ParentsOrChildrenCacheKey ) o ) + .filter( k -> k.direct == direct && k.includeAdditionalProperties == includeAdditionalProperties && k.ontologyService.equals( os ) ) + // ignore empty subsets, those will cause an infinite loop + // skip sets which are larger or equal in size, those cannot be subsets + .filter( k -> k.terms.size() >= minSubsetSize && k.terms.size() < terms.size() && terms.containsAll( k.terms ) ) .max( Comparator.comparingInt( k1 -> k1.terms.size() ) ) .orElse( null ); } @@ -198,5 +182,12 @@ private static class ParentsOrChildrenCacheKey { Set terms; boolean direct; boolean includeAdditionalProperties; + + @Override + public String toString() { + return String.format( "%d terms from %s [%s] [%s]", terms.size(), ontologyService, + direct ? "direct" : "all", + includeAdditionalProperties ? "subClassOf and " + ontologyService.getAdditionalPropertyUris().size() + " additional properties" : "only subClassOf" ); + } } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java index 0069321380..0390553602 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java @@ -30,6 +30,7 @@ import org.springframework.core.io.ClassPathResource; import org.springframework.core.io.Resource; import org.springframework.core.task.AsyncTaskExecutor; +import org.springframework.core.task.SimpleAsyncTaskExecutor; import org.springframework.core.task.TaskExecutor; import org.springframework.stereotype.Service; import ubic.basecode.ontology.model.AnnotationProperty; @@ -92,7 +93,7 @@ public class OntologyServiceImpl implements OntologyService, InitializingBean { @Autowired private GeneService geneService; @Autowired - private AsyncTaskExecutor taskExecutor; + private AsyncTaskExecutor taskExecutor = new SimpleAsyncTaskExecutor(); @Autowired private ExperimentalFactorOntologyService experimentalFactorOntologyService; @@ -260,14 +261,7 @@ public Collection findTerms( String search ) throws BaseCodeOntolo * URI input: just retrieve the term. 
*/ if ( search.startsWith( "http://" ) ) { - return combineInThreads( ontology -> { - OntologyTerm found = ontology.getTerm( search ); - if ( found != null ) { - return Collections.singleton( found ); - } else { - return Collections.emptySet(); - } - }, "terms matching " + search ); + return Collections.singleton( findFirst( ontology -> ontology.getTerm( search ), "terms matching " + search ) ); } Collection results = new HashSet<>(); @@ -281,7 +275,7 @@ public Collection findTerms( String search ) throws BaseCodeOntolo return results; } - results = searchInThreads( ontology -> ontology.findTerm( query ), query ); + results = searchInThreads( ontology -> ontology.findTerm( query ).stream().collect( Collectors.toSet() ), query ); if ( geneOntologyService.isOntologyLoaded() ) { try { @@ -410,12 +404,12 @@ private Set getParentsOrChildren( Collection terms, return parents ? ontologyCache.getParents( os, toQuery, direct, includeAdditionalProperties ) : ontologyCache.getChildren( os, toQuery, direct, includeAdditionalProperties ); } finally { - if ( timer.getTime() > Math.max( 10L * terms.size(), 1000L ) ) { - log.warn( String.format( "Obtaining %s from %s for %s took %s", + if ( timer.getTime() > Math.max( 10L * terms.size(), 500L ) ) { + log.warn( String.format( "Obtaining %s from %s for %s took %d ms", parents ? "parents" : "children", os, terms.size() == 1 ? terms.iterator().next() : terms.size() + " terms", - timer ) ); + timer.getTime() ) ); } } }, String.format( "%s %s of %d terms", direct ? "direct" : "all", parents ? "parents" : "children", terms.size() ) ); @@ -457,11 +451,7 @@ public Collection getCategoryTerms() { @Override public Collection getRelationTerms() { // FIXME: it's not quite like categoryTerms so this map operation is probably not needed at all, the relations don't come from any particular ontology - return relationTerms.stream() - .map( term -> { - return term; - } ) - .collect( Collectors.toSet() ); + return Collections.unmodifiableSet( relationTerms ); } @Override @@ -481,15 +471,7 @@ public String getDefinition( String uri ) { @Override public OntologyTerm getTerm( String uri ) { - return findFirst( ontology -> { - OntologyTerm term = ontology.getTerm( uri ); - // some terms mentioned, but not declared in some ontologies (see https://github.com/PavlidisLab/Gemma/issues/998) - // FIXME: baseCode should return null if there is no , not default the local name or URI - if ( term != null && ( term.getLabel() == null || term.getLabel().equals( term.getUri() ) ) ) { - return null; - } - return term; - } ); + return findFirst( ontology -> ontology.getTerm( uri ), uri ); } @Override @@ -866,14 +848,14 @@ static Comparator getCharacteristicComparator( String .thenComparing( CharacteristicValueObject::getNumTimesUsed, Comparator.reverseOrder() ) // most frequently used first .thenComparing( CharacteristicValueObject::isAlreadyPresentInDatabase, Comparator.reverseOrder() ) // already used terms first .thenComparing( c -> c.getValue() != null ? c.getValue().length() : null, Comparator.nullsLast( Comparator.naturalOrder() ) ); // shorter term first - } /** * Find the first non-null result among loaded ontology services. 
*/ @Nullable - private T findFirst( Function function ) { + private T findFirst( Function function, String query ) { + StopWatch timer = StopWatch.createStarted(); List> futures = new ArrayList<>( ontologyServices.size() ); ExecutorCompletionService completionService = new ExecutorCompletionService<>( taskExecutor ); for ( ubic.basecode.ontology.providers.OntologyService service : ontologyServices ) { @@ -883,7 +865,13 @@ private T findFirst( Function future; + double timeout = 1000; + while ( ( future = completionService.poll( ( long ) timeout, TimeUnit.MILLISECONDS ) ) == null ) { + log.warn( String.format( "Ontology query for %s is taking too long (%d/%d completed so far, %s elapsed).", query, i, futures.size(), timer ) ); + timeout *= 1.5; // exponential backoff + } + T result = future.get(); if ( result != null ) { return result; } @@ -970,8 +958,10 @@ private Set combineInThreads( Function> future; - while ( ( future = completionService.poll( 1, TimeUnit.SECONDS ) ) == null ) { + double timeout = 1000; + while ( ( future = completionService.poll( ( long ) timeout, TimeUnit.MILLISECONDS ) ) == null ) { log.warn( String.format( "Ontology query for %s is taking too long (%d/%d completed so far, %s elapsed).", query, i, futures.size(), timer ) ); + timeout *= 1.5; // exponential backoff } children.addAll( future.get() ); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/CacheUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/CacheUtils.java index 08903a8b18..7823a2ca28 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/CacheUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/CacheUtils.java @@ -1,14 +1,14 @@ package ubic.gemma.persistence.util; -import lombok.Value; import net.sf.ehcache.Ehcache; import org.springframework.cache.Cache; import org.springframework.cache.CacheManager; -import java.util.Collection; -import java.util.Collections; -import java.util.Map; -import java.util.Objects; +import java.util.*; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.function.Predicate; /** @@ -25,6 +25,29 @@ public static Cache getCache( CacheManager cacheManager, String cacheName ) thro return Objects.requireNonNull( cacheManager.getCache( cacheName ), String.format( "Cache with name %s does not exist.", cacheName ) ); } + public static int getSize( Cache cache ) { + if ( cache.getNativeCache() instanceof Ehcache ) { + return ( ( Ehcache ) cache.getNativeCache() ).getSize(); + } else if ( cache.getNativeCache() instanceof Map ) { + return ( ( Map ) cache.getNativeCache() ).size(); + } else { + return 0; + } + } + + /** + * Check if a cache contains a given key. + */ + public static boolean containsKey( Cache cache, Object key ) { + if ( cache.getNativeCache() instanceof Ehcache ) { + return ( ( Ehcache ) cache.getNativeCache() ).isKeyInCache( key ); + } else if ( cache.getNativeCache() instanceof Map ) { + return ( ( Map ) cache.getNativeCache() ).containsKey( key ); + } else { + return cache.get( key ) != null; + } + } + /** * Obtain the keys of all elements of a cache. 
*/ @@ -55,6 +78,14 @@ public static void evictIf( Cache cache, Predicate predicate ) { } } + public static Lock acquireReadLock( Cache cache, Object key ) { + if ( cache.getNativeCache() instanceof Ehcache ) { + return new EhcacheLock( ( Ehcache ) cache.getNativeCache(), key, true ); + } else { + return new CacheLock( cache, key, true ); + } + } + /** * Acquire an exclusive write lock on the given key in the cache. *

@@ -62,9 +93,9 @@ public static void evictIf( Cache cache, Predicate predicate ) { */ public static Lock acquireWriteLock( Cache cache, Object key ) { if ( cache.getNativeCache() instanceof Ehcache ) { - return new EhcacheWriteLock( ( Ehcache ) cache.getNativeCache(), key ); + return new EhcacheLock( ( Ehcache ) cache.getNativeCache(), key, false ); } else { - return new NoopWriteLock(); + return new CacheLock( cache, key, false ); } } @@ -74,15 +105,16 @@ public interface Lock extends AutoCloseable { void close(); } - @Value - private static class EhcacheWriteLock implements Lock { + private static class EhcacheLock implements Lock { - Ehcache cache; - Object key; + private final Ehcache cache; + private final Object key; + private final boolean readOnly; - public EhcacheWriteLock( Ehcache cache, Object key ) { + public EhcacheLock( Ehcache cache, Object key, boolean readOnly ) { this.cache = cache; this.key = key; + this.readOnly = readOnly; lock(); } @@ -92,19 +124,60 @@ public void close() { } private void lock() { - cache.acquireWriteLockOnKey( key ); + if ( readOnly ) { + cache.acquireReadLockOnKey( key ); + } else { + cache.acquireWriteLockOnKey( key ); + } } private void unlock() { - cache.releaseWriteLockOnKey( key ); + if ( readOnly ) { + cache.releaseReadLockOnKey( key ); + } else { + cache.releaseWriteLockOnKey( key ); + } } } - private static class NoopWriteLock implements Lock { + private static class CacheLock implements Lock { + + /** + * Using a WeakHashMap to avoid memory leaks when a cache key is no longer used. + */ + private static final Map> lockByKey = new WeakHashMap<>(); + + private final ReadWriteLock lock; + private final boolean readOnly; + + public CacheLock( Cache cache, Object key, boolean readOnly ) { + synchronized ( lockByKey ) { + this.lock = lockByKey.computeIfAbsent( cache, k -> new WeakHashMap<>() ) + .computeIfAbsent( key, k -> new ReentrantReadWriteLock() ); + } + this.readOnly = readOnly; + lock(); + } @Override public void close() { - // noop + unlock(); + } + + private void lock() { + if ( readOnly ) { + lock.readLock().lock(); + } else { + lock.writeLock().lock(); + } + } + + private void unlock() { + if ( readOnly ) { + lock.readLock().unlock(); + } else { + lock.writeLock().unlock(); + } } } } diff --git a/gemma-core/src/main/resources/ehcache.xml b/gemma-core/src/main/resources/ehcache.xml index 1485cb2c83..419ccc4c0b 100644 --- a/gemma-core/src/main/resources/ehcache.xml +++ b/gemma-core/src/main/resources/ehcache.xml @@ -457,8 +457,8 @@ - - + + diff --git a/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyCacheTest.java b/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyCacheTest.java index ec68bed950..1e058305a8 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyCacheTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyCacheTest.java @@ -1,5 +1,6 @@ package ubic.gemma.core.ontology; +import org.junit.After; import org.junit.Before; import org.junit.Test; import org.springframework.cache.concurrent.ConcurrentMapCache; @@ -31,6 +32,11 @@ public void setUp() { term4 = new OntologyTermSimple( "http://example.com/term3", "term4" ); } + @After + public void resetMocks() { + reset( ontologyService ); + } + @Test public void testLookupByMaximalSubset() { ontologyCache.getChildren( ontologyService, Collections.singleton( term1 ), true, true ); @@ -54,12 +60,14 @@ public void testLookupByMaximalSubset() { } @Test - public void testLookupByEnumeration() { + public void 
testLookupByMaximalSubsetWhenMinSubsetSizeIsSet() { ontologyCache.getChildren( ontologyService, Collections.singleton( term1 ), true, true ); verify( ontologyService ).getChildren( Collections.singleton( term1 ), true, true ); - // a k-3 subset exist (i.e. [term1]) but only via enumeration + ontologyCache.setMinSubsetSize( 2 ); + + // a subset of size 1 exists, but it cannot be used ontologyCache.getChildren( ontologyService, Arrays.asList( term1, term2, term3, term4 ), true, true ); - verify( ontologyService, atMostOnce() ).getChildren( Collections.singleton( term1 ), true, true ); + verify( ontologyService ).getChildren( new HashSet<>( Arrays.asList( term1, term2, term3, term4 ) ), true, true ); } } \ No newline at end of file From 3db80a6c0c467109c186f50ec2796465289eaac7 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 21 Mar 2024 13:32:23 -0700 Subject: [PATCH 044/105] Prioritize results with labels and highest scores Handle missing URI scores and labels in the OntologySearchSource. --- .../gemma/core/ontology/OntologyService.java | 10 ++-- .../core/ontology/OntologyServiceImpl.java | 57 +++++++++++-------- 2 files changed, 36 insertions(+), 31 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java index dabd33995f..c640547e35 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java @@ -17,14 +17,12 @@ import ubic.basecode.ontology.model.OntologyProperty; import ubic.basecode.ontology.model.OntologyTerm; import ubic.gemma.core.search.SearchException; -import ubic.gemma.model.common.description.Characteristic; -import ubic.gemma.model.expression.biomaterial.BioMaterial; -import ubic.gemma.model.expression.experiment.ExpressionExperiment; -import ubic.gemma.model.genome.Taxon; import ubic.gemma.model.common.description.CharacteristicValueObject; +import ubic.gemma.model.genome.Taxon; import javax.annotation.Nullable; import java.util.Collection; +import java.util.LinkedHashSet; import java.util.Map; import java.util.Set; @@ -84,13 +82,13 @@ Collection findExperimentsCharacteristicTags( String /** * @return terms which are allowed for use in the Category of a Characteristic */ - Collection getCategoryTerms(); + Set getCategoryTerms(); /** * * @return terms allowed for the predicate (relationship) in a Characteristic */ - Collection getRelationTerms(); + Set getRelationTerms(); /** * Obtain the parents of a collection of terms. 
diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java index 0390553602..4052a2af93 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java @@ -79,7 +79,6 @@ public class OntologyServiceImpl implements OntologyService, InitializingBean { private static final Log log = LogFactory.getLog( OntologyServiceImpl.class.getName() ); - private static final String PARENTS_CACHE_NAME = "OntologyService.parents", CHILDREN_CACHE_NAME = "OntologyService.children"; @@ -275,7 +274,7 @@ public Collection findTerms( String search ) throws BaseCodeOntolo return results; } - results = searchInThreads( ontology -> ontology.findTerm( query ).stream().collect( Collectors.toSet() ), query ); + results = searchInThreads( ontology -> ontology.findTerm( query ), query ); if ( geneOntologyService.isOntologyLoaded() ) { try { @@ -285,7 +284,7 @@ public Collection findTerms( String search ) throws BaseCodeOntolo } } - return results; + return pickBest( results ); } @Override @@ -299,7 +298,7 @@ public Collection findTermsInexact( String givenQuery String queryString = OntologySearch.stripInvalidCharacters( givenQueryString ); if ( StringUtils.isBlank( queryString ) ) { OntologyServiceImpl.log.warn( "The query was not valid (ended up being empty): " + givenQueryString ); - return new HashSet<>(); + return Collections.emptySet(); } if ( OntologyServiceImpl.log.isDebugEnabled() ) { @@ -363,9 +362,9 @@ public Collection findTermsInexact( String givenQuery countOccurrencesTimerAfter.stop(); // Sort the results rather elaborately. - Collection sortedResults = results.values().stream() + LinkedHashSet sortedResults = results.values().stream() .sorted( getCharacteristicComparator( queryString ) ) - .collect( Collectors.toList() ); + .collect( Collectors.toCollection( LinkedHashSet::new ) ); watch.stop(); @@ -396,9 +395,9 @@ private Set getParentsOrChildren( Collection terms, return Collections.emptySet(); } Set toQuery = new HashSet<>( terms ); - Set results = new HashSet<>(); + List results = new ArrayList<>(); while ( !toQuery.isEmpty() ) { - Set newResults = combineInThreads( os -> { + List newResults = combineInThreads( os -> { StopWatch timer = StopWatch.createStarted(); try { return parents ? ontologyCache.getParents( os, toQuery, direct, includeAdditionalProperties ) @@ -425,11 +424,11 @@ private Set getParentsOrChildren( Collection terms, toQuery.clear(); } } - return results; + return pickBest( results ); } @Override - public Collection getCategoryTerms() { + public Set getCategoryTerms() { return categoryTerms.stream() .map( term -> { String termUri = term.getUri(); @@ -449,7 +448,7 @@ public Collection getCategoryTerms() { @Override - public Collection getRelationTerms() { + public Set getRelationTerms() { // FIXME: it's not quite like categoryTerms so this map operation is probably not needed at all, the relations don't come from any particular ontology return Collections.unmodifiableSet( relationTerms ); } @@ -477,8 +476,8 @@ public OntologyTerm getTerm( String uri ) { @Override public Set getTerms( Collection uris ) { Set distinctUris = uris instanceof Set ? 
( Set ) uris : new HashSet<>( uris ); - return combineInThreads( os -> distinctUris.stream().map( os::getTerm ).filter( Objects::nonNull ).collect( Collectors.toSet() ), - String.format( "terms for %d URIs", uris.size() ) ); + return pickBest( combineInThreads( os -> distinctUris.stream().map( os::getTerm ).filter( Objects::nonNull ).collect( Collectors.toSet() ), + String.format( "terms for %d URIs", uris.size() ) ) ); } /** @@ -699,13 +698,6 @@ private Collection findCharacteristicsFromOntology( S }, ontologyServicesToUse, "terms matching " + searchQuery ); } - private String foundValueKey( Characteristic c ) { - if ( StringUtils.isNotBlank( c.getValueUri() ) ) { - return c.getValueUri().toLowerCase(); - } - return c.getValue().toLowerCase(); - } - /** * Allow us to store gene information as a characteristic associated with our entities. This doesn't work so well * for non-ncbi genes. @@ -903,11 +895,11 @@ private interface SearchFunction { /** * Similar to {@link #combineInThreads(Function, String)}, but also handles {@link OntologySearchException}. */ - private Set searchInThreads( SearchFunction function, String query ) throws BaseCodeOntologySearchException { + private List searchInThreads( SearchFunction function, String query ) throws BaseCodeOntologySearchException { return searchInThreads( function, ontologyServices, query ); } - private Set searchInThreads( SearchFunction function, List ontologyServices, String query ) throws BaseCodeOntologySearchException { + private List searchInThreads( SearchFunction function, List ontologyServices, String query ) throws BaseCodeOntologySearchException { try { return combineInThreads( os -> { try { @@ -936,7 +928,7 @@ public synchronized OntologySearchException getCause() { } } - private Set combineInThreads( Function> work, String query ) { + private List combineInThreads( Function> work, String query ) { return combineInThreads( work, ontologyServices, query ); } @@ -945,7 +937,7 @@ private Set combineInThreads( Function * The functions are evaluated using Gemma's short-lived task executor. */ - private Set combineInThreads( Function> work, List ontologyServices, String query ) { + private List combineInThreads( Function> work, List ontologyServices, String query ) { StopWatch timer = StopWatch.createStarted(); List>> futures = new ArrayList<>( ontologyServices.size() ); ExecutorCompletionService> completionService = new ExecutorCompletionService<>( taskExecutor ); @@ -954,7 +946,7 @@ private Set combineInThreads( Function work.apply( os ) ) ); } } - Set children = new HashSet<>(); + List children = new ArrayList<>(); try { for ( int i = 0; i < futures.size(); i++ ) { Future> future; @@ -992,4 +984,19 @@ private Set combineInThreads( Function ontologyTermComparator = Comparator + .comparing( ( OntologyTerm t ) -> t.getLabel() != null, Comparator.reverseOrder() ) // prefer terms with rdf:label + .thenComparing( OntologyTerm::getScore, Comparator.nullsLast( Comparator.reverseOrder() ) ); // prefer the highest score + + /** + * Deduplicate terms in the given collection giving preference to those with a if available and the + * highest {@link OntologyTerm#getScore()}. 
+ */ + private LinkedHashSet pickBest( Collection terms ) { + return terms.stream().sorted( ontologyTermComparator ).collect( Collectors.toCollection( LinkedHashSet::new ) ); + } } \ No newline at end of file From 25f1ad699e70e956be19a8593731a3fc1b897af3 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 21 Mar 2024 14:55:27 -0700 Subject: [PATCH 045/105] Monitor the local task pool --- .../binder/ThreadPoolTaskExecutorMetrics.java | 51 +++++++++++++++++++ .../gemma/applicationContext-serviceBeans.xml | 4 ++ 2 files changed, 55 insertions(+) create mode 100644 gemma-core/src/main/java/ubic/gemma/core/metrics/binder/ThreadPoolTaskExecutorMetrics.java diff --git a/gemma-core/src/main/java/ubic/gemma/core/metrics/binder/ThreadPoolTaskExecutorMetrics.java b/gemma-core/src/main/java/ubic/gemma/core/metrics/binder/ThreadPoolTaskExecutorMetrics.java new file mode 100644 index 0000000000..1b9778e290 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/metrics/binder/ThreadPoolTaskExecutorMetrics.java @@ -0,0 +1,51 @@ +package ubic.gemma.core.metrics.binder; + +import io.micrometer.core.instrument.Gauge; +import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.binder.MeterBinder; +import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor; + +import javax.annotation.Nullable; +import javax.annotation.ParametersAreNonnullByDefault; + +@ParametersAreNonnullByDefault +public class ThreadPoolTaskExecutorMetrics implements MeterBinder { + + private final ThreadPoolTaskExecutor executor; + + @Nullable + private String poolName; + + public ThreadPoolTaskExecutorMetrics( ThreadPoolTaskExecutor executor ) { + this.executor = executor; + } + + @Override + public void bindTo( MeterRegistry registry ) { + String poolName = this.poolName != null ? this.poolName : executor.getThreadNamePrefix(); + Gauge.builder( "threadPool.corePoolSize", executor, ThreadPoolTaskExecutor::getCorePoolSize ) + .description( "Core pool size" ) + .tags( "pool", poolName ) + .register( registry ); + Gauge.builder( "threadPool.maxPoolSize", executor, e -> e.getMaxPoolSize() == Integer.MAX_VALUE ? 
Double.POSITIVE_INFINITY : e.getMaxPoolSize() ) + .description( "Maximum pool size" ) + .tags( "pool", poolName ) + .register( registry ); + Gauge.builder( "threadPool.poolSize", executor, ThreadPoolTaskExecutor::getPoolSize ) + .description( "Pool size" ) + .tags( "pool", poolName ) + .register( registry ); + Gauge.builder( "threadPool.activeCount", executor, ThreadPoolTaskExecutor::getActiveCount ) + .description( "Number of active threads" ) + .tags( "pool", poolName ) + .register( registry ); + Gauge.builder( "threadPool.queueSize", executor, e -> e.getThreadPoolExecutor().getQueue().size() ) + .description( "Queue size" ) + .tags( "pool", poolName ) + .register( registry ); + } + + public void setPoolName( String poolName ) { + this.poolName = poolName; + } +} diff --git a/gemma-core/src/main/resources/ubic/gemma/applicationContext-serviceBeans.xml b/gemma-core/src/main/resources/ubic/gemma/applicationContext-serviceBeans.xml index 489761263f..86d2c049f4 100644 --- a/gemma-core/src/main/resources/ubic/gemma/applicationContext-serviceBeans.xml +++ b/gemma-core/src/main/resources/ubic/gemma/applicationContext-serviceBeans.xml @@ -70,6 +70,10 @@ + + + + From ef73373383c79a80c19aae4f74d1b417499ea04a Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 21 Mar 2024 15:34:27 -0700 Subject: [PATCH 046/105] Add ExpressionExperiment.name to the search index to enable highlights --- .../model/expression/experiment/ExpressionExperiment.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java index e8a6891298..66a9cf2b65 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java @@ -16,7 +16,6 @@ import gemma.gsec.model.SecuredNotChild; import lombok.extern.apachecommons.CommonsLog; -import org.hibernate.Hibernate; import org.hibernate.proxy.HibernateProxyHelper; import org.hibernate.search.annotations.*; import ubic.gemma.model.common.auditAndSecurity.curation.Curatable; @@ -130,7 +129,7 @@ public Long getId() { } @Override - @Field + @Field(store = Store.YES) public String getName() { return super.getName(); } From f60339e21ed6a6e500f765002e4aa1d7cb450d19 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 21 Mar 2024 15:38:43 -0700 Subject: [PATCH 047/105] Include ExpressionExperiment.accession to the search index --- .../gemma/core/search/source/HibernateSearchSource.java | 7 ++++--- .../gemma/model/expression/experiment/BioAssaySet.java | 1 - .../model/expression/experiment/ExpressionExperiment.java | 8 ++++++++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java index cf41a0dea3..2e49db42ea 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java @@ -58,9 +58,10 @@ public class HibernateSearchSource implements SearchSource, InitializingBean { "fullTextUri", "keywords.term", "meshTerms.term", "pubAccession.accession", "title" }; private static String[] DATASET_FIELDS = { - "shortName", "name", "description", "bioAssays.name", 
"bioAssays.description", "bioAssays.accession.accession", - "bioAssays.sampleUsed.name", "bioAssays.sampleUsed.characteristics.value", - "bioAssays.sampleUsed.characteristics.valueUri", "characteristics.value", "characteristics.valueUri", + "shortName", "name", "description", "accession.accession", + "bioAssays.name", "bioAssays.description", "bioAssays.accession.accession", "bioAssays.sampleUsed.name", + "bioAssays.sampleUsed.characteristics.value", "bioAssays.sampleUsed.characteristics.valueUri", + "characteristics.value", "characteristics.valueUri", "experimentalDesign.name", "experimentalDesign.description", "experimentalDesign.experimentalFactors.name", "experimentalDesign.experimentalFactors.description", "experimentalDesign.experimentalFactors.category.categoryUri", diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/BioAssaySet.java b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/BioAssaySet.java index 9ca767d21c..d0b7b601e0 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/BioAssaySet.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/BioAssaySet.java @@ -24,7 +24,6 @@ import ubic.gemma.model.expression.bioAssay.BioAssay; import javax.annotation.Nullable; -import java.util.Collection; import java.util.HashSet; import java.util.Set; diff --git a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java index 66a9cf2b65..d20654d471 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java +++ b/gemma-core/src/main/java/ubic/gemma/model/expression/experiment/ExpressionExperiment.java @@ -22,6 +22,7 @@ import ubic.gemma.model.common.auditAndSecurity.curation.CurationDetails; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.description.Characteristic; +import ubic.gemma.model.common.description.DatabaseEntry; import ubic.gemma.model.common.quantitationtype.QuantitationType; import ubic.gemma.model.expression.bioAssay.BioAssay; import ubic.gemma.model.expression.bioAssayData.MeanVarianceRelation; @@ -146,6 +147,13 @@ public Set getBioAssays() { return super.getBioAssays(); } + @Nullable + @Override + @IndexedEmbedded + public DatabaseEntry getAccession() { + return super.getAccession(); + } + @Override @IndexedEmbedded public BibliographicReference getPrimaryPublication() { From ebd6018fd73811a1579e9a7f5ec8b6f6a8f826b9 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 21 Mar 2024 16:25:09 -0700 Subject: [PATCH 048/105] Add a cache for searching individual ontologies --- .../gemma/core/ontology/OntologyCache.java | 49 ++++++++++++++----- .../core/ontology/OntologyServiceImpl.java | 15 +++--- gemma-core/src/main/resources/ehcache.xml | 1 + .../core/ontology/OntologyCacheTest.java | 2 +- 4 files changed, 48 insertions(+), 19 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java index 1a99e7b4a6..fd27df8d8f 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyCache.java @@ -8,6 +8,7 @@ import org.springframework.util.Assert; import ubic.basecode.ontology.model.OntologyTerm; import ubic.basecode.ontology.providers.OntologyService; +import 
ubic.basecode.ontology.search.OntologySearchException; import ubic.gemma.persistence.util.CacheUtils; import javax.annotation.Nullable; @@ -23,24 +24,16 @@ @CommonsLog class OntologyCache { - private final Cache parentsCache, childrenCache; + private final Cache searchCache, parentsCache, childrenCache; - private long lockTimeoutMillis = 5000; private int minSubsetSize = 1; - OntologyCache( Cache parentsCache, Cache childrenCache ) { + OntologyCache( Cache searchCache, Cache parentsCache, Cache childrenCache ) { + this.searchCache = searchCache; this.parentsCache = parentsCache; this.childrenCache = childrenCache; } - /** - * Maximum amount of time in milliseconds to wait for a cache entry to be computed by another thread. If the timeout - * is exceeded, no results will be returned. - */ - public void setLockTimeoutMillis( long lockTimeoutMillis ) { - this.lockTimeoutMillis = lockTimeoutMillis; - } - /** * Minimum size of subsets to consider when enumerating cache keys. */ @@ -49,6 +42,24 @@ void setMinSubsetSize( int minSubsetSize ) { this.minSubsetSize = minSubsetSize; } + public Collection findTerm( OntologyService ontology, String query ) throws OntologySearchException { + SearchCacheKey key = new SearchCacheKey( ontology, query ); + + try ( CacheUtils.Lock ignored = CacheUtils.acquireReadLock( searchCache, key ) ) { + Cache.ValueWrapper value = searchCache.get( key ); + if ( value != null ) { + //noinspection unchecked + return ( Collection ) value.get(); + } + } + + try ( CacheUtils.Lock ignored = CacheUtils.acquireWriteLock( searchCache, key ) ) { + Collection results = ontology.findTerm( query ); + searchCache.put( key, results ); + return results; + } + } + /** * Obtain the parents of a given set of terms. */ @@ -63,6 +74,14 @@ Set getChildren( OntologyService os, Collection term return getParentsOrChildren( os, terms, direct, includeAdditionalProperties, childrenCache, false ); } + /** + * Clear the search cache for all entries related to a given ontology service. + * @param serv + */ + public void clearSearchCacheByOntology( OntologyService serv ) { + CacheUtils.evictIf( searchCache, key -> ( ( SearchCacheKey ) key ).getOntologyService().equals( serv ) ); + } + /** * Clear the cache for all entries related to a given ontology service. */ @@ -178,7 +197,7 @@ private ParentsOrChildrenCacheKey lookupMaximalSubsetByEnumeratingKeys( Cache ca @Value @EqualsAndHashCode(cacheStrategy = EqualsAndHashCode.CacheStrategy.LAZY) private static class ParentsOrChildrenCacheKey { - ubic.basecode.ontology.providers.OntologyService ontologyService; + OntologyService ontologyService; Set terms; boolean direct; boolean includeAdditionalProperties; @@ -190,4 +209,10 @@ public String toString() { includeAdditionalProperties ? 
"subClassOf and " + ontologyService.getAdditionalPropertyUris().size() + " additional properties" : "only subClassOf" ); } } + + @Value + private static class SearchCacheKey { + OntologyService ontologyService; + String query; + } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java index 4052a2af93..e533bf4899 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java @@ -80,6 +80,7 @@ public class OntologyServiceImpl implements OntologyService, InitializingBean { private static final Log log = LogFactory.getLog( OntologyServiceImpl.class.getName() ); private static final String + SEARCH_CACHE_NAME = "OntologyService.search", PARENTS_CACHE_NAME = "OntologyService.parents", CHILDREN_CACHE_NAME = "OntologyService.children"; @@ -122,7 +123,7 @@ public class OntologyServiceImpl implements OntologyService, InitializingBean { @Override public void afterPropertiesSet() throws Exception { - ontologyCache = new OntologyCache( cacheManager.getCache( PARENTS_CACHE_NAME ), cacheManager.getCache( CHILDREN_CACHE_NAME ) ); + ontologyCache = new OntologyCache( cacheManager.getCache( SEARCH_CACHE_NAME ), cacheManager.getCache( PARENTS_CACHE_NAME ), cacheManager.getCache( CHILDREN_CACHE_NAME ) ); if ( ontologyServiceFactories != null && autoLoadOntologies ) { List enabledOntologyServices = ontologyServiceFactories.stream() .map( factory -> { @@ -274,11 +275,11 @@ public Collection findTerms( String search ) throws BaseCodeOntolo return results; } - results = searchInThreads( ontology -> ontology.findTerm( query ), query ); + results = searchInThreads( ontology -> ontologyCache.findTerm( ontology, query ), query ); if ( geneOntologyService.isOntologyLoaded() ) { try { - results.addAll( geneOntologyService.findTerm( search ) ); + results.addAll( ontologyCache.findTerm( geneOntologyService, search ) ); } catch ( OntologySearchException e ) { throw new BaseCodeOntologySearchException( e ); } @@ -320,7 +321,7 @@ public Collection findTermsInexact( String givenQuery Set ontologySearchResults = new HashSet<>(); ontologySearchResults.addAll( searchInThreads( service -> { Collection results2; - results2 = service.findTerm( queryString ); + results2 = ontologyCache.findTerm( service, queryString ); if ( results2.isEmpty() ) return Collections.emptySet(); return CharacteristicValueObject.characteristic2CharacteristicVO( this.termsToCharacteristics( results2 ) ); @@ -331,7 +332,7 @@ public Collection findTermsInexact( String givenQuery if ( geneOntologyService.isOntologyLoaded() ) { try { ontologySearchResults.addAll( CharacteristicValueObject.characteristic2CharacteristicVO( - this.termsToCharacteristics( geneOntologyService.findTerm( queryString ) ) ) ); + this.termsToCharacteristics( ontologyCache.findTerm( geneOntologyService, queryString ) ) ) ); } catch ( OntologySearchException e ) { throw new BaseCodeOntologySearchException( e ); } @@ -498,6 +499,7 @@ public void reindexAllOntologies() { OntologyServiceImpl.log.info( "Reindexing: " + serv ); try { serv.index( true ); + ontologyCache.clearSearchCacheByOntology( serv ); } catch ( Exception e ) { OntologyServiceImpl.log.error( "Failed to index " + serv + ": " + e.getMessage(), e ); } @@ -514,6 +516,7 @@ public void reinitializeAndReindexAllOntologies() { for ( ubic.basecode.ontology.providers.OntologyService serv : this.ontologyServices ) { 
ontologyTaskExecutor.execute( () -> { serv.initialize( true, true ); + ontologyCache.clearSearchCacheByOntology( serv ); ontologyCache.clearByOntology( serv ); } ); } @@ -681,7 +684,7 @@ private Collection findCharacteristicsFromOntology( S } return searchInThreads( ontologyService -> { - Collection ontologyTerms = ontologyService.findTerm( searchQuery ); + Collection ontologyTerms = ontologyCache.findTerm( ontologyService, searchQuery ); Collection characteristicsFromOntology = new HashSet<>(); for ( OntologyTerm ontologyTerm : ontologyTerms ) { // if the ontology term wasnt already found in the database diff --git a/gemma-core/src/main/resources/ehcache.xml b/gemma-core/src/main/resources/ehcache.xml index 419ccc4c0b..3751658a61 100644 --- a/gemma-core/src/main/resources/ehcache.xml +++ b/gemma-core/src/main/resources/ehcache.xml @@ -457,6 +457,7 @@ + diff --git a/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyCacheTest.java b/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyCacheTest.java index 1e058305a8..72d5a5c8d2 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyCacheTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyCacheTest.java @@ -25,7 +25,7 @@ public class OntologyCacheTest { @Before public void setUp() { ontologyService = mock( OntologyService.class ); - ontologyCache = new OntologyCache( new ConcurrentMapCache( "parents" ), new ConcurrentMapCache( "children" ) ); + ontologyCache = new OntologyCache( new ConcurrentMapCache( "search" ), new ConcurrentMapCache( "parents" ), new ConcurrentMapCache( "children" ) ); term1 = new OntologyTermSimple( "http://example.com/term1", "term1" ); term2 = new OntologyTermSimple( "http://example.com/term2", "term2" ); term3 = new OntologyTermSimple( "http://example.com/term3", "term3" ); From 4df47886bad8cc6cfa69f82774c8ed579a001d25 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 22 Mar 2024 12:47:44 -0700 Subject: [PATCH 049/105] Use the EE2AD table for counting technology types --- .../ExpressionExperimentDaoImpl.java | 69 ++++++++----------- 1 file changed, 30 insertions(+), 39 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index f531ec318e..e7a6f2cd1d 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -20,6 +20,7 @@ import gemma.gsec.acl.domain.AclObjectIdentity; import gemma.gsec.acl.domain.AclSid; +import gemma.gsec.util.SecurityUtil; import lombok.Value; import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.lang3.time.StopWatch; @@ -940,22 +941,7 @@ public Map> getArrayDesignsUsed( Collection @Override public Map getTechnologyTypeUsageFrequency() { - Query query = getSessionFactory().getCurrentSession().createQuery( - "select a.technologyType, oa.technologyType, count(distinct ee) from ExpressionExperiment ee " - + "join ee.bioAssays ba " - + "join ba.arrayDesignUsed a " - + "left join ba.originalPlatform oa " - + AclQueryUtils.formAclRestrictionClause( "ee.id" ) + " " - + "and (oa is null or a.technologyType <> oa.technologyType) " // ignore noop switch - + formNonTroubledClause( "ee" ) - + formNonTroubledClause( "a" ) + " " - + "group 
by a.technologyType, oa.technologyType" ); - AclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); - //noinspection unchecked - List result = query - .setCacheable( true ) - .list(); - return aggregateTechnologyTypeCounts( result ); + return getTechnologyTypeUsageFrequencyInternal( null ); } @Override @@ -963,32 +949,37 @@ public Map getTechnologyTypeUsageFrequency( Collection oa.technologyType) " // ignore noop switch - + formNonTroubledClause( "ee" ) - + formNonTroubledClause( "a" ) + " " - + "group by a.technologyType, oa.technologyType" ) - .setCacheable( true ); - return aggregateTechnologyTypeCounts( listByBatch( q, "ids", eeIds, getBatchSize() ) ); + return getTechnologyTypeUsageFrequencyInternal( eeIds ); } - private Map aggregateTechnologyTypeCounts( List result ) { - Map counts = new HashMap<>(); - for ( Object[] row : result ) { - TechnologyType tt = ( TechnologyType ) row[0]; - TechnologyType originalTt = ( TechnologyType ) row[1]; - Long count = ( Long ) row[2]; - counts.compute( tt, ( k, v ) -> v == null ? count : v + count ); - if ( originalTt != null ) { - counts.compute( originalTt, ( k, v ) -> v == null ? count : v + count ); - } + private Map getTechnologyTypeUsageFrequencyInternal( @Nullable Collection eeIds ) { + Query q = getSessionFactory().getCurrentSession() + .createSQLQuery( "select AD.TECHNOLOGY_TYPE as TT, count(distinct EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN EE2AD " + + "join ARRAY_DESIGN AD on EE2AD.ARRAY_DESIGN_FK = AD.ID " + + "join CURATION_DETAILS ADCD on AD.CURATION_DETAILS_FK = ADCD.ID " + + "join INVESTIGATION I on I.ID = EE2AD.EXPRESSION_EXPERIMENT_FK " + + "join CURATION_DETAILS EECD on EECD.ID = I.CURATION_DETAILS_FK " + + EE2CAclQueryUtils.formNativeAclJoinClause( "EE2AD.EXPRESSION_EXPERIMENT_FK" ) + " " + + "where EE2AD.EXPRESSION_EXPERIMENT_FK is not NULL " + + ( eeIds != null ? "and EE2AD.EXPRESSION_EXPERIMENT_FK in :ids " : "" ) + + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "EE2AD.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + " " + + ( !SecurityUtil.isUserAdmin() ? 
"and not ADCD.TROUBLED and not EECD.TROUBLED " : "" ) + + "group by AD.TECHNOLOGY_TYPE" ) + .addScalar( "TT", StandardBasicTypes.STRING ) + .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) + .addSynchronizedQuerySpace( EE2AD_QUERY_SPACE ) + .addSynchronizedEntityClass( ExpressionExperiment.class ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .setCacheable( true ); + EE2CAclQueryUtils.addAclParameters( q, ExpressionExperiment.class ); + List results; + if ( eeIds != null ) { + results = listByBatch( q, "ids", eeIds, getBatchSize() ); + } else { + //noinspection unchecked + results = q.list(); } - return counts; + return results.stream().collect( Collectors.groupingBy( row -> TechnologyType.valueOf( ( String ) row[0] ), Collectors.summingLong( row -> ( Long ) row[1] ) ) ); } @Override From 160a93262e833e916d195346a466da58adc39f14 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 22 Mar 2024 12:49:16 -0700 Subject: [PATCH 050/105] Do not filter out troubled platforms when counting for admins --- .../expression/experiment/ExpressionExperimentDaoImpl.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index e7a6f2cd1d..2532e268e5 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -1013,12 +1013,12 @@ private Map getPlatformsUsageFrequency( @Nullable Collection< + "join ARRAY_DESIGN ad on ee2ad.ARRAY_DESIGN_FK = ad.ID " + "join CURATION_DETAILS adcd on adcd.ID = ad.CURATION_DETAILS_FK " + EE2CAclQueryUtils.formNativeAclJoinClause( "ee2ad.EXPRESSION_EXPERIMENT_FK" ) + " " - + "where not eecd.TROUBLED and not adcd.TROUBLED " - + "and ee2ad.IS_ORIGINAL_PLATFORM = :original " + + "where ee2ad.IS_ORIGINAL_PLATFORM = :original " // exclude noop switch + ( original ? " and ee2ad.ARRAY_DESIGN_FK not in (select ARRAY_DESIGN_FK from EXPRESSION_EXPERIMENT2ARRAY_DESIGN where EXPRESSION_EXPERIMENT_FK = ee2ad.EXPRESSION_EXPERIMENT_FK and ARRAY_DESIGN_FK = ee2ad.ARRAY_DESIGN_FK and not IS_ORIGINAL_PLATFORM) " : "" ) + ( eeIds != null ? "and ee2ad.EXPRESSION_EXPERIMENT_FK in :ids " : "" ) + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "ee2ad.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + " " + + ( !SecurityUtil.isUserAdmin() ? 
"and not eecd.TROUBLED and not adcd.TROUBLED " : "" ) + "group by ad.ID " + "order by EE_COUNT desc" ) .addEntity( ArrayDesign.class ) From 39761c2a9b982cc91e242c7a3a3cea145396fd27 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 22 Mar 2024 12:58:19 -0700 Subject: [PATCH 051/105] Remove unnecessary order by when all results are collected into a map --- .../experiment/ExpressionExperimentDaoImpl.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index 2532e268e5..7376819b35 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -960,8 +960,7 @@ private Map getTechnologyTypeUsageFrequencyInternal( @Null + "join INVESTIGATION I on I.ID = EE2AD.EXPRESSION_EXPERIMENT_FK " + "join CURATION_DETAILS EECD on EECD.ID = I.CURATION_DETAILS_FK " + EE2CAclQueryUtils.formNativeAclJoinClause( "EE2AD.EXPRESSION_EXPERIMENT_FK" ) + " " - + "where EE2AD.EXPRESSION_EXPERIMENT_FK is not NULL " - + ( eeIds != null ? "and EE2AD.EXPRESSION_EXPERIMENT_FK in :ids " : "" ) + + ( eeIds != null ? "where EE2AD.EXPRESSION_EXPERIMENT_FK in :ids " : "where EE2AD.EXPRESSION_EXPERIMENT_FK is not NULL " ) + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "EE2AD.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + " " + ( !SecurityUtil.isUserAdmin() ? "and not ADCD.TROUBLED and not EECD.TROUBLED " : "" ) + "group by AD.TECHNOLOGY_TYPE" ) @@ -1020,7 +1019,8 @@ private Map getPlatformsUsageFrequency( @Nullable Collection< + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "ee2ad.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + " " + ( !SecurityUtil.isUserAdmin() ? "and not eecd.TROUBLED and not adcd.TROUBLED " : "" ) + "group by ad.ID " - + "order by EE_COUNT desc" ) + // no need to sort results if limiting, we're collecting in a map + + ( maxResults > 0 ? 
"order by EE_COUNT desc" : "" ) ) .addEntity( ArrayDesign.class ) .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) // ensures that the cache is invalidated when the ee2ad table is regenerated @@ -1172,8 +1172,7 @@ public Map getPerTaxonCount() { String queryString = "select ee.taxon, count(distinct ee) as EE_COUNT from ExpressionExperiment ee " + AclQueryUtils.formAclRestrictionClause( "ee.id" ) + " " + formNonTroubledClause( "ee" ) + " " - + "group by ee.taxon " - + "order by EE_COUNT desc"; + + "group by ee.taxon"; Query query = this.getSessionFactory().getCurrentSession().createQuery( queryString ); @@ -1197,8 +1196,7 @@ public Map getPerTaxonCount( Collection ids ) { Query query = this.getSessionFactory().getCurrentSession() .createQuery( "select ee.taxon, count(distinct ee) as EE_COUNT from ExpressionExperiment ee " + "where ee.id in :eeIds " - + "group by ee.taxon " - + "order by EE_COUNT desc" ) + + "group by ee.taxon" ) .setCacheable( true ); List list = listByBatch( query, "eeIds", ids, getBatchSize() ); return list.stream() From 6f4509b78e0a3faea3fbad0940d7cd8382dca21a Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 22 Mar 2024 13:01:52 -0700 Subject: [PATCH 052/105] rest: Add missing ordering by usage frequency for the getDatasetsTaxa() endpoint --- gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java | 1 + 1 file changed, 1 insertion(+) diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 7e024013ea..b06285da96 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -512,6 +512,7 @@ public QueriedAndFilteredResponseDataObject } return Responder.queryAndFilter( expressionExperimentService.getTaxaUsageFrequency( filters, extraIds ) .entrySet().stream() + .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) .map( e -> new TaxonWithUsageStatisticsValueObject( e.getKey(), e.getValue() ) ) .collect( Collectors.toList() ), query, filters, new String[] { "id" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ) ); } From c5370e6850d338eb38afb8d9dadda83021cc7542 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 22 Mar 2024 13:36:37 -0700 Subject: [PATCH 053/105] Avoid needless ACL filtering if specific EE IDs are given --- .../ExpressionExperimentDaoImpl.java | 182 +++++++++++------- 1 file changed, 116 insertions(+), 66 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index 7376819b35..f0b0be9215 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -621,6 +621,7 @@ private List getAnnotationsByLevel( ExpressionExperiment express @Override public Map getCategoriesUsageFrequency( @Nullable Collection eeIds, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris, int maxResults ) { + boolean doAclFiltering = eeIds == null; if ( eeIds != null && eeIds.isEmpty() ) { return Collections.emptyMap(); } @@ -648,16 +649,23 @@ public Map 
getCategoriesUsageFrequency( @Nullable Collecti excludedTermUris = excludedTermUris.stream().filter( Objects::nonNull ).collect( Collectors.toList() ); } } - String query = "select T.CATEGORY as CATEGORY, T.CATEGORY_URI as CATEGORY_URI, count(distinct T.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2CHARACTERISTIC T " - + EE2CAclQueryUtils.formNativeAclJoinClause( "T.EXPRESSION_EXPERIMENT_FK" ) + " " - + "where T.EXPRESSION_EXPERIMENT_FK is not null "; + String query = "select T.CATEGORY as CATEGORY, T.CATEGORY_URI as CATEGORY_URI, count(distinct T.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2CHARACTERISTIC T "; + if ( doAclFiltering ) { + query += EE2CAclQueryUtils.formNativeAclJoinClause( "T.EXPRESSION_EXPERIMENT_FK" ) + " "; + } if ( eeIds != null ) { - query += " and T.EXPRESSION_EXPERIMENT_FK in :eeIds"; + query += "where T.EXPRESSION_EXPERIMENT_FK in :eeIds"; + } else { + query += "where T.EXPRESSION_EXPERIMENT_FK is not null"; } query += getExcludeUrisClause( excludedCategoryUris, excludedTermUris, excludeFreeTextCategories, excludeFreeTextTerms, excludeUncategorized, retainedTermUris ); - query += EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "T.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + " " - + "group by COALESCE(T.CATEGORY_URI, T.CATEGORY) " - + "order by EE_COUNT desc"; + if ( doAclFiltering ) { + query += EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "T.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ); + } + query += " group by COALESCE(T.CATEGORY_URI, T.CATEGORY)"; + if ( maxResults > 0 ) { + query += " order by EE_COUNT desc"; + } Query q = getSessionFactory().getCurrentSession().createSQLQuery( query ) .addScalar( "CATEGORY", StandardBasicTypes.STRING ) .addScalar( "CATEGORY_URI", StandardBasicTypes.STRING ) @@ -674,7 +682,9 @@ public Map getCategoriesUsageFrequency( @Nullable Collecti if ( retainedTermUris != null && !retainedTermUris.isEmpty() ) { q.setParameterList( "retainedTermUris", optimizeParameterList( retainedTermUris ) ); } - EE2CAclQueryUtils.addAclParameters( q, ExpressionExperiment.class ); + if ( doAclFiltering ) { + EE2CAclQueryUtils.addAclParameters( q, ExpressionExperiment.class ); + } q.setCacheable( true ); List result; if ( eeIds != null ) { @@ -715,6 +725,7 @@ private Map aggregateC( List result ) { */ @Override public Map getAnnotationsUsageFrequency( @Nullable Collection eeIds, @Nullable Class level, int maxResults, int minFrequency, @Nullable String category, @Nullable Collection excludedCategoryUris, @Nullable Collection excludedTermUris, @Nullable Collection retainedTermUris ) { + boolean doAclFiltering = eeIds == null; if ( eeIds != null && eeIds.isEmpty() ) { return Collections.emptyMap(); } @@ -742,11 +753,14 @@ public Map getAnnotationsUsageFrequency( @Nullable Collect excludedTermUris = excludedTermUris.stream().filter( Objects::nonNull ).collect( Collectors.toList() ); } } - String query = "select T.`VALUE` as `VALUE`, T.VALUE_URI as VALUE_URI, T.CATEGORY as CATEGORY, T.CATEGORY_URI as CATEGORY_URI, T.EVIDENCE_CODE as EVIDENCE_CODE, count(distinct T.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2CHARACTERISTIC T " - + EE2CAclQueryUtils.formNativeAclJoinClause( "T.EXPRESSION_EXPERIMENT_FK" ) + " " - + "where T.EXPRESSION_EXPERIMENT_FK is not null"; // this is necessary for the clause building since there might be no clause + String query = "select T.`VALUE` as `VALUE`, T.VALUE_URI as 
VALUE_URI, T.CATEGORY as CATEGORY, T.CATEGORY_URI as CATEGORY_URI, T.EVIDENCE_CODE as EVIDENCE_CODE, count(distinct T.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2CHARACTERISTIC T "; + if ( doAclFiltering ) { + query += EE2CAclQueryUtils.formNativeAclJoinClause( "T.EXPRESSION_EXPERIMENT_FK" ) + " "; + } if ( eeIds != null ) { - query += " and T.EXPRESSION_EXPERIMENT_FK in :eeIds"; + query += "where T.EXPRESSION_EXPERIMENT_FK in :eeIds"; + } else { + query += "where T.EXPRESSION_EXPERIMENT_FK is not null"; // this is necessary for the clause building since there might be no clause } if ( level != null ) { query += " and T.LEVEL = :level"; @@ -768,17 +782,24 @@ else if ( category.startsWith( "http://" ) ) { // all categories are requested, we may filter out excluded ones query += getExcludeUrisClause( excludedCategoryUris, excludedTermUris, excludeFreeTextCategories, excludeFreeTextTerms, excludeUncategorized, retainedTermUris ); } - query += EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "T.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + " " - + "group by " + if ( doAclFiltering ) { + query += EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "T.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ); + } + //language=HQL + query += " group by " // no need to group by category if a specific one is requested + ( category == null ? "COALESCE(T.CATEGORY_URI, T.CATEGORY), " : "" ) - + "COALESCE(T.VALUE_URI, T.`VALUE`) " - // if there are too many EE IDs, they will be retrieved by batch and filtered in-memory - + ( minFrequency > 1 && ( eeIds == null || eeIds.size() <= MAX_PARAMETER_LIST_SIZE ) ? "having EE_COUNT >= :minFrequency " : "" ); - if ( retainedTermUris != null && !retainedTermUris.isEmpty() ) { - query += " or VALUE_URI in (:retainedTermUris)"; + + "COALESCE(T.VALUE_URI, T.`VALUE`)"; + // if there are too many EE IDs, they will be retrieved by batch and filtered in-memory + if ( minFrequency > 1 && ( eeIds == null || eeIds.size() <= MAX_PARAMETER_LIST_SIZE ) ) { + query += " having EE_COUNT >= :minFrequency"; + if ( retainedTermUris != null && !retainedTermUris.isEmpty() ) { + query += " or VALUE_URI in (:retainedTermUris)"; + } + } + if ( maxResults > 0 ) { + query += " order by EE_COUNT desc"; } - query += "order by EE_COUNT desc"; Query q = getSessionFactory().getCurrentSession().createSQLQuery( query ) .addScalar( "VALUE", StandardBasicTypes.STRING ) .addScalar( "VALUE_URI", StandardBasicTypes.STRING ) @@ -808,7 +829,9 @@ else if ( category.startsWith( "http://" ) ) { if ( minFrequency > 1 && ( eeIds == null || eeIds.size() <= MAX_PARAMETER_LIST_SIZE ) ) { q.setParameter( "minFrequency", minFrequency ); } - EE2CAclQueryUtils.addAclParameters( q, ExpressionExperiment.class ); + if ( doAclFiltering ) { + EE2CAclQueryUtils.addAclParameters( q, ExpressionExperiment.class ); + } q.setCacheable( true ); List result; if ( eeIds != null ) { @@ -816,7 +839,7 @@ else if ( category.startsWith( "http://" ) ) { result = listByBatch( q, "eeIds", eeIds, 2048 ); if ( minFrequency > 1 || maxResults > 0 ) { return aggregate( result ).entrySet().stream() - .filter( e -> e.getValue() >= minFrequency ) + .filter( e -> e.getValue() >= minFrequency || ( retainedTermUris != null && retainedTermUris.contains( e.getKey().getValueUri() ) ) ) .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) .limit( maxResults > 0 ? 
maxResults : Long.MAX_VALUE ) .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue, ( a, b ) -> b, () -> new TreeMap<>( Characteristic.getByCategoryAndValueComparator() ) ) ); @@ -941,18 +964,6 @@ public Map> getArrayDesignsUsed( Collection @Override public Map getTechnologyTypeUsageFrequency() { - return getTechnologyTypeUsageFrequencyInternal( null ); - } - - @Override - public Map getTechnologyTypeUsageFrequency( Collection eeIds ) { - if ( eeIds.isEmpty() ) { - return Collections.emptyMap(); - } - return getTechnologyTypeUsageFrequencyInternal( eeIds ); - } - - private Map getTechnologyTypeUsageFrequencyInternal( @Nullable Collection eeIds ) { Query q = getSessionFactory().getCurrentSession() .createSQLQuery( "select AD.TECHNOLOGY_TYPE as TT, count(distinct EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN EE2AD " + "join ARRAY_DESIGN AD on EE2AD.ARRAY_DESIGN_FK = AD.ID " @@ -960,7 +971,7 @@ private Map getTechnologyTypeUsageFrequencyInternal( @Null + "join INVESTIGATION I on I.ID = EE2AD.EXPRESSION_EXPERIMENT_FK " + "join CURATION_DETAILS EECD on EECD.ID = I.CURATION_DETAILS_FK " + EE2CAclQueryUtils.formNativeAclJoinClause( "EE2AD.EXPRESSION_EXPERIMENT_FK" ) + " " - + ( eeIds != null ? "where EE2AD.EXPRESSION_EXPERIMENT_FK in :ids " : "where EE2AD.EXPRESSION_EXPERIMENT_FK is not NULL " ) + + "where EE2AD.EXPRESSION_EXPERIMENT_FK is not NULL " + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "EE2AD.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + " " + ( !SecurityUtil.isUserAdmin() ? "and not ADCD.TROUBLED and not EECD.TROUBLED " : "" ) + "group by AD.TECHNOLOGY_TYPE" ) @@ -971,19 +982,34 @@ private Map getTechnologyTypeUsageFrequencyInternal( @Null .addSynchronizedEntityClass( ArrayDesign.class ) .setCacheable( true ); EE2CAclQueryUtils.addAclParameters( q, ExpressionExperiment.class ); - List results; - if ( eeIds != null ) { - results = listByBatch( q, "ids", eeIds, getBatchSize() ); - } else { - //noinspection unchecked - results = q.list(); + //noinspection unchecked + List results = q.list(); + return results.stream().collect( Collectors.groupingBy( row -> TechnologyType.valueOf( ( String ) row[0] ), Collectors.summingLong( row -> ( Long ) row[1] ) ) ); + } + + @Override + public Map getTechnologyTypeUsageFrequency( Collection eeIds ) { + if ( eeIds.isEmpty() ) { + return Collections.emptyMap(); } + Query q = getSessionFactory().getCurrentSession() + .createSQLQuery( "select AD.TECHNOLOGY_TYPE as TT, count(distinct EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN EE2AD " + + "join ARRAY_DESIGN AD on EE2AD.ARRAY_DESIGN_FK = AD.ID " + + "where EE2AD.EXPRESSION_EXPERIMENT_FK in (:ids) " + + "group by AD.TECHNOLOGY_TYPE" ) + .addScalar( "TT", StandardBasicTypes.STRING ) + .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) + .addSynchronizedQuerySpace( EE2AD_QUERY_SPACE ) + .addSynchronizedEntityClass( ExpressionExperiment.class ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .setCacheable( true ); + List results = listByBatch( q, "ids", eeIds, getBatchSize() ); return results.stream().collect( Collectors.groupingBy( row -> TechnologyType.valueOf( ( String ) row[0] ), Collectors.summingLong( row -> ( Long ) row[1] ) ) ); } @Override public Map getArrayDesignsUsageFrequency( int maxResults ) { - return getPlatformsUsageFrequency( null, false, maxResults ); + return getPlatformsUsageFrequency( false, maxResults ); } @Override @@ -993,7 +1019,7 @@ public Map 
getArrayDesignsUsageFrequency( Collection ee @Override public Map getOriginalPlatformsUsageFrequency( int maxResults ) { - return getPlatformsUsageFrequency( null, true, maxResults ); + return getPlatformsUsageFrequency( true, maxResults ); } @Override @@ -1001,10 +1027,7 @@ public Map getOriginalPlatformsUsageFrequency( Collection getPlatformsUsageFrequency( @Nullable Collection eeIds, boolean original, int maxResults ) { - if ( eeIds != null && eeIds.isEmpty() ) { - return Collections.emptyMap(); - } + private Map getPlatformsUsageFrequency( boolean original, int maxResults ) { Query query = getSessionFactory().getCurrentSession() .createSQLQuery( "select ad.*, count(distinct i.ID) EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN ee2ad " + "join INVESTIGATION i on i.ID = ee2ad.EXPRESSION_EXPERIMENT_FK " @@ -1015,8 +1038,8 @@ private Map getPlatformsUsageFrequency( @Nullable Collection< + "where ee2ad.IS_ORIGINAL_PLATFORM = :original " // exclude noop switch + ( original ? " and ee2ad.ARRAY_DESIGN_FK not in (select ARRAY_DESIGN_FK from EXPRESSION_EXPERIMENT2ARRAY_DESIGN where EXPRESSION_EXPERIMENT_FK = ee2ad.EXPRESSION_EXPERIMENT_FK and ARRAY_DESIGN_FK = ee2ad.ARRAY_DESIGN_FK and not IS_ORIGINAL_PLATFORM) " : "" ) - + ( eeIds != null ? "and ee2ad.EXPRESSION_EXPERIMENT_FK in :ids " : "" ) + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "ee2ad.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + " " + // exclude troubled platforms or experiments for non-admins + ( !SecurityUtil.isUserAdmin() ? "and not eecd.TROUBLED and not adcd.TROUBLED " : "" ) + "group by ad.ID " // no need to sort results if limiting, we're collecting in a map @@ -1033,28 +1056,55 @@ private Map getPlatformsUsageFrequency( @Nullable Collection< EE2CAclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); query.setCacheable( true ); List result; - if ( eeIds != null ) { - if ( eeIds.size() > MAX_PARAMETER_LIST_SIZE ) { - result = listByBatch( query, "ids", eeIds, 2048 ); - if ( maxResults > 0 ) { - // results need to be aggregated and limited - return result.stream() - .collect( groupingBy( row -> ( ArrayDesign ) row[0], summingLong( row -> ( Long ) row[1] ) ) ) - .entrySet().stream() - .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) - .limit( maxResults ) - .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue ) ); - } - } else { - //noinspection unchecked - result = query - .setParameterList( "ids", optimizeParameterList( eeIds ) ) - .setMaxResults( maxResults ) - .list(); + //noinspection unchecked + result = query + .setMaxResults( maxResults ) + .list(); + return result.stream().collect( groupingBy( row -> ( ArrayDesign ) row[0], summingLong( row -> ( Long ) row[1] ) ) ); + } + + private Map getPlatformsUsageFrequency( Collection eeIds, boolean original, int maxResults ) { + if ( eeIds.isEmpty() ) { + return Collections.emptyMap(); + } + // exclude noop switch + // no need to sort results if limiting, we're collecting in a map + Query query = getSessionFactory().getCurrentSession() + .createSQLQuery( "select ad.*, count(distinct i.ID) EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN ee2ad " + + "join INVESTIGATION i on i.ID = ee2ad.EXPRESSION_EXPERIMENT_FK " + + "join ARRAY_DESIGN ad on ee2ad.ARRAY_DESIGN_FK = ad.ID " + + "where ee2ad.IS_ORIGINAL_PLATFORM = :original " + // exclude noop switch + + ( original ? 
" and ee2ad.ARRAY_DESIGN_FK not in (select ARRAY_DESIGN_FK from EXPRESSION_EXPERIMENT2ARRAY_DESIGN where EXPRESSION_EXPERIMENT_FK = ee2ad.EXPRESSION_EXPERIMENT_FK and ARRAY_DESIGN_FK = ee2ad.ARRAY_DESIGN_FK and not IS_ORIGINAL_PLATFORM) " : "" ) + + "and ee2ad.EXPRESSION_EXPERIMENT_FK in :ids " + + "group by ad.ID " + // no need to sort results if limiting, we're collecting in a map + + ( maxResults > 0 ? "order by EE_COUNT desc" : "" ) ) + .addEntity( ArrayDesign.class ) + .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) + // ensures that the cache is invalidated when the ee2ad table is regenerated + .addSynchronizedQuerySpace( EE2AD_QUERY_SPACE ) + // ensures that the cache is invalidated when EEs or ADs are added/removed + .addSynchronizedEntityClass( ExpressionExperiment.class ) + .addSynchronizedEntityClass( ArrayDesign.class ); + query.setParameter( "original", original ); + query.setCacheable( true ); + List result; + if ( eeIds.size() > MAX_PARAMETER_LIST_SIZE ) { + result = listByBatch( query, "ids", eeIds, 2048 ); + if ( maxResults > 0 ) { + // results need to be aggregated and limited + return result.stream() + .collect( groupingBy( row -> ( ArrayDesign ) row[0], summingLong( row -> ( Long ) row[1] ) ) ) + .entrySet().stream() + .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) + .limit( maxResults ) + .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue ) ); } } else { //noinspection unchecked result = query + .setParameterList( "ids", optimizeParameterList( eeIds ) ) .setMaxResults( maxResults ) .list(); } From 3fcadd9de08c6e95078a2b5943146a1119df9c01 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Fri, 22 Mar 2024 13:55:39 -0700 Subject: [PATCH 054/105] Perform troubled status filtering for categories and annotations usage frequency --- .../ExpressionExperimentDaoImpl.java | 41 +++++++++++-------- .../ExpressionExperimentDaoTest.java | 28 +++++++++++++ 2 files changed, 52 insertions(+), 17 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index f0b0be9215..ea742feb6d 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -95,6 +95,13 @@ public class ExpressionExperimentDaoImpl private static final String[] ONE_TO_MANY_ALIASES = { CHARACTERISTIC_ALIAS, BIO_MATERIAL_CHARACTERISTIC_ALIAS, FACTOR_VALUE_CHARACTERISTIC_ALIAS, ALL_CHARACTERISTIC_ALIAS, BIO_ASSAY_ALIAS, ARRAY_DESIGN_ALIAS }; + /** + * Queries for retrieving troubled experiment and platform identifiers. 
+ */ + private static final String + TROUBLED_EXPERIMENT_IDS_SQL = "select I.ID from INVESTIGATION I join CURATION_DETAILS CD on I.CURATION_DETAILS_FK = CD.ID where CD.TROUBLED", + TROUBLED_PLATFORM_IDS_SQL = "select AD.ID from ARRAY_DESIGN AD join CURATION_DETAILS CD on AD.CURATION_DETAILS_FK = CD.ID where CD.TROUBLED"; + @Autowired public ExpressionExperimentDaoImpl( SessionFactory sessionFactory ) { super( ExpressionExperimentDao.OBJECT_ALIAS, ExpressionExperiment.class, sessionFactory ); @@ -661,6 +668,10 @@ public Map getCategoriesUsageFrequency( @Nullable Collecti query += getExcludeUrisClause( excludedCategoryUris, excludedTermUris, excludeFreeTextCategories, excludeFreeTextTerms, excludeUncategorized, retainedTermUris ); if ( doAclFiltering ) { query += EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "T.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ); + // troubled filtering + if ( !SecurityUtil.isUserAdmin() ) { + query += " and T.EXPRESSION_EXPERIMENT_FK not in (" + TROUBLED_EXPERIMENT_IDS_SQL + ")"; + } } query += " group by COALESCE(T.CATEGORY_URI, T.CATEGORY)"; if ( maxResults > 0 ) { @@ -784,6 +795,9 @@ else if ( category.startsWith( "http://" ) ) { } if ( doAclFiltering ) { query += EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "T.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ); + if ( !SecurityUtil.isUserAdmin() ) { + query += " and T.EXPRESSION_EXPERIMENT_FK not in (" + TROUBLED_EXPERIMENT_IDS_SQL + ")"; + } } //language=HQL query += " group by " @@ -965,16 +979,13 @@ public Map> getArrayDesignsUsed( Collection @Override public Map getTechnologyTypeUsageFrequency() { Query q = getSessionFactory().getCurrentSession() - .createSQLQuery( "select AD.TECHNOLOGY_TYPE as TT, count(distinct EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN EE2AD " + .createSQLQuery( "select AD.TECHNOLOGY_TYPE as TT, count(distinct EE2AD.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN EE2AD " + "join ARRAY_DESIGN AD on EE2AD.ARRAY_DESIGN_FK = AD.ID " - + "join CURATION_DETAILS ADCD on AD.CURATION_DETAILS_FK = ADCD.ID " - + "join INVESTIGATION I on I.ID = EE2AD.EXPRESSION_EXPERIMENT_FK " - + "join CURATION_DETAILS EECD on EECD.ID = I.CURATION_DETAILS_FK " + EE2CAclQueryUtils.formNativeAclJoinClause( "EE2AD.EXPRESSION_EXPERIMENT_FK" ) + " " - + "where EE2AD.EXPRESSION_EXPERIMENT_FK is not NULL " - + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "EE2AD.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + " " - + ( !SecurityUtil.isUserAdmin() ? "and not ADCD.TROUBLED and not EECD.TROUBLED " : "" ) - + "group by AD.TECHNOLOGY_TYPE" ) + + "where EE2AD.EXPRESSION_EXPERIMENT_FK is not NULL" + + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "EE2AD.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + + ( !SecurityUtil.isUserAdmin() ? 
" and EE2AD.EXPRESSION_EXPERIMENT_FK not in (" + TROUBLED_EXPERIMENT_IDS_SQL + ") and EE2AD.ARRAY_DESIGN_FK not in (" + TROUBLED_PLATFORM_IDS_SQL + ") " : "" ) + + " group by AD.TECHNOLOGY_TYPE" ) .addScalar( "TT", StandardBasicTypes.STRING ) .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) .addSynchronizedQuerySpace( EE2AD_QUERY_SPACE ) @@ -1029,19 +1040,16 @@ public Map getOriginalPlatformsUsageFrequency( Collection getPlatformsUsageFrequency( boolean original, int maxResults ) { Query query = getSessionFactory().getCurrentSession() - .createSQLQuery( "select ad.*, count(distinct i.ID) EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN ee2ad " - + "join INVESTIGATION i on i.ID = ee2ad.EXPRESSION_EXPERIMENT_FK " - + "join CURATION_DETAILS eecd on eecd.ID = i.CURATION_DETAILS_FK " + .createSQLQuery( "select ad.*, count(distinct ee2ad.EXPRESSION_EXPERIMENT_FK) EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN ee2ad " + "join ARRAY_DESIGN ad on ee2ad.ARRAY_DESIGN_FK = ad.ID " - + "join CURATION_DETAILS adcd on adcd.ID = ad.CURATION_DETAILS_FK " + EE2CAclQueryUtils.formNativeAclJoinClause( "ee2ad.EXPRESSION_EXPERIMENT_FK" ) + " " - + "where ee2ad.IS_ORIGINAL_PLATFORM = :original " + + "where ee2ad.IS_ORIGINAL_PLATFORM = :original" // exclude noop switch + ( original ? " and ee2ad.ARRAY_DESIGN_FK not in (select ARRAY_DESIGN_FK from EXPRESSION_EXPERIMENT2ARRAY_DESIGN where EXPRESSION_EXPERIMENT_FK = ee2ad.EXPRESSION_EXPERIMENT_FK and ARRAY_DESIGN_FK = ee2ad.ARRAY_DESIGN_FK and not IS_ORIGINAL_PLATFORM) " : "" ) + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "ee2ad.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + " " // exclude troubled platforms or experiments for non-admins - + ( !SecurityUtil.isUserAdmin() ? "and not eecd.TROUBLED and not adcd.TROUBLED " : "" ) - + "group by ad.ID " + + ( !SecurityUtil.isUserAdmin() ? "and ee2ad.ARRAY_DESIGN_FK not in (" + TROUBLED_PLATFORM_IDS_SQL + ") and ee2ad.EXPRESSION_EXPERIMENT_FK not in (" + TROUBLED_EXPERIMENT_IDS_SQL + ") " : "" ) + + " group by ad.ID " // no need to sort results if limiting, we're collecting in a map + ( maxResults > 0 ? 
"order by EE_COUNT desc" : "" ) ) .addEntity( ArrayDesign.class ) @@ -1070,8 +1078,7 @@ private Map getPlatformsUsageFrequency( Collection eeId // exclude noop switch // no need to sort results if limiting, we're collecting in a map Query query = getSessionFactory().getCurrentSession() - .createSQLQuery( "select ad.*, count(distinct i.ID) EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN ee2ad " - + "join INVESTIGATION i on i.ID = ee2ad.EXPRESSION_EXPERIMENT_FK " + .createSQLQuery( "select ad.*, count(distinct ee2ad.EXPRESSION_EXPERIMENT_FK) EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN ee2ad " + "join ARRAY_DESIGN ad on ee2ad.ARRAY_DESIGN_FK = ad.ID " + "where ee2ad.IS_ORIGINAL_PLATFORM = :original " // exclude noop switch diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java index f0a68c9446..bec7448d37 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java @@ -176,6 +176,20 @@ public void testGetCategoriesWithUsageFrequency() { .containsEntry( c, 1L ); } + @Test + @WithMockUser + public void testGetCategoriesUsageFrequencyAsAnonymous() { + expressionExperimentDao.getCategoriesUsageFrequency( null, null, null, null, -1 ); + } + + /** + * No ACL filtering is done when explicit IDs are provided, so this should work without {@link WithMockUser}. + */ + @Test + public void testGetCategoriesUsageFrequencyWithIds() { + expressionExperimentDao.getCategoriesUsageFrequency( Collections.singleton( 1L ), null, null, null, -1 ); + } + @Test @WithMockUser(authorities = "GROUP_ADMIN") public void testGetAnnotationUsageFrequency() { @@ -184,6 +198,12 @@ public void testGetAnnotationUsageFrequency() { .containsEntry( c, 1L ); } + @Test + @WithMockUser + public void testGetAnnotationUsageFrequencyAsAnonymous() { + expressionExperimentDao.getAnnotationsUsageFrequency( null, null, 10, 1, null, null, null, null ); + } + @Test @WithMockUser(authorities = "GROUP_ADMIN") public void testGetAnnotationUsageFrequencyWithLargeBatch() { @@ -250,6 +270,14 @@ public void testGetAnnotationUsageFrequencyWithUncategorizedCategory() { .doesNotContainKey( c2 ); } + /** + * No ACL filtering is done when explicit IDs are provided, so this should work without {@link WithMockUser}. + */ + @Test + public void testGetAnnotationUsageFrequencyWithIds() { + expressionExperimentDao.getAnnotationsUsageFrequency( Collections.singleton( 1L ), null, 10, 1, null, null, null, null ); + } + private Characteristic createCharacteristic( @Nullable String category, @Nullable String categoryUri, String value, @Nullable String valueUri ) { ExpressionExperiment ee = new ExpressionExperiment(); sessionFactory.getCurrentSession().persist( ee ); From 4a97dcbdea4631449eebfab45a92b8c87af6edea Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Sat, 23 Mar 2024 00:33:02 -0700 Subject: [PATCH 055/105] Few optimizations and improvements for OntologySearchSource If a term URI search is done, don't bother performing a full-text search afterwards. Improve how children terms are aggregated and scored. Only reindex/reload ontologies that are enabled, not loaded. 
--- .../gemma/core/apps/FindObsoleteTermsCli.java | 9 +- .../AbstractOntologyResourceSimple.java | 11 + .../gemma/core/ontology/OntologyService.java | 31 +-- .../core/ontology/OntologyServiceImpl.java | 128 +++++------ .../search/source/OntologySearchSource.java | 207 ++++++------------ .../core/ontology/OntologyServiceTest.java | 8 +- .../gemma/core/search/SearchServiceTest.java | 1 - .../source/OntologySearchSourceTest.java | 6 +- .../experiment/AnnotationController.java | 5 +- 9 files changed, 160 insertions(+), 246 deletions(-) diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java index effbe261f3..e7efebba37 100644 --- a/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/FindObsoleteTermsCli.java @@ -8,7 +8,7 @@ import org.springframework.core.task.AsyncTaskExecutor; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.util.AbstractCLI; -import ubic.gemma.model.common.description.CharacteristicValueObject; +import ubic.gemma.model.common.description.Characteristic; import java.util.LinkedHashMap; import java.util.List; @@ -90,14 +90,13 @@ protected void doWork() throws Exception { log.info( "Ontologies warmed up, starting check..." ); - Map vos = ontologyService.findObsoleteTermUsage(); + Map vos = ontologyService.findObsoleteTermUsage(); AbstractCLI.log.info( "Obsolete term check finished, printing ..." ); System.out.println( "Value\tValueUri\tCount" ); - for ( CharacteristicValueObject vo : vos.values() ) { - System.out.println( vo.getValue() + "\t" + vo.getValueUri() + "\t" + vo.getNumTimesUsed() ); + for ( Map.Entry vo : vos.entrySet() ) { + System.out.println( vo.getKey().getValue() + "\t" + vo.getKey().getValueUri() + "\t" + vo.getValue() ); } - } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/AbstractOntologyResourceSimple.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/AbstractOntologyResourceSimple.java index 1dd2be213c..c33793e7be 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/AbstractOntologyResourceSimple.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/AbstractOntologyResourceSimple.java @@ -16,11 +16,22 @@ protected AbstractOntologyResourceSimple( @Nullable String uri, String label ) { this.label = label; } + @Override + public String getLocalName() { + return uri; + } + @Override public String getLabel() { return label; } + @Nullable + @Override + public String getComment() { + return null; + } + @Override @Nullable public String getUri() { diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java index c640547e35..254cc995e5 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyService.java @@ -17,12 +17,12 @@ import ubic.basecode.ontology.model.OntologyProperty; import ubic.basecode.ontology.model.OntologyTerm; import ubic.gemma.core.search.SearchException; +import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.description.CharacteristicValueObject; import ubic.gemma.model.genome.Taxon; import javax.annotation.Nullable; import java.util.Collection; -import java.util.LinkedHashSet; import java.util.Map; import java.util.Set; @@ -33,17 +33,13 @@ public interface OntologyService { /** - *

* Locates usages of obsolete terms in Characteristics, ignoring Gene Ontology annotations. Requires the ontologies are loaded into memory. - *

*

- * Will also find terms that are no longer in an ontology we use. - *

- * + * Will also find terms that are no longer in an ontology we use. * @return map of value URI to a representative characteristic using the term. The latter will contain a count - * of how many ocurrences there were. + * of how many occurrences there were. */ - Map findObsoleteTermUsage(); + Map findObsoleteTermUsage(); /** * Using the ontology and values in the database, for a search searchQuery given by the client give an ordered list @@ -53,6 +49,7 @@ public interface OntologyService { * @param useNeuroCartaOntology use neurocarta ontology * @return characteristic vos */ + @Deprecated Collection findExperimentsCharacteristicTags( String searchQuery, boolean useNeuroCartaOntology ) throws SearchException; @@ -61,10 +58,10 @@ Collection findExperimentsCharacteristicTags( String * looks like a URI, it just retrieves the term. * For other queries, this a lucene backed search, is inexact and for general terms can return a lot of results. * - * @param search search + * @param query search query * @return returns a collection of ontologyTerm's */ - Collection findTerms( String search ) throws SearchException; + Collection findTerms( String query ) throws SearchException; /** * Given a search string will first look through the characteristic database for any entries that have a match. If a @@ -80,13 +77,12 @@ Collection findExperimentsCharacteristicTags( String Collection findTermsInexact( String givenQueryString, @Nullable Taxon taxon ) throws SearchException; /** - * @return terms which are allowed for use in the Category of a Characteristic + * Obtain terms which are allowed for use in the category of a {@link ubic.gemma.model.common.description.Characteristic}. */ Set getCategoryTerms(); /** - * - * @return terms allowed for the predicate (relationship) in a Characteristic + * Obtain terms allowed for the predicate (relationship) in a {@link ubic.gemma.model.expression.experiment.Statement}. */ Set getRelationTerms(); @@ -103,14 +99,13 @@ Collection findExperimentsCharacteristicTags( String Set getChildren( Collection matchingTerms, boolean direct, boolean includeAdditionalProperties ); /** - * @param uri uri - * @return the definition of the associated OntologyTerm. This requires that the ontology be loaded. + * Obtain a definition for the given URI. */ + @Nullable String getDefinition( String uri ); /** - * @param uri uri - * @return the OntologyTerm for the specified URI. + * Obtain a term for the given URI. */ @Nullable OntologyTerm getTerm( String uri ); @@ -120,8 +115,6 @@ Collection findExperimentsCharacteristicTags( String */ Set getTerms( Collection uris ); - boolean isObsolete( String uri ); - /** * Recreate the search indices, for ontologies that are loaded. 
*/ diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java index e533bf4899..59b2069028 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java @@ -285,7 +285,9 @@ public Collection findTerms( String search ) throws BaseCodeOntolo } } - return pickBest( results ); + return results.stream() + .sorted( Comparator.comparing( OntologyTerm::getScore, Comparator.nullsLast( Comparator.reverseOrder() ) ) ) + .collect( Collectors.toCollection( LinkedHashSet::new ) ); } @Override @@ -425,7 +427,9 @@ private Set getParentsOrChildren( Collection terms, toQuery.clear(); } } - return pickBest( results ); + // drop terms without labels + results.removeIf( t -> t.getLabel() == null ); + return new HashSet<>( results ); } @Override @@ -456,14 +460,12 @@ public Set getRelationTerms() { @Override public String getDefinition( String uri ) { - if ( uri == null ) return null; OntologyTerm ot = this.getTerm( uri ); if ( ot != null ) { - for ( AnnotationProperty ann : ot.getAnnotations() ) { - // FIXME: not clear this will work with all ontologies. UBERON, HP, MP, MONDO does it this way. - if ( "http://purl.obolibrary.org/obo/IAO_0000115".equals( ann.getUri() ) ) { - return ann.getContents(); - } + // FIXME: not clear this will work with all ontologies. UBERON, HP, MP, MONDO does it this way. + AnnotationProperty annot = ot.getAnnotation( "http://purl.obolibrary.org/obo/IAO_0000115" ); + if ( annot != null ) { + return annot.getContents(); } } return null; @@ -471,42 +473,33 @@ public String getDefinition( String uri ) { @Override public OntologyTerm getTerm( String uri ) { - return findFirst( ontology -> ontology.getTerm( uri ), uri ); + return findFirst( ontology -> { + OntologyTerm term = ontology.getTerm( uri ); + if ( term != null && term.getLabel() == null ) { + return null; + } + return term; + }, uri ); } @Override public Set getTerms( Collection uris ) { Set distinctUris = uris instanceof Set ? ( Set ) uris : new HashSet<>( uris ); - return pickBest( combineInThreads( os -> distinctUris.stream().map( os::getTerm ).filter( Objects::nonNull ).collect( Collectors.toSet() ), - String.format( "terms for %d URIs", uris.size() ) ) ); - } - - /** - * @return true if the Uri is an ObsoleteClass. This will only work if the ontology in question is loaded. - */ - @Override - public boolean isObsolete( String uri ) { - if ( uri == null ) - return false; - OntologyTerm t = this.getTerm( uri ); - return t != null && t.isObsolete(); + List results = combineInThreads( os -> distinctUris.stream().map( os::getTerm ).filter( Objects::nonNull ).collect( Collectors.toSet() ), + String.format( "terms for %d URIs", uris.size() ) ); + results.removeIf( t -> t.getLabel() == null ); + return new HashSet<>( results ); } @Override public void reindexAllOntologies() { for ( ubic.basecode.ontology.providers.OntologyService serv : this.ontologyServices ) { - if ( serv.isOntologyLoaded() ) { - OntologyServiceImpl.log.info( "Reindexing: " + serv ); - try { + if ( serv.isEnabled() && serv.isSearchEnabled() ) { + ontologyTaskExecutor.execute( () -> { + OntologyServiceImpl.log.info( "Reindexing " + serv + "..." 
); serv.index( true ); ontologyCache.clearSearchCacheByOntology( serv ); - } catch ( Exception e ) { - OntologyServiceImpl.log.error( "Failed to index " + serv + ": " + e.getMessage(), e ); - } - } else { - if ( serv.isEnabled() ) - OntologyServiceImpl.log - .info( "Not available for reindexing (not enabled or finished initialization): " + serv ); + } ); } } } @@ -514,11 +507,19 @@ public void reindexAllOntologies() { @Override public void reinitializeAndReindexAllOntologies() { for ( ubic.basecode.ontology.providers.OntologyService serv : this.ontologyServices ) { - ontologyTaskExecutor.execute( () -> { - serv.initialize( true, true ); - ontologyCache.clearSearchCacheByOntology( serv ); - ontologyCache.clearByOntology( serv ); - } ); + if ( serv.isOntologyLoaded() ) { + if ( serv.isEnabled() ) { + boolean isSearchEnabled = serv.isSearchEnabled(); + ontologyTaskExecutor.execute( () -> { + OntologyServiceImpl.log.info( "Reinitializing " + serv + "..." ); + serv.initialize( true, isSearchEnabled ); + ontologyCache.clearByOntology( serv ); + if ( isSearchEnabled ) { + ontologyCache.clearSearchCacheByOntology( serv ); + } + } ); + } + } } } @@ -570,20 +571,17 @@ private Characteristic termToCharacteristic( OntologyTerm res ) { } @Override - public Map findObsoleteTermUsage() { - Map vos = new HashMap<>(); - - int start = 0; - int step = 5000; + public Map findObsoleteTermUsage() { + Map results = new HashMap<>(); int prevObsoleteCnt = 0; int checked = 0; - CharacteristicValueObject lastObsolete = null; - - while ( true ) { + Characteristic lastObsolete = null; + long total = characteristicService.countAll(); + int step = 5000; + for ( int start = 0; ; start += step ) { Collection chars = characteristicService.browse( start, step ); - start += step; if ( chars == null || chars.isEmpty() ) { break; @@ -597,35 +595,30 @@ public Map findObsoleteTermUsage() { checked++; - if ( this.getTerm( valueUri ) == null || this.isObsolete( valueUri ) ) { - + OntologyTerm term = this.getTerm( valueUri ); + if ( term != null && term.isObsolete() ) { if ( valueUri.startsWith( "http://purl.org/commons/record/ncbi_gene" ) || valueUri.startsWith( "http://purl.obolibrary.org/obo/GO_" ) ) { // these are false positives, they aren't in an ontology, and we aren't looking at GO Terms. continue; } - - - if ( !vos.containsKey( valueUri ) ) { - vos.put( valueUri, new CharacteristicValueObject( ch ) ); - } - vos.get( valueUri ).incrementOccurrenceCount(); + results.compute( ch, ( k, v ) -> v == null ? 
1L : v + 1L ); if ( log.isDebugEnabled() ) OntologyServiceImpl.log.debug( "Found obsolete or missing term: " + ch.getValue() + " - " + valueUri ); - lastObsolete = vos.get( valueUri ); + lastObsolete = ch; } } - if ( vos.size() > prevObsoleteCnt ) { - OntologyServiceImpl.log.info( "Found " + vos.size() + " obsolete or missing terms so far, tested " + checked + " characteristics" ); + if ( results.size() > prevObsoleteCnt ) { + OntologyServiceImpl.log.info( "Found " + results.size() + " obsolete or missing terms so far, tested " + checked + " out of " + total + " characteristics" ); OntologyServiceImpl.log.info( "Last obsolete term seen: " + lastObsolete.getValue() + " - " + lastObsolete.getValueUri() ); } - prevObsoleteCnt = vos.size(); + prevObsoleteCnt = results.size(); } - OntologyServiceImpl.log.info( "Done, obsolete or missing terms found: " + vos.size() ); + OntologyServiceImpl.log.info( "Done, obsolete or missing terms found: " + results.size() ); - return vos; + return results; } private void searchForCharacteristics( String queryString, Map previouslyUsedInSystem ) { @@ -987,19 +980,4 @@ private List combineInThreads( Function ontologyTermComparator = Comparator - .comparing( ( OntologyTerm t ) -> t.getLabel() != null, Comparator.reverseOrder() ) // prefer terms with rdf:label - .thenComparing( OntologyTerm::getScore, Comparator.nullsLast( Comparator.reverseOrder() ) ); // prefer the highest score - - /** - * Deduplicate terms in the given collection giving preference to those with a if available and the - * highest {@link OntologyTerm#getScore()}. - */ - private LinkedHashSet pickBest( Collection terms ) { - return terms.stream().sorted( ontologyTermComparator ).collect( Collectors.toCollection( LinkedHashSet::new ) ); - } } \ No newline at end of file diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java index 2b7baf7cd6..36e5840e95 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java @@ -1,11 +1,13 @@ package ubic.gemma.core.search.source; +import lombok.EqualsAndHashCode; +import lombok.Value; import lombok.extern.apachecommons.CommonsLog; import org.apache.commons.lang3.time.StopWatch; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import org.springframework.util.StringUtils; -import ubic.basecode.ontology.model.*; +import ubic.basecode.ontology.model.OntologyTerm; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; @@ -47,38 +49,47 @@ public Collection> searchExpressionExperiment Set> results = new SearchResultSet<>(); - Collection terms = new HashSet<>(); + Collection ontologyResults = new HashSet<>(); - // f the query is a term, find it + // if the query is a term, find it directly + Collection matchingTerms; if ( settings.isTermQuery() ) { String termUri = settings.getQuery(); - OntologyTerm resource; + OntologyResult resource; OntologyTerm r2 = ontologyService.getTerm( termUri ); if ( r2 != null ) { - resource = new SimpleOntologyTermWithScore( r2, 1.0 ); + assert r2.getUri() != null; + resource = new OntologyResult( r2, 1.0 ); + matchingTerms = Collections.singleton( r2 ); } else { // attempt to guess a label from othe database - Characteristic c = 
characteristicService.findBestByUri( settings.getQuery() ); + Characteristic c = characteristicService.findBestByUri( termUri ); if ( c != null ) { assert c.getValueUri() != null; - resource = new SimpleOntologyTermWithScore( c.getValueUri(), c.getValue(), 1.0 ); + resource = new OntologyResult( c.getValueUri(), c.getValue(), 1.0 ); } else { - resource = new SimpleOntologyTermWithScore( termUri, getLabelFromTermUri( termUri ), 1.0 ); + resource = new OntologyResult( termUri, getLabelFromTermUri( termUri ), 1.0 ); } + matchingTerms = Collections.emptySet(); + } + ontologyResults.add( resource ); + } else { + // Search ontology classes matches to the full-text query + timer.reset(); + timer.start(); + matchingTerms = ontologyService.findTerms( settings.getQuery() ); + matchingTerms.stream() + // ignore bnodes + .filter( t -> t.getUri() != null ) + // the only possibility for being no score is that the query is an URI and the search didn't go through + // the search index + .map( t -> new OntologyResult( t, t.getScore() != null ? t.getScore() : 1.0 ) ) + .forEach( ontologyResults::add ); + timer.stop(); + if ( timer.getTime() > 100 ) { + log.warn( String.format( "Found %d ontology classes matching '%s' in %d ms", + matchingTerms.size(), settings.getQuery(), timer.getTime() ) ); } - terms.add( resource ); - } - - // Search ontology classes matches to the query - timer.reset(); - timer.start(); - Collection matchingTerms = ontologyService.findTerms( settings.getQuery() ); - terms.addAll( matchingTerms ); - timer.stop(); - - if ( timer.getTime() > 100 ) { - log.warn( String.format( "Found %d ontology classes matching '%s' in %d ms", - matchingTerms.size(), settings.getQuery(), timer.getTime() ) ); } // Search for child terms. @@ -86,27 +97,40 @@ public Collection> searchExpressionExperiment // TODO: move this logic in baseCode, this can be done far more efficiently with Jena API timer.reset(); timer.start(); - terms.addAll( ontologyService.getChildren( matchingTerms, false, true ) ); + // we don't know parent/child relation, so the best we can do is assigne the average score + double avgScore = matchingTerms.stream() + .mapToDouble( t -> t.getScore() != null ? 
t.getScore() : 0 ) + .average() + .orElse( 0 ); + ontologyService.getChildren( matchingTerms, false, true ) + .stream() + // ignore bnodes + .filter( c -> c.getUri() != null ) + // small penalty for being indirectly matched + .map( c -> new OntologyResult( c, 0.9 * avgScore ) ) + // if a children was already in terms, it will not be added again and thus its original score will + // be reflected in the results + .forEach( ontologyResults::add ); timer.stop(); if ( timer.getTime() > 200 ) { log.warn( String.format( "Found %d ontology subclasses or related terms for %d terms matching '%s' in %d ms", - terms.size() - matchingTerms.size(), matchingTerms.size(), settings.getQuery(), timer.getTime() ) ); + ontologyResults.size() - matchingTerms.size(), matchingTerms.size(), settings.getQuery(), timer.getTime() ) ); } } timer.reset(); timer.start(); - findExperimentsByTerms( terms, settings, results ); + findExperimentsByOntologyResults( ontologyResults, settings, results ); timer.stop(); if ( timer.getTime() > 100 ) { log.warn( String.format( "Retrieved %d datasets via %d characteristics in %d ms", - results.size(), terms.size(), timer.getTime() ) ); + results.size(), ontologyResults.size(), timer.getTime() ) ); } String message = String.format( "Found %d datasets by %d characteristic URIs for '%s' in %d ms", - results.size(), terms.size(), settings.getQuery(), watch.getTime() ); + results.size(), ontologyResults.size(), settings.getQuery(), watch.getTime() ); if ( watch.getTime() > 300 ) { log.warn( message ); } else { @@ -116,26 +140,22 @@ public Collection> searchExpressionExperiment return results; } - private void findExperimentsByTerms( Collection terms, SearchSettings settings, Set> results ) { + private void findExperimentsByOntologyResults( Collection terms, SearchSettings settings, Set> results ) { // URIs are case-insensitive in the database, so should be the mapping to labels Collection uris = new HashSet<>(); Map uri2value = new TreeMap<>( String.CASE_INSENSITIVE_ORDER ); Map uri2score = new TreeMap<>( String.CASE_INSENSITIVE_ORDER ); - // renormalize the scores in a [0, 1] range + // rescale the scores in a [0, 1] range DoubleSummaryStatistics summaryStatistics = terms.stream() - .map( OntologyTerm::getScore ) - .filter( Objects::nonNull ) + .map( OntologyResult::getScore ) .mapToDouble( s -> s ) .summaryStatistics(); - for ( OntologyTerm term : terms ) { - // bnodes can have null URIs, how annoying... - if ( term.getUri() != null ) { - uris.add( term.getUri() ); - uri2value.put( term.getUri(), term.getLabel() ); - uri2score.put( term.getUri(), term.getScore() != null ? term.getScore() / summaryStatistics.getMax() : summaryStatistics.getAverage() / summaryStatistics.getMax() ); - } + for ( OntologyResult term : terms ) { + uris.add( term.getUri() ); + uri2value.put( term.getUri(), term.getLabel() ); + uri2score.put( term.getUri(), ( term.getScore() - summaryStatistics.getMin() ) / ( summaryStatistics.getMax() - summaryStatistics.getMin() ) ); } findExpressionExperimentsByUris( uris, uri2value, uri2score, settings, results ); @@ -225,114 +245,27 @@ private static String partToTerm( String part ) { return part.replaceFirst( "_", ":" ).toUpperCase(); } - /** - * Simple ontology resource with a score. 
- */ - private static class SimpleOntologyTermWithScore implements OntologyTerm { - - private static final Comparator COMPARATOR = Comparator - .comparing( OntologyResource::getScore, Comparator.nullsLast( Comparator.reverseOrder() ) ) - .thenComparing( OntologyResource::getUri, Comparator.nullsLast( Comparator.naturalOrder() ) ); + @Value + @EqualsAndHashCode(of = { "uri" }) + private static class OntologyResult { + String uri; + String label; + double score; - private final String uri; - private final String label; - private final double score; - - private SimpleOntologyTermWithScore( String uri, String label, double score ) { + private OntologyResult( String uri, String label, double score ) { this.uri = uri; this.label = label; this.score = score; } - public SimpleOntologyTermWithScore( OntologyTerm resource, double score ) { + public OntologyResult( OntologyTerm resource, double score ) { this.uri = resource.getUri(); - this.label = resource.getLabel(); + if ( resource.getLabel() != null ) { + this.label = resource.getLabel(); + } else { + this.label = resource.getLocalName(); + } this.score = score; } - - @Override - public String getUri() { - return uri; - } - - @Override - public String getLabel() { - return label; - } - - @Override - public boolean isObsolete() { - return false; - } - - @Override - public Double getScore() { - return score; - } - - @Override - public int compareTo( OntologyResource ontologyResource ) { - return Objects.compare( this, ontologyResource, COMPARATOR ); - } - - @Override - public Collection getAlternativeIds() { - return null; - } - - @Override - public Collection getAnnotations() { - return null; - } - - @Override - public Collection getChildren( boolean direct, boolean includeAdditionalProperties, boolean keepObsoletes ) { - return null; - } - - @Override - public String getComment() { - return null; - } - - @Override - public Collection getIndividuals( boolean direct ) { - return null; - } - - @Override - public String getLocalName() { - return null; - } - - @Override - public Object getModel() { - return null; - } - - @Override - public Collection getParents( boolean direct, boolean includeAdditionalProperties, boolean keepObsoletes ) { - return null; - } - - @Override - public Collection getRestrictions() { - return null; - } - - @Override - public String getTerm() { - return null; - } - - @Override - public boolean isRoot() { - return false; - } - - @Override - public boolean isTermObsolete() { - return false; - } } } diff --git a/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyServiceTest.java index 50bb012d0a..0979f725a2 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyServiceTest.java @@ -6,6 +6,7 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.cache.CacheManager; +import org.springframework.cache.concurrent.ConcurrentMapCacheManager; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; import org.springframework.core.task.AsyncTaskExecutor; @@ -108,7 +109,7 @@ public TaskExecutor ontologyTaskExecutor() { @Bean public CacheManager cacheManager() { - return mock(); + return new ConcurrentMapCacheManager(); } } @@ -157,10 +158,7 @@ public void testTermLackingLabelIsIgnored() { when( 
chebiOntologyService.getTerm( "http://test" ) ).thenReturn( new OntologyTermSimple( "http://test", null ) ); assertNull( ontologyService.getTerm( "http://test" ) ); - // this is covering the case when baseCode defaults to the local name or URI when a term does not have a label - when( chebiOntologyService.getTerm( "http://test" ) ).thenReturn( new OntologyTermSimple( "http://test", "http://test" ) ); - assertNull( ontologyService.getTerm( "http://test" ) ); - + // provide the term from another ontology, but with a label this time when( obiService.isOntologyLoaded() ).thenReturn( true ); when( obiService.getTerm( "http://test" ) ).thenReturn( new OntologyTermSimple( "http://test", "this is a test term" ) ); assertNotNull( ontologyService.getTerm( "http://test" ) ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceTest.java index 76f0a06531..397a0ed713 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceTest.java @@ -94,7 +94,6 @@ public void searchExpressionExperimentsByUri_whenQueryIsAUri_thenEnsureTheUriIsU .build(); searchService.search( settings ); verify( ontologyService ).getTerm( "http://purl.obolibrary.org/obo/DOID_14602" ); - verify( ontologyService ).findTerms( "http://purl.obolibrary.org/obo/DOID_14602" ); verifyNoMoreInteractions( ontologyService ); verify( characteristicService ).findExperimentsByUris( Collections.singleton( "http://purl.obolibrary.org/obo/DOID_14602" ), null, 10, true, false ); } diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java index ef44b30051..31aa3f3972 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java @@ -100,7 +100,8 @@ public Map highlightDocument( Document document, org.apache.luce return Collections.emptyMap(); } } ) ); - verify( ontologyService ).findTerms( "http://purl.obolibrary.org/obo/CL_0000129" ); + verify( ontologyService ).getTerm( "http://purl.obolibrary.org/obo/CL_0000129" ); + verify( ontologyService ).getChildren( argThat( col -> col.size() == 1 ), eq( false ), eq( true ) ); verify( characteristicService ).findExperimentsByUris( Collections.singleton( "http://purl.obolibrary.org/obo/CL_0000129" ), null, 5000, true, false ); assertThat( results ).anySatisfy( result -> { assertThat( result ) @@ -136,7 +137,8 @@ public Map highlightDocument( Document document, org.apache.luce return Collections.emptyMap(); } } ) ); - verify( ontologyService ).findTerms( "http://purl.obolibrary.org/obo/CL_0000129" ); + verify( ontologyService ).getTerm( "http://purl.obolibrary.org/obo/CL_0000129" ); + verifyNoMoreInteractions( ontologyService ); verify( characteristicService ).findBestByUri( "http://purl.obolibrary.org/obo/CL_0000129" ); verify( characteristicService ).findExperimentsByUris( Collections.singleton( "http://purl.obolibrary.org/obo/CL_0000129" ), null, 5000, true, false ); assertThat( results ).anySatisfy( result -> { diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/AnnotationController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/AnnotationController.java index 0084168412..fcd4220c5d 100644 --- 
a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/AnnotationController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/AnnotationController.java @@ -102,7 +102,8 @@ public void createExperimentTag( Characteristic vc, Long id ) { if ( vc == null ) { throw new IllegalArgumentException( "Null characteristic" ); } - if ( ontologyService.isObsolete( vc.getValueUri() ) ) { + OntologyTerm term = ontologyService.getTerm( vc.getValueUri() ); + if ( vc.getValueUri() != null && term != null && term.isObsolete() ) { throw new IllegalArgumentException( vc + " is an obsolete term! Not saving." ); } expressionExperimentService.addCharacteristic( ee, vc ); @@ -131,7 +132,7 @@ public Collection findTerm( String givenQueryString, int numfilled = 0; int maxfilled = 25; // presuming we don't need to look too far down the list ... just as a start. for ( CharacteristicValueObject cvo : sortedResults ) { - cvo.setValueDefinition( ontologyService.getDefinition( cvo.getValueUri() ) ); + cvo.setValueDefinition( cvo.getValueUri() != null ? ontologyService.getDefinition( cvo.getValueUri() ) : null ); if ( ++numfilled > maxfilled ) { break; } From 61402c50fd8d333fc7a0f1d3627709f956b92db8 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 25 Mar 2024 09:50:36 -0700 Subject: [PATCH 056/105] Add IntelliJ scripts for deploying to dev/staging servers --- .idea/runConfigurations/Deploy__dev_.xml | 21 ++++++++++++++++++++ .idea/runConfigurations/Deploy__staging_.xml | 21 ++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 .idea/runConfigurations/Deploy__dev_.xml create mode 100644 .idea/runConfigurations/Deploy__staging_.xml diff --git a/.idea/runConfigurations/Deploy__dev_.xml b/.idea/runConfigurations/Deploy__dev_.xml new file mode 100644 index 0000000000..3e16af9e38 --- /dev/null +++ b/.idea/runConfigurations/Deploy__dev_.xml @@ -0,0 +1,21 @@ + + + + \ No newline at end of file diff --git a/.idea/runConfigurations/Deploy__staging_.xml b/.idea/runConfigurations/Deploy__staging_.xml new file mode 100644 index 0000000000..d834542fa1 --- /dev/null +++ b/.idea/runConfigurations/Deploy__staging_.xml @@ -0,0 +1,21 @@ + + + + \ No newline at end of file From ec687ec8cb51e1eeaa21b73c071fc204f3d26307 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 25 Mar 2024 10:09:54 -0700 Subject: [PATCH 057/105] Add streamByBatch() to avoid unnecessary collect Update documentation for listByBatch(). 
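
A minimal usage sketch (countPerTaxon is a hypothetical caller; the actual call sites are in the DAO changes below): instead of collecting a batched list and then streaming it, the rows can be aggregated directly:

    // Sketch: aggregate batched native query results without building an intermediate list.
    Map<Taxon, Long> countPerTaxon( Query query, Collection<Long> eeIds, int batchSize ) {
        return QueryUtils.streamByBatch( query, "eeIds", eeIds, batchSize, Object[].class )
                .collect( Collectors.groupingBy( row -> ( Taxon ) row[0],
                        Collectors.summingLong( row -> ( Long ) row[1] ) ) );
    }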
--- .../description/CharacteristicDaoImpl.java | 3 +- .../ExpressionExperimentDaoImpl.java | 25 ++++++++-------- .../gemma/persistence/util/QueryUtils.java | 30 +++++++++++++++++-- 3 files changed, 42 insertions(+), 16 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java index 697e2cecf8..766a86c848 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/description/CharacteristicDaoImpl.java @@ -179,8 +179,7 @@ private List findExperimentsByUrisInternal( Collection uris, @ if ( uris.size() > MAX_PARAMETER_LIST_SIZE ) { if ( limit > 0 && rankByLevel ) { // query is limited and order is important, we have to sort the results in memory - result = listByBatch( query, "uris", uris, 2048 ); - result = result.stream() + result = streamByBatch( query, "uris", uris, 2048, Object[].class ) .sorted( Comparator.comparing( row -> rankClass( ( Class ) row[0] ) ) ) .limit( limit ) .collect( Collectors.toList() ); diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index ea742feb6d..16a240c810 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -63,6 +63,7 @@ import javax.annotation.Nullable; import java.util.*; import java.util.stream.Collectors; +import java.util.stream.Stream; import static java.util.stream.Collectors.groupingBy; import static java.util.stream.Collectors.summingLong; @@ -702,7 +703,7 @@ public Map getCategoriesUsageFrequency( @Nullable Collecti if ( eeIds.size() > MAX_PARAMETER_LIST_SIZE ) { result = listByBatch( q, "eeIds", eeIds, 2048 ); if ( maxResults > 0 ) { - return aggregateC( result ).entrySet().stream() + return aggregateByCategory( result ).entrySet().stream() .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) .limit( maxResults ) .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue, ( a, b ) -> b, () -> new TreeMap<>( Characteristic.getByCategoryAndValueComparator() ) ) ); @@ -718,10 +719,10 @@ public Map getCategoriesUsageFrequency( @Nullable Collecti //noinspection unchecked result = q.setMaxResults( maxResults ).list(); } - return aggregateC( result ); + return aggregateByCategory( result ); } - private Map aggregateC( List result ) { + private Map aggregateByCategory( List result ) { TreeMap byC = new TreeMap<>( Characteristic.getByCategoryComparator() ); for ( Object[] row : result ) { byC.compute( Characteristic.Factory.newInstance( null, null, null, null, ( String ) row[0], ( String ) row[1], null ), @@ -1014,8 +1015,8 @@ public Map getTechnologyTypeUsageFrequency( Collection results = listByBatch( q, "ids", eeIds, getBatchSize() ); - return results.stream().collect( Collectors.groupingBy( row -> TechnologyType.valueOf( ( String ) row[0] ), Collectors.summingLong( row -> ( Long ) row[1] ) ) ); + return streamByBatch( q, "ids", eeIds, getBatchSize(), Object[].class ) + .collect( Collectors.groupingBy( row -> TechnologyType.valueOf( ( String ) row[0] ), 
Collectors.summingLong( row -> ( Long ) row[1] ) ) ); } @Override @@ -1096,12 +1097,12 @@ private Map getPlatformsUsageFrequency( Collection eeId .addSynchronizedEntityClass( ArrayDesign.class ); query.setParameter( "original", original ); query.setCacheable( true ); - List result; + Stream result; if ( eeIds.size() > MAX_PARAMETER_LIST_SIZE ) { - result = listByBatch( query, "ids", eeIds, 2048 ); + result = streamByBatch( query, "ids", eeIds, 2048 ); if ( maxResults > 0 ) { // results need to be aggregated and limited - return result.stream() + return result .collect( groupingBy( row -> ( ArrayDesign ) row[0], summingLong( row -> ( Long ) row[1] ) ) ) .entrySet().stream() .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) @@ -1113,9 +1114,10 @@ private Map getPlatformsUsageFrequency( Collection eeId result = query .setParameterList( "ids", optimizeParameterList( eeIds ) ) .setMaxResults( maxResults ) - .list(); + .list() + .stream(); } - return result.stream().collect( groupingBy( row -> ( ArrayDesign ) row[0], summingLong( row -> ( Long ) row[1] ) ) ); + return result.collect( groupingBy( row -> ( ArrayDesign ) row[0], summingLong( row -> ( Long ) row[1] ) ) ); } @Override @@ -1255,8 +1257,7 @@ public Map getPerTaxonCount( Collection ids ) { + "where ee.id in :eeIds " + "group by ee.taxon" ) .setCacheable( true ); - List list = listByBatch( query, "eeIds", ids, getBatchSize() ); - return list.stream() + return streamByBatch( query, "eeIds", ids, getBatchSize(), Object[].class ) .collect( Collectors.groupingBy( row -> ( Taxon ) row[0], Collectors.summingLong( row -> ( Long ) row[1] ) ) ); } diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java index 51a360afa2..4671ba0652 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/QueryUtils.java @@ -8,6 +8,7 @@ import java.util.*; import java.util.stream.Collectors; +import java.util.stream.Stream; /** * Utilities for {@link org.hibernate.Query}. @@ -72,6 +73,7 @@ public static Collection optimizeIdentifiableParamet */ public static > List> batchParameterList( Collection list, int batchSize ) { Assert.isTrue( batchSize == -1 || batchSize > 0, "Batch size must be strictly positive or equal to -1." ); + Assert.isTrue( batchSize <= MAX_PARAMETER_LIST_SIZE, "The batch size must not exceed " + MAX_PARAMETER_LIST_SIZE + "." ); if ( list.isEmpty() ) { return Collections.emptyList(); } @@ -84,6 +86,7 @@ public static > List> batchParameterList( Collec public static List> batchIdentifiableParameterList( Collection list, int batchSize ) { Assert.isTrue( batchSize == -1 || batchSize > 0, "Batch size must be strictly positive or equal to -1." ); + Assert.isTrue( batchSize <= MAX_PARAMETER_LIST_SIZE, "The batch size must not exceed " + MAX_PARAMETER_LIST_SIZE + "." ); if ( list.isEmpty() ) { return Collections.emptyList(); } @@ -102,10 +105,15 @@ public static , T> List listByBatch( Query query, Str } /** - * List the results of a query by fixed batch size. + * List the results of a query by a fixed batch size. 
+ * @param query the query + * @param batchParam a parameter of the query for batching + * @param list a collection of values for the batch parameters to retrieve + * @param batchSize the number of elements to fetch in each batch + * @param maxResults maximum number of results to return, or -1 to ignore */ public static , T> List listByBatch( Query query, String batchParam, Collection list, int batchSize, int maxResults ) { - List result = new ArrayList<>(); + List result = new ArrayList<>( list.size() ); for ( List batch : batchParameterList( list, batchSize ) ) { int remainingToFetch; if ( maxResults > 0 ) { @@ -124,4 +132,22 @@ public static , T> List listByBatch( Query query, Str } return result; } + + /** + * @see #streamByBatch(Query, String, Collection, int) + */ + public static , T> Stream streamByBatch( Query query, String batchParam, Collection list, int batchSize, Class clazz ) { + return streamByBatch( query, batchParam, list, batchSize ); + } + + /** + * Stream the results of a query by a fixed batch size. + * @see #listByBatch(Query, String, Collection, int) + */ + public static , T> Stream streamByBatch( Query query, String batchParam, Collection list, int batchSize ) { + //noinspection unchecked + return batchParameterList( list, batchSize ).stream() + .map( batch -> ( List ) query.setParameterList( batchParam, batch ).list() ) + .flatMap( List::stream ); + } } From 36d3fd737ef010466cf81e9aad859720b3a6d23f Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 25 Mar 2024 10:51:46 -0700 Subject: [PATCH 058/105] Improve native non-troubled filtering and apply it to platform counting Use the EE2AD table and non-troubled filtering for counting experiments and switched experiments. Add a clause to exclude noop switches (i.e. original and new platform are identical). 
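
For illustration, a sketch of what the generated clause looks like for experiments, assuming the INVESTIGATION/CURATION_DETAILS mapping used elsewhere in these queries (the real table and column names are resolved from Hibernate metadata at runtime):

    // Rough shape of formNativeNonTroubledClause( "ee2ad.EXPRESSION_EXPERIMENT_FK", ExpressionExperiment.class )
    // for a non-admin user; admins get an empty string instead.
    String clause = " and ee2ad.EXPRESSION_EXPERIMENT_FK not in ("
            + "select c.ID from INVESTIGATION c "
            + "join CURATION_DETAILS cd on c.CURATION_DETAILS_FK = cd.ID "
            + "where cd.TROUBLED)";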
--- .../curation/AbstractCuratableDao.java | 39 +++++++++--- .../arrayDesign/ArrayDesignDaoImpl.java | 62 +++++++++++-------- .../ExpressionExperimentDaoImpl.java | 37 +++++------ 3 files changed, 79 insertions(+), 59 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/curation/AbstractCuratableDao.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/curation/AbstractCuratableDao.java index d7150d357f..aab6569b8e 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/curation/AbstractCuratableDao.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/common/auditAndSecurity/curation/AbstractCuratableDao.java @@ -2,6 +2,8 @@ import gemma.gsec.util.SecurityUtil; import org.hibernate.SessionFactory; +import org.hibernate.metadata.ClassMetadata; +import org.hibernate.persister.entity.SingleTableEntityPersister; import ubic.gemma.model.common.auditAndSecurity.AuditEvent; import ubic.gemma.model.common.auditAndSecurity.curation.AbstractCuratableValueObject; import ubic.gemma.model.common.auditAndSecurity.curation.Curatable; @@ -40,7 +42,7 @@ protected AbstractCuratableDao( String objectAlias, Class elementClass, Sessi super( objectAlias, elementClass, sessionFactory ); this.objectAlias = objectAlias; } - + @Override public void updateCurationDetailsFromAuditEvent( Curatable curatable, AuditEvent auditEvent ) { if ( curatable.getId() == null ) { @@ -85,6 +87,7 @@ protected void addNonTroubledFilter( Filters filters, String objectAlias ) { if ( !SecurityUtil.isUserAdmin() ) { filters.and( objectAlias, "curationDetails.troubled", Boolean.class, Filter.Operator.eq, false ); } + } /** @@ -121,15 +124,33 @@ protected String groupByIfNecessary( @Nullable Sort sort, String... oneToManyAli } /** - * Format a non-troubled filter for an HQL query. - *

- * For filtering queries, use {@link #addNonTroubledFilter(Filters, String)} instead. - * - * @param objectAlias an alias for a {@link Curatable} entity + * Form a non-troubled clause. */ - protected String formNonTroubledClause( String objectAlias ) { - //language=HQL - return SecurityUtil.isUserAdmin() ? "" : " and " + objectAlias + ".curationDetails.troubled = false"; + protected String formNonTroubledClause( String objectAlias, Class clazz ) { + String entityName = getSessionFactory().getClassMetadata( clazz ).getEntityName(); + if ( !SecurityUtil.isUserAdmin() ) { + //language=HQL + return " and " + objectAlias + " not in (select c from " + entityName + " c join c.curationDetails cd where cd.troubled = true)"; + } else { + return ""; + } + } + + /** + * Form a native non-troubled clause. + */ + protected String formNativeNonTroubledClause( String idColumn, Class clazz ) { + ClassMetadata classMetadata = getSessionFactory().getClassMetadata( clazz ); + String table = ( ( SingleTableEntityPersister ) classMetadata ) + .getTableName(); + String columnName = ( ( SingleTableEntityPersister ) classMetadata ) + .getPropertyColumnNames( "curationDetails" )[0]; + if ( !SecurityUtil.isUserAdmin() ) { + //language=SQL + return " and " + idColumn + " not in (select c.ID from " + table + " c join CURATION_DETAILS cd on c." + columnName + " = cd.ID where cd.TROUBLED)"; + } else { + return ""; + } } @Override diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/arrayDesign/ArrayDesignDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/arrayDesign/ArrayDesignDaoImpl.java index dc068ddba2..e8634d52b9 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/arrayDesign/ArrayDesignDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/arrayDesign/ArrayDesignDaoImpl.java @@ -52,6 +52,7 @@ import java.util.regex.Pattern; import java.util.stream.Collectors; +import static ubic.gemma.persistence.service.TableMaintenanceUtil.EE2AD_QUERY_SPACE; import static ubic.gemma.persistence.service.TableMaintenanceUtil.GENE2CS_QUERY_SPACE; import static ubic.gemma.persistence.util.QueryUtils.optimizeParameterList; @@ -1033,22 +1034,23 @@ private void populateBlacklisted( Collection vos ) { private void populateExpressionExperimentCount( Collection entities ) { Query query = this.getSessionFactory().getCurrentSession() - // using EXPRESSION_EXPERIMENT_FK, we don't need to do a jointure on the INVESTIGATION table, however - // the count reflect the number of bioassays, not EEs - .createSQLQuery( "select BA.ARRAY_DESIGN_USED_FK as ID, count(distinct BA.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from BIO_ASSAY BA " - + AclQueryUtils.formNativeAclJoinClause( "BA.EXPRESSION_EXPERIMENT_FK" ) - + AclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory() ) + " " - // FIXME: exclude troubled datasets - + "group by BA.ARRAY_DESIGN_USED_FK" - ) + .createSQLQuery( "select ee2ad.ARRAY_DESIGN_FK as ID, count(distinct ee2ad.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN ee2ad " + + EE2CAclQueryUtils.formNativeAclJoinClause( "ee2ad.EXPRESSION_EXPERIMENT_FK" ) + " " + + "where ee2ad.ARRAY_DESIGN_FK in :ids " + + "and not ee2ad.IS_ORIGINAL_PLATFORM" + + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "ee2ad.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + + formNativeNonTroubledClause( "ee2ad.EXPRESSION_EXPERIMENT_FK", 
ExpressionExperiment.class ) + + " group by ee2ad.ARRAY_DESIGN_FK" ) .addScalar( "ID", StandardBasicTypes.LONG ) - .addScalar( "EE_COUNT", StandardBasicTypes.LONG ); - AclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); - //noinspection unchecked - List list = query - .setCacheable( true ) - .list(); - Map countById = list.stream() + .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) + // ensures that the cache is invalidated when the ee2ad table is regenerated + .addSynchronizedQuerySpace( EE2AD_QUERY_SPACE ) + // ensures that the cache is invalidated when EEs or ADs are added/removed + .addSynchronizedEntityClass( ExpressionExperiment.class ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .setCacheable( true ); + EE2CAclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); + Map countById = QueryUtils.streamByBatch( query, "ids", EntityUtils.getIds( entities ), 2048, Object[].class ) .collect( Collectors.toMap( o -> ( Long ) o[0], o -> ( Long ) o[1] ) ); for ( ArrayDesignValueObject vo : entities ) { // missing implies no EEs, so zero is a valid default @@ -1058,19 +1060,25 @@ private void populateExpressionExperimentCount( Collection entities ) { Query query = this.getSessionFactory().getCurrentSession() - // using EXPRESSION_EXPERIMENT_FK, we don't need to do a jointure on the INVESTIGATION table, however - // the count reflect the number of bioassays, not EEs - .createSQLQuery( "select BA.ORIGINAL_PLATFORM_FK as ID, count(distinct BA.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from BIO_ASSAY BA " - + AclQueryUtils.formNativeAclJoinClause( "BA.EXPRESSION_EXPERIMENT_FK" ) - + AclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory() ) + " " - // FIXME: exclude troubled datasets - + "group by BA.ORIGINAL_PLATFORM_FK" ) + .createSQLQuery( "select ee2ad.ARRAY_DESIGN_FK as ID, count(distinct ee2ad.EXPRESSION_EXPERIMENT_FK) as EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN ee2ad " + + EE2CAclQueryUtils.formNativeAclJoinClause( "ee2ad.EXPRESSION_EXPERIMENT_FK" ) + " " + + "where ee2ad.ARRAY_DESIGN_FK in :ids " + + "and ee2ad.IS_ORIGINAL_PLATFORM " + // ignore noop switches + + "and ee2ad.ARRAY_DESIGN_FK not in (select ARRAY_DESIGN_FK from EXPRESSION_EXPERIMENT2ARRAY_DESIGN where EXPRESSION_EXPERIMENT_FK = ee2ad.EXPRESSION_EXPERIMENT_FK and ARRAY_DESIGN_FK = ee2ad.ARRAY_DESIGN_FK and not IS_ORIGINAL_PLATFORM)" + + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "ee2ad.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + + formNativeNonTroubledClause( "ee2ad.EXPRESSION_EXPERIMENT_FK", ExpressionExperiment.class ) + + " group by ee2ad.ARRAY_DESIGN_FK" ) .addScalar( "ID", StandardBasicTypes.LONG ) - .addScalar( "EE_COUNT", StandardBasicTypes.LONG ); - AclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); - //noinspection unchecked - List results = query.setCacheable( true ).list(); - Map switchedCountById = results.stream() + .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) + // ensures that the cache is invalidated when the ee2ad table is regenerated + .addSynchronizedQuerySpace( EE2AD_QUERY_SPACE ) + // ensures that the cache is invalidated when EEs or ADs are added/removed + .addSynchronizedEntityClass( ExpressionExperiment.class ) + .addSynchronizedEntityClass( ArrayDesign.class ) + .setCacheable( true ); + EE2CAclQueryUtils.addAclParameters( query, ExpressionExperiment.class ); + Map switchedCountById = QueryUtils.streamByBatch( query, "ids", EntityUtils.getIds( 
entities ), 2048, Object[].class ) .collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Long ) row[1] ) ); for ( ArrayDesignValueObject vo : entities ) { // missing implies no switched EEs, so zero is a valid default diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index 16a240c810..ceae7d7e65 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -20,7 +20,6 @@ import gemma.gsec.acl.domain.AclObjectIdentity; import gemma.gsec.acl.domain.AclSid; -import gemma.gsec.util.SecurityUtil; import lombok.Value; import org.apache.commons.lang3.NotImplementedException; import org.apache.commons.lang3.time.StopWatch; @@ -96,13 +95,6 @@ public class ExpressionExperimentDaoImpl private static final String[] ONE_TO_MANY_ALIASES = { CHARACTERISTIC_ALIAS, BIO_MATERIAL_CHARACTERISTIC_ALIAS, FACTOR_VALUE_CHARACTERISTIC_ALIAS, ALL_CHARACTERISTIC_ALIAS, BIO_ASSAY_ALIAS, ARRAY_DESIGN_ALIAS }; - /** - * Queries for retrieving troubled experiment and platform identifiers. - */ - private static final String - TROUBLED_EXPERIMENT_IDS_SQL = "select I.ID from INVESTIGATION I join CURATION_DETAILS CD on I.CURATION_DETAILS_FK = CD.ID where CD.TROUBLED", - TROUBLED_PLATFORM_IDS_SQL = "select AD.ID from ARRAY_DESIGN AD join CURATION_DETAILS CD on AD.CURATION_DETAILS_FK = CD.ID where CD.TROUBLED"; - @Autowired public ExpressionExperimentDaoImpl( SessionFactory sessionFactory ) { super( ExpressionExperimentDao.OBJECT_ALIAS, ExpressionExperiment.class, sessionFactory ); @@ -670,9 +662,7 @@ public Map getCategoriesUsageFrequency( @Nullable Collecti if ( doAclFiltering ) { query += EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "T.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ); // troubled filtering - if ( !SecurityUtil.isUserAdmin() ) { - query += " and T.EXPRESSION_EXPERIMENT_FK not in (" + TROUBLED_EXPERIMENT_IDS_SQL + ")"; - } + query += formNativeNonTroubledClause( "T.EXPRESSION_EXPERIMENT_FK", ExpressionExperiment.class ); } query += " group by COALESCE(T.CATEGORY_URI, T.CATEGORY)"; if ( maxResults > 0 ) { @@ -796,9 +786,7 @@ else if ( category.startsWith( "http://" ) ) { } if ( doAclFiltering ) { query += EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "T.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ); - if ( !SecurityUtil.isUserAdmin() ) { - query += " and T.EXPRESSION_EXPERIMENT_FK not in (" + TROUBLED_EXPERIMENT_IDS_SQL + ")"; - } + query += formNativeNonTroubledClause( "T.EXPRESSION_EXPERIMENT_FK", ExpressionExperiment.class ); } //language=HQL query += " group by " @@ -985,7 +973,8 @@ public Map getTechnologyTypeUsageFrequency() { + EE2CAclQueryUtils.formNativeAclJoinClause( "EE2AD.EXPRESSION_EXPERIMENT_FK" ) + " " + "where EE2AD.EXPRESSION_EXPERIMENT_FK is not NULL" + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "EE2AD.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) - + ( !SecurityUtil.isUserAdmin() ? 
" and EE2AD.EXPRESSION_EXPERIMENT_FK not in (" + TROUBLED_EXPERIMENT_IDS_SQL + ") and EE2AD.ARRAY_DESIGN_FK not in (" + TROUBLED_PLATFORM_IDS_SQL + ") " : "" ) + + formNativeNonTroubledClause( "EE2AD.ARRAY_DESIGN_FK", ArrayDesign.class ) + + formNativeNonTroubledClause( "EE2AD.EXPRESSION_EXPERIMENT_FK", ExpressionExperiment.class ) + " group by AD.TECHNOLOGY_TYPE" ) .addScalar( "TT", StandardBasicTypes.STRING ) .addScalar( "EE_COUNT", StandardBasicTypes.LONG ) @@ -1046,10 +1035,11 @@ private Map getPlatformsUsageFrequency( boolean original, int + EE2CAclQueryUtils.formNativeAclJoinClause( "ee2ad.EXPRESSION_EXPERIMENT_FK" ) + " " + "where ee2ad.IS_ORIGINAL_PLATFORM = :original" // exclude noop switch - + ( original ? " and ee2ad.ARRAY_DESIGN_FK not in (select ARRAY_DESIGN_FK from EXPRESSION_EXPERIMENT2ARRAY_DESIGN where EXPRESSION_EXPERIMENT_FK = ee2ad.EXPRESSION_EXPERIMENT_FK and ARRAY_DESIGN_FK = ee2ad.ARRAY_DESIGN_FK and not IS_ORIGINAL_PLATFORM) " : "" ) + + ( original ? " and ee2ad.ARRAY_DESIGN_FK not in (select ARRAY_DESIGN_FK from EXPRESSION_EXPERIMENT2ARRAY_DESIGN where EXPRESSION_EXPERIMENT_FK = ee2ad.EXPRESSION_EXPERIMENT_FK and ARRAY_DESIGN_FK = ee2ad.ARRAY_DESIGN_FK and not IS_ORIGINAL_PLATFORM)" : "" ) + EE2CAclQueryUtils.formNativeAclRestrictionClause( ( SessionFactoryImplementor ) getSessionFactory(), "ee2ad.ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK" ) + " " // exclude troubled platforms or experiments for non-admins - + ( !SecurityUtil.isUserAdmin() ? "and ee2ad.ARRAY_DESIGN_FK not in (" + TROUBLED_PLATFORM_IDS_SQL + ") and ee2ad.EXPRESSION_EXPERIMENT_FK not in (" + TROUBLED_EXPERIMENT_IDS_SQL + ") " : "" ) + + formNativeNonTroubledClause( "ee2ad.ARRAY_DESIGN_FK", ArrayDesign.class ) + + formNativeNonTroubledClause( "ee2ad.EXPRESSION_EXPERIMENT_FK", ExpressionExperiment.class ) + " group by ad.ID " // no need to sort results if limiting, we're collecting in a map + ( maxResults > 0 ? "order by EE_COUNT desc" : "" ) ) @@ -1081,10 +1071,10 @@ private Map getPlatformsUsageFrequency( Collection eeId Query query = getSessionFactory().getCurrentSession() .createSQLQuery( "select ad.*, count(distinct ee2ad.EXPRESSION_EXPERIMENT_FK) EE_COUNT from EXPRESSION_EXPERIMENT2ARRAY_DESIGN ee2ad " + "join ARRAY_DESIGN ad on ee2ad.ARRAY_DESIGN_FK = ad.ID " - + "where ee2ad.IS_ORIGINAL_PLATFORM = :original " + + "where ee2ad.IS_ORIGINAL_PLATFORM = :original" // exclude noop switch - + ( original ? " and ee2ad.ARRAY_DESIGN_FK not in (select ARRAY_DESIGN_FK from EXPRESSION_EXPERIMENT2ARRAY_DESIGN where EXPRESSION_EXPERIMENT_FK = ee2ad.EXPRESSION_EXPERIMENT_FK and ARRAY_DESIGN_FK = ee2ad.ARRAY_DESIGN_FK and not IS_ORIGINAL_PLATFORM) " : "" ) - + "and ee2ad.EXPRESSION_EXPERIMENT_FK in :ids " + + ( original ? " and ee2ad.ARRAY_DESIGN_FK not in (select ARRAY_DESIGN_FK from EXPRESSION_EXPERIMENT2ARRAY_DESIGN where EXPRESSION_EXPERIMENT_FK = ee2ad.EXPRESSION_EXPERIMENT_FK and ARRAY_DESIGN_FK = ee2ad.ARRAY_DESIGN_FK and not IS_ORIGINAL_PLATFORM)" : "" ) + + " and ee2ad.EXPRESSION_EXPERIMENT_FK in :ids " + "group by ad.ID " // no need to sort results if limiting, we're collecting in a map + ( maxResults > 0 ? 
"order by EE_COUNT desc" : "" ) ) @@ -1228,10 +1218,11 @@ public Date getLastArrayDesignUpdate( ExpressionExperiment ee ) { @Override public Map getPerTaxonCount() { + //language=HQL String queryString = "select ee.taxon, count(distinct ee) as EE_COUNT from ExpressionExperiment ee " - + AclQueryUtils.formAclRestrictionClause( "ee.id" ) + " " - + formNonTroubledClause( "ee" ) + " " - + "group by ee.taxon"; + + AclQueryUtils.formAclRestrictionClause( "ee.id" ) + + formNonTroubledClause( "ee", ExpressionExperiment.class ) + + " group by ee.taxon"; Query query = this.getSessionFactory().getCurrentSession().createQuery( queryString ); From 029b9832160154375dab082136d953eac341a9db Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 25 Mar 2024 11:13:38 -0700 Subject: [PATCH 059/105] Make the mask parameterizable in EE2AclQueryUtils.formNativeAclRestrictionClause --- .../ubic/gemma/persistence/util/EE2CAclQueryUtils.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/util/EE2CAclQueryUtils.java b/gemma-core/src/main/java/ubic/gemma/persistence/util/EE2CAclQueryUtils.java index 81c4f981b1..a1e6003747 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/util/EE2CAclQueryUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/util/EE2CAclQueryUtils.java @@ -26,14 +26,18 @@ public static String formNativeAclJoinClause( String aoiIdColumn ) { } public static String formNativeAclRestrictionClause( SessionFactoryImplementor sessionFactoryImplementor, String anonymousMaskColumn ) { + return formNativeAclRestrictionClause( sessionFactoryImplementor, anonymousMaskColumn, BasePermission.READ.getMask() ); + } + + public static String formNativeAclRestrictionClause( SessionFactoryImplementor sessionFactoryImplementor, String anonymousMaskColumn, int mask ) { if ( SecurityUtil.isUserAnonymous() ) { SQLFunction bitwiseAnd = sessionFactoryImplementor.getSqlFunctionRegistry().findSQLFunction( "bitwise_and" ); - String mask = bitwiseAnd.render( new IntegerType(), Arrays.asList( anonymousMaskColumn, BasePermission.READ.getMask() ), sessionFactoryImplementor ); - return " and " + mask + " <> 0"; + String renderedMask = bitwiseAnd.render( new IntegerType(), Arrays.asList( anonymousMaskColumn, mask ), sessionFactoryImplementor ); + return " and " + renderedMask + " <> 0"; } else if ( SecurityUtil.isUserAdmin() ) { return ""; } else { - return AclQueryUtils.formNativeAclRestrictionClause( sessionFactoryImplementor ); + return AclQueryUtils.formNativeAclRestrictionClause( sessionFactoryImplementor, mask ); } } From 9ca36d5161555331073ef603778ab2836694d351 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 25 Mar 2024 12:35:36 -0700 Subject: [PATCH 060/105] No need to use a TreeMap to aggregate categories and annotations counts Those entities are non-persistent, so regular coalescing rules of URIs and labels apply. Also, because this is collected in a mapping, order is not important. Also fix bugs where a term with different labels appear twice in the output because of the sorting by label. 
Exclude the id from Characteristic's hash code --- .../common/description/Characteristic.java | 3 --- .../description/CharacteristicUtils.java | 10 ++++++++ .../ExpressionExperimentDaoImpl.java | 23 ++++++------------- .../ExpressionExperimentDaoTest.java | 3 ++- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java b/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java index c18be25801..106ef9cfda 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/description/Characteristic.java @@ -220,9 +220,6 @@ public void setMigratedToStatement( boolean migratedToStatement ) { @Override public int hashCode() { - if ( this.getId() != null ) { - return super.hashCode(); - } return Objects.hash( StringUtils.lowerCase( categoryUri != null ? categoryUri : category ), StringUtils.lowerCase( valueUri != null ? valueUri : value ) ); } diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/description/CharacteristicUtils.java b/gemma-core/src/main/java/ubic/gemma/model/common/description/CharacteristicUtils.java index b9069f2b51..4ce26f785a 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/description/CharacteristicUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/description/CharacteristicUtils.java @@ -6,6 +6,16 @@ public class CharacteristicUtils { + /** + * Create a new characteristic that represents the category of a given characteristic. + */ + public static Characteristic getCategory( Characteristic t ) { + Characteristic c = new Characteristic(); + c.setCategory( t.getCategory() ); + c.setCategoryUri( t.getCategoryUri() ); + return c; + } + /** * Compare a pair of ontology terms. */ diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index ceae7d7e65..bf3298e5d6 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -696,7 +696,7 @@ public Map getCategoriesUsageFrequency( @Nullable Collecti return aggregateByCategory( result ).entrySet().stream() .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) .limit( maxResults ) - .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue, ( a, b ) -> b, () -> new TreeMap<>( Characteristic.getByCategoryAndValueComparator() ) ) ); + .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue ) ); } } else { //noinspection unchecked @@ -713,12 +713,7 @@ public Map getCategoriesUsageFrequency( @Nullable Collecti } private Map aggregateByCategory( List result ) { - TreeMap byC = new TreeMap<>( Characteristic.getByCategoryComparator() ); - for ( Object[] row : result ) { - byC.compute( Characteristic.Factory.newInstance( null, null, null, null, ( String ) row[0], ( String ) row[1], null ), - ( k, v ) -> v != null ? 
v + ( Long ) row[2] : ( Long ) row[2] ); - } - return byC; + return result.stream().collect( Collectors.groupingBy( row -> Characteristic.Factory.newInstance( null, null, null, null, ( String ) row[0], ( String ) row[1], null ), Collectors.summingLong( row -> ( Long ) row[2] ) ) ); } /** @@ -841,11 +836,11 @@ else if ( category.startsWith( "http://" ) ) { if ( eeIds.size() > MAX_PARAMETER_LIST_SIZE ) { result = listByBatch( q, "eeIds", eeIds, 2048 ); if ( minFrequency > 1 || maxResults > 0 ) { - return aggregate( result ).entrySet().stream() + return aggregateByCategoryAndValue( result ).entrySet().stream() .filter( e -> e.getValue() >= minFrequency || ( retainedTermUris != null && retainedTermUris.contains( e.getKey().getValueUri() ) ) ) .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) .limit( maxResults > 0 ? maxResults : Long.MAX_VALUE ) - .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue, ( a, b ) -> b, () -> new TreeMap<>( Characteristic.getByCategoryAndValueComparator() ) ) ); + .collect( Collectors.toMap( Map.Entry::getKey, Map.Entry::getValue ) ); } } else { //noinspection unchecked @@ -857,15 +852,11 @@ else if ( category.startsWith( "http://" ) ) { //noinspection unchecked result = q.setMaxResults( maxResults ).list(); } - return aggregate( result ); + return aggregateByCategoryAndValue( result ); } - private Map aggregate( List result ) { - TreeMap byC = new TreeMap<>( Characteristic.getByCategoryAndValueComparator() ); - for ( Object[] row : result ) { - byC.compute( convertRowToCharacteristic( row ), ( k, v ) -> v != null ? v + ( Long ) row[5] : ( Long ) row[5] ); - } - return byC; + private Map aggregateByCategoryAndValue( List result ) { + return result.stream().collect( Collectors.groupingBy( this::convertRowToCharacteristic, Collectors.summingLong( row -> ( Long ) row[5] ) ) ); } private Characteristic convertRowToCharacteristic( Object[] row ) { diff --git a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java index bec7448d37..5e16197ca1 100644 --- a/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java +++ b/gemma-core/src/test/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoTest.java @@ -16,6 +16,7 @@ import org.springframework.test.context.TestExecutionListeners; import ubic.gemma.core.util.test.BaseDatabaseTest; import ubic.gemma.model.common.description.Characteristic; +import ubic.gemma.model.common.description.CharacteristicUtils; import ubic.gemma.model.common.quantitationtype.*; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.bioAssay.BioAssay; @@ -173,7 +174,7 @@ public void testGetOriginalPlatformUsageFrequency() { public void testGetCategoriesWithUsageFrequency() { Characteristic c = createCharacteristic( "foo", "foo", "bar", "bar" ); Assertions.assertThat( expressionExperimentDao.getCategoriesUsageFrequency( null, null, null, null, -1 ) ) - .containsEntry( c, 1L ); + .containsEntry( CharacteristicUtils.getCategory( c ), 1L ); } @Test From 481a39dd8f5ac0f56fdc6f198089c8c12465f1ff Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 25 Mar 2024 13:17:38 -0700 Subject: [PATCH 061/105] Add definitions for uncategorized, free-text, etc. 
in CharacteristicUtils --- .../description/CharacteristicUtils.java | 21 ++++++++++ .../ExpressionExperimentDaoImpl.java | 5 ++- .../description/CharacteristicUtilsTest.java | 40 ++++++++++++++++++- 3 files changed, 62 insertions(+), 4 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/description/CharacteristicUtils.java b/gemma-core/src/main/java/ubic/gemma/model/common/description/CharacteristicUtils.java index 4ce26f785a..ae323713a9 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/description/CharacteristicUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/description/CharacteristicUtils.java @@ -16,6 +16,27 @@ public static Characteristic getCategory( Characteristic t ) { return c; } + /** + * Check if the given characteristic is uncategorized. + */ + public static boolean isUncategorized( Characteristic c ) { + return c.getCategory() == null && c.getCategoryUri() == null; + } + + /** + * Check if the given characteristic has or is a free-text category. + */ + public static boolean isFreeTextCategory( Characteristic c ) { + return c.getCategory() != null && c.getCategoryUri() == null; + } + + /** + * Check if the given characteristic is a free-text value. + */ + public static boolean isFreeText( Characteristic c ) { + return c.getValue() != null && c.getValueUri() == null; + } + /** * Compare a pair of ontology terms. */ diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index bf3298e5d6..e7fe5fe549 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -66,6 +66,7 @@ import static java.util.stream.Collectors.groupingBy; import static java.util.stream.Collectors.summingLong; +import static ubic.gemma.model.common.description.CharacteristicUtils.*; import static ubic.gemma.persistence.service.TableMaintenanceUtil.EE2AD_QUERY_SPACE; import static ubic.gemma.persistence.service.TableMaintenanceUtil.EE2C_QUERY_SPACE; import static ubic.gemma.persistence.util.QueryUtils.*; @@ -487,11 +488,11 @@ public Collection getAnnotationsByBioMaterials( for ( Characteristic c : raw ) { // filter. Could include this in the query if it isn't too complicated. 
- if ( c.getCategoryUri() == null ) { + if ( isUncategorized( c ) || isFreeTextCategory( c ) ) { continue; } - if ( c.getValueUri() == null ) { + if ( isFreeText( c ) ) { continue; } diff --git a/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicUtilsTest.java b/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicUtilsTest.java index 93e1c65cab..27feeef0ad 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicUtilsTest.java +++ b/gemma-core/src/test/java/ubic/gemma/model/common/description/CharacteristicUtilsTest.java @@ -2,12 +2,39 @@ import org.junit.Test; -import static org.junit.Assert.assertEquals; +import static org.junit.Assert.*; +import static ubic.gemma.model.common.description.CharacteristicUtils.*; public class CharacteristicUtilsTest { @Test - public void test() { + public void testUncategorized() { + assertTrue( isUncategorized( createCharacteristic( null, null, null, null ) ) ); + assertFalse( isUncategorized( createCharacteristic( "a", null, null, null ) ) ); + } + + @Test + public void testIsFreeTextCategory() { + assertFalse( isFreeTextCategory( createCharacteristic( null, null, null, null ) ) ); + assertTrue( isFreeTextCategory( createCharacteristic( "a", null, null, null ) ) ); + } + + @Test + public void testIsFreeText() { + assertTrue( isFreeText( createCharacteristic( null, null, "foo", null ) ) ); + assertFalse( isFreeText( createCharacteristic( null, null, "foo", "bar" ) ) ); + } + + @Test + public void testEquals() { + assertTrue( CharacteristicUtils.equals( "a", "b", "a", "b" ) ); + assertTrue( CharacteristicUtils.equals( null, "b", "c", "b" ) ); + assertFalse( CharacteristicUtils.equals( null, "b", "c", "c" ) ); + assertTrue( CharacteristicUtils.equals( "A", null, "a", null ) ); + } + + @Test + public void testCompareTerm() { // terms with identical URIs are collapsed assertEquals( 0, CharacteristicUtils.compareTerm( "a", "test", "b", "test" ) ); // terms with different URIs are compared by label @@ -15,4 +42,13 @@ public void test() { assertEquals( 1, CharacteristicUtils.compareTerm( "b", "test", "a", "bar" ) ); } + private Characteristic createCharacteristic( String category, String categoryUri, String value, String valueUri ) { + Characteristic c = new Characteristic(); + c.setCategory( category ); + c.setCategoryUri( categoryUri ); + c.setValue( value ); + c.setValueUri( valueUri ); + return c; + } + } \ No newline at end of file From c8556a7a3e8f091f5bbe4c47035175486416bdd6 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 25 Mar 2024 14:27:06 -0700 Subject: [PATCH 062/105] Fix various font sizes (fix #1063) Use an absolute pixel font size in heatmaps as it must match the height of the row. Adjust the sequence textarea in composite sequence view to match that of the platform. 
--- gemma-web/src/main/webapp/pages/compositeSequence.detail.jsp | 2 +- .../scripts/api/entities/platform/SequenceDetailsPanel.js | 4 ++-- .../webapp/scripts/api/visualization/VisualizationWidget.js | 5 +++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/gemma-web/src/main/webapp/pages/compositeSequence.detail.jsp b/gemma-web/src/main/webapp/pages/compositeSequence.detail.jsp index 6dec611b30..4744eff37f 100644 --- a/gemma-web/src/main/webapp/pages/compositeSequence.detail.jsp +++ b/gemma-web/src/main/webapp/pages/compositeSequence.detail.jsp @@ -167,7 +167,7 @@ - No sequence diff --git a/gemma-web/src/main/webapp/scripts/api/entities/platform/SequenceDetailsPanel.js b/gemma-web/src/main/webapp/scripts/api/entities/platform/SequenceDetailsPanel.js index d62d772454..95a9d160de 100644 --- a/gemma-web/src/main/webapp/scripts/api/entities/platform/SequenceDetailsPanel.js +++ b/gemma-web/src/main/webapp/scripts/api/entities/platform/SequenceDetailsPanel.js @@ -105,8 +105,8 @@ Gemma.SequenceDetailsPanel = Ext html: { tag: 'div', html: seq.sequence, - cls: 'clob', - style: 'word-wrap: break-word;width:500px;height:100px;padding:4px;margin:3px;font-size:0.9em;font-family:monospace' + cls: 'clob smaller', + style: 'word-wrap: break-word;width:500px;height:100px;padding:4px;margin:3px;font-family:monospace' } }); } diff --git a/gemma-web/src/main/webapp/scripts/api/visualization/VisualizationWidget.js b/gemma-web/src/main/webapp/scripts/api/visualization/VisualizationWidget.js index 69cdd1be45..65654fc99e 100755 --- a/gemma-web/src/main/webapp/scripts/api/visualization/VisualizationWidget.js +++ b/gemma-web/src/main/webapp/scripts/api/visualization/VisualizationWidget.js @@ -169,9 +169,10 @@ Gemma.prepareProfiles = function( data, showPValues ) { pvalueLabel = sprintf( "%.2e ", pvalue ); } - var labelStyle = ''; + // use a fixed font size that matches the heatmap row height + var labelStyle = 'font-size: 12px'; if ( factor && factor < 2 ) { - labelStyle = "font-style:italic"; + labelStyle += ";font-style:italic"; // qtip = qtip + " [Not significant]"; } From 5718f86544ec9024194f349f18bdbd0b6aaed588 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 25 Mar 2024 15:53:55 -0700 Subject: [PATCH 063/105] Fix cases where a parent or children term does not have a label --- .../core/ontology/OntologyServiceImpl.java | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java index 59b2069028..b3f28d918d 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java @@ -33,10 +33,7 @@ import org.springframework.core.task.SimpleAsyncTaskExecutor; import org.springframework.core.task.TaskExecutor; import org.springframework.stereotype.Service; -import ubic.basecode.ontology.model.AnnotationProperty; -import ubic.basecode.ontology.model.OntologyProperty; -import ubic.basecode.ontology.model.OntologyTerm; -import ubic.basecode.ontology.model.OntologyTermSimple; +import ubic.basecode.ontology.model.*; import ubic.basecode.ontology.providers.ExperimentalFactorOntologyService; import ubic.basecode.ontology.providers.ObiService; import ubic.basecode.ontology.search.OntologySearch; @@ -415,6 +412,7 @@ private Set getParentsOrChildren( Collection terms, } } }, String.format( "%s %s of %d terms", direct ? 
"direct" : "all", parents ? "parents" : "children", terms.size() ) ); + if ( results.addAll( newResults ) && !direct ) { // there are new results (i.e. a term was inferred from a different ontology), we need to requery them // if they were not in the query @@ -427,6 +425,19 @@ private Set getParentsOrChildren( Collection terms, toQuery.clear(); } } + + // when an ontology returns a result without a label, it might be referring to another ontology, so we attempt + // to retrieve a results with a label as a replacement + Set resultsWithMissingLabels = results.stream() + .filter( t -> t.getLabel() == null ) + .map( OntologyResource::getUri ) + .collect( Collectors.toSet() ); + if ( !resultsWithMissingLabels.isEmpty() ) { + Set replacements = getTerms( resultsWithMissingLabels ); + results.removeAll( replacements ); + results.addAll( replacements ); + } + // drop terms without labels results.removeIf( t -> t.getLabel() == null ); return new HashSet<>( results ); From 617b02266f8612202b4839c4dd88a62475067868 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Mon, 25 Mar 2024 16:54:43 -0700 Subject: [PATCH 064/105] Allow complete access to Lucene's query syntax like it used to be --- .../search/source/HibernateSearchSource.java | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java index 2e49db42ea..8d3c1ec31b 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java @@ -5,14 +5,16 @@ import org.apache.commons.lang3.time.StopWatch; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Query; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.util.Version; import org.hibernate.SessionFactory; import org.hibernate.search.FullTextQuery; import org.hibernate.search.FullTextSession; import org.hibernate.search.Search; -import org.hibernate.search.query.dsl.QueryBuilder; import org.springframework.beans.factory.InitializingBean; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; @@ -163,13 +165,14 @@ public Collection> searchGeneSet( SearchSettings settings private Collection> searchFor( SearchSettings settings, Class clazz, String... fields ) throws HibernateSearchException { try { FullTextSession fullTextSession = Search.getFullTextSession( sessionFactory.getCurrentSession() ); - QueryBuilder queryBuilder = fullTextSession.getSearchFactory().buildQueryBuilder().forEntity( clazz ) - .get(); - Query query = queryBuilder.keyword() - .onFields( fields ) - .matching( settings.getQuery() ) - .createQuery(); Analyzer analyzer = analyzers.get( clazz ); + QueryParser queryParser = new QueryParser( Version.LUCENE_36, "", analyzer ); + Query query; + try { + query = queryParser.parse( settings.getQuery() ); + } catch ( ParseException e ) { + throw new org.hibernate.search.SearchException( e ); + } Highlighter highlighter = settings.getHighlighter() != null ? 
settings.getHighlighter().createLuceneHighlighter( new QueryScorer( query ) ) : null; String[] projection; if ( highlighter != null ) { From ee1716d36877c6b7dcddb7f63ee3519920e248c9 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 26 Mar 2024 10:01:04 -0700 Subject: [PATCH 065/105] Search in all fields --- .../gemma/core/search/DefaultHighlighter.java | 21 +++++++++++++------ .../ubic/gemma/core/search/Highlighter.java | 9 ++++---- .../search/source/HibernateSearchSource.java | 9 ++++---- .../model/common/search/SearchSettings.java | 4 ++-- .../source/OntologySearchSourceTest.java | 18 ++++++++-------- .../ubic/gemma/rest/DatasetsWebService.java | 11 +++------- .../ubic/gemma/rest/SearchWebService.java | 16 +++++++------- .../GeneralSearchControllerImpl.java | 4 ++-- 8 files changed, 47 insertions(+), 45 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/DefaultHighlighter.java b/gemma-core/src/main/java/ubic/gemma/core/search/DefaultHighlighter.java index 499aca3250..0a887702b7 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/DefaultHighlighter.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/DefaultHighlighter.java @@ -4,8 +4,8 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; +import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; -import org.apache.lucene.search.highlight.QueryScorer; import ubic.gemma.core.search.lucene.SimpleHTMLFormatter; import javax.annotation.Nullable; @@ -13,26 +13,35 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; -import java.util.Set; @CommonsLog public class DefaultHighlighter implements Highlighter { + private final Formatter formatter; + + public DefaultHighlighter() { + this( new SimpleHTMLFormatter() ); + } + + public DefaultHighlighter( Formatter formatter ) { + this.formatter = formatter; + } + @Override public Map highlightTerm( @Nullable String termUri, String termLabel, String field ) { return Collections.emptyMap(); } @Override - public org.apache.lucene.search.highlight.Highlighter createLuceneHighlighter( QueryScorer queryScorer ) { - return new org.apache.lucene.search.highlight.Highlighter( new SimpleHTMLFormatter(), queryScorer ); + public Formatter getFormatter() { + return formatter; } @Override - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer, Set fields ) { + public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer ) { Map highlights = new HashMap<>(); for ( Fieldable field : document.getFields() ) { - if ( !field.isTokenized() || field.isBinary() || !fields.contains( field.name() ) ) { + if ( !field.isTokenized() || field.isBinary() ) { continue; } try { diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/Highlighter.java b/gemma-core/src/main/java/ubic/gemma/core/search/Highlighter.java index ff170ec51a..e3b3ad2d59 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/Highlighter.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/Highlighter.java @@ -2,7 +2,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.Formatter; import javax.annotation.Nullable; import java.util.Map; @@ 
-26,13 +26,12 @@ public interface Highlighter { Map highlightTerm( @Nullable String termUri, String termLabel, String field ); /** - * Obtain a highlighter for Lucene hits to be used with {@link #highlightDocument(Document, org.apache.lucene.search.highlight.Highlighter, Analyzer, Set)}. + * Obtain a formatter for highlights. */ - @Nullable - org.apache.lucene.search.highlight.Highlighter createLuceneHighlighter( QueryScorer queryScorer ); + Formatter getFormatter(); /** * Highlight a given Lucene document. */ - Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer, Set fields ); + Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java index 8d3c1ec31b..466d09f6b2 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java @@ -5,6 +5,7 @@ import org.apache.commons.lang3.time.StopWatch; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Query; @@ -166,14 +167,14 @@ private Collection> searchFor( SearchSe try { FullTextSession fullTextSession = Search.getFullTextSession( sessionFactory.getCurrentSession() ); Analyzer analyzer = analyzers.get( clazz ); - QueryParser queryParser = new QueryParser( Version.LUCENE_36, "", analyzer ); + QueryParser queryParser = new MultiFieldQueryParser( Version.LUCENE_36, fields, analyzer ); Query query; try { query = queryParser.parse( settings.getQuery() ); } catch ( ParseException e ) { throw new org.hibernate.search.SearchException( e ); } - Highlighter highlighter = settings.getHighlighter() != null ? settings.getHighlighter().createLuceneHighlighter( new QueryScorer( query ) ) : null; + Highlighter highlighter = settings.getHighlighter() != null ? new Highlighter( settings.getHighlighter().getFormatter(), new QueryScorer( query ) ) : null; String[] projection; if ( highlighter != null ) { projection = new String[] { settings.isFillResults() ? FullTextQuery.THIS : FullTextQuery.ID, FullTextQuery.SCORE, FullTextQuery.DOCUMENT }; @@ -213,9 +214,9 @@ private SearchResult searchResultFromRow( Object[] r // this happens if an entity is still in the cache, but was removed from the database return null; } - return SearchResult.from( clazz, entity, ( Float ) row[1], highlighter != null ? settings.highlightDocument( ( Document ) row[2], highlighter, analyzer, fields ) : null, "hibernateSearch" ); + return SearchResult.from( clazz, entity, ( Float ) row[1], highlighter != null ? settings.highlightDocument( ( Document ) row[2], highlighter, analyzer ) : null, "hibernateSearch" ); } else { - return SearchResult.from( clazz, ( Long ) row[0], ( Float ) row[1], highlighter != null ? settings.highlightDocument( ( Document ) row[2], highlighter, analyzer, fields ) : null, "hibernateSearch" ); + return SearchResult.from( clazz, ( Long ) row[0], ( Float ) row[1], highlighter != null ? 
settings.highlightDocument( ( Document ) row[2], highlighter, analyzer ) : null, "hibernateSearch" ); } } } diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettings.java b/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettings.java index 4e48b6f29f..d437f5579b 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettings.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettings.java @@ -289,8 +289,8 @@ public Map highlightTerm( String termUri, String termLabel, Stri } @Nullable - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter luceneHighlighter, Analyzer analyzer, Set fields ) { - return highlighter != null ? highlighter.highlightDocument( document, luceneHighlighter, analyzer, fields ) : null; + public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter luceneHighlighter, Analyzer analyzer ) { + return highlighter != null ? highlighter.highlightDocument( document, luceneHighlighter, analyzer ) : null; } @Override diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java index 31aa3f3972..cbec7873b2 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java @@ -2,7 +2,8 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.Formatter; +import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.junit.After; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -18,6 +19,7 @@ import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; import ubic.gemma.core.search.SearchSource; +import ubic.gemma.core.search.lucene.SimpleMarkdownFormatter; import ubic.gemma.model.common.search.SearchSettings; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.common.description.CharacteristicService; @@ -89,14 +91,13 @@ public Map highlightTerm( @Nullable String termUri, String termL return Collections.singletonMap( field, termUri != null ? String.format( "[%s](%s)", termLabel, termUri ) : termLabel ); } - @Nullable @Override - public org.apache.lucene.search.highlight.Highlighter createLuceneHighlighter( QueryScorer queryScorer ) { - return null; + public Formatter getFormatter() { + return new SimpleHTMLFormatter(); } @Override - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer, Set fields ) { + public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer ) { return Collections.emptyMap(); } } ) ); @@ -126,14 +127,13 @@ public Map highlightTerm( @Nullable String termUri, String termL return Collections.singletonMap( field, termUri != null ? 
String.format( "[%s](%s)", termLabel, termUri ) : termLabel ); } - @Nullable @Override - public org.apache.lucene.search.highlight.Highlighter createLuceneHighlighter( QueryScorer queryScorer ) { - return null; + public Formatter getFormatter() { + return new SimpleMarkdownFormatter(); } @Override - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer, Set fields ) { + public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer ) { return Collections.emptyMap(); } } ) ); diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index b06285da96..1123c447db 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -27,7 +27,6 @@ import org.apache.commons.io.FilenameUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.search.highlight.QueryScorer; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.security.access.annotation.Secured; import org.springframework.stereotype.Service; @@ -138,6 +137,7 @@ private class Highlighter extends DefaultHighlighter { private final Set documentIdsToHighlight; private Highlighter( Set documentIdsToHighlight ) { + super( new SimpleMarkdownFormatter() ); this.documentIdsToHighlight = documentIdsToHighlight; } @@ -156,18 +156,13 @@ public Map highlightTerm( @Nullable String termUri, String termL } @Override - public org.apache.lucene.search.highlight.Highlighter createLuceneHighlighter( QueryScorer queryScorer ) { - return new org.apache.lucene.search.highlight.Highlighter( new SimpleMarkdownFormatter(), queryScorer ); - } - - @Override - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer, Set fields ) { + public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer ) { long id = Long.parseLong( document.get( "id" ) ); // TODO: maybe use a filter in the Lucene query? 
if ( !documentIdsToHighlight.contains( id ) ) { return Collections.emptyMap(); } - return super.highlightDocument( document, highlighter, analyzer, fields ); + return super.highlightDocument( document, highlighter, analyzer ); } } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java index 94295d6842..dea9c349a1 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java @@ -11,7 +11,6 @@ import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.search.highlight.QueryScorer; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.MediaType; import org.springframework.stereotype.Service; @@ -24,6 +23,7 @@ import ubic.gemma.model.IdentifiableValueObject; import ubic.gemma.model.common.Identifiable; import ubic.gemma.model.common.description.BibliographicReferenceValueObject; +import ubic.gemma.model.common.description.CharacteristicValueObject; import ubic.gemma.model.common.search.SearchSettings; import ubic.gemma.model.expression.arrayDesign.ArrayDesignValueObject; import ubic.gemma.model.expression.designElement.CompositeSequenceValueObject; @@ -32,7 +32,6 @@ import ubic.gemma.model.genome.TaxonValueObject; import ubic.gemma.model.genome.gene.GeneSetValueObject; import ubic.gemma.model.genome.gene.GeneValueObject; -import ubic.gemma.model.common.description.CharacteristicValueObject; import ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.genome.taxon.TaxonService; @@ -99,6 +98,10 @@ private class Highlighter extends DefaultHighlighter { private int highlightedDocuments = 0; + public Highlighter() { + super( new SimpleMarkdownFormatter() ); + } + @Override public Map highlightTerm( @Nullable String uri, String label, String field ) { String searchUrl = ServletUriComponentsBuilder.fromRequest( request ) @@ -110,17 +113,12 @@ public Map highlightTerm( @Nullable String uri, String label, St } @Override - public org.apache.lucene.search.highlight.Highlighter createLuceneHighlighter( QueryScorer queryScorer ) { - return new org.apache.lucene.search.highlight.Highlighter( new SimpleMarkdownFormatter(), queryScorer ); - } - - @Override - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer, Set fields ) { + public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer ) { if ( highlightedDocuments >= MAX_HIGHLIGHTED_DOCUMENTS ) { return Collections.emptyMap(); } highlightedDocuments++; - return super.highlightDocument( document, highlighter, analyzer, fields ); + return super.highlightDocument( document, highlighter, analyzer ); } } diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/GeneralSearchControllerImpl.java b/gemma-web/src/main/java/ubic/gemma/web/controller/GeneralSearchControllerImpl.java index ef76b61561..d3b58c6cab 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/GeneralSearchControllerImpl.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/GeneralSearchControllerImpl.java @@ -202,12 +202,12 @@ public Map highlightTerm( @Nullable String uri, String value, St } 
@Override - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer, Set fields ) { + public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer ) { if ( highlightedDocuments >= MAX_HIGHLIGHTED_DOCUMENTS ) { return Collections.emptyMap(); } highlightedDocuments++; - return super.highlightDocument( document, highlighter, analyzer, fields ) + return super.highlightDocument( document, highlighter, analyzer ) .entrySet().stream() .collect( Collectors.toMap( e -> localizeField( StringUtils.substringAfterLast( document.get( "_hibernate_class" ), '.' ), e.getKey() ), Map.Entry::getValue, ( a, b ) -> b ) ); } From 838856e2b322b809dba4487440ff4bd729f90f11 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 26 Mar 2024 10:15:43 -0700 Subject: [PATCH 066/105] cli: Fix parsing and declaration of numerical parameters --- .../ubic/gemma/core/apps/ArrayDesignBlatCli.java | 4 ++-- .../gemma/core/apps/ArrayDesignProbeMapperCli.java | 12 ++++++------ .../java/ubic/gemma/core/apps/LinkAnalysisCli.java | 4 ++-- .../main/java/ubic/gemma/core/util/AbstractCLI.java | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignBlatCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignBlatCli.java index c0c6075aa2..d25cfc1d96 100644 --- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignBlatCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignBlatCli.java @@ -73,7 +73,7 @@ protected void buildOptions( Options options ) { .desc( "Threshold (0-1.0) for acceptance of BLAT alignments [Default = " + this.blatScoreThreshold + "]" ) .longOpt( "scoreThresh" ) - .type( Double.class ) + .type( Number.class ) .build(); options.addOption( Option.builder( "sensitive" ).desc( "Run on more sensitive server, if available" ).build() ); @@ -107,7 +107,7 @@ protected void processOptions( CommandLine commandLine ) throws ParseException { // } if ( commandLine.hasOption( 's' ) ) { - this.blatScoreThreshold = ( Double ) commandLine.getParsedOptionValue( 's' ); + this.blatScoreThreshold = ( ( Number ) commandLine.getParsedOptionValue( 's' ) ).doubleValue(); } TaxonService taxonService = this.getBean( TaxonService.class ); diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignProbeMapperCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignProbeMapperCli.java index 7f0ba64765..2bc30977cd 100644 --- a/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignProbeMapperCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/ArrayDesignProbeMapperCli.java @@ -87,17 +87,17 @@ protected void buildOptions( Options options ) { super.buildOptions( options ); options.addOption( Option.builder( "i" ).hasArg().argName( "value" ) - .type( Double.class ) + .type( Number.class ) .desc( "Sequence identity threshold, default = " + ProbeMapperConfig.DEFAULT_IDENTITY_THRESHOLD ) .longOpt( "identityThreshold" ).build() ); options.addOption( Option.builder( "s" ).hasArg().argName( "value" ) - .type( Double.class ) + .type( Number.class ) .desc( "Blat score threshold, default = " + ProbeMapperConfig.DEFAULT_SCORE_THRESHOLD ) .longOpt( "scoreThreshold" ).build() ); options.addOption( Option.builder( "o" ).hasArg().argName( "value" ) - .type( Double.class ) + .type( Number.class ) .desc( "Minimum fraction of probe overlap with exons, default = " + 
ProbeMapperConfig.DEFAULT_MINIMUM_EXON_OVERLAP_FRACTION ) .longOpt( "overlapThreshold" ) .build() ); @@ -234,7 +234,7 @@ protected void processOptions( CommandLine commandLine ) throws ParseException { } if ( commandLine.hasOption( 's' ) ) { - blatScoreThreshold = ( Double ) commandLine.getParsedOptionValue( 's' ); + blatScoreThreshold = ( ( Number ) commandLine.getParsedOptionValue( 's' ) ).doubleValue(); if ( blatScoreThreshold < 0 || blatScoreThreshold > 1 ) { throw new IllegalArgumentException( "BLAT score threshold must be between 0 and 1" ); } @@ -249,14 +249,14 @@ protected void processOptions( CommandLine commandLine ) throws ParseException { this.mirnaOnlyModeOption = commandLine.hasOption( ArrayDesignProbeMapperCli.MIRNA_ONLY_MODE_OPTION ); if ( commandLine.hasOption( 'i' ) ) { - identityThreshold = ( Double ) commandLine.getParsedOptionValue( 'i' ); + identityThreshold = ( ( Number ) commandLine.getParsedOptionValue( 'i' ) ).doubleValue(); if ( identityThreshold < 0 || identityThreshold > 1 ) { throw new IllegalArgumentException( "Identity threshold must be between 0 and 1" ); } } if ( commandLine.hasOption( 'o' ) ) { - overlapThreshold = ( Double ) commandLine.getParsedOptionValue( 'o' ); + overlapThreshold = ( ( Number ) commandLine.getParsedOptionValue( 'o' ) ).doubleValue(); if ( overlapThreshold < 0 || overlapThreshold > 1 ) { throw new IllegalArgumentException( "Overlap threshold must be between 0 and 1" ); } diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java index 7f965191b3..8990039f6b 100644 --- a/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java +++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/LinkAnalysisCli.java @@ -291,7 +291,7 @@ protected void buildOptions( Options options ) { .build(); options.addOption( chooseCutOption ); - options.addOption( Option.builder( "probeDegreeLim" ).hasArg().type( Integer.class ).build() ); + options.addOption( Option.builder( "probeDegreeLim" ).hasArg().type( Number.class ).build() ); // finer-grained control is possible, of course. Option skipQC = Option.builder( "noqc" ) @@ -427,7 +427,7 @@ protected void processOptions( CommandLine commandLine ) throws ParseException { } if ( commandLine.hasOption( "probeDegreeLim" ) ) { - this.linkAnalysisConfig.setProbeDegreeThreshold( ( Integer ) commandLine.getParsedOptionValue( "probeDegreeLim" ) ); + this.linkAnalysisConfig.setProbeDegreeThreshold( ( ( Number ) commandLine.getParsedOptionValue( "probeDegreeLim" ) ).intValue() ); } } diff --git a/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java index 2dff2278ea..ab56b5e2da 100644 --- a/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java +++ b/gemma-cli/src/main/java/ubic/gemma/core/util/AbstractCLI.java @@ -273,7 +273,7 @@ protected void addDateOption( Options options ) { protected void addThreadsOption( Options options ) { options.addOption( Option.builder( THREADS_OPTION ).argName( "numThreads" ).hasArg() .desc( "Number of threads to use for batch processing." 
) - .type( Integer.class ) + .type( Number.class ) .build() ); } @@ -349,7 +349,7 @@ protected void processStandardOptions( CommandLine commandLine ) throws ParseExc this.autoSeek = commandLine.hasOption( AbstractCLI.AUTO_OPTION_NAME ); if ( commandLine.hasOption( THREADS_OPTION ) ) { - this.numThreads = ( Integer ) commandLine.getParsedOptionValue( THREADS_OPTION ); + this.numThreads = ( ( Number ) commandLine.getParsedOptionValue( THREADS_OPTION ) ).intValue(); if ( this.numThreads < 1 ) { throw new IllegalArgumentException( "Number of threads must be greater than 1." ); } From 9f75731a8489cc645dbe8a8fe17eacc982cb8b5d Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 26 Mar 2024 10:32:17 -0700 Subject: [PATCH 067/105] Improve document indexing --- .../ubic/gemma/core/apps/IndexGemmaCLI.java | 30 +++++++++++-------- .../gemma/core/search/IndexerService.java | 18 +++++++---- .../gemma/core/search/IndexerServiceImpl.java | 28 +++++++++++++---- .../tasks/maintenance/IndexerTaskImpl.java | 2 +- 4 files changed, 53 insertions(+), 25 deletions(-) diff --git a/gemma-cli/src/main/java/ubic/gemma/core/apps/IndexGemmaCLI.java b/gemma-cli/src/main/java/ubic/gemma/core/apps/IndexGemmaCLI.java index 94a9e68c4c..22608950af 100644 --- a/gemma-cli/src/main/java/ubic/gemma/core/apps/IndexGemmaCLI.java +++ b/gemma-cli/src/main/java/ubic/gemma/core/apps/IndexGemmaCLI.java @@ -4,6 +4,7 @@ import org.apache.commons.cli.Options; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; +import org.springframework.stereotype.Component; import ubic.gemma.core.search.IndexerService; import ubic.gemma.core.util.AbstractCLI; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; @@ -21,22 +22,21 @@ import java.util.Set; import java.util.stream.Collectors; +@Component public class IndexGemmaCLI extends AbstractCLI { - private static final String THREADS_OPTION = "threads"; - /** * A list of all searchable entities this CLI supports. 
*/ private static final IndexableEntity[] indexableEntities = { - new IndexableEntity( "g", "genes", Gene.class ), - new IndexableEntity( "e", "datasets", ExpressionExperiment.class ), - new IndexableEntity( "a", "platforms", ArrayDesign.class ), - new IndexableEntity( "b", "bibliographic references", BibliographicReference.class ), - new IndexableEntity( "s", "probes", CompositeSequence.class ), - new IndexableEntity( "q", "sequences", BioSequence.class ), - new IndexableEntity( "x", "datasets groups", ExpressionExperimentSet.class ), - new IndexableEntity( "y", "gene sets", GeneSet.class ) + new IndexableEntity( "g", "genes", Gene.class, 1000 ), + new IndexableEntity( "e", "datasets", ExpressionExperiment.class, 100 ), + new IndexableEntity( "a", "platforms", ArrayDesign.class, 10 ), + new IndexableEntity( "b", "bibliographic references", BibliographicReference.class, 1000 ), + new IndexableEntity( "s", "probes", CompositeSequence.class, 1000 ), + new IndexableEntity( "q", "sequences", BioSequence.class, 1000 ), + new IndexableEntity( "x", "datasets groups", ExpressionExperimentSet.class, 10 ), + new IndexableEntity( "y", "gene sets", GeneSet.class, 10 ) }; @lombok.Value @@ -44,6 +44,7 @@ private static class IndexableEntity { String option; String description; Class clazz; + int loggingFrequency; } @Autowired @@ -53,7 +54,6 @@ private static class IndexableEntity { private File searchDir; private final Set> classesToIndex = new HashSet<>(); - private int numThreads; @Override public String getCommandName() { @@ -80,23 +80,27 @@ protected void buildOptions( Options options ) { @Override protected void processOptions( CommandLine commandLine ) { + int loggingFrequency = 1000; for ( IndexableEntity ie : indexableEntities ) { if ( commandLine.hasOption( ie.option ) ) { classesToIndex.add( ie.clazz ); + loggingFrequency = Math.min( loggingFrequency, ie.loggingFrequency ); } } + indexerService.setLoggingFrequency( loggingFrequency ); + indexerService.setNumThreads( getNumThreads() ); } @Override protected void doWork() throws Exception { if ( classesToIndex.isEmpty() ) { log.info( String.format( "All entities will be indexed under %s.", searchDir.getAbsolutePath() ) ); - indexerService.index( getNumThreads() ); + indexerService.index(); } else { log.info( String.format( "The following entities will be indexed under %s:\n\t%s", searchDir.getAbsolutePath(), classesToIndex.stream().map( Class::getName ).collect( Collectors.joining( "\n\t" ) ) ) ); - indexerService.index( classesToIndex, getNumThreads() ); + indexerService.index( classesToIndex ); } } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/IndexerService.java b/gemma-core/src/main/java/ubic/gemma/core/search/IndexerService.java index ab8da73119..b270d61482 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/IndexerService.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/IndexerService.java @@ -12,14 +12,22 @@ public interface IndexerService { /** * Index all the searchable entities. - * @param numThreads number of threads to use for loading and indexing */ - void index( int numThreads ); + void index(); /** - * Index all the given classes. + * Index the given classes. * @param classesToIndex a set of classes to index - * @param numThreads number of threads to use for loading and indexing */ - void index( Set> classesToIndex, int numThreads ); + void index( Set> classesToIndex ); + + /** + * Set the number of threads to use for indexing entities. 
+ */ + void setNumThreads( int numThreads ); + + /** + * Set the logging frequency for reporting progress. + */ + void setLoggingFrequency( int loggingFrequency ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/IndexerServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/search/IndexerServiceImpl.java index 37c8e8e9f4..86d3afc2b9 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/IndexerServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/IndexerServiceImpl.java @@ -6,6 +6,7 @@ import org.hibernate.search.impl.SimpleIndexingProgressMonitor; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; +import org.springframework.util.Assert; import ubic.gemma.model.common.Identifiable; import java.util.Set; @@ -16,25 +17,40 @@ public class IndexerServiceImpl implements IndexerService { @Autowired private SessionFactory sessionFactory; + private int numThreads = 1; + private int loggingFrequency = 10000; + @Override - public void index( int numThreads ) { - doIndex( new Class[0], numThreads ); + public void index() { + doIndex( new Class[0] ); } @Override - public void index( Set> classesToIndex, int numThreads ) { + public void index( Set> classesToIndex ) { if ( classesToIndex.isEmpty() ) { return; } - doIndex( classesToIndex.toArray( new Class[0] ), numThreads ); + doIndex( classesToIndex.toArray( new Class[0] ) ); + } + + @Override + public void setNumThreads( int numThreads ) { + Assert.isTrue( numThreads > 0, "The number of threads must be strictly positive." ); + this.numThreads = numThreads; + } + + @Override + public void setLoggingFrequency( int loggingFrequency ) { + Assert.isTrue( loggingFrequency > 0, "The logging frequency must be strictly positive." 
); + this.loggingFrequency = loggingFrequency; } - private void doIndex( Class[] classesToIndex, int numThreads ) { + private void doIndex( Class[] classesToIndex ) { FullTextSession fullTextSession = Search.getFullTextSession( sessionFactory.openSession() ); try { fullTextSession.createIndexer( classesToIndex ) .threadsToLoadObjects( numThreads ) - .progressMonitor( new SimpleIndexingProgressMonitor( 10000 ) ) + .progressMonitor( new SimpleIndexingProgressMonitor( loggingFrequency ) ) .startAndWait(); } catch ( InterruptedException e ) { Thread.currentThread().interrupt(); diff --git a/gemma-core/src/main/java/ubic/gemma/core/tasks/maintenance/IndexerTaskImpl.java b/gemma-core/src/main/java/ubic/gemma/core/tasks/maintenance/IndexerTaskImpl.java index 248a04cf56..2a5a204e29 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/tasks/maintenance/IndexerTaskImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/tasks/maintenance/IndexerTaskImpl.java @@ -53,7 +53,7 @@ public TaskResult call() throws Exception { if ( taskCommand.isIndexGeneSet() ) { classesToIndex.add( GeneSet.class ); } - indexerService.index( classesToIndex, 4 ); + indexerService.index( classesToIndex ); return new TaskResult( taskCommand, null ); } } From 0f3c6aa77e7f7707c3df77f3a71ff66e2d72f6d0 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 26 Mar 2024 10:55:24 -0700 Subject: [PATCH 068/105] Add a scheduled job for updating EEs --- .../ubic/gemma/applicationContext-schedule.xml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml b/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml index 5e3288ad8c..2c2af0c487 100644 --- a/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml +++ b/gemma-web/src/main/resources/ubic/gemma/applicationContext-schedule.xml @@ -27,6 +27,7 @@ + @@ -171,6 +172,22 @@ + + + + + + + + ubic.gemma.model.expression.experiment.ExpressionExperiment + + + + + + + + - + + ${skipIntegrationTests} diff --git a/gemma-core/src/main/resources/sql/h2/init-entities.sql b/gemma-core/src/main/resources/sql/h2/init-entities.sql new file mode 100644 index 0000000000..9d99a2ff52 --- /dev/null +++ b/gemma-core/src/main/resources/sql/h2/init-entities.sql @@ -0,0 +1,15 @@ +alter table CHARACTERISTIC + add index CHARACTERISTIC_VALUE_URI_VALUE (VALUE_URI, `VALUE`); +alter table CHARACTERISTIC + add index CHARACTERISTIC_CATEGORY_URI_CATEGORY_VALUE_URI_VALUE (CATEGORY_URI, CATEGORY, VALUE_URI, `VALUE`); +alter table CHARACTERISTIC + add index CHARACTERISTIC_PREDICATE_URI_PREDICATE (PREDICATE_URI, PREDICATE); +alter table CHARACTERISTIC + add index CHARACTERISTIC_OBJECT_URI_OBJECT (OBJECT_URI, OBJECT); +alter table CHARACTERISTIC + add index CHARACTERISTIC_SECOND_PREDICATE_URI_SECOND_PREDICATE (SECOND_PREDICATE_URI, SECOND_PREDICATE); +alter table CHARACTERISTIC + add index CHARACTERISTIC_SECOND_OBJECT_URI_SECOND_OBJECT (SECOND_OBJECT_URI, SECOND_OBJECT); + +create index EE2C_VALUE_URI_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (VALUE_URI, `VALUE`); +create index EE2C_CATEGORY_URI_CATEGORY_VALUE_URI_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (CATEGORY_URI, CATEGORY, VALUE_URI, `VALUE`); diff --git a/gemma-core/src/main/resources/sql/init-data.sql b/gemma-core/src/main/resources/sql/init-data.sql new file mode 100644 index 0000000000..2786ab788d --- /dev/null +++ b/gemma-core/src/main/resources/sql/init-data.sql @@ -0,0 +1,114 @@ +-- Initialize the database with some scraps 
of data. See also init-entities.sql and init-acls.sql + +-- all of these are used. +insert into AUDIT_TRAIL VALUES (1); +insert into AUDIT_TRAIL VALUES (2); +insert into AUDIT_TRAIL VALUES (3); + +set @n:=now(); + +-- username=gemmaAgent: id = 2, password = 'XXXXXXXX', audit trail #2, using salt={username} +insert into CONTACT (ID, CLASS, NAME, LAST_NAME, USER_NAME, PASSWORD, ENABLED, EMAIL, PASSWORD_HINT) values (2, 'User', 'gemmaAgent', '', 'gemmaAgent', '2db458c67b4b52bba0184611c302c9c174ce8de4', 1, 'pavlab-support@msl.ubc.ca', 'hint'); + +-- username=administrator: id = 1, password = 'administrator', audit trail #1 using salt=username ('administrator') +insert into CONTACT (ID, CLASS, NAME, LAST_NAME, USER_NAME, PASSWORD, ENABLED, EMAIL, PASSWORD_HINT) values (1, 'User', 'administrator', '', 'administrator', 'b7338dcc17d6b6c199a75540aab6d0506567b980', 1, 'pavlab-support@msl.ubc.ca', 'hint'); + +-- initialize the audit trails +insert into AUDIT_EVENT VALUES (1, @n, 'C', 'From init script', '', 1, NULL, 1); +insert into AUDIT_EVENT VALUES (2, @n, 'C', 'From init script', '', 1, NULL, 2); +insert into AUDIT_EVENT VALUES (3, @n, 'C', 'From init script', '', 1, NULL, 3); + + +-- Note that 'Administrators' is a constant set in AuthorityConstants. The names of these groups are defined in UserGroupDao. +insert into USER_GROUP (ID, NAME, DESCRIPTION, AUDIT_TRAIL_FK) VALUES (1, 'Administrators', 'Users with administrative rights', 1); +insert into USER_GROUP (ID, NAME, DESCRIPTION, AUDIT_TRAIL_FK) VALUES (2, 'Users', 'Default group for all authenticated users', 2); +insert into USER_GROUP (ID, NAME, DESCRIPTION, AUDIT_TRAIL_FK) VALUES (3, 'Agents', 'For \'autonomous\' agents that run within the server context, such as scheduled tasks.', 3); +insert into GROUP_AUTHORITY (ID, AUTHORITY, GROUP_FK) VALUES (1, 'ADMIN', 1); +insert into GROUP_AUTHORITY (ID, AUTHORITY, GROUP_FK) VALUES (2, 'USER', 2); +insert into GROUP_AUTHORITY (ID, AUTHORITY, GROUP_FK) VALUES (3, 'AGENT', 3); + +-- make admin in the admin group +insert into GROUP_MEMBERS (USER_GROUPS_FK, GROUP_MEMBERS_FK) VALUES (1, 1); + +-- add admin to the user group (note that there is no need for a corresponding ACL entry) +insert into GROUP_MEMBERS (USER_GROUPS_FK, GROUP_MEMBERS_FK) VALUES (2, 1); + +-- add agent to the agent group +insert into GROUP_MEMBERS (USER_GROUPS_FK, GROUP_MEMBERS_FK) VALUES (3, 2); + +-- taxa +insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Homo sapiens','human','9606',1); +insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Mus musculus','mouse','10090',1); +insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Rattus norvegicus','rat','10116',1); +insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE,SECONDARY_NCBI_ID) values ('Saccharomyces cerevisiae','yeast','4932',1,559292); +insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Danio rerio','zebrafish','7955',1); +insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Drosophila melanogaster','fly','7227',1); +insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Caenorhabditis elegans','worm','6239',1); + +-- external databases + +-- we need a procedure since we have to create an audit trail +-- silly, but this needs to be in a single line because sql-maven-plugin does not deal well with statements containing multiple semi-colons +create procedure add_external_database(in name 
varchar(255), in description text, in web_uri varchar(255), in ftp_uri varchar(255), in type varchar(255)) begin insert into AUDIT_TRAIL (ID) values (null); insert into EXTERNAL_DATABASE (NAME, DESCRIPTION, WEB_URI, FTP_URI, TYPE, AUDIT_TRAIL_FK) values (name, description, web_uri, ftp_uri, type, last_insert_id()); end; + +-- insert new db we need to track various things +call add_external_database ('PubMed', 'PubMed database from NCBI', 'https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed', 'ftp://ftp.ncbi.nlm.nih.gov/pubmed/', 'LITERATURE'); +-- call add_external_database('GO', 'Gene Ontology database', 'https://www.godatabase.org/dev/database/', 'https://archive.godatabase.org', 'ONTOLOGY'); +call add_external_database('GEO', 'Gene Expression Omnibus', 'https://www.ncbi.nlm.nih.gov/geo/', 'ftp://ftp.ncbi.nih.gov/pub/geo/DATA', 'EXPRESSION'); +call add_external_database('ArrayExpress', 'EBI ArrayExpress', 'https://www.ebi.ac.uk/arrayexpress/', 'ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/', 'EXPRESSION'); +call add_external_database('Genbank', 'NCBI Genbank', 'https://www.ncbi.nlm.nih.gov/Genbank/index.html', 'ftp://ftp.ncbi.nih.gov/genbank/', 'SEQUENCE'); +call add_external_database('Entrez Gene', 'NCBI Gene database', 'https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene', 'ftp://ftp.ncbi.nih.gov/gene/', 'SEQUENCE'); +call add_external_database('Ensembl', 'EMBL - EBI/Sanger Institute genome annotations', 'https://www.ensembl.org/', 'ftp://ftp.ensembl.org/pub/', 'GENOME'); +call add_external_database('OBO_REL', 'Open Biomedical Ontologies Relationships', 'https://www.obofoundry.org/ro/', NULL, 'ONTOLOGY'); +call add_external_database('STRING', 'STRING - Known and Predicted Protein-Protein Interactions', 'https://string-db.org/version_8_2/newstring_cgi/show_network_section.pl?identifiers=', NULL, 'PROTEIN'); +call add_external_database('hg18', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('hg19', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('hg38', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('mm8', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('mm9', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('mm10', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('mm39', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('rn4', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('rn6', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('rn7', NULL, '', NULL, 'SEQUENCE'); +call add_external_database('hg18 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/hg18/database/', NULL, 'OTHER'); +call add_external_database('hg19 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/hg19/database/', NULL, 'OTHER'); +call add_external_database('hg38 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/hg38/database/', NULL, 'OTHER'); +call add_external_database('mm8 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm8/database/', NULL, 'OTHER'); +call add_external_database('mm9 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm9/database/', NULL, 'OTHER'); +call add_external_database('mm10 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm10/database/', NULL, 'OTHER'); +call add_external_database('mm39 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm39/database/', NULL, 'OTHER'); +call add_external_database('rn4 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/rn4/database/', NULL, 'OTHER'); +call 
add_external_database('rn6 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/rn6/database/', NULL, 'OTHER'); +call add_external_database('rn7 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/rn7/database/', NULL, 'OTHER'); +call add_external_database('hg38 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); +call add_external_database('mm10 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); +call add_external_database('mm39 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); +call add_external_database('rn7 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); +call add_external_database('gene', NULL, NULL, 'https://ftp.ncbi.nih.gov/gene/DATA/gene_info.gz', 'OTHER'); +call add_external_database('go', NULL, NULL, 'https://ftp.ncbi.nih.gov/gene/DATA/gene2go.gz', 'ONTOLOGY'); +call add_external_database('multifunctionality', NULL, NULL, NULL, 'OTHER'); +call add_external_database('gene2cs', NULL, NULL, NULL, 'OTHER'); + +drop procedure add_external_database; + +create procedure add_external_database_relation(in parent_name varchar(255), in child_name varchar(255)) begin select @parent_id := ID from EXTERNAL_DATABASE where name = parent_name; update EXTERNAL_DATABASE set EXTERNAL_DATABASE_FK = @parent_id where NAME = child_name; end; + +call add_external_database_relation('hg38', 'hg38 annotations'); +call add_external_database_relation('hg19', 'hg19 annotations'); +call add_external_database_relation('hg18', 'hg18 annotations'); +call add_external_database_relation('mm39', 'mm39 annotations'); +call add_external_database_relation('mm10', 'mm10 annotations'); +call add_external_database_relation('mm9', 'mm9 annotations'); +call add_external_database_relation('mm8', 'mm8 annotations'); +call add_external_database_relation('rn7', 'rn7 annotations'); +call add_external_database_relation('rn6', 'rn4 annotations'); +call add_external_database_relation('rn4', 'rn6 annotations'); + +call add_external_database_relation('hg38', 'hg38 RNA-Seq annotations'); +call add_external_database_relation('mm39', 'mm39 RNA-Seq annotations'); +call add_external_database_relation('mm10', 'mm10 RNA-Seq annotations'); +call add_external_database_relation('rn7', 'rn7 RNA-Seq annotations'); + +drop procedure add_external_database_relation; + diff --git a/gemma-core/src/main/resources/sql/init-entities.sql b/gemma-core/src/main/resources/sql/init-entities.sql index a706a3fa47..6a23844a3b 100644 --- a/gemma-core/src/main/resources/sql/init-entities.sql +++ b/gemma-core/src/main/resources/sql/init-entities.sql @@ -1,116 +1,68 @@ --- Initialize the database with some scraps of data. See also init-indices.sql and mysql-acegi-acl.sql. - --- all of these are used. 
-insert into AUDIT_TRAIL VALUES (1); -insert into AUDIT_TRAIL VALUES (2); -insert into AUDIT_TRAIL VALUES (3); - -set @n:=now(); - --- username=gemmaAgent: id = 2, password = 'XXXXXXXX', audit trail #2, using salt={username} -insert into CONTACT (ID, CLASS, NAME, LAST_NAME, USER_NAME, PASSWORD, ENABLED, EMAIL, PASSWORD_HINT) values (2, 'User', 'gemmaAgent', '', 'gemmaAgent', '2db458c67b4b52bba0184611c302c9c174ce8de4', 1, 'pavlab-support@msl.ubc.ca', 'hint'); - --- username=administrator: id = 1, password = 'administrator', audit trail #1 using salt=username ('administrator') -insert into CONTACT (ID, CLASS, NAME, LAST_NAME, USER_NAME, PASSWORD, ENABLED, EMAIL, PASSWORD_HINT) values (1, 'User', 'administrator', '', 'administrator', 'b7338dcc17d6b6c199a75540aab6d0506567b980', 1, 'pavlab-support@msl.ubc.ca', 'hint'); - --- initialize the audit trails -insert into AUDIT_EVENT VALUES (1, @n, 'C', 'From init script', '', 1, NULL, 1); -insert into AUDIT_EVENT VALUES (2, @n, 'C', 'From init script', '', 1, NULL, 2); -insert into AUDIT_EVENT VALUES (3, @n, 'C', 'From init script', '', 1, NULL, 3); - - --- Note that 'Administrators' is a constant set in AuthorityConstants. The names of these groups are defined in UserGroupDao. -insert into USER_GROUP (ID, NAME, DESCRIPTION, AUDIT_TRAIL_FK) VALUES (1, 'Administrators', 'Users with administrative rights', 1); -insert into USER_GROUP (ID, NAME, DESCRIPTION, AUDIT_TRAIL_FK) VALUES (2, 'Users', 'Default group for all authenticated users', 2); -insert into USER_GROUP (ID, NAME, DESCRIPTION, AUDIT_TRAIL_FK) VALUES (3, 'Agents', 'For \'autonomous\' agents that run within the server context, such as scheduled tasks.', 3); -insert into GROUP_AUTHORITY (ID, AUTHORITY, GROUP_FK) VALUES (1, 'ADMIN', 1); -insert into GROUP_AUTHORITY (ID, AUTHORITY, GROUP_FK) VALUES (2, 'USER', 2); -insert into GROUP_AUTHORITY (ID, AUTHORITY, GROUP_FK) VALUES (3, 'AGENT', 3); - --- make admin in the admin group -insert into GROUP_MEMBERS (USER_GROUPS_FK, GROUP_MEMBERS_FK) VALUES (1, 1); - --- add admin to the user group (note that there is no need for a corresponding ACL entry) -insert into GROUP_MEMBERS (USER_GROUPS_FK, GROUP_MEMBERS_FK) VALUES (2, 1); - --- add agent to the agent group -insert into GROUP_MEMBERS (USER_GROUPS_FK, GROUP_MEMBERS_FK) VALUES (3, 2); - --- taxa -insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Homo sapiens','human','9606',1); -insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Mus musculus','mouse','10090',1); -insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Rattus norvegicus','rat','10116',1); -insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE,SECONDARY_NCBI_ID) values ('Saccharomyces cerevisiae','yeast','4932',1,559292); -insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Danio rerio','zebrafish','7955',1); -insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Drosophila melanogaster','fly','7227',1); -insert into TAXON (SCIENTIFIC_NAME,COMMON_NAME,NCBI_ID,IS_GENES_USABLE) values ('Caenorhabditis elegans','worm','6239',1); - --- external databases - --- we need a procedure since we have to create an audit trail --- silly, but this needs to be in a single line because sql-maven-plugin does not deal well with statements containing multiple semi-colons -create procedure add_external_database(in name varchar(255), in description text, in web_uri varchar(255), in ftp_uri 
varchar(255), in type varchar(255)) begin insert into AUDIT_TRAIL (ID) values (null); insert into EXTERNAL_DATABASE (NAME, DESCRIPTION, WEB_URI, FTP_URI, TYPE, AUDIT_TRAIL_FK) values (name, description, web_uri, ftp_uri, type, last_insert_id()); end; - --- insert new db we need to track various things -call add_external_database ('PubMed', 'PubMed database from NCBI', 'https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=PubMed', 'ftp://ftp.ncbi.nlm.nih.gov/pubmed/', 'LITERATURE'); --- call add_external_database('GO', 'Gene Ontology database', 'https://www.godatabase.org/dev/database/', 'https://archive.godatabase.org', 'ONTOLOGY'); -call add_external_database('GEO', 'Gene Expression Omnibus', 'https://www.ncbi.nlm.nih.gov/geo/', 'ftp://ftp.ncbi.nih.gov/pub/geo/DATA', 'EXPRESSION'); -call add_external_database('ArrayExpress', 'EBI ArrayExpress', 'https://www.ebi.ac.uk/arrayexpress/', 'ftp://ftp.ebi.ac.uk/pub/databases/microarray/data/', 'EXPRESSION'); -call add_external_database('Genbank', 'NCBI Genbank', 'https://www.ncbi.nlm.nih.gov/Genbank/index.html', 'ftp://ftp.ncbi.nih.gov/genbank/', 'SEQUENCE'); -call add_external_database('Entrez Gene', 'NCBI Gene database', 'https://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=gene', 'ftp://ftp.ncbi.nih.gov/gene/', 'SEQUENCE'); -call add_external_database('Ensembl', 'EMBL - EBI/Sanger Institute genome annotations', 'https://www.ensembl.org/', 'ftp://ftp.ensembl.org/pub/', 'GENOME'); -call add_external_database('OBO_REL', 'Open Biomedical Ontologies Relationships', 'https://www.obofoundry.org/ro/', NULL, 'ONTOLOGY'); -call add_external_database('STRING', 'STRING - Known and Predicted Protein-Protein Interactions', 'https://string-db.org/version_8_2/newstring_cgi/show_network_section.pl?identifiers=', NULL, 'PROTEIN'); -call add_external_database('hg18', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('hg19', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('hg38', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('mm8', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('mm9', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('mm10', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('mm39', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('rn4', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('rn6', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('rn7', NULL, '', NULL, 'SEQUENCE'); -call add_external_database('hg18 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/hg18/database/', NULL, 'OTHER'); -call add_external_database('hg19 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/hg19/database/', NULL, 'OTHER'); -call add_external_database('hg38 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/hg38/database/', NULL, 'OTHER'); -call add_external_database('mm8 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm8/database/', NULL, 'OTHER'); -call add_external_database('mm9 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm9/database/', NULL, 'OTHER'); -call add_external_database('mm10 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm10/database/', NULL, 'OTHER'); -call add_external_database('mm39 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/mm39/database/', NULL, 'OTHER'); -call add_external_database('rn4 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/rn4/database/', NULL, 'OTHER'); -call add_external_database('rn6 annotations', NULL, 
'https://hgdownload.cse.ucsc.edu/goldenpath/rn6/database/', NULL, 'OTHER'); -call add_external_database('rn7 annotations', NULL, 'https://hgdownload.cse.ucsc.edu/goldenpath/rn7/database/', NULL, 'OTHER'); -call add_external_database('hg38 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); -call add_external_database('mm10 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); -call add_external_database('mm39 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); -call add_external_database('rn7 RNA-Seq annotations', NULL, NULL, NULL, 'OTHER'); -call add_external_database('gene', NULL, NULL, 'https://ftp.ncbi.nih.gov/gene/DATA/gene_info.gz', 'OTHER'); -call add_external_database('go', NULL, NULL, 'https://ftp.ncbi.nih.gov/gene/DATA/gene2go.gz', 'ONTOLOGY'); -call add_external_database('multifunctionality', NULL, NULL, NULL, 'OTHER'); -call add_external_database('gene2cs', NULL, NULL, NULL, 'OTHER'); - -drop procedure add_external_database; - -create procedure add_external_database_relation(in parent_name varchar(255), in child_name varchar(255)) begin select @parent_id := ID from EXTERNAL_DATABASE where name = parent_name; update EXTERNAL_DATABASE set EXTERNAL_DATABASE_FK = @parent_id where NAME = child_name; end; - -call add_external_database_relation('hg38', 'hg38 annotations'); -call add_external_database_relation('hg19', 'hg19 annotations'); -call add_external_database_relation('hg18', 'hg18 annotations'); -call add_external_database_relation('mm39', 'mm39 annotations'); -call add_external_database_relation('mm10', 'mm10 annotations'); -call add_external_database_relation('mm9', 'mm9 annotations'); -call add_external_database_relation('mm8', 'mm8 annotations'); -call add_external_database_relation('rn7', 'rn7 annotations'); -call add_external_database_relation('rn6', 'rn4 annotations'); -call add_external_database_relation('rn4', 'rn6 annotations'); - -call add_external_database_relation('hg38', 'hg38 RNA-Seq annotations'); -call add_external_database_relation('mm39', 'mm39 RNA-Seq annotations'); -call add_external_database_relation('mm10', 'mm10 RNA-Seq annotations'); -call add_external_database_relation('rn7', 'rn7 RNA-Seq annotations'); - -drop procedure add_external_database_relation; +-- Add some indices that are not included in the generated gemma-ddl.sql. +-- Some of these are very important for performance + +alter table ACLSID + add index ACLSID_CLASS (class); +alter table INVESTIGATION + add index INVESTIGATION_CLASS (class); +alter table DATABASE_ENTRY + add index acc_ex (ACCESSION, EXTERNAL_DATABASE_FK); +alter table CHROMOSOME_FEATURE + add index CHROMOSOME_FEATURE_CLASS (class); +alter table CHROMOSOME_FEATURE + add index symbol_tax (OFFICIAL_SYMBOL, TAXON_FK); +alter table AUDIT_EVENT_TYPE + add index AUDIT_EVENT_TYPE_CLASS (class); +alter table ANALYSIS + add index ANALYSIS_CLASS (class); + +alter table CHARACTERISTIC + add index CHARACTERISTIC_CLASS (class); + +alter table PROCESSED_EXPRESSION_DATA_VECTOR + add index experimentProcessedVectorProbes (EXPRESSION_EXPERIMENT_FK, DESIGN_ELEMENT_FK); + +alter table DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT + add index resultSetProbes (RESULT_SET_FK, PROBE_FK); +alter table DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT + add index probeResultSets (PROBE_FK, RESULT_SET_FK); + +alter table CONTACT + add index fullname (NAME, LAST_NAME); + +-- should remove the FIRST_GENE_FK and SECOND_GENE_FK indices, but they get given 'random' names. +-- Drop the second_gene_fk constraint. 
+-- alter table HUMAN_GENE_COEXPRESSION drop foreign key FKF9E6557F21D58F19; +-- alter table MOUSE_GENE_COEXPRESSION drop foreign key FKFC61C4F721D58F19; +-- alter table RAT_GENE_COEXPRESSION drop foreign key FKDE59FC7721D58F19; +-- alter table OTHER_GENE_COEXPRESSION drop foreign key FK74B9A3E221D58F19; + +alter table HUMAN_GENE_COEXPRESSION + add index hfgsg (FIRST_GENE_FK, SECOND_GENE_FK); +alter table MOUSE_GENE_COEXPRESSION + add index mfgsg (FIRST_GENE_FK, SECOND_GENE_FK); +alter table RAT_GENE_COEXPRESSION + add index rfgsg (FIRST_GENE_FK, SECOND_GENE_FK); +alter table OTHER_GENE_COEXPRESSION + add index ofgsg (FIRST_GENE_FK, SECOND_GENE_FK); + +-- same for these, should drop the key for EXPERIMENT_FK, manually +alter table HUMAN_EXPERIMENT_COEXPRESSION + add index ECL1EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK); +alter table HUMAN_EXPERIMENT_COEXPRESSION + add constraint ECL1EFK foreign key (EXPERIMENT_FK) references INVESTIGATION (ID); +alter table MOUSE_EXPERIMENT_COEXPRESSION + add index ECL2EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK); +alter table MOUSE_EXPERIMENT_COEXPRESSION + add constraint ECL2EFK foreign key (EXPERIMENT_FK) references INVESTIGATION (ID); +alter table RAT_EXPERIMENT_COEXPRESSION + add index ECL3EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK); +alter table RAT_EXPERIMENT_COEXPRESSION + add constraint ECL3EFK foreign key (EXPERIMENT_FK) references INVESTIGATION (ID); +alter table OTHER_EXPERIMENT_COEXPRESSION + add index ECL4EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK); +alter table OTHER_EXPERIMENT_COEXPRESSION + add constraint ECL4EFK foreign key (EXPERIMENT_FK) references INVESTIGATION (ID); -- denormalized table joining genes and compositeSequences; maintained by TableMaintenanceUtil. create table GENE2CS @@ -136,7 +88,7 @@ create table EXPRESSION_EXPERIMENT2CHARACTERISTIC DESCRIPTION text, CATEGORY varchar(255), CATEGORY_URI varchar(255), - VALUE varchar(255), + `VALUE` varchar(255), VALUE_URI varchar(255), ORIGINAL_VALUE varchar(255), EVIDENCE_CODE varchar(255), @@ -146,16 +98,15 @@ create table EXPRESSION_EXPERIMENT2CHARACTERISTIC primary key (ID, EXPRESSION_EXPERIMENT_FK) ); --- note: constraint names cannot exceed 64 characters, so we cannot use the usual naming convention --- no URI exceeds 100 characters in practice, so we only index a prefix alter table EXPRESSION_EXPERIMENT2CHARACTERISTIC - add constraint EE2C_CHARACTERISTIC_FKC foreign key (ID) references CHARACTERISTIC (ID) on update cascade on delete cascade, - add constraint EE2C_EXPRESSION_EXPERIMENT_FKC foreign key (EXPRESSION_EXPERIMENT_FK) references INVESTIGATION (id) on update cascade on delete cascade, - add index EE2C_VALUE (VALUE), - add index EE2C_CATEGORY (CATEGORY), - add index EE2C_VALUE_URI_VALUE (VALUE_URI(100), VALUE), - add index EE2C_CATEGORY_URI_CATEGORY_VALUE_URI_VALUE (CATEGORY_URI(100), CATEGORY, VALUE_URI(100), VALUE), - add index EE2C_LEVEL (LEVEL); + add constraint EE2C_CHARACTERISTIC_FKC foreign key (ID) references CHARACTERISTIC (ID) on update cascade on delete cascade; +alter table EXPRESSION_EXPERIMENT2CHARACTERISTIC + add constraint EE2C_EXPRESSION_EXPERIMENT_FKC foreign key (EXPRESSION_EXPERIMENT_FK) references INVESTIGATION (id) on update cascade on delete cascade; + +-- note: constraint names cannot exceed 64 characters, so we cannot use the usual naming convention +create index EE2C_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (`VALUE`); +create index EE2C_CATEGORY on EXPRESSION_EXPERIMENT2CHARACTERISTIC (CATEGORY); +create index EE2C_LEVEL on 
EXPRESSION_EXPERIMENT2CHARACTERISTIC (LEVEL); create table EXPRESSION_EXPERIMENT2ARRAY_DESIGN ( @@ -169,5 +120,6 @@ create table EXPRESSION_EXPERIMENT2ARRAY_DESIGN ); alter table EXPRESSION_EXPERIMENT2ARRAY_DESIGN - add constraint EE2AD_EXPRESSION_EXPERIMENT_FKC foreign key (EXPRESSION_EXPERIMENT_FK) references INVESTIGATION (id) on update cascade on delete cascade, - add constraint EE2AD_ARRAY_DESIGN_FKC foreign key (ARRAY_DESIGN_FK) references ARRAY_DESIGN (ID) on update cascade on delete cascade; \ No newline at end of file + add constraint EE2AD_EXPRESSION_EXPERIMENT_FKC foreign key (EXPRESSION_EXPERIMENT_FK) references INVESTIGATION (id) on update cascade on delete cascade; +alter table EXPRESSION_EXPERIMENT2ARRAY_DESIGN + add constraint EE2AD_ARRAY_DESIGN_FKC foreign key (ARRAY_DESIGN_FK) references ARRAY_DESIGN (ID) on update cascade on delete cascade; diff --git a/gemma-core/src/main/resources/sql/init-indices.sql b/gemma-core/src/main/resources/sql/init-indices.sql deleted file mode 100644 index 17bf61ec80..0000000000 --- a/gemma-core/src/main/resources/sql/init-indices.sql +++ /dev/null @@ -1,129 +0,0 @@ --- Add some indices that are not included in the generated gemma-ddl.sql. Some of these are very important to performance - -ALTER TABLE ACLSID - ADD INDEX class (class); - -ALTER TABLE CURATION_DETAILS - ADD INDEX TROUBLED_IX (TROUBLED); - -ALTER TABLE BIO_SEQUENCE - ADD INDEX name (NAME); -ALTER TABLE ALTERNATE_NAME - ADD INDEX name (NAME); -ALTER TABLE INVESTIGATION - ADD INDEX name (NAME), - ADD INDEX shortname (SHORT_NAME), - ADD INDEX class (class), - ADD INDEX INVESTIGATION_NUMBER_OF_SAMPLES (NUMBER_OF_SAMPLES), - ADD INDEX INVESTIGATION_NUMBER_OF_DATA_VECTORS (NUMBER_OF_DATA_VECTORS); -ALTER TABLE DATABASE_ENTRY - ADD INDEX acc_ex (ACCESSION, EXTERNAL_DATABASE_FK); -ALTER TABLE CHROMOSOME_FEATURE - ADD INDEX symbol_tax (OFFICIAL_SYMBOL, TAXON_FK); -ALTER TABLE CHROMOSOME_FEATURE - ADD INDEX ncbigeneid (NCBI_GENE_ID); -ALTER TABLE CHROMOSOME_FEATURE - ADD INDEX ncbigi (NCBI_GI); -ALTER TABLE CHROMOSOME_FEATURE - ADD INDEX previous_ncbiid (PREVIOUS_NCBI_ID); -ALTER TABLE CHROMOSOME_FEATURE - ADD INDEX ensemblid (ENSEMBL_ID); -ALTER TABLE CHROMOSOME_FEATURE - ADD INDEX name (NAME); -ALTER TABLE CHROMOSOME_FEATURE - ADD INDEX class (class); -ALTER TABLE GENE_ALIAS - ADD INDEX `alias` (`ALIAS`); -ALTER TABLE COMPOSITE_SEQUENCE - ADD INDEX name (NAME); -ALTER TABLE PHYSICAL_LOCATION - ADD INDEX BIN_KEY (BIN); -ALTER TABLE AUDIT_EVENT_TYPE - ADD INDEX class (class); -ALTER TABLE ANALYSIS - ADD INDEX class (class), - ADD INDEX ANALYSIS_NUMBER_OF_ELEMENTS_ANALYZED (NUMBER_OF_ELEMENTS_ANALYZED); -ALTER TABLE ANALYSIS_RESULT_SET - ADD INDEX ANALYSIS_RESULT_SET_NUMBER_OF_GENES_TESTED (NUMBER_OF_GENES_TESTED), - ADD INDEX ANALYSIS_RESULT_SET_NUMBER_OF_PROBES_TESTED (NUMBER_OF_PROBES_TESTED); --- no URI exceeds 100 characters in practice, so we only index a prefix -ALTER TABLE CHARACTERISTIC - ADD INDEX class (class), - ADD INDEX CHARACTERISTIC_VALUE (VALUE), - ADD INDEX CHARACTERISTIC_CATEGORY (CATEGORY), - ADD INDEX CHARACTERISTIC_VALUE_URI_VALUE (VALUE_URI(100), VALUE), - ADD INDEX CHARACTERISTIC_CATEGORY_URI_CATEGORY_VALUE_URI_VALUE (CATEGORY_URI(100), CATEGORY, VALUE_URI(100), VALUE), - ADD INDEX CHARACTERISTIC_EVIDENCE_CODE (EVIDENCE_CODE), - ADD INDEX CHARACTERISTIC_PREDICATE_URI_PREDICATE (PREDICATE_URI(100), PREDICATE), - ADD INDEX CHARACTERISTIC_OBJECT_URI_OBJECT (OBJECT_URI(100), OBJECT), - ADD INDEX CHARACTERISTIC_SECOND_PREDICATE_URI_SECOND_PREDICATE 
(SECOND_PREDICATE_URI(100), SECOND_PREDICATE), - ADD INDEX CHARACTERISTIC_SECOND_OBJECT_URI_SECOND_OBJECT (SECOND_OBJECT_URI(100), SECOND_OBJECT); -ALTER TABLE GENE_SET - ADD INDEX name (NAME); -ALTER TABLE PROCESSED_EXPRESSION_DATA_VECTOR - ADD INDEX experimentProcessedVectorProbes (EXPRESSION_EXPERIMENT_FK, DESIGN_ELEMENT_FK); -ALTER TABLE DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT - ADD INDEX resultSetProbes (RESULT_SET_FK, PROBE_FK); -ALTER TABLE DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT - ADD INDEX probeResultSets (PROBE_FK, RESULT_SET_FK); -ALTER TABLE TAXON - ADD INDEX taxonncbiid (NCBI_ID); -ALTER TABLE TAXON - ADD INDEX taxonsecondncbiid (SECONDARY_NCBI_ID); -ALTER TABLE TAXON - ADD INDEX taxoncommonname (COMMON_NAME); -ALTER TABLE TAXON - ADD INDEX taxonscientificname (SCIENTIFIC_NAME); -ALTER TABLE CONTACT - ADD INDEX fullname (NAME, LAST_NAME); - --- should remove the FIRST_GENE_FK and SECOND_GENE_FK indices, but they get given 'random' names. --- Drop the second_gene_fk constraint. --- alter table HUMAN_GENE_COEXPRESSION drop foreign key FKF9E6557F21D58F19; --- alter table MOUSE_GENE_COEXPRESSION drop foreign key FKFC61C4F721D58F19; --- alter table RAT_GENE_COEXPRESSION drop foreign key FKDE59FC7721D58F19; --- alter table OTHER_GENE_COEXPRESSION drop foreign key FK74B9A3E221D58F19; - -ALTER TABLE HUMAN_GENE_COEXPRESSION - ADD INDEX hfgsg (FIRST_GENE_FK, SECOND_GENE_FK); -ALTER TABLE MOUSE_GENE_COEXPRESSION - ADD INDEX mfgsg (FIRST_GENE_FK, SECOND_GENE_FK); -ALTER TABLE RAT_GENE_COEXPRESSION - ADD INDEX rfgsg (FIRST_GENE_FK, SECOND_GENE_FK); -ALTER TABLE OTHER_GENE_COEXPRESSION - ADD INDEX ofgsg (FIRST_GENE_FK, SECOND_GENE_FK); - --- same for these, should drop the key for EXPERIMENT_FK, manually -ALTER TABLE HUMAN_EXPERIMENT_COEXPRESSION - ADD INDEX ECL1EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK), - ADD CONSTRAINT ECL1EFK FOREIGN KEY (EXPERIMENT_FK) REFERENCES INVESTIGATION (ID); -ALTER TABLE MOUSE_EXPERIMENT_COEXPRESSION - ADD INDEX ECL2EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK), - ADD CONSTRAINT ECL2EFK FOREIGN KEY (EXPERIMENT_FK) REFERENCES INVESTIGATION (ID); -ALTER TABLE RAT_EXPERIMENT_COEXPRESSION - ADD INDEX ECL3EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK), - ADD CONSTRAINT ECL3EFK FOREIGN KEY (EXPERIMENT_FK) REFERENCES INVESTIGATION (ID); -ALTER TABLE OTHER_EXPERIMENT_COEXPRESSION - ADD INDEX ECL4EFK (EXPERIMENT_FK, GENE1_FK, GENE2_FK), - ADD CONSTRAINT ECL4EFK FOREIGN KEY (EXPERIMENT_FK) REFERENCES INVESTIGATION (ID); - --- candidates for removal -ALTER TABLE DIFFERENTIAL_EXPRESSION_ANALYSIS_RESULT - ADD INDEX corrpvalbin (CORRECTED_P_VALUE_BIN); -ALTER TABLE HIT_LIST_SIZE - ADD INDEX direction (DIRECTION); - -ALTER TABLE MEASUREMENT - ADD INDEX MEASUREMENT_KIND_CV (KIND_C_V), - ADD INDEX MEASUREMENT_OTHER_KIND (OTHER_KIND), - ADD INDEX MEASUREMENT_REPRESENTATION (REPRESENTATION), - ADD INDEX MEASUREMENT_TYPE (TYPE), - ADD INDEX MEASUREMENT_VALUE (VALUE); - -ALTER TABLE GEEQ - ADD INDEX GEEQ_DETECTED_QUALITY_SCORE (DETECTED_QUALITY_SCORE), - ADD INDEX GEEQ_DETECTED_SUITABILITY_SCORE (DETECTED_SUITABILITY_SCORE), - ADD INDEX GEEQ_MANUAL_QUALITY_SCORE (MANUAL_QUALITY_SCORE), - ADD INDEX GEEQ_MANUAL_QUALITY_OVERRIDE (MANUAL_QUALITY_OVERRIDE), - ADD INDEX GEEQ_MANUAL_SUITABILITY_SCORE (MANUAL_SUITABILITY_SCORE), - ADD INDEX GEEQ_MANUAL_SUITABILITY_OVERRIDE (MANUAL_SUITABILITY_OVERRIDE); \ No newline at end of file diff --git a/gemma-core/src/main/resources/sql/mysql/init-entities.sql b/gemma-core/src/main/resources/sql/mysql/init-entities.sql new file mode 100644 index 
0000000000..83e0bc036f --- /dev/null +++ b/gemma-core/src/main/resources/sql/mysql/init-entities.sql @@ -0,0 +1,16 @@ +-- no URI exceeds 100 characters in practice, so we only index a prefix +alter table CHARACTERISTIC + add index CHARACTERISTIC_VALUE_URI_VALUE (VALUE_URI(100), `VALUE`); +alter table CHARACTERISTIC + add index CHARACTERISTIC_CATEGORY_URI_CATEGORY_VALUE_URI_VALUE (CATEGORY_URI(100), CATEGORY, VALUE_URI(100), `VALUE`); +alter table CHARACTERISTIC + add index CHARACTERISTIC_PREDICATE_URI_PREDICATE (PREDICATE_URI(100), PREDICATE); +alter table CHARACTERISTIC + add index CHARACTERISTIC_OBJECT_URI_OBJECT (OBJECT_URI(100), OBJECT); +alter table CHARACTERISTIC + add index CHARACTERISTIC_SECOND_PREDICATE_URI_SECOND_PREDICATE (SECOND_PREDICATE_URI(100), SECOND_PREDICATE); +alter table CHARACTERISTIC + add index CHARACTERISTIC_SECOND_OBJECT_URI_SECOND_OBJECT (SECOND_OBJECT_URI(100), SECOND_OBJECT); + +create index EE2C_VALUE_URI_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (VALUE_URI(100), `VALUE`); +create index EE2C_CATEGORY_URI_CATEGORY_VALUE_URI_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (CATEGORY_URI(100), CATEGORY, VALUE_URI(100), `VALUE`); diff --git a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml index d8b840107e..19cca0175d 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/analysis/Analysis.hbm.xml @@ -56,7 +56,8 @@ - + + "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> - + - + @@ -42,7 +44,7 @@ + class="ubic.gemma.model.analysis.expression.diff.DifferentialExpressionAnalysisResult"/> + sql-type="VARCHAR(255)" index="INVESTIGATION_NAME"/> + sql-type="VARCHAR(255)" index="shortname"/> - + - + + "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> - + diff --git a/gemma-core/src/main/resources/ubic/gemma/model/analysis/expression/diff/HitListSize.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/analysis/expression/diff/HitListSize.hbm.xml index 2893a21758..e1717bbc24 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/analysis/expression/diff/HitListSize.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/analysis/expression/diff/HitListSize.hbm.xml @@ -18,7 +18,7 @@ - + ubic.gemma.model.analysis.expression.diff.Direction true diff --git a/gemma-core/src/main/resources/ubic/gemma/model/common/auditAndSecurity/curation/CurationDetails.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/common/auditAndSecurity/curation/CurationDetails.hbm.xml index cfa266f2ac..c5e1bd2317 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/common/auditAndSecurity/curation/CurationDetails.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/common/auditAndSecurity/curation/CurationDetails.hbm.xml @@ -1,7 +1,7 @@ + "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> @@ -29,7 +29,7 @@ - + diff --git a/gemma-core/src/main/resources/ubic/gemma/model/common/description/Characteristic.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/common/description/Characteristic.hbm.xml index ca8e1c6548..f4e2107808 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/common/description/Characteristic.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/common/description/Characteristic.hbm.xml @@ -23,11 +23,11 @@ + sql-type="VARCHAR(255)" index="CHARACTERISTIC_VALUE"/> + sql-type="VARCHAR(255)" index="CHARACTERISTIC_CATEGORY"/> + 
sql-type="VARCHAR(255)" index="CHARACTERISTIC_EVIDENCE_CODE"/> ubic.gemma.model.association.GOEvidenceCode true diff --git a/gemma-core/src/main/resources/ubic/gemma/model/common/measurement/Measurement.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/common/measurement/Measurement.hbm.xml index 011b3d3798..7be9c3c99d 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/common/measurement/Measurement.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/common/measurement/Measurement.hbm.xml @@ -1,7 +1,7 @@ + "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> @@ -12,27 +12,30 @@ - + ubic.gemma.model.common.measurement.MeasurementType true - + - + ubic.gemma.model.common.measurement.MeasurementKind true - + - + ubic.gemma.model.common.quantitationtype.PrimitiveType true diff --git a/gemma-core/src/main/resources/ubic/gemma/model/expression/arrayDesign/AlternateName.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/expression/arrayDesign/AlternateName.hbm.xml index ef57c9dece..80a3814b97 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/expression/arrayDesign/AlternateName.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/expression/arrayDesign/AlternateName.hbm.xml @@ -12,7 +12,7 @@ - + \ No newline at end of file diff --git a/gemma-core/src/main/resources/ubic/gemma/model/expression/designElement/CompositeSequence.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/expression/designElement/CompositeSequence.hbm.xml index c20a381b5f..b58d7060a3 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/expression/designElement/CompositeSequence.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/expression/designElement/CompositeSequence.hbm.xml @@ -12,7 +12,7 @@ - + diff --git a/gemma-core/src/main/resources/ubic/gemma/model/expression/experiment/Geeq.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/expression/experiment/Geeq.hbm.xml index 15d790a5bd..0a25744c9d 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/expression/experiment/Geeq.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/expression/experiment/Geeq.hbm.xml @@ -1,7 +1,7 @@ + "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> @@ -12,22 +12,28 @@ - + - + - + - + - + - + @@ -39,7 +45,8 @@ - + diff --git a/gemma-core/src/main/resources/ubic/gemma/model/genome/ChromosomeFeature.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/genome/ChromosomeFeature.hbm.xml index 9d59919c3c..1220598ad3 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/genome/ChromosomeFeature.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/genome/ChromosomeFeature.hbm.xml @@ -1,7 +1,7 @@ + "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> + sql-type="VARCHAR(255)" index="CHROMOSOME_FEATURE_NAME"/> + sql-type="VARCHAR(255)" index="PREVIOUS_NCBI_ID"/> @@ -33,7 +33,7 @@ abstract="false"> + sql-type="VARCHAR(255)" index="NCBI_GI"/> @@ -61,11 +61,11 @@ sql-type="text"/> - + + sql-type="VARCHAR(255)" index="ENSEMBL_ID"/> diff --git a/gemma-core/src/main/resources/ubic/gemma/model/genome/PhysicalLocation.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/genome/PhysicalLocation.hbm.xml index a9b2d33717..6c16fb932d 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/genome/PhysicalLocation.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/genome/PhysicalLocation.hbm.xml @@ -25,7 +25,7 @@ sql-type="VARCHAR(255)"/> - + \ No newline at end of file diff --git 
a/gemma-core/src/main/resources/ubic/gemma/model/genome/Taxon.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/genome/Taxon.hbm.xml index c8f4171f4b..ca8e6dedcb 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/genome/Taxon.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/genome/Taxon.hbm.xml @@ -4,31 +4,34 @@ "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/gemma-core/src/main/resources/ubic/gemma/model/genome/biosequence/BioSequence.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/genome/biosequence/BioSequence.hbm.xml index 6bd0f0115f..8c101aa1c7 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/genome/biosequence/BioSequence.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/genome/biosequence/BioSequence.hbm.xml @@ -4,34 +4,34 @@ "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + @@ -49,25 +49,25 @@ - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneAlias.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneAlias.hbm.xml index a701b63304..18830b429e 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneAlias.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneAlias.hbm.xml @@ -12,7 +12,7 @@ - + \ No newline at end of file diff --git a/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneSet.hbm.xml b/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneSet.hbm.xml index 6737693eb8..7ca18cfd54 100644 --- a/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneSet.hbm.xml +++ b/gemma-core/src/main/resources/ubic/gemma/model/genome/gene/GeneSet.hbm.xml @@ -1,7 +1,7 @@ + "http://www.hibernate.org/dtd/hibernate-mapping-3.0.dtd"> @@ -17,7 +17,7 @@ + sql-type="VARCHAR(255)" index="GENE_SET_NAME"/> - - + + diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java index 8cd4e08cae..7af0eb3e5a 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/BaseDatabaseTest.java @@ -126,8 +126,9 @@ public DataSourceInitializer( DataSource dataSource ) { @Override public void afterPropertiesSet() { JdbcTestUtils.executeSqlScript( template, applicationContext.getResource( "/sql/init-acls.sql" ), false ); + JdbcTestUtils.executeSqlScript( template, applicationContext.getResource( "/sql/init-entities.sql" ), false ); JdbcTestUtils.executeSqlScript( template, applicationContext.getResource( "/sql/h2/init-entities.sql" ), false ); - JdbcTestUtils.executeSqlScript( template, applicationContext.getResource( "/sql/h2/init-indices.sql" ), false ); + JdbcTestUtils.executeSqlScript( template, applicationContext.getResource( "/sql/init-data-slim.sql" ), false ); } } } diff --git a/gemma-core/src/test/resources/sql/h2/init-entities.sql b/gemma-core/src/test/resources/sql/h2/init-entities.sql deleted file mode 100644 index fb4ff2c786..0000000000 --- a/gemma-core/src/test/resources/sql/h2/init-entities.sql +++ /dev/null @@ -1,72 +0,0 @@ -insert into AUDIT_TRAIL (ID) -values (1), - (2); - 
-insert into USER_GROUP (ID, AUDIT_TRAIL_FK, NAME, DESCRIPTION) -values (1, 1, 'Administrators', NULL), - (2, 2, 'Users', NULL); - -insert into CONTACT (ID, class, NAME, DESCRIPTION, EMAIL, LAST_NAME, USER_NAME, PASSWORD, PASSWORD_HINT, ENABLED, - SIGNUP_TOKEN, SIGNUP_TOKEN_DATESTAMP) -values (1, 'User', 'admin', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); - --- denormalized table joining genes and compositeSequences; maintained by TableMaintenanceUtil. -create table GENE2CS -( - GENE BIGINT not null, - CS BIGINT not null, - AD BIGINT not null, - primary key (AD, CS, GENE) -); -alter table GENE2CS - add constraint GENE2CS_ARRAY_DESIGN_FKC foreign key (AD) references ARRAY_DESIGN (ID) on update cascade on delete cascade; -alter table GENE2CS - add constraint GENE2CS_CS_FKC foreign key (CS) references COMPOSITE_SEQUENCE (ID) on update cascade on delete cascade; -alter table GENE2CS - add constraint GENE2CS_GENE_FKC foreign key (GENE) references CHROMOSOME_FEATURE (ID) on update cascade on delete cascade; - --- this table is created in the hibernate schema -drop table EXPRESSION_EXPERIMENT2CHARACTERISTIC; -create table EXPRESSION_EXPERIMENT2CHARACTERISTIC -( - ID bigint, - NAME varchar(255), - DESCRIPTION text, - CATEGORY varchar(255), - CATEGORY_URI varchar(255), - `VALUE` varchar(255), - VALUE_URI varchar(255), - ORIGINAL_VALUE varchar(255), - EVIDENCE_CODE varchar(255), - EXPRESSION_EXPERIMENT_FK bigint, - ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK int not null default 0, - LEVEL varchar(255), - primary key (ID, EXPRESSION_EXPERIMENT_FK) -); - -alter table EXPRESSION_EXPERIMENT2CHARACTERISTIC - add constraint EE2C_CHARACTERISTIC_FKC foreign key (ID) references CHARACTERISTIC (ID) on update cascade on delete cascade; -alter table EXPRESSION_EXPERIMENT2CHARACTERISTIC - add constraint EE2C_EXPRESSION_EXPERIMENT_FKC foreign key (EXPRESSION_EXPERIMENT_FK) references INVESTIGATION (id) on update cascade on delete cascade; - -create index EE2C_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (`VALUE`); -create index EE2C_CATEGORY on EXPRESSION_EXPERIMENT2CHARACTERISTIC (CATEGORY); -create index EE2C_VALUE_URI_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (VALUE_URI, `VALUE`); -create index EE2C_CATEGORY_URI_CATEGORY_VALUE_URI_VALUE on EXPRESSION_EXPERIMENT2CHARACTERISTIC (CATEGORY_URI, CATEGORY, VALUE_URI, `VALUE`); -create index EE2C_LEVEL on EXPRESSION_EXPERIMENT2CHARACTERISTIC (LEVEL); - -create table EXPRESSION_EXPERIMENT2ARRAY_DESIGN -( - EXPRESSION_EXPERIMENT_FK bigint not null, - ARRAY_DESIGN_FK bigint not null, - -- indicate if the platform is original (see BioAssay.originalPlatform) - IS_ORIGINAL_PLATFORM tinyint not null, - -- the permission mask of the EE for the anonymous SID - ACL_IS_AUTHENTICATED_ANONYMOUSLY_MASK int not null default 0, - primary key (EXPRESSION_EXPERIMENT_FK, ARRAY_DESIGN_FK, IS_ORIGINAL_PLATFORM) -); - -alter table EXPRESSION_EXPERIMENT2ARRAY_DESIGN - add constraint EE2AD_EXPRESSION_EXPERIMENT_FKC foreign key (EXPRESSION_EXPERIMENT_FK) references INVESTIGATION (id) on update cascade on delete cascade; -alter table EXPRESSION_EXPERIMENT2ARRAY_DESIGN - add constraint EE2AD_ARRAY_DESIGN_FKC foreign key (ARRAY_DESIGN_FK) references ARRAY_DESIGN (ID) on update cascade on delete cascade; diff --git a/gemma-core/src/test/resources/sql/h2/init-indices.sql b/gemma-core/src/test/resources/sql/h2/init-indices.sql deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/gemma-core/src/test/resources/sql/init-data-slim.sql 
b/gemma-core/src/test/resources/sql/init-data-slim.sql new file mode 100644 index 0000000000..17ec863f5c --- /dev/null +++ b/gemma-core/src/test/resources/sql/init-data-slim.sql @@ -0,0 +1,13 @@ +-- Slim version of init-data.sql for unit tests + +insert into AUDIT_TRAIL (ID) +values (1), + (2); + +insert into USER_GROUP (ID, AUDIT_TRAIL_FK, NAME, DESCRIPTION) +values (1, 1, 'Administrators', NULL), + (2, 2, 'Users', NULL); + +insert into CONTACT (ID, class, NAME, DESCRIPTION, EMAIL, LAST_NAME, USER_NAME, PASSWORD, PASSWORD_HINT, ENABLED, + SIGNUP_TOKEN, SIGNUP_TOKEN_DATESTAMP) +values (1, 'User', 'admin', NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); \ No newline at end of file From 00b55fde082f0101eae12313972683d05924cee3 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 28 Mar 2024 11:25:45 -0700 Subject: [PATCH 089/105] Fix broken conversion from ID due to PA removal --- .../gemma/core/search/SearchServiceImpl.java | 20 +++++++++++++++++-- .../search/SearchServiceVoConversionTest.java | 15 -------------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java index f446d0fc58..cd015c2f35 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java @@ -171,9 +171,11 @@ private void addAll( Collection> search @Qualifier("valueObjectConversionService") private ConversionService valueObjectConversionService; + /** + * Mapping of supported result types to their corresponding VO type. + */ private final Map, Class>> supportedResultTypes = new HashMap<>(); - /** * A composite search source. 
*/ @@ -302,9 +304,14 @@ public List>> loadValueObjects */ private List>> loadValueObjectsOfSameResultType( List> results, Class resultType ) { List entities = new ArrayList<>(); + List entitiesIds = new ArrayList<>(); List> entitiesVos = new ArrayList<>(); for ( SearchResult result : results ) { - entities.add( result.getResultObject() ); + if ( resultType.isInstance( result.getResultObject() ) ) { + entities.add( result.getResultObject() ); + } else { + entitiesIds.add( result.getResultId() ); + } } // convert entities to VOs @@ -316,6 +323,15 @@ private List>> loadValueObject TypeDescriptor.collection( List.class, TypeDescriptor.valueOf( supportedResultTypes.get( resultType ) ) ) ) ); } + // convert IDs to VOs + if ( !entitiesIds.isEmpty() ) { + //noinspection unchecked + entitiesVos.addAll( ( List> ) + valueObjectConversionService.convert( entitiesIds, + TypeDescriptor.collection( Collection.class, TypeDescriptor.valueOf( Long.class ) ), + TypeDescriptor.collection( List.class, TypeDescriptor.valueOf( supportedResultTypes.get( resultType ) ) ) ) ); + } + Map> entityVosById = EntityUtils.getIdMap( entitiesVos ); Set> excludedResults = new HashSet<>(); diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceVoConversionTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceVoConversionTest.java index d5c66f2e01..882f1cef72 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceVoConversionTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceVoConversionTest.java @@ -14,7 +14,6 @@ import ubic.gemma.core.genome.gene.service.GeneSetService; import ubic.gemma.model.IdentifiableValueObject; import ubic.gemma.model.analysis.expression.diff.ContrastResult; -import ubic.gemma.model.association.phenotype.PhenotypeAssociation; import ubic.gemma.model.common.Identifiable; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.description.BibliographicReferenceValueObject; @@ -31,7 +30,6 @@ import ubic.gemma.model.genome.Taxon; import ubic.gemma.model.genome.gene.DatabaseBackedGeneSetValueObject; import ubic.gemma.model.genome.gene.GeneSet; -import ubic.gemma.model.common.description.CharacteristicValueObject; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.designElement.CompositeSequenceService; import ubic.gemma.persistence.service.expression.experiment.BlacklistedEntityService; @@ -90,7 +88,6 @@ static class SearchServiceVoConversionTestContextConfiguration extends SearchSer private ExpressionExperiment ee; private ExpressionExperimentValueObject eevo; private GeneSet gs; - private CharacteristicValueObject phenotypeAssociation; @Before public void setUp() { @@ -106,7 +103,6 @@ public void setUp() { eevo.setId( 12L ); gs = new GeneSet(); gs.setId( 13L ); - phenotypeAssociation = new CharacteristicValueObject( 14L ); when( arrayDesignService.loadValueObject( any( ArrayDesign.class ) ) ).thenAnswer( a -> new ArrayDesignValueObject( a.getArgument( 0, ArrayDesign.class ) ) ); //noinspection unchecked when( arrayDesignService.loadValueObjects( anyCollection() ) ).thenAnswer( a -> ( ( Collection ) a.getArgument( 0, Collection.class ) ) @@ -173,17 +169,6 @@ public void testConvertExpressionExperiment() { verify( expressionExperimentService ).loadValueObject( ee ); } - @Test - public void testConvertPhenotypeAssociation() { - // this is a complicated one because the result type does not match the entity - 
assertThat( searchService.loadValueObject( SearchResult.from( PhenotypeAssociation.class, phenotypeAssociation, 1.0, "test object" ) ) ) - .extracting( "resultObject" ) - .isSameAs( phenotypeAssociation ); - assertThat( searchService.loadValueObjects( Collections.singleton( SearchResult.from( PhenotypeAssociation.class, phenotypeAssociation, 1.0, "test object" ) ) ) ) - .extracting( "resultObject" ) - .containsExactly( phenotypeAssociation ); - } - @Test public void testConvertGeneSet() { // this is another complicated one because GeneSetService does not implement BaseVoEnabledService From 7adb1fb8ea5be48e51c1e1ad5836c32eb5f327de Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 28 Mar 2024 14:33:40 -0700 Subject: [PATCH 090/105] Use a batch query for populating platform counts --- .../experiment/ExpressionExperimentDaoImpl.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java index e7fe5fe549..d77eae323c 100644 --- a/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/persistence/service/expression/experiment/ExpressionExperimentDaoImpl.java @@ -2224,14 +2224,16 @@ private void populateArrayDesignCount( Collection results = getSessionFactory().getCurrentSession() - .createQuery( "select ee.id, count(distinct ba.arrayDesignUsed) from ExpressionExperiment ee left join ee.bioAssays as ba where ee.id in (:ids) group by ee" ) - .setParameterList( "ids", optimizeParameterList( EntityUtils.getIds( eevos ) ) ) - .list(); - Map adCountById = results.stream().collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Long ) row[1] ) ); + Query q = getSessionFactory().getCurrentSession() + .createQuery( "select ee.id, count(distinct ba.arrayDesignUsed) from ExpressionExperiment ee " + + "join ee.bioAssays as ba " + + "where ee.id in (:ids) " + + "group by ee" ) + .setCacheable( true ); + Map adCountById = streamByBatch( q, "ids", EntityUtils.getIds( eevos ), 2048, Object[].class ) + .collect( Collectors.toMap( row -> ( Long ) row[0], row -> ( Long ) row[1] ) ); for ( ExpressionExperimentValueObject eevo : eevos ) { - eevo.setArrayDesignCount( adCountById.get( eevo.getId() ) ); + eevo.setArrayDesignCount( adCountById.getOrDefault( eevo.getId(), 0L ) ); } } } From cdeac35afb83e83b9382f72b4b8483e495053049 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 28 Mar 2024 16:37:29 -0700 Subject: [PATCH 091/105] Restore support for PhenotypeAssociation result type --- .../main/java/ubic/gemma/core/search/SearchServiceImpl.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java index cd015c2f35..40e593a3cb 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java @@ -41,9 +41,11 @@ import ubic.gemma.core.search.source.DatabaseSearchSource; import ubic.gemma.model.IdentifiableValueObject; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; +import ubic.gemma.model.association.phenotype.PhenotypeAssociation; import ubic.gemma.model.common.Identifiable; import 
ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.description.BibliographicReferenceValueObject; +import ubic.gemma.model.common.description.CharacteristicValueObject; import ubic.gemma.model.common.search.SearchSettings; import ubic.gemma.model.expression.BlacklistedEntity; import ubic.gemma.model.expression.BlacklistedValueObject; @@ -253,6 +255,8 @@ private void initializeSupportedResultTypes() { canConvertFromEntity( e.getKey(), e.getValue() ); canConvertFromId( e.getValue() ); } + // FIXME: remove this in the 1.32 series, we still allow selecting Phenotypes from the UI + supportedResultTypes.put( PhenotypeAssociation.class, CharacteristicValueObject.class ); } private void canConvertFromEntity( Class from, Class> to ) { From bb9954280ef049644079c551bdc7b54649dae878 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Sat, 30 Mar 2024 18:05:40 -0700 Subject: [PATCH 092/105] Few improvements for polling and cancelling tasks in OntologyService Add a fuzzy delay to prevent futures from all timing out at the same time. --- .../core/ontology/OntologyServiceImpl.java | 94 ++++++++++++------- 1 file changed, 61 insertions(+), 33 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java index b3f28d918d..3bcaadbead 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/ontology/OntologyServiceImpl.java @@ -18,6 +18,7 @@ */ package ubic.gemma.core.ontology; +import org.apache.commons.lang3.RandomUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; import org.apache.commons.logging.Log; @@ -33,6 +34,7 @@ import org.springframework.core.task.SimpleAsyncTaskExecutor; import org.springframework.core.task.TaskExecutor; import org.springframework.stereotype.Service; +import org.springframework.util.Assert; import ubic.basecode.ontology.model.*; import ubic.basecode.ontology.providers.ExperimentalFactorOntologyService; import ubic.basecode.ontology.providers.ObiService; @@ -854,23 +856,18 @@ static Comparator getCharacteristicComparator( String */ @Nullable private T findFirst( Function function, String query ) { - StopWatch timer = StopWatch.createStarted(); List> futures = new ArrayList<>( ontologyServices.size() ); + List objects = new ArrayList<>( ontologyServices.size() ); ExecutorCompletionService completionService = new ExecutorCompletionService<>( taskExecutor ); for ( ubic.basecode.ontology.providers.OntologyService service : ontologyServices ) { if ( service.isOntologyLoaded() ) { futures.add( completionService.submit( () -> function.apply( service ) ) ); + objects.add( service ); } } try { for ( int i = 0; i < futures.size(); i++ ) { - Future future; - double timeout = 1000; - while ( ( future = completionService.poll( ( long ) timeout, TimeUnit.MILLISECONDS ) ) == null ) { - log.warn( String.format( "Ontology query for %s is taking too long (%d/%d completed so far, %s elapsed).", query, i, futures.size(), timer ) ); - timeout *= 1.5; // exponential backoff - } - T result = future.get(); + T result = pollCompletionService( completionService, "Finding first result for " + query, futures, objects, 1000, TimeUnit.MILLISECONDS, 1.5 ); if ( result != null ) { return result; } @@ -887,10 +884,7 @@ private T findFirst( Function future : futures ) { - future.cancel( true ); - } + cancelRemainingFutures( 
futures, objects ); } } @@ -945,24 +939,19 @@ private List combineInThreads( Function List combineInThreads( Function> work, List ontologyServices, String query ) { - StopWatch timer = StopWatch.createStarted(); List>> futures = new ArrayList<>( ontologyServices.size() ); + List objects = new ArrayList<>( ontologyServices.size() ); ExecutorCompletionService> completionService = new ExecutorCompletionService<>( taskExecutor ); for ( ubic.basecode.ontology.providers.OntologyService os : ontologyServices ) { if ( os.isOntologyLoaded() ) { futures.add( completionService.submit( () -> work.apply( os ) ) ); + objects.add( os ); } } List children = new ArrayList<>(); try { for ( int i = 0; i < futures.size(); i++ ) { - Future> future; - double timeout = 1000; - while ( ( future = completionService.poll( ( long ) timeout, TimeUnit.MILLISECONDS ) ) == null ) { - log.warn( String.format( "Ontology query for %s is taking too long (%d/%d completed so far, %s elapsed).", query, i, futures.size(), timer ) ); - timeout *= 1.5; // exponential backoff - } - children.addAll( future.get() ); + children.addAll( pollCompletionService( completionService, "Combining all the results for " + query, futures, objects, 1000, TimeUnit.MILLISECONDS, 1.5 ) ); } } catch ( InterruptedException e ) { log.warn( "Current thread was interrupted while waiting, will only return results collected so far.", e ); @@ -975,20 +964,59 @@ private List combineInThreads( Function incompleteTasks = new ArrayList<>( futures.size() ); - for ( Future> future : futures ) { - if ( !future.isDone() ) { - incompleteTasks.add( ontologyServices.get( futures.indexOf( future ) ).toString() ); - future.cancel( true ); - } - } - if ( !incompleteTasks.isEmpty() ) { - log.warn( "The following ontology services did not have time to reply:\n\t" - + String.join( "\n\t", incompleteTasks ) ); - } + cancelRemainingFutures( futures, objects ); } return children; } + + /** + * Poll the next available future from the given completion service. + * + * @param completionService the completion service to poll from + * @param description a description of the task being waited for logging purposes + * @param futures the list of futures being awaited + * @param objects the list of objects corresponding to the futures for logging purposes + * @param timeout the amount of time to wait for resolving the next available future + * @param exponentialBackoff if the future does not resolve within the timeout, increase it by the given amount + */ + private T pollCompletionService( ExecutorCompletionService completionService, String description, List> futures, List objects, long timeout, TimeUnit timeUnit, double exponentialBackoff ) throws InterruptedException, ExecutionException { + Assert.isTrue( futures.size() == objects.size(), "The number of futures must match the number of descriptive objects." ); + Assert.isTrue( exponentialBackoff >= 1.0, "Exponential backoff factor must be greater or equal to 1." ); + StopWatch timer = StopWatch.createStarted(); + Future future; + double timeoutMs = TimeUnit.MILLISECONDS.convert( timeout, timeUnit ); + // a fuzz factor to prevent concurrent tasks from all timing out at the same time + // up to 10% of the initial timeout + double fuzzyMs = RandomUtils.nextDouble( 0.0, timeoutMs / 10.0 ); + while ( ( future = completionService.poll( ( long ) timeoutMs, timeUnit ) ) == null ) { + long i = futures.stream().filter( Future::isDone ).count(); + log.warn( String.format( "%s is taking too long (%d/%d completed so far, %s elapsed). 
The following are still running:\n\t%s", + description, i, futures.size(), timer, futures.stream() + .filter( f -> !f.isDone() ) + .map( futures::indexOf ) + .map( objects::get ) + .map( Object::toString ) + .collect( Collectors.joining( "\n\t" ) ) ) ); + timeoutMs = ( timeoutMs + fuzzyMs ) * exponentialBackoff; + } + return future.get(); + } + + /** + * Cancel all the remaining futures, this way if an exception occur, we don't needlessly occupy threads in the pool. + */ + private void cancelRemainingFutures( List> futures, List objects ) { + Assert.isTrue( futures.size() == objects.size(), "The number of futures must match the number of descriptive objects." ); + List incompleteTasks = new ArrayList<>( futures.size() ); + for ( Future future : futures ) { + if ( !future.isDone() ) { + future.cancel( true ); + incompleteTasks.add( objects.get( futures.indexOf( future ) ).toString() ); + } + } + if ( !incompleteTasks.isEmpty() ) { + log.warn( "The following tasks did not have time to reply and were cancelled:\n\t" + + String.join( "\n\t", incompleteTasks ) ); + } + } } \ No newline at end of file From 697cad2ce421d4d8028f3877279f57a3ed166862 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 2 Apr 2024 11:01:43 -0700 Subject: [PATCH 093/105] Fix breaking hashCode() when altering a statement --- .../experiment/ExperimentalDesignController.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExperimentalDesignController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExperimentalDesignController.java index c2a2ed5942..e71d45f05d 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExperimentalDesignController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/ExperimentalDesignController.java @@ -773,7 +773,13 @@ public void updateFactorValueCharacteristics( FactorValueValueObject[] fvvos ) { Long charId = fvvo.getCharId(); // this is optional. Maybe we're actually adding a characteristic for the Statement c; if ( charId != null ) { - c = fv.getCharacteristics().stream().filter( s -> s.getId().equals( charId ) ).findFirst().orElseThrow( () -> new EntityNotFoundException( String.format( "No characteristic with ID %d in FactorValue with ID %d", charId, fvvo.getId() ) ) ); + c = fv.getCharacteristics().stream() + .filter( s -> s.getId().equals( charId ) ) + .findFirst() + .orElseThrow( () -> new EntityNotFoundException( String.format( "No characteristic with ID %d in FactorValue with ID %d", charId, fvvo.getId() ) ) ); + // updating the statement can alter its hashCode() and thus breaking the Set contract, we have to remove + // it and add it back before saving + fv.getCharacteristics().remove( c ); } else { c = Statement.Factory.newInstance(); } @@ -815,6 +821,10 @@ public void updateFactorValueCharacteristics( FactorValueValueObject[] fvvos ) { c.setSecondObjectUri( null ); } + if ( charId != null ) { + fv.getCharacteristics().add( c ); + } + fvs[i] = fv; statements[i] = c; } From 52e9d02087c7bdf48ed0dcd19ac7ccb22eb09043 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Thu, 28 Mar 2024 14:34:52 -0700 Subject: [PATCH 094/105] Numerous search and highlighting improvements Make parsing of term URI query more robust by only allowing a single and valid URI in the query string. Improve SearchResultSet capability to merge highlights. 
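The intended merge behaviour can be sketched with the API introduced in the SearchResult and SearchResultSet diffs further down in this patch. The fragment below is illustrative only and not part of the patch; the settings and ee variables, the scores, the highlight keys and the source strings are all made up.

import java.util.Collections;
import ubic.gemma.core.search.SearchResult;
import ubic.gemma.core.search.SearchResultSet;
import ubic.gemma.model.common.search.SearchSettings;
import ubic.gemma.model.expression.experiment.ExpressionExperiment;

// Two hits for the same experiment coming from different sources: the set keeps a single
// entry, retains the higher score and merges the highlight maps.
SearchResultSet<ExpressionExperiment> results = new SearchResultSet<>( settings );
results.add( SearchResult.from( ExpressionExperiment.class, ee, 0.5,
        Collections.singletonMap( "characteristics", "neuron" ), "OntologySearchSource" ) );
results.add( SearchResult.from( ExpressionExperiment.class, ee, 0.9,
        Collections.singletonMap( "shortName", "GSE12345" ), "DatabaseSearchSource" ) );
// results now holds a single SearchResult with score 0.9 and both highlight entries; once
// settings.getMaxResults() is reached, hits for new entities are rejected, but a better hit
// for an entity already in the set still replaces the weaker one.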
Highlights are always merged and replaced only if a better result is found. Make SearchResult almost completely immutable. Separate OntologyTerm-specific highlighting capabilities. Relax thresholds for warning to make logs more useful and include the database query when applicable. Support parsing of nested conjunctions and disjunctions in extractDnf(), allowing more complex expressions to be treated as long as they can trivially simplified to a DNF. --- .../gene/service/GeneSearchServiceImpl.java | 2 +- .../gemma/core/search/DefaultHighlighter.java | 9 +- .../core/search/FieldAwareSearchSource.java | 3 + .../ubic/gemma/core/search/Highlighter.java | 10 +- .../core/search/OntologyHighlighter.java | 21 + .../ubic/gemma/core/search/SearchResult.java | 150 ++- .../gemma/core/search/SearchResultSet.java | 66 +- .../gemma/core/search/SearchServiceImpl.java | 966 +++--------------- .../ubic/gemma/core/search/SearchSource.java | 17 +- .../lucene/LuceneParseSearchException.java | 24 +- .../core/search/lucene/LuceneQueryUtils.java | 183 ++-- .../search/source/CompositeSearchSource.java | 106 +- .../search/source/DatabaseSearchSource.java | 281 +++-- .../search/source/HibernateSearchSource.java | 5 + .../search/source/OntologySearchSource.java | 131 ++- .../model/common/search/SearchSettings.java | 77 +- .../search/SearchSettingsValueObject.java | 11 - .../model/common/search/package-info.java | 7 + .../core/search/SearchResultSetTest.java | 84 ++ .../gemma/core/search/SearchResultTest.java | 15 +- .../gemma/core/search/SearchServiceTest.java | 29 + .../search/SearchServiceVoConversionTest.java | 28 +- .../search/lucene/LuceneQueryUtilsTest.java | 42 +- .../source/DatabaseSearchSourceTest.java | 25 + .../source/OntologySearchSourceTest.java | 66 +- .../java/ubic/gemma/core/util/test/Maps.java | 23 + .../common/search/SearchSettingsTest.java | 42 +- .../ubic/gemma/rest/DatasetsWebService.java | 24 +- .../ubic/gemma/rest/SearchWebService.java | 10 +- .../swagger/resolver/CustomModelResolver.java | 44 +- .../rest/util/args/DatasetArgService.java | 2 + .../main/resources/openapi-configuration.yaml | 2 +- .../main/resources/restapidocs/CHANGELOG.md | 22 + .../restapidocs/fragments/QueryType.md | 17 + .../gemma/rest/AnnotationsWebServiceTest.java | 2 +- .../gemma/rest/DatasetsWebServiceTest.java | 2 +- .../ubic/gemma/rest/SearchWebServiceTest.java | 14 +- .../GeneralSearchControllerImpl.java | 11 +- 38 files changed, 1207 insertions(+), 1366 deletions(-) create mode 100644 gemma-core/src/main/java/ubic/gemma/core/search/OntologyHighlighter.java create mode 100644 gemma-core/src/main/java/ubic/gemma/model/common/search/package-info.java create mode 100644 gemma-core/src/test/java/ubic/gemma/core/search/SearchResultSetTest.java create mode 100644 gemma-core/src/test/java/ubic/gemma/core/util/test/Maps.java create mode 100644 gemma-rest/src/main/resources/restapidocs/fragments/QueryType.md diff --git a/gemma-core/src/main/java/ubic/gemma/core/genome/gene/service/GeneSearchServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/genome/gene/service/GeneSearchServiceImpl.java index 2d5eef3706..a361b3ba9a 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/genome/gene/service/GeneSearchServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/genome/gene/service/GeneSearchServiceImpl.java @@ -216,7 +216,7 @@ public Collection searchGenesAndGeneGroups( String qu // convert result object to a value object List> dbsgvo = taxonCheckedSets.stream() .filter( Objects::nonNull ) - .map( sr -> 
SearchResult.from( sr, geneSetValueObjectHelper.convertToValueObject( sr.getResultObject() ) ) ) + .map( sr -> sr.withResultObject( geneSetValueObjectHelper.convertToValueObject( sr.getResultObject() ) ) ) .collect( Collectors.toList() ); geneSets = SearchResultDisplayObject.convertSearchResults2SearchResultDisplayObjects( dbsgvo ); diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/DefaultHighlighter.java b/gemma-core/src/main/java/ubic/gemma/core/search/DefaultHighlighter.java index 8ea89217a0..4991ff9981 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/DefaultHighlighter.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/DefaultHighlighter.java @@ -16,7 +16,7 @@ import java.util.Map; @CommonsLog -public class DefaultHighlighter implements LuceneHighlighter { +public class DefaultHighlighter implements LuceneHighlighter, OntologyHighlighter { private final Formatter formatter; @@ -28,9 +28,14 @@ public DefaultHighlighter( Formatter formatter ) { this.formatter = formatter; } + @Override + public Map highlight( String value, String field ) { + return Collections.singletonMap( field, value ); + } + @Override public Map highlightTerm( @Nullable String termUri, String termLabel, String field ) { - return Collections.emptyMap(); + return Collections.singletonMap( field, termLabel ); } @Override diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/FieldAwareSearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/FieldAwareSearchSource.java index 04eb0e85be..d1c584f991 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/FieldAwareSearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/FieldAwareSearchSource.java @@ -1,12 +1,15 @@ package ubic.gemma.core.search; +import org.apache.lucene.queryParser.QueryParser; import ubic.gemma.model.common.Identifiable; +import ubic.gemma.model.common.search.SearchSettings; import java.util.Set; /** * Search source that can retrieve results matching specific fields. * @author poirigui + * @see ubic.gemma.core.search.lucene.LuceneQueryUtils#parseSafely(SearchSettings, QueryParser) */ public interface FieldAwareSearchSource extends SearchSource { diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/Highlighter.java b/gemma-core/src/main/java/ubic/gemma/core/search/Highlighter.java index 87c607f58e..c8efd6d5ea 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/Highlighter.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/Highlighter.java @@ -1,6 +1,5 @@ package ubic.gemma.core.search; -import javax.annotation.Nullable; import java.util.Map; /** @@ -11,12 +10,7 @@ public interface Highlighter { /** - * Produce a highlight for a given ontology term. - * - * @param termUri a URI for the term or null for a full-text term - * @param termLabel a label for the term - * @param field an object path through which the term was found - * @return a suitable highlight, or null if none is found + * Produce a highlight for a given field. 
*/ - Map highlightTerm( @Nullable String termUri, String termLabel, String field ); + Map highlight( String value, String field ); } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/OntologyHighlighter.java b/gemma-core/src/main/java/ubic/gemma/core/search/OntologyHighlighter.java new file mode 100644 index 0000000000..c6408986af --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/search/OntologyHighlighter.java @@ -0,0 +1,21 @@ +package ubic.gemma.core.search; + +import javax.annotation.Nullable; +import java.util.Map; + +/** + * Highlighter specialized for ontology terms. + * @author poirigui + */ +public interface OntologyHighlighter extends Highlighter { + + /** + * Produce a highlight for a given ontology term. + * + * @param termUri a URI for the term or null for a full-text term + * @param termLabel a label for the term + * @param field an object path through which the term was found + * @return a suitable highlight, or null if none is found + */ + Map highlightTerm( @Nullable String termUri, String termLabel, String field ); +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/SearchResult.java b/gemma-core/src/main/java/ubic/gemma/core/search/SearchResult.java index 4e9a6e2bac..d075b76c05 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/SearchResult.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/SearchResult.java @@ -20,85 +20,59 @@ import lombok.Data; import lombok.EqualsAndHashCode; -import lombok.ToString; +import lombok.RequiredArgsConstructor; +import org.springframework.util.Assert; import ubic.gemma.model.common.Identifiable; import javax.annotation.Nonnull; import javax.annotation.Nullable; import java.util.Comparator; import java.util.Map; -import java.util.Objects; +import java.util.stream.Collectors; /** + * Represents an individual search result. + *

+ * Search result minimally have a type and ID and may have their result object populated at a later time via {@link #setResultObject(Identifiable)}. + *

+ * Results have a score and possibly number of highlights. Two results are considered equal if they have the same type + * and ID. You may use a {@link SearchResultSet} to combine results in a sensible way, retaining result objects and + * highlights when a better result is added. * @author paul + * @author poirigui + * @see SearchSource + * @see SearchResultSet */ @Data +@RequiredArgsConstructor @EqualsAndHashCode(of = { "resultType", "resultId" }) -@ToString(of = { "resultType", "resultId", "resultType", "highlights", "score", "source" }) public class SearchResult implements Comparable> { - /** - * Obtain a comparator for this search result. - *

- * Results are compared by {@link #getScore()} in descending order. Note that any search result can be compared - * regardless of their result type or result object. - */ - public static Comparator> getComparator() { - return Comparator.comparing( SearchResult::getScore, Comparator.reverseOrder() ); - } + private static final Comparator> COMPARATOR = Comparator.comparing( SearchResult::getScore, Comparator.reverseOrder() ); /** - * Create a search result whose result class differ from the object. + * Create a search result from a given identifiable entity. *

- * This can be useful if you wrap a proxy, or don't want to expose the object class publicly. + * The result can be cleared later with {@link #clearResultObject()}. */ public static SearchResult from( Class resultType, T entity, double score, @Nullable Map highlights, Object source ) { - if ( entity.getId() == null ) { - throw new IllegalArgumentException( "Entity ID cannot be null." ); - } + Assert.notNull( entity.getId(), "The entity ID cannot be null." ); SearchResult sr = new SearchResult<>( resultType, entity.getId(), score, highlights, source ); sr.setResultObject( entity ); return sr; } - /** - * Shorthand for {@link #from(Class, Identifiable, double, String, Object)} if you don't need to set the score and - * highlighted text. - */ - public static SearchResult from( Class resultType, T entity, double score, Object source ) { - if ( entity.getId() == null ) { - throw new IllegalArgumentException( "Entity ID cannot be null." ); - } - SearchResult sr = new SearchResult<>( resultType, entity.getId(), score, null, source ); - sr.setResultObject( entity ); - return sr; - } - /** * Create a new provisional search result with a result type and ID. + *

+ * The result can be set later with {@link #setResultObject(Identifiable)}. */ public static SearchResult from( Class resultType, long entityId, double score, @Nullable Map highlights, Object source ) { return new SearchResult<>( resultType, entityId, score, highlights, source ); } - public static SearchResult from( Class resultType, long entityId, double score, Object source ) { - return new SearchResult<>( resultType, entityId, score, null, source ); - } - /** - * Create a search result from an existing one, replacing the result object with the target one. - *

- * This is useful if you need to convert the result object (i.e. to a VO) while preserving the metadata (score, - * highlighted text, etc.). - */ - public static SearchResult from( SearchResult original, @Nullable T newResultObject ) { - SearchResult sr = new SearchResult<>( original.resultType, original.resultId, original.score, original.highlights, original.source ); - sr.setResultObject( newResultObject ); - return sr; - } - - /** - * Class of the result, immutable. + * Type of search result, immutable. */ private final Class resultType; @@ -110,7 +84,7 @@ public static SearchResult from( SearchResult ori /** * Result object this search result is referring to. *

- * This can be null, at least initially if the resultClass and objectId are provided. + * This can be null, at least initially if the resultType and resultId are provided. *

* It may also be replaced at a later time via {@link #setResultObject(Identifiable)}. */ @@ -118,17 +92,17 @@ public static SearchResult from( SearchResult ori private T resultObject; /** - * Highlights for this result. - *

- * Keys are fields of {@link T} and values are substrings that matched. + * Score for ranking this result among other results. */ - @Nullable - private Map highlights; + private final double score; /** - * Score for ranking this result among other results. + * Highlights for this result. + *

+ * Keys are fields of {@link T} and values are substrings that were matched. */ - private final double score; + @Nullable + private final Map highlights; /** * Object representing the source of this result object. @@ -137,23 +111,18 @@ public static SearchResult from( SearchResult ori */ private final Object source; - /** - * Placeholder for provisional search results. - *

- * This is used when the class and ID is known beforehand, but the result hasn't been retrieve yet from persistent - * storage. - */ - private SearchResult( Class entityClass, long entityId, double score, @Nullable Map highlights, Object source ) { - this.resultType = entityClass; - this.resultId = entityId; - this.score = score; - this.highlights = highlights; - this.source = source; + @Override + public int compareTo( SearchResult o ) { + return COMPARATOR.compare( this, o ); } @Override - public int compareTo( SearchResult o ) { - return getComparator().compare( this, o ); + public String toString() { + return String.format( "%s Id=%d Score=%.2f%s Source=%s %s", resultType.getSimpleName(), resultId, + score, + highlights != null ? " Highlights=" + highlights.keySet().stream().sorted().collect( Collectors.joining( "," ) ) : "", + source, + resultObject != null ? "[Not Filled]" : "[Filled]" ); } /** @@ -170,15 +139,44 @@ public Long getResultId() { /** * Set the result object. * - * @throws IllegalArgumentException if the provided result object IDs differs from {@link #getResultId()}. + * @throws IllegalArgumentException if the provided result object is null or if its ID differs from {@link #getResultId()}. */ - public void setResultObject( @Nullable T resultObject ) { - if ( resultObject != null && resultObject.getId() == null ) { - throw new IllegalArgumentException( "The result object ID cannot be null." ); + public void setResultObject( T resultObject ) { + Assert.notNull( resultObject, "The result object cannot be null, use clearResultObject() to unset it." ); + Assert.notNull( resultObject.getId(), "The result object ID cannot be null." ); + Assert.isTrue( resultObject.getId().equals( this.resultId ), "The result object cannot be replaced with one that has a different ID." ); + this.resultObject = resultObject; + } + + /** + * Clear the result object. + */ + public void clearResultObject() { + this.resultObject = null; + } + + /** + * Create a search result from an existing one, replacing the result object with the target one. + *

+ * The new result object does not have to be of the same type as the original result object. This is useful if you + * need to convert the result object (i.e. to a VO) while preserving the metadata (score, highlighted text, etc.). + */ + public SearchResult withResultObject( @Nullable S resultObject ) { + SearchResult searchResult = new SearchResult<>( resultType, resultId, score, highlights, source ); + if ( resultObject != null ) { + searchResult.setResultObject( resultObject ); } - if ( resultObject != null && !Objects.equals( resultObject.getId(), this.resultId ) ) { - throw new IllegalArgumentException( "The result object cannot be replaced with one that has a different ID." ); + return searchResult; + } + + /** + * Copy this search result with the given highlights. + */ + public SearchResult withHighlights( Map highlights ) { + SearchResult searchResult = new SearchResult<>( resultType, resultId, score, highlights, source ); + if ( resultObject != null ) { + searchResult.setResultObject( resultObject ); } - this.resultObject = resultObject; + return searchResult; } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/SearchResultSet.java b/gemma-core/src/main/java/ubic/gemma/core/search/SearchResultSet.java index e46ac6b4c0..9a689e406a 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/SearchResultSet.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/SearchResultSet.java @@ -1,6 +1,7 @@ package ubic.gemma.core.search; import ubic.gemma.model.common.Identifiable; +import ubic.gemma.model.common.search.SearchSettings; import java.util.AbstractSet; import java.util.HashMap; @@ -12,15 +13,25 @@ *

* If a better result is added to the set, it replaces the existing one. If the original result had a non-null * {@link SearchResult#getResultObject()}, it is transferred over so that it won't need to be filled later on if needed. - * + *

+ * The collection also honor the {@link SearchSettings#getMaxResults()} value, rejecting any new result unless replacing + * an existing one. * @author poirigui */ public class SearchResultSet extends AbstractSet> { + private final SearchSettings settings; + private final Map, SearchResult> results; - public SearchResultSet() { - results = new HashMap<>(); + public SearchResultSet( SearchSettings settings ) { + this.settings = settings; + this.results = new HashMap<>(); + } + + public SearchResultSet( SearchSettings settings, int initialCapacity ) { + this.settings = settings; + this.results = new HashMap<>( initialCapacity ); } @Override @@ -36,22 +47,45 @@ public int size() { @Override public boolean add( SearchResult t ) { SearchResult previousValue = results.get( t ); - if ( previousValue == null || t.getScore() > previousValue.getScore() ) { - results.put( t, t ); - // retain the result object to avoid fetching it again in the future - if ( previousValue != null && previousValue.getResultObject() != null && t.getResultObject() == null ) { - t.setResultObject( previousValue.getResultObject() ); + if ( previousValue == t ) { + // no need to copy or merge anything if its the same object + return false; + } + SearchResult newValue; + boolean replaced; + if ( previousValue == null ) { + if ( settings.getMaxResults() > 0 && size() >= settings.getMaxResults() ) { + // max size reached and not replacing a previous value + return false; + } + newValue = t; + replaced = true; + } else { + if ( t.getScore() > previousValue.getScore() ) { + newValue = t; + replaced = true; + } else { + // new value is unchanged, so treat the passed argument as the previous value for copy-over purposes + newValue = previousValue; + previousValue = t; + replaced = false; + } + // copy-over the previous result object if necessary + if ( previousValue.getResultObject() != null && newValue.getResultObject() == null ) { + newValue = newValue.withResultObject( previousValue.getResultObject() ); } - // merge highlights - if ( previousValue != null && previousValue.getHighlights() != null ) { - Map mergedHighlights = new HashMap<>( previousValue.getHighlights() ); - if ( t.getHighlights() != null ) { - mergedHighlights.putAll( t.getHighlights() ); + // merge highlights if necessary + if ( previousValue.getHighlights() != null ) { + if ( newValue.getHighlights() != null ) { + Map mergedHighlights = new HashMap<>( previousValue.getHighlights() ); + mergedHighlights.putAll( newValue.getHighlights() ); + newValue = newValue.withHighlights( mergedHighlights ); + } else { + newValue = newValue.withHighlights( previousValue.getHighlights() ); } - t.setHighlights( mergedHighlights ); } - return true; } - return false; + results.put( newValue, newValue ); + return replaced; } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java b/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java index 40e593a3cb..b8c17977c5 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/SearchServiceImpl.java @@ -24,7 +24,6 @@ import org.apache.commons.collections4.SetUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; -import org.apache.commons.text.StringEscapeUtils; import org.springframework.beans.factory.InitializingBean; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; @@ -36,9 +35,7 @@ 
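The copy-on-write methods added to SearchResult above can be used as in the fragment below. This is an illustrative sketch only, not part of the patch: geneSet stands for an existing GeneSet entity and gsvo for a pre-built DatabaseBackedGeneSetValueObject, mirroring the GeneSearchServiceImpl change at the top of this patch.

import java.util.Collections;
import ubic.gemma.core.search.SearchResult;
import ubic.gemma.model.genome.gene.DatabaseBackedGeneSetValueObject;
import ubic.gemma.model.genome.gene.GeneSet;

SearchResult<GeneSet> hit = SearchResult.from( GeneSet.class, geneSet, 0.8,
        Collections.singletonMap( "name", "kinase related genes" ), "DatabaseSearchSource" );
// swap the payload for its value object while keeping the score, highlights and source
SearchResult<DatabaseBackedGeneSetValueObject> voHit = hit.withResultObject( gsvo );
// attach or refresh highlights without mutating the original result
SearchResult<GeneSet> highlighted = hit.withHighlights(
        Collections.singletonMap( "description", "matched in the description" ) );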
import org.springframework.util.Assert; import org.springframework.util.LinkedMultiValueMap; import ubic.gemma.core.genome.gene.service.GeneSearchService; -import ubic.gemma.core.genome.gene.service.GeneService; import ubic.gemma.core.search.source.CompositeSearchSource; -import ubic.gemma.core.search.source.DatabaseSearchSource; import ubic.gemma.model.IdentifiableValueObject; import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.association.phenotype.PhenotypeAssociation; @@ -64,7 +61,6 @@ import ubic.gemma.model.genome.gene.GeneValueObject; import ubic.gemma.model.genome.sequenceAnalysis.BioSequenceValueObject; import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; -import ubic.gemma.persistence.service.expression.experiment.BlacklistedEntityService; import ubic.gemma.persistence.service.genome.taxon.TaxonService; import ubic.gemma.persistence.util.EntityUtils; @@ -72,9 +68,9 @@ import java.util.*; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; -import java.util.stream.Stream; -import static ubic.gemma.core.search.lucene.LuceneQueryUtils.*; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.extractTerms; +import static ubic.gemma.core.search.source.DatabaseSearchSource.NCBI_GENE_ID_URI_PREFIX; /** * This service is used for performing searches using free text or exact matches to items in the database. @@ -131,26 +127,12 @@ private void addAll( Collection> search } } - - /** - * Penalty hit for indirect hit (multiplicative). - *

- * For example, if a platform is matched by a gene hit (score = 1.0), the score will be multiplied by this penalty - * (score = 0.8 * 1.0 = 0.8). - */ - private static final double INDIRECT_HIT_PENALTY = 0.8; - - private static final String NCBI_GENE = "ncbi_gene"; - private final Map nameToTaxonMap = new LinkedHashMap<>(); /* sources */ @Autowired - @Qualifier("databaseSearchSource") - private SearchSource databaseSearchSource; - @Autowired - @Qualifier("hibernateSearchSource") - private SearchSource hibernateSearchSource; + private List searchSources; + @Autowired @Qualifier("ontologySearchSource") private SearchSource ontologySearchSource; @@ -161,12 +143,6 @@ private void addAll( Collection> search @Autowired private GeneSearchService geneSearchService; @Autowired - private GeneService geneService; - - // TODO: use services instead of DAO here - @Autowired - private BlacklistedEntityService blacklistedEntityService; - @Autowired private TaxonService taxonService; @Autowired @@ -179,16 +155,15 @@ private void addAll( Collection> search private final Map, Class>> supportedResultTypes = new HashMap<>(); /** - * A composite search source. + * A composite search source that combines all the search sources. */ - private SearchSource searchSource; + private CompositeSearchSource searchSource; @Override public Set getFields( Class resultType ) { - return Stream.of( databaseSearchSource, hibernateSearchSource, ontologySearchSource ) + return searchSources.stream() .filter( s -> s instanceof FieldAwareSearchSource ) - .map( s -> ( FieldAwareSearchSource ) s ) - .map( s -> s.getFields( resultType ) ) + .map( s -> ( ( FieldAwareSearchSource ) s ).getFields( resultType ) ) .flatMap( Set::stream ) .collect( Collectors.toSet() ); } @@ -205,16 +180,53 @@ public SearchResultMap search( SearchSettings settings ) throws SearchException StopWatch timer = StopWatch.createStarted(); - SearchResultMapImpl results; - if ( settings.isTermQuery() ) { - // we only attempt an ontology search if the uri looks remotely like a url. - results = this.ontologyUriSearch( settings ); - } else { - results = this.generalSearch( settings ); + // attempt to infer a taxon from the query if missing + if ( settings.getTaxon() == null ) { + settings = settings.withTaxon( inferTaxon( settings ) ); + } + + // If nothing to search return nothing. + if ( StringUtils.isBlank( settings.getQuery() ) ) { + return new SearchResultMapImpl(); + } + + // Get the top N results for each class. + SearchResultMapImpl results = new SearchResultMapImpl(); + // do gene first before we munge the query too much. 
+ if ( settings.hasResultType( Gene.class ) ) { + results.addAll( this.geneSearch( settings ) ); + } + if ( settings.hasResultType( ExpressionExperiment.class ) ) { + results.addAll( this.expressionExperimentSearch( settings ) ); + } + if ( settings.hasResultType( CompositeSequence.class ) ) { + results.addAll( this.compositeSequenceSearch( settings ) ); + } + if ( settings.hasResultType( ArrayDesign.class ) ) { + results.addAll( searchSource.searchArrayDesign( settings ) ); + } + if ( settings.hasResultType( BioSequence.class ) ) { + results.addAll( searchSource.searchBioSequence( settings ) ); + } + if ( settings.hasResultType( Gene.class ) && settings.isUseGo() ) { + results.addAll( this.dbHitsToSearchResult( settings, Gene.class, geneSearchService.getGOGroupGenes( settings.getQuery(), settings.getTaxon() ), + 0.8, Collections.singletonMap( "GO Group", "From GO group" ), "GeneSearchService.getGOGroupGenes" ) ); + } + if ( settings.hasResultType( BibliographicReference.class ) ) { + results.addAll( searchSource.searchBibliographicReference( settings ) ); + } + if ( settings.hasResultType( GeneSet.class ) ) { + results.addAll( searchSource.searchGeneSet( settings ) ); + } + if ( settings.hasResultType( ExpressionExperimentSet.class ) ) { + results.addAll( searchSource.searchExperimentSet( settings ) ); + } + if ( settings.hasResultType( BlacklistedEntity.class ) ) { + results.addAll( searchSource.searchBlacklistedEntities( settings ) ); } if ( !settings.isFillResults() ) { - results.forEach( ( k, v ) -> v.forEach( sr -> sr.setResultObject( null ) ) ); + results.forEach( ( k, v ) -> v.forEach( SearchResult::clearResultObject ) ); } if ( !results.isEmpty() ) { @@ -222,7 +234,6 @@ public SearchResultMap search( SearchSettings settings ) throws SearchException } return results; - } /* @@ -236,7 +247,7 @@ public Set> getSupportedResultTypes() { @Override public void afterPropertiesSet() throws Exception { - searchSource = new CompositeSearchSource( Arrays.asList( databaseSearchSource, hibernateSearchSource, ontologySearchSource ) ); + searchSource = new CompositeSearchSource( searchSources ); initializeSupportedResultTypes(); this.initializeNameToTaxonMap(); } @@ -282,7 +293,7 @@ public > SearchResu // null sf a valid state if the original result is provisional, the converter is capable of retrieving the VO by ID T resultObject = searchResult.getResultObject(); //noinspection unchecked - return SearchResult.from( searchResult, ( U ) valueObjectConversionService.convert( + return searchResult.withResultObject( ( U ) valueObjectConversionService.convert( resultObject != null ? 
resultObject : searchResult.getResultId(), supportedResultTypes.get( searchResult.getResultType() ) ) ); } catch ( ConverterNotFoundException e ) { @@ -344,13 +355,15 @@ private List>> loadValueObject List>> resultsVo = new ArrayList<>( results.size() ); for ( SearchResult sr : results ) { if ( entityVosById.containsKey( sr.getResultId() ) ) { - resultsVo.add( SearchResult.from( sr, entityVosById.get( sr.getResultId() ) ) ); + IdentifiableValueObject newResultObject = entityVosById.get( sr.getResultId() ); + resultsVo.add( sr.withResultObject( newResultObject ) ); } else if ( sr.getResultObject() == null ) { // result was originally unfilled and nothing was found, so it's somewhat safe to restore it if ( sr.getHighlights() != null ) { resultsVo.add( SearchResult.from( sr.getResultType(), sr.getResultId(), sr.getScore(), sr.getHighlights(), sr.getSource() ) ); } else { - resultsVo.add( SearchResult.from( sr.getResultType(), sr.getResultId(), sr.getScore(), sr.getSource() ) ); + long entityId = sr.getResultId(); + resultsVo.add( SearchResult.from( sr.getResultType(), entityId, sr.getScore(), null, sr.getSource() ) ); } } else { // this happens if the VO was filtered out after VO conversion (i.e. via ACL) or uninitialized @@ -368,379 +381,10 @@ private List>> loadValueObject return resultsVo; } - /** - * Checks whether settings have the search genes flag and does the search if needed. - * - * @param results the results to which should any new results be accreted. - */ - private void accreteResultsGenes( LinkedHashSet> results, SearchSettings settings ) throws SearchException { - if ( settings.hasResultType( Gene.class ) ) { - Collection> genes = this.getGenesFromSettings( settings ); - results.addAll( genes ); - } - } - - /** - * Checks settings for all do-search flags, except for gene (see - * {@link #accreteResultsGenes(LinkedHashSet, SearchSettings)}), and does the search if needed. - * - * @param results the results to which should any new results be accreted. 
- * @param settings search settings - */ - private void accreteResultsOthers( LinkedHashSet> results, SearchSettings settings ) throws SearchException { - - Collection> blacklistedResults = new SearchResultSet<>(); - - if ( settings.hasResultType( ExpressionExperiment.class ) ) { - results.addAll( this.expressionExperimentSearch( settings, blacklistedResults ) ); - } - - Collection> compositeSequences = null; - if ( settings.hasResultType( CompositeSequence.class ) ) { - compositeSequences = this.compositeSequenceSearch( settings ); - results.addAll( compositeSequences ); - } - - if ( settings.hasResultType( ArrayDesign.class ) ) { - results.addAll( this.arrayDesignSearch( settings, compositeSequences, blacklistedResults ) ); - } - - if ( settings.hasResultType( BioSequence.class ) ) { - Collection> genes = this.getGenesFromSettings( settings ); - Collection> bioSequencesAndGenes = this.bioSequenceAndGeneSearch( settings, genes ); - - // split results so that accreteResults can be properly typed - - //noinspection unchecked - Collection> bioSequences = bioSequencesAndGenes.stream() - .filter( result -> BioSequence.class.equals( result.getResultType() ) ) - .map( result -> ( SearchResult ) result ) - .collect( Collectors.toSet() ); - results.addAll( bioSequences ); - - //noinspection unchecked - Collection> gen = bioSequencesAndGenes.stream() - .filter( result -> Gene.class.equals( result.getResultType() ) ) - .map( result -> ( SearchResult ) result ) - .collect( Collectors.toSet() ); - results.addAll( gen ); - } - - if ( settings.hasResultType( Gene.class ) && settings.isUseGo() ) { - // FIXME: add support for OR, but there's a bug in baseCode that prevents this https://github.com/PavlidisLab/baseCode/issues/22 - String query = settings.getQuery().replaceAll( "\\s+OR\\s+", "" ); - results.addAll( this.dbHitsToSearchResult( - Gene.class, geneSearchService.getGOGroupGenes( query, settings.getTaxon() ), 0.8, Collections.singletonMap( "GO Group", "From GO group" ), "GeneSearchService.getGOGroupGenes" ) ); - } - - if ( settings.hasResultType( BibliographicReference.class ) ) { - results.addAll( this.searchSource.searchBibliographicReference( settings ) ); - } - - if ( settings.hasResultType( GeneSet.class ) ) { - results.addAll( this.geneSetSearch( settings ) ); - } - - if ( settings.hasResultType( ExpressionExperimentSet.class ) ) { - results.addAll( this.experimentSetSearch( settings ) ); - } - - if ( settings.hasResultType( BlacklistedEntity.class ) ) { - results.addAll( blacklistedResults ); - } - } - - // /** - // * Convert biomaterial hits into their associated ExpressionExperiments - // * - // * @param results will go here - // * @param biomaterials - // */ - // private void addEEByBiomaterials( Collection results, Map biomaterials ) { - // if ( biomaterials.size() == 0 ) { - // return; - // } - // Map ees = expressionExperimentService - // .findByBioMaterials( biomaterials.keySet() ); - // for ( ExpressionExperiment ee : ees.keySet() ) { - // SearchResult searchResult = biomaterials.get( ees.get( ee ) ); - // results.add( new SearchResult( ee, searchResult.getScore() * SearchServiceImpl.INDIRECT_DB_HIT_PENALTY, - // searchResult.getHighlightedText() + " (BioMaterial characteristic)" ) ); - // } - // } - // - // /** - // * Convert biomaterial hits into their associated ExpressionExperiments - // * - // * @param results will go here - // * @param biomaterials - // */ - // private void addEEByBiomaterialIds( Collection results, Map biomaterials ) { - // if ( biomaterials.size() == 0 ) { - 
// return; - // } - // Map ees = expressionExperimentService - // .findByBioMaterialIds( biomaterials.keySet() ); - // for ( ExpressionExperiment ee : ees.keySet() ) { - // SearchResult searchResult = biomaterials.get( ees.get( ee ) ); - // results.add( new SearchResult( ee, searchResult.getScore() * SearchServiceImpl.INDIRECT_DB_HIT_PENALTY, - // searchResult.getHighlightedText() + " (BioMaterial characteristic)" ) ); - // } - // } - // - // /** - // * Convert factorValue hits into their associated ExpressionExperiments - // * - // * @param results will go here - // * @param factorValues - // */ - // private void addEEByFactorvalueIds( Collection results, Map factorValues ) { - // if ( factorValues.size() == 0 ) { - // return; - // } - // Map ees = expressionExperimentService - // .findByFactorValueIds( factorValues.keySet() ); - // for ( ExpressionExperiment ee : ees.keySet() ) { - // SearchResult searchResult = factorValues.get( ees.get( ee ) ); - // results.add( new SearchResult( ee, searchResult.getScore() * SearchServiceImpl.INDIRECT_DB_HIT_PENALTY, - // searchResult.getHighlightedText() + " (FactorValue characteristic)" ) ); - // } - // - // } - // - // /** - // * Convert factorValue hits into their associated ExpressionExperiments - // * - // * @param results will go here - // * @param factorValues - // */ - // private void addEEByFactorvalues( Collection results, Map factorValues ) { - // if ( factorValues.size() == 0 ) { - // return; - // } - // Map ees = expressionExperimentService - // .findByFactorValues( factorValues.keySet() ); - // for ( ExpressionExperiment ee : ees.keySet() ) { - // SearchResult searchResult = factorValues.get( ees.get( ee ) ); - // results.add( new SearchResult( ee, searchResult.getScore() * SearchServiceImpl.INDIRECT_DB_HIT_PENALTY, - // searchResult.getHighlightedText() + " (FactorValue characteristic)" ) ); - // } - // - // } - - private void addTerms( Taxon taxon, String taxonName ) { - String[] terms; - if ( StringUtils.isNotBlank( taxonName ) ) { - terms = taxonName.split( "\\s+" ); - // Only continue for multi-word - if ( terms.length > 1 ) { - for ( String s : terms ) { - if ( !nameToTaxonMap.containsKey( s.trim().toLowerCase() ) ) { - nameToTaxonMap.put( s.trim().toLowerCase(), taxon ); - } - } - } - } - } - - private Collection> experimentSetSearch( SearchSettings settings ) throws SearchException { - return searchSource.searchExperimentSet( settings ); - } - - /** - * A general search for array designs. - * This search does both an database search and a compass search. This is also contains an underlying - * {@link CompositeSequence} search, returning the {@link ArrayDesign} collection for the given composite sequence - * search string (the returned collection of array designs does not contain duplicates). - * - * @param probeResults Collection of results from a previous CompositeSequence search. Can be null; otherwise used - * to avoid a second search for probes. The array designs for the probes are added to the final - * results. 
- */ - private Collection> arrayDesignSearch( SearchSettings settings, - @Nullable Collection> probeResults, Collection> blacklistedResults ) throws SearchException { - - StopWatch watch = StopWatch.createStarted(); - String searchString = prepareDatabaseQuery( settings ); - Collection> results = new SearchResultSet<>(); - - ArrayDesign shortNameResult = arrayDesignService.findByShortName( searchString ); - if ( shortNameResult != null ) { - results.add( SearchResult.from( ArrayDesign.class, shortNameResult, DatabaseSearchSource.MATCH_BY_SHORT_NAME_SCORE, "ArrayDesignService.findByShortName" ) ); - return results; - } - - Collection nameResult = arrayDesignService.findByName( searchString ); - if ( nameResult != null && !nameResult.isEmpty() ) { - for ( ArrayDesign ad : nameResult ) { - results.add( SearchResult.from( ArrayDesign.class, ad, DatabaseSearchSource.MATCH_BY_NAME_SCORE, "ArrayDesignService.findByShortName" ) ); - } - return results; - } - - if ( settings.hasResultType( BlacklistedEntity.class ) ) { - BlacklistedEntity b = blacklistedEntityService.findByAccession( searchString ); - if ( b != null ) { - blacklistedResults.add( SearchResult.from( BlacklistedEntity.class, b, DatabaseSearchSource.MATCH_BY_ACCESSION_SCORE, null, "BlacklistedEntityService.findByAccession" ) ); - return results; - } - } - - Collection altNameResults = arrayDesignService.findByAlternateName( searchString ); - for ( ArrayDesign arrayDesign : altNameResults ) { - results.add( SearchResult.from( ArrayDesign.class, arrayDesign, 0.9, "ArrayDesignService.findByAlternateName" ) ); - } - - Collection manufacturerResults = arrayDesignService.findByManufacturer( searchString ); - for ( ArrayDesign arrayDesign : manufacturerResults ) { - results.add( SearchResult.from( ArrayDesign.class, arrayDesign, 0.9, "ArrayDesignService.findByManufacturer" ) ); - } - - /* - * FIXME: add merged platforms and subsumers - */ - results.addAll( searchSource.searchArrayDesign( settings ) ); - - watch.stop(); - if ( watch.getTime() > 1000 ) - SearchServiceImpl.log.warn( "Array Design search for " + settings + " took " + watch.getTime() + " ms" ); - - return results; - } - - /** - * @param previousGeneSearchResults Can be null, otherwise used to avoid a second search for genes. The biosequences - * for the genes are added to the final results. - */ - private Collection> bioSequenceAndGeneSearch( SearchSettings settings, - Collection> previousGeneSearchResults ) throws SearchException { - StopWatch watch = StopWatch.createStarted(); - - Collection> searchResults = searchSource.searchBioSequenceAndGene( settings, previousGeneSearchResults ); - - watch.stop(); - if ( watch.getTime() > 1000 ) - SearchServiceImpl.log - .warn( "Biosequence search for " + settings + " took " + watch.getTime() + " ms " + searchResults - .size() + " results." ); - - return searchResults; - } - - /** - * Search via characteristics i.e. ontology terms. - *

- * This is an important type of search but also a point of performance issues. Searches for "specific" terms are - * generally not a big problem (yielding less than 100 results); searches for "broad" terms can return numerous - * (thousands) - * results. - */ - private Collection> characteristicEESearch( final SearchSettings settings ) throws SearchException { - - Collection> results = new SearchResultSet<>(); - - StopWatch watch = StopWatch.createStarted(); - - log.debug( "Starting EE search for " + settings ); - /* - * Note that the AND is applied only within one entity type. The fix would be to apply AND at this - * level. - * - * The tricky part here is if the user has entered a boolean query. If they put in Parkinson's disease AND - * neuron, then we want to eventually return entities that are associated with both. We don't expect to find - * single characteristics that match both. - * - * But if they put in Parkinson's disease we don't want to do two queries. - */ - Set> subclauses = extractDnf( settings ); - - for ( Set subclause : subclauses ) { - Collection> classResults = this.characteristicEESearchWithChildren( settings, subclause ); - if ( !classResults.isEmpty() ) { - log.debug( "... Found " + classResults.size() + " EEs matching " + String.join( " OR ", subclause ) ); - } - results.addAll( classResults ); - } - - SearchServiceImpl.log.debug( String.format( "ExpressionExperiment search: %s -> %d characteristic-based hits %d ms", - settings, results.size(), watch.getTime() ) ); - - return results; - - } - - /** - * Search for the Experiment query in ontologies, including items that are associated with children of matching - * query terms. That is, 'brain' should return entities tagged as 'hippocampus'. It can handle AND in searches, so - * Parkinson's - * AND neuron finds items tagged with both of those terms. The use of OR is handled by the caller. - * - * @param settings search settings - * @return SearchResults of Experiments - */ - private Collection> characteristicEESearchWithChildren( SearchSettings settings, Set subparts ) throws SearchException { - StopWatch watch = StopWatch.createStarted(); - - // we would have to first deal with the separate queries, and then apply the logic. - Collection> allResults = new SearchResultSet<>(); - - SearchServiceImpl.log.debug( "Starting characteristic search for: " + settings ); - for ( String rawTerm : subparts ) { - String trimmed = StringUtils.strip( rawTerm ); - if ( StringUtils.isBlank( trimmed ) ) { - continue; - } - Collection> subqueryResults = ontologySearchSource.searchExpressionExperiment( settings.withQuery( trimmed ) ); - if ( allResults.isEmpty() ) { - allResults.addAll( subqueryResults ); - } else { - // this is our Intersection operation. - allResults.retainAll( subqueryResults ); - - // aggregate the highlighted text. - Map, String> highlights = new HashMap<>(); - for ( SearchResult sqr : subqueryResults ) { - if ( sqr.getHighlights() != null && sqr.getHighlights().containsKey( "term" ) ) { - highlights.put( sqr, sqr.getHighlights().get( "term" ) ); - } - } - - for ( SearchResult ar : allResults ) { - String k = highlights.get( ar ); - if ( StringUtils.isNotBlank( k ) ) { - if ( ar.getHighlights() != null ) { - if ( StringUtils.isBlank( ar.getHighlights().get( "term" ) ) ) { - ar.getHighlights().put( "term", k ); - } else { - ar.getHighlights().compute( "term", ( z, t ) -> t + "
" + k ); - } - } else { - ar.setHighlights( Collections.singletonMap( "term", k ) ); - } - } - } - } - - if ( watch.getTime() > 1000 ) { - SearchServiceImpl.log.warn( "Characteristic EE search for '" + rawTerm + "': " + allResults.size() - + " hits retained so far; " + watch.getTime() + "ms" ); - watch.reset(); - watch.start(); - } - - if ( isFilled( allResults, settings ) ) { - return allResults; - } - } - - return allResults; - - } - /** * Search by name of the composite sequence as well as gene. */ - private Collection> compositeSequenceSearch( SearchSettings settings ) throws SearchException { + private SearchResultSet compositeSequenceSearch( SearchSettings settings ) throws SearchException { StopWatch watch = StopWatch.createStarted(); @@ -750,12 +394,12 @@ private Collection> compositeSequenceSearch( Sea */ // Skip compass searching of composite sequences because it only bloats the results. - Collection> compositeSequenceResults = new HashSet<>( this.searchSource.searchCompositeSequenceAndGene( settings ) ); + Collection> compositeSequenceResults = this.searchSource.searchCompositeSequenceAndGene( settings ); /* * This last step is needed because the compassSearch for compositeSequences returns bioSequences too. */ - Collection> finalResults = new SearchResultSet<>(); + SearchResultSet finalResults = new SearchResultSet<>( settings ); for ( SearchResult sr : compositeSequenceResults ) { if ( CompositeSequence.class.equals( sr.getResultType() ) ) { //noinspection unchecked @@ -764,93 +408,19 @@ private Collection> compositeSequenceSearch( Sea } watch.stop(); - if ( watch.getTime() > 1000 ) - SearchServiceImpl.log - .warn( "Composite sequence search for " + settings + " took " + watch.getTime() + " ms, " - + finalResults.size() + " results." ); + if ( watch.getTime() > 1000 ) { + SearchServiceImpl.log.warn( String.format( "Composite sequence search for %s took %d ms, %d results.", + settings, watch.getTime(), finalResults.size() ) ); + } return finalResults; } - // private List convertEntitySearchResutsToValueObjectsSearchResults( - // Collection searchResults ) { - // List convertedSearchResults = new ArrayList<>(); - // StopWatch t = this.startTiming(); - // for ( SearchResult searchResult : searchResults ) { - // // this is a special case ... for some reason. - // if ( BioSequence.class.equals( searchResult.getResultClass() ) ) { - // SearchResult convertedSearchResult = new SearchResult( BioSequenceValueObject - // .fromEntity( bioSequenceService.thaw( ( BioSequence ) searchResult.getResultObject() ) ), - // searchResult.getScore(), searchResult.getHighlightedText() ); - // convertedSearchResults.add( convertedSearchResult ); - // } else { - // convertedSearchResults.add( searchResult ); - // } - // } - // if ( t.getTime() > 500 ) { - // log.info( "Conversion of " + searchResults.size() + " search results: " + t.getTime() + "ms" ); - // } - // return convertedSearchResults; - // } - - // /** - // * Takes a list of ontology terms, and classes of objects of interest to be returned. Looks through the - // * characteristic table for an exact match with the given ontology terms. Only tries to match the uri's. - // * - // * @param classes Class of objects to restrict the search to (typically ExpressionExperiment.class, for - // * example). 
- // * @param terms A list of ontology terms to search for - // * @return Collection of search results for the objects owning the found characteristics, where the owner is - // * of - // * class clazz - // */ - // private Collection databaseCharacteristicExactUriSearchForOwners( Collection> classes, - // Collection terms ) { - // - // // Collection characteristicValueMatches = new ArrayList(); - // Collection characteristicURIMatches = new ArrayList<>(); - // - // for ( OntologyTerm term : terms ) { - // // characteristicValueMatches.addAll( characteristicService.findByValue( term.getUri() )); - // characteristicURIMatches.addAll( characteristicService.findByUri( classes, term.getUri() ) ); - // } - // - // Map parentMap = characteristicService.getParents( classes, characteristicURIMatches ); - // // parentMap.putAll( characteristicService.getParents(characteristicValueMatches ) ); - // - // return this.filterCharacteristicOwnersByClass( classes, parentMap ); - // } - - // /** - // * Convert characteristic hits from database searches into SearchResults. - // * @param entities map of classes to characteristics e.g. Experiment.class -> annotated characteristics - // * @param matchText used in highlighting - // * - // * FIXME we need the ID of the annotated object if we do it this way - // */ - // private Collection dbCharacteristicHitsToSearchResultByClass( Map, Collection> entities, - // String matchText ) { - // // return this.dbHitsToSearchResult( entities, null, matchText ); - // - // List results = new ArrayList<>(); - // for ( Class clazz : entities.keySet() ) { - // - // for ( Characteristic c : entities.get( clazz ) ) { - // SearchResult esr = new SearchResult(clazz, /*ID NEEDED*/ , 1.0, matchText ); - // - // results.add( esr ); - // } - // - // } - // return results; - // - // } - /** * Convert hits from database searches into SearchResults. */ - private Collection> dbHitsToSearchResult( Class entityClass, Collection entities, double score, Map highlights, String source ) { + private SearchResultSet dbHitsToSearchResult( SearchSettings settings, Class entityClass, Collection entities, double score, Map highlights, String source ) { StopWatch watch = StopWatch.createStarted(); - List> results = new ArrayList<>( entities.size() ); + SearchResultSet results = new SearchResultSet<>( settings, entities.size() ); for ( T e : entities ) { if ( e == null ) { if ( log.isDebugEnabled() ) @@ -868,17 +438,6 @@ private Collection> dbHitsToSearchResul return results; } - // private void debugParentFetch( Map parentMap ) { - // /* - // * This is purely debugging. - // */ - // if ( parentMap.size() > 0 ) { - // if ( SearchServiceImpl.log.isDebugEnabled() ) - // SearchServiceImpl.log.debug( "Found " + parentMap.size() + " owners for " + parentMap.keySet().size() - // + " characteristics:" ); - // } - // } - /** * A key method for experiment search. This search does both an database search and a compass search, and looks at * several different associations. 
To allow maximum flexibility, we try not to limit the number of results here (it @@ -891,75 +450,45 @@ private Collection> dbHitsToSearchResul * SearchSettings.DEFAULT_MAX_RESULTS_PER_RESULT_TYPE * @return {@link Collection} of SearchResults */ - private Collection> expressionExperimentSearch( final SearchSettings settings, Collection> blacklistedResults ) throws SearchException { + private SearchResultSet expressionExperimentSearch( final SearchSettings settings ) throws SearchException { StopWatch totalTime = StopWatch.createStarted(); StopWatch watch = StopWatch.createStarted(); SearchServiceImpl.log.debug( ">>>>> Starting search for " + settings ); - Set> results = new SearchResultSet(); + SearchResultSet results = new SearchResultSet<>( settings ); // searches for GEO names, etc - "exact" matches. - if ( settings.isUseDatabase() ) { - results.addAll( this.searchSource.searchExpressionExperiment( settings ) ); - if ( watch.getTime() > 500 ) - SearchServiceImpl.log - .info( "Expression Experiment database search for " + settings + " took " + watch.getTime() - + " ms, " + results.size() + " hits." ); - - /* - * If we get results here, probably we want to just stop immediately, because the user is searching for - * something exact. In response to https://github.com/PavlidisLab/Gemma/issues/140 we continue if the user - * has admin status. - */ - if ( !results.isEmpty() && !SecurityUtil.isUserAdmin() ) { - return results; - } - - if ( settings.hasResultType( BlacklistedEntity.class ) ) { - BlacklistedEntity b = blacklistedEntityService.findByAccession( prepareDatabaseQuery( settings ) ); - if ( b != null ) { - blacklistedResults.add( SearchResult.from( BlacklistedEntity.class, b, DatabaseSearchSource.MATCH_BY_ACCESSION_SCORE, null, "BlacklistedEntityService.findByAccession" ) ); - return results; - } - } + results.addAll( searchSource.searchExpressionExperiment( settings ) ); + if ( watch.getTime() > 1000 ) + SearchServiceImpl.log.warn( String.format( "Expression Experiment database search for %s took %d ms, %d hits.", + settings, watch.getTime(), results.size() ) ); - watch.reset(); - watch.start(); + // in fast mode, stop now + if ( settings.getMode().equals( SearchSettings.SearchMode.FAST ) ) { + return results; } + /* + * If we get results here, probably we want to just stop immediately, because the user is searching for + * something exact. In response to https://github.com/PavlidisLab/Gemma/issues/140 we continue if the user + * has admin status. + */ + // special case: search for experiments associated with genes - Collection> geneHits = this.geneSearch( settings.withMode( SearchSettings.SearchMode.FAST ) ); - if ( geneHits.size() > 0 ) { - // TODO: make sure this is being hit correctly. 
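// Illustrative sketch, not part of this patch: the gene-to-experiment hop rewrites the query as the
// annotation URI for the NCBI gene id and re-runs the ontology search. NCBI_GENE_ID_URI_PREFIX is the
// constant introduced in DatabaseSearchSource further down; 627 (BDNF) is only an example id.
String geneUri = NCBI_GENE_ID_URI_PREFIX + 627; // "http://purl.org/commons/record/ncbi_gene/627"
Collection<SearchResult<ExpressionExperiment>> taggedExperiments =
        ontologySearchSource.searchExpressionExperiment( settings.withQuery( geneUri ) );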
+ // this is achieved by crafting a URI with the NCBI gene id + if ( results.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) || SecurityUtil.isUserAdmin() ) { + SearchResultSet geneHits = this.geneSearch( settings.withMode( SearchSettings.SearchMode.FAST ) ); for ( SearchResult gh : geneHits ) { Gene g = gh.getResultObject(); - if ( g == null ) { + if ( g == null || g.getNcbiGeneId() == null ) { continue; } - Integer ncbiGeneId = g.getNcbiGeneId(); - String geneUri = "http://" + NCBI_GENE + "/" + ncbiGeneId; // this is just enough to fool the search into looking by NCBI ID, but check working as expected - SearchSettings gss = SearchSettings.expressionExperimentSearch( geneUri ); - gss.setMaxResults( settings.getMaxResults() ); - gss.setTaxon( settings.getTaxon() ); - gss.setQuery( geneUri ); - // FIXME: there should be a nicer, typed way of doing ontology searches - results.addAll( ontologyUriSearch( gss ).getByResultObjectType( ExpressionExperiment.class ) ); + results.addAll( ontologySearchSource.searchExpressionExperiment( settings.withQuery( NCBI_GENE_ID_URI_PREFIX + g.getNcbiGeneId() ) ) ); } } - // fancy search that uses ontologies to infer related terms - if ( settings.isUseCharacteristics() ) { - results.addAll( this.characteristicEESearch( settings ) ); - if ( watch.getTime() > 500 ) - SearchServiceImpl.log - .warn( String.format( "Expression Experiment search via characteristics for %s took %d ms, %d hits.", - settings, watch.getTime(), results.size() ) ); - watch.reset(); - watch.start(); - } - /* * this should be unnecessary we we hit bibrefs in our regular lucene-index search. Also as written, this is * very slow @@ -1000,139 +529,55 @@ private Collection> expressionExperimentSearc * we may want to move this sooner, but we don't want to slow down the process if they are not searching by * array design */ - if ( results.isEmpty() ) { + if ( results.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) || SecurityUtil.isUserAdmin() ) { watch.reset(); watch.start(); - Collection> matchingPlatforms = this.arrayDesignSearch( settings, null, blacklistedResults ); + Collection> matchingPlatforms = searchSource.searchArrayDesign( settings ); for ( SearchResult adRes : matchingPlatforms ) { ArrayDesign ad = adRes.getResultObject(); if ( ad != null ) { Collection expressionExperiments = this.arrayDesignService .getExpressionExperiments( ad ); - if ( expressionExperiments.size() > 0 ) - results.addAll( this.dbHitsToSearchResult( ExpressionExperiment.class, expressionExperiments, + if ( !expressionExperiments.isEmpty() ) + results.addAll( this.dbHitsToSearchResult( settings, ExpressionExperiment.class, expressionExperiments, 0.8, Collections.singletonMap( "arrayDesign", ad.getShortName() + " - " + ad.getName() ), String.format( "ArrayDesignService.getExpressionExperiments(%s)", ad ) ) ); } } - if ( watch.getTime() > 500 ) + if ( watch.getTime() > 1000 ) { SearchServiceImpl.log.warn( String.format( "Expression Experiment platform search for %s took %d ms, %d hits.", settings, watch.getTime(), results.size() ) ); - - if ( !results.isEmpty() ) { - return results; } } - if ( !settings.isFillResults() ) { - results.forEach( sr -> sr.setResultObject( null ) ); - } - String message = String.format( ">>>>>>> Expression Experiment search for %s took %d ms, %d hits.", settings, totalTime.getTime(), results.size() ); - if ( totalTime.getTime() > 500 ) { + if ( totalTime.getTime() > 1000 ) { SearchServiceImpl.log.warn( message ); } else { 
SearchServiceImpl.log.debug( message ); } return results; - - } - - // /** - // * - // * @param classes - // * @param characteristic2entity - // * @return - // */ - // private Collection filterCharacteristicOwnersByClass( Map, Collection> parents, String uri, String value ) { - // - // StopWatch t = this.startTiming(); - // Map biomaterials = new HashMap<>(); - // Map factorValues = new HashMap<>(); - // Collection results = new HashSet<>(); - // - // for ( Class clazz : parents.keySet() ) { - // for ( Long id : parents.get( clazz ) ) { - // String matchedText; - // - // if ( StringUtils.isNotBlank( uri ) ) { - // matchedText = "Tagged term: " + value + ""; - // } else { - // matchedText = "Free text: " + value; - // } - // - // if ( clazz.isAssignableFrom( BioMaterial.class ) ) { - // biomaterials.put( id, new SearchResult( clazz, id, 1.0, matchedText ) ); - // } else if ( clazz.isAssignableFrom( FactorValue.class ) ) { - // factorValues.put( id, new SearchResult( clazz, id, 1.0, matchedText ) ); - // } else if ( clazz.isAssignableFrom( ExpressionExperiment.class ) ) { - // results.add( new SearchResult( clazz, id, 1.0, matchedText ) ); - // } else { - // throw new IllegalStateException(); - // } - // } - // - // } - // - // this.addEEByFactorvalueIds( results, factorValues ); - // - // this.addEEByBiomaterialIds( results, biomaterials ); - // - // if ( t.getTime() > 500 ) { - // log.info( "Retrieving experiments associated with characteristics: " + t.getTime() + "ms" ); - // } - // - // return results; - // - // } - - /** - * Makes no attempt at resolving the search query as a URI. Will tokenize the search query if there are control - * characters in the String. URI's will get parsed into multiple query terms and lead to bad results. - *
- * Will try to resolve general terms like brain --> to appropriate OntologyTerms and search for objects tagged with - * those terms (if isUseCharacte = true) - */ - private SearchResultMapImpl generalSearch( SearchSettings settings ) throws SearchException { - // If nothing to search return nothing. - if ( StringUtils.isBlank( settings.getQuery() ) ) { - return new SearchResultMapImpl(); - } - - // attempt to infer a taxon from the query if missing - if ( settings.getTaxon() == null ) { - settings.setTaxon( inferTaxon( settings ) ); - } - - LinkedHashSet> rawResults = new LinkedHashSet<>(); - - // do gene first before we munge the query too much. - this.accreteResultsGenes( rawResults, settings ); - - this.accreteResultsOthers( - rawResults, - settings ); - - return groupAndSortResultsByType( rawResults, settings ); } /** * Combines compass style search, the db style search, and the compositeSequence search and returns 1 combined list * with no duplicates. */ - private Collection> geneSearch( final SearchSettings settings ) throws SearchException { + private SearchResultSet geneSearch( final SearchSettings settings ) throws SearchException { StopWatch watch = StopWatch.createStarted(); - Collection> geneDbList = this.searchSource.searchGene( settings ); + SearchResultSet combinedGeneList = new SearchResultSet<>( settings ); - if ( settings.getMode() == SearchSettings.SearchMode.FAST && geneDbList.size() > 0 ) { - return geneDbList; - } + combinedGeneList.addAll( this.searchSource.searchGene( settings ) ); - Set> combinedGeneList = new HashSet<>( geneDbList ); + // stop here in the fast search mode + if ( settings.getMode() == SearchSettings.SearchMode.FAST ) { + return combinedGeneList; + } - if ( combinedGeneList.isEmpty() ) { + // expand the search by including probes-associated genes + if ( combinedGeneList.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) ) { Collection> geneCsList = this.searchSource.searchCompositeSequenceAndGene( settings ); for ( SearchResult res : geneCsList ) { if ( Gene.class.equals( res.getResultType() ) ) @@ -1141,81 +586,12 @@ private Collection> geneSearch( final SearchSettings settings } } - if ( watch.getTime() > 1000 ) - SearchServiceImpl.log - .warn( "Gene search for " + settings + " took " + watch.getTime() + " ms; " + combinedGeneList - .size() + " results." ); - - return combinedGeneList; - } - - private Collection> geneSetSearch( SearchSettings settings ) throws SearchException { - return searchSource.searchGeneSet( settings ); - } - - // /** - // * Given classes to search and characteristics (experiment search) - // * - // * @param classes Which classes of entities to look for - // */ - // private Collection getAnnotatedEntities( Collection> classes, - // Collection cs ) { - // - // // time-critical - // Map characteristic2entity = characteristicService.getParents( classes, cs ); - // Collection matchedEntities = this - // .filterCharacteristicOwnersByClass( classes, characteristic2entity ); - // - // if ( SearchServiceImpl.log.isDebugEnabled() ) { - // this.debugParentFetch( characteristic2entity ); - // } - // return matchedEntities; - // } - - /** - * @return a collection of SearchResults holding all the genes resulting from the search with given SearchSettings. 
- */ - private Collection> getGenesFromSettings( SearchSettings settings ) throws SearchException { - Collection> genes = null; - if ( settings.hasResultType( Gene.class ) ) { - genes = this.geneSearch( settings ); - } - return genes; - } - - // /** - // * @return List of ids for the entities held by the search results. - // */ - // private List getIds( List searchResults ) { - // List list = new ArrayList<>(); - // for ( SearchResult r : searchResults ) { - // list.add( r.getId() ); - // } - // assert list.size() == searchResults.size(); - // return list; - // } - - /** - * Group and sort results by type. - * - * @return map of result entity class (e.g. BioSequence or ExpressionExperiment) to SearchResult - * @see SearchResult#getResultType() - */ - private static SearchResultMapImpl groupAndSortResultsByType( - LinkedHashSet> rawResults, - SearchSettings settings ) { - - SearchResultMapImpl results = new SearchResultMapImpl(); - List> sortedRawResults = rawResults.stream().sorted().collect( Collectors.toList() ); - - // Get the top N results for each class. - for ( SearchResult sr : sortedRawResults ) { - if ( settings.getMaxResults() < 1 || results.size() < settings.getMaxResults() ) { - results.add( sr ); - } + if ( watch.getTime() > 1000 ) { + SearchServiceImpl.log.warn( String.format( "Gene search for %s took %d ms; %d results.", + settings, watch.getTime(), combinedGeneList.size() ) ); } - return results; + return combinedGeneList; } private void initializeNameToTaxonMap() { @@ -1239,126 +615,25 @@ private void initializeNameToTaxonMap() { } - /** - * @return results, if the settings.termUri is populated. This includes gene uris. - */ - private SearchResultMapImpl ontologyUriSearch( SearchSettings settings ) throws SearchException { - SearchResultMapImpl results = new SearchResultMapImpl(); - - // 1st check to see if the query is a URI (from an ontology). - // Do this by seeing if we can find it in the loaded ontologies. - // Escape with general utilities because might not be doing a lucene backed search. (just a hibernate one). - String termUri = settings.getQuery(); - - if ( !settings.isTermQuery() ) { - return results; - } - - String uriString = StringEscapeUtils.escapeJava( StringUtils.strip( termUri ) ); - - /* - * Gene search. We want experiments that are annotated. But also genes. - */ - if ( StringUtils.containsIgnoreCase( uriString, SearchServiceImpl.NCBI_GENE ) ) { - // Perhaps is a valid gene URL. Want to search for the gene in gemma. - - // Get the gene - String ncbiAccessionFromUri = StringUtils.substringAfterLast( uriString, "/" ); - Gene g = null; - - try { - g = geneService.findByNCBIId( Integer.parseInt( ncbiAccessionFromUri ) ); - } catch ( NumberFormatException e ) { - // ok - } - if ( g != null ) { - - // 1st get objects tagged with the given gene identifier - if ( settings.hasResultType( ExpressionExperiment.class ) ) { // FIXME maybe we always want this? 
- Collection> eeHits = ontologySearchSource.searchExpressionExperiment( settings.withQuery( termUri ) ); - for ( SearchResult sr : eeHits ) { - Map highlights; - if ( sr.getHighlights() != null ) { - highlights = new HashMap<>( sr.getHighlights() ); - } else { - highlights = new HashMap<>(); - } - highlights.put( "term", g.getOfficialSymbol() ); - sr.setHighlights( highlights ); + private void addTerms( Taxon taxon, String taxonName ) { + String[] terms; + if ( StringUtils.isNotBlank( taxonName ) ) { + terms = taxonName.split( "\\s+" ); + // Only continue for multi-word + if ( terms.length > 1 ) { + for ( String s : terms ) { + if ( !nameToTaxonMap.containsKey( s.trim().toLowerCase() ) ) { + nameToTaxonMap.put( s.trim().toLowerCase(), taxon ); } - results.addAll( eeHits ); - } - - //// - if ( settings.hasResultType( Gene.class ) ) { - results.add( SearchResult.from( Gene.class, g, DatabaseSearchSource.MATCH_BY_ID_SCORE, "GeneService.findByNCBIId" ) ); - } } - return results; - } - - /* - * Not searching for a gene. Only other option is a direct URI search for experiments. - */ - if ( settings.hasResultType( ExpressionExperiment.class ) ) { - results.addAll( ontologySearchSource.searchExpressionExperiment( settings.withQuery( uriString ) ) ); } - - return results; } - // /** - // * Retrieve entities from the persistent store (if we don't have them already) - // */ - // private Collection retrieveResultEntities( Class entityClass, List results ) { - // List ids = this.getIds( results ); - // - // // FIXME: don't we want value objects? - // if ( ExpressionExperiment.class.isAssignableFrom( entityClass ) ) { - // return expressionExperimentService.load( ids ); - // } else if ( ArrayDesign.class.isAssignableFrom( entityClass ) ) { - // return arrayDesignService.load( ids ); - // } else if ( CompositeSequence.class.isAssignableFrom( entityClass ) ) { - // return compositeSequenceService.load( ids ); - // } else if ( BibliographicReference.class.isAssignableFrom( entityClass ) ) { - // return bibliographicReferenceService.load( ids ); - // } else if ( Gene.class.isAssignableFrom( entityClass ) ) { - // return geneService.load( ids ); - // } else if ( BioSequence.class.isAssignableFrom( entityClass ) ) { - // return bioSequenceService.load( ids ); - // } else if ( GeneSet.class.isAssignableFrom( entityClass ) ) { - // return geneSetService.load( ids ); - // } else if ( ExpressionExperimentSet.class.isAssignableFrom( entityClass ) ) { - // return experimentSetService.load( ids ); - // } else if ( Characteristic.class.isAssignableFrom( entityClass ) ) { - // Collection chars = new ArrayList<>(); - // for ( Long id : ids ) { - // chars.add( characteristicService.load( id ) ); - // } - // return chars; - // } else if ( CharacteristicValueObject.class.isAssignableFrom( entityClass ) ) { - // // TEMP HACK this whole method should not be needed in many cases - // Collection chars = new ArrayList<>(); - // for ( SearchResult result : results ) { - // if ( result.getResultClass().isAssignableFrom( CharacteristicValueObject.class ) ) { - // chars.add( ( CharacteristicValueObject ) result.getResultObject() ); - // } - // } - // return chars; - // } else if ( ExpressionExperimentSet.class.isAssignableFrom( entityClass ) ) { - // return experimentSetService.load( ids ); - // } else if ( BlacklistedEntity.class.isAssignableFrom( entityClass ) ) { - // return blackListDao.load( ids ); - // } else { - // throw new UnsupportedOperationException( "Don't know how to retrieve objects for class=" + entityClass ); - // 
} - // } - - /** * Infer a {@link Taxon} from the search settings. */ + @Nullable private Taxon inferTaxon( SearchSettings settings ) throws SearchException { // split the query around whitespace characters, limit the splitting to 4 terms (may be excessive) // remove quotes and other characters tha can interfere with the exact match @@ -1373,13 +648,4 @@ private Taxon inferTaxon( SearchSettings settings ) throws SearchException { // no match found, on taxon is inferred return null; } - - /** - * Check if a collection of search results is already filled. - * - * @return true if the search results are filled and cannot accept more results, false otherwise - */ - private static boolean isFilled( Collection> results, SearchSettings settings ) { - return settings.getMaxResults() > 0 && results.size() >= settings.getMaxResults(); - } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/SearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/SearchSource.java index ae61a1a0b5..82762b1633 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/SearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/SearchSource.java @@ -3,6 +3,7 @@ import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.search.SearchSettings; +import ubic.gemma.model.expression.BlacklistedEntity; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExpressionExperiment; @@ -21,6 +22,11 @@ */ public interface SearchSource { + /** + * Indicate if this source accepts the given search settings. + */ + boolean accepts( SearchSettings settings ); + default Collection> searchArrayDesign( SearchSettings settings ) throws SearchException { return Collections.emptyList(); } @@ -48,7 +54,10 @@ default Collection> searchBioSequence( SearchSettings @Deprecated default Collection> searchBioSequenceAndGene( SearchSettings settings, @Nullable Collection> previousGeneSearchResults ) throws SearchException { - return Collections.emptyList(); + Collection> results = new HashSet<>(); + results.addAll( this.searchBioSequence( settings ) ); + results.addAll( this.searchGene( settings ) ); + return results; } default Collection> searchCompositeSequence( SearchSettings settings ) throws SearchException { @@ -66,7 +75,7 @@ default Collection> searchCompositeSequence( Sea @Deprecated default Collection> searchCompositeSequenceAndGene( SearchSettings settings ) throws SearchException { Collection> results = new HashSet<>(); - results.addAll( this.searchBioSequence( settings ) ); + results.addAll( this.searchCompositeSequence( settings ) ); results.addAll( this.searchGene( settings ) ); return results; } @@ -82,4 +91,8 @@ default Collection> searchGene( SearchSettings settings ) thr default Collection> searchGeneSet( SearchSettings settings ) throws SearchException { return Collections.emptyList(); } + + default Collection> searchBlacklistedEntities( SearchSettings settings ) throws SearchException { + return Collections.emptyList(); + } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneParseSearchException.java b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneParseSearchException.java index 29c17c9644..5f3fea37b8 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneParseSearchException.java +++ 
b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneParseSearchException.java @@ -3,11 +3,31 @@ import org.apache.lucene.queryParser.ParseException; import ubic.gemma.core.search.SearchException; +import javax.annotation.Nullable; + /** * @author poirigui */ public class LuceneParseSearchException extends SearchException { - public LuceneParseSearchException( ParseException e ) { - super( e.getMessage(), e ); + + @Nullable + private final ParseException originalParseException; + + public LuceneParseSearchException( String message, ParseException cause ) { + super( message, cause ); + this.originalParseException = null; + } + + public LuceneParseSearchException( String message, ParseException cause, ParseException originalParseException ) { + super( message, cause ); + this.originalParseException = originalParseException; + } + + /** + * The original {@link ParseException} if this query was reparsed without special characters. + */ + @Nullable + public ParseException getOriginalParseException() { + return originalParseException; } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java index 7a33f83ecf..fd8682d380 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java @@ -10,6 +10,9 @@ import ubic.gemma.core.search.SearchException; import ubic.gemma.model.common.search.SearchSettings; +import javax.annotation.Nullable; +import java.net.URI; +import java.net.URISyntaxException; import java.util.Collections; import java.util.HashSet; import java.util.Set; @@ -24,23 +27,25 @@ public class LuceneQueryUtils { private static final Pattern LUCENE_RESERVED_CHARS = Pattern.compile( "[+\\-&|!(){}\\[\\]^\"~*?:\\\\]" ); - private static final QueryParser QUERY_PARSER = new QueryParser( Version.LUCENE_36, "", new PassThroughAnalyzer( Version.LUCENE_36 ) ); + private static QueryParser createQueryParser() { + return new QueryParser( Version.LUCENE_36, "", new PassThroughAnalyzer( Version.LUCENE_36 ) ); + } /** * Safely parse the given search settings into a Lucene query, falling back on a query with special characters * escaped if necessary. 
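// Illustrative sketch, not part of this patch: what the fallback does when the raw query trips the
// Lucene parser. The pattern mirrors LUCENE_RESERVED_CHARS declared above; the escaped form is retried,
// and only if that also fails is a LuceneParseSearchException thrown, now carrying both the retry
// failure and the original ParseException.
Pattern reserved = Pattern.compile( "[+\\-&|!(){}\\[\\]^\"~*?:\\\\]" );
String rawQuery = "(BRCA1";                                           // unbalanced parenthesis, fails to parse
String escaped = reserved.matcher( rawQuery ).replaceAll( "\\\\$0" ); // "\(BRCA1", parsed as a literal term on retry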
*/ public static Query parseSafely( SearchSettings settings, QueryParser queryParser ) throws SearchException { + String query = settings.getQuery(); try { - return queryParser.parse( settings.getQuery() ); + return queryParser.parse( query ); } catch ( ParseException e ) { String strippedQuery = LUCENE_RESERVED_CHARS.matcher( settings.getQuery() ).replaceAll( "\\\\$0" ); - log.warn( String.format( "Failed to parse '%s' after attempting to parse it without special characters '%s': %s", - settings.getQuery(), strippedQuery, e.getMessage() ) ); + log.debug( String.format( "Failed to parse '%s'; will attempt to parse it without special characters '%s'.", query, strippedQuery ), e ); try { return queryParser.parse( strippedQuery ); } catch ( ParseException e2 ) { - throw new LuceneParseSearchException( e ); + throw new LuceneParseSearchException( String.format( "Failed to parse '%s' after attempting to parse it without special characters as '%s'.", query, strippedQuery ), e2, e ); } } } @@ -52,7 +57,7 @@ public static Query parseSafely( SearchSettings settings, QueryParser queryParse */ public static Set extractTerms( SearchSettings settings ) throws SearchException { Set terms = new HashSet<>(); - extractTerms( parseSafely( settings, QUERY_PARSER ), terms ); + extractTerms( parseSafely( settings, createQueryParser() ), terms ); return terms; } @@ -63,61 +68,91 @@ private static void extractTerms( Query query, Set terms ) { extractTerms( clause.getQuery(), terms ); } } - } else if ( query instanceof TermQuery ) { + } else if ( query instanceof TermQuery && isTermGlobal( ( ( TermQuery ) query ).getTerm() ) ) { terms.add( termToString( ( ( TermQuery ) query ).getTerm() ) ); } } /** * Extract a DNF (Disjunctive Normal Form) from the query. + *
+ * Clauses can be nested (e.g. {@code a OR (d OR (c AND (d AND e)))}) as long as {@code OR} and {@code AND} are not
+     * interleaved.
+     *
+ * Prohibited clauses are ignored unless they break the DNF structure, in which case this will return an empty set. */ public static Set> extractDnf( SearchSettings settings ) throws SearchException { - Query q = parseSafely( settings, QUERY_PARSER ); - Set> result = new HashSet<>(); + Query q = parseSafely( settings, createQueryParser() ); + Set> result; if ( q instanceof BooleanQuery ) { - boolean isSimpleAndClause = true; - for ( BooleanClause clause : ( ( BooleanQuery ) q ) ) { - isSimpleAndClause &= clause.isRequired() && clause.getQuery() instanceof TermQuery; - if ( clause.isRequired() || clause.isProhibited() ) { - continue; // AND, we ignore - } - if ( clause.getQuery() instanceof BooleanQuery ) { - Set terms = new HashSet<>(); - for ( BooleanClause subClause : ( ( BooleanQuery ) clause.getQuery() ) ) { - if ( !subClause.isRequired() || subClause.isProhibited() ) { - continue; // OR, we ignore - } - if ( subClause.getQuery() instanceof TermQuery ) { - terms.add( termToString( ( ( TermQuery ) subClause.getQuery() ).getTerm() ) ); - } - } - if ( !terms.isEmpty() ) { - result.add( terms ); - } - } else if ( clause.getQuery() instanceof TermQuery ) { - result.add( Collections.singleton( termToString( ( ( TermQuery ) clause.getQuery() ).getTerm() ) ) ); - } + Set> ds = new HashSet<>(); + if ( extractNestedDisjunctions( ( BooleanQuery ) q, ds ) ) { + result = ds; + } else { + result = Collections.emptySet(); } - // check if all the clauses are required, in which case we can just create a nested clause - if ( isSimpleAndClause ) { - Set terms = new HashSet<>(); - for ( BooleanClause clause : ( ( BooleanQuery ) q ) ) { - terms.add( ( termToString( ( ( TermQuery ) clause.getQuery() ).getTerm() ) ) ); + } else if ( q instanceof TermQuery && isTermGlobal( ( ( TermQuery ) q ).getTerm() ) ) { + result = Collections.singleton( Collections.singleton( termToString( ( ( TermQuery ) q ).getTerm() ) ) ); + } else { + result = Collections.emptySet(); + } + return result; + } + + private static boolean extractNestedDisjunctions( BooleanQuery query, Set> terms ) { + if ( query.clauses().stream().anyMatch( BooleanClause::isRequired ) ) { + Set subClause = new HashSet<>(); + terms.add( subClause ); + return extractNestedConjunctions( query, subClause ); + } + // at this point, all clauses are optional + for ( BooleanClause clause : query.clauses() ) { + if ( clause.isProhibited() ) { + continue; + } + assert !clause.isRequired(); + if ( clause.getQuery() instanceof BooleanQuery ) { + if ( !extractNestedDisjunctions( ( BooleanQuery ) clause.getQuery(), terms ) ) { + return false; } - if ( !terms.isEmpty() ) { - result.add( terms ); + } else if ( clause.getQuery() instanceof TermQuery && isTermGlobal( ( ( TermQuery ) clause.getQuery() ).getTerm() ) ) { + terms.add( Collections.singleton( termToString( ( ( TermQuery ) clause.getQuery() ).getTerm() ) ) ); + } + } + return true; + } + + /** + * Extract nested conjunctions from a query and populate their terms in the given set. 
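// Illustrative sketch, not part of this patch: the shape of the DNF extracted from a few queries,
// using the SearchSettings.expressionExperimentSearch factory seen elsewhere in this patch.
// "parkinson OR (neuron AND degeneration)" -> { {"parkinson"}, {"neuron", "degeneration"} }
// "brain AND cortex"                       -> { {"brain", "cortex"} }
// "brain AND (cortex OR cerebellum)"       -> {}  (AND and OR interleaved, no valid DNF)
Set<Set<String>> dnf = extractDnf( SearchSettings.expressionExperimentSearch( "parkinson OR (neuron AND degeneration)" ) );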
+ * + * @return true if all the clauses in the query are conjunctions + */ + private static boolean extractNestedConjunctions( BooleanQuery query, Set terms ) { + if ( !query.clauses().stream().allMatch( c -> c.isRequired() || c.isProhibited() ) ) { + // found a disjunction, this is not a valid nested conjunction + return false; + } + // at this point, all the clauses are required + for ( BooleanClause clause : query.clauses() ) { + if ( clause.isProhibited() ) { + continue; + } + if ( clause.getQuery() instanceof BooleanQuery ) { + if ( !extractNestedConjunctions( ( BooleanQuery ) clause.getQuery(), terms ) ) { + return false; } + } else if ( clause.getQuery() instanceof TermQuery && isTermGlobal( ( ( TermQuery ) clause.getQuery() ).getTerm() ) ) { + terms.add( termToString( ( ( TermQuery ) clause.getQuery() ).getTerm() ) ); } - } else if ( q instanceof TermQuery ) { - result.add( Collections.singleton( termToString( ( ( TermQuery ) q ).getTerm() ) ) ); } - return result; + return true; } /** * Escape the query for a database match. * @see #prepareDatabaseQuery(SearchSettings, boolean) */ + @Nullable public static String prepareDatabaseQuery( SearchSettings settings ) throws SearchException { return prepareDatabaseQuery( settings, false ); } @@ -125,46 +160,53 @@ public static String prepareDatabaseQuery( SearchSettings settings ) throws Sear /** * Obtain a query suitable for a database match. *
+ * This method will return the first global term in the query that is not prohibited. If {@code allowWildcards} is + * set to true, prefix and wildcard terms will be considered as well. + *
* The resulting string is free from characters that would usually be used for a free-text match unless
      * {@code allowWildcards} is set to true.
      *
* @param allowWildcards if true, wildcards are supported (i.e. '*' and '?') and translated to their corresponding * LIKE SQL syntax (i.e. '%' and '_'), all other special characters are escaped. + * @return the first suitable term in the query, or null if none of them are applicable for a database query */ + @Nullable public static String prepareDatabaseQuery( SearchSettings settings, boolean allowWildcards ) throws SearchException { - return rewriteQuery( parseSafely( settings, QUERY_PARSER ), allowWildcards ); + return prepareDatabaseQueryInternal( parseSafely( settings, createQueryParser() ), allowWildcards ); } - private static String rewriteQuery( Query query, boolean replaceWildcards ) { + @Nullable + private static String prepareDatabaseQueryInternal( Query query, boolean allowWildcards ) { if ( query instanceof BooleanQuery ) { // pick the first, non-prohibited term for ( BooleanClause c : ( BooleanQuery ) query ) { if ( !c.isProhibited() ) { - return rewriteQuery( c.getQuery(), replaceWildcards ); + return prepareDatabaseQueryInternal( c.getQuery(), allowWildcards ); } } - } else if ( query instanceof WildcardQuery ) { - if ( replaceWildcards ) { - return escapeLike( termToString( ( ( WildcardQuery ) query ).getTerm() ) ) - .replace( '?', '_' ) - .replace( '*', '%' ); - } else { - return termToString( ( ( WildcardQuery ) query ).getTerm() ); - } - } else if ( query instanceof PrefixQuery ) { - if ( replaceWildcards ) { - return escapeLike( termToString( ( ( PrefixQuery ) query ).getPrefix() ) ) + "%"; - } else { - return termToString( ( ( PrefixQuery ) query ).getPrefix() ); - } - } else if ( query instanceof TermQuery ) { - if ( replaceWildcards ) { + } else if ( allowWildcards && query instanceof WildcardQuery && isTermGlobal( ( ( WildcardQuery ) query ).getTerm() ) ) { + return escapeLike( termToString( ( ( WildcardQuery ) query ).getTerm() ) ) + .replace( '?', '_' ) + .replace( '*', '%' ); + } else if ( allowWildcards && query instanceof PrefixQuery && isTermGlobal( ( ( PrefixQuery ) query ).getPrefix() ) ) { + return escapeLike( termToString( ( ( PrefixQuery ) query ).getPrefix() ) ) + "%"; + } else if ( query instanceof TermQuery && isTermGlobal( ( ( TermQuery ) query ).getTerm() ) ) { + if ( allowWildcards ) { return escapeLike( termToString( ( ( TermQuery ) query ).getTerm() ) ); } else { return termToString( ( ( TermQuery ) query ).getTerm() ); } } - return ""; + return null; + } + + /** + * Check if a given term is global (i.e. not fielded). + *
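// Illustrative sketch, not part of this patch: what wildcard-enabled queries look like after
// prepareDatabaseQuery( settings, true ), ready for a SQL LIKE clause.
// "GPL96"      -> "GPL96"       (plain term, returned as-is)
// "bdnf*"      -> "bdnf%"       (prefix query)
// "h?modynam*" -> "h_modynam%"  (single- and multi-character wildcards)
// "50%_input"  -> "50\%\_input" (literal '%' and '_' are escaped for LIKE)
// A purely fielded query such as "shortName:GSE2" has no global term and yields null.
String likePattern = prepareDatabaseQuery( SearchSettings.expressionExperimentSearch( "bdnf*" ), true );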
+ * This includes the corner case when a term is a URI and would be parsed as a fielded term. + */ + private static boolean isTermGlobal( Term term ) { + return term.field().isEmpty() || term.field().equals( "http" ) || term.field().equals( "https" ); } /** @@ -178,6 +220,23 @@ private static String termToString( Term term ) { } } + @Nullable + public static URI prepareTermUriQuery( SearchSettings settings ) throws SearchException { + Query query = parseSafely( settings, createQueryParser() ); + if ( query instanceof TermQuery ) { + Term term = ( ( TermQuery ) query ).getTerm(); + if ( term.field().equals( "http" ) || term.field().equals( "https" ) ) { + String candidateUri = term.field() + ":" + term.text(); + try { + return new URI( candidateUri ); + } catch ( URISyntaxException e ) { + return null; + } + } + } + return null; + } + private static String escapeLike( String s ) { return s.replaceAll( "[%_\\\\]", "\\\\$0" ); } @@ -187,7 +246,7 @@ private static String escapeLike( String s ) { */ public static boolean isWildcard( SearchSettings settings ) { try { - return isWildcard( QUERY_PARSER.parse( settings.getQuery() ) ); + return isWildcard( createQueryParser().parse( settings.getQuery() ) ); } catch ( ParseException e ) { return false; } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/source/CompositeSearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/source/CompositeSearchSource.java index b3432b676c..524eb84413 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/source/CompositeSearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/source/CompositeSearchSource.java @@ -2,6 +2,7 @@ import lombok.extern.apachecommons.CommonsLog; import org.apache.commons.lang3.time.StopWatch; +import org.springframework.util.Assert; import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; import ubic.gemma.core.search.SearchResultSet; @@ -10,6 +11,7 @@ import ubic.gemma.model.common.Identifiable; import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.common.search.SearchSettings; +import ubic.gemma.model.expression.BlacklistedEntity; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExpressionExperiment; @@ -28,7 +30,15 @@ /** * A search source constituted of multiple other sources. - * + *
+ * Sources are used in the order they are passed to the {@link #CompositeSearchSource(List)} constructor. + *
+ * This source checks whether each individual source accepts the given {@link SearchSettings} via
+ * {@link SearchSource#accepts(SearchSettings)} and subsequently delegates the operation.
+ *
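// Illustrative sketch, not part of this patch: wiring a composite source by hand. DatabaseSearchSource
// is defined in this patch; the other two bean names are placeholders for whatever full-text and
// ontology sources are registered in the Spring context. Sources are queried in list order, and a
// source whose accepts( settings ) returns false (e.g. the database source when isUseDatabase() is
// false) is skipped for that request.
CompositeSearchSource searchSource = new CompositeSearchSource(
        Arrays.asList( databaseSearchSource, ontologySearchSource, compassSearchSource ) );
searchSource.setFastWarningThresholdMillis( 100 ); // warn when a FAST-mode search exceeds 100 ms
searchSource.setWarningThresholdMills( 1000 );     // warn when a BALANCED-mode search exceeds 1 s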
+ * It also supports logging of the time spent by each source and the number of results found. This is done at the DEBUG + * level unless the value set by {@link #setWarningThresholdMills(int)} or {@link #setFastWarningThresholdMillis(int)} + * is exceeded in which case WARNING is used. * @author poirigui */ @CommonsLog @@ -36,28 +46,56 @@ public class CompositeSearchSource implements SearchSource { private final List sources; + private int fastWarningThresholdMillis = 100; + private int warningThresholdMills = 1000; + public CompositeSearchSource( List sources ) { this.sources = sources; } + /** + * Threshold in milliseconds for a warning to be logged when searching with {@link ubic.gemma.model.common.search.SearchSettings.SearchMode#FAST}. + *
+ * The default is 100 ms. + */ + public void setFastWarningThresholdMillis( int fastWarningThresholdMillis ) { + Assert.isTrue( fastWarningThresholdMillis >= 0 ); + this.fastWarningThresholdMillis = fastWarningThresholdMillis; + } + + /** + * Threshold in milliseconds for a warning to be logged. + *
+ * The default is 1000 ms. + */ + public void setWarningThresholdMills( int warningThresholdMills ) { + Assert.isTrue( warningThresholdMills >= 0 ); + this.warningThresholdMills = warningThresholdMills; + } + + @Override + public boolean accepts( SearchSettings settings ) { + return sources.stream().anyMatch( s -> s.accepts( settings ) ); + } + @Override public Collection> searchArrayDesign( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchArrayDesign( settings ), ArrayDesign.class ); + return searchWith( settings, SearchSource::searchArrayDesign, ArrayDesign.class ); } @Override public Collection> searchBibliographicReference( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchBibliographicReference( settings ), BibliographicReference.class ); + return searchWith( settings, SearchSource::searchBibliographicReference, BibliographicReference.class ); } @Override public Collection> searchExperimentSet( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchExperimentSet( settings ), ExpressionExperimentSet.class ); + return searchWith( settings, SearchSource::searchExperimentSet, ExpressionExperimentSet.class ); } @Override public Collection> searchBioSequence( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchBioSequence( settings ), BioSequence.class ); + return searchWith( settings, SearchSource::searchBioSequence, BioSequence.class ); } @Override @@ -73,7 +111,7 @@ public Collection> searchBioSequenceAndGene( SearchSettings sett @Override public Collection> searchCompositeSequence( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchCompositeSequence( settings ), CompositeSequence.class ); + return searchWith( settings, SearchSource::searchCompositeSequence, CompositeSequence.class ); } @Override @@ -89,45 +127,69 @@ public Collection> searchCompositeSequenceAndGene( SearchSetting @Override public Collection> searchExpressionExperiment( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchExpressionExperiment( settings ), ExpressionExperiment.class ); + return searchWith( settings, SearchSource::searchExpressionExperiment, ExpressionExperiment.class ); } @Override public Collection> searchGene( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchGene( settings ), Gene.class ); + return searchWith( settings, SearchSource::searchGene, Gene.class ); } @Override public Collection> searchGeneSet( SearchSettings settings ) throws SearchException { - return searchWith( ( s ) -> s.searchGeneSet( settings ), GeneSet.class ); + return searchWith( settings, SearchSource::searchGeneSet, GeneSet.class ); + } + + @Override + public Collection> searchBlacklistedEntities( SearchSettings settings ) throws SearchException { + return searchWith( settings, SearchSource::searchBlacklistedEntities, BlacklistedEntity.class ); } - @FunctionalInterface - public interface SearchFunction { - Collection> apply( SearchSource searchSource ) throws SearchException; + private interface SearchFunction { + Collection> apply( SearchSource searchSource, SearchSettings settings ) throws SearchException; } - private Collection> searchWith( SearchFunction func, Class clazz ) throws SearchException { + private Collection> searchWith( SearchSettings settings, SearchFunction func, Class clazz ) throws SearchException { StopWatch timer = StopWatch.createStarted(); - Set> 
results = new SearchResultSet<>(); + Set> results = new SearchResultSet<>( settings ); long[] timeSpentBySource = new long[sources.size()]; int[] foundItemsBySource = new int[sources.size()]; int[] newItemsBySource = new int[sources.size()]; for ( int i = 0; i < sources.size(); i++ ) { long timeBefore = timer.getTime( TimeUnit.MILLISECONDS ); - int sizeBefore = results.size(); SearchSource source = sources.get( i ); - Collection> r = func.apply( source ); - results.addAll( r ); - foundItemsBySource[i] = r.size(); - newItemsBySource[i] = results.size() - sizeBefore; + if ( source.accepts( settings ) ) { + int sizeBefore = results.size(); + Collection> r = func.apply( source, settings ); + results.addAll( r ); + foundItemsBySource[i] = r.size(); + newItemsBySource[i] = results.size() - sizeBefore; + } else { + foundItemsBySource[i] = 0; + newItemsBySource[i] = 0; + } timeSpentBySource[i] = timer.getTime( TimeUnit.MILLISECONDS ) - timeBefore; } timer.stop(); - boolean shouldWarn = timer.getTime( TimeUnit.MILLISECONDS ) > 200; + boolean shouldWarn; + switch ( settings.getMode() ) { + case FAST: + shouldWarn = timer.getTime() > Math.min( fastWarningThresholdMillis, warningThresholdMills ); + break; + case BALANCED: + shouldWarn = timer.getTime() > warningThresholdMills; + break; + case ACCURATE: + default: + shouldWarn = false; + } if ( shouldWarn || log.isDebugEnabled() ) { - String breakdownBySource = IntStream.range( 0, sources.size() ).mapToObj( i -> String.format( "source: %s, found items: %d, found items (novel): %d, time spent: %d ms", sources.get( i ).getClass().getSimpleName(), foundItemsBySource[i], newItemsBySource[i], timeSpentBySource[i] ) ).collect( Collectors.joining( "; " ) ); - String message = String.format( "Found %d %s results in %d ms (%s)", results.size(), clazz.getSimpleName(), timer.getTime( TimeUnit.MILLISECONDS ), breakdownBySource ); + String breakdownBySource = IntStream.range( 0, sources.size() ) + .mapToObj( i -> String.format( "source: %s, found items: %d, found items (novel): %d, time spent: %d ms", + sources.get( i ).getClass().getSimpleName(), foundItemsBySource[i], newItemsBySource[i], timeSpentBySource[i] ) ) + .collect( Collectors.joining( "; " ) ); + String message = String.format( "Found %d %s results in %d ms (%s)", results.size(), clazz.getSimpleName(), + timer.getTime( TimeUnit.MILLISECONDS ), breakdownBySource ); if ( shouldWarn ) { log.warn( message ); } else { diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/source/DatabaseSearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/source/DatabaseSearchSource.java index 2a08735e93..df6a28b0dd 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/source/DatabaseSearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/source/DatabaseSearchSource.java @@ -2,23 +2,29 @@ import gemma.gsec.util.SecurityUtil; import lombok.extern.apachecommons.CommonsLog; -import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.time.StopWatch; import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.Ordered; import org.springframework.stereotype.Component; import ubic.gemma.core.genome.gene.service.GeneService; import ubic.gemma.core.genome.gene.service.GeneSetService; -import ubic.gemma.core.search.*; +import ubic.gemma.core.search.SearchException; +import ubic.gemma.core.search.SearchResult; +import ubic.gemma.core.search.SearchResultSet; +import ubic.gemma.core.search.SearchSource; import 
ubic.gemma.model.analysis.expression.ExpressionExperimentSet; import ubic.gemma.model.common.Identifiable; import ubic.gemma.model.common.search.SearchSettings; +import ubic.gemma.model.expression.BlacklistedEntity; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.expression.designElement.CompositeSequence; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.genome.Gene; import ubic.gemma.model.genome.biosequence.BioSequence; import ubic.gemma.model.genome.gene.GeneSet; +import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.designElement.CompositeSequenceService; +import ubic.gemma.persistence.service.expression.experiment.BlacklistedEntityService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSetService; import ubic.gemma.persistence.service.genome.biosequence.BioSequenceService; @@ -28,7 +34,8 @@ import java.util.*; import java.util.stream.Collectors; -import static ubic.gemma.core.search.lucene.LuceneQueryUtils.*; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.isWildcard; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.prepareDatabaseQuery; /** * Search source for direct database results. @@ -40,7 +47,9 @@ */ @Component @CommonsLog -public class DatabaseSearchSource implements SearchSource { +public class DatabaseSearchSource implements SearchSource, Ordered { + + public static final String NCBI_GENE_ID_URI_PREFIX = "http://purl.org/commons/record/ncbi_gene/"; /** * Score when a result is matched exactly by numerical ID. @@ -66,6 +75,8 @@ public class DatabaseSearchSource implements SearchSource { */ private final double INDIRECT_HIT_PENALTY = 0.8; + @Autowired + private ArrayDesignService arrayDesignService; @Autowired private BioSequenceService bioSequenceService; @Autowired @@ -80,40 +91,87 @@ public class DatabaseSearchSource implements SearchSource { private GeneSetService geneSetService; @Autowired private ExpressionExperimentSetService experimentSetService; + @Autowired + private BlacklistedEntityService blacklistedEntityService; + + @Override + public int getOrder() { + return Ordered.HIGHEST_PRECEDENCE; + } + + @Override + public boolean accepts( SearchSettings settings ) { + return settings.isUseDatabase(); + } /** + * A general search for array designs. + *
+ * This search does both a database search and a compass search. It also contains an underlying
+     * {@link CompositeSequence} search, returning the {@link ArrayDesign} collection for the given composite sequence
+     * search string (the returned collection of array designs does not contain duplicates).
+     *
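// Illustrative sketch, not part of this patch: the lookup order and scores used below, assuming
// settings wraps the example query "GPL570".
//   findByShortName                          -> MATCH_BY_SHORT_NAME_SCORE, returns immediately
//   findByName                               -> MATCH_BY_NAME_SCORE, returns immediately
//   findByAlternateName / findByManufacturer -> 0.9
//   CompositeSequence findByName             -> INDIRECT_HIT_PENALTY * MATCH_BY_NAME_SCORE (probe-name hit)
for ( SearchResult<ArrayDesign> hit : databaseSearchSource.searchArrayDesign( settings ) ) {
    log.debug( hit.getResultObject().getShortName() + " scored " + hit.getScore() );
}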
* Searches the DB for array designs which have composite sequences whose names match the given search string. * Because of the underlying database search, this is acl aware. That is, returned array designs are filtered based * on access control list (ACL) permissions. */ @Override public Collection> searchArrayDesign( SearchSettings settings ) throws SearchException { - if ( !settings.isUseDatabase() ) + StopWatch watch = StopWatch.createStarted(); + String query = prepareDatabaseQuery( settings ); + if ( query == null ) { return Collections.emptySet(); + } - StopWatch watch = StopWatch.createStarted(); + SearchResultSet results = new SearchResultSet<>( settings ); + + ArrayDesign shortNameResult = arrayDesignService.findByShortName( query ); + if ( shortNameResult != null ) { + results.add( SearchResult.from( ArrayDesign.class, shortNameResult, DatabaseSearchSource.MATCH_BY_SHORT_NAME_SCORE, null, "ArrayDesignService.findByShortName" ) ); + return results; + } + + Collection nameResult = arrayDesignService.findByName( query ); + if ( nameResult != null && !nameResult.isEmpty() ) { + for ( ArrayDesign ad : nameResult ) { + results.add( SearchResult.from( ArrayDesign.class, ad, DatabaseSearchSource.MATCH_BY_NAME_SCORE, null, "ArrayDesignService.findByShortName" ) ); + } + return results; + } - Collection adSet = new HashSet<>(); + Collection altNameResults = arrayDesignService.findByAlternateName( query ); + for ( ArrayDesign arrayDesign : altNameResults ) { + results.add( SearchResult.from( ArrayDesign.class, arrayDesign, 0.9, null, "ArrayDesignService.findByAlternateName" ) ); + } + + Collection manufacturerResults = arrayDesignService.findByManufacturer( query ); + for ( ArrayDesign arrayDesign : manufacturerResults ) { + results.add( SearchResult.from( ArrayDesign.class, arrayDesign, 0.9, null, "ArrayDesignService.findByManufacturer" ) ); + } // search by exact composite sequence name - Collection matchedCs = compositeSequenceService.findByName( prepareDatabaseQuery( settings ) ); + Collection matchedCs = compositeSequenceService.findByName( query ); for ( CompositeSequence sequence : matchedCs ) { - adSet.add( sequence.getArrayDesign() ); + ArrayDesign entity = sequence.getArrayDesign(); + results.add( SearchResult.from( ArrayDesign.class, entity, INDIRECT_HIT_PENALTY * MATCH_BY_NAME_SCORE, null, "CompositeSequenceService.findByName" ) ); } watch.stop(); - if ( watch.getTime() > 1000 ) - DatabaseSearchSource.log - .info( "Array Design Composite Sequence DB search for " + settings + " took " + watch.getTime() - + " ms" + " found " + adSet.size() + " Ads" ); + if ( watch.getTime() > 1000 ) { + DatabaseSearchSource.log.warn( String.format( "Array Design DB search for %s with '%s' took %d ms found %d Ads", + settings, query, watch.getTime(), results.size() ) ); + } - return toSearchResults( ArrayDesign.class, adSet, MATCH_BY_NAME_SCORE, "CompositeSequenceService.findByName" ); + return results; } @Override - public Collection> searchExperimentSet( SearchSettings settings ) throws SearchException { - return toSearchResults( ExpressionExperimentSet.class, this.experimentSetService.findByName( prepareDatabaseQuery( settings ) ), MATCH_BY_NAME_SCORE, "ExperimentSetService.findByName" ); + String query = prepareDatabaseQuery( settings ); + if ( query == null ) { + return Collections.emptySet(); + } + return toSearchResults( settings, ExpressionExperimentSet.class, this.experimentSetService.findByName( query ), MATCH_BY_NAME_SCORE, "ExperimentSetService.findByName" ); } /** @@ -121,22 +179,22 @@ 
public Collection> searchExperimentSet( Se */ @Override public Collection> searchBioSequence( SearchSettings settings ) throws SearchException { - if ( !settings.isUseDatabase() ) - return Collections.emptySet(); - StopWatch watch = StopWatch.createStarted(); String searchString = prepareDatabaseQuery( settings ); + if ( searchString == null ) { + return Collections.emptySet(); + } Collection bs = bioSequenceService.findByName( searchString ); // bioSequenceService.thawRawAndProcessed( bs ); - Collection> bioSequenceList = toSearchResults( BioSequence.class, bs, MATCH_BY_NAME_SCORE, "BioSequenceService.findByName" ); + Collection> bioSequenceList = toSearchResults( settings, BioSequence.class, bs, MATCH_BY_NAME_SCORE, "BioSequenceService.findByName" ); watch.stop(); - if ( watch.getTime() > 1000 ) - DatabaseSearchSource.log - .info( "BioSequence DB search for " + searchString + " took " + watch.getTime() + " ms and found" - + bioSequenceList.size() + " BioSequences" ); + if ( watch.getTime() > 1000 ) { + DatabaseSearchSource.log.warn( String.format( "BioSequence DB search for %s with '%s' took %d ms and found %d BioSequences", + settings, searchString, watch.getTime(), bioSequenceList.size() ) ); + } return bioSequenceList; } @@ -156,7 +214,7 @@ public Collection> searchCompositeSequence( Sear */ @Override public Collection> searchCompositeSequenceAndGene( SearchSettings settings ) throws SearchException { - Set> geneSet = new SearchResultSet<>(); + Set> geneSet = new SearchResultSet<>( settings ); Collection> matchedCs = this.searchCompositeSequenceAndPopulateGenes( settings, geneSet ); Collection> combinedResults = new HashSet<>(); combinedResults.addAll( geneSet ); @@ -165,33 +223,33 @@ public Collection> searchCompositeSequenceAndGene( SearchSetting } private Collection> searchCompositeSequenceAndPopulateGenes( SearchSettings settings, Set> geneSet ) throws SearchException { - if ( !settings.isUseDatabase() ) - return Collections.emptySet(); - StopWatch watch = StopWatch.createStarted(); String searchString = prepareDatabaseQuery( settings ); + if ( searchString == null ) { + return Collections.emptySet(); + } ArrayDesign ad = settings.getPlatformConstraint(); // search by exact composite sequence name - Collection> matchedCs = new SearchResultSet<>(); + Collection> matchedCs = new SearchResultSet<>( settings ); if ( ad != null ) { CompositeSequence cs = compositeSequenceService.findByName( ad, searchString ); if ( cs != null ) - matchedCs.add( SearchResult.from( CompositeSequence.class, cs, MATCH_BY_NAME_SCORE, "CompositeSequenceService.findByName" ) ); + matchedCs.add( SearchResult.from( CompositeSequence.class, cs, MATCH_BY_NAME_SCORE, null, "CompositeSequenceService.findByName" ) ); } else { - matchedCs = toSearchResults( CompositeSequence.class, compositeSequenceService.findByName( searchString ), MATCH_BY_NAME_SCORE, "CompositeSequenceService.findByName" ); + matchedCs = toSearchResults( settings, CompositeSequence.class, compositeSequenceService.findByName( searchString ), MATCH_BY_NAME_SCORE, "CompositeSequenceService.findByName" ); } /* * Search by biosequence */ - if ( matchedCs.isEmpty() ) { + if ( matchedCs.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) ) { Collection csViaBioSeq = compositeSequenceService.findByBioSequenceName( searchString ); if ( ad != null ) { csViaBioSeq.removeIf( c -> !c.getArrayDesign().equals( ad ) ); } - matchedCs.addAll( toSearchResults( CompositeSequence.class, csViaBioSeq, INDIRECT_HIT_PENALTY * MATCH_BY_NAME_SCORE, 
"CompositeSequenceService.findByBioSequenceName" ) ); + matchedCs.addAll( toSearchResults( settings, CompositeSequence.class, csViaBioSeq, INDIRECT_HIT_PENALTY * MATCH_BY_NAME_SCORE, "CompositeSequenceService.findByBioSequenceName" ) ); } /* @@ -208,9 +266,9 @@ private Collection> searchCompositeSequenceAndPo // results from the database are always pre-filled assert g.getResultObject() != null; if ( settings.getPlatformConstraint() != null ) { - matchedCs.addAll( toSearchResults( CompositeSequence.class, compositeSequenceService.findByGene( g.getResultObject(), settings.getPlatformConstraint() ), INDIRECT_HIT_PENALTY * g.getScore(), "CompositeSequenceService.findByGene with platform constraint" ) ); + matchedCs.addAll( toSearchResults( settings, CompositeSequence.class, compositeSequenceService.findByGene( g.getResultObject(), settings.getPlatformConstraint() ), INDIRECT_HIT_PENALTY * g.getScore(), "CompositeSequenceService.findByGene with platform constraint" ) ); } else { - matchedCs.addAll( toSearchResults( CompositeSequence.class, compositeSequenceService.findByGene( g.getResultObject() ), INDIRECT_HIT_PENALTY * g.getScore(), "CompositeSequenceService.findByGene" ) ); + matchedCs.addAll( toSearchResults( settings, CompositeSequence.class, compositeSequenceService.findByGene( g.getResultObject() ), INDIRECT_HIT_PENALTY * g.getScore(), "CompositeSequenceService.findByGene" ) ); } } @@ -222,14 +280,13 @@ private Collection> searchCompositeSequenceAndPo for ( Collection genes : compositeSequenceService.getGenes( compositeSequences ).values() ) { // TODO: each individual CS have a potentially different score that should be reflected in the gene score, // but that would require knowing which CS matched which gene - geneSet.addAll( toSearchResults( Gene.class, genes, INDIRECT_HIT_PENALTY, "CompositeSequenceService.getGenes" ) ); + geneSet.addAll( toSearchResults( settings, Gene.class, genes, INDIRECT_HIT_PENALTY, "CompositeSequenceService.getGenes" ) ); } watch.stop(); if ( watch.getTime() > 1000 ) - DatabaseSearchSource.log - .info( "Gene composite sequence DB search " + searchString + " took " + watch.getTime() + " ms, " - + geneSet.size() + " items." ); + DatabaseSearchSource.log.warn( String.format( "Gene composite sequence DB search for %s with '%s' took %d ms, %d items.", + settings, searchString, watch.getTime(), geneSet.size() ) ); return matchedCs; } @@ -243,14 +300,14 @@ private Collection> searchCompositeSequenceAndPo */ @Override public Collection> searchExpressionExperiment( SearchSettings settings ) throws SearchException { - if ( !settings.isUseDatabase() ) - return Collections.emptySet(); - StopWatch watch = StopWatch.createStarted(); String query = prepareDatabaseQuery( settings ); + if ( query == null ) { + return Collections.emptySet(); + } - Collection> results = new SearchResultSet<>(); + Collection> results = new SearchResultSet<>( settings ); Collection ees = expressionExperimentService.findByName( query ); for ( ExpressionExperiment ee : ees ) { @@ -258,21 +315,21 @@ public Collection> searchExpressionExperiment } // in response to https://github.com/PavlidisLab/Gemma/issues/140, always keep going if admin. 
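// Illustrative sketch, not part of this patch: how the search mode changes this cascade. In the default
// mode the short-name, accession and primary-key lookups below only run while nothing has been found
// yet; in ACCURATE mode they run regardless, so one experiment can be matched by name, short name and
// accession in the same pass. Administrators get the same keep-going behaviour for the short-name and
// accession lookups (issue #140).
SearchSettings accurate = settings.withMode( SearchSettings.SearchMode.ACCURATE );
Collection<SearchResult<ExpressionExperiment>> allMatches =
        databaseSearchSource.searchExpressionExperiment( accurate );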
- if ( results.isEmpty() || SecurityUtil.isUserAdmin() ) { + if ( results.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) || SecurityUtil.isUserAdmin() ) { ExpressionExperiment ee = expressionExperimentService.findByShortName( query ); if ( ee != null ) { results.add( SearchResult.from( ExpressionExperiment.class, ee, MATCH_BY_SHORT_NAME_SCORE, Collections.singletonMap( "shortName", ee.getShortName() ), "ExpressionExperimentService.findByShortName" ) ); } } - if ( results.isEmpty() || SecurityUtil.isUserAdmin() ) { + if ( results.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) || SecurityUtil.isUserAdmin() ) { ees = expressionExperimentService.findByAccession( query ); // this will find split parts for ( ExpressionExperiment e : ees ) { results.add( SearchResult.from( ExpressionExperiment.class, e, MATCH_BY_ACCESSION_SCORE, Collections.singletonMap( "id", e.getId().toString() ), "ExpressionExperimentService.findByAccession" ) ); } } - if ( results.isEmpty() ) { + if ( results.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) ) { try { // maybe user put in a primary key value. ExpressionExperiment ee = expressionExperimentService.load( Long.parseLong( query ) ); @@ -293,8 +350,8 @@ public Collection> searchExpressionExperiment watch.stop(); if ( watch.getTime() > 1000 ) - DatabaseSearchSource.log.warn( "DB Expression Experiment search for " + settings + " took " + watch.getTime() - + " ms and found " + results.size() + " EEs" ); + DatabaseSearchSource.log.warn( String.format( "DB Expression Experiment search for %s with '%s' took %d ms and found %d EEs", + settings, query, watch.getTime(), results.size() ) ); return results; } @@ -305,44 +362,39 @@ public Collection> searchExpressionExperiment */ @Override public Collection> searchGene( SearchSettings settings ) throws SearchException { - if ( !settings.isUseDatabase() ) - return Collections.emptySet(); - StopWatch watch = StopWatch.createStarted(); - String searchString; - if ( settings.isTermQuery() ) { - // then we can get the NCBI ID, maybe. - searchString = StringUtils.substringAfterLast( prepareDatabaseQuery( settings ), "/" ); - } else { - searchString = prepareDatabaseQuery( settings ); - } - - if ( StringUtils.isBlank( searchString ) ) - return Collections.emptySet(); + Set> results = new SearchResultSet<>( settings ); - Set> results = new SearchResultSet<>(); + String searchString = prepareDatabaseQuery( settings ); + if ( searchString != null ) { + // then we can get the NCBI ID, maybe. + if ( searchString.startsWith( NCBI_GENE_ID_URI_PREFIX ) ) { + searchString = searchString.substring( NCBI_GENE_ID_URI_PREFIX.length() ); + } - /* - * First search by accession. If we find it, stop. - */ - Gene result = null; - try { - result = geneService.findByNCBIId( Integer.parseInt( searchString ) ); - } catch ( NumberFormatException e ) { - // - } - if ( result != null ) { - results.add( SearchResult.from( Gene.class, result, MATCH_BY_ID_SCORE, "GeneService.findByNCBIId" ) ); - } else { - result = geneService.findByAccession( searchString, null ); + /* + * First search by accession. If we find it, stop. 
+ */ + Gene result = null; + try { + result = geneService.findByNCBIId( Integer.parseInt( searchString ) ); + } catch ( NumberFormatException e ) { + // + } if ( result != null ) { - results.add( SearchResult.from( Gene.class, result, MATCH_BY_ACCESSION_SCORE, "GeneService.findByAccession" ) ); + results.add( SearchResult.from( Gene.class, result, MATCH_BY_ID_SCORE, null, "GeneService.findByNCBIId" ) ); + } else { + result = geneService.findByAccession( searchString, null ); + if ( result != null ) { + results.add( SearchResult.from( Gene.class, result, MATCH_BY_ACCESSION_SCORE, null, "GeneService.findByAccession" ) ); + } } } - if ( results.isEmpty() ) { - results.addAll( searchGeneExpanded( settings ) ); + // attempt to do an inexact search if no results were yielded + if ( results.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) ) { + searchGeneExpanded( settings, results ); } // filter by taxon @@ -352,9 +404,8 @@ public Collection> searchGene( SearchSettings settings ) thro watch.stop(); if ( watch.getTime() > 1000 ) - DatabaseSearchSource.log - .info( "Gene DB search for " + searchString + " took " + watch.getTime() + " ms and found " - + results.size() + " genes" ); + DatabaseSearchSource.log.warn( String.format( "Gene DB search for %s with '%s' took %d ms and found %d genes", + settings, searchString, watch.getTime(), results.size() ) ); return results; } @@ -362,68 +413,88 @@ public Collection> searchGene( SearchSettings settings ) thro /** * Expanded gene search used when a simple search does not yield results. */ - private Collection> searchGeneExpanded( SearchSettings settings ) throws SearchException { - Set> results = new SearchResultSet<>(); - - String exactString = prepareDatabaseQuery( settings ); + private void searchGeneExpanded( SearchSettings settings, Set> results ) throws SearchException { String inexactString = prepareDatabaseQuery( settings, true ); + if ( inexactString == null ) { + return; + } + + // trim all the unescaped reserved characters from the string to get the "exact" string + String exactString = inexactString.replaceAll( "([^\\\\])[%_\\\\]", "$1" ); // if the query is shortish, always do a wild card search. This gives better behavior in 'live // search' situations. If we do wildcards on very short queries we get too many results. if ( exactString.length() <= 1 ) { // case 0: we got no results yet, or user entered a very short string. We search only for exact matches. - results.addAll( toSearchResults( Gene.class, geneService.findByOfficialSymbol( exactString ), MATCH_BY_OFFICIAL_SYMBOL_SCORE, "GeneService.findByOfficialSymbol" ) ); + results.addAll( toSearchResults( settings, Gene.class, geneService.findByOfficialSymbol( exactString ), MATCH_BY_OFFICIAL_SYMBOL_SCORE, "GeneService.findByOfficialSymbol" ) ); } else if ( exactString.length() <= 5 ) { if ( isWildcard( settings ) ) { // case 2: user did ask for a wildcard, if the string is 2, 3, 4 or 5 characters. - results.addAll( toSearchResults( Gene.class, geneService.findByOfficialSymbolInexact( inexactString ), MATCH_BY_OFFICIAL_SYMBOL_INEXACT_SCORE, "GeneService.findByOfficialSymbolInexact" ) ); + results.addAll( toSearchResults( settings, Gene.class, geneService.findByOfficialSymbolInexact( inexactString ), MATCH_BY_OFFICIAL_SYMBOL_INEXACT_SCORE, "GeneService.findByOfficialSymbolInexact" ) ); } else { // case 2: user did not ask for a wildcard, but we add it anyway, if the string is 2, 3, 4 or 5 characters. 
- results.addAll( toSearchResults( Gene.class, geneService.findByOfficialSymbolInexact( inexactString + "%" ), MATCH_BY_OFFICIAL_SYMBOL_INEXACT_SCORE, "GeneService.findByOfficialSymbolInexact" ) ); + results.addAll( toSearchResults( settings, Gene.class, geneService.findByOfficialSymbolInexact( inexactString + "%" ), MATCH_BY_OFFICIAL_SYMBOL_INEXACT_SCORE, "GeneService.findByOfficialSymbolInexact" ) ); } } else { if ( isWildcard( settings ) ) { // case 3: string is long enough, and user asked for wildcard. - results.addAll( toSearchResults( Gene.class, geneService.findByOfficialSymbolInexact( inexactString ), MATCH_BY_OFFICIAL_SYMBOL_INEXACT_SCORE, "GeneService.findByOfficialSymbol" ) ); + results.addAll( toSearchResults( settings, Gene.class, geneService.findByOfficialSymbolInexact( inexactString ), MATCH_BY_OFFICIAL_SYMBOL_INEXACT_SCORE, "GeneService.findByOfficialSymbol" ) ); } else { // case 3: string is long enough, and user did not ask for wildcard. - results.addAll( toSearchResults( Gene.class, geneService.findByOfficialSymbol( exactString ), MATCH_BY_OFFICIAL_SYMBOL_SCORE, "GeneService.findByOfficialSymbol" ) ); + results.addAll( toSearchResults( settings, Gene.class, geneService.findByOfficialSymbol( exactString ), MATCH_BY_OFFICIAL_SYMBOL_SCORE, "GeneService.findByOfficialSymbol" ) ); } } /* * If we found a match using official symbol or name, don't bother with this */ - if ( results.isEmpty() ) { - results.addAll( toSearchResults( Gene.class, geneService.findByAlias( exactString ), MATCH_BY_ALIAS_SCORE, "GeneService.findByAlias" ) ); + if ( results.isEmpty() || settings.getMode().equals( SearchSettings.SearchMode.ACCURATE ) ) { + results.addAll( toSearchResults( settings, Gene.class, geneService.findByAlias( exactString ), MATCH_BY_ALIAS_SCORE, "GeneService.findByAlias" ) ); Gene geneByEnsemblId = geneService.findByEnsemblId( exactString ); if ( geneByEnsemblId != null ) { - results.add( SearchResult.from( Gene.class, geneByEnsemblId, MATCH_BY_ACCESSION_SCORE, "GeneService.findByAlias" ) ); + results.add( SearchResult.from( Gene.class, geneByEnsemblId, MATCH_BY_ACCESSION_SCORE, null, "GeneService.findByAlias" ) ); } - results.addAll( toSearchResults( Gene.class, geneProductService.getGenesByName( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_NAME_SCORE, "GeneProductService.getGenesByName" ) ); - results.addAll( toSearchResults( Gene.class, geneProductService.getGenesByNcbiId( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_ACCESSION_SCORE, "GeneProductService.getGenesByNcbiId" ) ); - results.addAll( toSearchResults( Gene.class, bioSequenceService.getGenesByAccession( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_ACCESSION_SCORE, "BioSequenceService.GetGenesByAccession" ) ); - results.addAll( toSearchResults( Gene.class, bioSequenceService.getGenesByName( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_NAME_SCORE, "BioSequenceService.getGenesByName" ) ); + results.addAll( toSearchResults( settings, Gene.class, geneProductService.getGenesByName( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_NAME_SCORE, "GeneProductService.getGenesByName" ) ); + results.addAll( toSearchResults( settings, Gene.class, geneProductService.getGenesByNcbiId( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_ACCESSION_SCORE, "GeneProductService.getGenesByNcbiId" ) ); + results.addAll( toSearchResults( settings, Gene.class, bioSequenceService.getGenesByAccession( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_ACCESSION_SCORE, "BioSequenceService.GetGenesByAccession" ) ); + results.addAll( toSearchResults( 
settings, Gene.class, bioSequenceService.getGenesByName( exactString ), INDIRECT_HIT_PENALTY * MATCH_BY_NAME_SCORE, "BioSequenceService.getGenesByName" ) ); } - - return results; } @Override public Collection> searchGeneSet( SearchSettings settings ) throws SearchException { - if ( !settings.isUseDatabase() ) + String query = prepareDatabaseQuery( settings ); + if ( query == null ) { return Collections.emptySet(); + } if ( settings.getTaxon() != null ) { - return toSearchResults( GeneSet.class, this.geneSetService.findByName( prepareDatabaseQuery( settings ), settings.getTaxon() ), MATCH_BY_NAME_SCORE, "GeneSetService.findByNameWithTaxon" ); + return toSearchResults( settings, GeneSet.class, this.geneSetService.findByName( query, settings.getTaxon() ), MATCH_BY_NAME_SCORE, "GeneSetService.findByNameWithTaxon" ); } else { - return toSearchResults( GeneSet.class, this.geneSetService.findByName( prepareDatabaseQuery( settings ) ), MATCH_BY_NAME_SCORE, "GeneSetService.findByName" ); + return toSearchResults( settings, GeneSet.class, this.geneSetService.findByName( query ), MATCH_BY_NAME_SCORE, "GeneSetService.findByName" ); } } - private static Set> toSearchResults( Class resultType, Collection entities, double score, String source ) { + @Override + public Collection> searchBlacklistedEntities( SearchSettings settings ) throws SearchException { + Collection> blacklistedResults = new SearchResultSet<>( settings ); + String query = prepareDatabaseQuery( settings ); + + if ( query == null ) { + return Collections.emptySet(); + } + + BlacklistedEntity b = blacklistedEntityService.findByAccession( query ); + if ( b != null ) { + blacklistedResults.add( SearchResult.from( BlacklistedEntity.class, b, DatabaseSearchSource.MATCH_BY_ACCESSION_SCORE, null, "BlacklistedEntityService.findByAccession" ) ); + } + + return blacklistedResults; + } + + private static Set> toSearchResults( SearchSettings settings, Class resultType, Collection entities, double score, String source ) { return entities.stream() .filter( Objects::nonNull ) - .map( e -> SearchResult.from( resultType, e, score, source ) ) - .collect( Collectors.toCollection( SearchResultSet::new ) ); + .map( e -> SearchResult.from( resultType, e, score, null, source ) ) + .collect( Collectors.toCollection( () -> new SearchResultSet<>( settings ) ) ); } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java index 0d56b547b0..ffd351b7fc 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/source/HibernateSearchSource.java @@ -141,6 +141,11 @@ public Set getFields( Class entityClass ) { return ALL_FIELDS.getOrDefault( entityClass, Collections.emptySet() ); } + @Override + public boolean accepts( SearchSettings settings ) { + return settings.isUseIndices(); + } + @Override public Collection> searchArrayDesign( SearchSettings settings ) throws SearchException { return searchFor( settings, ArrayDesign.class, PLATFORM_FIELDS ); diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java index beeffa970a..9a89abbd5b 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java @@ -24,6 +24,9 @@ import 
java.net.URI; import java.util.*; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.extractDnf; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.prepareTermUriQuery; + @Component @CommonsLog public class OntologySearchSource implements SearchSource { @@ -51,41 +54,126 @@ public class OntologySearchSource implements SearchSource { @Autowired private CharacteristicService characteristicService; + @Override + public boolean accepts( SearchSettings settings ) { + return settings.isUseCharacteristics(); + } + + /** + * Search via characteristics i.e. ontology terms. + *
<p>
+ * This is an important type of search but also a source of performance issues. Searches for "specific" terms are + * generally not a big problem (yielding less than 100 results); searches for "broad" terms can return numerous + * (thousands of) results. + */ + @Override + public Collection> searchExpressionExperiment( final SearchSettings settings ) throws SearchException { + Collection> results = new SearchResultSet<>( settings ); + + StopWatch watch = StopWatch.createStarted(); + + log.debug( "Starting EE search for " + settings ); + /* + * Note that the AND is applied only within one entity type. The fix would be to apply AND at this + * level. + * + * The tricky part here is if the user has entered a boolean query. If they put in Parkinson's disease AND + * neuron, then we want to eventually return entities that are associated with both. We don't expect to find + * single characteristics that match both. + * + * But if they put in Parkinson's disease we don't want to do two queries. + */ + Set> subclauses = extractDnf( settings ); + for ( Set subclause : subclauses ) { + Collection> classResults = this.searchExpressionExperiments( settings, subclause ); + if ( !classResults.isEmpty() ) { + log.debug( String.format( "Found %d EEs matching %s", classResults.size(), String.join( " AND ", subclause ) ) ); + } + results.addAll( classResults ); + // this is an OR query, so we can stop as soon as we've retrieved enough results + if ( isFilled( results, settings ) ) { + break; + } + } + + OntologySearchSource.log.debug( String.format( "ExpressionExperiment search: %s -> %d characteristic-based hits %d ms", + settings, results.size(), watch.getTime() ) ); + + return results; + } + + /** + * Search for the Experiment query in ontologies, including items that are associated with children of matching + * query terms. That is, 'brain' should return entities tagged as 'hippocampus'. It can handle AND in searches, so + * Parkinson's + * AND neuron finds items tagged with both of those terms. The use of OR is handled by the caller. + * + * @param settings search settings + * @param clause a conjunctive clause + * @return SearchResults of Experiments + */ + private SearchResultSet searchExpressionExperiments( SearchSettings settings, Set clause ) throws SearchException { + StopWatch watch = StopWatch.createStarted(); + + // we would have to first deal with the separate queries, and then apply the logic. + SearchResultSet results = new SearchResultSet<>( settings ); + + OntologySearchSource.log.debug( "Starting characteristic search for: " + settings + " matching " + String.join( " AND ", clause ) ); + for ( String subClause : clause ) { + SearchResultSet subqueryResults = doSearchExpressionExperiment( settings.withQuery( subClause ) ); + if ( results.isEmpty() ) { + results.addAll( subqueryResults ); + } else { + // this is our Intersection operation. + results.retainAll( subqueryResults ); + } + if ( watch.getTime() > 1000 ) { + OntologySearchSource.log.warn( String.format( "Characteristic EE search for '%s': %d hits retained so far; %dms", + subClause, results.size(), watch.getTime() ) ); + watch.reset(); + watch.start(); + } + } + + return results; + } + /** * Perform an Experiment search based on annotations (anchored in ontology terms) - it does not have to be one word, * it could be "parkinson's disease"; it can also be a URI.
* * @return collection of SearchResults (Experiments) */ - @Override - public Collection> searchExpressionExperiment( SearchSettings settings ) throws SearchException { + private SearchResultSet doSearchExpressionExperiment( SearchSettings settings ) throws SearchException { // overall timer StopWatch watch = StopWatch.createStarted(); // per-step timer StopWatch timer = StopWatch.create(); - Set> results = new SearchResultSet<>(); + SearchResultSet results = new SearchResultSet<>( settings ); Collection ontologyResults = new HashSet<>(); - // if the query is a term, find it directly Collection matchingTerms; - if ( settings.isTermQuery() ) { - String termUri = settings.getQuery(); + + // if the query is a term, find it directly + URI termUri = prepareTermUriQuery( settings ); + if ( termUri != null ) { OntologyResult resource; - OntologyTerm r2 = ontologyService.getTerm( termUri ); + OntologyTerm r2 = ontologyService.getTerm( termUri.toString() ); if ( r2 != null ) { assert r2.getUri() != null; resource = new OntologyResult( r2, EXACT_MATCH_SCORE ); matchingTerms = Collections.singleton( r2 ); } else { // attempt to guess a label from the database - Characteristic c = characteristicService.findBestByUri( termUri ); + Characteristic c = characteristicService.findBestByUri( termUri.toString() ); if ( c != null ) { assert c.getValueUri() != null; resource = new OntologyResult( c.getValueUri(), c.getValue(), EXACT_MATCH_SCORE ); } else { - resource = new OntologyResult( termUri, getLabelFromTermUri( termUri ), EXACT_MATCH_SCORE ); + resource = new OntologyResult( termUri.toString(), getLabelFromTermUri( termUri ), EXACT_MATCH_SCORE ); } matchingTerms = Collections.emptySet(); } @@ -103,7 +191,7 @@ public Collection> searchExpressionExperiment .map( t -> new OntologyResult( t, t.getScore() != null ?
t.getScore() : EXACT_MATCH_SCORE ) ) .forEach( ontologyResults::add ); timer.stop(); - if ( timer.getTime() > 100 ) { + if ( timer.getTime() > 1000 ) { log.warn( String.format( "Found %d ontology classes matching '%s' in %d ms", matchingTerms.size(), settings.getQuery(), timer.getTime() ) ); } @@ -131,7 +219,7 @@ public Collection> searchExpressionExperiment .forEach( ontologyResults::add ); timer.stop(); - if ( timer.getTime() > 200 ) { + if ( timer.getTime() > 1000 ) { log.warn( String.format( "Found %d ontology subclasses or related terms for %d terms matching '%s' in %d ms", ontologyResults.size() - matchingTerms.size(), matchingTerms.size(), settings.getQuery(), timer.getTime() ) ); } @@ -142,14 +230,14 @@ public Collection> searchExpressionExperiment findExperimentsByOntologyResults( ontologyResults, settings, results ); timer.stop(); - if ( timer.getTime() > 100 ) { + if ( timer.getTime() > 1000 ) { log.warn( String.format( "Retrieved %d datasets via %d characteristics in %d ms", results.size(), ontologyResults.size(), timer.getTime() ) ); } String message = String.format( "Found %d datasets by %d characteristic URIs for '%s' in %d ms", results.size(), ontologyResults.size(), settings.getQuery(), watch.getTime() ); - if ( watch.getTime() > 300 ) { + if ( watch.getTime() > 1000 ) { log.warn( message ); } else { log.debug( message ); @@ -158,7 +246,7 @@ public Collection> searchExpressionExperiment return results; } - private void findExperimentsByOntologyResults( Collection terms, SearchSettings settings, Set> results ) { + private void findExperimentsByOntologyResults( Collection terms, SearchSettings settings, SearchResultSet results ) { // URIs are case-insensitive in the database, so should be the mapping to labels Collection uris = new HashSet<>(); Map uri2value = new TreeMap<>( String.CASE_INSENSITIVE_ORDER ); @@ -186,7 +274,7 @@ private void findExperimentsByOntologyResults( Collection terms, findExpressionExperimentsByUris( uris, uri2value, uri2score, settings, results ); } - private void findExpressionExperimentsByUris( Collection uris, Map uri2value, Map uri2score, SearchSettings settings, Set> results ) { + private void findExpressionExperimentsByUris( Collection uris, Map uri2value, Map uri2score, SearchSettings settings, SearchResultSet results ) { if ( isFilled( results, settings ) ) return; @@ -211,7 +299,7 @@ private void findExpressionExperimentsByUris( Collection uris, Map> hits, String field, double scoreMultiplier, Map uri2value, Map uri2score, SearchSettings settings, Set> results ) { + private void addExperimentsByUrisHits( Map> hits, String field, double scoreMultiplier, Map uri2value, Map uri2score, SearchSettings settings, SearchResultSet results ) { for ( Map.Entry> entry : hits.entrySet() ) { String uri = entry.getKey(); String value = uri2value.get( uri ); @@ -249,12 +337,11 @@ private static int getLimit( Collection /** * Extract a label for a term URI as per {@link OntologyTerm#getLabel()}. 
*/ - static String getLabelFromTermUri( String termUri ) { - URI components = URI.create( termUri ); - String[] segments = components.getPath().split( "/" ); + static String getLabelFromTermUri( URI termUri ) { + String[] segments = termUri.getPath().split( "/" ); // use the fragment - if ( !StringUtils.isEmpty( components.getFragment() ) ) { - return partToTerm( components.getFragment() ); + if ( !StringUtils.isEmpty( termUri.getFragment() ) ) { + return partToTerm( termUri.getFragment() ); } // pick the last non-empty segment for ( int i = segments.length - 1; i >= 0; i-- ) { @@ -263,7 +350,7 @@ static String getLabelFromTermUri( String termUri ) { } } // as a last resort, return the parsed URI - return components.toString(); + return termUri.toString(); } private static String partToTerm( String part ) { diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettings.java b/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettings.java index e58ac0d722..e19dcc6bdd 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettings.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettings.java @@ -22,10 +22,10 @@ import lombok.Data; import lombok.Singular; import lombok.With; -import org.apache.commons.lang3.StringUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import ubic.gemma.core.search.Highlighter; +import ubic.gemma.core.search.OntologyHighlighter; import ubic.gemma.core.search.lucene.LuceneHighlighter; import ubic.gemma.model.common.Identifiable; import ubic.gemma.model.common.description.BibliographicReference; @@ -54,7 +54,7 @@ public class SearchSettings implements Serializable { public enum SearchMode { /** - * Prefer correctness over speed. + * Prefer correctness to speed. */ ACCURATE, /** @@ -105,10 +105,9 @@ public static SearchSettings bibliographicReferenceSearch( String query ) { * @param arrayDesign the array design to limit the search to * @return search settings */ - public static SearchSettings compositeSequenceSearch( String query, ArrayDesign arrayDesign ) { + public static SearchSettings compositeSequenceSearch( String query, @Nullable ArrayDesign arrayDesign ) { return builder().query( query ) .resultType( CompositeSequence.class ) - .resultType( ArrayDesign.class ) .platformConstraint( arrayDesign ) // TODO: check if this was specified in the original code .build(); } @@ -133,7 +132,7 @@ public static SearchSettings expressionExperimentSearch( String query ) { * @param taxon if you want to filter by taxon (can be null) * @return search settings */ - public static SearchSettings expressionExperimentSearch( String query, Taxon taxon ) { + public static SearchSettings expressionExperimentSearch( String query, @Nullable Taxon taxon ) { return builder() .query( query ) .resultType( ExpressionExperiment.class ) @@ -148,7 +147,7 @@ public static SearchSettings expressionExperimentSearch( String query, Taxon tax * @param taxon the taxon to limit the search to (can be null) * @return search settings */ - public static SearchSettings geneSearch( String query, Taxon taxon ) { + public static SearchSettings geneSearch( String query, @Nullable Taxon taxon ) { return builder().query( query ).resultType( Gene.class ).taxon( taxon ).build(); } @@ -207,75 +206,35 @@ public static SearchSettings geneSearch( String query, Taxon taxon ) { private transient Highlighter highlighter; /** - * Get this query, trimmed. 
- */ - public String getQuery() { - return query == null ? null : query.trim(); - } - - /** - * Get the original query that was set by {@link #setQuery(String)}, untrimmed. - */ - @SuppressWarnings("unused") - public String getRawQuery() { - return this.query; - } - - /** - * Indicate if the query refers to an ontology term. - *
<p>
- * This is done by checking if this query starts with 'http://' for now, but there could be fancier checks performed - * in the future. - */ - public boolean isTermQuery() { - return getQuery() != null && getQuery().startsWith( "http://" ); - } - - /** - * Obtain the term URI. - * - * @deprecated use {@link #getQuery()} and {@link #isTermQuery()} instead. - * - * @return the term URI if this is a term query, otherwise null - */ - @Deprecated - public String getTermUri() { - return isTermQuery() ? getQuery() : null; - } - - /** - * Set this term URI. - * - * @deprecated URI can be set with {@link #setQuery(String)} instead. - * - * @param termUri a valid term URI, or null or a blank string + * Check if this is configured to search a given result type. */ - @Deprecated - public void setTermUri( String termUri ) { - if ( StringUtils.isNotBlank( termUri ) && !termUri.startsWith( "http://" ) ) { - throw new IllegalArgumentException( "The term URI must be a valid URI." ); - } - setQuery( termUri ); + public boolean hasResultType( Class cls ) { + return resultTypes.contains( cls ); } /** - * Check if this is configured to search a given result type. + * Highlight a given field. */ - public boolean hasResultType( Class cls ) { - return resultTypes.contains( cls ); + @Nullable + public Map highlight( String value, String field ) { + return highlighter != null ? highlighter.highlight( value, field ) : null; } /** * Highlight a given ontology term. *
<p>
- * This is a shorthand for {@link #getHighlighter()} and {@link Highlighter#highlightTerm(String, String, String)} + * This is a shorthand for {@link #getHighlighter()} and {@link OntologyHighlighter#highlightTerm(String, String, String)} * that deals with a potentially null highlighter. * @see #setHighlighter(Highlighter) * @return a highlight, or null if no provider is set or the provider returns null */ @Nullable public Map highlightTerm( String termUri, String termLabel, String field ) { - return highlighter != null ? highlighter.highlightTerm( termUri, termLabel, field ) : null; + if ( highlighter instanceof OntologyHighlighter ) { + return ( ( OntologyHighlighter ) highlighter ).highlightTerm( termUri, termLabel, field ); + } else { + return null; + } } @Nullable diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettingsValueObject.java b/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettingsValueObject.java index 9a61837e2e..8d59dcc89a 100644 --- a/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettingsValueObject.java +++ b/gemma-core/src/main/java/ubic/gemma/model/common/search/SearchSettingsValueObject.java @@ -18,21 +18,10 @@ */ package ubic.gemma.model.common.search; -import org.apache.commons.lang3.StringUtils; -import ubic.gemma.model.analysis.expression.ExpressionExperimentSet; -import ubic.gemma.model.association.phenotype.PhenotypeAssociation; -import ubic.gemma.model.common.description.BibliographicReference; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; -import ubic.gemma.model.expression.designElement.CompositeSequence; -import ubic.gemma.model.expression.experiment.ExpressionExperiment; -import ubic.gemma.model.genome.Gene; import ubic.gemma.model.genome.Taxon; -import ubic.gemma.model.genome.biosequence.BioSequence; -import ubic.gemma.model.genome.gene.GeneSet; import java.io.Serializable; -import java.util.HashSet; -import java.util.Set; /** * author: anton date: 18/03/13 diff --git a/gemma-core/src/main/java/ubic/gemma/model/common/search/package-info.java b/gemma-core/src/main/java/ubic/gemma/model/common/search/package-info.java new file mode 100644 index 0000000000..4837e61b59 --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/model/common/search/package-info.java @@ -0,0 +1,7 @@ +/** + * + */ +@ParametersAreNonnullByDefault +package ubic.gemma.model.common.search; + +import javax.annotation.ParametersAreNonnullByDefault; \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/SearchResultSetTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/SearchResultSetTest.java new file mode 100644 index 0000000000..496b0478e0 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/core/search/SearchResultSetTest.java @@ -0,0 +1,84 @@ +package ubic.gemma.core.search; + +import org.assertj.core.data.Index; +import org.junit.Test; +import ubic.gemma.model.common.search.SearchSettings; +import ubic.gemma.model.expression.experiment.ExpressionExperiment; + +import java.util.Collections; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static ubic.gemma.core.util.test.Maps.map; + +public class SearchResultSetTest { + + @Test + public void test() { + SearchSettings settings = SearchSettings.expressionExperimentSearch( "test" ); + SearchResultSet results = new SearchResultSet<>( settings ); + assertTrue( results.add( SearchResult.from( 
ExpressionExperiment.class, 1L, 0.5, null, "test" ) ) ); + assertTrue( results.add( SearchResult.from( ExpressionExperiment.class, 1L, 0.6, null, "test" ) ) ); + assertThat( results ).hasSize( 1 ) + .extracting( SearchResult::getScore ).containsExactly( 0.6 ); + } + + @Test + public void testResultObjectIsRetainedWhenReplacingAResult() { + SearchSettings settings = SearchSettings.expressionExperimentSearch( "test" ); + SearchResultSet results = new SearchResultSet<>( settings ); + assertTrue( results.add( SearchResult.from( ExpressionExperiment.class, new ExpressionExperiment() {{ + setId( 1L ); + }}, 0.5, null, "test" ) ) ); + // replaced by a better result without a result object + assertTrue( results.add( SearchResult.from( ExpressionExperiment.class, 1L, 0.6, null, "test" ) ) ); + assertThat( results ).satisfiesExactly( sr -> { + assertThat( sr.getResultId() ).isEqualTo( 1L ); + assertThat( sr.getResultObject() ).isNotNull(); + assertThat( sr.getScore() ).isEqualTo( 0.6 ); + } ); + } + + @Test + public void testAddWhenMaxResultsIsReached() { + SearchSettings settings = SearchSettings.expressionExperimentSearch( "test" ) + .withMaxResults( 3 ); + SearchResultSet results = new SearchResultSet<>( settings ); + assertTrue( results.add( SearchResult.from( ExpressionExperiment.class, 1L, 0.5, null, "test" ) ) ); + assertTrue( results.add( SearchResult.from( ExpressionExperiment.class, 2L, 0.6, null, "test" ) ) ); + assertTrue( results.add( SearchResult.from( ExpressionExperiment.class, 3L, 0.5, null, "test" ) ) ); + // ignored + assertFalse( results.add( SearchResult.from( ExpressionExperiment.class, 4L, 0.6, null, "test" ) ) ); + assertThat( results ).hasSize( 3 ); + // this is allowed though as it replaces a previous result + assertTrue( results.add( SearchResult.from( ExpressionExperiment.class, 3L, 0.6, null, "test" ) ) ); + assertThat( results ).hasSize( 3 ) + .extracting( SearchResult::getResultId ) + .containsExactlyInAnyOrder( 1L, 2L, 3L ); + } + + @Test + public void testMergingHighlightWhenReplacingAResult() { + SearchSettings settings = SearchSettings.expressionExperimentSearch( "test" ); + SearchResultSet results = new SearchResultSet<>( settings ); + results.add( SearchResult.from( ExpressionExperiment.class, 1L, 0.5, Collections.singletonMap( "a", "a" ), "test" ) ); + results.add( SearchResult.from( ExpressionExperiment.class, 1L, 0.6, Collections.singletonMap( "b", "b" ), "test" ) ); + assertThat( results ).hasSize( 1 ) + .extracting( SearchResult::getHighlights ) + .satisfies( h -> { + assertThat( h ).containsEntry( "a", "a" ).containsEntry( "b", "b" ); + }, Index.atIndex( 0 ) ); + } + + @Test + public void testMergingHighlightWhenRetainingAnExistingResult() { + SearchSettings settings = SearchSettings.expressionExperimentSearch( "test" ); + SearchResultSet results = new SearchResultSet<>( settings ); + results.add( SearchResult.from( ExpressionExperiment.class, 1L, 0.6, Collections.singletonMap( "a", "a" ), "test" ) ); + results.add( SearchResult.from( ExpressionExperiment.class, 1L, 0.5, map( "a", "b", "b", "b" ), "test" ) ); + assertThat( results ).hasSize( 1 ) + .extracting( SearchResult::getHighlights ) + .containsExactly( map( "a", "a", "b", "b" ) ); + } +} \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/SearchResultTest.java index e69b8b51b4..33b45d1136 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/SearchResultTest.java +++ 
b/gemma-core/src/test/java/ubic/gemma/core/search/SearchResultTest.java @@ -4,8 +4,10 @@ import ubic.gemma.model.common.Identifiable; import javax.annotation.Nullable; +import java.util.Collections; import static org.assertj.core.api.Assertions.assertThat; +import static ubic.gemma.core.util.test.Maps.map; /** * Tests for {@link SearchResult}. @@ -30,31 +32,32 @@ public Long getId() { @Test public void testResultObject() { - SearchResult sr = SearchResult.from( FooBar.class, new FooBar( 1L ), 1.0, "test object" ); + SearchResult sr = SearchResult.from( FooBar.class, new FooBar( 1L ), 1.0, Collections.singletonMap( "a", "b" ), "test object" ); assertThat( sr.getScore() ).isEqualTo( 1.0 ); - assertThat( sr.getHighlights() ).isNull(); + assertThat( sr.getHighlights() ).isEqualTo( map( "a", "b" ) ); + assertThat( sr ).hasToString( String.format( "FooBar Id=1 Score=%.2f Highlights=a Source=test object [Not Filled]", 1.0 ) ); } @Test(expected = IllegalArgumentException.class) public void testResultObjectWithNullId() { - SearchResult.from( FooBar.class, new FooBar( null ), 1.0, "test object" ); + SearchResult.from( FooBar.class, new FooBar( null ), 1.0, null, "test object" ); } @Test public void testSetResultObject() { - SearchResult sr = SearchResult.from( FooBar.class, 1L, 1.0, "test object" ); + SearchResult sr = SearchResult.from( FooBar.class, 1L, 1.0, null, "test object" ); sr.setResultObject( new FooBar( 1L ) ); } @Test(expected = IllegalArgumentException.class) public void testSetResultObjectWithNullId() { - SearchResult sr = SearchResult.from( FooBar.class, 1L, 1.0, "test object" ); + SearchResult sr = SearchResult.from( FooBar.class, 1L, 1.0, null, "test object" ); sr.setResultObject( new FooBar( null ) ); } @Test(expected = IllegalArgumentException.class) public void testSetResultObjectWithDifferentId() { - SearchResult sr = SearchResult.from( FooBar.class, 1L, 1.0, "test object" ); + SearchResult sr = SearchResult.from( FooBar.class, 1L, 1.0, null, "test object" ); sr.setResultObject( new FooBar( 2L ) ); } diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceTest.java index 397a0ed713..19414bdafc 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceTest.java @@ -7,7 +7,10 @@ import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import org.springframework.security.test.context.support.WithMockUser; +import org.springframework.security.test.context.support.WithSecurityContextTestExecutionListener; import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.TestExecutionListeners; import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests; import ubic.gemma.core.ontology.OntologyService; import ubic.gemma.core.search.source.OntologySearchSource; @@ -21,10 +24,12 @@ import java.util.Collections; +import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertNull; import static org.mockito.Mockito.*; @ContextConfiguration +@TestExecutionListeners(WithSecurityContextTestExecutionListener.class) public class SearchServiceTest extends AbstractJUnit4SpringContextTests { private static final Taxon rat = Taxon.Factory.newInstance( "Rattus norvegicus", "rat", 192, false ); @@ -47,6 +52,11 @@ 
public TaxonService taxonService() { when( ts.loadAll() ).thenReturn( Collections.singletonList( rat ) ); return ts; } + + @Bean + public SearchSource fieldAwareSearchSource() { + return mock( FieldAwareSearchSource.class ); + } } @Autowired @@ -56,6 +66,10 @@ public TaxonService taxonService() { @Qualifier("databaseSearchSource") private SearchSource databaseSearchSource; + @Autowired + @Qualifier("fieldAwareSearchSource") + private SearchSource fieldAwareSearchSource; + @Autowired private OntologyService ontologyService; @@ -67,13 +81,24 @@ public void tearDown() { reset( databaseSearchSource, ontologyService ); } + @Test + public void testGetFields() { + when( ( ( FieldAwareSearchSource ) fieldAwareSearchSource ).getFields( ExpressionExperiment.class ) ) + .thenReturn( Collections.singleton( "shortName" ) ); + assertThat( searchService.getFields( ExpressionExperiment.class ) ) + .contains( "shortName" ); + verify( ( FieldAwareSearchSource ) fieldAwareSearchSource ).getFields( ExpressionExperiment.class ); + } + @Test public void test_whenTaxonIsNameIsUsedInQuery_thenAddTaxonToSearchSettings() throws SearchException { + when( databaseSearchSource.accepts( any() ) ).thenReturn( true ); SearchSettings settings = SearchSettings.builder() .resultType( Gene.class ) .query( "the best rat in the universe" ) .build(); searchService.search( settings ); + verify( databaseSearchSource ).accepts( settings.withTaxon( rat ) ); verify( databaseSearchSource ).searchGene( settings.withTaxon( rat ) ); } @@ -99,7 +124,9 @@ public void searchExpressionExperimentsByUri_whenQueryIsAUri_thenEnsureTheUriIsU } @Test + @WithMockUser public void searchExpressionExperiment() throws SearchException { + when( databaseSearchSource.accepts( any() ) ).thenReturn( true ); SearchSettings settings = SearchSettings.builder() .query( "http://purl.obolibrary.org/obo/DOID_14602" ) .resultType( ExpressionExperiment.class ) @@ -110,6 +137,8 @@ public void searchExpressionExperiment() throws SearchException { .thenReturn( Collections.singletonMap( ExpressionExperiment.class, Collections.singletonMap( "test", Collections.singleton( ee ) ) ) ); SearchService.SearchResultMap results = searchService.search( settings ); + verify( databaseSearchSource ).accepts( settings ); + verify( databaseSearchSource ).searchExpressionExperiment( settings ); verify( characteristicService ).findExperimentsByUris( Collections.singleton( "http://purl.obolibrary.org/obo/DOID_14602" ), null, 5000, false, false ); assertNull( results.getByResultObjectType( ExpressionExperiment.class ).iterator().next().getResultObject() ); // since EE is a proxy, only its ID should be accessed diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceVoConversionTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceVoConversionTest.java index 882f1cef72..4a31b244f1 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceVoConversionTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/SearchServiceVoConversionTest.java @@ -125,14 +125,14 @@ public void tearDown() { @Test @WithMockUser public void testConvertArrayDesign() { - searchService.loadValueObject( SearchResult.from( ArrayDesign.class, ad, 1.0, "test object" ) ); + searchService.loadValueObject( SearchResult.from( ArrayDesign.class, ad, 1.0, null, "test object" ) ); verify( arrayDesignService ).loadValueObject( ad ); } @Test @WithMockUser public void testConvertArrayDesignCollection() { - searchService.loadValueObjects( Collections.singleton( 
SearchResult.from( ArrayDesign.class, ad, 1.0, "test object" ) ) ); + searchService.loadValueObjects( Collections.singleton( SearchResult.from( ArrayDesign.class, ad, 1.0, null, "test object" ) ) ); verify( arrayDesignService ).loadValueObjects( Collections.singletonList( ad ) ); } @@ -142,14 +142,14 @@ public void testConvertBibliographicReference() { when( bibliographicReferenceService.loadValueObject( any( BibliographicReference.class ) ) ) .thenAnswer( arg -> new BibliographicReferenceValueObject( arg.getArgument( 0, BibliographicReference.class ) ) ); br.setId( 13L ); - searchService.loadValueObject( SearchResult.from( BibliographicReference.class, br, 1.0, "test object" ) ); + searchService.loadValueObject( SearchResult.from( BibliographicReference.class, br, 1.0, null, "test object" ) ); verify( bibliographicReferenceService ).loadValueObject( br ); } @Test @WithMockUser public void testConvertCompositeSequence() { - searchService.loadValueObject( SearchResult.from( CompositeSequence.class, cs, 1.0, "test object" ) ); + searchService.loadValueObject( SearchResult.from( CompositeSequence.class, cs, 1.0, null, "test object" ) ); verify( compositeSequenceService ).loadValueObject( cs ); } @@ -158,21 +158,21 @@ public void testConvertCompositeSequence() { public void testConvertCompositeSequenceCollection() { when( compositeSequenceService.loadValueObjects( any() ) ).thenReturn( Collections.singletonList( new CompositeSequenceValueObject( cs ) ) ); // this is a special case because of how it's implemented - searchService.loadValueObjects( Collections.singleton( SearchResult.from( CompositeSequence.class, cs, 1.0, "test object" ) ) ); + searchService.loadValueObjects( Collections.singleton( SearchResult.from( CompositeSequence.class, cs, 1.0, null, "test object" ) ) ); verify( compositeSequenceService ).loadValueObjects( Collections.singletonList( cs ) ); } @Test @WithMockUser public void testConvertExpressionExperiment() { - searchService.loadValueObject( SearchResult.from( ExpressionExperiment.class, ee, 1.0, "test object" ) ); + searchService.loadValueObject( SearchResult.from( ExpressionExperiment.class, ee, 1.0, null, "test object" ) ); verify( expressionExperimentService ).loadValueObject( ee ); } @Test public void testConvertGeneSet() { // this is another complicated one because GeneSetService does not implement BaseVoEnabledService - searchService.loadValueObject( SearchResult.from( GeneSet.class, gs, 1.0, "test object" ) ); + searchService.loadValueObject( SearchResult.from( GeneSet.class, gs, 1.0, null, "test object" ) ); verify( geneSetService ).loadValueObject( gs ); } @@ -180,7 +180,7 @@ public void testConvertGeneSet() { public void testConvertUninitializedResult() { DatabaseBackedGeneSetValueObject gsvo = new DatabaseBackedGeneSetValueObject( gs, new Taxon(), 1L ); when( geneSetService.loadValueObjectById( 13L ) ).thenReturn( gsvo ); - SearchResult> sr = searchService.loadValueObject( SearchResult.from( GeneSet.class, 13L, 1.0, "test object" ) ); + SearchResult> sr = searchService.loadValueObject( SearchResult.from( GeneSet.class, 13L, 1.0, null, "test object" ) ); assertThat( sr ) .isNotNull() .hasFieldOrPropertyWithValue( "resultType", GeneSet.class ) @@ -195,18 +195,18 @@ public void testConvertUninitializedResult() { public void testUnsupportedResultTypeRaisesIllegalArgumentException() { ContrastResult cr = new ContrastResult(); cr.setId( 1L ); - searchService.loadValueObject( SearchResult.from( ContrastResult.class, cr, 1.0, "test object" ) ); + 
searchService.loadValueObject( SearchResult.from( ContrastResult.class, cr, 1.0, null, "test object" ) ); } @Test(expected = IllegalArgumentException.class) public void testUnsupportedResultTypeInCollectionRaisesIllegalArgumentException() { - searchService.loadValueObjects( Collections.singleton( SearchResult.from( ContrastResult.class, new ContrastResult(), 0.0f, "test object" ) ) ); + searchService.loadValueObjects( Collections.singleton( SearchResult.from( ContrastResult.class, new ContrastResult(), 0.0f, null, "test object" ) ) ); } @Test public void testConvertAlreadyConvertedCollection() { searchService.loadValueObjects( Collections.singletonList( - SearchResult.from( ExpressionExperiment.class, eevo, 0.0f, "test value object" ) ) ); + SearchResult.from( ExpressionExperiment.class, eevo, 0.0f, null, "test value object" ) ) ); verify( expressionExperimentService ).loadValueObjectsByIds( Collections.singletonList( eevo.getId() ) ); } @@ -224,9 +224,9 @@ public void testBlacklistedConversion() { when( expressionExperimentService.loadValueObjects( any() ) ).thenReturn( Collections.singletonList( new ExpressionExperimentValueObject( ee ) ) ); when( blacklistedEntityService.loadValueObjects( any() ) ).thenReturn( Arrays.asList( BlacklistedValueObject.fromEntity( bp ), BlacklistedValueObject.fromEntity( be ) ) ); List>> vos = searchService.loadValueObjects( Arrays.asList( - SearchResult.from( BlacklistedEntity.class, be, 0.0, "test blacklisted object" ), - SearchResult.from( BlacklistedEntity.class, bp, 0.0, "test blacklisted object" ), - SearchResult.from( ExpressionExperiment.class, ee, 1.0, "test object" ) ) ); + SearchResult.from( BlacklistedEntity.class, be, 0.0, null, "test blacklisted object" ), + SearchResult.from( BlacklistedEntity.class, bp, 0.0, null, "test blacklisted object" ), + SearchResult.from( ExpressionExperiment.class, ee, 1.0, null, "test object" ) ) ); verify( expressionExperimentService ).loadValueObjects( Collections.singletonList( ee ) ); assertThat( vos ) .extracting( "resultType", "resultId" ) diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/lucene/LuceneQueryUtilsTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/lucene/LuceneQueryUtilsTest.java index 5a66042b4d..77d5bd6eff 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/lucene/LuceneQueryUtilsTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/lucene/LuceneQueryUtilsTest.java @@ -15,27 +15,56 @@ public class LuceneQueryUtilsTest { public void testExtractTerms() throws SearchException { assertThat( extractTerms( SearchSettings.geneSearch( "BRCA1 OR (BRCA2 AND BRCA3) OR NOT BRCA4 OR -BRCA5", null ) ) ) .containsExactlyInAnyOrder( "BRCA1", "BRCA2", "BRCA3" ); + // fielded terms are excluded + assertThat( extractTerms( SearchSettings.geneSearch( "shortName:GSE1234 test", null ) ) ) + .containsExactlyInAnyOrder( "test" ); } @Test public void testExtractDnf() throws SearchException { - assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 OR (BRCA2 AND BRCA3) OR NOT BRCA4 OR -BRCA5 OR (BRCA6 OR BRCA7) AND BRCA9", null ) ) ) - .containsExactlyInAnyOrder( set( "BRCA1" ), set( "BRCA2", "BRCA3" ) ); + assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 OR (BRCA2 AND BRCA3) OR NOT BRCA4 OR -BRCA5 OR (BRCA6 OR BRCA7)", null ) ) ) + .containsExactlyInAnyOrder( set( "BRCA1" ), set( "BRCA2", "BRCA3" ), set( "BRCA6" ), set( "BRCA7" ) ); assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 AND BRCA2", null ) ) ) .containsExactlyInAnyOrder( set( "BRCA1", "BRCA2" ) ); - 
assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 AND NOT BRCA2", null ) ) ) + assertThat( extractDnf( SearchSettings.geneSearch( "NOT BRCA1 AND NOT BRCA2", null ) ) ) + .isEmpty(); + assertThat( extractDnf( SearchSettings.geneSearch( "NOT BRCA1 OR NOT BRCA2", null ) ) ) .isEmpty(); + assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 AND NOT BRCA2", null ) ) ) + .containsExactly( set( "BRCA1" ) ); + assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 OR NOT (BRCA2 AND BRCA3)", null ) ) ) + .containsExactly( set( "BRCA1" ) ); assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 AND (BRCA2 OR BRCA3)", null ) ) ) .isEmpty(); } + @Test + public void testExtractDnfWithNestedOrInClause() throws SearchException { + assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 OR (BRCA2 OR (BRCA3 AND BRCA4))", null ) ) ) + .containsExactlyInAnyOrder( set( "BRCA1" ), set( "BRCA2" ), set( "BRCA3", "BRCA4" ) ); + } + + @Test + public void testExtractDnfWithNestedAndInSubClause() throws SearchException { + assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 OR (BRCA2 AND (BRCA3 AND BRCA4))", null ) ) ) + .containsExactlyInAnyOrder( set( "BRCA1" ), set( "BRCA2", "BRCA3", "BRCA4" ) ); + } + + @Test + public void testExtractDnfWithUris() throws SearchException { + // this is an important case for searching datasets by ontology terms + assertThat( extractDnf( SearchSettings.geneSearch( "http://example.com/GO:1234 OR http://example.com/GO:1235", null ) ) ) + .contains( set( "http://example.com/GO:1234" ), set( "http://example.com/GO:1235" ) ); + } + @Test public void testPrepareDatabaseQuery() throws SearchException { assertEquals( "BRCA1", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA1", null ) ) ); assertEquals( "BRCA1", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA1^4", null ) ) ); assertEquals( "BRCA1", prepareDatabaseQuery( SearchSettings.geneSearch( "\"BRCA1\"", null ) ) ); assertEquals( "BRCA1", prepareDatabaseQuery( SearchSettings.geneSearch( "(BRCA1)", null ) ) ); - assertEquals( "BRCA1", prepareDatabaseQuery( SearchSettings.geneSearch( "symbol:BRCA1", null ) ) ); + // fielded term are ignored + assertNull( prepareDatabaseQuery( SearchSettings.geneSearch( "symbol:BRCA1", null ) ) ); assertEquals( "+BRCA", prepareDatabaseQuery( SearchSettings.geneSearch( "\\+BRCA", null ), true ) ); assertEquals( "BRCA", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA OR TCGA", null ) ) ); assertEquals( "BRCA", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA AND TCGA", null ) ) ); @@ -43,6 +72,8 @@ public void testPrepareDatabaseQuery() throws SearchException { assertEquals( "TCGA", prepareDatabaseQuery( SearchSettings.geneSearch( "NOT BRCA AND TCGA", null ) ) ); assertEquals( "BRCA", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA -TCGA", null ) ) ); assertEquals( "BRCA AND TCGA", prepareDatabaseQuery( SearchSettings.geneSearch( "\"BRCA AND TCGA\"", null ) ) ); + // wildcards and prefix queries are ignored for database queries + assertNull( prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA*", null ) ) ); } @Test @@ -65,6 +96,9 @@ public void testPrepareDatabaseQueryForInexactMatch() throws SearchException { assertEquals( "brca_", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA?", null ), true ) ); assertEquals( "BRCA?", prepareDatabaseQuery( SearchSettings.geneSearch( "\"BRCA?\"", null ), true ) ); assertEquals( "BRCA", prepareDatabaseQuery( SearchSettings.geneSearch( "+BRCA", null ), true ) ); + // escaped wildcard + assertEquals( 
"BRCA?", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA\\?", null ), true ) ); + assertEquals( "BRCA*", prepareDatabaseQuery( SearchSettings.geneSearch( "BRCA\\*", null ), true ) ); // forbidden prefix-style searches assertEquals( "*", prepareDatabaseQuery( SearchSettings.geneSearch( "*", null ), true ) ); assertEquals( "*BRCA", prepareDatabaseQuery( SearchSettings.geneSearch( "*BRCA", null ), true ) ); diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/source/DatabaseSearchSourceTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/source/DatabaseSearchSourceTest.java index ea38d162aa..0f1169042c 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/source/DatabaseSearchSourceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/source/DatabaseSearchSourceTest.java @@ -14,7 +14,9 @@ import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchSource; import ubic.gemma.model.common.search.SearchSettings; +import ubic.gemma.persistence.service.expression.arrayDesign.ArrayDesignService; import ubic.gemma.persistence.service.expression.designElement.CompositeSequenceService; +import ubic.gemma.persistence.service.expression.experiment.BlacklistedEntityService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentSetService; import ubic.gemma.persistence.service.genome.biosequence.BioSequenceService; @@ -74,6 +76,16 @@ public GeneSetService geneSetService() { public ExpressionExperimentSetService experimentSetService() { return mock( ExpressionExperimentSetService.class ); } + + @Bean + public ArrayDesignService arrayDesignService() { + return mock( ArrayDesignService.class ); + } + + @Bean + public BlacklistedEntityService blacklistedEntityService() { + return mock(); + } } @Autowired @@ -113,4 +125,17 @@ public void test_quotedTerms() throws SearchException { databaseSearchSource.searchGene( SearchSettings.geneSearch( "\"BRCA1 BRCA2\"", null ) ); verify( geneService ).findByOfficialSymbol( "BRCA1 BRCA2" ); } + + @Test + public void testSearchGeneByUri() throws SearchException { + databaseSearchSource.searchGene( SearchSettings.geneSearch( "http://purl.org/commons/record/ncbi_gene/1234", null ) ); + verify( geneService ).findByNCBIId( 1234 ); + verify( geneService ).findByOfficialSymbol( "http://purl.org/commons/record/ncbi_gene/1234" ); + } + + @Test + public void testSearchGeneByUriInexact() throws SearchException { + databaseSearchSource.searchGene( SearchSettings.geneSearch( "http://purl.org/commons/record/ncbi_gene/123?", null ) ); + verify( geneService ).findByOfficialSymbolInexact( "http://purl.org/commons/record/ncbi\\_gene/123_" ); + } } \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java index 17107d5cab..7c1ddedb30 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java @@ -1,9 +1,5 @@ package ubic.gemma.core.search.source; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.search.highlight.Formatter; -import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.junit.After; import org.junit.Test; import 
org.springframework.beans.factory.annotation.Autowired; @@ -15,22 +11,20 @@ import ubic.basecode.ontology.model.OntologyTermSimple; import ubic.basecode.ontology.search.OntologySearchException; import ubic.gemma.core.ontology.OntologyService; -import ubic.gemma.core.search.Highlighter; +import ubic.gemma.core.search.OntologyHighlighter; import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; import ubic.gemma.core.search.SearchSource; -import ubic.gemma.core.search.lucene.LuceneHighlighter; -import ubic.gemma.core.search.lucene.SimpleMarkdownFormatter; import ubic.gemma.model.common.search.SearchSettings; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.persistence.service.common.description.CharacteristicService; import ubic.gemma.persistence.util.TestComponent; import javax.annotation.Nullable; +import java.net.URI; import java.util.Collection; import java.util.Collections; import java.util.Map; -import java.util.Set; import static junit.framework.TestCase.assertEquals; import static org.assertj.core.api.Assertions.assertThat; @@ -86,20 +80,15 @@ public void test() throws SearchException, OntologySearchException { .thenReturn( Collections.singletonMap( ExpressionExperiment.class, Collections.singletonMap( "http://purl.obolibrary.org/obo/CL_0000129", Collections.singleton( ee ) ) ) ); Collection> results = ontologySearchSource.searchExpressionExperiment( SearchSettings.expressionExperimentSearch( "http://purl.obolibrary.org/obo/CL_0000129" ) - .withHighlighter( new LuceneHighlighter() { + .withHighlighter( new OntologyHighlighter() { @Override - public Map highlightTerm( @Nullable String termUri, String termLabel, String field ) { - return Collections.singletonMap( field, termUri != null ? String.format( "[%s](%s)", termLabel, termUri ) : termLabel ); - } - - @Override - public Formatter getFormatter() { - return new SimpleHTMLFormatter(); + public Map highlight( String value, String field ) { + return Collections.singletonMap( field, value ); } @Override - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer ) { - return Collections.emptyMap(); + public Map highlightTerm( @Nullable String termUri, String termLabel, String field ) { + return Collections.singletonMap( field, termUri != null ? String.format( "[%s](%s)", termLabel, termUri ) : termLabel ); } } ) ); verify( ontologyService ).getTerm( "http://purl.obolibrary.org/obo/CL_0000129" ); @@ -122,20 +111,15 @@ public void testWhenTermIsNotFoundGenerateLabelFromUri() throws SearchException .thenReturn( Collections.singletonMap( ExpressionExperiment.class, Collections.singletonMap( "http://purl.obolibrary.org/obo/CL_0000129", Collections.singleton( ee ) ) ) ); Collection> results = ontologySearchSource.searchExpressionExperiment( SearchSettings.expressionExperimentSearch( "http://purl.obolibrary.org/obo/CL_0000129" ) - .withHighlighter( new LuceneHighlighter() { - @Override - public Map highlightTerm( @Nullable String termUri, String termLabel, String field ) { - return Collections.singletonMap( field, termUri != null ? 
String.format( "[%s](%s)", termLabel, termUri ) : termLabel ); - } - + .withHighlighter( new OntologyHighlighter() { @Override - public Formatter getFormatter() { - return new SimpleMarkdownFormatter(); + public Map highlight( String value, String field ) { + return Collections.singletonMap( field, value ); } @Override - public Map highlightDocument( Document document, org.apache.lucene.search.highlight.Highlighter highlighter, Analyzer analyzer ) { - return Collections.emptyMap(); + public Map highlightTerm( @Nullable String termUri, String termLabel, String field ) { + return Collections.singletonMap( field, termUri != null ? String.format( "[%s](%s)", termLabel, termUri ) : termLabel ); } } ) ); verify( ontologyService ).getTerm( "http://purl.obolibrary.org/obo/CL_0000129" ); @@ -151,15 +135,25 @@ public Map highlightDocument( Document document, org.apache.luce } ); } + @Test + public void testSearchExpressionExperimentWithBooleanQuery() throws SearchException { + ontologySearchSource.searchExpressionExperiment( SearchSettings.expressionExperimentSearch( "a OR (b AND c) OR http://example.com/d" ) ); + verify( ontologyService ).findTerms( "a" ); + verify( ontologyService ).findTerms( "b" ); + verify( ontologyService ).findTerms( "c" ); + verify( ontologyService ).getTerm( "http://example.com/d" ); + verifyNoMoreInteractions( ontologyService ); + } + @Test public void testGetLabelFromTermUri() { - assertEquals( "GO:0004016", getLabelFromTermUri( "http://purl.obolibrary.org/obo/GO_0004016" ) ); - assertEquals( "CHEBI:7466", getLabelFromTermUri( "http://purl.obolibrary.org/obo/chebi.owl#CHEBI_7466" ) ); - assertEquals( "BIRNLEX:15001", getLabelFromTermUri( "http://ontology.neuinfo.org/NIF/Function/NIF-Function.owl#birnlex_15001" ) ); - assertEquals( "GO:0004016", getLabelFromTermUri( "http://purl.obolibrary.org/obo//GO_0004016//" ) ); - assertEquals( "http://purl.obolibrary.org////", getLabelFromTermUri( "http://purl.obolibrary.org////" ) ); - assertEquals( "PAT:ID_20327", getLabelFromTermUri( "http://www.orphanet.org/rdfns#pat_id_20327" ) ); - assertEquals( "PAT:ID_20327", getLabelFromTermUri( "http://www.orphanet.org/rdfns#pat_id_20327" ) ); - assertEquals( "63857", getLabelFromTermUri( "http://purl.org/commons/record/ncbi_gene/63857" ) ); + assertEquals( "GO:0004016", getLabelFromTermUri( URI.create( "http://purl.obolibrary.org/obo/GO_0004016" ) ) ); + assertEquals( "CHEBI:7466", getLabelFromTermUri( URI.create( "http://purl.obolibrary.org/obo/chebi.owl#CHEBI_7466" ) ) ); + assertEquals( "BIRNLEX:15001", getLabelFromTermUri( URI.create( "http://ontology.neuinfo.org/NIF/Function/NIF-Function.owl#birnlex_15001" ) ) ); + assertEquals( "GO:0004016", getLabelFromTermUri( URI.create( "http://purl.obolibrary.org/obo//GO_0004016//" ) ) ); + assertEquals( "http://purl.obolibrary.org////", getLabelFromTermUri( URI.create( "http://purl.obolibrary.org////" ) ) ); + assertEquals( "PAT:ID_20327", getLabelFromTermUri( URI.create( "http://www.orphanet.org/rdfns#pat_id_20327" ) ) ); + assertEquals( "PAT:ID_20327", getLabelFromTermUri( URI.create( "http://www.orphanet.org/rdfns#pat_id_20327" ) ) ); + assertEquals( "63857", getLabelFromTermUri( URI.create( "http://purl.org/commons/record/ncbi_gene/63857" ) ) ); } } \ No newline at end of file diff --git a/gemma-core/src/test/java/ubic/gemma/core/util/test/Maps.java b/gemma-core/src/test/java/ubic/gemma/core/util/test/Maps.java new file mode 100644 index 0000000000..0f2f8f4a61 --- /dev/null +++ b/gemma-core/src/test/java/ubic/gemma/core/util/test/Maps.java 
@@ -0,0 +1,23 @@ +package ubic.gemma.core.util.test; + +import org.springframework.util.Assert; + +import java.util.HashMap; +import java.util.Map; + +/** + * Extensions for AssertJ's {@link org.assertj.core.util.Maps}. + */ +public class Maps { + + public static Map map( K key, V value, Object... keyValues ) { + Assert.isTrue( keyValues.length % 2 == 0, "You must provide an even number of key-value pairs" ); + return new HashMap( 1 + keyValues.length / 2 ) {{ + put( key, value ); + for ( int i = 0; i < keyValues.length; i += 2 ) { + //noinspection unchecked + put( ( K ) keyValues[i], ( V ) keyValues[i + 1] ); + } + }}; + } +} diff --git a/gemma-core/src/test/java/ubic/gemma/model/common/search/SearchSettingsTest.java b/gemma-core/src/test/java/ubic/gemma/model/common/search/SearchSettingsTest.java index d98ada02b9..471f887fc2 100644 --- a/gemma-core/src/test/java/ubic/gemma/model/common/search/SearchSettingsTest.java +++ b/gemma-core/src/test/java/ubic/gemma/model/common/search/SearchSettingsTest.java @@ -2,8 +2,10 @@ import org.junit.Test; import ubic.gemma.core.search.DefaultHighlighter; +import ubic.gemma.core.search.SearchException; import static org.assertj.core.api.Assertions.assertThat; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.prepareTermUriQuery; public class SearchSettingsTest { @@ -11,51 +13,23 @@ public class SearchSettingsTest { public void testSetQueryWhenQueryContainsBlankThenTrimAccordingly() { SearchSettings searchSettings = SearchSettings.builder().build(); searchSettings.setQuery( " " ); - assertThat( searchSettings.getQuery() ).isEqualTo( "" ); - assertThat( searchSettings.getRawQuery() ).isEqualTo( " " ); + assertThat( searchSettings.getQuery() ).isEqualTo( " " ); } @Test - public void testSetQueryWhenQueryIsNull() { - SearchSettings searchSettings = SearchSettings.builder().build(); - searchSettings.setQuery( null ); - assertThat( searchSettings.getQuery() ).isNull(); - assertThat( searchSettings.getRawQuery() ).isNull(); - } - - @Test - public void testSetQueryWhenQueryIsATermUri() { + public void testSetQueryWhenQueryIsATermUri() throws SearchException { SearchSettings searchSettings = SearchSettings.builder().build(); searchSettings.setQuery( "http://example.ca/" ); assertThat( searchSettings.getQuery() ).isEqualTo( "http://example.ca/" ); - assertThat( searchSettings.getRawQuery() ).isEqualTo( "http://example.ca/" ); - assertThat( searchSettings.isTermQuery() ).isTrue(); - assertThat( searchSettings.getTermUri() ).isEqualTo( "http://example.ca/" ); + assertThat( prepareTermUriQuery( searchSettings ) ).isNotNull().hasToString( "http://example.ca/" ); } @Test - public void testSetQueryWhenQueryIsATermUriWithTrailingBlanks() { + public void testSetQueryWhenQueryIsATermUriWithTrailingBlanks() throws SearchException { SearchSettings searchSettings = SearchSettings.builder().build(); searchSettings.setQuery( " http://example.ca/ " ); - assertThat( searchSettings.getQuery() ).isEqualTo( "http://example.ca/" ); - assertThat( searchSettings.getRawQuery() ).isEqualTo( " http://example.ca/ " ); - assertThat( searchSettings.isTermQuery() ).isTrue(); - assertThat( searchSettings.getTermUri() ).isEqualTo( "http://example.ca/" ); - } - - - @Test - public void testSetTermUriWhenUriIsBlank() { - SearchSettings searchSettings = SearchSettings.builder().build(); - searchSettings.setTermUri( "" ); - assertThat( searchSettings.isTermQuery() ).isFalse(); - } - - @Test - public void testSetTermUriWhenUriIsNull() { - SearchSettings searchSettings = 
SearchSettings.builder().build(); - searchSettings.setTermUri( null ); - assertThat( searchSettings.isTermQuery() ).isFalse(); + assertThat( searchSettings.getQuery() ).isEqualTo( " http://example.ca/ " ); + assertThat( prepareTermUriQuery( searchSettings ) ).isNotNull().hasToString( "http://example.ca/" ); } @Test diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index 1123c447db..d07cddd157 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -89,6 +89,8 @@ import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; +import static ubic.gemma.rest.SearchWebService.QUERY_SCHEMA_NAME; + /** * RESTful interface for datasets. * @@ -173,7 +175,7 @@ public Map highlightDocument( Document document, org.apache.luce @Produces(MediaType.APPLICATION_JSON) @Operation(summary = "Retrieve all datasets") public QueriedAndFilteredAndPaginatedResponseDataObject getDatasets( // Params: - @QueryParam("query") String query, + @Parameter(schema = @Schema(name = QUERY_SCHEMA_NAME)) @QueryParam("query") String query, @QueryParam("filter") @DefaultValue("") FilterArg filterArg, // Optional, default null @QueryParam("offset") @DefaultValue("0") OffsetArg offsetArg, // Optional, default 0 @QueryParam("limit") @DefaultValue("20") LimitArg limitArg, // Optional, default 20 @@ -225,7 +227,7 @@ public static class ExpressionExperimentWithSearchResultValueObject extends Expr public ExpressionExperimentWithSearchResultValueObject( ExpressionExperimentValueObject vo, @Nullable SearchResult result ) { super( vo ); if ( result != null ) { - this.searchResult = new SearchWebService.SearchResultValueObject<>( SearchResult.from( result, null ) ); + this.searchResult = new SearchWebService.SearchResultValueObject<>( result.withResultObject( null ) ); } else { this.searchResult = null; } @@ -237,8 +239,9 @@ public ExpressionExperimentWithSearchResultValueObject( ExpressionExperimentValu @Produces(MediaType.APPLICATION_JSON) @Operation(summary = "Count datasets matching the provided query and filter") public ResponseDataObject getNumberOfDatasets( - @QueryParam("query") String query, - @QueryParam("filter") @DefaultValue("") FilterArg filter ) { + @Parameter(schema = @Schema(name = QUERY_SCHEMA_NAME)) @QueryParam("query") String query, + @QueryParam("filter") @DefaultValue("") FilterArg filter + ) { Filters filters = datasetArgService.getFilters( filter ); Set extraIds; if ( query != null ) { @@ -262,9 +265,10 @@ public interface UsageStatistics { @Operation(summary = "Retrieve usage statistics of platforms among datasets matching the provided query and filter", description = "Usage statistics are aggregated across experiment tags, samples and factor values mentioned in the experimental design.") public LimitedResponseDataObject getDatasetsPlatformsUsageStatistics( - @QueryParam("query") String query, + @Parameter(schema = @Schema(name = QUERY_SCHEMA_NAME)) @QueryParam("query") String query, @QueryParam("filter") @DefaultValue("") FilterArg filter, - @QueryParam("limit") @DefaultValue("50") LimitArg limit ) { + @QueryParam("limit") @DefaultValue("50") LimitArg limit + ) { Filters filters = datasetArgService.getFilters( filter ); Set extraIds; if ( query != null ) { @@ -300,7 +304,7 @@ public static class CategoryWithUsageStatisticsValueObject implements UsageStati @Operation(summary = "Retrieve usage statistics of categories 
among datasets matching the provided query and filter", description = "Usage statistics are aggregated across experiment tags, samples and factor values mentioned in the experimental design.") public QueriedAndFilteredResponseDataObject getDatasetsCategoriesUsageStatistics( - @QueryParam("query") String query, + @Parameter(schema = @Schema(name = QUERY_SCHEMA_NAME)) @QueryParam("query") String query, @QueryParam("filter") @DefaultValue("") FilterArg filter, @QueryParam("limit") @DefaultValue("20") LimitArg limit, @Parameter(description = "Excluded category URIs.", hidden = true) @QueryParam("excludedCategories") StringArrayArg excludedCategoryUris, @@ -360,7 +364,7 @@ public ArrayDesignWithUsageStatisticsValueObject( ArrayDesignValueObject arrayDe @Operation(summary = "Retrieve usage statistics of annotations among datasets matching the provided query and filter", description = "Usage statistics are aggregated across experiment tags, samples and factor values mentioned in the experimental design.") public LimitedResponseDataObject getDatasetsAnnotationsUsageStatistics( - @QueryParam("query") String query, + @Parameter(schema = @Schema(name = QUERY_SCHEMA_NAME)) @QueryParam("query") String query, @QueryParam("filter") @DefaultValue("") FilterArg filter, @Parameter(description = "List of fields to exclude from the payload. Only `parentTerms` can be excluded.") @QueryParam("exclude") ExcludeArg exclude, @Parameter(description = "Maximum number of annotations to returned; capped at " + MAX_DATASETS_ANNOTATIONS + ".", schema = @Schema(type = "integer", minimum = "1", maximum = "" + MAX_DATASETS_ANNOTATIONS)) @QueryParam("limit") LimitArg limitArg, @@ -497,7 +501,9 @@ public AnnotationWithUsageStatisticsValueObject( Characteristic c, Long numberOf @Produces(MediaType.APPLICATION_JSON) @Operation(summary = "Retrieve taxa usage statistics for datasets matching the provided query and filter") public QueriedAndFilteredResponseDataObject getDatasetsTaxaUsageStatistics( - @QueryParam("query") String query, @QueryParam("filter") @DefaultValue("") FilterArg filterArg ) { + @Parameter(schema = @Schema(name = QUERY_SCHEMA_NAME)) @QueryParam("query") String query, + @QueryParam("filter") @DefaultValue("") FilterArg filterArg + ) { Filters filters = datasetArgService.getFilters( filterArg ); Set extraIds; if ( query != null ) { diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java index aa937d4cb8..a3d634ae09 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java @@ -63,7 +63,7 @@ public class SearchWebService { /** * Name used in the OpenAPI schema to identify a search query. 
*/ - public static final String QUERY_SCHEMA_NAME = "QueryType"; + public static final String QUERY_SCHEMA_NAME = "SearchQueryType"; /** * Name used in the OpenAPI schema to identify result types as per {@link #search(String, TaxonArg, PlatformArg, List, LimitArg, ExcludeArg)}'s @@ -136,12 +136,14 @@ public Map highlightDocument( Document document, org.apache.luce @GZIP @Produces(MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Search everything in Gemma") - public SearchResultsResponseDataObject search( @QueryParam("query") @Schema(name = QUERY_SCHEMA_NAME) String query, + public SearchResultsResponseDataObject search( + @QueryParam("query") @Schema(name = QUERY_SCHEMA_NAME) String query, @QueryParam("taxon") TaxonArg taxonArg, @QueryParam("platform") PlatformArg platformArg, @Parameter(array = @ArraySchema(schema = @Schema(name = RESULT_TYPES_SCHEMA_NAME, hidden = true))) @QueryParam("resultTypes") List resultTypes, @Parameter(description = "Maximum number of search results to return; capped at " + MAX_SEARCH_RESULTS + " unless `resultObject` is excluded.", schema = @Schema(type = "integer", minimum = "1", maximum = "" + MAX_SEARCH_RESULTS)) @QueryParam("limit") LimitArg limit, - @Parameter(description = "List of fields to exclude from the payload. Only `resultObject` is supported.") @QueryParam("exclude") ExcludeArg> excludeArg ) { + @Parameter(description = "List of fields to exclude from the payload. Only `resultObject` is supported.") @QueryParam("exclude") ExcludeArg> excludeArg + ) { if ( StringUtils.isBlank( query ) ) { throw new BadRequestException( "A non-empty query must be supplied." ); } @@ -193,7 +195,7 @@ public SearchResultsResponseDataObject search( @QueryParam("query") @Schema(name searchResultVos = searchService.loadValueObjects( searchResults ); } else { searchResultVos = searchResults.stream() - .map( sr -> SearchResult.from( sr, ( IdentifiableValueObject ) null ) ) + .map( sr -> sr.withResultObject( ( IdentifiableValueObject ) null ) ) .collect( Collectors.toList() ); } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java b/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java index ce8696a078..52ffa0f054 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java @@ -14,22 +14,29 @@ import io.swagger.v3.oas.models.security.SecurityRequirement; import lombok.Value; import lombok.extern.apachecommons.CommonsLog; +import org.apache.commons.io.IOUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.context.MessageSource; import org.springframework.context.MessageSourceResolvable; +import org.springframework.core.io.ClassPathResource; import org.springframework.security.access.ConfigAttribute; import org.springframework.stereotype.Component; +import org.springframework.util.StringUtils; import ubic.gemma.core.search.SearchService; import ubic.gemma.model.common.Identifiable; import ubic.gemma.rest.SearchWebService; import ubic.gemma.rest.util.args.*; import javax.annotation.Nullable; +import java.io.IOException; import java.lang.annotation.Annotation; +import java.nio.charset.StandardCharsets; import java.util.*; import java.util.stream.Collectors; +import static org.apache.commons.text.StringEscapeUtils.escapeHtml4; + /** * Resolve {@link Arg} parameters' schema. *

@@ -118,6 +125,20 @@ protected String resolveDescription( Annotated a, Annotation[] annotations, io.s return description == null ? availableProperties : description + "\n\n" + availableProperties; } + if ( schema != null && SearchWebService.QUERY_SCHEMA_NAME.equalsIgnoreCase( schema.name() ) ) { + try { + return ( description != null ? description + "\n\n" : "" ) + + IOUtils.toString( new ClassPathResource( "/restapidocs/fragments/QueryType.md" ).getInputStream(), StandardCharsets.UTF_8 ) + // this part of the template is using embedded HTML in Markdown + .replace( "{searchableProperties}", getSearchableProperties().entrySet().stream() + .map( e -> "

" + escapeHtml4( e.getKey() ) + "

" + + "
    " + e.getValue().stream().map( v -> "
  • " + escapeHtml4( v ) + "
  • " ).collect( Collectors.joining() ) + "
" ) + .collect( Collectors.joining() ) ); + } catch ( IOException e ) { + throw new RuntimeException( e ); + } + } + return description; } @@ -131,19 +152,26 @@ protected Map resolveExtensions( Annotated a, Annotation[] annot } if ( schema != null && SearchWebService.QUERY_SCHEMA_NAME.equals( schema.name() ) ) { extensions = extensions != null ? new HashMap<>( extensions ) : new HashMap<>(); - Map> sp = new HashMap<>(); - for ( Class resultType : searchService.getSupportedResultTypes() ) { - List fields = searchService.getFields( resultType ).stream().sorted().collect( Collectors.toList() ); - if ( !fields.isEmpty() ) { - sp.put( resultType.getName(), fields ); - } - } - extensions.put( "x-gemma-searchable-properties", sp ); + extensions.put( "x-gemma-searchable-properties", getSearchableProperties() ); extensions = Collections.unmodifiableMap( extensions ); } return extensions; } + private final Comparator FIELD_COMPARATOR = Comparator + .comparing( ( String s ) -> StringUtils.countOccurrencesOf( s, "." ), Comparator.naturalOrder() ) + .thenComparing( s -> s ); + + private Map> getSearchableProperties() { + Map> sp = new HashMap<>(); + for ( Class resultType : searchService.getSupportedResultTypes() ) { + List fields = searchService.getFields( resultType ).stream().sorted( FIELD_COMPARATOR ).collect( Collectors.toList() ); + if ( !fields.isEmpty() ) { + sp.put( resultType.getName(), fields ); + } + } + return sp; + } @Value private static class FilterablePropMeta { diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java index e6537b9a9f..1f5032e3f1 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java @@ -1,5 +1,6 @@ package ubic.gemma.rest.util.args; +import lombok.extern.apachecommons.CommonsLog; import org.apache.commons.lang3.StringUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; @@ -31,6 +32,7 @@ import java.util.stream.Collectors; @Service +@CommonsLog public class DatasetArgService extends AbstractEntityArgService { private final SearchService searchService; diff --git a/gemma-rest/src/main/resources/openapi-configuration.yaml b/gemma-rest/src/main/resources/openapi-configuration.yaml index fbcff0f1fe..9c342d32ba 100644 --- a/gemma-rest/src/main/resources/openapi-configuration.yaml +++ b/gemma-rest/src/main/resources/openapi-configuration.yaml @@ -8,7 +8,7 @@ openAPI: url: https://dev.gemma.msl.ubc.ca/rest/v2 info: title: Gemma RESTful API - version: 2.7.2 + version: 2.7.3 description: | This website documents the usage of the [Gemma RESTful API](https://gemma.msl.ubc.ca/rest/v2/). 
Here you can find example script usage of the API, as well as graphical interface for each endpoint, with description of its diff --git a/gemma-rest/src/main/resources/restapidocs/CHANGELOG.md b/gemma-rest/src/main/resources/restapidocs/CHANGELOG.md index c7e1801e5d..3118e023b5 100644 --- a/gemma-rest/src/main/resources/restapidocs/CHANGELOG.md +++ b/gemma-rest/src/main/resources/restapidocs/CHANGELOG.md @@ -1,5 +1,27 @@ ## Updates +### Update 2.7.3 + +- fix double-gzipping for the `getPlatformAnnotations` endpoint +- add a limit argument `getDatasetCategoriesUsageStatistics` with a default value of 200 +- more parent terms now include in `getDatasetAnnotationsUsageFrequency` +- search is much more efficient and now capable of handling more advanced syntax + +#### More free-text categories + +We've backfilled thousands of free-text categories from GEO sample metadata which resulted in +the `getDatasetCategoriesUsageFrequency` endpoint producing far more results than usual. This is now being alleviated +by a new `limit` parameter with a default value of 200. + +#### Complete inference for parent terms in `getDatasetAnnotationsUsageFrequency` + +The `getDatasetAnnotationsUsageFrequency` endpoint now include parent terms that satisfy the `hasPart` relation. We've +rewritten the logic under the hood to be much more efficient and cache frequently requested terms. + +#### Advanced search syntax + +The search endpoint and individual query parameters now support an advanced search syntax provided by Lucene. + ### Update 2.7.2 Expose statements in `FactorValueValueObject` and `FactorValueBasicValueObject`. diff --git a/gemma-rest/src/main/resources/restapidocs/fragments/QueryType.md b/gemma-rest/src/main/resources/restapidocs/fragments/QueryType.md new file mode 100644 index 0000000000..7036017ebf --- /dev/null +++ b/gemma-rest/src/main/resources/restapidocs/fragments/QueryType.md @@ -0,0 +1,17 @@ +# Search query syntax + +The search query accepts the following syntax: + +| | | | +|--------------|-----------------------------|---------------------------------------------------------------------------------------------------------------------------------| +| Conjunction | `alpha AND beta AND gamma` | Results must contain "alpha", "beta" and "gamma". | +| Disjunction | `alpha OR beta OR gamma` | Results must contain either "alpha", "beta" or "gamma". This is the default when multiple terms are supplied. | +| Grouping | `(alpha OR beta) AND gamma` | Results must contain one of "alpha" or "beta" and also "gamma". | +| Exact Search | `"alpha beta gamma"` | Results must contain the exact phrase "alpha beta gamma". | +| Field | `shortName:GSE00001` | Datasets with short name GSE00001.
<details><summary>List of supported dataset fields</summary>{searchableProperties}</details>
| +| Prefix | `alpha*` | Results must start with "alpha". | +| Wildcard | `BRCA?` | Results can contain any letter for the `?`. In this example, BRCA1 and BRCA2 would be matched. | +| Fuzzy | `alpha~` | Results can approximate "alpha". In this example, "aleph" would be accepted. | +| Boosting | `alpha^2 beta` | Results mentioning "alpha" are ranked higher over those containing only "beta". | +| Require | `+alpha beta` | Results must mention "alpha" and optionally "beta". | +| Escape | `\+alpha` | Results must mention "+alpha". Any special character from the search syntax can be escaped by prepending it with "\". | diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/AnnotationsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/AnnotationsWebServiceTest.java index 767bf5c08c..fcaec45866 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/AnnotationsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/AnnotationsWebServiceTest.java @@ -126,7 +126,7 @@ public void testSearchTaxonDatasets() throws SearchException { ee.setId( 1L ); SearchService.SearchResultMap mockedSrMap = mock( SearchService.SearchResultMap.class ); when( mockedSrMap.getByResultObjectType( ExpressionExperiment.class ) ) - .thenReturn( Collections.singletonList( SearchResult.from( ExpressionExperiment.class, ee, 1.0, "test object" ) ) ); + .thenReturn( Collections.singletonList( SearchResult.from( ExpressionExperiment.class, ee, 1.0, null, "test object" ) ) ); when( searchService.search( any( SearchSettings.class ) ) ) .thenReturn( mockedSrMap ); when( taxonService.getFilter( eq( "commonName" ), eq( String.class ), eq( Filter.Operator.eq ), any( String.class ) ) ) diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index ac922c4ced..5099855e36 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -232,7 +232,7 @@ public void testGetDatasetsWithEmptyQuery() { } private SearchResult createMockSearchResult( Long id ) { - return SearchResult.from( ExpressionExperiment.class, id, 0, "test result object" ); + return SearchResult.from( ExpressionExperiment.class, id, 0, null, "test result object" ); } @Test diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java index 7d8df3f2cd..94b992dc15 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java @@ -113,13 +113,13 @@ public void tearDown() { public void testSearchEverything() throws SearchException { ArgumentCaptor searchSettingsArgumentCaptor = ArgumentCaptor.forClass( SearchSettings.class ); SearchService.SearchResultMap srm = mock( SearchService.SearchResultMap.class ); - when( srm.toList() ).thenReturn( Collections.singletonList( SearchResult.from( Gene.class, gene, 1.0, "test object" ) ) ); + when( srm.toList() ).thenReturn( Collections.singletonList( SearchResult.from( Gene.class, gene, 1.0, null, "test object" ) ) ); when( searchService.search( searchSettingsArgumentCaptor.capture() ) ).thenReturn( srm ); when( searchService.loadValueObjects( any() ) ).thenAnswer( args -> { //noinspection unchecked Collection> searchResult = args.getArgument( 0, Collection.class ); return searchResult.stream() - .map( sr -> SearchResult.from( sr, new GeneValueObject( 
sr.getResultObject() ) ) ) + .map( sr -> sr.withResultObject( new GeneValueObject( sr.getResultObject() ) ) ) .collect( Collectors.toList() ); } ); when( searchService.getSupportedResultTypes() ).thenReturn( Collections.singleton( Gene.class ) ); @@ -147,13 +147,12 @@ public void testSearchEverything() throws SearchException { @Test public void testSearchByTaxon() throws SearchException { SearchService.SearchResultMap srm = mock( SearchService.SearchResultMap.class ); - when( srm.getByResultObjectType( Gene.class ) ).thenReturn( Collections.singletonList( SearchResult.from( Gene.class, gene, 1.0, "test object" ) ) ); + when( srm.getByResultObjectType( Gene.class ) ).thenReturn( Collections.singletonList( SearchResult.from( Gene.class, gene, 1.0, null, "test object" ) ) ); when( searchService.search( any() ) ).thenReturn( srm ); when( searchService.loadValueObject( any() ) ).thenAnswer( args -> { //noinspection unchecked SearchResult searchResult = args.getArgument( 0, SearchResult.class ); - SearchResult sr = SearchResult.from( searchResult.getResultType(), searchResult.getResultId(), searchResult.getScore(), "test object" ); - searchResult.setHighlights( searchResult.getHighlights() ); + SearchResult sr = SearchResult.from( searchResult.getResultType(), searchResult.getResultId(), searchResult.getScore(), searchResult.getHighlights(), "test object" ); if ( searchResult.getResultObject() != null ) { sr.setResultObject( new GeneValueObject( searchResult.getResultObject() ) ); } @@ -166,13 +165,12 @@ public void testSearchByTaxon() throws SearchException { @Test public void testSearchByArrayDesign() throws SearchException { SearchService.SearchResultMap srm = mock( SearchService.SearchResultMap.class ); - when( srm.getByResultObjectType( Gene.class ) ).thenReturn( Collections.singletonList( SearchResult.from( Gene.class, gene, 1.0, "test object" ) ) ); + when( srm.getByResultObjectType( Gene.class ) ).thenReturn( Collections.singletonList( SearchResult.from( Gene.class, gene, 1.0, null, "test object" ) ) ); when( searchService.search( any() ) ).thenReturn( srm ); when( searchService.loadValueObject( any() ) ).thenAnswer( args -> { //noinspection unchecked SearchResult searchResult = args.getArgument( 0, SearchResult.class ); - SearchResult sr = SearchResult.from( searchResult.getResultType(), searchResult.getResultId(), searchResult.getScore(), "test object" ); - sr.setHighlights( searchResult.getHighlights() ); + SearchResult sr = SearchResult.from( searchResult.getResultType(), searchResult.getResultId(), searchResult.getScore(), searchResult.getHighlights(), "test object" ); if ( searchResult.getResultObject() != null ) { sr.setResultObject( new GeneValueObject( searchResult.getResultObject() ) ); } diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/GeneralSearchControllerImpl.java b/gemma-web/src/main/java/ubic/gemma/web/controller/GeneralSearchControllerImpl.java index d3b58c6cab..ccae6778a7 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/GeneralSearchControllerImpl.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/GeneralSearchControllerImpl.java @@ -60,10 +60,12 @@ import javax.annotation.ParametersAreNonnullByDefault; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import java.net.URI; import java.util.*; import java.util.stream.Collectors; import static org.apache.commons.text.StringEscapeUtils.escapeHtml4; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.prepareTermUriQuery; /** * Note: 
do not use parametrized collections as parameters for ajax methods in this class! Type information is lost @@ -225,13 +227,18 @@ public ModelAndView doSearch( HttpServletRequest request, HttpServletResponse re ModelAndView mav = new ModelAndView( "generalSearch" ); - if ( !this.searchStringValidator( command.getQuery() ) && StringUtils.isBlank( command.getTermUri() ) ) { + if ( !searchStringValidator( command.getQuery() ) ) { throw new IllegalArgumentException( "Invalid query" ); } // Need this for the bookmarkable links mav.addObject( "SearchString", command.getQuery() ); - mav.addObject( "SearchURI", command.getTermUri() ); + try { + URI termUri = prepareTermUriQuery( command ); + mav.addObject( "SearchURI", termUri != null ? termUri.toString() : null ); + } catch ( SearchException e ) { + mav.addObject( "SearchURI", null ); + } if ( ( command.getTaxon() != null ) && ( command.getTaxon().getId() != null ) ) mav.addObject( "searchTaxon", command.getTaxon().getScientificName() ); From fe0c9700100338fb479c3afc377135849b335577 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 2 Apr 2024 12:42:09 -0700 Subject: [PATCH 095/105] Cover more edge cases when parsing fielded or quoted URIs --- .../core/search/lucene/LuceneQueryUtils.java | 40 ++++++++++++------- .../search/lucene/LuceneQueryUtilsTest.java | 12 ++++++ 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java index fd8682d380..924e14ee5a 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java @@ -200,38 +200,50 @@ private static String prepareDatabaseQueryInternal( Query query, boolean allowWi return null; } + @Nullable + public static URI prepareTermUriQuery( SearchSettings settings ) throws SearchException { + Query query = parseSafely( settings, createQueryParser() ); + if ( query instanceof TermQuery ) { + Term term = ( ( TermQuery ) query ).getTerm(); + return tryParseUri( term ); + } + return null; + } + /** * Check if a given term is global (i.e. not fielded). *

* This includes the corner case when a term is a URI and would be parsed as a fielded term. */ private static boolean isTermGlobal( Term term ) { - return term.field().isEmpty() || term.field().equals( "http" ) || term.field().equals( "https" ); + return term.field().isEmpty() || tryParseUri( term ) != null; } /** * Extract a suitable string from a term, detecting URIs that would be parsed as a fielded term. */ private static String termToString( Term term ) { - if ( term.field().equals( "http" ) || term.field().equals( "https" ) ) { - return term.field() + ":" + term.text(); + URI uri; + if ( ( uri = tryParseUri( term ) ) != null ) { + return uri.toString(); } else { return term.text(); } } @Nullable - public static URI prepareTermUriQuery( SearchSettings settings ) throws SearchException { - Query query = parseSafely( settings, createQueryParser() ); - if ( query instanceof TermQuery ) { - Term term = ( ( TermQuery ) query ).getTerm(); - if ( term.field().equals( "http" ) || term.field().equals( "https" ) ) { - String candidateUri = term.field() + ":" + term.text(); - try { - return new URI( candidateUri ); - } catch ( URISyntaxException e ) { - return null; - } + private static URI tryParseUri( Term term ) { + if ( term.text().startsWith( "http://" ) || term.text().startsWith( "https://" ) ) { + try { + return new URI( term.text() ); + } catch ( URISyntaxException e ) { + // ignore, it will be treated as a term term + } + } else if ( ( term.field().equals( "http" ) || term.field().equals( "https" ) ) && term.text().startsWith( "//" ) ) { + try { + return new URI( term.field() + ":" + term.text() ); + } catch ( URISyntaxException e ) { + // ignore, it will be treated as a fielded term } } return null; diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/lucene/LuceneQueryUtilsTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/lucene/LuceneQueryUtilsTest.java index 77d5bd6eff..5b52dbf964 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/lucene/LuceneQueryUtilsTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/lucene/LuceneQueryUtilsTest.java @@ -4,6 +4,8 @@ import ubic.gemma.core.search.SearchException; import ubic.gemma.model.common.search.SearchSettings; +import java.net.URI; + import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.util.Sets.set; import static org.junit.Assert.*; @@ -123,4 +125,14 @@ public void testIsWildcard() { assertFalse( isWildcard( SearchSettings.geneSearch( "BRCA\\*", null ) ) ); assertFalse( isWildcard( SearchSettings.geneSearch( "\"BRCA1\" \"BRCA2\"", null ) ) ); } + + @Test + public void testPrepareTermUriQuery() throws SearchException { + assertEquals( URI.create( "http://example.com" ), prepareTermUriQuery( SearchSettings.geneSearch( "http://example.com", null ) ) ); + assertEquals( URI.create( "http://example.com" ), prepareTermUriQuery( SearchSettings.geneSearch( "\"http://example.com\"", null ) ) ); + // an invalid URI + assertNull( prepareTermUriQuery( SearchSettings.geneSearch( "\"http://example.com /test\"", null ) ) ); + // an interesting case: a fielded search for a URI + assertEquals( URI.create( "http://example.com" ), prepareTermUriQuery( SearchSettings.geneSearch( "http:\"http://example.com\"", null ) ) ); + } } \ No newline at end of file From 5dcc9814e18ea60002a64610c18acd6070ee66e6 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 2 Apr 2024 12:50:51 -0700 Subject: [PATCH 096/105] Introduce an argument type for search queries Move logic for checking 
if the query is blank in QueryArg.valueOf(). This allows Swagger to generate a component in the documentation. Improve wording in the QueryArg documentation. --- .../ubic/gemma/rest/DatasetsWebService.java | 25 ++++++++--------- .../ubic/gemma/rest/SearchWebService.java | 16 ++++------- .../swagger/resolver/CustomModelResolver.java | 4 +-- .../rest/util/args/DatasetArgService.java | 9 ++---- .../ubic/gemma/rest/util/args/QueryArg.java | 28 +++++++++++++++++++ .../restapidocs/fragments/QueryType.md | 28 +++++++++---------- .../ubic/gemma/rest/SearchWebServiceTest.java | 14 +++++----- 7 files changed, 70 insertions(+), 54 deletions(-) create mode 100644 gemma-rest/src/main/java/ubic/gemma/rest/util/args/QueryArg.java diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java index d07cddd157..8043154da4 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/DatasetsWebService.java @@ -27,6 +27,7 @@ import org.apache.commons.io.FilenameUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.search.highlight.Highlighter; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.security.access.annotation.Secured; import org.springframework.stereotype.Service; @@ -89,8 +90,6 @@ import java.util.stream.Collectors; import java.util.zip.GZIPInputStream; -import static ubic.gemma.rest.SearchWebService.QUERY_SCHEMA_NAME; - /** * RESTful interface for datasets. * @@ -175,7 +174,7 @@ public Map highlightDocument( Document document, org.apache.luce @Produces(MediaType.APPLICATION_JSON) @Operation(summary = "Retrieve all datasets") public QueriedAndFilteredAndPaginatedResponseDataObject getDatasets( // Params: - @Parameter(schema = @Schema(name = QUERY_SCHEMA_NAME)) @QueryParam("query") String query, + @QueryParam("query") QueryArg query, @QueryParam("filter") @DefaultValue("") FilterArg filterArg, // Optional, default null @QueryParam("offset") @DefaultValue("0") OffsetArg offsetArg, // Optional, default 0 @QueryParam("limit") @DefaultValue("20") LimitArg limitArg, // Optional, default 20 @@ -208,7 +207,7 @@ public QueriedAndFilteredAndPaginatedResponseDataObject( vos, Sort.by( null, "searchResult.score", Sort.Direction.DESC ), offset, limit, ( long ) ids.size() ) .map( vo -> new ExpressionExperimentWithSearchResultValueObject( vo, resultById.get( vo.getId() ) ) ), - query, filters, new String[] { "id" } ); + query.getValue(), filters, new String[] { "id" } ); } else { return Responder.queryAndPaginate( expressionExperimentService.loadValueObjectsWithCache( filters, sort, offset, limit ).map( vo -> new ExpressionExperimentWithSearchResultValueObject( vo, null ) ), @@ -239,7 +238,7 @@ public ExpressionExperimentWithSearchResultValueObject( ExpressionExperimentValu @Produces(MediaType.APPLICATION_JSON) @Operation(summary = "Count datasets matching the provided query and filter") public ResponseDataObject getNumberOfDatasets( - @Parameter(schema = @Schema(name = QUERY_SCHEMA_NAME)) @QueryParam("query") String query, + @QueryParam("query") QueryArg query, @QueryParam("filter") @DefaultValue("") FilterArg filter ) { Filters filters = datasetArgService.getFilters( filter ); @@ -265,7 +264,7 @@ public interface UsageStatistics { @Operation(summary = "Retrieve usage statistics of platforms among datasets matching the provided query and filter", description = 
"Usage statistics are aggregated across experiment tags, samples and factor values mentioned in the experimental design.") public LimitedResponseDataObject getDatasetsPlatformsUsageStatistics( - @Parameter(schema = @Schema(name = QUERY_SCHEMA_NAME)) @QueryParam("query") String query, + @QueryParam("query") QueryArg query, @QueryParam("filter") @DefaultValue("") FilterArg filter, @QueryParam("limit") @DefaultValue("50") LimitArg limit ) { @@ -286,7 +285,7 @@ public LimitedResponseDataObject getD .map( e -> new ArrayDesignWithUsageStatisticsValueObject( e, countsById.get( e.getId() ), tts.getOrDefault( TechnologyType.valueOf( e.getTechnologyType() ), 0L ) ) ) .sorted( Comparator.comparing( UsageStatistics::getNumberOfExpressionExperiments, Comparator.reverseOrder() ) ) .collect( Collectors.toList() ); - return Responder.limit( results, query, filters, new String[] { "id" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ), l ); + return Responder.limit( results, query != null ? query.getValue() : null, filters, new String[] { "id" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ), l ); } @Value @@ -304,7 +303,7 @@ public static class CategoryWithUsageStatisticsValueObject implements UsageStati @Operation(summary = "Retrieve usage statistics of categories among datasets matching the provided query and filter", description = "Usage statistics are aggregated across experiment tags, samples and factor values mentioned in the experimental design.") public QueriedAndFilteredResponseDataObject getDatasetsCategoriesUsageStatistics( - @Parameter(schema = @Schema(name = QUERY_SCHEMA_NAME)) @QueryParam("query") String query, + @QueryParam("query") QueryArg query, @QueryParam("filter") @DefaultValue("") FilterArg filter, @QueryParam("limit") @DefaultValue("20") LimitArg limit, @Parameter(description = "Excluded category URIs.", hidden = true) @QueryParam("excludedCategories") StringArrayArg excludedCategoryUris, @@ -336,7 +335,7 @@ public QueriedAndFilteredResponseDataObject new CategoryWithUsageStatisticsValueObject( e.getKey().getCategoryUri(), e.getKey().getCategory(), e.getValue() ) ) .sorted( Comparator.comparing( UsageStatistics::getNumberOfExpressionExperiments, Comparator.reverseOrder() ) ) .collect( Collectors.toList() ); - return Responder.queryAndFilter( results, query, filters, new String[] { "classUri", "className" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ) ); + return Responder.queryAndFilter( results, query != null ? query.getValue() : null, filters, new String[] { "classUri", "className" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ) ); } @Value @@ -364,7 +363,7 @@ public ArrayDesignWithUsageStatisticsValueObject( ArrayDesignValueObject arrayDe @Operation(summary = "Retrieve usage statistics of annotations among datasets matching the provided query and filter", description = "Usage statistics are aggregated across experiment tags, samples and factor values mentioned in the experimental design.") public LimitedResponseDataObject getDatasetsAnnotationsUsageStatistics( - @Parameter(schema = @Schema(name = QUERY_SCHEMA_NAME)) @QueryParam("query") String query, + @QueryParam("query") QueryArg query, @QueryParam("filter") @DefaultValue("") FilterArg filter, @Parameter(description = "List of fields to exclude from the payload. 
Only `parentTerms` can be excluded.") @QueryParam("exclude") ExcludeArg exclude, @Parameter(description = "Maximum number of annotations to returned; capped at " + MAX_DATASETS_ANNOTATIONS + ".", schema = @Schema(type = "integer", minimum = "1", maximum = "" + MAX_DATASETS_ANNOTATIONS)) @QueryParam("limit") LimitArg limitArg, @@ -409,7 +408,7 @@ public LimitedResponseDataObject getDa .stream() .map( e -> new AnnotationWithUsageStatisticsValueObject( e.getCharacteristic(), e.getNumberOfExpressionExperiments(), !excludeParentTerms && e.getTerm() != null ? getParentTerms( e.getTerm(), visited ) : null ) ) .collect( Collectors.toList() ); - return Responder.limit( results, query, filters, new String[] { "classUri", "className", "termUri", "termName" }, + return Responder.limit( results, query != null ? query.getValue() : null, filters, new String[] { "classUri", "className", "termUri", "termName" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ), limit ); } @@ -501,7 +500,7 @@ public AnnotationWithUsageStatisticsValueObject( Characteristic c, Long numberOf @Produces(MediaType.APPLICATION_JSON) @Operation(summary = "Retrieve taxa usage statistics for datasets matching the provided query and filter") public QueriedAndFilteredResponseDataObject getDatasetsTaxaUsageStatistics( - @Parameter(schema = @Schema(name = QUERY_SCHEMA_NAME)) @QueryParam("query") String query, + @QueryParam("query") QueryArg query, @QueryParam("filter") @DefaultValue("") FilterArg filterArg ) { Filters filters = datasetArgService.getFilters( filterArg ); @@ -515,7 +514,7 @@ public QueriedAndFilteredResponseDataObject .entrySet().stream() .sorted( Map.Entry.comparingByValue( Comparator.reverseOrder() ) ) .map( e -> new TaxonWithUsageStatisticsValueObject( e.getKey(), e.getValue() ) ) - .collect( Collectors.toList() ), query, filters, new String[] { "id" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ) ); + .collect( Collectors.toList() ), query != null ? query.getValue() : null, filters, new String[] { "id" }, Sort.by( null, "numberOfExpressionExperiments", Sort.Direction.DESC, "numberOfExpressionExperiments" ) ); } @Value diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java index a3d634ae09..5a47eac5e1 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java @@ -7,7 +7,6 @@ import io.swagger.v3.oas.annotations.media.Schema; import lombok.Value; import lombok.extern.apachecommons.CommonsLog; -import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; @@ -61,12 +60,7 @@ public class SearchWebService { /** - * Name used in the OpenAPI schema to identify a search query. - */ - public static final String QUERY_SCHEMA_NAME = "SearchQueryType"; - - /** - * Name used in the OpenAPI schema to identify result types as per {@link #search(String, TaxonArg, PlatformArg, List, LimitArg, ExcludeArg)}'s + * Name used in the OpenAPI schema to identify result types as per {@link #search(QueryArg, TaxonArg, PlatformArg, List, LimitArg, ExcludeArg)}'s * fourth argument. 
*/ public static final String RESULT_TYPES_SCHEMA_NAME = "SearchResultType"; @@ -137,15 +131,15 @@ public Map highlightDocument( Document document, org.apache.luce @Produces(MediaType.APPLICATION_JSON_VALUE) @Operation(summary = "Search everything in Gemma") public SearchResultsResponseDataObject search( - @QueryParam("query") @Schema(name = QUERY_SCHEMA_NAME) String query, + @QueryParam("query") QueryArg query, @QueryParam("taxon") TaxonArg taxonArg, @QueryParam("platform") PlatformArg platformArg, @Parameter(array = @ArraySchema(schema = @Schema(name = RESULT_TYPES_SCHEMA_NAME, hidden = true))) @QueryParam("resultTypes") List resultTypes, @Parameter(description = "Maximum number of search results to return; capped at " + MAX_SEARCH_RESULTS + " unless `resultObject` is excluded.", schema = @Schema(type = "integer", minimum = "1", maximum = "" + MAX_SEARCH_RESULTS)) @QueryParam("limit") LimitArg limit, @Parameter(description = "List of fields to exclude from the payload. Only `resultObject` is supported.") @QueryParam("exclude") ExcludeArg> excludeArg ) { - if ( StringUtils.isBlank( query ) ) { - throw new BadRequestException( "A non-empty query must be supplied." ); + if ( query == null ) { + throw new BadRequestException( "A query must be supplied." ); } Map> supportedResultTypesByName = searchService.getSupportedResultTypes().stream() .collect( Collectors.toMap( Class::getName, identity() ) ); @@ -172,7 +166,7 @@ public SearchResultsResponseDataObject search( } SearchSettings searchSettings = SearchSettings.builder() - .query( query ) + .query( query.getValue() ) .taxon( taxonArg != null ? taxonArgService.getEntity( taxonArg ) : null ) .platformConstraint( platformArg != null ? platformArgService.getEntity( platformArg ) : null ) .resultTypes( resultTypesCls ) diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java b/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java index 52ffa0f054..0244217222 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java @@ -125,7 +125,7 @@ protected String resolveDescription( Annotated a, Annotation[] annotations, io.s return description == null ? availableProperties : description + "\n\n" + availableProperties; } - if ( schema != null && SearchWebService.QUERY_SCHEMA_NAME.equalsIgnoreCase( schema.name() ) ) { + if ( a != null && QueryArg.class.isAssignableFrom( a.getRawType() ) ) { try { return ( description != null ? description + "\n\n" : "" ) + IOUtils.toString( new ClassPathResource( "/restapidocs/fragments/QueryType.md" ).getInputStream(), StandardCharsets.UTF_8 ) @@ -150,7 +150,7 @@ protected Map resolveExtensions( Annotated a, Annotation[] annot extensions.put( "x-gemma-filterable-properties", resolveAvailableProperties( a ) ); extensions = Collections.unmodifiableMap( extensions ); } - if ( schema != null && SearchWebService.QUERY_SCHEMA_NAME.equals( schema.name() ) ) { + if ( a != null && QueryArg.class.isAssignableFrom( a.getRawType() ) ) { extensions = extensions != null ? 
new HashMap<>( extensions ) : new HashMap<>(); extensions.put( "x-gemma-searchable-properties", getSearchableProperties() ); extensions = Collections.unmodifiableMap( extensions ); diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java index 1f5032e3f1..bd53cd502c 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java @@ -107,13 +107,10 @@ public Filters getFilters( FilterArg filterArg, @Nullable * @param highlighter a highlighter to use for the query or null to ignore * @throws BadRequestException if the query is empty */ - public List> getResultsForSearchQuery( String query, @Nullable Highlighter highlighter ) throws BadRequestException { - if ( StringUtils.isBlank( query ) ) { - throw new BadRequestException( "A non-empty query must be supplied." ); - } + public List> getResultsForSearchQuery( QueryArg query, @Nullable Highlighter highlighter ) throws BadRequestException { try { SearchSettings settings = SearchSettings.builder() - .query( query ) + .query( query.getValue() ) .resultType( ExpressionExperiment.class ) .highlighter( highlighter ) .fillResults( false ) @@ -133,7 +130,7 @@ public List> getResultsForSearchQuery( String * @param scoreById if non-null, a destination for storing the scores by result ID * @throws BadRequestException if the query is empty */ - public Set getIdsForSearchQuery( String query, @Nullable Map scoreById ) throws BadRequestException { + public Set getIdsForSearchQuery( QueryArg query, @Nullable Map scoreById ) throws BadRequestException { List> _results = getResultsForSearchQuery( query, null ); if ( scoreById != null ) { for ( SearchResult result : _results ) { diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QueryArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QueryArg.java new file mode 100644 index 0000000000..3ae2d52e6a --- /dev/null +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QueryArg.java @@ -0,0 +1,28 @@ +package ubic.gemma.rest.util.args; + +import org.apache.commons.lang.StringUtils; +import ubic.gemma.rest.util.MalformedArgException; + +public class QueryArg implements Arg { + + private final String value; + + private QueryArg( String value ) { + this.value = value; + } + + @Override + public String getValue() { + return value; + } + + /** + * @throws MalformedArgException if the query string is blank + */ + public static QueryArg valueOf( String s ) throws MalformedArgException { + if ( StringUtils.isBlank( s ) ) { + throw new MalformedArgException( "" ); + } + return new QueryArg( s ); + } +} diff --git a/gemma-rest/src/main/resources/restapidocs/fragments/QueryType.md b/gemma-rest/src/main/resources/restapidocs/fragments/QueryType.md index 7036017ebf..936ccea18b 100644 --- a/gemma-rest/src/main/resources/restapidocs/fragments/QueryType.md +++ b/gemma-rest/src/main/resources/restapidocs/fragments/QueryType.md @@ -1,17 +1,15 @@ -# Search query syntax - The search query accepts the following syntax: -| | | | -|--------------|-----------------------------|---------------------------------------------------------------------------------------------------------------------------------| -| Conjunction | `alpha AND beta AND gamma` | Results must contain "alpha", "beta" and "gamma". | -| Disjunction | `alpha OR beta OR gamma` | Results must contain either "alpha", "beta" or "gamma". 
This is the default when multiple terms are supplied. | -| Grouping | `(alpha OR beta) AND gamma` | Results must contain one of "alpha" or "beta" and also "gamma". | -| Exact Search | `"alpha beta gamma"` | Results must contain the exact phrase "alpha beta gamma". | -| Field | `shortName:GSE00001` | Datasets with short name GSE00001.

List of supported dataset fields{searchableProperties}
| -| Prefix | `alpha*` | Results must start with "alpha". | -| Wildcard | `BRCA?` | Results can contain any letter for the `?`. In this example, BRCA1 and BRCA2 would be matched. | -| Fuzzy | `alpha~` | Results can approximate "alpha". In this example, "aleph" would be accepted. | -| Boosting | `alpha^2 beta` | Results mentioning "alpha" are ranked higher over those containing only "beta". | -| Require | `+alpha beta` | Results must mention "alpha" and optionally "beta". | -| Escape | `\+alpha` | Results must mention "+alpha". Any special character from the search syntax can be escaped by prepending it with "\". | +| | | | +|--------------|-----------------------------|------------------------------------------------------------------------------------------------------------------------| +| Conjunction | `alpha AND beta AND gamma` | Results must contain "alpha", "beta" and "gamma". | +| Disjunction | `alpha OR beta OR gamma` | Results must contain either "alpha", "beta" or "gamma". This is the default when multiple terms are supplied. | +| Grouping | `(alpha OR beta) AND gamma` | Results must contain one of "alpha" or "beta" and also "gamma". | +| Exact Search | `"alpha beta gamma"` | Results must contain the exact phrase "alpha beta gamma". | +| Field | `shortName:GSE00001` | Results with short name GSE00001.
List of supported fields{searchableProperties}
| +| Prefix | `alpha*` | Results must start with "alpha". | +| Wildcard | `BRCA?` | Results can contain any letter for the `?`. In this example, BRCA1 and BRCA2 would be matched. | +| Fuzzy | `alpha~` | Results can approximate "alpha". In this example, "aleph" would be accepted. | +| Boosting | `alpha^2 beta` | Results mentioning "alpha" are ranked higher over those containing only "beta". | +| Require | `+alpha beta` | Results must mention "alpha" and optionally "beta". | +| Escape | `\+alpha` | Results must mention "+alpha". Any special character from the search syntax can be escaped by prepending it with "\". | diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java index 94b992dc15..64e88f854a 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java @@ -124,7 +124,7 @@ public void testSearchEverything() throws SearchException { } ); when( searchService.getSupportedResultTypes() ).thenReturn( Collections.singleton( Gene.class ) ); - SearchWebService.SearchResultsResponseDataObject searchResults = searchWebService.search( "BRCA1", null, null, null, LimitArg.valueOf( "20" ), null ); + SearchWebService.SearchResultsResponseDataObject searchResults = searchWebService.search( QueryArg.valueOf( "BRCA1" ), null, null, null, LimitArg.valueOf( "20" ), null ); assertThat( searchSettingsArgumentCaptor.getValue() ) .hasFieldOrPropertyWithValue( "query", "BRCA1" ) @@ -158,7 +158,7 @@ public void testSearchByTaxon() throws SearchException { } return sr; } ); - searchWebService.search( "BRCA1", TaxonArg.valueOf( "9606" ), null, null, LimitArg.valueOf( "20" ), null ); + searchWebService.search( QueryArg.valueOf( "BRCA1" ), TaxonArg.valueOf( "9606" ), null, null, LimitArg.valueOf( "20" ), null ); verify( taxonService ).findByNcbiId( 9606 ); } @@ -176,7 +176,7 @@ public void testSearchByArrayDesign() throws SearchException { } return sr; } ); - searchWebService.search( "BRCA1", null, PlatformArg.valueOf( "1" ), null, LimitArg.valueOf( "20" ), null ); + searchWebService.search( QueryArg.valueOf( "BRCA1" ), null, PlatformArg.valueOf( "1" ), null, LimitArg.valueOf( "20" ), null ); verify( arrayDesignService ).load( 1L ); } @@ -187,21 +187,21 @@ public void testSearchWhenQueryIsMissing() { @Test(expected = BadRequestException.class) public void testSearchWhenQueryIsEmpty() { - searchWebService.search( null, null, null, null, LimitArg.valueOf( "20" ), null ); + QueryArg.valueOf( "" ); } @Test(expected = NotFoundException.class) public void testSearchWhenUnknownTaxonIsProvided() { - searchWebService.search( "brain", TaxonArg.valueOf( "9607" ), null, null, LimitArg.valueOf( "20" ), null ); + searchWebService.search( QueryArg.valueOf( "brain" ), TaxonArg.valueOf( "9607" ), null, null, LimitArg.valueOf( "20" ), null ); } @Test(expected = NotFoundException.class) public void testSearchWhenUnknownPlatformIsProvided() { - searchWebService.search( "brain", null, PlatformArg.valueOf( "2" ), null, LimitArg.valueOf( "20" ), null ); + searchWebService.search( QueryArg.valueOf( "brain" ), null, PlatformArg.valueOf( "2" ), null, LimitArg.valueOf( "20" ), null ); } @Test(expected = BadRequestException.class) public void testSearchWhenUnsupportedResultTypeIsProvided() { - searchWebService.search( "brain", null, null, Collections.singletonList( "ubic.gemma.model.expression.designElement.CompositeSequence2" ), LimitArg.valueOf( "20" ), null 
); + searchWebService.search( QueryArg.valueOf( "brain" ), null, null, Collections.singletonList( "ubic.gemma.model.expression.designElement.CompositeSequence2" ), LimitArg.valueOf( "20" ), null ); } } \ No newline at end of file From 8ce580c77effcf778b11481725e824e5dc63babf Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Tue, 2 Apr 2024 15:11:18 -0700 Subject: [PATCH 097/105] Use a $ref for the QueryArg model and fix some broken tests --- .../swagger/resolver/CustomModelResolver.java | 6 +++--- .../java/ubic/gemma/rest/util/args/QueryArg.java | 6 +++++- .../ubic/gemma/rest/DatasetsWebServiceTest.java | 2 +- .../test/java/ubic/gemma/rest/OpenApiTest.java | 16 +++++++++++----- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java b/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java index 0244217222..e04e33745a 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/swagger/resolver/CustomModelResolver.java @@ -73,7 +73,7 @@ public Schema resolve( AnnotatedType type, ModelConverterContext context, Iterat } if ( t.isTypeOrSubTypeOf( FilterArg.Filter.class ) || t.isTypeOrSubTypeOf( SortArg.Sort.class ) ) { return null; // ignore those... - } else if ( t.isTypeOrSubTypeOf( FilterArg.class ) || t.isTypeOrSubTypeOf( SortArg.class ) ) { + } else if ( t.isTypeOrSubTypeOf( FilterArg.class ) || t.isTypeOrSubTypeOf( SortArg.class ) || t.isTypeOrSubTypeOf( QueryArg.class ) ) { Schema resolved = super.resolve( type, context, chain ); String ref = resolved.get$ref(); // FilterArg and SortArg schemas in parameters are refs to globally-defined schemas and those are @@ -89,7 +89,7 @@ public Schema resolve( AnnotatedType type, ModelConverterContext context, Iterat // definitions in the class's Schema annotation Schema resolvedSchema = super.resolve( new AnnotatedType( t.getRawClass() ), context, chain ); // There's a bug with abstract class such as TaxonArg and GeneArg that result in the schema containing 'type' - // and 'properties' fields instead of solely emiting the oneOf + // and 'properties' fields instead of solely emitting the oneOf if ( t.isAbstract() ) { return resolvedSchema.type( null ).properties( null ); } else { @@ -101,7 +101,7 @@ public Schema resolve( AnnotatedType type, ModelConverterContext context, Iterat } /** - * Resolves allowed values for the {@link ubic.gemma.rest.SearchWebService#search(String, TaxonArg, PlatformArg, List, LimitArg, ExcludeArg)} + * Resolves allowed values for the {@link ubic.gemma.rest.SearchWebService#search(QueryArg, TaxonArg, PlatformArg, List, LimitArg, ExcludeArg)} * resultTypes argument. *
<p>
* This ensures that the OpenAPI specification exposes all supported search result types in the {@link SearchService} as diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QueryArg.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QueryArg.java index 3ae2d52e6a..7333835874 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QueryArg.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/QueryArg.java @@ -1,8 +1,12 @@ package ubic.gemma.rest.util.args; +import io.swagger.v3.oas.annotations.ExternalDocumentation; +import io.swagger.v3.oas.annotations.media.Schema; import org.apache.commons.lang.StringUtils; import ubic.gemma.rest.util.MalformedArgException; +@Schema(type = "string", description = "Filter results matching the given full-text query.", + externalDocs = @ExternalDocumentation(url = "https://lucene.apache.org/core/3_6_2/queryparsersyntax.html")) public class QueryArg implements Arg { private final String value; @@ -21,7 +25,7 @@ public String getValue() { */ public static QueryArg valueOf( String s ) throws MalformedArgException { if ( StringUtils.isBlank( s ) ) { - throw new MalformedArgException( "" ); + throw new MalformedArgException( "The query cannot be empty." ); } return new QueryArg( s ); } diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java index 5099855e36..15cc416a1b 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/DatasetsWebServiceTest.java @@ -226,7 +226,7 @@ public void testGetDatasetsWithQuery() throws SearchException { @Test public void testGetDatasetsWithEmptyQuery() { - assertThat( target( "/datasets" ).queryParam( "query", "" ).request().get() ) + assertThat( target( "/datasets" ).queryParam( "query", " " ).request().get() ) .hasStatus( Response.Status.BAD_REQUEST ) .hasMediaTypeCompatibleWith( MediaType.APPLICATION_JSON_TYPE ); } diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/OpenApiTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/OpenApiTest.java index d3e7d318c4..f9b050fbdf 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/OpenApiTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/OpenApiTest.java @@ -8,6 +8,7 @@ import io.swagger.v3.oas.models.OpenAPI; import io.swagger.v3.oas.models.media.Schema; import lombok.Data; +import org.assertj.core.api.Assertions; import org.junit.Before; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; @@ -181,11 +182,16 @@ public void testSearchableProperties() { assertThat( spec.getPaths().get( "/search" ).getGet().getParameters() ) .anySatisfy( p -> { assertThat( p.getName() ).isEqualTo( "query" ); - assertThat( p.getSchema().getType() ).isEqualTo( "string" ); - //noinspection unchecked - assertThat( p.getSchema().getExtensions() ) - .isNotNull() - .containsEntry( "x-gemma-searchable-properties", Collections.singletonMap( ExpressionExperiment.class.getName(), Collections.singletonList( "shortName" ) ) ); + assertThat( p.getSchema().get$ref() ).isEqualTo( "#/components/schemas/QueryArg" ); } ); + assertThat( spec.getComponents().getSchemas().get( "QueryArg" ) ).satisfies( s -> { + assertThat( s.getType() ).isEqualTo( "string" ); + //noinspection unchecked + assertThat( s.getExtensions() ) + .isNotNull() + .containsEntry( "x-gemma-searchable-properties", Collections.singletonMap( ExpressionExperiment.class.getName(), Collections.singletonList( "shortName" 
) ) ); + assertThat( s.getExternalDocs().getUrl() ) + .isEqualTo( "https://lucene.apache.org/core/3_6_2/queryparsersyntax.html" ); + } ); } } From 237acd24f0f609a6fafa0403fdd993e5746a6795 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 3 Apr 2024 10:24:40 -0700 Subject: [PATCH 098/105] Map T-ALL to MONDO_0004963 --- .../gemma/core/ontology/valueStringToOntologyTermMappings.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gemma-core/src/main/resources/ubic/gemma/core/ontology/valueStringToOntologyTermMappings.txt b/gemma-core/src/main/resources/ubic/gemma/core/ontology/valueStringToOntologyTermMappings.txt index 90124e1ba3..a02250c181 100644 --- a/gemma-core/src/main/resources/ubic/gemma/core/ontology/valueStringToOntologyTermMappings.txt +++ b/gemma-core/src/main/resources/ubic/gemma/core/ontology/valueStringToOntologyTermMappings.txt @@ -1271,8 +1271,8 @@ synovial membrane synovial membrane of synovial joint http://purl.obolibrary.org Synovial sarcoma tumor tissue synovial sarcoma http://purl.obolibrary.org/obo/MONDO_0010434 disease http://www.ebi.ac.uk/efo/EFO_0000408 T cell T cell http://purl.obolibrary.org/obo/CL_0000084 cell type http://www.ebi.ac.uk/efo/EFO_0000324 T cells T cell http://purl.obolibrary.org/obo/CL_0000084 cell type http://www.ebi.ac.uk/efo/EFO_0000324 -T-ALL T-cell adult acute lymphocytic leukemia http://purl.obolibrary.org/obo/MONDO_0003539 disease http://www.ebi.ac.uk/efo/EFO_0000408 -T-ALL diagnostic sample acute T cell leukemia http://purl.obolibrary.org/obo/MONDO_0003540 disease http://www.ebi.ac.uk/efo/EFO_0000408 +T-ALL T-cell acute lymphoblastic leukemia http://purl.obolibrary.org/obo/MONDO_0004963 disease http://www.ebi.ac.uk/efo/EFO_0000408 +T-ALL diagnostic sample T-cell acute lymphoblastic leukemia http://purl.obolibrary.org/obo/MONDO_0004963 disease http://www.ebi.ac.uk/efo/EFO_0000408 T-cell lymphoblasts T cell http://purl.obolibrary.org/obo/CL_0000084 cell type http://www.ebi.ac.uk/efo/EFO_0000324 T47D T-47D cell http://purl.obolibrary.org/obo/CLO_0009251 cell line http://purl.obolibrary.org/obo/CLO_0000031 tamoxifen tamoxifen http://purl.obolibrary.org/obo/CHEBI_41774 treatment http://www.ebi.ac.uk/efo/EFO_0000727 From 1b6fe600d06c5bff0a3f1c205f41e730ff7ccfb0 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 3 Apr 2024 10:43:16 -0700 Subject: [PATCH 099/105] Remove redundant 'brain' line with a non-breaking space --- .../gemma/core/ontology/valueStringToOntologyTermMappings.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/gemma-core/src/main/resources/ubic/gemma/core/ontology/valueStringToOntologyTermMappings.txt b/gemma-core/src/main/resources/ubic/gemma/core/ontology/valueStringToOntologyTermMappings.txt index a02250c181..b1e8ecd68c 100644 --- a/gemma-core/src/main/resources/ubic/gemma/core/ontology/valueStringToOntologyTermMappings.txt +++ b/gemma-core/src/main/resources/ubic/gemma/core/ontology/valueStringToOntologyTermMappings.txt @@ -127,7 +127,6 @@ Bone marrow sample bone marrow http://purl.obolibrary.org/obo/UBERON_0002371 org bone marrow-derived macrophages bone marrow macrophage http://purl.obolibrary.org/obo/CL_0002476 cell type http://www.ebi.ac.uk/efo/EFO_0000324 Borrelia burgdorferi Borreliella burgdorferi http://purl.obolibrary.org/obo/NCBITaxon_139 treatment http://www.ebi.ac.uk/efo/EFO_0000727 brain brain http://purl.obolibrary.org/obo/UBERON_0000955 organism part http://www.ebi.ac.uk/efo/EFO_0000635 -brainĀ  brain http://purl.obolibrary.org/obo/UBERON_0000955 organism 
part http://www.ebi.ac.uk/efo/EFO_0000635 Brain - Cerebellum cerebellum http://purl.obolibrary.org/obo/UBERON_0002037 organism part http://www.ebi.ac.uk/efo/EFO_0000635 Brain - Hippocampus Ammon's horn http://purl.obolibrary.org/obo/UBERON_0001954 organism part http://www.ebi.ac.uk/efo/EFO_0000635 brain (cortex) cerebral cortex http://purl.obolibrary.org/obo/UBERON_0000956 organism part http://www.ebi.ac.uk/efo/EFO_0000635 From 4d40a9e07b87184111d4063f1358ec077ae366e0 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 3 Apr 2024 11:02:08 -0700 Subject: [PATCH 100/105] Add mdo and obi to the list of excluded ontologies for testing initialization --- .../ubic/gemma/core/ontology/OntologyLoadingTest.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyLoadingTest.java b/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyLoadingTest.java index 595e112856..1814a302bb 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyLoadingTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/ontology/OntologyLoadingTest.java @@ -70,6 +70,12 @@ public class OntologyLoadingTest extends AbstractJUnit4SpringContextTests { @Autowired private UberonOntologyService uberon; + @Autowired + private ObiService obi; + + @Autowired + private MouseDevelopmentOntologyService mdo; + @Test public void testThatChebiDoesNotHaveInferenceEnabled() { assertThat( chebi.getInferenceMode() ).isEqualTo( OntologyService.InferenceMode.NONE ); @@ -84,7 +90,7 @@ public void testThatTGEMODoesNotProcessImports() { @Category(SlowTest.class) public void testInitializeAllOntologies() { // these are notoriously slow, so we skip them - List ignoredOntologies = Arrays.asList( efo, chebi, mp, mondo, clo, cl, hpo, uberon ); + List ignoredOntologies = Arrays.asList( efo, chebi, mp, mondo, clo, cl, hpo, uberon, obi, mdo ); List services = new ArrayList<>(); List> futures = new ArrayList<>(); for ( OntologyService os : ontologyServices ) { From 2610b296d50c5665604855ae072c2b3093212311 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 3 Apr 2024 11:44:17 -0700 Subject: [PATCH 101/105] Include a descriptive message when search parse fails --- .../core/search/ParseSearchException.java | 34 ++++++++++++++ .../lucene/LuceneParseSearchException.java | 25 ++-------- .../core/search/lucene/LuceneQueryUtils.java | 9 +++- .../gemma/rest/AnnotationsWebService.java | 7 ++- .../ubic/gemma/rest/SearchWebService.java | 10 ++-- .../rest/util/args/DatasetArgService.java | 11 ++--- .../ubic/gemma/rest/SearchWebServiceTest.java | 46 +++++++++++++++++-- .../ubic/gemma/rest/util/ResponseAssert.java | 4 ++ .../experiment/AnnotationController.java | 6 ++- .../genome/gene/GeneSetController.java | 6 ++- 10 files changed, 117 insertions(+), 41 deletions(-) create mode 100644 gemma-core/src/main/java/ubic/gemma/core/search/ParseSearchException.java diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/ParseSearchException.java b/gemma-core/src/main/java/ubic/gemma/core/search/ParseSearchException.java new file mode 100644 index 0000000000..a50708197d --- /dev/null +++ b/gemma-core/src/main/java/ubic/gemma/core/search/ParseSearchException.java @@ -0,0 +1,34 @@ +package ubic.gemma.core.search; + +import org.apache.lucene.queryParser.ParseException; + +import javax.annotation.Nullable; + +/** + * An exception that indicate that the search query could not be parsed. + *
<p>
+ * When that occurs, we typically reattempt to parse the query. + */ +public class ParseSearchException extends SearchException { + + @Nullable + private final ParseSearchException originalParseException; + + public ParseSearchException( String message, Throwable cause ) { + super( message, cause ); + this.originalParseException = null; + } + + public ParseSearchException( String message, Throwable cause, ParseSearchException originalParseException ) { + super( message, cause ); + this.originalParseException = originalParseException; + } + + /** + * The original {@link ParseException} if this query was reattempted. + */ + @Nullable + public ParseSearchException getOriginalParseException() { + return originalParseException; + } +} diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneParseSearchException.java b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneParseSearchException.java index 5f3fea37b8..ff6c39a5b8 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneParseSearchException.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneParseSearchException.java @@ -1,33 +1,18 @@ package ubic.gemma.core.search.lucene; import org.apache.lucene.queryParser.ParseException; -import ubic.gemma.core.search.SearchException; - -import javax.annotation.Nullable; +import ubic.gemma.core.search.ParseSearchException; /** * @author poirigui */ -public class LuceneParseSearchException extends SearchException { - - @Nullable - private final ParseException originalParseException; - - public LuceneParseSearchException( String message, ParseException cause ) { - super( message, cause ); - this.originalParseException = null; - } +public class LuceneParseSearchException extends ParseSearchException { - public LuceneParseSearchException( String message, ParseException cause, ParseException originalParseException ) { + public LuceneParseSearchException( String query, String message, ParseException cause ) { super( message, cause ); - this.originalParseException = originalParseException; } - /** - * The original {@link ParseException} if this query was reparsed without special characters. 
- */ - @Nullable - public ParseException getOriginalParseException() { - return originalParseException; + public LuceneParseSearchException( String query, String message, ParseException cause, LuceneParseSearchException originalParseException ) { + super( message, cause, originalParseException ); } } diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java index 924e14ee5a..739b8bc88d 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java @@ -1,6 +1,7 @@ package ubic.gemma.core.search.lucene; import lombok.extern.apachecommons.CommonsLog; +import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; @@ -41,11 +42,15 @@ public static Query parseSafely( SearchSettings settings, QueryParser queryParse return queryParser.parse( query ); } catch ( ParseException e ) { String strippedQuery = LUCENE_RESERVED_CHARS.matcher( settings.getQuery() ).replaceAll( "\\\\$0" ); - log.debug( String.format( "Failed to parse '%s'; will attempt to parse it without special characters '%s'.", query, strippedQuery ), e ); + log.debug( String.format( "Failed to parse '%s': %s.", query, ExceptionUtils.getRootCauseMessage( e ) ), e ); try { return queryParser.parse( strippedQuery ); } catch ( ParseException e2 ) { - throw new LuceneParseSearchException( String.format( "Failed to parse '%s' after attempting to parse it without special characters as '%s'.", query, strippedQuery ), e2, e ); + throw new LuceneParseSearchException( + strippedQuery, + ExceptionUtils.getRootCauseMessage( e2 ), + e2, + new LuceneParseSearchException( query, ExceptionUtils.getRootCauseMessage( e ), e ) ); } } } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java index 683e13ab60..bab26d600f 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/AnnotationsWebService.java @@ -32,14 +32,15 @@ import org.springframework.stereotype.Service; import ubic.gemma.core.expression.experiment.service.ExpressionExperimentSearchService; import ubic.gemma.core.ontology.OntologyService; +import ubic.gemma.core.search.ParseSearchException; import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; import ubic.gemma.core.search.SearchService; +import ubic.gemma.model.common.description.CharacteristicValueObject; import ubic.gemma.model.common.search.SearchSettings; import ubic.gemma.model.expression.experiment.ExpressionExperiment; import ubic.gemma.model.expression.experiment.ExpressionExperimentValueObject; import ubic.gemma.model.genome.Taxon; -import ubic.gemma.model.common.description.CharacteristicValueObject; import ubic.gemma.persistence.service.common.description.CharacteristicService; import ubic.gemma.persistence.service.expression.experiment.ExpressionExperimentService; import ubic.gemma.persistence.util.Filters; @@ -117,8 +118,10 @@ public ResponseDataObject> searchAnnotat } try { return Responder.respond( this.getTerms( query ) ); + } catch ( ParseSearchException e ) { + throw new BadRequestException( e.getMessage(), e ); } catch ( SearchException e ) { - throw new BadRequestException( 
"Invalid search query.", e ); + throw new InternalServerErrorException( e ); } } diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java b/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java index 5a47eac5e1..aa76d54662 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/SearchWebService.java @@ -7,17 +7,13 @@ import io.swagger.v3.oas.annotations.media.Schema; import lombok.Value; import lombok.extern.apachecommons.CommonsLog; -import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.http.MediaType; import org.springframework.stereotype.Service; import org.springframework.web.servlet.support.ServletUriComponentsBuilder; -import ubic.gemma.core.search.DefaultHighlighter; -import ubic.gemma.core.search.SearchException; -import ubic.gemma.core.search.SearchResult; -import ubic.gemma.core.search.SearchService; +import ubic.gemma.core.search.*; import ubic.gemma.core.search.lucene.SimpleMarkdownFormatter; import ubic.gemma.model.IdentifiableValueObject; import ubic.gemma.model.common.Identifiable; @@ -178,8 +174,10 @@ public SearchResultsResponseDataObject search( List> searchResults; try { searchResults = searchService.search( searchSettings ).toList(); + } catch ( ParseSearchException e ) { + throw new BadRequestException( e.getMessage(), e ); } catch ( SearchException e ) { - throw new BadRequestException( String.format( "Invalid search settings: %s.", ExceptionUtils.getRootCauseMessage( e ) ), e ); + throw new InternalServerErrorException( e ); } List>> searchResultVos; diff --git a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java index bd53cd502c..473f38a338 100644 --- a/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java +++ b/gemma-rest/src/main/java/ubic/gemma/rest/util/args/DatasetArgService.java @@ -1,16 +1,12 @@ package ubic.gemma.rest.util.args; import lombok.extern.apachecommons.CommonsLog; -import org.apache.commons.lang3.StringUtils; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import ubic.basecode.ontology.model.OntologyTerm; import ubic.gemma.core.analysis.preprocess.OutlierDetails; import ubic.gemma.core.analysis.preprocess.OutlierDetectionService; -import ubic.gemma.core.search.Highlighter; -import ubic.gemma.core.search.SearchException; -import ubic.gemma.core.search.SearchResult; -import ubic.gemma.core.search.SearchService; +import ubic.gemma.core.search.*; import ubic.gemma.model.common.description.AnnotationValueObject; import ubic.gemma.model.common.quantitationtype.QuantitationTypeValueObject; import ubic.gemma.model.common.search.SearchSettings; @@ -28,6 +24,7 @@ import javax.annotation.Nullable; import javax.ws.rs.BadRequestException; +import javax.ws.rs.InternalServerErrorException; import java.util.*; import java.util.stream.Collectors; @@ -116,8 +113,10 @@ public List> getResultsForSearchQuery( QueryA .fillResults( false ) .build(); return searchService.search( settings ).getByResultObjectType( ExpressionExperiment.class ); + } catch ( ParseSearchException e ) { + throw new MalformedArgException( e.getMessage(), e ); } catch ( SearchException e ) { - throw new MalformedArgException( "Invalid search 
query.", e ); + throw new InternalServerErrorException( e ); } } diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java b/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java index 64e88f854a..df8c34f41e 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/SearchWebServiceTest.java @@ -1,5 +1,9 @@ package ubic.gemma.rest; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.util.Version; +import org.hibernate.search.util.impl.PassThroughAnalyzer; import org.junit.After; import org.junit.Before; import org.junit.Test; @@ -7,14 +11,16 @@ import org.springframework.beans.factory.annotation.Autowired; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; +import org.springframework.security.access.AccessDecisionManager; import org.springframework.test.context.ActiveProfiles; import org.springframework.test.context.ContextConfiguration; -import org.springframework.test.context.junit4.AbstractJUnit4SpringContextTests; import org.springframework.test.context.web.WebAppConfiguration; import ubic.gemma.core.genome.gene.service.GeneService; import ubic.gemma.core.search.SearchException; import ubic.gemma.core.search.SearchResult; import ubic.gemma.core.search.SearchService; +import ubic.gemma.core.search.lucene.LuceneParseSearchException; import ubic.gemma.model.common.search.SearchSettings; import ubic.gemma.model.expression.arrayDesign.ArrayDesign; import ubic.gemma.model.genome.Gene; @@ -26,10 +32,15 @@ import ubic.gemma.persistence.service.genome.ChromosomeService; import ubic.gemma.persistence.service.genome.taxon.TaxonService; import ubic.gemma.persistence.util.TestComponent; +import ubic.gemma.rest.analytics.AnalyticsProvider; +import ubic.gemma.rest.util.Assertions; +import ubic.gemma.rest.util.BaseJerseyTest; +import ubic.gemma.rest.util.JacksonConfig; import ubic.gemma.rest.util.args.*; import javax.ws.rs.BadRequestException; import javax.ws.rs.NotFoundException; +import javax.ws.rs.core.Response; import java.util.Collection; import java.util.Collections; import java.util.stream.Collectors; @@ -41,10 +52,11 @@ @ActiveProfiles("web") @WebAppConfiguration @ContextConfiguration -public class SearchWebServiceTest extends AbstractJUnit4SpringContextTests { +public class SearchWebServiceTest extends BaseJerseyTest { @Configuration @TestComponent + @Import(JacksonConfig.class) public static class SearchWebServiceTestContextConfiguration { @Bean @@ -76,6 +88,16 @@ public TaxonArgService taxonArgService( TaxonService taxonService ) { public PlatformArgService platformArgService( ArrayDesignService arrayDesignService ) { return new PlatformArgService( arrayDesignService, mock( ExpressionExperimentService.class ), mock( CompositeSequenceService.class ) ); } + + @Bean + public AnalyticsProvider analyticsProvider() { + return mock(); + } + + @Bean + public AccessDecisionManager accessDecisionManager() { + return mock(); + } } @Autowired @@ -91,7 +113,7 @@ public PlatformArgService platformArgService( ArrayDesignService arrayDesignServ private Gene gene; @Before - public void setUp() { + public void setUpMocks() { gene = new Gene(); gene.setId( 1L ); gene.setOfficialSymbol( "BRCA1" ); @@ -204,4 +226,22 @@ public void testSearchWhenUnknownPlatformIsProvided() { public void 
testSearchWhenUnsupportedResultTypeIsProvided() { searchWebService.search( QueryArg.valueOf( "brain" ), null, null, Collections.singletonList( "ubic.gemma.model.expression.designElement.CompositeSequence2" ), LimitArg.valueOf( "20" ), null ); } + + @Test + public void testSearchWithInvalidQuery() throws SearchException { + when( searchService.search( any() ) ).thenAnswer( a -> { + try { + new QueryParser( Version.LUCENE_36, "", new PassThroughAnalyzer( Version.LUCENE_36 ) ) + .parse( a.getArgument( 0, SearchSettings.class ).getQuery() ); + } catch ( ParseException e ) { + throw new LuceneParseSearchException( "\"", e.getMessage(), e ); + } + return mock(); + } ); + Assertions.assertThat( target( "/search" ).queryParam( "query", "\"" ).request().get() ) + .hasStatus( Response.Status.BAD_REQUEST ) + .entity() + .hasFieldOrPropertyWithValue( "error.code", 400 ) + .hasFieldOrPropertyWithValue( "error.message", "Cannot parse '\"': Lexical error at line 1, column 2. Encountered: after : \"\"" ); + } } \ No newline at end of file diff --git a/gemma-rest/src/test/java/ubic/gemma/rest/util/ResponseAssert.java b/gemma-rest/src/test/java/ubic/gemma/rest/util/ResponseAssert.java index 33fa22ab44..52fe654d19 100644 --- a/gemma-rest/src/test/java/ubic/gemma/rest/util/ResponseAssert.java +++ b/gemma-rest/src/test/java/ubic/gemma/rest/util/ResponseAssert.java @@ -100,6 +100,10 @@ public ObjectAssert entityAs( Class clazz ) { } } + public StringAssert entityAsString() { + return new StringAssert( actual.readEntity( String.class ) ); + } + public InputStreamAssert entityAsStream() { return new InputStreamAssert( actual.readEntity( InputStream.class ) ); } diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/AnnotationController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/AnnotationController.java index fcd4220c5d..2edb2543dd 100644 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/AnnotationController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/expression/experiment/AnnotationController.java @@ -28,6 +28,7 @@ import ubic.basecode.ontology.model.OntologyTerm; import ubic.gemma.core.job.executor.webapp.TaskRunningService; import ubic.gemma.core.ontology.OntologyService; +import ubic.gemma.core.search.ParseSearchException; import ubic.gemma.core.search.SearchException; import ubic.gemma.model.common.description.Characteristic; import ubic.gemma.model.common.description.CharacteristicValueObject; @@ -40,6 +41,7 @@ import ubic.gemma.persistence.service.genome.taxon.TaxonService; import ubic.gemma.web.util.EntityNotFoundException; +import javax.ws.rs.InternalServerErrorException; import java.util.Collection; import java.util.HashSet; import java.util.Set; @@ -139,8 +141,10 @@ public Collection findTerm( String givenQueryString, } return sortedResults; + } catch ( ParseSearchException e ) { + throw new IllegalArgumentException( e.getMessage(), e ); } catch ( SearchException e ) { - throw new IllegalArgumentException( "Invalid search query.", e ); + throw new InternalServerErrorException( e ); } } diff --git a/gemma-web/src/main/java/ubic/gemma/web/controller/genome/gene/GeneSetController.java b/gemma-web/src/main/java/ubic/gemma/web/controller/genome/gene/GeneSetController.java index 5f144c029d..294ac7dc3f 100755 --- a/gemma-web/src/main/java/ubic/gemma/web/controller/genome/gene/GeneSetController.java +++ b/gemma-web/src/main/java/ubic/gemma/web/controller/genome/gene/GeneSetController.java @@ -29,6 
+29,7 @@ import org.springframework.web.servlet.ModelAndView; import ubic.gemma.core.genome.gene.SessionBoundGeneSetValueObject; import ubic.gemma.core.genome.gene.service.GeneSetService; +import ubic.gemma.core.search.ParseSearchException; import ubic.gemma.core.search.SearchException; import ubic.gemma.model.genome.TaxonValueObject; import ubic.gemma.model.genome.gene.DatabaseBackedGeneSetValueObject; @@ -38,6 +39,7 @@ import ubic.gemma.web.util.EntityNotFoundException; import javax.servlet.http.HttpServletRequest; +import javax.ws.rs.InternalServerErrorException; import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; @@ -190,8 +192,10 @@ public Collection findGeneSetsByGene( Long geneId ) { public Collection findGeneSetsByName( String query, Long taxonId ) { try { return geneSetService.findGeneSetsByName( query, taxonId ); + } catch ( ParseSearchException e ) { + throw new IllegalArgumentException( e.getMessage(), e ); } catch ( SearchException e ) { - throw new IllegalArgumentException( "Invalid search query.", e ); + throw new InternalServerErrorException( e ); } } From 24eaeb0441bdc1fb20d67490d94670006e96c77c Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 3 Apr 2024 12:14:24 -0700 Subject: [PATCH 102/105] Ignore ArrayDesignSequenceProcessorTest.testFetchAndLoadWithIdentifiers for now --- .../arrayDesign/ArrayDesignSequenceProcessorTest.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorTest.java b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorTest.java index 00bb31c100..ebd02b2597 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/loader/expression/arrayDesign/ArrayDesignSequenceProcessorTest.java @@ -21,6 +21,7 @@ import org.apache.commons.lang3.StringUtils; import org.junit.Assume; import org.junit.Before; +import org.junit.Ignore; import org.junit.Test; import org.springframework.beans.factory.annotation.Autowired; import ubic.basecode.util.FileTools; @@ -121,6 +122,7 @@ public void testAssignSequencesToDesignElementsMissingSequence() throws Exceptio } @Test + @Ignore("See https://github.com/PavlidisLab/Gemma/issues/1082 for details") public void testFetchAndLoadWithIdentifiers() throws Exception { String fastacmdExe = Settings.getString( SimpleFastaCmd.FASTA_CMD_ENV_VAR ); Assume.assumeTrue( "No fastacmd executable is configured, skipping test.", fastacmdExe != null ); From 762b6ec8d3353f8fcbf4e3dc328a6cedd1594f02 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 3 Apr 2024 12:33:45 -0700 Subject: [PATCH 103/105] Make sure that DNF terms containing spaces or special characters are properly escaped Full-text search against the ontology is done only using exact matches on terms and limited boolean logic. Thus, when a subclause contains special characters or spaces, those must be escaped or quoted. Rename extractDnf to extractTermsDnf to make it clear that this will only pull down the terms and not prefix, wildcards, etc. 
--- .../core/search/lucene/LuceneQueryUtils.java | 4 +-- .../search/source/OntologySearchSource.java | 16 +++++++++--- .../search/lucene/LuceneQueryUtilsTest.java | 26 ++++++++++++------- .../source/OntologySearchSourceTest.java | 3 ++- 4 files changed, 33 insertions(+), 16 deletions(-) diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java index 739b8bc88d..f7c9e3112f 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/lucene/LuceneQueryUtils.java @@ -79,14 +79,14 @@ private static void extractTerms( Query query, Set terms ) { } /** - * Extract a DNF (Disjunctive Normal Form) from the query. + * Extract a DNF (Disjunctive Normal Form) from the terms of a query. *
<p>
* Clauses can be nested (i.e. {@code a OR (d OR (c AND (d AND e))}) as long as {@code OR} and {@code AND} are not * interleaved. *
<p>
* Prohibited clauses are ignored unless they break the DNF structure, in which case this will return an empty set. */ - public static Set> extractDnf( SearchSettings settings ) throws SearchException { + public static Set> extractTermsDnf( SearchSettings settings ) throws SearchException { Query q = parseSafely( settings, createQueryParser() ); Set> result; if ( q instanceof BooleanQuery ) { diff --git a/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java b/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java index 9a89abbd5b..66875060da 100644 --- a/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java +++ b/gemma-core/src/main/java/ubic/gemma/core/search/source/OntologySearchSource.java @@ -4,6 +4,7 @@ import lombok.Value; import lombok.extern.apachecommons.CommonsLog; import org.apache.commons.lang3.time.StopWatch; +import org.apache.lucene.queryParser.QueryParser; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import org.springframework.util.StringUtils; @@ -24,7 +25,7 @@ import java.net.URI; import java.util.*; -import static ubic.gemma.core.search.lucene.LuceneQueryUtils.extractDnf; +import static ubic.gemma.core.search.lucene.LuceneQueryUtils.extractTermsDnf; import static ubic.gemma.core.search.lucene.LuceneQueryUtils.prepareTermUriQuery; @Component @@ -84,7 +85,7 @@ public Collection> searchExpressionExperiment * * But if they put in Parkinson's disease we don't want to do two queries. */ - Set> subclauses = extractDnf( settings ); + Set> subclauses = extractTermsDnf( settings ); for ( Set subclause : subclauses ) { Collection> classResults = this.searchExpressionExperiments( settings, subclause ); if ( !classResults.isEmpty() ) { @@ -121,7 +122,16 @@ private SearchResultSet searchExpressionExperiments( Searc OntologySearchSource.log.debug( "Starting characteristic search for: " + settings + " matching " + String.join( " AND ", clause ) ); for ( String subClause : clause ) { - SearchResultSet subqueryResults = doSearchExpressionExperiment( settings.withQuery( subClause ) ); + // at this point, subclauses have already been parsed, so if they contain special characters, those must be + // escaped + String subClauseQuery = QueryParser.escape( subClause ); + // spaces should be quoted + if ( subClauseQuery.contains( " " ) ) { + subClauseQuery = "\"" + subClauseQuery + "\""; + } + SearchResultSet subqueryResults = doSearchExpressionExperiment( + settings.withQuery( subClauseQuery ) + ); if ( results.isEmpty() ) { results.addAll( subqueryResults ); } else { diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/lucene/LuceneQueryUtilsTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/lucene/LuceneQueryUtilsTest.java index 5b52dbf964..7209a6d502 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/lucene/LuceneQueryUtilsTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/lucene/LuceneQueryUtilsTest.java @@ -24,38 +24,44 @@ public void testExtractTerms() throws SearchException { @Test public void testExtractDnf() throws SearchException { - assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 OR (BRCA2 AND BRCA3) OR NOT BRCA4 OR -BRCA5 OR (BRCA6 OR BRCA7)", null ) ) ) + assertThat( extractTermsDnf( SearchSettings.geneSearch( "BRCA1 OR (BRCA2 AND BRCA3) OR NOT BRCA4 OR -BRCA5 OR (BRCA6 OR BRCA7)", null ) ) ) .containsExactlyInAnyOrder( set( "BRCA1" ), set( "BRCA2", "BRCA3" ), set( "BRCA6" ), set( "BRCA7" 
) ); - assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 AND BRCA2", null ) ) ) + assertThat( extractTermsDnf( SearchSettings.geneSearch( "BRCA1 AND BRCA2", null ) ) ) .containsExactlyInAnyOrder( set( "BRCA1", "BRCA2" ) ); - assertThat( extractDnf( SearchSettings.geneSearch( "NOT BRCA1 AND NOT BRCA2", null ) ) ) + assertThat( extractTermsDnf( SearchSettings.geneSearch( "NOT BRCA1 AND NOT BRCA2", null ) ) ) .isEmpty(); - assertThat( extractDnf( SearchSettings.geneSearch( "NOT BRCA1 OR NOT BRCA2", null ) ) ) + assertThat( extractTermsDnf( SearchSettings.geneSearch( "NOT BRCA1 OR NOT BRCA2", null ) ) ) .isEmpty(); - assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 AND NOT BRCA2", null ) ) ) + assertThat( extractTermsDnf( SearchSettings.geneSearch( "BRCA1 AND NOT BRCA2", null ) ) ) .containsExactly( set( "BRCA1" ) ); - assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 OR NOT (BRCA2 AND BRCA3)", null ) ) ) + assertThat( extractTermsDnf( SearchSettings.geneSearch( "BRCA1 OR NOT (BRCA2 AND BRCA3)", null ) ) ) .containsExactly( set( "BRCA1" ) ); - assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 AND (BRCA2 OR BRCA3)", null ) ) ) + assertThat( extractTermsDnf( SearchSettings.geneSearch( "BRCA1 AND (BRCA2 OR BRCA3)", null ) ) ) .isEmpty(); } + @Test + public void testExtractDnfWithQuotedSpaces() throws SearchException { + assertThat( extractTermsDnf( SearchSettings.geneSearch( "\"alpha beta\" OR \"gamma delta\"", null ) ) ) + .containsExactlyInAnyOrder( set( "alpha beta" ), set( "gamma delta" ) ); + } + @Test public void testExtractDnfWithNestedOrInClause() throws SearchException { - assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 OR (BRCA2 OR (BRCA3 AND BRCA4))", null ) ) ) + assertThat( extractTermsDnf( SearchSettings.geneSearch( "BRCA1 OR (BRCA2 OR (BRCA3 AND BRCA4))", null ) ) ) .containsExactlyInAnyOrder( set( "BRCA1" ), set( "BRCA2" ), set( "BRCA3", "BRCA4" ) ); } @Test public void testExtractDnfWithNestedAndInSubClause() throws SearchException { - assertThat( extractDnf( SearchSettings.geneSearch( "BRCA1 OR (BRCA2 AND (BRCA3 AND BRCA4))", null ) ) ) + assertThat( extractTermsDnf( SearchSettings.geneSearch( "BRCA1 OR (BRCA2 AND (BRCA3 AND BRCA4))", null ) ) ) .containsExactlyInAnyOrder( set( "BRCA1" ), set( "BRCA2", "BRCA3", "BRCA4" ) ); } @Test public void testExtractDnfWithUris() throws SearchException { // this is an important case for searching datasets by ontology terms - assertThat( extractDnf( SearchSettings.geneSearch( "http://example.com/GO:1234 OR http://example.com/GO:1235", null ) ) ) + assertThat( extractTermsDnf( SearchSettings.geneSearch( "http://example.com/GO:1234 OR http://example.com/GO:1235", null ) ) ) .contains( set( "http://example.com/GO:1234" ), set( "http://example.com/GO:1235" ) ); } diff --git a/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java b/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java index 7c1ddedb30..9447253cc2 100644 --- a/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java +++ b/gemma-core/src/test/java/ubic/gemma/core/search/source/OntologySearchSourceTest.java @@ -137,11 +137,12 @@ public Map highlightTerm( @Nullable String termUri, String termL @Test public void testSearchExpressionExperimentWithBooleanQuery() throws SearchException { - ontologySearchSource.searchExpressionExperiment( SearchSettings.expressionExperimentSearch( "a OR (b AND c) OR http://example.com/d" ) ); + 
ontologySearchSource.searchExpressionExperiment( SearchSettings.expressionExperimentSearch( "a OR (b AND c) OR http://example.com/d OR \"a quoted string containing an escaped quote \\\"\"" ) ); verify( ontologyService ).findTerms( "a" ); verify( ontologyService ).findTerms( "b" ); verify( ontologyService ).findTerms( "c" ); verify( ontologyService ).getTerm( "http://example.com/d" ); + verify( ontologyService ).findTerms( "\"a quoted string containing an escaped quote \\\"\"" ); verifyNoMoreInteractions( ontologyService ); } From 3581a1b1e83ebcf66f8e63b7b832ca9466c69129 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 3 Apr 2024 13:55:53 -0700 Subject: [PATCH 104/105] Update baseCode to 1.1.21 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index ae3bd4ad82..4757a23729 100644 --- a/pom.xml +++ b/pom.xml @@ -140,7 +140,7 @@ baseCode baseCode - 1.1.21-SNAPSHOT + 1.1.21 From 61606fbca6f48e3049f795c9847a3a872a77d837 Mon Sep 17 00:00:00 2001 From: Guillaume Poirier-Morency Date: Wed, 3 Apr 2024 15:24:00 -0700 Subject: [PATCH 105/105] Update for next development version --- gemma-cli/pom.xml | 2 +- gemma-core/pom.xml | 2 +- gemma-groovy-support/pom.xml | 2 +- gemma-rest/pom.xml | 2 +- gemma-web/pom.xml | 2 +- pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/gemma-cli/pom.xml b/gemma-cli/pom.xml index 6668939e41..3b55dacd79 100644 --- a/gemma-cli/pom.xml +++ b/gemma-cli/pom.xml @@ -3,7 +3,7 @@ gemma gemma - 1.31.3-SNAPSHOT + 1.31.3 4.0.0 gemma-cli diff --git a/gemma-core/pom.xml b/gemma-core/pom.xml index f5c1a086bc..d4e5be3677 100644 --- a/gemma-core/pom.xml +++ b/gemma-core/pom.xml @@ -3,7 +3,7 @@ gemma gemma - 1.31.3-SNAPSHOT + 1.31.3 4.0.0 gemma-core diff --git a/gemma-groovy-support/pom.xml b/gemma-groovy-support/pom.xml index 4e833c2f79..e86cae1aeb 100644 --- a/gemma-groovy-support/pom.xml +++ b/gemma-groovy-support/pom.xml @@ -6,7 +6,7 @@ gemma gemma - 1.31.3-SNAPSHOT + 1.31.3 gemma-groovy-support diff --git a/gemma-rest/pom.xml b/gemma-rest/pom.xml index 215975dadb..ece62c2904 100644 --- a/gemma-rest/pom.xml +++ b/gemma-rest/pom.xml @@ -5,7 +5,7 @@ gemma gemma - 1.31.3-SNAPSHOT + 1.31.3 4.0.0 diff --git a/gemma-web/pom.xml b/gemma-web/pom.xml index 2a0e13a172..a5ce47a550 100644 --- a/gemma-web/pom.xml +++ b/gemma-web/pom.xml @@ -3,7 +3,7 @@ gemma gemma - 1.31.3-SNAPSHOT + 1.31.3 4.0.0 gemma-web diff --git a/pom.xml b/pom.xml index 4757a23729..7d9e1d9478 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ Gemma gemma gemma - 1.31.3-SNAPSHOT + 1.31.3 2005 The Gemma Project for meta-analysis of genomics data https://gemma.msl.ubc.ca