From 2e01c57af486e562df88759412e36576d6f9e768 Mon Sep 17 00:00:00 2001 From: himeshr Date: Mon, 22 Jul 2024 19:21:23 +0530 Subject: [PATCH] #102 | Refactor MediaAnalysis flow --- .../etl/domain/metadata/SchemaMetadata.java | 8 + .../etl/domain/metadata/TableMetadata.java | 13 ++ .../avniproject/etl/dto/MediaAnalysisVO.java | 53 +++++++ .../etl/repository/MediaTableRepository.java | 8 +- .../repository/SchemaMetadataRepository.java | 7 +- .../MediaAnalysisTableMetadataBuilder.java | 19 +++ .../tableMappers/MediaAnalysisTable.java | 27 ++++ .../service/MediaTableRepositoryService.java | 22 ++- .../sql/MediaSearchQueryBuilder.java | 8 + .../MediaAnalysisTableRegenerateAction.java | 143 ++++++++++++++++++ .../etl/service/MediaAnalysisService.java | 66 ++------ .../avniproject/etl/service/SyncService.java | 3 +- src/main/resources/sql/api/searchMedia.sql.st | 2 + .../resources/sql/etl/mediaAnalysis.sql.st | 3 + .../SchemaMetadataRepositoryTest.java | 11 ++ 15 files changed, 325 insertions(+), 68 deletions(-) create mode 100644 src/main/java/org/avniproject/etl/dto/MediaAnalysisVO.java create mode 100644 src/main/java/org/avniproject/etl/repository/rowMappers/MediaAnalysisTableMetadataBuilder.java create mode 100644 src/main/java/org/avniproject/etl/repository/rowMappers/tableMappers/MediaAnalysisTable.java create mode 100644 src/main/java/org/avniproject/etl/repository/sync/MediaAnalysisTableRegenerateAction.java create mode 100644 src/main/resources/sql/etl/mediaAnalysis.sql.st diff --git a/src/main/java/org/avniproject/etl/domain/metadata/SchemaMetadata.java b/src/main/java/org/avniproject/etl/domain/metadata/SchemaMetadata.java index 1d563e2..cfd6438 100644 --- a/src/main/java/org/avniproject/etl/domain/metadata/SchemaMetadata.java +++ b/src/main/java/org/avniproject/etl/domain/metadata/SchemaMetadata.java @@ -97,6 +97,14 @@ public List getAllEncounterTableNames() { return encounterTableNames; } + public Optional getMediaTable() { + return tableMetadata.stream().filter(TableMetadata::isMediaTable).findFirst(); + } + + public Optional getMediaAnalysisTable() { + return tableMetadata.stream().filter(TableMetadata::isMediaAnalysisTable).findFirst(); + } + private List findChanges(SchemaMetadata currentSchema, TableMetadata newTable) { List diffs = new ArrayList<>(); Optional optionalMatchingTable = currentSchema.findMatchingTable(newTable); diff --git a/src/main/java/org/avniproject/etl/domain/metadata/TableMetadata.java b/src/main/java/org/avniproject/etl/domain/metadata/TableMetadata.java index e685644..b6e1fb4 100644 --- a/src/main/java/org/avniproject/etl/domain/metadata/TableMetadata.java +++ b/src/main/java/org/avniproject/etl/domain/metadata/TableMetadata.java @@ -247,6 +247,7 @@ public enum Type { IndividualEncounterCancellation, Address, Media, + MediaAnalysis, ManualProgramEnrolmentEligibility, GroupToMember, HouseholdToMember, @@ -273,6 +274,18 @@ public boolean isSubjectTable() { return Arrays.asList(Type.Individual, Type.Person, Type.Household, Type.Group).contains(this.type); } + public boolean isMediaTable() { + return (Type.Media).equals(this.type); + } + + public boolean isMediaAnalysisTable() { + return (Type.MediaAnalysis).equals(this.type); + } + + public boolean isPartOfRegularSync() { + return !isMediaAnalysisTable(); + } + private void addIndexMetadata(IndexMetadata indexMetadata) { this.indexMetadataList.add(indexMetadata); } diff --git a/src/main/java/org/avniproject/etl/dto/MediaAnalysisVO.java b/src/main/java/org/avniproject/etl/dto/MediaAnalysisVO.java new file mode 100644 index 0000000..7732432 --- /dev/null +++ b/src/main/java/org/avniproject/etl/dto/MediaAnalysisVO.java @@ -0,0 +1,53 @@ +package org.avniproject.etl.dto; + +import java.util.Objects; + +public class MediaAnalysisVO { + + String uuid; + String image_url; + boolean isValidUrl; + boolean isPresentInStorage; + boolean isThumbnailGenerated; + + public MediaAnalysisVO(String uuid, String image_url, boolean isValidUrl, boolean isPresentInStorage, boolean isThumbnailGenerated) { + this.uuid = uuid; + this.image_url = image_url; + this.isValidUrl = isValidUrl; + this.isPresentInStorage = isPresentInStorage; + this.isThumbnailGenerated = isThumbnailGenerated; + } + + public String getUuid() { + return uuid; + } + + public String getImage_url() { + return image_url; + } + + public boolean isValidUrl() { + return isValidUrl; + } + + public boolean isPresentInStorage() { + return isPresentInStorage; + } + + public boolean isThumbnailGenerated() { + return isThumbnailGenerated; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof MediaAnalysisVO)) return false; + MediaAnalysisVO that = (MediaAnalysisVO) o; + return uuid.equals(that.uuid); + } + + @Override + public int hashCode() { + return Objects.hash(uuid); + } +} diff --git a/src/main/java/org/avniproject/etl/repository/MediaTableRepository.java b/src/main/java/org/avniproject/etl/repository/MediaTableRepository.java index 2d119a8..77b6274 100644 --- a/src/main/java/org/avniproject/etl/repository/MediaTableRepository.java +++ b/src/main/java/org/avniproject/etl/repository/MediaTableRepository.java @@ -84,9 +84,13 @@ private List searchInternal(MediaSearchRequest mediaSearchRequest, Page p .query(query.sql(), query.parameters(), rowMapper), jdbcTemplate); } - - public List getImageData(MediaSearchRequest mediaSearchRequest, Page page) { return searchInternal(mediaSearchRequest, page, (rs, rowNum) -> mediaTableRepositoryService.setImageData(rs)); } + + public List getAllMedia() { + Query query = new MediaSearchQueryBuilder().allWithoutAnyLimitOrOffset().build(); + return runInSchemaUserContext(() -> new NamedParameterJdbcTemplate(jdbcTemplate) + .query(query.sql(), query.parameters(), (rs, rowNum) -> mediaTableRepositoryService.setMediaDto(rs, false)), jdbcTemplate); + } } diff --git a/src/main/java/org/avniproject/etl/repository/SchemaMetadataRepository.java b/src/main/java/org/avniproject/etl/repository/SchemaMetadataRepository.java index 9427615..da5a2f4 100644 --- a/src/main/java/org/avniproject/etl/repository/SchemaMetadataRepository.java +++ b/src/main/java/org/avniproject/etl/repository/SchemaMetadataRepository.java @@ -6,11 +6,7 @@ import org.avniproject.etl.domain.metadata.SchemaMetadata; import org.avniproject.etl.domain.metadata.TableMetadata; import org.avniproject.etl.domain.metadata.diff.Diff; -import org.avniproject.etl.repository.rowMappers.ColumnMetadataMapper; -import org.avniproject.etl.repository.rowMappers.MediaTableMetadataBuilder; -import org.avniproject.etl.repository.rowMappers.SyncTelemetryTableMetadataBuilder; -import org.avniproject.etl.repository.rowMappers.UserTableMetadataBuilder; -import org.avniproject.etl.repository.rowMappers.TableMetadataMapper; +import org.avniproject.etl.repository.rowMappers.*; import org.avniproject.etl.repository.rowMappers.tableMappers.AddressTable; import org.avniproject.etl.repository.rowMappers.tableMappers.ChecklistTable; import org.springframework.beans.factory.annotation.Autowired; @@ -45,6 +41,7 @@ public SchemaMetadata getNewSchemaMetadata() { List tables = new ArrayList<>(getFormTables()); tables.add(getAddressTable()); tables.add(MediaTableMetadataBuilder.build()); + tables.add(MediaAnalysisTableMetadataBuilder.build()); tables.add(SyncTelemetryTableMetadataBuilder.build()); tables.add(UserTableMetadataBuilder.build()); tables.addAll(getGroupSubjectTables()); diff --git a/src/main/java/org/avniproject/etl/repository/rowMappers/MediaAnalysisTableMetadataBuilder.java b/src/main/java/org/avniproject/etl/repository/rowMappers/MediaAnalysisTableMetadataBuilder.java new file mode 100644 index 0000000..2f953fe --- /dev/null +++ b/src/main/java/org/avniproject/etl/repository/rowMappers/MediaAnalysisTableMetadataBuilder.java @@ -0,0 +1,19 @@ +package org.avniproject.etl.repository.rowMappers; + +import org.avniproject.etl.domain.metadata.ColumnMetadata; +import org.avniproject.etl.domain.metadata.TableMetadata; +import org.avniproject.etl.repository.rowMappers.tableMappers.MediaAnalysisTable; + +import java.util.stream.Collectors; + +public class MediaAnalysisTableMetadataBuilder { + public static TableMetadata build() { + TableMetadata mediaAnalysisTableMetadata = new TableMetadata(); + MediaAnalysisTable mediaAnalysisTable = new MediaAnalysisTable(); + mediaAnalysisTableMetadata.setName(mediaAnalysisTable.name(null)); + mediaAnalysisTableMetadata.setType(TableMetadata.Type.MediaAnalysis); + mediaAnalysisTableMetadata.addColumnMetadata(mediaAnalysisTable.columns().stream().map(column -> new ColumnMetadata(column, null, null, null)).collect(Collectors.toList())); + + return mediaAnalysisTableMetadata; + } +} diff --git a/src/main/java/org/avniproject/etl/repository/rowMappers/tableMappers/MediaAnalysisTable.java b/src/main/java/org/avniproject/etl/repository/rowMappers/tableMappers/MediaAnalysisTable.java new file mode 100644 index 0000000..13b6031 --- /dev/null +++ b/src/main/java/org/avniproject/etl/repository/rowMappers/tableMappers/MediaAnalysisTable.java @@ -0,0 +1,27 @@ +package org.avniproject.etl.repository.rowMappers.tableMappers; + +import org.avniproject.etl.domain.metadata.Column; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +public class MediaAnalysisTable extends Table { + @Override + public String name(Map tableDetails) { + return "media_analysis"; + } + + @Override + public List columns() { + return new Columns() + .withColumns(Arrays.asList( + new Column("uuid", Column.Type.text, Column.ColumnType.index), + new Column("image_url", Column.Type.text), + new Column("is_valid_url", Column.Type.bool), + new Column("is_present_in_storage", Column.Type.bool), + new Column("is_thumbnail_generated", Column.Type.bool) + )) + .build(); + } +} \ No newline at end of file diff --git a/src/main/java/org/avniproject/etl/repository/service/MediaTableRepositoryService.java b/src/main/java/org/avniproject/etl/repository/service/MediaTableRepositoryService.java index 6d9f04d..4ade50e 100644 --- a/src/main/java/org/avniproject/etl/repository/service/MediaTableRepositoryService.java +++ b/src/main/java/org/avniproject/etl/repository/service/MediaTableRepositoryService.java @@ -24,22 +24,28 @@ public MediaTableRepositoryService(AmazonClientService amazonClientService) { } public MediaDTO setMediaDto(ResultSet rs) { + return this.setMediaDto(rs, true); + } + + public MediaDTO setMediaDto(ResultSet rs, boolean generateSignedUrls) { try { String imageUrl = rs.getString("image_url"); String thumbnailUrl = Utils.getThumbnailUrl(imageUrl); URL signedImageUrl = null, signedThumbnailUrl = null; - try { - signedImageUrl = amazonClientService.generateMediaDownloadUrl(imageUrl); + if(generateSignedUrls) { try { - signedThumbnailUrl = amazonClientService.generateMediaDownloadUrl(thumbnailUrl); - } catch (S3FileDoesNotExist ignored) { + signedImageUrl = amazonClientService.generateMediaDownloadUrl(imageUrl); + try { + signedThumbnailUrl = amazonClientService.generateMediaDownloadUrl(thumbnailUrl); + } catch (S3FileDoesNotExist ignored) { + } + } catch (IllegalArgumentException illegalArgumentException) { + //Ignore and move on. Image will be null + } catch (S3FileDoesNotExist e) { + throw new RuntimeException(e); } - } catch (IllegalArgumentException illegalArgumentException) { - //Ignore and move on. Image will be null - } catch (S3FileDoesNotExist e) { - throw new RuntimeException(e); } String uuid = rs.getString("uuid"); diff --git a/src/main/java/org/avniproject/etl/repository/sql/MediaSearchQueryBuilder.java b/src/main/java/org/avniproject/etl/repository/sql/MediaSearchQueryBuilder.java index 092820d..314fe35 100644 --- a/src/main/java/org/avniproject/etl/repository/sql/MediaSearchQueryBuilder.java +++ b/src/main/java/org/avniproject/etl/repository/sql/MediaSearchQueryBuilder.java @@ -72,6 +72,14 @@ public MediaSearchQueryBuilder withPage(Page page) { return this; } + public MediaSearchQueryBuilder allWithoutAnyLimitOrOffset() { + template.add("joinTablesAndColumns", null); + template.add("request", null); + parameters.put("offset", 0); + parameters.put("limit", Long.MAX_VALUE); + return this; + } + public Query build() { String str = template.render(); logger.debug(str); diff --git a/src/main/java/org/avniproject/etl/repository/sync/MediaAnalysisTableRegenerateAction.java b/src/main/java/org/avniproject/etl/repository/sync/MediaAnalysisTableRegenerateAction.java new file mode 100644 index 0000000..981b081 --- /dev/null +++ b/src/main/java/org/avniproject/etl/repository/sync/MediaAnalysisTableRegenerateAction.java @@ -0,0 +1,143 @@ +package org.avniproject.etl.repository.sync; + +import org.apache.log4j.Logger; +import org.avniproject.etl.config.AmazonClientService; +import org.avniproject.etl.domain.NullObject; +import org.avniproject.etl.domain.OrgIdentityContextHolder; +import org.avniproject.etl.domain.Organisation; +import org.avniproject.etl.domain.metadata.TableMetadata; +import org.avniproject.etl.dto.MediaAnalysisVO; +import org.avniproject.etl.dto.MediaDTO; +import org.avniproject.etl.repository.MediaTableRepository; +import org.avniproject.etl.repository.sql.SqlFile; +import org.avniproject.etl.service.MediaAnalysisService; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.jdbc.core.BatchPreparedStatementSetter; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.jdbc.core.ParameterizedPreparedStatementSetter; +import org.springframework.stereotype.Repository; +import org.stringtemplate.v4.ST; + +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.util.List; +import java.util.Map; +import java.util.function.Predicate; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import static org.avniproject.etl.repository.JdbcContextWrapper.runInOrgContext; + +@Repository +public class MediaAnalysisTableRegenerateAction { + public static final String THUMBNAILS_PATTERN = "thumbnails"; + public static final String ADHOC_MOBILE_DB_BACKUP_PATTERN = "Adhoc|MobileDbBackup"; + public static final String UUID_V4_PATTERN = "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"; + + private final AmazonClientService amazonClientService; + private final MediaTableRepository mediaTableRepository; + private final JdbcTemplate jdbcTemplate; + private static final String generateMediaAnalysisTableTemplate = SqlFile.readSqlFile("mediaAnalysis.sql.st"); + + private static final Logger log = Logger.getLogger(MediaAnalysisService.class); + + @Autowired + public MediaAnalysisTableRegenerateAction(AmazonClientService amazonClientService, MediaTableRepository mediaTableRepository, JdbcTemplate jdbcTemplate) { + this.amazonClientService = amazonClientService; + this.mediaTableRepository = mediaTableRepository; + this.jdbcTemplate = jdbcTemplate; + } + + public void process(Organisation organisation, TableMetadata tableMetadata) { + + List listOfAllMediaUrls = fetchValidMediaUrlsFromStorage(organisation); + Map> partitionResults = partitionListBasedOnThumbnailsPattern(listOfAllMediaUrls); + List listOfAllThumbnailsUrls = partitionResults.get(Boolean.TRUE); + List listOfAllMediaUrlsExcludingThumbnails = partitionResults.get(Boolean.FALSE); + log.info(String.format("listOfAllMediaUrls %d listOfAllMediaUrlsExcludingThumbnails %d listOfAllThumbnailsUrls %d", listOfAllMediaUrls.size(), listOfAllMediaUrlsExcludingThumbnails.size(), listOfAllThumbnailsUrls.size())); + + //TODO Log entries that get filtered out for dev purposes + // TODO: 17/07/24 Fetch list of MediaUrls from media table + // SELECT REPLACE(image_url, 'https://s3.ap-south-1.amazonaws.com/prod-user-media/goonj/', '') as image_url_in_media_table + // FROM goonj.media + // ORDER BY REPLACE(image_url, 'https://s3.ap-south-1.amazonaws.com/prod-user-media/goonj/', ''); + // TODO: 17/07/24 Invoke Analysis method to perform various metrics computations for each entry in media table of the org + //TODO Fix test issues causing build break + List listOfMediaEntities = mediaTableRepository.getAllMedia(); + String orgMediaDirectory = organisation.getOrganisationIdentity().getMediaDirectory(); + // TODO: 22/07/24 do + List mediaAnalysisVOS = listOfMediaEntities.stream().map(mediaDTO -> { + boolean isValidUrl = mediaDTO.url().contains(orgMediaDirectory); + String urlToSearch = mediaDTO.url().substring(mediaDTO.url().indexOf(orgMediaDirectory)); + boolean isPresentInStorage = listOfAllMediaUrlsExcludingThumbnails.contains(urlToSearch); + // TODO: 22/07/24 init booleans correctly + return new MediaAnalysisVO(mediaDTO.uuid(), mediaDTO.url(), isValidUrl, isPresentInStorage, false); + }).collect(Collectors.toList()); + log.info(String.format("listOfMediaEntities %d mediaAnalysisVOS %d ", listOfMediaEntities.size(), mediaAnalysisVOS.size())); + + truncateMediaAnalysisTable(tableMetadata); + generateMediaAnalysisTableEntries(tableMetadata, mediaAnalysisVOS); + } + + private void truncateMediaAnalysisTable(TableMetadata tableMetadata) { + String schema = OrgIdentityContextHolder.getDbSchema(); + String mediaAnalysisTable = tableMetadata.getName(); + String sql = new ST("delete from . where uuid is not null;") + .add("schemaName", wrapInQuotes(schema)) + .add("mediaAnalysisTable", wrapInQuotes(mediaAnalysisTable)) + .render(); + runInOrgContext(() -> { + jdbcTemplate.execute(sql); + return NullObject.instance(); + }, jdbcTemplate); + } + + private List fetchValidMediaUrlsFromStorage(Organisation organisation) { + List listOfAllMediaUrls = amazonClientService.listObjectsInBucket(getMediaDirectory(organisation)); + filterOutNonMediaUrls(listOfAllMediaUrls); + return listOfAllMediaUrls; + } + + private void filterOutNonMediaUrls(List listOfAllMediaUrls) { + Predicate fastSyncAndAdhocDumpPatternPredicate = Pattern.compile(ADHOC_MOBILE_DB_BACKUP_PATTERN, Pattern.CASE_INSENSITIVE).asPredicate(); + Predicate notUUIDPatternPredicate = Pattern.compile(UUID_V4_PATTERN).asPredicate().negate(); + listOfAllMediaUrls.removeIf(fastSyncAndAdhocDumpPatternPredicate.or(notUUIDPatternPredicate)); + } + + private Map> partitionListBasedOnThumbnailsPattern(List listOfAllMediaUrls) { + Predicate thumbnailsPatternPredicate = Pattern.compile(THUMBNAILS_PATTERN, Pattern.CASE_INSENSITIVE).asPredicate(); + Map> partitionResults= listOfAllMediaUrls.stream().collect(Collectors.partitioningBy(thumbnailsPatternPredicate)); + return partitionResults; + } + + private String getMediaDirectory(Organisation organisation) { + return organisation.getOrganisationIdentity().getMediaDirectory(); + } + + private void generateMediaAnalysisTableEntries(TableMetadata tableMetadata, List mediaAnalysisVOS) { + String schema = OrgIdentityContextHolder.getDbSchema(); + String mediaAnalysisTable = tableMetadata.getName(); + String sql = new ST(generateMediaAnalysisTableTemplate) + .add("schemaName", wrapInQuotes(schema)) + .add("mediaAnalysisTable", wrapInQuotes(mediaAnalysisTable)) + .render(); + runInOrgContext(() -> { + jdbcTemplate.batchUpdate(sql, + mediaAnalysisVOS, + 100, + (ps, mediaAnalysisVO) -> { + ps.setString(1, mediaAnalysisVO.getUuid()); + ps.setString(2, mediaAnalysisVO.getImage_url()); + ps.setBoolean(3, mediaAnalysisVO.isValidUrl()); + ps.setBoolean(4, mediaAnalysisVO.isPresentInStorage()); + ps.setBoolean(5, mediaAnalysisVO.isThumbnailGenerated()); + }); + return NullObject.instance(); + }, jdbcTemplate); + } + + private String wrapInQuotes(String parameter) { + return parameter == null ? "null" : "\"" + parameter + "\""; + } + +} diff --git a/src/main/java/org/avniproject/etl/service/MediaAnalysisService.java b/src/main/java/org/avniproject/etl/service/MediaAnalysisService.java index e3f2e54..a181550 100644 --- a/src/main/java/org/avniproject/etl/service/MediaAnalysisService.java +++ b/src/main/java/org/avniproject/etl/service/MediaAnalysisService.java @@ -1,42 +1,34 @@ package org.avniproject.etl.service; import org.apache.log4j.Logger; -import org.avniproject.etl.config.AmazonClientService; import org.avniproject.etl.config.EtlServiceConfig; import org.avniproject.etl.domain.OrgIdentityContextHolder; import org.avniproject.etl.domain.Organisation; import org.avniproject.etl.domain.OrganisationIdentity; +import org.avniproject.etl.domain.metadata.TableMetadata; import org.avniproject.etl.repository.OrganisationRepository; +import org.avniproject.etl.repository.sync.MediaAnalysisTableRegenerateAction; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import java.util.List; -import java.util.Map; -import java.util.function.Predicate; -import java.util.regex.Pattern; -import java.util.stream.Collectors; +import java.util.Optional; @Service public class MediaAnalysisService { - public static final String THUMBNAILS_PATTERN = "thumbnails"; - public static final String ADHOC_MOBILE_DB_BACKUP_PATTERN = "Adhoc|MobileDbBackup"; - public static final String UUID_V4_PATTERN = "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"; + private final MediaAnalysisTableRegenerateAction mediaAnalysisTableRegenerateAction; private final OrganisationRepository organisationRepository; private final OrganisationFactory organisationFactory; - private final SchemaMigrationService schemaMigrationService; - private final SyncService syncService; private final EtlServiceConfig etlServiceConfig; - private final AmazonClientService amazonClientService; private static final Logger log = Logger.getLogger(MediaAnalysisService.class); @Autowired - public MediaAnalysisService(OrganisationRepository organisationRepository, OrganisationFactory organisationFactory, SchemaMigrationService schemaMigrationService, SyncService syncService, EtlServiceConfig etlServiceConfig, AmazonClientService amazonClientService) { + public MediaAnalysisService(MediaAnalysisTableRegenerateAction mediaAnalysisTableRegenerateAction, OrganisationRepository organisationRepository, OrganisationFactory organisationFactory, + EtlServiceConfig etlServiceConfig) { + this.mediaAnalysisTableRegenerateAction = mediaAnalysisTableRegenerateAction; this.organisationRepository = organisationRepository; this.organisationFactory = organisationFactory; - this.schemaMigrationService = schemaMigrationService; - this.syncService = syncService; this.etlServiceConfig = etlServiceConfig; - this.amazonClientService = amazonClientService; } public void runFor(String organisationUUID) { @@ -57,42 +49,12 @@ public void runFor(OrganisationIdentity organisationIdentity) { log.info(String.format("Running Media Analysis for %s", organisationIdentity.toString())); OrgIdentityContextHolder.setContext(organisationIdentity, etlServiceConfig); Organisation organisation = organisationFactory.create(organisationIdentity); - - List listOfAllMediaUrls = fetchValidMediaUrlsFromStorage(organisation); - Map> partitionResults = partitionListBasedOnThumbnailsPattern(listOfAllMediaUrls); - List listOfAllThumbnailsUrls = partitionResults.get(Boolean.TRUE); - List listOfAllMediaUrlsExcludingThumbnails = partitionResults.get(Boolean.FALSE); - log.info(String.format("listOfAllMediaUrls %d listOfAllMediaUrlsExcludingThumbnails %d listOfAllThumbnailsUrls %d", listOfAllMediaUrls.size(), listOfAllMediaUrlsExcludingThumbnails.size(), listOfAllThumbnailsUrls.size())); - - //TODO Log entries that get filtered out for dev purposes - // TODO: 17/07/24 Fetch list of MediaUrls from media table - // SELECT REPLACE(image_url, 'https://s3.ap-south-1.amazonaws.com/prod-user-media/goonj/', '') as image_url_in_media_table - // FROM goonj.media - // ORDER BY REPLACE(image_url, 'https://s3.ap-south-1.amazonaws.com/prod-user-media/goonj/', ''); - // TODO: 17/07/24 Invoke Analysis method to perform various metrics computations for each entry in media table of the org - //TODO Fix test issues causing build break + Optional mediaAnalysisTableMetadata = organisation.getSchemaMetadata().getMediaAnalysisTable(); + if(!mediaAnalysisTableMetadata.isPresent()) { + log.error(String.format("Sync job hasn't yet run for schema %s with dbUser %s and schemaUser %s", organisationIdentity.getSchemaName(), organisationIdentity.getDbUser(), organisationIdentity.getSchemaUser())); + return; + } + mediaAnalysisTableRegenerateAction.process(organisation,mediaAnalysisTableMetadata.get()); log.info(String.format("Completed Media Analysis for schema %s with dbUser %s and schemaUser %s", organisationIdentity.getSchemaName(), organisationIdentity.getDbUser(), organisationIdentity.getSchemaUser())); } - - private List fetchValidMediaUrlsFromStorage(Organisation organisation) { - List listOfAllMediaUrls = amazonClientService.listObjectsInBucket(getMediaDirectory(organisation)); - filterOutNonMediaUrls(listOfAllMediaUrls); - return listOfAllMediaUrls; - } - - private void filterOutNonMediaUrls(List listOfAllMediaUrls) { - Predicate fastSyncAndAdhocDumpPatternPredicate = Pattern.compile(ADHOC_MOBILE_DB_BACKUP_PATTERN, Pattern.CASE_INSENSITIVE).asPredicate(); - Predicate notUUIDPatternPredicate = Pattern.compile(UUID_V4_PATTERN).asPredicate().negate(); - listOfAllMediaUrls.removeIf(fastSyncAndAdhocDumpPatternPredicate.or(notUUIDPatternPredicate)); - } - - private Map> partitionListBasedOnThumbnailsPattern(List listOfAllMediaUrls) { - Predicate thumbnailsPatternPredicate = Pattern.compile(THUMBNAILS_PATTERN, Pattern.CASE_INSENSITIVE).asPredicate(); - Map> partitionResults= listOfAllMediaUrls.stream().collect(Collectors.partitioningBy(thumbnailsPatternPredicate)); - return partitionResults; - } - - private String getMediaDirectory(Organisation organisation) { - return organisation.getOrganisationIdentity().getMediaDirectory(); - } -} +} \ No newline at end of file diff --git a/src/main/java/org/avniproject/etl/service/SyncService.java b/src/main/java/org/avniproject/etl/service/SyncService.java index 4dcb1b5..5353721 100644 --- a/src/main/java/org/avniproject/etl/service/SyncService.java +++ b/src/main/java/org/avniproject/etl/service/SyncService.java @@ -36,7 +36,8 @@ public SyncService(EntityRepository entityRepository, EntitySyncStatusRepository @Transactional(propagation = Propagation.REQUIRES_NEW) public void sync(Organisation organisation) { SchemaMetadata currentSchemaMetadata = organisation.getSchemaMetadata(); - currentSchemaMetadata.getOrderedTableMetadata().forEach(tableMetadata -> migrateTable(tableMetadata, organisation.getSyncStatus(), currentSchemaMetadata)); + currentSchemaMetadata.getOrderedTableMetadata().stream().filter(TableMetadata::isPartOfRegularSync) + .forEach(tableMetadata -> migrateTable(tableMetadata, organisation.getSyncStatus(), currentSchemaMetadata)); } @Transactional diff --git a/src/main/resources/sql/api/searchMedia.sql.st b/src/main/resources/sql/api/searchMedia.sql.st index 99c56e4..0af9af9 100644 --- a/src/main/resources/sql/api/searchMedia.sql.st +++ b/src/main/resources/sql/api/searchMedia.sql.st @@ -41,6 +41,7 @@ FROM .media media where media.image_url is not null and media.is_voided is false + and media.created_date_time >= :fromDate and media.created_date_time \<= :toDate and ( and ( address." id" in (:addressLevelIds_)1 = 2}; separator="\n OR "> ) + ORDER BY media.created_date_time desc LIMIT :limit OFFSET :offset; diff --git a/src/main/resources/sql/etl/mediaAnalysis.sql.st b/src/main/resources/sql/etl/mediaAnalysis.sql.st new file mode 100644 index 0000000..40193cb --- /dev/null +++ b/src/main/resources/sql/etl/mediaAnalysis.sql.st @@ -0,0 +1,3 @@ +insert into . (uuid, image_url, + is_valid_url, is_present_in_storage, is_thumbnail_generated) +VALUES (?, ?, ?, ?, ?); \ No newline at end of file diff --git a/src/test/java/org/avniproject/etl/repository/SchemaMetadataRepositoryTest.java b/src/test/java/org/avniproject/etl/repository/SchemaMetadataRepositoryTest.java index d62dc3c..ef55f16 100644 --- a/src/test/java/org/avniproject/etl/repository/SchemaMetadataRepositoryTest.java +++ b/src/test/java/org/avniproject/etl/repository/SchemaMetadataRepositoryTest.java @@ -79,6 +79,17 @@ public void shouldGetMediaTable() { assertThat(mediaTable.isPresent(), is(true)); } + @Test + @Sql({"/test-data-teardown.sql", "/test-data.sql"}) + @Sql(scripts = {"/test-data-teardown.sql"}, executionPhase = Sql.ExecutionPhase.AFTER_TEST_METHOD) + public void shouldGetMediaAnalysisTable() { + SchemaMetadata schemaMetadata = schemaMetadataRepository.getExistingSchemaMetadata(); + Optional mediaAnalysis = schemaMetadata.getTableMetadata().stream().filter(tableMetadata1 -> tableMetadata1.getName().equals("media_analysis")).findFirst(); + + assertThat(mediaAnalysis.isPresent(), is(true)); + } + + @Test @Sql({"/test-data-teardown.sql", "/test-data.sql"}) @Sql(scripts = {"/test-data-teardown.sql"}, executionPhase = Sql.ExecutionPhase.AFTER_TEST_METHOD)