Skip to content

Commit

Permalink
#102 | Refactor MediaAnalysis flow
Browse files Browse the repository at this point in the history
  • Loading branch information
himeshr committed Jul 22, 2024
1 parent a7bc035 commit 2e01c57
Show file tree
Hide file tree
Showing 15 changed files with 325 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,14 @@ public List<String> getAllEncounterTableNames() {
return encounterTableNames;
}

/**
 * Looks up the media table among the entries of {@code tableMetadata}.
 *
 * @return the first entry for which {@link TableMetadata#isMediaTable()} is true,
 *         or an empty {@link Optional} when no entry qualifies
 */
public Optional<TableMetadata> getMediaTable() {
    for (TableMetadata candidate : tableMetadata) {
        if (candidate.isMediaTable()) {
            return Optional.of(candidate);
        }
    }
    return Optional.empty();
}

/**
 * Looks up the media-analysis table among the entries of {@code tableMetadata}.
 *
 * @return the first entry for which {@link TableMetadata#isMediaAnalysisTable()} is true,
 *         or an empty {@link Optional} when no entry qualifies
 */
public Optional<TableMetadata> getMediaAnalysisTable() {
    for (TableMetadata candidate : tableMetadata) {
        if (candidate.isMediaAnalysisTable()) {
            return Optional.of(candidate);
        }
    }
    return Optional.empty();
}

private List<Diff> findChanges(SchemaMetadata currentSchema, TableMetadata newTable) {
List<Diff> diffs = new ArrayList<>();
Optional<TableMetadata> optionalMatchingTable = currentSchema.findMatchingTable(newTable);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ public enum Type {
IndividualEncounterCancellation,
Address,
Media,
MediaAnalysis,
ManualProgramEnrolmentEligibility,
GroupToMember,
HouseholdToMember,
Expand All @@ -273,6 +274,18 @@ public boolean isSubjectTable() {
return Arrays.asList(Type.Individual, Type.Person, Type.Household, Type.Group).contains(this.type);
}

/**
 * @return {@code true} when this table's type is {@link Type#Media}; {@code false} otherwise,
 *         including when the type has not been set
 */
public boolean isMediaTable() {
    // Enum constants are singletons, so an identity check is equivalent to equals().
    return this.type == Type.Media;
}

/**
 * @return {@code true} when this table's type is {@link Type#MediaAnalysis}; {@code false} otherwise,
 *         including when the type has not been set
 */
public boolean isMediaAnalysisTable() {
    // Enum constants are singletons, so an identity check is equivalent to equals().
    return this.type == Type.MediaAnalysis;
}

/**
 * Tells whether this table takes part in the regular sync.
 *
 * @return {@code false} for the media-analysis table, {@code true} for every other table
 */
public boolean isPartOfRegularSync() {
    boolean mediaAnalysis = isMediaAnalysisTable();
    return !mediaAnalysis;
}

/**
 * Appends one index definition to this table's {@code indexMetadataList}.
 *
 * @param indexMetadata the index definition to append
 */
private void addIndexMetadata(IndexMetadata indexMetadata) {
    indexMetadataList.add(indexMetadata);
}
Expand Down
53 changes: 53 additions & 0 deletions src/main/java/org/avniproject/etl/dto/MediaAnalysisVO.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package org.avniproject.etl.dto;

import java.util.Objects;

public class MediaAnalysisVO {

String uuid;
String image_url;
boolean isValidUrl;
boolean isPresentInStorage;
boolean isThumbnailGenerated;

public MediaAnalysisVO(String uuid, String image_url, boolean isValidUrl, boolean isPresentInStorage, boolean isThumbnailGenerated) {
this.uuid = uuid;
this.image_url = image_url;
this.isValidUrl = isValidUrl;
this.isPresentInStorage = isPresentInStorage;
this.isThumbnailGenerated = isThumbnailGenerated;
}

public String getUuid() {
return uuid;
}

public String getImage_url() {
return image_url;
}

public boolean isValidUrl() {
return isValidUrl;
}

public boolean isPresentInStorage() {
return isPresentInStorage;
}

public boolean isThumbnailGenerated() {
return isThumbnailGenerated;
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof MediaAnalysisVO)) return false;
MediaAnalysisVO that = (MediaAnalysisVO) o;
return uuid.equals(that.uuid);
}

@Override
public int hashCode() {
return Objects.hash(uuid);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,13 @@ private <T> List<T> searchInternal(MediaSearchRequest mediaSearchRequest, Page p
.query(query.sql(), query.parameters(), rowMapper), jdbcTemplate);
}



/**
 * Runs the media search for the given request and page, mapping each result row to an
 * {@link ImageData} through {@code mediaTableRepositoryService.setImageData}.
 *
 * @param mediaSearchRequest the search criteria, passed through to {@code searchInternal}
 * @param page               the page to fetch, passed through to {@code searchInternal}
 * @return the mapped rows
 */
public List<ImageData> getImageData(MediaSearchRequest mediaSearchRequest, Page page) {
    return searchInternal(
            mediaSearchRequest,
            page,
            (resultSet, rowIndex) -> mediaTableRepositoryService.setImageData(resultSet));
}

/**
 * Fetches every media row using the query built by
 * {@code MediaSearchQueryBuilder.allWithoutAnyLimitOrOffset()}.
 *
 * <p>Rows are mapped with {@code setMediaDto(rs, false)}, i.e. with the second
 * ({@code generateSignedUrls}) argument switched off.
 *
 * @return one {@link MediaDTO} per row returned by the query
 */
public List<MediaDTO> getAllMedia() {
    Query allMediaQuery = new MediaSearchQueryBuilder()
            .allWithoutAnyLimitOrOffset()
            .build();
    return runInSchemaUserContext(() -> {
        NamedParameterJdbcTemplate namedTemplate = new NamedParameterJdbcTemplate(jdbcTemplate);
        return namedTemplate.query(
                allMediaQuery.sql(),
                allMediaQuery.parameters(),
                (resultSet, rowIndex) -> mediaTableRepositoryService.setMediaDto(resultSet, false));
    }, jdbcTemplate);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,7 @@
import org.avniproject.etl.domain.metadata.SchemaMetadata;
import org.avniproject.etl.domain.metadata.TableMetadata;
import org.avniproject.etl.domain.metadata.diff.Diff;
import org.avniproject.etl.repository.rowMappers.ColumnMetadataMapper;
import org.avniproject.etl.repository.rowMappers.MediaTableMetadataBuilder;
import org.avniproject.etl.repository.rowMappers.SyncTelemetryTableMetadataBuilder;
import org.avniproject.etl.repository.rowMappers.UserTableMetadataBuilder;
import org.avniproject.etl.repository.rowMappers.TableMetadataMapper;
import org.avniproject.etl.repository.rowMappers.*;
import org.avniproject.etl.repository.rowMappers.tableMappers.AddressTable;
import org.avniproject.etl.repository.rowMappers.tableMappers.ChecklistTable;
import org.springframework.beans.factory.annotation.Autowired;
Expand Down Expand Up @@ -45,6 +41,7 @@ public SchemaMetadata getNewSchemaMetadata() {
List<TableMetadata> tables = new ArrayList<>(getFormTables());
tables.add(getAddressTable());
tables.add(MediaTableMetadataBuilder.build());
tables.add(MediaAnalysisTableMetadataBuilder.build());
tables.add(SyncTelemetryTableMetadataBuilder.build());
tables.add(UserTableMetadataBuilder.build());
tables.addAll(getGroupSubjectTables());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package org.avniproject.etl.repository.rowMappers;

import org.avniproject.etl.domain.metadata.ColumnMetadata;
import org.avniproject.etl.domain.metadata.TableMetadata;
import org.avniproject.etl.repository.rowMappers.tableMappers.MediaAnalysisTable;

import java.util.stream.Collectors;

/**
 * Builds the {@link TableMetadata} describing the media-analysis table from the column
 * definitions of {@link MediaAnalysisTable}.
 */
public class MediaAnalysisTableMetadataBuilder {

    /**
     * @return a new {@link TableMetadata} of type {@link TableMetadata.Type#MediaAnalysis}, named
     *         after {@link MediaAnalysisTable#name} and carrying one {@link ColumnMetadata} per
     *         column of that table (the three remaining constructor arguments are left null)
     */
    public static TableMetadata build() {
        MediaAnalysisTable tableDefinition = new MediaAnalysisTable();
        TableMetadata metadata = new TableMetadata();

        // MediaAnalysisTable.name ignores its argument, so no table details are supplied.
        metadata.setName(tableDefinition.name(null));
        metadata.setType(TableMetadata.Type.MediaAnalysis);
        metadata.addColumnMetadata(
                tableDefinition.columns()
                        .stream()
                        .map(definition -> new ColumnMetadata(definition, null, null, null))
                        .collect(Collectors.toList()));

        return metadata;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package org.avniproject.etl.repository.rowMappers.tableMappers;

import org.avniproject.etl.domain.metadata.Column;

import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Definition of the {@code media_analysis} table: its fixed name and its five columns.
 */
public class MediaAnalysisTable extends Table {
    private static final String TABLE_NAME = "media_analysis";

    /**
     * @param tableDetails not used by this table
     * @return always {@code "media_analysis"}
     */
    @Override
    public String name(Map<String, Object> tableDetails) {
        return TABLE_NAME;
    }

    /**
     * @return the result of building a {@code Columns} instance to which the table's own columns
     *         have been added: {@code uuid} (text, declared with {@code Column.ColumnType.index}),
     *         {@code image_url} (text) and the three boolean flags
     */
    @Override
    public List<Column> columns() {
        List<Column> ownColumns = Arrays.asList(
                new Column("uuid", Column.Type.text, Column.ColumnType.index),
                new Column("image_url", Column.Type.text),
                new Column("is_valid_url", Column.Type.bool),
                new Column("is_present_in_storage", Column.Type.bool),
                new Column("is_thumbnail_generated", Column.Type.bool));
        return new Columns().withColumns(ownColumns).build();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,28 @@ public MediaTableRepositoryService(AmazonClientService amazonClientService) {
}

/**
 * Maps the current row of {@code rs} to a {@link MediaDTO} by delegating to
 * {@code setMediaDto(ResultSet, boolean)} with {@code generateSignedUrls} switched on.
 *
 * @param rs the result set positioned on the row to map
 * @return the mapped DTO
 */
public MediaDTO setMediaDto(ResultSet rs) {
    final boolean generateSignedUrls = true;
    return setMediaDto(rs, generateSignedUrls);
}

public MediaDTO setMediaDto(ResultSet rs, boolean generateSignedUrls) {
try {
String imageUrl = rs.getString("image_url");
String thumbnailUrl = Utils.getThumbnailUrl(imageUrl);

URL signedImageUrl = null, signedThumbnailUrl = null;

try {
signedImageUrl = amazonClientService.generateMediaDownloadUrl(imageUrl);
if(generateSignedUrls) {
try {
signedThumbnailUrl = amazonClientService.generateMediaDownloadUrl(thumbnailUrl);
} catch (S3FileDoesNotExist ignored) {
signedImageUrl = amazonClientService.generateMediaDownloadUrl(imageUrl);
try {
signedThumbnailUrl = amazonClientService.generateMediaDownloadUrl(thumbnailUrl);
} catch (S3FileDoesNotExist ignored) {
}
} catch (IllegalArgumentException illegalArgumentException) {
//Ignore and move on. Image will be null
} catch (S3FileDoesNotExist e) {
throw new RuntimeException(e);
}
} catch (IllegalArgumentException illegalArgumentException) {
//Ignore and move on. Image will be null
} catch (S3FileDoesNotExist e) {
throw new RuntimeException(e);
}

String uuid = rs.getString("uuid");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,14 @@ public MediaSearchQueryBuilder withPage(Page page) {
return this;
}

/**
 * Configures this builder for an unrestricted query: the {@code offset} parameter is set to 0,
 * the {@code limit} parameter to {@link Long#MAX_VALUE}, and the template attributes
 * {@code joinTablesAndColumns} and {@code request} are both added with a null value.
 *
 * @return this builder, for chaining
 */
public MediaSearchQueryBuilder allWithoutAnyLimitOrOffset() {
    // Paging parameters: start at the first row and effectively never cut off.
    parameters.put("offset", 0);
    parameters.put("limit", Long.MAX_VALUE);

    // No join tables and no search request for this query.
    template.add("joinTablesAndColumns", null);
    template.add("request", null);

    return this;
}

public Query build() {
String str = template.render();
logger.debug(str);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package org.avniproject.etl.repository.sync;

import org.apache.log4j.Logger;
import org.avniproject.etl.config.AmazonClientService;
import org.avniproject.etl.domain.NullObject;
import org.avniproject.etl.domain.OrgIdentityContextHolder;
import org.avniproject.etl.domain.Organisation;
import org.avniproject.etl.domain.metadata.TableMetadata;
import org.avniproject.etl.dto.MediaAnalysisVO;
import org.avniproject.etl.dto.MediaDTO;
import org.avniproject.etl.repository.MediaTableRepository;
import org.avniproject.etl.repository.sql.SqlFile;
import org.avniproject.etl.service.MediaAnalysisService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.BatchPreparedStatementSetter;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.ParameterizedPreparedStatementSetter;
import org.springframework.stereotype.Repository;
import org.stringtemplate.v4.ST;

import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static org.avniproject.etl.repository.JdbcContextWrapper.runInOrgContext;

/**
 * Rebuilds the media-analysis table of an organisation: lists the objects stored under the
 * organisation's media directory, compares them with the rows of the media table, then replaces
 * the contents of the media-analysis table with one row per media entry.
 */
@Repository
public class MediaAnalysisTableRegenerateAction {
    public static final String THUMBNAILS_PATTERN = "thumbnails";
    public static final String ADHOC_MOBILE_DB_BACKUP_PATTERN = "Adhoc|MobileDbBackup";
    public static final String UUID_V4_PATTERN = "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}";

    // Compiled once: Pattern is immutable, and each predicate call creates its own matcher.
    private static final Predicate<String> IS_THUMBNAIL =
            Pattern.compile(THUMBNAILS_PATTERN, Pattern.CASE_INSENSITIVE).asPredicate();
    private static final Predicate<String> IS_ADHOC_OR_MOBILE_DB_BACKUP =
            Pattern.compile(ADHOC_MOBILE_DB_BACKUP_PATTERN, Pattern.CASE_INSENSITIVE).asPredicate();
    private static final Predicate<String> CONTAINS_UUID =
            Pattern.compile(UUID_V4_PATTERN).asPredicate();

    /** Number of rows sent to the database per JDBC batch. */
    private static final int INSERT_BATCH_SIZE = 100;

    private final AmazonClientService amazonClientService;
    private final MediaTableRepository mediaTableRepository;
    private final JdbcTemplate jdbcTemplate;
    private static final String generateMediaAnalysisTableTemplate = SqlFile.readSqlFile("mediaAnalysis.sql.st");

    // Log under this class's own name rather than MediaAnalysisService's.
    private static final Logger log = Logger.getLogger(MediaAnalysisTableRegenerateAction.class);

    @Autowired
    public MediaAnalysisTableRegenerateAction(AmazonClientService amazonClientService, MediaTableRepository mediaTableRepository, JdbcTemplate jdbcTemplate) {
        this.amazonClientService = amazonClientService;
        this.mediaTableRepository = mediaTableRepository;
        this.jdbcTemplate = jdbcTemplate;
    }

    /**
     * Regenerates the media-analysis table for the given organisation.
     *
     * <p>Existing rows are deleted and one row per media entry is inserted. The thumbnail flag
     * is currently always written as {@code false} (see the TODOs below).
     *
     * @param organisation  the organisation whose media directory is listed in storage
     * @param tableMetadata metadata of the media-analysis table; only its name is used
     */
    public void process(Organisation organisation, TableMetadata tableMetadata) {

        List<String> listOfAllMediaUrls = fetchValidMediaUrlsFromStorage(organisation);
        Map<Boolean, List<String>> partitionResults = partitionListBasedOnThumbnailsPattern(listOfAllMediaUrls);
        List<String> listOfAllThumbnailsUrls = partitionResults.get(Boolean.TRUE);
        List<String> listOfAllMediaUrlsExcludingThumbnails = partitionResults.get(Boolean.FALSE);
        log.info(String.format("listOfAllMediaUrls %d listOfAllMediaUrlsExcludingThumbnails %d listOfAllThumbnailsUrls %d", listOfAllMediaUrls.size(), listOfAllMediaUrlsExcludingThumbnails.size(), listOfAllThumbnailsUrls.size()));

        //TODO Log entries that get filtered out for dev purposes
        // TODO: 17/07/24 Fetch list of MediaUrls from media table
        //    SELECT REPLACE(image_url, 'https://s3.ap-south-1.amazonaws.com/prod-user-media/goonj/', '') as image_url_in_media_table
        //    FROM goonj.media
        //    ORDER BY REPLACE(image_url, 'https://s3.ap-south-1.amazonaws.com/prod-user-media/goonj/', '');
        // TODO: 17/07/24 Invoke Analysis method to perform various metrics computations for each entry in media table of the org
        //TODO Fix test issues causing build break
        List<MediaDTO> listOfMediaEntities = mediaTableRepository.getAllMedia();
        String orgMediaDirectory = getMediaDirectory(organisation);
        // Hash-based lookup: membership is tested once per media row.
        Set<String> storedMediaWithoutThumbnails = new HashSet<>(listOfAllMediaUrlsExcludingThumbnails);
        // TODO: 22/07/24 do
        List<MediaAnalysisVO> mediaAnalysisVOS = listOfMediaEntities.stream()
                .map(mediaDTO -> analyseMedia(mediaDTO, orgMediaDirectory, storedMediaWithoutThumbnails))
                .collect(Collectors.toList());
        log.info(String.format("listOfMediaEntities %d mediaAnalysisVOS %d ", listOfMediaEntities.size(), mediaAnalysisVOS.size()));

        truncateMediaAnalysisTable(tableMetadata);
        generateMediaAnalysisTableEntries(tableMetadata, mediaAnalysisVOS);
    }

    /**
     * Computes the analysis row for one media entry.
     *
     * <p>A URL is valid when it contains the organisation's media directory. Only then is the
     * part of the URL starting at that directory looked up among the stored objects. A null URL,
     * or one without the directory, yields {@code isValidUrl == false} and
     * {@code isPresentInStorage == false} instead of an exception.
     */
    private MediaAnalysisVO analyseMedia(MediaDTO mediaDTO, String orgMediaDirectory, Set<String> storedMediaWithoutThumbnails) {
        String url = mediaDTO.url();
        int directoryIndex = url == null ? -1 : url.indexOf(orgMediaDirectory);
        boolean isValidUrl = directoryIndex >= 0;
        // substring(-1) would throw, so the lookup key is only derived for valid URLs.
        boolean isPresentInStorage = isValidUrl && storedMediaWithoutThumbnails.contains(url.substring(directoryIndex));
        // TODO: 22/07/24 init booleans correctly
        return new MediaAnalysisVO(mediaDTO.uuid(), url, isValidUrl, isPresentInStorage, false);
    }

    /** Deletes every row of the media-analysis table whose uuid is not null. */
    private void truncateMediaAnalysisTable(TableMetadata tableMetadata) {
        String schema = OrgIdentityContextHolder.getDbSchema();
        String mediaAnalysisTable = tableMetadata.getName();
        String sql = new ST("delete from <schemaName>.<mediaAnalysisTable> where uuid is not null;")
                .add("schemaName", wrapInQuotes(schema))
                .add("mediaAnalysisTable", wrapInQuotes(mediaAnalysisTable))
                .render();
        runInOrgContext(() -> {
            jdbcTemplate.execute(sql);
            return NullObject.instance();
        }, jdbcTemplate);
    }

    /**
     * Lists the objects under the organisation's media directory and keeps only those that
     * contain a UUID and do not match the adhoc / mobile-db-backup pattern. The list returned
     * by the storage client is not modified.
     */
    private List<String> fetchValidMediaUrlsFromStorage(Organisation organisation) {
        List<String> storedObjects = amazonClientService.listObjectsInBucket(getMediaDirectory(organisation));
        return storedObjects.stream()
                .filter(IS_ADHOC_OR_MOBILE_DB_BACKUP.negate().and(CONTAINS_UUID))
                .collect(Collectors.toList());
    }

    /** Splits the entries into thumbnails (key {@code true}) and everything else (key {@code false}). */
    private Map<Boolean, List<String>> partitionListBasedOnThumbnailsPattern(List<String> listOfAllMediaUrls) {
        return listOfAllMediaUrls.stream().collect(Collectors.partitioningBy(IS_THUMBNAIL));
    }

    private String getMediaDirectory(Organisation organisation) {
        return organisation.getOrganisationIdentity().getMediaDirectory();
    }

    /**
     * Inserts the given rows into the media-analysis table in batches, using the SQL rendered
     * from {@code mediaAnalysis.sql.st}.
     */
    private void generateMediaAnalysisTableEntries(TableMetadata tableMetadata, List<MediaAnalysisVO> mediaAnalysisVOS) {
        String schema = OrgIdentityContextHolder.getDbSchema();
        String mediaAnalysisTable = tableMetadata.getName();
        String sql = new ST(generateMediaAnalysisTableTemplate)
                .add("schemaName", wrapInQuotes(schema))
                .add("mediaAnalysisTable", wrapInQuotes(mediaAnalysisTable))
                .render();
        runInOrgContext(() -> {
            jdbcTemplate.batchUpdate(sql,
                    mediaAnalysisVOS,
                    INSERT_BATCH_SIZE,
                    (ps, mediaAnalysisVO) -> {
                        // NOTE(review): parameter order presumably mirrors the placeholders in
                        // mediaAnalysis.sql.st; confirm against the template.
                        ps.setString(1, mediaAnalysisVO.getUuid());
                        ps.setString(2, mediaAnalysisVO.getImage_url());
                        ps.setBoolean(3, mediaAnalysisVO.isValidUrl());
                        ps.setBoolean(4, mediaAnalysisVO.isPresentInStorage());
                        ps.setBoolean(5, mediaAnalysisVO.isThumbnailGenerated());
                    });
            return NullObject.instance();
        }, jdbcTemplate);
    }

    /** Wraps an identifier in double quotes; a null identifier becomes the bare text {@code null}. */
    private String wrapInQuotes(String parameter) {
        return parameter == null ? "null" : "\"" + parameter + "\"";
    }

}
Loading

0 comments on commit 2e01c57

Please sign in to comment.