Skip to content

Commit

Permalink
#102 | Implement logic to perform analysis of media entities
Browse files Browse the repository at this point in the history
  • Loading branch information
himeshr committed Jul 24, 2024
1 parent 2e01c57 commit 7c6cf5e
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 30 deletions.
12 changes: 9 additions & 3 deletions src/main/java/org/avniproject/etl/dto/MediaAnalysisVO.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@ public class MediaAnalysisVO {
boolean isValidUrl;
boolean isPresentInStorage;
boolean isThumbnailGenerated;
boolean isHavingDuplicates;

public MediaAnalysisVO(String uuid, String image_url, boolean isValidUrl, boolean isPresentInStorage, boolean isThumbnailGenerated) {
public MediaAnalysisVO(String uuid, String image_url, boolean isValidUrl, boolean isPresentInStorage, boolean isThumbnailGenerated, boolean isHavingDuplicates) {
this.uuid = uuid;
this.image_url = image_url;
this.isValidUrl = isValidUrl;
this.isPresentInStorage = isPresentInStorage;
this.isThumbnailGenerated = isThumbnailGenerated;
this.isHavingDuplicates = isHavingDuplicates;
}

public String getUuid() {
Expand All @@ -38,16 +40,20 @@ public boolean isThumbnailGenerated() {
return isThumbnailGenerated;
}

/**
 * Returns the {@code isHavingDuplicates} flag stored on this value object.
 *
 * @return the value of the {@code isHavingDuplicates} field
 */
public boolean isHavingDuplicates() {
    return this.isHavingDuplicates;
}

@Override
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof MediaAnalysisVO)) return false;
MediaAnalysisVO that = (MediaAnalysisVO) o;
return uuid.equals(that.uuid);
return getUuid().equals(that.getUuid()) && getImage_url().equals(that.getImage_url());
}

@Override
public int hashCode() {
return Objects.hash(uuid);
return Objects.hash(getUuid(), getImage_url());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ public List<Column> columns() {
new Column("image_url", Column.Type.text),
new Column("is_valid_url", Column.Type.bool),
new Column("is_present_in_storage", Column.Type.bool),
new Column("is_thumbnail_generated", Column.Type.bool)
new Column("is_thumbnail_generated", Column.Type.bool),
new Column("is_having_duplicates", Column.Type.bool)
))
.build();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,13 @@
import org.avniproject.etl.repository.sql.SqlFile;
import org.avniproject.etl.service.MediaAnalysisService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.BatchPreparedStatementSetter;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.core.ParameterizedPreparedStatementSetter;
import org.springframework.stereotype.Repository;
import org.stringtemplate.v4.ST;

import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
Expand All @@ -33,6 +30,13 @@ public class MediaAnalysisTableRegenerateAction {
public static final String THUMBNAILS_PATTERN = "thumbnails";
public static final String ADHOC_MOBILE_DB_BACKUP_PATTERN = "Adhoc|MobileDbBackup";
public static final String UUID_V4_PATTERN = "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}";
public static final String STRING_CONST_SEPARATOR = "/";
public static final String TRUNCATE_MEDIA_ANALYSIS_TABLE_SQL = "delete from <schemaName>.<mediaAnalysisTable> where uuid is not null;";
public static final String SCHEMA_NAME = "schemaName";
public static final String MEDIA_ANALYSIS_TABLE = "mediaAnalysisTable";
public static final int INT_CONSTANT_ZERO = 0;
public static final String COMPOSITE_UUID_SEPARATOR = "#";
public static final int INT_CONSTANT_ONE = 1;

private final AmazonClientService amazonClientService;
private final MediaTableRepository mediaTableRepository;
Expand All @@ -56,24 +60,27 @@ public void process(Organisation organisation, TableMetadata tableMetadata) {
List<String> listOfAllMediaUrlsExcludingThumbnails = partitionResults.get(Boolean.FALSE);
log.info(String.format("listOfAllMediaUrls %d listOfAllMediaUrlsExcludingThumbnails %d listOfAllThumbnailsUrls %d", listOfAllMediaUrls.size(), listOfAllMediaUrlsExcludingThumbnails.size(), listOfAllThumbnailsUrls.size()));

//TODO Log entries that get filtered out for dev purposes
// TODO: 17/07/24 Fetch list of MediaUrls from media table
// SELECT REPLACE(image_url, 'https://s3.ap-south-1.amazonaws.com/prod-user-media/goonj/', '') as image_url_in_media_table
// FROM goonj.media
// ORDER BY REPLACE(image_url, 'https://s3.ap-south-1.amazonaws.com/prod-user-media/goonj/', '');
// TODO: 17/07/24 Invoke Analysis method to perform various metrics computations for each entry in media table of the org
//TODO Fix test issues causing build break
List<MediaDTO> listOfMediaEntities = mediaTableRepository.getAllMedia();
String orgMediaDirectory = organisation.getOrganisationIdentity().getMediaDirectory();
// TODO: 22/07/24 do
List<MediaAnalysisVO> mediaAnalysisVOS = listOfMediaEntities.stream().map(mediaDTO -> {
List<MediaDTO> listOfMediaDTOEntities = mediaTableRepository.getAllMedia();
Map<String, String> mediaUrlsMap = listOfAllMediaUrlsExcludingThumbnails.stream().collect(Collectors.toMap(mediaUrl -> mediaUrl.substring(mediaUrl.lastIndexOf(STRING_CONST_SEPARATOR)), Function.identity()));
Map<String, String> thumbnailUrlsMap = listOfAllThumbnailsUrls.stream().collect(Collectors.toMap(thumbnailUrl -> thumbnailUrl.substring(thumbnailUrl.lastIndexOf(STRING_CONST_SEPARATOR)), Function.identity()));

Map<String, List<MediaDTO>> groupedMediaEntityMap = listOfMediaDTOEntities.stream()
.collect(Collectors.groupingBy(mediaDTO -> mediaDTO.uuid())); //mediaDTO.uuid() returns a composite uuid of entity.uuid#media.uuid
List<MediaAnalysisVO> mediaAnalysisVOS = groupedMediaEntityMap.entrySet().stream().map(groupedMediaEntityMapEntry -> {
MediaDTO mediaDTO = groupedMediaEntityMapEntry.getValue().get(INT_CONSTANT_ZERO);
boolean isPresentInStorage = false, isThumbnailGenerated = false;
boolean isValidUrl = mediaDTO.url().contains(orgMediaDirectory);
String urlToSearch = mediaDTO.url().substring(mediaDTO.url().indexOf(orgMediaDirectory));
boolean isPresentInStorage = listOfAllMediaUrlsExcludingThumbnails.contains(urlToSearch);
// TODO: 22/07/24 init booleans correctly
return new MediaAnalysisVO(mediaDTO.uuid(), mediaDTO.url(), isValidUrl, isPresentInStorage, false);
if (isValidUrl) {
String urlToSearch = mediaDTO.url().substring(mediaDTO.url().lastIndexOf(STRING_CONST_SEPARATOR));
isPresentInStorage = mediaUrlsMap.containsKey(urlToSearch);
isThumbnailGenerated = thumbnailUrlsMap.containsKey(urlToSearch);
}
return new MediaAnalysisVO(mediaDTO.uuid().substring(INT_CONSTANT_ZERO,mediaDTO.uuid().indexOf(COMPOSITE_UUID_SEPARATOR)),
mediaDTO.url(), isValidUrl, isPresentInStorage, isThumbnailGenerated,
groupedMediaEntityMapEntry.getValue().size() > INT_CONSTANT_ONE);
}).collect(Collectors.toList());
log.info(String.format("listOfMediaEntities %d mediaAnalysisVOS %d ", listOfMediaEntities.size(), mediaAnalysisVOS.size()));
log.info(String.format("listOfMediaDTOEntities %d mediaAnalysisVOS %d duplicates %d", listOfMediaDTOEntities.size(), mediaAnalysisVOS.size(), listOfMediaDTOEntities.size() - mediaAnalysisVOS.size()));

truncateMediaAnalysisTable(tableMetadata);
generateMediaAnalysisTableEntries(tableMetadata, mediaAnalysisVOS);
Expand All @@ -82,9 +89,9 @@ public void process(Organisation organisation, TableMetadata tableMetadata) {
private void truncateMediaAnalysisTable(TableMetadata tableMetadata) {
String schema = OrgIdentityContextHolder.getDbSchema();
String mediaAnalysisTable = tableMetadata.getName();
String sql = new ST("delete from <schemaName>.<mediaAnalysisTable> where uuid is not null;")
.add("schemaName", wrapInQuotes(schema))
.add("mediaAnalysisTable", wrapInQuotes(mediaAnalysisTable))
String sql = new ST(TRUNCATE_MEDIA_ANALYSIS_TABLE_SQL)
.add(SCHEMA_NAME, wrapInQuotes(schema))
.add(MEDIA_ANALYSIS_TABLE, wrapInQuotes(mediaAnalysisTable))
.render();
runInOrgContext(() -> {
jdbcTemplate.execute(sql);
Expand Down Expand Up @@ -118,8 +125,8 @@ private void generateMediaAnalysisTableEntries(TableMetadata tableMetadata, List
String schema = OrgIdentityContextHolder.getDbSchema();
String mediaAnalysisTable = tableMetadata.getName();
String sql = new ST(generateMediaAnalysisTableTemplate)
.add("schemaName", wrapInQuotes(schema))
.add("mediaAnalysisTable", wrapInQuotes(mediaAnalysisTable))
.add(SCHEMA_NAME, wrapInQuotes(schema))
.add(MEDIA_ANALYSIS_TABLE, wrapInQuotes(mediaAnalysisTable))
.render();
runInOrgContext(() -> {
jdbcTemplate.batchUpdate(sql,
Expand All @@ -131,6 +138,7 @@ private void generateMediaAnalysisTableEntries(TableMetadata tableMetadata, List
ps.setBoolean(3, mediaAnalysisVO.isValidUrl());
ps.setBoolean(4, mediaAnalysisVO.isPresentInStorage());
ps.setBoolean(5, mediaAnalysisVO.isThumbnailGenerated());
ps.setBoolean(6, mediaAnalysisVO.isHavingDuplicates());
});
return NullObject.instance();
}, jdbcTemplate);
Expand Down
4 changes: 2 additions & 2 deletions src/main/resources/sql/etl/mediaAnalysis.sql.st
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
insert into <schemaName>.<mediaAnalysisTable> (uuid, image_url,
is_valid_url, is_present_in_storage, is_thumbnail_generated)
VALUES (?, ?, ?, ?, ?);
is_valid_url, is_present_in_storage, is_thumbnail_generated, is_having_duplicates)
VALUES (?, ?, ?, ?, ?, ?);

0 comments on commit 7c6cf5e

Please sign in to comment.