-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
15 changed files
with
325 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
53 changes: 53 additions & 0 deletions
53
src/main/java/org/avniproject/etl/dto/MediaAnalysisVO.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
package org.avniproject.etl.dto; | ||
|
||
import java.util.Objects; | ||
|
||
public class MediaAnalysisVO { | ||
|
||
String uuid; | ||
String image_url; | ||
boolean isValidUrl; | ||
boolean isPresentInStorage; | ||
boolean isThumbnailGenerated; | ||
|
||
public MediaAnalysisVO(String uuid, String image_url, boolean isValidUrl, boolean isPresentInStorage, boolean isThumbnailGenerated) { | ||
this.uuid = uuid; | ||
this.image_url = image_url; | ||
this.isValidUrl = isValidUrl; | ||
this.isPresentInStorage = isPresentInStorage; | ||
this.isThumbnailGenerated = isThumbnailGenerated; | ||
} | ||
|
||
public String getUuid() { | ||
return uuid; | ||
} | ||
|
||
public String getImage_url() { | ||
return image_url; | ||
} | ||
|
||
public boolean isValidUrl() { | ||
return isValidUrl; | ||
} | ||
|
||
public boolean isPresentInStorage() { | ||
return isPresentInStorage; | ||
} | ||
|
||
public boolean isThumbnailGenerated() { | ||
return isThumbnailGenerated; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object o) { | ||
if (this == o) return true; | ||
if (!(o instanceof MediaAnalysisVO)) return false; | ||
MediaAnalysisVO that = (MediaAnalysisVO) o; | ||
return uuid.equals(that.uuid); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(uuid); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
19 changes: 19 additions & 0 deletions
19
...ain/java/org/avniproject/etl/repository/rowMappers/MediaAnalysisTableMetadataBuilder.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
package org.avniproject.etl.repository.rowMappers; | ||
|
||
import org.avniproject.etl.domain.metadata.ColumnMetadata; | ||
import org.avniproject.etl.domain.metadata.TableMetadata; | ||
import org.avniproject.etl.repository.rowMappers.tableMappers.MediaAnalysisTable; | ||
|
||
import java.util.stream.Collectors; | ||
|
||
public class MediaAnalysisTableMetadataBuilder { | ||
public static TableMetadata build() { | ||
TableMetadata mediaAnalysisTableMetadata = new TableMetadata(); | ||
MediaAnalysisTable mediaAnalysisTable = new MediaAnalysisTable(); | ||
mediaAnalysisTableMetadata.setName(mediaAnalysisTable.name(null)); | ||
mediaAnalysisTableMetadata.setType(TableMetadata.Type.MediaAnalysis); | ||
mediaAnalysisTableMetadata.addColumnMetadata(mediaAnalysisTable.columns().stream().map(column -> new ColumnMetadata(column, null, null, null)).collect(Collectors.toList())); | ||
|
||
return mediaAnalysisTableMetadata; | ||
} | ||
} |
27 changes: 27 additions & 0 deletions
27
src/main/java/org/avniproject/etl/repository/rowMappers/tableMappers/MediaAnalysisTable.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
package org.avniproject.etl.repository.rowMappers.tableMappers; | ||
|
||
import org.avniproject.etl.domain.metadata.Column; | ||
|
||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
public class MediaAnalysisTable extends Table { | ||
@Override | ||
public String name(Map<String, Object> tableDetails) { | ||
return "media_analysis"; | ||
} | ||
|
||
@Override | ||
public List<Column> columns() { | ||
return new Columns() | ||
.withColumns(Arrays.asList( | ||
new Column("uuid", Column.Type.text, Column.ColumnType.index), | ||
new Column("image_url", Column.Type.text), | ||
new Column("is_valid_url", Column.Type.bool), | ||
new Column("is_present_in_storage", Column.Type.bool), | ||
new Column("is_thumbnail_generated", Column.Type.bool) | ||
)) | ||
.build(); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
143 changes: 143 additions & 0 deletions
143
src/main/java/org/avniproject/etl/repository/sync/MediaAnalysisTableRegenerateAction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
package org.avniproject.etl.repository.sync; | ||
|
||
import org.apache.log4j.Logger; | ||
import org.avniproject.etl.config.AmazonClientService; | ||
import org.avniproject.etl.domain.NullObject; | ||
import org.avniproject.etl.domain.OrgIdentityContextHolder; | ||
import org.avniproject.etl.domain.Organisation; | ||
import org.avniproject.etl.domain.metadata.TableMetadata; | ||
import org.avniproject.etl.dto.MediaAnalysisVO; | ||
import org.avniproject.etl.dto.MediaDTO; | ||
import org.avniproject.etl.repository.MediaTableRepository; | ||
import org.avniproject.etl.repository.sql.SqlFile; | ||
import org.avniproject.etl.service.MediaAnalysisService; | ||
import org.springframework.beans.factory.annotation.Autowired; | ||
import org.springframework.jdbc.core.BatchPreparedStatementSetter; | ||
import org.springframework.jdbc.core.JdbcTemplate; | ||
import org.springframework.jdbc.core.ParameterizedPreparedStatementSetter; | ||
import org.springframework.stereotype.Repository; | ||
import org.stringtemplate.v4.ST; | ||
|
||
import java.sql.PreparedStatement; | ||
import java.sql.SQLException; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.function.Predicate; | ||
import java.util.regex.Pattern; | ||
import java.util.stream.Collectors; | ||
|
||
import static org.avniproject.etl.repository.JdbcContextWrapper.runInOrgContext; | ||
|
||
@Repository | ||
public class MediaAnalysisTableRegenerateAction { | ||
public static final String THUMBNAILS_PATTERN = "thumbnails"; | ||
public static final String ADHOC_MOBILE_DB_BACKUP_PATTERN = "Adhoc|MobileDbBackup"; | ||
public static final String UUID_V4_PATTERN = "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"; | ||
|
||
private final AmazonClientService amazonClientService; | ||
private final MediaTableRepository mediaTableRepository; | ||
private final JdbcTemplate jdbcTemplate; | ||
private static final String generateMediaAnalysisTableTemplate = SqlFile.readSqlFile("mediaAnalysis.sql.st"); | ||
|
||
private static final Logger log = Logger.getLogger(MediaAnalysisService.class); | ||
|
||
@Autowired | ||
public MediaAnalysisTableRegenerateAction(AmazonClientService amazonClientService, MediaTableRepository mediaTableRepository, JdbcTemplate jdbcTemplate) { | ||
this.amazonClientService = amazonClientService; | ||
this.mediaTableRepository = mediaTableRepository; | ||
this.jdbcTemplate = jdbcTemplate; | ||
} | ||
|
||
public void process(Organisation organisation, TableMetadata tableMetadata) { | ||
|
||
List<String> listOfAllMediaUrls = fetchValidMediaUrlsFromStorage(organisation); | ||
Map<Boolean, List<String>> partitionResults = partitionListBasedOnThumbnailsPattern(listOfAllMediaUrls); | ||
List<String> listOfAllThumbnailsUrls = partitionResults.get(Boolean.TRUE); | ||
List<String> listOfAllMediaUrlsExcludingThumbnails = partitionResults.get(Boolean.FALSE); | ||
log.info(String.format("listOfAllMediaUrls %d listOfAllMediaUrlsExcludingThumbnails %d listOfAllThumbnailsUrls %d", listOfAllMediaUrls.size(), listOfAllMediaUrlsExcludingThumbnails.size(), listOfAllThumbnailsUrls.size())); | ||
|
||
//TODO Log entries that get filtered out for dev purposes | ||
// TODO: 17/07/24 Fetch list of MediaUrls from media table | ||
// SELECT REPLACE(image_url, 'https://s3.ap-south-1.amazonaws.com/prod-user-media/goonj/', '') as image_url_in_media_table | ||
// FROM goonj.media | ||
// ORDER BY REPLACE(image_url, 'https://s3.ap-south-1.amazonaws.com/prod-user-media/goonj/', ''); | ||
// TODO: 17/07/24 Invoke Analysis method to perform various metrics computations for each entry in media table of the org | ||
//TODO Fix test issues causing build break | ||
List<MediaDTO> listOfMediaEntities = mediaTableRepository.getAllMedia(); | ||
String orgMediaDirectory = organisation.getOrganisationIdentity().getMediaDirectory(); | ||
// TODO: 22/07/24 do | ||
List<MediaAnalysisVO> mediaAnalysisVOS = listOfMediaEntities.stream().map(mediaDTO -> { | ||
boolean isValidUrl = mediaDTO.url().contains(orgMediaDirectory); | ||
String urlToSearch = mediaDTO.url().substring(mediaDTO.url().indexOf(orgMediaDirectory)); | ||
boolean isPresentInStorage = listOfAllMediaUrlsExcludingThumbnails.contains(urlToSearch); | ||
// TODO: 22/07/24 init booleans correctly | ||
return new MediaAnalysisVO(mediaDTO.uuid(), mediaDTO.url(), isValidUrl, isPresentInStorage, false); | ||
}).collect(Collectors.toList()); | ||
log.info(String.format("listOfMediaEntities %d mediaAnalysisVOS %d ", listOfMediaEntities.size(), mediaAnalysisVOS.size())); | ||
|
||
truncateMediaAnalysisTable(tableMetadata); | ||
generateMediaAnalysisTableEntries(tableMetadata, mediaAnalysisVOS); | ||
} | ||
|
||
private void truncateMediaAnalysisTable(TableMetadata tableMetadata) { | ||
String schema = OrgIdentityContextHolder.getDbSchema(); | ||
String mediaAnalysisTable = tableMetadata.getName(); | ||
String sql = new ST("delete from <schemaName>.<mediaAnalysisTable> where uuid is not null;") | ||
.add("schemaName", wrapInQuotes(schema)) | ||
.add("mediaAnalysisTable", wrapInQuotes(mediaAnalysisTable)) | ||
.render(); | ||
runInOrgContext(() -> { | ||
jdbcTemplate.execute(sql); | ||
return NullObject.instance(); | ||
}, jdbcTemplate); | ||
} | ||
|
||
private List<String> fetchValidMediaUrlsFromStorage(Organisation organisation) { | ||
List<String> listOfAllMediaUrls = amazonClientService.listObjectsInBucket(getMediaDirectory(organisation)); | ||
filterOutNonMediaUrls(listOfAllMediaUrls); | ||
return listOfAllMediaUrls; | ||
} | ||
|
||
private void filterOutNonMediaUrls(List<String> listOfAllMediaUrls) { | ||
Predicate<String> fastSyncAndAdhocDumpPatternPredicate = Pattern.compile(ADHOC_MOBILE_DB_BACKUP_PATTERN, Pattern.CASE_INSENSITIVE).asPredicate(); | ||
Predicate<String> notUUIDPatternPredicate = Pattern.compile(UUID_V4_PATTERN).asPredicate().negate(); | ||
listOfAllMediaUrls.removeIf(fastSyncAndAdhocDumpPatternPredicate.or(notUUIDPatternPredicate)); | ||
} | ||
|
||
private Map<Boolean, List<String>> partitionListBasedOnThumbnailsPattern(List<String> listOfAllMediaUrls) { | ||
Predicate<String> thumbnailsPatternPredicate = Pattern.compile(THUMBNAILS_PATTERN, Pattern.CASE_INSENSITIVE).asPredicate(); | ||
Map<Boolean, List<String>> partitionResults= listOfAllMediaUrls.stream().collect(Collectors.partitioningBy(thumbnailsPatternPredicate)); | ||
return partitionResults; | ||
} | ||
|
||
private String getMediaDirectory(Organisation organisation) { | ||
return organisation.getOrganisationIdentity().getMediaDirectory(); | ||
} | ||
|
||
private void generateMediaAnalysisTableEntries(TableMetadata tableMetadata, List<MediaAnalysisVO> mediaAnalysisVOS) { | ||
String schema = OrgIdentityContextHolder.getDbSchema(); | ||
String mediaAnalysisTable = tableMetadata.getName(); | ||
String sql = new ST(generateMediaAnalysisTableTemplate) | ||
.add("schemaName", wrapInQuotes(schema)) | ||
.add("mediaAnalysisTable", wrapInQuotes(mediaAnalysisTable)) | ||
.render(); | ||
runInOrgContext(() -> { | ||
jdbcTemplate.batchUpdate(sql, | ||
mediaAnalysisVOS, | ||
100, | ||
(ps, mediaAnalysisVO) -> { | ||
ps.setString(1, mediaAnalysisVO.getUuid()); | ||
ps.setString(2, mediaAnalysisVO.getImage_url()); | ||
ps.setBoolean(3, mediaAnalysisVO.isValidUrl()); | ||
ps.setBoolean(4, mediaAnalysisVO.isPresentInStorage()); | ||
ps.setBoolean(5, mediaAnalysisVO.isThumbnailGenerated()); | ||
}); | ||
return NullObject.instance(); | ||
}, jdbcTemplate); | ||
} | ||
|
||
private String wrapInQuotes(String parameter) { | ||
return parameter == null ? "null" : "\"" + parameter + "\""; | ||
} | ||
|
||
} |
Oops, something went wrong.