Skip to content

Commit

Permalink
#102 | Absorb code review comments, set trigger to once a day for MediaAnalysis job
Browse files Browse the repository at this point in the history
  • Loading branch information
himeshr committed Jul 24, 2024
1 parent 6f00eb5 commit e07cbf4
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 32 deletions.
4 changes: 4 additions & 0 deletions src/main/java/org/avniproject/etl/dto/MediaCompactDTO.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
package org.avniproject.etl.dto;

/**
 * Lightweight, immutable view of a single media row.
 *
 * <p>Carries only plain string values; no signed URLs or other derived
 * resources are held here.
 *
 * @param compositeUUID key combining the owning entity's UUID with the media's UUID
 *                      (built as {@code entityUUID + "#" + imageUUID} by
 *                      {@code MediaTableRepositoryService#setMediaCompactDTO})
 * @param entityUUID    UUID of the entity the media belongs to
 * @param url           media URL as stored in the media table
 */
public record MediaCompactDTO(
        String compositeUUID,
        String entityUUID,
        String url
) {
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@ public List<ImageData> getImageData(MediaSearchRequest mediaSearchRequest, Page
return searchInternal(mediaSearchRequest, page, (rs, rowNum) -> mediaTableRepositoryService.setImageData(rs));
}

public List<MediaDTO> getAllMedia() {
public List<MediaCompactDTO> getAllMedia() {
Query query = new MediaSearchQueryBuilder().allWithoutAnyLimitOrOffset().build();
return runInSchemaUserContext(() -> new NamedParameterJdbcTemplate(jdbcTemplate)
.query(query.sql(), query.parameters(), (rs, rowNum) -> mediaTableRepositoryService.setMediaDto(rs, false)), jdbcTemplate);
.query(query.sql(), query.parameters(), (rs, rowNum) -> mediaTableRepositoryService.setMediaCompactDTO(rs)), jdbcTemplate);
}
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package org.avniproject.etl.repository.service;

import org.apache.log4j.Logger;
import org.avniproject.etl.config.AmazonClientService;
import org.avniproject.etl.config.S3FileDoesNotExist;
import org.avniproject.etl.dto.ImageData;
import org.avniproject.etl.dto.MediaCompactDTO;
import org.avniproject.etl.dto.MediaDTO;
import org.avniproject.etl.util.Utils;
import org.springframework.stereotype.Service;
Expand All @@ -23,29 +23,39 @@ public MediaTableRepositoryService(AmazonClientService amazonClientService) {
this.amazonClientService = amazonClientService;
}

public MediaDTO setMediaDto(ResultSet rs) {
return this.setMediaDto(rs, true);
/**
 * Maps the current row of the given result set to a {@link MediaCompactDTO}.
 *
 * <p>Reads the {@code image_url} and {@code uuid} columns, derives the media's
 * UUID from the URL via {@code getImageUUID}, and joins the entity UUID and the
 * media UUID with {@code "#"} to form the composite key. No signed URLs are
 * generated here.
 *
 * @param rs result set positioned on the row to read
 * @return DTO holding the composite key, the entity UUID and the raw media URL
 * @throws RuntimeException wrapping any {@link SQLException} raised while reading the row
 */
public MediaCompactDTO setMediaCompactDTO(ResultSet rs) {
    try {
        // Columns are read in the same order as before: image_url first, then uuid.
        final String mediaUrl = rs.getString("image_url");
        final String entityUuid = rs.getString("uuid");
        return new MediaCompactDTO(
                entityUuid + "#" + getImageUUID(mediaUrl),
                entityUuid,
                mediaUrl);
    } catch (SQLException sqlException) {
        throw new RuntimeException(sqlException);
    }
}

public MediaDTO setMediaDto(ResultSet rs, boolean generateSignedUrls) {
public MediaDTO setMediaDto(ResultSet rs) {
try {
String imageUrl = rs.getString("image_url");
String thumbnailUrl = Utils.getThumbnailUrl(imageUrl);

URL signedImageUrl = null, signedThumbnailUrl = null;

if(generateSignedUrls) {
try {
signedImageUrl = amazonClientService.generateMediaDownloadUrl(imageUrl);
try {
signedImageUrl = amazonClientService.generateMediaDownloadUrl(imageUrl);
try {
signedThumbnailUrl = amazonClientService.generateMediaDownloadUrl(thumbnailUrl);
} catch (S3FileDoesNotExist ignored) {
}
} catch (IllegalArgumentException illegalArgumentException) {
//Ignore and move on. Image will be null
} catch (S3FileDoesNotExist e) {
throw new RuntimeException(e);
signedThumbnailUrl = amazonClientService.generateMediaDownloadUrl(thumbnailUrl);
} catch (S3FileDoesNotExist ignored) {
}
} catch (IllegalArgumentException illegalArgumentException) {
//Ignore and move on. Image will be null
} catch (S3FileDoesNotExist e) {
throw new RuntimeException(e);
}

String uuid = rs.getString("uuid");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import org.avniproject.etl.domain.Organisation;
import org.avniproject.etl.domain.metadata.TableMetadata;
import org.avniproject.etl.dto.MediaAnalysisVO;
import org.avniproject.etl.dto.MediaDTO;
import org.avniproject.etl.dto.MediaCompactDTO;
import org.avniproject.etl.repository.MediaTableRepository;
import org.avniproject.etl.repository.sql.SqlFile;
import org.avniproject.etl.service.MediaAnalysisService;
Expand Down Expand Up @@ -35,7 +35,6 @@ public class MediaAnalysisTableRegenerateAction {
public static final String SCHEMA_NAME = "schemaName";
public static final String MEDIA_ANALYSIS_TABLE = "mediaAnalysisTable";
public static final int INT_CONSTANT_ZERO = 0;
public static final String COMPOSITE_UUID_SEPARATOR = "#";
public static final int INT_CONSTANT_ONE = 1;

private final AmazonClientService amazonClientService;
Expand All @@ -55,28 +54,25 @@ public MediaAnalysisTableRegenerateAction(AmazonClientService amazonClientServic
public void process(Organisation organisation, TableMetadata tableMetadata) {

List<String> listOfAllMediaUrls = fetchValidMediaUrlsFromStorage(organisation);
Map<Boolean, List<String>> partitionResults = partitionListBasedOnThumbnailsPattern(listOfAllMediaUrls);
List<String> listOfAllThumbnailsUrls = partitionResults.get(Boolean.TRUE);
List<String> listOfAllMediaUrlsExcludingThumbnails = partitionResults.get(Boolean.FALSE);
log.info(String.format("listOfAllMediaUrls %d listOfAllMediaUrlsExcludingThumbnails %d listOfAllThumbnailsUrls %d", listOfAllMediaUrls.size(), listOfAllMediaUrlsExcludingThumbnails.size(), listOfAllThumbnailsUrls.size()));
Map<Boolean, Map<String, String>> partitionResults = partitionListBasedOnThumbnailsPattern(listOfAllMediaUrls);
Map<String, String> thumbnailUrlsMap = partitionResults.get(Boolean.TRUE);
Map<String, String> mediaUrlsMap = partitionResults.get(Boolean.FALSE);

String orgMediaDirectory = organisation.getOrganisationIdentity().getMediaDirectory();
List<MediaDTO> listOfMediaDTOEntities = mediaTableRepository.getAllMedia();
Map<String, String> mediaUrlsMap = listOfAllMediaUrlsExcludingThumbnails.stream().collect(Collectors.toMap(mediaUrl -> mediaUrl.substring(mediaUrl.lastIndexOf(STRING_CONST_SEPARATOR)), Function.identity()));
Map<String, String> thumbnailUrlsMap = listOfAllThumbnailsUrls.stream().collect(Collectors.toMap(thumbnailUrl -> thumbnailUrl.substring(thumbnailUrl.lastIndexOf(STRING_CONST_SEPARATOR)), Function.identity()));
List<MediaCompactDTO> listOfMediaDTOEntities = mediaTableRepository.getAllMedia();

Map<String, List<MediaDTO>> groupedMediaEntityMap = listOfMediaDTOEntities.stream()
.collect(Collectors.groupingBy(mediaDTO -> mediaDTO.uuid())); //mediaDTO.uuid() returns a composite uuid of entity.uuid#media.uuid
Map<String, List<MediaCompactDTO>> groupedMediaEntityMap = listOfMediaDTOEntities.stream()
.collect(Collectors.groupingBy(mediaDTO -> mediaDTO.compositeUUID()));
List<MediaAnalysisVO> mediaAnalysisVOS = groupedMediaEntityMap.entrySet().stream().map(groupedMediaEntityMapEntry -> {
MediaDTO mediaDTO = groupedMediaEntityMapEntry.getValue().get(INT_CONSTANT_ZERO);
MediaCompactDTO mediaDTO = groupedMediaEntityMapEntry.getValue().get(INT_CONSTANT_ZERO);
boolean isPresentInStorage = false, isThumbnailGenerated = false;
boolean isValidUrl = mediaDTO.url().contains(orgMediaDirectory);
if (isValidUrl) {
String urlToSearch = mediaDTO.url().substring(mediaDTO.url().lastIndexOf(STRING_CONST_SEPARATOR));
isPresentInStorage = mediaUrlsMap.containsKey(urlToSearch);
isThumbnailGenerated = thumbnailUrlsMap.containsKey(urlToSearch);
}
return new MediaAnalysisVO(mediaDTO.uuid().substring(INT_CONSTANT_ZERO,mediaDTO.uuid().indexOf(COMPOSITE_UUID_SEPARATOR)),
return new MediaAnalysisVO(mediaDTO.entityUUID(),
mediaDTO.url(), isValidUrl, isPresentInStorage, isThumbnailGenerated,
groupedMediaEntityMapEntry.getValue().size() > INT_CONSTANT_ONE);
}).collect(Collectors.toList());
Expand Down Expand Up @@ -111,9 +107,10 @@ private void filterOutNonMediaUrls(List<String> listOfAllMediaUrls) {
listOfAllMediaUrls.removeIf(fastSyncAndAdhocDumpPatternPredicate.or(notUUIDPatternPredicate));
}

private Map<Boolean, List<String>> partitionListBasedOnThumbnailsPattern(List<String> listOfAllMediaUrls) {
private Map<Boolean, Map<String, String>> partitionListBasedOnThumbnailsPattern(List<String> listOfAllMediaUrls) {
Predicate<String> thumbnailsPatternPredicate = Pattern.compile(THUMBNAILS_PATTERN, Pattern.CASE_INSENSITIVE).asPredicate();
Map<Boolean, List<String>> partitionResults= listOfAllMediaUrls.stream().collect(Collectors.partitioningBy(thumbnailsPatternPredicate));
Map<Boolean, Map<String, String>> partitionResults= listOfAllMediaUrls.stream().collect(Collectors.partitioningBy(thumbnailsPatternPredicate,
Collectors.toMap(url -> url.substring(url.lastIndexOf(STRING_CONST_SEPARATOR)), Function.identity())));
return partitionResults;
}

Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/main-application.properties
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ spring.quartz.properties.org.quartz.jobStore.misfireThreshold = ${AVNI_SCHEDULED

# Internal Scheduler config
avni.scheduledJob.sync.repeatIntervalInMinutes=${AVNI_SCHEDULED_JOB_REPEAT_INTERVAL_IN_MINUTES:90}
avni.scheduledJob.mediaAnalysis.repeatIntervalInMinutes=${AVNI_MEDIA_ANALYSIS_JOB_REPEAT_INTERVAL_IN_MINUTES:2}
avni.scheduledJob.mediaAnalysis.repeatIntervalInMinutes=${AVNI_MEDIA_ANALYSIS_JOB_REPEAT_INTERVAL_IN_MINUTES:1440}

#S3 Parameters
avni.bucket.name=${OPENCHS_BUCKET_NAME:dummy}
Expand Down

0 comments on commit e07cbf4

Please sign in to comment.