Skip to content

Commit

Permalink
#102 | Partition MediaURLs into thumbnails and media after validating for UUID and excluding Mobile and Adhoc entries
Browse files Browse the repository at this point in the history
  • Loading branch information
himeshr committed Jul 19, 2024
1 parent fbb927c commit a7bc035
Showing 1 changed file with 31 additions and 12 deletions.
43 changes: 31 additions & 12 deletions src/main/java/org/avniproject/etl/service/MediaAnalysisService.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,20 @@
import org.avniproject.etl.domain.Organisation;
import org.avniproject.etl.domain.OrganisationIdentity;
import org.avniproject.etl.repository.OrganisationRepository;
import org.glassfish.jaxb.core.v2.TODO;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import javax.swing.text.html.HTML;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

@Service
public class MediaAnalysisService {
public static final String THUMBNAILS_PATTERN = "thumbnails";
public static final String ADHOC_MOBILE_DB_BACKUP_PATTERN = "Adhoc|MobileDbBackup";
public static final String UUID_V4_PATTERN = "[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}";
private final OrganisationRepository organisationRepository;
private final OrganisationFactory organisationFactory;
private final SchemaMigrationService schemaMigrationService;
Expand Down Expand Up @@ -53,24 +57,39 @@ public void runFor(OrganisationIdentity organisationIdentity) {
log.info(String.format("Running Media Analysis for %s", organisationIdentity.toString()));
OrgIdentityContextHolder.setContext(organisationIdentity, etlServiceConfig);
Organisation organisation = organisationFactory.create(organisationIdentity);
ArrayList<String> listOfAllMediaUrls = amazonClientService.listObjectsInBucket(getMediaDirectory(organisation));
//TODO Fix test issues causing build break
//TODO Make use of listOfAllMediaUrls to come up with required subset of URLs, like thumbnails, media after validating UUID and excluding Mobile and Adhoc entries

List<String> listOfAllMediaUrls = fetchValidMediaUrlsFromStorage(organisation);
Map<Boolean, List<String>> partitionResults = partitionListBasedOnThumbnailsPattern(listOfAllMediaUrls);
List<String> listOfAllThumbnailsUrls = partitionResults.get(Boolean.TRUE);
List<String> listOfAllMediaUrlsExcludingThumbnails = partitionResults.get(Boolean.FALSE);
log.info(String.format("listOfAllMediaUrls %d listOfAllMediaUrlsExcludingThumbnails %d listOfAllThumbnailsUrls %d", listOfAllMediaUrls.size(), listOfAllMediaUrlsExcludingThumbnails.size(), listOfAllThumbnailsUrls.size()));

//TODO Log entries that get filtered out for dev purposes
ArrayList<String> listOfAllMediaUrlsExcludingThumbnails = new ArrayList<>();
ArrayList<String> listOfAllThumbnailsUrls = new ArrayList<>();
// TODO: 17/07/24 Fetch list of MediaUrls from media table
// SELECT REPLACE(image_url, 'https://s3.ap-south-1.amazonaws.com/prod-user-media/goonj/', '') as image_url_in_media_table
// FROM goonj.media
// ORDER BY REPLACE(image_url, 'https://s3.ap-south-1.amazonaws.com/prod-user-media/goonj/', '');
// TODO: 17/07/24 Invoke Analysis method to perform various metrics computations for each entry in media table of the org
log.info(String.format("listOfAllMediaUrls %d listOfAllMediaUrlsExcludingThumbnails %d listOfAllThumbnailsUrls %d", listOfAllMediaUrls.size(), listOfAllMediaUrlsExcludingThumbnails.size(), listOfAllThumbnailsUrls.size()));
//TODO Fix test issues causing build break
log.info(String.format("Completed Media Analysis for schema %s with dbUser %s and schemaUser %s", organisationIdentity.getSchemaName(), organisationIdentity.getDbUser(), organisationIdentity.getSchemaUser()));
OrgIdentityContextHolder.setContext(organisationIdentity, etlServiceConfig);
}

private String getThumbnailsDirectory(Organisation organisation) {
return getMediaDirectory(organisation) + "/thumbnails";
/**
 * Lists the objects stored under the organisation's media directory and prunes
 * the entries rejected by {@link #filterOutNonMediaUrls(List)}.
 *
 * <p>The list handed back is the very instance returned by
 * {@code amazonClientService.listObjectsInBucket}, pruned in place; no copy is made.
 *
 * @param organisation organisation whose media directory is listed
 * @return the listed entries that survived filtering
 */
private List<String> fetchValidMediaUrlsFromStorage(Organisation organisation) {
    String mediaDirectory = getMediaDirectory(organisation);
    List<String> storedEntries = amazonClientService.listObjectsInBucket(mediaDirectory);
    // In-place pruning: relies on the listing being a mutable list.
    filterOutNonMediaUrls(storedEntries);
    return storedEntries;
}

/**
 * Removes, in place, every entry that should not be treated as a media URL.
 *
 * <p>An entry is dropped when it contains a case-insensitive match of
 * {@code ADHOC_MOBILE_DB_BACKUP_PATTERN}, or when it contains no match of
 * {@code UUID_V4_PATTERN}. Both checks use "find" semantics, so a match
 * anywhere inside the entry counts.
 *
 * @param listOfAllMediaUrls mutable list to prune; modified by this call
 */
private void filterOutNonMediaUrls(List<String> listOfAllMediaUrls) {
    Pattern adhocOrBackupPattern = Pattern.compile(ADHOC_MOBILE_DB_BACKUP_PATTERN, Pattern.CASE_INSENSITIVE);
    Pattern uuidPattern = Pattern.compile(UUID_V4_PATTERN);
    listOfAllMediaUrls.removeIf(entry ->
            adhocOrBackupPattern.matcher(entry).find() || !uuidPattern.matcher(entry).find());
}

/**
 * Splits the given entries into those that contain a case-insensitive match of
 * {@code THUMBNAILS_PATTERN} and those that do not.
 *
 * @param listOfAllMediaUrls entries to split; not modified
 * @return map whose {@code true} key holds the matching entries and whose
 *         {@code false} key holds the rest; {@code Collectors.partitioningBy}
 *         always populates both keys, possibly with empty lists
 */
private Map<Boolean, List<String>> partitionListBasedOnThumbnailsPattern(List<String> listOfAllMediaUrls) {
    Pattern thumbnailsPattern = Pattern.compile(THUMBNAILS_PATTERN, Pattern.CASE_INSENSITIVE);
    return listOfAllMediaUrls.stream()
            .collect(Collectors.partitioningBy(entry -> thumbnailsPattern.matcher(entry).find()));
}

private String getMediaDirectory(Organisation organisation) {
Expand Down

0 comments on commit a7bc035

Please sign in to comment.