From 6eba7cb956d90503b687d7d6e0a3fdfd36f97416 Mon Sep 17 00:00:00 2001 From: CodingPF Date: Sun, 9 Jun 2024 14:41:12 +0200 Subject: [PATCH] init POC --- MServer-Config.yaml | 11 +- .../mserver/crawler/artem/ArteMConstants.java | 15 ++ .../mserver/crawler/artem/ArteMCrawler.java | 62 ++++++++ .../crawler/artem/ArteMSreamDeserializer.java | 65 ++++++++ .../mserver/crawler/artem/ArteMStreamDto.java | 44 ++++++ .../crawler/artem/ArteMStreamTask.java | 148 ++++++++++++++++++ .../crawler/artem/ArteMVideoDeserializer.java | 74 +++++++++ .../mserver/crawler/artem/ArteMVideoDto.java | 119 ++++++++++++++ .../mserver/crawler/artem/ArteMVideoTask.java | 81 ++++++++++ 9 files changed, 614 insertions(+), 5 deletions(-) create mode 100644 src/main/java/de/mediathekview/mserver/crawler/artem/ArteMConstants.java create mode 100644 src/main/java/de/mediathekview/mserver/crawler/artem/ArteMCrawler.java create mode 100644 src/main/java/de/mediathekview/mserver/crawler/artem/ArteMSreamDeserializer.java create mode 100644 src/main/java/de/mediathekview/mserver/crawler/artem/ArteMStreamDto.java create mode 100644 src/main/java/de/mediathekview/mserver/crawler/artem/ArteMStreamTask.java create mode 100644 src/main/java/de/mediathekview/mserver/crawler/artem/ArteMVideoDeserializer.java create mode 100644 src/main/java/de/mediathekview/mserver/crawler/artem/ArteMVideoDto.java create mode 100644 src/main/java/de/mediathekview/mserver/crawler/artem/ArteMVideoTask.java diff --git a/MServer-Config.yaml b/MServer-Config.yaml index 5fc58be27..2f1af33d1 100644 --- a/MServer-Config.yaml +++ b/MServer-Config.yaml @@ -18,7 +18,7 @@ maximumRequestsPerSecond: 999.0 # If set only these Sender will be crawled all other will be ignored. 
senderIncluded: #- ARD - #- ARTE_DE + - ARTE_DE #- ARGE_FR #- ARTE_EN #- ARTE_PL @@ -32,7 +32,7 @@ senderIncluded: #- PHOENIX #- SRF #- SR - - ZDF + #- ZDF #SRF,SR,PHONIX,ORF,KIKA,DW,3SAT< @@ -159,9 +159,10 @@ senderConfigurations: ORF: maximumRequestsPerSecond: 10.0 ARTE_DE: - maximumUrlsPerTask: 1 - maximumDaysForSendungVerpasstSectionFuture: 0 - maximumRequestsPerSecond: 2.0 + maximumSubpages: 2 + #maximumUrlsPerTask: 1 + #maximumDaysForSendungVerpasstSectionFuture: 0 + #maximumRequestsPerSecond: 2.0 ARTE_FR: maximumDaysForSendungVerpasstSectionFuture: 0 # The maximum amount of URLs to be processed per task.
diff --git a/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMConstants.java b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMConstants.java
new file mode 100644
index 000000000..9707e787c
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMConstants.java
@@ -0,0 +1,22 @@
+package de.mediathekview.mserver.crawler.artem;
+
+/** Constants shared by the ARTE API crawler tasks in this package. */
+public final class ArteMConstants {
+  /** Page size sent to the API as the {@code limit} query parameter. */
+  public static final int PAGE_LIMIT = 100;
+  /** Scheme and host of the ARTE API. */
+  public static final String HOST = "https://api.arte.tv";
+  /** First page of the video listing (language {@code de}, sort key {@code -lastModified}). */
+  public static final String ALL_VIDEOS =
+      HOST + "/api/opa/v3/videos?language=de&sort=-lastModified&limit=" + PAGE_LIMIT;
+  /**
+   * Auth key passed to the REST tasks.
+   *
+   * <p>NOTE(review): this bearer token is committed in plain text; consider loading it from
+   * configuration instead.
+   */
+  public static final String AUTH =
+      "Bearer Nzc1Yjc1ZjJkYjk1NWFhN2I2MWEwMmRlMzAzNjI5NmU3NWU3ODg4ODJjOWMxNTMxYzEzZGRjYjg2ZGE4MmIwOA";
+
+  private ArteMConstants() {}
+}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMCrawler.java b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMCrawler.java
new file mode 100644
index 000000000..e43bbf0db
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMCrawler.java
@@ -0,0 +1,71 @@
+package de.mediathekview.mserver.crawler.artem;
+
+import de.mediathekview.mlib.daten.Film;
+import de.mediathekview.mlib.daten.Sender;
+import de.mediathekview.mlib.messages.listener.MessageListener;
+import de.mediathekview.mserver.base.config.MServerConfigManager;
+import de.mediathekview.mserver.base.messages.ServerMessages;
+import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
+import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
+import de.mediathekview.mserver.crawler.basic.TopicUrlDTO;
+import de.mediathekview.mserver.crawler.kika.json.KikaApiFilmDto;
+import de.mediathekview.mserver.crawler.kika.tasks.*;
+import de.mediathekview.mserver.progress.listeners.SenderProgressListener;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.Collection;
+import java.util.Queue;
+import java.util.Set;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.ForkJoinPool;
+import java.util.concurrent.RecursiveTask;
+
+/**
+ * Crawler for {@link Sender#ARTE_DE}: lists videos via {@link ArteMVideoTask}, then hands them
+ * to an {@link ArteMStreamTask} that converts them into films.
+ */
+public class ArteMCrawler extends AbstractCrawler {
+  private static final Logger LOG = LogManager.getLogger(ArteMCrawler.class);
+
+  public ArteMCrawler(
+      final ForkJoinPool aForkJoinPool,
+      final Collection<MessageListener> aMessageListeners,
+      final Collection<? extends SenderProgressListener> aProgressListeners,
+      final MServerConfigManager aRootConfig) {
+    super(aForkJoinPool, aMessageListeners, aProgressListeners, aRootConfig);
+  }
+
+  @Override
+  public Sender getSender() {
+    return Sender.ARTE_DE;
+  }
+
+  /**
+   * Collects the video list first and returns the task that turns those videos into films.
+   *
+   * @return the stream task, or {@code null} if collecting the video list failed
+   */
+  @Override
+  protected RecursiveTask<Set<Film>> createCrawlerTask() {
+    try {
+      final Queue<CrawlerUrlDTO> root = new ConcurrentLinkedQueue<>();
+      root.add(new CrawlerUrlDTO(ArteMConstants.ALL_VIDEOS));
+      final ArteMVideoTask arteMVideoTask =
+          new ArteMVideoTask(this, root, ArteMConstants.AUTH, 0);
+      // join() blocks until the listing task (and the sub page tasks it joins) has finished.
+      final Queue<ArteMVideoDto> videos =
+          new ConcurrentLinkedQueue<>(arteMVideoTask.fork().join());
+
+      printMessage(
+          ServerMessages.DEBUG_ALL_SENDUNG_FOLGEN_COUNT, getSender().getName(), videos.size());
+      getAndSetMaxCount(videos.size());
+
+      return new ArteMStreamTask(this, videos, ArteMConstants.AUTH, 0);
+    } catch (final Exception ex) {
+      LOG.fatal("Exception in ARTE_DE crawler.", ex);
+    }
+
+    return null;
+  }
+}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMSreamDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMSreamDeserializer.java
new file mode 100644
index 000000000..26d8bf5b2
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMSreamDeserializer.java
@@ -0,0 +1,79 @@
+package de.mediathekview.mserver.crawler.artem;
+
+import com.google.gson.*;
+
+import de.mediathekview.mserver.base.utils.JsonUtils;
+import de.mediathekview.mserver.crawler.basic.PagedElementListDTO;
+import java.lang.reflect.Type;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * Gson deserializer for one page of a video's stream listing.
+ *
+ * <p>Each entry of {@code videoStreams} becomes an {@link ArteMStreamDto}; all entries of a page
+ * share the subtitle map read from the page's {@code subtitles} element.
+ *
+ * <p>NOTE(review): the class name is misspelled ("Sream"); it is kept because other classes refer
+ * to it.
+ */
+public class ArteMSreamDeserializer
+    implements JsonDeserializer<PagedElementListDTO<ArteMStreamDto>> {
+  private static final String[] NEXT_PAGE = {"meta", "videoStreams", "links", "next", "href"};
+  private static final String ELEMENT_STREAMS = "videoStreams";
+  private static final String ATTR_LANGUAGE = "language";
+  private static final String ATTR_QUALITY = "quality";
+  private static final String ATTR_MIMETYPE = "mimeType";
+  private static final String ATTR_AUDIOCODE = "audioCode";
+  private static final String ATTR_URL = "url";
+  private static final String ELEMENT_SUBTITLES = "subtitles";
+  private static final String ATTR_SUBTITLES_VERSION = "version";
+  private static final String ATTR_SUBTITLES_FILENAME = "filename";
+
+  @Override
+  public PagedElementListDTO<ArteMStreamDto> deserialize(
+      final JsonElement jsonElement, final Type typeOfT, final JsonDeserializationContext context)
+      throws JsonParseException {
+    final PagedElementListDTO<ArteMStreamDto> list = new PagedElementListDTO<>();
+    list.setNextPage(JsonUtils.getElementValueAsString(jsonElement, NEXT_PAGE));
+
+    final Optional<JsonElement> streams = JsonUtils.getElement(jsonElement, ELEMENT_STREAMS);
+    // A missing or non-array element yields an empty page instead of an IllegalStateException.
+    if (streams.isEmpty() || !streams.get().isJsonArray()) {
+      return list;
+    }
+
+    final Optional<Map<String, String>> subtitles = readSubtitles(jsonElement);
+    for (final JsonElement stream : streams.get().getAsJsonArray()) {
+      list.addElement(
+          new ArteMStreamDto(
+              JsonUtils.getElementValueAsString(stream, ATTR_LANGUAGE),
+              JsonUtils.getElementValueAsString(stream, ATTR_QUALITY),
+              JsonUtils.getElementValueAsString(stream, ATTR_MIMETYPE),
+              JsonUtils.getElementValueAsString(stream, ATTR_AUDIOCODE),
+              JsonUtils.getElementValueAsString(stream, ATTR_URL),
+              subtitles));
+    }
+    return list;
+  }
+
+  /** Maps subtitle {@code version} to {@code filename}; entries lacking either are skipped. */
+  private static Optional<Map<String, String>> readSubtitles(final JsonElement root) {
+    final Optional<JsonElement> subtitles = JsonUtils.getElement(root, ELEMENT_SUBTITLES);
+    if (subtitles.isEmpty() || !subtitles.get().isJsonArray()) {
+      return Optional.empty();
+    }
+    final Map<String, String> entries = new HashMap<>();
+    for (final JsonElement sub : subtitles.get().getAsJsonArray()) {
+      final Optional<String> version =
+          JsonUtils.getElementValueAsString(sub, ATTR_SUBTITLES_VERSION);
+      final Optional<String> filename =
+          JsonUtils.getElementValueAsString(sub, ATTR_SUBTITLES_FILENAME);
+      if (version.isPresent() && filename.isPresent()) {
+        entries.put(version.get(), filename.get());
+      }
+    }
+    return Optional.of(entries);
+  }
+}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMStreamDto.java b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMStreamDto.java
new file mode 100644
index 000000000..ec14f9ae0
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMStreamDto.java
@@ -0,0 +1,56 @@
+package de.mediathekview.mserver.crawler.artem;
+
+import java.util.Map;
+import java.util.Optional;
+
+/** Holds the attributes of one video stream plus the subtitle map of its page. */
+public class ArteMStreamDto {
+  private final Optional<String> language;
+  private final Optional<String> quality;
+  private final Optional<String> mimeType;
+  private final Optional<String> audioCode;
+  private final Optional<String> url;
+  private final Optional<Map<String, String>> subtitles;
+
+  public ArteMStreamDto(
+      Optional<String> language,
+      Optional<String> quality,
+      Optional<String> mimeType,
+      Optional<String> audioCode,
+      Optional<String> url,
+      Optional<Map<String, String>> subtitles) {
+    this.language = language;
+    this.quality = quality;
+    this.mimeType = mimeType;
+    this.audioCode = audioCode;
+    this.url = url;
+    this.subtitles = subtitles;
+  }
+
+  public Optional<String> getLanguage() {
+    return language;
+  }
+
+  public Optional<String> getQuality() {
+    return quality;
+  }
+
+  public Optional<String> getMimeType() {
+    return mimeType;
+  }
+
+  public Optional<String> getAudioCode() {
+    return audioCode;
+  }
+
+  public Optional<String> getUrl() {
+    return url;
+  }
+
+  public Optional<Map<String, String>> getSubtitles() {
+    return subtitles;
+  }
+
+
+
+}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMStreamTask.java b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMStreamTask.java
new file mode 100644
index 000000000..d11d25949
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMStreamTask.java
@@ -0,0 +1,182 @@
+package de.mediathekview.mserver.crawler.artem;
+
+import java.lang.reflect.Type;
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URL;
+import java.time.Duration;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeParseException;
+import java.time.temporal.TemporalUnit;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.Queue;
+import java.util.Set;
+import java.util.UUID;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import com.google.gson.reflect.TypeToken;
+
+import de.mediathekview.mlib.daten.Film;
+import de.mediathekview.mlib.daten.GeoLocations;
+import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
+import de.mediathekview.mserver.crawler.basic.AbstractJsonRestTask;
+import de.mediathekview.mserver.crawler.basic.AbstractRecursiveConverterTask;
+import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
+import de.mediathekview.mserver.crawler.basic.PagedElementListDTO;
+import jakarta.ws.rs.core.Response;
+
+/**
+ * Requests the stream listing of every {@link ArteMVideoDto} and turns it into a {@link Film}.
+ *
+ * <p>NOTE(review): proof of concept - the streams themselves are not attached to the film yet.
+ */
+public class ArteMStreamTask
+    extends AbstractJsonRestTask<Film, PagedElementListDTO<ArteMStreamDto>, ArteMVideoDto> {
+  private static final long serialVersionUID = 1L;
+  private static final Logger LOG = LogManager.getLogger(ArteMStreamTask.class);
+  /** Videos whose {@code platform} equals this value (ignoring case) produce no film. */
+  private static final String SKIPPED_PLATFORM = "EXTRAIT";
+
+  private final int subPageIndex;
+
+  protected ArteMStreamTask(
+      AbstractCrawler crawler,
+      Queue<ArteMVideoDto> urlToCrawlDTOs,
+      String authKey,
+      int subPageIndex) {
+    super(crawler, urlToCrawlDTOs, authKey);
+    this.subPageIndex = subPageIndex;
+  }
+
+  @Override
+  protected Type getType() {
+    return new TypeToken<PagedElementListDTO<ArteMStreamDto>>() {}.getType();
+  }
+
+  @Override
+  protected void handleHttpError(ArteMVideoDto dto, URI url, Response response) {
+    crawler.printErrorMessage();
+    LOG.fatal(
+        "A HTTP error {} occurred when getting REST information from: \"{}\".",
+        response.getStatus(),
+        url);
+  }
+
+  /**
+   * Forks a task for the next page (bounded by the configured maximum of sub pages), adds the
+   * film of the current page and finally merges the results of the forked task.
+   */
+  @Override
+  protected void postProcessing(
+      PagedElementListDTO<ArteMStreamDto> aResponseObj, ArteMVideoDto aDTO) {
+    final Optional<AbstractRecursiveConverterTask<Film, ArteMVideoDto>> subpageCrawler;
+    final Optional<String> nextPageLink = aResponseObj.getNextPage();
+    if (nextPageLink.isPresent() && config.getMaximumSubpages() > subPageIndex) {
+      final Queue<ArteMVideoDto> nextPageLinks = new ConcurrentLinkedQueue<>();
+      final ArteMVideoDto nextPage = new ArteMVideoDto(aDTO);
+      nextPage.setUrl(nextPageLink.get());
+      nextPageLinks.add(nextPage);
+      subpageCrawler = Optional.of(createNewOwnInstance(nextPageLinks));
+      subpageCrawler.get().fork();
+    } else {
+      subpageCrawler = Optional.empty();
+    }
+
+    if (!SKIPPED_PLATFORM.equalsIgnoreCase(aDTO.getPlatform().orElse(""))) {
+      createFilm(aDTO, aResponseObj.getElements()).ifPresent(taskResults::add);
+    }
+
+    // Previously the forked task was never joined, so its results were silently dropped.
+    subpageCrawler.ifPresent(nextPageCrawler -> taskResults.addAll(nextPageCrawler.join()));
+  }
+
+  @Override
+  protected Object getParser(ArteMVideoDto aDTO) {
+    return new ArteMSreamDeserializer();
+  }
+
+  @Override
+  protected AbstractRecursiveConverterTask<Film, ArteMVideoDto> createNewOwnInstance(
+      Queue<ArteMVideoDto> aElementsToProcess) {
+    return new ArteMStreamTask(
+        crawler, aElementsToProcess, getAuthKey().orElse(""), subPageIndex + 1);
+  }
+
+  /**
+   * Builds the film for one video.
+   *
+   * @param filmData the video metadata
+   * @param streams the streams of the current page (not evaluated yet)
+   * @return the film, or empty when title, creation date or duration is absent or unparsable
+   */
+  private Optional<Film> createFilm(ArteMVideoDto filmData, Set<ArteMStreamDto> streams) {
+    final Optional<String> title = filmData.getTitle();
+    final Optional<LocalDateTime> time = filmData.getCreationDate().flatMap(this::parseDate);
+    final Optional<Duration> duration =
+        filmData.getDurationSeconds().flatMap(this::parseDuration);
+    if (title.isEmpty() || time.isEmpty() || duration.isEmpty()) {
+      LOG.warn(
+          "Skipping video \"{}\": title, creation date or duration is missing or invalid.",
+          filmData.getId().orElse(""));
+      return Optional.empty();
+    }
+    final Film film =
+        new Film(
+            UUID.randomUUID(),
+            crawler.getSender(),
+            filmData.getSubtitle().orElse(""),
+            title.get(),
+            time.get(),
+            duration.get());
+    filmData.getShortDescription().ifPresent(film::setBeschreibung);
+    filmData.getWebsite().flatMap(this::parseWebsite).ifPresent(url -> film.setWebsite(url));
+    film.addGeolocation(parseGeo(filmData.getGeoblockingZone().orElse("")));
+    return Optional.of(film);
+  }
+
+  /** Not implemented yet; returns an empty set instead of {@code null}. */
+  private Set<URL> parseSubtitle(Optional<Map<String, String>> data) {
+    // NOTE(review): the element type URL is an assumption - confirm once this gets implemented.
+    return Set.of();
+  }
+
+  /** Maps a geoblocking zone to a location; every zone currently maps to {@code GEO_NONE}. */
+  private GeoLocations parseGeo(String data) {
+    return GeoLocations.GEO_NONE;
+  }
+
+  private Optional<LocalDateTime> parseDate(String date) {
+    try {
+      // ISO_DATE_TIME also accepts an offset or zone suffix; only the local part is kept.
+      return Optional.of(LocalDateTime.parse(date, DateTimeFormatter.ISO_DATE_TIME));
+    } catch (final DateTimeParseException e) {
+      LOG.debug("Unparsable date \"{}\".", date, e);
+      return Optional.empty();
+    }
+  }
+
+  private Optional<Duration> parseDuration(String data) {
+    try {
+      return Optional.of(Duration.ofSeconds(Long.parseLong(data)));
+    } catch (final NumberFormatException e) {
+      LOG.debug("Unparsable duration \"{}\".", data, e);
+      return Optional.empty();
+    }
+  }
+
+  private Optional<URL> parseWebsite(String data) {
+    try {
+      return Optional.of(new URL(data));
+    } catch (final MalformedURLException e) {
+      LOG.debug("Unparsable website \"{}\".", data, e);
+      return Optional.empty();
+    }
+  }
+}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMVideoDeserializer.java b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMVideoDeserializer.java
new file mode 100644
index 000000000..9757e8625
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMVideoDeserializer.java
@@ -0,0 +1,71 @@
+package de.mediathekview.mserver.crawler.artem;
+
+import com.google.gson.*;
+
+import de.mediathekview.mserver.base.utils.JsonUtils;
+import de.mediathekview.mserver.base.utils.UrlUtils;
+import de.mediathekview.mserver.crawler.basic.PagedElementListDTO;
+import de.mediathekview.mserver.crawler.basic.TopicUrlDTO;
+import de.mediathekview.mserver.crawler.kika.KikaApiConstants;
+
+import java.lang.reflect.Type;
+import java.util.List;
+import java.util.Optional;
+
+/** Gson deserializer turning one page of the video listing into {@link ArteMVideoDto}s. */
+public class ArteMVideoDeserializer
+    implements JsonDeserializer<PagedElementListDTO<ArteMVideoDto>> {
+  private static final String[] NEXT_PAGE = {"meta", "videos", "links", "next", "href"};
+  private static final String ELEMENT_VIDEO = "videos";
+  private static final String ATTR_ID = "id";
+  private static final String ATTR_PROGRAMID = "programId";
+  private static final String ATTR_LANGUAGE = "language";
+  private static final String ATTR_KIND = "kind";
+  private static final String ATTR_PLATFORM = "platform";
+  private static final String ATTR_PLATFORMLABEL = "platformLabel";
+  private static final String ATTR_TITLE = "title";
+  private static final String ATTR_SUBTITLE = "subtitle";
+  private static final String ATTR_ORIGINALTITLE = "originalTitle";
+  private static final String ATTR_DURATIONSECONDS = "durationSeconds";
+  private static final String ATTR_SHORTDESCRIPTION = "shortDescription";
+  private static final String ATTR_HEADERTEXT = "headerText";
+  private static final String ATTR_GEOBLOCKINGZONE = "geoblockingZone";
+  private static final String ATTR_URL = "url";
+  private static final String[] ATTR_VIDEOSTREAMS = {"links", "videoStreams", "href"};
+  private static final String ATTR_CREATIONDATE = "creationDate";
+
+  @Override
+  public PagedElementListDTO<ArteMVideoDto> deserialize(
+      final JsonElement jsonElement, final Type typeOfT, final JsonDeserializationContext context)
+      throws JsonParseException {
+    final PagedElementListDTO<ArteMVideoDto> list = new PagedElementListDTO<>();
+    list.setNextPage(JsonUtils.getElementValueAsString(jsonElement, NEXT_PAGE));
+
+    final Optional<JsonElement> videos = JsonUtils.getElement(jsonElement, ELEMENT_VIDEO);
+    // A missing or non-array element yields an empty page instead of an IllegalStateException.
+    if (videos.isEmpty() || !videos.get().isJsonArray()) {
+      return list;
+    }
+    for (final JsonElement video : videos.get().getAsJsonArray()) {
+      list.addElement(
+          new ArteMVideoDto(
+              JsonUtils.getElementValueAsString(video, ATTR_ID),
+              JsonUtils.getElementValueAsString(video, ATTR_PROGRAMID),
+              JsonUtils.getElementValueAsString(video, ATTR_LANGUAGE),
+              JsonUtils.getElementValueAsString(video, ATTR_KIND),
+              JsonUtils.getElementValueAsString(video, ATTR_PLATFORM),
+              JsonUtils.getElementValueAsString(video, ATTR_PLATFORMLABEL),
+              JsonUtils.getElementValueAsString(video, ATTR_TITLE),
+              JsonUtils.getElementValueAsString(video, ATTR_SUBTITLE),
+              JsonUtils.getElementValueAsString(video, ATTR_ORIGINALTITLE),
+              JsonUtils.getElementValueAsString(video, ATTR_DURATIONSECONDS),
+              JsonUtils.getElementValueAsString(video, ATTR_SHORTDESCRIPTION),
+              JsonUtils.getElementValueAsString(video, ATTR_HEADERTEXT),
+              JsonUtils.getElementValueAsString(video, ATTR_GEOBLOCKINGZONE),
+              JsonUtils.getElementValueAsString(video, ATTR_URL),
+              JsonUtils.getElementValueAsString(video, ATTR_VIDEOSTREAMS),
+              JsonUtils.getElementValueAsString(video, ATTR_CREATIONDATE)));
+    }
+    return list;
+  }
+}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMVideoDto.java b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMVideoDto.java
new file mode 100644
index 000000000..93eb18e05
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMVideoDto.java
@@ -0,0 +1,149 @@
+package de.mediathekview.mserver.crawler.artem;
+
+import java.util.Optional;
+
+import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
+
+/**
+ * Metadata of one video; the URL handed to {@link CrawlerUrlDTO} is the video's stream listing
+ * link (empty string when absent).
+ */
+public class ArteMVideoDto extends CrawlerUrlDTO {
+  private final Optional<String> id;
+  private final Optional<String> programId;
+  private final Optional<String> language;
+  private final Optional<String> kind;
+  private final Optional<String> platform;
+  private final Optional<String> platformLabel;
+  private final Optional<String> title;
+  private final Optional<String> subtitle;
+  private final Optional<String> originalTitle;
+  private final Optional<String> durationSeconds;
+  private final Optional<String> shortDescription;
+  private final Optional<String> headerText;
+  private final Optional<String> geoblockingZone;
+  private final Optional<String> website;
+  private final Optional<String> videoStreams;
+  private final Optional<String> creationDate;
+
+  /** Copy constructor; like the main constructor it derives the URL from the stream link. */
+  public ArteMVideoDto(ArteMVideoDto clone) {
+    this(
+        clone.id,
+        clone.programId,
+        clone.language,
+        clone.kind,
+        clone.platform,
+        clone.platformLabel,
+        clone.title,
+        clone.subtitle,
+        clone.originalTitle,
+        clone.durationSeconds,
+        clone.shortDescription,
+        clone.headerText,
+        clone.geoblockingZone,
+        clone.website,
+        clone.videoStreams,
+        clone.creationDate);
+  }
+
+  public ArteMVideoDto(
+      Optional<String> id,
+      Optional<String> programId,
+      Optional<String> language,
+      Optional<String> kind,
+      Optional<String> platform,
+      Optional<String> platformLabel,
+      Optional<String> title,
+      Optional<String> subtitle,
+      Optional<String> originalTitle,
+      Optional<String> durationSeconds,
+      Optional<String> shortDescription,
+      Optional<String> headerText,
+      Optional<String> geoblockingZone,
+      Optional<String> website,
+      Optional<String> videoStreams,
+      Optional<String> creationDate) {
+    super(videoStreams.orElse(""));
+    this.id = id;
+    this.programId = programId;
+    this.language = language;
+    this.kind = kind;
+    this.platform = platform;
+    this.platformLabel = platformLabel;
+    this.title = title;
+    this.subtitle = subtitle;
+    this.originalTitle = originalTitle;
+    this.durationSeconds = durationSeconds;
+    this.shortDescription = shortDescription;
+    this.headerText = headerText;
+    this.geoblockingZone = geoblockingZone;
+    this.website = website;
+    this.videoStreams = videoStreams;
+    this.creationDate = creationDate;
+  }
+
+  public Optional<String> getId() {
+    return id;
+  }
+
+  public Optional<String> getProgramId() {
+    return programId;
+  }
+
+  public Optional<String> getLanguage() {
+    return language;
+  }
+
+  public Optional<String> getKind() {
+    return kind;
+  }
+
+  public Optional<String> getPlatform() {
+    return platform;
+  }
+
+  public Optional<String> getPlatformLabel() {
+    return platformLabel;
+  }
+
+  public Optional<String> getTitle() {
+    return title;
+  }
+
+  public Optional<String> getSubtitle() {
+    return subtitle;
+  }
+
+  public Optional<String> getOriginalTitle() {
+    return originalTitle;
+  }
+
+  public Optional<String> getDurationSeconds() {
+    return durationSeconds;
+  }
+
+  public Optional<String> getShortDescription() {
+    return shortDescription;
+  }
+
+  public Optional<String> getHeaderText() {
+    return headerText;
+  }
+
+  public Optional<String> getGeoblockingZone() {
+    return geoblockingZone;
+  }
+
+  public Optional<String> getWebsite() {
+    return website;
+  }
+
+  public Optional<String> getVideoStreams() {
+    return videoStreams;
+  }
+
+  public Optional<String> getCreationDate() {
+    return creationDate;
+  }
+}
diff --git a/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMVideoTask.java b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMVideoTask.java
new file mode 100644
index
000000000..d71c7b6c9
--- /dev/null
+++ b/src/main/java/de/mediathekview/mserver/crawler/artem/ArteMVideoTask.java
@@ -0,0 +1,88 @@
+package de.mediathekview.mserver.crawler.artem;
+
+import java.lang.reflect.Type;
+import java.net.URI;
+import java.util.List;
+import java.util.Optional;
+import java.util.Queue;
+import java.util.concurrent.ConcurrentLinkedQueue;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import com.google.gson.reflect.TypeToken;
+
+import de.mediathekview.mserver.crawler.basic.AbstractCrawler;
+import de.mediathekview.mserver.crawler.basic.AbstractJsonRestTask;
+import de.mediathekview.mserver.crawler.basic.AbstractRecursiveConverterTask;
+import de.mediathekview.mserver.crawler.basic.CrawlerUrlDTO;
+import de.mediathekview.mserver.crawler.basic.PagedElementListDTO;
+import jakarta.ws.rs.core.Response;
+
+/**
+ * Reads the paged video listing and collects every entry as an {@link ArteMVideoDto}.
+ *
+ * <p>Follow-up pages are crawled by forked instances of this task until no next page is
+ * announced or the configured maximum of sub pages is reached.
+ */
+public class ArteMVideoTask
+    extends AbstractJsonRestTask<
+        ArteMVideoDto, PagedElementListDTO<ArteMVideoDto>, CrawlerUrlDTO> {
+  private static final long serialVersionUID = 1L;
+  private static final Logger LOG = LogManager.getLogger(ArteMVideoTask.class);
+
+  private final int subPageIndex;
+
+  public ArteMVideoTask(
+      AbstractCrawler crawler,
+      Queue<CrawlerUrlDTO> urlToCrawlDTOs,
+      String authKey,
+      int subPageIndex) {
+    super(crawler, urlToCrawlDTOs, authKey);
+    this.subPageIndex = subPageIndex;
+  }
+
+  @Override
+  protected Object getParser(CrawlerUrlDTO aDTO) {
+    return new ArteMVideoDeserializer();
+  }
+
+  @Override
+  protected Type getType() {
+    return new TypeToken<PagedElementListDTO<ArteMVideoDto>>() {}.getType();
+  }
+
+  @Override
+  protected void handleHttpError(CrawlerUrlDTO dto, URI url, Response response) {
+    crawler.printErrorMessage();
+    LOG.fatal(
+        "A HTTP error {} occurred when getting REST information from: \"{}\".",
+        response.getStatus(),
+        url);
+  }
+
+  @Override
+  protected void postProcessing(
+      PagedElementListDTO<ArteMVideoDto> aResponseObj, CrawlerUrlDTO aDTO) {
+    final Optional<AbstractRecursiveConverterTask<ArteMVideoDto, CrawlerUrlDTO>> subpageCrawler;
+    final Optional<String> nextPageLink = aResponseObj.getNextPage();
+    if (nextPageLink.isPresent() && config.getMaximumSubpages() > subPageIndex) {
+      final Queue<CrawlerUrlDTO> nextPageLinks = new ConcurrentLinkedQueue<>();
+      nextPageLinks.add(new CrawlerUrlDTO(nextPageLink.get()));
+      subpageCrawler = Optional.of(createNewOwnInstance(nextPageLinks));
+      // Fork before collecting this page's entries; the results are merged via join() below.
+      subpageCrawler.get().fork();
+    } else {
+      subpageCrawler = Optional.empty();
+    }
+    taskResults.addAll(aResponseObj.getElements());
+    subpageCrawler.ifPresent(nextPageCrawler -> taskResults.addAll(nextPageCrawler.join()));
+  }
+
+  @Override
+  protected AbstractRecursiveConverterTask<ArteMVideoDto, CrawlerUrlDTO> createNewOwnInstance(
+      Queue<CrawlerUrlDTO> aElementsToProcess) {
+    return new ArteMVideoTask(
+        crawler, aElementsToProcess, getAuthKey().orElse(""), subPageIndex + 1);
+  }
+}