Skip to content

Commit

Permalink
refactor: add 3rd pass for tmdb episode linking
Browse files Browse the repository at this point in the history
Added two new match rating for when the title kinda matches but not exactly, one for only title and one for when the data also matches. Then added a new second pass to check for exact title matches after the first pass that checks for date and exact title matches, and moved the existing second pass to a third pass instead. This should make the matching _even more_ accurate, but it's still not perfect, and also getting more compute intense for every new tweak we're doing. In the worst case scenario then we just went from O(n²) to O(n³), but the best case scenario still remains O(n¹).
  • Loading branch information
revam committed Nov 5, 2024
1 parent d9f2faa commit a2dabcd
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 14 deletions.
2 changes: 1 addition & 1 deletion Shoko.Commons
Submodule Shoko.Commons updated 1 files
+1 −1 Shoko.Models
90 changes: 77 additions & 13 deletions Shoko.Server/Providers/TMDB/TmdbLinkingService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,7 @@ public IReadOnlyList<CrossRef_AniDB_TMDB_Episode> MatchAnidbToTmdbEpisodes(int a
var toAdd = new List<CrossRef_AniDB_TMDB_Episode>();
var crossReferences = new List<CrossRef_AniDB_TMDB_Episode>();
var secondPass = new List<SVR_AniDB_Episode>();
var thirdPass = new List<SVR_AniDB_Episode>();
var existing = _xrefAnidbTmdbEpisodes.GetAllByAnidbAnimeAndTmdbShowIDs(anidbAnimeId, tmdbShowId)
.GroupBy(xref => xref.AnidbEpisodeID)
.ToDictionary(grouped => grouped.Key, grouped => grouped.ToList());
Expand Down Expand Up @@ -505,11 +506,11 @@ public IReadOnlyList<CrossRef_AniDB_TMDB_Episode> MatchAnidbToTmdbEpisodes(int a

crossReferences.Add(crossRef);
toAdd.Add(crossRef);
_logger.LogTrace("Adding new link for episode. (AniDB ID: {AnidbEpisodeID}, TMDB ID: {TMDbEpisodeID}, Rating: {MatchRating}, Pass: 1/2)", episode.EpisodeID, crossRef.TmdbEpisodeID, crossRef.MatchRating);
_logger.LogTrace("Adding new link for episode. (AniDB ID: {AnidbEpisodeID}, TMDB ID: {TMDbEpisodeID}, Rating: {MatchRating}, Pass: 1/3)", episode.EpisodeID, crossRef.TmdbEpisodeID, crossRef.MatchRating);
}
else
{
_logger.LogTrace("Skipping new link for episode for first pass. (AniDB ID: {AnidbEpisodeID}, TMDB ID: {TMDbEpisodeID}, Rating: {MatchRating}, Pass: 1/2)", episode.EpisodeID, crossRef.TmdbEpisodeID, crossRef.MatchRating);
_logger.LogTrace("Skipping new link for episode for first pass. (AniDB ID: {AnidbEpisodeID}, TMDB ID: {TMDbEpisodeID}, Rating: {MatchRating}, Pass: 1/3)", episode.EpisodeID, crossRef.TmdbEpisodeID, crossRef.MatchRating);
secondPass.Add(episode);
}
}
Expand Down Expand Up @@ -545,22 +546,76 @@ public IReadOnlyList<CrossRef_AniDB_TMDB_Episode> MatchAnidbToTmdbEpisodes(int a
current = 0;
foreach (var episode in secondPass)
{
// Try find a match.
current++;
_logger.LogTrace("Linking episode {EpisodeType} {EpisodeNumber}. (AniDB ID: {EpisodeID}, Progress: {Current}/{Total}, Pass: 1/2)", episode.EpisodeTypeEnum, episode.EpisodeNumber, episode.EpisodeID, current, secondPass.Count);
// Else try find a match.
_logger.LogTrace("Linking episode {EpisodeType} {EpisodeNumber}. (AniDB ID: {EpisodeID}, Progress: {Current}/{Total}, Pass: 2/3)", episode.EpisodeTypeEnum, episode.EpisodeNumber, episode.EpisodeID, current, secondPass.Count);
var isSpecial = episode.AbstractEpisodeType is EpisodeType.Special || anime.AbstractAnimeType is not AnimeType.TVSeries and not AnimeType.Web;
var episodeList = isSpecial ? tmdbSpecialEpisodes : tmdbNormalEpisodes;
var crossRef = TryFindAnidbAndTmdbMatch(anime, episode, episodeList, isSpecial && !isOVA);
if (crossRef.MatchRating is MatchRating.TitleMatches)
{
var index = episodeList.FindIndex(episode => episode.TmdbEpisodeID == crossRef.TmdbEpisodeID);
if (index != -1)
episodeList.RemoveAt(index);

crossReferences.Add(crossRef);
toAdd.Add(crossRef);
_logger.LogTrace("Adding new link for episode. (AniDB ID: {AnidbEpisodeID}, TMDB ID: {TMDbEpisodeID}, Rating: {MatchRating}, Pass: 2/3)", episode.EpisodeID, crossRef.TmdbEpisodeID, crossRef.MatchRating);
}
else
{
_logger.LogTrace("Skipping new link for episode for first pass. (AniDB ID: {AnidbEpisodeID}, TMDB ID: {TMDbEpisodeID}, Rating: {MatchRating}, Pass: 2/3)", episode.EpisodeID, crossRef.TmdbEpisodeID, crossRef.MatchRating);
thirdPass.Add(episode);
}
}
}

// Run a third pass on the episodes that weren't OV, DT or T links in the first and second pass.
if (thirdPass.Count > 0)
{
// Filter the new links by the currently in use seasons from the existing (and/or new) OV/DT links.
var currentSessions = crossReferences
.Select(xref => xref.TmdbEpisodeID is not 0 && tmdbEpisodeDict.TryGetValue(xref.TmdbEpisodeID, out var tmdbEpisode) ? tmdbEpisode.SeasonNumber : -1)
.Except([-1])
.Append(0)
.ToHashSet();
// We always include season 0, so check if we have more than one session.
if (currentSessions.Count > 1)
{
_logger.LogTrace("Filtering new links by current sessions. (Current Sessions: {CurrentSessions})", string.Join(", ", currentSessions));
tmdbEpisodes = (isOVA ? tmdbEpisodes : tmdbNormalEpisodes.Concat(tmdbSpecialEpisodes))
.Where(episode => currentSessions.Contains(episode.SeasonNumber))
.ToList();
tmdbNormalEpisodes = isOVA ? tmdbEpisodes : tmdbEpisodes
.Where(episode => episode.SeasonNumber != 0)
.OrderBy(episode => episode.SeasonNumber)
.ThenBy(episode => episode.EpisodeNumber)
.ToList();
tmdbSpecialEpisodes = isOVA ? tmdbEpisodes : tmdbEpisodes
.Where(episode => episode.SeasonNumber == 0)
.OrderBy(episode => episode.EpisodeNumber)
.ToList();
}

current = 0;
foreach (var episode in thirdPass)
{
// Try find a match.
current++;
_logger.LogTrace("Linking episode {EpisodeType} {EpisodeNumber}. (AniDB ID: {EpisodeID}, Progress: {Current}/{Total}, Pass: 3/3)", episode.EpisodeTypeEnum, episode.EpisodeNumber, episode.EpisodeID, current, secondPass.Count);
var isSpecial = episode.AbstractEpisodeType is EpisodeType.Special || anime.AbstractAnimeType is not AnimeType.TVSeries and not AnimeType.Web;
var episodeList = isSpecial ? tmdbSpecialEpisodes : tmdbNormalEpisodes;
var crossRef = TryFindAnidbAndTmdbMatch(anime, episode, episodeList, isSpecial && !isOVA);
if (crossRef.TmdbEpisodeID != 0)
{
_logger.LogTrace("Adding new link for episode. (AniDB ID: {AnidbEpisodeID}, TMDB ID: {TMDbEpisodeID}, Rating: {MatchRating}, Pass: 2/2)", episode.EpisodeID, crossRef.TmdbEpisodeID, crossRef.MatchRating);
_logger.LogTrace("Adding new link for episode. (AniDB ID: {AnidbEpisodeID}, TMDB ID: {TMDbEpisodeID}, Rating: {MatchRating}, Pass: 3/3)", episode.EpisodeID, crossRef.TmdbEpisodeID, crossRef.MatchRating);
var index = episodeList.FindIndex(episode => episode.TmdbEpisodeID == crossRef.TmdbEpisodeID);
if (index != -1)
episodeList.RemoveAt(index);
}
else
{
_logger.LogTrace("No match found for episode. (AniDB ID: {AnidbEpisodeID}, Pass: 2/2)", episode.EpisodeID);
_logger.LogTrace("No match found for episode. (AniDB ID: {AnidbEpisodeID}, Pass: 3/3)", episode.EpisodeID);
}

crossReferences.Add(crossRef);
Expand Down Expand Up @@ -644,20 +699,29 @@ private CrossRef_AniDB_TMDB_Episode TryFindAnidbAndTmdbMatch(SVR_AniDB_Anime ani
.OrderBy(result => result)
.ToList() : [];

// Exact or almost exact match first.
if (titleSearchResults.Count > 0 && titleSearchResults[0] is { } firstMatch && ((firstMatch.ExactMatch && firstMatch.LengthDifference == 0) || (firstMatch.Distance < 0.2D && firstMatch.LengthDifference < 6)))
// Exact match first.
if (titleSearchResults.Count > 0 && titleSearchResults[0] is { } exactTitleMatch && exactTitleMatch.ExactMatch && exactTitleMatch.LengthDifference < 3)
{
var tmdbEpisode = exactTitleMatch.Result;
var dateMatches = airdateProbability.Any(result => result.episode == tmdbEpisode);
var rating = dateMatches ? MatchRating.DateAndTitleMatches : MatchRating.TitleMatches;
return new(anidbEpisode.EpisodeID, anidbEpisode.AnimeID, tmdbEpisode.TmdbEpisodeID, tmdbEpisode.TmdbShowID, rating);
}

// Almost exact match second.
if (titleSearchResults.Count > 0 && titleSearchResults[0] is { } kindaTitleMatch && kindaTitleMatch.Distance < 0.2D && kindaTitleMatch.LengthDifference < 6)
{
var tmdbEpisode = titleSearchResults.FirstOrDefault(r => airdateProbability.Any(result => result.episode == r.Result))?.Result;
var rating = tmdbEpisode is null ? MatchRating.TitleMatches : MatchRating.DateAndTitleMatches;
tmdbEpisode ??= titleSearchResults[0]!.Result;
var tmdbEpisode = kindaTitleMatch.Result;
var dateMatches = airdateProbability.Any(result => result.episode == tmdbEpisode);
var rating = dateMatches ? MatchRating.DateAndTitleKindaMatches : MatchRating.TitleKindaMatches;
return new(anidbEpisode.EpisodeID, anidbEpisode.AnimeID, tmdbEpisode.TmdbEpisodeID, tmdbEpisode.TmdbShowID, rating);
}

// Followed by checking the air date.
if (airdateProbability.Count > 0)
{
var tmdbEpisode = airdateProbability.FirstOrDefault(r => titleSearchResults.Any(result => result.Result == r.episode))?.episode;
var rating = tmdbEpisode is null ? MatchRating.DateMatches : MatchRating.DateAndTitleMatches;
var rating = tmdbEpisode is null ? MatchRating.DateMatches : MatchRating.DateAndTitleKindaMatches;
tmdbEpisode ??= airdateProbability[0].episode;
return new(anidbEpisode.EpisodeID, anidbEpisode.AnimeID, tmdbEpisode.TmdbEpisodeID, tmdbEpisode.TmdbShowID, rating);
}
Expand All @@ -666,7 +730,7 @@ private CrossRef_AniDB_TMDB_Episode TryFindAnidbAndTmdbMatch(SVR_AniDB_Anime ani
if (titleSearchResults.Count > 0)
{
var tmdbEpisode = titleSearchResults[0]!.Result;
return new(anidbEpisode.EpisodeID, anidbEpisode.AnimeID, tmdbEpisode.TmdbEpisodeID, tmdbEpisode.TmdbShowID, MatchRating.TitleMatches);
return new(anidbEpisode.EpisodeID, anidbEpisode.AnimeID, tmdbEpisode.TmdbEpisodeID, tmdbEpisode.TmdbShowID, MatchRating.TitleKindaMatches);
}

// And finally, just pick the first available episode if it's not a special.
Expand Down

0 comments on commit a2dabcd

Please sign in to comment.