Skip to content

Commit

Permalink
feat: assign aggregated genre to album
Browse files Browse the repository at this point in the history
Also, make autogenre command code reusable for running it on import.
  • Loading branch information
mgoltzsche committed May 7, 2024
1 parent b68d6b1 commit a0e495a
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 48 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM ghcr.io/mgoltzsche/beets-plugins:0.13.1
FROM ghcr.io/mgoltzsche/beets-plugins:0.14.0

# Install bats
USER root:root
Expand Down
145 changes: 101 additions & 44 deletions beetsplug/autogenre/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
import re
import yaml
from beets import ui
from collections import Counter
from beets.plugins import BeetsPlugin
from beets.dbcore import types
from beets.dbcore.query import FixedFieldSort
Expand Down Expand Up @@ -58,6 +58,37 @@ def __init__(self):
self._lastgenre_conf = config['lastgenre'].get() or {}
self._separator = self._lastgenre_conf.get('separator') or ', '
self._remix_regex = re.compile(r'.+[^\w](remix|bootleg|remake)', re.IGNORECASE)
self._genre_tree = None
# TODO: add auto-detect support

def imported(self, session, task):
    """Event hook called when an import task finishes.

    For a singleton import only the imported item gets a genre. For an
    album import every item of the album is processed first and the album
    genre is derived afterwards.

    Args:
        session: the import session (not used here).
        task: the finished import task; ``is_album``, ``album`` and
            ``item`` are read from it.
    """
    if not task.is_album:
        self._update_item_genre(task.item)
        return

    album = task.album
    for track in album.items():
        self._update_item_genre(track)

    # The album genre is aggregated from its items, so it must run last.
    self._update_album_genre(album)

def _update_item_genre(self, item):
    """Detect the genre of a single item and assign it.

    The genre is resolved via ``_item_genre`` with both the ``all`` and
    ``force`` flags enabled. When no genre could be determined the item is
    left untouched, matching the ``genres is not None`` guard of the
    ``autogenre`` command, instead of overwriting existing tags with
    ``None``.

    Unless the ``pretend`` option is set, the changed fields are stored and,
    if ``import.write`` is enabled, also written to the item's file tags.

    Args:
        item: the library item to update.
    """
    genre, genres, source = self._item_genre(item, True, True)
    if genres is None:
        # Nothing detected: keep whatever the item is tagged with already.
        self._log.debug("No genre detected for item: {}", item)
        return

    self._log.info("Set track genre '{}' ({}): {}", genre, source, item)
    item.genre = genre
    item.genres = genres
    item.genre_source = source

    if self.config['pretend'].get():
        return
    if config['import']['write'].get():
        item.try_write()
    # Persist only the fields this plugin manages.
    item.store(['genre', 'genres', 'genre_source'])

def _update_album_genre(self, album):
    """Assign the most common genre of an album's items to the album.

    Nothing happens when no item carries a genre or when the album already
    has the aggregated genre. The change is stored unless the ``pretend``
    option is set.

    Args:
        album: the library album to update.
    """
    track_genres = [track.genre for track in album.items()]
    genre = _most_common(track_genres)
    if not genre or album.genre == genre:
        return

    album.genre = genre
    self._log.info("Set genre '{}' for album {}", album.genre, album)
    if self.config['pretend'].get():
        return
    album.store(['genre'])

def commands(self):
p = OptionParser()
Expand Down Expand Up @@ -105,18 +136,9 @@ def commands(self):
return [c]

def _run_autogenre_cmd(self, lib, opts, args):
genre_tree_file = self._lastgenre_conf.get('canonical')
if not genre_tree_file:
genre_tree_file = os.path.join(os.path.dirname(__file__), '..', 'lastgenre', 'genres-tree.yaml')
genre_wh_file = self._lastgenre_conf.get('whitelist')
assert genre_wh_file, "Config option lastgenre.whitelist is not specified!"
with open(genre_wh_file, 'r') as f:
genre_whitelist = [genre.strip().lower() for genre in f.readlines() if genre.strip()]
with codecs.open(genre_tree_file, 'r', encoding='utf-8') as f:
genre_tree_yaml = yaml.safe_load(f)
genre_tree = GenreTree(genre_tree_yaml, genre_whitelist)
self._apply_opts_to_config(opts)
if opts.genre:
ok = genre_tree.contains(opts.genre)
ok = self._genres().contains(opts.genre)
assert args, "Must specify selector when --genre provided"
assert ok, "Provided genre '{}' is not registered within genre tree!".format(opts.genre)
query = decargs(args)
Expand All @@ -125,63 +147,79 @@ def _run_autogenre_cmd(self, lib, opts, args):
items = lib.items(parsed_query, parsed_sort)
all = opts.all or opts.genre is not None
force = opts.force or opts.genre is not None
pretend = self.config['pretend'].get()
filtered_items = [item for item in items if _filter_item(item, all, force)]
self._log.info('Selected {} items for genre update...', len(filtered_items))
for item in items:
genres, source = self._item_genres(item, all, force, opts, genre_tree)
genrel = self._str2list(genres)
genre = genres and genrel[0] or None

if genres:
if opts.parent_genres and genre:
# Append primary genre's parent genres to genre list
parent_genres = genre_tree.parents(genre)
parent_genres = [self._format_genre(g) for g in parent_genres]
genrel = genrel + [g for g in parent_genres if g not in genrel]
genres = self._list2str(genrel)

# Update items
for item in filtered_items:
genre, genres, source = self._item_genre(item, all, force, opts.genre)
genre_changed = genre != item.get('genre')
genres_changed = genres != item.get('genres')
genre_source_changed = source != item.get('genre_source')
changed = genre_changed or genres_changed or genre_source_changed
if changed and genres is not None:
msg = "Changing genre from '{}' to '{}' ({}) for item: {}"
msg = "Change genre from '{}' to '{}' ({}) for item: {}"
self._log.info(msg, item.get('genre'), genre, source, item)
write = ui.should_write()
if not opts.pretend:
if not pretend:
item.genre = genre
item.genres = genres
item.genre_source = source
if write:
if config['import']['write'].get():
item.try_write()
item.store()
# TODO: match remix artist within title and get genre from artist: TITLE (ARTIST remix)
# Update albums
album_ids = set([item.album_id for item in filtered_items if item.album_id])
for album_id in album_ids:
album = lib.get_album(album_id)
if album:
self._update_album_genre(album)

def _apply_opts_to_config(self, opts):
    """Copy command line options into the plugin configuration.

    Only options that were actually provided (value is not ``None``) and
    whose name is already a key of the plugin configuration are applied.

    Args:
        opts: the parsed command line options object.
    """
    for name, value in vars(opts).items():
        if value is None:
            continue
        if name not in self.config:
            continue
        self.config[name] = value

def _item_genre(self, item, all, force, force_genre=None):
    """Resolve the primary genre, the full genre list and the genre source.

    Args:
        item: the library item to inspect.
        all: passed through to ``_item_genres``.
        force: passed through to ``_item_genres``.
        force_genre: optional genre to enforce, passed through to
            ``_item_genres``.

    Returns:
        A ``(genre, genres, source)`` tuple: the first entry of the genre
        list (or ``None``), the genre list as a string and the source that
        ``_item_genres`` reported.
    """
    genres, source = self._item_genres(item, all, force, force_genre)
    genre_list = self._str2list(genres)
    # The primary genre is the first list entry; empty values become None.
    genre = (genre_list[0] or None) if genres else None

    if genres:
        if self.config['parent_genres'].get() and genre:
            # Append primary genre's parent genres to genre list
            ancestors = [
                self._format_genre(parent)
                for parent in self._genres().parents(genre)
            ]
            extra = [g for g in ancestors if g not in genre_list]
            genre_list = genre_list + extra
        # NOTE(review): indentation was lost in the reviewed source; the
        # re-join is assumed to run whenever a genre list exists. Confirm.
        genres = self._list2str(genre_list)

    return genre, genres, source

def _item_genres(self, item, all, force, force_genre):
genre = item.get('genres')
if not genre:
genre = item.get('genre')
source = item.get('genre_source')
orig_genre = genre
orig_source = source
if _filter_item(item, all, force):
if opts.genre is not None:
source = opts.genre and 'user' or None
genre = self._format_genre(opts.genre.lower())
if force_genre is not None:
source = force_genre and 'user' or None
genre = self._format_genre(force_genre.lower())
if source != 'user' or not genre:
# auto-detect genre
if opts.lastgenre:
if self.config['lastgenre'].get():
genre = self._lastfm_genre(item)
if genre is not None:
source = 'lastfm'
if genre is None and opts.xtractor:
if self.config['from_title'].get():
genre, matched = self._fix_remix_genre(item, genre)
if matched and genre is not None:
source = 'title'
if genre is None and self.config['xtractor'].get():
genre = self._essentia_genre(item)
if genre is not None:
source = 'essentia'
if opts.from_title:
genre, matched = self._fix_remix_genre(item, genre, genre_tree)
if matched and genre is not None:
source = 'title'
return genre, source

def _is_remix(self, title):
Expand All @@ -208,18 +246,18 @@ def _lastfm_genre(self, item):
self._log.debug(msg, genre, src, item)
return genre

def _fix_remix_genre(self, item, genre, genre_tree):
def _fix_remix_genre(self, item, genre):
'''Match genre within title or album and prepend to genre list.
This fixes remixes that are wrongly tagged on last.fm'''
title = item.get('title')
album = item.get('album')
genres = self._str2list(genre)
matched = genre_tree.match(title)
matched = self._genres().match(title)
if matched:
source = 'title'
elif album:
source = 'album'
matched = genre_tree.match(album)
matched = self._genres().match(album)
prepend_genre = None
if matched:
prepend_genre = matched.lower()
Expand Down Expand Up @@ -268,10 +306,24 @@ def _essentia_genre(self, item):
genre = 'electronic'

genre = self._format_genre(genre)
msg = "Got essentia genre '{}' for item: {}"
self._log.debug(msg, genre, item)
self._log.debug("Got essentia genre '{}' for item: {}", genre, item)
return genre

def _genres(self):
    """Return the genre tree, loading it from disk on first use.

    The tree file is taken from the ``lastgenre.canonical`` option and
    falls back to ``../lastgenre/genres-tree.yaml`` relative to this
    module. The whitelist file is taken from ``lastgenre.whitelist``; its
    non-blank lines are stripped and lower-cased. The loaded tree is cached
    on the instance so the files are read only once.

    Returns:
        The cached ``GenreTree`` instance.

    Raises:
        AssertionError: if ``lastgenre.whitelist`` is not configured.
    """
    # Compare against None explicitly so a loaded tree is never reloaded
    # just because it happens to evaluate as falsy.
    if self._genre_tree is None:
        tree_path = self._lastgenre_conf.get('canonical')
        if not tree_path:
            tree_path = os.path.join(os.path.dirname(__file__), '..', 'lastgenre', 'genres-tree.yaml')

        whitelist_path = self._lastgenre_conf.get('whitelist')
        if not whitelist_path:
            # Raised explicitly (same exception type as before) so the
            # check is not stripped when Python runs with -O.
            raise AssertionError("Config option lastgenre.whitelist is not specified!")

        # Read the whitelist as UTF-8, consistent with the tree file below,
        # instead of relying on the platform's default encoding.
        with open(whitelist_path, 'r', encoding='utf-8') as f:
            stripped = (line.strip() for line in f)
            whitelist = [name.lower() for name in stripped if name]
        with codecs.open(tree_path, 'r', encoding='utf-8') as f:
            tree_yaml = yaml.safe_load(f)
        self._genre_tree = GenreTree(tree_yaml, whitelist)

    return self._genre_tree

def _format_genre(self, genre):
    """Format a genre name with the lastgenre plugin's tag formatter.

    Args:
        genre: the genre name to format.

    Returns:
        Whatever ``self._lastgenre._format_tag`` returns for ``genre``.
    """
    format_tag = self._lastgenre._format_tag
    return format_tag(genre)

Expand All @@ -290,3 +342,8 @@ def _filter_item(item, all, force):
def _is_plugin_enabled(plugin_name):
    """Tell whether a plugin is listed in the ``plugins`` config option.

    Args:
        plugin_name: name of the plugin to look up.

    Returns:
        True if ``plugin_name`` is contained in the configured plugins;
        False otherwise, including when the option does not exist.
    """
    plugins_view = config['plugins']
    if plugins_view.exists():
        enabled_plugins = plugins_view.get()
    else:
        enabled_plugins = []
    return plugin_name in enabled_plugins

def _most_common(names):
    """Return the value that occurs most often within ``names``.

    Falsy entries (``None``, empty strings) are ignored. When several
    values share the highest count, the one that appeared first wins.

    Args:
        names: iterable of values, e.g. the genres of an album's items.

    Returns:
        The most frequent value, or ``None`` if no truthy value exists.
    """
    tally = Counter(name for name in names if name)
    if not tally:
        return None
    winner, _count = tally.most_common(1)[0]
    return winner
2 changes: 1 addition & 1 deletion example_beets_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ smartplaylist:

lastgenre:
auto: false
canonical: /etc/beets/genre-tree.yaml
prefer_specific: true
count: 4 # To also tag Rage Against the Machine as Hip Hop
source: album
min_weight: 15 # To prevent Amadou & Mariam from being tagged as New Wave
canonical: /etc/beets/genre-tree.yaml
# This file picks some of the genres defined in the canonical tree.
# The genres listed in this file must be aligned with the playlist queries.
whitelist: /data/beets/genres.txt
Expand Down
12 changes: 10 additions & 2 deletions tests/e2e/tests.bats
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ assertGenre() {
beet ytimport -q --quiet-fallback=asis https://www.youtube.com/watch?v=hyVVoLy4LSc
QUERY='title:Paris City Jazz'
beet autogenre $QUERY
assertGenre "$QUERY" 'lastfm | House | House, Downtempo, Jazz, Electronic'
assertGenre "$QUERY" 'lastfm | Nu Jazz | Nu Jazz, House, Downtempo, Jazz, Electronic'
}

@test 'derive genre from track title' {
Expand All @@ -56,6 +56,9 @@ assertGenre() {
QUERY='album:Reggae Jungle Drum and Bass Mix #9 New 2022 Rudy, a message to you'
beet autogenre -fa $QUERY
assertGenre "$QUERY" 'title | Ragga Drum And Bass | Ragga Drum And Bass, Drum And Bass, Electronic'
echo ALBUM GENRE:
beet ls -a 'Reggae Jungle Drum and Bass Mix #9 New 2022' -f '$genre'
[ "`beet ls -a 'Reggae Jungle Drum and Bass Mix #9 New 2022' -f '$genre'`" = 'Ragga Drum And Bass' ] || (echo 'Should set album genre!'; false)
}

@test 'estimate genre using essentia' {
Expand All @@ -68,9 +71,14 @@ assertGenre() {


@test 'specify genre manually' {
QUERY='album:Reggae Jungle Drum and Bass Mix #9 New 2022 Rudy, a message to you'
ALBUM='Reggae Jungle Drum and Bass Mix #9 New 2022'
beet autogenre -fa "album:$ALBUM"
QUERY="album:$ALBUM Rudy, a message to you"
beet autogenre --genre='Electronic' $QUERY
assertGenre "$QUERY" 'user | Electronic | Electronic'
# Should not touch genre of other items
QUERY="album:$ALBUM Sizzla Livin"
assertGenre "$QUERY" 'title | Ragga Drum And Bass | Ragga Drum And Bass, Dancehall, Reggae, Drum And Bass, Electronic'
}

@test 'preserve manually specified genre' {
Expand Down
42 changes: 42 additions & 0 deletions tests/test_most_common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import unittest
from beetsplug.autogenre import _most_common

class TestMostCommon(unittest.TestCase):
    """Table-driven tests for the ``_most_common`` helper."""

    def test_most_common(self):
        """Check the empty, single, tie-breaking and majority cases.

        Every table entry runs as its own sub-test so that one failing
        case does not hide the results of the remaining ones.
        """
        testcases = [
            {
                'name': 'empty',
                'input': [],
                'expected': None,
            },
            {
                'name': 'single',
                'input': ['genre 1'],
                'expected': 'genre 1',
            },
            {
                'name': 'first',
                'input': ['genre 1', 'genre 2', 'genre 3'],
                'expected': 'genre 1',
            },
            {
                'name': 'first not none',
                'input': [None, 'genre 1', 'genre 2', 'genre 3'],
                'expected': 'genre 1',
            },
            {
                'name': 'most common',
                'input': ['genre 1', 'genre 2', 'genre 2', 'genre 3'],
                'expected': 'genre 2',
            },
            {
                'name': 'first most common',
                'input': ['genre 1', 'genre 2', 'genre 2', 'genre 3', 'genre 3'],
                'expected': 'genre 2',
            },
        ]
        for c in testcases:
            info = "\ntest case '{}' input: {}".format(c['name'], c['input'])
            with self.subTest(name=c['name']):
                a = _most_common(c['input'])
                self.assertEqual(a, c['expected'], info)

0 comments on commit a0e495a

Please sign in to comment.