Skip to content

Commit

Permalink
Merge pull request jagrosh#12 from TheConner/fix-azlyrics-genius
Browse files Browse the repository at this point in the history
Fix Lyrics fetch for AZLyrics and Genius
  • Loading branch information
jagrosh authored Sep 22, 2022
2 parents 688cdb4 + c7a9b3f commit 01cf4ee
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 42 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.14.2</version>
<version>1.15.3</version>
<type>jar</type>
</dependency>
<dependency>
Expand Down
136 changes: 98 additions & 38 deletions src/main/java/com/jagrosh/jlyrics/LyricsClient.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.json.JSONException;
import org.json.JSONObject;
import org.json.XML;
Expand All @@ -31,7 +34,7 @@
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Document.OutputSettings;
import org.jsoup.nodes.Element;
import org.jsoup.safety.Whitelist;
import org.jsoup.safety.Safelist;

/**
*
Expand All @@ -42,7 +45,7 @@ public class LyricsClient
private final Config config = ConfigFactory.load();
private final HashMap<String, Lyrics> cache = new HashMap<>();
private final OutputSettings noPrettyPrint = new OutputSettings().prettyPrint(false);
private final Whitelist newlineWhitelist = Whitelist.none().addTags("br", "p");
private final Safelist newlineSafelist = Safelist.none().addTags("br", "p");
private final Executor executor;
private final String defaultSource, userAgent;
private final int timeout;
Expand Down Expand Up @@ -122,49 +125,60 @@ public CompletableFuture<Lyrics> getLyrics(String search, String source)
return CompletableFuture.completedFuture(cache.get(cacheKey));
try
{
String searchUrl = String.format(config.getString("lyrics." + source + ".search.url"), search);
CompletableFuture<String> futureToken;
boolean jsonSearch = config.getBoolean("lyrics." + source + ".search.json");
String select = config.getString("lyrics." + source + ".search.select");
String titleSelector = config.getString("lyrics." + source + ".parse.title");
String authorSelector = config.getString("lyrics." + source + ".parse.author");
String contentSelector = config.getString("lyrics." + source + ".parse.content");
return CompletableFuture.supplyAsync(() ->
{
try

if (config.hasPath("lyrics." + source + ".token")) {
futureToken = getToken(source);
} else {
futureToken = CompletableFuture.completedFuture("");
}

return futureToken.thenCompose(token -> {
String searchUrl = String.format(config.getString("lyrics." + source + ".search.url"), search, token);

return CompletableFuture.supplyAsync(() ->
{
Document doc;
Connection connection = Jsoup.connect(searchUrl).userAgent(userAgent).timeout(timeout);
if(jsonSearch)
try
{
String body = connection.ignoreContentType(true).execute().body();
JSONObject json = new JSONObject(body);
doc = Jsoup.parse(XML.toString(json));
Document doc;
Connection connection = Jsoup.connect(searchUrl).userAgent(userAgent).timeout(timeout);
if(jsonSearch)
{
String body = connection.ignoreContentType(true).execute().body();
JSONObject json = new JSONObject(body);
doc = Jsoup.parse(XML.toString(json));
}
else
doc = connection.get();

Element urlElement = doc.selectFirst(select);
String url;
if(jsonSearch)
url = urlElement.text();
else
url = urlElement.attr("abs:href");
if(url==null || url.isEmpty())
return null;
doc = Jsoup.connect(url).userAgent(userAgent).timeout(timeout).get();
Lyrics lyrics = new Lyrics(doc.selectFirst(titleSelector).ownText(),
doc.selectFirst(authorSelector).ownText(),
cleanWithNewlines(doc.selectFirst(contentSelector)),
url,
source);
cache.put(cacheKey, lyrics);
return lyrics;
}
else
doc = connection.get();

Element urlElement = doc.selectFirst(select);
String url;
if(jsonSearch)
url = urlElement.text();
else
url = urlElement.attr("abs:href");
if(url==null || url.isEmpty())
catch(IOException | NullPointerException | JSONException ex)
{
return null;
doc = Jsoup.connect(url).userAgent(userAgent).timeout(timeout).get();
Lyrics lyrics = new Lyrics(doc.selectFirst(titleSelector).ownText(),
doc.selectFirst(authorSelector).ownText(),
cleanWithNewlines(doc.selectFirst(contentSelector)),
url,
source);
cache.put(cacheKey, lyrics);
return lyrics;
}
catch(IOException | NullPointerException | JSONException ex)
{
return null;
}
}, executor);
}
}, executor);
});
}
catch(ConfigException ex)
{
Expand All @@ -175,9 +189,55 @@ public CompletableFuture<Lyrics> getLyrics(String search, String source)
return null;
}
}


/**
 * Asynchronously fetches the token that a source's search URL needs.
 *
 * <p>Settings are read from {@code lyrics.<source>.token.*}:
 * {@code url} is the page to request, {@code text} chooses between using the
 * raw response body ({@code true}) and the own-text of the first element
 * matching {@code select} ({@code false}), and the optional {@code regex}
 * narrows that text down to its first match.
 *
 * @param source name of the configured lyrics source
 * @return a future completing with the token; it completes with {@code null}
 *         when the request fails, the selector matches nothing, or a configured
 *         regex finds no match. The method itself returns {@code null} if the
 *         task cannot be created for a non-configuration reason.
 * @throws IllegalArgumentException if the required token settings are missing
 *         or have the wrong type
 */
private CompletableFuture<String> getToken(String source) {
    try {
        String tokenUrl = config.getString("lyrics." + source + ".token.url");
        String select = config.getString("lyrics." + source + ".token.select");
        boolean textSearch = config.getBoolean("lyrics." + source + ".token.text");

        return CompletableFuture.supplyAsync(() -> {
            try {
                Pattern pattern = null;

                // Optional regex for post-processing.
                // Helpful if the token is not accessible using HTML accessors (e.g. inlined in a JS file).
                if (config.hasPath("lyrics." + source + ".token.regex")) {
                    String regexPattern = config.getString("lyrics." + source + ".token.regex");
                    pattern = Pattern.compile(regexPattern);
                }

                Connection connection = Jsoup.connect(tokenUrl).userAgent(userAgent).timeout(timeout);
                String body;

                if (textSearch) {
                    // Non-HTML resource: take the response body verbatim.
                    body = connection.ignoreContentType(true).execute().body();
                } else {
                    // HTML -- apply selectors to derive body string.
                    // selectFirst yields null when nothing matches; the resulting
                    // NullPointerException is handled by the catch below.
                    Document doc = connection.get();
                    body = doc.selectFirst(select).ownText();
                }

                // The regex is optional: without one, the fetched/selected text is the token.
                // Previously this case fell through to "return null" and discarded the body.
                if (pattern == null) {
                    return body;
                }

                Matcher matcher = pattern.matcher(body);
                return matcher.find() ? matcher.group() : null;
            } catch (IOException | NullPointerException ex) {
                // Best-effort lookup: a failed request or missing element means "no token".
                return null;
            }
        }, executor);
    } catch (ConfigException ex) {
        // Keep the original exception as the cause so the offending config path is not lost.
        throw new IllegalArgumentException(String.format("Source '%s' does not exist or is not configured correctly", source), ex);
    } catch (Exception ignored) {
        // Deliberately kept from the original: any other failure while creating the task yields null.
        return null;
    }
}

/**
 * Reduces an element's inner HTML to text using two jsoup cleaning passes.
 *
 * <p>The inner pass keeps only the tags allowed by the newline list
 * ({@code br} and {@code p}); the outer pass removes every remaining tag and
 * is run with the non-pretty-printing output settings.
 * Presumably this is what lets line breaks survive as newlines -- TODO confirm.
 *
 * @param element the element whose HTML should be cleaned
 * @return the cleaned text
 */
private String cleanWithNewlines(Element element)
{
// NOTE(review): this line uses the Whitelist names that the commit replaces; it appears to be the removed side of the diff.
return Jsoup.clean(Jsoup.clean(element.html(), newlineWhitelist), "", Whitelist.none(), noPrettyPrint);
// Same call using the Safelist names introduced by this commit.
return Jsoup.clean(Jsoup.clean(element.html(), newlineSafelist), "", Safelist.none(), noPrettyPrint);
}
}
13 changes: 10 additions & 3 deletions src/main/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,16 @@ lyrics

A-Z Lyrics
{
token
{
url = "https://www.azlyrics.com/geo.js"
text = true
select = ""
regex = """(?<=\"value\",\s\").*(?=\")"""
}
search
{
url = "https://search.azlyrics.com/search.php?q=%s"
url = "https://search.azlyrics.com/search.php?q=%s&x=%s"
json = false
select = "a[href*=/lyrics/]"
}
Expand All @@ -30,8 +37,8 @@ lyrics
}
parse
{
title = "h1[class^=SongHeader__Title]"
author = "a[class*=SongHeader__Artist]"
title = "h1[class*=__Title] > span"
author = "a[class*=__Artist]"
content = "div[class^=Lyrics__Container]"
}
}
Expand Down

0 comments on commit 01cf4ee

Please sign in to comment.