diff --git a/src/main/java/com/jagrosh/jlyrics/LyricsClient.java b/src/main/java/com/jagrosh/jlyrics/LyricsClient.java index 1041de6..a245118 100644 --- a/src/main/java/com/jagrosh/jlyrics/LyricsClient.java +++ b/src/main/java/com/jagrosh/jlyrics/LyricsClient.java @@ -23,6 +23,9 @@ import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executor; import java.util.concurrent.Executors; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + import org.json.JSONException; import org.json.JSONObject; import org.json.XML; @@ -42,7 +45,7 @@ public class LyricsClient private final Config config = ConfigFactory.load(); private final HashMap cache = new HashMap<>(); private final OutputSettings noPrettyPrint = new OutputSettings().prettyPrint(false); - private final Safelist newlineWhitelist = Safelist.none().addTags("br", "p"); + private final Safelist newlineSafelist = Safelist.none().addTags("br", "p"); private final Executor executor; private final String defaultSource, userAgent; private final int timeout; @@ -122,54 +125,60 @@ public CompletableFuture getLyrics(String search, String source) return CompletableFuture.completedFuture(cache.get(cacheKey)); try { - String searchUrl = String.format(config.getString("lyrics." + source + ".search.url"), search); + CompletableFuture futureToken; boolean jsonSearch = config.getBoolean("lyrics." + source + ".search.json"); String select = config.getString("lyrics." + source + ".search.select"); String titleSelector = config.getString("lyrics." + source + ".parse.title"); String authorSelector = config.getString("lyrics." + source + ".parse.author"); String contentSelector = config.getString("lyrics." 
+ source + ".parse.content"); - return CompletableFuture.supplyAsync(() -> - { - try - { - Document doc; - Connection connection = Jsoup.connect(searchUrl).userAgent(userAgent).timeout(timeout); - if(jsonSearch) - { - String body = connection.ignoreContentType(true).execute().body(); - JSONObject json = new JSONObject(body); - doc = Jsoup.parse(XML.toString(json)); - } - else - doc = connection.get(); - - Element urlElement = doc.selectFirst(select); - if(urlElement == null) - return null; + if (config.hasPath("lyrics." + source + ".token")) { + futureToken = getToken(source); + } else { + futureToken = CompletableFuture.completedFuture(""); + } - String url; - if(jsonSearch) - url = urlElement.text(); - else - url = urlElement.attr("abs:href"); - if(url.isEmpty()) - return null; - doc = Jsoup.connect(url).userAgent(userAgent).timeout(timeout).get(); + return futureToken.thenCompose(token -> { + String searchUrl = String.format(config.getString("lyrics." + source + ".search.url"), search, token); - Lyrics lyrics = new Lyrics(doc.selectFirst(titleSelector).ownText(), - doc.selectFirst(authorSelector).ownText(), - cleanWithNewlines(doc.selectFirst(contentSelector)), - url, - source); - cache.put(cacheKey, lyrics); - return lyrics; - } - catch(IOException | NullPointerException | JSONException ex) + return CompletableFuture.supplyAsync(() -> { - return null; - } - }, executor); + try + { + Document doc; + Connection connection = Jsoup.connect(searchUrl).userAgent(userAgent).timeout(timeout); + if(jsonSearch) + { + String body = connection.ignoreContentType(true).execute().body(); + JSONObject json = new JSONObject(body); + doc = Jsoup.parse(XML.toString(json)); + } + else + doc = connection.get(); + + Element urlElement = doc.selectFirst(select); + String url; + if(jsonSearch) + url = urlElement.text(); + else + url = urlElement.attr("abs:href"); + if(url==null || url.isEmpty()) + return null; + doc = Jsoup.connect(url).userAgent(userAgent).timeout(timeout).get(); + 
Lyrics lyrics = new Lyrics(doc.selectFirst(titleSelector).ownText(), + doc.selectFirst(authorSelector).ownText(), + cleanWithNewlines(doc.selectFirst(contentSelector)), + url, + source); + cache.put(cacheKey, lyrics); + return lyrics; + } + catch(IOException | NullPointerException | JSONException ex) + { + return null; + } + }, executor); + }); } catch(ConfigException ex) { @@ -180,9 +189,68 @@ public CompletableFuture getLyrics(String search, String source) return null; } } - + + /** + * Asynchronously fetches the request token configured for a lyrics source. + * + * Downloads {@code lyrics.SOURCE.token.url} and takes the raw response body when + * {@code token.text} is true, otherwise the own text of the first element matching + * {@code token.select}. If {@code token.regex} is configured, its first match is the token. + * + * @param source name of the configured lyrics source + * @return a future yielding the token, or {@code null} on I/O failure, a missing element or no regex match; + * the method itself returns {@code null} if any other exception is thrown while setting up the request + * @throws IllegalArgumentException if the token settings for the source cannot be read from the config + */ + private CompletableFuture getToken(String source) { + try { + String tokenUrl = config.getString("lyrics." + source + ".token.url"); + String select = config.getString("lyrics." + source + ".token.select"); + boolean textSearch = config.getBoolean("lyrics." + source + ".token.text"); + + return CompletableFuture.supplyAsync(() -> { + try { + Pattern pattern = null; + + // Optional regex for post-processing + // Helpful if token is not accessible using HTML accessors (e.g., inlined in a JS file) + if (config.hasPath("lyrics." + source + ".token.regex")) { + String regexPattern = config.getString("lyrics."
+ source + ".token.regex"); + pattern = Pattern.compile(regexPattern); + } + + Connection connection = Jsoup.connect(tokenUrl).userAgent(userAgent).timeout(timeout); + String body; + + if (textSearch) { + body = connection.ignoreContentType(true).execute().body(); + } else { + // HTML -- apply selectors to derive body string + Document doc = connection.get(); + body = doc.selectFirst(select).ownText(); + } + + if (pattern != null) { + Matcher matcher = pattern.matcher(body); + if (matcher.find()) { + return matcher.group(); + } + } + // Without a regex the fetched text itself is the token; a configured regex that finds nothing yields null + return pattern == null ? body : null; + } catch (IOException | NullPointerException ex) { + return null; + } + }, executor); + } catch (ConfigException ex) { + throw new IllegalArgumentException(String.format("Source '%s' does not exist or is not configured correctly", source)); + } catch (Exception ignored) { + return null; + } + } + private String cleanWithNewlines(Element element) { - return Jsoup.clean(Jsoup.clean(element.html(), newlineWhitelist), "", Safelist.none(), noPrettyPrint); + return Jsoup.clean(Jsoup.clean(element.html(), newlineSafelist), "", Safelist.none(), noPrettyPrint); } } diff --git a/src/main/resources/reference.conf b/src/main/resources/reference.conf index aef3322..fd961cf 100644 --- a/src/main/resources/reference.conf +++ b/src/main/resources/reference.conf @@ -6,9 +6,16 @@ lyrics A-Z Lyrics { + token + { + url = "https://www.azlyrics.com/geo.js" + text = true + select = "" + regex = """(?<=\"value\",\s\").*(?=\")""" + } search { - url = "https://search.azlyrics.com/search.php?q=%s" + url = "https://search.azlyrics.com/search.php?q=%s&x=%s" json = false select = "a[href*=/lyrics/]" } @@ -30,8 +37,8 @@ lyrics } parse { - title = "h1[class^=SongHeader__Title]" - author = "a[class*=SongHeader__Artist]" + title = "h1[class*=__Title] > span" + author = "a[class*=__Artist]" content = "div[class^=Lyrics__Container]" } }