Quick and dirty commit, due to the haste with which this was needed

This commit is contained in:
2025-06-08 22:01:17 +02:00
parent 840ad468e3
commit 57fc7f2533
19 changed files with 334 additions and 63 deletions

View File

@@ -0,0 +1,60 @@
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Test;
import javax.swing.text.html.HTML;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import static me.zacharias.chat.core.Core.writeLog;
import static org.junit.jupiter.api.Assertions.assertNotNull;
public class LyricsFetch {
Pattern pattern = Pattern.compile("(?i)\\[(verse.*)|(chorus.*)|(bridge.*)|(outro.*)|(intro.*)]");
@Test
public void testFetchLyrics() throws Exception {
Document doc = Jsoup.connect("https://genius.com/Neuro-sama-life-lyrics")
.userAgent("Mozilla/5.0")
.get();
Elements containers = doc.select("div[data-lyrics-container=true]");
StringBuilder lyrics = new StringBuilder();
for (Element container : containers) {
for(Node n : container.childNodes())
{
if(n instanceof Element e) {
if (e.attribute("data-exclude-from-selection") != null && e.attr("data-exclude-from-selection").equals("true")) {
continue;
}
else if(e.tagName().equalsIgnoreCase("br"))
{
lyrics.append("\n");
}
else {
System.out.println(container.tagName());
String s = e.text();
lyrics.append(s.trim());
}
}
else if(n instanceof TextNode tn)
{
String s = tn.text();
if (!s.isBlank()) {
lyrics.append(s.trim());
}
}
}
}
System.out.println(lyrics.toString());
assertNotNull(lyrics.toString());
}
}