From 67ab9d0c36a2a3d36ceb789f7b4b70b545d026ee Mon Sep 17 00:00:00 2001 From: Ziedelth Date: Thu, 28 Sep 2023 14:14:49 +0200 Subject: [PATCH] Refactor browser invocation and improve error handling in AyaneJob" --- .gitignore | 5 +- .../converters/CrunchyrollConverter.kt | 5 +- .../kotlin/fr/jais/scraper/jobs/AyaneJob.kt | 89 ++++++++++--------- .../AnimationDigitalNetworkPlatform.kt | 1 - .../scraper/platforms/CrunchyrollPlatform.kt | 14 ++- .../jais/scraper/platforms/NetflixPlatform.kt | 2 +- .../kotlin/fr/jais/scraper/utils/Browser.kt | 9 +- 7 files changed, 58 insertions(+), 67 deletions(-) diff --git a/.gitignore b/.gitignore index e878adf..5f853c8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,4 @@ /.idea/ /scraper.iml /target/ -/logs/ -/database.json -/tmp/ -/ayane/ +/data/* diff --git a/src/main/kotlin/fr/jais/scraper/converters/CrunchyrollConverter.kt b/src/main/kotlin/fr/jais/scraper/converters/CrunchyrollConverter.kt index 645edca..9d0977e 100644 --- a/src/main/kotlin/fr/jais/scraper/converters/CrunchyrollConverter.kt +++ b/src/main/kotlin/fr/jais/scraper/converters/CrunchyrollConverter.kt @@ -170,10 +170,7 @@ class CrunchyrollConverter(private val platform: CrunchyrollPlatform) { Logger.info("Get anime page...") val url = "https://www.crunchyroll.com/$country/$animeId" Logger.config("Anime page: $url") - val result = Browser( - Browser.BrowserType.FIREFOX, - url - ).launchAndWaitForSelector("div.undefined:nth-child(1) > figure:nth-child(1) > picture:nth-child(1) > img:nth-child(2)") + val result = Browser(url).launchAndWaitForSelector("div.undefined:nth-child(1) > figure:nth-child(1) > picture:nth-child(1) > img:nth-child(2)") // ----- IMAGE ----- Logger.info("Get image...") diff --git a/src/main/kotlin/fr/jais/scraper/jobs/AyaneJob.kt b/src/main/kotlin/fr/jais/scraper/jobs/AyaneJob.kt index 2bae0d2..08f54d3 100644 --- a/src/main/kotlin/fr/jais/scraper/jobs/AyaneJob.kt +++ b/src/main/kotlin/fr/jais/scraper/jobs/AyaneJob.kt @@ -1,6 +1,5 @@ package fr.jais.scraper.jobs -import com.microsoft.playwright.Playwright import com.mortennobel.imagescaling.ResampleOp import fr.jais.scraper.utils.* import org.quartz.Job @@ -16,6 +15,7 @@ import java.time.LocalDate import java.time.format.DateTimeFormatter import java.time.format.TextStyle import java.util.* +import java.util.logging.Level import javax.imageio.ImageIO class AyaneJob : Job { @@ -23,16 +23,6 @@ class AyaneJob : Job { override fun execute(p0: JobExecutionContext?) { Logger.info("Starting AyaneJob...") - val folder = File("data/ayane") - if (!folder.exists()) folder.mkdirs() - - val font = File(folder, "Rubik.ttf") - val backgroundImage = - ImageIO.read(URL("https://cdn.discordapp.com/attachments/1093774447636385883/1095284174883147877/Ziedelth_solo_1girl_adult_beautiful_shy_yellow_hair_smooth_hair_fd121b3f-3739-4dbe-b1d3-fec13fff64fd.png")) - .opacity(0.1f) - val crunchyrollImage = ImageIO.read(File(folder, "crunchyroll.png")).invert() - val adnImage = ImageIO.read(File(folder, "animation_digital_network.png")).invert() - val netflixImage = ImageIO.read(File(folder, "netflix.png")).invert() try { val episodes = getEpisodes() @@ -41,15 +31,39 @@ class AyaneJob : Job { return } + val folder = File("data/ayane") + + if (!folder.exists()) { + Logger.config("Creating Ayane folder...") + folder.mkdirs() + } + + Logger.config("Getting Ayane font...") + val font = File(folder, "Rubik.ttf") + Logger.config("Getting Ayane background image...") + val backgroundImage = + ImageIO.read(URL("https://cdn.discordapp.com/attachments/1093774447636385883/1095284174883147877/Ziedelth_solo_1girl_adult_beautiful_shy_yellow_hair_smooth_hair_fd121b3f-3739-4dbe-b1d3-fec13fff64fd.png")) + .opacity(0.1f) + Logger.config("Getting Ayane Crunchyroll image...") + val crunchyrollImage = ImageIO.read(File(folder, "crunchyroll.png")).invert() + Logger.config("Getting Ayane ADN image...") + val adnImage = ImageIO.read(File(folder, "animation_digital_network.png")).invert() + Logger.config("Getting Ayane Netflix image...") + val netflixImage = ImageIO.read(File(folder, "netflix.png")).invert() + val day = LocalDate.now().dayOfWeek.getDisplayName(TextStyle.FULL, Locale.FRANCE).lowercase() val date = LocalDate.now().format(DateTimeFormatter.ofPattern("dd/MM")) var string: String + var epochs = 0 + var take = 7 + + Logger.info("Building text...") do { string = "🎯 | Votre planning #anime pour ce $day $date :\n" - episodes.shuffled().take(7).forEach { + episodes.shuffled().take(take).forEach { string += "\n#${ it.first.name.split(":", ",").first().capitalizeWords().onlyLettersAndDigits() } EP${it.second.split(" ")[1]}" @@ -58,6 +72,13 @@ class AyaneJob : Job { string += """ Bonne journée ! 😊""" + + epochs++ + + if (epochs % 10 == 0) { + take-- + Logger.warning("$epochs has passed to attempting build the text, reducing take to $take") + } } while (string.length > 250) Logger.info(string) @@ -68,7 +89,8 @@ Bonne journée ! 😊""" API.saveAyane(string, images) } catch (e: Exception) { - println("Error: $e") + Logger.log(Level.SEVERE, "Error with Ayane", e) + return } Logger.info("Ayane is released!") @@ -258,24 +280,18 @@ Bonne journée ! 😊""" return bufferedImage } - @Throws(Exception::class) private fun getEpisodes(): List> { - val playwright = Playwright.create() - val browser = playwright.firefox().launch() - val context = browser.newContext() - val page = context.newPage() + val content = Browser("${Const.calendarBaseUrl}/calendrier_diffusion.html").launch() - page.navigate("${Const.calendarBaseUrl}/calendrier_diffusion.html") - - val todayCalendar = page.querySelectorAll("table.calendrier_diffusion") - .find { true == it.querySelector("th")?.textContent()?.contains("Aujourd'hui", true) } + val todayCalendar = content.select("table.calendrier_diffusion") + .find { true == it.getElementsByTag("th").text().contains("Aujourd'hui", true) } ?: throw Exception("No anime today") - val episodes = todayCalendar.querySelectorAll("td").mapNotNull { - val animeElement = it.querySelector("a") ?: return@mapNotNull null + val episodes = todayCalendar.getElementsByTag("td").mapNotNull { + val animeElement = it.getElementsByTag("a") ?: return@mapNotNull null - var name = animeElement.textContent().trim().replace(Const.multipleSpaceRegex, " ") - val url = "${Const.calendarBaseUrl}${animeElement.getAttribute("href")}" + var name = animeElement.text().trim().replace(Const.multipleSpaceRegex, " ") + val url = "${Const.calendarBaseUrl}${animeElement.attr("href")}" val season = if (name.contains("Saison", true)) { val number = name.split("Saison", ignoreCase = true)[1].trim().split(" ")[0].toInt() @@ -286,17 +302,13 @@ Bonne journée ! 😊""" 1 } - if (name == "Shūmatsu no Walküre 2") { - return@mapNotNull null - } - - val episode = - it.querySelector(".calendrier_episode").textContent().trim().replace(Const.multipleSpaceRegex, " ") + val episode = it.select(".calendrier_episode").text().trim().replace(Const.multipleSpaceRegex, " ") Anime(name, url, season) to episode }.filter { (anime, _) -> - page.navigate(anime.url) - val infos = page.querySelectorAll(".info_fiche > div") - val licenceElement = infos.find { it.textContent().contains("Licence VOD", true) } + val subcontent = Browser(anime.url).launch() + + val infos = subcontent.select(".info_fiche > div") + val licenceElement = infos.find { it.text().contains("Licence VOD", true) } if (licenceElement == null) { println("No licence for ${anime.name}") @@ -304,7 +316,7 @@ Bonne journée ! 😊""" } val licencePlatform = - licenceElement.textContent().split(":")[1].trim().replace(Const.multipleSpaceRegex, " ").split(",") + licenceElement.text().split(":")[1].trim().replace(Const.multipleSpaceRegex, " ").split(",") .map { it.trim() } anime.licences.addAll(licencePlatform) licencePlatform.contains("Animation Digital Network") || licencePlatform.contains("Crunchyroll") || licencePlatform.contains( @@ -312,11 +324,6 @@ Bonne journée ! 😊""" ) } - page.close() - context.close() - browser.close() - playwright.close() - return episodes } } diff --git a/src/main/kotlin/fr/jais/scraper/platforms/AnimationDigitalNetworkPlatform.kt b/src/main/kotlin/fr/jais/scraper/platforms/AnimationDigitalNetworkPlatform.kt index 50c0037..717329a 100644 --- a/src/main/kotlin/fr/jais/scraper/platforms/AnimationDigitalNetworkPlatform.kt +++ b/src/main/kotlin/fr/jais/scraper/platforms/AnimationDigitalNetworkPlatform.kt @@ -7,7 +7,6 @@ import fr.jais.scraper.countries.FranceCountry import fr.jais.scraper.countries.ICountry import fr.jais.scraper.entities.Episode import fr.jais.scraper.exceptions.CountryNotSupportedException -import fr.jais.scraper.exceptions.EpisodeException import fr.jais.scraper.utils.Const import fr.jais.scraper.utils.Logger import fr.jais.scraper.utils.toDate diff --git a/src/main/kotlin/fr/jais/scraper/platforms/CrunchyrollPlatform.kt b/src/main/kotlin/fr/jais/scraper/platforms/CrunchyrollPlatform.kt index 461385c..9ef29c4 100644 --- a/src/main/kotlin/fr/jais/scraper/platforms/CrunchyrollPlatform.kt +++ b/src/main/kotlin/fr/jais/scraper/platforms/CrunchyrollPlatform.kt @@ -64,13 +64,11 @@ class CrunchyrollPlatform(scraper: Scraper) : IPlatform( val selector = "#content > div > div.app-body-wrapper > div > div > div.erc-browse-collection > div > div:nth-child(1) > div > div > h4 > a" val simulcastSelector = ".erc-browse-cards-collection > .browse-card > div > div > h4 > a" - val contentCurrentSimulcast = Browser( - Browser.BrowserType.FIREFOX, - "https://www.crunchyroll.com/$countryTag/simulcasts" - ).launchAndWaitForSelector(selector) + val contentCurrentSimulcast = Browser("https://www.crunchyroll.com/$countryTag/simulcasts").launchAndWaitForSelector(selector) val simulcastName = - contentCurrentSimulcast.select("#content > div > div.app-body-wrapper > div > div > div.header > div > div > span.call-to-action--PEidl.call-to-action--is-m--RVdkI.select-trigger__title-cta--C5-uH.select-trigger__title-cta--is-displayed-on-mobile--6oNk1").text() + contentCurrentSimulcast.select("#content > div > div.app-body-wrapper > div > div > div.header > div > div > span.call-to-action--PEidl.call-to-action--is-m--RVdkI.select-trigger__title-cta--C5-uH.select-trigger__title-cta--is-displayed-on-mobile--6oNk1") + .text() val simulcastCode = getSimulcastCode(simulcastName) Logger.info("Current simulcast code for ${iCountry.name}: $simulcastCode") @@ -81,10 +79,8 @@ class CrunchyrollPlatform(scraper: Scraper) : IPlatform( val previousSimulcastCode = getPreviousSimulcastCode(simulcastCode) Logger.info("Previous simulcast code for ${iCountry.name}: $previousSimulcastCode") - val contentPreviousSimulcast = Browser( - Browser.BrowserType.FIREFOX, - "https://www.crunchyroll.com/$countryTag/simulcasts/seasons/$previousSimulcastCode" - ).launchAndWaitForSelector(selector) + val contentPreviousSimulcast = + Browser("https://www.crunchyroll.com/$countryTag/simulcasts/seasons/$previousSimulcastCode").launchAndWaitForSelector(selector) val previousSimulcastAnimes = contentPreviousSimulcast.select(simulcastSelector).map { it.text().lowercase() }.toSet() diff --git a/src/main/kotlin/fr/jais/scraper/platforms/NetflixPlatform.kt b/src/main/kotlin/fr/jais/scraper/platforms/NetflixPlatform.kt index dc866c9..9f1a134 100644 --- a/src/main/kotlin/fr/jais/scraper/platforms/NetflixPlatform.kt +++ b/src/main/kotlin/fr/jais/scraper/platforms/NetflixPlatform.kt @@ -152,7 +152,7 @@ class NetflixPlatform(scraper: Scraper) : IPlatform( } val apiUrl = "https://www.netflix.com/$lang/title/$netflixId" - val content = Browser(Browser.BrowserType.CHROME, apiUrl).launch() + val content = Browser(apiUrl, Browser.BrowserType.CHROME).launch() cache.lastCheck = System.currentTimeMillis() cache.content = content convertToNetflixEpisodes(content) diff --git a/src/main/kotlin/fr/jais/scraper/utils/Browser.kt b/src/main/kotlin/fr/jais/scraper/utils/Browser.kt index b40f2bb..e227a93 100644 --- a/src/main/kotlin/fr/jais/scraper/utils/Browser.kt +++ b/src/main/kotlin/fr/jais/scraper/utils/Browser.kt @@ -1,18 +1,16 @@ package fr.jais.scraper.utils -import com.microsoft.playwright.BrowserContext import com.microsoft.playwright.Page import org.jsoup.Jsoup import org.jsoup.nodes.Document -class Browser(type: BrowserType = BrowserType.CHROME, val url: String) { +class Browser(val url: String, type: BrowserType = BrowserType.FIREFOX) { enum class BrowserType { CHROME, FIREFOX, } private var browser: com.microsoft.playwright.Browser? = null - private var context: BrowserContext? = null private var page: Page? = null private val launchOptions = com.microsoft.playwright.BrowserType.LaunchOptions().setHeadless(true) @@ -24,10 +22,8 @@ class Browser(type: BrowserType = BrowserType.CHROME, val url: String) { BrowserType.FIREFOX -> Const.firefox.launch(launchOptions) } - Logger.info("Creating context...") - context = browser?.newContext() Logger.info("Creating page...") - page = context?.newPage() + page = browser?.newPage() page?.setDefaultTimeout(60_000.0) page?.setDefaultNavigationTimeout(60_000.0) Logger.config("URL: $url") @@ -69,7 +65,6 @@ class Browser(type: BrowserType = BrowserType.CHROME, val url: String) { private fun close() { Logger.info("Closing browser...") page?.close() - context?.close() browser?.close() } }