Skip to content
This repository has been archived by the owner on Mar 11, 2024. It is now read-only.

Commit

Permalink
Refactor browser invocation and improve error handling in AyaneJob
Browse files Browse the repository at this point in the history
  • Loading branch information
Ziedelth committed Sep 28, 2023
1 parent 35bcf56 commit 67ab9d0
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 67 deletions.
5 changes: 1 addition & 4 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,7 +1,4 @@
/.idea/
/scraper.iml
/target/
/logs/
/database.json
/tmp/
/ayane/
/data/*
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,7 @@ class CrunchyrollConverter(private val platform: CrunchyrollPlatform) {
Logger.info("Get anime page...")
val url = "https://www.crunchyroll.com/$country/$animeId"
Logger.config("Anime page: $url")
val result = Browser(
Browser.BrowserType.FIREFOX,
url
).launchAndWaitForSelector("div.undefined:nth-child(1) > figure:nth-child(1) > picture:nth-child(1) > img:nth-child(2)")
val result = Browser(url).launchAndWaitForSelector("div.undefined:nth-child(1) > figure:nth-child(1) > picture:nth-child(1) > img:nth-child(2)")

// ----- IMAGE -----
Logger.info("Get image...")
Expand Down
89 changes: 48 additions & 41 deletions src/main/kotlin/fr/jais/scraper/jobs/AyaneJob.kt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package fr.jais.scraper.jobs

import com.microsoft.playwright.Playwright
import com.mortennobel.imagescaling.ResampleOp
import fr.jais.scraper.utils.*
import org.quartz.Job
Expand All @@ -16,23 +15,14 @@ import java.time.LocalDate
import java.time.format.DateTimeFormatter
import java.time.format.TextStyle
import java.util.*
import java.util.logging.Level
import javax.imageio.ImageIO

class AyaneJob : Job {
private val maxEpisodesPerImage = 7

override fun execute(p0: JobExecutionContext?) {
Logger.info("Starting AyaneJob...")
val folder = File("data/ayane")
if (!folder.exists()) folder.mkdirs()

val font = File(folder, "Rubik.ttf")
val backgroundImage =
ImageIO.read(URL("https://cdn.discordapp.com/attachments/1093774447636385883/1095284174883147877/Ziedelth_solo_1girl_adult_beautiful_shy_yellow_hair_smooth_hair_fd121b3f-3739-4dbe-b1d3-fec13fff64fd.png"))
.opacity(0.1f)
val crunchyrollImage = ImageIO.read(File(folder, "crunchyroll.png")).invert()
val adnImage = ImageIO.read(File(folder, "animation_digital_network.png")).invert()
val netflixImage = ImageIO.read(File(folder, "netflix.png")).invert()

try {
val episodes = getEpisodes()
Expand All @@ -41,15 +31,39 @@ class AyaneJob : Job {
return
}

val folder = File("data/ayane")

if (!folder.exists()) {
Logger.config("Creating Ayane folder...")
folder.mkdirs()
}

Logger.config("Getting Ayane font...")
val font = File(folder, "Rubik.ttf")
Logger.config("Getting Ayane background image...")
val backgroundImage =
ImageIO.read(URL("https://cdn.discordapp.com/attachments/1093774447636385883/1095284174883147877/Ziedelth_solo_1girl_adult_beautiful_shy_yellow_hair_smooth_hair_fd121b3f-3739-4dbe-b1d3-fec13fff64fd.png"))
.opacity(0.1f)
Logger.config("Getting Ayane Crunchyroll image...")
val crunchyrollImage = ImageIO.read(File(folder, "crunchyroll.png")).invert()
Logger.config("Getting Ayane ADN image...")
val adnImage = ImageIO.read(File(folder, "animation_digital_network.png")).invert()
Logger.config("Getting Ayane Netflix image...")
val netflixImage = ImageIO.read(File(folder, "netflix.png")).invert()

val day = LocalDate.now().dayOfWeek.getDisplayName(TextStyle.FULL, Locale.FRANCE).lowercase()
val date = LocalDate.now().format(DateTimeFormatter.ofPattern("dd/MM"))

var string: String
var epochs = 0
var take = 7

Logger.info("Building text...")

do {
string = "🎯 | Votre planning #anime pour ce $day $date :\n"

episodes.shuffled().take(7).forEach {
episodes.shuffled().take(take).forEach {
string += "\n#${
it.first.name.split(":", ",").first().capitalizeWords().onlyLettersAndDigits()
} EP${it.second.split(" ")[1]}"
Expand All @@ -58,6 +72,13 @@ class AyaneJob : Job {
string += """
Bonne journée ! 😊"""

epochs++

if (epochs % 10 == 0) {
take--
Logger.warning("$epochs has passed to attempting build the text, reducing take to $take")
}
} while (string.length > 250)

Logger.info(string)
Expand All @@ -68,7 +89,8 @@ Bonne journée ! 😊"""

API.saveAyane(string, images)
} catch (e: Exception) {
println("Error: $e")
Logger.log(Level.SEVERE, "Error with Ayane", e)
return
}

Logger.info("Ayane is released!")
Expand Down Expand Up @@ -258,24 +280,18 @@ Bonne journée ! 😊"""
return bufferedImage
}

@Throws(Exception::class)
private fun getEpisodes(): List<Pair<Anime, String>> {
val playwright = Playwright.create()
val browser = playwright.firefox().launch()
val context = browser.newContext()
val page = context.newPage()
val content = Browser("${Const.calendarBaseUrl}/calendrier_diffusion.html").launch()

page.navigate("${Const.calendarBaseUrl}/calendrier_diffusion.html")

val todayCalendar = page.querySelectorAll("table.calendrier_diffusion")
.find { true == it.querySelector("th")?.textContent()?.contains("Aujourd'hui", true) }
val todayCalendar = content.select("table.calendrier_diffusion")
.find { true == it.getElementsByTag("th").text().contains("Aujourd'hui", true) }
?: throw Exception("No anime today")

val episodes = todayCalendar.querySelectorAll("td").mapNotNull {
val animeElement = it.querySelector("a") ?: return@mapNotNull null
val episodes = todayCalendar.getElementsByTag("td").mapNotNull {
val animeElement = it.getElementsByTag("a") ?: return@mapNotNull null

var name = animeElement.textContent().trim().replace(Const.multipleSpaceRegex, " ")
val url = "${Const.calendarBaseUrl}${animeElement.getAttribute("href")}"
var name = animeElement.text().trim().replace(Const.multipleSpaceRegex, " ")
val url = "${Const.calendarBaseUrl}${animeElement.attr("href")}"

val season = if (name.contains("Saison", true)) {
val number = name.split("Saison", ignoreCase = true)[1].trim().split(" ")[0].toInt()
Expand All @@ -286,37 +302,28 @@ Bonne journée ! 😊"""
1
}

if (name == "Shūmatsu no Walküre 2") {
return@mapNotNull null
}

val episode =
it.querySelector(".calendrier_episode").textContent().trim().replace(Const.multipleSpaceRegex, " ")
val episode = it.select(".calendrier_episode").text().trim().replace(Const.multipleSpaceRegex, " ")
Anime(name, url, season) to episode
}.filter { (anime, _) ->
page.navigate(anime.url)
val infos = page.querySelectorAll(".info_fiche > div")
val licenceElement = infos.find { it.textContent().contains("Licence VOD", true) }
val subcontent = Browser(anime.url).launch()

val infos = subcontent.select(".info_fiche > div")
val licenceElement = infos.find { it.text().contains("Licence VOD", true) }

if (licenceElement == null) {
println("No licence for ${anime.name}")
return@filter false
}

val licencePlatform =
licenceElement.textContent().split(":")[1].trim().replace(Const.multipleSpaceRegex, " ").split(",")
licenceElement.text().split(":")[1].trim().replace(Const.multipleSpaceRegex, " ").split(",")
.map { it.trim() }
anime.licences.addAll(licencePlatform)
licencePlatform.contains("Animation Digital Network") || licencePlatform.contains("Crunchyroll") || licencePlatform.contains(
"Netflix"
)
}

page.close()
context.close()
browser.close()
playwright.close()

return episodes
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import fr.jais.scraper.countries.FranceCountry
import fr.jais.scraper.countries.ICountry
import fr.jais.scraper.entities.Episode
import fr.jais.scraper.exceptions.CountryNotSupportedException
import fr.jais.scraper.exceptions.EpisodeException
import fr.jais.scraper.utils.Const
import fr.jais.scraper.utils.Logger
import fr.jais.scraper.utils.toDate
Expand Down
14 changes: 5 additions & 9 deletions src/main/kotlin/fr/jais/scraper/platforms/CrunchyrollPlatform.kt
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,11 @@ class CrunchyrollPlatform(scraper: Scraper) : IPlatform(
val selector = "#content > div > div.app-body-wrapper > div > div > div.erc-browse-collection > div > div:nth-child(1) > div > div > h4 > a"
val simulcastSelector = ".erc-browse-cards-collection > .browse-card > div > div > h4 > a"

val contentCurrentSimulcast = Browser(
Browser.BrowserType.FIREFOX,
"https://www.crunchyroll.com/$countryTag/simulcasts"
).launchAndWaitForSelector(selector)
val contentCurrentSimulcast = Browser("https://www.crunchyroll.com/$countryTag/simulcasts").launchAndWaitForSelector(selector)

val simulcastName =
contentCurrentSimulcast.select("#content > div > div.app-body-wrapper > div > div > div.header > div > div > span.call-to-action--PEidl.call-to-action--is-m--RVdkI.select-trigger__title-cta--C5-uH.select-trigger__title-cta--is-displayed-on-mobile--6oNk1").text()
contentCurrentSimulcast.select("#content > div > div.app-body-wrapper > div > div > div.header > div > div > span.call-to-action--PEidl.call-to-action--is-m--RVdkI.select-trigger__title-cta--C5-uH.select-trigger__title-cta--is-displayed-on-mobile--6oNk1")
.text()
val simulcastCode = getSimulcastCode(simulcastName)
Logger.info("Current simulcast code for ${iCountry.name}: $simulcastCode")

Expand All @@ -81,10 +79,8 @@ class CrunchyrollPlatform(scraper: Scraper) : IPlatform(
val previousSimulcastCode = getPreviousSimulcastCode(simulcastCode)
Logger.info("Previous simulcast code for ${iCountry.name}: $previousSimulcastCode")

val contentPreviousSimulcast = Browser(
Browser.BrowserType.FIREFOX,
"https://www.crunchyroll.com/$countryTag/simulcasts/seasons/$previousSimulcastCode"
).launchAndWaitForSelector(selector)
val contentPreviousSimulcast =
Browser("https://www.crunchyroll.com/$countryTag/simulcasts/seasons/$previousSimulcastCode").launchAndWaitForSelector(selector)

val previousSimulcastAnimes =
contentPreviousSimulcast.select(simulcastSelector).map { it.text().lowercase() }.toSet()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ class NetflixPlatform(scraper: Scraper) : IPlatform(
}

val apiUrl = "https://www.netflix.com/$lang/title/$netflixId"
val content = Browser(Browser.BrowserType.CHROME, apiUrl).launch()
val content = Browser(apiUrl, Browser.BrowserType.CHROME).launch()
cache.lastCheck = System.currentTimeMillis()
cache.content = content
convertToNetflixEpisodes(content)
Expand Down
9 changes: 2 additions & 7 deletions src/main/kotlin/fr/jais/scraper/utils/Browser.kt
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
package fr.jais.scraper.utils

import com.microsoft.playwright.BrowserContext
import com.microsoft.playwright.Page
import org.jsoup.Jsoup
import org.jsoup.nodes.Document

class Browser(type: BrowserType = BrowserType.CHROME, val url: String) {
class Browser(val url: String, type: BrowserType = BrowserType.FIREFOX) {
enum class BrowserType {
CHROME,
FIREFOX,
}

private var browser: com.microsoft.playwright.Browser? = null
private var context: BrowserContext? = null
private var page: Page? = null
private val launchOptions = com.microsoft.playwright.BrowserType.LaunchOptions().setHeadless(true)

Expand All @@ -24,10 +22,8 @@ class Browser(type: BrowserType = BrowserType.CHROME, val url: String) {
BrowserType.FIREFOX -> Const.firefox.launch(launchOptions)
}

Logger.info("Creating context...")
context = browser?.newContext()
Logger.info("Creating page...")
page = context?.newPage()
page = browser?.newPage()
page?.setDefaultTimeout(60_000.0)
page?.setDefaultNavigationTimeout(60_000.0)
Logger.config("URL: $url")
Expand Down Expand Up @@ -69,7 +65,6 @@ class Browser(type: BrowserType = BrowserType.CHROME, val url: String) {
private fun close() {
Logger.info("Closing browser...")
page?.close()
context?.close()
browser?.close()
}
}

0 comments on commit 67ab9d0

Please sign in to comment.