From 27ee0609789aae24a20870d6700a8818a5841a82 Mon Sep 17 00:00:00 2001 From: Ziedelth Date: Fri, 15 Sep 2023 09:57:28 +0200 Subject: [PATCH] Add job management and optimize browser interactions. --- src/main/kotlin/fr/jais/scraper/Scraper.kt | 92 +++---------------- .../converters/CrunchyrollConverter.kt | 5 +- .../kotlin/fr/jais/scraper/jobs/CheckJob.kt | 38 ++++++++ .../kotlin/fr/jais/scraper/jobs/ClearJob.kt | 13 +++ .../kotlin/fr/jais/scraper/utils/Browser.kt | 4 +- 5 files changed, 72 insertions(+), 80 deletions(-) create mode 100644 src/main/kotlin/fr/jais/scraper/jobs/CheckJob.kt create mode 100644 src/main/kotlin/fr/jais/scraper/jobs/ClearJob.kt diff --git a/src/main/kotlin/fr/jais/scraper/Scraper.kt b/src/main/kotlin/fr/jais/scraper/Scraper.kt index 5a80f45..59bae6a 100644 --- a/src/main/kotlin/fr/jais/scraper/Scraper.kt +++ b/src/main/kotlin/fr/jais/scraper/Scraper.kt @@ -1,18 +1,17 @@ package fr.jais.scraper import fr.jais.scraper.countries.ICountry -import fr.jais.scraper.entities.Episode import fr.jais.scraper.jobs.AyaneJob +import fr.jais.scraper.jobs.CheckJob +import fr.jais.scraper.jobs.ClearJob import fr.jais.scraper.jobs.JobManager import fr.jais.scraper.platforms.AnimationDigitalNetworkPlatform import fr.jais.scraper.platforms.CrunchyrollPlatform import fr.jais.scraper.platforms.IPlatform import fr.jais.scraper.platforms.NetflixPlatform -import fr.jais.scraper.utils.* -import java.text.SimpleDateFormat -import java.time.LocalDateTime -import java.time.ZoneOffset -import java.util.* +import fr.jais.scraper.utils.Const +import fr.jais.scraper.utils.Logger +import fr.jais.scraper.utils.ThreadManager class Scraper { val platforms = listOf( @@ -26,78 +25,13 @@ class Scraper { fun getCountries(platform: IPlatform): List = countries.filter { platform.countries.contains(it.javaClass) } - private fun getAllEpisodes(calendar: Calendar): List { - Logger.config("Calendar: ${calendar.toISO8601()}") - - Logger.info("Getting cached episodes...") - val cachedEpisodes = Database.loadEpisodes().map { it.hash } - - Logger.info("Get all episodes...") - val episodes = platforms - .flatMap { it.getEpisodes(calendar, cachedEpisodes) } - .filter { calendar.after(CalendarConverter.fromUTCDate(it.releaseDate)) } - .sortedWith( - compareBy( - { CalendarConverter.fromUTCDate(it.releaseDate) }, - { it.anime.name.lowercase() }, - { it.season }, - { it.number } - ) - ) - Logger.config("Episodes: ${episodes.size}") - Database.saveEpisodes(episodes) - API.saveEpisodes(episodes) - return episodes - } - - fun startThreadCheck() { - ThreadManager.start("Checker") { - var lastCheck: String? = null - - while (true) { - val calendar = Calendar.getInstance() - val today = calendar.toDate() - - if (lastCheck != today) { - Logger.info("Reset all platforms...") - lastCheck = today - platforms.forEach { it.reset() } - } - - getAllEpisodes(calendar).forEach { println(it) } - - // Wait 3 minutes - Thread.sleep(3 * 60 * 1000) - } - } - } - fun startThreadCommand() { ThreadManager.start("Command") { while (true) { val command = readlnOrNull() ?: continue val args = command.split(" ") - val argsWithoutCommand = args.drop(1) when (args[0]) { - "scrap" -> { - if (argsWithoutCommand.isEmpty()) { - Logger.info("Please specify a date") - continue - } - - val date = argsWithoutCommand[0] - val parsedDate = SimpleDateFormat("yyyy-MM-dd").parse(date) - val localDateTime = - LocalDateTime.ofInstant(parsedDate.toInstant(), TimeZone.getDefault().toZoneId()) - .withHour(23).withMinute(59).withSecond(59) - val calendar = - Calendar.getInstance().apply { time = Date.from(localDateTime.toInstant(ZoneOffset.UTC)) } - println(calendar.toISO8601()) - - getAllEpisodes(calendar).forEach { println(it) } - } - "ayane" -> { AyaneJob().execute(null) } @@ -112,20 +46,24 @@ class Scraper { fun startThreadCron() { jobManager.scheduleJob("0 0 9 * * ?", AyaneJob::class.java) + jobManager.scheduleJob("0 0 0 * * ?", ClearJob::class.java) + jobManager.scheduleJob("0 */2 * * * ?", CheckJob::class.java) jobManager.start() } + + companion object { + val instance = Scraper() + } } fun main() { Logger.info("Initializing...") Const.gson Logger.info("Initialization done!") - Logger.info("Starting...") - val scraper = Scraper() - Logger.info("Start main thread...") - scraper.startThreadCheck() + Logger.info("Start command thread...") - scraper.startThreadCommand() + Scraper.instance.startThreadCommand() Logger.info("Start cron thread...") - scraper.startThreadCron() + Scraper.instance.startThreadCron() + Logger.info("Done!") } diff --git a/src/main/kotlin/fr/jais/scraper/converters/CrunchyrollConverter.kt b/src/main/kotlin/fr/jais/scraper/converters/CrunchyrollConverter.kt index 10b5e41..1594a10 100644 --- a/src/main/kotlin/fr/jais/scraper/converters/CrunchyrollConverter.kt +++ b/src/main/kotlin/fr/jais/scraper/converters/CrunchyrollConverter.kt @@ -170,7 +170,10 @@ class CrunchyrollConverter(private val platform: CrunchyrollPlatform) { Logger.info("Get anime page...") val url = "https://www.crunchyroll.com/$country/$animeId" Logger.config("Anime page: $url") - val result = Browser(Browser.BrowserType.FIREFOX, url).launch() + val result = Browser( + Browser.BrowserType.FIREFOX, + url + ).launchAndWaitForSelector("div.undefined:nth-child(1) > figure:nth-child(1) > picture:nth-child(1) > img:nth-child(2)") // ----- IMAGE ----- Logger.info("Get image...") diff --git a/src/main/kotlin/fr/jais/scraper/jobs/CheckJob.kt b/src/main/kotlin/fr/jais/scraper/jobs/CheckJob.kt new file mode 100644 index 0000000..2245ed5 --- /dev/null +++ b/src/main/kotlin/fr/jais/scraper/jobs/CheckJob.kt @@ -0,0 +1,38 @@ +package fr.jais.scraper.jobs + +import fr.jais.scraper.Scraper +import fr.jais.scraper.entities.Episode +import fr.jais.scraper.utils.* +import org.quartz.Job +import org.quartz.JobExecutionContext +import java.util.* + +class CheckJob : Job { + override fun execute(p0: JobExecutionContext?) { + getAllEpisodes(Calendar.getInstance()).forEach { println(it) } + } + + private fun getAllEpisodes(calendar: Calendar): List { + Logger.config("Calendar: ${calendar.toISO8601()}") + + Logger.info("Getting cached episodes...") + val cachedEpisodes = Database.loadEpisodes().map { it.hash } + + Logger.info("Get all episodes...") + val episodes = Scraper.instance.platforms + .flatMap { it.getEpisodes(calendar, cachedEpisodes) } + .filter { calendar.after(CalendarConverter.fromUTCDate(it.releaseDate)) } + .sortedWith( + compareBy( + { CalendarConverter.fromUTCDate(it.releaseDate) }, + { it.anime.name.lowercase() }, + { it.season }, + { it.number } + ) + ) + Logger.config("Episodes: ${episodes.size}") + Database.saveEpisodes(episodes) + API.saveEpisodes(episodes) + return episodes + } +} diff --git a/src/main/kotlin/fr/jais/scraper/jobs/ClearJob.kt b/src/main/kotlin/fr/jais/scraper/jobs/ClearJob.kt new file mode 100644 index 0000000..52171ed --- /dev/null +++ b/src/main/kotlin/fr/jais/scraper/jobs/ClearJob.kt @@ -0,0 +1,13 @@ +package fr.jais.scraper.jobs + +import fr.jais.scraper.Scraper +import fr.jais.scraper.utils.Logger +import org.quartz.Job +import org.quartz.JobExecutionContext + +class ClearJob : Job { + override fun execute(p0: JobExecutionContext?) { + Logger.info("Reset all platforms...") + Scraper.instance.platforms.forEach { it.reset() } + } +} diff --git a/src/main/kotlin/fr/jais/scraper/utils/Browser.kt b/src/main/kotlin/fr/jais/scraper/utils/Browser.kt index 391314d..437199d 100644 --- a/src/main/kotlin/fr/jais/scraper/utils/Browser.kt +++ b/src/main/kotlin/fr/jais/scraper/utils/Browser.kt @@ -28,8 +28,8 @@ class Browser(type: BrowserType = BrowserType.CHROME, val url: String) { context = browser?.newContext() Logger.info("Creating page...") page = context?.newPage() - page?.setDefaultTimeout(60000.0) - page?.setDefaultNavigationTimeout(60000.0) + page?.setDefaultTimeout(30_000.0) + page?.setDefaultNavigationTimeout(30_000.0) Logger.config("URL: $url") Logger.info("Navigating...") page?.navigate(url)