This repository has been archived by the owner on Mar 11, 2024. It is now read-only.

Commit

Add job management and optimize browser interactions.
Ziedelth committed Sep 15, 2023
1 parent 7f841e6 commit 27ee060
Showing 5 changed files with 72 additions and 80 deletions.
92 changes: 15 additions & 77 deletions src/main/kotlin/fr/jais/scraper/Scraper.kt
@@ -1,18 +1,17 @@
package fr.jais.scraper

import fr.jais.scraper.countries.ICountry
import fr.jais.scraper.entities.Episode
import fr.jais.scraper.jobs.AyaneJob
import fr.jais.scraper.jobs.CheckJob
import fr.jais.scraper.jobs.ClearJob
import fr.jais.scraper.jobs.JobManager
import fr.jais.scraper.platforms.AnimationDigitalNetworkPlatform
import fr.jais.scraper.platforms.CrunchyrollPlatform
import fr.jais.scraper.platforms.IPlatform
import fr.jais.scraper.platforms.NetflixPlatform
import fr.jais.scraper.utils.*
import java.text.SimpleDateFormat
import java.time.LocalDateTime
import java.time.ZoneOffset
import java.util.*
import fr.jais.scraper.utils.Const
import fr.jais.scraper.utils.Logger
import fr.jais.scraper.utils.ThreadManager

class Scraper {
    val platforms = listOf(
@@ -26,78 +25,13 @@ class Scraper {
    fun getCountries(platform: IPlatform): List<ICountry> =
        countries.filter { platform.countries.contains(it.javaClass) }

    private fun getAllEpisodes(calendar: Calendar): List<Episode> {
        Logger.config("Calendar: ${calendar.toISO8601()}")

        Logger.info("Getting cached episodes...")
        val cachedEpisodes = Database.loadEpisodes().map { it.hash }

        Logger.info("Get all episodes...")
        val episodes = platforms
            .flatMap { it.getEpisodes(calendar, cachedEpisodes) }
            .filter { calendar.after(CalendarConverter.fromUTCDate(it.releaseDate)) }
            .sortedWith(
                compareBy(
                    { CalendarConverter.fromUTCDate(it.releaseDate) },
                    { it.anime.name.lowercase() },
                    { it.season },
                    { it.number }
                )
            )
        Logger.config("Episodes: ${episodes.size}")
        Database.saveEpisodes(episodes)
        API.saveEpisodes(episodes)
        return episodes
    }

    fun startThreadCheck() {
        ThreadManager.start("Checker") {
            var lastCheck: String? = null

            while (true) {
                val calendar = Calendar.getInstance()
                val today = calendar.toDate()

                if (lastCheck != today) {
                    Logger.info("Reset all platforms...")
                    lastCheck = today
                    platforms.forEach { it.reset() }
                }

                getAllEpisodes(calendar).forEach { println(it) }

                // Wait 3 minutes
                Thread.sleep(3 * 60 * 1000)
            }
        }
    }

    fun startThreadCommand() {
        ThreadManager.start("Command") {
            while (true) {
                val command = readlnOrNull() ?: continue
                val args = command.split(" ")
                val argsWithoutCommand = args.drop(1)

                when (args[0]) {
                    "scrap" -> {
                        if (argsWithoutCommand.isEmpty()) {
                            Logger.info("Please specify a date")
                            continue
                        }

                        val date = argsWithoutCommand[0]
                        val parsedDate = SimpleDateFormat("yyyy-MM-dd").parse(date)
                        val localDateTime =
                            LocalDateTime.ofInstant(parsedDate.toInstant(), TimeZone.getDefault().toZoneId())
                                .withHour(23).withMinute(59).withSecond(59)
                        val calendar =
                            Calendar.getInstance().apply { time = Date.from(localDateTime.toInstant(ZoneOffset.UTC)) }
                        println(calendar.toISO8601())

                        getAllEpisodes(calendar).forEach { println(it) }
                    }

                    "ayane" -> {
                        AyaneJob().execute(null)
                    }
@@ -112,20 +46,24 @@ class Scraper {

    fun startThreadCron() {
        jobManager.scheduleJob("0 0 9 * * ?", AyaneJob::class.java)
        jobManager.scheduleJob("0 0 0 * * ?", ClearJob::class.java)
        jobManager.scheduleJob("0 */2 * * * ?", CheckJob::class.java)
        jobManager.start()
    }

    companion object {
        val instance = Scraper()
    }
}

fun main() {
    Logger.info("Initializing...")
    Const.gson
    Logger.info("Initialization done!")
    Logger.info("Starting...")
    val scraper = Scraper()
    Logger.info("Start main thread...")
    scraper.startThreadCheck()

    Logger.info("Start command thread...")
    scraper.startThreadCommand()
    Scraper.instance.startThreadCommand()
    Logger.info("Start cron thread...")
    scraper.startThreadCron()
    Scraper.instance.startThreadCron()
    Logger.info("Done!")
}
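Note: JobManager is imported and driven from startThreadCron() above, but its implementation is not part of this diff. Since CheckJob and ClearJob implement org.quartz.Job, it is presumably a thin wrapper around a Quartz scheduler; a minimal sketch matching the two call sites, scheduleJob(cron, jobClass) and start(), might look like the following (the internals, including the use of StdSchedulerFactory, are assumptions):

package fr.jais.scraper.jobs

import org.quartz.CronScheduleBuilder
import org.quartz.Job
import org.quartz.JobBuilder
import org.quartz.TriggerBuilder
import org.quartz.impl.StdSchedulerFactory

// Hypothetical JobManager: one shared Quartz scheduler behind the
// scheduleJob(cron, jobClass) / start() API used in Scraper.startThreadCron().
class JobManager {
    private val scheduler = StdSchedulerFactory.getDefaultScheduler()

    fun scheduleJob(cronExpression: String, jobClass: Class<out Job>) {
        val jobDetail = JobBuilder.newJob(jobClass).build()
        val trigger = TriggerBuilder.newTrigger()
            .withSchedule(CronScheduleBuilder.cronSchedule(cronExpression))
            .build()
        scheduler.scheduleJob(jobDetail, trigger)
    }

    fun start() = scheduler.start()
}

With this wiring, the three expressions above run AyaneJob daily at 09:00, ClearJob daily at midnight, and CheckJob every two minutes, replacing the old Thread.sleep-based loop in startThreadCheck() and the daily platform reset it performed.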
@@ -170,7 +170,10 @@ class CrunchyrollConverter(private val platform: CrunchyrollPlatform) {
Logger.info("Get anime page...")
val url = "https://www.crunchyroll.com/$country/$animeId"
Logger.config("Anime page: $url")
val result = Browser(Browser.BrowserType.FIREFOX, url).launch()
val result = Browser(
Browser.BrowserType.FIREFOX,
url
).launchAndWaitForSelector("div.undefined:nth-child(1) > figure:nth-child(1) > picture:nth-child(1) > img:nth-child(2)")

// ----- IMAGE -----
Logger.info("Get image...")
38 changes: 38 additions & 0 deletions src/main/kotlin/fr/jais/scraper/jobs/CheckJob.kt
@@ -0,0 +1,38 @@
package fr.jais.scraper.jobs

import fr.jais.scraper.Scraper
import fr.jais.scraper.entities.Episode
import fr.jais.scraper.utils.*
import org.quartz.Job
import org.quartz.JobExecutionContext
import java.util.*

class CheckJob : Job {
    override fun execute(p0: JobExecutionContext?) {
        getAllEpisodes(Calendar.getInstance()).forEach { println(it) }
    }

    private fun getAllEpisodes(calendar: Calendar): List<Episode> {
        Logger.config("Calendar: ${calendar.toISO8601()}")

        Logger.info("Getting cached episodes...")
        val cachedEpisodes = Database.loadEpisodes().map { it.hash }

        Logger.info("Get all episodes...")
        val episodes = Scraper.instance.platforms
            .flatMap { it.getEpisodes(calendar, cachedEpisodes) }
            .filter { calendar.after(CalendarConverter.fromUTCDate(it.releaseDate)) }
            .sortedWith(
                compareBy(
                    { CalendarConverter.fromUTCDate(it.releaseDate) },
                    { it.anime.name.lowercase() },
                    { it.season },
                    { it.number }
                )
            )
        Logger.config("Episodes: ${episodes.size}")
        Database.saveEpisodes(episodes)
        API.saveEpisodes(episodes)
        return episodes
    }
}
13 changes: 13 additions & 0 deletions src/main/kotlin/fr/jais/scraper/jobs/ClearJob.kt
@@ -0,0 +1,13 @@
package fr.jais.scraper.jobs

import fr.jais.scraper.Scraper
import fr.jais.scraper.utils.Logger
import org.quartz.Job
import org.quartz.JobExecutionContext

class ClearJob : Job {
    override fun execute(p0: JobExecutionContext?) {
        Logger.info("Reset all platforms...")
        Scraper.instance.platforms.forEach { it.reset() }
    }
}
4 changes: 2 additions & 2 deletions src/main/kotlin/fr/jais/scraper/utils/Browser.kt
@@ -28,8 +28,8 @@ class Browser(type: BrowserType = BrowserType.CHROME, val url: String) {
        context = browser?.newContext()
        Logger.info("Creating page...")
        page = context?.newPage()
        page?.setDefaultTimeout(60000.0)
        page?.setDefaultNavigationTimeout(60000.0)
        page?.setDefaultTimeout(30_000.0)
        page?.setDefaultNavigationTimeout(30_000.0)
        Logger.config("URL: $url")
        Logger.info("Navigating...")
        page?.navigate(url)
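Note: the launchAndWaitForSelector(selector) method called from the CrunchyrollConverter hunk above is likewise not shown in this diff. Based on the Playwright setup visible in Browser.kt, a standalone sketch of the calls it presumably wraps (navigate, block until the selector is attached, then read the rendered page) could look like this; the URL is hypothetical and the real method's return value is not visible in the diff:

import com.microsoft.playwright.Playwright

// Standalone illustration, not the project's Browser class: navigate with
// Firefox, wait for the image selector used by CrunchyrollConverter, then
// read the fully rendered HTML.
fun main() {
    Playwright.create().use { playwright ->
        val browser = playwright.firefox().launch()
        val page = browser.newContext().newPage()
        page.setDefaultTimeout(30_000.0) // matches the new default in Browser.kt
        page.navigate("https://www.crunchyroll.com/fr/example-anime") // hypothetical URL
        page.waitForSelector("div.undefined:nth-child(1) > figure:nth-child(1) > picture:nth-child(1) > img:nth-child(2)")
        println(page.content().length) // the page is rendered once the selector resolves
        browser.close()
    }
}

Waiting on a concrete selector instead of a blanket launch(), combined with the shorter 30-second timeouts above, lets the scraper fail fast when the page does not render, which appears to be the "optimize browser interactions" part of this commit.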
