diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index ce3de73..f2832c4 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -10,11 +10,59 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v3 with: - java-version: 11 + java-version: 17 distribution: 'adopt' - name: Tests - run: mvn test \ No newline at end of file + run: mvn test + + docker-amd64: + runs-on: ubuntu-latest + + needs: + - tests + + steps: + - uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build + id: docker + uses: docker/build-push-action@v5 + with: + platforms: linux/amd64 + push: false + context: . + tags: scraper:amd64 + + docker-arm64: + runs-on: ubuntu-latest + + needs: + - tests + + steps: + - uses: actions/checkout@v4 + + - name: Set up QEMU + uses: docker/setup-qemu-action@v3 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Build + id: docker + uses: docker/build-push-action@v5 + with: + platforms: linux/arm64 + push: false + context: . + tags: scraper:arm64 \ No newline at end of file diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index fe18f12..78ef79e 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -12,10 +12,10 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v3 with: - java-version: 11 + java-version: 17 distribution: 'adopt' - name: Tests @@ -30,10 +30,10 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up JDK 11 + - name: Set up JDK 17 uses: actions/setup-java@v3 with: - java-version: 11 + java-version: 17 distribution: 'adopt' - name: Build diff --git a/Dockerfile b/Dockerfile index f6bc2cb..3da213f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,10 +1,32 @@ -FROM maven:3.8.6-jdk-11-slim +FROM maven:3.9.4-amazoncorretto-17 AS build WORKDIR /app COPY . /app -RUN apt-get update && apt-get install -y libgtk-3-0 libasound2 libx11-6 libxcomposite1 libxdamage1 libxext6 libxfixes3 libxrandr2 libxrender1 libxtst6 libfreetype6 libfontconfig1 libpangocairo-1.0-0 libpangocairo-1.0-0 libpango-1.0-0 libatk1.0-0 libcairo-gobject2 libcairo2 libgdk-pixbuf-2.0-0 libglib2.0-0 libdbus-glib-1-2 libdbus-1-3 libxcb-shm0 libx11-xcb1 libxcb1 libxcursor1 libxi6 libnss3 libnspr4 libdrm2 libgbm1 RUN mvn clean package -DskipTests -RUN ls -lash -RUN mv target/scraper-1.0-SNAPSHOT-jar-with-dependencies.jar /scraper.jar -RUN rm -rf /app/** -RUN mv /scraper.jar /app/scraper.jar -CMD ["java", "-jar", "scraper.jar"] \ No newline at end of file + +FROM mcr.microsoft.com/playwright:v1.38.0-jammy + +ARG version=17.0.8.8-1 +RUN set -eux \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + curl ca-certificates gnupg software-properties-common fontconfig java-common \ + && curl -fL https://apt.corretto.aws/corretto.key | apt-key add - \ + && add-apt-repository 'deb https://apt.corretto.aws stable main' \ + && mkdir -p /usr/share/man/man1 || true \ + && apt-get update \ + && apt-get install -y java-17-amazon-corretto-jdk=1:$version \ + && apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false \ + curl gnupg software-properties-common + +ENV LANG C.UTF-8 +ENV JAVA_HOME=/usr/lib/jvm/java-17-amazon-corretto + +COPY --from=build /app/target/scraper-1.0-SNAPSHOT-jar-with-dependencies.jar /app/app.jar + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install tzdata +ENV TZ=Europe/Paris +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone +RUN dpkg-reconfigure --frontend noninteractive tzdata + +WORKDIR /app +CMD ["java", "-jar", "app.jar"] \ No newline at end of file diff --git a/create_images.sh b/create_images.sh new file mode 100644 index 0000000..672fecc --- /dev/null +++ b/create_images.sh @@ -0,0 +1,4 @@ +#!/bin/sh +docker buildx create --use +docker buildx build --load --platform linux/amd64 -t jais-scraper:amd64 . +docker buildx build --load --platform linux/arm64 -t jais-scraper:arm64 . \ No newline at end of file diff --git a/install.sh b/install.sh deleted file mode 100644 index 88eb4ec..0000000 --- a/install.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh - -BRANCH=master - -# Check args passed to script -if [ $# -eq 1 ]; then - BRANCH=$1 -fi - -# Check if git is installed -if ! [ -x "$(command -v git)" ]; then - echo 'Error: git is not installed.' >&2 - exit 1 -fi - -# Check if docker is installed -if ! [ -x "$(command -v docker)" ]; then - echo 'Error: docker is not installed.' >&2 - exit 1 -fi - -# Clone repo from github "https://github.com/Z-Jais/Scraper.git" on branch $BRANCH -git clone --single-branch --branch "$BRANCH" https://github.com/Z-Jais/Scraper.git ./tmp - -cd ./tmp || exit - -# Build docker image -docker build -t scraper . -# Run docker container -docker run -d --name scraper scraper - -cd .. || exit -# Remove tmp folder -rm -rf ./tmp \ No newline at end of file diff --git a/pom.xml b/pom.xml index 64f8efb..78b4071 100644 --- a/pom.xml +++ b/pom.xml @@ -14,7 +14,7 @@ UTF-8 official - 11 + 17 1.9.10 fr.jais.scraper.ScraperKt 5.10.0 @@ -157,7 +157,7 @@ org.quartz-scheduler quartz - 2.3.2 + 2.5.0-rc1 \ No newline at end of file diff --git a/src/main/kotlin/fr/jais/scraper/Scraper.kt b/src/main/kotlin/fr/jais/scraper/Scraper.kt index 59bae6a..ae9c509 100644 --- a/src/main/kotlin/fr/jais/scraper/Scraper.kt +++ b/src/main/kotlin/fr/jais/scraper/Scraper.kt @@ -28,17 +28,22 @@ class Scraper { fun startThreadCommand() { ThreadManager.start("Command") { while (true) { - val command = readlnOrNull() ?: continue - val args = command.split(" ") + try { + val command = readlnOrNull() ?: continue + val args = command.split(" ") - when (args[0]) { - "ayane" -> { - AyaneJob().execute(null) - } - else -> { - Logger.info("Unknown command") + when (args[0]) { + "ayane" -> { + AyaneJob().execute(null) + } + + else -> { + Logger.info("Unknown command") + } } + } catch (_: Exception) { + Thread.sleep(1000) } } } diff --git a/src/main/kotlin/fr/jais/scraper/converters/AnimationDigitalNetworkConverter.kt b/src/main/kotlin/fr/jais/scraper/converters/AnimationDigitalNetworkConverter.kt index 7b85046..43e33b3 100644 --- a/src/main/kotlin/fr/jais/scraper/converters/AnimationDigitalNetworkConverter.kt +++ b/src/main/kotlin/fr/jais/scraper/converters/AnimationDigitalNetworkConverter.kt @@ -105,6 +105,11 @@ class AnimationDigitalNetworkConverter(private val platform: AnimationDigitalNet // ----- NUMBER ----- Logger.info("Get number...") + + if ("Bande-annonce" == jsonObject["shortNumber"]?.asString) { + throw EpisodeNotAvailableException("Trailer detected") + } + val number = jsonObject["shortNumber"]?.asString()?.toIntOrNull() ?: run { Logger.warning("No number found, using -1...") -1 diff --git a/src/main/kotlin/fr/jais/scraper/platforms/AnimationDigitalNetworkPlatform.kt b/src/main/kotlin/fr/jais/scraper/platforms/AnimationDigitalNetworkPlatform.kt index 7a33dfb..50c0037 100644 --- a/src/main/kotlin/fr/jais/scraper/platforms/AnimationDigitalNetworkPlatform.kt +++ b/src/main/kotlin/fr/jais/scraper/platforms/AnimationDigitalNetworkPlatform.kt @@ -47,10 +47,7 @@ class AnimationDigitalNetworkPlatform(scraper: Scraper) : try { converter.convertEpisode(country, calendar, it, cachedEpisodes) } catch (e: Exception) { - if (e !is EpisodeException) { - Logger.log(Level.SEVERE, "Error while converting episode", e) - } - + Logger.log(Level.SEVERE, "Error while converting episode", e) null } } ?: emptyList() diff --git a/src/main/kotlin/fr/jais/scraper/platforms/CrunchyrollPlatform.kt b/src/main/kotlin/fr/jais/scraper/platforms/CrunchyrollPlatform.kt index 0b8c9f8..461385c 100644 --- a/src/main/kotlin/fr/jais/scraper/platforms/CrunchyrollPlatform.kt +++ b/src/main/kotlin/fr/jais/scraper/platforms/CrunchyrollPlatform.kt @@ -58,48 +58,42 @@ class CrunchyrollPlatform(scraper: Scraper) : IPlatform( } private fun checkSimulcasts(iCountry: ICountry) { - Logger.info("Checking simulcasts for ${iCountry.name}...") - // Clear simulcast for this country if exists - simulcasts.remove(iCountry) val countryTag = converter.getCountryTag(iCountry) Logger.info("Loading simulcasts for ${iCountry.name}...") - val content = try { - Browser( - Browser.BrowserType.FIREFOX, - "https://www.crunchyroll.com/$countryTag/simulcasts" - ).launchAndWaitForSelector("#content > div > div.app-body-wrapper > div > div > div.erc-browse-collection > div > div:nth-child(1) > div > div > h4 > a") - } catch (_: Exception) { - Logger.warning("No simulcasts found for ${iCountry.name}! (Empty page or error)") - - Browser( - Browser.BrowserType.FIREFOX, - "https://www.crunchyroll.com/$countryTag/simulcasts" - ).launchAndWaitForSelector("#content > div > div.app-body-wrapper > div > div > div.info-box--wxFEW.erc-simulcast-no-results") - } + val selector = "#content > div > div.app-body-wrapper > div > div > div.erc-browse-collection > div > div:nth-child(1) > div > div > h4 > a" + val simulcastSelector = ".erc-browse-cards-collection > .browse-card > div > div > h4 > a" - val currentSimulcastAnimes = content.select(".erc-browse-cards-collection > .browse-card > div > div > h4 > a") - .map { it.text().lowercase() }.toSet() + val contentCurrentSimulcast = Browser( + Browser.BrowserType.FIREFOX, + "https://www.crunchyroll.com/$countryTag/simulcasts" + ).launchAndWaitForSelector(selector) val simulcastName = - content.select("#content > div > div.app-body-wrapper > div > div > div.header > div > div > span.call-to-action--PEidl.call-to-action--is-m--RVdkI.select-trigger__title-cta--C5-uH.select-trigger__title-cta--is-displayed-on-mobile--6oNk1") - .text() + contentCurrentSimulcast.select("#content > div > div.app-body-wrapper > div > div > div.header > div > div > span.call-to-action--PEidl.call-to-action--is-m--RVdkI.select-trigger__title-cta--C5-uH.select-trigger__title-cta--is-displayed-on-mobile--6oNk1").text() val simulcastCode = getSimulcastCode(simulcastName) - Logger.info("Simulcast code for ${iCountry.name}: $simulcastCode") + Logger.info("Current simulcast code for ${iCountry.name}: $simulcastCode") + + val currentSimulcastAnimes = contentCurrentSimulcast.select(simulcastSelector) + .map { it.text().lowercase() }.toSet() + Logger.config("Found ${currentSimulcastAnimes.size} animes for the current simulcast") + val previousSimulcastCode = getPreviousSimulcastCode(simulcastCode) Logger.info("Previous simulcast code for ${iCountry.name}: $previousSimulcastCode") - val content2 = Browser( + val contentPreviousSimulcast = Browser( Browser.BrowserType.FIREFOX, "https://www.crunchyroll.com/$countryTag/simulcasts/seasons/$previousSimulcastCode" - ).launchAndWaitForSelector("#content > div > div.app-body-wrapper > div > div > div.erc-browse-collection > div > div:nth-child(1) > div > div > h4 > a") + ).launchAndWaitForSelector(selector) + val previousSimulcastAnimes = - content2.select(".erc-browse-cards-collection > .browse-card > div > div > h4 > a") - .map { it.text().lowercase() }.toSet() + contentPreviousSimulcast.select(simulcastSelector).map { it.text().lowercase() }.toSet() + Logger.config("Found ${previousSimulcastAnimes.size} animes for the previous simulcast") - simulcasts[iCountry] = (currentSimulcastAnimes + previousSimulcastAnimes).toSet() - Logger.info("Found ${simulcasts[iCountry]?.size} simulcasts for ${iCountry.name}!") - Logger.config("Simulcasts: ${simulcasts[iCountry]?.joinToString(", ")}") + val combinedSimulcastAnimes = (currentSimulcastAnimes + previousSimulcastAnimes).toSet() + simulcasts[iCountry] = combinedSimulcastAnimes + Logger.info("Found ${combinedSimulcastAnimes.size} simulcasts for ${iCountry.name}!") + Logger.config("Simulcasts: ${combinedSimulcastAnimes.joinToString(", ")}") } fun xmlToJson(content: String) = @@ -137,12 +131,11 @@ class CrunchyrollPlatform(scraper: Scraper) : IPlatform( countries.forEach { try { checkSimulcasts(it) + lastSimulcastCheck = System.currentTimeMillis() } catch (e: Exception) { Logger.log(Level.SEVERE, "Error while checking simulcasts", e) } } - - lastSimulcastCheck = System.currentTimeMillis() } return countries.flatMap { country -> diff --git a/src/main/kotlin/fr/jais/scraper/utils/API.kt b/src/main/kotlin/fr/jais/scraper/utils/API.kt index a9792ee..bcb2ed8 100644 --- a/src/main/kotlin/fr/jais/scraper/utils/API.kt +++ b/src/main/kotlin/fr/jais/scraper/utils/API.kt @@ -11,9 +11,6 @@ import java.net.http.HttpRequest import java.net.http.HttpResponse import java.util.logging.Level -private const val URL = "https://beta-api.ziedelth.fr/" -//private const val URL = "http://localhost:8080/" - object API { private fun get(url: String): HttpResponse { val request = HttpRequest.newBuilder() @@ -32,20 +29,20 @@ object API { } private fun getCountry(country: Country): JsonObject? { - val response = get("${URL}countries") + val response = get("${Const.apiUrl}countries") val json = Const.gson.fromJson(response.body(), JsonArray::class.java) ?: return null return json.firstOrNull { it.asJsonObject["tag"].asString == country.tag }?.asJsonObject } private fun createCountry(country: Country): JsonObject? { - val response = post("${URL}countries", Const.gson.toJson(country)) + val response = post("${Const.apiUrl}countries", Const.gson.toJson(country)) return if (response.statusCode() == 201) Const.gson.fromJson(response.body(), JsonObject::class.java) else null } private fun getAnimeByHash(country: Country, anime: Anime): JsonObject? { val hash = anime.name.lowercase().filter { it.isLetterOrDigit() || it.isWhitespace() || it == '-' }.trim() .replace("\\s+".toRegex(), "-").replace("--", "-") - val response = get("${URL}animes/country/${country.tag}/search/hash/$hash") + val response = get("${Const.apiUrl}animes/country/${country.tag}/search/hash/$hash") return if (response.statusCode() == 200) Const.gson.fromJson(response.body(), JsonObject::class.java) else null } @@ -59,40 +56,40 @@ object API { } private fun createAnime(country: JsonObject, releaseDate: String, anime: Anime): JsonObject? { - val response = post("${URL}animes", toAnime(country, anime, releaseDate).toString()) + val response = post("${Const.apiUrl}animes", toAnime(country, anime, releaseDate).toString()) return if (response.statusCode() == 201) Const.gson.fromJson(response.body(), JsonObject::class.java) else null } private fun getPlatform(platform: Platform): JsonObject? { - val response = get("${URL}platforms") + val response = get("${Const.apiUrl}platforms") val json = Const.gson.fromJson(response.body(), JsonArray::class.java) ?: return null return json.firstOrNull { it.asJsonObject["name"].asString == platform.name }?.asJsonObject } private fun createPlatform(platform: Platform): JsonObject? { - val response = post("${URL}platforms", Const.gson.toJson(platform)) + val response = post("${Const.apiUrl}platforms", Const.gson.toJson(platform)) return if (response.statusCode() == 201) Const.gson.fromJson(response.body(), JsonObject::class.java) else null } private fun getEpisodeType(type: EpisodeType): JsonObject? { - val response = get("${URL}episodetypes") + val response = get("${Const.apiUrl}episodetypes") val json = Const.gson.fromJson(response.body(), JsonArray::class.java) ?: return null return json.firstOrNull { it.asJsonObject["name"].asString == type.name }?.asJsonObject } private fun createEpisodeType(type: EpisodeType): JsonObject? { - val response = post("${URL}episodetypes", JsonObject().apply { addProperty("name", type.name) }.toString()) + val response = post("${Const.apiUrl}episodetypes", JsonObject().apply { addProperty("name", type.name) }.toString()) return if (response.statusCode() == 201) Const.gson.fromJson(response.body(), JsonObject::class.java) else null } private fun getLangType(type: LangType): JsonObject? { - val response = get("${URL}langtypes") + val response = get("${Const.apiUrl}langtypes") val json = Const.gson.fromJson(response.body(), JsonArray::class.java) ?: return null return json.firstOrNull { it.asJsonObject["name"].asString == type.name }?.asJsonObject } private fun createLangType(type: LangType): JsonObject? { - val response = post("${URL}langtypes", JsonObject().apply { addProperty("name", type.name) }.toString()) + val response = post("${Const.apiUrl}langtypes", JsonObject().apply { addProperty("name", type.name) }.toString()) return if (response.statusCode() == 201) Const.gson.fromJson(response.body(), JsonObject::class.java) else null } @@ -154,7 +151,7 @@ object API { return } - post("${URL}episodes/multiple", Const.gson.toJson(episodesApi)) + post("${Const.apiUrl}episodes/multiple", Const.gson.toJson(episodesApi)) } catch (e: Exception) { Logger.log(Level.SEVERE, "Error saving episodes", e) } @@ -162,7 +159,7 @@ object API { fun saveAyane(message: String, images: List) { try { - post("${URL}ayane", Const.gson.toJson(mapOf("message" to message, "images" to images))) + post("${Const.apiUrl}ayane", Const.gson.toJson(mapOf("message" to message, "images" to images))) } catch (e: Exception) { Logger.log(Level.SEVERE, "Error saving episodes", e) } diff --git a/src/main/kotlin/fr/jais/scraper/utils/Browser.kt b/src/main/kotlin/fr/jais/scraper/utils/Browser.kt index 437199d..b40f2bb 100644 --- a/src/main/kotlin/fr/jais/scraper/utils/Browser.kt +++ b/src/main/kotlin/fr/jais/scraper/utils/Browser.kt @@ -28,13 +28,19 @@ class Browser(type: BrowserType = BrowserType.CHROME, val url: String) { context = browser?.newContext() Logger.info("Creating page...") page = context?.newPage() - page?.setDefaultTimeout(30_000.0) - page?.setDefaultNavigationTimeout(30_000.0) + page?.setDefaultTimeout(60_000.0) + page?.setDefaultNavigationTimeout(60_000.0) Logger.config("URL: $url") - Logger.info("Navigating...") - page?.navigate(url) - Logger.info("Waiting for load...") - page?.waitForLoadState() + + try { + Logger.info("Navigating...") + page?.navigate(url) + Logger.info("Waiting for load...") + page?.waitForLoadState() + } catch (e: Exception) { + close() + throw e + } } fun launch(): Document { @@ -46,7 +52,13 @@ class Browser(type: BrowserType = BrowserType.CHROME, val url: String) { } fun launchAndWaitForSelector(selector: String): Document { - page?.waitForSelector(selector) + try { + page?.waitForSelector(selector) + } catch (e: Exception) { + close() + throw e + } + val content = page?.content() close() diff --git a/src/main/kotlin/fr/jais/scraper/utils/Const.kt b/src/main/kotlin/fr/jais/scraper/utils/Const.kt index 7c49db1..3f62e62 100644 --- a/src/main/kotlin/fr/jais/scraper/utils/Const.kt +++ b/src/main/kotlin/fr/jais/scraper/utils/Const.kt @@ -17,4 +17,5 @@ object Const { val firefox: BrowserType = playwright.firefox() val multipleSpaceRegex = "\\s+".toRegex() const val calendarBaseUrl = "https://anime.icotaku.com" + val apiUrl = System.getenv("API_URL") ?: "http://localhost:8080/" }