Node.js script to scrape the entire database of bdgest.com / bedetheque.com (approx. 50.000+ series, 300.000+ albums, 30.000+ authors).
npm install bedetheque-scraper --save
const { Scraper } = require('bedetheque-scraper');
// import { Scraper } from 'bedetheque-scraper' // using ES modules
// Index keys to walk: the letters A–Z plus '0'
// (presumably '0' covers names that do not start with a letter — confirm against the site).
const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0'.split('');

/**
 * Walks every index key and, for each one, fetches all series and then all
 * authors listed under it, printing each serie title and author name.
 *
 * Requests are awaited one at a time, in order.
 * Rejects with the first error thrown by any Scraper call.
 */
async function run() {
  for (const letter of letters) {
    const serieUrls = await Scraper.getSeriesUrlFromLetter(letter);
    for (const serieUrl of serieUrls) {
      // `albums` (the serie's Album[]) is returned alongside `serie`; it is unused here.
      const { serie, albums } = await Scraper.getSerie(serieUrl);
      console.log(serie.serieTitle);
    }

    const authorUrls = await Scraper.getAuthorsUrlFromLetter(letter);
    for (const authorUrl of authorUrls) {
      const author = await Scraper.getAuthor(authorUrl);
      console.log(author.name);
    }
  }
}

// Handle rejection explicitly instead of leaving a floating promise:
// report the failure and make the process exit with a non-zero status.
run().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Serie
{
serieId: number;
serieTitle: string;
serieUrl: string;
numberOfAlbums: number;
serieCover: { small: string, large: string };
albumsId: number[];
voteAverage: number;
voteCount: number;
recommendationsId: number[];
dateBegin: number;
dateEnd: number;
}
Album
{
serieId: number;
albumId: number;
albumNum: string;
serieTitle: string;
serieUrl: string;
albumTitle: string;
albumUrl: string;
scenario: string;
drawing: string;
colors: string;
date: number;
editor: string;
estimationEuros: number[];
nbrOfPages: number;
imageCover: { small: string, large: string };
imageExtract: { small: string, large: string };
imageReverse: { small: string, large: string };
voteAverage: number; // %
voteCount: number;
}
Author
{
authorId: number;
image: string;
name: string;
birthDate: string;
deathDate: string;
seriesIdScenario: number[];
seriesIdDrawing: number[];
seriesIdBoth: number[];
}
Proxy
{
host: string;
port: number;
}
-
Scraper.getSeriesUrlFromLetter(letter: string, frenchOnly?: boolean, proxy?: Proxy): Promise<string[]>;
- Example:
const serieUrls = await Scraper.getSeriesUrlFromLetter('A');
- Example:
-
Scraper.getAuthorsUrlFromLetter(letter: string, proxy?: Proxy): Promise<string[]>;
- Example:
const authorUrls = await Scraper.getAuthorsUrlFromLetter('A');
- Example:
-
Scraper.getSerie(url: string, proxy?: Proxy): Promise<{serie: Serie, albums: Album[]}>;
- Example:
const {serie, albums} = await Scraper.getSerie('https://www.bedetheque.com/serie-10739-BD-Roi-des-mouches.html')
- Example:
-
Scraper.getAuthor(url: string, proxy?: Proxy): Promise<Author>;
- Example:
const author = await Scraper.getAuthor('https://www.bedetheque.com/auteur-232-BD-Blain-Christophe.html')
- Example: