From d33c11ad737bdb41a0b7acad88bece988089c40e Mon Sep 17 00:00:00 2001 From: asivery Date: Mon, 12 Apr 2021 16:42:35 +0200 Subject: [PATCH] Added cache database and User-Agent HTTP header --- README.md | 14 ++++++++++- article.php | 60 +++++++++++++++++++++++++++++++++----------- cache_database.php | 62 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 16 deletions(-) create mode 100644 cache_database.php diff --git a/README.md b/README.md index 59068e1..2e41799 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,14 @@ # 68k-news -Source for the 68k.news site \ No newline at end of file +Source for the 68k.news site + +--- + +I added an SQLite3 cache database in which the articles are stored for 24h. After that period they are automatically deleted. + +The database can be disabled by changing the `USE_CACHE` define in `article.php`. + +If the database file becomes corrupted, the program will try to delete and recreate it. (This can be turned off by setting `RECREATE_ON_FAIL` to `false` in `cache_database.php`.) + +The default cache freshness lifetime is 24h; it can be changed using the `MAX_CACHE_TIME` parameter in `cache_database.php`, where the default cache database filename (`cache.db`) can also be changed. + +I also had to add a spoofed User-Agent header, because some articles couldn't load without it. diff --git a/article.php b/article.php index e112e71..aceea8a 100644 --- a/article.php +++ b/article.php @@ -1,10 +1,18 @@ "; +if($from_cache){ + $database = CacheDatabase::getInstance(); + $cachedArticle = is_null($database) ? null : $database->getFromCache($article_url); + if(!is_null($cachedArticle)){ + $article_title = $cachedArticle[0]; + $readable_article = $cachedArticle[1]; + $article_images = $cachedArticle[2]; + }else $from_cache = false; } -try { - $readability->parse($article_html); - $readable_article = strip_tags($readability->getContent(), '