From bf4840946c52a5b2ade2549b2054831b1ebbb143 Mon Sep 17 00:00:00 2001 From: hxtree Date: Wed, 10 Jan 2024 05:58:28 +0000 Subject: [PATCH] feat: add meta tags endpoint to html to pdf Signed-off-by: hxtree --- .../rush/browser-approved-packages.json | 4 ++ common/config/rush/pnpm-lock.yaml | 59 +++++++++++++++++++ common/config/rush/repo-state.json | 2 +- services/html-to-pdf/package.json | 3 +- services/html-to-pdf/src/app.module.ts | 3 +- .../module/meta-tags/meta-tags.controller.ts | 16 +++++ .../src/module/meta-tags/meta-tags.module.ts | 9 +++ .../src/module/meta-tags/meta-tags.service.ts | 36 +++++++++++ 8 files changed, 129 insertions(+), 3 deletions(-) create mode 100644 services/html-to-pdf/src/module/meta-tags/meta-tags.controller.ts create mode 100644 services/html-to-pdf/src/module/meta-tags/meta-tags.module.ts create mode 100644 services/html-to-pdf/src/module/meta-tags/meta-tags.service.ts diff --git a/common/config/rush/browser-approved-packages.json b/common/config/rush/browser-approved-packages.json index 1a5cbd7c4..0ea17e03a 100644 --- a/common/config/rush/browser-approved-packages.json +++ b/common/config/rush/browser-approved-packages.json @@ -450,6 +450,10 @@ "name": "bson", "allowedCategories": [ "apis", "libraries" ] }, + { + "name": "cheerio", + "allowedCategories": [ "apis" ] + }, { "name": "chokidar", "allowedCategories": [ "platform", "rigs" ] diff --git a/common/config/rush/pnpm-lock.yaml b/common/config/rush/pnpm-lock.yaml index 086794c1a..535066089 100644 --- a/common/config/rush/pnpm-lock.yaml +++ b/common/config/rush/pnpm-lock.yaml @@ -2286,6 +2286,9 @@ importers: axios: specifier: ^0.21.1 version: 0.21.1 + cheerio: + specifier: ~1.0.0-rc.12 + version: 1.0.0-rc.12 class-transformer: specifier: 0.5.1 version: 0.5.1 @@ -15872,6 +15875,10 @@ packages: fast-safe-stringify: 2.1.1 individual: 3.0.0 + /boolbase@1.0.0: + resolution: {integrity: sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==} + dev: false + /bootstrap@5.3.2(@popperjs/core@2.11.8): resolution: {integrity: sha512-D32nmNWiQHo94BKHLmOrdjlL05q1c8oxbtBphQFb9Z5to6eGRDCm0QgeaZ4zFBHzfg2++rqa2JkqCcxDy0sH0g==} peerDependencies: @@ -16293,6 +16300,30 @@ packages: engines: {node: '>=12'} dev: false + /cheerio-select@2.1.0: + resolution: {integrity: sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==} + dependencies: + boolbase: 1.0.0 + css-select: 5.1.0 + css-what: 6.1.0 + domelementtype: 2.3.0 + domhandler: 5.0.3 + domutils: 3.1.0 + dev: false + + /cheerio@1.0.0-rc.12: + resolution: {integrity: sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==} + engines: {node: '>= 6'} + dependencies: + cheerio-select: 2.1.0 + dom-serializer: 2.0.0 + domhandler: 5.0.3 + domutils: 3.1.0 + htmlparser2: 8.0.2 + parse5: 7.1.2 + parse5-htmlparser2-tree-adapter: 7.0.0 + dev: false + /chokidar@3.4.3: resolution: {integrity: sha512-DtM3g7juCXQxFVSNPNByEC2+NImtBuxQQvWlHunpJIS5Ocr0lG306cC7FCi7cEA0fzmybPUIl4txBIobk1gGOQ==} engines: {node: '>= 8.10.0'} @@ -16937,6 +16968,21 @@ packages: webpack: 5.89.0(esbuild@0.18.20) dev: true + /css-select@5.1.0: + resolution: {integrity: sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==} + dependencies: + boolbase: 1.0.0 + css-what: 6.1.0 + domhandler: 5.0.3 + domutils: 3.1.0 + nth-check: 2.1.1 + dev: false + + /css-what@6.1.0: + resolution: {integrity: sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==} + engines: {node: '>= 6'} + dev: false + /css.escape@1.5.1: resolution: {integrity: sha512-YUifsXXuknHlUsmlgyY0PKzgPOr7/FjCePfHNt0jxm83wHZi44VDMQ7/fGNkjY3/jV1MC+1CmZbaHzugyeRtpg==} dev: true @@ -23483,6 +23529,12 @@ packages: gauge: 4.0.4 set-blocking: 2.0.0 + /nth-check@2.1.1: + resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==} + dependencies: + boolbase: 1.0.0 + dev: false + /nullthrows@1.1.1: resolution: {integrity: sha512-2vPPEi+Z7WqML2jZYddDIfy5Dqb0r2fze2zTxNNknZaFpVHU3mFB3R+DWeJWGVx0ecvttSGlJTI+WG+8Z4cDWw==} dev: false @@ -23911,6 +23963,13 @@ packages: engines: {node: '>=0.10.0'} dev: false + /parse5-htmlparser2-tree-adapter@7.0.0: + resolution: {integrity: sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==} + dependencies: + domhandler: 5.0.3 + parse5: 7.1.2 + dev: false + /parse5@5.1.0: resolution: {integrity: sha512-fxNG2sQjHvlVAYmzBZS9YlDp6PTSSDwa98vkD4QgVDDCAo84z5X1t5XyJQ62ImdLXx5NdIIfihey6xpum9/gRQ==} dev: true diff --git a/common/config/rush/repo-state.json b/common/config/rush/repo-state.json index 0f3778add..2b7b52578 100644 --- a/common/config/rush/repo-state.json +++ b/common/config/rush/repo-state.json @@ -1,5 +1,5 @@ // DO NOT MODIFY THIS FILE MANUALLY BUT DO COMMIT IT. It is generated and used by Rush. { - "pnpmShrinkwrapHash": "ef99d2be9bb5fde2af843e6f081457fe57fa679f", + "pnpmShrinkwrapHash": "0f26d344f9b8b25f5850336a9f316858200a1459", "preferredVersionsHash": "a48003cf229dd47d077bcf6301ac15a6f90e1c34" } diff --git a/services/html-to-pdf/package.json b/services/html-to-pdf/package.json index 4860d10c2..376197313 100644 --- a/services/html-to-pdf/package.json +++ b/services/html-to-pdf/package.json @@ -61,7 +61,8 @@ "@sparticuz/chromium-min": "112.0.0", "puppeteer-core": "19.8.0", "uuid": "~9.0.1", - "axios": "^0.21.1" + "axios": "^0.21.1", + "cheerio": "~1.0.0-rc.12" }, "devDependencies": { "@cats-cradle/eslint-config": "1.0.11", diff --git a/services/html-to-pdf/src/app.module.ts b/services/html-to-pdf/src/app.module.ts index 74d797dbd..e3ef4d5ec 100644 --- a/services/html-to-pdf/src/app.module.ts +++ b/services/html-to-pdf/src/app.module.ts @@ -1,9 +1,10 @@ import { Module } from '@nestjs/common'; import { HealthModule } from './module/health/health.module'; import { PdfModule } from './module/pdf/pdf.module'; +import { MetaTagsModule } from './module/meta-tags/meta-tags.module'; @Module({ - imports: [HealthModule, PdfModule], + imports: [HealthModule, PdfModule, MetaTagsModule], providers: [], exports: [], }) diff --git a/services/html-to-pdf/src/module/meta-tags/meta-tags.controller.ts b/services/html-to-pdf/src/module/meta-tags/meta-tags.controller.ts new file mode 100644 index 000000000..82bdc663a --- /dev/null +++ b/services/html-to-pdf/src/module/meta-tags/meta-tags.controller.ts @@ -0,0 +1,16 @@ +import { + Controller, Get, Param, Query, VERSION_NEUTRAL, +} from '@nestjs/common'; +import { MetaTagsService } from './meta-tags.service'; + +@Controller({ path: 'meta-tags', version: ['1', VERSION_NEUTRAL] }) +export class MetaTagsController { + constructor(private readonly metaTagsService: MetaTagsService) {} + + @Get() + async getMetaTags( + @Query('url') url: string, + ): Promise<{ [key: string]: string }> { + return this.metaTagsService.getMetaTags(url); + } +} diff --git a/services/html-to-pdf/src/module/meta-tags/meta-tags.module.ts b/services/html-to-pdf/src/module/meta-tags/meta-tags.module.ts new file mode 100644 index 000000000..e9a9aa57b --- /dev/null +++ b/services/html-to-pdf/src/module/meta-tags/meta-tags.module.ts @@ -0,0 +1,9 @@ +import { Module } from '@nestjs/common'; +import { MetaTagsController } from './meta-tags.controller'; +import { MetaTagsService } from './meta-tags.service'; + +@Module({ + controllers: [MetaTagsController], + providers: [MetaTagsService], +}) +export class MetaTagsModule {} diff --git a/services/html-to-pdf/src/module/meta-tags/meta-tags.service.ts b/services/html-to-pdf/src/module/meta-tags/meta-tags.service.ts new file mode 100644 index 000000000..623c8ccee --- /dev/null +++ b/services/html-to-pdf/src/module/meta-tags/meta-tags.service.ts @@ -0,0 +1,36 @@ +import { Injectable } from '@nestjs/common'; +import axios from 'axios'; +import * as cheerio from 'cheerio'; + +@Injectable() +export class MetaTagsService { + async getMetaTags(url: string): Promise<{ [key: string]: string }> { + try { + const response = await axios.get(url); + const metaTags = this.extractMetaTags(response.data); + return metaTags; + } catch (err) { + // Handle errors (e.g., network issues, invalid URLs) + const error = err as Error; + console.error('Error fetching or parsing the page:', error.message); + throw new Error('Unable to fetch or parse the page'); + } + } + + private extractMetaTags(html: string): { [key: string]: string } { + const $ = cheerio.load(html); + const metaTags: { [key: string]: string } = {}; + + $('meta').each((_, element) => { + const tag = $(element); + const name = tag.attr('name') || tag.attr('property'); + const content = tag.attr('content'); + + if (name && content) { + metaTags[name] = content; + } + }); + + return metaTags; + } +}