Skip to content

Commit

Permalink
feat: add meta tags endpoint to html to pdf (#789)
Browse files Browse the repository at this point in the history
Signed-off-by: hxtree <[email protected]>
  • Loading branch information
hxtree authored Jan 10, 2024
1 parent 6b39a90 commit f787227
Show file tree
Hide file tree
Showing 8 changed files with 129 additions and 3 deletions.
4 changes: 4 additions & 0 deletions common/config/rush/browser-approved-packages.json
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,10 @@
"name": "bson",
"allowedCategories": [ "apis", "libraries" ]
},
{
"name": "cheerio",
"allowedCategories": [ "apis" ]
},
{
"name": "chokidar",
"allowedCategories": [ "platform", "rigs" ]
Expand Down
59 changes: 59 additions & 0 deletions common/config/rush/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion common/config/rush/repo-state.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
// DO NOT MODIFY THIS FILE MANUALLY BUT DO COMMIT IT. It is generated and used by Rush.
{
"pnpmShrinkwrapHash": "ef99d2be9bb5fde2af843e6f081457fe57fa679f",
"pnpmShrinkwrapHash": "0f26d344f9b8b25f5850336a9f316858200a1459",
"preferredVersionsHash": "a48003cf229dd47d077bcf6301ac15a6f90e1c34"
}
3 changes: 2 additions & 1 deletion services/html-to-pdf/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@
"@sparticuz/chromium-min": "112.0.0",
"puppeteer-core": "19.8.0",
"uuid": "~9.0.1",
"axios": "^0.21.1"
"axios": "^0.21.1",
"cheerio": "~1.0.0-rc.12"
},
"devDependencies": {
"@cats-cradle/eslint-config": "1.0.11",
Expand Down
3 changes: 2 additions & 1 deletion services/html-to-pdf/src/app.module.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import { Module } from '@nestjs/common';
import { HealthModule } from './module/health/health.module';
import { PdfModule } from './module/pdf/pdf.module';
import { MetaTagsModule } from './module/meta-tags/meta-tags.module';

@Module({
imports: [HealthModule, PdfModule],
imports: [HealthModule, PdfModule, MetaTagsModule],
providers: [],
exports: [],
})
Expand Down
16 changes: 16 additions & 0 deletions services/html-to-pdf/src/module/meta-tags/meta-tags.controller.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import {
Controller, Get, Param, Query, VERSION_NEUTRAL,
} from '@nestjs/common';
import { MetaTagsService } from './meta-tags.service';

/**
 * HTTP controller for the `meta-tags` route, declared for API version `1`
 * and as version-neutral. All work is delegated to {@link MetaTagsService}.
 */
@Controller({ path: 'meta-tags', version: ['1', VERSION_NEUTRAL] })
export class MetaTagsController {
  constructor(private readonly metaTagsService: MetaTagsService) {}

  /**
   * Handles `GET meta-tags?url=...`.
   *
   * @param url - Value of the `url` query parameter, forwarded to the service as-is.
   * @returns The string-to-string map produced by `MetaTagsService.getMetaTags`.
   */
  @Get()
  async getMetaTags(@Query('url') url: string): Promise<Record<string, string>> {
    const tags = await this.metaTagsService.getMetaTags(url);
    return tags;
  }
}
9 changes: 9 additions & 0 deletions services/html-to-pdf/src/module/meta-tags/meta-tags.module.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import { Module } from '@nestjs/common';
import { MetaTagsController } from './meta-tags.controller';
import { MetaTagsService } from './meta-tags.service';

/**
 * Nest module for the meta-tags feature: registers `MetaTagsController`
 * as its controller and `MetaTagsService` as its provider.
 */
@Module({
controllers: [MetaTagsController],
providers: [MetaTagsService],
})
export class MetaTagsModule {}
36 changes: 36 additions & 0 deletions services/html-to-pdf/src/module/meta-tags/meta-tags.service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { Injectable } from '@nestjs/common';
import axios from 'axios';
import * as cheerio from 'cheerio';

/**
 * Fetches a remote page over HTTP and extracts its `<meta>` tags.
 */
@Injectable()
export class MetaTagsService {
  /**
   * Downloads the page at `url` and returns its meta tags.
   *
   * @param url - Absolute `http://` or `https://` URL of the page to inspect.
   * @returns Map keyed by each tag's `name` (or `property` when `name` is
   *   absent or empty) holding the tag's `content`. Later tags with the same
   *   key overwrite earlier ones.
   * @throws Error with the message `Unable to fetch or parse the page` when the
   *   URL is missing or not http(s), the request fails, or parsing fails.
   */
  async getMetaTags(url: string): Promise<{ [key: string]: string }> {
    try {
      // The value may arrive unvalidated (e.g. an absent query parameter), so
      // reject anything that is not an http(s) URL before touching the network.
      // NOTE(review): this does not restrict which hosts may be requested;
      // confirm whether internal addresses need to be blocked upstream.
      if (typeof url !== 'string' || !/^https?:\/\//i.test(url)) {
        throw new Error(`Invalid or missing URL: ${String(url)}`);
      }

      const response = await axios.get(url);
      return this.extractMetaTags(response.data);
    } catch (err) {
      // Handle errors (e.g., network issues, invalid URLs). Anything can be
      // thrown, so narrow instead of asserting the value is an Error.
      const reason = err instanceof Error ? err.message : String(err);
      console.error('Error fetching or parsing the page:', reason);
      throw new Error('Unable to fetch or parse the page');
    }
  }

  /**
   * Parses an HTML document and collects its `<meta>` tags.
   *
   * @param html - Response body; only string bodies are parsed.
   * @returns Map of tag key to `content`. Tags missing a key or a non-empty
   *   `content` are skipped. Empty when `html` is not a string.
   */
  private extractMetaTags(html: unknown): { [key: string]: string } {
    const metaTags: { [key: string]: string } = {};

    // The body is not guaranteed to be text (an HTTP client may hand back an
    // already-decoded JSON value); there is no markup to inspect in that case.
    if (typeof html !== 'string') {
      return metaTags;
    }

    const $ = cheerio.load(html);

    $('meta').each((_, element) => {
      const tag = $(element);
      // `||` is deliberate: an empty `name` attribute falls through to `property`.
      const name = tag.attr('name') || tag.attr('property');
      const content = tag.attr('content');

      if (name && content) {
        metaTags[name] = content;
      }
    });

    return metaTags;
  }
}

0 comments on commit f787227

Please sign in to comment.