diff --git a/services/html-to-pdf/README.md b/services/html-to-pdf/README.md index 4631e915..9ec2b112 100644 --- a/services/html-to-pdf/README.md +++ b/services/html-to-pdf/README.md @@ -1,19 +1,33 @@ # @org-apis/html-to-pdf -HTMLtoPDF is a service for generating PDFs from HTML. It support generating a -PDF from either a request containing a HTML body or a URL. +HTMLtoPDF is a turn-key microservice that generates PDFs from HTML. It supports +both URL and HTML based requests. + +## Usage Example + +```bash +curl -X POST https://nx7uv2rfy4.execute-api.us-east-2.amazonaws.com/default/v1/html-to-pdf/pdf -H "Content-Type: application/json" -d '{"input": "URL", "output": "PDF", "url": "https://google.com"}' -o example.pdf +``` + +![Example Image](./example.png) ## How it Works -PDF are generated using a headless version of Chromium. This form of PDF -rendering supports text recognition, images, hyperlinks, print media queries, -table breaks, and other features all this with relatively little code +PDFs are generated using a headless version of Chromium running in a Lambda. This +form of PDF rendering supports text recognition, images, hyperlinks, print media +queries, table breaks, and other features, all with relatively little code maintenance. -It can be finicky to get working within a Lambda and API Gateway. Lambda doesn't -include fonts fonts. Chromium should be installed in a Lambda layer. API gateway -can cause blank PDF if it doesn't properly handle binary responses. Serverless -Express also has to be configured to support the binary mime type. +This service addresses many finicky obstacles with making a request through API +gateway to a Lambda running Chromium to generate a PDF. + +- NodeJS Lambda Layers do not include default font files (\*.ttf) like a standard + OS does. +- Performance is essential and Lambda deploys are small. A compressed version of + Chromium must be deployed independently as a Lambda layer for performance. 
+- API gateway if not properly configured to handle binary responses can cause a + blank PDF. +- Serverless Express has to be configured to support the binary mime type. ## References diff --git a/services/html-to-pdf/example.png b/services/html-to-pdf/example.png new file mode 100644 index 00000000..00bbe71d Binary files /dev/null and b/services/html-to-pdf/example.png differ diff --git a/services/html-to-pdf/src/module/pdf/create-html-to-pdf.dto.ts b/services/html-to-pdf/src/module/pdf/create-html-to-pdf.dto.ts deleted file mode 100644 index 46ce90ab..00000000 --- a/services/html-to-pdf/src/module/pdf/create-html-to-pdf.dto.ts +++ /dev/null @@ -1,20 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; -import { IsFilename, IsString } from '@cats-cradle/validation-schemas'; - -export class CreateHtmlToPdfDto { - @IsString() - @ApiProperty({ - description: 'HTML', - default: 'Hello, World', - type: String, - }) - public html: string; - - @IsFilename() - @ApiProperty({ - description: 'Filename', - default: 'report.pdf', - type: String, - }) - public filename: string; -} diff --git a/services/html-to-pdf/src/module/pdf/create-url-to-pdf.dto.ts b/services/html-to-pdf/src/module/pdf/create-url-to-pdf.dto.ts deleted file mode 100644 index cc7a68c1..00000000 --- a/services/html-to-pdf/src/module/pdf/create-url-to-pdf.dto.ts +++ /dev/null @@ -1,20 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; -import { IsFilename, IsUrl } from '@cats-cradle/validation-schemas'; - -export class CreateUrlToPdfDto { - @IsUrl() - @ApiProperty({ - description: 'url', - default: 'https://google.com', - type: String, - }) - public url: string; - - @IsFilename() - @ApiProperty({ - description: 'Filename', - default: 'report.pdf', - type: String, - }) - public filename: string; -} diff --git a/services/html-to-pdf/src/module/pdf/operation.dto.ts b/services/html-to-pdf/src/module/pdf/operation.dto.ts new file mode 100644 index 00000000..d7b67804 --- /dev/null +++ 
b/services/html-to-pdf/src/module/pdf/operation.dto.ts @@ -0,0 +1,65 @@ +import { + IsUrl, + IsEnum, + IsString, + IsOptional, + IsFilename, +} from '@cats-cradle/validation-schemas'; +import { ApiProperty } from '@nestjs/swagger'; +import { v4 } from 'uuid'; + +export enum OperationInput { + HTML = 'HTML', + URL = 'URL', +} + +export enum OperationOutput { + DATA = 'DATA', + PDF = 'PDF', + JSON = 'JSON', +} + +export class OperationDto { + @IsEnum(OperationInput) + @ApiProperty({ + description: 'input format', + default: OperationInput.HTML, + enum: OperationOutput, + }) + input: OperationInput; + + @IsEnum(OperationOutput) + @ApiProperty({ + description: 'output format', + default: OperationOutput.PDF, + enum: OperationOutput, + }) + output: OperationOutput; + + @IsOptional() + @IsUrl() + @ApiProperty({ + description: 'url', + default: 'https://google.com', + type: String, + }) + url?: string; + + @IsOptional() + @IsString() + @ApiProperty({ + description: 'HTML', + default: + 'ExampleHello, World', + type: String, + }) + content?: string; + + @IsFilename() + @ApiProperty({ + description: 'Filename', + default: `${v4()}.pdf`, + type: String, + }) + filename?: string; +} diff --git a/services/html-to-pdf/src/module/pdf/pdf.controller.ts b/services/html-to-pdf/src/module/pdf/pdf.controller.ts index e5394ab0..70f4e356 100644 --- a/services/html-to-pdf/src/module/pdf/pdf.controller.ts +++ b/services/html-to-pdf/src/module/pdf/pdf.controller.ts @@ -6,73 +6,94 @@ import { Get, VERSION_NEUTRAL, Query, + BadRequestException, + StreamableFile, } from '@nestjs/common'; import { Response } from 'express'; import { v4 } from 'uuid'; import { PdfService } from './pdf.service'; -import { CreateHtmlToPdfDto } from './create-html-to-pdf.dto'; -import { CreateUrlToPdfDto } from './create-url-to-pdf.dto'; +import { OperationDto, OperationInput, OperationOutput } from './operation.dto'; @Controller({ path: 'pdf', version: ['1', VERSION_NEUTRAL] }) export class PdfController { 
constructor(private readonly pdfService: PdfService) {} - @Get('example-data') - async exampleData(@Res() res: Response) { - const data = await this.pdfService.renderPageData( - 'Demo Page

Demo

', - ); - - res.status(200).send(data); - } - - @Get('example-pdf') - async test(@Res() res: Response, @Query('url') url?: string) { - const buffer = await this.pdfService.renderUrl(url ?? 'http://example.com'); - this.responseAsPdf(false, buffer, res, `${v4()}.pdf`); - } - - @Post('render-url') - async renderUrl(@Res() res: Response, @Body() body: CreateUrlToPdfDto) { - const buffer = await this.pdfService.renderUrl(body.url); - this.responseAsPdf(false, buffer, res, body.filename); + @Get() + async url( + @Res({ passthrough: true }) res: Response, + @Query('url') url?: string, + ) { + const buffer = await this.pdfService.urlToPdf(url ?? 'http://example.com'); + const filename = `${v4()}.pdf`; + res.setHeader('Content-Length', Buffer.byteLength(buffer, 'utf-8')); + res.setHeader('Content-Type', 'application/pdf'); + res.setHeader('Content-Disposition', `attachment; filename=${filename}`); + return new StreamableFile(this.pdfService.createReadableStream(buffer)); } - @Post('render-html-data') - async renderHtmlData(@Res() res: Response, @Body() body: CreateHtmlToPdfDto) { - return this.pdfService.renderPageData(body.html); - } + @Post() + async operation( + @Res({ passthrough: true }) res: Response, + @Body() body: OperationDto, + ): Promise { + let buffer; + const filename = body.filename ?? `${v4()}.pdf`; - @Post('render-html') - async renderHtml(@Res() res: Response, @Body() body: CreateHtmlToPdfDto) { - const buffer = await this.pdfService.renderHtml(body.html); - this.responseAsPdf(false, buffer, res, body.filename); - } - - private responseAsPdf( - json: boolean, - buffer: Buffer, - res: Response, - filename: string, - ) { - if (!json) { - const stream = this.pdfService.createReadableStream(buffer); + try { + switch (true) { + case body.input === OperationInput.HTML + && body.output === OperationOutput.DATA: + return await this.pdfService.htmlToData(body.content ?? 
''); + case body.input === OperationInput.HTML + && body.output === OperationOutput.JSON: + buffer = await this.pdfService.htmlToPdf(body.content ?? ''); + return { + content: buffer.toString('base64'), + filename: body.filename ?? `${v4()}.pdf`, + mimeType: 'application/pdf', + }; + case body.input === OperationInput.HTML + && body.output === OperationOutput.PDF: + buffer = await this.pdfService.htmlToPdf(body.content ?? ''); + res.setHeader('Content-Length', Buffer.byteLength(buffer, 'utf-8')); + res.setHeader('Content-Type', 'application/pdf'); + res.setHeader( + 'Content-Disposition', + `attachment; filename=${filename}`, + ); + return new StreamableFile( + this.pdfService.createReadableStream(buffer), + ); - res.setHeader('Content-Length', Buffer.byteLength(buffer, 'utf-8')); - res.setHeader('Content-Type', 'application/pdf'); - res.setHeader('Content-Disposition', `attachment; filename=${filename}`); - stream.pipe(res); - } else { - /** - * base64 can be responses can be checked using the following - * https://base64.guru/converter/decode/pdf - */ - res.setHeader('Content-Type', 'application/json;charset=UTF-8'); - res.status(200).send({ - content: buffer.toString('base64'), - filename, - mimeType: 'application/pdf', - }); + case body.input === OperationInput.URL + && body.output === OperationOutput.DATA: + return await this.pdfService.urlToData(body.url ?? ''); + case body.input === OperationInput.URL + && body.output === OperationOutput.JSON: + buffer = await this.pdfService.urlToPdf(body.url ?? ''); + return { + content: buffer.toString('base64'), + filename: body.filename ?? `${v4()}.pdf`, + mimeType: 'application/pdf', + }; + case body.input === OperationInput.URL + && body.output === OperationOutput.PDF: + buffer = await this.pdfService.urlToPdf(body.url ?? 
''); + res.setHeader('Content-Length', Buffer.byteLength(buffer, 'utf-8')); + res.setHeader('Content-Type', 'application/pdf'); + res.setHeader( + 'Content-Disposition', + `attachment; filename=${filename}`, + ); + return new StreamableFile( + this.pdfService.createReadableStream(buffer), + ); + default: + return new BadRequestException('Invalid request'); + } + } catch (err) { + const error = err as Error; + return new BadRequestException(`Failed to render pdf: ${error.message}`); } } } diff --git a/services/html-to-pdf/src/module/pdf/pdf.e2e-spec.ts b/services/html-to-pdf/src/module/pdf/pdf.e2e-spec.ts index 1a3f3bf4..1f8c3369 100644 --- a/services/html-to-pdf/src/module/pdf/pdf.e2e-spec.ts +++ b/services/html-to-pdf/src/module/pdf/pdf.e2e-spec.ts @@ -1,22 +1,26 @@ import supertest from 'supertest'; import { Test, TestingModule } from '@nestjs/testing'; -import { INestApplication } from '@nestjs/common'; +import { INestApplication, Injectable } from '@nestjs/common'; import { FakerFactory } from '@cats-cradle/faker-factory'; -import { PdfModule } from './pdf.module'; -import { UrlToDataDto } from './url-to-data.dto'; +import { OperationInput, OperationOutput } from './operation.dto'; +import { PdfService } from './pdf.service'; +import { PdfController } from './pdf.controller'; describe('/pdf', () => { let app: INestApplication; + let pdfService: PdfService; beforeAll(async () => { const moduleRef: TestingModule = await Test.createTestingModule({ - imports: [PdfModule], - providers: [], - controllers: [], + imports: [], + controllers: [PdfController], + providers: [PdfService], }).compile(); app = moduleRef.createNestApplication(); + pdfService = moduleRef.get(PdfService); + await app.init(); }); @@ -24,20 +28,150 @@ describe('/pdf', () => { app.close(); }); - describe('POST /pdf/render-html-data', () => { - it.skip('should render html page', async () => { - const result = await supertest(app.getHttpServer()) - .post('/pdf/render-html-data') + describe('GET 
/pdf', () => { + it('should render url page to pdf', async () => { + jest.spyOn(pdfService, 'urlToPdf').mockImplementation((url: string) => Promise.resolve(Buffer.from('Test', 'utf-8'))); + + const response = await supertest(app.getHttpServer()).get('/pdf'); + // .expect(200); + + expect(response.header['content-type']).toEqual('application/pdf'); + expect(response.body).toEqual(Buffer.from('Test', 'utf-8')); + }); + }); + + describe('POST /pdf', () => { + it('should render url page to pdf', async () => { + jest.spyOn(pdfService, 'urlToPdf').mockImplementation((url: string) => Promise.resolve(Buffer.from('Test', 'utf-8'))); + + const response = await supertest(app.getHttpServer()) + .post('/pdf') + .send({ + input: OperationInput.URL, + output: OperationOutput.PDF, + url: 'http://example.com', + }) + .expect(201); + + expect(response.header['content-type']).toEqual('application/pdf'); + expect(response.body).toEqual(Buffer.from('Test', 'utf-8')); + }); + + it('should render url page to json', async () => { + jest.spyOn(pdfService, 'urlToPdf').mockImplementation((url: string) => Promise.resolve(Buffer.from('Test', 'utf-8'))); + + const response = await supertest(app.getHttpServer()) + .post('/pdf') + .send({ + input: OperationInput.URL, + output: OperationOutput.JSON, + url: 'http://example.com', + }) + .expect(201); + + expect(response.header['content-type']).toEqual( + 'application/json; charset=utf-8', + ); + expect(response.body).toEqual( + expect.objectContaining({ + content: 'VGVzdA==', + filename: expect.stringContaining('.pdf'), + mimeType: 'application/pdf', + }), + ); + }); + + it('should render url page to data', async () => { + jest.spyOn(pdfService, 'urlToData').mockImplementation((url: string) => Promise.resolve({ + title: 'Example Domain', + })); + + const response = await supertest(app.getHttpServer()) + .post('/pdf') + .send({ + input: OperationInput.URL, + output: OperationOutput.DATA, + url: 'https://example.com', + }) + .expect(201); + + 
expect(response.header['content-type']).toEqual( + 'application/json; charset=utf-8', + ); + expect(response.body).toEqual( + expect.objectContaining({ + title: 'Example Domain', + }), + ); + }); + + it('should render url page to pdf', async () => { + jest.spyOn(pdfService, 'htmlToPdf').mockImplementation((url: string) => Promise.resolve(Buffer.from('Test', 'utf-8'))); + + const response = await supertest(app.getHttpServer()) + .post('/pdf') + .send({ + input: OperationInput.HTML, + output: OperationOutput.PDF, + content: + 'Example PageExample', + }) + .expect(201); + + expect(response.header['content-type']).toEqual('application/pdf'); + expect(response.body).toEqual(Buffer.from('Test', 'utf-8')); + }); + + it('should render url page to json', async () => { + jest.spyOn(pdfService, 'htmlToPdf').mockImplementation((url: string) => Promise.resolve(Buffer.from('Test', 'utf-8'))); + + const response = await supertest(app.getHttpServer()) + .post('/pdf') .send({ - html: 'Example PageExample', + input: OperationInput.HTML, + output: OperationOutput.JSON, + content: + 'Example PageExample', }) .expect(201); - expect(result.body).toEqual( + expect(response.header['content-type']).toEqual( + 'application/json; charset=utf-8', + ); + expect(response.body).toEqual( + expect.objectContaining({ + content: 'VGVzdA==', + filename: expect.stringContaining('.pdf'), + mimeType: 'application/pdf', + }), + ); + }); + + it('should render html page to data', async () => { + jest + .spyOn(pdfService, 'htmlToData') + .mockImplementation((html: string) => Promise.resolve({ + title: 'Example Page', + })); + + const response = await supertest(app.getHttpServer()) + .post('/pdf') + .send({ + input: OperationInput.HTML, + output: OperationOutput.DATA, + content: + 'Example PageExample', + }) + .expect(201); + + expect(response.header['content-type']).toEqual( + 'application/json; charset=utf-8', + ); + expect(response.body).toEqual( expect.objectContaining({ title: 'Example Page', }), ); - 
}, 15000); + }); }); }); diff --git a/services/html-to-pdf/src/module/pdf/pdf.service.ts b/services/html-to-pdf/src/module/pdf/pdf.service.ts index 6a4cfc6c..9edb7e84 100644 --- a/services/html-to-pdf/src/module/pdf/pdf.service.ts +++ b/services/html-to-pdf/src/module/pdf/pdf.service.ts @@ -1,74 +1,67 @@ /* eslint @typescript-eslint/no-var-requires: "off" */ -import { BadRequestException, Injectable } from '@nestjs/common'; +import { Injectable } from '@nestjs/common'; import { Readable } from 'stream'; - -const puppeteer = require('puppeteer-core'); -const chromium = require('@sparticuz/chromium-min'); +import puppeteer from 'puppeteer-core'; +import chromium from '@sparticuz/chromium-min'; @Injectable() export class PdfService { - async renderHtml(html: string): Promise { - try { - const browser = await this.getBrowser(); - const page = await browser.newPage(); - - await page.setContent(html, { - waitUntil: ['networkidle0', 'domcontentloaded'], - }); - - const buffer = await page.pdf({ format: 'a4', printBackground: true }); - - await browser.close(); - return buffer; - } catch (err) { - const error = err as Error; - return new BadRequestException(`Failed to render pdf: ${error.message}`); - } + async htmlToPdf(html: string): Promise { + const browser = await this.getBrowser(); + const page = await browser.newPage(); + + await page.setContent(html, { + waitUntil: ['networkidle0', 'domcontentloaded'], + }); + + const buffer = await page.pdf({ format: 'a4', printBackground: true }); + + await browser.close(); + return buffer; + } + + async urlToPdf(url: string) { + const browser = await this.getBrowser(); + const page = await browser.newPage(); + await page.goto(url, { waitUntil: ['networkidle2', 'domcontentloaded'] }); + + const buffer = await page.pdf({ + format: 'A4', + landscape: false, + printBackground: true, + margin: { top: '30px' }, + scale: 0.98, + }); + + await browser.close(); + + return buffer; } - async renderUrl(url: string) { - try { - const browser 
= await this.getBrowser(); - const page = await browser.newPage(); - await page.goto(url, { waitUntil: ['networkidle2', 'domcontentloaded'] }); - - const buffer = await page.pdf({ - format: 'A4', - landscape: false, - printBackground: true, - margin: { top: '30px' }, - scale: 0.98, - }); - - await browser.close(); - - return buffer; - } catch (err) { - const error = err as Error; - return new BadRequestException(`Failed to render pdf: ${error.message}`); - } + async htmlToData(html: string) { + const browser = await this.getBrowser(); + const page = await browser.newPage(); + await page.setContent(html, { + waitUntil: ['networkidle0', 'domcontentloaded'], + }); + const data = { + title: (await page.title()) ?? 'undefined', + }; + + await browser.close(); + return data; } - async renderPageData(html: string) { - try { - const browser = await this.getBrowser(); - const page = await browser.newPage(); - await page.setContent(html, { - waitUntil: ['networkidle0', 'domcontentloaded'], - }); - const data = { - title: (await page.title()) ?? 'undefined', - mimeType: page.mimeType, - filename: page.filename, - charset: page.charset, - }; - - await browser.close(); - return data; - } catch (err) { - const error = err as Error; - return new BadRequestException(`Failed to render pdf: ${error.message}`); - } + async urlToData(url: string) { + const browser = await this.getBrowser(); + const page = await browser.newPage(); + await page.goto(url, { waitUntil: ['networkidle2', 'domcontentloaded'] }); + const data = { + title: (await page.title()) ?? 'undefined', + }; + + await browser.close(); + return data; } private async getBrowser() { @@ -76,9 +69,6 @@ export class PdfService { ? 
'/opt/nodejs/node_modules/@sparticuz/chromium/bin' : undefined; - chromium.setHeadlessMode = true; - chromium.setGraphicsMode = true; - await chromium.font( 'http://themes.googleusercontent.com/static/fonts/opensans/v6/cJZKeOuBrn4kERxqtaUH3aCWcynf_cDxXwCLxiixG1c.ttf', ); diff --git a/services/html-to-pdf/src/module/pdf/url-to-data.dto.ts b/services/html-to-pdf/src/module/pdf/url-to-data.dto.ts deleted file mode 100644 index c9bebac4..00000000 --- a/services/html-to-pdf/src/module/pdf/url-to-data.dto.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { IsUrl } from '@cats-cradle/validation-schemas'; -import { ApiProperty } from '@nestjs/swagger'; - -export class UrlToDataDto { - @IsUrl() - @ApiProperty({ - description: 'url', - default: 'https://example.com', - type: String, - }) - public url: string; -} diff --git a/services/html-to-pdf/stacks/main-stack.ts b/services/html-to-pdf/stacks/main-stack.ts index e10bb782..c872a06a 100644 --- a/services/html-to-pdf/stacks/main-stack.ts +++ b/services/html-to-pdf/stacks/main-stack.ts @@ -29,7 +29,7 @@ export class HtmlToPdfStack extends cdk.Stack { }); new cdk.CfnOutput(this, 'test endpoint', { - value: `${microservice.getBaseUrl()}/pdf/example-pdf`, + value: `${microservice.getBaseUrl()}/pdf?url=https://google.com`, }); } }