diff --git a/services/html-to-pdf/README.md b/services/html-to-pdf/README.md
index 4631e915..9ec2b112 100644
--- a/services/html-to-pdf/README.md
+++ b/services/html-to-pdf/README.md
@@ -1,19 +1,33 @@
# @org-apis/html-to-pdf
-HTMLtoPDF is a service for generating PDFs from HTML. It support generating a
-PDF from either a request containing a HTML body or a URL.
+HTMLtoPDF is a turn-key microservice that generates PDFs from HTML. It supports
+both URL- and HTML-based requests.
+
+## Usage Example
+
+```bash
+curl -X POST https://nx7uv2rfy4.execute-api.us-east-2.amazonaws.com/default/v1/html-to-pdf/pdf -H "Content-Type: application/json" -d '{"input": "URL", "output": "PDF", "url": "https://google.com"}' -o example.pdf
+```
+
+![Example Image](./example.png)
## How it Works
-PDF are generated using a headless version of Chromium. This form of PDF
-rendering supports text recognition, images, hyperlinks, print media queries,
-table breaks, and other features all this with relatively little code
+PDFs are generated using a headless version of Chromium running in a Lambda. This
+form of PDF rendering supports text recognition, images, hyperlinks, print media
+queries, table breaks, and other features, all with relatively little code
maintenance.
-It can be finicky to get working within a Lambda and API Gateway. Lambda doesn't
-include fonts fonts. Chromium should be installed in a Lambda layer. API gateway
-can cause blank PDF if it doesn't properly handle binary responses. Serverless
-Express also has to be configured to support the binary mime type.
+This service addresses many finicky obstacles in making a request through API
+Gateway to a Lambda running Chromium to generate a PDF:
+
+- NodeJS Lambda Layers do not include default font files (\*.ttf) like a standard
+ OS does.
+- Performance is essential and Lambda deploys are small, so a compressed version
+ of Chromium must be deployed independently as a Lambda layer.
+- API Gateway, if not properly configured to handle binary responses, can cause a
+ blank PDF.
+- Serverless Express has to be configured to support the binary mime type.
## References
diff --git a/services/html-to-pdf/example.png b/services/html-to-pdf/example.png
new file mode 100644
index 00000000..00bbe71d
Binary files /dev/null and b/services/html-to-pdf/example.png differ
diff --git a/services/html-to-pdf/src/module/pdf/create-html-to-pdf.dto.ts b/services/html-to-pdf/src/module/pdf/create-html-to-pdf.dto.ts
deleted file mode 100644
index 46ce90ab..00000000
--- a/services/html-to-pdf/src/module/pdf/create-html-to-pdf.dto.ts
+++ /dev/null
@@ -1,20 +0,0 @@
-import { ApiProperty } from '@nestjs/swagger';
-import { IsFilename, IsString } from '@cats-cradle/validation-schemas';
-
-export class CreateHtmlToPdfDto {
- @IsString()
- @ApiProperty({
- description: 'HTML',
- default: '
Hello, World',
- type: String,
- })
- public html: string;
-
- @IsFilename()
- @ApiProperty({
- description: 'Filename',
- default: 'report.pdf',
- type: String,
- })
- public filename: string;
-}
diff --git a/services/html-to-pdf/src/module/pdf/create-url-to-pdf.dto.ts b/services/html-to-pdf/src/module/pdf/create-url-to-pdf.dto.ts
deleted file mode 100644
index cc7a68c1..00000000
--- a/services/html-to-pdf/src/module/pdf/create-url-to-pdf.dto.ts
+++ /dev/null
@@ -1,20 +0,0 @@
-import { ApiProperty } from '@nestjs/swagger';
-import { IsFilename, IsUrl } from '@cats-cradle/validation-schemas';
-
-export class CreateUrlToPdfDto {
- @IsUrl()
- @ApiProperty({
- description: 'url',
- default: 'https://google.com',
- type: String,
- })
- public url: string;
-
- @IsFilename()
- @ApiProperty({
- description: 'Filename',
- default: 'report.pdf',
- type: String,
- })
- public filename: string;
-}
diff --git a/services/html-to-pdf/src/module/pdf/operation.dto.ts b/services/html-to-pdf/src/module/pdf/operation.dto.ts
new file mode 100644
index 00000000..d7b67804
--- /dev/null
+++ b/services/html-to-pdf/src/module/pdf/operation.dto.ts
@@ -0,0 +1,65 @@
+import {
+ IsUrl,
+ IsEnum,
+ IsString,
+ IsOptional,
+ IsFilename,
+} from '@cats-cradle/validation-schemas';
+import { ApiProperty } from '@nestjs/swagger';
+import { v4 } from 'uuid';
+
+/** Kinds of source material accepted by the PDF operation request. */
+export enum OperationInput {
+ // Markup supplied in the request; PdfController passes `body.content` to the service for this value.
+ HTML = 'HTML',
+ // A page address; PdfController passes `body.url` to the service for this value.
+ URL = 'URL',
+}
+
+/** Response shapes the PDF operation can produce. */
+export enum OperationOutput {
+ // Page metadata object returned by PdfService.htmlToData / urlToData (currently the page title).
+ DATA = 'DATA',
+ // The rendered document streamed back with Content-Type application/pdf.
+ PDF = 'PDF',
+ // An object carrying the PDF as base64 `content` plus `filename` and `mimeType`.
+ JSON = 'JSON',
+}
+
+/**
+ * Request body for `POST /pdf`: selects what to render (`input`) and how the
+ * result is returned (`output`), plus the source material and an optional
+ * download filename.
+ */
+export class OperationDto {
+  /** Source kind; the controller reads `content` for HTML and `url` for URL. */
+  @IsEnum(OperationInput)
+  @ApiProperty({
+    description: 'input format',
+    default: OperationInput.HTML,
+    // Must list the input values; OperationOutput would advertise DATA/PDF/JSON here.
+    enum: OperationInput,
+  })
+  input: OperationInput;
+
+  /** Response shape requested by the caller. */
+  @IsEnum(OperationOutput)
+  @ApiProperty({
+    description: 'output format',
+    default: OperationOutput.PDF,
+    enum: OperationOutput,
+  })
+  output: OperationOutput;
+
+  /** Page to render when `input` is URL. */
+  @IsOptional()
+  @IsUrl()
+  @ApiProperty({
+    description: 'url',
+    default: 'https://google.com',
+    type: String,
+    required: false,
+  })
+  url?: string;
+
+  /** Markup to render when `input` is HTML. */
+  @IsOptional()
+  @IsString()
+  @ApiProperty({
+    description: 'HTML',
+    default:
+      'ExampleHello, World',
+    type: String,
+    required: false,
+  })
+  content?: string;
+
+  /**
+   * Download filename. Optional: the controller falls back to a generated
+   * `<uuid>.pdf` when it is omitted, so validation must not require it.
+   */
+  @IsOptional()
+  @IsFilename()
+  @ApiProperty({
+    description: 'Filename',
+    // Evaluated once when the class is defined; shown in the docs as an example only.
+    default: `${v4()}.pdf`,
+    type: String,
+    required: false,
+  })
+  filename?: string;
+}
diff --git a/services/html-to-pdf/src/module/pdf/pdf.controller.ts b/services/html-to-pdf/src/module/pdf/pdf.controller.ts
index e5394ab0..70f4e356 100644
--- a/services/html-to-pdf/src/module/pdf/pdf.controller.ts
+++ b/services/html-to-pdf/src/module/pdf/pdf.controller.ts
@@ -6,73 +6,94 @@ import {
Get,
VERSION_NEUTRAL,
Query,
+ BadRequestException,
+ StreamableFile,
} from '@nestjs/common';
import { Response } from 'express';
import { v4 } from 'uuid';
import { PdfService } from './pdf.service';
-import { CreateHtmlToPdfDto } from './create-html-to-pdf.dto';
-import { CreateUrlToPdfDto } from './create-url-to-pdf.dto';
+import { OperationDto, OperationInput, OperationOutput } from './operation.dto';
@Controller({ path: 'pdf', version: ['1', VERSION_NEUTRAL] })
export class PdfController {
constructor(private readonly pdfService: PdfService) {}
- @Get('example-data')
- async exampleData(@Res() res: Response) {
- const data = await this.pdfService.renderPageData(
- 'Demo PageDemo
',
- );
-
- res.status(200).send(data);
- }
-
- @Get('example-pdf')
- async test(@Res() res: Response, @Query('url') url?: string) {
- const buffer = await this.pdfService.renderUrl(url ?? 'http://example.com');
- this.responseAsPdf(false, buffer, res, `${v4()}.pdf`);
- }
-
- @Post('render-url')
- async renderUrl(@Res() res: Response, @Body() body: CreateUrlToPdfDto) {
- const buffer = await this.pdfService.renderUrl(body.url);
- this.responseAsPdf(false, buffer, res, body.filename);
+  /**
+   * Renders the page at `url` (default `http://example.com`) and returns it as
+   * a PDF attachment with a generated `<uuid>.pdf` filename.
+   *
+   * @throws BadRequestException when `url` is not an absolute http(s) URL.
+   */
+  @Get()
+  async url(
+    @Res({ passthrough: true }) res: Response,
+    @Query('url') url?: string,
+  ) {
+    const target = url ?? 'http://example.com';
+    // The query string is untrusted and is handed to a headless browser:
+    // accept only web URLs so file:// and other local schemes are rejected.
+    if (typeof target !== 'string' || !/^https?:\/\/[^\s/]/i.test(target)) {
+      throw new BadRequestException('Invalid url');
+    }
+    const buffer = await this.pdfService.urlToPdf(target);
+    const filename = `${v4()}.pdf`;
+    res.setHeader('Content-Length', Buffer.byteLength(buffer, 'utf-8'));
+    res.setHeader('Content-Type', 'application/pdf');
+    res.setHeader('Content-Disposition', `attachment; filename=${filename}`);
+    return new StreamableFile(this.pdfService.createReadableStream(buffer));
}
- @Post('render-html-data')
- async renderHtmlData(@Res() res: Response, @Body() body: CreateHtmlToPdfDto) {
- return this.pdfService.renderPageData(body.html);
- }
+  /**
+   * Renders HTML (`body.content`) or a page (`body.url`) and returns the result
+   * in the requested `body.output` form: page data, a base64 JSON envelope, or
+   * a streamed PDF attachment.
+   *
+   * @throws BadRequestException when the input/output combination is not
+   *   supported or when rendering fails.
+   */
+  @Post()
+  async operation(
+    @Res({ passthrough: true }) res: Response,
+    @Body() body: OperationDto,
+  ): Promise<StreamableFile | Record<string, unknown>> {
+    let buffer;
+    const filename = body.filename ?? `${v4()}.pdf`;
- @Post('render-html')
- async renderHtml(@Res() res: Response, @Body() body: CreateHtmlToPdfDto) {
- const buffer = await this.pdfService.renderHtml(body.html);
- this.responseAsPdf(false, buffer, res, body.filename);
- }
-
- private responseAsPdf(
- json: boolean,
- buffer: Buffer,
- res: Response,
- filename: string,
- ) {
- if (!json) {
- const stream = this.pdfService.createReadableStream(buffer);
+    try {
+      // switch (true) picks the first case whose input/output pair matches.
+      switch (true) {
+        case body.input === OperationInput.HTML
+          && body.output === OperationOutput.DATA:
+          return await this.pdfService.htmlToData(body.content ?? '');
+        case body.input === OperationInput.HTML
+          && body.output === OperationOutput.JSON:
+          buffer = await this.pdfService.htmlToPdf(body.content ?? '');
+          return {
+            content: buffer.toString('base64'),
+            filename,
+            mimeType: 'application/pdf',
+          };
+        case body.input === OperationInput.HTML
+          && body.output === OperationOutput.PDF:
+          buffer = await this.pdfService.htmlToPdf(body.content ?? '');
+          res.setHeader('Content-Length', Buffer.byteLength(buffer, 'utf-8'));
+          res.setHeader('Content-Type', 'application/pdf');
+          res.setHeader(
+            'Content-Disposition',
+            `attachment; filename=${filename}`,
+          );
+          return new StreamableFile(
+            this.pdfService.createReadableStream(buffer),
+          );
- res.setHeader('Content-Length', Buffer.byteLength(buffer, 'utf-8'));
- res.setHeader('Content-Type', 'application/pdf');
- res.setHeader('Content-Disposition', `attachment; filename=${filename}`);
- stream.pipe(res);
- } else {
- /**
- * base64 can be responses can be checked using the following
- * https://base64.guru/converter/decode/pdf
- */
- res.setHeader('Content-Type', 'application/json;charset=UTF-8');
- res.status(200).send({
- content: buffer.toString('base64'),
- filename,
- mimeType: 'application/pdf',
- });
+        case body.input === OperationInput.URL
+          && body.output === OperationOutput.DATA:
+          return await this.pdfService.urlToData(body.url ?? '');
+        case body.input === OperationInput.URL
+          && body.output === OperationOutput.JSON:
+          buffer = await this.pdfService.urlToPdf(body.url ?? '');
+          return {
+            content: buffer.toString('base64'),
+            filename,
+            mimeType: 'application/pdf',
+          };
+        case body.input === OperationInput.URL
+          && body.output === OperationOutput.PDF:
+          buffer = await this.pdfService.urlToPdf(body.url ?? '');
+          res.setHeader('Content-Length', Buffer.byteLength(buffer, 'utf-8'));
+          res.setHeader('Content-Type', 'application/pdf');
+          res.setHeader(
+            'Content-Disposition',
+            `attachment; filename=${filename}`,
+          );
+          return new StreamableFile(
+            this.pdfService.createReadableStream(buffer),
+          );
+        default:
+          // Throw (not return): a returned exception object would be
+          // serialized as a successful response body.
+          throw new BadRequestException('Invalid request');
+      }
+    } catch (err) {
+      // Let deliberate 400s (e.g. the default branch) through without re-wrapping.
+      if (err instanceof BadRequestException) {
+        throw err;
+      }
+      const error = err as Error;
+      throw new BadRequestException(`Failed to render pdf: ${error.message}`);
}
}
}
diff --git a/services/html-to-pdf/src/module/pdf/pdf.e2e-spec.ts b/services/html-to-pdf/src/module/pdf/pdf.e2e-spec.ts
index 1a3f3bf4..1f8c3369 100644
--- a/services/html-to-pdf/src/module/pdf/pdf.e2e-spec.ts
+++ b/services/html-to-pdf/src/module/pdf/pdf.e2e-spec.ts
@@ -1,22 +1,26 @@
import supertest from 'supertest';
import { Test, TestingModule } from '@nestjs/testing';
-import { INestApplication } from '@nestjs/common';
+import { INestApplication, Injectable } from '@nestjs/common';
import { FakerFactory } from '@cats-cradle/faker-factory';
-import { PdfModule } from './pdf.module';
-import { UrlToDataDto } from './url-to-data.dto';
+import { OperationInput, OperationOutput } from './operation.dto';
+import { PdfService } from './pdf.service';
+import { PdfController } from './pdf.controller';
describe('/pdf', () => {
let app: INestApplication;
+ let pdfService: PdfService;
+ // Assemble a minimal Nest app containing just PdfController and PdfService
+ // (PdfModule is no longer imported) and start it once for the whole suite.
beforeAll(async () => {
const moduleRef: TestingModule = await Test.createTestingModule({
- imports: [PdfModule],
- providers: [],
- controllers: [],
+ imports: [],
+ controllers: [PdfController],
+ providers: [PdfService],
}).compile();
app = moduleRef.createNestApplication();
+ // Keep the provider instance so each test can stub its render methods with jest.spyOn.
+ pdfService = moduleRef.get(PdfService);
+
await app.init();
});
@@ -24,20 +28,150 @@ describe('/pdf', () => {
app.close();
});
- describe('POST /pdf/render-html-data', () => {
- it.skip('should render html page', async () => {
- const result = await supertest(app.getHttpServer())
- .post('/pdf/render-html-data')
+  describe('GET /pdf', () => {
+    it('should render url page to pdf', async () => {
+      // Stub the renderer so no browser is launched; the bytes are echoed back.
+      const pdfBytes = Buffer.from('Test', 'utf-8');
+      jest.spyOn(pdfService, 'urlToPdf').mockResolvedValue(pdfBytes);
+
+      const request = supertest(app.getHttpServer());
+      const response = await request.get('/pdf');
+      // NOTE(review): the status code is not asserted here (the 200 check was left disabled).
+
+      expect(response.header['content-type']).toEqual('application/pdf');
+      expect(response.body).toEqual(Buffer.from('Test', 'utf-8'));
+    });
+  });
+
+  // Covers every input/output pair of POST /pdf with PdfService stubbed, so
+  // only the controller's routing and response shaping are exercised.
+  describe('POST /pdf', () => {
+    it('should render url page to pdf', async () => {
+      jest
+        .spyOn(pdfService, 'urlToPdf')
+        .mockResolvedValue(Buffer.from('Test', 'utf-8'));
+
+      const response = await supertest(app.getHttpServer())
+        .post('/pdf')
+        .send({
+          input: OperationInput.URL,
+          output: OperationOutput.PDF,
+          url: 'http://example.com',
+        })
+        .expect(201);
+
+      expect(response.header['content-type']).toEqual('application/pdf');
+      expect(response.body).toEqual(Buffer.from('Test', 'utf-8'));
+    });
+
+    it('should render url page to json', async () => {
+      jest
+        .spyOn(pdfService, 'urlToPdf')
+        .mockResolvedValue(Buffer.from('Test', 'utf-8'));
+
+      const response = await supertest(app.getHttpServer())
+        .post('/pdf')
+        .send({
+          input: OperationInput.URL,
+          output: OperationOutput.JSON,
+          url: 'http://example.com',
+        })
+        .expect(201);
+
+      expect(response.header['content-type']).toEqual(
+        'application/json; charset=utf-8',
+      );
+      // 'VGVzdA==' is the base64 encoding of the stubbed 'Test' bytes.
+      expect(response.body).toEqual(
+        expect.objectContaining({
+          content: 'VGVzdA==',
+          filename: expect.stringContaining('.pdf'),
+          mimeType: 'application/pdf',
+        }),
+      );
+    });
+
+    it('should render url page to data', async () => {
+      jest.spyOn(pdfService, 'urlToData').mockResolvedValue({
+        title: 'Example Domain',
+      });
+
+      const response = await supertest(app.getHttpServer())
+        .post('/pdf')
+        .send({
+          input: OperationInput.URL,
+          output: OperationOutput.DATA,
+          url: 'https://example.com',
+        })
+        .expect(201);
+
+      expect(response.header['content-type']).toEqual(
+        'application/json; charset=utf-8',
+      );
+      expect(response.body).toEqual(
+        expect.objectContaining({
+          title: 'Example Domain',
+        }),
+      );
+    });
+
+    // Titled "html" (not "url") so it no longer duplicates the URL test name.
+    it('should render html page to pdf', async () => {
+      jest
+        .spyOn(pdfService, 'htmlToPdf')
+        .mockResolvedValue(Buffer.from('Test', 'utf-8'));
+
+      const response = await supertest(app.getHttpServer())
+        .post('/pdf')
+        .send({
+          input: OperationInput.HTML,
+          output: OperationOutput.PDF,
+          content:
+            'Example PageExample',
+        })
+        .expect(201);
+
+      expect(response.header['content-type']).toEqual('application/pdf');
+      expect(response.body).toEqual(Buffer.from('Test', 'utf-8'));
+    });
+
+    // Titled "html" (not "url") so it no longer duplicates the URL test name.
+    it('should render html page to json', async () => {
+      jest
+        .spyOn(pdfService, 'htmlToPdf')
+        .mockResolvedValue(Buffer.from('Test', 'utf-8'));
+
+      const response = await supertest(app.getHttpServer())
+        .post('/pdf')
.send({
- html: 'Example PageExample',
+          input: OperationInput.HTML,
+          output: OperationOutput.JSON,
+          content:
+            'Example PageExample',
})
.expect(201);
- expect(result.body).toEqual(
+      expect(response.header['content-type']).toEqual(
+        'application/json; charset=utf-8',
+      );
+      expect(response.body).toEqual(
+        expect.objectContaining({
+          content: 'VGVzdA==',
+          filename: expect.stringContaining('.pdf'),
+          mimeType: 'application/pdf',
+        }),
+      );
+    });
+
+    it('should render html page to data', async () => {
+      jest.spyOn(pdfService, 'htmlToData').mockResolvedValue({
+        title: 'Example Page',
+      });
+
+      const response = await supertest(app.getHttpServer())
+        .post('/pdf')
+        .send({
+          input: OperationInput.HTML,
+          output: OperationOutput.DATA,
+          content:
+            'Example PageExample',
+        })
+        .expect(201);
+
+      expect(response.header['content-type']).toEqual(
+        'application/json; charset=utf-8',
+      );
+      expect(response.body).toEqual(
expect.objectContaining({
title: 'Example Page',
}),
);
- }, 15000);
+    });
});
});
diff --git a/services/html-to-pdf/src/module/pdf/pdf.service.ts b/services/html-to-pdf/src/module/pdf/pdf.service.ts
index 6a4cfc6c..9edb7e84 100644
--- a/services/html-to-pdf/src/module/pdf/pdf.service.ts
+++ b/services/html-to-pdf/src/module/pdf/pdf.service.ts
@@ -1,74 +1,67 @@
/* eslint @typescript-eslint/no-var-requires: "off" */
-import { BadRequestException, Injectable } from '@nestjs/common';
+import { Injectable } from '@nestjs/common';
import { Readable } from 'stream';
-
-const puppeteer = require('puppeteer-core');
-const chromium = require('@sparticuz/chromium-min');
+import puppeteer from 'puppeteer-core';
+import chromium from '@sparticuz/chromium-min';
@Injectable()
export class PdfService {
- async renderHtml(html: string): Promise {
- try {
- const browser = await this.getBrowser();
- const page = await browser.newPage();
-
- await page.setContent(html, {
- waitUntil: ['networkidle0', 'domcontentloaded'],
- });
-
- const buffer = await page.pdf({ format: 'a4', printBackground: true });
-
- await browser.close();
- return buffer;
- } catch (err) {
- const error = err as Error;
- return new BadRequestException(`Failed to render pdf: ${error.message}`);
- }
+  /**
+   * Renders an HTML string to an A4 PDF with backgrounds printed.
+   *
+   * @param html markup loaded into a fresh page via `setContent`
+   * @returns the PDF bytes produced by `page.pdf`
+   */
+  async htmlToPdf(html: string): Promise<Buffer> {
+    const browser = await this.getBrowser();
+    try {
+      const page = await browser.newPage();
+
+      await page.setContent(html, {
+        waitUntil: ['networkidle0', 'domcontentloaded'],
+      });
+
+      return await page.pdf({ format: 'a4', printBackground: true });
+    } finally {
+      // Close even when rendering throws so a failed request does not leave a
+      // Chromium process behind.
+      await browser.close();
+    }
+  }
+
+  /**
+   * Navigates to `url` and prints the loaded page as a portrait A4 PDF
+   * (backgrounds on, 30px top margin, 0.98 scale).
+   *
+   * @param url address passed to `page.goto`
+   * @returns the PDF bytes produced by `page.pdf`
+   */
+  async urlToPdf(url: string) {
+    const browser = await this.getBrowser();
+    try {
+      const page = await browser.newPage();
+      await page.goto(url, { waitUntil: ['networkidle2', 'domcontentloaded'] });
+
+      return await page.pdf({
+        format: 'A4',
+        landscape: false,
+        printBackground: true,
+        margin: { top: '30px' },
+        scale: 0.98,
+      });
+    } finally {
+      // Close even when navigation or printing throws so a failed request
+      // does not leave a Chromium process behind.
+      await browser.close();
+    }
}
- async renderUrl(url: string) {
- try {
- const browser = await this.getBrowser();
- const page = await browser.newPage();
- await page.goto(url, { waitUntil: ['networkidle2', 'domcontentloaded'] });
-
- const buffer = await page.pdf({
- format: 'A4',
- landscape: false,
- printBackground: true,
- margin: { top: '30px' },
- scale: 0.98,
- });
-
- await browser.close();
-
- return buffer;
- } catch (err) {
- const error = err as Error;
- return new BadRequestException(`Failed to render pdf: ${error.message}`);
- }
+  /**
+   * Loads an HTML string into a page and reports metadata about it.
+   *
+   * @param html markup loaded into a fresh page via `setContent`
+   * @returns an object holding the page `title`
+   */
+  async htmlToData(html: string) {
+    const browser = await this.getBrowser();
+    try {
+      const page = await browser.newPage();
+      await page.setContent(html, {
+        waitUntil: ['networkidle0', 'domcontentloaded'],
+      });
+
+      return {
+        title: (await page.title()) ?? 'undefined',
+      };
+    } finally {
+      // Close even when loading throws so a failed request does not leave a
+      // Chromium process behind.
+      await browser.close();
+    }
}
- async renderPageData(html: string) {
- try {
- const browser = await this.getBrowser();
- const page = await browser.newPage();
- await page.setContent(html, {
- waitUntil: ['networkidle0', 'domcontentloaded'],
- });
- const data = {
- title: (await page.title()) ?? 'undefined',
- mimeType: page.mimeType,
- filename: page.filename,
- charset: page.charset,
- };
-
- await browser.close();
- return data;
- } catch (err) {
- const error = err as Error;
- return new BadRequestException(`Failed to render pdf: ${error.message}`);
- }
+  /**
+   * Navigates to `url` and reports metadata about the loaded page.
+   *
+   * @param url address passed to `page.goto`
+   * @returns an object holding the page `title`
+   */
+  async urlToData(url: string) {
+    const browser = await this.getBrowser();
+    try {
+      const page = await browser.newPage();
+      await page.goto(url, { waitUntil: ['networkidle2', 'domcontentloaded'] });
+
+      return {
+        title: (await page.title()) ?? 'undefined',
+      };
+    } finally {
+      // Close even when navigation throws so a failed request does not leave
+      // a Chromium process behind.
+      await browser.close();
+    }
}
private async getBrowser() {
@@ -76,9 +69,6 @@ export class PdfService {
? '/opt/nodejs/node_modules/@sparticuz/chromium/bin'
: undefined;
- chromium.setHeadlessMode = true;
- chromium.setGraphicsMode = true;
-
await chromium.font(
'http://themes.googleusercontent.com/static/fonts/opensans/v6/cJZKeOuBrn4kERxqtaUH3aCWcynf_cDxXwCLxiixG1c.ttf',
);
diff --git a/services/html-to-pdf/src/module/pdf/url-to-data.dto.ts b/services/html-to-pdf/src/module/pdf/url-to-data.dto.ts
deleted file mode 100644
index c9bebac4..00000000
--- a/services/html-to-pdf/src/module/pdf/url-to-data.dto.ts
+++ /dev/null
@@ -1,12 +0,0 @@
-import { IsUrl } from '@cats-cradle/validation-schemas';
-import { ApiProperty } from '@nestjs/swagger';
-
-export class UrlToDataDto {
- @IsUrl()
- @ApiProperty({
- description: 'url',
- default: 'https://example.com',
- type: String,
- })
- public url: string;
-}
diff --git a/services/html-to-pdf/stacks/main-stack.ts b/services/html-to-pdf/stacks/main-stack.ts
index e10bb782..c872a06a 100644
--- a/services/html-to-pdf/stacks/main-stack.ts
+++ b/services/html-to-pdf/stacks/main-stack.ts
@@ -29,7 +29,7 @@ export class HtmlToPdfStack extends cdk.Stack {
});
new cdk.CfnOutput(this, 'test endpoint', {
- value: `${microservice.getBaseUrl()}/pdf/example-pdf`,
+ value: `${microservice.getBaseUrl()}/pdf?url=https://google.com`,
});
}
}