Skip to content

Commit

Permalink
feat: html-to-pdf pragmatic api (#446)
Browse files Browse the repository at this point in the history
Signed-off-by: hxtree <[email protected]>
  • Loading branch information
hxtree authored Oct 23, 2023
1 parent e1fff74 commit 228996d
Show file tree
Hide file tree
Showing 10 changed files with 366 additions and 194 deletions.
32 changes: 23 additions & 9 deletions services/html-to-pdf/README.md
Original file line number Diff line number Diff line change
@@ -1,19 +1,33 @@
# @org-apis/html-to-pdf

HTMLtoPDF is a service for generating PDFs from HTML. It support generating a
PDF from either a request containing a HTML body or a URL.
HTMLtoPDF is a turn-key microservice that generates PDFs from HTML. It supports
both URL- and HTML-based requests.

## Usage Example

```bash
curl -X POST https://nx7uv2rfy4.execute-api.us-east-2.amazonaws.com/default/v1/html-to-pdf/pdf -H "Content-Type: application/json" -d '{"input": "URL", "output": "PDF", "url": "https://google.com"}' -o example.pdf
```

![Example Image](./example.png)

## How it Works

PDF are generated using a headless version of Chromium. This form of PDF
rendering supports text recognition, images, hyperlinks, print media queries,
table breaks, and other features all this with relatively little code
PDFs are generated using a headless version of Chromium running in a Lambda.
This form of PDF rendering supports text recognition, images, hyperlinks, print
media queries, table breaks, and other features, all with relatively little code
maintenance.

It can be finicky to get working within a Lambda and API Gateway. Lambda doesn't
include fonts fonts. Chromium should be installed in a Lambda layer. API gateway
can cause blank PDF if it doesn't properly handle binary responses. Serverless
Express also has to be configured to support the binary mime type.
This service addresses many finicky obstacles involved in making a request
through API Gateway to a Lambda running Chromium to generate a PDF.

- NodeJS Lambda Layers do not include default font files (\*.ttf) like a
standard OS does.
- Performance is essential and Lambda deploys are small. A compressed version of
Chromium must be deployed independently as a Lambda layer.
- API Gateway, if not properly configured to handle binary responses, can return
a blank PDF.
- Serverless Express has to be configured to support the binary mime type.

## References

Expand Down
Binary file added services/html-to-pdf/example.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
20 changes: 0 additions & 20 deletions services/html-to-pdf/src/module/pdf/create-html-to-pdf.dto.ts

This file was deleted.

20 changes: 0 additions & 20 deletions services/html-to-pdf/src/module/pdf/create-url-to-pdf.dto.ts

This file was deleted.

65 changes: 65 additions & 0 deletions services/html-to-pdf/src/module/pdf/operation.dto.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import {
IsUrl,
IsEnum,
IsString,
IsOptional,
IsFilename,
} from '@cats-cradle/validation-schemas';
import { ApiProperty } from '@nestjs/swagger';
import { v4 } from 'uuid';

/**
 * Kind of source a render request supplies. The controller's `operation`
 * handler uses it to pick which DTO field to read.
 */
export enum OperationInput {
// Raw markup; the controller reads it from the DTO's `content` field.
HTML = 'HTML',
// Remote page; the controller reads its address from the DTO's `url` field.
URL = 'URL',
}

/**
 * Shape of the response a render request asks for.
 */
export enum OperationOutput {
// Result of the service's `htmlToData` / `urlToData` call, returned as-is.
DATA = 'DATA',
// The rendered PDF as an `application/pdf` attachment download.
PDF = 'PDF',
// An object carrying the base64-encoded PDF plus `filename` and `mimeType`.
JSON = 'JSON',
}

/**
 * Request body for the PDF `operation` endpoint.
 *
 * `input` selects which source field is read (`content` for HTML, `url` for
 * URL) and `output` selects the response shape. `url`, `content` and
 * `filename` are all optional at the validation layer.
 */
export class OperationDto {
  /** Which kind of source the request carries. */
  @IsEnum(OperationInput)
  @ApiProperty({
    description: 'input format',
    default: OperationInput.HTML,
    // Must list the input values; this previously advertised OperationOutput.
    enum: OperationInput,
  })
  input: OperationInput;

  /** Which response shape the caller wants back. */
  @IsEnum(OperationOutput)
  @ApiProperty({
    description: 'output format',
    default: OperationOutput.PDF,
    enum: OperationOutput,
  })
  output: OperationOutput;

  /** Address of the page to render; read when `input` is `URL`. */
  @IsOptional()
  @IsUrl()
  @ApiProperty({
    description: 'url',
    default: 'https://google.com',
    type: String,
    required: false,
  })
  url?: string;

  /** Markup to render; read when `input` is `HTML`. */
  @IsOptional()
  @IsString()
  @ApiProperty({
    description: 'HTML',
    default:
      '<!doctype><html><head><title>Example</title></head><body>Hello, World</body></html>',
    type: String,
    required: false,
  })
  content?: string;

  /**
   * Name reported for the generated file. The property is optional, so it is
   * marked `@IsOptional()` to let requests omit it; without that the filename
   * validator runs against `undefined`.
   */
  @IsOptional()
  @IsFilename()
  @ApiProperty({
    description: 'Filename',
    // NOTE: evaluated once when the class is defined, so the documented
    // default is a single fixed UUID rather than a fresh one per request.
    default: `${v4()}.pdf`,
    type: String,
    required: false,
  })
  filename?: string;
}
129 changes: 75 additions & 54 deletions services/html-to-pdf/src/module/pdf/pdf.controller.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,73 +6,94 @@ import {
Get,
VERSION_NEUTRAL,
Query,
BadRequestException,
StreamableFile,
} from '@nestjs/common';
import { Response } from 'express';
import { v4 } from 'uuid';
import { PdfService } from './pdf.service';
import { CreateHtmlToPdfDto } from './create-html-to-pdf.dto';
import { CreateUrlToPdfDto } from './create-url-to-pdf.dto';
import { OperationDto, OperationInput, OperationOutput } from './operation.dto';

/**
 * HTTP endpoints that render HTML markup or a remote URL through `PdfService`.
 *
 * Both handlers inject the Express response with `passthrough: true`: they only
 * set headers on it and still hand the payload back to Nest by returning it.
 */
@Controller({ path: 'pdf', version: ['1', VERSION_NEUTRAL] })
export class PdfController {
  constructor(private readonly pdfService: PdfService) {}

  /**
   * Renders a page to PDF and returns it as a download.
   *
   * @param res Express response, used only to set the download headers.
   * @param url Page to render; falls back to `http://example.com` when absent.
   * @returns The rendered PDF wrapped in a `StreamableFile`.
   */
  @Get()
  async url(
    @Res({ passthrough: true }) res: Response,
    @Query('url') url?: string,
  ) {
    const buffer = await this.pdfService.urlToPdf(url ?? 'http://example.com');
    return this.streamPdf(res, buffer, `${v4()}.pdf`);
  }

  /**
   * Runs the render described by `body`.
   *
   * - `output: DATA` returns whatever `htmlToData` / `urlToData` resolves to.
   * - `output: JSON` returns `{ content, filename, mimeType }` with the PDF
   *   base64-encoded in `content`.
   * - `output: PDF` returns the PDF as an attachment download.
   *
   * @param res Express response, used only to set the download headers.
   * @param body Validated operation request.
   * @throws BadRequestException when the input/output combination is not
   *   recognised, or when rendering fails.
   */
  @Post()
  async operation(
    @Res({ passthrough: true }) res: Response,
    @Body() body: OperationDto,
    // The DATA branch forwards a service result whose type is not visible here.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
  ): Promise<any> {
    const knownInput = Object.values(OperationInput).includes(body.input);
    const knownOutput = Object.values(OperationOutput).includes(body.output);
    if (!knownInput || !knownOutput) {
      // Thrown, not returned: a returned exception object is serialized by
      // Nest as an ordinary successful response body.
      throw new BadRequestException('Invalid request');
    }

    const fromUrl = body.input === OperationInput.URL;
    const filename = body.filename ?? `${v4()}.pdf`;

    try {
      if (body.output === OperationOutput.DATA) {
        return fromUrl
          ? await this.pdfService.urlToData(body.url ?? '')
          : await this.pdfService.htmlToData(body.content ?? '');
      }

      const buffer = fromUrl
        ? await this.pdfService.urlToPdf(body.url ?? '')
        : await this.pdfService.htmlToPdf(body.content ?? '');

      if (body.output === OperationOutput.JSON) {
        // base64 responses can be checked using
        // https://base64.guru/converter/decode/pdf
        return {
          content: buffer.toString('base64'),
          filename,
          mimeType: 'application/pdf',
        };
      }

      return this.streamPdf(res, buffer, filename);
    } catch (err: unknown) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new BadRequestException(`Failed to render pdf: ${reason}`);
    }
  }

  /** Sets the PDF download headers on `res` and wraps `buffer` for streaming. */
  private streamPdf(
    res: Response,
    buffer: Buffer,
    filename: string,
  ): StreamableFile {
    // The filename can come from the request body, so neutralise characters
    // that would break out of the quoted header value.
    const safeName = filename.replace(/["\\\r\n]/g, '_');

    res.setHeader('Content-Length', Buffer.byteLength(buffer));
    res.setHeader('Content-Type', 'application/pdf');
    res.setHeader('Content-Disposition', `attachment; filename="${safeName}"`);
    return new StreamableFile(this.pdfService.createReadableStream(buffer));
  }
}
Loading

0 comments on commit 228996d

Please sign in to comment.