diff --git a/README.md b/README.md index 4e9b26f..95fa2a1 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,6 @@ This repo contains code used to run the [online demo](https://vladholubiev.com/serverless-libreoffice). - ``` ├── compile.sh <-- commands used to compile LibreOffice for Lambda ├── infra <-- terraform config to deploy example Lambda @@ -35,23 +34,23 @@ Compiled and ready to use archive can be downloaded under [Releases section](htt # How to compile by yourself -> Check out a comprehensive [step-by-step tutorial](STEP_BY_STEP.md) from 0 to deployed function. +> Check out a comprehensive [step-by-step tutorial](STEP_BY_STEP.md) from 0 to deployed function. 1. Go to [Lambda Execution Environment and Available Libraries](https://docs.aws.amazon.com/lambda/latest/dg/current-supported-versions.html) page to get the latest AMI id -2. Click on [this link](https://console.aws.amazon.com/ec2/v2/home#Images:visibility=public-images;search=amzn-ami-hvm-2017.03.1.20170812-x86_64-gp2) to get AMI id for your region +2. Click on [this link](https://console.aws.amazon.com/ec2/v2/home#Images:visibility=public-images;search=amzn-ami-hvm-2017.03.1.20170812-x86_64-gp2) to get AMI id for your region 3. Spin up a `c5.2xlarge` spot instance with ~ 100 GB of storage attached 4. 
Follow the steps in `compile.sh` file in the repo # Help -* [List of RPM Packages available in AWS Lambda](https://gist.github.com/vladgolubev/1dac4ed47a5febf110c668074c6b671c) -* [List of Libraries available in AWS Lambda](https://gist.github.com/vladgolubev/439559fc7597a4fb51eaa9e97b72f319) +- [List of RPM Packages available in AWS Lambda](https://gist.github.com/vladgolubev/1dac4ed47a5febf110c668074c6b671c) +- [List of Libraries available in AWS Lambda](https://gist.github.com/vladgolubev/439559fc7597a4fb51eaa9e97b72f319) # Related Projects -* [Docker in AWS Lambda](https://github.com/vladgolubev/docker-in-aws-lambda) -* [NPM package with bundled LibreOffice for Lambda (85 MB)](https://github.com/shelfio/aws-lambda-libreoffice) -* [Lambda Layer with LibreOffice](https://github.com/shelfio/libreoffice-lambda-layer) +- [Docker in AWS Lambda](https://github.com/vladgolubev/docker-in-aws-lambda) +- [NPM package with bundled LibreOffice for Lambda (85 MB)](https://github.com/shelfio/aws-lambda-libreoffice) +- [Lambda Layer with LibreOffice](https://github.com/shelfio/libreoffice-lambda-layer) # How To Help @@ -80,6 +79,34 @@ Here is the list of: [available RPM packages](https://gist.github.com/vladgolube and [libraries](https://gist.github.com/vladgolubev/439559fc7597a4fb51eaa9e97b72f319) available in AWS Lambda Environment, which can be helpful. +You can also use multi compression level, with upx and then decompress after brotli. + +## Testing + +Update the repo for testing: for example, return before the S3 upload, hardcode or generate the files to convert, and set up the variables. Then simply run: + +``` +docker run \ + -v "\$PWD":/var/task \ + lambci/lambda:nodejs12.x src/handler.handler +``` + +After successful execution, get the resulting files to check the PDFs. + +``` +docker ps -a +``` + +Find the exact container id. 
+ +Then execute + +``` +docker cp containerId:/tmp/filename.pdf ./filename.pdf +``` + +Then check your results locally + ## License MIT © [Vlad Holubiev](https://vladholubiev.com) diff --git a/src/handler.js b/src/handler.js index e2443d6..d314f0f 100644 --- a/src/handler.js +++ b/src/handler.js @@ -1,23 +1,18 @@ -const {unpackArchive} = require('./libreoffice'); -const {convertFileToPDF} = require('./logic'); +const { unpack } = require("./libreoffice"); +const { convertFileToPDF } = require("./logic"); -unpackArchive(); - -module.exports.handler = (event, context, cb) => { +module.exports.handler = async (event, context, cb) => { + await unpack(); if (event.warmup) { return cb(); } - const {filename, base64File} = JSON.parse(event.body); - - return convertFileToPDF(base64File, filename) - .then(pdfFileURL => { - return cb(null, { - headers: { - 'Access-Control-Allow-Origin': 'https://vladholubiev.com' - }, - body: JSON.stringify({pdfFileURL}) - }); - }) - .catch(cb); + const { filename, base64File } = JSON.parse(event.body); + const pdfFileURL = await convertFileToPDF(base64File, filename).catch(cb); + return cb(null, { + headers: { + "Access-Control-Allow-Origin": "https://vladholubiev.com" + }, + body: JSON.stringify({ pdfFileURL }) + }); }; diff --git a/src/libreoffice.js b/src/libreoffice.js index 71e0adc..d3a1b02 100644 --- a/src/libreoffice.js +++ b/src/libreoffice.js @@ -1,8 +1,10 @@ -const {execSync} = require('child_process'); -const {readFileSync} = require('fs'); -const path = require('path'); +const { execSync } = require("child_process"); +const { readFileSync } = require("fs"); +const tar = require("tar-fs"); +const zlib = require("zlib"); +const path = require("path"); -const convertCommand = `./instdir/program/soffice --headless --invisible --nodefault --nofirststartwizard --nolockcheck --nologo --norestore --convert-to pdf --outdir /tmp`; +const convertCommand = `export HOME=/tmp && ./instdir/program/soffice.bin --headless --norestore --invisible 
--nodefault --nofirststartwizard --nolockcheck --nologo --convert-to "pdf:writer_pdf_Export" --outdir /tmp`; /** * Converts a document to PDF from url by spawning LibreOffice process @@ -13,7 +15,12 @@ module.exports.convertToPDF = function convertToPDF(inputFilename) { console.log(`[convertToPDF][file:${inputFilename}]`); const pdfFilename = getPDFFilename(inputFilename); - execSync(`cd /tmp && ${convertCommand} ${inputFilename}`); + try { + // First run will produce predictable error, because of unknown issues + execSync(`cd /tmp && ${convertCommand} ${inputFilename}`); + } catch (e) { + execSync(`cd /tmp && ${convertCommand} ${inputFilename}`); + } console.log(`[converted]`); const pdfFileBuffer = readFileSync(`/tmp/${pdfFilename}`); @@ -25,10 +32,44 @@ module.exports.convertToPDF = function convertToPDF(inputFilename) { }; function getPDFFilename(inputFilename) { - const {name} = path.parse(inputFilename); + const { name } = path.parse(inputFilename); return `${name}.pdf`; } -module.exports.unpackArchive = function unpackArchive() { - execSync(`cd /tmp && tar -xf /var/task/lo.tar.gz`); +module.exports.unpack = function({ + inputPath = `/var/task/lo.tar.br`, + outputBaseDir = `/tmp`, + outputPath = `/tmp/instdir` +}) { + return new Promise((resolve, reject) => { + let input = path.resolve(inputPath); + let output = outputPath; + + if (fs.existsSync(output) === true) { + return resolve(output); + } + + const source = fs.createReadStream(input); + const target = tar.extract(outputBaseDir); + + source.on("error", error => { + return reject(error); + }); + + target.on("error", error => { + return reject(error); + }); + + target.on("finish", () => { + fs.chmod(output, "0755", error => { + if (error) { + return reject(error); + } + + return resolve(output); + }); + }); + + source.pipe(zlib.createBrotliDecompress()).pipe(target); + }); }; diff --git a/src/logic.js b/src/logic.js index 6341843..56cf077 100644 --- a/src/logic.js +++ b/src/logic.js @@ -1,6 +1,6 @@ -const 
{writeFileSync} = require('fs'); -const {convertToPDF} = require('./libreoffice'); -const {uploadPDF} = require('./s3'); +const { writeFileSync } = require("fs"); +const { convertToPDF } = require("./libreoffice"); +const { uploadPDF } = require("./s3"); const MAX_FILE_SIZE = 5 * 1024 * 1024; @@ -10,31 +10,36 @@ const MAX_FILE_SIZE = 5 * 1024 * 1024; * @param filename {String} Name of file to convert * @return {Promise.} URL of uploaded file on S3 */ -module.exports.convertFileToPDF = function convertFileToPDF(base64File, filename) { - console.log(`[start][file:${filename}][buffer:${base64File.slice(0, 16)}...]`); - - const fileBuffer = new Buffer(base64File, 'base64'); +module.exports.convertFileToPDF = function convertFileToPDF( + base64File, + filename +) { + console.log( + `[start][file:${filename}][buffer:${base64File.slice(0, 16)}...]` + ); + + const fileBuffer = new Buffer(base64File, "base64"); console.log(`[size:${fileBuffer.length}]`); const fileError = validate(fileBuffer); if (fileError) { - return fileError; + return fileError; } writeFileSync(`/tmp/${filename}`, fileBuffer); console.log(`[written]`); - const {pdfFilename, pdfFileBuffer} = convertToPDF(filename); + const { pdfFilename, pdfFileBuffer } = convertToPDF(filename); return uploadPDF(pdfFilename, pdfFileBuffer); }; function validate(fileBuffer) { if (fileBuffer.length > MAX_FILE_SIZE) { - return Promise.reject(new Error('File is too large')); + return Promise.reject(new Error("File is too large")); } if (fileBuffer.length < 4) { - return Promise.reject(new Error('File is too small')); + return Promise.reject(new Error("File is too small")); } } diff --git a/src/s3.js b/src/s3.js index 3b23676..440709f 100644 --- a/src/s3.js +++ b/src/s3.js @@ -1,7 +1,7 @@ -const {execSync} = require('child_process'); -const {S3} = require('aws-sdk'); +const { execSync } = require("child_process"); +const { S3 } = require("aws-sdk"); -const s3 = new S3({region: 'us-east-1'}); +const s3 = new S3({ region: 
"us-east-1" }); /** * Uploads converted PDF file to S3 bucket @@ -15,13 +15,14 @@ function uploadPDF(filename, fileBuffer) { Bucket: process.env.S3_BUCKET_NAME, Key: `tmp/pdf/${filename}`, Body: fileBuffer, - ACL: 'public-read', - ContentType: 'application/pdf' + ACL: "public-read", + ContentType: "application/pdf" }; - return s3.upload(options) + return s3 + .upload(options) .promise() - .then(({Location}) => Location) + .then(({ Location }) => Location) .then(Location => { execSync(`rm /tmp/${filename}`); console.log(`[removed]`);