Skip to content

Commit

Permalink
feat: Add support for converting source document to PDF via Gotenberg
Browse files Browse the repository at this point in the history
Adds the ability to convert a source document automatically into a PDF.
  • Loading branch information
xitij2000 committed Aug 9, 2024
1 parent 066c402 commit 63a4a5d
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 14 deletions.
26 changes: 18 additions & 8 deletions .github/workflows/ci-javascript.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,21 @@ on:
branches:
- '**'

steps:
- uses: actions/checkout@v4
- name: Use Node.js
uses: actions/setup-node@v4
with:
node-version: 20.x
- run: npm ci
- run: npm run lint

jobs:
run_tests:
name: linting
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4
- name: Use Node.js
uses: actions/setup-node@v4
with:
node-version: 20.x

- name: Install Dependencies
run: npm ci

- name: Run linting
run: npm run lint
4 changes: 2 additions & 2 deletions .github/workflows/ci-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
python-version: 3.8

- name: Install Dependencies
run: pip install .[]
run: pip install .[quality]

- name: Run linting
run: pytest pdf
run: pylint pdf
3 changes: 3 additions & 0 deletions pdf/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
"""
PDF XBlock
"""
__version__ = "1.2.0"
23 changes: 20 additions & 3 deletions pdf/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from xblock.fragment import Fragment
from xblock.utils.resources import ResourceLoader

from .utils import DummyTranslationService, _, bool_from_str, is_all_download_disabled

from .utils import DummyTranslationService, _, bool_from_str, is_all_download_disabled, convert_to_pdf, GOTENBERG_HOST

try:
import importlib_resources
Expand Down Expand Up @@ -71,7 +72,7 @@ def load_resource(self, resource_path):
"""
Gets the content of a resource
"""
resource = importlib_resources.files(__name__).joinpath(resource_path)
resource = importlib_resources.files("pdf").joinpath(resource_path)
return resource.read_text("utf-8")

def render_template(self, template_path, context=None):
Expand Down Expand Up @@ -127,7 +128,8 @@ def studio_view(self, context=None):
'allow_download': self.allow_download,
'disable_all_download': is_all_download_disabled(),
'source_text': self.source_text,
'source_url': self.source_url
'source_url': self.source_url,
'enable_conversion': GOTENBERG_HOST is not None,
}
html = loader.render_django_template(
'templates/html/pdf_edit.html',
Expand All @@ -151,6 +153,18 @@ def on_download(self, data, suffix=''): # pylint: disable=unused-argument
}
self.runtime.publish(self, event_type, event_data)

def _generate_pdf_from_source(self):
    """
    Convert this block's source document into a PDF through Gotenberg.

    The destination path is built from the org, course, block type and
    block id of this block's location. The value returned by
    `convert_to_pdf` is passed straight back to the caller.
    """
    path_template = "{loc.org}/{loc.course}/{loc.block_type}/{loc.block_id}.pdf"
    block_location = self.location  # pylint: disable=no-member
    destination = path_template.format(loc=block_location)
    return convert_to_pdf(self.source_url, destination)

@XBlock.json_handler
def save_pdf(self, data, suffix=''): # pylint: disable=unused-argument
"""
Expand All @@ -163,6 +177,9 @@ def save_pdf(self, data, suffix=''): # pylint: disable=unused-argument
self.allow_download = bool_from_str(data['allow_download'])
self.source_text = data['source_text']
self.source_url = data['source_url']
if data['source_url'] and bool_from_str(data['pdf_auto_generate']):
pdf_path = self._generate_pdf_from_source()
self.url = pdf_path

return {
'result': 'success',
Expand Down
3 changes: 2 additions & 1 deletion pdf/static/js/pdf_edit.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ function pdfXBlockInitEdit(runtime, element) {
'url': $('#pdf_edit_url').val(),
'allow_download': $('#pdf_edit_allow_download').val() || '',
'source_text': $('#pdf_edit_source_text').val() || '',
'source_url': $('#pdf_edit_source_url').val() || ''
'source_url': $('#pdf_edit_source_url').val() || '',
'pdf_auto_generate': $('#pdf_auto_generate').val() || '',
};

runtime.notify('save', { state: 'start' });
Expand Down
12 changes: 12 additions & 0 deletions pdf/templates/html/pdf_edit.html
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,18 @@
<input class="input setting-input" id="pdf_edit_source_url" value="{{ source_url }}" type="text">
</div>
<span class="tip setting-help">{% trans "Add a download link for the source file of your PDF. Use it for example to provide the PowerPoint file used to create this PDF." %}</span>
{% if enable_conversion %}
<div class="wrapper-comp-setting">
<label class="label setting-label" for="pdf_auto_generate">{% trans "Generate PDF from source" %}</label>
<select class="input setting-input" id="pdf_auto_generate">
<option value="True" {% if pdf_auto_generate %}selected{% endif %}>{% trans "True" %}</option>
<option value="False" {% if not pdf_auto_generate %}selected{% endif %}>{% trans "False" %}</option>
</select>
</div>
<span class="tip setting-help">
{% trans "Automatically generate a PDF from the source document." %}
</span>
{% endif %}
</li>
{% endif %}

Expand Down
32 changes: 32 additions & 0 deletions pdf/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,40 @@
"""
Utility functions for PDF XBlock
"""
from typing import Optional
from urllib.parse import urlparse

import requests
from django.conf import settings
from django.core.files.base import ContentFile
from django.core.files.storage import default_storage

# Base URL of the Gotenberg service, read from the Django settings.
# It is None when the GOTENBERG_HOST setting is not defined.
GOTENBERG_HOST = getattr(settings, "GOTENBERG_HOST", None)
# Endpoint that documents are posted to for conversion.
# NOTE(review): this is built unconditionally, so when GOTENBERG_HOST is None
# the value is the literal string "None/forms/libreoffice/convert"; check
# GOTENBERG_HOST before relying on this URL.
GOTENBERG_CONVERSION_URL = f"{GOTENBERG_HOST}/forms/libreoffice/convert"


def convert_to_pdf(doc_url: str, pdf_path: str) -> Optional[str]:
    """
    Uses the Gotenberg service to convert the document at `doc_url` to a PDF file.

    Parameters:
        doc_url (str): The URL of the document to be converted.
        pdf_path (str): The path where the converted PDF file will be stored.

    Returns:
        str | None: The URL of the stored PDF, or None if the source document
        could not be downloaded or the conversion fails.
    """
    # The last path segment of the URL is sent as the name of the uploaded file.
    filename = urlparse(doc_url).path.split('/')[-1]

    source_doc_response = requests.get(doc_url, timeout=(10, 120))
    if source_doc_response.status_code != 200:
        # Without this check the body of an error response (e.g. a 404 page)
        # would be uploaded and converted as if it were the source document.
        return None

    pdf_response = requests.post(
        GOTENBERG_CONVERSION_URL,
        files={'file': (filename, source_doc_response.content)},
        timeout=(2, 120),
    )
    if pdf_response.status_code != 200:
        return None

    # Use the name returned by the storage backend when building the URL,
    # rather than assuming it equals `pdf_path`.
    file_path = default_storage.save(pdf_path, ContentFile(pdf_response.content))
    return default_storage.url(file_path)


def bool_from_str(str_value):
Expand Down

0 comments on commit 63a4a5d

Please sign in to comment.