Skip to content

Commit

Permalink
print (WIP), fake scan (WIP) and text conversion for ultra-lite (#1098)
Browse files Browse the repository at this point in the history
* Changes!

* lang

* fake scan init, print init and pdf to text for exe

* Hardening suggestions for Stirling-PDF / changes (#1099)

* Switch order of literals to prevent NullPointerException

* Introduced protections against predictable RNG abuse

---------

Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com>

* Update README.md

* install custom fonts

* Formats etc

* version bump

* disable WIP work

* remove chinese font

---------

Co-authored-by: pixeebot[bot] <104101892+pixeebot[bot]@users.noreply.github.com>
Co-authored-by: systo <[email protected]>
  • Loading branch information
3 people authored Apr 21, 2024
1 parent 6c052a7 commit 71e93e3
Show file tree
Hide file tree
Showing 19 changed files with 491 additions and 140 deletions.
7 changes: 5 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,17 @@ ENV DOCKER_ENABLE_SECURITY=false \
RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /etc/apk/repositories && \
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/community" | tee -a /etc/apk/repositories && \
echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/testing" | tee -a /etc/apk/repositories && \
apk update && \
apk add --no-cache \
ca-certificates \
tzdata \
tini \
openssl \
openssl-dev \
bash \
curl \
openjdk17-jre \
su-exec \
font-noto-cjk \
shadow \
# Doc conversion
libreoffice@testing \
Expand All @@ -58,7 +60,8 @@ RUN echo "@testing https://dl-cdn.alpinelinux.org/alpine/edge/main" | tee -a /et
addgroup -S stirlingpdfgroup && adduser -S stirlingpdfuser -G stirlingpdfgroup && \
chown -R stirlingpdfuser:stirlingpdfgroup $HOME /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline && \
chown stirlingpdfuser:stirlingpdfgroup /app.jar && \
tesseract --list-langs
tesseract --list-langs && \
rm -rf /var/cache/apk/*

EXPOSE 8080

Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ docker run -d \
-v /location/of/logs:/logs \
-e DOCKER_ENABLE_SECURITY=false \
-e INSTALL_BOOK_AND_ADVANCED_HTML_OPS=false \
-e LANGS=en_GB \
--name stirling-pdf \
frooodle/s-pdf:latest

Expand All @@ -147,6 +148,7 @@ services:
environment:
- DOCKER_ENABLE_SECURITY=false
- INSTALL_BOOK_AND_ADVANCED_HTML_OPS=false
- LANGS=en_GB
```
Note: Podman is CLI-compatible with Docker, so simply replace "docker" with "podman".
Expand Down Expand Up @@ -252,13 +254,13 @@ metrics:
- ``SYSTEM_CONNECTIONTIMEOUTMINUTES`` to set custom connection timeout values
- ``DOCKER_ENABLE_SECURITY`` to tell docker to download security jar (required as true for auth login)
- ``INSTALL_BOOK_AND_ADVANCED_HTML_OPS`` to download calibre onto stirling-pdf enabling pdf to/from book and advanced html conversion
- ``LANGS`` to define custom font libraries to install for use for document conversions
## API
For those wanting to use Stirling-PDFs backend API to link with their own custom scripting to edit PDFs you can view all existing API documentation
[here](https://app.swaggerhub.com/apis-docs/Stirling-Tools/Stirling-PDF/) or navigate to /swagger-ui/index.html of your stirling-pdf instance for your versions documentation (Or by following the API button in your settings of Stirling-PDF)
## Login authentication
![stirling-login](images/login-light.png)
Expand Down
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ plugins {
import com.github.jk1.license.render.*

group = 'stirling.software'
version = '0.22.8'
version = '0.23.0'
sourceCompatibility = '17'

repositories {
Expand Down
2 changes: 2 additions & 0 deletions exampleYmlFiles/docker-compose-latest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ services:
environment:
DOCKER_ENABLE_SECURITY: "false"
SECURITY_ENABLELOGIN: "false"
LANGS: "en_GB,en_US,ar_AR,de_DE,fr_FR,es_ES,zh_CN,zh_TW,ca_CA,it_IT,sv_SE,pl_PL,ro_RO,ko_KR,pt_BR,ru_RU,el_GR,hi_IN,hu_HU,tr_TR,id_ID"
INSTALL_BOOK_AND_ADVANCED_HTML_OPS: "true"
SYSTEM_DEFAULTLOCALE: en-US
UI_APPNAME: Stirling-PDF
UI_HOMEDESCRIPTION: Demo site for Stirling-PDF Latest
Expand Down
15 changes: 10 additions & 5 deletions scripts/init-without-ocr.sh
Original file line number Diff line number Diff line change
@@ -1,26 +1,31 @@
#!/bin/sh
#!/bin/bash

# Update the user and group IDs as per environment variables
if [ ! -z "$PUID" ] && [ "$PUID" != "$(id -u stirlingpdfuser)" ]; then
usermod -o -u "$PUID" stirlingpdfuser || true
fi


if [ ! -z "$PGID" ] && [ "$PGID" != "$(getent group stirlingpdfgroup | cut -d: -f3)" ]; then
groupmod -o -g "$PGID" stirlingpdfgroup || true
fi
umask "$UMASK" || true


if [[ "$INSTALL_BOOK_AND_ADVANCED_HTML_OPS" == "true" ]]; then
apk add --no-cache calibre@testing
fi

/scripts/download-security-jar.sh

if [[ -n "$LANGS" ]]; then
/scripts/installFonts.sh $LANGS
fi

echo "Setting permissions and ownership for necessary directories..."
if chown -R stirlingpdfuser:stirlingpdfgroup $HOME /logs /scripts /usr/share/fonts/opentype/noto /usr/share/tessdata /configs /customFiles /pipeline /app.jar; then
chmod -R 755 /logs /scripts /usr/share/fonts/opentype/noto /usr/share/tessdata /configs /customFiles /pipeline /app.jar || true
# If chown succeeds, execute the command as stirlingpdfuser
# Attempt to change ownership of directories and files
if chown -R stirlingpdfuser:stirlingpdfgroup $HOME /logs /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /app.jar; then
chmod -R 755 /logs /scripts /usr/share/fonts/opentype/noto /configs /customFiles /pipeline /app.jar || true
# If chown succeeds, execute the command as stirlingpdfuser
exec su-exec stirlingpdfuser "$@"
else
# If chown fails, execute the command without changing the user context
Expand Down
30 changes: 1 addition & 29 deletions scripts/init.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,6 @@ if [ -d /usr/share/tesseract-ocr/5/tessdata ]; then
cp -r /usr/share/tesseract-ocr/5/tessdata/* /usr/share/tessdata || true;
fi

# Update the user and group IDs as per environment variables
if [ ! -z "$PUID" ] && [ "$PUID" != "$(id -u stirlingpdfuser)" ]; then
usermod -o -u "$PUID" stirlingpdfuser || true
fi


if [ ! -z "$PGID" ] && [ "$PGID" != "$(getent group stirlingpdfgroup | cut -d: -f3)" ]; then
groupmod -o -g "$PGID" stirlingpdfgroup || true
fi
umask "$UMASK" || true


# Check if TESSERACT_LANGS environment variable is set and is not empty
if [[ -n "$TESSERACT_LANGS" ]]; then
# Convert comma-separated values to a space-separated list
Expand All @@ -40,20 +28,4 @@ if [[ -n "$TESSERACT_LANGS" ]]; then
done
fi

if [[ "$INSTALL_BOOK_AND_ADVANCED_HTML_OPS" == "true" ]]; then
apk add --no-cache calibre@testing
fi

/scripts/download-security-jar.sh

echo "Setting permissions and ownership for necessary directories..."
# Attempt to change ownership of directories and files
if chown -R stirlingpdfuser:stirlingpdfgroup $HOME /logs /scripts /usr/share/fonts/opentype/noto /usr/share/tessdata /configs /customFiles /pipeline /app.jar; then
chmod -R 755 /logs /scripts /usr/share/fonts/opentype/noto /usr/share/tessdata /configs /customFiles /pipeline /app.jar || true
# If chown succeeds, execute the command as stirlingpdfuser
exec su-exec stirlingpdfuser "$@"
else
# If chown fails, execute the command without changing the user context
echo "[WARN] Chown failed, running as host user"
exec "$@"
fi
/scripts/init-without-ocr.sh "$@"
67 changes: 67 additions & 0 deletions scripts/installFonts.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#!/bin/bash

LANGS=$1

# Function to install a font package
install_font() {
echo "Installing font package: $1"
if ! apk add "$1" --no-cache; then
echo "Failed to install $1"
fi
}

# Install common fonts used across many languages
#common_fonts=(
# font-terminus
# font-dejavu
# font-noto
# font-noto-cjk
# font-awesome
# font-noto-extra
#)
#
#for font in "${common_fonts[@]}"; do
# install_font $font
#done

# Map languages to specific font packages
declare -A language_fonts=(
["ar_AR"]="font-noto-arabic"
["zh_CN"]="font-isas-misc"
["zh_TW"]="font-isas-misc"
["ja_JP"]="font-noto font-noto-thai font-noto-tibetan font-ipa font-sony-misc font-jis-misc"
["ru_RU"]="font-vollkorn font-misc-cyrillic font-mutt-misc font-screen-cyrillic font-winitzki-cyrillic font-cronyx-cyrillic"
["sr_LATN_RS"]="font-vollkorn font-misc-cyrillic font-mutt-misc font-screen-cyrillic font-winitzki-cyrillic font-cronyx-cyrillic"
["uk_UA"]="font-vollkorn font-misc-cyrillic font-mutt-misc font-screen-cyrillic font-winitzki-cyrillic font-cronyx-cyrillic"
["ko_KR"]="font-noto font-noto-thai font-noto-tibetan"
["el_GR"]="font-noto"
["hi_IN"]="font-noto-devanagari"
["bg_BG"]="font-vollkorn font-misc-cyrillic"
["GENERAL"]="font-terminus font-dejavu font-noto font-noto-cjk font-awesome font-noto-extra"
)

# Install fonts for other languages which generally do not need special packages beyond 'font-noto'
other_langs=("en_GB" "en_US" "de_DE" "fr_FR" "es_ES" "ca_CA" "it_IT" "pt_BR" "nl_NL" "sv_SE" "pl_PL" "ro_RO" "hu_HU" "tr_TR" "id_ID" "eu_ES")
if [[ $LANGS == "ALL" ]]; then
# Install all fonts from the language_fonts map
for fonts in "${language_fonts[@]}"; do
for font in $fonts; do
install_font $font
done
done
else
# Split comma-separated languages and install necessary fonts
IFS=',' read -ra LANG_CODES <<< "$LANGS"
for code in "${LANG_CODES[@]}"; do
if [[ " ${other_langs[@]} " =~ " ${code} " ]]; then
install_font font-noto
else
fonts_to_install=${language_fonts[$code]}
if [ ! -z "$fonts_to_install" ]; then
for font in $fonts_to_install; do
install_font $font
done
fi
fi
done
fi
1 change: 1 addition & 0 deletions src/main/java/stirling/software/SPDF/SPdfApplication.java
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ public void init() {
}

public static void main(String[] args) throws IOException, InterruptedException {

SpringApplication app = new SpringApplication(SPdfApplication.class);
app.addInitializers(new ConfigInitializer());
if (Files.exists(Paths.get("configs/settings.yml"))) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,14 +146,14 @@ public void init() {
addEndpointToGroup("CLI", "xlsx-to-pdf");
addEndpointToGroup("CLI", "pdf-to-word");
addEndpointToGroup("CLI", "pdf-to-presentation");
addEndpointToGroup("CLI", "pdf-to-text");
addEndpointToGroup("CLI", "pdf-to-html");
addEndpointToGroup("CLI", "pdf-to-xml");
addEndpointToGroup("CLI", "ocr-pdf");
addEndpointToGroup("CLI", "html-to-pdf");
addEndpointToGroup("CLI", "url-to-pdf");
addEndpointToGroup("CLI", "book-to-pdf");
addEndpointToGroup("CLI", "pdf-to-book");
addEndpointToGroup("CLI", "pdf-to-rtf");

// Calibre
addEndpointToGroup("Calibre", "book-to-pdf");
Expand All @@ -175,7 +175,7 @@ public void init() {
addEndpointToGroup("LibreOffice", "xlsx-to-pdf");
addEndpointToGroup("LibreOffice", "pdf-to-word");
addEndpointToGroup("LibreOffice", "pdf-to-presentation");
addEndpointToGroup("LibreOffice", "pdf-to-text");
addEndpointToGroup("LibreOffice", "pdf-to-rtf");
addEndpointToGroup("LibreOffice", "pdf-to-html");
addEndpointToGroup("LibreOffice", "pdf-to-xml");

Expand Down Expand Up @@ -218,6 +218,7 @@ public void init() {
addEndpointToGroup("Java", "overlay-pdf");
addEndpointToGroup("Java", "split-pdf-by-sections");
addEndpointToGroup("Java", REMOVE_BLANKS);
addEndpointToGroup("Java", "pdf-to-text");

// Javascript
addEndpointToGroup("Javascript", "pdf-organizer");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ public class ConvertPDFToPDFA {
summary = "Convert a PDF to a PDF/A",
description =
"This endpoint converts a PDF file to a PDF/A file. PDF/A is a format designed for long-term archiving of digital documents. Input:PDF Output:PDF Type:SISO")
public ResponseEntity<byte[]> pdfToPdfA(@ModelAttribute PdfToPdfARequest request) throws Exception {
public ResponseEntity<byte[]> pdfToPdfA(@ModelAttribute PdfToPdfARequest request)
throws Exception {
MultipartFile inputFile = request.getFileInput();
String outputFormat = request.getOutputFormat();

Expand Down
Loading

0 comments on commit 71e93e3

Please sign in to comment.