diff --git a/DotNET/Endpoint Examples/JSON Payload/pdf-with-ocr-text.cs b/DotNET/Endpoint Examples/JSON Payload/pdf-with-ocr-text.cs
new file mode 100644
index 0000000..1c46cee
--- /dev/null
+++ b/DotNET/Endpoint Examples/JSON Payload/pdf-with-ocr-text.cs
@@ -0,0 +1,50 @@
+
+using Newtonsoft.Json.Linq;
+using System.Text;
+
+using (var httpClient = new HttpClient { BaseAddress = new Uri("https://api.pdfrest.com") })
+{
+    // Step 1: upload the input file; the id of the stored file is read from the JSON reply below.
+    using (var uploadRequest = new HttpRequestMessage(HttpMethod.Post, "upload"))
+    {
+        uploadRequest.Headers.TryAddWithoutValidation("Api-Key", "xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx");
+        uploadRequest.Headers.Accept.Add(new("application/json"));
+
+        var uploadByteArray = File.ReadAllBytes("/path/to/file");
+        var uploadByteAryContent = new ByteArrayContent(uploadByteArray);
+        uploadByteAryContent.Headers.TryAddWithoutValidation("Content-Type", "application/octet-stream");
+        uploadByteAryContent.Headers.TryAddWithoutValidation("Content-Filename", "filename.pdf");
+
+        uploadRequest.Content = uploadByteAryContent;
+        var uploadResponse = await httpClient.SendAsync(uploadRequest);
+
+        var uploadResult = await uploadResponse.Content.ReadAsStringAsync();
+
+        Console.WriteLine("Upload response received.");
+        Console.WriteLine(uploadResult);
+
+        JObject uploadResultJson = JObject.Parse(uploadResult);
+        var uploadedID = uploadResultJson["files"][0]["id"];
+
+        // Step 2: send the uploaded file's id to pdf-with-ocr-text as a JSON payload.
+        using (var ocrTextRequest = new HttpRequestMessage(HttpMethod.Post, "pdf-with-ocr-text"))
+        {
+            ocrTextRequest.Headers.TryAddWithoutValidation("Api-Key", "xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx");
+            ocrTextRequest.Headers.Accept.Add(new("application/json"));
+
+            JObject parameterJson = new JObject
+            {
+                ["id"] = uploadedID,
+            };
+
+            // Content-Type is a content header; StringContent sets it here, so it is not added to the request headers.
+            ocrTextRequest.Content = new StringContent(parameterJson.ToString(), Encoding.UTF8, "application/json");
+            var ocrTextResponse = await httpClient.SendAsync(ocrTextRequest);
+
+            var ocrTextResult = await ocrTextResponse.Content.ReadAsStringAsync();
+
+            Console.WriteLine("Processing response received.");
+            Console.WriteLine(ocrTextResult);
+        }
+    }
+}
diff --git a/DotNET/Endpoint Examples/Multipart Payload/pdf-with-ocr-text.cs b/DotNET/Endpoint Examples/Multipart Payload/pdf-with-ocr-text.cs
new file mode 100644
index 0000000..2255032
--- /dev/null
+++ b/DotNET/Endpoint Examples/Multipart Payload/pdf-with-ocr-text.cs
@@ -0,0 +1,29 @@
+using System.Text;
+
+using (var httpClient = new HttpClient { BaseAddress = new Uri("https://api.pdfrest.com") })
+{
+    using (var request = new HttpRequestMessage(HttpMethod.Post, "pdf-with-ocr-text"))
+    {
+        request.Headers.TryAddWithoutValidation("Api-Key", "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx");
+        request.Headers.Accept.Add(new("application/json"));
+        var multipartContent = new MultipartFormDataContent();
+
+        // The PDF bytes are sent as the "file" form part, labelled application/pdf.
+        var byteArray = File.ReadAllBytes("/path/to/file");
+        var byteAryContent = new ByteArrayContent(byteArray);
+        byteAryContent.Headers.TryAddWithoutValidation("Content-Type", "application/pdf");
+        multipartContent.Add(byteAryContent, "file", "file_name");
+
+        // The "output" value is sent as a plain form field next to the file.
+        var byteArrayOption = new ByteArrayContent(Encoding.UTF8.GetBytes("converted"));
+        multipartContent.Add(byteArrayOption, "output");
+
+        request.Content = multipartContent;
+        var response = await httpClient.SendAsync(request);
+
+        var apiResult = await response.Content.ReadAsStringAsync();
+
+        Console.WriteLine("API response received.");
+        Console.WriteLine(apiResult);
+    }
+}
diff --git a/Java/Endpoint Examples/JSON Payload/PDFWithOCRText.java b/Java/Endpoint Examples/JSON Payload/PDFWithOCRText.java
new file mode 100644
index 0000000..aa08636
--- /dev/null
+++ b/Java/Endpoint Examples/JSON Payload/PDFWithOCRText.java
@@ -0,0 +1,93 @@
+import io.github.cdimascio.dotenv.Dotenv;
+import java.io.File;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+import okhttp3.*;
+import org.json.JSONArray;
+import org.json.JSONObject;
+
+public class PDFWithOCRText {
+
+  // Specify the path to your file here, or as the first argument when running the program.
+  private static final String DEFAULT_FILE_PATH = "/path/to/file.pdf";
+
+  // Specify your API key here, or in the environment variable PDFREST_API_KEY.
+  // You can also put the environment variable in a .env file.
+  private static final String DEFAULT_API_KEY = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";
+
+  public static void main(String[] args) {
+    File inputFile;
+    if (args.length > 0) {
+      inputFile = new File(args[0]);
+    } else {
+      inputFile = new File(DEFAULT_FILE_PATH);
+    }
+    final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load();
+
+    // Step 1: upload the file; an "error" key in the reply is treated as a failed upload.
+    String uploadString = uploadFile(inputFile);
+    JSONObject uploadJSON = new JSONObject(uploadString);
+    if (uploadJSON.has("error")) {
+      System.out.println("Error during upload: " + uploadString);
+      return;
+    }
+    JSONArray fileArray = uploadJSON.getJSONArray("files");
+    JSONObject fileObject = fileArray.getJSONObject(0);
+    String uploadedID = fileObject.get("id").toString();
+
+    // Step 2: reference the uploaded file by id; JSONObject escapes the value when serializing.
+    String JSONString = new JSONObject().put("id", uploadedID).toString();
+
+    final RequestBody requestBody =
+        RequestBody.create(JSONString, MediaType.parse("application/json"));
+
+    Request request =
+        new Request.Builder()
+            .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY))
+            .url("https://api.pdfrest.com/pdf-with-ocr-text")
+            .post(requestBody)
+            .build();
+    OkHttpClient client =
+        new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build();
+    // try-with-resources closes the Response once its body has been read.
+    try (Response response = client.newCall(request).execute()) {
+      System.out.println("Processing Result code " + response.code());
+      if (response.body() != null) {
+        System.out.println(prettyJson(response.body().string()));
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  private static String prettyJson(String json) {
+    // https://stackoverflow.com/a/9583835/11996393
+    return new JSONObject(json).toString(4);
+  }
+
+  // This function is just a copy of the 'Upload.java' file to upload a binary file
+  private static String uploadFile(File inputFile) {
+    final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load();
+
+    final RequestBody requestBody =
+        RequestBody.create(inputFile, MediaType.parse("application/pdf"));
+
+    Request request =
+        new Request.Builder()
+            .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY))
+            .header("Content-Filename", "File.pdf")
+            .url("https://api.pdfrest.com/upload")
+            .post(requestBody)
+            .build();
+    OkHttpClient client = new OkHttpClient().newBuilder().build();
+    try (Response response = client.newCall(request).execute()) {
+      System.out.println("Upload Result code " + response.code());
+      if (response.body() != null) {
+        return response.body().string();
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+    return "";
+  }
+}
diff --git a/Java/Endpoint Examples/Multipart Payload/PDFWithOCRText.java b/Java/Endpoint Examples/Multipart Payload/PDFWithOCRText.java
new file mode 100644
index 0000000..a24ba99
--- /dev/null
+++ b/Java/Endpoint Examples/Multipart Payload/PDFWithOCRText.java
@@ -0,0 +1,63 @@
+import io.github.cdimascio.dotenv.Dotenv;
+import java.io.File;
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+import okhttp3.MediaType;
+import okhttp3.MultipartBody;
+import okhttp3.OkHttpClient;
+import okhttp3.Request;
+import okhttp3.RequestBody;
+import okhttp3.Response;
+import org.json.JSONObject;
+
+public class PDFWithOCRText {
+
+  // Specify the path to your file here, or as the first argument when running the program.
+  private static final String DEFAULT_FILE_PATH = "/path/to/file.pdf";
+
+  // Specify your API key here, or in the environment variable PDFREST_API_KEY.
+  // You can also put the environment variable in a .env file.
+  private static final String DEFAULT_API_KEY = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx";
+
+  public static void main(String[] args) {
+    File inputFile;
+    if (args.length > 0) {
+      inputFile = new File(args[0]);
+    } else {
+      inputFile = new File(DEFAULT_FILE_PATH);
+    }
+
+    final Dotenv dotenv = Dotenv.configure().ignoreIfMalformed().ignoreIfMissing().load();
+
+    final RequestBody inputFileRequestBody =
+        RequestBody.create(inputFile, MediaType.parse("application/pdf"));
+    RequestBody requestBody =
+        new MultipartBody.Builder()
+            .setType(MultipartBody.FORM)
+            .addFormDataPart("file", inputFile.getName(), inputFileRequestBody)
+            .addFormDataPart("output", "pdfrest_pdf-with-ocr-text")
+            .build();
+    Request request =
+        new Request.Builder()
+            .header("Api-Key", dotenv.get("PDFREST_API_KEY", DEFAULT_API_KEY))
+            .url("https://api.pdfrest.com/pdf-with-ocr-text")
+            .post(requestBody)
+            .build();
+    OkHttpClient client =
+        new OkHttpClient().newBuilder().readTimeout(60, TimeUnit.SECONDS).build();
+    // try-with-resources closes the Response once its body has been read.
+    try (Response response = client.newCall(request).execute()) {
+      System.out.println("Result code " + response.code());
+      if (response.body() != null) {
+        System.out.println(prettyJson(response.body().string()));
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  private static String prettyJson(String json) {
+    // https://stackoverflow.com/a/9583835/11996393
+    return new JSONObject(json).toString(4);
+  }
+}
diff --git a/JavaScript/Endpoint Examples/JSON Payload/pdf-with-ocr-text.js b/JavaScript/Endpoint Examples/JSON Payload/pdf-with-ocr-text.js
new file mode 100644
index 0000000..1572f2d
--- /dev/null
+++ b/JavaScript/Endpoint Examples/JSON Payload/pdf-with-ocr-text.js
@@ -0,0 +1,44 @@
+var axios = require("axios");
+var FormData = require("form-data");
+var fs = require("fs");
+
+var upload_data = fs.createReadStream("/path/to/file");
+
+var upload_config = {
+  method: "post",
+  maxBodyLength: Infinity,
+  url: "https://api.pdfrest.com/upload",
+  headers: {
+    "Api-Key": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", // Replace with your API key
+    "Content-Filename": "filename.pdf",
+    "Content-Type": "application/octet-stream",
+  },
+  data: upload_data, // set the data to be sent with the request
+};
+
+// Upload the file first, then chain the processing request that references it by id.
+axios(upload_config)
+  .then(function (upload_response) {
+    console.log(JSON.stringify(upload_response.data));
+    var uploaded_id = upload_response.data.files[0].id;
+
+    var pdf_with_ocr_text_config = {
+      method: "post",
+      maxBodyLength: Infinity,
+      url: "https://api.pdfrest.com/pdf-with-ocr-text",
+      headers: {
+        "Api-Key": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx", // Replace with your API key
+        "Content-Type": "application/json",
+      },
+      data: { id: uploaded_id }, // set the data to be sent with the request
+    };
+
+    // Returning the promise lets the single catch below log errors from either request.
+    return axios(pdf_with_ocr_text_config);
+  })
+  .then(function (pdf_with_ocr_text_response) {
+    console.log(JSON.stringify(pdf_with_ocr_text_response.data));
+  })
+  .catch(function (error) {
+    console.log(error);
+  });
diff --git a/JavaScript/Endpoint Examples/Multipart Payload/pdf-with-ocr-text.js b/JavaScript/Endpoint Examples/Multipart Payload/pdf-with-ocr-text.js
new file mode 100644
index 0000000..771c9cb
--- /dev/null
+++ b/JavaScript/Endpoint Examples/Multipart Payload/pdf-with-ocr-text.js
@@ -0,0 +1,32 @@
+// This request demonstrates how to apply OCR to a PDF document and insert text behind images of text.
+var axios = require('axios'); +var FormData = require('form-data'); +var fs = require('fs'); + +// Create a new form data instance and append the PDF file and parameters to it +var data = new FormData(); +data.append('file', fs.createReadStream('/path/to/file')); +data.append('output', 'pdfrest_pdf-with-ocr-text'); + +// define configuration options for axios request +var config = { + method: 'post', + maxBodyLength: Infinity, // set maximum length of the request body + url: 'https://api.pdfrest.com/pdf-with-ocr-text', + headers: { + 'Api-Key': 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx', // Replace with your API key + ...data.getHeaders() // set headers for the request + }, + data : data // set the data to be sent with the request +}; + +// send request and handle response or error +axios(config) +.then(function (response) { + console.log(JSON.stringify(response.data)); +}) +.catch(function (error) { + console.log(error); +}); + +// If you would like to download the file instead of getting the JSON response, please see the 'get-resource-id-endpoint.js' sample. \ No newline at end of file diff --git a/PHP/Endpoint Examples/JSON Payload/pdf-with-ocr-text.php b/PHP/Endpoint Examples/JSON Payload/pdf-with-ocr-text.php new file mode 100644 index 0000000..971ee05 --- /dev/null +++ b/PHP/Endpoint Examples/JSON Payload/pdf-with-ocr-text.php @@ -0,0 +1,33 @@ + false]); +$upload_headers = [ + 'api-key' => 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx', + 'content-filename' => 'filename.pdf', + 'Content-Type' => 'application/octet-stream' +]; +$upload_body = file_get_contents('/path/to/file'); +$upload_request = new Request('POST', 'https://api.pdfrest.com/upload', $upload_headers, $upload_body); +$upload_res = $upload_client->sendAsync($upload_request)->wait(); +echo $upload_res->getBody() . PHP_EOL; + +$upload_response_json = json_decode($upload_res->getBody()); + +$uploaded_id = $upload_response_json->{'files'}[0]->{'id'}; + +echo "Successfully uploaded with an id of: " . 
$uploaded_id . PHP_EOL; + +$pdf_with_ocr_text_client = new Client(['http_errors' => false]); +$pdf_with_ocr_text_headers = [ + 'api-key' => 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx', + 'Content-Type' => 'application/json' +]; +$pdf_with_ocr_text_body = '{"id":"'.$uploaded_id.'"}'; +$pdf_with_ocr_text_request = new Request('POST', 'https://api.pdfrest.com/pdf-with-ocr-text', $pdf_with_ocr_text_headers, $pdf_with_ocr_text_body); +$pdf_with_ocr_text_res = $pdf_with_ocr_text_client->sendAsync($pdf_with_ocr_text_request)->wait(); +echo $pdf_with_ocr_text_res->getBody() . PHP_EOL; diff --git a/PHP/Endpoint Examples/Multipart Payload/pdf-with-ocr-text.php b/PHP/Endpoint Examples/Multipart Payload/pdf-with-ocr-text.php new file mode 100644 index 0000000..76073f2 --- /dev/null +++ b/PHP/Endpoint Examples/Multipart Payload/pdf-with-ocr-text.php @@ -0,0 +1,35 @@ + 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' // Set the API key in the headers for authentication. +]; + +$options = [ + 'multipart' => [ + [ + 'name' => 'file', // Specify the field name for the file. + 'contents' => Utils::tryFopen('/path/to/file', 'r'), // Open the file specified by the '/path/to/file' for reading. + 'filename' => '/path/to/file', // Set the filename for the file to be processed, in this case, '/path/to/file'. + 'headers' => [ + 'Content-Type' => '' // Set the Content-Type header for the file. + ] + ], + [ + 'name' => 'output', // Specify the field name for the output option. + 'contents' => 'pdfrest_pdf-with-ocr-text' // Set the value for the output option (in this case, 'pdfrest_pdf-with-ocr-text'). + ] + ] +]; + +$request = new Request('POST', 'https://api.pdfrest.com/pdf-with-ocr-text', $headers); // Create a new HTTP POST request with the API endpoint and headers. + +$res = $client->sendAsync($request, $options)->wait(); // Send the asynchronous request and wait for the response. + +echo $res->getBody(); // Output the response body, which contains the document with text from OCR added. 
diff --git a/Python/Endpoint Examples/JSON Payload/pdf-with-ocr-text.py b/Python/Endpoint Examples/JSON Payload/pdf-with-ocr-text.py
new file mode 100644
index 0000000..cb48f00
--- /dev/null
+++ b/Python/Endpoint Examples/JSON Payload/pdf-with-ocr-text.py
@@ -0,0 +1,36 @@
+import requests
+import json
+
+# Step 1: read the input file and upload it; the id of the stored file is read from the JSON reply.
+with open('/path/to/file', 'rb') as f:
+    upload_data = f.read()
+
+print("Uploading file...")
+upload_response = requests.post(url='https://api.pdfrest.com/upload',
+                    data=upload_data,
+                    headers={'Content-Type': 'application/octet-stream', 'Content-Filename': 'file.pdf', "API-Key": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"})
+
+print("Upload response status code: " + str(upload_response.status_code))
+
+if upload_response.ok:
+    upload_response_json = upload_response.json()
+    print(json.dumps(upload_response_json, indent = 2))
+
+    # Step 2: reference the uploaded file by id in the JSON payload.
+    uploaded_id = upload_response_json['files'][0]['id']
+    pdf_with_ocr_text_data = { "id" : uploaded_id }
+    print(json.dumps(pdf_with_ocr_text_data, indent = 2))
+
+    print("Processing file...")
+    pdf_with_ocr_text_response = requests.post(url='https://api.pdfrest.com/pdf-with-ocr-text',
+                    data=json.dumps(pdf_with_ocr_text_data),
+                    headers={'Content-Type': 'application/json', "API-Key": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"})
+
+    print("Processing response status code: " + str(pdf_with_ocr_text_response.status_code))
+    if pdf_with_ocr_text_response.ok:
+        pdf_with_ocr_text_response_json = pdf_with_ocr_text_response.json()
+        print(json.dumps(pdf_with_ocr_text_response_json, indent = 2))
+    else:
+        print(pdf_with_ocr_text_response.text)
+else:
+    print(upload_response.text)
diff --git a/Python/Endpoint Examples/Multipart Payload/pdf-with-ocr-text.py b/Python/Endpoint Examples/Multipart Payload/pdf-with-ocr-text.py
new file mode 100644
index 0000000..d51cda9
--- /dev/null
+++ b/Python/Endpoint Examples/Multipart Payload/pdf-with-ocr-text.py
@@ -0,0 +1,37 @@
+from requests_toolbelt import MultipartEncoder
+import requests
+import json
+
+pdf_with_ocr_text_endpoint_url = 'https://api.pdfrest.com/pdf-with-ocr-text'
+
+# The /pdf-with-ocr-text endpoint can take a single PDF file or id as input.
+# This sample demonstrates a request to add text to a document by using OCR on images of text.
+# The file is opened in a 'with' block so the handle is closed once the request has been sent.
+with open('/path/to/file', 'rb') as input_file:
+    mp_encoder_pdf_with_ocr_text = MultipartEncoder(
+        fields={
+            'file': ('file_name', input_file, 'application/pdf'),
+            'output' : 'example_pdf-with-ocr-text_out',
+        }
+    )
+
+    # Let's set the headers that the pdf-with-ocr-text endpoint expects.
+    # Since MultipartEncoder is used, the 'Content-Type' header gets set to 'multipart/form-data' via the content_type attribute below.
+    headers = {
+        'Accept': 'application/json',
+        'Content-Type': mp_encoder_pdf_with_ocr_text.content_type,
+        'Api-Key': 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' # place your api key here
+    }
+
+    print("Sending POST request to pdf-with-ocr-text endpoint...")
+    response = requests.post(pdf_with_ocr_text_endpoint_url, data=mp_encoder_pdf_with_ocr_text, headers=headers)
+
+print("Response status code: " + str(response.status_code))
+
+if response.ok:
+    response_json = response.json()
+    print(json.dumps(response_json, indent = 2))
+else:
+    print(response.text)
+
+# If you would like to download the file instead of getting the JSON response, please see the 'get-resource-id-endpoint.py' sample.
diff --git a/README.md b/README.md index 8260322..37395b8 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,8 @@ pdfRest is a REST API Toolkit for developers with all of the API Tools you'll ne | **[Watermark PDF](https://pdfrest.com/apitools/watermark-pdf/)** | **[Flatten Transparencies](https://pdfrest.com/apitools/flatten-transparencies/)** | **[Flatten Annotations](https://pdfrest.com/apitools/flatten-annotations/)** | **[Flatten Layers](https://pdfrest.com/apitools/flatten-layers/)** | **[Query PDF](https://pdfrest.com/apitools/query-pdf/)** | **[Linearize PDF](https://pdfrest.com/apitools/linearize-pdf/)** | **[Upload Files](https://pdfrest.com/apitools/upload-files/)** | **[Zip Files](https://pdfrest.com/apitools/zip-files/)** | **[Flatten Forms](https://pdfrest.com/apitools/flatten-forms/)** | **[Import Form Data](https://pdfrest.com/apitools/import-form-data/)** | **[Export Form Data](https://pdfrest.com/apitools/export-form-data/)** | **[Extract Text](https://pdfrest.com/apitools/extract-text/)** | -| **[PDF to Word](https://pdfrest.com/apitools/pdf-to-word/)** | **[PDF to Excel](https://pdfrest.com/apitools/pdf-to-excel/)** | **[PDF to PowerPoint](https://pdfrest.com/apitools/pdf-to-powerpoint/)** | **[Convert PDF Colors](https://pdfrest.com/apitools/convert-pdf-colors/)** | +| **[PDF to Word](https://pdfrest.com/apitools/pdf-to-word/)** | **[PDF to Excel](https://pdfrest.com/apitools/pdf-to-excel/)** | **[PDF to PowerPoint](https://pdfrest.com/apitools/pdf-to-powerpoint/)** | **[Convert PDF Colors](https://pdfrest.com/apitools/convert-pdf-colors/)** | +| **[OCR to PDF](https://pdfrest.com/apitools/ocr-pdf/)** | | | |
diff --git a/cURL/Endpoint Examples/JSON Payload/pdf-with-ocr-text.sh b/cURL/Endpoint Examples/JSON Payload/pdf-with-ocr-text.sh
new file mode 100644
index 0000000..7291739
--- /dev/null
+++ b/cURL/Endpoint Examples/JSON Payload/pdf-with-ocr-text.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+
+# Step 1: upload the file and pull its id out of the JSON response with jq.
+# The filter is written '.files[0].id'; older jq releases reject the '.files.[0].id' spelling.
+UPLOAD_ID=$(curl --location 'https://api.pdfrest.com/upload' \
+--header 'Api-Key: xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' \
+--header 'content-filename: filename.pdf' \
+--header 'Content-Type: application/octet-stream' \
+--data-binary '@/path/to/file' \
+ | jq -r '.files[0].id')
+
+# jq -r prints the literal string "null" when the response has no files[0].id.
+if [ -z "$UPLOAD_ID" ] || [ "$UPLOAD_ID" = "null" ]; then
+  echo "Upload failed: no file ID was returned." >&2
+  exit 1
+fi
+
+echo "File successfully uploaded with an ID of: $UPLOAD_ID"
+
+# Step 2: reference the uploaded file by id in the JSON payload and pretty-print the reply.
+curl 'https://api.pdfrest.com/pdf-with-ocr-text' \
+--header 'Api-Key: xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx' \
+--header 'Content-Type: application/json' \
+--data-raw "{ \"id\": \"$UPLOAD_ID\"}" | jq -r '.'
diff --git a/cURL/Endpoint Examples/Multipart Payload/pdf-with-ocr-text.sh b/cURL/Endpoint Examples/Multipart Payload/pdf-with-ocr-text.sh
new file mode 100644
index 0000000..1f470d3
--- /dev/null
+++ b/cURL/Endpoint Examples/Multipart Payload/pdf-with-ocr-text.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+# Send the PDF as multipart/form-data; "output" is passed as a form field alongside the file.
+curl -X POST "https://api.pdfrest.com/pdf-with-ocr-text" \
+  -H "Accept: application/json" \
+  -H "Content-Type: multipart/form-data" \
+  -H "Api-Key: xxxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" \
+  -F "file=@/path/to/file" \
+  -F "output=example_out"