Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[US-777]Example Code for Search and Replace Usages #263

Merged
merged 5 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ require (
github.com/unidoc/globalsign-dss v0.0.0-20220330092912-b69d85b63736
github.com/unidoc/pkcs7 v0.2.0
github.com/unidoc/unichart v0.3.0
github.com/unidoc/unipdf/v3 v3.62.0
github.com/unidoc/unipdf/v3 v3.65.0
golang.org/x/crypto v0.31.0
golang.org/x/image v0.18.0
golang.org/x/text v0.21.0
Expand Down Expand Up @@ -50,7 +50,7 @@ require (
github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a // indirect
github.com/unidoc/unitype v0.4.0 // indirect
go.opencensus.io v0.24.0 // indirect
golang.org/x/net v0.24.0 // indirect
golang.org/x/net v0.33.0 // indirect
golang.org/x/oauth2 v0.7.0 // indirect
golang.org/x/sys v0.28.0 // indirect
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
Expand Down
8 changes: 4 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a h1:RLtvUhe4DsUDl6
github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a/go.mod h1:j+qMWZVpZFTvDey3zxUkSgPJZEX33tDgU/QIA0IzCUw=
github.com/unidoc/unichart v0.3.0 h1:VX1j5yzhjrR3f2flC03Yat6/WF3h7Z+DLEvJLoTGhoc=
github.com/unidoc/unichart v0.3.0/go.mod h1:8JnLNKSOl8yQt1jXewNgYFHhFm5M6/ZiaydncFDpakA=
github.com/unidoc/unipdf/v3 v3.62.0 h1:CVsxq6k1SSIrprotlFvq6iBhA+5745dWaApB0LKtGcc=
github.com/unidoc/unipdf/v3 v3.62.0/go.mod h1:0OIzSHHno23Y8WzaK+852abK8d3AxUZ1GQkMqpyCzu8=
github.com/unidoc/unipdf/v3 v3.65.0 h1:ye3PP9JuUJnQd4BqRR4wJO/EN9EpHRGusMOruDjCS74=
github.com/unidoc/unipdf/v3 v3.65.0/go.mod h1:tTbloOTKtGGi6z5doJshesDpQYktePhR+7r+WGCkooU=
github.com/unidoc/unitype v0.4.0 h1:/TMZ3wgwfWWX64mU5x2O9no9UmoBqYCB089LYYqHyQQ=
github.com/unidoc/unitype v0.4.0/go.mod h1:HV5zuUeqMKA4QgYQq3KDlJY/P96XF90BQB+6czK6LVA=
go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0=
Expand All @@ -153,8 +153,8 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w=
golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8=
golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I=
golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.7.0 h1:qe6s0zUXlPX80/dITx3440hWZ7GwMwgDDyrSGTPJG/g=
golang.org/x/oauth2 v0.7.0/go.mod h1:hPLQkd9LyjfXTiRohC/41GhcFqxisoUQ99sCUOHO9x4=
Expand Down
8 changes: 8 additions & 0 deletions search-and-replace/READ.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# PDF Text Search and Replace

This example shows how to do text search and replacement on PDF using unipdf.

## Examples
- [search_text.go](search_text.go) This example shows how to search for text using unipdf by providing a pattern string and the pages to search.
- [replace_text.go](replace_text.go) This example shows how to replace a given text by searching for it using a pattern and a replacement string.
A list of pages is also provided as a parameter to specify the pages on which to do the replacement.
76 changes: 76 additions & 0 deletions search-and-replace/replace_text.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* This example code shows how to do search and replace operation in PDF using unipdf
*
* Run as: go run replace_text.go <pattern> <replacement> <pages> <input> <output>
*
* example: go run replace_text.go "Australia" "America" "1,2" ./test-data/file1.pdf ./test-data/result.pdf
*/
package main

import (
"fmt"
"os"
"strconv"
"strings"

"github.com/unidoc/unipdf/v3/common/license"
"github.com/unidoc/unipdf/v3/extractor"
"github.com/unidoc/unipdf/v3/model"
)

// init registers the metered license key before any unipdf call is made.
// The key is read from the UNIDOC_LICENSE_API_KEY environment variable; the
// program panics if the library rejects it.
func init() {
	// A free metered key can be created at https://cloud.unidoc.io
	apiKey := os.Getenv(`UNIDOC_LICENSE_API_KEY`)
	if err := license.SetMeteredKey(apiKey); err != nil {
		panic(err)
	}
}

// main parses the command-line arguments, replaces every match of the pattern
// on the requested pages and writes the edited document to the output path.
//
// Expected arguments: <pattern> <replacement> <pages> <input> <output>, where
// <pages> is a comma-separated list of page numbers such as "1,2".
// Any failure is reported on standard output and the process exits with
// status 1.
func main() {
	// Five positional arguments plus the program name are required. Checking
	// for fewer would let the os.Args[5] access below panic with an index out
	// of range when <output> is omitted.
	if len(os.Args) < 6 {
		fmt.Println("Usage: go run replace_text.go <pattern> <replacement> <pages> <input> <output>")
		os.Exit(1)
	}

	// Parse positional arguments.
	pattern := os.Args[1]
	replacement := os.Args[2]
	pagesArg := os.Args[3]
	filePath := os.Args[4]
	outputPath := os.Args[5]

	// Convert the pages string to a slice of integers. Surrounding whitespace
	// is trimmed so that a list such as "1, 2" is accepted.
	pageStrings := strings.Split(pagesArg, ",")
	pageList := make([]int, 0, len(pageStrings))
	for _, pageStr := range pageStrings {
		page, err := strconv.Atoi(strings.TrimSpace(pageStr))
		if err != nil {
			fmt.Printf("Invalid page number: %s\n", pageStr)
			os.Exit(1)
		}
		pageList = append(pageList, page)
	}

	// Open the input document.
	reader, _, err := model.NewPdfReaderFromFile(filePath, nil)
	if err != nil {
		fmt.Printf("Failed to create PDF reader: %v\n", err)
		os.Exit(1)
	}

	// Create an Editor on top of the reader and run the replacement.
	editor := extractor.NewEditor(reader)
	if err := editor.Replace(pattern, replacement, pageList); err != nil {
		fmt.Printf("Failed to replace pattern: %v\n", err)
		os.Exit(1)
	}

	// Persist the edited document.
	if err := editor.WriteToFile(outputPath); err != nil {
		fmt.Printf("Failed to write to file: %v\n", err)
		os.Exit(1)
	}

	// Report the output path (not the input path), since that is where the
	// result was saved.
	fmt.Printf("Finished replacing %s by %s and saved the output file at %s\n", pattern, replacement, outputPath)
}
112 changes: 112 additions & 0 deletions search-and-replace/search_text.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* This example code shows how to do text searching on pdf using unipdf
*
* Run as: go run search_text.go <pattern> <pages> <input>
*
* Example: go run search_text.go "copyright law" "1,2" ./test-data/file1.pdf
*/

package main

import (
"fmt"
"os"
"strconv"
"strings"

"github.com/unidoc/unipdf/v3/common/license"
"github.com/unidoc/unipdf/v3/extractor"
"github.com/unidoc/unipdf/v3/model"
)

// init registers the metered license key before any unipdf call is made.
// The key is read from the UNIDOC_LICENSE_API_KEY environment variable; the
// program panics if the library rejects it.
func init() {
	// A free metered key can be created at https://cloud.unidoc.io
	apiKey := os.Getenv(`UNIDOC_LICENSE_API_KEY`)
	if err := license.SetMeteredKey(apiKey); err != nil {
		panic(err)
	}
}

// main parses the command-line arguments, searches the requested pages of the
// input PDF for the pattern and prints the matches.
//
// Expected arguments: <pattern> <pages> <input>, where <pages> is a
// comma-separated list of page numbers such as "1,2". Any failure is reported
// on standard output and the process exits with status 1.
func main() {
	// Three positional arguments plus the program name are required.
	if len(os.Args) < 4 {
		// The usage text names this file so it matches the header comment.
		fmt.Println("Usage: go run search_text.go <pattern> <pages> <input>")
		os.Exit(1)
	}

	// Parse positional arguments.
	pattern := os.Args[1]
	pagesArg := os.Args[2]
	filePath := os.Args[3]

	// Convert the pages string to a slice of integers. Surrounding whitespace
	// is trimmed so that a list such as "1, 2" is accepted.
	pageStrings := strings.Split(pagesArg, ",")
	pageList := make([]int, 0, len(pageStrings))
	for _, pageStr := range pageStrings {
		page, err := strconv.Atoi(strings.TrimSpace(pageStr))
		if err != nil {
			fmt.Printf("Invalid page number: %s\n", pageStr)
			os.Exit(1)
		}
		pageList = append(pageList, page)
	}

	// Create a new PDF reader.
	reader, _, err := model.NewPdfReaderFromFile(filePath, nil)
	if err != nil {
		fmt.Printf("Failed to create PDF reader: %v\n", err)
		os.Exit(1)
	}

	// Create an Editor object for searching.
	editor := extractor.NewEditor(reader)

	// Perform the search for the specified pattern on the given pages.
	matchesPerPage, err := editor.Search(pattern, pageList)
	if err != nil {
		fmt.Printf("Failed to search pattern: %v\n", err)
		os.Exit(1)
	}

	// Print formatted search results.
	printSearchResults(matchesPerPage, pageList, pattern)
}

// printSearchResults formats and prints the search results for each page in
// pages, in the order given.
//
// For a page with matches it prints the indexes as [beg:end] and the
// locations as {Llx Lly Urx Ury}. For a page that is absent from
// matchesPerPage, or has no indexes, it prints a not-found message. When no
// page produced a match, a final summary line is printed as well.
//
// The pattern parameter is currently not used in the output; it is kept so the
// signature stays unchanged for callers.
func printSearchResults(matchesPerPage map[int]extractor.Match, pages []int, pattern string) {
	foundAny := false // Set once any page yields at least one match.

	for _, page := range pages {
		fmt.Printf("Page %d:\n", page)

		result, exists := matchesPerPage[page]
		if !exists || len(result.Indexes) == 0 {
			// fmt.Print with explicit newlines emits the same bytes as the
			// former Println call without tripping go vet's redundant-newline
			// check.
			fmt.Print("pattern didn't match any text\n\n")
			continue
		}
		foundAny = true

		// Prepare index strings.
		indexStrings := make([]string, 0, len(result.Indexes))
		for _, idx := range result.Indexes {
			indexStrings = append(indexStrings, fmt.Sprintf("[%d:%d]", idx[0], idx[1]))
		}
		fmt.Printf("indexes: %s\n", strings.Join(indexStrings, ", "))

		// Prepare location strings.
		locationStrings := make([]string, 0, len(result.Locations))
		for _, box := range result.Locations {
			locationStrings = append(locationStrings, fmt.Sprintf("{%.2f %.2f %.2f %.2f}", box.BBox.Llx, box.BBox.Lly, box.BBox.Urx, box.BBox.Ury))
		}
		fmt.Printf("locations: %s\n\n", strings.Join(locationStrings, ", "))
	}

	if !foundAny {
		// No matches were found on any of the requested pages.
		fmt.Println("pattern didn't match any text in the specified pages.")
	}
}
Binary file added search-and-replace/test-data/file1.pdf
Binary file not shown.
Binary file added search-and-replace/test-data/result.pdf
Binary file not shown.
Loading