diff --git a/go.mod b/go.mod index 0b62c8d0..96aea4b9 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/unidoc/globalsign-dss v0.0.0-20220330092912-b69d85b63736 github.com/unidoc/pkcs7 v0.2.0 github.com/unidoc/unichart v0.3.0 - github.com/unidoc/unipdf/v3 v3.62.0 + github.com/unidoc/unipdf/v3 v3.65.0 golang.org/x/crypto v0.31.0 golang.org/x/image v0.18.0 golang.org/x/text v0.21.0 @@ -50,7 +50,7 @@ require ( github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a // indirect github.com/unidoc/unitype v0.4.0 // indirect go.opencensus.io v0.24.0 // indirect - golang.org/x/net v0.24.0 // indirect + golang.org/x/net v0.33.0 // indirect golang.org/x/oauth2 v0.7.0 // indirect golang.org/x/sys v0.28.0 // indirect golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect diff --git a/go.sum b/go.sum index d4f465c8..be365d03 100644 --- a/go.sum +++ b/go.sum @@ -128,8 +128,8 @@ github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a h1:RLtvUhe4DsUDl6 github.com/unidoc/timestamp v0.0.0-20200412005513-91597fd3793a/go.mod h1:j+qMWZVpZFTvDey3zxUkSgPJZEX33tDgU/QIA0IzCUw= github.com/unidoc/unichart v0.3.0 h1:VX1j5yzhjrR3f2flC03Yat6/WF3h7Z+DLEvJLoTGhoc= github.com/unidoc/unichart v0.3.0/go.mod h1:8JnLNKSOl8yQt1jXewNgYFHhFm5M6/ZiaydncFDpakA= -github.com/unidoc/unipdf/v3 v3.62.0 h1:CVsxq6k1SSIrprotlFvq6iBhA+5745dWaApB0LKtGcc= -github.com/unidoc/unipdf/v3 v3.62.0/go.mod h1:0OIzSHHno23Y8WzaK+852abK8d3AxUZ1GQkMqpyCzu8= +github.com/unidoc/unipdf/v3 v3.65.0 h1:ye3PP9JuUJnQd4BqRR4wJO/EN9EpHRGusMOruDjCS74= +github.com/unidoc/unipdf/v3 v3.65.0/go.mod h1:tTbloOTKtGGi6z5doJshesDpQYktePhR+7r+WGCkooU= github.com/unidoc/unitype v0.4.0 h1:/TMZ3wgwfWWX64mU5x2O9no9UmoBqYCB089LYYqHyQQ= github.com/unidoc/unitype v0.4.0/go.mod h1:HV5zuUeqMKA4QgYQq3KDlJY/P96XF90BQB+6czK6LVA= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= @@ -153,8 +153,8 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn 
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= -golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= -golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.7.0 h1:qe6s0zUXlPX80/dITx3440hWZ7GwMwgDDyrSGTPJG/g= golang.org/x/oauth2 v0.7.0/go.mod h1:hPLQkd9LyjfXTiRohC/41GhcFqxisoUQ99sCUOHO9x4= diff --git a/search-and-replace/READ.md b/search-and-replace/READ.md new file mode 100644 index 00000000..ad422a59 --- /dev/null +++ b/search-and-replace/READ.md @@ -0,0 +1,8 @@ +# PDF Text Search and Replace + +This example shows how to do text search and replacement on a PDF using unipdf. + +## Examples +- [search_text.go](search_text.go) This example shows how to search for text using unipdf by providing the pattern string and the pages to search on. +- [replace_text.go](replace_text.go) This example shows how to replace a given text by searching for it using a pattern and substituting a replacement string. +A list of pages is also passed as an argument to specify on which pages to do the replacement. 
\ No newline at end of file diff --git a/search-and-replace/replace_text.go b/search-and-replace/replace_text.go new file mode 100644 index 00000000..317b8a46 --- /dev/null +++ b/search-and-replace/replace_text.go @@ -0,0 +1,76 @@ +/* + * This example code shows how to do search and replace operation in PDF using unipdf + * + * Run as: go run replace_text.go + * + * example: go run replace_text.go "Australia" "America" "1,2" ./test-data/file1.pdf ./test-data/result.pdf + */ +package main + +import ( + "fmt" + "os" + "strconv" + "strings" + + "github.com/unidoc/unipdf/v3/common/license" + "github.com/unidoc/unipdf/v3/extractor" + "github.com/unidoc/unipdf/v3/model" +) + +func init() { + // Make sure to load your metered License API key prior to using the library. + // If you need a key, you can sign up and create a free one at https://cloud.unidoc.io + err := license.SetMeteredKey(os.Getenv(`UNIDOC_LICENSE_API_KEY`)) + if err != nil { + panic(err) + } +} + +func main() { + // Ensure enough arguments are provided + if len(os.Args) < 5 { + fmt.Println("Usage: go run replace_text.go ") + os.Exit(1) + } + + // Parse positional arguments + pattern := os.Args[1] + replacement := os.Args[2] + pagesArg := os.Args[3] + filePath := os.Args[4] + outputPath := os.Args[5] + + // Convert pages string to a slice of integers + pageStrings := strings.Split(pagesArg, ",") + pageList := []int{} + for _, pageStr := range pageStrings { + page, err := strconv.Atoi(pageStr) + if err != nil { + fmt.Printf("Invalid page number: %s\n", pageStr) + os.Exit(1) + } + pageList = append(pageList, page) + } + + reader, _, err := model.NewPdfReaderFromFile(filePath, nil) + if err != nil { + fmt.Printf("Failed to create PDF reader: %v", err) + os.Exit(1) + } + editor := extractor.NewEditor(reader) + + err = editor.Replace(pattern, replacement, pageList) + if err != nil { + fmt.Printf("Failed to search pattern: %v\n", err) + os.Exit(1) + } + + err = editor.WriteToFile(outputPath) + if err != nil { + 
fmt.Printf("Failed to write to file: %v", err) + os.Exit(1) + } + + fmt.Printf("Finished replacing %s by %s and saved the output file at %s\n", pattern, replacement, filePath) +} diff --git a/search-and-replace/search_text.go b/search-and-replace/search_text.go new file mode 100644 index 00000000..3331c13f --- /dev/null +++ b/search-and-replace/search_text.go @@ -0,0 +1,112 @@ +/* + * This example code shows how to do text searching on pdf using unipdf + * + * Run as: go run search_text.go + * + * Example: go run search_text.go "copyright law" "1,2" ./test-data/file1.pdf + */ + +package main + +import ( + "fmt" + "os" + "strconv" + "strings" + + "github.com/unidoc/unipdf/v3/common/license" + "github.com/unidoc/unipdf/v3/extractor" + "github.com/unidoc/unipdf/v3/model" +) + +func init() { + // Make sure to load your metered License API key prior to using the library. + // If you need a key, you can sign up and create a free one at https://cloud.unidoc.io + err := license.SetMeteredKey(os.Getenv(`UNIDOC_LICENSE_API_KEY`)) + if err != nil { + panic(err) + } +} + +func main() { + // Ensure enough arguments are provided + if len(os.Args) < 4 { + fmt.Println("Usage: go run main.go ") + os.Exit(1) + } + + // Parse positional arguments + pattern := os.Args[1] + pagesArg := os.Args[2] + filePath := os.Args[3] + + // Convert pages string to a slice of integers + pageStrings := strings.Split(pagesArg, ",") + pageList := []int{} + for _, pageStr := range pageStrings { + page, err := strconv.Atoi(pageStr) + if err != nil { + fmt.Printf("Invalid page number: %s\n", pageStr) + os.Exit(1) + } + pageList = append(pageList, page) + } + + // Create a new PDF reader + reader, _, err := model.NewPdfReaderFromFile(filePath, nil) + if err != nil { + fmt.Printf("Failed to create PDF reader: %v\n", err) + os.Exit(1) + } + + // Create an Editor object for searching + editor := extractor.NewEditor(reader) + + // Perform the search for the specified pattern on the given pages + 
matchesPerPage, err := editor.Search(pattern, pageList) + if err != nil { + fmt.Printf("Failed to search pattern: %v\n", err) + os.Exit(1) + } + + // Print formatted search results + printSearchResults(matchesPerPage, pageList, pattern) +} + +// printSearchResults formats and prints the search results. +// It displays indexes as [beg:end] and locations as {Llx Lly Urx Ury}. +// If no matches are found for a page, it prints a not found message. +func printSearchResults(matchesPerPage map[int]extractor.Match, pages []int, pattern string) { + foundAny := false // Flag to check if any match is found across all pages + + for _, page := range pages { + result, exists := matchesPerPage[page] + if exists && len(result.Indexes) > 0 { + foundAny = true + fmt.Printf("Page %d:\n", page) + + // Prepare index strings + var indexStrings []string + for _, idx := range result.Indexes { + indexStrings = append(indexStrings, fmt.Sprintf("[%d:%d]", idx[0], idx[1])) + } + fmt.Printf("indexes: %s\n", strings.Join(indexStrings, ", ")) + + // Prepare location strings + var locationStrings []string + for _, box := range result.Locations { + locationStrings = append(locationStrings, fmt.Sprintf("{%.2f %.2f %.2f %.2f}", box.BBox.Llx, box.BBox.Lly, box.BBox.Urx, box.BBox.Ury)) + } + fmt.Printf("locations: %s\n\n", strings.Join(locationStrings, ", ")) + } else { + // If no matches found for the current page + fmt.Printf("Page %d:\n", page) + fmt.Println("pattern didn't match any text\n") + } + } + + if !foundAny { + // If no matches found in any of the pages + fmt.Println("pattern didn't match any text in the specified pages.") + } +} diff --git a/search-and-replace/test-data/file1.pdf b/search-and-replace/test-data/file1.pdf new file mode 100644 index 00000000..29adba44 Binary files /dev/null and b/search-and-replace/test-data/file1.pdf differ diff --git a/search-and-replace/test-data/result.pdf b/search-and-replace/test-data/result.pdf new file mode 100644 index 00000000..3280c4de Binary 
files /dev/null and b/search-and-replace/test-data/result.pdf differ