diff --git a/.github/workflows/test-dotnet-samples.yml b/.github/workflows/test-dotnet-samples.yml new file mode 100644 index 0000000..81b6aa8 --- /dev/null +++ b/.github/workflows/test-dotnet-samples.yml @@ -0,0 +1,190 @@ +name: test-samples + +on: + pull_request: + push: + branches: [ develop, main ] + +env: + DOTNET_VERSION: '6.x' + +jobs: + run-samples: + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash + strategy: + fail-fast: false + matrix: + os: [windows-latest, ubuntu-latest, macos-14] + dir: [ + 'Annotations/Annotations/', + 'Annotations/InkAnnotations/', + 'Annotations/LinkAnnotation/', + 'Annotations/PolygonAnnotations/', + 'Annotations/PolyLineAnnotations/', + 'ContentCreation/AddElements/', + 'ContentCreation/AddHeaderFooter/', + 'ContentCreation/Clips/', + 'ContentCreation/CreateBookmarks/', + 'ContentCreation/GradientShade/', + 'ContentCreation/MakeDocWithCalGrayColorSpace/', + 'ContentCreation/MakeDocWithCalRGBColorSpace/', + 'ContentCreation/MakeDocWithDeviceNColorSpace/', + 'ContentCreation/MakeDocWithICCBasedColorSpace/', + 'ContentCreation/MakeDocWithIndexedColorSpace/', + 'ContentCreation/MakeDocWithLabColorSpace/', + 'ContentCreation/MakeDocWithSeparationColorSpace/', + 'ContentCreation/NameTrees/', + 'ContentCreation/NumberTrees/', + 'ContentCreation/RemoteGoToActions/', + 'ContentCreation/WriteNChannelTiff/', + 'ContentModification/Action/', + 'ContentModification/AddCollection/', + 'ContentModification/ChangeLayerConfiguration/', + 'ContentModification/ChangeLinkColors/', + 'ContentModification/CreateLayer/', + 'ContentModification/ExtendedGraphicStates/', + 'ContentModification/FlattenTransparency/', + 'ContentModification/LaunchActions/', + 'ContentModification/MergePDF/', + 'ContentModification/PageLabels/', + 'ContentModification/PDFObject/', + 'ContentModification/UnderlinesAndHighlights/', + 'ContentModification/Watermark/', + 'DocumentConversion/ColorConvertDocument/', + 'DocumentConversion/ConvertToOffice/', + 
'DocumentConversion/CreateDocFromXPS/', + 'DocumentConversion/Factur-XConverter/', + 'DocumentConversion/PDFAConverter/', + 'DocumentConversion/PDFXConverter/', + 'DocumentConversion/ZUGFeRDConverter/', + 'DocumentOptimization/PDFOptimize/', + 'Images/DocToImages/', + 'Images/DrawSeparations/', + 'Images/DrawToBitmap/', + 'Images/EPSSeparations/', + 'Images/GetSeparatedImages/', + 'Images/ImageEmbedICCProfile/', + 'Images/ImageExport/', + 'Images/ImageExtraction/', + 'Images/ImageFromStream/', + 'Images/ImageImport/', + 'Images/ImageResampling/', + 'Images/ImageSoftMask/', + 'Images/OutputPreview/', + 'Images/RasterizePage/', + 'InformationExtraction/ListBookmarks/', + 'InformationExtraction/ListInfo/', + 'InformationExtraction/ListLayers/', + 'InformationExtraction/ListPaths/', + 'InformationExtraction/Metadata/', + 'OpticalCharacterRecognition/AddTextToDocument/', + 'OpticalCharacterRecognition/AddTextToImage/', + 'Other/MemoryFileSystem/', + 'Other/StreamIO/', + 'Security/AddRegexRedaction/', + 'Security/Redactions/', + 'Text/AddGlyphs/', + 'Text/AddUnicodeText/', + 'Text/AddVerticalText/', + 'Text/ExtractAcroFormFieldData/', + 'Text/ExtractCJKTextByPatternMatch/', + 'Text/ExtractTextByPatternMatch/', + 'Text/ExtractTextByRegion/', + 'Text/ExtractTextFromAnnotations/', + 'Text/ExtractTextFromMultiRegions/', + 'Text/ExtractTextPreservingStyleAndPositionInfo/', + 'Text/ListWords/', + 'Text/RegexExtractText/', + 'Text/RegexTextSearch/', + 'Text/TextExtract/' + ] + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup .NET + uses: actions/setup-dotnet@v4 + with: + dotnet-version: ${{ env.DOTNET_VERSION }} + + - name: Setup Microsoft Core Fonts + working-directory: ${{ matrix.dir }} + run: | + sample_name=$(basename "$PWD") + if [ "${{ matrix.os }}" == 'ubuntu-latest' ]; then + case "$sample_name" in "AddHeaderFooter" | "AddElements" | "MakeDocWithCalGrayColorSpace" | "MakeDocWithCalRGBColorSpace" | "MakeDocWithDeviceNColorSpace" | 
"MakeDocWithICCBasedColorSpace" | "MakeDocWithIndexedColorSpace" | "MakeDocWithLabColorSpace" | "MakeDocWithSeparationColorSpace" | "ExtendedGraphicStates" | "AddGlyphs" | "AddUnicodeText") + echo 'ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true' | sudo debconf-set-selections + sudo apt-get install ttf-mscorefonts-installer + ;; + esac + fi + + - name: Build samples + working-directory: ${{ matrix.dir }} + run: | + sample_name=$(basename "$PWD") + if { [ "$sample_name" != "ConvertToOffice" ] && [ "$sample_name" != "CreateDocFromXPS" ]; } || [ "${{ matrix.os }}" != 'macos-14' ]; then + dotnet build -c Release *.csproj + else + echo "Not available on this os" + fi + + - name: Run samples + working-directory: ${{matrix.dir}} + run: | + sample_name=$(basename "$PWD") + + if [ "$sample_name" == "Redactions" ] && [ "${{matrix.os}}" == 'ubuntu-latest' ]; then + echo "Not available on this os" + else + if { [ "$sample_name" != "ConvertToOffice" ] && [ "$sample_name" != "CreateDocFromXPS" ]; } || [ "${{matrix.os}}" != 'macos-14' ]; then + if [ "${{matrix.os}}" == 'windows-latest' ]; then + if [ "$sample_name" == "DocToImages" ]; then + bin/Release/net6.0/$sample_name.exe -format=png $HOME/.nuget/packages/adobe.pdf.library.sampleinput/1.0.0/build/Resources/Sample_Input/ducky.pdf + else + bin/Release/net6.0/$sample_name.exe + fi + else + if [ "$sample_name" == "DocToImages" ]; then + dotnet bin/Release/net6.0/$sample_name.dll -format=png $HOME/.nuget/packages/adobe.pdf.library.sampleinput/1.0.0/build/Resources/Sample_Input/ducky.pdf + else + dotnet bin/Release/net6.0/$sample_name.dll + fi + fi + else + echo "Not available on this os" + fi + fi + + - name: Set sample_name variable + id: set-sample-name + working-directory: ${{matrix.dir}} + run: echo "SAMPLE_NAME=$(basename "$PWD")" >> "$GITHUB_ENV" + + - name: Save artifacts + uses: actions/upload-artifact@v4 + with: + name: output-${{ runner.os }}-${{ env.SAMPLE_NAME }} + path: | + 
${{matrix.dir}}*.docx + ${{matrix.dir}}*.xlsx + ${{matrix.dir}}*.pptx + ${{matrix.dir}}*.pdf + ${{matrix.dir}}*.tif + ${{matrix.dir}}*.png + ${{matrix.dir}}*.jpg + ${{matrix.dir}}*.eps + ${{matrix.dir}}*.tiff + ${{matrix.dir}}*.bmp + ${{matrix.dir}}*.gif + ${{matrix.dir}}*.json + ${{matrix.dir}}*.txt + ${{matrix.dir}}*.csv + + - name: List files + run: | + ls ${{matrix.dir}} diff --git a/ContentCreation/AddHeaderFooter/AddHeaderFooter.cs b/ContentCreation/AddHeaderFooter/AddHeaderFooter.cs index da64e76..f090bce 100644 --- a/ContentCreation/AddHeaderFooter/AddHeaderFooter.cs +++ b/ContentCreation/AddHeaderFooter/AddHeaderFooter.cs @@ -29,7 +29,7 @@ static void Main(string[] args) using (Library lib = new Library()) { - String sOutput = "../AddHeaderFooter-out.pdf"; + String sOutput = "AddHeaderFooter-out.pdf"; Console.WriteLine("Output file: " + sOutput); diff --git a/ContentCreation/MakeDocWithCalRGBColorSpace/MakeDocwithCalRGBColorSpace.csproj b/ContentCreation/MakeDocWithCalRGBColorSpace/MakeDocWithCalRGBColorSpace.csproj similarity index 100% rename from ContentCreation/MakeDocWithCalRGBColorSpace/MakeDocwithCalRGBColorSpace.csproj rename to ContentCreation/MakeDocWithCalRGBColorSpace/MakeDocWithCalRGBColorSpace.csproj diff --git a/ContentModification/AddQRCode/AddQRCode.cs b/ContentModification/AddQRCode/AddQRCode.cs new file mode 100644 index 0000000..5cd16aa --- /dev/null +++ b/ContentModification/AddQRCode/AddQRCode.cs @@ -0,0 +1,40 @@ +using System; +using Datalogics.PDFL; + +/* + * + * This sample shows how to add a QR barcode to a PDF page + * + * Copyright (c) 2024, Datalogics, Inc. All rights reserved. 
+ * + */ +namespace AddQRCode +{ + class AddQRCode + { + static void Main(string[] args) + { + Console.WriteLine("AddQRCode Sample:"); + + using (Library lib = new Library()) + { + Console.WriteLine("Initialized the library."); + + String sInput = Library.ResourceDirectory + "Sample_Input/sample_links.pdf"; + String sOutput = "AddQRCode-out.pdf"; /* written to the working directory, like the other samples touched by this change */ + + if (args.Length > 0) + sInput = args[0]; + + using (Document doc = new Document(sInput)) + { + Page page = doc.GetPage(0); + + page.AddQRBarcode("Datalogics", 72.0, page.CropBox.Top - 1.5 * 72.0, 72.0, 72.0); + + doc.Save(SaveFlags.Full, sOutput); + } + } + } + } +} diff --git a/ContentModification/AddQRCode/AddQRCode.csproj b/ContentModification/AddQRCode/AddQRCode.csproj new file mode 100644 index 0000000..a452af9 --- /dev/null +++ b/ContentModification/AddQRCode/AddQRCode.csproj @@ -0,0 +1,14 @@ + + + + Exe + net6.0 + enable + enable + + + + + + + diff --git a/ContentModification/ChangeLinkColors/ChangeLinkColors.cs b/ContentModification/ChangeLinkColors/ChangeLinkColors.cs index 8b54aa4..0c73057 100644 --- a/ContentModification/ChangeLinkColors/ChangeLinkColors.cs +++ b/ContentModification/ChangeLinkColors/ChangeLinkColors.cs @@ -52,7 +52,7 @@ static void Main(string[] args) Annotation annot = page.GetAnnotation(i); if (annot is LinkAnnotation) { - linkAnnots.Add(annot as LinkAnnotation); + linkAnnots.Add((LinkAnnotation)annot); } } @@ -92,7 +92,7 @@ static void FindAndProcessText(Content content, List linkAnnots) else if (element is Text) { Console.WriteLine("Found a Text object."); - CheckCharactersInText(element as Text, linkAnnots); + CheckCharactersInText((Text)element, linkAnnots); } } } diff --git a/ContentModification/CreateLayer/CreateLayer.cs b/ContentModification/CreateLayer/CreateLayer.cs index 74de52e..0ade19e 100644 --- a/ContentModification/CreateLayer/CreateLayer.cs +++ b/ContentModification/CreateLayer/CreateLayer.cs @@ -43,34 +43,38 @@ static void Main(string[] args) 
Console.WriteLine("Opened a document."); Page pg = doc.GetPage(0); - Image img = (pg.Content.GetElement(0) as Image); - - // Containers, Forms and Annotations can be attached to an - // OptionalContentGroup; other content (like Image) can - // be made optional by placing it inside a Container - Container container = new Container(); - container.Content = new Content(); - container.Content.AddElement(img); - - // We replace the Image with the Container - // (which now holds the image) - pg.Content.RemoveElement(0); - pg.UpdateContent(); - - pg.Content.AddElement(container); - pg.UpdateContent(); - - // We create a new OptionalContentGroup and place it in the - // OptionalContentConfig.Order array - OptionalContentGroup ocg = CreateNewOptionalContentGroup(doc, "Rubber Ducky"); - - // Now we associate the Container with the OptionalContentGroup - // via an OptionalContentMembershipDict. Note that we MUST - // update the Page's content afterwards. - AssociateOCGWithContainer(doc, ocg, container); - pg.UpdateContent(); - - doc.Save(SaveFlags.Full, sOutput); + Element element = pg.Content.GetElement(0); + if (element is Image) + { + Image img = (Image)element; + + // Containers, Forms and Annotations can be attached to an + // OptionalContentGroup; other content (like Image) can + // be made optional by placing it inside a Container + Container container = new Container(); + container.Content = new Content(); + container.Content.AddElement(img); + + // We replace the Image with the Container + // (which now holds the image) + pg.Content.RemoveElement(0); + pg.UpdateContent(); + + pg.Content.AddElement(container); + pg.UpdateContent(); + + // We create a new OptionalContentGroup and place it in the + // OptionalContentConfig.Order array + OptionalContentGroup ocg = CreateNewOptionalContentGroup(doc, "Rubber Ducky"); + + // Now we associate the Container with the OptionalContentGroup + // via an OptionalContentMembershipDict. 
Note that we MUST + // update the Page's content afterwards. + AssociateOCGWithContainer(doc, ocg, container); + pg.UpdateContent(); + + doc.Save(SaveFlags.Full, sOutput); + } } } diff --git a/ContentModification/ExtendedGraphicStates/ExtendedGraphicStates.cs b/ContentModification/ExtendedGraphicStates/ExtendedGraphicStates.cs index f287ce7..da56cec 100644 --- a/ContentModification/ExtendedGraphicStates/ExtendedGraphicStates.cs +++ b/ContentModification/ExtendedGraphicStates/ExtendedGraphicStates.cs @@ -85,7 +85,7 @@ static void blendPage(Document doc, Image foregroundImage, Image backgroundImage m = m.Scale(12.0, 12.0); ExtendedGraphicState xgs = new ExtendedGraphicState(); - TextRun tr = null; + TextRun? tr = null; if (i == 0) { xgs.BlendMode = BlendMode.Normal; diff --git a/ContentModification/README.md b/ContentModification/README.md index 8b6c3ff..caeb696 100644 --- a/ContentModification/README.md +++ b/ContentModification/README.md @@ -4,6 +4,9 @@ Creates an action associated with a link annotation on a PDF page. ## ***AddCollection*** Adds a collection to a PDF document to turn that document into a PDF Portfolio. +## ***AddQRCode*** +Adds a QR barcode to the Page of a PDF document. + ## ***ChangeLayerConfiguration*** Sets the on/off states for Optional Content Groups (Layers) within a PDF document. 
diff --git a/DocumentOptimization/PDFOptimize/PDFOptimize.cs b/DocumentOptimization/PDFOptimize/PDFOptimize.cs index 4a56212..d60f4a6 100644 --- a/DocumentOptimization/PDFOptimize/PDFOptimize.cs +++ b/DocumentOptimization/PDFOptimize/PDFOptimize.cs @@ -29,7 +29,7 @@ static void Main(string[] args) Console.WriteLine("Initialized the library."); String sInput = Library.ResourceDirectory + "Sample_Input/sample.pdf"; ; - String sOutput = "../PDFOptimizer-out.pdf"; + String sOutput = "PDFOptimizer-out.pdf"; if (args.Length > 0) sInput = args[0]; diff --git a/Images/DocToImages/DocToImages.cs b/Images/DocToImages/DocToImages.cs index 09a7578..db1f2b5 100644 --- a/Images/DocToImages/DocToImages.cs +++ b/Images/DocToImages/DocToImages.cs @@ -31,7 +31,7 @@ public class DocToImagesOptions List PageList = new List(0); int evenoddpages; // 1 = all odd pages, 2 = all even pages. string outputfilename = ""; - string outputdirname = ""; + string? outputdirname = ""; SmoothFlags smoothingflags = SmoothFlags.None; bool reversegray; bool blackisone; @@ -214,12 +214,12 @@ public string getoutputfile() return (outputfilename); } - public void setoutputdir(string outputdir) + public void setoutputdir(string? outputdir) { outputdirname = outputdir; } - public string getoutputdir() + public string? getoutputdir() { return (outputdirname); } @@ -1071,7 +1071,7 @@ static void Main(string[] args) // ReSharper disable once UnusedVariable using (Library lib = new Library(options.getfontdirs())) { - Document pdfdocument = null; + Document? pdfdocument = null; int numpages = 0; try { @@ -1089,7 +1089,7 @@ static void Main(string[] args) * of the output filename and directory name. */ - string outputfilename; + string? outputfilename; if (options.getoutputfile() == "") { outputfilename = docpath; @@ -1169,7 +1169,7 @@ static void Main(string[] args) i++) // Get the images of the PDF pages to create an image collection. 
{ Page docpage = pdfdocument.GetPage(pagelist[i]); - Rect PageRect = null; + Rect? PageRect = null; if (options.getpageregion().Equals("crop")) { PageRect = docpage.CropBox; diff --git a/Images/GetSeparatedImages/GetSeparatedImages.cs b/Images/GetSeparatedImages/GetSeparatedImages.cs index 741f413..23b1a49 100644 --- a/Images/GetSeparatedImages/GetSeparatedImages.cs +++ b/Images/GetSeparatedImages/GetSeparatedImages.cs @@ -6,7 +6,7 @@ * This sample demonstrates drawing a list of grayscale separations from a PDF file to multi-paged TIFF file. * * - * Copyright (c) 2007-2023, Datalogics, Inc. All rights reserved. + * Copyright (c) 2007-2024, Datalogics, Inc. All rights reserved. * */ @@ -49,6 +49,7 @@ static void Main(string[] args) } PageImageParams pip = new PageImageParams(); + pip.PageDrawFlags = DrawFlags.UseAnnotFaces; pip.HorizontalResolution = 300; pip.VerticalResolution = 300; diff --git a/Images/ImageExport/ImageExport.cs b/Images/ImageExport/ImageExport.cs index 0fdb2e5..27b8fe5 100644 --- a/Images/ImageExport/ImageExport.cs +++ b/Images/ImageExport/ImageExport.cs @@ -117,17 +117,17 @@ public void Export_Element_Images(Content content) else if (e is Container) { Console.WriteLine("Recursing through a Container"); - Export_Element_Images((e as Container).Content); + Export_Element_Images(((Container)e).Content); } else if (e is Group) { Console.WriteLine("Recursing through a Group"); - Export_Element_Images((e as Group).Content); + Export_Element_Images(((Group)e).Content); } else if (e is Form) { Console.WriteLine("Recursing through a Form"); - Export_Element_Images((e as Form).Content); + Export_Element_Images(((Form)e).Content); } i++; diff --git a/Images/ImageExtraction/ImageExtraction.cs b/Images/ImageExtraction/ImageExtraction.cs index 1e144bd..3685c0e 100644 --- a/Images/ImageExtraction/ImageExtraction.cs +++ b/Images/ImageExtraction/ImageExtraction.cs @@ -32,14 +32,14 @@ static void ExtractImages(Content content) Datalogics.PDFL.Image img = 
(Datalogics.PDFL.Image)e; using (SKBitmap sKBitmap = img.SKBitmap) { - using (FileStream f = File.OpenWrite("ImageExtraction-extract-out" + (next) + ".Png")) + using (FileStream f = File.OpenWrite("ImageExtraction-extract-out" + (next) + ".png")) sKBitmap.Encode(SKEncodedImageFormat.Png, 100).SaveTo(f); } Datalogics.PDFL.Image newimg = img.ChangeResolution(500); using (SKBitmap sKBitmap = newimg.SKBitmap) { - using (FileStream f = File.OpenWrite("ImageExtraction-extract-Resolution-500-out" + (next) + ".Png")) + using (FileStream f = File.OpenWrite("ImageExtraction-extract-Resolution-500-out" + (next) + ".png")) sKBitmap.Encode(SKEncodedImageFormat.Png, 100).SaveTo(f); } next++; @@ -47,15 +47,15 @@ static void ExtractImages(Content content) } else if (e is Datalogics.PDFL.Container) { - ExtractImages((e as Datalogics.PDFL.Container).Content); + ExtractImages(((Datalogics.PDFL.Container)e).Content); } else if (e is Datalogics.PDFL.Group) { - ExtractImages((e as Datalogics.PDFL.Group).Content); + ExtractImages(((Datalogics.PDFL.Group)e).Content); } else if (e is Form) { - ExtractImages((e as Form).Content); + ExtractImages(((Datalogics.PDFL.Form)e).Content); } } } diff --git a/Images/ImageResampling/ImageResampling.cs b/Images/ImageResampling/ImageResampling.cs index 52233ab..773d0f1 100644 --- a/Images/ImageResampling/ImageResampling.cs +++ b/Images/ImageResampling/ImageResampling.cs @@ -47,19 +47,19 @@ static void ResampleImages(Content content) else if (e is Container) { Console.WriteLine("Recursing through a Container"); - ResampleImages((e as Container).Content); + ResampleImages(((Container)e).Content); } else if (e is Group) { Console.WriteLine("Recursing through a Group"); - ResampleImages((e as Group).Content); + ResampleImages(((Group)e).Content); } else if (e is Form) { Console.WriteLine("Recursing through a Form"); - Content formcontent = (e as Form).Content; + Content formcontent = ((Form)e).Content; ResampleImages(formcontent); - (e as Form).Content = 
formcontent; + ((Form)e).Content = formcontent; } i++; diff --git a/Images/OutputPreview/OutputPreview.cs b/Images/OutputPreview/OutputPreview.cs index 9370784..b358f6e 100644 --- a/Images/OutputPreview/OutputPreview.cs +++ b/Images/OutputPreview/OutputPreview.cs @@ -6,7 +6,7 @@ * This sample demonstrates creating an Output Preview Image which is used during Soft Proofing prior to printing to visualize combining different Colorants. * * - * Copyright (c)2023, Datalogics, Inc. All rights reserved. + * Copyright (c)2023-2024, Datalogics, Inc. All rights reserved. * */ @@ -75,6 +75,7 @@ static void Main(string[] args) } PageImageParams pip = new PageImageParams(); + pip.PageDrawFlags = DrawFlags.UseAnnotFaces; pip.HorizontalResolution = 300; pip.VerticalResolution = 300; diff --git a/InformationExtraction/ListPaths/ListPaths.cs b/InformationExtraction/ListPaths/ListPaths.cs index df8d795..cd5b6d8 100644 --- a/InformationExtraction/ListPaths/ListPaths.cs +++ b/InformationExtraction/ListPaths/ListPaths.cs @@ -54,22 +54,22 @@ private static void ListPathsInContent(Content content, int pgno) if (e is Datalogics.PDFL.Path) { - ListPath(e as Datalogics.PDFL.Path, pgno); + ListPath((Datalogics.PDFL.Path)e, pgno); } else if (e is Container) { Console.WriteLine("Recurring through a Container"); - ListPathsInContent((e as Container).Content, pgno); + ListPathsInContent(((Datalogics.PDFL.Container)e).Content, pgno); } else if (e is Group) { Console.WriteLine("Recurring through a Group"); - ListPathsInContent((e as Group).Content, pgno); + ListPathsInContent(((Datalogics.PDFL.Group)e).Content, pgno); } else if (e is Form) { Console.WriteLine("Recurring through a Form"); - ListPathsInContent((e as Form).Content, pgno); + ListPathsInContent(((Datalogics.PDFL.Form)e).Content, pgno); } } } @@ -83,18 +83,18 @@ private static void ListPath(Datalogics.PDFL.Path path, int pgno) { if (segment is MoveTo) { - MoveTo moveto = segment as MoveTo; + MoveTo moveto = (MoveTo)segment; 
Console.WriteLine(" MoveTo x={0}, y={1}", moveto.Point.H, moveto.Point.V); } else if (segment is LineTo) { - LineTo lineto = segment as LineTo; + LineTo lineto = (LineTo)segment; Console.WriteLine(" LineTo x={0}, y={1}", lineto.Point.H, lineto.Point.V); } else if (segment is CurveTo) { - CurveTo curveto = segment as CurveTo; + CurveTo curveto = (CurveTo)segment; Console.WriteLine(" CurveTo x1={0}, y1={1}, x2={2}, y2={3}, x3={4}, y3={5}", curveto.Point1.H, curveto.Point1.V, curveto.Point2.H, curveto.Point2.V, @@ -102,21 +102,21 @@ private static void ListPath(Datalogics.PDFL.Path path, int pgno) } else if (segment is CurveToV) { - CurveToV curveto = segment as CurveToV; + CurveToV curveto = (CurveToV)segment; Console.WriteLine(" CurveToV x2={0}, y2={1}, x3={2}, y3={3}", curveto.Point2.H, curveto.Point2.V, curveto.Point3.H, curveto.Point3.V); } else if (segment is CurveToY) { - CurveToY curveto = segment as CurveToY; + CurveToY curveto = (CurveToY)segment; Console.WriteLine(" CurveToV x1={0}, y1={1}, x3={2}, y3={3}", curveto.Point1.H, curveto.Point1.V, curveto.Point3.H, curveto.Point3.V); } else if (segment is RectSegment) { - RectSegment rect = segment as RectSegment; + RectSegment rect = (RectSegment)segment; Console.WriteLine(" Rectangle x={0}, y={1}, width={2}, height={3}", rect.Point.H, rect.Point.V, rect.Width, rect.Height); diff --git a/InformationExtraction/Metadata/Metadata.cs b/InformationExtraction/Metadata/Metadata.cs index 60063c2..0d0550c 100644 --- a/InformationExtraction/Metadata/Metadata.cs +++ b/InformationExtraction/Metadata/Metadata.cs @@ -75,8 +75,8 @@ private static void DisplayImageMetadata(String input) { // Demonstrate getting data from an image Content content = doc.GetPage(0).Content; - Container container = (Container) content.GetElement(0); - Datalogics.PDFL.Image image = (Datalogics.PDFL.Image) container.Content.GetElement(0); + Container container = (Container)content.GetElement(0); + Datalogics.PDFL.Image image = 
(Datalogics.PDFL.Image)container.Content.GetElement(0); String metadata = image.Stream.Dict.XMPMetadata; Console.WriteLine("Ducky CreatorTool: {0}\n", GetCreatorToolAttribute(metadata)); } @@ -84,34 +84,52 @@ private static void DisplayImageMetadata(String input) static string GetTitle(string xmlstring) { + string title = ""; + XmlDocument xmldoc = new XmlDocument(); xmldoc.LoadXml(xmlstring); - XmlElement element = (XmlElement) xmldoc.GetElementsByTagName("dc:title")[0]; - XmlNode titleNode = element.GetElementsByTagName("rdf:li")[0]; - return GetText(titleNode.ChildNodes); + XmlElement? element = (XmlElement?)xmldoc.GetElementsByTagName("dc:title")[0]; + if (element != null) + { + XmlNode? titleNode = element.GetElementsByTagName("rdf:li")[0]; + if (titleNode != null) + { + title = GetText(titleNode.ChildNodes); + } + } + + return title; } // ReSharper disable once UnusedMember.Local static string GetCreatorTool(string xmlstring) { + string creatorTool = ""; XmlDocument xmldoc = new XmlDocument(); xmldoc.LoadXml(xmlstring); - XmlElement element = (XmlElement) xmldoc.GetElementsByTagName("xap:CreatorTool")[0]; - return GetText(element.ChildNodes); + XmlElement? 
element = (XmlElement?)xmldoc.GetElementsByTagName("xap:CreatorTool")[0]; + if (element != null) + { + creatorTool = GetText(element.ChildNodes); + } + + return creatorTool; } static string GetCreatorToolAttribute(string xmlstring) { + string creatorToolAttribute = ""; /* fallback returned when no rdf:Description carries xap:CreatorTool */ + XmlDocument xmldoc = new XmlDocument(); xmldoc.LoadXml(xmlstring); foreach (XmlNode node in xmldoc.GetElementsByTagName("rdf:Description")) { XmlElement e = (XmlElement) node; if (e.HasAttribute("xap:CreatorTool")) return e.GetAttribute("xap:CreatorTool"); } - return null; + return creatorToolAttribute; } static string GetText(XmlNodeList nodeList) diff --git a/OpticalCharacterRecognition/AddTextToDocument/AddTextToDocument.cs b/OpticalCharacterRecognition/AddTextToDocument/AddTextToDocument.cs index dad4924..28f2d4b 100644 --- a/OpticalCharacterRecognition/AddTextToDocument/AddTextToDocument.cs +++ b/OpticalCharacterRecognition/AddTextToDocument/AddTextToDocument.cs @@ -32,15 +32,15 @@ static void AddTextToImages(Document doc, Content content, OCREngine engine) } else if (e is Container) { - AddTextToImages(doc, (e as Container).Content, engine); + AddTextToImages(doc, ((Container)e).Content, engine); } else if (e is Group) { - AddTextToImages(doc, (e as Group).Content, engine); + AddTextToImages(doc, ((Group)e).Content, engine); } else if (e is Form) { - AddTextToImages(doc, (e as Form).Content, engine); + AddTextToImages(doc, ((Form)e).Content, engine); } } } diff --git a/Text/AddUnicodeText/AddUnicodeText.cs b/Text/AddUnicodeText/AddUnicodeText.cs index f66a37a..7adc8e6 100644 --- a/Text/AddUnicodeText/AddUnicodeText.cs +++ b/Text/AddUnicodeText/AddUnicodeText.cs @@ -88,7 +88,7 @@ static void Main(string[] args) foreach (String str in strings) { // Find a font that can represent all characters in the string, if there is one. - Font font = GetRepresentableFont(fonts, str); + Font? 
font = GetRepresentableFont(fonts, str); if (font == null) { Console.WriteLine( @@ -117,7 +117,7 @@ static void Main(string[] args) } } - static Font GetRepresentableFont(List fonts, String str) + static Font? GetRepresentableFont(List fonts, String str) { foreach (Font font in fonts) { diff --git a/Text/AddVerticalText/AddVerticalText.cs b/Text/AddVerticalText/AddVerticalText.cs index 81d8ce2..657d8f2 100644 --- a/Text/AddVerticalText/AddVerticalText.cs +++ b/Text/AddVerticalText/AddVerticalText.cs @@ -65,7 +65,7 @@ static void Main(string[] args) foreach (String str in strings) { // Find a font that can represent all characters in the string, if there is one. - Font font = GetRepresentableFont(fonts, str); + Font? font = GetRepresentableFont(fonts, str); if (font == null) { Console.WriteLine( @@ -94,7 +94,7 @@ static void Main(string[] args) } } - static Font GetRepresentableFont(List fonts, String str) + static Font? GetRepresentableFont(List fonts, String str) { foreach (Font font in fonts) { diff --git a/Text/ExtractAcroFormFieldData/ExtractAcroFormFieldData.cs b/Text/ExtractAcroFormFieldData/ExtractAcroFormFieldData.cs index 3b001fe..27aa8d5 100644 --- a/Text/ExtractAcroFormFieldData/ExtractAcroFormFieldData.cs +++ b/Text/ExtractAcroFormFieldData/ExtractAcroFormFieldData.cs @@ -17,7 +17,7 @@ class ExtractAcroFormFieldData { // Set Defaults static String sInput = Library.ResourceDirectory + "Sample_Input/ExtractAcroFormFieldData.pdf"; - static String sOutput = "../ExtractAcroFormFieldData-out.json"; + static String sOutput = "ExtractAcroFormFieldData-out.json"; static void Print(AcroFormTextFieldObject t) { diff --git a/Text/ExtractCJKTextByPatternMatch/ExtractCJKTextByPatternMatch.cs b/Text/ExtractCJKTextByPatternMatch/ExtractCJKTextByPatternMatch.cs index 8114f03..a397a04 100644 --- a/Text/ExtractCJKTextByPatternMatch/ExtractCJKTextByPatternMatch.cs +++ b/Text/ExtractCJKTextByPatternMatch/ExtractCJKTextByPatternMatch.cs @@ -24,7 +24,7 @@ static void 
Main(string[] args) // Set Defaults String sInput = Library.ResourceDirectory + "Sample_Input/ExtractUnicodeText.pdf"; - String sOutput = "../ExtractCJKTextByPatternMatch-out.txt"; + String sOutput = "ExtractCJKTextByPatternMatch-out.txt"; String sPattern = "『世界人権宣言』"; using (Document doc = new Document(sInput)) diff --git a/Text/ExtractTextByPatternMatch/ExtractTextByPatternMatch.cs b/Text/ExtractTextByPatternMatch/ExtractTextByPatternMatch.cs index 66f8de3..b147358 100644 --- a/Text/ExtractTextByPatternMatch/ExtractTextByPatternMatch.cs +++ b/Text/ExtractTextByPatternMatch/ExtractTextByPatternMatch.cs @@ -24,7 +24,7 @@ static void Main(string[] args) // Set Defaults String sInput = Library.ResourceDirectory + "Sample_Input/ExtractTextByPatternMatch.pdf"; - String sOutput = "../ExtractTextByPatternMatch-out.txt"; + String sOutput = "ExtractTextByPatternMatch-out.txt"; String sPattern = "((1-)?(\\()?\\d{3}(\\))?(\\s)?(-)?\\d{3}-\\d{4})"; // phone numbers using (Document doc = new Document(sInput)) diff --git a/Text/ExtractTextByRegion/ExtractTextByRegion.cs b/Text/ExtractTextByRegion/ExtractTextByRegion.cs index d32d76c..7911294 100644 --- a/Text/ExtractTextByRegion/ExtractTextByRegion.cs +++ b/Text/ExtractTextByRegion/ExtractTextByRegion.cs @@ -16,7 +16,7 @@ class ExtractTextByRegion { // Set Defaults static String sInput = Library.ResourceDirectory + "Sample_Input/ExtractTextByRegion.pdf"; - static String sOutput = "../ExtractTextByRegion-out.txt"; + static String sOutput = "ExtractTextByRegion-out.txt"; // Rectangular region to extract text in points (origin of the page is bottom left) // (545,576,694,710) is a rectangle encompassing the invoice entry for this sample. @@ -51,7 +51,7 @@ static void Main(string[] args) bool allQuadsWithinRegion = true; // A Word typically has only 1 quad, but can have more than one // for hyphenated words, words on a curve, etc. - foreach (Quad quad in textInfo.Quads) + foreach (Quad quad in textInfo.Quads ?? 
Enumerable.Empty()) { if (!CheckWithinRegion(quad)) { diff --git a/Text/ExtractTextFromAnnotations/ExtractTextFromAnnotations.cs b/Text/ExtractTextFromAnnotations/ExtractTextFromAnnotations.cs index 1568174..b694996 100644 --- a/Text/ExtractTextFromAnnotations/ExtractTextFromAnnotations.cs +++ b/Text/ExtractTextFromAnnotations/ExtractTextFromAnnotations.cs @@ -19,7 +19,7 @@ class ExtractTextFromAnnotations // Set Defaults static String sInput = Library.ResourceDirectory + "Sample_Input/sample_annotations.pdf"; - static String sOutput = "../ExtractTextFromAnnotations-out.json"; + static String sOutput = "ExtractTextFromAnnotations-out.json"; static void Print(AnnotationTextObject t) { diff --git a/Text/ExtractTextFromMultiRegions/ExtractTextFromMultiRegions.cs b/Text/ExtractTextFromMultiRegions/ExtractTextFromMultiRegions.cs index a41b825..d866f65 100644 --- a/Text/ExtractTextFromMultiRegions/ExtractTextFromMultiRegions.cs +++ b/Text/ExtractTextFromMultiRegions/ExtractTextFromMultiRegions.cs @@ -17,7 +17,7 @@ class ExtractTextFromMultiRegions { // Set Defaults static String sInput = Library.ResourceDirectory + "Sample_Input/ExtractTextFromMultiRegions"; - static String sOutput = "../ExtractTextFromMultiRegions-out.csv"; + static String sOutput = "ExtractTextFromMultiRegions-out.csv"; // Rectangular regions to extract text in points (origin of the page is bottom left) // (Left, Right, Bottom, Top) @@ -73,7 +73,7 @@ static void Main(string[] args) bool allQuadsWithinRegion = true; // A Word typically has only 1 quad, but can have more than one // for hyphenated words, words on a curve, etc. - foreach (Quad quad in textInfo.Quads) + foreach (Quad quad in textInfo.Quads ?? 
Enumerable.Empty()) { if (!CheckWithinRegion(quad, region)) { diff --git a/Text/ExtractTextPreservingStyleAndPositionInfo/ExtractTextPreservingStyleAndPositionInfo.cs b/Text/ExtractTextPreservingStyleAndPositionInfo/ExtractTextPreservingStyleAndPositionInfo.cs index 441e937..7b0f95d 100644 --- a/Text/ExtractTextPreservingStyleAndPositionInfo/ExtractTextPreservingStyleAndPositionInfo.cs +++ b/Text/ExtractTextPreservingStyleAndPositionInfo/ExtractTextPreservingStyleAndPositionInfo.cs @@ -19,7 +19,7 @@ class ExtractTextPreservingStyleAndPositionInfo { // Set Defaults static String sInput = Library.ResourceDirectory + "Sample_Input/sample.pdf"; - static String sOutput = "../ExtractTextPreservingStyleAndPositionInfo-out.json"; + static String sOutput = "ExtractTextPreservingStyleAndPositionInfo-out.json"; static void Main(string[] args) { @@ -57,7 +57,7 @@ static void SaveJson(List result) writer.WriteStartObject(); writer.WriteString("text", resultText.Text); writer.WriteStartArray("quads"); - foreach (Quad quad in resultText.Quads) + foreach (Quad quad in resultText.Quads ?? Enumerable.Empty()) { writer.WriteStartObject(); writer.WriteString("top-left", quad.TopLeft.ToString()); @@ -68,18 +68,39 @@ static void SaveJson(List result) } writer.WriteEndArray(); writer.WriteStartArray("styles"); - foreach (DLStyleTransition st in resultText.StyleList) + foreach (DLStyleTransition st in resultText.StyleList ?? 
Enumerable.Empty()) { writer.WriteStartObject(); writer.WriteString("char-index", st.CharIndex.ToString()); - writer.WriteString("font-name", st.Style.FontName.ToString()); - - writer.WriteString("font-size", Math.Round(st.Style.FontSize, 2).ToString()); - writer.WriteString("color-space", st.Style.Color.Space.Name); + string fontName = ""; + if (st.Style != null && st.Style.FontName != null) + { + fontName = st.Style.FontName; + } + writer.WriteString("font-name", fontName); + double fontSize = 0; + if (st.Style != null) + { + fontSize = st.Style.FontSize; + } + writer.WriteString("font-size", Math.Round(fontSize, 2).ToString()); + string colorSpaceName = ""; + if (st.Style != null && st.Style.Color != null && st.Style.Color.Space != null && st.Style.Color.Space.Name != null) + { + colorSpaceName = st.Style.Color.Space.Name; + } + writer.WriteString("color-space", colorSpaceName); writer.WriteStartArray("color-values"); - foreach (double cv in st.Style.Color.Value) + if (st.Style != null) { - writer.WriteStringValue(Math.Round(cv, 3).ToString()); + DLColor? color = st.Style.Color; + if (color != null) + { + foreach (double cv in color.Value ?? 
Enumerable.Empty()) + { + writer.WriteStringValue(Math.Round(cv, 3).ToString()); + } + } } writer.WriteEndArray(); writer.WriteEndObject(); diff --git a/Text/ListWords/ListWords.cs b/Text/ListWords/ListWords.cs index 98ea9fb..b7c7b41 100644 --- a/Text/ListWords/ListWords.cs +++ b/Text/ListWords/ListWords.cs @@ -54,7 +54,7 @@ static void Main(string[] args) wordConfig.NoStyleInfo = false; // text extraction efficiency WordFinder wordFinder = new WordFinder(doc, WordFinderVersion.Latest, wordConfig); - IList pageWords = null; + IList pageWords = new List(); for (int i = 0; i < nPages; i++) { pageWords = wordFinder.GetWordList(i); diff --git a/Text/RegexExtractText/RegexExtractText.cs b/Text/RegexExtractText/RegexExtractText.cs index efe0fda..61de480 100644 --- a/Text/RegexExtractText/RegexExtractText.cs +++ b/Text/RegexExtractText/RegexExtractText.cs @@ -47,16 +47,16 @@ public class TopRight public class QuadLocation { [JsonPropertyName("bottom-left")] - public BottomLeft bottomLeft { get; set; } + public BottomLeft? bottomLeft { get; set; } [JsonPropertyName("bottom-right")] - public BottomRight bottomRight { get; set; } + public BottomRight? bottomRight { get; set; } [JsonPropertyName("top-left")] - public TopLeft topLeft { get; set; } + public TopLeft? topLeft { get; set; } [JsonPropertyName("top-right")] - public TopRight topRight { get; set; } + public TopRight? topRight { get; set; } } // This class represents a match quad's location (the quad coordinates and page number that quad is located on). @@ -66,23 +66,23 @@ public class MatchQuadInformation public int pageNumber { get; set; } [JsonPropertyName("quad-location")] - public QuadLocation quadLocation { get; set; } + public QuadLocation? quadLocation { get; set; } } // This class represents the information that is associated with a match (match phrase and match quads). public class MatchObject { [JsonPropertyName("match-phrase")] - public string matchPhrase { get; set; } + public string? 
matchPhrase { get; set; } [JsonPropertyName("match-quads")] - public List matchQuads { get; set; } + public List? matchQuads { get; set; } } // This class represents the final JSON that will be written to the output JSON file. public class DocTextFinderJson { - public List documentJson; + public List? documentJson; } class RegexExtractText diff --git a/Text/TextExtract/TextExtract.cs b/Text/TextExtract/TextExtract.cs index 908f6a1..c8dcf5e 100644 --- a/Text/TextExtract/TextExtract.cs +++ b/Text/TextExtract/TextExtract.cs @@ -88,7 +88,7 @@ static void Main(string[] args) static void ExtractTextUntagged(Document doc, WordFinder wordFinder) { int nPages = doc.NumPages; - IList pageWords = null; + IList pageWords = new List(); System.IO.StreamWriter logfile = new System.IO.StreamWriter("TextExtract-untagged-out.txt"); Console.WriteLine("Writing TextExtract-untagged-out.txt"); @@ -151,7 +151,7 @@ static void ExtractTextUntagged(Document doc, WordFinder wordFinder) static void ExtractTextTagged(Document doc, WordFinder wordFinder) { int nPages = doc.NumPages; - IList pageWords = null; + IList pageWords = new List(); System.IO.StreamWriter logfile = new System.IO.StreamWriter("TextExtract-tagged-out.txt"); Console.WriteLine("Writing TextExtract-tagged-out.txt"); diff --git a/_Common/ExtractText.cs b/_Common/ExtractText.cs index 8591b8f..7d53605 100644 --- a/_Common/ExtractText.cs +++ b/_Common/ExtractText.cs @@ -19,57 +19,57 @@ namespace ExtractTextNameSpace // This class represents the text info. public class TextObject { - public string Text { get; set; } + public string? Text { get; set; } } public class DLColorSpace { - public string Name { get; set; } + public string? Name { get; set; } public int NumComponents { get; set; } } public class DLColor { - public IList Value { get; set; } - public DLColorSpace Space { get; set; } + public IList? Value { get; set; } + public DLColorSpace? 
Space { get; set; } } public class DLStyle { - public DLColor Color { get; set; } + public DLColor? Color { get; set; } public double FontSize { get; set; } - public string FontName { get; set; } + public string? FontName { get; set; } } public class DLStyleTransition { public int CharIndex { get; set; } - public DLStyle Style { get; set; } + public DLStyle? Style { get; set; } } // This class represents the text and details info. public class TextAndDetailsObject { - public string Text { get; set; } - public IList CharQuads { get; set; } - public IList Quads { get; set; } - public IList StyleList { get; set; } + public string? Text { get; set; } + public IList? CharQuads { get; set; } + public IList? Quads { get; set; } + public IList? StyleList { get; set; } } // This class represents the AcroForm text info. public class AcroFormTextFieldObject { [JsonPropertyName("field-name")] - public string AcroFormFieldName { get; set; } + public string? AcroFormFieldName { get; set; } [JsonPropertyName("field-text")] - public string AcroFormFieldText { get; set; } + public string? AcroFormFieldText { get; set; } } // This class represents the Annotation text info. public class AnnotationTextObject { [JsonPropertyName("annotation-type")] - public string AnnotationType { get; set; } + public string? AnnotationType { get; set; } [JsonPropertyName("annotation-text")] - public string AnnotationText { get; set; } + public string? AnnotationText { get; set; } } public class ExtractText : IDisposable @@ -77,7 +77,7 @@ public class ExtractText : IDisposable private Document doc; private WordFinder wordFinder; - private IList pageWords = null; + private IList pageWords = new List(); public ExtractText(Document inputDoc) {