diff --git a/FormsExtension/index.html b/FormsExtension/index.html index 38f5008c..7f2b506e 100644 --- a/FormsExtension/index.html +++ b/FormsExtension/index.html @@ -15,10 +15,10 @@ gtag('config', 'G-L4MQFNK7VK');
-Datalogics offers the Forms Extension, a software module that allows applications built using the Adobe PDF Library to work with PDF AcroForm and XFA forms documents. The extension works with the Windows 32 and 64-bit Adobe PDF Library platforms and is available for separate purchase.
+Datalogics offers the Forms Extension, a software module that allows applications built using the Adobe PDF Library to work with PDF AcroForm and XFA forms documents. The extension is available for separate purchase. With the APDFL18 integration, support for Linux x86_64 was added alongside the previously supported Windows x64 systems.
CosByteMax
CosCryptVersion
CosStreamStartAndCode
CosType
DLPDEImageExportParams
FormsExtensionVersion
OPAQUE_32_BITS
OptimizedFont
OptimizedImage
PDColorConvertParams
OPAQUE_32_BITS
OptimizedFont
OptimizedImage
PDColorConvertParams
PDColorConvertParamsEx
PDCount
PDDocFlags
PDDocTextFinderMatchQuadRec
PDEncodingType
PDFOptimizerReport
PDFlatten
PDFlattenTilingMode
PDFlatten
PDFlattenTilingMode
PDImageScalar
PDPageEnumInksParam
PDPageNumber
PDPrintFont
PDPrintMarkStyles
PDPrintParams
_t_PDEFontCreateParams
_t_PDEGlyphDescription
_t_PDEGlyphRun
_t_PDEGraphicState
_t_PDEGraphicStateEx
_t_PDEImageAttrs
_t_PDEImageJPXAttrs
_t_PDEPathData
_t_PDEImageJPXAttrs
_t_PDEPathData
_t_PDESpanItem
_t_PDESpanSet
_t_PDETextState
_t_PDFLData
_t_PDFLPrintUserParams
_t_PDFLPrintUserParamsEx
_t_PDPrintClient
_t_PDResourceEnumMonitor
_t_PDSMCInfo
_t_PDSysFontPlatData
_t_PDThumbCreationServer
_t_PDUserPropertiesXMLLabels
_t_PDWordFinderConfig
_t_ProgressMonitor
_t_PDWordFinderConfig
_t_ProgressMonitor
_t_StdSecurityData
PDFProcessorPDFAConvertParams
PDFontMetric
PDSmallFlagBits
_t_PDEIndexedColorData
_t_PDFLPrintUserParams
_t_PDFLPrintUserParamsEx
_t_PDWordFinderConfig
_t_PDWordFinderConfig
ASTextFromUns32
ASTimeSpanSet
CosCryptStringProc
CosDocGetAdobeExtensionLevel
CosDocSetAdobeExtensionLevel
PDAnnotSetFlags
PDCryptGetSecurityInfoProc
PDDocConvertXFAFieldsToAcroFormFields
PDCryptGetSecurityInfoProc
PDDocConvertXFAFieldsToAcroFormFields
PDDocEmbedFonts
PDDocEmbedFontsFromFontArray
PDDocFlattenXFAFields
PDDocFlattenXFAFieldsAsIfPrinted
PDDocFlattenXFAFields
PDDocFlattenXFAFieldsAsIfPrinted
PDDocGetNewSecurityInfo
PDDocGetVersionEx
PDDocReadAhead
PDDocReplaceUnembeddedSimpleFonts
PDEAddTag
PDEAttrEnumProc
PDPrefSetWorkingRGB
PDPrintEmitFontProc
PDSysFontGetAttrs
PDSysFontGetInfo
PDTextSelectEnumRTFTextProc
PDWordCreateTextSelect
PDWordFinderCtrlProc
PDWordFinderEnumWordsStr
PDWordFinderCtrlProc
PDWordFinderEnumWordsStr
PDWordGetASText
PDWordGetAttrEx
PDWordGetByteIdxFromHiliteChar
PDWordGetCharEncFlags
PDWordGetCharOffsetEx
PDWordGetCharQuad
_t_PDEFontCreateFromSysFontParams
_t_PDEFontCreateParams
_t_PDEGlyphDescription
_t_PDEGraphicState
_t_PDEGraphicStateEx
_t_PDEICCBasedColorData
_t_PDEImageAttrs
_t_PDEImageJPXAttrs
_t_PDEImageAttrs
_t_PDEImageJPXAttrs
_t_PDEIndexedColorData
_t_PDEPSAttrs
_t_PDETextState
_t_PDFLData
_t_PDFLMemStats
_t_PDFontFlags
typedef
struct
_t_HFTServer
*
HFTServer
;
-
-HFTServerNew | - |
HFTNew | HFTNewEx |
HFTServerDestroy | HFTServerDestroyProc |
HFTServerProvideHFTProc | - |
HFT
HFTServerProvideHFTProc(
HFTServer
hftServer
,
ASVersion
version
,
void
*
rock
);
+HFT
HFTServerProvideHFTProc(
HFTServer
hftServer
,
ASVersion
version
,
void
*
rock
);
HFT
HFTNew(
HFTServer
hftServer
,
ASTCount
numSelectors
);
+HFT
HFTNew(
HFTServer
hftServer
,
ASTCount
numSelectors
);
HFT
HFTNewEx(
HFTServer
hftServer
,
HFTData
data
);
+HFT
HFTNewEx(
HFTServer
hftServer
,
HFTData
data
);
typedef
struct
_t_HFTServer
*
HFTServer
;
+
+HFTServerNew | + |
HFTNew | HFTNewEx |
HFTServerDestroy | HFTServerDestroyProc |
HFTServerProvideHFTProc | + |
void
HFTServerDestroyProc(
HFTServer
hftServer
,
void
*
rock
);
+void
HFTServerDestroyProc(
HFTServer
hftServer
,
void
*
rock
);
void
HFTServerDestroy(
HFTServer
hftServer
);
+void
HFTServerDestroy(
HFTServer
hftServer
);
HFTServer
HFTServerNew(
const
char
*
name
,
HFTServerProvideHFTProc
serverProc
,
HFTServerDestroyProc
destroyProc
,
void
*
clientData
);
+HFTServer
HFTServerNew(
const
char
*
name
,
HFTServerProvideHFTProc
serverProc
,
HFTServerDestroyProc
destroyProc
,
void
*
clientData
);
OptimizedFont | PDSAttrObj |
OptimizedFont | PDSAttrObj | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
PDSClassMap | PDSElement | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
PDSMCR | PDSOBJR | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
PDSRoleMap | PDSTreeRoot |
OptimizedImage |
+
Used In
DLPDEImageExportParams@@ -350,92 +348,7 @@SyntaxUsed By-
OptimizedFont- -Header: DLExtrasExpT.h:294
-Description- -Structure representing an Optimized Font.
-Syntax- -
Used In- -
OptimizedImage- -Header: DLExtrasExpT.h:278
-Description- -Structure representing an Optimized Image.
-Syntax- -
Used In- -
GetFormsExtensionVersionNumber- -Header: DLExtrasProcs.h:1328
-Description- -Retrieves the Forms Extension Version Number. *
-Syntax- -
-
-Parameters- -
IsFormsExtensionSupported- -Header: DLExtrasProcs.h:1189
-Description- -Validate the Forms Extension for APDFL dependencies are present.
-Syntax- -
-
-Returns- -if True, the Forms Extension for APDFL dependencies are present.
-PDFLAddFontDirectoriesHeader: DLExtrasProcs.h:432
diff --git a/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/dlextras_PDDoc.html b/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/dlextras_PDDoc.html
index 73476933..0f8518d1 100644
--- a/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/dlextras_PDDoc.html
+++ b/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/dlextras_PDDoc.html
@@ -64,7 +64,7 @@ Enum ConstantsUsed By-
_t_PDEImageJPXAttrs+ +Header: PEExpT.h:2360
+Description+ +Attributes of a JPX image.
+Syntax+ +
Used By+ +
PDEImage FunctionsUsed ByDescription-Acquires the PDEImageFlate resource of the PDEImage content element when the image filter type is Call PDERelease() to dispose of the PDEImageFlate when finished with it. Acquires the PDEImageFlate resource of the PDEImage content element when the image filter type is Call PDERelease() to dispose of the PDEImageFlate when finished with it. Syntax-
+
Parameters@@ -318,7 +399,7 @@ParametersReturns-a PDEImageFlate resource object.
+a PDEImageFlate resource object.
Exceptionsdiff --git a/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/pdelayer_PDEImageFlate.html b/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/pdelayer_PDEImageFlate.html index 2bd920b6..a2cdd4ee 100644 --- a/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/pdelayer_PDEImageFlate.html +++ b/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/pdelayer_PDEImageFlate.html @@ -22,41 +22,8 @@PDEImageFlate Typedefs
PDEImageFlate- -Header: PEExpT.h:415
-Description- -A reference to a PDEImageFlate.
-Syntax- -
-
-Returned From- -
Used By- -
PDEImageFlate Functions
Used BySyntax-
+
Parameters
Returns@@ -107,7 +74,7 @@ReturnsSyntax-
+
Parameters@@ -146,7 +113,7 @@ExceptionsSyntax-
+
Parameters@@ -171,7 +138,7 @@ExceptionsDescription-Gets a data stream for a flate compressed image, PDEImageFlate object. Gets a data stream for a flate compressed image, PDEImageFlate object. The caller must dispose of the returned ASStm by calling ASStmClose(). ExceptionsSyntax-
+
Parametersdiff --git a/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/pdelayer_PDEImageJPX.html b/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/pdelayer_PDEImageJPX.html index 9af64b16..c15b31e3 100644 --- a/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/pdelayer_PDEImageJPX.html +++ b/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/pdelayer_PDEImageJPX.html @@ -23,7 +23,6 @@PDF Edit Layer: PDEImageJPXUsed ByPDEImageJPX Structures
_t_PDEImageJPXAttrs- -Header: PEExpT.h:2360
-Description- -Attributes of a JPX image.
-Syntax- -
Used By- -
PDEImageJPX Functions
ExceptionsSyntax-
+
Parameters@@ -272,7 +212,7 @@Parameters | IN/OUT A JPX encoded image object.
| |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
attrsP | IN/OUT (Filled by the method) A pointer to a PDEImageJPXAttrs structure containing the attributes of the JPX encoded image.
+ | IN/OUT (Filled by the method) A pointer to a PDEImageJPXAttrs structure containing the attributes of the JPX encoded image.
| |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
attrsSize | IN/OUT The size of the
diff --git a/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/pdfllayer_General.html b/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/pdfllayer_General.html
index b30381da..226b568e 100644
--- a/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/pdfllayer_General.html
+++ b/apdfl18/CPlusPlus/APDFL18.0.4PlusP2w/pdfllayer_General.html
@@ -674,6 +674,7 @@ attrsP buffer in bytes. SyntaxEnum Constants | Emit an EPS file with extended preview.
|
Value options for PDFlattenTilingMode.
kPDNoTiling = 0 | No tiling.
+ |
kPDConstantTiling | Constant tiling.
+ |
kPDAdaptiveTiling | Adaptive tiling.
+ |
typedef
ASInt32
PDEncodingType
;
+For value options see PDTilings.
typedef
ASInt32
PDFlattenTilingMode
;
+
+PDFlatten | + |
Value options for PDFlattenTilingMode.
kPDNoTiling = 0 | No tiling.
- |
kPDConstantTiling | Constant tiling.
- |
kPDAdaptiveTiling | Adaptive tiling.
- |
For value options see PDTilings.
typedef
ASInt32
PDFlattenTilingMode
;
-
-PDFlatten | - |
sizeof(PDFlattenRec)
. 0
= no tiling; 1
= constant tiling; 2
= adaptive tiling PDDocClearErrors
PDDocClearFlags
PDDocClose
PDDocColorConvertEmbedOutputIntent
PDDocColorConvertEmbedOutputIntentEx
PDDocColorConvertPage
PDDocColorConvertPageEx
PDDocConvertXFAFieldsToAcroFormFields
PDDocColorConvertPageEx
PDDocConvertXFAFieldsToAcroFormFields
PDDocCopyToFile
PDDocCountXAPMetadataArrayItems
PDDocCreateNameTree
PDDocCreatePDCollection
PDDocCreatePage
PDDocCreateRedaction
PDDocEnumLoadedFonts
PDDocEnumOCConfigs
PDDocEnumOCGs
PDDocEnumPDSElementsWithUserProperties
PDDocEnumProc
PDDocEnumResources
PDDocExportAcroFormsData
PDDocExportNotes
PDDocExportSomeNotes
PDDocExportXFAFormsData
PDDocExportAcroFormsData
PDDocExportNotes
PDDocExportSomeNotes
PDDocExportXFAFormsData
PDDocFindPageNumForLabel
PDDocFindPageNumForLabelEx
PDDocFlattenAcroFormFields
PDDocFlattenNonFormAnnotations
PDDocFlattenOC
PDDocFlattenXFAFields
PDDocFlattenXFAFieldsAsIfPrinted
PDDocGetAdobePDFVersion
PDDocFlattenAcroFormFields
PDDocFlattenNonFormAnnotations
PDDocFlattenOC
PDDocFlattenXFAFields
PDDocFlattenXFAFieldsAsIfPrinted
PDDocGetAdobePDFVersion
PDDocGetBookmarkRoot
PDDocGetCosDoc
PDDocGetCryptHandler
PDDocGetCryptHandlerClientData
PDDocGetCryptRevision
PDDocGetCryptVersion
PDDocGetFile
PDDocGetFlags
PDDocGetFormsType
PDDocGetFullScreen
PDDocGetFormsType
PDDocGetFullScreen
PDDocGetID
PDDocGetInfo
PDDocGetInfoASText
PDDocGetLabelForPageNum
PDDocGetLabelForPageNumEx
PDDocGetLayoutMode
PDDocGetXAPMetadataArrayItem
PDDocGetXAPMetadataCompactOptional
PDDocGetXAPMetadataProperty
PDDocHasISOExtensions
PDDocHasOC
PDDocHasSignature
PDDocHasUserProperties
PDDocImportAcroFormsData
PDDocHasUserProperties
PDDocImportAcroFormsData
PDDocImportCosDocNotes
PDDocImportNotes
PDDocImportXFAFormsData
PDDocInsertPages
PDDocIsDynamicXFA
PDDocIsStaticXFA
PDDocImportXFAFormsData
PDDocInsertPages
PDDocIsDynamicXFA
PDDocIsStaticXFA
PDDocMergeXAPKeywords
PDDocMovePage
PDDocNewSecurityData
PDDocPermRequest
PDDocPermRequestNoUB
PDDocPreSaveProc
This method also works for non-Roman (CJK or Chinese-Japanese-Korean) viewers. In this case, words are extracted to the host encoding. Developers desiring Unicode output must use PDDocCreateWordFinderUCS(), which does the extraction for Roman or non-Roman text.
-
The type of PDWordFinder determines the encoding of the string returned by PDWordGetString(). For instance, if PDDocCreateWordFinderUCS() is used to create the word finder, PDWordGetString() returns only Unicode.
The type of PDWordFinder determines the encoding of the string returned by PDWordGetString(). For instance, if PDDocCreateWordFinderUCS() is used to create the word finder, PDWordGetString() returns only Unicode.
For CJK viewers, words are stored internally using CID encoding.
@@ -3960,7 +3960,7 @@
PDWordFinder
PDDocCreateWordFinder(
PDDoc
doc
,
ASUns16
*
outEncInfo
,
char
*
*
outEncVec
,
char
*
*
ligatureTbl
,
ASInt16
algVersion
,
ASUns16
rdFlags
,
void
*
clientData
);
+PDWordFinder
PDDocCreateWordFinder(
PDDoc
doc
,
ASUns16
*
outEncInfo
,
char
*
*
outEncVec
,
char
*
*
ligatureTbl
,
ASInt16
algVersion
,
ASUns16
rdFlags
,
void
*
clientData
);
PDWordFinder
PDDocCreateWordFinderEx(
PDDoc
doc
,
ASInt16
algVersion
,
ASBool
outUnicode
,
PDWordFinderConfig
wbConfig
);
+PDWordFinder
PDDocCreateWordFinderEx(
PDDoc
doc
,
ASInt16
algVersion
,
ASBool
outUnicode
,
PDWordFinderConfig
wbConfig
);
PDDocCreateWordFinder() also works for non-Roman character set viewers. For PDDocCreateWordFinder(), words are extracted to the host encoding. Users desiring Unicode output should use PDDocCreateWordFinderUCS().
-
The type of PDWordFinder determines the encoding of the string returned by PDWordGetString(). If PDDocCreateWordFinderUCS() is used to create the word finder, PDWordGetString() returns only Unicode. Note that there is no way to detect Unicode strings returned by PDWordGetString(), since there is no UCS header (FEFF) added to each string returned.
The type of PDWordFinder determines the encoding of the string returned by PDWordGetString(). If PDDocCreateWordFinderUCS() is used to create the word finder, PDWordGetString() returns only Unicode. Note that there is no way to detect Unicode strings returned by PDWordGetString(), since there is no UCS header (FEFF) added to each string returned.
In CJK viewers, words are stored internally using CID encoding. See the description of Composite Fonts and CIDFonts in the ISO 32000-1:2008, Document Management-Portable Document Format-Part 1: PDF 1.7, section 9.7, page 267.
PDWordFinder
PDDocCreateWordFinderUCS(
PDDoc
doc
,
ASInt16
algVersion
,
ASUns16
rdFlags
,
void
*
clientData
);
+PDWordFinder
PDDocCreateWordFinderUCS(
PDDoc
doc
,
ASInt16
algVersion
,
ASUns16
rdFlags
,
void
*
clientData
);
PDWordFinder
PDDocGetWordFinder(
PDDoc
docP
,
ASInt16
WXEVersion
);
+PDWordFinder
PDDocGetWordFinder(
PDDoc
docP
,
ASInt16
WXEVersion
);
typedef
struct
_t_PDWordFinder
*
PDWordFinder
;
-
-PDDocCreateWordFinder | PDDocCreateWordFinderEx |
PDDocCreateWordFinderUCS | PDDocGetWordFinder |
This is passed to PDWordFinderSetCtrlProc().
-
This is the callback function called by Word Finder when its page enumeration process takes longer than the specified time (in seconds). Return true
to continue the enumeration process, or false
to stop. startTime
is the value that was set by ASGetSecs() when the Word Finder started processing the current page.
ASBool
PDWordFinderCtrlProc(
ASUns32
startTime
,
void
*
clientData
);
-ASBool
PDWordProc(
PDWordFinder
wObj
,
PDWord
wInfo
,
ASInt32
pgNum
,
void
*
clientData
);
+ASBool
PDWordProc(
PDWordFinder
wObj
,
PDWord
wInfo
,
ASInt32
pgNum
,
void
*
clientData
);
false
.
- | - |
- | This is always
-sizeof(PDWordFinderConfigRec) . |
- | When
-true , it disables tagged PDF support and treats the document as non-tagged PDF. Use this to keep the word finder in legacy mode when it is created with the latest algorithm version (WF_LATEST_VERSION). |
- | When
-true , it disables generating an XY-ordered word list. This option replaces the sort order flags in the older version of the word finder creation command (PDDocCreateWordFinder()). Setting this option is equivalent to omitting the WXE_XY_SORT flag. |
- | When
-true , the word finder preserves space characters during word breaking. Otherwise, spaces are removed from output text. When false (the default), you can add spaces later by considering the word attribute flag WXE_ADJACENT_TO_SPACE , but there is no way to restore the exact number of consecutive space characters. |
- | - When
- When |
- | When
-true , it disables guessing encoding of fonts that have unknown or custom encoding when there is no ToUnicode table. Inappropriate encoding conversions can cause the word finder to mistakenly recognize non-Roman single-byte fonts as Standard Roman encoding fonts and extract the text in an unusable format. When this option is selected, the word finder avoids such unreliable encoding conversions and tries to provide the original characters without any encoding conversion for a client with its own encoding handling. Use the PDWordGetCharEncFlags() method to detect such characters. |
- | When
-true , it assumes any font with unknown or custom encoding to be Standard Roman. This option overrides the noEncodingGuess option. |
- | When
-true , it disables converting large character gaps to space characters, so that the word finder reports a character space only when a space character appears in the original PDF content. This option has no effect on tagged PDF. |
- | When
-true , it disables treating vertical movements as line breaks, so that the word finder determines a line break only when a line break character or special tag information appears in the original PDF content. This option has no effect on tagged PDF. |
- | When
-true , it disables extracting text from text annotations. Normally, the word finder extracts text from the normal appearances of text annotations that are inside the page crop box. |
- | When
-true , it disables finding and removing soft hyphens in non-tagged PDF, so that the word finder trusts hard hyphens as non-soft hyphens. This option has no effect on tagged PDF files. Normally, the word finder does not differentiate between soft and hard hyphen characters in non-tagged PDF files, because these are often misused. |
- | When
-true , it disables treating non-breaking space characters as regular space characters in non-tagged PDF files, so that the word finder preserves the space without breaking the word. This option has no effect on tagged PDF files. Normally, the word finder does not differentiate between breaking and non-breaking space characters in non-tagged PDF files, because these are often misused. |
- | When
-true , it disables generating extended character offset information to improve text extraction performance. The extended character offset information is necessary to determine exact character offset for character-by-character text selection. The beginning character offset of each word is always available regardless of this option, and can be used for word-by-word text selection with reasonable accuracy. When a client has no need for the detailed character offset information, it can use this option to improve the text extraction efficiency. There is a minor difference in the text extraction performance, and less memory is needed for the extracted word list. |
- | When
-true , it disables generating character style information to improve text extraction performance and memory efficiency. When you select this option, you cannot use PDWordGetNthCharStyle() and PDWordGetStyleTransition() with the output of the word finder. |
- | A custom UTF-16 decomposition table. This table can be used to expand Unicode ligatures not included in the default ligature list. Each decomposition record contains a UTF-16 character code (either a 16-bit or 32-bit surrogate), a replacement UTF16 string, and the delimiter
-0x0000 . |
- | The size of the
-decomposeTbl in bytes. |
- | A custom character type table to enhance word breaking quality. Each character type record contains a region start value, a region end value, and a character type flag as defined in PDExpT.h. A character code is in UTF-16, and is either a 16-bit or a 32-bit surrogate.
- |
- | The size of the
-charTypeTbl in bytes. |
- | When Since this option may leave extra characters with overlapping bounding boxes, using it together with the |
- | When
-true , it disables reconstructing the character orders, and the word finding algorithm is applied to the characters in the drawing order. By default, word finder reorders characters on a single line by the relative horizontal character locations. Most of the time, the character reordering feature improves the text extraction quality. However, on a PDF page with heavily overlapped character bounding boxes, the outcome becomes somewhat unpredictable. In such case, disabling the character reordering ( disableCharReordering = true ) may produce a more static result. |
- | When
-true , it disables the creation of a quad per character for skewed words, words with a horizontally-aligned, but non-rectangular, bounding region. Each skewed word will, instead, be associated with a single rectangular, bounding region. |
- | When
-true , it disables extracting text with Text Rendering mode Tr = 3 ("Neither fill nor stroke text (invisible).") Normally, the word finder extracts such text as any other. |
- | - |
- | - |
PDDocCreateWordFinderEx | PDDocTextFinderCreate |
PDWordFinder
method. The text selection can then be set as the current selection using AVDocSetSelection(). PDWordFinder
method. The text selection can then be set as the current selection using AVDocSetSelection(). Note: For consistent text selection behavior, avoid using other PDTextSelect creation methods which depend on the word finder versions and word offsets. These include PDTextSelectCreatePageHiliteEx(), PDTextSelectCreateRanges(), PDTextSelectCreateRangesEx(), PDTextSelectCreateWordHilite(), and PDTextSelectCreateWordHiliteEx().
PDWord
PDWordFinderGetNthWord(
PDWordFinder
wObj
,
ASInt32
nTh
);
+PDWord
PDWordFinderGetNthWord(
PDWordFinder
wObj
,
ASInt32
nTh
);
PDStyle
PDWordGetNthCharStyle(
PDWordFinder
wObj
,
PDWord
word
,
ASInt32
dex
);
+PDStyle
PDWordGetNthCharStyle(
PDWordFinder
wObj
,
PDWord
word
,
ASInt32
dex
);
str
PDWordFinder
that supplied the PDWord. For instance, if PDDocCreateWordFinderUCS() is used to create the word finder, PDWordGetString() returns only Unicode. There is no way to detect Unicode strings returned by PDWordGetString(), since there is no UCS header ( FEFF
) added to each string returned. PDWordFinder
that supplied the PDWord. For instance, if PDDocCreateWordFinderUCS() is used to create the word finder, PDWordGetString() returns only Unicode. There is no way to detect Unicode strings returned by PDWordGetString(), since there is no UCS header ( FEFF
) added to each string returned. len
str
in bytes. Up to len
characters of word will be copied into str
. If str
is long enough, it will be NULL
-terminated. #define WF_VERSION_4
4
typedef
struct
_t_PDWordFinder
*
PDWordFinder
;
+
+PDDocCreateWordFinder | PDDocCreateWordFinderEx |
PDDocCreateWordFinderUCS | PDDocGetWordFinder |
This is passed to PDWordFinderSetCtrlProc().
+
This is the callback function called by Word Finder when its page enumeration process takes longer than the specified time (in seconds). Return true
to continue the enumeration process, or false
to stop. startTime
is the value that was set by ASGetSecs() when the Word Finder started processing the current page.
ASBool
PDWordFinderCtrlProc(
ASUns32
startTime
,
void
*
clientData
);
+false
.
+ | + |
+ | This is always
+sizeof(PDWordFinderConfigRec) . |
+ | When
+true , it disables tagged PDF support and treats the document as non-tagged PDF. Use this to keep the word finder in legacy mode when it is created with the latest algorithm version (WF_LATEST_VERSION). |
+ | When
+true , it disables generating an XY-ordered word list. This option replaces the sort order flags in the older version of the word finder creation command (PDDocCreateWordFinder()). Setting this option is equivalent to omitting the WXE_XY_SORT flag. |
+ | When
+true , the word finder preserves space characters during word breaking. Otherwise, spaces are removed from output text. When false (the default), you can add spaces later by considering the word attribute flag WXE_ADJACENT_TO_SPACE , but there is no way to restore the exact number of consecutive space characters. |
+ | + When
+ When |
+ | When
+true , it disables guessing encoding of fonts that have unknown or custom encoding when there is no ToUnicode table. Inappropriate encoding conversions can cause the word finder to mistakenly recognize non-Roman single-byte fonts as Standard Roman encoding fonts and extract the text in an unusable format. When this option is selected, the word finder avoids such unreliable encoding conversions and tries to provide the original characters without any encoding conversion for a client with its own encoding handling. Use the PDWordGetCharEncFlags() method to detect such characters. |
+ | When
+true , it assumes any font with unknown or custom encoding to be Standard Roman. This option overrides the noEncodingGuess option. |
+ | When
+true , it disables converting large character gaps to space characters, so that the word finder reports a character space only when a space character appears in the original PDF content. This option has no effect on tagged PDF. |
+ | When
+true , it disables treating vertical movements as line breaks, so that the word finder determines a line break only when a line break character or special tag information appears in the original PDF content. This option has no effect on tagged PDF. |
+ | When
+true , it disables extracting text from text annotations. Normally, the word finder extracts text from the normal appearances of text annotations that are inside the page crop box. |
+ | When
+true , it disables finding and removing soft hyphens in non-tagged PDF, so that the word finder trusts hard hyphens as non-soft hyphens. This option has no effect on tagged PDF files. Normally, the word finder does not differentiate between soft and hard hyphen characters in non-tagged PDF files, because these are often misused. |
+ | When
+true , it disables treating non-breaking space characters as regular space characters in non-tagged PDF files, so that the word finder preserves the space without breaking the word. This option has no effect on tagged PDF files. Normally, the word finder does not differentiate between breaking and non-breaking space characters in non-tagged PDF files, because these are often misused. |
+ | When
+true , it disables generating extended character offset information to improve text extraction performance. The extended character offset information is necessary to determine exact character offset for character-by-character text selection. The beginning character offset of each word is always available regardless of this option, and can be used for word-by-word text selection with reasonable accuracy. When a client has no need for the detailed character offset information, it can use this option to improve the text extraction efficiency. There is a minor difference in the text extraction performance, and less memory is needed for the extracted word list. |
+ | When
+true , it disables generating character style information to improve text extraction performance and memory efficiency. When you select this option, you cannot use PDWordGetNthCharStyle() and PDWordGetStyleTransition() with the output of the word finder. |
+ | A custom UTF-16 decomposition table. This table can be used to expand Unicode ligatures not included in the default ligature list. Each decomposition record contains a UTF-16 character code (either a 16-bit or 32-bit surrogate), a replacement UTF-16 string, and the delimiter
+0x0000 . |
+ | The size of the
+decomposeTbl in bytes. |
+ | A custom character type table to enhance word breaking quality. Each character type record contains a region start value, a region end value, and a character type flag as defined in PDExpT.h. A character code is in UTF-16, and is either a 16-bit or a 32-bit surrogate.
+ |
+ | The size of the
+charTypeTbl in bytes. |
+ | When Since this option may leave extra characters with overlapping bounding boxes, using it together with the |
+ | When
+true , it disables reconstructing the character orders, and the word finding algorithm is applied to the characters in the drawing order. By default, the word finder reorders characters on a single line by the relative horizontal character locations. Most of the time, the character reordering feature improves the text extraction quality. However, on a PDF page with heavily overlapped character bounding boxes, the outcome becomes somewhat unpredictable. In such a case, disabling the character reordering ( disableCharReordering = true ) may produce a more static result. |
+ | When
+true , it disables the creation of a quad per character for skewed words, words with a horizontally-aligned, but non-rectangular, bounding region. Each skewed word will, instead, be associated with a single rectangular, bounding region. |
+ | When
+true , it disables extracting text with Text Rendering mode Tr = 3 ("Neither fill nor stroke text (invisible).") Normally, the word finder extracts such text as any other. |
+ | + |
+ | + |
PDDocCreateWordFinderEx | PDDocTextFinderCreate |
void
PDWordFinderAcquireVisibleWordList(
PDWordFinder
wObj
,
ASInt32
pgNum
,
PDOCContext
ocContext
,
PDWord
*
wInfoP
,
PDWord
*
*
xySortTable
,
PDWord
*
*
rdOrderTable
,
ASInt32
*
numWords
);
+void
PDWordFinderAcquireVisibleWordList(
PDWordFinder
wObj
,
ASInt32
pgNum
,
PDOCContext
ocContext
,
PDWord
*
wInfoP
,
PDWord
*
*
xySortTable
,
PDWord
*
*
rdOrderTable
,
ASInt32
*
numWords
);
void
PDWordFinderAcquireWordList(
PDWordFinder
wObj
,
ASInt32
pgNum
,
PDWord
*
wInfoP
,
PDWord
*
*
xySortTable
,
PDWord
*
*
rdOrderTable
,
ASInt32
*
numWords
);
+void
PDWordFinderAcquireWordList(
PDWordFinder
wObj
,
ASInt32
pgNum
,
PDWord
*
wInfoP
,
PDWord
*
*
xySortTable
,
PDWord
*
*
rdOrderTable
,
ASInt32
*
numWords
);
void
PDWordFinderDestroy(
PDWordFinder
wObj
);
+void
PDWordFinderDestroy(
PDWordFinder
wObj
);
ASBool
PDWordFinderEnumVisibleWords(
PDWordFinder
wObj
,
ASInt32
PageNum
,
PDOCContext
ocContext
,
PDWordProc
wordProc
,
void
*
clientData
);
+ASBool
PDWordFinderEnumVisibleWords(
PDWordFinder
wObj
,
ASInt32
PageNum
,
PDOCContext
ocContext
,
PDWordProc
wordProc
,
void
*
clientData
);
ASBool
PDWordFinderEnumWords(
PDWordFinder
wObj
,
ASInt32
PageNum
,
PDWordProc
wordProc
,
void
*
clientData
);
+ASBool
PDWordFinderEnumWords(
PDWordFinder
wObj
,
ASInt32
PageNum
,
PDWordProc
wordProc
,
void
*
clientData
);
ASBool
PDWordFinderEnumWordsStr(
PDWordFinder
wObj
,
const
ASUTF16Val
*
ucsStr
,
ASUns32
strLen
,
ASUns32
charOffsetAdj
,
PDWordProc
wordProc
,
void
*
clientData
);
+ASBool
PDWordFinderEnumWordsStr(
PDWordFinder
wObj
,
const
ASUTF16Val
*
ucsStr
,
ASUns32
strLen
,
ASUns32
charOffsetAdj
,
PDWordProc
wordProc
,
void
*
clientData
);
ASInt16
PDWordFinderGetLatestAlgVersion(
PDWordFinder
wObj
);
+ASInt16
PDWordFinderGetLatestAlgVersion(
PDWordFinder
wObj
);
void
PDWordFinderReleaseWordList(
PDWordFinder
wObj
,
ASInt32
pgNum
);
+void
PDWordFinderReleaseWordList(
PDWordFinder
wObj
,
ASInt32
pgNum
);