Files | |
| file | fpdftext.h |
Header file for the text module - for text extraction. | |
Functions | |
| DLLEXPORT FPDF_TEXTPAGE STDCALL | FPDFText_LoadPage (FPDF_PAGE page) |
| DLLEXPORT void STDCALL | FPDFText_ClosePage (FPDF_TEXTPAGE text_page) |
| DLLEXPORT int STDCALL | FPDFText_CountChars (FPDF_TEXTPAGE text_page) |
| DLLEXPORT unsigned int STDCALL | FPDFText_GetUnicode (FPDF_TEXTPAGE text_page, int index) |
| DLLEXPORT FPDF_BOOL STDCALL | FPDFText_IsGenerated (FPDF_TEXTPAGE text_page, int index) |
| DLLEXPORT double STDCALL | FPDFText_GetFontSize (FPDF_TEXTPAGE text_page, int index) |
| DLLEXPORT void STDCALL | FPDFText_GetOrigin (FPDF_TEXTPAGE text_page, int index, double *x, double *y) |
| DLLEXPORT void STDCALL | FPDFText_GetCharBox (FPDF_TEXTPAGE text_page, int index, double *left, double *right, double *bottom, double *top) |
| DLLEXPORT void STDCALL | FPDFText_GetMatrix (FPDF_TEXTPAGE text_page, int index, double *a, double *b, double *c, double *d) |
| DLLEXPORT FPDF_FONT STDCALL | FPDFText_GetFont (FPDF_TEXTPAGE text_page, int index) |
| DLLEXPORT int STDCALL | FPDFFont_GetAscent (FPDF_FONT font) |
| DLLEXPORT int STDCALL | FPDFFont_GetDescent (FPDF_FONT font) |
| DLLEXPORT FPDF_BYTESTRING STDCALL | FPDFFont_GetName (FPDF_FONT font) |
| DLLEXPORT int STDCALL | FPDFText_GetCharIndexAtPos (FPDF_TEXTPAGE text_page, double x, double y, double xTorelance, double yTolerance) |
| DLLEXPORT int STDCALL | FPDFText_GetCharIndexByDirection (FPDF_TEXTPAGE text_page, int index, int direction) |
| DLLEXPORT int STDCALL | FPDFText_GetText (FPDF_TEXTPAGE text_page, int start_index, int count, unsigned short *result) |
| DLLEXPORT int STDCALL | FPDFText_CountRects (FPDF_TEXTPAGE text_page, int start_index, int count) |
| DLLEXPORT void STDCALL | FPDFText_GetRect (FPDF_TEXTPAGE text_page, int rect_index, double *left, double *top, double *right, double *bottom) |
| DLLEXPORT int STDCALL | FPDFText_GetBoundedText (FPDF_TEXTPAGE text_page, double left, double top, double right, double bottom, unsigned short *buffer, int buflen) |
| DLLEXPORT int STDCALL | FPDFText_CountBoundedSegments (FPDF_TEXTPAGE text_page, double left, double top, double right, double bottom) |
| DLLEXPORT void STDCALL | FPDFText_GetBoundedSegment (FPDF_TEXTPAGE text_page, int seg_index, int *start_index, int *count) |
| DLLEXPORT FPDF_SCHHANDLE STDCALL | FPDFText_FindStart (FPDF_TEXTPAGE text_page, FPDF_WIDESTRING findwhat, unsigned long flags, int start_index) |
| DLLEXPORT FPDF_BOOL STDCALL | FPDFText_FindNext (FPDF_SCHHANDLE handle) |
| DLLEXPORT FPDF_BOOL STDCALL | FPDFText_FindPrev (FPDF_SCHHANDLE handle) |
| DLLEXPORT int STDCALL | FPDFText_GetSchResultIndex (FPDF_SCHHANDLE handle) |
| DLLEXPORT int STDCALL | FPDFText_GetSchCount (FPDF_SCHHANDLE handle) |
| DLLEXPORT void STDCALL | FPDFText_FindClose (FPDF_SCHHANDLE handle) |
| DLLEXPORT FPDF_PAGELINK STDCALL | FPDFLink_LoadWebLinks (FPDF_TEXTPAGE text_page) |
| DLLEXPORT int STDCALL | FPDFLink_CountWebLinks (FPDF_PAGELINK link_page) |
| DLLEXPORT int STDCALL | FPDFLink_GetURL (FPDF_PAGELINK link_page, int link_index, unsigned short *buffer, int buflen) |
| DLLEXPORT int STDCALL | FPDFLink_CountRects (FPDF_PAGELINK link_page, int link_index) |
| DLLEXPORT void STDCALL | FPDFLink_GetRect (FPDF_PAGELINK link_page, int link_index, int rect_index, double *left, double *top, double *right, double *bottom) |
| DLLEXPORT void STDCALL | FPDFLink_CloseWebLinks (FPDF_PAGELINK link_page) |
| DLLEXPORT FPDF_BOOL STDCALL | FPDFText_PDFToText (const char *sour_file, const char *dest_file, int flag, FPDF_BYTESTRING password) |
| DLLEXPORT int STDCALL | FPDFText_PageToText (FPDF_DOCUMENT doc, int page_index, wchar_t *buf, int size, int flag) |
| _FPDFTEXT_H_ | |
Flags used by FPDFText_GetCharIndexByDirection function. | |
| #define | FPDFTEXT_LEFT -1 |
| LEFT. | |
| #define | FPDFTEXT_RIGHT 1 |
| RIGHT. | |
| #define | FPDFTEXT_UP -2 |
| UP. | |
| #define | FPDFTEXT_DOWN 2 |
| DOWN. | |
Flags used by FPDFText_FindStart function. | |
| #define | FPDF_MATCHCASE 0x00000001 |
| If not set, it will not match case by default. | |
| #define | FPDF_MATCHWHOLEWORD 0x00000002 |
| If not set, it will not match the whole word by default. | |
Flags used by FPDFText_PDFToText and FPDFText_PageToText. | |
| #define | FPDFTEXT_STREAM_ORDER 0 |
| STREAM. | |
| #define | FPDFTEXT_DISPLAY_ORDER 1 |
| DISPLAY. | |
Header file for the text module
| DLLEXPORT int STDCALL FPDFFont_GetAscent | ( | FPDF_FONT | font | ) |
Get font ascent (in 1/1000 em).
| [in] | font | - Handle to a font. Returned by FPDFText_GetFont function. |
| The | ascent (typically the above-baseline height of letter "h"), measured in 1/1000 of em size. So if a character uses a font size (em size) of 10 points, and it has an ascent value of 500 (meaning half of the em), then the ascent height will be 5 points (5/72 inch). |
| DLLEXPORT int STDCALL FPDFFont_GetDescent | ( | FPDF_FONT | font | ) |
Get font descent (in 1/1000 em).
| [in] | font | - Handle to a font. Returned by FPDFText_GetFont function. |
| The | descent (typically the under-baseline height of letter "g"), measured in 1/1000 of em size. Most fonts have a negative descent value. |
| DLLEXPORT FPDF_BYTESTRING STDCALL FPDFFont_GetName | ( | FPDF_FONT | font | ) |
Get the name of a font.
| [in] | font | - Handle to a font. Returned by FPDFText_GetFont function. |
| A | pointer to a null-terminated string that specifies the name of the font. Application can't modify the returned string. |
| DLLEXPORT void STDCALL FPDFLink_CloseWebLinks | ( | FPDF_PAGELINK | link_page | ) |
Release resources used by weblink feature.
| [in] | link_page | - Handle returned by FPDFLink_LoadWebLinks. |
| None. |
| DLLEXPORT int STDCALL FPDFLink_CountRects | ( | FPDF_PAGELINK | link_page, |
| int | link_index | ||
| ) |
Count number of rectangular areas for the link.
| [in] | link_page | - Handle returned by FPDFLink_LoadWebLinks. |
| [in] | link_index | - Zero-based index for the link. |
| Number | of rectangular areas for the link. |
| DLLEXPORT int STDCALL FPDFLink_CountWebLinks | ( | FPDF_PAGELINK | link_page | ) |
Count number of detected web links.
| [in] | link_page | - Handle returned by FPDFLink_LoadWebLinks. |
| Number | of detected web links. |
| DLLEXPORT void STDCALL FPDFLink_GetRect | ( | FPDF_PAGELINK | link_page, |
| int | link_index, | ||
| int | rect_index, | ||
| double * | left, | ||
| double * | top, | ||
| double * | right, | ||
| double * | bottom | ||
| ) |
Fetch the boundaries of a rectangle for a link.
| [in] | link_page | - Handle returned by FPDFLink_LoadWebLinks. |
| [in] | link_index | - Zero-based index for the link. |
| [in] | rect_index | - Zero-based index for a rectangle. |
| [out] | left | - Pointer to a double receiving the rectangle left boundary. |
| [out] | top | - Pointer to a double receiving the rectangle top boundary. |
| [out] | right | - Pointer to a double receiving the rectangle right boundary. |
| [out] | bottom | - Pointer to a double receiving the rectangle bottom boundary. |
| None. |
| DLLEXPORT int STDCALL FPDFLink_GetURL | ( | FPDF_PAGELINK | link_page, |
| int | link_index, | ||
| unsigned short * | buffer, | ||
| int | buflen | ||
| ) |
Fetch the URL information for a detected web link.
| [in] | link_page | - Handle returned by FPDFLink_LoadWebLinks. |
| [in] | link_index | - Zero-based index for the link. |
| [out] | buffer | - A Unicode buffer receiving the URL. |
| [in] | buflen | - Number of characters (not bytes) for the buffer, excluding an additional terminator. |
| If | buffer is NULL or buflen is zero, return number of characters (not bytes) needed, otherwise, return number of characters copied into the buffer. |
| DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks | ( | FPDF_TEXTPAGE | text_page | ) |
Prepare information about weblinks in a page.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
FPDFLink_CloseWebLinks must be called to release resources.
| A | handle to the page's links information structure. NULL if something goes wrong. |
| DLLEXPORT void STDCALL FPDFText_ClosePage | ( | FPDF_TEXTPAGE | text_page | ) |
Release all resources allocated for a text page information structure.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| None. |
| DLLEXPORT int STDCALL FPDFText_CountBoundedSegments | ( | FPDF_TEXTPAGE | text_page, |
| double | left, | ||
| double | top, | ||
| double | right, | ||
| double | bottom | ||
| ) |
Get number of text segments within a rectangular boundary on the page.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | left | - Left boundary. |
| [in] | top | - Top boundary. |
| [in] | right | - Right boundary. |
| [in] | bottom | - Bottom boundary. |
| Number | of segments. |
| DLLEXPORT int STDCALL FPDFText_CountChars | ( | FPDF_TEXTPAGE | text_page | ) |
Get number of characters in a page.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| Number | of characters in the page. Return -1 for error. |
| DLLEXPORT int STDCALL FPDFText_CountRects | ( | FPDF_TEXTPAGE | text_page, |
| int | start_index, | ||
| int | count | ||
| ) |
Count the number of rectangular areas occupied by a segment of text.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | start_index | - Index for the start character. |
| [in] | count | - Number of characters. |
| Number | of rectangles. Zero for error. |
| DLLEXPORT void STDCALL FPDFText_FindClose | ( | FPDF_SCHHANDLE | handle | ) |
Release a search context.
| [in] | handle | - A search context handle returned by FPDFText_FindStart. |
| None. |
| DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext | ( | FPDF_SCHHANDLE | handle | ) |
Search in the direction from page start to end.
| [in] | handle | - A search context handle returned by FPDFText_FindStart. |
| Whether | a match is found. |
| DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev | ( | FPDF_SCHHANDLE | handle | ) |
Search in the direction from page end to start.
| [in] | handle | - A search context handle returned by FPDFText_FindStart. |
| Whether | a match is found. |
| DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart | ( | FPDF_TEXTPAGE | text_page, |
| FPDF_WIDESTRING | findwhat, | ||
| unsigned long | flags, | ||
| int | start_index | ||
| ) |
Start a search.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | findwhat | - A unicode match pattern. |
| [in] | flags | - Option flags. |
| [in] | start_index | - Start from this character. -1 for end of the page. |
| A | handle for the search context. FPDFText_FindClose must be called to release this handle. |
| DLLEXPORT void STDCALL FPDFText_GetBoundedSegment | ( | FPDF_TEXTPAGE | text_page, |
| int | seg_index, | ||
| int * | start_index, | ||
| int * | count | ||
| ) |
Get a particular segment in the result generated by FPDFText_CountBoundedSegments function.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | seg_index | - Zero-based index for the segment. |
| [out] | start_index | - Pointer to an integer receiving the start character index for the segment. |
| [out] | count | - Pointer to an integer receiving the number of characters in the segment. |
| None. |
| DLLEXPORT int STDCALL FPDFText_GetBoundedText | ( | FPDF_TEXTPAGE | text_page, |
| double | left, | ||
| double | top, | ||
| double | right, | ||
| double | bottom, | ||
| unsigned short * | buffer, | ||
| int | buflen | ||
| ) |
Extract unicode text within a rectangular boundary on the page.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | left | - Left boundary. |
| [in] | top | - Top boundary. |
| [in] | right | - Right boundary. |
| [in] | bottom | - Bottom boundary. |
| [out] | buffer | - A Unicode buffer receiving the extracted text. |
| [in] | buflen | - Number of characters (not bytes) for the buffer, excluding an additional terminator. |
| If | buffer is NULL or buflen is zero, return number of characters (not bytes) needed, otherwise, return number of characters copied into the buffer. |
| DLLEXPORT void STDCALL FPDFText_GetCharBox | ( | FPDF_TEXTPAGE | text_page, |
| int | index, | ||
| double * | left, | ||
| double * | right, | ||
| double * | bottom, | ||
| double * | top | ||
| ) |
Get bounding box of a particular character.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | index | - Zero-based index of the character. |
| [out] | left | - Pointer to a double receiving left position of the character box. |
| [out] | right | - Pointer to a double receiving right position of the character box. |
| [out] | bottom | - Pointer to a double receiving bottom position of the character box. |
| [out] | top | - Pointer to a double receiving top position of the character box. |
| None. |
| DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos | ( | FPDF_TEXTPAGE | text_page, |
| double | x, | ||
| double | y, | ||
| double | xTorelance, | ||
| double | yTolerance | ||
| ) |
Get the index of a character at or nearby a certain position on the page.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | x | - X position in PDF "user space". |
| [in] | y | - Y position in PDF "user space". |
| [in] | xTolerance | - A x-axis tolerance value for character hit detection, in point unit. |
| [in] | yTolerance | - A y-axis tolerance value for character hit detection, in point unit. |
| The | zero-based index of the character at, or nearby the point (x,y). If there is no character at or nearby the point, return value will be -1. If an error occurs, return value will be -3. |
| DLLEXPORT int STDCALL FPDFText_GetCharIndexByDirection | ( | FPDF_TEXTPAGE | text_page, |
| int | index, | ||
| int | direction | ||
| ) |
Move the character index in different directions and get new character index, from a specific character.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | index | - Zero-based index for the current character. |
| [in] | direction | - A number indicating the moving direction. Can be one of the following: FPDFTEXT_LEFT, FPDFTEXT_UP, FPDFTEXT_RIGHT, FPDFTEXT_DOWN. |
| Zero-based | character index for the new position. -1 if beginning of the page reached; -2 if end of the page reached; -3 for failures. |
| DLLEXPORT FPDF_FONT STDCALL FPDFText_GetFont | ( | FPDF_TEXTPAGE | text_page, |
| int | index | ||
| ) |
Get font of a particular character.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | index | - Zero-based index of the character. |
| A | handle to the font used by the particular character. This handle can be used in FPDFFont_xxx functions for more information about the font. |
| DLLEXPORT double STDCALL FPDFText_GetFontSize | ( | FPDF_TEXTPAGE | text_page, |
| int | index | ||
| ) |
Get the font size of a particular character.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | index | - Zero-based index of the character. |
| The | font size of the particular character, measured in points (about 1/72 inch). This is the typographic size of the font (so called "em size"). |
| DLLEXPORT void STDCALL FPDFText_GetMatrix | ( | FPDF_TEXTPAGE | text_page, |
| int | index, | ||
| double * | a, | ||
| double * | b, | ||
| double * | c, | ||
| double * | d | ||
| ) |
Get the matrix of a particular character.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | index | - Zero-based index of the character. |
| [out] | a | - Pointer to a double receiving the coefficient "a" of the matrix. |
| [out] | b | - Pointer to a double receiving the coefficient "b" of the matrix. |
| [out] | c | - Pointer to a double receiving the coefficient "c" of the matrix. |
| [out] | d | - Pointer to a double receiving the coefficient "d" of the matrix. |
| None. |
| DLLEXPORT void STDCALL FPDFText_GetOrigin | ( | FPDF_TEXTPAGE | text_page, |
| int | index, | ||
| double * | x, | ||
| double * | y | ||
| ) |
Get origin position of a particular character.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | index | - Zero-based index of the character. |
| [out] | x | - Pointer to a double receiving X position of the character origin. |
| [out] | y | - Pointer to a double receiving Y position of the character origin. |
| None. |
| DLLEXPORT void STDCALL FPDFText_GetRect | ( | FPDF_TEXTPAGE | text_page, |
| int | rect_index, | ||
| double * | left, | ||
| double * | top, | ||
| double * | right, | ||
| double * | bottom | ||
| ) |
Get a rectangular area from the result generated by FPDFText_CountRects.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | rect_index | - Zero-based index for the rectangle. |
| [out] | left | - Pointer to a double receiving the rectangle left boundary. |
| [out] | top | - Pointer to a double receiving the rectangle top boundary. |
| [out] | right | - Pointer to a double receiving the rectangle right boundary. |
| [out] | bottom | - Pointer to a double receiving the rectangle bottom boundary. |
| None. |
| DLLEXPORT int STDCALL FPDFText_GetSchCount | ( | FPDF_SCHHANDLE | handle | ) |
Get the number of matched characters in the search result.
| [in] | handle | - A search context handle returned by FPDFText_FindStart. |
| Number | of matched characters. |
| DLLEXPORT int STDCALL FPDFText_GetSchResultIndex | ( | FPDF_SCHHANDLE | handle | ) |
Get the starting character index of the search result.
| [in] | handle | - A search context handle returned by FPDFText_FindStart. |
| Index | for the starting character. |
| DLLEXPORT int STDCALL FPDFText_GetText | ( | FPDF_TEXTPAGE | text_page, |
| int | start_index, | ||
| int | count, | ||
| unsigned short * | result | ||
| ) |
Extract unicode text string from the page.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | start_index | - Index for the start character. |
| [in] | count | - Number of characters to be extracted. |
| [out] | result | - A buffer (allocated by application) receiving the extracted Unicode characters. The size of the buffer must be able to hold the number of characters plus a terminator. |
| Number | of characters written into the result buffer, excluding the trailing terminator. |
| DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode | ( | FPDF_TEXTPAGE | text_page, |
| int | index | ||
| ) |
Get Unicode of a character in a page.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | index | - Zero-based index of the character. |
| The | Unicode of the character. If a character is not encoded in Unicode and cannot be converted to Unicode by the Foxit engine, the return value will be zero. |
| DLLEXPORT FPDF_BOOL STDCALL FPDFText_IsGenerated | ( | FPDF_TEXTPAGE | text_page, |
| int | index | ||
| ) |
Indicate whether a character is a generated character.
| [in] | text_page | - Handle to a text page information structure. Returned by FPDFText_LoadPage function. |
| [in] | index | - Zero-based index of the character. |
| TRUE | indicates a generated character and FALSE indicates an actual character in the PDF page. |
| DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage | ( | FPDF_PAGE | page | ) |
Prepare information about all characters in a page.
| [in] | page | - Handle to the page. Returned by FPDF_LoadPage function (in view module). |
| A | handle to the text page information structure. NULL if something goes wrong. |
| DLLEXPORT int STDCALL FPDFText_PageToText | ( | FPDF_DOCUMENT | doc, |
| int | page_index, | ||
| wchar_t * | buf, | ||
| int | size, | ||
| int | flag | ||
| ) |
_FPDFTEXT_H_
Convert a PDF page data to a text buffer.
| [in] | doc | - Handle to document. Returned by FPDF_LoadDocument function. |
| [in] | page_index | - Index number of the page. 0 for the first page. |
| [out] | buf | - An output buffer used to hold the text of the page. |
| [in] | size | - Size of the buffer. |
| [in] | flag | - 0 for stream order, 1 for appearance order. |
| If | buf is NULL or size is zero, return the number of characters (not bytes) needed; otherwise, return the number of characters copied into buf. |
| DLLEXPORT FPDF_BOOL STDCALL FPDFText_PDFToText | ( | const char * | sour_file, |
| const char * | dest_file, | ||
| int | flag, | ||
| FPDF_BYTESTRING | password | ||
| ) |
Convert a PDF file to a TXT file.
| [in] | sour_file | - Path to the PDF file you want to convert. |
| [in] | dest_file | - The path of the file you want to save. |
| [in] | flag | - 0 for stream order, 1 for appearance order. |
| [in] | password | - A string used as the password for the PDF file. If no password is needed, an empty string or NULL can be used. |
| TRUE | for success, FALSE for failure. |