Text Search PDF
How to Search for Specific Words & Phrases in PDFs
Search for words or phrases across an entire PDF document, or restrict the search to specific page ranges, with Adobe PDF Library. The output document highlights each match with a box that surrounds the entire phrase.
This is often used in conjunction with the Redaction solution.
Get Free Trial
C++
C#
Java
C++
// Default search pattern: phone-number-like text such as "555-123-4567"
// or "1-(800) 555-1234".
#define DEF_SEARCH_REGEX R"((1-)?(\()?\d{3}(\))?(\s)?(-)?\d{3}-\d{4})"

// Use the supplied pattern when one was given; otherwise fall back to the default.
std::string csSearchRegex(szRegex != nullptr ? szRegex : DEF_SEARCH_REGEX);

DURING
    APDFLDoc document(csInputFile.c_str(), true);

    PDWordFinderConfigRec wfConfig;
    memset(&wfConfig, 0, sizeof(wfConfig));            // Always do this!
    wfConfig.recSize = sizeof(PDWordFinderConfigRec);  //...and this!

    // Need to set this to true so phrases will be concatenated properly
    wfConfig.noHyphenDetection = true;

    PDDocTextFinder matchFinder = PDDocTextFinderCreate(&wfConfig);

    PDDocTextFinderMatchList matchList = PDDocTextFinderAcquireMatchList(
        matchFinder, document.getPDDoc(), PDAllPages, 0, csSearchRegex.c_str());

    // Iterate over the matches that were found by DocTextFinder and print
    // each matched phrase on its own line.
    for (ASUns32 matchInstance = 0; matchInstance < matchList.numMatches; ++matchInstance) {
        std::cout << matchList.matches[matchInstance].phrase << std::endl;
    }

    // Release resources here: first the match list, then the finder itself.
    PDDocTextFinderReleaseMatchList(matchFinder);
    PDDocTextFinderDestroy(matchFinder);
HANDLER
    // Capture the error code and report it through libInit.
    errCode = ERRORCODE;
    libInit.displayError(errCode);
END_HANDLER
C#
// Regular expression for phone-number-like text, e.g. "555-123-4567" or "1-(800) 555-1234".
string sRegex = @"((1-)?(\()?\d{3}(\))?(\s)?(-)?\d{3}-\d{4})";

using (Document doc = new Document(sInput))
{
    // Word finder settings: NoHyphenDetection must be true so phrases will be concatenated properly.
    var finderConfig = new WordFinderConfig();
    finderConfig.NoHyphenDetection = true;

    // Build the DocTextFinder for this document using the settings above.
    using (var textFinder = new DocTextFinder(doc, finderConfig))
    {
        // Collect every phrase or word that matches the regular expression.
        var matches = textFinder.GetMatchList(Document.AllPages, 0, sRegex);

        foreach (DocTextFinderMatch match in matches)
        {
            // Print the page number taken from the match's first quad, followed by the matched text.
            var pageNum = match.QuadInfo[0].PageNum;
            var matchedText = match.MatchString;
            Console.WriteLine("{0}: {1}", pageNum, matchedText);
        }
    }
}
Java
// Regular expression for phone-number-like text, e.g. "555-123-4567" or "1-(800) 555-1234".
String sRegex = "((1-)?(\\()?\\d{3}(\\))?(\\s)?(-)?\\d{3}-\\d{4})";

Document doc = new Document(sInput);

// Plain construction plus a setter call; this avoids creating an anonymous
// WordFinderConfig subclass just to set one option.
WordFinderConfig wordConfig = new WordFinderConfig();
wordConfig.setNoHyphenDetection(true); // Need to set this to true so phrases will be concatenated properly

// NOTE(review): doc and docTextFinder are never released in this snippet, while the
// C# sample disposes both - confirm how the Java binding expects them to be released.
DocTextFinder docTextFinder = new DocTextFinder(doc, wordConfig);

for (DocTextFinderMatch wInfo : docTextFinder.getMatchList(Document.ALL_PAGES, 0, sRegex)) {
    // Show the matching phrase and the page it's found on.
    System.out.println(String.format("%d: %s", wInfo.getQuadInfo().get(0).getPageNum(), wInfo.getMatchString()));
}