Redaction in PDFs
How to Redact Information in a PDF
Redacting information in a PDF is not simply a matter of drawing a black bar across the text – the text underneath remains in the file and is still searchable. True redaction requires that the text be completely removed from the page and replaced with black bars. Redaction can also be set up to remove metadata embedded in the document, for full and complete security of PDF assets.
Get Free Trial
C++
C#
Java
C++
// Default search pattern: phone-number-like strings such as "1-(800) 555-1234" or "555-123-4567".
#define DEF_SEARCH_REGEX R"((1-)?(\()?\d{3}(\))?(\s)?(-)?\d{3}-\d{4})"

// Use the caller-supplied pattern when there is one; otherwise fall back to the default.
std::string csSearchRegex(szRegex != NULL ? szRegex : DEF_SEARCH_REGEX);

// Pure red in DeviceRGB; used both as the redaction color and as the fill color below.
PDColorValueRec cvRedRec;
cvRedRec.space = PDDeviceRGB;
cvRedRec.value[0] = fixedOne;
cvRedRec.value[1] = fixedZero;
cvRedRec.value[2] = fixedZero;

DURING
    APDFLDoc document(csInputFile.c_str(), true);

    PDWordFinderConfigRec wfConfig;
    memset(&wfConfig, 0, sizeof(wfConfig));            // Always zero the record first...
    wfConfig.recSize = sizeof(PDWordFinderConfigRec);  // ...and always set its size.
    wfConfig.noHyphenDetection = true;  // Needed so phrases are concatenated properly.

    // Create the DocTextFinder object and use it to find every match in the document.
    PDDocTextFinder matchFinder = PDDocTextFinderCreate(&wfConfig);
    PDDocTextFinderMatchList matchList = PDDocTextFinderAcquireMatchList(
        matchFinder, document.getPDDoc(), PDAllPages, 0, csSearchRegex.c_str());

    // Iterate over the matches that were found by DocTextFinder.
    for (ASUns32 matchInstance = 0; matchInstance < matchList.numMatches; ++matchInstance) {
        PDDocTextFinderMatchRec match = matchList.matches[matchInstance];

        // Create exactly one redaction per page the match touches. A quad starts a new
        // redaction only when no earlier quad of this match lies on the same page; this
        // avoids emitting duplicate annotations and also covers matches that continue
        // onto another page.
        for (ASUns32 firstQuad = 0; firstQuad < match.numQuads; ++firstQuad) {
            bool pageAlreadyHandled = false;
            for (ASUns32 earlier = 0; earlier < firstQuad; ++earlier) {
                if (match.quads[earlier].pageNum == match.quads[firstQuad].pageNum) {
                    pageAlreadyHandled = true;
                    break;
                }
            }
            if (pageAlreadyHandled)
                continue;

            PDRedactParamsRec redactParams;
            memset(&redactParams, 0, sizeof(PDRedactParamsRec));
            redactParams.size = sizeof(PDRedactParamsRec);
            redactParams.colorVal = &cvRedRec;
            redactParams.horizAlign = kPDHorizCenter;
            // In the "normal" or "unredacted" appearance, fill each quad with 100% red...
            redactParams.fillColor = &cvRedRec;
            // ...at 25% opacity.
            redactParams.fillOpacity = fixedQuarter;
            // The page that this redaction will be applied to.
            redactParams.pageNum = match.quads[firstQuad].pageNum;

            // Gather every quad of this match that lies on that page. Quads before
            // firstQuad cannot be on it, otherwise the page would have been skipped above.
            std::vector<ASFixedQuad> quadv;
            for (ASUns32 j = firstQuad; j < match.numQuads; ++j) {
                if (match.quads[j].pageNum == redactParams.pageNum)
                    quadv.push_back(match.quads[j].boundingQuad);
            }

            // quadv always holds at least match.quads[firstQuad], so data() is non-null.
            redactParams.redactQuads = quadv.data();
            // The number of entries in the vector.
            redactParams.numQuads = static_cast<ASUns32>(quadv.size());

            PDDocCreateRedaction(document.getPDDoc(), &redactParams);
        }
    }

    if (!bApply) {
        // Save the document with the redactions created, but not applied.
        document.saveDoc(csOutputFile.c_str(), PDSaveIncremental | PDSaveCopy);
    } else {
        // Apply the redactions, and save.
        PDDocApplyRedactions(document.getPDDoc(), NULL);
        document.saveDoc(csOutputFile.c_str(), PDSaveFull | PDSaveCollectGarbage, PDSaveCompressed);
    }

    // Release resources here.
    PDDocTextFinderReleaseMatchList(matchFinder);
    PDDocTextFinderDestroy(matchFinder);
HANDLER
    errCode = ERRORCODE;
    libInit.displayError(errCode);
END_HANDLER
C#
// Regular expression for phone-number-like strings, e.g. "1-(800) 555-1234" or "555-123-4567".
String sRegex = @"((1-)?(\()?\d{3}(\))?(\s)?(-)?\d{3}-\d{4})";

// Pure red, used for the redaction color and for its fill.
Color red = new Color(1.0, 0.0, 0.0);

using (var pdfDocument = new Document(sInput))
{
    // Hyphen detection must be turned off so that phrases are concatenated properly.
    var finderConfig = new WordFinderConfig();
    finderConfig.NoHyphenDetection = true;

    using (var textFinder = new DocTextFinder(pdfDocument, finderConfig))
    {
        // Walk every phrase or word that matches the regular expression...
        foreach (DocTextFinderMatch match in textFinder.GetMatchList(Document.AllPages, 0, sRegex))
        {
            // ...and add one redaction per page-level group of quads in the match.
            foreach (var quadInfo in match.QuadInfo)
            {
                using (var page = pdfDocument.GetPage(quadInfo.PageNum))
                {
                    Redaction redaction = new Redaction(page, quadInfo.Quads, red);
                    redaction.FillNormal = true;
                    redaction.Opacity = 0.25;
                    redaction.SetFillColor(red, 0.25);
                }
            }
        }
    }

    if (bApply)
    {
        // Apply every redaction in the document, then write the redacted result.
        pdfDocument.ApplyRedactions();
        pdfDocument.Save(SaveFlags.Full | SaveFlags.CollectGarbage, sOutput);
    }
    else
    {
        // Keep the redactions unapplied: the annotations are appended to the end
        // of a copy of the original PDF.
        pdfDocument.Save(SaveFlags.Incremental | SaveFlags.Copy, sOutput);
    }
}
Java
// Regular expression for phone-number-like strings, e.g. "1-(800) 555-1234" or "555-123-4567".
String sRegex = "((1-)?(\\()?\\d{3}(\\))?(\\s)?(-)?\\d{3}-\\d{4})";
Document doc = new Document(sInput);

// Pure red, used for the redaction color and for its fill.
Color red = new Color(1.0, 0.0, 0.0);

// Configure the word finder with plain setter calls rather than double-brace
// initialization, which would create an anonymous subclass at this site.
WordFinderConfig wordConfig = new WordFinderConfig();
// Need to set this to true so phrases will be concatenated properly
wordConfig.setNoHyphenDetection(true);

// Create a DocTextFinder that uses the word finder configuration above
DocTextFinder docTextFinder = new DocTextFinder(doc, wordConfig);

// Retrieve the phrases matching the regular expression
for (DocTextFinderMatch match : docTextFinder.getMatchList(Document.ALL_PAGES, 0, sRegex)) {
    // Iterate through the quad info and create a redaction for each entry
    for (DocTextFinderQuadInfo quadInfo : match.getQuadInfo()) {
        Page docpage = doc.getPage(quadInfo.getPageNum());
        try {
            Redaction redaction = new Redaction(docpage, quadInfo.getQuads(), red);
            redaction.setFillNormal(true);
            redaction.setOpacity(0.25);
            /* fill the "normal" appearance with 25% red */
            redaction.setFillColor(red, 0.25);
        } finally {
            // Release the page even if creating the redaction throws.
            docpage.delete();
        }
    }
}

if (!bApply) {
    // Save a copy of the document with the redactions created, but not applied
    doc.save(EnumSet.of(SaveFlags.INCREMENTAL, SaveFlags.COPY), sOutput);
} else {
    // Apply all the redactions in the document
    doc.applyRedactions();
    // Save the document with the redacted matched strings
    doc.save(EnumSet.of(SaveFlags.FULL, SaveFlags.COLLECT_GARBAGE), sOutput);
}