Skip to content
Open
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,65 @@
public class DocumentProcessor {
// Class-level logger, registered under the canonical name of DocumentProcessor.
private static final Logger LOGGER = Logger.getLogger(DocumentProcessor.class.getCanonicalName());

/**
* Releases PDF resources to prevent file locks and memory leaks.
* - Closes PDDocument to free OS file handles (required for file deletion)
* - Clears static containers to remove lingering references
* Should always be called in a finally block.
*/
private static void closePdfResources() throws Exception {
Exception closeFailure = null;
PDDocument document = StaticResources.getDocument();
if (document != null) {
try {
document.close();
} catch (Exception e) {
closeFailure = e;
}
}

try {
StaticLayoutContainers.closeContrastRatioConsumer();
} catch (Exception e) {
if (closeFailure != null) {
closeFailure.addSuppressed(e);
} else {
closeFailure = e;
}
}

// cleanup static containers
clearCleanupStep("StaticResources", StaticResources::clear);
clearCleanupStep("StaticContainers", () -> StaticContainers.updateContainers(null));
clearCleanupStep(
"GFStaticContainers",
org.verapdf.gf.model.impl.containers.StaticContainers::clearAllContainers
);
clearCleanupStep("StaticLayoutContainers", StaticLayoutContainers::clearContainers);
clearCleanupStep("StaticStorages", StaticStorages::clearAllContainers);
clearCleanupStep("StaticCoreContainers", StaticCoreContainers::clearAllContainers);
clearCleanupStep("StaticXmpCoreContainers", StaticXmpCoreContainers::clearAllContainers);
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Comment thread
coderabbitai[bot] marked this conversation as resolved.

if (closeFailure != null) {
throw closeFailure;
}
}

/**
 * Runs a single cleanup action in isolation.
 *
 * <p>Any {@link Exception} thrown by the action is caught and logged at WARNING level
 * together with the step name, so a failing step never prevents the caller from
 * running the cleanup steps that follow it.
 *
 * @param name human-readable label of the step, used in the log message
 * @param cleanup the cleanup action to execute
 */
private static void clearCleanupStep(String name, Runnable cleanup) {
    try {
        cleanup.run();
    } catch (Exception failure) {
        final String message = "Error clearing " + name;
        LOGGER.log(Level.WARNING, message, failure);
    }
}

/**
* Processes a PDF file and generates the configured outputs.
*
Expand All @@ -76,25 +135,47 @@ public class DocumentProcessor {
* @throws IOException if unable to process the file
*/
public static void processFile(String inputPdfName, Config config) throws IOException {
preprocessing(inputPdfName, config);
calculateDocumentInfo();
Set<Integer> pagesToProcess = getValidPageNumbers(config);
List<List<IObject>> contents;
if (StaticLayoutContainers.isUseStructTree()) {
contents = TaggedDocumentProcessor.processDocument(inputPdfName, config, pagesToProcess);
} else if (config.isHybridEnabled()) {
contents = HybridDocumentProcessor.processDocument(inputPdfName, config, pagesToProcess);
} else {
contents = processDocument(inputPdfName, config, pagesToProcess);
}
if (config.needsStructuredProcessing()) {
sortContents(contents, config);
Throwable processingFailure = null;
try {
preprocessing(inputPdfName, config);
calculateDocumentInfo();
Set<Integer> pagesToProcess = getValidPageNumbers(config);
List<List<IObject>> contents;
if (StaticLayoutContainers.isUseStructTree()) {
contents = TaggedDocumentProcessor.processDocument(inputPdfName, config, pagesToProcess);
} else if (config.isHybridEnabled()) {
contents = HybridDocumentProcessor.processDocument(inputPdfName, config, pagesToProcess);
} else {
contents = processDocument(inputPdfName, config, pagesToProcess);
}
if (config.needsStructuredProcessing()) {
sortContents(contents, config);
}
ContentSanitizer contentSanitizer = new ContentSanitizer(config.getFilterConfig().getFilterRules(),
config.getFilterConfig().isFilterSensitiveData());
contentSanitizer.sanitizeContents(contents);
generateOutputs(inputPdfName, contents, config);
} catch (IOException | RuntimeException | Error e) {
processingFailure = e;
throw e;
} finally {
// Ensures resources are always released, even if processing throws an exception
try {
closePdfResources();
} catch (Exception closeException) {
LOGGER.log(Level.WARNING, "Error during PDF resource cleanup", closeException);
if (processingFailure != null) {
processingFailure.addSuppressed(closeException);
} else {
if (closeException instanceof IOException) {
throw (IOException) closeException;
} else {
throw new IOException("Failed to close PDF resources", closeException);
}
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}
}
ContentSanitizer contentSanitizer = new ContentSanitizer(config.getFilterConfig().getFilterRules(),
config.getFilterConfig().isFilterSensitiveData());
contentSanitizer.sanitizeContents(contents);
generateOutputs(inputPdfName, contents, config);
}
}

/**
* Validates and filters page numbers from config against actual document pages.
Expand Down