From 54bd1946a9c5f08dcebfc597c428b4b3be846451 Mon Sep 17 00:00:00 2001 From: Jordan Padams Date: Thu, 30 Jan 2020 15:37:44 -0800 Subject: [PATCH 1/3] Fix multi-threading bug to wait until threads have completed Update ExecutorService to use Future object to track currently executing threads. Previous implementation set arbitrary timeout to 100 seconds, but it looks like it was potentially killing the reporting thread, not necessarily the validation execution thread. Resolves #180 --- .../validate/rule/pds4/LabelInFolderRule.java | 33 +++++++++++++++---- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/src/main/java/gov/nasa/pds/tools/validate/rule/pds4/LabelInFolderRule.java b/src/main/java/gov/nasa/pds/tools/validate/rule/pds4/LabelInFolderRule.java index f0bf01625..5985f6812 100644 --- a/src/main/java/gov/nasa/pds/tools/validate/rule/pds4/LabelInFolderRule.java +++ b/src/main/java/gov/nasa/pds/tools/validate/rule/pds4/LabelInFolderRule.java @@ -13,15 +13,23 @@ // $Id$ package gov.nasa.pds.tools.validate.rule.pds4; +import gov.nasa.pds.tools.label.ExceptionType; import gov.nasa.pds.tools.util.Utility; +import gov.nasa.pds.tools.validate.ProblemDefinition; +import gov.nasa.pds.tools.validate.ProblemType; import gov.nasa.pds.tools.validate.Target; +import gov.nasa.pds.tools.validate.ValidationProblem; import gov.nasa.pds.tools.validate.crawler.Crawler; import gov.nasa.pds.tools.validate.rule.*; import java.io.IOException; import java.net.URISyntaxException; +import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; +import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; /** @@ -31,8 +39,10 @@ public class LabelInFolderRule extends AbstractValidationRule { private static final String XML_SUFFIX = ".xml"; + private static final long THREAD_TIMEOUT = 100; // HOURS private ExecutorService validateThreadExecutor; + List> futures = new ArrayList>(); @Override public boolean isApplicable(String location) { @@ -45,6 +55,7 @@ public boolean isApplicable(String location) { @ValidationTest public void validateLabelsInFolder() { validateThreadExecutor = Executors.newFixedThreadPool(1); + ValidationRule labelRuleTmp = null; // issue_124: @@ -57,36 +68,44 @@ public void validateLabelsInFolder() { final ValidationRule labelRule = labelRuleTmp; Crawler crawler = getContext().getCrawler(); + URL target = getTarget(); try { int targetCount = 0; - for (Target t : crawler.crawl(getTarget(), false, getContext().getFileFilters())) { + for (Target t : crawler.crawl(target, false, getContext().getFileFilters())) { - validateThreadExecutor.execute(new Runnable() { + Future f = validateThreadExecutor.submit(new Runnable() { public void run() { //System.out.println("\nVALIDATING : " + t.getUrl()); try { labelRule.execute(getChildContext(t.getUrl())); } catch (Exception e) { - System.out.println("ERROR: " + e.getMessage()); - e.printStackTrace(); reportError(GenericProblems.UNCAUGHT_EXCEPTION, t.getUrl(), -1, -1, e.getMessage()); + e.printStackTrace(); } } }); + futures.add(f); + targetCount++; } // end for try { + // Wait for threads to complete + for(Future future : futures) + future.get(); + validateThreadExecutor.shutdown(); - validateThreadExecutor.awaitTermination(100, TimeUnit.SECONDS); - } catch (InterruptedException e) { + } catch (Exception e) { e.printStackTrace(); } -// System.out.println("COMPLETED " + targetCount + " targets."); + getListener().addProblem( + new ValidationProblem( + new ProblemDefinition(ExceptionType.DEBUG, + ProblemType.GENERAL_INFO, "Targets completed: " + targetCount), target)); } catch (IOException io) { reportError(GenericProblems.UNCAUGHT_EXCEPTION, getContext().getTarget(), -1, -1, io.getMessage()); From 0e12810632d10a9fd53d55cb44a875ecc9c99730 Mon Sep 17 00:00:00 2001 From: Jordan Padams Date: Thu, 30 Jan 2020 15:39:17 -0800 Subject: [PATCH 2/3] Remove md5 generation from validation when not possible Original implementation generated MD5 checksum for all products, even if there was no checksum validation possible in the validation run (e.g. no checksum in label and no checksum manifest provided). Resolves #178 --- .../pds4/FileReferenceValidationRule.java | 144 +++++++++--------- 1 file changed, 74 insertions(+), 70 deletions(-) diff --git a/src/main/java/gov/nasa/pds/tools/validate/rule/pds4/FileReferenceValidationRule.java b/src/main/java/gov/nasa/pds/tools/validate/rule/pds4/FileReferenceValidationRule.java index 690c88651..6e3b9ac2c 100644 --- a/src/main/java/gov/nasa/pds/tools/validate/rule/pds4/FileReferenceValidationRule.java +++ b/src/main/java/gov/nasa/pds/tools/validate/rule/pds4/FileReferenceValidationRule.java @@ -330,79 +330,83 @@ private List handleChecksum(ValidationTarget target, URL file private List handleChecksum(ValidationTarget target, URL urlRef, TinyNodeImpl fileObject, String checksumInLabel) throws Exception { - List messages = new ArrayList(); - String generatedChecksum = MD5Checksum.getMD5Checksum(urlRef); - int lineNumber = -1; - if (fileObject != null) { - lineNumber = fileObject.getLineNumber(); - } - if (!checksumManifest.isEmpty()) { - if (checksumManifest.containsKey(urlRef)) { - String suppliedChecksum = checksumManifest.get(urlRef); - String message = ""; - ProblemType type = null; - ExceptionType severity = null; - if (!suppliedChecksum.equals(generatedChecksum)) { - message = "Generated checksum '" + generatedChecksum - + "' does not match supplied checksum '" - + suppliedChecksum + "' in the manifest for '" - + urlRef + "'"; - severity = ExceptionType.ERROR; - type = ProblemType.CHECKSUM_MISMATCH; - } else { - message = "Generated checksum '" + generatedChecksum - + "' matches the supplied checksum '" + suppliedChecksum - + "' in the manifest for '" + urlRef - + "'"; - severity = ExceptionType.INFO; - type = ProblemType.CHECKSUM_MATCHES; - } - if (!message.isEmpty()) { - ProblemDefinition def = new ProblemDefinition(severity, type, - message); - messages.add(new ValidationProblem(def, target, lineNumber, -1)); + if (checksumManifest.isEmpty() && (checksumInLabel == null || checksumInLabel.isEmpty())) { + return new ArrayList(); + } else { + List messages = new ArrayList(); + String generatedChecksum = MD5Checksum.getMD5Checksum(urlRef); + int lineNumber = -1; + if (fileObject != null) { + lineNumber = fileObject.getLineNumber(); } - } else { - String message = "No checksum found in the manifest for '" - + urlRef + "'"; - ProblemDefinition def = new ProblemDefinition( - ExceptionType.ERROR, ProblemType.MISSING_CHECKSUM, message); - messages.add(new ValidationProblem(def, target, lineNumber, -1)); - } - } - if (checksumInLabel != null) { - if (!checksumInLabel.isEmpty()) { - String message = ""; - ProblemType type = null; - ExceptionType severity = null; - if (!generatedChecksum.equals(checksumInLabel)) { - message = "Generated checksum '" + generatedChecksum - + "' does not match supplied checksum '" - + checksumInLabel + "' in the product label for '" - + urlRef + "'"; - type = ProblemType.CHECKSUM_MISMATCH; - severity = ExceptionType.ERROR; - } else { - message = "Generated checksum '" + generatedChecksum - + "' matches the supplied checksum '" + checksumInLabel - + "' in the product label for '" - + urlRef + "'"; - type = ProblemType.CHECKSUM_MATCHES; - severity = ExceptionType.INFO; + if (!checksumManifest.isEmpty()) { + if (checksumManifest.containsKey(urlRef)) { + String suppliedChecksum = checksumManifest.get(urlRef); + String message = ""; + ProblemType type = null; + ExceptionType severity = null; + if (!suppliedChecksum.equals(generatedChecksum)) { + message = "Generated checksum '" + generatedChecksum + + "' does not match supplied checksum '" + + suppliedChecksum + "' in the manifest for '" + + urlRef + "'"; + severity = ExceptionType.ERROR; + type = ProblemType.CHECKSUM_MISMATCH; + } else { + message = "Generated checksum '" + generatedChecksum + + "' matches the supplied checksum '" + suppliedChecksum + + "' in the manifest for '" + urlRef + + "'"; + severity = ExceptionType.INFO; + type = ProblemType.CHECKSUM_MATCHES; + } + if (!message.isEmpty()) { + ProblemDefinition def = new ProblemDefinition(severity, type, + message); + messages.add(new ValidationProblem(def, target, lineNumber, -1)); + } + } else { + String message = "No checksum found in the manifest for '" + + urlRef + "'"; + ProblemDefinition def = new ProblemDefinition( + ExceptionType.ERROR, ProblemType.MISSING_CHECKSUM, message); + messages.add(new ValidationProblem(def, target, lineNumber, -1)); + } } - if (!message.isEmpty()) { - ProblemDefinition def = new ProblemDefinition(severity, type, - message); - messages.add(new ValidationProblem(def, target, lineNumber, -1)); + if (checksumInLabel != null) { + if (!checksumInLabel.isEmpty()) { + String message = ""; + ProblemType type = null; + ExceptionType severity = null; + if (!generatedChecksum.equals(checksumInLabel)) { + message = "Generated checksum '" + generatedChecksum + + "' does not match supplied checksum '" + + checksumInLabel + "' in the product label for '" + + urlRef + "'"; + type = ProblemType.CHECKSUM_MISMATCH; + severity = ExceptionType.ERROR; + } else { + message = "Generated checksum '" + generatedChecksum + + "' matches the supplied checksum '" + checksumInLabel + + "' in the product label for '" + + urlRef + "'"; + type = ProblemType.CHECKSUM_MATCHES; + severity = ExceptionType.INFO; + } + if (!message.isEmpty()) { + ProblemDefinition def = new ProblemDefinition(severity, type, + message); + messages.add(new ValidationProblem(def, target, lineNumber, -1)); + } + } else { + String message = "No checksum to compare against in the product label " + + "for '" + urlRef + "'"; + ProblemDefinition def = new ProblemDefinition( + ExceptionType.INFO, ProblemType.MISSING_CHECKSUM_INFO, message); + messages.add(new ValidationProblem(def, target, lineNumber, -1)); + } } - } else { - String message = "No checksum to compare against in the product label " - + "for '" + urlRef + "'"; - ProblemDefinition def = new ProblemDefinition( - ExceptionType.INFO, ProblemType.MISSING_CHECKSUM_INFO, message); - messages.add(new ValidationProblem(def, target, lineNumber, -1)); - } + return messages; } - return messages; } } From 58d5db0ba6a9770853a88892ca2ffae8a9f76dc2 Mon Sep 17 00:00:00 2001 From: Jordan Padams Date: Thu, 30 Jan 2020 15:41:10 -0800 Subject: [PATCH 3/3] Remove dead code --- .../validate/FileReferenceValidator.java | 410 ------------------ 1 file changed, 410 deletions(-) delete mode 100644 src/main/java/gov/nasa/pds/tools/label/validate/FileReferenceValidator.java diff --git a/src/main/java/gov/nasa/pds/tools/label/validate/FileReferenceValidator.java b/src/main/java/gov/nasa/pds/tools/label/validate/FileReferenceValidator.java deleted file mode 100644 index 4db529460..000000000 --- a/src/main/java/gov/nasa/pds/tools/label/validate/FileReferenceValidator.java +++ /dev/null @@ -1,410 +0,0 @@ -// Copyright 2009-2018, by the California Institute of Technology. -// ALL RIGHTS RESERVED. United States Government Sponsorship acknowledged. -// Any commercial use must be negotiated with the Office of Technology -// Transfer at the California Institute of Technology. -// -// This software is subject to U. S. export control laws and regulations -// (22 C.F.R. 120-130 and 15 C.F.R. 730-774). To the extent that the software -// is subject to U.S. export control laws and regulations, the recipient has -// the responsibility to obtain export licenses or other export authority as -// may be required before exporting such information to foreign countries or -// providing access to foreign nationals. -// -// $Id$ -// -package gov.nasa.pds.tools.label.validate; - -import gov.nasa.pds.tools.label.ExceptionType; -import gov.nasa.pds.tools.util.MD5Checksum; -import gov.nasa.pds.tools.util.XMLExtractor; -import gov.nasa.pds.tools.validate.ProblemDefinition; -import gov.nasa.pds.tools.validate.ProblemHandler; -import gov.nasa.pds.tools.validate.ProblemType; -import gov.nasa.pds.tools.validate.ValidationProblem; - -import java.io.File; -import java.io.IOException; -import java.net.MalformedURLException; -import java.net.URL; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import javax.xml.xpath.XPathExpressionException; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.FilenameUtils; - -import net.sf.saxon.om.DocumentInfo; -import net.sf.saxon.tree.tiny.TinyNodeImpl; - -/** - * Validator class that looks for file references in the PDS4 product label - * and performs the following checks: - * - * - Verify that the generated checksum matches the supplied checksum, - * if provided - * - * - Verify that the casing of the file reference matches the file name - * casing on the file system - * - * @author mcayanan - * - */ -public class FileReferenceValidator implements DocumentValidator { - - /** - * XPath to the file references within a PDS4 data product label. - */ - private final String FILE_OBJECTS_XPATH = - "//*[starts-with(name(), 'File_Area')]/File | //Document_File"; - - private Map checksumManifest; - - public FileReferenceValidator() { - checksumManifest = new HashMap(); - } - - @Override - public boolean validate(ProblemHandler handler, DocumentInfo xml) { - boolean passFlag = true; - URL systemIdUrl = null; - try { - systemIdUrl = new URL(xml.getSystemId()); - } catch (MalformedURLException e1) { - // Ignore. Should not happen!!! - } - List problems = new ArrayList(); - try { - // Perform checksum validation on the label itself. - problems.addAll( - handleChecksum(systemIdUrl, systemIdUrl) - ); - } catch (Exception e) { - problems.add(new ValidationProblem( - new ProblemDefinition(ExceptionType.ERROR, - ProblemType.INTERNAL_ERROR, - "Error occurred while calculating checksum for " - + FilenameUtils.getName(xml.getSystemId()) + ": " - + e.getMessage()), - systemIdUrl)); - passFlag = false; - } - try { - XMLExtractor extractor = new XMLExtractor(xml); - URL labelUrl = new URL(xml.getSystemId()); - URL parent = labelUrl.toURI().getPath().endsWith("/") ? - labelUrl.toURI().resolve("..").toURL() : - labelUrl.toURI().resolve(".").toURL(); - try { - // Search for "xml:base" attributes within the merged XML. This will - // tell us if there are any xincludes. - List xincludes = extractor.getValuesFromDoc("//@xml:base"); - for (String xinclude : xincludes) { - URL xincludeUrl = new URL(parent, xinclude); - try { - xincludeUrl.openStream().close(); - // Check that the casing of the file reference matches the - // casing of the file located on the file system. - try { - File fileRef = FileUtils.toFile(xincludeUrl); - if (fileRef != null && - !fileRef.getCanonicalPath().endsWith(fileRef.getName())) { - handler.addProblem(new ValidationProblem( - new ProblemDefinition( - ExceptionType.WARNING, - ProblemType.FILE_REFERENCE_CASE_MISMATCH, - "File reference'" + fileRef.toString() - + "' exists but the case doesn't match."), - systemIdUrl)); - } - } catch (IOException io) { - problems.add(new ValidationProblem( - new ProblemDefinition(ExceptionType.FATAL, - ProblemType.INTERNAL_ERROR, - "Error occurred while checking for the existence of the " - + "uri reference '" + xincludeUrl.toString() + "': " - + io.getMessage()), - systemIdUrl)); - passFlag = false; - } - try { - // Perform checksum validation on the xincludes. - problems.addAll( - handleChecksum(systemIdUrl, xincludeUrl) - ); - } catch (Exception e) { - problems.add(new ValidationProblem( - new ProblemDefinition(ExceptionType.ERROR, - ProblemType.INTERNAL_ERROR, - "Error occurred while calculating checksum for " - + FilenameUtils.getName(xincludeUrl.toString()) + ": " - + e.getMessage()), - systemIdUrl)); - passFlag = false; - } - } catch (IOException io) { - problems.add(new ValidationProblem( - new ProblemDefinition( - ExceptionType.ERROR, - ProblemType.MISSING_REFERENCED_FILE, - "URI reference does not exist: " + xincludeUrl.toString()), - systemIdUrl)); - passFlag = false; - } - } - List fileObjects = extractor.getNodesFromDoc( - FILE_OBJECTS_XPATH); - for (TinyNodeImpl fileObject : fileObjects) { - String name = ""; - String checksum = ""; - String directory = ""; - List children = new ArrayList(); - try { - children = extractor.getNodesFromItem("*", fileObject); - } catch (XPathExpressionException xpe) { - problems.add(new ValidationProblem( - new ProblemDefinition(ExceptionType.FATAL, - ProblemType.INTERNAL_ERROR, - "Problem occurred while trying to get all the children " - + "of the file object node: " + xpe.getMessage()), - systemIdUrl, - fileObject.getLineNumber(), - -1)); - passFlag = false; - continue; - } - for (TinyNodeImpl child : children) { - if ("file_name".equals(child.getLocalPart())) { - name = child.getStringValue(); - } else if ("md5_checksum".equals(child.getLocalPart())) { - checksum = child.getStringValue(); - } else if ("directory_path_name".equals(child.getLocalPart())) { - directory = child.getStringValue(); - } - } - if (name.isEmpty()) { - problems.add(new ValidationProblem( - new ProblemDefinition(ExceptionType.ERROR, - ProblemType.INTERNAL_ERROR, - "Missing 'file_name' element tag"), - systemIdUrl, - fileObject.getLineNumber(), - -1) - ); - passFlag = false; - } else { - URL urlRef = null; - if (!directory.isEmpty()) { - urlRef = new URL(parent, directory + "/" + name); - } else { - urlRef = new URL(parent, name); - } - try { - urlRef.openStream().close(); - // Check that the casing of the file reference matches the - // casing of the file located on the file system. - try { - File fileRef = FileUtils.toFile(urlRef); - if (fileRef != null && - !fileRef.getCanonicalPath().endsWith(fileRef.getName())) { - handler.addProblem( - new ValidationProblem( - new ProblemDefinition( - ExceptionType.WARNING, - ProblemType.FILE_REFERENCE_CASE_MISMATCH, - "File reference'" + fileRef.toString() - + "' exists but the case doesn't match."), - systemIdUrl, - fileObject.getLineNumber(), - -1)); - } - } catch (IOException io) { - problems.add(new ValidationProblem( - new ProblemDefinition(ExceptionType.FATAL, - ProblemType.INTERNAL_ERROR, - "Error occurred while checking for the existence " - + "of the uri reference '" + urlRef.toString() + "': " - + io.getMessage()), - systemIdUrl, - fileObject.getLineNumber(), - -1)); - passFlag = false; - } - try { - problems.addAll(handleChecksum(systemIdUrl, urlRef, - fileObject, checksum)); - } catch (Exception e) { - problems.add(new ValidationProblem( - new ProblemDefinition(ExceptionType.ERROR, - ProblemType.INTERNAL_ERROR, - "Error occurred while calculating checksum for " - + FilenameUtils.getName(urlRef.toString()) + ": " - + e.getMessage()), - systemIdUrl, - fileObject.getLineNumber(), - -1)); - passFlag = false; - } - } catch (IOException io) { - problems.add(new ValidationProblem( - new ProblemDefinition( - ExceptionType.ERROR, - ProblemType.MISSING_REFERENCED_FILE, - "URI reference does not exist: " + urlRef.toString()), - systemIdUrl, - fileObject.getLineNumber(), - -1)); - passFlag = false; - } - } - } - } catch (XPathExpressionException xpe) { - problems.add(new ValidationProblem( - new ProblemDefinition(ExceptionType.FATAL, - ProblemType.INTERNAL_ERROR, - "Error occurred while evaluating the following xpath " - + "expression '" + FILE_OBJECTS_XPATH + "': " - + xpe.getMessage()), - systemIdUrl)); - passFlag = false; - } - } catch (Exception e) { - problems.add(new ValidationProblem( - new ProblemDefinition(ExceptionType.FATAL, - ProblemType.INTERNAL_ERROR, - "Error occurred while reading the uri: " + e.getMessage()), - systemIdUrl) - ); - passFlag = false; - } - // Add the problems to the problem handler. - for (ValidationProblem problem : problems) { - handler.addProblem(problem); - } - return passFlag; - } - - private List handleChecksum(URL systemId, URL fileRef) - throws Exception { - return handleChecksum(systemId, fileRef, null, null); - } - - /** - * Method to handle checksum processing. - * - * @param systemId The source (product label). - * @param urlRef The uri of the file being processed. - * @param fileObject The Node representation of the file object. - * @param checksumInLabel Supplied checksum in the label. Can pass in - * an empty value. If a null value is passed instead, it tells the - * method to not do a check to see if the generated value matches - * a supplied value. This would be in cases where a label's own - * checksum is being validated. - * - * @return The resulting checksum. This will either be the generated value, - * the value from the manifest file (if supplied), or the value from the - * supplied value in the product label (if provided). - * - * @throws Exception If there was an error generating the checksum - * (if the flag was on) - */ - private List handleChecksum(URL systemId, URL urlRef, - TinyNodeImpl fileObject, String checksumInLabel) - throws Exception { - List messages = new ArrayList(); - String generatedChecksum = MD5Checksum.getMD5Checksum(urlRef); - int lineNumber = -1; - if (fileObject != null) { - lineNumber = fileObject.getLineNumber(); - } - if (!checksumManifest.isEmpty()) { - if (checksumManifest.containsKey(urlRef)) { - String suppliedChecksum = checksumManifest.get(urlRef); - if (!suppliedChecksum.equals(generatedChecksum)) { - messages.add(new ValidationProblem( - new ProblemDefinition( - ExceptionType.ERROR, - ProblemType.CHECKSUM_MISMATCH, - "Generated checksum '" + generatedChecksum - + "' does not match supplied checksum '" - + suppliedChecksum + "' in the manifest for '" - + urlRef + "'."), - systemId, - lineNumber, - -1) - ); - } else { - messages.add(new ValidationProblem( - new ProblemDefinition( - ExceptionType.INFO, - ProblemType.CHECKSUM_MATCHES, - "Generated checksum '" + generatedChecksum - + "' matches the supplied checksum '" + suppliedChecksum - + "' in the manifest for '" + urlRef - + "'."), - systemId, - lineNumber, - -1) - ); - } - } else { - messages.add(new ValidationProblem( - new ProblemDefinition( - ExceptionType.ERROR, - ProblemType.MISSING_CHECKSUM, - "No checksum found in the manifest for '" - + urlRef + "'."), - systemId, - lineNumber, - -1) - ); - } - } - if (checksumInLabel != null) { - if (!checksumInLabel.isEmpty()) { - if (!generatedChecksum.equals(checksumInLabel)) { - messages.add(new ValidationProblem( - new ProblemDefinition(ExceptionType.ERROR, - ProblemType.CHECKSUM_MISMATCH, - "Generated checksum '" + generatedChecksum - + "' does not match supplied checksum '" - + checksumInLabel + "' in the product label for '" - + urlRef + "'."), - systemId, - lineNumber, - -1) - ); - } else { - messages.add(new ValidationProblem( - new ProblemDefinition(ExceptionType.INFO, - ProblemType.CHECKSUM_MATCHES, - "Generated checksum '" + generatedChecksum - + "' matches the supplied checksum '" + checksumInLabel - + "' in the produt label for '" - + urlRef + "'."), - systemId, - lineNumber, - -1) - ); - } - } else { - messages.add(new ValidationProblem( - new ProblemDefinition(ExceptionType.INFO, - ProblemType.MISSING_CHECKSUM_INFO, - "No checksum to compare against in the product label " - + "for '" + urlRef + "'."), - systemId, - lineNumber, - -1) - ); - } - } - return messages; - } - - public void setChecksumManifest(Map checksumManifest) { - this.checksumManifest = checksumManifest; - } -}