From 41691bdce22941ec518801c6e7f6ed2d14271571 Mon Sep 17 00:00:00 2001 From: "martin.geldmacher" Date: Thu, 18 Aug 2016 19:50:39 +0200 Subject: [PATCH 1/2] Add Apache commons-lang3 to dependencies and change build to use maven-shade plugin Apache commons-lang3 was added to the project dependencies. It contains many utilities that are useful and avoid having to rewrite them. Among them is FastDateFormat, which is a thread-safe and fast alternative to SimpleDateFormat. This dependency now has to be delivered together with GCViewer. The maven-shade plugin is now used to build an executable "fat-jar" that contains this and all other future dependencies. The maven-jar plugin has been removed, as it is no longer required. --- pom.xml | 73 +++++++++++++++++++++++++-------------------------------- 1 file changed, 32 insertions(+), 41 deletions(-) diff --git a/pom.xml b/pom.xml index 07241b19..c309ec47 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ 1.8 3.3 1.3 - 2.6 + 2.4.3 2.18.1 2.5.2 1.9.4 @@ -187,6 +187,12 @@ + + org.apache.commons + commons-lang3 + 3.4 + compile + junit junit @@ -271,46 +277,6 @@ true - - org.apache.maven.plugins - maven-jar-plugin - ${maven.jar.plugin.version} - - - - true - com.tagtraum.perf.gcviewer.GCViewer - true - - - ${timestamp} - ${revision} - - - - **/*.psd - - - - - org.apache.maven.plugins - maven-javadoc-plugin - ${maven.javadoc.plugin.version} - - - - true - - - ${timestamp} - ${revision} - - - - http://docs.oracle.com/javase/8/docs/api/ - - - org.apache.maven.plugins maven-source-plugin @@ -536,6 +502,31 @@ + + org.apache.maven.plugins + maven-shade-plugin + ${maven.shade.plugin.version} + + + package + + shade + + + true + true + + ${java.io.tmpdir}/dependency-reduced-pom.xml + + + + com.tagtraum.perf.gcviewer.GCViewer + + + + + + From 40c5a1ab3241a4ed9fc956cd367d516835ba2f59 Mon Sep 17 00:00:00 2001 From: "martin.geldmacher" Date: Thu, 18 Aug 2016 19:52:46 +0200 Subject: [PATCH 2/2] Improve logfile parsing speed by factor 2 for logfiles when AbstractDataReaderSun is used Profiling showed that a large part of the time for parsing a logfile is spent while parsing the contained datestamps. This is noticeable when either big logfiles or a series of logfiles are parsed. See https://github.com/geld0r/DateTimeFormatBenchmarks for a speed comparison of various DateTimeFormatters. FastDateFormat was chosen as a replacement and Apache Commons Lang added as dependency. Hand timed benchmarks show that parsing logfiles of supported types now works roughly twice as fast. --- .../gcviewer/imp/AbstractDataReaderSun.java | 31 ++++++++++++---- .../perf/gcviewer/imp/DataReaderFacade.java | 26 +++++++++++-- .../imp/TestAbstractDataReaderSun.java | 15 +++++++- .../gcviewer/imp/TestDataReaderSun1_6_0.java | 24 ++++++------ .../imp/TestDataReaderSun1_6_0G1.java | 37 ++++++++----------- .../gcviewer/imp/TestDataReaderSun1_7_0.java | 18 ++++----- 6 files changed, 94 insertions(+), 57 deletions(-) diff --git a/src/main/java/com/tagtraum/perf/gcviewer/imp/AbstractDataReaderSun.java b/src/main/java/com/tagtraum/perf/gcviewer/imp/AbstractDataReaderSun.java index f27966ac..95e3f909 100644 --- a/src/main/java/com/tagtraum/perf/gcviewer/imp/AbstractDataReaderSun.java +++ b/src/main/java/com/tagtraum/perf/gcviewer/imp/AbstractDataReaderSun.java @@ -7,14 +7,15 @@ import com.tagtraum.perf.gcviewer.model.GCResource; import com.tagtraum.perf.gcviewer.util.NumberParser; import com.tagtraum.perf.gcviewer.util.ParseInformation; +import org.apache.commons.lang3.time.FastDateFormat; import java.io.IOException; import java.io.InputStream; import java.io.LineNumberReader; import java.io.UnsupportedEncodingException; +import java.time.ZoneId; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; -import java.time.format.DateTimeParseException; import java.time.temporal.ChronoUnit; import java.util.LinkedList; import java.util.List; @@ -33,7 +34,8 @@ */ public abstract class AbstractDataReaderSun extends AbstractDataReader { - public static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSZ"); + public static final String TIMESTAMP_PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SSSZ"; + public static final FastDateFormat DATE_TIME_FORMATTER = FastDateFormat.getInstance(TIMESTAMP_PATTERN); private static final int LENGTH_OF_DATESTAMP = 29; private static final String CMS_PRINT_PROMOTION_FAILURE = "promotion failure size"; @@ -58,6 +60,8 @@ public abstract class AbstractDataReaderSun extends AbstractDataReader { /** the log type allowing for small differences between different versions of the gc logs */ protected GcLogType gcLogType; + /** the ZoneId of the GcResource that is being parsed. Lazily initialized. Assumption: constant for a GcResource */ + private ZoneId zoneId; /** * Create an instance of this class passing an inputStream an the type of the logfile. @@ -231,8 +235,8 @@ protected void setMemory(GCEvent event, String line, ParseInformation pos) throw } protected double parsePause(String line, ParseInformation pos) throws ParseException { - // usual pattern expected: "..., 0.002032 secs]" - // but may be as well (G1): "..., 0.003032]" + // usual pattern expected: "..., 0.002032 secs]" + // but may be as well (G1): "..., 0.003032]" // if the next token is "icms_dc" skip until after the comma // ...] icms_dc=0 , 8.0600619 secs] @@ -476,13 +480,13 @@ protected ZonedDateTime parseDatestamp(String line, ParseInformation pos) throws ZonedDateTime zonedDateTime = null; if (nextIsDatestamp(line, pos)) { try { - zonedDateTime = ZonedDateTime.parse(line.substring(pos.getIndex(), pos.getIndex() + LENGTH_OF_DATESTAMP - 1), - DATE_TIME_FORMATTER); + String timestamp = line.substring(pos.getIndex(), pos.getIndex() + LENGTH_OF_DATESTAMP - 1); + zonedDateTime = parseDatestamp(timestamp); pos.setIndex(pos.getIndex() + LENGTH_OF_DATESTAMP); if (pos.getFirstDateStamp() == null) { pos.setFirstDateStamp(zonedDateTime); } - } catch (DateTimeParseException e){ + } catch (java.text.ParseException e){ throw new ParseException(e.toString(), line); } } @@ -490,6 +494,19 @@ protected ZonedDateTime parseDatestamp(String line, ParseInformation pos) throws return zonedDateTime; } + private ZonedDateTime parseDatestamp(String timestamp) throws java.text.ParseException { + ZoneId zone = getOrParseZoneId(timestamp); + // Faster than using ZonedDateTime#parse. Zone must be explicitly set in order to keep original offset value. + return ZonedDateTime.from(DATE_TIME_FORMATTER.parse(timestamp).toInstant().atZone(zone)); + } + + private ZoneId getOrParseZoneId(String timestamp) { + if (zoneId == null) { + zoneId = ZonedDateTime.parse(timestamp, DateTimeFormatter.ofPattern(TIMESTAMP_PATTERN)).getZone(); + } + return zoneId; + } + /** * Returns true if text at parsePosition is a datestamp. * diff --git a/src/main/java/com/tagtraum/perf/gcviewer/imp/DataReaderFacade.java b/src/main/java/com/tagtraum/perf/gcviewer/imp/DataReaderFacade.java index 5061643c..8d030816 100644 --- a/src/main/java/com/tagtraum/perf/gcviewer/imp/DataReaderFacade.java +++ b/src/main/java/com/tagtraum/perf/gcviewer/imp/DataReaderFacade.java @@ -8,6 +8,8 @@ import com.tagtraum.perf.gcviewer.util.BuildInfoReader; import com.tagtraum.perf.gcviewer.util.HttpUrlConnectionHelper; import com.tagtraum.perf.gcviewer.util.LocalisationHelper; +import org.apache.commons.lang3.time.DurationFormatUtils; +import org.apache.commons.lang3.time.StopWatch; import java.beans.PropertyChangeListener; import java.io.File; @@ -19,6 +21,7 @@ import java.util.ArrayList; import java.util.List; import java.util.concurrent.atomic.AtomicLong; +import java.util.logging.Level; import java.util.logging.Logger; /** @@ -83,13 +86,20 @@ public GCModel loadModel(GCResource gcResource) throws DataReaderException { /** * Loads the {@link GCResource}s as a rotated series of logfiles. Takes care of ordering them * - * @param gcResource the {@link GcResourceSeries} to load + * @param series the {@link GcResourceSeries} to load * @return a {@link GCModel} containing all events found in the given {@link GCResource}s that were readable * @throws DataReaderException */ - protected GCModel loadModelFromSeries(GcResourceSeries gcResource) throws DataReaderException { + protected GCModel loadModelFromSeries(GcResourceSeries series) throws DataReaderException { + StopWatch stopWatch = new StopWatch(); + stopWatch.start(); + GcSeriesLoader seriesLoader = new GcSeriesLoader(this); - return seriesLoader.load(gcResource); + GCModel model = seriesLoader.load(series); + + stopWatch.stop(); + series.getLogger().log(Level.INFO, "Parsing logfile series containing of " + series.getResourcesInOrder().size() + " files took " + getDurationFormatted(stopWatch)); + return model; } /** @@ -126,11 +136,21 @@ private GCModel readModel(GcResourceFile gcResource) throws IOException { } } + StopWatch stopWatch = new StopWatch(); + stopWatch.start(); + DataReader reader = factory.getDataReader(gcResource, in); GCModel model = reader.read(); model.setURL(url); + stopWatch.stop(); + gcResource.getLogger().log(Level.INFO, "Parsing logfile " + gcResource.getResourceName() + " took " + getDurationFormatted(stopWatch)); + return model; } + private String getDurationFormatted(StopWatch stopWatch) { + return DurationFormatUtils.formatDuration(stopWatch.getTime(), "s,SS") + "s"; + } + } diff --git a/src/test/java/com/tagtraum/perf/gcviewer/imp/TestAbstractDataReaderSun.java b/src/test/java/com/tagtraum/perf/gcviewer/imp/TestAbstractDataReaderSun.java index 44467f37..726a3379 100644 --- a/src/test/java/com/tagtraum/perf/gcviewer/imp/TestAbstractDataReaderSun.java +++ b/src/test/java/com/tagtraum/perf/gcviewer/imp/TestAbstractDataReaderSun.java @@ -9,8 +9,12 @@ import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; +import java.time.ZoneOffset; +import java.time.ZonedDateTime; +import static org.hamcrest.core.Is.is; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; public class TestAbstractDataReaderSun { @@ -90,7 +94,16 @@ public void setExtendedMemoryFloatingPointPreEden_postEden() throws ParseExcepti assertEquals("heap before", 121344, event.getPreUsed()); assertEquals("heap after", 128, event.getPostUsed()); } - + + @Test + public void testParseDatestamp() throws Exception { + String line = + "2016-04-14T22:37:55.315+0200: 467.260: [GC (Allocation Failure) 467.260: [ParNew: 226563K->6586K(245760K), 0.0044323 secs] 385679K->165875K(791936K), 0.0045438 secs] [Times: user=0.00 sys=0.00, real=0.00 secs] "; + ParseInformation pos = new ParseInformation(0); + ZonedDateTime time = dataReader.parseDatestamp(line, pos); + assertThat(time, is(ZonedDateTime.of(2016, 4, 14, 22, 37, 55, 315000000, ZoneOffset.ofHours(2)))); + } + /** * Subclass of {@link AbstractDataReaderSun} which makes those methods public, I want to test here. * diff --git a/src/test/java/com/tagtraum/perf/gcviewer/imp/TestDataReaderSun1_6_0.java b/src/test/java/com/tagtraum/perf/gcviewer/imp/TestDataReaderSun1_6_0.java index 95dadfea..9460a09a 100644 --- a/src/test/java/com/tagtraum/perf/gcviewer/imp/TestDataReaderSun1_6_0.java +++ b/src/test/java/com/tagtraum/perf/gcviewer/imp/TestDataReaderSun1_6_0.java @@ -1,17 +1,17 @@ package com.tagtraum.perf.gcviewer.imp; import com.tagtraum.perf.gcviewer.UnittestHelper; -import com.tagtraum.perf.gcviewer.model.GcResourceFile; import com.tagtraum.perf.gcviewer.model.GCEvent; import com.tagtraum.perf.gcviewer.model.GCModel; import com.tagtraum.perf.gcviewer.model.GCResource; +import com.tagtraum.perf.gcviewer.model.GcResourceFile; import org.junit.Test; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.time.ZoneOffset; import java.time.ZonedDateTime; -import java.time.format.DateTimeFormatter; import java.util.logging.Level; import static org.hamcrest.Matchers.equalTo; @@ -20,7 +20,6 @@ public class TestDataReaderSun1_6_0 { - private final DateTimeFormatter dateTimeFormatter = AbstractDataReaderSun.DATE_TIME_FORMATTER; private InputStream getInputStream(String fileName) throws IOException { return UnittestHelper.getResourceAsStream(UnittestHelper.FOLDER_OPENJDK, fileName); @@ -28,19 +27,18 @@ private InputStream getInputStream(String fileName) throws IOException { @Test public void testPrintGCDateStamps() throws Exception { - ByteArrayInputStream in = new ByteArrayInputStream( - ("2011-10-05T04:23:39.427+0200: 19.845: [GC 19.845: [ParNew: 93184K->5483K(104832K), 0.0384413 secs] 93184K->5483K(1036928K), 0.0388082 secs] [Times: user=0.41 sys=0.06, real=0.04 secs]") - .getBytes()); - + ByteArrayInputStream in = + new ByteArrayInputStream(("2011-10-05T04:23:39.427+0200: 19.845: [GC 19.845: [ParNew: 93184K->5483K(104832K), 0.0384413 secs] 93184K->5483K(1036928K), 0.0388082 secs] [Times: user=0.41 sys=0.06, real=0.04 secs]").getBytes()); + DataReader reader = new DataReaderSun1_6_0(new GcResourceFile("byteArray"), in, GcLogType.SUN1_6); - GCModel model = reader.read(); + GCModel model = reader.read(); - assertTrue("hasDateStamp", model.hasDateStamp()); - assertEquals("DateStamp", - ZonedDateTime.parse("2011-10-05T04:23:39.427+0200", dateTimeFormatter), - model.getFirstDateStamp()); + assertTrue("hasDateStamp", model.hasDateStamp()); + ZonedDateTime expected = + ZonedDateTime.from(AbstractDataReaderSun.DATE_TIME_FORMATTER.parse("2011-10-05T04:23:39.427+0200").toInstant().atZone(ZoneOffset.ofHours(2))); + assertEquals("DateStamp", expected, model.getFirstDateStamp()); assertEquals("gc pause", 0.0388082, model.getGCPause().getMax(), 0.000001); - } + } @Test public void testCMSPromotionFailed() throws Exception { diff --git a/src/test/java/com/tagtraum/perf/gcviewer/imp/TestDataReaderSun1_6_0G1.java b/src/test/java/com/tagtraum/perf/gcviewer/imp/TestDataReaderSun1_6_0G1.java index eecc1132..0757a754 100644 --- a/src/test/java/com/tagtraum/perf/gcviewer/imp/TestDataReaderSun1_6_0G1.java +++ b/src/test/java/com/tagtraum/perf/gcviewer/imp/TestDataReaderSun1_6_0G1.java @@ -2,16 +2,16 @@ import com.tagtraum.perf.gcviewer.UnittestHelper; import com.tagtraum.perf.gcviewer.model.AbstractGCEvent.Type; -import com.tagtraum.perf.gcviewer.model.GcResourceFile; import com.tagtraum.perf.gcviewer.model.GCModel; import com.tagtraum.perf.gcviewer.model.GCResource; +import com.tagtraum.perf.gcviewer.model.GcResourceFile; import org.junit.Test; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; +import java.time.ZoneOffset; import java.time.ZonedDateTime; -import java.time.format.DateTimeFormatter; import java.util.logging.Level; import static org.hamcrest.Matchers.*; @@ -20,8 +20,6 @@ public class TestDataReaderSun1_6_0G1 { - private final DateTimeFormatter dateTimeFormatter = AbstractDataReaderSun.DATE_TIME_FORMATTER; - private InputStream getInputStream(String fileName) throws IOException { return UnittestHelper.getResourceAsStream(UnittestHelper.FOLDER_OPENJDK, fileName); } @@ -31,11 +29,11 @@ private InputStream getInputStream(String fileName) throws IOException { */ @Test public void testG1GcVerbose() throws Exception { - TestLogHandler handler = new TestLogHandler(); + TestLogHandler handler = new TestLogHandler(); handler.setLevel(Level.WARNING); GCResource gcResource = new GcResourceFile("SampleSun1_6_0G1_gc_verbose.txt"); gcResource.getLogger().addHandler(handler); - + InputStream in = getInputStream(gcResource.getResourceName()); DataReader reader = new DataReaderSun1_6_0G1(gcResource, in, GcLogType.SUN1_6G1); GCModel model = reader.read(); @@ -71,30 +69,25 @@ public void testSimpleEventDateStamp() throws Exception { assertEquals("count", 1, model.size()); assertEquals("gc pause", 0.0032067, model.getGCPause().getMax(), 0.000001); - assertEquals("datestamp", - ZonedDateTime.parse("2012-02-29T13:41:00.721+0100", dateTimeFormatter), - model.getGCEvents().next().getDatestamp()); + ZonedDateTime expected = ZonedDateTime.from(AbstractDataReaderSun.DATE_TIME_FORMATTER.parse("2012-02-29T13:41:00.721+0100").toInstant().atZone(ZoneOffset.ofHours(1))); + assertEquals("datestamp", expected, model.getGCEvents().next().getDatestamp()); } - + @Test public void testG1FullGcSystemGc() throws Exception { - InputStream in = new ByteArrayInputStream( - ("9.978: [Full GC (System.gc()) 597M->1142K(7168K), 0.1604955 secs]") - .getBytes()); - - DataReader reader = new DataReaderSun1_6_0G1(new GcResourceFile("byteArray"), in, GcLogType.SUN1_6G1); - GCModel model = reader.read(); + InputStream in = new ByteArrayInputStream(("9.978: [Full GC (System.gc()) 597M->1142K(7168K), 0.1604955 secs]").getBytes()); - assertEquals("count", 1, model.size()); - assertEquals("full gc pause", 0.1604955, model.getFullGCPause().getMax(), 0.000001); + DataReader reader = new DataReaderSun1_6_0G1(new GcResourceFile("byteArray"), in, GcLogType.SUN1_6G1); + GCModel model = reader.read(); + assertEquals("count", 1, model.size()); + assertEquals("full gc pause", 0.1604955, model.getFullGCPause().getMax(), 0.000001); } @Test public void testG1MixedLine1() throws Exception { InputStream in = new ByteArrayInputStream( - ("0.388: [GC pause (young) (initial-mark) 10080K->10080K(16M)0.390: [GC concurrent-mark-start]" + - "\n, 0.0013065 secs]") + ("0.388: [GC pause (young) (initial-mark) 10080K->10080K(16M)0.390: [GC concurrent-mark-start]" + "\n, 0.0013065 secs]") .getBytes()); DataReader reader = new DataReaderSun1_6_0G1(new GcResourceFile("byteArray"), in, GcLogType.SUN1_6G1); @@ -183,8 +176,8 @@ public void testDetailedCollectionMixed3() throws Exception { @Test public void testFullGcMixed() throws Exception { InputStream in = new ByteArrayInputStream(("107.222: [Full GC107.248: [GC concurrent-mark-end, 0.0437048 sec]" + - "\n 254M->59M(199M), 0.0687356 secs]" + - "\n [Times: user=0.06 sys=0.02, real=0.07 secs]").getBytes()); + "\n 254M->59M(199M), 0.0687356 secs]" + + "\n [Times: user=0.06 sys=0.02, real=0.07 secs]").getBytes()); DataReader reader = new DataReaderSun1_6_0G1(new GcResourceFile("byteArray"), in, GcLogType.SUN1_6G1); GCModel model = reader.read(); diff --git a/src/test/java/com/tagtraum/perf/gcviewer/imp/TestDataReaderSun1_7_0.java b/src/test/java/com/tagtraum/perf/gcviewer/imp/TestDataReaderSun1_7_0.java index 732a8454..5a8cd732 100644 --- a/src/test/java/com/tagtraum/perf/gcviewer/imp/TestDataReaderSun1_7_0.java +++ b/src/test/java/com/tagtraum/perf/gcviewer/imp/TestDataReaderSun1_7_0.java @@ -1,10 +1,8 @@ package com.tagtraum.perf.gcviewer.imp; -import static org.hamcrest.Matchers.closeTo; -import static org.hamcrest.Matchers.equalTo; -import static org.hamcrest.Matchers.is; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertThat; +import com.tagtraum.perf.gcviewer.UnittestHelper; +import com.tagtraum.perf.gcviewer.model.*; +import org.junit.Test; import java.io.ByteArrayInputStream; import java.io.IOException; @@ -13,9 +11,9 @@ import java.util.Iterator; import java.util.logging.Level; -import com.tagtraum.perf.gcviewer.model.*; -import org.junit.Test; -import com.tagtraum.perf.gcviewer.UnittestHelper; +import static org.hamcrest.Matchers.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; /** * Tests for logs generated specifically by jdk 1.7.0. @@ -25,8 +23,6 @@ */ public class TestDataReaderSun1_7_0 { - private final DateTimeFormatter dateTimeFormatter = AbstractDataReaderSun.DATE_TIME_FORMATTER; - private InputStream getInputStream(String fileName) throws IOException { return UnittestHelper.getResourceAsStream(UnittestHelper.FOLDER_OPENJDK, fileName); } @@ -386,7 +382,7 @@ public void cmsPrintGCApplicationStoppedTimeWithoutTimestampsTenuringDist() thro model.get(1).getTimestamp(), closeTo(33395.153 + 0.1120380, 0.0000001)); assertThat("Application Stopped datestamp", - dateTimeFormatter.format(model.get(1).getDatestamp()), + model.get(1).getDatestamp().format(DateTimeFormatter.ofPattern(AbstractDataReaderSun.TIMESTAMP_PATTERN)), equalTo("2012-04-26T23:59:51.011+0400")); assertThat("first timestamp", model.getFirstPauseTimeStamp(), closeTo(33395.153, 0.00001)); }