diff --git a/.settings/org.eclipse.core.resources.prefs b/.settings/org.eclipse.core.resources.prefs
index 6e2c23d..5d60b85 100644
--- a/.settings/org.eclipse.core.resources.prefs
+++ b/.settings/org.eclipse.core.resources.prefs
@@ -5,8 +5,10 @@ encoding//stamp/GUI/plotDlgUI.py=utf-8
encoding//stamp/mainUI.py=utf-8
encoding//stamp/plugins/groups/plots/configGUI/BarPlotUI.py=utf-8
encoding//stamp/plugins/groups/plots/configGUI/BoxPlotUI.py=utf-8
+encoding//stamp/plugins/groups/plots/configGUI/HeatmapPlotUI.py=utf-8
encoding//stamp/plugins/groups/plots/configGUI/extendedErrorBarUI.py=utf-8
encoding//stamp/plugins/multiGroups/plots/configGUI/BarPlotUI.py=utf-8
encoding//stamp/plugins/multiGroups/plots/configGUI/BoxPlotUI.py=utf-8
+encoding//stamp/plugins/multiGroups/plots/configGUI/HeatmapPlotUI.py=utf-8
encoding//stamp/plugins/multiGroups/plots/configGUI/pcaPlotUI.py=utf-8
encoding//stamp/plugins/samples/plots/configGUI/barUI.py=utf-8
diff --git a/manual/STAMP_Users_Guide.docx b/manual/STAMP_Users_Guide.docx
index 94d0575..34bf440 100644
Binary files a/manual/STAMP_Users_Guide.docx and b/manual/STAMP_Users_Guide.docx differ
diff --git a/manual/STAMP_Users_Guide.pdf b/manual/STAMP_Users_Guide.pdf
index 9585bd1..ac1a682 100644
Binary files a/manual/STAMP_Users_Guide.pdf and b/manual/STAMP_Users_Guide.pdf differ
diff --git a/scripts/checkHierarchy.py b/scripts/checkHierarchy.py
new file mode 100644
index 0000000..bfed809
--- /dev/null
+++ b/scripts/checkHierarchy.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python
+
+###############################################################################
+# #
+# This program is free software: you can redistribute it and/or modify #
+# it under the terms of the GNU General Public License as published by #
+# the Free Software Foundation, either version 3 of the License, or #
+# (at your option) any later version. #
+# #
+# This program is distributed in the hope that it will be useful, #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
+# GNU General Public License for more details. #
+# #
+# You should have received a copy of the GNU General Public License #
+# along with this program. If not, see <http://www.gnu.org/licenses/>. #
+# #
+###############################################################################
+
+__prog_name__ = 'checkHierarchy'
+__prog_desc__ = ''
+
+__author__ = 'Donovan Parks'
+__copyright__ = 'Copyright 2014'
+__credits__ = ['Donovan Parks']
+__license__ = 'GPL3'
+__version__ = '0.0.1'
+__maintainer__ = 'Donovan Parks'
+__email__ = 'donovan.parks@gmail.com'
+__status__ = 'Development'
+
+import os
+import sys
+import argparse
+from collections import defaultdict
+
+def isNumber(s):
+ """Check if a string is a number."""
+ try:
+ float(s)
+ return True
+ except ValueError:
+ return False
+
+class CheckHierarchy(object):
+ def __init__(self):
+ pass
+
+ def isUnclassified(self, value):
+ """Check if value (taxon, metabolic pathway) is unclassified."""
+
+ # currently unclassified sequences need to be explicitly stated as
+ # 'unclassified' (case insensitive) or '*__unclassified' which is
+ # the format used by GreenGenes
+ return value.lower() == 'unclassified' or value.lower()[1:] == '__unclassified'
+
+ def determineHierarchicalColumns(self, headerValues, firstDataValues):
+ """Determine columns corresponding to user-defined hierarchy."""
+
+ # first column entry that is numeric is assumed to be from first sample
+ firstSampleIndex = 0
+ for entry in firstDataValues:
+ if isNumber(entry):
+ break
+ firstSampleIndex += 1
+
+ # sanity check profile
+ numSamples = len(headerValues) - firstSampleIndex
+ if numSamples < 2:
+ print '[Error] Profile must contain at least two samples. Identified %d samples' % numSamples
+ sys.exit()
+
+ if firstSampleIndex == 0:
+ print '[Error] Profile file must contain at least one column indicating feature names.'
+ sys.exit()
+
+ print 'Identified %d samples.' % numSamples
+ print 'Identified %d hierarchical columns.' % firstSampleIndex
+
+ # get name of hierarchical columns
+ columnNames = headerValues[0:firstSampleIndex]
+
+ return columnNames
+
+ def run(self, stampProfile):
+ """Verify that data forms a strict hierarchy."""
+ parent = defaultdict(dict)
+
+ # identify entries breaking hierarchy
+ entriesWithUnclassifiedParents = []
+ entriesBreakingHierarchy = []
+ with open(stampProfile, 'U') as f:
+ header = f.readline()
+ headerValues = map(str.strip, header.split('\t'))
+
+ columnNames = None
+ for i, line in enumerate(f):
+ rowNumber = i+2 # +1 for header row, +1 for zero indexing
+ lineSplit = line.split('\t')
+ dataValues = map(str.strip, lineSplit)
+
+ if len(headerValues) != len(dataValues):
+ print '[Error] Line %d does not contain as many entries as the header line.' % rowNumber
+ sys.exit()
+
+ if not columnNames:
+ columnNames = self.determineHierarchicalColumns(headerValues, dataValues)
+
+ categories = dataValues[0:len(columnNames)]
+ for r, value in enumerate(categories):
+ # top of hierarchy has no parent
+ if r == 0:
+ continue
+
+ # ignore unclassified sequences
+ if self.isUnclassified(value):
+ continue
+
+ # make sure parent is not unclassified
+ parentValue = categories[r-1]
+ if self.isUnclassified(parentValue):
+ entriesWithUnclassifiedParents.append([rowNumber, r, value])
+ continue
+
+ if r not in parent:
+ parent[r] = {}
+
+ if value not in parent[r]:
+ parent[r][value] = parentValue
+ else:
+ if parent[r][value] != parentValue:
+ entriesBreakingHierarchy.append([rowNumber, r, value, parent[r][value], parentValue])
+
+ # report entries breaking hierarchy
+ if len(entriesWithUnclassifiedParents) > 0:
+ print ''
+ print 'The following entries have an unclassified parent:'
+ for entry in entriesWithUnclassifiedParents:
+ rowNumber, r, value = entry
+ print '%s\t%s\t%s' % (rowNumber, columnNames[r], value)
+
+
+ if len(entriesBreakingHierarchy) > 0:
+ print ''
+ print 'The following entries have two (and potentially more) parents:'
+ for entry in entriesBreakingHierarchy:
+ rowNumber, r, value, parent1, parent2 = entry
+ print '%s\t%s\t%s\t%s' % (rowNumber, columnNames[r], value, ','.join([parent1, parent2]))
+
+ if len(entriesWithUnclassifiedParents) == 0 and len(entriesBreakingHierarchy) == 0:
+ print ''
+ print 'Profile forms a strict hierarchy. You are good to go!'
+
+if __name__ == '__main__':
+ print __prog_name__ + ' v' + __version__ + ': ' + __prog_desc__
+ print ' by ' + __author__ + ' (' + __email__ + ')' + '\n'
+
+ parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+ parser.add_argument('stamp_profile', help='STAMP profile to evaluate')
+
+ args = parser.parse_args()
+
+ try:
+ checkHierarchy = CheckHierarchy()
+ checkHierarchy.run(args.stamp_profile)
+ except SystemExit:
+ print "\nControlled exit resulting from an unrecoverable error or warning."
+ except:
+ print "\nUnexpected error:", sys.exc_info()[0]
+ raise
diff --git a/setup.py b/setup.py
index c4ef6d7..3fc3b52 100644
--- a/setup.py
+++ b/setup.py
@@ -37,7 +37,7 @@
('.', ['README.md'])]
setup(
name='STAMP',
- version='2.0.7',
+ version='2.0.8',
author='Donovan Parks, Rob Beiko',
author_email='donovan.parks@gmail.com',
packages=['stamp', 'stamp.GUI'] + pluginPkgs + metagenomicPkgs,
diff --git a/stamp/STAMP.py b/stamp/STAMP.py
index 6ce32de..35b983d 100644
--- a/stamp/STAMP.py
+++ b/stamp/STAMP.py
@@ -23,8 +23,8 @@
__copyright__ = 'Copyright 2013'
__credits__ = ['Donovan Parks']
__license__ = 'GPL3'
-__version__ = '2.0.7'
-__date__ = 'July 26, 2014'
+__version__ = '2.0.8'
+__date__ = 'August 4, 2014'
__maintainer__ = 'Donovan Parks'
__email__ = 'donovan.parks@gmail.com'
__status__ = 'Development'
diff --git a/stamp/metagenomics/fileIO/StampIO.py b/stamp/metagenomics/fileIO/StampIO.py
index 09d9584..63d99e7 100644
--- a/stamp/metagenomics/fileIO/StampIO.py
+++ b/stamp/metagenomics/fileIO/StampIO.py
@@ -135,20 +135,28 @@ def checkHierarchy(self, data, numHierarchicalLevels):
categories = lineSplit[0:numHierarchicalLevels]
for r, value in enumerate(categories):
+ # top of hierarchy has no parent
if r == 0:
- continue # top of hierarchy has no parent
+ continue
+ # ignore unclassified sequences
if self.isUnclassified(value):
- continue # ignore unclassified sequences
+ continue
+
+ # make sure parent is not unclassified
+ parentValue = categories[r-1]
+ if self.isUnclassified(parentValue):
+ return "Child %s has an unclassified parent." % value
+ continue
if r not in parent:
parent[r] = {}
if value not in parent[r]:
- parent[r][value] = categories[r-1]
+ parent[r][value] = parentValue
else:
- if parent[r][value] != categories[r-1]:
+ if parent[r][value] != parentValue:
# data is not a strict hierarchy
- return "Data does not form a strict hierarchy. Child %s has multiple parents (e.g., %s, %s)." % (value, parent[r][value], categories[r-1])
+ return "Data does not form a strict hierarchy. Child %s has multiple parents (e.g., %s, %s)." % (value, parent[r][value], parentValue)
return None
-
+
\ No newline at end of file
diff --git a/stamp/plugins/groups/plots/HeatmapPlot.py b/stamp/plugins/groups/plots/HeatmapPlot.py
index 65eb3f4..cee5518 100644
--- a/stamp/plugins/groups/plots/HeatmapPlot.py
+++ b/stamp/plugins/groups/plots/HeatmapPlot.py
@@ -124,9 +124,9 @@ def plot(self, profile, statsResults):
if len(featuresToPlot) <= 1 or (len(profile.samplesInGroup1) + len(profile.samplesInGroup2)) <= 1:
self.emptyAxis()
return
- elif len(featuresToPlot) > 100 or len(profile.samplesInGroup1) + len(profile.samplesInGroup2) > 100:
+ elif len(featuresToPlot) > 1000 or len(profile.samplesInGroup1) + len(profile.samplesInGroup2) > 1000:
QtGui.QApplication.instance().setOverrideCursor(QtGui.QCursor(QtCore.Qt.ArrowCursor))
- QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 100 samples and 100 features.', QtGui.QMessageBox.Ok)
+ QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 1000 samples and 1000 features.', QtGui.QMessageBox.Ok)
QtGui.QApplication.instance().restoreOverrideCursor()
self.emptyAxis()
return
diff --git a/stamp/plugins/groups/plots/configGUI/HeatmapPlot.ui b/stamp/plugins/groups/plots/configGUI/HeatmapPlot.ui
index 4b557c9..662c8f7 100644
--- a/stamp/plugins/groups/plots/configGUI/HeatmapPlot.ui
+++ b/stamp/plugins/groups/plots/configGUI/HeatmapPlot.ui
@@ -17,7 +17,7 @@
- PCA plot
+ Heatmap plot
@@ -95,7 +95,7 @@
0.500000000000000
- 30.000000000000000
+ 100.000000000000000
0.100000000000000
@@ -130,7 +130,7 @@
0.500000000000000
- 30.000000000000000
+ 100.000000000000000
0.100000000000000
diff --git a/stamp/plugins/groups/plots/configGUI/HeatmapPlotUI.py b/stamp/plugins/groups/plots/configGUI/HeatmapPlotUI.py
index 9b0bbd0..885b564 100644
--- a/stamp/plugins/groups/plots/configGUI/HeatmapPlotUI.py
+++ b/stamp/plugins/groups/plots/configGUI/HeatmapPlotUI.py
@@ -2,7 +2,7 @@
# Form implementation generated from reading ui file 'HeatmapPlot.ui'
#
-# Created: Sat Jul 26 11:13:18 2014
+# Created: Mon Aug 04 15:26:28 2014
# by: PyQt4 UI code generator 4.9.6
#
# WARNING! All changes made in this file will be lost!
@@ -73,7 +73,7 @@ def setupUi(self, HeatmapPlotDialog):
self.spinFigWidth.setSizePolicy(sizePolicy)
self.spinFigWidth.setDecimals(2)
self.spinFigWidth.setMinimum(0.5)
- self.spinFigWidth.setMaximum(30.0)
+ self.spinFigWidth.setMaximum(100.0)
self.spinFigWidth.setSingleStep(0.1)
self.spinFigWidth.setProperty("value", 7.0)
self.spinFigWidth.setObjectName(_fromUtf8("spinFigWidth"))
@@ -90,7 +90,7 @@ def setupUi(self, HeatmapPlotDialog):
self.spinFigHeight.setSizePolicy(sizePolicy)
self.spinFigHeight.setDecimals(2)
self.spinFigHeight.setMinimum(0.5)
- self.spinFigHeight.setMaximum(30.0)
+ self.spinFigHeight.setMaximum(100.0)
self.spinFigHeight.setSingleStep(0.1)
self.spinFigHeight.setProperty("value", 7.0)
self.spinFigHeight.setObjectName(_fromUtf8("spinFigHeight"))
@@ -268,7 +268,7 @@ def setupUi(self, HeatmapPlotDialog):
QtCore.QMetaObject.connectSlotsByName(HeatmapPlotDialog)
def retranslateUi(self, HeatmapPlotDialog):
- HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "PCA plot", None))
+ HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "Heatmap plot", None))
self.label.setText(_translate("HeatmapPlotDialog", "Field to plot:", None))
self.cboFieldToPlot.setItemText(0, _translate("HeatmapPlotDialog", "Number of sequences", None))
self.cboFieldToPlot.setItemText(1, _translate("HeatmapPlotDialog", "Proportion of sequences (%)", None))
diff --git a/stamp/plugins/multiGroups/plots/HeatmapPlot.py b/stamp/plugins/multiGroups/plots/HeatmapPlot.py
index de805ff..173709f 100644
--- a/stamp/plugins/multiGroups/plots/HeatmapPlot.py
+++ b/stamp/plugins/multiGroups/plots/HeatmapPlot.py
@@ -126,9 +126,9 @@ def plot(self, profile, statsResults):
if len(featuresToPlot) <= 1 or len(profile.activeGroupNames) <= 1:
self.emptyAxis()
return
- elif len(featuresToPlot) > 100 or len(profile.activeSamplesInGroups) > 100:
+ elif len(featuresToPlot) > 1000 or len(profile.activeSamplesInGroups) > 1000:
QtGui.QApplication.instance().setOverrideCursor(QtGui.QCursor(QtCore.Qt.ArrowCursor))
- QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 100 samples and 100 features.', QtGui.QMessageBox.Ok)
+ QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 1000 samples and 1000 features.', QtGui.QMessageBox.Ok)
QtGui.QApplication.instance().restoreOverrideCursor()
self.emptyAxis()
return
diff --git a/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlot.ui b/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlot.ui
index 4b557c9..662c8f7 100644
--- a/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlot.ui
+++ b/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlot.ui
@@ -17,7 +17,7 @@
- PCA plot
+ Heatmap plot
@@ -95,7 +95,7 @@
0.500000000000000
- 30.000000000000000
+ 100.000000000000000
0.100000000000000
@@ -130,7 +130,7 @@
0.500000000000000
- 30.000000000000000
+ 100.000000000000000
0.100000000000000
diff --git a/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlotUI.py b/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlotUI.py
index f3ada6b..1e0e52b 100644
--- a/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlotUI.py
+++ b/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlotUI.py
@@ -2,7 +2,7 @@
# Form implementation generated from reading ui file 'HeatmapPlot.ui'
#
-# Created: Sat Jul 26 11:35:52 2014
+# Created: Mon Aug 04 15:27:32 2014
# by: PyQt4 UI code generator 4.9.6
#
# WARNING! All changes made in this file will be lost!
@@ -73,7 +73,7 @@ def setupUi(self, HeatmapPlotDialog):
self.spinFigWidth.setSizePolicy(sizePolicy)
self.spinFigWidth.setDecimals(2)
self.spinFigWidth.setMinimum(0.5)
- self.spinFigWidth.setMaximum(30.0)
+ self.spinFigWidth.setMaximum(100.0)
self.spinFigWidth.setSingleStep(0.1)
self.spinFigWidth.setProperty("value", 7.0)
self.spinFigWidth.setObjectName(_fromUtf8("spinFigWidth"))
@@ -90,7 +90,7 @@ def setupUi(self, HeatmapPlotDialog):
self.spinFigHeight.setSizePolicy(sizePolicy)
self.spinFigHeight.setDecimals(2)
self.spinFigHeight.setMinimum(0.5)
- self.spinFigHeight.setMaximum(30.0)
+ self.spinFigHeight.setMaximum(100.0)
self.spinFigHeight.setSingleStep(0.1)
self.spinFigHeight.setProperty("value", 7.0)
self.spinFigHeight.setObjectName(_fromUtf8("spinFigHeight"))
@@ -268,7 +268,7 @@ def setupUi(self, HeatmapPlotDialog):
QtCore.QMetaObject.connectSlotsByName(HeatmapPlotDialog)
def retranslateUi(self, HeatmapPlotDialog):
- HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "PCA plot", None))
+ HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "Heatmap plot", None))
self.label.setText(_translate("HeatmapPlotDialog", "Field to plot:", None))
self.cboFieldToPlot.setItemText(0, _translate("HeatmapPlotDialog", "Number of sequences", None))
self.cboFieldToPlot.setItemText(1, _translate("HeatmapPlotDialog", "Proportion of sequences (%)", None))