Updated heat maps and user's guide.

donovan-h-parks · Aug 4, 2014 · 45c151d · 45c151d
1 parent 17158af
commit 45c151d
Show file tree

Hide file tree

Showing 13 changed files with 207 additions and 27 deletions.
diff --git a/.settings/org.eclipse.core.resources.prefs b/.settings/org.eclipse.core.resources.prefs
@@ -5,8 +5,10 @@ encoding//stamp/GUI/plotDlgUI.py=utf-8
 encoding//stamp/mainUI.py=utf-8
 encoding//stamp/plugins/groups/plots/configGUI/BarPlotUI.py=utf-8
 encoding//stamp/plugins/groups/plots/configGUI/BoxPlotUI.py=utf-8
+encoding//stamp/plugins/groups/plots/configGUI/HeatmapPlotUI.py=utf-8
 encoding//stamp/plugins/groups/plots/configGUI/extendedErrorBarUI.py=utf-8
 encoding//stamp/plugins/multiGroups/plots/configGUI/BarPlotUI.py=utf-8
 encoding//stamp/plugins/multiGroups/plots/configGUI/BoxPlotUI.py=utf-8
+encoding//stamp/plugins/multiGroups/plots/configGUI/HeatmapPlotUI.py=utf-8
 encoding//stamp/plugins/multiGroups/plots/configGUI/pcaPlotUI.py=utf-8
 encoding//stamp/plugins/samples/plots/configGUI/barUI.py=utf-8
diff --git a/manual/STAMP_Users_Guide.docx b/manual/STAMP_Users_Guide.docx
diff --git a/manual/STAMP_Users_Guide.pdf b/manual/STAMP_Users_Guide.pdf
diff --git a/scripts/checkHierarchy.py b/scripts/checkHierarchy.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python
+
+###############################################################################
+#                                                                             #
+#    This program is free software: you can redistribute it and/or modify     #
+#    it under the terms of the GNU General Public License as published by     #
+#    the Free Software Foundation, either version 3 of the License, or        #
+#    (at your option) any later version.                                      #
+#                                                                             #
+#    This program is distributed in the hope that it will be useful,          #
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of           #
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            #
+#    GNU General Public License for more details.                             #
+#                                                                             #
+#    You should have received a copy of the GNU General Public License        #
+#    along with this program. If not, see <http://www.gnu.org/licenses/>.     #
+#                                                                             #
+###############################################################################
+
+# Program metadata; the name, version, description, author and e-mail are
+# printed in the start-up banner when the script is run directly.
+__prog_name__ = 'checkHierarchy'
+__prog_desc__ = '<program description>'  # NOTE(review): placeholder text, printed verbatim in the banner
+
+__author__ = 'Donovan Parks'
+__copyright__ = 'Copyright 2014'
+__credits__ = ['Donovan Parks']
+__license__ = 'GPL3'
+__version__ = '0.0.1'
+__maintainer__ = 'Donovan Parks'
+__email__ = '[email protected]'  # NOTE(review): looks like an obfuscated address - confirm against the repository
+__status__ = 'Development'
+
+import os
+import sys
+import argparse
+from collections import defaultdict
+
+def isNumber(s):
+	"""Check if a string is a number.
+
+	Returns True when float(s) succeeds and False when float(s) raises
+	ValueError.
+	"""
+	try:
+		float(s)
+		return True
+	except ValueError:  # only ValueError is handled; any other exception propagates
+		return False
+
+class CheckHierarchy(object):
+	"""Check that the leading, non-numeric columns of a tab-separated profile form a strict hierarchy.
+
+	A strict hierarchy here means that, within a given column, every
+	classified value is always seen with the same value in the column
+	immediately to its left, and that this parent value is itself classified.
+	"""
+
+	def __init__(self):
+		"""Create the checker; no instance state is set up."""
+		pass
+
+	def isUnclassified(self, value):
+		"""Check if value (taxon, metabolic pathway) is unclassified.
+
+		Returns True when value, lower-cased, equals 'unclassified', or when
+		everything after its first character equals '__unclassified'.
+		"""
+
+		# currently unclassified sequences need to be explicitly stated as
+		# 'unclassified' (case insensitive) or '*__unclassified' which is
+		# the format used by GreenGenes
+		return value.lower() == 'unclassified' or value.lower()[1:] == '__unclassified'
+
+	def determineHierarchicalColumns(self, headerValues, firstDataValues):
+		"""Determine columns corresponding to user-defined hierarchy.
+
+		headerValues    -- list of header fields
+		firstDataValues -- list of fields from the first data row
+
+		The columns before the first numeric entry of firstDataValues are
+		taken to be hierarchical columns; the remaining header fields are
+		counted as samples. Returns the header names of the hierarchical
+		columns. Prints an error and calls sys.exit() when fewer than two
+		samples or no hierarchical column is found.
+		"""
+
+		# first column entry that is numeric is assumed to be from first sample
+		firstSampleIndex = 0
+		for entry in firstDataValues:
+			if isNumber(entry):
+				break
+			firstSampleIndex += 1
+
+		# sanity check profile
+		numSamples = len(headerValues) - firstSampleIndex
+		if numSamples < 2:
+			print '[Error] Profile must contain at least two samples. Identified %d samples' % numSamples
+			sys.exit()  # NOTE(review): no exit status is passed, so SystemExit carries None
+
+		if firstSampleIndex == 0:
+			print '[Error] Profile file must contain at least one column indicating feature names.'
+			sys.exit()
+
+		print 'Identified %d samples.' % numSamples
+		print 'Identified %d hierarchical columns.' % firstSampleIndex
+
+		# get name of hierarchical columns
+		columnNames = headerValues[0:firstSampleIndex]
+
+		return columnNames
+
+	def run(self, stampProfile):
+		"""Verify that data forms a strict hierarchy.
+
+		stampProfile -- path of the tab-separated profile file to read
+
+		Prints the rows whose parent is unclassified, the rows whose value
+		was previously seen with a different parent, or a success message
+		when neither kind of row was found. Prints an error and calls
+		sys.exit() when a data row does not have as many fields as the header.
+		"""
+		# parent[r][value] holds the first parent seen for value in hierarchical column r
+		parent = defaultdict(dict)
+
+		# identify entries breaking hierarchy
+		entriesWithUnclassifiedParents = []
+		entriesBreakingHierarchy = []
+		with open(stampProfile, 'U') as f:
+			header = f.readline()
+			headerValues = map(str.strip, header.split('\t'))
+
+			# set from the first data row; stays None when the file has no data rows
+			columnNames = None
+			for i, line in enumerate(f):
+				rowNumber = i+2 # +1 for the header row, +1 to turn the zero-based index into a one-based row number
+				lineSplit = line.split('\t')
+				dataValues = map(str.strip, lineSplit)
+
+				if len(headerValues) != len(dataValues):
+					print '[Error] Line %d does not contain as many entries as the header line.' % rowNumber
+					sys.exit()
+
+				if not columnNames:
+					columnNames = self.determineHierarchicalColumns(headerValues, dataValues)
+
+				categories = dataValues[0:len(columnNames)]
+				for r, value in enumerate(categories):
+					# top of hierarchy has no parent
+					if r == 0:
+						continue 
+
+					# ignore unclassified sequences
+					if self.isUnclassified(value):
+						continue 
+
+					# make sure parent is not unclassified
+					parentValue = categories[r-1]
+					if self.isUnclassified(parentValue):
+						entriesWithUnclassifiedParents.append([rowNumber, r, value])
+						continue 
+
+					# explicit initialisation; parent is a defaultdict(dict), which
+					# would also create the inner dict on first access
+					if r not in parent:
+						parent[r] = {}
+
+					if value not in parent[r]:
+						parent[r][value] = parentValue
+					else:
+						# only the first recorded parent is kept; each later row with a
+						# different parent is reported against that first parent
+						if parent[r][value] != parentValue:
+							entriesBreakingHierarchy.append([rowNumber, r, value, parent[r][value], parentValue])
+
+		# report entries breaking hierarchy
+		if len(entriesWithUnclassifiedParents) > 0:
+			print ''
+			print 'The following entries have an unclassified parent:'
+			for entry in entriesWithUnclassifiedParents:
+				rowNumber, r, value = entry
+				print '%s\t%s\t%s' % (rowNumber, columnNames[r], value)
+
+
+		if len(entriesBreakingHierarchy) > 0:
+			print ''
+			print 'The following entries have two (and potentially more) parents:'
+			for entry in entriesBreakingHierarchy:
+				rowNumber, r, value, parent1, parent2 = entry
+				print '%s\t%s\t%s\t%s' % (rowNumber, columnNames[r], value, ','.join([parent1, parent2]))
+
+		# NOTE(review): a file with a header but no data rows also reaches this
+		# branch, because both lists are still empty
+		if len(entriesWithUnclassifiedParents) == 0 and len(entriesBreakingHierarchy) == 0:
+			print ''
+			print 'Profile forms a strict hierarchy. You are good to go!'
+
+# Command-line entry point: print a banner, read the profile path from the
+# command line and run the hierarchy check on it.
+if __name__ == '__main__':
+	print __prog_name__ + ' v' + __version__ + ': ' + __prog_desc__
+	print '  by ' + __author__ + ' (' + __email__ + ')' + '\n'
+
+	parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+	parser.add_argument('stamp_profile', help='STAMP profile to evaluate')
+
+	# parsed outside the try block, so argparse's own exit on bad arguments is not intercepted below
+	args = parser.parse_args()
+
+	try:
+		checkHierarchy = CheckHierarchy()
+		checkHierarchy.run(args.stamp_profile)
+	except SystemExit:
+		# raised by the sys.exit() calls in CheckHierarchy after an error message was printed
+		# NOTE(review): the exception is swallowed here, so the script still ends normally
+		print "\nControlled exit resulting from an unrecoverable error or warning."
+	except:
+		# report the exception type, then re-raise so the traceback is still shown
+		print "\nUnexpected error:", sys.exc_info()[0]
+		raise
diff --git a/setup.py b/setup.py
@@ -37,7 +37,7 @@
                   ('.', ['README.md'])]
 setup(
     name='STAMP',
-    version='2.0.7',
+    version='2.0.8',
     author='Donovan Parks, Rob Beiko',
     author_email='[email protected]',
     packages=['stamp', 'stamp.GUI'] + pluginPkgs + metagenomicPkgs,

diff --git a/stamp/STAMP.py b/stamp/STAMP.py
@@ -23,8 +23,8 @@
 __copyright__ = 'Copyright 2013'
 __credits__ = ['Donovan Parks']
 __license__ = 'GPL3'
-__version__ = '2.0.7'
-__date__ = 'July 26, 2014'
+__version__ = '2.0.8'
+__date__ = 'August 4, 2014'
 __maintainer__ = 'Donovan Parks'
 __email__ = '[email protected]'
 __status__ = 'Development'

diff --git a/stamp/metagenomics/fileIO/StampIO.py b/stamp/metagenomics/fileIO/StampIO.py
@@ -135,20 +135,28 @@ def checkHierarchy(self, data, numHierarchicalLevels):
 
 			categories = lineSplit[0:numHierarchicalLevels]
 			for r, value in enumerate(categories):
+				# top of hierarchy has no parent
 				if r == 0:
-					continue # top of hierarchy has no parent
+					continue 
 
+				# ignore unclassified sequences
 				if self.isUnclassified(value):
-					continue # ignore unclassified sequences
+					continue 
+
+				# make sure parent is not unclassified
+				parentValue = categories[r-1]
+				if self.isUnclassified(parentValue):
+					return "Child %s has an unclassified parent." % value
+					continue 
 
 				if r not in parent:
 					parent[r] = {}
 
 				if value not in parent[r]:
-					parent[r][value] = categories[r-1]
+					parent[r][value] = parentValue
 				else:
-					if parent[r][value] != categories[r-1]:
+					if parent[r][value] != parentValue:
 						# data is not a strict hierarchy
-						return "Data does not form a strict hierarchy. Child %s has multiple parents (e.g., %s, %s)." % (value, parent[r][value], categories[r-1])		
+						return "Data does not form a strict hierarchy. Child %s has multiple parents (e.g., %s, %s)." % (value, parent[r][value], parentValue)		
 		return None
-
+
diff --git a/stamp/plugins/groups/plots/HeatmapPlot.py b/stamp/plugins/groups/plots/HeatmapPlot.py
@@ -124,9 +124,9 @@ def plot(self, profile, statsResults):
 		if len(featuresToPlot) <= 1 or (len(profile.samplesInGroup1) + len(profile.samplesInGroup2)) <= 1:
 			self.emptyAxis()
 			return
-		elif len(featuresToPlot) > 100 or len(profile.samplesInGroup1) + len(profile.samplesInGroup2) > 100:
+		elif len(featuresToPlot) > 1000 or len(profile.samplesInGroup1) + len(profile.samplesInGroup2) > 1000:
 			QtGui.QApplication.instance().setOverrideCursor(QtGui.QCursor(QtCore.Qt.ArrowCursor))
-			QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 100 samples and 100 features.', QtGui.QMessageBox.Ok)
+			QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 1000 samples and 1000 features.', QtGui.QMessageBox.Ok)
 			QtGui.QApplication.instance().restoreOverrideCursor()
 			self.emptyAxis()
 			return

diff --git a/stamp/plugins/groups/plots/configGUI/HeatmapPlot.ui b/stamp/plugins/groups/plots/configGUI/HeatmapPlot.ui
@@ -17,7 +17,7 @@
    </sizepolicy>
   </property>
   <property name="windowTitle">
-   <string>PCA plot</string>
+   <string>Heatmap plot</string>
   </property>
   <property name="windowIcon">
    <iconset>
@@ -95,7 +95,7 @@
            <double>0.500000000000000</double>
           </property>
           <property name="maximum">
-           <double>30.000000000000000</double>
+           <double>100.000000000000000</double>
           </property>
           <property name="singleStep">
            <double>0.100000000000000</double>
@@ -130,7 +130,7 @@
            <double>0.500000000000000</double>
           </property>
           <property name="maximum">
-           <double>30.000000000000000</double>
+           <double>100.000000000000000</double>
           </property>
           <property name="singleStep">
            <double>0.100000000000000</double>

diff --git a/stamp/plugins/groups/plots/configGUI/HeatmapPlotUI.py b/stamp/plugins/groups/plots/configGUI/HeatmapPlotUI.py
@@ -2,7 +2,7 @@
 
 # Form implementation generated from reading ui file 'HeatmapPlot.ui'
 #
-# Created: Sat Jul 26 11:13:18 2014
+# Created: Mon Aug 04 15:26:28 2014
 #      by: PyQt4 UI code generator 4.9.6
 #
 # WARNING! All changes made in this file will be lost!
@@ -73,7 +73,7 @@ def setupUi(self, HeatmapPlotDialog):
         self.spinFigWidth.setSizePolicy(sizePolicy)
         self.spinFigWidth.setDecimals(2)
         self.spinFigWidth.setMinimum(0.5)
-        self.spinFigWidth.setMaximum(30.0)
+        self.spinFigWidth.setMaximum(100.0)
         self.spinFigWidth.setSingleStep(0.1)
         self.spinFigWidth.setProperty("value", 7.0)
         self.spinFigWidth.setObjectName(_fromUtf8("spinFigWidth"))
@@ -90,7 +90,7 @@ def setupUi(self, HeatmapPlotDialog):
         self.spinFigHeight.setSizePolicy(sizePolicy)
         self.spinFigHeight.setDecimals(2)
         self.spinFigHeight.setMinimum(0.5)
-        self.spinFigHeight.setMaximum(30.0)
+        self.spinFigHeight.setMaximum(100.0)
         self.spinFigHeight.setSingleStep(0.1)
         self.spinFigHeight.setProperty("value", 7.0)
         self.spinFigHeight.setObjectName(_fromUtf8("spinFigHeight"))
@@ -268,7 +268,7 @@ def setupUi(self, HeatmapPlotDialog):
         QtCore.QMetaObject.connectSlotsByName(HeatmapPlotDialog)
 
     def retranslateUi(self, HeatmapPlotDialog):
-        HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "PCA plot", None))
+        HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "Heatmap plot", None))
         self.label.setText(_translate("HeatmapPlotDialog", "Field to plot:", None))
         self.cboFieldToPlot.setItemText(0, _translate("HeatmapPlotDialog", "Number of sequences", None))
         self.cboFieldToPlot.setItemText(1, _translate("HeatmapPlotDialog", "Proportion of sequences (%)", None))

diff --git a/stamp/plugins/multiGroups/plots/HeatmapPlot.py b/stamp/plugins/multiGroups/plots/HeatmapPlot.py
@@ -126,9 +126,9 @@ def plot(self, profile, statsResults):
 		if len(featuresToPlot) <= 1 or len(profile.activeGroupNames) <= 1:
 			self.emptyAxis()
 			return
-		elif len(featuresToPlot) > 100 or len(profile.activeSamplesInGroups) > 100:
+		elif len(featuresToPlot) > 1000 or len(profile.activeSamplesInGroups) > 1000:
 			QtGui.QApplication.instance().setOverrideCursor(QtGui.QCursor(QtCore.Qt.ArrowCursor))
-			QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 100 samples and 100 features.', QtGui.QMessageBox.Ok)
+			QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 1000 samples and 1000 features.', QtGui.QMessageBox.Ok)
 			QtGui.QApplication.instance().restoreOverrideCursor()
 			self.emptyAxis()
 			return

diff --git a/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlot.ui b/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlot.ui
@@ -17,7 +17,7 @@
    </sizepolicy>
   </property>
   <property name="windowTitle">
-   <string>PCA plot</string>
+   <string>Heatmap plot</string>
   </property>
   <property name="windowIcon">
    <iconset>
@@ -95,7 +95,7 @@
            <double>0.500000000000000</double>
           </property>
           <property name="maximum">
-           <double>30.000000000000000</double>
+           <double>100.000000000000000</double>
           </property>
           <property name="singleStep">
            <double>0.100000000000000</double>
@@ -130,7 +130,7 @@
            <double>0.500000000000000</double>
           </property>
           <property name="maximum">
-           <double>30.000000000000000</double>
+           <double>100.000000000000000</double>
           </property>
           <property name="singleStep">
            <double>0.100000000000000</double>

diff --git a/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlotUI.py b/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlotUI.py
@@ -2,7 +2,7 @@
 
 # Form implementation generated from reading ui file 'HeatmapPlot.ui'
 #
-# Created: Sat Jul 26 11:35:52 2014
+# Created: Mon Aug 04 15:27:32 2014
 #      by: PyQt4 UI code generator 4.9.6
 #
 # WARNING! All changes made in this file will be lost!
@@ -73,7 +73,7 @@ def setupUi(self, HeatmapPlotDialog):
         self.spinFigWidth.setSizePolicy(sizePolicy)
         self.spinFigWidth.setDecimals(2)
         self.spinFigWidth.setMinimum(0.5)
-        self.spinFigWidth.setMaximum(30.0)
+        self.spinFigWidth.setMaximum(100.0)
         self.spinFigWidth.setSingleStep(0.1)
         self.spinFigWidth.setProperty("value", 7.0)
         self.spinFigWidth.setObjectName(_fromUtf8("spinFigWidth"))
@@ -90,7 +90,7 @@ def setupUi(self, HeatmapPlotDialog):
         self.spinFigHeight.setSizePolicy(sizePolicy)
         self.spinFigHeight.setDecimals(2)
         self.spinFigHeight.setMinimum(0.5)
-        self.spinFigHeight.setMaximum(30.0)
+        self.spinFigHeight.setMaximum(100.0)
         self.spinFigHeight.setSingleStep(0.1)
         self.spinFigHeight.setProperty("value", 7.0)
         self.spinFigHeight.setObjectName(_fromUtf8("spinFigHeight"))
@@ -268,7 +268,7 @@ def setupUi(self, HeatmapPlotDialog):
         QtCore.QMetaObject.connectSlotsByName(HeatmapPlotDialog)
 
     def retranslateUi(self, HeatmapPlotDialog):
-        HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "PCA plot", None))
+        HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "Heatmap plot", None))
         self.label.setText(_translate("HeatmapPlotDialog", "Field to plot:", None))
         self.cboFieldToPlot.setItemText(0, _translate("HeatmapPlotDialog", "Number of sequences", None))
         self.cboFieldToPlot.setItemText(1, _translate("HeatmapPlotDialog", "Proportion of sequences (%)", None))