Skip to content

Commit

Permalink
Updated heat maps and user's guide.
Browse files Browse the repository at this point in the history
  • Loading branch information
donovan-h-parks committed Aug 4, 2014
1 parent 17158af commit 45c151d
Show file tree
Hide file tree
Showing 13 changed files with 207 additions and 27 deletions.
2 changes: 2 additions & 0 deletions .settings/org.eclipse.core.resources.prefs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ encoding//stamp/GUI/plotDlgUI.py=utf-8
encoding//stamp/mainUI.py=utf-8
encoding//stamp/plugins/groups/plots/configGUI/BarPlotUI.py=utf-8
encoding//stamp/plugins/groups/plots/configGUI/BoxPlotUI.py=utf-8
encoding//stamp/plugins/groups/plots/configGUI/HeatmapPlotUI.py=utf-8
encoding//stamp/plugins/groups/plots/configGUI/extendedErrorBarUI.py=utf-8
encoding//stamp/plugins/multiGroups/plots/configGUI/BarPlotUI.py=utf-8
encoding//stamp/plugins/multiGroups/plots/configGUI/BoxPlotUI.py=utf-8
encoding//stamp/plugins/multiGroups/plots/configGUI/HeatmapPlotUI.py=utf-8
encoding//stamp/plugins/multiGroups/plots/configGUI/pcaPlotUI.py=utf-8
encoding//stamp/plugins/samples/plots/configGUI/barUI.py=utf-8
Binary file modified manual/STAMP_Users_Guide.docx
Binary file not shown.
Binary file modified manual/STAMP_Users_Guide.pdf
Binary file not shown.
170 changes: 170 additions & 0 deletions scripts/checkHierarchy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
#!/usr/bin/env python

###############################################################################
# #
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# (at your option) any later version. #
# #
# This program is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public License #
# along with this program. If not, see <http://www.gnu.org/licenses/>. #
# #
###############################################################################

# Module metadata. The program name, version, description, author and e-mail
# are concatenated into the banner printed when this file is run as a script.
# NOTE(review): __prog_desc__ still holds the template placeholder text and
# should be replaced with a real one-line description of the script.
__prog_name__ = 'checkHierarchy'
__prog_desc__ = '<program description>'

__author__ = 'Donovan Parks'
__copyright__ = 'Copyright 2014'
__credits__ = ['Donovan Parks']
__license__ = 'GPL3'
__version__ = '0.0.1'
__maintainer__ = 'Donovan Parks'
__email__ = '[email protected]'
__status__ = 'Development'

import os
import sys
import argparse
from collections import defaultdict

def isNumber(s):
    """Check if a value can be interpreted as a number.

    Parameters
    ----------
    s : str
        Value to test (typically a single field read from a profile file).

    Returns
    -------
    bool
        True if float(s) succeeds, False otherwise.

    Note that anything accepted by float() counts as a number, which
    includes strings such as 'nan', 'inf' and 'infinity' in any case.
    """
    try:
        float(s)
    except (ValueError, TypeError):
        # ValueError: string that is not a number
        # TypeError: value that cannot be converted at all (e.g., None)
        return False
    return True

class CheckHierarchy(object):
    """Check that the hierarchical columns of a STAMP profile form a strict hierarchy.

    A profile is a tab-separated file with a header row. The leading columns
    give the hierarchy (e.g., taxonomic ranks) and the remaining columns hold
    numeric data for each sample. In a strict hierarchy every classified
    value has exactly one parent, and that parent is itself classified.
    """

    def __init__(self):
        pass

    def isUnclassified(self, value):
        """Check if value (taxon, metabolic pathway) is unclassified.

        Parameters
        ----------
        value : str
            Entry from a hierarchical column.

        Returns
        -------
        bool
            True if the entry marks an unclassified sequence.
        """

        # currently unclassified sequences need to be explicitly stated as
        # 'unclassified' (case insensitive) or '*__unclassified' which is
        # the format used by GreenGenes
        lowered = value.lower()
        return lowered == 'unclassified' or lowered[1:] == '__unclassified'

    def determineHierarchicalColumns(self, headerValues, firstDataValues):
        """Determine columns corresponding to user-defined hierarchy.

        Parameters
        ----------
        headerValues : list of str
            Column names from the header row.
        firstDataValues : list of str
            Fields of the first data row.

        Returns
        -------
        list of str
            Names of the hierarchical columns, i.e. the header entries
            preceding the first numeric field of the first data row.

        Raises
        ------
        SystemExit
            If fewer than two sample columns or no hierarchical column
            could be identified. An error message is printed first.
        """

        # first column entry that is numeric is assumed to be from first sample
        firstSampleIndex = 0
        for entry in firstDataValues:
            if isNumber(entry):
                break
            firstSampleIndex += 1

        # sanity check profile
        numSamples = len(headerValues) - firstSampleIndex
        if numSamples < 2:
            print('[Error] Profile must contain at least two samples. Identified %d samples' % numSamples)
            sys.exit(1)

        if firstSampleIndex == 0:
            print('[Error] Profile file must contain at least one column indicating feature names.')
            sys.exit(1)

        print('Identified %d samples.' % numSamples)
        print('Identified %d hierarchical columns.' % firstSampleIndex)

        # get name of hierarchical columns
        return headerValues[0:firstSampleIndex]

    def run(self, stampProfile):
        """Verify that data forms a strict hierarchy.

        Reads the profile, then prints every entry whose parent is
        unclassified and every entry observed with more than one parent.
        If there are none, a confirmation message is printed instead.

        Parameters
        ----------
        stampProfile : str
            Path to the tab-separated STAMP profile.

        Raises
        ------
        SystemExit
            If a row has a different number of fields than the header, if
            the profile has no data rows, or if the columns cannot be
            interpreted (see determineHierarchicalColumns).
        """

        # parent[r] maps each value seen in hierarchical column r to the
        # value in column r-1 it was first observed with
        parent = {}

        # identify entries breaking hierarchy
        entriesWithUnclassifiedParents = []
        entriesBreakingHierarchy = []

        # Python 2 needs 'U' for universal newline support; in Python 3 this
        # is the default for text mode and the 'U' flag was removed in 3.11
        openMode = 'rU' if sys.version_info[0] < 3 else 'r'

        columnNames = None
        with open(stampProfile, openMode) as f:
            header = f.readline()
            headerValues = [field.strip() for field in header.split('\t')]

            for i, line in enumerate(f):
                rowNumber = i + 2  # +1 for header row, +1 for zero indexing
                dataValues = [field.strip() for field in line.split('\t')]

                if len(headerValues) != len(dataValues):
                    print('[Error] Line %d does not contain as many entries as the header line.' % rowNumber)
                    sys.exit(1)

                # layout of the file is inferred from the first data row only
                if columnNames is None:
                    columnNames = self.determineHierarchicalColumns(headerValues, dataValues)

                categories = dataValues[0:len(columnNames)]
                for r, value in enumerate(categories):
                    # top of hierarchy has no parent
                    if r == 0:
                        continue

                    # ignore unclassified sequences
                    if self.isUnclassified(value):
                        continue

                    # make sure parent is not unclassified
                    parentValue = categories[r - 1]
                    if self.isUnclassified(parentValue):
                        entriesWithUnclassifiedParents.append([rowNumber, r, value])
                        continue

                    parentsAtLevel = parent.setdefault(r, {})
                    if value not in parentsAtLevel:
                        parentsAtLevel[value] = parentValue
                    elif parentsAtLevel[value] != parentValue:
                        entriesBreakingHierarchy.append([rowNumber, r, value, parentsAtLevel[value], parentValue])

        # without a data row nothing was checked, so do not claim success
        if columnNames is None:
            print('[Error] Profile does not contain any data rows.')
            sys.exit(1)

        # report entries breaking hierarchy
        if entriesWithUnclassifiedParents:
            print('')
            print('The following entries have an unclassified parent:')
            for rowNumber, r, value in entriesWithUnclassifiedParents:
                print('%s\t%s\t%s' % (rowNumber, columnNames[r], value))

        if entriesBreakingHierarchy:
            print('')
            print('The following entries have two (and potentially more) parents:')
            for rowNumber, r, value, parent1, parent2 in entriesBreakingHierarchy:
                print('%s\t%s\t%s\t%s' % (rowNumber, columnNames[r], value, ','.join([parent1, parent2])))

        if not entriesWithUnclassifiedParents and not entriesBreakingHierarchy:
            print('')
            print('Profile forms a strict hierarchy. You are good to go!')

if __name__ == '__main__':
    # start-up banner: program name, version, description and author
    print(__prog_name__ + ' v' + __version__ + ': ' + __prog_desc__)
    print(' by ' + __author__ + ' (' + __email__ + ')' + '\n')

    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('stamp_profile', help='STAMP profile to evaluate')

    args = parser.parse_args()

    try:
        checkHierarchy = CheckHierarchy()
        checkHierarchy.run(args.stamp_profile)
    except SystemExit:
        # raised by CheckHierarchy after it has printed an error message;
        # exit with a non-zero status so calling scripts can detect failure
        print("\nControlled exit resulting from an unrecoverable error or warning.")
        sys.exit(1)
    except Exception:
        # report the exception type, then let the traceback propagate
        print("\nUnexpected error: %s" % (sys.exc_info()[0],))
        raise
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
('.', ['README.md'])]
setup(
name='STAMP',
version='2.0.7',
version='2.0.8',
author='Donovan Parks, Rob Beiko',
author_email='[email protected]',
packages=['stamp', 'stamp.GUI'] + pluginPkgs + metagenomicPkgs,
Expand Down
4 changes: 2 additions & 2 deletions stamp/STAMP.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
__copyright__ = 'Copyright 2013'
__credits__ = ['Donovan Parks']
__license__ = 'GPL3'
__version__ = '2.0.7'
__date__ = 'July 26, 2014'
__version__ = '2.0.8'
__date__ = 'August 4, 2014'
__maintainer__ = 'Donovan Parks'
__email__ = '[email protected]'
__status__ = 'Development'
Expand Down
20 changes: 14 additions & 6 deletions stamp/metagenomics/fileIO/StampIO.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,20 +135,28 @@ def checkHierarchy(self, data, numHierarchicalLevels):

categories = lineSplit[0:numHierarchicalLevels]
for r, value in enumerate(categories):
# top of hierarchy has no parent
if r == 0:
continue # top of hierarchy has no parent
continue

# ignore unclassified sequences
if self.isUnclassified(value):
continue # ignore unclassified sequences
continue

# make sure parent is not unclassified
parentValue = categories[r-1]
if self.isUnclassified(parentValue):
return "Child %s has an unclassified parent." % value
continue

if r not in parent:
parent[r] = {}

if value not in parent[r]:
parent[r][value] = categories[r-1]
parent[r][value] = parentValue
else:
if parent[r][value] != categories[r-1]:
if parent[r][value] != parentValue:
# data is not a strict hierarchy
return "Data does not form a strict hierarchy. Child %s has multiple parents (e.g., %s, %s)." % (value, parent[r][value], categories[r-1])
return "Data does not form a strict hierarchy. Child %s has multiple parents (e.g., %s, %s)." % (value, parent[r][value], parentValue)
return None


4 changes: 2 additions & 2 deletions stamp/plugins/groups/plots/HeatmapPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,9 @@ def plot(self, profile, statsResults):
if len(featuresToPlot) <= 1 or (len(profile.samplesInGroup1) + len(profile.samplesInGroup2)) <= 1:
self.emptyAxis()
return
elif len(featuresToPlot) > 100 or len(profile.samplesInGroup1) + len(profile.samplesInGroup2) > 100:
elif len(featuresToPlot) > 1000 or len(profile.samplesInGroup1) + len(profile.samplesInGroup2) > 1000:
QtGui.QApplication.instance().setOverrideCursor(QtGui.QCursor(QtCore.Qt.ArrowCursor))
QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 100 samples and 100 features.', QtGui.QMessageBox.Ok)
QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 1000 samples and 1000 features.', QtGui.QMessageBox.Ok)
QtGui.QApplication.instance().restoreOverrideCursor()
self.emptyAxis()
return
Expand Down
6 changes: 3 additions & 3 deletions stamp/plugins/groups/plots/configGUI/HeatmapPlot.ui
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
</sizepolicy>
</property>
<property name="windowTitle">
<string>PCA plot</string>
<string>Heatmap plot</string>
</property>
<property name="windowIcon">
<iconset>
Expand Down Expand Up @@ -95,7 +95,7 @@
<double>0.500000000000000</double>
</property>
<property name="maximum">
<double>30.000000000000000</double>
<double>100.000000000000000</double>
</property>
<property name="singleStep">
<double>0.100000000000000</double>
Expand Down Expand Up @@ -130,7 +130,7 @@
<double>0.500000000000000</double>
</property>
<property name="maximum">
<double>30.000000000000000</double>
<double>100.000000000000000</double>
</property>
<property name="singleStep">
<double>0.100000000000000</double>
Expand Down
8 changes: 4 additions & 4 deletions stamp/plugins/groups/plots/configGUI/HeatmapPlotUI.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Form implementation generated from reading ui file 'HeatmapPlot.ui'
#
# Created: Sat Jul 26 11:13:18 2014
# Created: Mon Aug 04 15:26:28 2014
# by: PyQt4 UI code generator 4.9.6
#
# WARNING! All changes made in this file will be lost!
Expand Down Expand Up @@ -73,7 +73,7 @@ def setupUi(self, HeatmapPlotDialog):
self.spinFigWidth.setSizePolicy(sizePolicy)
self.spinFigWidth.setDecimals(2)
self.spinFigWidth.setMinimum(0.5)
self.spinFigWidth.setMaximum(30.0)
self.spinFigWidth.setMaximum(100.0)
self.spinFigWidth.setSingleStep(0.1)
self.spinFigWidth.setProperty("value", 7.0)
self.spinFigWidth.setObjectName(_fromUtf8("spinFigWidth"))
Expand All @@ -90,7 +90,7 @@ def setupUi(self, HeatmapPlotDialog):
self.spinFigHeight.setSizePolicy(sizePolicy)
self.spinFigHeight.setDecimals(2)
self.spinFigHeight.setMinimum(0.5)
self.spinFigHeight.setMaximum(30.0)
self.spinFigHeight.setMaximum(100.0)
self.spinFigHeight.setSingleStep(0.1)
self.spinFigHeight.setProperty("value", 7.0)
self.spinFigHeight.setObjectName(_fromUtf8("spinFigHeight"))
Expand Down Expand Up @@ -268,7 +268,7 @@ def setupUi(self, HeatmapPlotDialog):
QtCore.QMetaObject.connectSlotsByName(HeatmapPlotDialog)

def retranslateUi(self, HeatmapPlotDialog):
HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "PCA plot", None))
HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "Heatmap plot", None))
self.label.setText(_translate("HeatmapPlotDialog", "Field to plot:", None))
self.cboFieldToPlot.setItemText(0, _translate("HeatmapPlotDialog", "Number of sequences", None))
self.cboFieldToPlot.setItemText(1, _translate("HeatmapPlotDialog", "Proportion of sequences (%)", None))
Expand Down
4 changes: 2 additions & 2 deletions stamp/plugins/multiGroups/plots/HeatmapPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,9 @@ def plot(self, profile, statsResults):
if len(featuresToPlot) <= 1 or len(profile.activeGroupNames) <= 1:
self.emptyAxis()
return
elif len(featuresToPlot) > 100 or len(profile.activeSamplesInGroups) > 100:
elif len(featuresToPlot) > 1000 or len(profile.activeSamplesInGroups) > 1000:
QtGui.QApplication.instance().setOverrideCursor(QtGui.QCursor(QtCore.Qt.ArrowCursor))
QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 100 samples and 100 features.', QtGui.QMessageBox.Ok)
QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 1000 samples and 1000 features.', QtGui.QMessageBox.Ok)
QtGui.QApplication.instance().restoreOverrideCursor()
self.emptyAxis()
return
Expand Down
6 changes: 3 additions & 3 deletions stamp/plugins/multiGroups/plots/configGUI/HeatmapPlot.ui
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
</sizepolicy>
</property>
<property name="windowTitle">
<string>PCA plot</string>
<string>Heatmap plot</string>
</property>
<property name="windowIcon">
<iconset>
Expand Down Expand Up @@ -95,7 +95,7 @@
<double>0.500000000000000</double>
</property>
<property name="maximum">
<double>30.000000000000000</double>
<double>100.000000000000000</double>
</property>
<property name="singleStep">
<double>0.100000000000000</double>
Expand Down Expand Up @@ -130,7 +130,7 @@
<double>0.500000000000000</double>
</property>
<property name="maximum">
<double>30.000000000000000</double>
<double>100.000000000000000</double>
</property>
<property name="singleStep">
<double>0.100000000000000</double>
Expand Down
8 changes: 4 additions & 4 deletions stamp/plugins/multiGroups/plots/configGUI/HeatmapPlotUI.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Form implementation generated from reading ui file 'HeatmapPlot.ui'
#
# Created: Sat Jul 26 11:35:52 2014
# Created: Mon Aug 04 15:27:32 2014
# by: PyQt4 UI code generator 4.9.6
#
# WARNING! All changes made in this file will be lost!
Expand Down Expand Up @@ -73,7 +73,7 @@ def setupUi(self, HeatmapPlotDialog):
self.spinFigWidth.setSizePolicy(sizePolicy)
self.spinFigWidth.setDecimals(2)
self.spinFigWidth.setMinimum(0.5)
self.spinFigWidth.setMaximum(30.0)
self.spinFigWidth.setMaximum(100.0)
self.spinFigWidth.setSingleStep(0.1)
self.spinFigWidth.setProperty("value", 7.0)
self.spinFigWidth.setObjectName(_fromUtf8("spinFigWidth"))
Expand All @@ -90,7 +90,7 @@ def setupUi(self, HeatmapPlotDialog):
self.spinFigHeight.setSizePolicy(sizePolicy)
self.spinFigHeight.setDecimals(2)
self.spinFigHeight.setMinimum(0.5)
self.spinFigHeight.setMaximum(30.0)
self.spinFigHeight.setMaximum(100.0)
self.spinFigHeight.setSingleStep(0.1)
self.spinFigHeight.setProperty("value", 7.0)
self.spinFigHeight.setObjectName(_fromUtf8("spinFigHeight"))
Expand Down Expand Up @@ -268,7 +268,7 @@ def setupUi(self, HeatmapPlotDialog):
QtCore.QMetaObject.connectSlotsByName(HeatmapPlotDialog)

def retranslateUi(self, HeatmapPlotDialog):
HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "PCA plot", None))
HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "Heatmap plot", None))
self.label.setText(_translate("HeatmapPlotDialog", "Field to plot:", None))
self.cboFieldToPlot.setItemText(0, _translate("HeatmapPlotDialog", "Number of sequences", None))
self.cboFieldToPlot.setItemText(1, _translate("HeatmapPlotDialog", "Proportion of sequences (%)", None))
Expand Down

0 comments on commit 45c151d

Please sign in to comment.