diff --git a/.settings/org.eclipse.core.resources.prefs b/.settings/org.eclipse.core.resources.prefs index 6e2c23d..5d60b85 100644 --- a/.settings/org.eclipse.core.resources.prefs +++ b/.settings/org.eclipse.core.resources.prefs @@ -5,8 +5,10 @@ encoding//stamp/GUI/plotDlgUI.py=utf-8 encoding//stamp/mainUI.py=utf-8 encoding//stamp/plugins/groups/plots/configGUI/BarPlotUI.py=utf-8 encoding//stamp/plugins/groups/plots/configGUI/BoxPlotUI.py=utf-8 +encoding//stamp/plugins/groups/plots/configGUI/HeatmapPlotUI.py=utf-8 encoding//stamp/plugins/groups/plots/configGUI/extendedErrorBarUI.py=utf-8 encoding//stamp/plugins/multiGroups/plots/configGUI/BarPlotUI.py=utf-8 encoding//stamp/plugins/multiGroups/plots/configGUI/BoxPlotUI.py=utf-8 +encoding//stamp/plugins/multiGroups/plots/configGUI/HeatmapPlotUI.py=utf-8 encoding//stamp/plugins/multiGroups/plots/configGUI/pcaPlotUI.py=utf-8 encoding//stamp/plugins/samples/plots/configGUI/barUI.py=utf-8 diff --git a/manual/STAMP_Users_Guide.docx b/manual/STAMP_Users_Guide.docx index 94d0575..34bf440 100644 Binary files a/manual/STAMP_Users_Guide.docx and b/manual/STAMP_Users_Guide.docx differ diff --git a/manual/STAMP_Users_Guide.pdf b/manual/STAMP_Users_Guide.pdf index 9585bd1..ac1a682 100644 Binary files a/manual/STAMP_Users_Guide.pdf and b/manual/STAMP_Users_Guide.pdf differ diff --git a/scripts/checkHierarchy.py b/scripts/checkHierarchy.py new file mode 100644 index 0000000..bfed809 --- /dev/null +++ b/scripts/checkHierarchy.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python + +############################################################################### +# # +# This program is free software: you can redistribute it and/or modify # +# it under the terms of the GNU General Public License as published by # +# the Free Software Foundation, either version 3 of the License, or # +# (at your option) any later version. 
# +# # +# This program is distributed in the hope that it will be useful, # +# but WITHOUT ANY WARRANTY; without even the implied warranty of # +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # +# GNU General Public License for more details. # +# # +# You should have received a copy of the GNU General Public License # +# along with this program. If not, see http://www.gnu.org/licenses/. # +# # +############################################################################### + +__prog_name__ = 'checkHierarchy' +__prog_desc__ = '' + +__author__ = 'Donovan Parks' +__copyright__ = 'Copyright 2014' +__credits__ = ['Donovan Parks'] +__license__ = 'GPL3' +__version__ = '0.0.1' +__maintainer__ = 'Donovan Parks' +__email__ = 'donovan.parks@gmail.com' +__status__ = 'Development' + +import os +import sys +import argparse +from collections import defaultdict + +def isNumber(s): + """Check if a string is a number.""" + try: + float(s) + return True + except ValueError: + return False + +class CheckHierarchy(object): + def __init__(self): + pass + + def isUnclassified(self, value): + """Check if value (taxon, metabolic pathway) is unclassified.""" + + # currently unclassified sequences need to be explicitly stated as + # 'unclassified' (case insensitive) or '*__unclassified' which is + # the format used by GreenGenes + return value.lower() == 'unclassified' or value.lower()[1:] == '__unclassified' + + def determineHierarchicalColumns(self, headerValues, firstDataValues): + """Determine columns corresponding to user-defined hierarchy.""" + + # first column entry that is numeric is assumed to be from first sample + firstSampleIndex = 0 + for entry in firstDataValues: + if isNumber(entry): + break + firstSampleIndex += 1 + + # sanity check profile + numSamples = len(headerValues) - firstSampleIndex + if numSamples < 2: + print '[Error] Profile must contain at least two samples. 
Identified %d samples' % numSamples + sys.exit() + + if firstSampleIndex == 0: + print '[Error] Profile file must contain at least one column indicating feature names.' + sys.exit() + + print 'Identified %d samples.' % numSamples + print 'Identified %d hierarchical columns.' % firstSampleIndex + + # get name of hierarchical columns + columnNames = headerValues[0:firstSampleIndex] + + return columnNames + + def run(self, stampProfile): + """Verify that data forms a strict hierarchy.""" + parent = defaultdict(dict) + + # identify entries breaking hierarchy + entriesWithUnclassifiedParents = [] + entriesBreakingHierarchy = [] + with open(stampProfile, 'U') as f: + header = f.readline() + headerValues = map(str.strip, header.split('\t')) + + columnNames = None + for i, line in enumerate(f): + rowNumber = i+2 # +1 for header row, +1 for zero indexing + lineSplit = line.split('\t') + dataValues = map(str.strip, lineSplit) + + if len(headerValues) != len(dataValues): + print '[Error] Line %d does not contain as many entries as the header line.' 
% rowNumber + sys.exit() + + if not columnNames: + columnNames = self.determineHierarchicalColumns(headerValues, dataValues) + + categories = dataValues[0:len(columnNames)] + for r, value in enumerate(categories): + # top of hierarchy has no parent + if r == 0: + continue + + # ignore unclassified sequences + if self.isUnclassified(value): + continue + + # make sure parent is not unclassified + parentValue = categories[r-1] + if self.isUnclassified(parentValue): + entriesWithUnclassifiedParents.append([rowNumber, r, value]) + continue + + if r not in parent: + parent[r] = {} + + if value not in parent[r]: + parent[r][value] = parentValue + else: + if parent[r][value] != parentValue: + entriesBreakingHierarchy.append([rowNumber, r, value, parent[r][value], parentValue]) + + # report entries breaking hierarchy + if len(entriesWithUnclassifiedParents) > 0: + print '' + print 'The following entries have an unclassified parent:' + for entry in entriesWithUnclassifiedParents: + rowNumber, r, value = entry + print '%s\t%s\t%s' % (rowNumber, columnNames[r], value) + + + if len(entriesBreakingHierarchy) > 0: + print '' + print 'The following entries have two (and potentially more) parents:' + for entry in entriesBreakingHierarchy: + rowNumber, r, value, parent1, parent2 = entry + print '%s\t%s\t%s\t%s' % (rowNumber, columnNames[r], value, ','.join([parent1, parent2])) + + if len(entriesWithUnclassifiedParents) == 0 and len(entriesBreakingHierarchy) == 0: + print '' + print 'Profile forms a strict hierarchy. You are good to go!' 
+ +if __name__ == '__main__': + print __prog_name__ + ' v' + __version__ + ': ' + __prog_desc__ + print ' by ' + __author__ + ' (' + __email__ + ')' + '\n' + + parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('stamp_profile', help='STAMP profile to evaluate') + + args = parser.parse_args() + + try: + checkHierarchy = CheckHierarchy() + checkHierarchy.run(args.stamp_profile) + except SystemExit: + print "\nControlled exit resulting from an unrecoverable error or warning." + except: + print "\nUnexpected error:", sys.exc_info()[0] + raise diff --git a/setup.py b/setup.py index c4ef6d7..3fc3b52 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,7 @@ ('.', ['README.md'])] setup( name='STAMP', - version='2.0.7', + version='2.0.8', author='Donovan Parks, Rob Beiko', author_email='donovan.parks@gmail.com', packages=['stamp', 'stamp.GUI'] + pluginPkgs + metagenomicPkgs, diff --git a/stamp/STAMP.py b/stamp/STAMP.py index 6ce32de..35b983d 100644 --- a/stamp/STAMP.py +++ b/stamp/STAMP.py @@ -23,8 +23,8 @@ __copyright__ = 'Copyright 2013' __credits__ = ['Donovan Parks'] __license__ = 'GPL3' -__version__ = '2.0.7' -__date__ = 'July 26, 2014' +__version__ = '2.0.8' +__date__ = 'August 4, 2014' __maintainer__ = 'Donovan Parks' __email__ = 'donovan.parks@gmail.com' __status__ = 'Development' diff --git a/stamp/metagenomics/fileIO/StampIO.py b/stamp/metagenomics/fileIO/StampIO.py index 09d9584..63d99e7 100644 --- a/stamp/metagenomics/fileIO/StampIO.py +++ b/stamp/metagenomics/fileIO/StampIO.py @@ -135,20 +135,28 @@ def checkHierarchy(self, data, numHierarchicalLevels): categories = lineSplit[0:numHierarchicalLevels] for r, value in enumerate(categories): + # top of hierarchy has no parent if r == 0: - continue # top of hierarchy has no parent + continue + # ignore unclassified sequences if self.isUnclassified(value): - continue # ignore unclassified sequences + continue + + # make sure parent is not unclassified + 
parentValue = categories[r-1] + if self.isUnclassified(parentValue): + return "Child %s has an unclassified parent." % value + continue if r not in parent: parent[r] = {} if value not in parent[r]: - parent[r][value] = categories[r-1] + parent[r][value] = parentValue else: - if parent[r][value] != categories[r-1]: + if parent[r][value] != parentValue: # data is not a strict hierarchy - return "Data does not form a strict hierarchy. Child %s has multiple parents (e.g., %s, %s)." % (value, parent[r][value], categories[r-1]) + return "Data does not form a strict hierarchy. Child %s has multiple parents (e.g., %s, %s)." % (value, parent[r][value], parentValue) return None - + \ No newline at end of file diff --git a/stamp/plugins/groups/plots/HeatmapPlot.py b/stamp/plugins/groups/plots/HeatmapPlot.py index 65eb3f4..cee5518 100644 --- a/stamp/plugins/groups/plots/HeatmapPlot.py +++ b/stamp/plugins/groups/plots/HeatmapPlot.py @@ -124,9 +124,9 @@ def plot(self, profile, statsResults): if len(featuresToPlot) <= 1 or (len(profile.samplesInGroup1) + len(profile.samplesInGroup2)) <= 1: self.emptyAxis() return - elif len(featuresToPlot) > 100 or len(profile.samplesInGroup1) + len(profile.samplesInGroup2) > 100: + elif len(featuresToPlot) > 1000 or len(profile.samplesInGroup1) + len(profile.samplesInGroup2) > 1000: QtGui.QApplication.instance().setOverrideCursor(QtGui.QCursor(QtCore.Qt.ArrowCursor)) - QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 100 samples and 100 features.', QtGui.QMessageBox.Ok) + QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 1000 samples and 1000 features.', QtGui.QMessageBox.Ok) QtGui.QApplication.instance().restoreOverrideCursor() self.emptyAxis() return diff --git a/stamp/plugins/groups/plots/configGUI/HeatmapPlot.ui b/stamp/plugins/groups/plots/configGUI/HeatmapPlot.ui index 4b557c9..662c8f7 100644 --- a/stamp/plugins/groups/plots/configGUI/HeatmapPlot.ui +++ 
b/stamp/plugins/groups/plots/configGUI/HeatmapPlot.ui @@ -17,7 +17,7 @@ - PCA plot + Heatmap plot @@ -95,7 +95,7 @@ 0.500000000000000 - 30.000000000000000 + 100.000000000000000 0.100000000000000 @@ -130,7 +130,7 @@ 0.500000000000000 - 30.000000000000000 + 100.000000000000000 0.100000000000000 diff --git a/stamp/plugins/groups/plots/configGUI/HeatmapPlotUI.py b/stamp/plugins/groups/plots/configGUI/HeatmapPlotUI.py index 9b0bbd0..885b564 100644 --- a/stamp/plugins/groups/plots/configGUI/HeatmapPlotUI.py +++ b/stamp/plugins/groups/plots/configGUI/HeatmapPlotUI.py @@ -2,7 +2,7 @@ # Form implementation generated from reading ui file 'HeatmapPlot.ui' # -# Created: Sat Jul 26 11:13:18 2014 +# Created: Mon Aug 04 15:26:28 2014 # by: PyQt4 UI code generator 4.9.6 # # WARNING! All changes made in this file will be lost! @@ -73,7 +73,7 @@ def setupUi(self, HeatmapPlotDialog): self.spinFigWidth.setSizePolicy(sizePolicy) self.spinFigWidth.setDecimals(2) self.spinFigWidth.setMinimum(0.5) - self.spinFigWidth.setMaximum(30.0) + self.spinFigWidth.setMaximum(100.0) self.spinFigWidth.setSingleStep(0.1) self.spinFigWidth.setProperty("value", 7.0) self.spinFigWidth.setObjectName(_fromUtf8("spinFigWidth")) @@ -90,7 +90,7 @@ def setupUi(self, HeatmapPlotDialog): self.spinFigHeight.setSizePolicy(sizePolicy) self.spinFigHeight.setDecimals(2) self.spinFigHeight.setMinimum(0.5) - self.spinFigHeight.setMaximum(30.0) + self.spinFigHeight.setMaximum(100.0) self.spinFigHeight.setSingleStep(0.1) self.spinFigHeight.setProperty("value", 7.0) self.spinFigHeight.setObjectName(_fromUtf8("spinFigHeight")) @@ -268,7 +268,7 @@ def setupUi(self, HeatmapPlotDialog): QtCore.QMetaObject.connectSlotsByName(HeatmapPlotDialog) def retranslateUi(self, HeatmapPlotDialog): - HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "PCA plot", None)) + HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "Heatmap plot", None)) self.label.setText(_translate("HeatmapPlotDialog", "Field to plot:", 
None)) self.cboFieldToPlot.setItemText(0, _translate("HeatmapPlotDialog", "Number of sequences", None)) self.cboFieldToPlot.setItemText(1, _translate("HeatmapPlotDialog", "Proportion of sequences (%)", None)) diff --git a/stamp/plugins/multiGroups/plots/HeatmapPlot.py b/stamp/plugins/multiGroups/plots/HeatmapPlot.py index de805ff..173709f 100644 --- a/stamp/plugins/multiGroups/plots/HeatmapPlot.py +++ b/stamp/plugins/multiGroups/plots/HeatmapPlot.py @@ -126,9 +126,9 @@ def plot(self, profile, statsResults): if len(featuresToPlot) <= 1 or len(profile.activeGroupNames) <= 1: self.emptyAxis() return - elif len(featuresToPlot) > 100 or len(profile.activeSamplesInGroups) > 100: + elif len(featuresToPlot) > 1000 or len(profile.activeSamplesInGroups) > 1000: QtGui.QApplication.instance().setOverrideCursor(QtGui.QCursor(QtCore.Qt.ArrowCursor)) - QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 100 samples and 100 features.', QtGui.QMessageBox.Ok) + QtGui.QMessageBox.information(self, 'Too much data!', 'Heatmap plots are limited to 1000 samples and 1000 features.', QtGui.QMessageBox.Ok) QtGui.QApplication.instance().restoreOverrideCursor() self.emptyAxis() return diff --git a/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlot.ui b/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlot.ui index 4b557c9..662c8f7 100644 --- a/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlot.ui +++ b/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlot.ui @@ -17,7 +17,7 @@ - PCA plot + Heatmap plot @@ -95,7 +95,7 @@ 0.500000000000000 - 30.000000000000000 + 100.000000000000000 0.100000000000000 @@ -130,7 +130,7 @@ 0.500000000000000 - 30.000000000000000 + 100.000000000000000 0.100000000000000 diff --git a/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlotUI.py b/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlotUI.py index f3ada6b..1e0e52b 100644 --- a/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlotUI.py +++ 
b/stamp/plugins/multiGroups/plots/configGUI/HeatmapPlotUI.py @@ -2,7 +2,7 @@ # Form implementation generated from reading ui file 'HeatmapPlot.ui' # -# Created: Sat Jul 26 11:35:52 2014 +# Created: Mon Aug 04 15:27:32 2014 # by: PyQt4 UI code generator 4.9.6 # # WARNING! All changes made in this file will be lost! @@ -73,7 +73,7 @@ def setupUi(self, HeatmapPlotDialog): self.spinFigWidth.setSizePolicy(sizePolicy) self.spinFigWidth.setDecimals(2) self.spinFigWidth.setMinimum(0.5) - self.spinFigWidth.setMaximum(30.0) + self.spinFigWidth.setMaximum(100.0) self.spinFigWidth.setSingleStep(0.1) self.spinFigWidth.setProperty("value", 7.0) self.spinFigWidth.setObjectName(_fromUtf8("spinFigWidth")) @@ -90,7 +90,7 @@ def setupUi(self, HeatmapPlotDialog): self.spinFigHeight.setSizePolicy(sizePolicy) self.spinFigHeight.setDecimals(2) self.spinFigHeight.setMinimum(0.5) - self.spinFigHeight.setMaximum(30.0) + self.spinFigHeight.setMaximum(100.0) self.spinFigHeight.setSingleStep(0.1) self.spinFigHeight.setProperty("value", 7.0) self.spinFigHeight.setObjectName(_fromUtf8("spinFigHeight")) @@ -268,7 +268,7 @@ def setupUi(self, HeatmapPlotDialog): QtCore.QMetaObject.connectSlotsByName(HeatmapPlotDialog) def retranslateUi(self, HeatmapPlotDialog): - HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "PCA plot", None)) + HeatmapPlotDialog.setWindowTitle(_translate("HeatmapPlotDialog", "Heatmap plot", None)) self.label.setText(_translate("HeatmapPlotDialog", "Field to plot:", None)) self.cboFieldToPlot.setItemText(0, _translate("HeatmapPlotDialog", "Number of sequences", None)) self.cboFieldToPlot.setItemText(1, _translate("HeatmapPlotDialog", "Proportion of sequences (%)", None))