# summarize-results

#!/usr/bin/python3

# Copyright 2016-2021, André Müller <muellan@uni-mainz.de>
#
# This file is part of the MetaCache taxonomic sequence classification tool.
#
# MetaCache is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# MetaCache is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with MetaCache.  If not, see <http://www.gnu.org/licenses/>.

import glob
import re
import sys

# Two positional arguments are required: a glob pattern selecting the result
# files and the name of the taxonomic rank to report.
if len(sys.argv) < 3:
    print("summarize classification results with ground truth")
    print("usage: " + sys.argv[0] + " <input file pattern> <rank>")
    # Use sys.exit() instead of the bare exit(): the latter is a convenience
    # helper installed by the `site` module for interactive sessions and is
    # not guaranteed to exist (e.g. under `python -S`). Called without an
    # argument, the exit status stays 0 exactly as before.
    sys.exit()

# Pattern handed to glob.glob() to find the result files.
inpattern = sys.argv[1]
# Rank name used as the lookup key into the per-file result tables.
rank = sys.argv[2]

# Column separator used for the printed table.
sep = "\t"

# Patterns for the '#'-prefixed summary lines of a result file.
# They are written as raw strings so that `\s` reaches the regex engine
# unchanged instead of being an invalid string escape (which newer Python
# versions warn about); the compiled patterns are the same as before.
#
# "# unclassified: <number>%": group 1 captures the number.
unclre = re.compile(r'^# unclassified:\s*([0-9.]+)%.*')
# Header lines that open the classified / precision / sensitivity sections.
clasre = re.compile(r'^# classified:.*')
precre = re.compile(r'^# precision.*')
sensre = re.compile(r'^# sensitivity.*')
# "#   <name> <number>%": group 1 is the alphabetic name, group 2 the number.
rankre = re.compile(r'^#   ([a-zA-Z]+)\s+([0-9.]+)%')


def parse_rank(line, rec):
    """Store the entry of a per-rank line in ``rec``.

    Args:
        line: One line of text from a result file.
        rec: Dictionary that is updated in place; the name matched by
            ``rankre`` becomes the key and the matched number (kept as a
            string, without the trailing ``%``) becomes the value.

    Returns:
        True if ``line`` matched ``rankre`` and ``rec`` was updated,
        False otherwise (``rec`` is left untouched).
    """
    res = rankre.match(line)
    if res is None:
        return False
    rec[res.group(1)] = res.group(2)
    return True

# Every file matching the pattern, in sorted order.
fnames = sorted(glob.glob(inpattern))

# Width of the gap after the "filename" heading: the length of the longest
# file name, reduced by 6 whenever it is larger than 6.
pad = max(map(len, fnames)) if fnames else 0
if pad > 6:
    pad -= 6

print("Results on rank {}:".format(rank))
print(sep.join(["filename" + " " * pad, "uncl", "clas", "prec", "sens"]))


def _parse_summary(lines):
    """Scan the lines of one result file and collect its summary values.

    The scan is a small state machine over the '#'-prefixed summary lines:

        0  looking for the "# unclassified:" line (or "# classified:")
        1  looking for the "# classified:" header
        2  reading per-rank lines of the classified section
        3  looking for the "# precision" header
        4  reading per-rank lines of the precision section
        5  looking for the "# sensitivity" header
        6  reading per-rank lines of the sensitivity section
        7  sensitivity section finished

    Args:
        lines: Iterable of text lines (e.g. an open file).

    Returns:
        A tuple ``(section, uncl, clas, prec, sens)`` where ``section`` is
        the state reached, ``uncl`` is the unclassified percentage as a
        string ("0.0" if no such line was seen) and ``clas``, ``prec`` and
        ``sens`` map rank names to percentage strings.
    """
    section = 0
    uncl = "0.0"
    clas = {}
    prec = {}
    sens = {}

    for line in lines:
        if section == 0:
            res = unclre.match(line)
            if res is not None:
                uncl = res.group(1)
                section = 1
            elif clasre.match(line) is not None:
                # No "unclassified" line came first; keep the default.
                section = 2

        elif section == 1:
            if clasre.match(line) is not None:
                section = 2

        elif section == 2:
            if not parse_rank(line, clas):
                # The line that ends the rank list may itself already be
                # the next section's header.
                section = 4 if precre.match(line) is not None else 3

        elif section == 3:
            if precre.match(line) is not None:
                section = 4

        elif section == 4:
            if not parse_rank(line, prec):
                section = 6 if sensre.match(line) is not None else 5

        elif section == 5:
            if sensre.match(line) is not None:
                section = 6

        elif section == 6:
            if not parse_rank(line, sens):
                section = 7
                # Nothing after this point is evaluated, so stop reading.
                break

    return section, uncl, clas, prec, sens


# Print one table row per result file that contains a sensitivity section.
for fname in fnames:
    with open(fname, 'r') as file:
        section, uncl, clas, prec, sens = _parse_summary(file)

    # Files in which the sensitivity header was never reached are skipped.
    if section > 5:
        tables = (("classified", clas),
                  ("precision", prec),
                  ("sensitivity", sens))
        missing = [name for name, table in tables if rank not in table]
        if missing:
            # The requested rank is absent from at least one section (for
            # example a mistyped rank name); report it and move on instead
            # of aborting the whole run with a KeyError.
            sys.stderr.write(fname + ": no entry for rank '" + rank
                             + "' in section(s): " + ", ".join(missing)
                             + "; skipped\n")
            continue

        values = [uncl, clas[rank], prec[rank], sens[rank]]
        print(fname + "   " + sep
              + sep.join(str(round(float(value), 2)) for value in values))