diff --git a/README.md b/README.md
index 4b8b835..c230d08 100644
--- a/README.md
+++ b/README.md
@@ -57,7 +57,7 @@ Each parent node, of at least one child, will generate a decision tree classific
   'labels': ['white'],
   'stage': 'light'}]
 ```
-The hmc.DecisionTreeHierarchicalClassifier is idiomatic to the sklearn tree.DecisionTreeClassifier. Fit, predict and score the same way. Traditional multi-classification accuracy is comparable.
+The hmc.DecisionTreeHierarchicalClassifier is idiomatic to the sklearn tree.DecisionTreeClassifier. Fit, predict and score the same way. Traditional multi-classification average accuracy is comparable.
 ```python
 from sklearn import tree
 dt = tree.DecisionTreeClassifier()
@@ -72,18 +72,62 @@ dth_accuracy = dth.score(X_test, y_test)
 ```
 ```python
 >>> dt_accuracy
-0.46561886051080548
+0.4400785854616896
 >>> dth_accuracy
-0.46758349705304519
+0.46561886051080548
 ```
-Hierarchically adjusted classification accuracy scoring is available in addition to traditional accuracy. This metric averages accuracy at each classification stage, penalizing the least harshly cases of the mis-classification of sibling nodes, and most harshly cases where true and predicted classes share no ancestors in the hierarchy.
+Additional hierarchical multi-classification specific metrics [2] are provided.
 ```python
-dth_accuracy_adjusted = dth.score_adjusted(X_test, y_test)
+import hmc.metrics as metrics
+
+>>> metrics.accuracy_score(ch, y_test, dth_predicted)
+0.46561886051080548
+>>> metrics.precision_score_ancestors(ch, y_test, dth_predicted)
+0.7988929889298892
+>>> metrics.recall_score_ancestors(ch, y_test, dth_predicted)
+0.8108614232209738
+>>> metrics.f1_score_ancestors(ch, y_test, dth_predicted)
+0.8048327137546468
+>>> metrics.precision_score_descendants(ch, y_test, dth_predicted)
+0.6576576576576577
+>>> metrics.recall_score_descendants(ch, y_test, dth_predicted)
+0.6160337552742616
+>>> metrics.f1_score_descendants(ch, y_test, dth_predicted)
+0.636165577342048
 ```
+Ancestor and descendant precision and recall are computed over class sets: every class is expanded to itself plus its ancestors (or descendants), and the number of classes shared by the true and predicted sets is divided by the total size of the predicted sets for precision, or of the true sets for recall [3].
 ```python
->>> dth_accuracy_adjusted
-0.66115923150295042
-```
+true = ['dark', 'white', 'gray']
+
+pred_sibling = ['dark', 'white', 'black']
 
+>>> metrics.accuracy_score(ch, true, pred_sibling)
+0.66666666666666663
+>>> metrics.precision_score_ancestors(ch, true, pred_sibling)
+0.8
+>>> metrics.precision_score_descendants(ch, true, pred_sibling)
+0.8571428571428571
+
+pred_narrower = ['dark', 'white', 'ash']
+
+>>> metrics.accuracy_score(ch, true, pred_narrower)
+0.66666666666666663
+>>> metrics.precision_score_ancestors(ch, true, pred_narrower)
+0.8333333333333334
+>>> metrics.precision_score_descendants(ch, true, pred_narrower)
+1.0
+
+pred_broader = ['dark', 'white', 'dark']
+
+>>> metrics.accuracy_score(ch, true, pred_broader)
+0.66666666666666663
+>>> metrics.precision_score_ancestors(ch, true, pred_broader)
+1.0
+>>> metrics.precision_score_descendants(ch, true, pred_broader)
+0.8181818181818182
+```
 
 1. Vens, C., Struyf, J., Schietgat, L., Džeroski, S., & Blockeel, H. (2008). Decision trees for hierarchical multi-label classification. Mach Learn Machine Learning, 73(2), 185-214.
+2. Sokolova, M., & Lapalme, G. (2009). A systematic analysis of performance measures for classification tasks. Information Processing & Management, 45(4), 427-437. doi:10.1016/j.ipm.2009.03.002
+3. Costa, E., Lorena, A., Carvalho, A., & Freitas, A. (2007). A review of performance evaluation measures for hierarchical classifiers. In Proceedings of the AAAI
+2007 workshop "Evaluation methods for machine learning" (pp. 1–6).
diff --git a/hmc/__init__.py b/hmc/__init__.py
index 14ba09b..c42ff26 100644
--- a/hmc/__init__.py
+++ b/hmc/__init__.py
@@ -3,8 +3,19 @@
 from .hmc import DecisionTreeHierarchicalClassifier
 from .datasets import load_shades_class_hierachy
 from .datasets import load_shades_data
+from .metrics import accuracy_score
+from .metrics import precision_score_ancestors
+from .metrics import recall_score_ancestors
+from .metrics import precision_score_descendants
+from .metrics import recall_score_descendants
+from .metrics import f1_score_ancestors
+from .metrics import f1_score_descendants
 
 __all__ = ["ClassHierarchy",
     "DecisionTreeHierarchicalClassifier",
     "load_shades_class_hierachy",
-    "load_shades_data"]
+    "load_shades_data",
+    "accuracy_score",
+    "precision_score_ancestors", "recall_score_ancestors",
+    "precision_score_descendants", "recall_score_descendants",
+    "f1_score_ancestors", "f1_score_descendants"]
diff --git a/hmc/hmc.py b/hmc/hmc.py
index 8ffa7aa..2b364e1 100644
--- a/hmc/hmc.py
+++ b/hmc/hmc.py
@@ -10,6 +10,8 @@
 import numpy as np
 import pandas as pd
 
+from . import metrics  # explicit relative import, matching the style used in hmc/__init__.py
+
 __all__ = ["ClassHierarchy", "DecisionTreeHierarchicalClassifier"]
 
 # =============================================================================
@@ -40,6 +42,25 @@ def _get_children(self, parent):
         # Return a list of children nodes in alpha order
         return sorted([child for child, childs_parent in self.nodes.iteritems() if childs_parent == parent])
 
+    def _get_ancestors(self, child):
+        """List the ancestors of ``child``, nearest first; ``child`` and the root are excluded."""
+        lineage = []
+        # Climb one parent at a time; the walk ends as soon as the root is reached,
+        # so a node whose parent is the root has no ancestors to report.
+        node = self._get_parent(child)
+        while node != self.root:
+            lineage.append(node)
+            node = self._get_parent(node)
+        return lineage
+
+    def _get_descendants(self, parent):
+        """Return a list of the descendants of ``parent``, not including ``parent`` itself."""
+        descendants = []
+        # NOTE(review): presumably _depth_first fills the list in place, parent included -- confirm
+        self._depth_first(parent, descendants)
+        descendants.remove(parent)
+        return descendants
+
     def _is_descendant(self, parent, child):
         while child != self.class_hierarchy.root and child != parent:
             child = self.class_hierarchy._get_parent(child)
@@ -219,37 +240,5 @@ def score(self, X, y):
         """
         # Check that the trees have been fit
         self._check_fit()
-        classes = pd.DataFrame(self.predict(X), columns=['y_hat'], index=y.index)
-        classes['y'] = pd.DataFrame(y)
-        classes['correct'] = classes.apply(lambda row: 1 if row['y_hat'] == row['y'] else 0, axis=1)
-        return classes[['correct']].mean()[0]
-
-    def _score_stages(self, X, y):
-        y_hat = self._predict_stages(X)
-        y = pd.DataFrame(y)
-        y_classes = pd.DataFrame(index=y.index)
-
-        def assign_ancestor(classes, descendent):
-            while descendent not in classes and descendent != self.class_hierarchy.root:
-                descendent = self.class_hierarchy._get_parent(descendent)
-            if descendent == self.class_hierarchy.root and self.class_hierarchy.root not in classes:
-              descendent = ""
-            return descendent
-
-        accuracies = []
-        for stage in self.stages:
-            y_hat[stage['stage'] + "_true"] = y.apply(lambda row: assign_ancestor(stage['classes'], row[0]), axis=1)
-            y_hat[stage['stage'] + "_correct"] = y_hat.apply(lambda row: 1 if row[stage['stage'] + "_true"] == row[stage['stage']] else 0, axis=1)
-            y_hat[stage['stage'] + "_included"] = y_hat.apply(lambda row: 1 if len(row[stage['stage'] + "_true"]) > 0 else 0, axis=1)
-            accuracy = y_hat[[stage['stage'] + "_correct"]].sum()[0] / y_hat[[stage['stage'] + "_included"]].sum()[0]
-            accuracies.append(accuracy)
-        return accuracies
-
-    def score_adjusted(self, X, y):
-        """
-        Returns the hierachy adjusted mean accuracy on the given test data (X, y).
-        """
-        # Check that the trees have been fit
-        self._check_fit()
-        accuracies = self._score_stages(X, y)
-        return (1 / len(self.stages)) * sum(accuracies)
+        y_pred = pd.DataFrame(self.predict(X), columns=['y_hat'], index=y.index)
+        return metrics.accuracy_score(self.class_hierarchy, y, y_pred)
diff --git a/hmc/metrics.py b/hmc/metrics.py
new file mode 100644
index 0000000..34612be
--- /dev/null
+++ b/hmc/metrics.py
@@ -0,0 +1,92 @@
+"""
+Metrics for evaluating hierarchical multi-classification performance.
+"""
+
+from __future__ import print_function
+from __future__ import division
+
+from sklearn import tree
+from sklearn import metrics as skmetrics
+from sklearn.utils import check_consistent_length
+from sklearn.utils import column_or_1d
+from sklearn.utils.multiclass import type_of_target
+
+from itertools import chain
+
+import numpy as np
+import pandas as pd
+
+def _check_targets_hmc(y_true, y_pred):
+    """Validate a pair of label vectors and return them as 1d arrays.
+
+    Raises ValueError for unequal lengths or non-class targets (e.g. continuous, multilabel).
+    """
+    check_consistent_length(y_true, y_pred)
+    y_type = set([type_of_target(y_true), type_of_target(y_pred)])
+    if not y_type <= set(["binary", "multiclass"]):  # two-label data is still multiclass
+        raise ValueError("{0} is not supported".format(y_type))
+    return column_or_1d(y_true), column_or_1d(y_pred)
+
+## General Scores
+def accuracy_score(class_hierarchy, y_true, y_pred):
+    """Average accuracy: fraction of exactly matching labels (class_hierarchy is not used)."""
+    validated = _check_targets_hmc(y_true, y_pred)
+    return skmetrics.accuracy_score(*validated)
+
+## Hierarchy Precision / Recall
+def _aggregate_class_sets(set_function, y_true, y_pred):
+    """Return (true_sum, predicted_sum, intersection_sum) of class-set sizes over all samples.
+
+    A label's class set is the label itself plus everything set_function returns for it.
+    """
+    def expand(label):
+        return set([label] + set_function(label))
+    pairs = [(expand(true), expand(pred)) for true, pred in zip(y_true.tolist(), y_pred.tolist())]
+    true_sum = sum(len(true_set) for true_set, _ in pairs)
+    predicted_sum = sum(len(pred_set) for _, pred_set in pairs)
+    return (true_sum, predicted_sum, sum(len(t & p) for t, p in pairs))
+
+# Ancestors Scores (Super Class)
+def precision_score_ancestors(class_hierarchy, y_true, y_pred):
+    """Precision: classes shared by the true and predicted label-plus-ancestors sets / predicted set sizes."""
+    y_true, y_pred = _check_targets_hmc(y_true, y_pred)
+    _, predicted_total, shared_total = _aggregate_class_sets(class_hierarchy._get_ancestors, y_true, y_pred)
+    return shared_total / predicted_total
+
+def recall_score_ancestors(class_hierarchy, y_true, y_pred):
+    """Recall: classes shared by the true and predicted label-plus-ancestors sets / true set sizes."""
+    y_true, y_pred = _check_targets_hmc(y_true, y_pred)
+    true_total, _, shared_total = _aggregate_class_sets(class_hierarchy._get_ancestors, y_true, y_pred)
+    return shared_total / true_total
+
+# Descendants Scores (Sub Class)
+def precision_score_descendants(class_hierarchy, y_true, y_pred):
+    """Precision: classes shared by the true and predicted label-plus-descendants sets / predicted set sizes."""
+    y_true, y_pred = _check_targets_hmc(y_true, y_pred)
+    _, predicted_total, shared_total = _aggregate_class_sets(class_hierarchy._get_descendants, y_true, y_pred)
+    return shared_total / predicted_total
+
+def recall_score_descendants(class_hierarchy, y_true, y_pred):
+    """Recall: classes shared by the true and predicted label-plus-descendants sets / true set sizes."""
+    y_true, y_pred = _check_targets_hmc(y_true, y_pred)
+    true_total, _, shared_total = _aggregate_class_sets(class_hierarchy._get_descendants, y_true, y_pred)
+    return shared_total / true_total
+
+# Hierarchy Fscore
+def _fbeta_score_class_sets(set_function, y_true, y_pred, beta=1):
+    """F-beta of the class-set precision and recall; 0.0 (not a 0/0 error) if no class is shared."""
+    y_true, y_pred = _check_targets_hmc(y_true, y_pred)
+    true_sum, predicted_sum, intersection_sum = _aggregate_class_sets(set_function, y_true, y_pred)
+    precision, recall = intersection_sum / predicted_sum, intersection_sum / true_sum
+    return ((beta ** 2 + 1) * precision * recall) / ((beta ** 2 * precision) + recall) if intersection_sum else 0.0
+
+def f1_score_ancestors(class_hierarchy, y_true, y_pred):
+    """F1 (harmonic mean) of ancestor precision and recall; targets are validated by the helper."""
+    return _fbeta_score_class_sets(class_hierarchy._get_ancestors, y_true, y_pred)
+
+def f1_score_descendants(class_hierarchy, y_true, y_pred):
+    """F1 (harmonic mean) of descendant precision and recall; targets are validated by the helper."""
+    return _fbeta_score_class_sets(class_hierarchy._get_descendants, y_true, y_pred)
+
+# # Classification Report
+# def classification_report(class_hierarchy, y_true, y_pred):
diff --git a/setup.py b/setup.py
index 7dfe58e..00fdaed 100644
--- a/setup.py
+++ b/setup.py
@@ -7,5 +7,5 @@
       description='Decision tree based hierachical multi-classifier',
       author='David Warshaw',
       author_email='david.warshaw@gmail.com',
-      py_modules=['hmc', 'datasets'],
+      py_modules=['hmc', 'datasets', 'metrics'],
       requires=['sklearn', 'numpy', 'pandas'])
diff --git a/tests/__init__.py b/tests/__init__.py
index 85c9014..d5e79d9 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,2 +1,3 @@
 from .test_hmc import TestClassHierarchy
 from .test_hmc import TestDecisionTreeHierarchicalClassifier
+from .test_metrics import TestMetrics
diff --git a/tests/test_hmc.py b/tests/test_hmc.py
index df6e190..bf2406b 100644
--- a/tests/test_hmc.py
+++ b/tests/test_hmc.py
@@ -29,6 +29,16 @@ def test_get_children(self):
         ch = hmc.load_shades_class_hierachy()
         self.assertEqual(ch._get_children('dark'), ['black', 'gray'])
 
+    def test_get_ancestors(self):
+        ch = hmc.load_shades_class_hierachy()
+        self.assertEqual(ch._get_ancestors('ash'), ['gray', 'dark'])
+        self.assertEqual(len(ch._get_ancestors('colors')), 0)
+
+    def test_get_descendants(self):
+        ch = hmc.load_shades_class_hierachy()
+        self.assertEqual(ch._get_descendants('dark'), ['black', 'gray', 'ash', 'slate'])
+        self.assertEqual(len(ch._get_descendants('slate')), 0)
+
     def test_add_node(self):
         ch = hmc.load_shades_class_hierachy()
         old_number = len(ch.nodes_())
@@ -111,18 +121,6 @@ def test_score(self):
         # Hierachical classification should be at least as accurate as traditional classification
         self.assertTrue(accuracy >= accuracy_nonh)
 
-    def test_score_adjusted(self):
-        ch = hmc.load_shades_class_hierachy()
-        X, y = hmc.load_shades_data()
-        X_train, X_test, y_train, y_test = train_test_split(X, y,
-            test_size = 0.50, random_state = 0)
-        dt = hmc.DecisionTreeHierarchicalClassifier(ch)
-        dt = dt.fit(X_train, y_train)
-        accuracy = dt.score(X_test, y_test)
-        accuracy_adjusted = dt.score_adjusted(X_test, y_test)
-        # Adjusted accuracy should be at least as high as final class accuracy
-        self.assertTrue(accuracy_adjusted >= accuracy)
-
     def test_score_before_fit(self):
         ch = hmc.load_shades_class_hierachy()
         X, y = hmc.load_shades_data()
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
new file mode 100644
index 0000000..2e05205
--- /dev/null
+++ b/tests/test_metrics.py
@@ -0,0 +1,79 @@
+"""
+Tests for the hmc metrics module.
+"""
+
+import unittest
+
+import pandas as pd
+
+from sklearn import tree
+from sklearn.cross_validation import train_test_split
+from sklearn import metrics as skmetrics
+
+import hmc
+import hmc.metrics as metrics
+
+class TestMetrics(unittest.TestCase):
+
+    def setUp(self):
+        self.ch = hmc.load_shades_class_hierachy()
+        self.X, self.y = hmc.load_shades_data()
+        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(self.X, self.y,
+            test_size=0.50, random_state=0)
+        self.dt = hmc.DecisionTreeHierarchicalClassifier(self.ch)
+        self.dt_nonh = tree.DecisionTreeClassifier()
+        self.dt = self.dt.fit(self.X_train, self.y_train)
+        self.dt_nonh = self.dt_nonh.fit(self.X_train, self.y_train)
+        self.y_pred = self.dt.predict(self.X_test)
+        self.y_pred_nonh = self.dt_nonh.predict(self.X_test)
+
+    ## General Scores
+    # Average accuracy
+    def test_accuracy_score(self):
+        accuracy = metrics.accuracy_score(self.ch, self.y_test, self.y_pred)
+        accuracy_sk = skmetrics.accuracy_score(self.y_test, self.y_pred_nonh)
+        # Hierarchical classification should be at least as accurate as the plain decision tree baseline
+        self.assertTrue(accuracy >= accuracy_sk)
+
+    ## Hierarchy Precision / Recall
+    # Ancestors Scores (Super Class)
+    # Precision
+    def test_precision_score_ancestors(self):
+        precision_ancestors = metrics.precision_score_ancestors(self.ch, self.y_test, self.y_pred)
+        precision_sk = skmetrics.precision_score(self.y_test, self.y_pred, average="macro")
+        self.assertTrue(precision_ancestors >= precision_sk)
+
+    # Recall
+    def test_recall_score_ancestors(self):
+        recall_ancestors = metrics.recall_score_ancestors(self.ch, self.y_test, self.y_pred)
+        recall_sk = skmetrics.recall_score(self.y_test, self.y_pred, average="macro")
+        self.assertTrue(recall_ancestors >= recall_sk)
+
+    # Descendants Scores (Sub Class)
+    # Precision
+    def test_precision_score_descendants(self):
+        precision_descendants = metrics.precision_score_descendants(self.ch, self.y_test, self.y_pred)
+        precision_sk = skmetrics.precision_score(self.y_test, self.y_pred, average="macro")
+        self.assertTrue(precision_descendants >= precision_sk)
+
+    # Recall
+    def test_recall_score_descendants(self):
+        recall_descendants = metrics.recall_score_descendants(self.ch, self.y_test, self.y_pred)
+        recall_sk = skmetrics.recall_score(self.y_test, self.y_pred, average="macro")
+        self.assertTrue(recall_descendants >= recall_sk)
+
+    # F1
+    # Ancestors
+    def test_f1_score_ancestors(self):
+        f1_ancestors = metrics.f1_score_ancestors(self.ch, self.y_test, self.y_pred)
+        f1_sk = skmetrics.f1_score(self.y_test, self.y_pred, average="macro")
+        self.assertTrue(f1_ancestors >= f1_sk)
+
+    # Descendants
+    def test_f1_score_descendants(self):
+        f1_descendants = metrics.f1_score_descendants(self.ch, self.y_test, self.y_pred)
+        f1_sk = skmetrics.f1_score(self.y_test, self.y_pred, average="macro")
+        self.assertTrue(f1_descendants >= f1_sk)
+
+if __name__ == '__main__':
+    unittest.main()