diff --git a/assignments/assignment_2/assign_2_template.ipynb b/assignments/assignment_2/assign_2_template.ipynb
index f9e4b42..24e0741 100644
--- a/assignments/assignment_2/assign_2_template.ipynb
+++ b/assignments/assignment_2/assign_2_template.ipynb
@@ -7,7 +7,7 @@
    "source": [
     "## License \n",
     "\n",
-    "Copyright 2021 Patrick Hall (jphall@gwu.edu)\n",
+    "Copyright 2021--2023 Patrick Hall (jphall@gwu.edu)\n",
     "\n",
     "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
     "you may not use this file except in compliance with the License.\n",
@@ -923,13 +923,7 @@
       "Grid search run 19/50:\n",
       "Training with parameters: {'booster': 'gbtree', 'eval_metric': 'auc', 'nthread': 4, 'objective': 'binary:logistic', 'seed': 12345, 'colsample_bytree': 0.5, 'colsample_bylevel': 0.9, 'eta': 0.05, 'max_depth': 7, 'reg_alpha': 0.005, 'reg_lambda': 0.0005, 'subsample': 0.3, 'min_child_weight': 5, 'gamma': 0.2, 'monotone_constraints': (1, 1, 1, -1, 1, 1, -1, -1, -1, 1)}\n",
       "---------- ----------\n",
-      "Grid search run 20/50:\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
+      "Grid search run 20/50:\n",
       "Training with parameters: {'booster': 'gbtree', 'eval_metric': 'auc', 'nthread': 4, 'objective': 'binary:logistic', 'seed': 12345, 'colsample_bytree': 0.9, 'colsample_bylevel': 0.3, 'eta': 0.05, 'max_depth': 3, 'reg_alpha': 0.05, 'reg_lambda': 0.0005, 'subsample': 0.7, 'min_child_weight': 10, 'gamma': 0.4, 'monotone_constraints': (1, 1, 1, -1, 1, 1, -1, -1, -1, 1)}\n",
       "---------- ----------\n",
       "Grid search run 21/50:\n",
@@ -2422,7 +2416,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -2436,7 +2430,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.9"
+   "version": "3.10.6"
   }
  },
  "nbformat": 4,
diff --git a/assignments/tex/assignment_1.pdf b/assignments/tex/assignment_1.pdf
index 479b801..a786fc6 100644
Binary files a/assignments/tex/assignment_1.pdf and b/assignments/tex/assignment_1.pdf differ
diff --git a/assignments/tex/assignment_1.tex b/assignments/tex/assignment_1.tex
index 0589aae..350a8a6 100644
--- a/assignments/tex/assignment_1.tex
+++ b/assignments/tex/assignment_1.tex
@@ -3,7 +3,6 @@
 \documentclass[fleqn]{article}
 \renewcommand\refname{}
 \title{Responsible Machine Learning\\\Large{Assignment 1}\\\Large{10 points}}
-\author{\copyright Patrick Hall 2021--2023}
 
 \usepackage{graphicx}
 \usepackage{fullpage}
diff --git a/assignments/tex/assignment_2.pdf b/assignments/tex/assignment_2.pdf
index 4d7b7a0..b34bda5 100644
Binary files a/assignments/tex/assignment_2.pdf and b/assignments/tex/assignment_2.pdf differ
diff --git a/assignments/tex/assignment_2.tex b/assignments/tex/assignment_2.tex
index f2e1af6..481deda 100644
--- a/assignments/tex/assignment_2.tex
+++ b/assignments/tex/assignment_2.tex
@@ -3,7 +3,6 @@
 \documentclass[fleqn]{article}
 \renewcommand\refname{}
 \title{Responsible Machine Learning\\\Large{Assignment 2}\\\Large{10 points}}
-\author{\copyright Patrick Hall 2022}
 
 \usepackage{graphicx}
 \usepackage{fullpage}
@@ -33,7 +32,9 @@
 
 \maketitle
 
-\noindent In Assignment 2, you will work with your group to analyze and explain interpretable machine learning (ML) models following the instructions below. A \href{https://nbviewer.jupyter.org/github/jphall663/GWU_rml/blob/master/assignments/assignment_2/assign_2_template.ipynb?flush_cache=true}{template} has been provided as an example of how to explain and compare a few different interpretable models. For those of you who use Python virtual environments, a basic \href{https://github.com/jphall663/GWU_rml/blob/master/assignments/requirements.txt}{\texttt{requirements.txt}} file is also available for the template.\\
+\noindent In Assignment 2, you will work with your group to analyze and explain machine learning (ML) models from Assignment 1 following the instructions below. A \href{https://nbviewer.jupyter.org/github/jphall663/GWU_rml/blob/master/assignments/assignment_2/assign_2_template.ipynb?flush_cache=true}{template} has been provided as an example of how to explain and compare a few different models. For those of you who use Python virtual environments, a basic \href{https://github.com/jphall663/GWU_rml/blob/master/assignments/requirements.txt}{\texttt{requirements.txt}} file is also available for the template.\\
+
+\noindent Also, note that \href{https://github.com/SelfExplainML/PiML-Toolbox}{PiML} makes it easy to calculate global feature importance, local feature importance, and plot feature behavior--i.e., to fulfill the requirements for this assignment--but may take some custom coding for Section \ref{local_fi}.\\
 
 \noindent For each section below, you should be trying to think through whether the explanatory results make sense from a domain knowledge perspective, as well as the differences between your models and if those are logical and/or informative.\\
 
@@ -43,7 +44,7 @@ \section{Calculate and Plot Global Feature Importance.}\label{global_fi}
 
 Use regression coefficients, Shapley values, or other reputable techniques to calculate global feature importance for your models. The template uses coefficients from the elastic net GLM -- extracted in cell 12, Shapley values for the monotonic GBM -- calculated in cells 19--20, and local feature scores for the EBM -- extracted in cells 27--28, to create global feature importance. (Depending on your package version, \texttt{interpret} can calculate these quantities much more directly using the \texttt{predict\_and\_explain()} function.) Plot these values as bar charts, ideally comparing how your models treat input features differently, as in cell 30 of the template.\\
 
-\section{Calculate and Plot Local Feature Importance.}
+\section{Calculate and Plot Local Feature Importance.}\label{local_fi}
 
 Using approaches similar to those in Section \ref{global_fi}, calculate local feature importance for your models at three percentiles of predicted probability. Cell 10 of the template provides a simple function for calculating percentiles, and percentiles for each model's predictions are found in cells 11, 18, and 26.\\
 
@@ -57,7 +58,7 @@ \section{Submit Code Results.}
 
 Your deliverable for this assignment is to update your group's GitHub repository to reflect this explanatory analysis. Feature importance plots for all models and features are worth 3 points for global and 3 points for local. Feature behavior plots for all models and features are worth 4 pts., for a total of 10 pts.\\
 
-\noindent \textbf{Your deliverables are due Wednesday, June 1\textsuperscript{st}, at 11:59:59 PM ET.}\\
+\noindent \textbf{Your deliverables are due Wednesday, June 7\textsuperscript{th}, at 11:59:59 PM ET.}\\
 
 \noindent Note that you may also improve Assignment 1 scores throughout the Summer I Session to improve your ranking, your Assignment 1 grade, and your final project grade.
 
diff --git a/lecture_2.ipynb b/lecture_2.ipynb
index 76a738a..691b285 100644
--- a/lecture_2.ipynb
+++ b/lecture_2.ipynb
@@ -6,7 +6,7 @@
    "source": [
     "## License \n",
     "\n",
-    "Copyright 2020 Patrick Hall (jphall@gwu.edu)\n",
+    "Copyright 2020--2023 Patrick Hall (jphall@gwu.edu)\n",
     "\n",
     "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
     "you may not use this file except in compliance with the License.\n",
@@ -3415,7 +3415,7 @@
  "metadata": {
   "anaconda-cloud": {},
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -3429,7 +3429,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.9"
+   "version": "3.10.6"
   }
  },
  "nbformat": 4,
diff --git a/tex/lecture_2.bib b/tex/lecture_2.bib
index f965231..8791496 100644
--- a/tex/lecture_2.bib
+++ b/tex/lecture_2.bib
@@ -506,4 +506,13 @@ @misc{h2o_mli_booklet
   Year = {2017},
   Title = {Machine Learning Interpretability with H2O Driverless AI},
   Url = {http://docs.h2o.ai/driverless-ai/latest-stable/docs/booklets/MLIBooklet.pdf},
-  Institution = {H2O.ai}}
\ No newline at end of file
+  Institution = {H2O.ai}}
+
+@inproceedings{mothilal2020explaining,
+  title={Explaining {M}achine {L}earning {C}lassifiers {T}hrough {D}iverse {C}ounterfactual {E}xplanations},
+  author={Mothilal, Ramaravind K and Sharma, Amit and Tan, Chenhao},
+  booktitle={Proceedings of the 2020 Conference on Fairness, Accountability, and Transparency},
+  pages={607--617},
+  year={2020},
+  note={URL: \url{https://dl.acm.org/doi/pdf/10.1145/3351095.3372850}}
+}
\ No newline at end of file
diff --git a/tex/lecture_2.pdf b/tex/lecture_2.pdf
index 4c32a4a..d119e9c 100644
Binary files a/tex/lecture_2.pdf and b/tex/lecture_2.pdf differ
diff --git a/tex/lecture_2.tex b/tex/lecture_2.tex
index 28152ff..a03d4a0 100644
--- a/tex/lecture_2.tex
+++ b/tex/lecture_2.tex
@@ -166,6 +166,8 @@
         \item Partial dependence (\citet{esl})
         \item Individual conditional expectation (ICE) (\citet{ice_plots})
         \end{itemize}
+    \item \textbf{Counterfactual explanations} (\citet{mothilal2020explaining})
+    \item \textbf{Example-based explanations} (\citet{molnar})
     \end{itemize}\normalsize
 
 \end{frame}
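
As a companion to the Assignment 2 text updated above, the sketch below illustrates the kind of analysis it asks for: global Shapley-based feature importance for a gradient boosting machine, plotted as a bar chart, plus a local explanation at one percentile of predicted probability. This is not the course template's code; the xgboost/shap/pandas/matplotlib stack, the X_train and y_train objects, and all parameter values are assumptions for illustration only.

# Illustrative sketch only -- not from the course template. Assumes a pandas
# DataFrame X_train and a binary target y_train already exist, and that the
# xgboost, shap, pandas, and matplotlib packages are installed.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import shap
import xgboost as xgb

# a small GBM as a stand-in for the template's monotonic GBM
model = xgb.XGBClassifier(n_estimators=50, max_depth=3, random_state=12345)
model.fit(X_train, y_train)

# global importance: mean absolute Shapley value per input feature
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X_train)  # shape (n_rows, n_features) for a binary XGBoost model
global_importance = pd.Series(np.abs(shap_values).mean(axis=0), index=X_train.columns)
global_importance.sort_values().plot.barh(title="Global Shapley importance")
plt.tight_layout()
plt.show()

# local importance: Shapley values for the row nearest the median predicted
# probability (repeat at other percentiles for the assignment)
probs = model.predict_proba(X_train)[:, 1]
row = int(np.argmin(np.abs(probs - np.percentile(probs, 50))))
local_importance = pd.Series(shap_values[row], index=X_train.columns)
local_importance.sort_values().plot.barh(title="Local Shapley importance, median prediction")
plt.tight_layout()
plt.show()

The same pattern extends to the other percentiles of predicted probability and, with partial dependence or ICE curves, to the feature behavior plots the assignment also requires.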