update lecture 5 for 2023

jphall663 · Jun 18, 2023 · c2cee2e · c2cee2e
1 parent 05ee5e5
commit c2cee2e
Show file tree

Hide file tree

Showing 10 changed files with 5,581 additions and 1,132 deletions.
diff --git a/assignments/assignment_3/assign_3_template.ipynb b/assignments/assignment_3/assign_3_template.ipynb
diff --git a/assignments/data/scores/assignment_3.pdf b/assignments/data/scores/assignment_3.pdf
diff --git a/assignments/final150.ipynb b/assignments/final150.ipynb
diff --git a/assignments/tex/assignment_5.pdf b/assignments/tex/assignment_5.pdf
diff --git a/assignments/tex/assignment_5.tex b/assignments/tex/assignment_5.tex
@@ -3,7 +3,6 @@
 \documentclass[fleqn]{article}
 \renewcommand\refname{}
 \title{Responsible Machine Learning\\\Large{Assignment 5}\\\Large{10 points}}
-\author{\copyright Patrick Hall 2021}
 
 \usepackage{graphicx}
 \usepackage{fullpage}
@@ -43,15 +42,15 @@ \section{Test How this Lending Model Performs in Recession Conditions.}
 
 \section{Conduct Residual Analysis and Remediate Discovered Bugs.}
 
-Cells 9--15 use a basic residual analysis procedure to find outliers and identify a fundamental problem with our data and EBM model. Once these problems are identified, you should be able to increase your model performance by accounting for them.\\
+Cells 9--15 use a basic residual analysis procedure to find outliers and identify a fundamental problem with our data and EBM model. Once these problems are identified, you should be able to increase your model performance by accounting for them. You may use the template approach and/or additional approaches to improve your model. \\
 
 \section{Submit Code Results.}
 
-Your deliverable for this assignment is to update your group's GitHub repository to reflect this debugging exercise. Stress-testing is worth 5 points. Remediating your model by removing outliers and handling data imbalance, to increase validation AUC, is worth 5 points.\\
+Your deliverable for this assignment is to update your group's GitHub repository to reflect this debugging exercise. Stress-testing is worth 5 points. Remediating your model by removing outliers and handling data imbalance, to increase validation AUC, is worth up to 5 points. Groups with better debugging will receive higher scores. \\
 
-\noindent \textbf{Your deliverables are due Sunday, July 3\textsuperscript{rd}, at 11:00 AM ET.}\\
+\noindent \textbf{Your deliverables are due Wednesday, June 28\textsuperscript{th}, at 11:59 PM ET.}\\
 
-\noindent Note that you may also improve Assignment 1 or Assignment 3 scores throughout the Summer I Session to improve your ranking, your Assignment 1 grade, your Assignment 3 grade, and your final project grade.
+\noindent Note that you may also improve Assignment 1 or Assignment 3 scores throughout the Summer I Session to improve your ranking, your Assignment 1 grade, your Assignment 3 grade, and your final project grade. \textbf{(HINT: If you perform Assignment 5 correctly, it should allow you to boost your model performance in Assignments 1 and/or 3!)}
 
 \end{document}
 
diff --git a/img/de.png b/img/de.png
diff --git a/img/robust.png b/img/robust.png
diff --git a/img/sa_max_prob.png b/img/sa_max_prob.png
diff --git a/tex/lecture_5.pdf b/tex/lecture_5.pdf
diff --git a/tex/lecture_5.tex b/tex/lecture_5.tex
@@ -64,9 +64,9 @@
 		\frametitle{What is Model Debugging?}
 
 		\begin{itemize}
-			\item Model debugging is an emergent discipline focused on discovering and remediating errors in the internal mechanisms and outputs of machine learning models.\footnote{\tiny{See \url{https://debug-ml-iclr2019.github.io/} for numerous model debugging approaches.}} 
+			\item Model debugging is an emergent discipline focused on discovering and remediating errors in the internal mechanisms and outputs of machine learning models.\footnote{\tiny{See \url{https://debug-ml-iclr2019.github.io/} for numerous examples of model debugging approaches.}} 
 			\item Model debugging attempts to test machine learning models like software (because the models are software).
-			\item Model debugging is similar to regression diagnostics, but for machine learning models.
+			\item Model debugging is similar to model validation and regression diagnostics, but for machine learning models.
 			\item Model debugging \textbf{promotes trust directly} and \textbf{enhances interpretability as a side-effect}.
 		\end{itemize}
 
@@ -102,7 +102,7 @@
     				\end{center}
   			\end{figure}
 			\vspace{-10pt}
-\centering{\scriptsize{\textbf{AI incidents}: The Partnership on AI Incident Database contains over 1,200 incident reports.\footnote{\tiny{See \url{https://incidentdatabase.ai/} to access the database.}}}}
+\centering{\scriptsize{\textbf{AI incidents}: The AI Incident Database contains over 2,000 incident reports.\footnote{\tiny{See \url{https://incidentdatabase.ai/} to access the database.}}}}
 
 		\end{frame}
 
@@ -270,13 +270,21 @@
 			\begin{frame}[t, allowframebreaks]
 				\vspace{-10pt}
 				\frametitle{\textbf{Sensitivity Analysis}: Search for Adversarial Examples}
-				\begin{figure}
-					\begin{center}
-						\includegraphics[height=130pt]{../img/sa_max_prob.png}
-					\end{center}
-				\end{figure}
+
+				\begin{columns}		
+
+				\column{0.5\linewidth}		
+
+					\begin{figure}
+						\begin{center}
+							\includegraphics[height=165pt]{../img/sa_max_prob.png}
+						\end{center}
+					\end{figure}
 
-				\tiny{Adversary search confirms multiple avenues of attack and exposes a potential flaw in $g_{\text{mono}}$ inductive logic: default is predicted for customer's who make payments above their credit limit. (Try heuristics, evolutionary learning or packages like \href{https://github.com/tensorflow/cleverhans}{cleverhans} to generate adversarial examples.)}
+				\column{0.5\linewidth}	
+					\small{Adversary search confirms multiple avenues of attack and exposes a potential flaw in $g_{\text{mono}}$ inductive logic: default is predicted for customers who make payments above their credit limit. (Try heuristics, evolutionary learning or packages like \href{https://github.com/tensorflow/cleverhans}{cleverhans} to generate adversarial examples.)}
+
+				\end{columns}
 
 				%\framebreak
 				%\vspace{-5pt}
@@ -290,6 +298,32 @@
 
 			\end{frame}			
 
+			\begin{frame}[t]
+
+				\frametitle{\textbf{Sensitivity Analysis}: Robustness to Drift}
+
+				\begin{columns}[t]		
+
+					\column{0.5\linewidth}
+						\begin{figure}
+							\begin{center}
+								\includegraphics[height=125pt]{../img/robust.png}
+								\caption{$g_{\text{mono}}$ accuracy under feature perturbation.}
+							\end{center}
+						\end{figure}
+
+					\column{0.5\linewidth}
+						\begin{itemize}\small
+							\item Models must be robust to data drift once deployed.
+							\item Simulation, perturbation, and statistics like population stability index (PSI), \textit{t}, and Kolmogorov-Smirnov (K-S) can help assess robustness.
+							\item Drift can also be measured on a feature-by-feature basis across data partitions.
+							\item Likely due to monotonicity constraints, $g_{\text{mono}}$ holds up well to moderate data perturbation.
+						\end{itemize}
+
+				\end{columns}
+
+			\end{frame}
+
 			\begin{frame}[t]
 
 				%When you don't know what to test
@@ -298,14 +332,15 @@
 				\vspace{-15pt}
 				\begin{figure}
 					\begin{center}
-						\includegraphics[height=130pt]{../img/ra.png}
+						\includegraphics[height=115pt]{../img/ra.png}
 					\end{center}
 				\end{figure}	
 				\vspace{-10pt}
 				\begin{itemize}\scriptsize
 					\item In general, random attacks are a viable method to identify software bugs in machine learning pipelines. \textbf{(Start here if you don't know where to start.)}
 					\item Random data can apparently elicit all probabilities $\in [0, 1]$ from $g_{\text{mono}}$.
 					\item Around the decision threshold, lower probabilities can be attained simply by injecting missing values, yet another vulnerability to adversarial attack.
+					\item Chaos testing is a broader approach that can also elicit unexpected behaviors from machine learning systems.
 				\end{itemize}
 				\normalsize
 
@@ -336,24 +371,54 @@
 
 			\begin{frame}[t]
 
-				\frametitle{\textbf{Residual Analysis}: Disparate Accuracy and Errors}
+				\frametitle{\textbf{Residual Analysis}: Segmented Error Analysis}
 
-		                 \vspace{-10pt}
+		        \vspace{-10pt}
 				\begin{figure}
 					\begin{center}
 						\includegraphics[height=140pt]{../img/de.png}
 					\end{center}
 				\end{figure}
-				\vspace{-15pt}
-				\tiny For $\text{PAY\_0}$:
-				\begin{itemize}
+				%\vspace{-15pt}
+				\begin{itemize}\tiny
 					\item Notable change in accuracy and error characteristics for $\text{PAY\_0} \geq 2$. 
+					\item For $\text{SEX}$, accuracy and error characteristics vary little across individuals represented in the training data. Bias mitigation should be confirmed by more involved bias testing.
+					\item Overfitting, stability and other characteristics should also be analyzed by segment.
 					\item Varying performance across segments can be an indication of underspecification. 
 				\end{itemize}
-				For $\text{SEX}$, accuracy and error characteristics vary little across individuals represented in the training data. Non-discrimination should be confirmed by more involved disparate impact analysis.
 
 			\end{frame}
 
+			\begin{frame}[t]
+
+				\frametitle{\textbf{Residual Analysis}: Plotting Residuals}
+
+				\begin{columns}
+
+					\column{0.5\linewidth}
+						\begin{figure}
+							\begin{center}
+								\includegraphics[height=140pt]{../img/lecture_5.png}
+								\caption{Residuals plotted by $\text{PAY\_0}$ reveal a serious problem with $g_{\text{mono}}$.}
+							\end{center}
+						\end{figure}
+
+					\column{0.5\linewidth}
+						\begin{itemize}
+							\item Plotting residuals is a battle-tested model debugging technique. 
+							\item Residuals can be plotted using many approaches: 
+							\begin{itemize}
+								\item Overall, by feature (at left) or by segment
+								\item Traditional ($\hat{y}^{(i)} - y^{(i)}$)
+								\item Deviance or loss residuals (at left)
+							\end{itemize}
+							\item Residuals can reveal serious issues and the underlying problems behind them. 
+						\end{itemize}
+
+				\end{columns}				
+
+			\end{frame}			
+
 			\begin{frame}
 
 				\frametitle{\textbf{Residual Analysis}: Local Contributions to Logloss}