diff --git a/img/benchmark.png b/img/benchmark.png new file mode 100644 index 0000000..9cbec5d Binary files /dev/null and b/img/benchmark.png differ diff --git a/img/blueprint.png b/img/blueprint.png new file mode 100644 index 0000000..3eb32f9 Binary files /dev/null and b/img/blueprint.png differ diff --git a/img/de.png b/img/de.png new file mode 100644 index 0000000..8d22e6e Binary files /dev/null and b/img/de.png differ diff --git a/img/di.png b/img/di.png new file mode 100644 index 0000000..0c2ba75 Binary files /dev/null and b/img/di.png differ diff --git a/img/figs.pptx b/img/figs.pptx new file mode 100644 index 0000000..b3a7ce8 Binary files /dev/null and b/img/figs.pptx differ diff --git a/img/global_high_low.png b/img/global_high_low.png new file mode 100644 index 0000000..20b2ff8 Binary files /dev/null and b/img/global_high_low.png differ diff --git a/img/global_pred_loss.png b/img/global_pred_loss.png new file mode 100644 index 0000000..333cbda Binary files /dev/null and b/img/global_pred_loss.png differ diff --git a/img/h2o_logo.png b/img/h2o_logo.png new file mode 100644 index 0000000..2e4b5d6 Binary files /dev/null and b/img/h2o_logo.png differ diff --git a/img/h2o_world_logo.png b/img/h2o_world_logo.png new file mode 100644 index 0000000..609ecdd Binary files /dev/null and b/img/h2o_world_logo.png differ diff --git a/img/local.png b/img/local.png new file mode 100644 index 0000000..af91250 Binary files /dev/null and b/img/local.png differ diff --git a/img/md.png b/img/md.png new file mode 100644 index 0000000..241103e Binary files /dev/null and b/img/md.png differ diff --git a/img/pd.png b/img/pd.png new file mode 100644 index 0000000..2447c11 Binary files /dev/null and b/img/pd.png differ diff --git a/img/pr_auc.png b/img/pr_auc.png new file mode 100644 index 0000000..6f4f4c8 Binary files /dev/null and b/img/pr_auc.png differ diff --git a/img/ra.png b/img/ra.png new file mode 100644 index 0000000..08983a0 Binary files /dev/null and b/img/ra.png differ 
diff --git a/img/sa_max_prob.png b/img/sa_max_prob.png new file mode 100644 index 0000000..2aef77d Binary files /dev/null and b/img/sa_max_prob.png differ diff --git a/img/sa_max_prob_demo.png b/img/sa_max_prob_demo.png new file mode 100644 index 0000000..7e57b51 Binary files /dev/null and b/img/sa_max_prob_demo.png differ diff --git a/img/surrogate_dt_1.png b/img/surrogate_dt_1.png new file mode 100644 index 0000000..2393e2d Binary files /dev/null and b/img/surrogate_dt_1.png differ diff --git a/img/trust_understanding.png b/img/trust_understanding.png new file mode 100644 index 0000000..b599d4c Binary files /dev/null and b/img/trust_understanding.png differ diff --git a/tex/lecture_5.bib b/tex/lecture_5.bib new file mode 100644 index 0000000..52d1a7e --- /dev/null +++ b/tex/lecture_5.bib @@ -0,0 +1,435 @@ +@article{fair_washing, + title = {Fairwashing: the {R}isk of {R}ationalization}, + author = {A{\"\i}vodji, Ulrich and Arai, Hiromi and Fortineau, Olivier and Gambs, S{\'e}bastien and Hara, Satoshi and Tapp, Alain}, + journal = {arXiv preprint arXiv:1901.09749}, + year = {2019}, + note = {URL: \url{https://arxiv.org/pdf/1901.09749.pdf}}} + +@inproceedings{amershi2015modeltracker, + title={Modeltracker: {R}edesigning {P}erformance {A}nalysis {T}ools for {M}achine {L}earning}, + author={Amershi, Saleema and Chickering, Max and Drucker, Steven M and Lee, Bongshin and Simard, Patrice and Suh, Jina}, + booktitle={Proceedings of the 33rd Annual ACM Conference on Human Factors in Computing Systems}, + pages={337--346}, + year={2015}, + organization={ACM}, + note={URL: \url{https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/amershi.CHI2015.ModelTracker.pdf}}} + +@article{angwin16, + Author = {Julia Angwin and Jeff Larson and Surya Mattu and Lauren Kirchner}, + Journal = {ProPublica}, + Title = {{M}achine {B}ias: {T}here's {S}oftware {U}sed {A}cross the {C}ountry to {P}redict {F}uture {C}riminals. 
{A}nd {I}t's {B}iased {A}gainst {B}lacks.}, + note = {URL: \url{https://www.propublica.org/article/machine-bias-risk-assessments-in-criminal-sentencing}}, + Year = {2016}} + +@article{ale_plot, + title = {Visualizing the {E}ffects of {P}redictor {V}ariables in {B}lack {B}ox {S}upervised {L}earning {M}odels}, + author = {Apley, Daniel W.}, + journal = {arXiv preprint arXiv:1612.08468}, + year = {2016}, + note = {URL: \url{https://arxiv.org/pdf/1612.08468.pdf}}} + +@book{barocas-hardt-narayanan, + title = {Fairness and Machine Learning}, + author = {Solon Barocas and Moritz Hardt and Arvind Narayanan}, + publisher = {fairmlbook.org}, + note = {URL: \url{http://www.fairmlbook.org}}, + year = {2018}} + +@article{security_of_ml, + title={The {S}ecurity of {M}achine {L}earning}, + author={Barreno, Marco and Nelson, Blaine and Joseph, Anthony D and Tygar, J Doug}, + journal={Machine Learning}, + volume={81}, + number={2}, + pages={121--148}, + year={2010}, + publisher={Springer}, + note={URL: \url{https://people.eecs.berkeley.edu/~adj/publications/paper-files/SecML-MLJ2010.pdf}}} + +@article{dt_surrogate2, + title = {Interpreting {B}lackbox {M}odels via {M}odel {E}xtraction}, + author ={Osbert Bastani and Carolyn Kim and Hamsa Bastani}, + journal = {arXiv preprint arXiv:1705.08504}, + note = {URL: \url{https://arxiv.org/pdf/1705.08504.pdf}}, + year = {2017}} + +@inproceedings{viper, + title={Verifiable {R}einforcement {L}earning {V}ia {P}olicy {E}xtraction}, + author={Bastani, Osbert and Pu, Yewen and Solar-Lezama, Armando}, + booktitle={Advances in Neural Information Processing Systems}, + pages={2494--2504}, + year={2018}, + note={URL: \url{http://papers.nips.cc/paper/7516-verifiable-reinforcement-learning-via-policy-extraction.pdf}}} + +@article{calders2010three, + title={Three {N}a\"{i}ve {B}ayes {A}pproaches for {D}iscrimination-free {C}lassification}, + author={Calders, Toon and Verwer, Sicco}, + journal={Data Mining and Knowledge Discovery}, + volume={21}, + 
number={2}, + pages={277--292}, + year={2010}, + publisher={Springer}, + note={URL: \url{https://link.springer.com/content/pdf/10.1007/s10618-010-0190-x.pdf}}} + +@inproceedings{calmon2017optimized, + title={Optimized {P}re-processing for {D}iscrimination {P}revention}, + author={Calmon, Flavio and Wei, Dennis and Vinzamuri, Bhanukiran and Ramamurthy, Karthikeyan Natesan and Varshney, Kush R.}, + booktitle={Advances in Neural Information Processing Systems}, + pages={3992--4001}, + year={2017}, + note={URL: \url{http://papers.nips.cc/paper/6988-optimized-pre-processing-for-discrimination-prevention.pdf}}} + + +@article{dt_surrogate1, + Author = {Mark W. Craven and Jude W. Shavlik}, + Journal = {Advances in Neural Information Processing Systems}, + Title = {Extracting {T}ree-{S}tructured {R}epresentations of {T}rained {N}etworks}, + note={URL: \url{http://papers.nips.cc/paper/1152-extracting-tree-structured-representations-of-trained-networks.pdf}}, + Year = {1996}} + +@inproceedings{feldman2015certifying, + title={Certifying and {R}emoving {D}isparate {I}mpact}, + author={Feldman, Michael and Friedler, Sorelle A. and Moeller, John and Scheidegger, Carlos and Venkatasubramanian, Suresh}, + booktitle={Proceedings of the 21\textsuperscript{st} ACM SIGKDD International Conference on Knowledge Discovery and Data Mining}, + pages={259--268}, + year={2015}, + organization={ACM}, + note={URL: \url{https://arxiv.org/pdf/1412.3756.pdf}}} + +@article{been_kim1, + Author = {Finale Doshi-Velez and Been Kim}, + Title = {Towards a {R}igorous {S}cience of {I}nterpretable {M}achine {L}earning}, + Journal = {arXiv preprint arXiv:1702.08608}, + note = {URL: \url{https://arxiv.org/pdf/1702.08608.pdf}}, + Year = {2017}} + +@article{flores2016false, + title={False {P}ositives, {F}alse {N}egatives, and {F}alse {A}nalyses: {A} {R}ejoinder to {M}achine {B}ias: {T}here's {S}oftware {U}sed across the {C}ountry to {P}redict {F}uture {C}riminals. 
{A}nd {I}t's {B}iased against {B}lacks}, + author={Flores, Anthony W. and Bechtel, Kristin and Lowenkamp, Christopher T.}, + journal={Fed. Probation}, + volume={80}, + pages={38}, + year={2016}, + publisher={HeinOnline}, + note={URL: \url{https://bit.ly/2Gesf9Y}}} + +@article{friedler2019assessing, + title={{A}ssessing the {L}ocal {I}nterpretability of {M}achine {L}earning {M}odels}, + author={Friedler, Sorelle A. and Roy, Chitradeep Dutta and Scheidegger, Carlos and Slack, Dylan}, + journal={arXiv preprint arXiv:1902.03501}, + year={2019}, + note={URL: \url{https://arxiv.org/pdf/1902.03501.pdf}}} + +@book{esl, + Address = {New York}, + Author = {Jerome Friedman and Trevor Hastie and Robert Tibshirani}, + Booktitle = {\textit{The Elements of Statistical Learning}}, + Publisher = {Springer}, + Title = {\textbf{The Elements of Statistical Learning}}, + note = {URL: \url{https://web.stanford.edu/~hastie/ElemStatLearn/printings/ESLII\_print12.pdf}}, + Year = {2001}} + +@inproceedings{hardt2016equality, + title={Equality of {O}pportunity in {S}upervised {L}earning}, + author={Hardt, Moritz and Price, Eric and Srebro, Nati and others}, + booktitle={Advances in neural information processing systems}, + pages={3315--3323}, + year={2016}, + note={URL: \url{http://papers.nips.cc/paper/6374-equality-of-opportunity-in-supervised-learning.pdf}}} + +@inproceedings{hcml, + title={Human-{C}entred {M}achine {L}earning}, + author={Gillies, Marco and Fiebrink, Rebecca and Tanaka, Atau and Garcia, J{\'e}r{\'e}mie and Bevilacqua, Fr{\'e}d{\'e}ric and Heloir, Alexis and Nunnari, Fabrizio and Mackay, Wendy and Amershi, Saleema and Lee, Bongshin and others}, + booktitle={Proceedings of the 2016 CHI Conference Extended Abstracts on Human Factors in Computing Systems}, + pages={3558--3565}, + year={2016}, + organization={ACM}, + note={URL: \url{http://research.gold.ac.uk/16112/1/HCML2016.pdf}}} + +@article{gilpin2018explaining, + title = {Explaining {E}xplanations: {A}n {A}pproach to 
{E}valuating {I}nterpretability of {M}achine {L}earning}, + author = {Leilani H. Gilpin and David Bau and Ben Z. Yuan and Ayesha Bajwa and Michael Specter and Lalana Kagal}, + journal = {arXiv preprint arXiv:1806.00069}, + note = {URL: \url{https://arxiv.org/pdf/1806.00069.pdf}}, + year = {2018}} + +@article{ice_plots, + Author = {Alex Goldstein and Adam Kapelner and Justin Bleich and Emil Pitkin}, + Journal = {Journal of Computational and Graphical Statistics}, + Number = {1}, + Title = {Peeking {I}nside the {B}lack {B}ox: {V}isualizing {S}tatistical {L}earning with {P}lots of {I}ndividual {C}onditional {E}xpectation}, + Volume = {24}, + Year = {2015}, + note = {URL: \url{https://arxiv.org/pdf/1309.6392.pdf}}} + +@article{kamiran2012data, + title={Data {P}reprocessing {T}echniques for {C}lassification {W}ithout {D}iscrimination}, + author={Kamiran, Faisal and Calders, Toon}, + journal={Knowledge and Information Systems}, + volume={33}, + number={1}, + pages={1--33}, + year={2012}, + publisher={Springer}, + note={URL: \url{https://link.springer.com/content/pdf/10.1007/s10115-011-0463-8.pdf}}} + +@misc{gopinathan1998fraud, + title = {Fraud {D}etection using {P}redictive {M}odeling}, + author = {Gopinathan, Krishna M. and Biafore, Louis S. and Ferguson, William M. and Lazarus, Michael A. and Pathria, Anu K. and Jost, Allen}, + year = {1998}, + month = oct # "~6", + publisher = {Google Patents}, + note = {US Patent 5,819,226. 
URL: \url{https://patents.google.com/patent/US5819226A}}} + +@article{gosiewska2019safe, + title={S{A}{F}{E} {M}{L}: {S}urrogate {A}ssisted {F}eature {E}xtraction for {M}odel {L}earning}, + author={Gosiewska, Alicja and Gacek, Aleksandra and Lubon, Piotr and Biecek, Przemyslaw}, + journal={arXiv preprint arXiv:1902.11035}, + year={2019}, + note={URL: \url{https://arxiv.org/pdf/1902.11035v1.pdf}}} + +@article{guidotti2018survey, + title = {{A} {S}urvey of {M}ethods for {E}xplaining {B}lack {B}ox {M}odels}, + author = {Guidotti, Riccardo and Monreale, Anna and Ruggieri, Salvatore and Turini, Franco and Giannotti, Fosca and Pedreschi, Dino}, + journal = {ACM Computing Surveys (CSUR)}, + volume = {51}, + number = {5}, + pages = {93}, + year = {2018}, + publisher = {ACM}, + note = {URL: \url{https://arxiv.org/pdf/1802.01933.pdf}}} + +@inproceedings{art_and_sci, + title = {On the {A}rt and {S}cience of {M}achine {L}earning {E}xplanations}, + author = {Hall, Patrick}, + booktitle={JSM Proceedings, Statistical Computing Section}, + pages = {1781--1799}, + publisher = {American Statistical Association}, + note = {URL: \url{https://github.com/jphall663/jsm_2018_paper}}, + year = {2018}} + +@article{lime-sup, + title = {Locally {I}nterpretable {M}odels and {E}ffects {B}ased on {S}upervised {P}artitioning ({LIME-SUP})}, + author = {Linwei Hu and Jie Chen and Vijayan N. 
Nair and Agus Sudjianto}, + journal = {arXiv preprint arXiv:1806.00663}, + note = {URL: \url{https://arxiv.org/ftp/arxiv/papers/1806/1806.00663.pdf}}, + year = {2018}} + +@misc{kangdebugging, + title={Debugging {M}achine {L}earning {M}odels via {M}odel {A}ssertions}, + author={Kang, Daniel and Raghavan, Deepti and Bailis, Peter and Zaharia, Matei}, + note={URL: \url{https://debug-ml-iclr2019.github.io/cameraready/DebugML-19_paper_27.pdf}}} + +@article{keinan2004fair, + title = {Fair {A}ttribution of {F}unctional {C}ontribution in {A}rtificial and {B}iological {N}etworks}, + author = {Keinan, Alon and Sandbank, Ben and Hilgetag, Claus C. and Meilijson, Isaac and Ruppin, Eytan}, + journal = {Neural Computation}, + volume = {16}, + number = {9}, + pages = {1887--1915}, + year = {2004}, + publisher = {MIT Press}, + note={URL: \url{http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.436.6801&rep=rep1&type=pdf}}} + +@misc{uci, + author = {M. Lichman}, + year = {2013}, + title = {{UCI} {M}achine {L}earning {R}epository}, + note = {URL: \url{http://archive.ics.uci.edu/ml}}, + institution = {University of California, Irvine, School of Information and Computer Sciences}} + +@article{lipovetsky2001analysis, + title={Analysis of {R}egression in {G}ame {T}heory {A}pproach}, + author={Lipovetsky, Stan and Conklin, Michael}, + journal={Applied Stochastic Models in Business and Industry}, + volume={17}, + number={4}, + pages={319--330}, + year={2001}, + publisher={Wiley Online Library}} + +@article{lipton1, + title = {The {M}ythos of {M}odel {I}nterpretability}, + author = {Lipton, Zachary C.}, + journal = {arXiv preprint arXiv:1606.03490}, + year = {2016}, + note = {URL: \url{https://arxiv.org/pdf/1606.03490.pdf}}} + +@incollection{shapley, + title = {A {U}nified {A}pproach to {I}nterpreting {M}odel {P}redictions}, + author = {Lundberg, Scott M. and Lee, Su-In}, + booktitle = {Advances in Neural Information Processing Systems 30}, + editor = {I. Guyon and U. V. 
Luxburg and S. Bengio and H. Wallach and R. Fergus and S. Vishwanathan and R. Garnett}, + pages = {4765--4774}, + year = {2017}, + publisher = {Curran Associates, Inc.}, + note = {URL: \url{http://papers.nips.cc/paper/7062-a-unified-approach-to-interpreting-model-predictions.pdf}}} + +@incollection{tree_shap, + title = {Consistent {I}ndividualized {F}eature {A}ttribution for {T}ree {E}nsembles}, + author = {Lundberg, Scott M. and Erion, Gabriel G. and Lee, Su-In}, + booktitle = {Proceedings of the 2017 ICML Workshop on Human Interpretability in Machine Learning (WHI 2017)}, + pages = {15--21}, + editor = {Been Kim and Dmitry M. Malioutov and Kush R. Varshney and Adrian Weller}, + year = {2017}, + note = {URL: \url{https://openreview.net/pdf?id=ByTKSo-m-}}, + publisher = {ICML WHI 2017}, + organization = {ICML}} + +@book{molnar, + title = {\textbf{\textit{{I}nterpretable {M}achine {L}earning}}}, + author = {Christoph Molnar}, + publisher = {christophm.github.io}, + note = {URL: \url{https://christophm.github.io/interpretable-ml-book/}}, + year = {2018}} + +@article{molnar2019quantifying, + title={Quantifying {I}nterpretability of {A}rbitrary {M}achine {L}earning {M}odels {T}hrough {F}unctional {D}ecomposition}, + author={Molnar, Christoph and Casalicchio, Giuseppe and Bischl, Bernd}, + journal={arXiv preprint arXiv:1904.03867}, + year={2019}, + note={URL: \url{https://arxiv.org/pdf/1904.03867.pdf}}} + +@article{murdoch2019interpretable, + title={Interpretable {M}achine {L}earning: {D}efinitions, {M}ethods, and {A}pplications}, + author={Murdoch, W. 
James and Singh, Chandan and Kumbier, Karl and Abbasi-Asl, Reza and Yu, Bin}, + journal={arXiv preprint arXiv:1901.04592}, + year={2019}, + note={URL: \url{https://arxiv.org/pdf/1901.04592.pdf}}} + +@inproceedings{papernot2018marauder, + title={A {M}arauder's {M}ap of {S}ecurity and {P}rivacy in {M}achine {L}earning: {A}n overview of current and future research directions for making machine learning secure and private}, + author={Papernot, Nicolas}, + booktitle={Proceedings of the 11th ACM Workshop on Artificial Intelligence and Security}, + year={2018}, + organization={ACM}, + note={URL: \url{https://arxiv.org/pdf/1811.01134.pdf}}} + +@article{pate, + title={Scalable {P}rivate {L}earning with {P}{A}{T}{E}}, + author={Papernot, Nicolas and Song, Shuang and Mironov, Ilya and Raghunathan, Ananth and Talwar, Kunal and Erlingsson, {\'U}lfar}, + journal={arXiv preprint arXiv:1802.08908}, + year={2018}, + note={URL: \url{https://arxiv.org/pdf/1802.08908.pdf}}} + +@inproceedings{lime, + title = {Why {S}hould {I} {T}rust {Y}ou?: {E}xplaining the {P}redictions of {A}ny {C}lassifier}, + author = {Marco Tulio Ribeiro and Sameer Singh and Carlos Guestrin}, + booktitle = {Proceedings of the 22nd ACM SIGKDD International Conference on Knowledge Discovery and Data Mining}, + pages = {1135--1144}, + year = {2016}, + note = {URL: \url{http://www.kdd.org/kdd2016/papers/files/rfp0573-ribeiroA.pdf}}, + organization = {ACM}} + +@article{please_stop, + title = {Please {S}top {E}xplaining {B}lack {B}ox {M}odels for {H}igh {S}takes {D}ecisions}, + author = {Rudin, Cynthia}, + note = {URL: \url{https://arxiv.org/pdf/1811.10154.pdf}}, + journal = {arXiv preprint arXiv:1811.10154}, + year = {2018}} + +@book{shapley1988shapley, + title = {\textit{The Shapley value: {E}ssays in honor of {L}loyd {S}. {S}hapley}}, + author = {Shapley, Lloyd S. and Roth, Alvin E. 
and others}, + year = {1988}, + publisher = {Cambridge University Press}, + note = {URL: \url{http://www.library.fa.ru/files/Roth2.pdf}}} + +@inproceedings{membership_inference, + title={Membership {I}nference {A}ttacks {A}gainst {M}achine {L}earning {M}odels}, + author={Shokri, Reza and Stronati, Marco and Song, Congzheng and Shmatikov, Vitaly}, + booktitle={2017 IEEE Symposium on Security and Privacy (SP)}, + pages={3--18}, + year={2017}, + organization={IEEE}, + note={URL: \url{https://arxiv.org/pdf/1610.05820.pdf}}} + +@article{shokri2019privacy, + title={Privacy {R}isks of {E}xplaining {M}achine {L}earning {M}odels}, + author={Shokri, Reza and Strobel, Martin and Zick, Yair}, + journal={arXiv preprint arXiv:1907.00164}, + year={2019}, + note={URL: \url{https://arxiv.org/pdf/1907.00164.pdf}}} + +@article{kononenko2010efficient, + title = {An {E}fficient {E}xplanation of {I}ndividual {C}lassifications using {G}ame {T}heory}, + author = {Strumbelj, Erik and Kononenko, Igor}, + journal = {Journal of Machine Learning Research}, + volume = {11}, + number = {Jan}, + pages = {1--18}, + year = {2010}, + note = {URL: \url{http://www.jmlr.org/papers/volume11/strumbelj10a/strumbelj10a.pdf}}} + +@inproceedings{model_stealing, + title={Stealing {M}achine {L}earning {M}odels via {P}rediction {A}{P}{I}s}, + author={Tram{\`e}r, Florian and Zhang, Fan and Juels, Ari and Reiter, Michael K and Ristenpart, Thomas}, + booktitle={25th {USENIX} Security Symposium ({USENIX} Security 16)}, + pages={601--618}, + year={2016}, + note={URL: \url{https://www.usenix.org/system/files/conference/usenixsecurity16/sec16_paper_tramer.pdf}}} + +@article{slim, + Author = {Ustun, Berk and Rudin, Cynthia}, + Journal = {Machine Learning}, + Number = {3}, + Pages = {349--391}, + Publisher = {Springer}, + Title = {{Supersparse {L}inear {I}nteger {M}odels for {O}ptimized {M}edical {S}coring {S}ystems}}, + Volume = {102}, + note={URL: 
\url{https://users.cs.duke.edu/~cynthia/docs/UstunTrRuAAAI13.pdf}}, + Year = {2016}} + +@article{wf_xnn, + title = {Explainable {N}eural {N}etworks {B}ased on {A}dditive {I}ndex {M}odels}, + author = {Vaughan, Joel and Sudjianto, Agus and Brahimi, Erind and Chen, Jie and Nair, Vijayan N.}, + journal = {arXiv preprint arXiv:1806.01933}, + note = {URL: \url{https://arxiv.org/pdf/1806.01933.pdf}}, + year = {2018}} + +@article{weller2017challenges, + title = {{C}hallenges for {T}ransparency}, + author = {Weller, Adrian}, + journal = {arXiv preprint arXiv:1708.01870}, + year = {2017}, + note = {URL: \url{https://arxiv.org/pdf/1708.01870.pdf}}} + +@book{ff_interpretability, + author = {Williams, Mike and others}, + title = {Interpretability}, + publisher = {Fast Forward Labs}, + year = {2017}, + note = {URL: \url{https://www.cloudera.com/products/fast-forward-labs-research.html}}} + +@inproceedings{sbrl, + title = {Scalable {B}ayesian {R}ule {L}ists}, + author = {Hongyu Yang and Cynthia Rudin and Margo Seltzer}, + booktitle = {Proceedings of the 34th International Conference on Machine Learning {(ICML)}}, + note = {URL: \url{https://arxiv.org/pdf/1602.08610.pdf}}, + year = {2017}} + +@inproceedings{zhang2018mitigating, + title={Mitigating {U}nwanted {B}iases with {A}dversarial {L}earning}, + author={Zhang, Brian Hu and Lemoine, Blake and Mitchell, Margaret}, + booktitle={Proceedings of the 2018 AAAI/ACM Conference on AI, Ethics, and Society}, + pages={335--340}, + year={2018}, + organization={ACM}, + note={URL: \url{https://arxiv.org/pdf/1801.07593.pdf}}} + +@inproceedings{lfr, + title={Learning {F}air {R}epresentations}, + author={Zemel, Rich and Wu, Yu and Swersky, Kevin and Pitassi, Toni and Dwork, Cynthia}, + booktitle={International Conference on Machine Learning}, + pages={325--333}, + year={2013}, + note={URL: \url{http://proceedings.mlr.press/v28/zemel13.pdf}}} diff --git a/tex/lecture_5.pdf b/tex/lecture_5.pdf new file mode 100644 index 0000000..cb00847 Binary 
files /dev/null and b/tex/lecture_5.pdf differ diff --git a/tex/lecture_5.tex b/tex/lecture_5.tex new file mode 100644 index 0000000..ff80acb --- /dev/null +++ b/tex/lecture_5.tex @@ -0,0 +1,457 @@ +\documentclass[11pt, + %10pt, + %hyperref={colorlinks}, + aspectratio=169, + hyperref={colorlinks} + ]{beamer} +\usetheme{Singapore} +\usecolortheme[snowy, cautious]{owl} + +\usepackage[utf8]{inputenc} +\usepackage[T1]{fontenc} +\usepackage[american]{babel} +\usepackage{graphicx} +\usepackage{hyperref} + +\hypersetup{ + colorlinks=true, + urlcolor=[rgb]{1,0,1}, + linkcolor=[rgb]{1,0,1}} +\definecolor{magenta}{RGB}{255, 0, 255} + +\usepackage[natbib=true,style=numeric,backend=bibtex,useprefix=true]{biblatex} + +\definecolor{OwlGreen}{RGB}{75,0,130} % easier to see +\setbeamertemplate{bibliography item}{\insertbiblabel} +\setbeamerfont{caption}{size=\footnotesize} +\setbeamertemplate{frametitle continuation}{} + +\setcounter{tocdepth}{1} +\renewcommand*{\bibfont}{\scriptsize} +\addbibresource{lecture_5.bib} + +\renewcommand*{\thefootnote}{\fnsymbol{footnote}} + +\usenavigationsymbolstemplate{} +\setbeamertemplate{footline}{% + \raisebox{5pt}{\makebox{\hfill\makebox[20pt]{\color{gray} + \scriptsize\insertframenumber}}}\hspace*{5pt}} + +\author{\copyright\hspace{1pt}Patrick Hall\footnote{\tiny{This material is shared under a \href{https://creativecommons.org/licenses/by/4.0/deed.ast}{CC By 4.0 license} which allows for editing and redistribution, even for commercial purposes. 
However, any derivative work should attribute the author and H2O.ai.}}} +\title{Increasing Trust and Understanding in Machine Learning with Model Debugging } +\institute{} +\date{\today} +\subject{} + +\begin{document} + + \maketitle + + \begin{frame} + + \frametitle{Contents} + + \tableofcontents{} + + \end{frame} + +%------------------------------------------------------------------------------- + \section{What?} +%------------------------------------------------------------------------------- + + \begin{frame} + + \frametitle{What is Model Debugging?} + + \begin{itemize} + \item Model debugging is an emergent discipline focused on discovering and remediating errors in the internal mechanisms and outputs of machine learning models.\footnote{\tiny{See \url{https://debug-ml-iclr2019.github.io/} for numerous model debugging approaches.}} + \item Model debugging attempts to test machine learning models like code (because the models are code). + \item Model debugging promotes trust directly and enhances interpretability as a side-effect. + \item Model debugging is similar to regression diagnostics, but for nonlinear machine learning models. + \end{itemize} + + \end{frame} + + \begin{frame}[t] + + \frametitle{Trust and Understanding} + + \begin{figure}[htb] + \begin{center} + \includegraphics[height=130pt]{../img/trust_understanding.png} + \end{center} + \end{figure} + + Trust and understanding in machine learning are different but complementary goals, and they are technically feasible \textit{today}. 
+ + \end{frame} + +%------------------------------------------------------------------------------- + \section{Why?} +%------------------------------------------------------------------------------- + +%------------------------------------------------------------------------------- + \subsection{Inadequate Assessment } +%------------------------------------------------------------------------------- + + \begin{frame} + + \frametitle{Why Debug?} + + \begin{columns} + + \column{0.5\linewidth} + \centering + \begin{itemize} + \item \scriptsize Constrained, monotonic GBM probability of default (PD) classifier, $g_{\text{mono}}$. + \item Grid search over hundreds of models. + \item Best model selected by validation-based early stopping. + \item Seemingly well-regularized (row and column sampling, explicit specification of L1 and L2 penalties). + \item No evidence of over- or under-fitting. + \item Better validation logloss than benchmark GLM. + \item Decision threshold selected by maximization of F1 statistic. + \item BUT traditional assessment can be inadequate ... 
+ \end{itemize}\normalsize + + \vspace{20pt} + \column{0.5\linewidth} + \centering + \includegraphics[height=110pt]{../img/pr_auc.png}\\ + \tiny + \vspace{5pt} + Validation Confusion Matrix at Threshold:\vspace{-7pt} + \begin{table} + \hspace{7pt} + \begin{tabular}{ | p{1.3cm} | p{1cm} | p{1.3cm} | } + \hline + & Actual: 1 & Actual: 0 \\ + \hline + Predicted: 1 & 1159 & 827 \\ + \hline + Predicted: 0 & 1064 & 6004 \\ + \hline + \end{tabular} + \end{table} + \normalsize + + \end{columns} + + \end{frame} + +%------------------------------------------------------------------------------- + \subsection{Inaccuracy} +%------------------------------------------------------------------------------- + + \begin{frame} + + \frametitle{Why Debug?} + + \footnotesize{Machine learning models can be \textbf{inaccurate}.} + \begin{columns} + + \column{0.5\linewidth} + \centering + \includegraphics[height=125pt]{../img/global_shap.png}\\ + \vspace{5pt} + \tiny{$g_{\text{mono}}$ PD classifier over-emphasizes the most important feature, a customer's most recent repayment status, $\text{PAY\_0}$.} + + \vspace{11pt} + \column{0.5\linewidth} + \centering + \includegraphics[height=118pt]{../img/resid.png}\\ + \vspace{7pt} + \tiny{$g_{\text{mono}}$ also struggles to predict default for favorable statuses, $-2 \leq \texttt{PAY\_0} < 2$, and often cannot predict on-time payment\\when recent payments are late, $\text{PAY\_0} \geq 2$}. 
+ + \end{columns} + \normalsize + + \end{frame} + +%------------------------------------------------------------------------------- + \subsection{Sociological Biases} +%------------------------------------------------------------------------------- + + \begin{frame}[label={slide:disp}] + + \frametitle{Why Debug?} + + \footnotesize{Machine learning models can perpetuate \textbf{sociological biases} \cite{barocas-hardt-narayanan}.} + \vspace{10pt} + \begin{figure} + \begin{center} + \includegraphics[height=100pt]{../img/di.png} + \end{center} + \end{figure} + \center{\scriptsize{Group disparity metrics are out-of-range for $g_{\text{mono}}$ across different marital statuses.\\This \textit{does not} address localized instances of discrimination.}} + \normalsize + + \end{frame} + +%------------------------------------------------------------------------------- + \subsection{Security Vulnerabilities} +%------------------------------------------------------------------------------- +% Hackers, competitors, or malicious or extorted insiders can manipulate model outcomes, steal models, and steal data! 
+ \begin{frame}[t] + + \frametitle{Why Debug?} + + \footnotesize{Machine learning models can have \textbf{security vulnerabilities} \cite{security_of_ml}, \cite{membership_inference}, \cite{model_stealing}}.\footnote{\tiny{See \url{https://github.com/jphall663/secure_ML_ideas} for full size image and more information.}} + \begin{figure}[] + \begin{center} + \includegraphics[height=135pt]{../img/cheatsheet.png} + \end{center} + \end{figure} + \vspace{-17pt} + \normalsize + + \end{frame} + +%------------------------------------------------------------------------------- + \section{How?} +%------------------------------------------------------------------------------- + +%------------------------------------------------------------------------------- + \subsection{Holistic, Low-Risk Approach} +%------------------------------------------------------------------------------- + + \begin{frame} + + \frametitle{How to Debug Models?} + + \footnotesize{As part of a holistic, low-risk approach to machine learning}.\footnote{\tiny{See \url{https://github.com/jphall663/hc_ml} for more information.}} + \begin{figure} + \begin{center} + \includegraphics[height=170pt]{../img/blueprint.png} + \end{center} + \end{figure} + \normalsize + + \end{frame} + +%------------------------------------------------------------------------------- + \subsection{Sensitivity Analysis} +%------------------------------------------------------------------------------- + + \begin{frame}[t] + + \frametitle{\textbf{Sensitivity Analysis}: Partial Dependence and ICE} + \vspace{-15pt} + \begin{figure} + \begin{center} + \includegraphics[height=96pt]{../img/pd.png} + \end{center} + \end{figure} + \vspace{-10pt} + \begin{itemize} + \item \scriptsize Training data is very sparse for $\text{PAY\_0} > 2$.\\ + \item Residuals of partial dependence confirm over-emphasis on $\text{PAY\_0}$. 
+ \item ICE curves indicate that partial dependence is likely trustworthy and empirically confirm monotonicity, but also expose adversarial attack vulnerabilities. + \item Partial dependence and ICE indicate $g_{\text{mono}}$ likely learned very little for $\text{PAY\_0} \geq 2$. + \item $\text{PAY\_0} = $ \texttt{missing} gives lowest probability of default. + \end{itemize}\normalsize + + \end{frame} + + \begin{frame}[t, allowframebreaks] + \vspace{-10pt} + \frametitle{\textbf{Sensitivity Analysis}: Search for Adversarial Examples} + \begin{figure} + \begin{center} + \includegraphics[height=135pt]{../img/sa_max_prob.png} + \end{center} + \end{figure} + \tiny{Adversary search confirms multiple avenues of attack and exposes a potential flaw in $g_{\text{mono}}$ scoring logic: default is predicted for customer's who make payments above their credit limit. (Try the \href{https://github.com/tensorflow/cleverhans}{cleverhans} package for finding adversarial examples.)} + + \framebreak + \vspace{-5pt} + \begin{figure} + \begin{center} + \includegraphics[height=135pt]{../img/sa_max_prob_demo.png} + \end{center} + \end{figure} + \vspace{-5pt} + \tiny{$g_{\text{mono}}$ appears to prefer younger, unmarried customers, which should be investigated further with disparate impact analysis - see slide \ref{slide:disp} - and could expose the lender to impersonation attacks. 
(Try the \href{https://github.com/IBM/AIF360}{AIF360}, \href{https://github.com/dssg/aequitas}{aequitas}, or \href{https://github.com/LASER-UMASS/Themis}{themis} packages for disparate impact audits.)} + + \end{frame} + + \begin{frame}[t] + + %When you don't know what to test + + \frametitle{\textbf{Sensitivity Analysis}: Random Attacks} + \vspace{-15pt} + \begin{figure} + \begin{center} + \includegraphics[height=130pt]{../img/ra.png} + \end{center} + \end{figure} + \vspace{-10pt} + \begin{itemize}\scriptsize + \item In general, random attacks are a viable method to identify software bugs in machine learning pipelines. \textbf{(Start here if you don't know where to start.)} + \item Random data can apparently elicit all probabilities $\in [0, 1]$ from $g_{\text{mono}}$. + \item Around the decision threshold, lower probabilities can be attained simply by injecting missing values, yet another vulnerability to adversarial attack. + \end{itemize} + \normalsize + + \end{frame} + +%------------------------------------------------------------------------------- + \subsection{Residual Analysis} +%------------------------------------------------------------------------------- + + \begin{frame}[t] + + \frametitle{\textbf{Residual Analysis}: Disparate Accuracy and Errors} + + \vspace{-10pt} + \begin{figure} + \begin{center} + \includegraphics[height=140pt]{../img/de.png} + \end{center} + \end{figure} + \vspace{-15pt} + \tiny For $\text{PAY\_0}$: + \begin{itemize} + \item Notable change in accuracy and error characteristics for $\text{PAY\_0} \geq 2$. + \item 100\% false omission rate for $\text{PAY\_0} \geq 2$. (Every prediction of non-default is incorrect!) + \end{itemize} + For $\text{SEX}$, accuracy and error characteristics vary little across individuals represented in the training data. Non-discrimination should be confirmed by disparate impact analysis. 
+
+ \end{frame}
+
+ \begin{frame}[t]
+
+ \frametitle{\textbf{Residual Analysis}: Mean Local Feature Contributions}
+ \vspace{-15pt}
+ \begin{figure}
+ \begin{center}
+ \includegraphics[height=125pt]{../img/global_high_low.png}
+ \end{center}
+ \end{figure}
+ \scriptsize{Exact local Shapley feature contributions \cite{shapley}, which are available at scoring time for unlabeled data, are noticeably different for low and high residual predictions. (Both monotonicity constraints and Shapley values are available in \href{https://www.github.com/h2oai/h2o-3}{h2o-3} and \href{https://www.github.com/dmlc/xgboost}{XGBoost}.)}
+
+ \end{frame}
+
+ \begin{frame}[t]
+
+ \frametitle{\large{\textbf{Residual Analysis}: Non-Robust Features}}
+ \vspace{-10pt}
+ \begin{figure}
+ \begin{center}
+ \includegraphics[height=150pt]{../img/global_pred_loss.png}
+ \end{center}
+ \end{figure}
+ \vspace{-8pt}
+ \scriptsize{Globally important features $\text{PAY\_3}$ and $\text{PAY\_2}$ are more important, on average, to the loss than to the predictions. (Shapley contributions to XGBoost logloss can be calculated using the \href{https://github.com/slundberg/shap}{shap} package. This is a \textbf{time-consuming} calculation.)}
+
+ \end{frame}
+
+ \begin{frame}
+
+ \frametitle{\textbf{Residual Analysis}: Local Contributions to Logloss}
+
+ \begin{figure}
+ \begin{center}
+ \includegraphics[height=130pt]{../img/local.png}
+ \end{center}
+ \end{figure}
+ Exact, local feature contributions to logloss can be calculated, enabling ranking of features contributing to logloss residuals for \textbf{each prediction}.
+ \end{frame}
+
+ \begin{frame}[t]
+
+ \frametitle{\textbf{Residual Analysis}: Modeling Residuals}
+ Decision tree model of $g_{\text{mono}}$ $\text{DEFAULT\_NEXT\_MONTH} = 1$ logloss residuals with 3-fold CV MSE $=0.0070$ and $R^2=0.8871$.
+ \begin{figure} + \begin{center} + \includegraphics[height=95pt, width=330pt]{../img/surrogate_dt_1.png} + \end{center} + \end{figure} + This tree encodes rules describing when $g_{\text{mono}}$ is probably wrong. + \end{frame} + +%------------------------------------------------------------------------------- + \subsection{Benchmark Models} +%------------------------------------------------------------------------------- + + \begin{frame} + + \frametitle{\textbf{Benchmark Models}: Compare to Linear Models} + \begin{figure} + \begin{center} + \includegraphics[height=130pt]{../img/benchmark.png} + \end{center} + \end{figure} + \vspace{-10pt} + For a range of probabilities $\in (\sim0.2, \sim0.6)$, $g_{\text{mono}}$ displays exactly incorrect prediction behavior as compared to a benchmark GLM. + \end{frame} + +%------------------------------------------------------------------------------- + \subsection{Error Remediation} +%------------------------------------------------------------------------------- + + \begin{frame} + + \frametitle{\textbf{Remediation}: for $g_{\text{mono}}$} + + \begin{itemize}\scriptsize + \item \textbf{Over-emphasis of $\text{PAY\_0}$}: + \begin{itemize}\scriptsize + \item Engineer features for payment trends or stability. + \item Missing value injection during training or scoring. + \end{itemize} + \item \textbf{Sparsity of $\text{PAY\_0} > 2$ training data}: Increase observation weights. + \item \textbf{Payments $\geq$ credit limit}: Scoring-time model assertion \cite{kangdebugging}. + \item \textbf{Disparate impact}: Adversarial de-biasing \cite{zhang2018mitigating} or model selection by minimal disparate impact. + \item \textbf{Security vulnerabilities}: API throttling, authentication, real-time model monitoring. + \item \textbf{Large logloss importance}: Evaluate dropping non-robust features. + \item \textbf{Poor accuracy vs. benchmark GLM}: Blend $g_{\text{mono}}$ and GLM for probabilities $\in (\sim0.2, \sim0.6)$. 
+ \item \textbf{Miscellaneous strategies}: + \begin{itemize}\scriptsize + \item Local feature importance and decision tree rules can indicate additional scoring-time model assertions, e.g. alternate treatment for locally non-robust features in known high-residual ranges of the learned response function. + \item Incorporate local feature contributions to logloss into training or scoring processes. + \end{itemize} + \end{itemize} + \normalsize + + \end{frame} + + \begin{frame} + + \frametitle{\textbf{Remediation}: General Strategies} + + \begin{itemize} + \item Calibration to past data. + \item Data collection or simulation for model blindspots. + \item Detection and elimination of non-robust features. + \item Missing value injection during training or scoring. + \item Model or model artifact editing. + \end{itemize} + + \end{frame} + + +%------------------------------------------------------------------------------- +% \section{References} +%------------------------------------------------------------------------------- + +% which code for which slide + + \begin{frame}[t, allowframebreaks] + + \frametitle{References} + + This presentation:\\ + \tiny{\url{https://www.github.com/jphall663/jsm_2019}}\\ + \vspace{10pt} + \normalsize Code examples for this presentation:\\ + \tiny{\url{https://www.github.com/jphall663/interpretable_machine_learning_with_python}}\\ + \noindent\tiny{\url{https://www.github.com/jphall663/responsible_xai}} + + \framebreak + + \printbibliography + + \end{frame} + +\end{document} \ No newline at end of file