diff --git a/_news/bortolotti2024rsbench-preprint.md b/_news/bortolotti2024rsbench-preprint.md new file mode 100644 index 0000000..f2b13d7 --- /dev/null +++ b/_news/bortolotti2024rsbench-preprint.md @@ -0,0 +1,7 @@ +--- +title: "RSBench" +collection: news +permalink: /news/rsbench-preprint +date: 2024-06-14 +--- +How to evaluate if neuro-symbolic systems are learning the right concepts or are falling prey to reasoning shortcuts? Answer in this new preprint! diff --git a/_publications/bortolotti2024rsbench.md b/_publications/bortolotti2024rsbench.md new file mode 100644 index 0000000..05cc5d2 --- /dev/null +++ b/_publications/bortolotti2024rsbench.md @@ -0,0 +1,23 @@ +--- +collection: publications +ref: "bortolotti2024rsbench" +permalink: "publications/bortolotti2024rsbench" +title: "A Benchmark Suite for Systematically Evaluating Reasoning Shortcuts" +date: 2024-06-14 00:00 +tags: nesy shortcuts reasoning +image: "/images/papers/bortolotti2024rsbench/rsbench.png" +authors: "Samuele Bortolotti, Emanuele Marconato, Tommaso Carraro, Paolo Morettin, Emile van Krieken, Antonio Vergari, Stefano Teso, Andrea Passerini" +paperurl: "https://unitn-sml.github.io/rsbench/" +pdf: "https://arxiv.org/pdf/2406.10368" +venue: "arXiv 2024" +code: "https://github.com/unitn-sml/rsbench-code" +excerpt: "How to evaluate if neuro-symbolic systems are learning the right concepts or are falling prey to reasoning shortcuts?" +abstract: "The advent of powerful neural classifiers has increased interest in problems that require both learning and reasoning. These problems are critical for understanding important properties of models, such as trustworthiness, generalization, interpretability, and compliance to safety and structural constraints. 
However, recent research observed that tasks requiring both learning and reasoning on background knowledge often suffer from reasoning shortcuts (RSs): predictors can solve the downstream reasoning task without associating the correct concepts to the high-dimensional data. To address this issue, we introduce rsbench, a comprehensive benchmark suite designed to systematically evaluate the impact of RSs on models by providing easy access to highly customizable tasks affected by RSs. Furthermore, rsbench implements common metrics for evaluating concept quality and introduces novel formal verification procedures for assessing the presence of RSs in learning tasks. Using rsbench, we highlight that obtaining high quality concepts in both purely neural and neuro-symbolic models is a far-from-solved problem." +supplemental: +bibtex: "@article{bortolotti2024benchmark, + title={A Benchmark Suite for Systematically Evaluating Reasoning Shortcuts}, + author={Bortolotti, Samuele and Marconato, Emanuele and Carraro, Tommaso and Morettin, Paolo and van Krieken, Emile and Vergari, Antonio and Teso, Stefano and Passerini, Andrea}, + journal={arXiv preprint arXiv:2406.10368}, + year={2024} +}" +--- diff --git a/images/papers/bortolotti2024rsbench/rsbench.png b/images/papers/bortolotti2024rsbench/rsbench.png new file mode 100644 index 0000000..b126cc6 Binary files /dev/null and b/images/papers/bortolotti2024rsbench/rsbench.png differ