12 | 12 |  from ragas.evaluation import EvaluationDataset, EvaluationResult, RunConfig, evaluate
13 | 13 |  from ragas.metrics import Metric
14 | 14 |  from ragas.metrics._domain_specific_rubrics import (  # the rubrics we must instantiate are located inside of a file marked as private
15 |    | -    DEFAULT_WITH_REFERENCE_RUBRICS,
16 | 15 |      RubricsScore,
   | 16 | +    SingleTurnPrompt,
17 | 17 |  )
18 | 18 |
19 | 19 |  # Local
22 | 22 |
23 | 23 |  logger = setup_logger(__name__)
24 | 24 |
   | 25 | +OLD_DEFAULT_WITH_REFERENCE_RUBRICS = {
   | 26 | +    "score1_description": "The response is incorrect, irrelevant, or does not align with the ground truth.",
   | 27 | +    "score2_description": "The response partially matches the ground truth but includes significant errors, omissions, or irrelevant information.",
   | 28 | +    "score3_description": "The response generally aligns with the ground truth but may lack detail, clarity, or have minor inaccuracies.",
   | 29 | +    "score4_description": "The response is mostly accurate and aligns well with the ground truth, with only minor issues or missing details.",
   | 30 | +    "score5_description": "The response is fully accurate, aligns completely with the ground truth, and is clear and detailed.",
   | 31 | +}
   | 32 | +
25 | 33 |
26 | 34 |  class Sample(TypedDict):
27 | 35 |      """
@@ -256,9 +264,8 @@ def _generate_answers_from_model(
256 | 264 |
257 | 265 |      @staticmethod
258 | 266 |      def _get_metrics() -> List[Metric]:
259 |     | -        # default set of metrics
260 | 267 |          return [
261 | 268 |              RubricsScore(
262 |     | -                rubrics=DEFAULT_WITH_REFERENCE_RUBRICS,
    | 269 | +                rubrics=OLD_DEFAULT_WITH_REFERENCE_RUBRICS,
263 | 270 |              )
264 | 271 |          ]
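For context, a minimal usage sketch (not part of the diff): it shows how the locally pinned `OLD_DEFAULT_WITH_REFERENCE_RUBRICS` dict is handed to `RubricsScore` and run through ragas' `evaluate()`. The class and function names come from the imports in this diff or from ragas' public API; the `judge_llm` wrapper, the sample texts, and the `EvaluationDataset(samples=...)` construction are illustrative assumptions, not code from this PR.

```python
# Sketch only: shows the pinned rubric dict flowing into RubricsScore and evaluate().
from ragas.dataset_schema import SingleTurnSample
from ragas.evaluation import EvaluationDataset, evaluate
from ragas.metrics._domain_specific_rubrics import RubricsScore

# Same rubric text that the diff pins locally instead of importing from ragas.
OLD_DEFAULT_WITH_REFERENCE_RUBRICS = {
    "score1_description": "The response is incorrect, irrelevant, or does not align with the ground truth.",
    "score2_description": "The response partially matches the ground truth but includes significant errors, omissions, or irrelevant information.",
    "score3_description": "The response generally aligns with the ground truth but may lack detail, clarity, or have minor inaccuracies.",
    "score4_description": "The response is mostly accurate and aligns well with the ground truth, with only minor issues or missing details.",
    "score5_description": "The response is fully accurate, aligns completely with the ground truth, and is clear and detailed.",
}

# One graded example: the model's answer plus the ground-truth reference the rubric scores against.
dataset = EvaluationDataset(
    samples=[
        SingleTurnSample(
            user_input="What is the capital of France?",
            response="Paris is the capital of France.",
            reference="Paris",
        )
    ]
)

judge_llm = ...  # assumption: a ragas-compatible LLM wrapper around whatever judge model you use

result = evaluate(
    dataset=dataset,
    metrics=[RubricsScore(rubrics=OLD_DEFAULT_WITH_REFERENCE_RUBRICS)],
    llm=judge_llm,
)
print(result)  # EvaluationResult with one 1-5 rubric score per sample
```

Defining the rubric text locally (rather than importing `DEFAULT_WITH_REFERENCE_RUBRICS` from the private `_domain_specific_rubrics` module) presumably keeps the judge prompt stable even if newer ragas releases rename or drop that constant.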