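"""Task name registries for the MMLU, FLAN, BBH, and AGIEval benchmark suites.

Each TASKS_<SUITE> list enumerates the sub-task names of one suite. The
lowercase <suite>_tasks index lists pick fixed subsets, exposed as
TASKS_<SUITE>_REDUCED, and the TASKS dict maps suite keys (including
"<suite>_reduced") to the corresponding lists.
"""

# The 57 MMLU subject tasks.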
TASKS_MMLU = [
'abstract_algebra',
'anatomy',
'astronomy',
'business_ethics',
'clinical_knowledge',
'college_biology',
'college_chemistry',
'college_computer_science',
'college_mathematics',
'college_medicine',
'college_physics',
'computer_security',
'conceptual_physics',
'econometrics',
'electrical_engineering',
'elementary_mathematics',
'formal_logic',
'global_facts',
'high_school_biology',
'high_school_chemistry',
'high_school_computer_science',
'high_school_european_history',
'high_school_geography',
'high_school_government_and_politics',
'high_school_macroeconomics',
'high_school_mathematics',
'high_school_microeconomics',
'high_school_physics',
'high_school_psychology',
'high_school_statistics',
'high_school_us_history',
'high_school_world_history',
'human_aging',
'human_sexuality',
'international_law',
'jurisprudence',
'logical_fallacies',
'machine_learning',
'management',
'marketing',
'medical_genetics',
'miscellaneous',
'moral_disputes',
'moral_scenarios',
'nutrition',
'philosophy',
'prehistory',
'professional_accounting',
'professional_law',
'professional_medicine',
'professional_psychology',
'public_relations',
'security_studies',
'sociology',
'us_foreign_policy',
'virology',
'world_religions']
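# The FLAN task collections (each name carries a `_10templates` suffix).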
TASKS_FLAN = [
'cnn_dailymail_10templates',
'cola_10templates',
'common_gen_10templates',
'copa_10templates',
'coqa_10templates',
'cosmos_qa_10templates',
'dart_10templates',
'definite_pronoun_resolution_10templates',
'drop_10templates',
'e2e_nlg_10templates',
'fix_punct_10templates',
'gigaword_10templates',
'glue_mrpc_10templates',
'glue_qqp_10templates',
'hellaswag_10templates',
'imdb_reviews_10templates',
'math_dataset_10templates',
'mnli_matched_10templates',
'mnli_mismatched_10templates',
'multi_news_10templates',
'multirc_10templates',
'natural_questions_10templates',
'openbookqa_10templates',
'opinion_abstracts_idebate_10templates',
'opinion_abstracts_rotten_tomatoes_10templates',
'para_crawl_enes_10templates',
'paws_wiki_10templates',
'piqa_10templates',
'qnli_10templates',
'quac_10templates',
'record_10templates',
'rte_10templates',
'samsum_10templates',
'sentiment140_10templates',
'snli_10templates',
'squad_v1_10templates',
'squad_v2_10templates',
'sst2_10templates',
'story_cloze_10templates',
'stsb_10templates',
'trec_10templates',
'trivia_qa_10templates',
'true_case_10templates',
'web_nlg_en_10templates',
'wic_10templates',
'wiki_lingua_english_en_10templates',
'wmt14_enfr_10templates',
'wmt16_translate_csen_10templates',
'wmt16_translate_deen_10templates',
'wmt16_translate_fien_10templates',
'wmt16_translate_roen_10templates',
'wmt16_translate_ruen_10templates',
'wmt16_translate_tren_10templates',
'wnli_10templates',
'word_segment_10templates',
'wsc_10templates',
'yelp_polarity_reviews_10templates']
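# The 27 BIG-Bench Hard (BBH) sub-tasks.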
TASKS_BBH = [
"boolean_expressions",
"causal_judgement",
"date_understanding",
"disambiguation_qa",
"dyck_languages",
"formal_fallacies",
"geometric_shapes",
"hyperbaton",
"logical_deduction_five_objects",
"logical_deduction_seven_objects",
"logical_deduction_three_objects",
"movie_recommendation",
"multistep_arithmetic_two",
"navigate",
"object_counting",
"penguins_in_a_table",
"reasoning_about_colored_objects",
"ruin_names",
"salient_translation_error_detection",
"snarks",
"sports_understanding",
"temporal_sequences",
"tracking_shuffled_objects_five_objects",
"tracking_shuffled_objects_seven_objects",
"tracking_shuffled_objects_three_objects",
"web_of_lies",
"word_sorting"
]
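# The AGIEval sub-tasks.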
TASKS_AGIEVAL = [
"lsat-ar",
"lsat-lr",
"lsat-rc",
"logiqa-en",
"sat-math",
"sat-en",
"aqua-rat",
"sat-en-without-passage",
"gaokao-english"
]
# Indices into the full suites above, selecting the *_REDUCED subsets.
mmlu_tasks = [24, 0, 45]  # high_school_macroeconomics, abstract_algebra, philosophy
flan_tasks = [21, 34, 32]  # natural_questions, snli, samsum
bbh_tasks = [16, 3, 9, 25, 26, 5]  # reasoning_about_colored_objects, disambiguation_qa, logical_deduction_seven_objects, web_of_lies, word_sorting, formal_fallacies
agieval_tasks = [6, 2, 4, 3, 8]  # aqua-rat, lsat-rc, sat-math, logiqa-en, gaokao-english
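# Defensive check, a sketch added for illustration (not in the original file):
# fail fast with a clear message if a subset index drifts out of range as the
# full task lists evolve. The comprehensions below would raise IndexError
# anyway; this just makes the failure easier to diagnose.
for _indices, _full in ((mmlu_tasks, TASKS_MMLU), (flan_tasks, TASKS_FLAN),
                        (bbh_tasks, TASKS_BBH), (agieval_tasks, TASKS_AGIEVAL)):
    assert all(0 <= i < len(_full) for i in _indices), "reduced-suite index out of range"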
TASKS_MMLU_REDUCED = [TASKS_MMLU[i] for i in mmlu_tasks]
TASKS_FLAN_REDUCED = [TASKS_FLAN[i] for i in flan_tasks]
TASKS_BBH_REDUCED = [TASKS_BBH[i] for i in bbh_tasks]
TASKS_AGIEVAL_REDUCED = [TASKS_AGIEVAL[i] for i in agieval_tasks]
TASKS = {
"mmlu": TASKS_MMLU,
"flan": TASKS_FLAN,
"bbh": TASKS_BBH,
"agieval": TASKS_AGIEVAL,
"mmlu_reduced": TASKS_MMLU_REDUCED,
"flan_reduced": TASKS_FLAN_REDUCED,
"bbh_reduced": TASKS_BBH_REDUCED,
"agieval_reduced": TASKS_AGIEVAL_REDUCED
}
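# --- Illustrative usage (a sketch, not part of the original module) ---
# How the registry might be consumed: resolve a suite key via TASKS and run
# each task. `run_task` is a hypothetical callable standing in for whatever
# evaluation entry point the surrounding project provides.
def evaluate_suite(suite, run_task):
    """Run every task in the named suite; return {task_name: result}."""
    return {task: run_task(task) for task in TASKS[suite]}

# For example, evaluate_suite("bbh_reduced", run_task=print) would feed the
# six BBH subset task names through `run_task`.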