
Commit

Set continue_on_step_failure setting to False for FT, Eval, Import notebooks (Azure#2366)

* Disable continue job on failure

* replicate for all notebooks

* Reformat

* Revert metadata
skasturi authored Jun 11, 2023
1 parent 3dafee3 commit f922f0e
Showing 12 changed files with 118 additions and 75 deletions.
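In the Azure ML SDK v2, continue_on_step_failure is a pipeline-level setting: when it is False, the pipeline job stops as soon as any step fails rather than letting the remaining steps run. The sketch below shows the submission pattern these notebooks use after this change; the single-step pipeline, the placeholder workspace values, the environment name, and the experiment name are illustrative stand-ins for the notebook-specific definitions (create_pipeline, compute_cluster, experiment_name), not part of the commit.

from azure.ai.ml import MLClient, command
from azure.ai.ml.dsl import pipeline
from azure.identity import DefaultAzureCredential

# Placeholder workspace details -- substitute real values before running.
workspace_ml_client = MLClient(
    credential=DefaultAzureCredential(),
    subscription_id="<SUBSCRIPTION_ID>",
    resource_group_name="<RESOURCE_GROUP>",
    workspace_name="<WORKSPACE_NAME>",
)

# Hypothetical single-step job standing in for the real components used in the notebooks.
hello_step = command(
    command="echo hello",
    environment="<ENVIRONMENT_NAME>@latest",  # any registered or curated environment
)

@pipeline()
def create_pipeline():
    hello_step()

pipeline_object = create_pipeline()

# don't reuse cached results from previous jobs
pipeline_object.settings.force_rerun = True
pipeline_object.settings.default_compute = "<COMPUTE_CLUSTER_NAME>"

# the change in this commit: stop the whole pipeline as soon as any step fails
pipeline_object.settings.continue_on_step_failure = False

pipeline_job = workspace_ml_client.jobs.create_or_update(
    pipeline_object, experiment_name="continue-on-step-failure-demo"
)

Like force_rerun, the new flag is an attribute on pipeline_object.settings, so the pipeline components themselves need no changes; the diffs below apply the same two-line addition to each notebook.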
@@ -223,6 +223,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"models = []\n",
@@ -231,21 +234,18 @@
" reg_model = list(registry_ml_client.models.list(name=model[\"name\"]))[0]\n",
" print(reg_model.id)\n",
" models.append({**model, \"version\": reg_model.version})"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"models"
],
"metadata": {
"collapsed": false
}
]
},
{
"attachments": {},
@@ -262,14 +262,14 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%pip install transformers\n",
"%pip install torch"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "code",
@@ -302,6 +302,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from transformers import AutoTokenizer\n",
@@ -319,10 +322,7 @@
" )\n",
" test_data_file_name = \"small-test-{}.jsonl\".format(model[\"name\"])\n",
" test_data_df.to_json(test_data_file_name, lines=True, orient=\"records\")"
],
"metadata": {
"collapsed": false
}
]
},
{
"attachments": {},
@@ -417,6 +417,11 @@
" # don't reuse cached results from previous jobs\n",
" pipeline_object.settings.force_rerun = True\n",
" pipeline_object.settings.default_compute = compute_cluster\n",
"\n",
" # set continue on step failure to False\n",
" pipeline_object.settings.continue_on_step_failure = False\n",
"\n",
" pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n",
" pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
" pipeline_object, experiment_name=experiment_name\n",
" )\n",
@@ -217,6 +217,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"models = []\n",
@@ -225,21 +228,18 @@
" reg_model = list(registry_ml_client.models.list(name=model[\"name\"]))[0]\n",
" print(reg_model.id)\n",
" models.append({**model, \"version\": reg_model.version})"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"models"
],
"metadata": {
"collapsed": false
}
]
},
{
"attachments": {},
@@ -406,6 +406,10 @@
" # don't reuse cached results from previous jobs\n",
" pipeline_object.settings.force_rerun = True\n",
" pipeline_object.settings.default_compute = compute_cluster\n",
"\n",
" # set continue on step failure to False\n",
" pipeline_object.settings.continue_on_step_failure = False\n",
"\n",
" pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n",
" pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
" pipeline_object, experiment_name=experiment_name\n",
@@ -210,6 +210,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"models = []\n",
@@ -218,21 +221,18 @@
" reg_model = list(registry_ml_client.models.list(name=model[\"name\"]))[0]\n",
" print(reg_model.id)\n",
" models.append({**model, \"version\": reg_model.version})"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"models"
],
"metadata": {
"collapsed": false
}
]
},
{
"attachments": {},
@@ -401,6 +401,10 @@
" # don't reuse cached results from previous jobs\n",
" pipeline_object.settings.force_rerun = True\n",
" pipeline_object.settings.default_compute = compute_cluster\n",
"\n",
" # set continue on step failure to False\n",
" pipeline_object.settings.continue_on_step_failure = False\n",
"\n",
" pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n",
" pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
" pipeline_object, experiment_name=experiment_name\n",
@@ -210,6 +210,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"models = []\n",
@@ -218,21 +221,18 @@
" reg_model = list(registry_ml_client.models.list(name=model[\"name\"]))[0]\n",
" print(reg_model.id)\n",
" models.append({**model, \"version\": reg_model.version})"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"models"
],
"metadata": {
"collapsed": false
}
]
},
{
"attachments": {},
@@ -399,6 +399,10 @@
" # don't reuse cached results from previous jobs\n",
" pipeline_object.settings.force_rerun = True\n",
" pipeline_object.settings.default_compute = compute_cluster\n",
"\n",
" # set continue on step failure to False\n",
" pipeline_object.settings.continue_on_step_failure = False\n",
"\n",
" pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n",
" pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
" pipeline_object, experiment_name=experiment_name\n",
@@ -209,6 +209,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"models = []\n",
@@ -217,21 +220,18 @@
" reg_model = list(registry_ml_client.models.list(name=model[\"name\"]))[0]\n",
" print(reg_model.id)\n",
" models.append({**model, \"version\": reg_model.version})"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"models"
],
"metadata": {
"collapsed": false
}
]
},
{
"attachments": {},
@@ -431,6 +431,10 @@
" # don't reuse cached results from previous jobs\n",
" pipeline_object.settings.force_rerun = True\n",
" pipeline_object.settings.default_compute = compute_cluster\n",
"\n",
" # set continue on step failure to False\n",
" pipeline_object.settings.continue_on_step_failure = False\n",
"\n",
" pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n",
" pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
" pipeline_object, experiment_name=experiment_name\n",
@@ -211,6 +211,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"models = []\n",
@@ -219,21 +222,18 @@
" reg_model = list(registry_ml_client.models.list(name=model[\"name\"]))[0]\n",
" print(reg_model.id)\n",
" models.append({**model, \"version\": reg_model.version})"
],
"metadata": {
"collapsed": false
}
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"models"
],
"metadata": {
"collapsed": false
}
]
},
{
"attachments": {},
@@ -401,6 +401,10 @@
" # don't reuse cached results from previous jobs\n",
" pipeline_object.settings.force_rerun = True\n",
" pipeline_object.settings.default_compute = compute_cluster\n",
"\n",
" # set continue on step failure to False\n",
" pipeline_object.settings.continue_on_step_failure = False\n",
"\n",
" pipeline_object.display_name = f\"eval-{model['name']}-{timestamp}\"\n",
" pipeline_job = workspace_ml_client.jobs.create_or_update(\n",
" pipeline_object, experiment_name=experiment_name\n",
@@ -319,7 +319,10 @@
"pipeline_object = create_pipeline()\n",
"\n",
"# don't use cached results from previous jobs\n",
"pipeline_object.settings.force_rerun = True"
"pipeline_object.settings.force_rerun = True\n",
"\n",
"# set continue on step failure to False\n",
"pipeline_object.settings.continue_on_step_failure = False"
]
},
{
@@ -311,7 +311,10 @@
"pipeline_object = create_pipeline()\n",
"\n",
"# don't use cached results from previous jobs\n",
"pipeline_object.settings.force_rerun = True"
"pipeline_object.settings.force_rerun = True\n",
"\n",
"# set continue on step failure to False\n",
"pipeline_object.settings.continue_on_step_failure = False"
]
},
{
@@ -135,7 +135,7 @@
" )\n",
" gpus_per_node = 1\n",
"\n",
"# genrating a unique timestamp that can be used for names and versions that need to be unique\n",
"# generating a unique timestamp that can be used for names and versions that need to be unique\n",
"timestamp = str(int(time.time()))"
]
},
@@ -338,7 +338,10 @@
"pipeline_object = create_pipeline()\n",
"\n",
"# don't use cached results from previous jobs\n",
"pipeline_object.settings.force_rerun = True"
"pipeline_object.settings.force_rerun = True\n",
"\n",
"# set continue on step failure to False\n",
"pipeline_object.settings.continue_on_step_failure = False"
]
},
{
@@ -314,7 +314,10 @@
"pipeline_object = create_pipeline()\n",
"\n",
"# don't use cached results from previous jobs\n",
"pipeline_object.settings.force_rerun = True"
"pipeline_object.settings.force_rerun = True\n",
"\n",
"# set continue on step failure to False\n",
"pipeline_object.settings.continue_on_step_failure = False"
]
},
{
@@ -306,7 +306,10 @@
"pipeline_object = create_pipeline()\n",
"\n",
"# don't use cached results from previous jobs\n",
"pipeline_object.settings.force_rerun = True"
"pipeline_object.settings.force_rerun = True\n",
"\n",
"# set continue on step failure to False\n",
"pipeline_object.settings.continue_on_step_failure = False"
]
},
{
