Skip to content

Commit

Permalink
[Refactor:SubmittyUtils] Cleanup dateutils.write_submitty_date to be …
Browse files Browse the repository at this point in the history
…more consistent (Submitty#5545)

This updates the write_submitty_date function to be a bit more consistent and with less
potential foot-guns. First, instead of printing an error to console, but still returning
if given an invalid type, this function will now throw an exception, so that if someone
were to somehow get an integer into this function, we should not return an integer back
and have some other crash later in an unrelated system. Scanning through the codebase,
we always use this function with datetimes at the moment at least. Second, remove the
space between the milliseconds and timezone if using milliseconds to match the format of
the non-milliseconds datetime string, as well as just generally where someone would expect
the "-0400" string. Third, the optional parameter to add milliseconds to the datetime
string was named "microseconds", and so it was renamed to "milliseconds" to be consistent
with what it was actually doing.

Signed-off-by: Matthew Peveler <[email protected]>
  • Loading branch information
MasterOdin authored Jun 30, 2020
1 parent fe8b5fa commit 6b99ba8
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 44 deletions.
14 changes: 7 additions & 7 deletions bin/make_generated_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

"""
# USAGE
# make_generated_output.py <path to config file for gradeable> <assignment> <semester> <course>
# make_generated_output.py <path to config file for gradeable> <assignment> <semester> <course>
"""

import argparse
Expand Down Expand Up @@ -30,7 +30,7 @@ def main():
'config',
'complete_config',
'complete_config_' + args.assignment + '.json')

if os.path.isfile(complete_config_json_path):
with open(complete_config_json_path,'r', encoding='utf-8') as infile:
config_file=json.load(infile)
Expand All @@ -44,15 +44,15 @@ def main():
"course": args.course,
"gradeable": args.assignment,
"required_capabilities": required_capabilities,
"queue_time": dateutils.write_submitty_date(microseconds=True),
"queue_time": dateutils.write_submitty_date(milliseconds=True),
"generate_output": True,
"max_possible_grading_time" : -1,
"who" : "build",
"regrade" : False,
}

should_generated_output = False
for testcase in testcases:
for testcase in testcases:
input_generation_commands = testcase.get('input_generation_commands',[])
solution_containers = testcase.get('solution_containers',[])
should_generate_solution = False
Expand All @@ -63,14 +63,14 @@ def main():

if should_generate_solution and not input_generation_commands:
path_grading_file = os.path.join(SUBMITTY_DATA_DIR, "to_be_graded_queue", "__".join([args.semester, args.course, args.assignment]))

if os.path.isfile(path_grading_file):
os.remove(path_grading_file)

with open(path_grading_file, 'w') as grading_file:
json.dump(graded_file, grading_file,sort_keys=True,indent=4)
print("Starting to build generated output")
break

if __name__ == "__main__":
main()
main()
28 changes: 13 additions & 15 deletions python_submitty_utils/submitty_utils/dateutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,31 +41,29 @@ def get_current_time():
return datetime.now(get_timezone())


def write_submitty_date(d=None, milliseconds=False):
    """
    Convert a datetime object to a timezone-aware string.

    If no datetime is given, the current server time is used. If the
    given datetime has no timezone, the server's timezone is attached
    before formatting.

    :param d: datetime object to convert, or None to use the current time
    :param milliseconds: if True, include a 3-digit milliseconds field
        (e.g. "2020-06-12 03:21:30.123+0000"); note the fraction is
        truncated, not rounded
    :return: formatted string, e.g. "2020-06-12 03:21:30+0000"
    :raises TypeError: if d is neither None nor a datetime object
    """
    if d is None:
        d = get_current_time()
    if not isinstance(d, datetime):
        # Fail loudly instead of returning the value unchanged, so a bad
        # type cannot silently propagate into an unrelated system later.
        raise TypeError(
            f"Invalid type. Expected datetime or datetime string, got {type(d)}."
        )
    if d.tzinfo is None:
        # Naive datetime: assume it is in the server's local timezone.
        d = get_timezone().localize(d)

    if milliseconds:
        # %f yields microseconds (6 digits); keep the first 3 as milliseconds.
        # No space before %z, matching the non-milliseconds format.
        mlsec = d.strftime("%f")[0:3]
        answer = d.strftime(f"%Y-%m-%d %H:%M:%S.{mlsec}%z")
    else:
        answer = d.strftime("%Y-%m-%d %H:%M:%S%z")
    return answer


Expand Down
84 changes: 84 additions & 0 deletions python_submitty_utils/tests/test_dateutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,90 @@


class TestDateUtils(TestCase):
@patch(
    "submitty_utils.dateutils.get_current_time",
    return_value=pytz_timezone('America/New_York').localize(datetime(
        2016, 10, 14, 22, 11, 32, 0
    ))
)
def test_write_submitty_date_default(self, current_time):
    """Calling write_submitty_date() with no argument formats the current time."""
    result = dateutils.write_submitty_date()
    self.assertTrue(current_time.called)
    self.assertEqual('2016-10-14 22:11:32-0400', result)

@patch(
    "submitty_utils.dateutils.get_timezone",
    return_value=pytz_timezone('America/New_York')
)
def test_write_submitty_date(self, get_timezone):
    """Aware and naive datetimes format as 'YYYY-MM-DD HH:MM:SS±zzzz'."""
    cases = [
        (datetime(2020, 6, 12, 3, 21, 30, tzinfo=pytz_timezone('UTC')),
         '2020-06-12 03:21:30+0000'),
        (datetime(2020, 12, 25, 3, 21, 30, tzinfo=pytz_timezone('UTC')),
         '2020-12-25 03:21:30+0000'),
        # Microseconds are dropped when milliseconds=False (the default).
        (datetime(2020, 6, 12, 3, 21, 30, 123, tzinfo=pytz_timezone('UTC')),
         '2020-06-12 03:21:30+0000'),
        # Naive datetimes pick up the (patched) server timezone, incl. DST.
        (datetime(2020, 6, 12, 3, 21, 30),
         '2020-06-12 03:21:30-0400'),
        (datetime(2020, 12, 12, 3, 21, 30),
         '2020-12-12 03:21:30-0500'),
    ]
    for value, expected in cases:
        with self.subTest(i=value):
            self.assertEqual(expected, dateutils.write_submitty_date(value))


@patch(
    "submitty_utils.dateutils.get_timezone",
    return_value=pytz_timezone('America/New_York')
)
def test_write_submitty_date_microseconds(self, get_timezone):
    """milliseconds=True appends a zero-padded, 3-digit fractional field."""
    cases = [
        # No microseconds at all still yields '.000'.
        (datetime(2020, 6, 12, 3, 21, 30, tzinfo=pytz_timezone('UTC')),
         '2020-06-12 03:21:30.000+0000'),
        # Microseconds are truncated (not rounded) to 3 digits.
        (datetime(2020, 6, 12, 3, 21, 30, 123500, tzinfo=pytz_timezone('UTC')),
         '2020-06-12 03:21:30.123+0000'),
        # Naive datetimes pick up the (patched) server timezone.
        (datetime(2020, 6, 12, 3, 21, 30, 211500),
         '2020-06-12 03:21:30.211-0400'),
    ]
    for value, expected in cases:
        with self.subTest(i=value):
            self.assertEqual(
                expected,
                dateutils.write_submitty_date(value, True)
            )

def test_invalid_type_write_submitty_date(self):
    """Non-datetime inputs raise a TypeError naming the offending type."""
    testcases = ('2020-06-12 03:21:30.123+0000', 10)
    for testcase in testcases:
        with self.subTest(testcase):
            with self.assertRaises(TypeError) as cm:
                # Bug fix: previously the literal 10 was always passed
                # here, so the string testcase was never exercised and
                # the hard-coded "<class 'int'>" message only matched
                # by accident.
                dateutils.write_submitty_date(testcase)
            self.assertEqual(
                "Invalid type. Expected datetime or datetime string,"
                f" got {type(testcase)}.",
                str(cm.exception)
            )

@patch(
"submitty_utils.dateutils.get_current_time",
return_value=pytz_timezone('America/New_York').localize(datetime(
Expand Down
37 changes: 19 additions & 18 deletions sbin/submitty_autograding_shipper.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,9 @@ def prepare_job(my_name,which_machine,which_untrusted,next_directory,next_to_gra
queue_obj = json.load(infile)
queue_obj["which_untrusted"] = which_untrusted
queue_obj["which_machine"] = which_machine
queue_obj["ship_time"] = dateutils.write_submitty_date(microseconds=True)
queue_obj["ship_time"] = dateutils.write_submitty_date(milliseconds=True)
queue_obj['identifier'] = random_identifier

except Exception as e:
autograding_utils.log_stack_trace(AUTOGRADING_STACKTRACE_PATH, job_id=JOB_ID, trace=traceback.format_exc())
autograding_utils.log_message(AUTOGRADING_LOG_PATH, JOB_ID, message="ERROR: failed preparing submission zip or accessing next to grade "+str(e))
Expand Down Expand Up @@ -420,7 +421,7 @@ def grade_queue_file(my_name, which_machine,which_untrusted,queue_file):
return

#TODO: break which_machine into id, address, and passphrase.

try:
# prepare the job
shipper_counter=0
Expand All @@ -430,7 +431,7 @@ def grade_queue_file(my_name, which_machine,which_untrusted,queue_file):
time.sleep(5)

prep_job_success = True

if not prep_job_success:
print (my_name, " ERROR unable to prepare job: ", queue_file)
autograding_utils.log_message(AUTOGRADING_LOG_PATH, JOB_ID, message=str(my_name)+" ERROR unable to prepare job: " + queue_file)
Expand Down Expand Up @@ -662,15 +663,15 @@ def get_job(my_name,which_machine,my_capabilities,which_untrusted):
Our first priority is to perform any awaiting VCS checkouts
Note: This design is imperfect:
* If all shippers are busy working on long-running autograding
tasks there will be a delay of seconds or minutes between
a student pressing the submission button and clone happening.
This is a minor exploit allowing them to theoretically
continue working on their submission past the deadline for
the time period of the delay.
-- This is not a significant, practical problem.
* If multiple and/or large git submissions arrive close
together, this shipper job will be tied up performing these
clone operations. Because we don't release the lock, any
Expand All @@ -680,11 +681,11 @@ def get_job(my_name,which_machine,my_capabilities,which_untrusted):
-- Based on experience with actual submission patterns, we
do not anticipate that this will be a significant
bottleneck at this time.
* If a git clone takes a very long time and/or hangs because of
network problems, this could halt all work on the server.
-- We'll need to monitor the production server.
We plan to do a complete overhaul of the
scheduler/shipper/worker and refactoring this design should be
part of the project.
Expand Down Expand Up @@ -848,7 +849,7 @@ def can_short_circuit(config_obj: str) -> bool:
def check_submission_limit_penalty_inline(config_obj: dict,
queue_obj: dict) -> dict:
"""Check if a submission violates the submission limit.
Note that this function makes the assumption that the file being graded is
short-circuitable (i.e. only has one test case; the sentinel "submission
limit" test case).
Expand All @@ -873,7 +874,7 @@ def check_submission_limit_penalty_inline(config_obj: dict,
points = floor(excessive * penalty)
points = max(points, possible_points)
view_testcase = points != 0

return {
'test_name': f"Test 1 {testcase['title']}",
'view_testcase': view_testcase,
Expand Down Expand Up @@ -987,7 +988,7 @@ def try_short_circuit(queue_file: str) -> bool:

base_dir = tempfile.mkdtemp()

results_dir = os.path.join(base_dir, 'TMP_RESULTS')
results_dir = os.path.join(base_dir, 'TMP_RESULTS')
results_json_path = os.path.join(results_dir, 'results.json')
grade_txt_path = os.path.join(results_dir, 'grade.txt')
queue_file_json_path = os.path.join(results_dir, 'queue_file.json')
Expand Down Expand Up @@ -1094,7 +1095,7 @@ def write_grading_outputs(testcases: list,
for r, tc in zip(testcase_outputs, testcases)
if not tc.get('hidden', False)
)

with open(grade_txt, 'w') as fd:
# Write each test case's individual output
for i, tc in enumerate(testcases):
Expand All @@ -1116,11 +1117,11 @@ def write_grading_outputs(testcases: list,
fd.write(' ' * 10)
else:
fd.write(f"{points:3} / {max_points:3} ")

if tc.get('hidden', False):
fd.write(" [ HIDDEN ]")
fd.write('\n')

# Write the final lines
autograde_total_msg = f"{'Automatic grading total:':<64}{auto_points:3} /{max_auto_points:3}\n"
fd.write(autograde_total_msg)
Expand Down Expand Up @@ -1175,7 +1176,7 @@ def launch_shippers(worker_status_map):
processes = list()
for name, machine in autograding_workers.items():
thread_count = machine["num_autograding_workers"]

# Cleanup previous in-progress submissions
worker_folders = [worker_folder(f'{name}_{i}') for i in range(thread_count)]
for folder in worker_folders:
Expand Down Expand Up @@ -1241,16 +1242,16 @@ def launch_shippers(worker_status_map):
idle_workers = list(filter(
lambda n: len(os.listdir(worker_folder(n))) == 0,
workers))
jobs = filter(os.path.isfile,
map(lambda f: os.path.join(INTERACTIVE_QUEUE, f),
jobs = filter(os.path.isfile,
map(lambda f: os.path.join(INTERACTIVE_QUEUE, f),
os.listdir(INTERACTIVE_QUEUE)))

# Distribute available jobs randomly among workers currently idle.
for job in jobs:
if len(idle_workers) == 0:
break
dest = random.choice(idle_workers)
autograding_utils.log_message(AUTOGRADING_LOG_PATH, JOB_ID,
autograding_utils.log_message(AUTOGRADING_LOG_PATH, JOB_ID,
message=f"Pushing job {os.path.basename(job)} to {dest}.")
shutil.move(job, worker_folder(dest))
idle_workers.remove(dest)
Expand Down
8 changes: 4 additions & 4 deletions sbin/submitty_autograding_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ def worker_process(which_machine,address,which_untrusted,my_server):
os.remove(results_zip_tmp)
with open(todo_queue_file, 'r') as infile:
queue_obj = json.load(infile)
queue_obj["done_time"]=dateutils.write_submitty_date(microseconds=True)
queue_obj["done_time"]=dateutils.write_submitty_date(milliseconds=True)
with open(done_queue_file, 'w') as outfile:
json.dump(queue_obj, outfile, sort_keys=True, indent=4)
json.dump(queue_obj, outfile, sort_keys=True, indent=4)
except Exception as e:
autograding_utils.log_message(AUTOGRADING_LOG_PATH, JOB_ID, message="ERROR attempting to unzip graded item: " + which_machine + " " + which_untrusted + ". for more details, see traces entry.")
autograding_utils.log_stack_trace(AUTOGRADING_STACKTRACE_PATH, JOB_ID,trace=traceback.format_exc())
Expand All @@ -95,7 +95,7 @@ def worker_process(which_machine,address,which_untrusted,my_server):
done_queue_file = os.path.join(SUBMITTY_DATA_DIR,"autograding_DONE",servername_workername+"_"+which_untrusted+"_queue.json")
with open(todo_queue_file, 'r') as infile:
queue_obj = json.load(infile)
queue_obj["done_time"]=dateutils.write_submitty_date(microseconds=True)
queue_obj["done_time"]=dateutils.write_submitty_date(milliseconds=True)
with open(done_queue_file, 'w') as outfile:
json.dump(queue_obj, outfile, sort_keys=True, indent=4)
finally:
Expand All @@ -114,7 +114,7 @@ def worker_process(which_machine,address,which_untrusted,my_server):
counter += 1
time.sleep(1)


# ==================================================================================
# ==================================================================================
def launch_workers(my_name, my_stats):
Expand Down

0 comments on commit 6b99ba8

Please sign in to comment.