From 777d02c0fa2076e690e91e8f0ccfab40331edda3 Mon Sep 17 00:00:00 2001
From: axif <muhamadasif570@gmail.com>
Date: Mon, 6 Jan 2025 00:26:26 +0600
Subject: [PATCH 1/2] Update get cmd for 'all' when retrieving all data-types.

---
 src/scribe_data/cli/get.py             |  20 ++--
 src/scribe_data/wikidata/query_data.py |   4 +-
 tests/cli/test_get.py                  | 148 ++++++++++++++++++-------
 3 files changed, 119 insertions(+), 53 deletions(-)

diff --git a/src/scribe_data/cli/get.py b/src/scribe_data/cli/get.py
index 3e2f3897..6c4fbbcc 100644
--- a/src/scribe_data/cli/get.py
+++ b/src/scribe_data/cli/get.py
@@ -139,7 +139,7 @@ def prompt_user_download_all():
         elif data_type:
             if prompt_user_download_all():
                 parse_wd_lexeme_dump(
-                    language=None,
+                    language="all",
                     wikidata_dump_type=["form"],
                     data_types=[data_type],
                     type_output_dir=output_dir,
@@ -208,27 +208,27 @@ def prompt_user_download_all():
             f"Updating data for language(s): {language.title()}; data type(s): {data_type.capitalize()}"
         )
         existing_files = list(Path(output_dir).glob(f"{language}/{data_type}.json"))
-        if existing_files:
+        if existing_files and not overwrite:
             print(
                 f"Existing file(s) found for {language.title()} and {data_type.capitalize()} in the {output_dir} directory."
             )
             for idx, file in enumerate(existing_files, start=1):
                 print(f"{idx}. {file.name}")
 
-            print("\nChoose an option:")
-            print("1. Overwrite existing data (press 'o')")
-            print("2. Skip process (press anything else)")
-            user_choice = input("Enter your choice: ").strip().lower()
+            user_choice = questionary.confirm(
+                "Overwrite existing data?", default=False
+            ).ask()
 
-            if user_choice == "o":
+            if user_choice:
                 print("Overwrite chosen. Removing existing files...")
                 for file in existing_files:
-                    file.unlink()
+                    if file.exists():  # Check if the file exists before unlinking
+                        file.unlink()
             else:
                 print(f"Skipping update for {language.title()} {data_type}.")
                 return {"success": False, "skipped": True}
 
-        query_result = query_data(
+        query_data(
             languages=[language_or_sub_language],
             data_type=data_types,
             output_dir=output_dir,
@@ -236,7 +236,7 @@ def prompt_user_download_all():
             interactive=interactive,
         )
 
-        if not all_bool and not query_result.get("skipped", False):
+        if not all_bool:
             print(f"Updated data was saved in: {Path(output_dir).resolve()}.")
 
     else:
diff --git a/src/scribe_data/wikidata/query_data.py b/src/scribe_data/wikidata/query_data.py
index bbe7c7b5..27f1b4b8 100644
--- a/src/scribe_data/wikidata/query_data.py
+++ b/src/scribe_data/wikidata/query_data.py
@@ -216,7 +216,7 @@ def query_data(
 
                     else:
                         print(f"Skipping update for {lang.title()} {target_type}.")
-                        return {"success": False, "skipped": True}
+                        # return {"success": False, "skipped": True}
 
         print(f"Querying and formatting {lang.title()} {target_type}")
 
@@ -342,7 +342,7 @@ def query_data(
             print(
                 f"Successfully queried and formatted data for {lang.title()} {target_type}."
             )
-            return {"success": True, "skipped": False}
+            # return {"success": True, "skipped": False}
 
 
 # if __name__ == "__main__":
diff --git a/tests/cli/test_get.py b/tests/cli/test_get.py
index 914fbe9e..9de2ef20 100644
--- a/tests/cli/test_get.py
+++ b/tests/cli/test_get.py
@@ -62,37 +62,46 @@ def test_invalid_arguments(self):
 
     # MARK: All Data
 
-    # @patch("scribe_data.cli.get.query_data")
-    # @patch("scribe_data.cli.get.prompt_user_download_all", return_value=False)
-    # def test_get_all_data_types_for_language(self, mock_prompt, mock_query_data):
-    #     """
-    #     Test retrieving all data types for a specific language.
-
-    #     Ensures that `query_data` is called properly when `--all` flag is used with a language.
-    #     """
-    #     get_data(all_bool=True, language="English")
-    #     mock_query_data.assert_called_once_with(
-    #         languages=["English"],
-    #         data_type=None,
-    #         output_dir="scribe_data_json_export",
-    #         overwrite=False,
-    #     )
-
-    # @patch("scribe_data.cli.get.query_data")
-    # @patch("scribe_data.cli.get.prompt_user_download_all", return_value=False)
-    # def test_get_all_languages_for_data_type(self, mock_prompt, mock_query_data):
-    #     """
-    #     Test retrieving all languages for a specific data type.
-
-    #     Ensures that `query_data` is called properly when `--all` flag is used with a data type.
-    #     """
-    #     get_data(all_bool=True, data_type="nouns")
-    #     mock_query_data.assert_called_once_with(
-    #         languages=None,
-    #         data_type=["nouns"],
-    #         output_dir="scribe_data_json_export",
-    #         overwrite=False,
-    #     )
+    @patch("scribe_data.cli.get.query_data")
+    @patch("scribe_data.cli.get.parse_wd_lexeme_dump")
+    @patch("scribe_data.cli.get.questionary.confirm")
+    def test_get_all_data_types_for_language_user_says_yes(
+        self, mock_questionary_confirm, mock_parse, mock_query_data
+    ):
+        """
+        Test the behavior when the user agrees to query Wikidata directly.
+
+        This test checks that `parse_wd_lexeme_dump` is called with the correct parameters
+        when the user confirms they want to query Wikidata.
+        """
+        mock_questionary_confirm.return_value.ask.return_value = True
+
+        get_data(all_bool=True, language="English")
+
+        mock_parse.assert_called_once_with(
+            language="English",
+            wikidata_dump_type=["form"],
+            data_types=None,  # because data_types = [data_type] if provided else None
+            type_output_dir="scribe_data_json_export",  # default for JSON
+        )
+        mock_query_data.assert_not_called()
+
+    @patch("scribe_data.cli.get.parse_wd_lexeme_dump")
+    def test_get_all_languages_and_data_types(self, mock_parse):
+        """
+        Test retrieving all languages for a specific data type.
+
+        Ensures that `query_data` is called properly when `--all` flag is used with a data type.
+        """
+        get_data(all_bool=True)
+
+        mock_parse.assert_called_once_with(
+            language="all",
+            wikidata_dump_type=["form", "translations"],
+            data_types="all",
+            type_output_dir="scribe_data_json_export",
+            wikidata_dump_path=None,
+        )
 
     # MARK: Language and Data Type
 
@@ -211,38 +220,47 @@ def test_get_data_with_overwrite_false(self, mock_query_data):
     # MARK : User Chooses to skip
 
     @patch("scribe_data.cli.get.query_data")
-    @patch("scribe_data.cli.get.Path.glob")
-    @patch("builtins.input", return_value="s")
-    def test_user_skips_existing_file(self, mock_input, mock_glob, mock_query_data):
+    @patch(
+        "scribe_data.cli.get.Path.glob",
+        return_value=[Path("./test_output/English/nouns.json")],
+    )
+    @patch("scribe_data.cli.get.questionary.confirm")
+    def test_user_skips_existing_file(
+        self, mock_questionary_confirm, mock_glob, mock_query_data
+    ):
         """
         Test the behavior when the user chooses to skip an existing file.
 
         Ensures that the file is not overwritten and the function returns the correct result.
         """
-        mock_glob.return_value = [Path("./test_output/English/nouns.json")]
+        mock_questionary_confirm.return_value.ask.return_value = False
         result = get_data(
             language="English", data_type="nouns", output_dir="./test_output"
         )
+
+        # Validate the skip result
         self.assertEqual(result, {"success": False, "skipped": True})
         mock_query_data.assert_not_called()
 
     # MARK : User Chooses to overwrite
 
     @patch("scribe_data.cli.get.query_data")
-    @patch("scribe_data.cli.get.Path.glob")
-    @patch("builtins.input", return_value="o")
-    @patch("scribe_data.cli.get.Path.unlink")
+    @patch(
+        "scribe_data.cli.get.Path.glob",
+        return_value=[Path("./test_output/English/nouns.json")],
+    )
+    @patch("scribe_data.cli.get.questionary.confirm")
     def test_user_overwrites_existing_file(
-        self, mock_unlink, mock_input, mock_glob, mock_query_data
+        self, mock_questionary_confirm, mock_glob, mock_query_data
     ):
         """
         Test the behavior when the user chooses to overwrite an existing file.
 
         Ensures that the file is overwritten and the function returns the correct result.
         """
-        mock_glob.return_value = [Path("./test_output/English/nouns.json")]
+        mock_questionary_confirm.return_value.ask.return_value = True
         get_data(language="English", data_type="nouns", output_dir="./test_output")
-        mock_unlink.assert_called_once_with()
+
         mock_query_data.assert_called_once_with(
             languages=["English"],
             data_type=["nouns"],
@@ -250,3 +268,51 @@ def test_user_overwrites_existing_file(
             overwrite=False,
             interactive=False,
         )
+
+    # MARK: Translations
+    @patch("scribe_data.cli.get.parse_wd_lexeme_dump")
+    def test_get_translations_no_language_specified(self, mock_parse):
+        """
+        Test behavior when no language is specified for 'translations'.
+        Expect language="all".
+        """
+        get_data(data_type="translations")
+        mock_parse.assert_called_once_with(
+            language="all",
+            wikidata_dump_type=["translations"],
+            type_output_dir="scribe_data_json_export",  # default output dir for JSON
+            wikidata_dump_path=None,
+        )
+
+    @patch("scribe_data.cli.get.parse_wd_lexeme_dump")
+    def test_get_translations_with_specific_language(self, mock_parse):
+        """
+        Test behavior when a specific language is provided for 'translations'.
+        Expect parse_wd_lexeme_dump to be called with that language.
+        """
+        get_data(
+            language="Spanish", data_type="translations", output_dir="./test_output"
+        )
+        mock_parse.assert_called_once_with(
+            language="Spanish",
+            wikidata_dump_type=["translations"],
+            type_output_dir="./test_output",
+            wikidata_dump_path=None,
+        )
+
+    @patch("scribe_data.cli.get.parse_wd_lexeme_dump")
+    def test_get_translations_with_dump(self, mock_parse):
+        """
+        Test behavior when a Wikidata dump path is specified for 'translations'.
+        Even with a language, it should call parse_wd_lexeme_dump
+        passing that dump path.
+        """
+        get_data(
+            language="German", data_type="translations", wikidata_dump="./wikidump.json"
+        )
+        mock_parse.assert_called_once_with(
+            language="German",
+            wikidata_dump_type=["translations"],
+            type_output_dir="scribe_data_json_export",  # default for JSON
+            wikidata_dump_path="./wikidump.json",
+        )

From 248a56f39ca753e7ec3b4ecad2fc691af0c94037 Mon Sep 17 00:00:00 2001
From: Andrew Tavis McAllister <andrew.t.mcallister@gmail.com>
Date: Tue, 7 Jan 2025 00:54:53 +0100
Subject: [PATCH 2/2] Minor changes and fix/test total command for QIDs

---
 src/scribe_data/cli/get.py             |  2 +-
 src/scribe_data/cli/total.py           | 17 ++++++++++++-----
 src/scribe_data/utils.py               | 10 +++++-----
 src/scribe_data/wikidata/query_data.py |  7 -------
 tests/cli/test_get.py                  |  7 ++++---
 tests/load/test_update_utils.py        | 15 +++++++++++++--
 6 files changed, 35 insertions(+), 23 deletions(-)

diff --git a/src/scribe_data/cli/get.py b/src/scribe_data/cli/get.py
index 6c4fbbcc..cbf94f0c 100644
--- a/src/scribe_data/cli/get.py
+++ b/src/scribe_data/cli/get.py
@@ -222,7 +222,7 @@ def prompt_user_download_all():
             if user_choice:
                 print("Overwrite chosen. Removing existing files...")
                 for file in existing_files:
-                    if file.exists():  # Check if the file exists before unlinking
+                    if file.exists():  # check if the file exists before unlinking
                         file.unlink()
             else:
                 print(f"Skipping update for {language.title()} {data_type}.")
diff --git a/src/scribe_data/cli/total.py b/src/scribe_data/cli/total.py
index 8d86d7fe..1359c2e6 100644
--- a/src/scribe_data/cli/total.py
+++ b/src/scribe_data/cli/total.py
@@ -268,15 +268,22 @@ def get_total_lexemes(language, data_type, do_print=True):
     str
         A formatted string indicating the language, data type and total number of lexemes, if found.
     """
-
-    if language is not None and language.startswith("Q") and language[1:].isdigit():
-        language_qid = language
+    if (
+        language is not None
+        and (language.startswith("Q") or language.startswith("q"))
+        and language[1:].isdigit()
+    ):
+        language_qid = language.capitalize()
 
     else:
         language_qid = get_qid_by_input(language)
 
-    if data_type is not None and data_type.startswith("Q") and data_type[1:].isdigit():
-        data_type_qid = data_type
+    if (
+        data_type is not None
+        and (data_type.startswith("Q") or data_type.startswith("q"))
+        and data_type[1:].isdigit()
+    ):
+        data_type_qid = data_type.capitalize()
 
     else:
         data_type_qid = get_qid_by_input(data_type)
diff --git a/src/scribe_data/utils.py b/src/scribe_data/utils.py
index aab0d941..b1934a6d 100644
--- a/src/scribe_data/utils.py
+++ b/src/scribe_data/utils.py
@@ -123,11 +123,8 @@ def _load_json(package_path: str, file_name: str) -> Any:
     -------
     A python entity representing the JSON content.
     """
-    with (
-        resources.files(package_path)
-        .joinpath(file_name)
-        .open(encoding="utf-8") as in_stream
-    ):
+    json_file = resources.files(package_path).joinpath(file_name)
+    with json_file.open(encoding="utf-8") as in_stream:
         return json.load(in_stream)
 
 
@@ -547,6 +544,9 @@ def format_sublanguage_name(lang, language_metadata=_languages):
     > format_sublanguage_name("english", language_metadata)
     'English'
     """
+    if (lang.startswith("Q") or lang.startswith("q")) and lang[1:].isdigit():
+        return lang
+
     for main_lang, lang_data in language_metadata.items():
         # If it's not a sub-language, return the original name.
         if main_lang == lang:
diff --git a/src/scribe_data/wikidata/query_data.py b/src/scribe_data/wikidata/query_data.py
index 27f1b4b8..7476bc61 100644
--- a/src/scribe_data/wikidata/query_data.py
+++ b/src/scribe_data/wikidata/query_data.py
@@ -166,7 +166,6 @@ def query_data(
 
     # MARK: Run Queries
 
-    # Run queries and format data.
     for q in tqdm(
         queries_to_run,
         desc="Data updated",
@@ -216,7 +215,6 @@ def query_data(
 
                     else:
                         print(f"Skipping update for {lang.title()} {target_type}.")
-                        # return {"success": False, "skipped": True}
 
         print(f"Querying and formatting {lang.title()} {target_type}")
 
@@ -342,8 +340,3 @@ def query_data(
             print(
                 f"Successfully queried and formatted data for {lang.title()} {target_type}."
             )
-            # return {"success": True, "skipped": False}
-
-
-# if __name__ == "__main__":
-#     query_data()
diff --git a/tests/cli/test_get.py b/tests/cli/test_get.py
index 9de2ef20..9e1fdbfb 100644
--- a/tests/cli/test_get.py
+++ b/tests/cli/test_get.py
@@ -217,7 +217,7 @@ def test_get_data_with_overwrite_false(self, mock_query_data):
             interactive=False,
         )
 
-    # MARK : User Chooses to skip
+    # MARK: User Chooses Skip
 
     @patch("scribe_data.cli.get.query_data")
     @patch(
@@ -238,11 +238,11 @@ def test_user_skips_existing_file(
             language="English", data_type="nouns", output_dir="./test_output"
         )
 
-        # Validate the skip result
+        # Validate the skip result.
         self.assertEqual(result, {"success": False, "skipped": True})
         mock_query_data.assert_not_called()
 
-    # MARK : User Chooses to overwrite
+    # MARK: User Chooses Overwrite
 
     @patch("scribe_data.cli.get.query_data")
     @patch(
@@ -270,6 +270,7 @@ def test_user_overwrites_existing_file(
         )
 
     # MARK: Translations
+
     @patch("scribe_data.cli.get.parse_wd_lexeme_dump")
     def test_get_translations_no_language_specified(self, mock_parse):
         """
diff --git a/tests/load/test_update_utils.py b/tests/load/test_update_utils.py
index 64c55835..bd8813ef 100644
--- a/tests/load/test_update_utils.py
+++ b/tests/load/test_update_utils.py
@@ -123,11 +123,22 @@ def test_format_sublanguage_name_positive(lang, expected_output):
     assert utils.format_sublanguage_name(lang) == expected_output
 
 
+@pytest.mark.parametrize(
+    "lang, expected_output",
+    [
+        ("Q42", "Q42"),  # test that any QID is returned
+        ("Q1860", "Q1860"),
+    ],
+)
+def test_format_sublanguage_name_qid_positive(lang, expected_output):
+    assert utils.format_sublanguage_name(lang) == expected_output
+
+
 def test_format_sublanguage_name_negative():
     with pytest.raises(ValueError) as excp:
-        _ = utils.format_sublanguage_name("Silence")
+        _ = utils.format_sublanguage_name("Newspeak")
 
-    assert str(excp.value) == "Silence is not a valid language or sub-language."
+    assert str(excp.value) == "Newspeak is not a valid language or sub-language."
 
 
 def test_list_all_languages():