scribe-org · andrewtavis · Jan 4, 2025 · Dec 22, 2024 · Dec 25, 2024 · Dec 26, 2024
diff --git a/requirements.txt b/requirements.txt
@@ -18,3 +18,4 @@ ruff>=0.3.3
 SPARQLWrapper>=2.0.0
 sphinx-rtd-theme>=3.0.0
 tqdm==4.66.4
+orjson>=3.10.12
diff --git a/src/scribe_data/cli/download.py b/src/scribe_data/cli/download.py
@@ -30,6 +30,7 @@
 import requests
 from rich import print as rprint
 from tqdm import tqdm
+import questionary
 
 from scribe_data.utils import DEFAULT_DUMP_EXPORT_DIR, check_lexeme_dump_prompt_download
 
@@ -244,16 +245,12 @@ def wd_lexeme_dump_download_wrapper(
         filename = dump_url.split("/")[-1]
         output_path = str(Path(output_dir) / filename)
 
-        user_response = (
-            input(
-                "We'll be using the Wikidata lexeme dump from dumps.wikimedia.org/wikidatawiki/entities."
-                "\nDo you want to proceed? (y/n): "
-            )
-            .strip()
-            .lower()
-        )
+        user_response = questionary.confirm(
+            "We'll be using the Wikidata lexeme dump from dumps.wikimedia.org/wikidatawiki/entities. Do you want to proceed?",
+            default=True,
+        ).ask()
 
-        if user_response == "y":
+        if user_response:
             rprint(f"[bold blue]Downloading dump to {output_path}...[/bold blue]")
 
             response = requests.get(dump_url, stream=True)

diff --git a/src/scribe_data/cli/get.py b/src/scribe_data/cli/get.py
@@ -25,6 +25,7 @@
 from typing import List, Union
 
 from rich import print as rprint
+import questionary
 
 from scribe_data.cli.convert import convert_wrapper
 from scribe_data.unicode.generate_emoji_keywords import generate_emoji
@@ -108,16 +109,20 @@ def prompt_user_download_all():
         """
         Checks with the user if they'd rather use Wikidata lexeme dumps before a download all call.
         """
-        download_all_input = input(
-            "Do you want to query Wikidata, or would you rather use Wikidata lexeme dumps? (y/N): "
-        )
-        return download_all_input == "y"
+        return questionary.confirm(
+            "Do you want to query Wikidata directly? (selecting 'no' will use Wikidata lexeme dumps)",
+            default=False,
+        ).ask() is False  # Truthy means "use the dumps" (callers run parse_wd_lexeme_dump on it), so only an explicit "no" selects them.
 
     if all_bool:
         if language:
             if prompt_user_download_all():
-                parse_wd_lexeme_dump()
-
+                parse_wd_lexeme_dump(
+                    language=language,
+                    wikidata_dump_type=["form"],
+                    data_types=data_types,
+                    type_output_dir=output_dir,
+                )
             else:
                 language_or_sub_language = language.split(" ")[0]
                 print(f"Updating all data types for language: {language.title()}")
@@ -133,8 +138,12 @@ def prompt_user_download_all():
 
         elif data_type:
             if prompt_user_download_all():
-                parse_wd_lexeme_dump()
-
+                parse_wd_lexeme_dump(
+                    language=None,
+                    wikidata_dump_type=["form"],
+                    data_types=[data_type],
+                    type_output_dir=output_dir,
+                )
             else:
                 print(f"Updating all languages for data type: {data_type.capitalize()}")
                 query_data(
@@ -152,13 +161,44 @@ def prompt_user_download_all():
             rprint(
                 "[bold red]Note that the download all functionality must use Wikidata dumps to observe responsible Wikidata Query Service usage practices.[/bold red]"
             )
-            parse_wd_lexeme_dump()
+            parse_wd_lexeme_dump(
+                language="all",
+                wikidata_dump_type=["form", "translations"],
+                data_types="all",
+                type_output_dir=output_dir,
+                wikidata_dump_path=wikidata_dump,
+            )
 
     # MARK: Emojis
 
     elif data_type in {"emoji-keywords", "emoji_keywords"}:
         generate_emoji(language=language, output_dir=output_dir)
 
+    # MARK: Translations
+
+    elif data_type == "translations":
+        if language is None:
+            language = "all"
+        parse_wd_lexeme_dump(
+            language=language,
+            wikidata_dump_type=["translations"],
+            type_output_dir=output_dir,
+            wikidata_dump_path=wikidata_dump,
+        )
+        return
+
+    # MARK: Query Data using Wikidata Dump
+
+    elif wikidata_dump:
+        parse_wd_lexeme_dump(
+            language=language,
+            wikidata_dump_type=["form"],
+            data_types=data_types,
+            type_output_dir=output_dir,
+            wikidata_dump_path=wikidata_dump,
+        )
+        return
+
     # MARK: Query Data
 
     elif language or data_type:

diff --git a/src/scribe_data/cli/interactive.py b/src/scribe_data/cli/interactive.py
@@ -37,9 +37,10 @@
 # from scribe_data.cli.list import list_wrapper
 from scribe_data.cli.get import get_data
 from scribe_data.cli.total import total_wrapper
-from scribe_data.cli.version import get_local_version
+from scribe_data.wikidata.wikidata_utils import parse_wd_lexeme_dump
 from scribe_data.utils import (
     DEFAULT_JSON_EXPORT_DIR,
+    DEFAULT_DUMP_EXPORT_DIR,
     data_type_metadata,
     language_metadata,
     list_all_languages,
@@ -262,6 +263,7 @@ def request_total_lexeme_loop():
             choices=[
                 Choice("Configure total lexemes request", "total"),
                 Choice("Run total lexemes request", "run"),
+                Choice("Run total lexemes request with lexeme dumps", "run_all"),
                 Choice("Exit", "exit"),
             ],
         ).ask()
@@ -275,6 +277,18 @@ def request_total_lexeme_loop():
             config.selected_languages, config.selected_data_types = [], []
             rprint(THANK_YOU_MESSAGE)
             break
+        elif choice == "run_all":
+            if wikidata_dump_path := prompt(
+                f"Enter Wikidata lexeme dump path (default: {DEFAULT_DUMP_EXPORT_DIR}): "
+            ):
+                wikidata_dump_path = Path(wikidata_dump_path)
+
+            parse_wd_lexeme_dump(
+                language=config.selected_languages,
+                wikidata_dump_type=["total"],
+                wikidata_dump_path=wikidata_dump_path,
+            )
+            break
         elif choice == "exit":
             return
         else:
@@ -316,7 +330,6 @@ def start_interactive_mode(operation: str = None):
         operation : str
             The type of operation that interactive mode is being ran with.
     """
-    rprint(f"[bold cyan]Welcome to {get_local_version()} interactive mode![/bold cyan]")
     while True:
         # Check if both selected_languages and selected_data_types are empty.
         if not config.selected_languages and not config.selected_data_types:
@@ -333,6 +346,12 @@ def start_interactive_mode(operation: str = None):
                     # Choice("See list of languages", "languages"),
                     Choice("Exit", "exit"),
                 ]
+            elif operation == "translations":
+                choices = [
+                    Choice("Configure translations request", "translations"),
+                    # Choice("See list of languages", "languages"),
+                    Choice("Exit", "exit"),
+                ]
 
         else:
             choices = [
@@ -356,6 +375,29 @@ def start_interactive_mode(operation: str = None):
             request_total_lexeme_loop()
             break
 
+        elif choice == "translations":
+            prompt_for_languages()
+
+            if wikidata_dump_path := prompt(
+                f"Enter Wikidata lexeme dump path (default: {DEFAULT_DUMP_EXPORT_DIR}): "
+            ):
+                wikidata_dump_path = Path(wikidata_dump_path)
+
+            if output_dir := prompt(
+                f"Enter output directory (default: {config.output_dir}): "
+            ):
+                config.output_dir = Path(output_dir)
+
+            parse_wd_lexeme_dump(
+                language=config.selected_languages,
+                wikidata_dump_type=["translations"],
+                data_types=None,
+                type_output_dir=config.output_dir,
+                wikidata_dump_path=wikidata_dump_path,
+            )
+
+            break
+
         # elif choice == "languages":
         #     see_list_languages()
         #     break

diff --git a/src/scribe_data/cli/main.py b/src/scribe_data/cli/main.py
@@ -25,6 +25,7 @@
 from pathlib import Path
 
 from rich import print as rprint
+from questionary import select
 
 from scribe_data.cli.cli_utils import validate_language_and_data_type
 from scribe_data.cli.convert import convert_wrapper
@@ -200,7 +201,8 @@ def main() -> None:
     total_parser.add_argument(
         "-wdp",
         "--wikidata-dump-path",
-        type=str,
+        nargs="?",
+        const=True,
         help="Path to a local Wikidata lexemes dump for running with '--all'.",
     )
 
@@ -298,12 +300,22 @@ def main() -> None:
         help="Download Wikidata dump. Optionally specify date in YYYYMMDD format.",
     )
     download_parser.add_argument(
-        "-od",
-        "--output-dir",
+        "-wdp",
+        "--wikidata-dump-path",
         type=str,
         help="The output directory path for the downloaded dump.",
     )
 
+    # MARK: Interactive
+
+    interactive_parser = subparsers.add_parser(
+        "interactive",
+        aliases=["i"],
+        help="Run in interactive mode.",
+        description="Run in interactive mode.",
+    )
+    interactive_parser._actions[0].help = "Show this help message and exit."
+
     # MARK: Setup CLI
 
     args = parser.parse_args()
@@ -400,9 +412,34 @@ def main() -> None:
                 wikidata_dump=args.wikidata_dump_version
                 if args.wikidata_dump_version != "latest"
                 else None,
-                output_dir=args.output_dir,
+                output_dir=args.wikidata_dump_path,
             )
 
+        elif args.command in ["interactive", "i"]:
+            rprint(
+                f"[bold cyan]Welcome to {get_version_message()} interactive mode![/bold cyan]"
+            )
+            action = select(
+                "What would you like to do?",
+                choices=[
+                    "Download a Wikidata dump",
+                    "Check for totals",
+                    "Get data",
+                    "Get translations",
+                    "Exit",
+                ],
+            ).ask()
+
+            if action == "Download a Wikidata dump":
+                wd_lexeme_dump_download_wrapper()
+            elif action == "Check for totals":
+                start_interactive_mode(operation="total")
+            elif action == "Get data":
+                start_interactive_mode(operation="get")
+            elif action == "Get translations":
+                start_interactive_mode(operation="translations")
+            else:
+                print("Skipping action")
         else:
             parser.print_help()
 

diff --git a/src/scribe_data/cli/total.py b/src/scribe_data/cli/total.py
@@ -36,6 +36,7 @@
     list_all_languages,
 )
 from scribe_data.wikidata.wikidata_utils import sparql
+from scribe_data.wikidata.wikidata_utils import parse_wd_lexeme_dump
 
 
 def get_qid_by_input(input_str):
@@ -370,7 +371,7 @@ def total_wrapper(
     language: Union[str, List[str]] = None,
     data_type: Union[str, List[str]] = None,
     all_bool: bool = False,
-    wikidata_dump: str = None,
+    wikidata_dump: Union[str, bool] = None,
 ) -> None:
     """
     Conditionally provides the full functionality of the total command.
@@ -387,9 +388,29 @@ def total_wrapper(
         all_bool : boolean
             Whether all languages and data types should be listed.
 
-        wikidata_dump : str
-            The local Wikidata dump that can be used to process data.
+        wikidata_dump : Union[str, bool]
+            The local Wikidata dump path that can be used to process data.
+            If True, indicates the flag was used without a path.
     """
+    # Handle --all flag
+    if all_bool and wikidata_dump:
+        language = "all"
+
+    if wikidata_dump is True:  # flag without a wikidata dump path
+        parse_wd_lexeme_dump(
+            language=language,
+            wikidata_dump_type=["total"],
+            wikidata_dump_path=None,
+        )
+        return
+
+    if isinstance(wikidata_dump, str):  # if user provided a wikidata dump path
+        parse_wd_lexeme_dump(
+            language=language,
+            wikidata_dump_type=["total"],
+            wikidata_dump_path=wikidata_dump,
+        )
+        return
 
     if (not language and not data_type) and all_bool:
         print_total_lexemes()

diff --git a/src/scribe_data/resources/data_type_metadata.json b/src/scribe_data/resources/data_type_metadata.json
@@ -11,5 +11,6 @@
   "prepositions": "Q4833830",
   "pronouns": "Q36224",
   "proper_nouns": "Q147276",
+  "translations": "Q21112633",
   "verbs": "Q24905"
 }