diff --git a/.gitignore b/.gitignore
index 25a21422..8028858e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -160,6 +160,8 @@ cython_debug/
 # images
 src/images/temp/*
 !src/images/temp/.gitkeep
+src/images/bot/scraper/*
+!src/images/bot/scraper/.gitkeep
 src/runelite_settings/temp.properties
 src/utilities/record_mouse/*
 !src/utilities/record_mouse/record_mouse.py
diff --git a/requirements.txt b/requirements.txt
index 3d0adb96..0fa88cb1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,12 +1,15 @@
+beautifulsoup4==4.11.1
+bs4==0.0.1
 customtkinter==4.5.10
 Deprecated==1.2.13
+flake8==6.0.0
 matplotlib==3.6.2
 mss==7.0.1
 numpy==1.23.1
 opencv_python_headless==4.5.4.60
 opencv-python==4.5.4.60
 pandas==1.5.0
-Pillow==9.2.0
+Pillow==9.3.0
 pre-commit==2.20.0
 psutil==5.9.4
 PyAutoGUI==0.9.53
diff --git a/src/images/bot/scraper/.gitkeep b/src/images/bot/scraper/.gitkeep
new file mode 100644
index 00000000..e69de29b
diff --git a/src/images/ui/scraper.png b/src/images/ui/scraper.png
new file mode 100644
index 00000000..19fab12a
Binary files /dev/null and b/src/images/ui/scraper.png differ
diff --git a/src/utilities/sprite_scraper.py b/src/utilities/sprite_scraper.py
new file mode 100644
index 00000000..0cf3c52d
--- /dev/null
+++ b/src/utilities/sprite_scraper.py
@@ -0,0 +1,206 @@
+"""
+This module contains the SpriteScraper class, which is used to download images from the OSRS Wiki.
+This utility does not work well with IPv6. If you are having issues, try disabling IPv6 on your machine.
+"""
+
+import os
+from typing import List
+from urllib.parse import urljoin
+
+import cv2
+import numpy as np
+import requests
+from bs4 import BeautifulSoup
+
+if __name__ == "__main__":
+    import sys
+
+    sys.path[0] = os.path.dirname(sys.path[0])
+
+import utilities.imagesearch as imsearch
+
+DEFAULT_DESTINATION = imsearch.BOT_IMAGES.joinpath("scraper")
+# Seconds to wait for the wiki to respond before a request is abandoned, so a stalled connection cannot hang a search.
+REQUEST_TIMEOUT = 10
+
+
+class SpriteScraper:
+    def __init__(self):
+        self.base_url = "https://oldschool.runescape.wiki"
+
+    def search_and_download(self, search_string: str, **kwargs):
+        """
+        Searches the OSRS Wiki for the given search parameter and downloads the image(s) to the appropriate folder.
+        Args:
+            search_string: A comma-separated list of wiki keywords to locate images for.
+        Keyword Args:
+            image_type: 0 = Normal, 1 = Bank, 2 = Both. Normal sprites are full-size, and bank sprites are cropped at the top
+                        to improve image search performance within the bank interface (crops out stack numbers). Default is 0.
+            destination: The folder to save the downloaded images to. Default is defined in the global `DEFAULT_DESTINATION`.
+            notify_callback: A function (usually defined in the view) that takes a string as a parameter. This function is
+                             called with search results and errors. Default is print().
+        Example:
+            This is an example of using the scraper from a Bot script to download images suitable for searching in the bank:
+            >>> scraper = SpriteScraper()
+            >>> scraper.search_and_download(
+            >>>     search_string = "molten glass, bucket of sand",
+            >>>     image_type = 1,
+            >>>     destination = imsearch.BOT_IMAGES.joinpath("bank"),
+            >>>     notify_callback = self.log_msg,
+            >>> )
+        """
+        image_type = kwargs.get("image_type", 0)
+        destination = kwargs.get("destination", DEFAULT_DESTINATION)
+        notify_callback = kwargs.get("notify_callback", print)
+
+        # Ensure the image_type is valid
+        if image_type not in (0, 1, 2):
+            notify_callback("Invalid image type argument.")
+            return
+
+        # Format search args into a list of strings
+        img_names = self.format_args(search_string)
+        if not img_names:
+            notify_callback("No search terms entered.")
+            return
+        notify_callback("Beginning search...\n")
+
+        # Iterate through each image name and download the image.
+        # An index-based loop is used because alternative spellings are inserted into the list mid-iteration.
+        i = -1
+        while i < len(img_names) - 1:
+            i += 1
+
+            # Locate image on webpage using the alt text
+            alt_text = f"File:{img_names[i]}.png"
+            url = urljoin(self.base_url, f"w/{alt_text}")
+            notify_callback(f"Searching for {img_names[i]}...")
+            try:
+                response = requests.get(url, timeout=REQUEST_TIMEOUT)
+            except requests.RequestException as e:
+                # A network failure for one term should not abort the remaining terms
+                notify_callback(f"Error: {e}\n")
+                continue
+            soup = BeautifulSoup(response.content, "html.parser")
+            img = soup.find("img", alt=alt_text.replace("_", " "))  # Img alt text doesn't seem to include underscores
+            if not img:
+                capitalized_name = self.capitalize_each_in(img_names[i])
+                if capitalized_name not in img_names:
+                    img_names.insert(i + 1, capitalized_name)
+                    notify_callback(f"No image found for {img_names[i]}. Trying alternative...\n")
+                else:
+                    notify_callback(f"No image found for: {img_names[i]}.\n")
+                continue
+            notify_callback("Found image.")
+
+            # Download image
+            img_url = urljoin(self.base_url, img["src"])
+            notify_callback("Downloading image...")
+            try:
+                downloaded_img = self.__download_image(img_url)
+            except Exception as e:
+                notify_callback(f"Error: {e}\n")
+                continue
+
+            # Save image according to image_type argument
+            filepath = destination.joinpath(img_names[i])
+            if image_type in {0, 2}:
+                cv2.imwrite(f"{filepath}.png", downloaded_img)
+                nl = "\n"
+                notify_callback(f"Success: {img_names[i]} sprite saved.{nl if image_type != 2 else ''}")
+            if image_type in {1, 2}:
+                cropped_img = self.__crop_image(downloaded_img)
+                cv2.imwrite(f"{filepath}_bank.png", cropped_img)
+                notify_callback(f"Success: {img_names[i]} bank sprite saved.\n")
+
+        notify_callback(f"Search complete. Images saved to:\n{destination}.\n")
+
+    def capitalize_each_in(self, string: str) -> str:
+        """
+        Capitalizes the first letter of each word in a string of words separated by underscores, retaining the
+        underscores.
+        """
+        exclude = ["from", "of", "to", "in", "with", "on", "at", "by", "for"]
+        return "_".join(word if word in exclude else word.capitalize() for word in string.split("_"))
+
+    def format_args(self, string: str) -> List[str]:
+        """
+        Formats a comma-separated list of strings into a list of strings where each string is capitalized and
+        underscores are used instead of spaces. Empty terms (e.g. from a trailing or doubled comma) are dropped.
+        """
+        # If the string is empty, return an empty list
+        if not string.strip():
+            return []
+        # Reduce multiple spaces to a single space
+        string = " ".join(string.split())
+        # Strip whitespace, drop empty terms and replace spaces with underscores
+        return [word.strip().replace(" ", "_").capitalize() for word in string.split(",") if word.strip()]
+
+    def __crop_image(self, image: cv2.Mat) -> cv2.Mat:
+        """
+        Makes the top of the image transparent. This is used to crop out stack numbers in bank sprites.
+        The input image is left untouched; a modified copy is returned.
+        Args:
+            image: The image to crop.
+        Returns:
+            A copy of the image with the top rows zeroed out.
+        """
+        BANK_SLOT_HALF_HEIGHT = 16
+        IMG_MAX_HEIGHT = 28
+        height = image.shape[0]  # Index instead of unpacking so an image without a channel axis also works
+        # Crop out stack numbers
+        crop_amt = int((height - BANK_SLOT_HALF_HEIGHT) / 2) if height > BANK_SLOT_HALF_HEIGHT else 0
+        if height >= IMG_MAX_HEIGHT:
+            crop_amt += 1  # Crop an additional pixel if the image is very tall
+        cropped = image.copy()
+        cropped[:crop_amt, :] = 0  # Set the top pixels to transparent
+        return cropped
+
+    def __download_image(self, url: str) -> cv2.Mat:
+        """
+        Downloads an image from a URL.
+        Args:
+            url: The URL of the image to download.
+        Returns:
+            The downloaded image as a cv2 Mat.
+        Raises:
+            requests.RequestException: If the request fails, times out or returns an HTTP error status.
+            ValueError: If the response body cannot be decoded as an image.
+        """
+        response = requests.get(url, timeout=REQUEST_TIMEOUT)
+        response.raise_for_status()
+        downloaded_img = np.frombuffer(response.content, dtype="uint8")
+        downloaded_img = cv2.imdecode(downloaded_img, cv2.IMREAD_UNCHANGED)
+        if downloaded_img is None:
+            raise ValueError(f"Could not decode image from {url}")
+        return downloaded_img
+
+
+if __name__ == "__main__":
+    scraper = SpriteScraper()
+
+    assert scraper.format_args("") == []
+    assert scraper.format_args("a, b, c") == ["A", "B", "C"]
+    assert scraper.format_args(" shark ") == ["Shark"]
+    assert scraper.format_args(" swordfish ,lobster, lobster pot ") == ["Swordfish", "Lobster", "Lobster_pot"]
+    assert scraper.format_args("Swordfish ,lobster, Lobster_Pot ") == ["Swordfish", "Lobster", "Lobster_pot"]
+    assert scraper.format_args("a,, b,") == ["A", "B"]
+
+    assert scraper.capitalize_each_in("swordfish") == "Swordfish"
+    assert scraper.capitalize_each_in("Lobster_pot") == "Lobster_Pot"
+    assert scraper.capitalize_each_in("arceuus_home_teleport") == "Arceuus_Home_Teleport"
+    assert scraper.capitalize_each_in("protect_from_magic") == "Protect_from_Magic"
+    assert scraper.capitalize_each_in("teleport_to_house") == "Teleport_to_House"
+    assert scraper.capitalize_each_in("claws_of_guthix") == "Claws_of_Guthix"
+
+    scraper.search_and_download(
+        search_string=" lobster , lobster Pot",
+        image_type=1,
+    )
+
+    scraper.search_and_download(
+        search_string="protect from magic, arceuus home teleport, nonexitent_sprite",
+        image_type=0,
+    )
+
+    print("Test cleared.")
diff --git a/src/view/__init__.py b/src/view/__init__.py
index df2c3a0d..dc7c4bc0 100644
--- a/src/view/__init__.py
+++ b/src/view/__init__.py
@@ -4,4 +4,5 @@
 from .info_frame import InfoFrame
 from .output_log_frame import OutputLogFrame
 from .settings_view import SettingsView
+from .sprite_scraper_view import SpriteScraperView
 from .title_view import TitleView
diff --git a/src/view/sprite_scraper_view.py b/src/view/sprite_scraper_view.py
new file mode 100644
index 00000000..bced6eb4
--- /dev/null
+++ b/src/view/sprite_scraper_view.py
@@ -0,0 +1,117 @@
+import threading
+import tkinter
+
+import customtkinter
+
+from utilities.sprite_scraper import SpriteScraper
+
+scraper = SpriteScraper()
+
+
+class SpriteScraperView(customtkinter.CTkFrame):
+    def __init__(self, parent):
+        super().__init__(parent)
+        self.parent = parent
+        self.parent.protocol("WM_DELETE_WINDOW", self.on_closing)
+
+        self.grid_columnconfigure(0, weight=1)
+        self.grid_rowconfigure(0, weight=0)  # - Title
+        self.grid_rowconfigure(1, weight=0)  # - Label
+        self.grid_rowconfigure(2, weight=0)  # - Entry
+        self.grid_rowconfigure(3, weight=0)  # - Submit
+        self.grid_rowconfigure(5, weight=0)  # - Radio Group
+        self.grid_rowconfigure(6, weight=1)  # - Logs
+
+        # Title
+        self.search_label = customtkinter.CTkLabel(self, text="Search OSRS Wiki for Sprites", text_font=("Roboto Medium", 12))
+        self.search_label.grid(row=0, column=0, sticky="nsew", padx=10, pady=10)
+
+        # Search instructions
+        self.search_info = customtkinter.CTkLabel(self, text="Enter sprite names separated by commas")
+        self.search_info.grid(row=1, column=0, sticky="nsew", padx=10, pady=(0, 10))
+
+        # Search entry
+        self.search_entry = customtkinter.CTkEntry(self, placeholder_text="Ex: molten glass, bucket of sand")
+        self.search_entry.grid(row=2, column=0, sticky="nsew", padx=40, pady=10)
+
+        # Submit button
+        self.search_submit_button = customtkinter.CTkButton(self, text="Submit", command=self.on_submit)
+        self.search_submit_button.grid(row=3, column=0, sticky="nsew", padx=40, pady=(0, 20))
+
+        # Radio Group
+        self.radio_group = customtkinter.CTkFrame(self)
+        self.radio_group.grid_columnconfigure(0, weight=1)
+        self.radio_group.grid_columnconfigure(1, weight=1)
+        self.radio_group.grid_rowconfigure(0, weight=1)
+        self.radio_group.grid_rowconfigure(1, weight=1)
+        self.radio_group.grid_rowconfigure(2, weight=1)
+        self.radio_group.grid_rowconfigure(3, weight=1)
+
+        # -- Radio Group Label
+        self.lbl_radio_group = customtkinter.CTkLabel(master=self.radio_group, text="Select the type of sprites to download")
+        self.lbl_radio_group.grid(row=0, column=0, columnspan=4, sticky="nsew", padx=10, pady=(10, 0))
+
+        self.radio_var = tkinter.IntVar(self)
+
+        # -- Radio Buttons
+        self.radio_normal = customtkinter.CTkRadioButton(master=self.radio_group, text="", variable=self.radio_var, value=0)
+        self.radio_bank = customtkinter.CTkRadioButton(master=self.radio_group, text="", variable=self.radio_var, value=1)
+        self.radio_both = customtkinter.CTkRadioButton(master=self.radio_group, text="", variable=self.radio_var, value=2)
+        self.radio_normal.grid(row=1, column=0, sticky="e", padx=10, pady=10)
+        self.radio_bank.grid(row=2, column=0, sticky="e", padx=10, pady=10)
+        self.radio_both.grid(row=3, column=0, sticky="e", padx=10, pady=(10, 20))
+
+        # -- Radio Button Labels
+        self.lbl_radio_normal = customtkinter.CTkLabel(master=self.radio_group, text="Normal")
+        self.lbl_radio_bank = customtkinter.CTkLabel(master=self.radio_group, text="Bank")
+        self.lbl_radio_both = customtkinter.CTkLabel(master=self.radio_group, text="Normal + Bank")
+        self.lbl_radio_normal.grid(row=1, column=1, sticky="w", padx=10)
+        self.lbl_radio_bank.grid(row=2, column=1, sticky="w", padx=10)
+        self.lbl_radio_both.grid(row=3, column=1, sticky="w", padx=10)
+
+        self.radio_group.grid(row=5, column=0, sticky="nsew", padx=10)
+
+        # Logs
+        self.log_frame = customtkinter.CTkFrame(self)
+        self.log_frame.grid_columnconfigure(0, weight=1)
+        self.log_frame.grid_rowconfigure(0, weight=0)
+        self.log_frame.grid_rowconfigure(1, weight=1)
+
+        self.lbl_logs = customtkinter.CTkLabel(master=self.log_frame, text="Logs:")
+        self.lbl_logs.grid(row=0, column=0, sticky="nsew", padx=10, pady=(10, 0))
+
+        self.txt_logs = tkinter.Text(
+            master=self.log_frame,
+            font=("Roboto", 10),
+            bg="#343638",
+            fg="#ffffff",
+        )
+        self.txt_logs.grid(row=1, column=0, sticky="nsew", padx=10, pady=10)
+        self.txt_logs.configure(state=tkinter.DISABLED)
+        self.log_frame.grid(row=6, column=0, sticky="nsew", padx=10, pady=10)
+
+    def on_closing(self):
+        self.parent.destroy()
+
+    def on_submit(self):
+        search_string = self.search_entry.get()
+        thread = threading.Thread(
+            target=scraper.search_and_download,
+            kwargs={"search_string": search_string, "image_type": self.radio_var.get(), "notify_callback": self.update_log},
+            daemon=True,
+        )
+        self.search_entry.delete(0, "end")
+        self.search_entry.clear_placeholder()
+        self.txt_logs.configure(state=tkinter.NORMAL)
+        self.txt_logs.delete("1.0", "end")
+        self.txt_logs.configure(state=tkinter.DISABLED)
+        thread.start()
+
+    def update_log(self, text: str):
+        """
+        Updates the log with the given text.
+        """
+        self.txt_logs.configure(state=tkinter.NORMAL)
+        self.txt_logs.insert("end", "\n" + text)
+        self.txt_logs.configure(state=tkinter.DISABLED)
+        self.txt_logs.see(tkinter.END)
diff --git a/src/view/title_view.py b/src/view/title_view.py
index a8c85f82..2498cdc9 100644
--- a/src/view/title_view.py
+++ b/src/view/title_view.py
@@ -4,6 +4,8 @@
 import customtkinter
 from PIL import Image, ImageTk
 
+from view.sprite_scraper_view import SpriteScraperView
+
 
 class TitleView(customtkinter.CTkFrame):
     def __init__(self, parent, main):
@@ -99,6 +101,24 @@ def __init__(self, parent, main):
         )
         self.btn_feedback.grid(row=3, column=2, padx=15, pady=(15, 0), sticky="w")
 
+        # -- Sprite Scraper
+        # LANCZOS is a resize() filter; handed to PhotoImage() it lands in `size` and the filter is never applied
+        self.scraper_logo = ImageTk.PhotoImage(
+            Image.open(f"{self.logo_path}/images/ui/scraper.png").resize((IMG_SIZE, IMG_SIZE), Image.LANCZOS),
+        )
+        self.btn_sprite_scraper = customtkinter.CTkButton(
+            master=self,
+            text="Scraper",
+            image=self.scraper_logo,
+            width=BTN_WIDTH,
+            height=BTN_HEIGHT,
+            corner_radius=15,
+            fg_color=DEFAULT_GRAY,
+            compound="top",
+            command=self.btn_scraper_clicked,
+        )
+        self.btn_sprite_scraper.grid(row=4, column=1, padx=15, pady=(15, 0))
+
     def btn_github_clicked(self):
         wb.open_new_tab("https://github.com/kelltom/OSRS-Bot-COLOR")
 
@@ -107,3 +127,10 @@ def btn_feedback_clicked(self):
 
     def btn_bug_report_clicked(self):
         wb.open_new_tab("https://github.com/kelltom/OSRS-Bot-COLOR/issues/new/choose")
+
+    def btn_scraper_clicked(self):
+        window = customtkinter.CTkToplevel(master=self)
+        window.geometry("400x660")
+        window.title("OSRS Wiki Sprite Scraper")
+        view = SpriteScraperView(parent=window)
+        view.pack(side="top", fill="both", expand=True, padx=20, pady=20)