From ae50c1a571e370c81c4414692c3d36e8a28e5412 Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Fri, 28 Jun 2024 22:20:26 -0400 Subject: [PATCH 01/20] start refactor on unembed-icons: rename to unembed_icons to conform to PEP 8; start rewrite with getData(url) --- scripts/unembed-icons.py | 53 ---------------------------------------- scripts/unembed_icons.py | 20 +++++++++++++++ 2 files changed, 20 insertions(+), 53 deletions(-) delete mode 100755 scripts/unembed-icons.py create mode 100644 scripts/unembed_icons.py diff --git a/scripts/unembed-icons.py b/scripts/unembed-icons.py deleted file mode 100755 index 7f73c4a9b..000000000 --- a/scripts/unembed-icons.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python - -import json -import io -from argparse import ArgumentParser -import colorlog -from base64 import b64decode - -parser = ArgumentParser(description="Checks ELI sourcen for validity and common errors") -parser.add_argument("path", nargs="+", help="Path of files to check.") -parser.add_argument( - "-v", - "--verbose", - dest="verbose_count", - action="count", - default=0, - help="increases log verbosity for each occurence.", -) -arguments = parser.parse_args() -logger = colorlog.getLogger() -# Start off at Error, reduce by one level for each -v argument -logger.setLevel(max(4 - arguments.verbose_count, 0) * 10) -handler = colorlog.StreamHandler() -handler.setFormatter(colorlog.ColoredFormatter()) -logger.addHandler(handler) - -spacesave = 0 - -knownIcons = {} - -for filename in arguments.path: - with io.open(filename, "r", encoding="utf-8") as f: - source = json.load(f) - if "icon" in source["properties"]: - if source["properties"]["icon"].startswith("data:image/png"): - iconsize = len(source["properties"]["icon"].encode("utf-8")) - spacesave += iconsize - logger.debug("{} icon will disembedded to save {} KB".format(filename, round(iconsize / 1024.0, 2))) - if source["properties"]["icon"] in knownIcons: - iconpath = knownIcons[source["properties"]["icon"]] - logger.info("I already have a known icon for {} : I'll reuse {}".format(filename, iconpath)) - else: - iconpath = filename.replace(".geojson", ".png") - with open(iconpath, "wb") as ico: - ico.write(b64decode(source["properties"]["icon"].split(",")[1])) - knownIcons[source["properties"]["icon"]] = iconpath - source["properties"]["icon"] = "https://osmlab.github.io/editor-layer-index/" + iconpath - with io.open(filename, "w", encoding="utf-8") as fw: - json.dump(source, fw, sort_keys=True, indent=4) - else: - logger.debug("{} contains a good icon, {}".format(filename, source["properties"]["icon"])) -if spacesave > 0: - logger.warning("Disembedding all icons saved {} KB".format(round(spacesave / 1024.0, 2))) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py new file mode 100644 index 000000000..0e519fb8f --- /dev/null +++ b/scripts/unembed_icons.py @@ -0,0 +1,20 @@ +import re +from base64 import b64decode +import mimetypes + +def getData(url): + data_re = re.search(r"^data:(.*);base64,(.*)$", url) + if data_re: + mime = data_re.group(1) + data = b64decode(data_re.group(2)) + return [mime, data] + elif url.startswith("data:"): + print("unsupported data URL variation") + else: + print("URL isn't a data URL") + return None + +#def main(): + +if __name__ == "__main__": + main() \ No newline at end of file From c7cee09c1053be01c64af7989573502b2259425d Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sat, 29 Jun 2024 02:24:03 -0400 Subject: [PATCH 02/20] implement findFile() for just binary data for now --- scripts/unembed_icons.py | 70 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 5 deletions(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index 0e519fb8f..fcb1ee706 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -1,20 +1,80 @@ import re from base64 import b64decode -import mimetypes +import mimetypes +from pathlib import Path +import json +import logging -def getData(url): +root = None + +def getData(url): # returns: [mime, data] or None upon error data_re = re.search(r"^data:(.*);base64,(.*)$", url) if data_re: mime = data_re.group(1) data = b64decode(data_re.group(2)) return [mime, data] elif url.startswith("data:"): - print("unsupported data URL variation") + logging.error("unsupported data-URL variation") else: - print("URL isn't a data URL") + logging.error("URL isn't a data-URL") return None -#def main(): +def findFile(parent_path, data, mime): # returns: Path or None upon none found + # NOTE: all the extra stuff with text data is to compensate for git changing newlines + + found_path = None + + data_size = len(data) + extensions = mimetypes.guess_all_extensions(mime, strict=False) + + data_istext = False + data_text = None + data_lines = None + try: + data_text = data.decode() + except: + pass + if isinstance(data_text, str): + logging.info("data is text") + data_istext = True + data_lines = data_text.splitlines() + else: + logging.info("data is binary") + + glob = None + if len(extensions) == 1: + glob = "*{}".format(extensions[0]) + elif len(extensions) > 1: + glob = "*[{}]".format("][".join(extensions)) + else: + logging.warning("invalid mime-type") + glob = "*" + + logging.info("walking `{}' with glob: `{}'".format(parent_path, glob)) + + for file_path in parent_path.rglob(glob): # OPTIMIZE: ignore hidden files (especially .git) + logging.debug("checking against file: `{}'".format(file_path)) + if(data_istext == False): + file_size = file_path.stat().st_size + if file_size == data_size: + # OPTIMIZE: could be optimized by using buffering + file_handle = open(file_path, "rb") + file = file_handle.read() + file_handle.close() + if file == data: + logging.info("found match: `{}'".format(file_path)) + found_path = file_path + break + # TODO: handle text data + + if found_path != None: + return found_path + else: + return None + +def main(): + pass # TODO if __name__ == "__main__": + root = Path(__file__).parents[1] main() \ No newline at end of file From fb7808cca25ab67eb891e3af3788f78ecf75fe37 Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sat, 29 Jun 2024 03:00:09 -0400 Subject: [PATCH 03/20] support text data --- scripts/unembed_icons.py | 68 ++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index fcb1ee706..bc5775779 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -19,27 +19,29 @@ def getData(url): # returns: [mime, data] or None upon error logging.error("URL isn't a data-URL") return None -def findFile(parent_path, data, mime): # returns: Path or None upon none found - # NOTE: all the extra stuff with text data is to compensate for git changing newlines - - found_path = None - - data_size = len(data) - extensions = mimetypes.guess_all_extensions(mime, strict=False) - - data_istext = False +def decodeLines(data): data_text = None - data_lines = None try: data_text = data.decode() except: pass if isinstance(data_text, str): logging.info("data is text") - data_istext = True - data_lines = data_text.splitlines() + return data_text.splitlines() else: logging.info("data is binary") + return None + +def findFile(parent_path, data, mime): # returns: Path or None upon none found + # NOTE: all the extra stuff with text data is to compensate for git changing newlines + + data_size = len(data) + extensions = mimetypes.guess_all_extensions(mime, strict=False) + + data_istext = False + data_lines = decodeLines(data) + if data_lines != None: + data_istext = True glob = None if len(extensions) == 1: @@ -52,29 +54,33 @@ def findFile(parent_path, data, mime): # returns: Path or None upon none found logging.info("walking `{}' with glob: `{}'".format(parent_path, glob)) - for file_path in parent_path.rglob(glob): # OPTIMIZE: ignore hidden files (especially .git) + for file_path in parent_path.rglob(glob): # OPTIMIZE: ignore hidden files (especially .git) # TODO: force only match files (not including directories) logging.debug("checking against file: `{}'".format(file_path)) - if(data_istext == False): - file_size = file_path.stat().st_size - if file_size == data_size: - # OPTIMIZE: could be optimized by using buffering - file_handle = open(file_path, "rb") - file = file_handle.read() - file_handle.close() - if file == data: - logging.info("found match: `{}'".format(file_path)) - found_path = file_path - break - # TODO: handle text data - - if found_path != None: - return found_path - else: - return None + file_size = file_path.stat().st_size + if file_size == data_size: + # OPTIMIZE: could be optimized by using buffering + file_handle = open(file_path, "rb") + file = file_handle.read() + file_handle.close() + if file == data: + logging.info("found binary match: `{}'".format(file_path)) + return file_path + break + if(data_istext): + # TODO: deduplicate reading file + file_handle = open(file_path, "rb") + file = file_handle.read() + file_handle.close() + file_lines = decodeLines(file) + if file_lines == data_lines: + logging.info("found text match: `{}'".format(file_path)) + return file_path + break + return None def main(): pass # TODO if __name__ == "__main__": - root = Path(__file__).parents[1] + root = Path(__file__).parents[1] # get repository's root main() \ No newline at end of file From 80fe800853cfed3074443580f0a19c873fbbe288 Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sat, 29 Jun 2024 03:20:48 -0400 Subject: [PATCH 04/20] resolve "TODO: deduplicate reading file" --- scripts/unembed_icons.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index bc5775779..779b4bd32 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -56,6 +56,7 @@ def findFile(parent_path, data, mime): # returns: Path or None upon none found for file_path in parent_path.rglob(glob): # OPTIMIZE: ignore hidden files (especially .git) # TODO: force only match files (not including directories) logging.debug("checking against file: `{}'".format(file_path)) + file = None file_size = file_path.stat().st_size if file_size == data_size: # OPTIMIZE: could be optimized by using buffering @@ -67,10 +68,10 @@ def findFile(parent_path, data, mime): # returns: Path or None upon none found return file_path break if(data_istext): - # TODO: deduplicate reading file - file_handle = open(file_path, "rb") - file = file_handle.read() - file_handle.close() + if file == None: + file_handle = open(file_path, "rb") + file = file_handle.read() + file_handle.close() file_lines = decodeLines(file) if file_lines == data_lines: logging.info("found text match: `{}'".format(file_path)) From d2f72419acb39448210d3cf599ac8dc9248d2bbd Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sat, 29 Jun 2024 05:35:09 -0400 Subject: [PATCH 05/20] added check counter --- scripts/unembed_icons.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index 779b4bd32..b28526abe 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -26,10 +26,10 @@ def decodeLines(data): except: pass if isinstance(data_text, str): - logging.info("data is text") + logging.debug("data is text") return data_text.splitlines() else: - logging.info("data is binary") + logging.debug("data is binary") return None def findFile(parent_path, data, mime): # returns: Path or None upon none found @@ -53,8 +53,9 @@ def findFile(parent_path, data, mime): # returns: Path or None upon none found glob = "*" logging.info("walking `{}' with glob: `{}'".format(parent_path, glob)) - + check_count = 0 for file_path in parent_path.rglob(glob): # OPTIMIZE: ignore hidden files (especially .git) # TODO: force only match files (not including directories) + check_count += 1 logging.debug("checking against file: `{}'".format(file_path)) file = None file_size = file_path.stat().st_size @@ -77,6 +78,7 @@ def findFile(parent_path, data, mime): # returns: Path or None upon none found logging.info("found text match: `{}'".format(file_path)) return file_path break + logging.warning("no match found in {} checked files".format(check_count)) return None def main(): From 8e6ce955a67cd9d034b2d74d3f0bc6bda23afeca Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sat, 29 Jun 2024 07:02:46 -0400 Subject: [PATCH 06/20] stop on invalid mime-type --- scripts/unembed_icons.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index b28526abe..0a7c376b7 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -50,6 +50,7 @@ def findFile(parent_path, data, mime): # returns: Path or None upon none found glob = "*[{}]".format("][".join(extensions)) else: logging.warning("invalid mime-type") + return None # TODO: remove when `TODO: force only match files (not including directories)' is done glob = "*" logging.info("walking `{}' with glob: `{}'".format(parent_path, glob)) From 4f02fd6ec6e500a9edc929b9bc09dd545e4906db Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sat, 29 Jun 2024 10:35:40 -0400 Subject: [PATCH 07/20] almost at basic functionality (just missing replacing icon URL in the GeoJSON) --- scripts/unembed_icons.py | 65 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 6 deletions(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index 0a7c376b7..7e2a1994d 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -4,8 +4,11 @@ from pathlib import Path import json import logging +import tkinter.filedialog as fd -root = None +host = "https://osmlab.github.io/editor-layer-index/" + +root_path = None def getData(url): # returns: [mime, data] or None upon error data_re = re.search(r"^data:(.*);base64,(.*)$", url) @@ -32,7 +35,7 @@ def decodeLines(data): logging.debug("data is binary") return None -def findFile(parent_path, data, mime): # returns: Path or None upon none found +def findFile(parent_path, mime, data): # returns: Path or None upon none found # NOTE: all the extra stuff with text data is to compensate for git changing newlines data_size = len(data) @@ -47,7 +50,7 @@ def findFile(parent_path, data, mime): # returns: Path or None upon none found if len(extensions) == 1: glob = "*{}".format(extensions[0]) elif len(extensions) > 1: - glob = "*[{}]".format("][".join(extensions)) + glob = "*[{}]".format("][".join(extensions)) # IMPROVEMENT: separate out the dot, instead of having all. else: logging.warning("invalid mime-type") return None # TODO: remove when `TODO: force only match files (not including directories)' is done @@ -55,7 +58,7 @@ def findFile(parent_path, data, mime): # returns: Path or None upon none found logging.info("walking `{}' with glob: `{}'".format(parent_path, glob)) check_count = 0 - for file_path in parent_path.rglob(glob): # OPTIMIZE: ignore hidden files (especially .git) # TODO: force only match files (not including directories) + for file_path in parent_path.rglob(glob): # TODO: ignore hidden files (especially .git) and only match files (i.e. not including directories) check_count += 1 logging.debug("checking against file: `{}'".format(file_path)) file = None @@ -82,9 +85,59 @@ def findFile(parent_path, data, mime): # returns: Path or None upon none found logging.warning("no match found in {} checked files".format(check_count)) return None +def saveAs(directory, mime): + exts = mimetypes.guess_all_extensions(mime, strict=True) + exts_mod = [] + for ext in exts: + exts_mod.append((ext[1:].upper(), ext)) + logging.info("waiting for Save As dialog") + output_path = Path(fd.asksaveasfilename(filetypes=exts_mod, defaultextension=exts_mod[0], initialdir=directory)) + # TODO: backup input for terminal interfaces (dumb input(); check if parent is at least valid; relative to repo root_path; list recommended extensions; make sure the import failure is dealt with) + + return output_path + +def save(path, binary, data): + # TODO: confirm with user first, and show if it will be replacing something, and of course honor command line arguments + mode = None # TODO: maybe just require input to be binary, actually? + if binary == True: + mode = "wb" + mode_str = "bytes" + elif binary == False: + mode = "w" + mode_str = "chars" + file_handle = open(path, mode) + logging.info("saving {} {} into the file `{}'".format(len(data), mode_str, path)) + file = file_handle.write(data) + file_handle.close() + +def single(geojson_path): + geojson_handle = open(geojson_path) + geojson = json.load(geojson_handle) + geojson_handle.close() + + url = geojson["properties"]["icon"] + + url_mime, url_data = getData(url) # TODO: gracefully quit if it returns None + + icon_path = findFile(root_path, url_mime, url_data) + if icon_path == None: + icon_path = saveAs(geojson_path.parent, url_mime) # TODO: handle cancel + save(icon_path, True, url_data) + new_url = host + icon_path.relative_to(root_path).as_posix() # TODO: make sure it is underneath + logging.info("new URL: `{}'".format(new_url)) + # TODO: handle replacing url in the GeoJSON: do a search and replace of the icon url as to preserve formatting of file (check it can find it at the beginning, to present a warning to the user (that it wont be able to replace)). (yes I know this method could cause issues normally, because escaped chars, but data URLs shouldn't have that) + def main(): - pass # TODO + logging.getLogger().setLevel(logging.DEBUG) + + geojson_path = fd.askopenfilename(filetypes=[("GeoJSON", ".geojson")], initialdir=root_path / "sources") + single(Path(geojson_path)) + + # TODO: handle more than one GeoJSON (should take in a folder too (rglob that), and multiple files, or a mix of both) + # OPTIMIZE: use a cache of hashes if doing multiple files + # TODO: posix interface with these options: --confirm-all --no-confirm-overwrite --only-existing + log level stuff + # TODO: maybe duplicate the old version's interface as much as possible? if __name__ == "__main__": - root = Path(__file__).parents[1] # get repository's root + root_path = Path(__file__).parents[1] # get repository's root_path main() \ No newline at end of file From 831e3a64348850d5f59d0f808d3e1c9443ba595a Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sat, 29 Jun 2024 11:26:57 -0400 Subject: [PATCH 08/20] basic functionality! --- scripts/unembed_icons.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index 7e2a1994d..6d04db65a 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -90,14 +90,14 @@ def saveAs(directory, mime): exts_mod = [] for ext in exts: exts_mod.append((ext[1:].upper(), ext)) - logging.info("waiting for Save As dialog") + logging.info("(waiting for user to finish Save As dialog)") # TODO: rewrite the english output_path = Path(fd.asksaveasfilename(filetypes=exts_mod, defaultextension=exts_mod[0], initialdir=directory)) # TODO: backup input for terminal interfaces (dumb input(); check if parent is at least valid; relative to repo root_path; list recommended extensions; make sure the import failure is dealt with) return output_path def save(path, binary, data): - # TODO: confirm with user first, and show if it will be replacing something, and of course honor command line arguments + # TODO: confirm with user first, and show if it will be replacing something, their relative sizes, plus of course honor command line arguments mode = None # TODO: maybe just require input to be binary, actually? if binary == True: mode = "wb" @@ -111,8 +111,11 @@ def save(path, binary, data): file_handle.close() def single(geojson_path): - geojson_handle = open(geojson_path) - geojson = json.load(geojson_handle) + logging.info("operating on `{}'".format(geojson_path)) + + # TODO: clean this up + geojson_handle = open(geojson_path, "r", encoding='utf8') + geojson = json.loads(geojson_handle.read()) geojson_handle.close() url = geojson["properties"]["icon"] @@ -121,11 +124,17 @@ def single(geojson_path): icon_path = findFile(root_path, url_mime, url_data) if icon_path == None: + logging.info("will now be saving it, since couldn't find existing") # TODO: rewrite the english icon_path = saveAs(geojson_path.parent, url_mime) # TODO: handle cancel save(icon_path, True, url_data) new_url = host + icon_path.relative_to(root_path).as_posix() # TODO: make sure it is underneath logging.info("new URL: `{}'".format(new_url)) - # TODO: handle replacing url in the GeoJSON: do a search and replace of the icon url as to preserve formatting of file (check it can find it at the beginning, to present a warning to the user (that it wont be able to replace)). (yes I know this method could cause issues normally, because escaped chars, but data URLs shouldn't have that) + + geojson_binary_handle = open(geojson_path, "rb") + geojson_binary = geojson_binary_handle.read() + geojson_binary_new = geojson_binary.replace(bytes(url, encoding="utf8"), bytes(new_url, encoding="utf8")) # IMPROVEMENT: utf8 should be safe, though really you would want it following the input GeoJSON's detected encoding (I now force utf8, so this is invalid). + geojson_binary_handle.close() + save(geojson_path, True, geojson_binary_new) def main(): logging.getLogger().setLevel(logging.DEBUG) @@ -135,7 +144,7 @@ def main(): # TODO: handle more than one GeoJSON (should take in a folder too (rglob that), and multiple files, or a mix of both) # OPTIMIZE: use a cache of hashes if doing multiple files - # TODO: posix interface with these options: --confirm-all --no-confirm-overwrite --only-existing + log level stuff + # TODO: posix interface with these options: --confirm-all --no-confirm-overwrite --only-existing --no-write (for testing) + log-level stuff # TODO: maybe duplicate the old version's interface as much as possible? if __name__ == "__main__": From 2159e0becddec96f53abd57401160f7c01a1c264 Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sat, 29 Jun 2024 11:30:04 -0400 Subject: [PATCH 09/20] add note --- scripts/unembed_icons.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index 6d04db65a..6a023c920 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -1,10 +1,11 @@ +# NOTE: do NOT import any modules outside the standard library import re from base64 import b64decode import mimetypes from pathlib import Path import json import logging -import tkinter.filedialog as fd +import tkinter.filedialog as fd # TODO: handle if not present host = "https://osmlab.github.io/editor-layer-index/" From 5af58c39f48440785bb95b83dfbaa273181cf809 Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sat, 29 Jun 2024 11:33:54 -0400 Subject: [PATCH 10/20] add another note --- scripts/unembed_icons.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index 6a023c920..fe614abf4 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -131,6 +131,7 @@ def single(geojson_path): new_url = host + icon_path.relative_to(root_path).as_posix() # TODO: make sure it is underneath logging.info("new URL: `{}'".format(new_url)) + # NOTE: It is done this way (binary search and replace) to preserve the formatting of the file. Yes I know this could cause issues normally, because escaped characters and such, but data URLs shouldn't have that. geojson_binary_handle = open(geojson_path, "rb") geojson_binary = geojson_binary_handle.read() geojson_binary_new = geojson_binary.replace(bytes(url, encoding="utf8"), bytes(new_url, encoding="utf8")) # IMPROVEMENT: utf8 should be safe, though really you would want it following the input GeoJSON's detected encoding (I now force utf8, so this is invalid). From 1c2ee1378ffa2cf807c9657bd0f2bf19f3517b14 Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sat, 29 Jun 2024 11:47:28 -0400 Subject: [PATCH 11/20] extra comments --- scripts/unembed_icons.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index fe614abf4..c32125a80 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -1,4 +1,4 @@ -# NOTE: do NOT import any modules outside the standard library +# NOTE: please do not import any modules outside of the standard library import re from base64 import b64decode import mimetypes @@ -146,9 +146,11 @@ def main(): # TODO: handle more than one GeoJSON (should take in a folder too (rglob that), and multiple files, or a mix of both) # OPTIMIZE: use a cache of hashes if doing multiple files - # TODO: posix interface with these options: --confirm-all --no-confirm-overwrite --only-existing --no-write (for testing) + log-level stuff + # TODO: posix interface with these options: --confirm-all --no-confirm-overwrite --only-existing --no-write (for testing) + log-level stuff; specify input file(s) (and possible output if inputting only one geojson) + force cli file selectors + force save (i.e. skip checking for existing icon files) # TODO: maybe duplicate the old version's interface as much as possible? if __name__ == "__main__": root_path = Path(__file__).parents[1] # get repository's root_path - main() \ No newline at end of file + main() + +# NOTE: one of the goals is for it to also be usable as a module \ No newline at end of file From 4af8de167f7b6e46715dbfd71d0fd2781fe0c148 Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sat, 29 Jun 2024 12:10:56 -0400 Subject: [PATCH 12/20] add comments --- scripts/unembed_icons.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index c32125a80..5c46ceb06 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -7,7 +7,7 @@ import logging import tkinter.filedialog as fd # TODO: handle if not present -host = "https://osmlab.github.io/editor-layer-index/" +host = "https://osmlab.github.io/editor-layer-index/" # TODO: ideally load this from a common config/etc file root_path = None @@ -139,7 +139,7 @@ def single(geojson_path): save(geojson_path, True, geojson_binary_new) def main(): - logging.getLogger().setLevel(logging.DEBUG) + logging.getLogger().setLevel(logging.DEBUG) # TODO: add color geojson_path = fd.askopenfilename(filetypes=[("GeoJSON", ".geojson")], initialdir=root_path / "sources") single(Path(geojson_path)) From 80af05b81890be3bf3c0ccc6a8ee919fc09551a2 Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sat, 29 Jun 2024 13:05:44 -0400 Subject: [PATCH 13/20] even more comments --- scripts/unembed_icons.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index 5c46ceb06..0ec5951fb 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -148,9 +148,11 @@ def main(): # OPTIMIZE: use a cache of hashes if doing multiple files # TODO: posix interface with these options: --confirm-all --no-confirm-overwrite --only-existing --no-write (for testing) + log-level stuff; specify input file(s) (and possible output if inputting only one geojson) + force cli file selectors + force save (i.e. skip checking for existing icon files) # TODO: maybe duplicate the old version's interface as much as possible? + # TODO: write a help text on how to use it and how it operates if __name__ == "__main__": root_path = Path(__file__).parents[1] # get repository's root_path main() -# NOTE: one of the goals is for it to also be usable as a module \ No newline at end of file +# NOTE: one of the goals is for it to also be usable as a module +# TODO: be clear about what file(s) you're now supposed to be selecting \ No newline at end of file From efc7cccd31d8cf38d553f6202a9707521838ca22 Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sat, 29 Jun 2024 13:49:01 -0400 Subject: [PATCH 14/20] edit & add comments --- scripts/unembed_icons.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index 0ec5951fb..533727bbe 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -51,7 +51,7 @@ def findFile(parent_path, mime, data): # returns: Path or None upon none found if len(extensions) == 1: glob = "*{}".format(extensions[0]) elif len(extensions) > 1: - glob = "*[{}]".format("][".join(extensions)) # IMPROVEMENT: separate out the dot, instead of having all. + glob = "*[{}]".format("][".join(extensions)) # IMPROVEMENT: separate out the dot, instead of having it in every []. else: logging.warning("invalid mime-type") return None # TODO: remove when `TODO: force only match files (not including directories)' is done @@ -99,7 +99,7 @@ def saveAs(directory, mime): def save(path, binary, data): # TODO: confirm with user first, and show if it will be replacing something, their relative sizes, plus of course honor command line arguments - mode = None # TODO: maybe just require input to be binary, actually? + mode = None # TODO: maybe just require input to be binary, actually? or rewrite this functionality general, maybe not as a function at all? if binary == True: mode = "wb" mode_str = "bytes" @@ -132,6 +132,7 @@ def single(geojson_path): logging.info("new URL: `{}'".format(new_url)) # NOTE: It is done this way (binary search and replace) to preserve the formatting of the file. Yes I know this could cause issues normally, because escaped characters and such, but data URLs shouldn't have that. + # TODO: gracefully continue if there's an error. alternatively, perform a traditional json dumps (with an argument to disable it). geojson_binary_handle = open(geojson_path, "rb") geojson_binary = geojson_binary_handle.read() geojson_binary_new = geojson_binary.replace(bytes(url, encoding="utf8"), bytes(new_url, encoding="utf8")) # IMPROVEMENT: utf8 should be safe, though really you would want it following the input GeoJSON's detected encoding (I now force utf8, so this is invalid). @@ -146,7 +147,7 @@ def main(): # TODO: handle more than one GeoJSON (should take in a folder too (rglob that), and multiple files, or a mix of both) # OPTIMIZE: use a cache of hashes if doing multiple files - # TODO: posix interface with these options: --confirm-all --no-confirm-overwrite --only-existing --no-write (for testing) + log-level stuff; specify input file(s) (and possible output if inputting only one geojson) + force cli file selectors + force save (i.e. skip checking for existing icon files) + # TODO: posix interface with these options: --confirm-all --no-confirm-overwrite --only-existing --no-write (for testing) + log-level stuff; specify input file(s) (and possible output if inputting only one geojson) + force cli file selectors + force save (i.e. skip checking for existing icon files) + override root_path # TODO: maybe duplicate the old version's interface as much as possible? # TODO: write a help text on how to use it and how it operates @@ -155,4 +156,4 @@ def main(): main() # NOTE: one of the goals is for it to also be usable as a module -# TODO: be clear about what file(s) you're now supposed to be selecting \ No newline at end of file +# TODO: be clear about what file(s) you're now supposed to be selecting From 5166fc2f31b391381b6541e4ed8951183a2e61f6 Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sun, 30 Jun 2024 07:29:07 -0400 Subject: [PATCH 15/20] ignore directories and hidden files when searching for icons + support invalid mime types --- scripts/unembed_icons.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index 533727bbe..11cd9a62c 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -54,12 +54,17 @@ def findFile(parent_path, mime, data): # returns: Path or None upon none found glob = "*[{}]".format("][".join(extensions)) # IMPROVEMENT: separate out the dot, instead of having it in every []. else: logging.warning("invalid mime-type") - return None # TODO: remove when `TODO: force only match files (not including directories)' is done glob = "*" logging.info("walking `{}' with glob: `{}'".format(parent_path, glob)) check_count = 0 - for file_path in parent_path.rglob(glob): # TODO: ignore hidden files (especially .git) and only match files (i.e. not including directories) + for file_path in parent_path.rglob(glob): + if not file_path.is_file(): + logging.debug("`{}' is a directory, skipping..".format(file_path)) + continue + if re.search(r"/\.", file_path.relative_to(parent_path).as_posix()): + logging.debug("`{}' is hidden, skipping..".format(file_path)) + continue check_count += 1 logging.debug("checking against file: `{}'".format(file_path)) file = None @@ -88,11 +93,15 @@ def findFile(parent_path, mime, data): # returns: Path or None upon none found def saveAs(directory, mime): exts = mimetypes.guess_all_extensions(mime, strict=True) + logging.debug("opening Save As dialog in `{}', with the mime type `{}' which means the extensions: {}".format(directory, mime, exts)) exts_mod = [] for ext in exts: exts_mod.append((ext[1:].upper(), ext)) logging.info("(waiting for user to finish Save As dialog)") # TODO: rewrite the english - output_path = Path(fd.asksaveasfilename(filetypes=exts_mod, defaultextension=exts_mod[0], initialdir=directory)) + if len(exts_mod) > 0: + output_path = Path(fd.asksaveasfilename(filetypes=exts_mod, defaultextension=exts_mod[0], initialdir=directory)) + else: + output_path = Path(fd.asksaveasfilename(initialdir=directory)) # TODO: backup input for terminal interfaces (dumb input(); check if parent is at least valid; relative to repo root_path; list recommended extensions; make sure the import failure is dealt with) return output_path From 9daf9af083674de0fb3e0dd21e145dbd8d8454e0 Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sun, 30 Jun 2024 07:51:28 -0400 Subject: [PATCH 16/20] handle canceling of Selection and Save As dialogs --- scripts/unembed_icons.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index 11cd9a62c..9c50573c6 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -99,14 +99,19 @@ def saveAs(directory, mime): exts_mod.append((ext[1:].upper(), ext)) logging.info("(waiting for user to finish Save As dialog)") # TODO: rewrite the english if len(exts_mod) > 0: - output_path = Path(fd.asksaveasfilename(filetypes=exts_mod, defaultextension=exts_mod[0], initialdir=directory)) + output_path = fd.asksaveasfilename(filetypes=exts_mod, defaultextension=exts_mod[0], initialdir=directory) else: - output_path = Path(fd.asksaveasfilename(initialdir=directory)) + output_path = fd.asksaveasfilename(initialdir=directory) # TODO: backup input for terminal interfaces (dumb input(); check if parent is at least valid; relative to repo root_path; list recommended extensions; make sure the import failure is dealt with) - return output_path + if len(output_path) <= 0: + return None + return Path(output_path) def save(path, binary, data): + if path == None: + logging.warning("not saving, as None passed as path") + return None # TODO: confirm with user first, and show if it will be replacing something, their relative sizes, plus of course honor command line arguments mode = None # TODO: maybe just require input to be binary, actually? or rewrite this functionality general, maybe not as a function at all? if binary == True: @@ -135,7 +140,10 @@ def single(geojson_path): icon_path = findFile(root_path, url_mime, url_data) if icon_path == None: logging.info("will now be saving it, since couldn't find existing") # TODO: rewrite the english - icon_path = saveAs(geojson_path.parent, url_mime) # TODO: handle cancel + icon_path = saveAs(geojson_path.parent, url_mime) + if icon_path == None: + logging.warning("canceled saving of icon, and subsequent modification of the GeoJSON, because not output path was selected") + return save(icon_path, True, url_data) new_url = host + icon_path.relative_to(root_path).as_posix() # TODO: make sure it is underneath logging.info("new URL: `{}'".format(new_url)) @@ -152,6 +160,11 @@ def main(): logging.getLogger().setLevel(logging.DEBUG) # TODO: add color geojson_path = fd.askopenfilename(filetypes=[("GeoJSON", ".geojson")], initialdir=root_path / "sources") + + if len(geojson_path) <= 0: + logging.info("exited because nothing selected for input") + return + single(Path(geojson_path)) # TODO: handle more than one GeoJSON (should take in a folder too (rglob that), and multiple files, or a mix of both) From b4f0d51e299d5348d07a45adba7d3cb9f18eac59 Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sun, 30 Jun 2024 07:56:24 -0400 Subject: [PATCH 17/20] gracefully handle if getData(url) returns None --- scripts/unembed_icons.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index 9c50573c6..11d3fa010 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -135,7 +135,12 @@ def single(geojson_path): url = geojson["properties"]["icon"] - url_mime, url_data = getData(url) # TODO: gracefully quit if it returns None + url_all = getData(url) + + if url_all == None: + return None + + url_mime, url_data = url_all icon_path = findFile(root_path, url_mime, url_data) if icon_path == None: From c59f546a66cbe4b3f48c8e3dabca3dc2468f9236 Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sun, 30 Jun 2024 08:30:25 -0400 Subject: [PATCH 18/20] confirm overwrites (now in a broken state) --- scripts/unembed_icons.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index 11d3fa010..5e82d05b0 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -108,10 +108,26 @@ def saveAs(directory, mime): return None return Path(output_path) +def confirm(string): + # TODO: honor command line arguments + answer = "" + while answer not in ["y", "n"]: # TODO: inform user when invalid input + answer = input("{} [Y/n]: ".format(string)).lower() + return answer == "y" + def save(path, binary, data): if path == None: logging.warning("not saving, as None passed as path") return None + if path.is_dir(): + logging.warning("not saving, as directory passed as path") + return None + if path.exists(): + if not confirm("would you like to overwrite `{}'?".format(path)): + logging.warning("not saving, as file exists, and got confirmation that it is not okay to overwrite") # TODO: rewrite the english + return None + logging.warning("saving, file exists, but got confirmation that it is okay to overwrite") # TODO: rewrite the english + # TODO: confirm with user first, and show if it will be replacing something, their relative sizes, plus of course honor command line arguments mode = None # TODO: maybe just require input to be binary, actually? or rewrite this functionality general, maybe not as a function at all? if binary == True: @@ -146,10 +162,9 @@ def single(geojson_path): if icon_path == None: logging.info("will now be saving it, since couldn't find existing") # TODO: rewrite the english icon_path = saveAs(geojson_path.parent, url_mime) - if icon_path == None: - logging.warning("canceled saving of icon, and subsequent modification of the GeoJSON, because not output path was selected") + if save(icon_path, True, url_data) == None: + logging.warning("canceled saving of icon, and subsequent modification of the GeoJSON, because no output path was selected, or writing was canceled") return - save(icon_path, True, url_data) new_url = host + icon_path.relative_to(root_path).as_posix() # TODO: make sure it is underneath logging.info("new URL: `{}'".format(new_url)) From 449805c48a215dd37bdd794d246344b017be8043 Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sun, 30 Jun 2024 09:00:40 -0400 Subject: [PATCH 19/20] return bools --- scripts/unembed_icons.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index 5e82d05b0..4361d6da8 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -11,7 +11,7 @@ root_path = None -def getData(url): # returns: [mime, data] or None upon error +def getData(url): # returns: [mime, data] or False upon error data_re = re.search(r"^data:(.*);base64,(.*)$", url) if data_re: mime = data_re.group(1) @@ -21,7 +21,7 @@ def getData(url): # returns: [mime, data] or None upon error logging.error("unsupported data-URL variation") else: logging.error("URL isn't a data-URL") - return None + return False def decodeLines(data): data_text = None @@ -34,9 +34,9 @@ def decodeLines(data): return data_text.splitlines() else: logging.debug("data is binary") - return None + return False -def findFile(parent_path, mime, data): # returns: Path or None upon none found +def findFile(parent_path, mime, data): # returns: Path or False upon none found # NOTE: all the extra stuff with text data is to compensate for git changing newlines data_size = len(data) @@ -89,7 +89,7 @@ def findFile(parent_path, mime, data): # returns: Path or None upon none found return file_path break logging.warning("no match found in {} checked files".format(check_count)) - return None + return False def saveAs(directory, mime): exts = mimetypes.guess_all_extensions(mime, strict=True) @@ -105,7 +105,7 @@ def saveAs(directory, mime): # TODO: backup input for terminal interfaces (dumb input(); check if parent is at least valid; relative to repo root_path; list recommended extensions; make sure the import failure is dealt with) if len(output_path) <= 0: - return None + return False return Path(output_path) def confirm(string): @@ -116,16 +116,16 @@ def confirm(string): return answer == "y" def save(path, binary, data): - if path == None: - logging.warning("not saving, as None passed as path") - return None + if path == False: + logging.warning("not saving, as False passed as path") + return False if path.is_dir(): logging.warning("not saving, as directory passed as path") - return None + return False if path.exists(): if not confirm("would you like to overwrite `{}'?".format(path)): logging.warning("not saving, as file exists, and got confirmation that it is not okay to overwrite") # TODO: rewrite the english - return None + return False logging.warning("saving, file exists, but got confirmation that it is okay to overwrite") # TODO: rewrite the english # TODO: confirm with user first, and show if it will be replacing something, their relative sizes, plus of course honor command line arguments @@ -153,16 +153,16 @@ def single(geojson_path): url_all = getData(url) - if url_all == None: - return None + if url_all == False: + return False url_mime, url_data = url_all icon_path = findFile(root_path, url_mime, url_data) - if icon_path == None: + if icon_path == False: logging.info("will now be saving it, since couldn't find existing") # TODO: rewrite the english icon_path = saveAs(geojson_path.parent, url_mime) - if save(icon_path, True, url_data) == None: + if save(icon_path, True, url_data) == False: logging.warning("canceled saving of icon, and subsequent modification of the GeoJSON, because no output path was selected, or writing was canceled") return new_url = host + icon_path.relative_to(root_path).as_posix() # TODO: make sure it is underneath @@ -199,3 +199,4 @@ def main(): # NOTE: one of the goals is for it to also be usable as a module # TODO: be clear about what file(s) you're now supposed to be selecting +# TODO: use exceptions instead of bools From eed7cb7505016dd3839771696faa1473e19184c9 Mon Sep 17 00:00:00 2001 From: Intrinsical Date: Sun, 30 Jun 2024 09:22:05 -0400 Subject: [PATCH 20/20] more bools (has been in a working state since last commit) --- scripts/unembed_icons.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/unembed_icons.py b/scripts/unembed_icons.py index 4361d6da8..65855ba78 100644 --- a/scripts/unembed_icons.py +++ b/scripts/unembed_icons.py @@ -164,7 +164,7 @@ def single(geojson_path): icon_path = saveAs(geojson_path.parent, url_mime) if save(icon_path, True, url_data) == False: logging.warning("canceled saving of icon, and subsequent modification of the GeoJSON, because no output path was selected, or writing was canceled") - return + return False new_url = host + icon_path.relative_to(root_path).as_posix() # TODO: make sure it is underneath logging.info("new URL: `{}'".format(new_url)) @@ -174,7 +174,7 @@ def single(geojson_path): geojson_binary = geojson_binary_handle.read() geojson_binary_new = geojson_binary.replace(bytes(url, encoding="utf8"), bytes(new_url, encoding="utf8")) # IMPROVEMENT: utf8 should be safe, though really you would want it following the input GeoJSON's detected encoding (I now force utf8, so this is invalid). geojson_binary_handle.close() - save(geojson_path, True, geojson_binary_new) + save(geojson_path, True, geojson_binary_new) # TODO: maybe return result of this def main(): logging.getLogger().setLevel(logging.DEBUG) # TODO: add color @@ -183,7 +183,7 @@ def main(): if len(geojson_path) <= 0: logging.info("exited because nothing selected for input") - return + return False single(Path(geojson_path))