Skip to content

Commit ace589c

Browse files
authored
Improvements on ms3 transform & convert; bug fixes (#127)
2 parents aa0979a + 6a33678 commit ace589c

File tree

7 files changed

+239
-125
lines changed

7 files changed

+239
-125
lines changed

src/ms3/bs4_parser.py

+23-23
Original file line number | Diff line number | Diff line change
@@ -2683,9 +2683,9 @@ def parse_measures(self):
26832683
event["mc_onset"] = current_position
26842684
chord_info["mc_onset"] = current_position
26852685
if tremolo_type:
2686-
chord_info[
2687-
"tremolo"
2688-
] = f"{tremolo_duration_string}_{tremolo_type}_{tremolo_component}"
2686+
chord_info["tremolo"] = (
2687+
f"{tremolo_duration_string}_{tremolo_type}_{tremolo_component}"
2688+
)
26892689
if tremolo_component in (0, 2):
26902690
# delete 'tremolo_type' which signals that the <Chord> is part of a tremolo
26912691
tremolo_type = None
@@ -2793,7 +2793,9 @@ def safe_update_event(key, value):
27932793
# block
27942794
parent_name = text_tag.parent.name
27952795
text_including_html = text_tag2str(text_tag)
2796-
text_excluding_html = text_tag2str_recursive(text_tag)
2796+
text_excluding_html = text_tag2str_recursive(
2797+
text_tag, join_char=" "
2798+
)
27972799
if parent_name == "Fingering":
27982800
# fingerings occur within <Note> tags, if they are to be extracted, they should go
27992801
# into the notes table
@@ -3304,14 +3306,14 @@ def replace_first_harmony(self, first_harmony_values: Dict[str, str]):
33043306
)
33053307

33063308
@overload
3307-
def get_onset_zero_harmony(self, return_layer: Literal[False]) -> Optional[bs4.Tag]:
3308-
...
3309+
def get_onset_zero_harmony(
3310+
self, return_layer: Literal[False]
3311+
) -> Optional[bs4.Tag]: ...
33093312

33103313
@overload
33113314
def get_onset_zero_harmony(
33123315
self, return_layer: Literal[True]
3313-
) -> Tuple[Optional[bs4.Tag], int, int]:
3314-
...
3316+
) -> Tuple[Optional[bs4.Tag], int, int]: ...
33153317

33163318
def get_onset_zero_harmony(self, return_layer: bool = False) -> Optional[bs4.Tag]:
33173319
"""Iterate through all tags at mc_onset 0 for all notational (staff, voice) layers and return the first
@@ -3618,10 +3620,10 @@ def __repr__(self):
36183620
),
36193621
index_col=0,
36203622
)
3621-
INSTRUMENT_DEFAULTS[
3622-
["controllers", "ChannelName", "ChannelValue"]
3623-
] = INSTRUMENT_DEFAULTS[["controllers", "ChannelName", "ChannelValue"]].apply(
3624-
lambda k: list(map(lambda j: eval(j) if j is not None else None, k))
3623+
INSTRUMENT_DEFAULTS[["controllers", "ChannelName", "ChannelValue"]] = (
3624+
INSTRUMENT_DEFAULTS[["controllers", "ChannelName", "ChannelValue"]].apply(
3625+
lambda k: list(map(lambda j: eval(j) if j is not None else None, k))
3626+
)
36253627
)
36263628
for int_column in ["keysig", "useDrumset"]:
36273629
INSTRUMENT_DEFAULTS[int_column] = INSTRUMENT_DEFAULTS[int_column].astype("Int64")
@@ -3875,9 +3877,9 @@ def modify_list_tags(self, changed_part, found, value):
38753877
:param value: new values to set
38763878
:return: corrected list of parts of the same length as value list
38773879
"""
3878-
l_found, l_value = 1 if found is None else len(
3879-
found
3880-
), 1 if value is None else len(value)
3880+
l_found, l_value = 1 if found is None else len(found), (
3881+
1 if value is None else len(value)
3882+
)
38813883
if l_found < l_value:
38823884
for i in range(l_value - l_found):
38833885
new_tag = self.soup.new_tag("Channel")
@@ -4796,15 +4798,13 @@ def tag2text(tag: bs4.Tag) -> Tuple[str, str]:
47964798
@overload
47974799
def find_tag_get_string(
47984800
parent_tag: bs4.Tag, tag_to_find: str, fallback: Literal[None]
4799-
) -> Tuple[Optional[bs4.Tag], Optional[str]]:
4800-
...
4801+
) -> Tuple[Optional[bs4.Tag], Optional[str]]: ...
48014802

48024803

48034804
@overload
48044805
def find_tag_get_string(
48054806
parent_tag: bs4.Tag, tag_to_find: str, fallback: Hashable
4806-
) -> Tuple[Optional[bs4.Tag], Optional[Hashable]]:
4807-
...
4807+
) -> Tuple[Optional[bs4.Tag], Optional[Hashable]]: ...
48084808

48094809

48104810
def find_tag_get_string(
@@ -4885,13 +4885,13 @@ def process_thoroughbass(
48854885
@overload
48864886
def get_row_at_quarterbeat(
48874887
df: pd.DataFrame, quarterbeat: Literal[None]
4888-
) -> pd.DataFrame:
4889-
...
4888+
) -> pd.DataFrame: ...
48904889

48914890

48924891
@overload
4893-
def get_row_at_quarterbeat(df: pd.DataFrame, quarterbeat: float) -> Optional[pd.Series]:
4894-
...
4892+
def get_row_at_quarterbeat(
4893+
df: pd.DataFrame, quarterbeat: float
4894+
) -> Optional[pd.Series]: ...
48954895

48964896

48974897
def get_row_at_quarterbeat(

src/ms3/cli.py

+62-35
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
from ms3 import (
1717
Parse,
1818
compute_path_from_file,
19+
convert_from_metadata_tsv,
1920
get_git_repo,
2021
get_git_version_info,
2122
make_coloring_reports_and_warnings,
@@ -164,7 +165,6 @@ def convert_cmd(args):
164165
# '\nTARGET_DIR: ' + target
165166
update_logger = get_logger("ms3.convert", level=args.level)
166167
for argument_name, argument, default in (
167-
("-a/--all", args.all, False),
168168
("-e/--exclude", args.exclude, None),
169169
("-f/--folders", args.folders, None),
170170
("--reviewed", args.reviewed, False),
@@ -175,22 +175,38 @@ def convert_cmd(args):
175175
update_logger.info(
176176
f"Argument '{argument_name}' is currently being ignored."
177177
)
178-
out_dir = os.getcwd() if args.out is None else resolve_dir(args.out)
178+
out_dir = "." if args.out is None else args.out
179179
ms = "auto" if args.musescore is None else args.musescore
180-
convert_folder(
181-
directory=args.dir,
182-
file_paths=args.files,
183-
target_dir=out_dir,
184-
extensions=args.extensions,
185-
target_extension=args.format,
186-
regex=args.include,
187-
suffix=args.suffix,
188-
recursive=not args.nonrecursive,
189-
ms=ms,
190-
overwrite=args.safe,
191-
parallel=not args.iterative,
192-
logger=update_logger,
193-
)
180+
if args.all:
181+
convert_folder(
182+
directory=args.dir,
183+
file_paths=args.files,
184+
target_dir=out_dir,
185+
extensions=args.extensions,
186+
target_extension=args.format,
187+
regex=args.include,
188+
suffix=args.suffix,
189+
recursive=not args.nonrecursive,
190+
ms=ms,
191+
overwrite=args.safe,
192+
parallel=not args.iterative,
193+
logger=update_logger,
194+
)
195+
else:
196+
convert_from_metadata_tsv(
197+
directory=args.dir,
198+
file_paths=args.files,
199+
target_dir=out_dir,
200+
extensions=args.extensions,
201+
target_extension=args.format,
202+
regex=args.include,
203+
suffix=args.suffix,
204+
recursive=not args.nonrecursive,
205+
ms=ms,
206+
overwrite=args.safe,
207+
parallel=not args.iterative,
208+
logger=update_logger,
209+
)
194210

195211

196212
def empty(args, parse_obj: Optional[Parse] = None):
@@ -290,12 +306,15 @@ def transform_cmd(args):
290306
if repo is None:
291307
version_info = None
292308
elif repo.is_dirty():
293-
print(
294-
"The repository is dirty. Please commit or stash your changes before running ms3 transform. This is "
295-
"important because the version information in the JSON descriptor(s) needs to be consistent with the "
296-
"repository state."
297-
)
298-
return
309+
if args.dirty:
310+
version_info = None
311+
else:
312+
print(
313+
"The repository is dirty. Please commit or stash your changes before running ms3 transform. This is "
314+
"important because the version information in the JSON descriptor(s) needs to be consistent with the "
315+
"repository state. To ignore this warning, add the --dirty flag to the command."
316+
)
317+
return
299318
else:
300319
version_info = get_git_version_info(repo=repo)
301320
parse_obj = make_parse_obj(args, parse_tsv=True, facets=params)
@@ -339,18 +358,22 @@ def update_cmd(args, parse_obj: Optional[Parse] = None):
339358
print("Nothing to do.")
340359

341360

342-
def check_and_create(d):
361+
def check_and_create(d, resolve=True):
    """Make sure ``d`` designates an existing directory, offering to create it if it does not.

    Args:
        d: Directory path as passed on the command line. If it is not an existing
            directory as given, it is joined onto the current working directory and
            passed through ``resolve_dir()`` before being checked again.
        resolve: If True (default), return the path as processed by ``resolve_dir()``
            (presumably an absolute path -- confirm against ``resolve_dir``).
            If False, return ``d`` exactly as it was passed in.

    Returns:
        The resolved directory path if ``resolve`` is True, otherwise the unchanged ``d``.

    Raises:
        argparse.ArgumentTypeError: If the directory does not exist and the user
            does not answer the creation prompt with "y".
    """
    if os.path.isdir(d):
        # Already an existing directory: nothing to create. Resolve it here, because
        # the lookup/creation branch below is the only other place that defines
        # 'd_resolved'; without this guard the final return would hit an unbound
        # local whenever resolve=True.
        return resolve_dir(d) if resolve else d
    d_resolved = resolve_dir(os.path.join(os.getcwd(), d))
    if not os.path.isdir(d_resolved):
        if input(d_resolved + " does not exist. Create? (y|n)") == "y":
            os.mkdir(d_resolved)
        else:
            raise argparse.ArgumentTypeError(
                d + " needs to be an existing directory"
            )
    return d_resolved if resolve else d
373+
374+
375+
def check_and_create_unresolved(d):
    """Variant of check_and_create() that returns ``d`` as passed instead of its resolved form.

    Takes a single argument so that it can be used as an argparse ``type=`` callable.
    """
    return check_and_create(d, resolve=False)
354377

355378

356379
def check_dir(d):
@@ -381,13 +404,11 @@ def precommit_cmd(
381404

382405

383406
@overload
384-
def review_cmd(args, parse_obj, wrapped_by_precommit: Literal[False]) -> None:
385-
...
407+
def review_cmd(args, parse_obj, wrapped_by_precommit: Literal[False]) -> None: ...
386408

387409

388410
@overload
389-
def review_cmd(args, parse_obj, wrapped_by_precommit: Literal[True]) -> bool:
390-
...
411+
def review_cmd(args, parse_obj, wrapped_by_precommit: Literal[True]) -> bool: ...
391412

392413

393414
def review_cmd(
@@ -627,8 +648,9 @@ def get_arg_parser():
627648
"-o",
628649
"--out",
629650
metavar="OUT_DIR",
630-
type=check_and_create,
631-
help="Output directory.",
651+
type=check_and_create_unresolved,
652+
help="Output directory. For conversion, an absolute path will result in a copy of the original sub-folder "
653+
"structure, whereas a relative path will contain all converted files next to each other.",
632654
)
633655
parse_args.add_argument(
634656
"-n",
@@ -1006,7 +1028,7 @@ def get_arg_parser():
10061028
"--format",
10071029
default="mscx",
10081030
help="Output format of converted files. Defaults to mscx. Other options are "
1009-
"{png, svg, pdf, mscz, wav, mp3, flac, ogg, xml, mxl, mid}",
1031+
"{png, svg, pdf, mscz, wav, mp3, flac, ogg, musicxml, mxl, mid}",
10101032
)
10111033
convert_parser.add_argument(
10121034
"--extensions",
@@ -1196,6 +1218,11 @@ def get_arg_parser():
11961218
action="store_true",
11971219
help="Store the transformed files as uncompressed TSVs rather than writing them into a ZIP file.",
11981220
)
1221+
transform_parser.add_argument(
1222+
"--dirty",
1223+
action="store_true",
1224+
help="Allows to override the 'This repository is dirty' blocker.",
1225+
)
11991226
transform_parser.set_defaults(func=transform_cmd)
12001227

12011228
update_parser = subparsers.add_parser(

src/ms3/expand_dcml.py

+11-8
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
1-
""" This is the same code as in the corpora repo as copied on September 24, 2020
1+
"""This is the same code as in the corpora repo as copied on September 24, 2020
22
and then adapted.
33
"""
4+
45
import re
56
import sys
67
from collections import defaultdict
@@ -709,7 +710,7 @@ def make_comparison():
709710
if n_beginnings == n_endings_cleaned:
710711
mismatch_maybe_due_to_voltas = True
711712
logger.info(
712-
"One or several pedal points have there endings in a first/second ending scenario. "
713+
"One or several pedal points have their endings in a first/second ending scenario. "
713714
"So far I can only correctly propagate the pedal note into first endings, not the others."
714715
)
715716
if mismatch_maybe_due_to_voltas is False:
@@ -746,12 +747,14 @@ def make_comparison():
746747
# if the localkey changes during the pedal point, the reference changes and the Roman numeral indicating
747748
# the pedal note needs to be adapted
748749
key2pedal = {
749-
key: ped
750-
if key == first_localkey
751-
else abs2rel_key(
752-
rel2abs_key(ped, first_localkey, global_minor, logger=logger),
753-
key,
754-
global_minor,
750+
key: (
751+
ped
752+
if key == first_localkey
753+
else abs2rel_key(
754+
rel2abs_key(ped, first_localkey, global_minor, logger=logger),
755+
key,
756+
global_minor,
757+
)
755758
)
756759
for key in localkeys.unique()
757760
}

src/ms3/operations.py

+3
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
"""This module contains the functions called by the ms3 commandline interface, which is why they may use
22
print() instead of log messages from time to time.
33
"""
4+
45
import os
56
from typing import Dict, Iterator, List, Literal, Optional, Tuple, Union
67

@@ -25,6 +26,7 @@
2526
store_dataframe_resource,
2627
store_dataframes_package,
2728
tpc2scale_degree,
29+
update_relative_paths_with_corpus_dirs,
2830
write_tsv,
2931
)
3032
from ms3.utils.constants import LATEST_MUSESCORE_VERSION
@@ -457,6 +459,7 @@ def _transform(
457459
# get concatenated dataframe:
458460
if facet == "metadata":
459461
df = ms3_object.metadata()
462+
update_relative_paths_with_corpus_dirs(df)
460463
else:
461464
if obj_is_corpus:
462465
df = ms3_object.get_facet(

src/ms3/piece.py

+5-6
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@
4343
get_git_version_info,
4444
get_musescore,
4545
infer_tsv_type,
46+
literal_type2tuple,
4647
load_tsv,
4748
make_file_path,
4849
metadata2series,
@@ -212,17 +213,15 @@ def score_metadata(
212213
view_name: Optional[str],
213214
choose: Literal["auto", "ask"],
214215
as_dict: Literal[False],
215-
) -> pd.Series:
216-
...
216+
) -> pd.Series: ...
217217

218218
@overload
219219
def score_metadata(
220220
self,
221221
view_name: Optional[str],
222222
choose: Literal["auto", "ask"],
223223
as_dict: Literal[True],
224-
) -> dict:
225-
...
224+
) -> dict: ...
226225

227226
def score_metadata(
228227
self,
@@ -1331,8 +1330,8 @@ def get_parsed_tsv(
13311330
) -> FileDataframeTupleMaybe:
13321331
facets = argument_and_literal_type2list(facet, TSVtype, logger=self.logger)
13331332
assert (
1334-
len(facet) == 1
1335-
), f"Pass exactly one valid TSV type {TSVtype.__args__} or use _.get_parsed_tsvs()"
1333+
len(facets) == 1
1334+
), f"Pass exactly one valid TSV type {literal_type2tuple(TSVtype)} or use _.get_parsed_tsvs()\nGot: {facets}"
13361335
facet = facets[0]
13371336
return self.get_parsed(facet, view_name=view_name, choose=choose)
13381337

0 commit comments

Comments
 (0)