diff --git a/parse_info_functions.py b/parse_info_functions.py index 6dd106d..d9c9044 100644 --- a/parse_info_functions.py +++ b/parse_info_functions.py @@ -4,23 +4,26 @@ def collectYearManuscriptCode(file_name, output): - space_sections = file_name.split(" ") numbers4digits = re.findall(r"[0-9]{4}", file_name) numbersAllDigits = re.findall(r"[0-9]{3,9}", file_name) - posYear = int(numbers4digits[0]) - if ( - numbers4digits - and len(numbers4digits) >= 1 - and posYear > 0 - and posYear < 2050 - and space_sections[1] == numbers4digits[0] - ): - output["year"] = numbers4digits[0] - additionalNums = [x for x in numbersAllDigits if x != numbers4digits[0]] - if additionalNums: - output["number_of_volumes"] = additionalNums[0] - else: + secondItem = True + + if numbers4digits: + # Do I need this check? + if " " in file_name: + space_sections = file_name.split(" ") + secondItem = space_sections[1] == numbers4digits[0] + intYear = int(numbers4digits[0]) + if len(numbers4digits) >= 1 and intYear > 0 and intYear < 2050 and secondItem: + output["year"] = numbers4digits[0] + print("Update: Year found: " + numbers4digits[0]) + additionalNums = [x for x in numbersAllDigits if x != numbers4digits[0]] + if additionalNums: + output["number_of_volumes"] = additionalNums[0] + print("Update: Manuscript code found") + elif numbersAllDigits: output["number_of_volumes"] = numbersAllDigits[0] + print("Update: Manuscript code found") return output diff --git a/tests/test_parse_info_functions.py b/tests/test_parse_info_functions.py index 078b208..384c383 100644 --- a/tests/test_parse_info_functions.py +++ b/tests/test_parse_info_functions.py @@ -11,24 +11,31 @@ # collectYearManuscriptCode @pytest.mark.parametrize( - "file_name,output,expected", + "file_name,output,expected,numPrints", [ ( "Ammannati 2023 Lupus in fabula - Sulla vera mano di Lupo di Ferrières", {}, {"year": "2023"}, + 1, ), ( "Zurli 1998 Il cod Vindobonensis Palatinus 9401 asterisk dell Anthologia Latina", {}, - {"year": 
"1998", "number_of_volumes": 9401}, + {"year": "1998", "number_of_volumes": "9401"}, + 2, ), - ("Levitan-DancingEndRope-1985", {}, {"year": 1985}), # fails - ("Les manuscrits de Loup de Ferrières", {}, {}), # fails + ("Levitan-DancingEndRope-1985", {}, {"year": "1985"}, 1), # fails + ("Les manuscrits de Loup de Ferrières", {}, {}, 0), # fails ], ) -def test_collectYearManuscriptCode(file_name, output, expected): - assert collectYearManuscriptCode(file_name, output) == expected +def test_collectYearManuscriptCode(file_name, output, expected, numPrints, capsys): + result = collectYearManuscriptCode(file_name, output) + captured = capsys.readouterr().out + assert result == expected + # Splitting on the 'U' adds an extra empty item to the list for the first occurrence + # captured = ['Update: Year foundUpdate: Manuscript code found'] + assert len(captured.split("U")[1:]) == numPrints # getInfoFromFileName @@ -59,6 +66,7 @@ def test_dashes(): ) +# Should the 2 below just be tested by the direct function? def test_multiple_numbers_format(): assert getInfoFromFileName( "Zurli 1998 Il cod Vindobonensis Palatinus 9401 asterisk dell Anthologia Latina" @@ -67,6 +75,7 @@ def test_multiple_numbers_format(): "title": "Il cod Vindobonensis Palatinus", "authors": ["Zurli"], "year": "1998", + "number_of_volumes": "9401", }, 2, )