diff --git a/.DS_Store b/.DS_Store index de7ad30..293e58a 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/.gitignore b/.gitignore index 99b48fd..7072248 100644 --- a/.gitignore +++ b/.gitignore @@ -38,7 +38,7 @@ pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports -htmlcov/ +# htmlcov/ .tox/ .nox/ .coverage @@ -131,5 +131,6 @@ dmypy.json # local files *misc/ -*data/ +data/* *scratch/ +tests/text_extraction_tests_aassumpcao.py diff --git a/process/__init__.py b/queridodiario_toolbox/__init__.py similarity index 100% rename from process/__init__.py rename to queridodiario_toolbox/__init__.py diff --git a/process/etl/__init__.py b/queridodiario_toolbox/etl/__init__.py similarity index 100% rename from process/etl/__init__.py rename to queridodiario_toolbox/etl/__init__.py diff --git a/process/etl/file_transform.py b/queridodiario_toolbox/etl/file_transform.py similarity index 99% rename from process/etl/file_transform.py rename to queridodiario_toolbox/etl/file_transform.py index 0604272..acbb595 100644 --- a/process/etl/file_transform.py +++ b/queridodiario_toolbox/etl/file_transform.py @@ -143,7 +143,7 @@ def is_file_type(filepath: str, file_types: List[str]) -> bool: def write_file_content( - filepath: str, apache_tika_jar: str, metadata: Optional[bool]=None + filepath: str, apache_tika_jar: str, metadata: Optional[bool]=False ) -> str: """ Extract the metadata of the original file using the given Apache diff --git a/process/gazette.py b/queridodiario_toolbox/gazette.py similarity index 87% rename from process/gazette.py rename to queridodiario_toolbox/gazette.py index aecbc40..1dc4913 100644 --- a/process/gazette.py +++ b/queridodiario_toolbox/gazette.py @@ -37,13 +37,14 @@ def __init__( "Either the filepath or content argument must be specified" ) - def extract_content(self, metadata: Optional[bool]=None) -> str: + def extract_content(self, metadata: Optional[bool]=False) -> str: """ Extract gazette content, save to disk, and store filepath in filepath class content """ self.filepath = write_file_content( - self.filepath, self.tika_jar, metadata + filepath=self.filepath, apache_tika_jar=self.tika_jar, + metadata=metadata ) def load_content(self) -> None: @@ -54,4 +55,4 @@ def load_content(self) -> None: with open(self.filepath, 'r') as fp: self.content = json.load(fp) else: - self.content = load_file_content(self.filepath) + self.content = load_file_content(filepath=self.filepath) diff --git a/test.json b/test.json deleted file mode 100644 index 1af4b71..0000000 --- a/test.json +++ /dev/null @@ -1,72 +0,0 @@ - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Hi this is a document created to test the text extraction for the Querido Diário project.
+ + + diff --git a/tests/data/fake_gazette.jpeg b/tests/data/fake_gazette.jpeg new file mode 100644 index 0000000..43dfe43 Binary files /dev/null and b/tests/data/fake_gazette.jpeg differ diff --git a/tests/data/fake_gazette.json b/tests/data/fake_gazette.json new file mode 100644 index 0000000..3911b80 --- /dev/null +++ b/tests/data/fake_gazette.json @@ -0,0 +1 @@ +{"Blue Colorant":"(0.1431, 0.0606, 0.7141)","Blue TRC":"0.0, 0.0000763, 0.0001526, 0.0002289, 0.0003052, 0.0003815, 0.0004578, 0.0005341, 0.0006104, 0.0006867, 0.000763, 0.0008392, 0.0009003, 0.0009766, 0.0010529, 0.0011292, 0.0012055, 0.0012818, 0.0013581, 0.0014343, 0.0015106, 0.0015869, 0.0016632, 0.0017395, 0.0018158, 0.0018921, 0.0019684, 0.0020447, 0.002121, 0.0021973, 0.0022736, 0.0023499, 0.0024262, 0.0025025, 0.0025788, 0.0026551, 0.0027161, 0.0027924, 0.0028687, 0.002945, 0.0030213, 0.0030976, 0.0031739, 0.0032502, 0.0033417, 0.003418, 0.0034943, 0.0035859, 0.0036622, 0.0037537, 0.00383, 0.0039216, 0.0040131, 0.0041047, 0.0041962, 0.0042878, 0.0043793, 0.0044709, 0.0045624, 0.0046693, 0.0047608, 0.0048524, 0.0049592, 0.005066, 0.0051575, 0.0052644, 0.0053712, 0.005478, 0.0055848, 0.0056916, 0.0057984, 0.0059052, 0.0060273, 0.0061341, 0.0062562, 0.006363, 0.0064851, 0.0066072, 0.0067292, 0.0068513, 0.0069734, 0.0070954, 0.0072175, 0.0073396, 0.0074617, 0.007599, 0.0077211, 0.0078584, 0.0079957, 0.0081178, 0.0082551, 0.0083925, 0.0085298, 0.0086671, 0.0088045, 0.008957, 0.0090944, 0.0092317, 0.0093843, 0.0095369, 0.0096742, 0.0098268, 0.0099794, 0.010132, 0.0102846, 0.0104372, 0.0105898, 0.0107576, 0.0109102, 0.0110628, 0.0112306, 0.0113985, 0.0115511, 0.0117189, 0.0118868, 0.0120546, 0.0122225, 0.0124056, 0.0125734, 0.0127413, 0.0129244, 0.0130922, 0.0132753, 0.0134585, 0.0136416, 0.0138247, 0.0140078, 0.0141909, 0.014374, 0.0145571, 0.0147555, 0.0149386, 0.0151369, 0.0153201, 0.0155184, 0.0157168, 0.0159152, 0.0161135, 0.0163119, 0.0165255, 0.0167239, 0.0169223, 0.0171359, 0.0173495, 0.0175479, 0.0177615, 0.0179751, 0.0181888, 0.0184024, 0.018616, 0.0188449, 0.0190585, 0.0192874, 0.019501, 0.0197299, 0.0199588, 0.0201877, 0.0204166, 0.0206455, 0.0208743, 0.0211032, 0.0213474, 0.0215763, 0.0218204, 0.0220645, 0.0222934, 0.0225376, 0.0227817, 0.0230259, 0.0232853, 0.0235294, 0.0237736, 0.024033, 0.0242771, 0.0245365, 0.0247959, 0.0250553, 0.0253147, 0.0255741, 0.0258335, 0.0261082, 0.0263676, 0.026627, 0.0269017, 0.0271763, 0.027451, 0.0277256, 0.0280003, 0.028275, 0.0285496, 0.0288243, 0.0291142, 0.0293889, 0.0296788, 0.0299687, 0.0302586, 0.0305486, 0.0308385, 0.0311284, 0.0314183, 0.0317235, 0.0320134, 0.0323186, 0.0326238, 0.032929, 0.0332341, 0.0335393, 0.0338445, 0.0341497, 0.0344549, 0.0347753, 0.0350805, 0.0354009, 0.0357214, 0.0360418, 0.0363622, 0.0366827, 0.0370031, 0.0373388, 0.0376593, 0.037995, 0.0383154, 0.0386511, 0.0389868, 0.0393225, 0.0396582, 0.0399939, 0.0403449, 0.0406806, 0.0410315, 0.0413825, 0.0417182, 0.0420691, 0.0424201, 0.042771, 0.0431373, 0.0434882, 0.0438392, 0.0442054, 0.0445716, 0.0449226, 0.0452888, 0.045655, 0.0460212, 0.0464027, 0.0467689, 0.0471504, 0.0475166, 0.0478981, 0.0482795, 0.048661, 0.0490425, 0.049424, 0.0498054, 0.0501869, 0.0505837, 0.0509804, 0.0513619, 0.0517586, 0.0521553, 0.0525521, 0.0529488, 0.0533608, 0.0537575, 0.0541695, 0.0545663, 0.0549783, 0.0553902, 0.0558022, 0.0562142, 0.0566262, 0.0570535, 0.0574655, 0.0578927, 0.05832, 0.058732, 0.0591592, 0.0595865, 0.060029, 0.0604562, 0.0608835, 0.061326, 0.0617533, 0.0621958, 0.0626383, 0.0630808, 0.0635233, 0.0639811, 0.0644236, 0.0648661, 0.0653239, 0.0657816, 0.0662394, 0.0666972, 0.067155, 0.0676127, 0.0680705, 0.0685435, 0.0690013, 0.0694743, 0.0699474, 0.0704204, 0.0708934, 0.0713664, 0.0718395, 0.0723278, 0.0728008, 0.0732891, 0.0737774, 0.0742657, 0.0747539, 0.0752422, 0.0757305, 0.0762188, 0.0767224, 0.0772259, 0.0777142, 0.0782177, 0.0787213, 0.0792401, 0.0797436, 0.0802472, 0.080766, 0.0812696, 0.0817884, 0.0823072, 0.082826, 0.0833448, 0.0838636, 0.0843977, 0.0849165, 0.0854505, 0.0859846, 0.0865187, 0.0870527, 0.0875868, 0.0881209, 0.0886549, 0.0892042, 0.0897536, 0.0902876, 0.090837, 0.0913863, 0.0919356, 0.0925002, 0.0930495, 0.0936141, 0.0941634, 0.094728, 0.0952926, 0.0958572, 0.0964218, 0.0970016, 0.0975662, 0.098146, 0.0987106, 0.0992905, 0.0998703, 0.1004501, 0.10103, 0.1016251, 0.1022049, 0.1028, 0.1033799, 0.103975, 0.1045701, 0.1051652, 0.1057755, 0.1063706, 0.106981, 0.1075761, 0.1081865, 0.1087968, 0.1094072, 0.1100175, 0.1106279, 0.1112535, 0.1118639, 0.1124895, 0.1131151, 0.1137407, 0.1143664, 0.114992, 0.1156176, 0.1162585, 0.1168841, 0.117525, 0.1181659, 0.1188067, 0.1194476, 0.1200885, 0.1207446, 0.1213855, 0.1220417, 0.1226978, 0.1233539, 0.1240101, 0.1246662, 0.1253223, 0.1259937, 0.1266499, 0.1273213, 0.1279927, 0.1286641, 0.1293355, 0.1300069, 0.1306935, 0.1313649, 0.1320516, 0.1327382, 0.1334096, 0.1341115, 0.1347982, 0.1354849, 0.1361868, 0.1368734, 0.1375753, 0.1382773, 0.1389792, 0.1396811, 0.140383, 0.1411002, 0.1418021, 0.1425193, 0.1432364, 0.1439536, 0.1446708, 0.145388, 0.1461204, 0.1468376, 0.14757, 0.1483024, 0.1490349, 0.1497673, 0.1504997, 0.1512322, 0.1519799, 0.1527123, 0.15346, 0.1542077, 0.1549554, 0.1557031, 0.1564508, 0.1572137, 0.1579767, 0.1587243, 0.1594873, 0.1602502, 0.1610132, 0.1617914, 0.1625544, 0.1633326, 0.1640955, 0.1648737, 0.1656519, 0.1664302, 0.1672236, 0.1680018, 0.1687953, 0.1695735, 0.170367, 0.1711604, 0.1719539, 0.1727474, 0.1735561, 0.1743496, 0.1751583, 0.175967, 0.1767758, 0.1775845, 0.1783932, 0.1792172, 0.1800259, 0.1808499, 0.1816739, 0.1824826, 0.1833219, 0.1841459, 0.1849699, 0.1858091, 0.1866331, 0.1874723, 0.1883116, 0.1891508, 0.1900053, 0.1908446, 0.1916838, 0.1925383, 0.1933928, 0.1942473, 0.1951019, 0.1959564, 0.1968261, 0.1976806, 0.1985504, 0.1994202, 0.2002899, 0.2011597, 0.2020294, 0.2028992, 0.2037842, 0.2046693, 0.205539, 0.206424, 0.2073243, 0.2082094, 0.2090944, 0.2099947, 0.2108949, 0.21178, 0.2126802, 0.2135958, 0.2144961, 0.2153964, 0.2163119, 0.2172274, 0.2181277, 0.2190585, 0.2199741, 0.2208896, 0.2218051, 0.2227359, 0.2236667, 0.2245975, 0.2255283, 0.2264591, 0.2273899, 0.228336, 0.2292821, 0.2302129, 0.2311589, 0.232105, 0.2330663, 0.2340124, 0.2349737, 0.2359197, 0.2368811, 0.2378424, 0.2388037, 0.239765, 0.2407416, 0.2417029, 0.2426795, 0.2436561, 0.2446326, 0.2456092, 0.2466011, 0.2475776, 0.2485695, 0.249546, 0.2505379, 0.2515297, 0.2525368, 0.2535286, 0.2545357, 0.2555276, 0.2565347, 0.2575418, 0.2585489, 0.259556, 0.2605783, 0.2615854, 0.2626078, 0.2636301, 0.2646525, 0.2656748, 0.2667124, 0.2677348, 0.2687724, 0.26981, 0.2708324, 0.2718853, 0.2729229, 0.2739605, 0.2750134, 0.276051, 0.2771038, 0.2781567, 0.2792248, 0.2802777, 0.2813306, 0.2823987, 0.2834668, 0.284535, 0.2856031, 0.2866712, 0.2877394, 0.2888228, 0.2899062, 0.2909743, 0.2920577, 0.2931563, 0.2942397, 0.2953231, 0.2964218, 0.2975204, 0.2986191, 0.2997177, 0.3008164, 0.301915, 0.3030289, 0.3041428, 0.3052567, 0.3063706, 0.3074846, 0.3085985, 0.3097124, 0.3108415, 0.3119707, 0.3130999, 0.314229, 0.3153582, 0.3165026, 0.3176318, 0.3187762, 0.3199207, 0.3210651, 0.3222095, 0.3233539, 0.3245136, 0.3256733, 0.3268177, 0.3279774, 0.3291371, 0.330312, 0.3314717, 0.3326467, 0.3338216, 0.3349966, 0.3361715, 0.3373465, 0.3385214, 0.3397116, 0.3408865, 0.3420768, 0.343267, 0.3444724, 0.3456626, 0.3468528, 0.3480583, 0.3492638, 0.3504692, 0.3516747, 0.3528801, 0.3541009, 0.3553063, 0.356527, 0.3577478, 0.3589685, 0.3601892, 0.3614252, 0.3626459, 0.3638819, 0.3651179, 0.3663539, 0.3675898, 0.3688411, 0.3700771, 0.3713283, 0.3725795, 0.3738308, 0.375082, 0.3763333, 0.3775998, 0.378851, 0.3801175, 0.381384, 0.3826505, 0.3839322, 0.3851987, 0.3864805, 0.387747, 0.3890288, 0.3903105, 0.3916075, 0.3928893, 0.3941863, 0.3954681, 0.3967651, 0.3980621, 0.3993744, 0.4006714, 0.4019837, 0.4032807, 0.404593, 0.4059052, 0.4072175, 0.4085451, 0.4098573, 0.4111849, 0.4125124, 0.4138399, 0.4151675, 0.416495, 0.4178378, 0.4191806, 0.4205234, 0.4218662, 0.423209, 0.4245518, 0.4259098, 0.4272526, 0.4286107, 0.4299687, 0.4313268, 0.4326848, 0.4340581, 0.4354314, 0.4367895, 0.4381628, 0.4395514, 0.4409247, 0.442298, 0.4436866, 0.4450752, 0.4464637, 0.4478523, 0.4492409, 0.4506447, 0.4520333, 0.4534371, 0.4548409, 0.4562448, 0.4576486, 0.4590677, 0.4604715, 0.4618906, 0.4633097, 0.4647288, 0.4661631, 0.4675822, 0.4690166, 0.4704356, 0.47187, 0.4733043, 0.4747539, 0.4761883, 0.4776379, 0.4790875, 0.4805371, 0.4819867, 0.4834363, 0.4848859, 0.4863508, 0.4878157, 0.4892805, 0.4907454, 0.4922103, 0.4936904, 0.4951553, 0.4966354, 0.4981155, 0.4995956, 0.501091, 0.5025711, 0.5040665, 0.5055467, 0.507042, 0.5085527, 0.5100481, 0.5115435, 0.5130541, 0.5145647, 0.5160754, 0.517586, 0.5190967, 0.5206226, 0.5221485, 0.5236591, 0.525185, 0.5267262, 0.5282521, 0.529778, 0.5313191, 0.5328603, 0.5344015, 0.5359426, 0.537499, 0.5390402, 0.5405966, 0.542153, 0.5437095, 0.5452659, 0.5468223, 0.548394, 0.5499657, 0.5515373, 0.553109, 0.5546807, 0.5562524, 0.5578393, 0.5594263, 0.5610132, 0.5626001, 0.5641871, 0.565774, 0.5673762, 0.5689784, 0.5705806, 0.5721828, 0.573785, 0.5754025, 0.5770047, 0.5786221, 0.5802396, 0.581857, 0.5834897, 0.5851072, 0.5867399, 0.5883726, 0.5900053, 0.5916381, 0.5932708, 0.5949187, 0.5965667, 0.5982147, 0.5998627, 0.6015106, 0.6031586, 0.6048219, 0.6064851, 0.6081483, 0.6098116, 0.6114748, 0.6131533, 0.6148165, 0.616495, 0.6181735, 0.619852, 0.6215457, 0.6232242, 0.624918, 0.6266117, 0.6283055, 0.6299992, 0.631693, 0.633402, 0.635111, 0.63682, 0.638529, 0.640238, 0.6419471, 0.6436713, 0.6453956, 0.6471199, 0.6488441, 0.6505684, 0.6523079, 0.6540322, 0.6557717, 0.6575113, 0.6592508, 0.6610056, 0.6627451, 0.6644999, 0.6662547, 0.6680095, 0.6697642, 0.6715343, 0.6732891, 0.6750591, 0.6768292, 0.6785992, 0.6803845, 0.6821546, 0.6839399, 0.6857252, 0.6875105, 0.6892958, 0.6910811, 0.6928817, 0.6946822, 0.6964675, 0.6982834, 0.7000839, 0.7018845, 0.7037003, 0.7055161, 0.707332, 0.7091478, 0.7109636, 0.7127947, 0.7146105, 0.7164416, 0.7182727, 0.720119, 0.7219501, 0.7237964, 0.7256275, 0.7274739, 0.7293355, 0.7311818, 0.7330282, 0.7348898, 0.7367514, 0.738613, 0.7404746, 0.7423514, 0.744213, 0.7460899, 0.7479667, 0.7498436, 0.7517205, 0.7536126, 0.7554894, 0.7573816, 0.7592737, 0.7611658, 0.7630732, 0.7649653, 0.7668727, 0.76878, 0.7706874, 0.7725948, 0.7745174, 0.7764248, 0.7783474, 0.7802701, 0.7821927, 0.7841306, 0.7860533, 0.7879911, 0.789929, 0.7918669, 0.7938048, 0.795758, 0.7976959, 0.799649, 0.8016022, 0.8035554, 0.8055238, 0.8074769, 0.8094453, 0.8114137, 0.8133822, 0.8153506, 0.8173342, 0.8193179, 0.8212863, 0.82327, 0.8252689, 0.8272526, 0.8292515, 0.8312352, 0.8332341, 0.8352331, 0.8372473, 0.8392462, 0.8412604, 0.8432746, 0.8452888, 0.847303, 0.8493172, 0.8513466, 0.8533761, 0.8554055, 0.857435, 0.8594644, 0.8614939, 0.8635386, 0.8655833, 0.867628, 0.8696727, 0.8717327, 0.8737774, 0.8758373, 0.8778973, 0.8799573, 0.8820325, 0.8840925, 0.8861677, 0.8882429, 0.8903182, 0.8923934, 0.8944839, 0.8965591, 0.8986496, 0.9007401, 0.9028305, 0.9049363, 0.9070268, 0.9091325, 0.9112383, 0.913344, 0.915465, 0.9175708, 0.9196918, 0.9218128, 0.9239338, 0.9260548, 0.9281758, 0.930312, 0.9324483, 0.9345846, 0.9367208, 0.9388571, 0.9410086, 0.9431601, 0.9453117, 0.9474632, 0.9496147, 0.9517815, 0.953933, 0.9560998, 0.9582666, 0.9604334, 0.9626154, 0.9647822, 0.9669642, 0.9691463, 0.9713283, 0.9735256, 0.9757076, 0.9779049, 0.9801022, 0.9822995, 0.9844968, 0.9867094, 0.988922, 0.9911345, 0.9933471, 0.9955596, 0.9977722, 1.0","CMM Type":"Lino","Class":"Display Device","Color space":"RGB","Content-Length":"1641910","Content-Type":"image/tiff","Copyright":"Copyright (c) 1998 Hewlett-Packard Company","Device Mfg Description":"IEC http://www.iec.ch","Device Model Description":"IEC 61966-2.1 Default RGB colour space - sRGB","Device manufacturer":"IEC","Device model":"sRGB","Exif IFD0:Bits Per Sample":"8 8 8 8 bits/component/pixel","Exif IFD0:Compression":"Uncompressed","Exif IFD0:Fill Order":"Normal","Exif IFD0:Image Height":"640 pixels","Exif IFD0:Image Width":"640 pixels","Exif IFD0:Orientation":"Top, left side (Horizontal / normal)","Exif IFD0:Photometric Interpretation":"RGB","Exif IFD0:Planar Configuration":"Chunky (contiguous for each subsampling pixel)","Exif IFD0:Resolution Unit":"Inch","Exif IFD0:Rows Per Strip":"51 rows/strip","Exif IFD0:Samples Per Pixel":"4 samples/pixel","Exif IFD0:Strip Byte Counts":"130560 130560 130560 130560 130560 130560 130560 130560 130560 130560 130560 130560 71680 bytes","Exif IFD0:Strip Offsets":"8 130568 261128 391688 522248 652808 783368 913928 1044488 1175048 1305608 1436168 1566728","Exif IFD0:Unknown tag (0x0152)":"1","Exif IFD0:Unknown tag (0x0153)":"1 1 1 1","Exif IFD0:X Resolution":"72 dots per inch","Exif IFD0:Y Resolution":"72 dots per inch","File Modified Date":"Thu Nov 19 11:37:15 -05:00 2020","File Name":"fake_gazette.tiff","File Size":"1641910 bytes","Green Colorant":"(0.3851, 0.7169, 0.0971)","Green TRC":"0.0, 0.0000763, 0.0001526, 0.0002289, 0.0003052, 0.0003815, 0.0004578, 0.0005341, 0.0006104, 0.0006867, 0.000763, 0.0008392, 0.0009003, 0.0009766, 0.0010529, 0.0011292, 0.0012055, 0.0012818, 0.0013581, 0.0014343, 0.0015106, 0.0015869, 0.0016632, 0.0017395, 0.0018158, 0.0018921, 0.0019684, 0.0020447, 0.002121, 0.0021973, 0.0022736, 0.0023499, 0.0024262, 0.0025025, 0.0025788, 0.0026551, 0.0027161, 0.0027924, 0.0028687, 0.002945, 0.0030213, 0.0030976, 0.0031739, 0.0032502, 0.0033417, 0.003418, 0.0034943, 0.0035859, 0.0036622, 0.0037537, 0.00383, 0.0039216, 0.0040131, 0.0041047, 0.0041962, 0.0042878, 0.0043793, 0.0044709, 0.0045624, 0.0046693, 0.0047608, 0.0048524, 0.0049592, 0.005066, 0.0051575, 0.0052644, 0.0053712, 0.005478, 0.0055848, 0.0056916, 0.0057984, 0.0059052, 0.0060273, 0.0061341, 0.0062562, 0.006363, 0.0064851, 0.0066072, 0.0067292, 0.0068513, 0.0069734, 0.0070954, 0.0072175, 0.0073396, 0.0074617, 0.007599, 0.0077211, 0.0078584, 0.0079957, 0.0081178, 0.0082551, 0.0083925, 0.0085298, 0.0086671, 0.0088045, 0.008957, 0.0090944, 0.0092317, 0.0093843, 0.0095369, 0.0096742, 0.0098268, 0.0099794, 0.010132, 0.0102846, 0.0104372, 0.0105898, 0.0107576, 0.0109102, 0.0110628, 0.0112306, 0.0113985, 0.0115511, 0.0117189, 0.0118868, 0.0120546, 0.0122225, 0.0124056, 0.0125734, 0.0127413, 0.0129244, 0.0130922, 0.0132753, 0.0134585, 0.0136416, 0.0138247, 0.0140078, 0.0141909, 0.014374, 0.0145571, 0.0147555, 0.0149386, 0.0151369, 0.0153201, 0.0155184, 0.0157168, 0.0159152, 0.0161135, 0.0163119, 0.0165255, 0.0167239, 0.0169223, 0.0171359, 0.0173495, 0.0175479, 0.0177615, 0.0179751, 0.0181888, 0.0184024, 0.018616, 0.0188449, 0.0190585, 0.0192874, 0.019501, 0.0197299, 0.0199588, 0.0201877, 0.0204166, 0.0206455, 0.0208743, 0.0211032, 0.0213474, 0.0215763, 0.0218204, 0.0220645, 0.0222934, 0.0225376, 0.0227817, 0.0230259, 0.0232853, 0.0235294, 0.0237736, 0.024033, 0.0242771, 0.0245365, 0.0247959, 0.0250553, 0.0253147, 0.0255741, 0.0258335, 0.0261082, 0.0263676, 0.026627, 0.0269017, 0.0271763, 0.027451, 0.0277256, 0.0280003, 0.028275, 0.0285496, 0.0288243, 0.0291142, 0.0293889, 0.0296788, 0.0299687, 0.0302586, 0.0305486, 0.0308385, 0.0311284, 0.0314183, 0.0317235, 0.0320134, 0.0323186, 0.0326238, 0.032929, 0.0332341, 0.0335393, 0.0338445, 0.0341497, 0.0344549, 0.0347753, 0.0350805, 0.0354009, 0.0357214, 0.0360418, 0.0363622, 0.0366827, 0.0370031, 0.0373388, 0.0376593, 0.037995, 0.0383154, 0.0386511, 0.0389868, 0.0393225, 0.0396582, 0.0399939, 0.0403449, 0.0406806, 0.0410315, 0.0413825, 0.0417182, 0.0420691, 0.0424201, 0.042771, 0.0431373, 0.0434882, 0.0438392, 0.0442054, 0.0445716, 0.0449226, 0.0452888, 0.045655, 0.0460212, 0.0464027, 0.0467689, 0.0471504, 0.0475166, 0.0478981, 0.0482795, 0.048661, 0.0490425, 0.049424, 0.0498054, 0.0501869, 0.0505837, 0.0509804, 0.0513619, 0.0517586, 0.0521553, 0.0525521, 0.0529488, 0.0533608, 0.0537575, 0.0541695, 0.0545663, 0.0549783, 0.0553902, 0.0558022, 0.0562142, 0.0566262, 0.0570535, 0.0574655, 0.0578927, 0.05832, 0.058732, 0.0591592, 0.0595865, 0.060029, 0.0604562, 0.0608835, 0.061326, 0.0617533, 0.0621958, 0.0626383, 0.0630808, 0.0635233, 0.0639811, 0.0644236, 0.0648661, 0.0653239, 0.0657816, 0.0662394, 0.0666972, 0.067155, 0.0676127, 0.0680705, 0.0685435, 0.0690013, 0.0694743, 0.0699474, 0.0704204, 0.0708934, 0.0713664, 0.0718395, 0.0723278, 0.0728008, 0.0732891, 0.0737774, 0.0742657, 0.0747539, 0.0752422, 0.0757305, 0.0762188, 0.0767224, 0.0772259, 0.0777142, 0.0782177, 0.0787213, 0.0792401, 0.0797436, 0.0802472, 0.080766, 0.0812696, 0.0817884, 0.0823072, 0.082826, 0.0833448, 0.0838636, 0.0843977, 0.0849165, 0.0854505, 0.0859846, 0.0865187, 0.0870527, 0.0875868, 0.0881209, 0.0886549, 0.0892042, 0.0897536, 0.0902876, 0.090837, 0.0913863, 0.0919356, 0.0925002, 0.0930495, 0.0936141, 0.0941634, 0.094728, 0.0952926, 0.0958572, 0.0964218, 0.0970016, 0.0975662, 0.098146, 0.0987106, 0.0992905, 0.0998703, 0.1004501, 0.10103, 0.1016251, 0.1022049, 0.1028, 0.1033799, 0.103975, 0.1045701, 0.1051652, 0.1057755, 0.1063706, 0.106981, 0.1075761, 0.1081865, 0.1087968, 0.1094072, 0.1100175, 0.1106279, 0.1112535, 0.1118639, 0.1124895, 0.1131151, 0.1137407, 0.1143664, 0.114992, 0.1156176, 0.1162585, 0.1168841, 0.117525, 0.1181659, 0.1188067, 0.1194476, 0.1200885, 0.1207446, 0.1213855, 0.1220417, 0.1226978, 0.1233539, 0.1240101, 0.1246662, 0.1253223, 0.1259937, 0.1266499, 0.1273213, 0.1279927, 0.1286641, 0.1293355, 0.1300069, 0.1306935, 0.1313649, 0.1320516, 0.1327382, 0.1334096, 0.1341115, 0.1347982, 0.1354849, 0.1361868, 0.1368734, 0.1375753, 0.1382773, 0.1389792, 0.1396811, 0.140383, 0.1411002, 0.1418021, 0.1425193, 0.1432364, 0.1439536, 0.1446708, 0.145388, 0.1461204, 0.1468376, 0.14757, 0.1483024, 0.1490349, 0.1497673, 0.1504997, 0.1512322, 0.1519799, 0.1527123, 0.15346, 0.1542077, 0.1549554, 0.1557031, 0.1564508, 0.1572137, 0.1579767, 0.1587243, 0.1594873, 0.1602502, 0.1610132, 0.1617914, 0.1625544, 0.1633326, 0.1640955, 0.1648737, 0.1656519, 0.1664302, 0.1672236, 0.1680018, 0.1687953, 0.1695735, 0.170367, 0.1711604, 0.1719539, 0.1727474, 0.1735561, 0.1743496, 0.1751583, 0.175967, 0.1767758, 0.1775845, 0.1783932, 0.1792172, 0.1800259, 0.1808499, 0.1816739, 0.1824826, 0.1833219, 0.1841459, 0.1849699, 0.1858091, 0.1866331, 0.1874723, 0.1883116, 0.1891508, 0.1900053, 0.1908446, 0.1916838, 0.1925383, 0.1933928, 0.1942473, 0.1951019, 0.1959564, 0.1968261, 0.1976806, 0.1985504, 0.1994202, 0.2002899, 0.2011597, 0.2020294, 0.2028992, 0.2037842, 0.2046693, 0.205539, 0.206424, 0.2073243, 0.2082094, 0.2090944, 0.2099947, 0.2108949, 0.21178, 0.2126802, 0.2135958, 0.2144961, 0.2153964, 0.2163119, 0.2172274, 0.2181277, 0.2190585, 0.2199741, 0.2208896, 0.2218051, 0.2227359, 0.2236667, 0.2245975, 0.2255283, 0.2264591, 0.2273899, 0.228336, 0.2292821, 0.2302129, 0.2311589, 0.232105, 0.2330663, 0.2340124, 0.2349737, 0.2359197, 0.2368811, 0.2378424, 0.2388037, 0.239765, 0.2407416, 0.2417029, 0.2426795, 0.2436561, 0.2446326, 0.2456092, 0.2466011, 0.2475776, 0.2485695, 0.249546, 0.2505379, 0.2515297, 0.2525368, 0.2535286, 0.2545357, 0.2555276, 0.2565347, 0.2575418, 0.2585489, 0.259556, 0.2605783, 0.2615854, 0.2626078, 0.2636301, 0.2646525, 0.2656748, 0.2667124, 0.2677348, 0.2687724, 0.26981, 0.2708324, 0.2718853, 0.2729229, 0.2739605, 0.2750134, 0.276051, 0.2771038, 0.2781567, 0.2792248, 0.2802777, 0.2813306, 0.2823987, 0.2834668, 0.284535, 0.2856031, 0.2866712, 0.2877394, 0.2888228, 0.2899062, 0.2909743, 0.2920577, 0.2931563, 0.2942397, 0.2953231, 0.2964218, 0.2975204, 0.2986191, 0.2997177, 0.3008164, 0.301915, 0.3030289, 0.3041428, 0.3052567, 0.3063706, 0.3074846, 0.3085985, 0.3097124, 0.3108415, 0.3119707, 0.3130999, 0.314229, 0.3153582, 0.3165026, 0.3176318, 0.3187762, 0.3199207, 0.3210651, 0.3222095, 0.3233539, 0.3245136, 0.3256733, 0.3268177, 0.3279774, 0.3291371, 0.330312, 0.3314717, 0.3326467, 0.3338216, 0.3349966, 0.3361715, 0.3373465, 0.3385214, 0.3397116, 0.3408865, 0.3420768, 0.343267, 0.3444724, 0.3456626, 0.3468528, 0.3480583, 0.3492638, 0.3504692, 0.3516747, 0.3528801, 0.3541009, 0.3553063, 0.356527, 0.3577478, 0.3589685, 0.3601892, 0.3614252, 0.3626459, 0.3638819, 0.3651179, 0.3663539, 0.3675898, 0.3688411, 0.3700771, 0.3713283, 0.3725795, 0.3738308, 0.375082, 0.3763333, 0.3775998, 0.378851, 0.3801175, 0.381384, 0.3826505, 0.3839322, 0.3851987, 0.3864805, 0.387747, 0.3890288, 0.3903105, 0.3916075, 0.3928893, 0.3941863, 0.3954681, 0.3967651, 0.3980621, 0.3993744, 0.4006714, 0.4019837, 0.4032807, 0.404593, 0.4059052, 0.4072175, 0.4085451, 0.4098573, 0.4111849, 0.4125124, 0.4138399, 0.4151675, 0.416495, 0.4178378, 0.4191806, 0.4205234, 0.4218662, 0.423209, 0.4245518, 0.4259098, 0.4272526, 0.4286107, 0.4299687, 0.4313268, 0.4326848, 0.4340581, 0.4354314, 0.4367895, 0.4381628, 0.4395514, 0.4409247, 0.442298, 0.4436866, 0.4450752, 0.4464637, 0.4478523, 0.4492409, 0.4506447, 0.4520333, 0.4534371, 0.4548409, 0.4562448, 0.4576486, 0.4590677, 0.4604715, 0.4618906, 0.4633097, 0.4647288, 0.4661631, 0.4675822, 0.4690166, 0.4704356, 0.47187, 0.4733043, 0.4747539, 0.4761883, 0.4776379, 0.4790875, 0.4805371, 0.4819867, 0.4834363, 0.4848859, 0.4863508, 0.4878157, 0.4892805, 0.4907454, 0.4922103, 0.4936904, 0.4951553, 0.4966354, 0.4981155, 0.4995956, 0.501091, 0.5025711, 0.5040665, 0.5055467, 0.507042, 0.5085527, 0.5100481, 0.5115435, 0.5130541, 0.5145647, 0.5160754, 0.517586, 0.5190967, 0.5206226, 0.5221485, 0.5236591, 0.525185, 0.5267262, 0.5282521, 0.529778, 0.5313191, 0.5328603, 0.5344015, 0.5359426, 0.537499, 0.5390402, 0.5405966, 0.542153, 0.5437095, 0.5452659, 0.5468223, 0.548394, 0.5499657, 0.5515373, 0.553109, 0.5546807, 0.5562524, 0.5578393, 0.5594263, 0.5610132, 0.5626001, 0.5641871, 0.565774, 0.5673762, 0.5689784, 0.5705806, 0.5721828, 0.573785, 0.5754025, 0.5770047, 0.5786221, 0.5802396, 0.581857, 0.5834897, 0.5851072, 0.5867399, 0.5883726, 0.5900053, 0.5916381, 0.5932708, 0.5949187, 0.5965667, 0.5982147, 0.5998627, 0.6015106, 0.6031586, 0.6048219, 0.6064851, 0.6081483, 0.6098116, 0.6114748, 0.6131533, 0.6148165, 0.616495, 0.6181735, 0.619852, 0.6215457, 0.6232242, 0.624918, 0.6266117, 0.6283055, 0.6299992, 0.631693, 0.633402, 0.635111, 0.63682, 0.638529, 0.640238, 0.6419471, 0.6436713, 0.6453956, 0.6471199, 0.6488441, 0.6505684, 0.6523079, 0.6540322, 0.6557717, 0.6575113, 0.6592508, 0.6610056, 0.6627451, 0.6644999, 0.6662547, 0.6680095, 0.6697642, 0.6715343, 0.6732891, 0.6750591, 0.6768292, 0.6785992, 0.6803845, 0.6821546, 0.6839399, 0.6857252, 0.6875105, 0.6892958, 0.6910811, 0.6928817, 0.6946822, 0.6964675, 0.6982834, 0.7000839, 0.7018845, 0.7037003, 0.7055161, 0.707332, 0.7091478, 0.7109636, 0.7127947, 0.7146105, 0.7164416, 0.7182727, 0.720119, 0.7219501, 0.7237964, 0.7256275, 0.7274739, 0.7293355, 0.7311818, 0.7330282, 0.7348898, 0.7367514, 0.738613, 0.7404746, 0.7423514, 0.744213, 0.7460899, 0.7479667, 0.7498436, 0.7517205, 0.7536126, 0.7554894, 0.7573816, 0.7592737, 0.7611658, 0.7630732, 0.7649653, 0.7668727, 0.76878, 0.7706874, 0.7725948, 0.7745174, 0.7764248, 0.7783474, 0.7802701, 0.7821927, 0.7841306, 0.7860533, 0.7879911, 0.789929, 0.7918669, 0.7938048, 0.795758, 0.7976959, 0.799649, 0.8016022, 0.8035554, 0.8055238, 0.8074769, 0.8094453, 0.8114137, 0.8133822, 0.8153506, 0.8173342, 0.8193179, 0.8212863, 0.82327, 0.8252689, 0.8272526, 0.8292515, 0.8312352, 0.8332341, 0.8352331, 0.8372473, 0.8392462, 0.8412604, 0.8432746, 0.8452888, 0.847303, 0.8493172, 0.8513466, 0.8533761, 0.8554055, 0.857435, 0.8594644, 0.8614939, 0.8635386, 0.8655833, 0.867628, 0.8696727, 0.8717327, 0.8737774, 0.8758373, 0.8778973, 0.8799573, 0.8820325, 0.8840925, 0.8861677, 0.8882429, 0.8903182, 0.8923934, 0.8944839, 0.8965591, 0.8986496, 0.9007401, 0.9028305, 0.9049363, 0.9070268, 0.9091325, 0.9112383, 0.913344, 0.915465, 0.9175708, 0.9196918, 0.9218128, 0.9239338, 0.9260548, 0.9281758, 0.930312, 0.9324483, 0.9345846, 0.9367208, 0.9388571, 0.9410086, 0.9431601, 0.9453117, 0.9474632, 0.9496147, 0.9517815, 0.953933, 0.9560998, 0.9582666, 0.9604334, 0.9626154, 0.9647822, 0.9669642, 0.9691463, 0.9713283, 0.9735256, 0.9757076, 0.9779049, 0.9801022, 0.9822995, 0.9844968, 0.9867094, 0.988922, 0.9911345, 0.9933471, 0.9955596, 0.9977722, 1.0","Luminance":"(76.0365, 80, 87.1246)","Measurement":"1931 2° Observer, Backing (0, 0, 0), Geometry Unknown, Flare 1%, Illuminant D65","Media Black Point":"(0, 0, 0)","Media White Point":"(0.9505, 1, 1.0891)","Primary Platform":"Microsoft Corporation","Profile Connection Space":"XYZ","Profile Date/Time":"1998:02:09 06:49:00","Profile Description":"sRGB IEC61966-2.1","Profile Size":"3144","Red Colorant":"(0.4361, 0.2225, 0.0139)","Red TRC":"0.0, 0.0000763, 0.0001526, 0.0002289, 0.0003052, 0.0003815, 0.0004578, 0.0005341, 0.0006104, 0.0006867, 0.000763, 0.0008392, 0.0009003, 0.0009766, 0.0010529, 0.0011292, 0.0012055, 0.0012818, 0.0013581, 0.0014343, 0.0015106, 0.0015869, 0.0016632, 0.0017395, 0.0018158, 0.0018921, 0.0019684, 0.0020447, 0.002121, 0.0021973, 0.0022736, 0.0023499, 0.0024262, 0.0025025, 0.0025788, 0.0026551, 0.0027161, 0.0027924, 0.0028687, 0.002945, 0.0030213, 0.0030976, 0.0031739, 0.0032502, 0.0033417, 0.003418, 0.0034943, 0.0035859, 0.0036622, 0.0037537, 0.00383, 0.0039216, 0.0040131, 0.0041047, 0.0041962, 0.0042878, 0.0043793, 0.0044709, 0.0045624, 0.0046693, 0.0047608, 0.0048524, 0.0049592, 0.005066, 0.0051575, 0.0052644, 0.0053712, 0.005478, 0.0055848, 0.0056916, 0.0057984, 0.0059052, 0.0060273, 0.0061341, 0.0062562, 0.006363, 0.0064851, 0.0066072, 0.0067292, 0.0068513, 0.0069734, 0.0070954, 0.0072175, 0.0073396, 0.0074617, 0.007599, 0.0077211, 0.0078584, 0.0079957, 0.0081178, 0.0082551, 0.0083925, 0.0085298, 0.0086671, 0.0088045, 0.008957, 0.0090944, 0.0092317, 0.0093843, 0.0095369, 0.0096742, 0.0098268, 0.0099794, 0.010132, 0.0102846, 0.0104372, 0.0105898, 0.0107576, 0.0109102, 0.0110628, 0.0112306, 0.0113985, 0.0115511, 0.0117189, 0.0118868, 0.0120546, 0.0122225, 0.0124056, 0.0125734, 0.0127413, 0.0129244, 0.0130922, 0.0132753, 0.0134585, 0.0136416, 0.0138247, 0.0140078, 0.0141909, 0.014374, 0.0145571, 0.0147555, 0.0149386, 0.0151369, 0.0153201, 0.0155184, 0.0157168, 0.0159152, 0.0161135, 0.0163119, 0.0165255, 0.0167239, 0.0169223, 0.0171359, 0.0173495, 0.0175479, 0.0177615, 0.0179751, 0.0181888, 0.0184024, 0.018616, 0.0188449, 0.0190585, 0.0192874, 0.019501, 0.0197299, 0.0199588, 0.0201877, 0.0204166, 0.0206455, 0.0208743, 0.0211032, 0.0213474, 0.0215763, 0.0218204, 0.0220645, 0.0222934, 0.0225376, 0.0227817, 0.0230259, 0.0232853, 0.0235294, 0.0237736, 0.024033, 0.0242771, 0.0245365, 0.0247959, 0.0250553, 0.0253147, 0.0255741, 0.0258335, 0.0261082, 0.0263676, 0.026627, 0.0269017, 0.0271763, 0.027451, 0.0277256, 0.0280003, 0.028275, 0.0285496, 0.0288243, 0.0291142, 0.0293889, 0.0296788, 0.0299687, 0.0302586, 0.0305486, 0.0308385, 0.0311284, 0.0314183, 0.0317235, 0.0320134, 0.0323186, 0.0326238, 0.032929, 0.0332341, 0.0335393, 0.0338445, 0.0341497, 0.0344549, 0.0347753, 0.0350805, 0.0354009, 0.0357214, 0.0360418, 0.0363622, 0.0366827, 0.0370031, 0.0373388, 0.0376593, 0.037995, 0.0383154, 0.0386511, 0.0389868, 0.0393225, 0.0396582, 0.0399939, 0.0403449, 0.0406806, 0.0410315, 0.0413825, 0.0417182, 0.0420691, 0.0424201, 0.042771, 0.0431373, 0.0434882, 0.0438392, 0.0442054, 0.0445716, 0.0449226, 0.0452888, 0.045655, 0.0460212, 0.0464027, 0.0467689, 0.0471504, 0.0475166, 0.0478981, 0.0482795, 0.048661, 0.0490425, 0.049424, 0.0498054, 0.0501869, 0.0505837, 0.0509804, 0.0513619, 0.0517586, 0.0521553, 0.0525521, 0.0529488, 0.0533608, 0.0537575, 0.0541695, 0.0545663, 0.0549783, 0.0553902, 0.0558022, 0.0562142, 0.0566262, 0.0570535, 0.0574655, 0.0578927, 0.05832, 0.058732, 0.0591592, 0.0595865, 0.060029, 0.0604562, 0.0608835, 0.061326, 0.0617533, 0.0621958, 0.0626383, 0.0630808, 0.0635233, 0.0639811, 0.0644236, 0.0648661, 0.0653239, 0.0657816, 0.0662394, 0.0666972, 0.067155, 0.0676127, 0.0680705, 0.0685435, 0.0690013, 0.0694743, 0.0699474, 0.0704204, 0.0708934, 0.0713664, 0.0718395, 0.0723278, 0.0728008, 0.0732891, 0.0737774, 0.0742657, 0.0747539, 0.0752422, 0.0757305, 0.0762188, 0.0767224, 0.0772259, 0.0777142, 0.0782177, 0.0787213, 0.0792401, 0.0797436, 0.0802472, 0.080766, 0.0812696, 0.0817884, 0.0823072, 0.082826, 0.0833448, 0.0838636, 0.0843977, 0.0849165, 0.0854505, 0.0859846, 0.0865187, 0.0870527, 0.0875868, 0.0881209, 0.0886549, 0.0892042, 0.0897536, 0.0902876, 0.090837, 0.0913863, 0.0919356, 0.0925002, 0.0930495, 0.0936141, 0.0941634, 0.094728, 0.0952926, 0.0958572, 0.0964218, 0.0970016, 0.0975662, 0.098146, 0.0987106, 0.0992905, 0.0998703, 0.1004501, 0.10103, 0.1016251, 0.1022049, 0.1028, 0.1033799, 0.103975, 0.1045701, 0.1051652, 0.1057755, 0.1063706, 0.106981, 0.1075761, 0.1081865, 0.1087968, 0.1094072, 0.1100175, 0.1106279, 0.1112535, 0.1118639, 0.1124895, 0.1131151, 0.1137407, 0.1143664, 0.114992, 0.1156176, 0.1162585, 0.1168841, 0.117525, 0.1181659, 0.1188067, 0.1194476, 0.1200885, 0.1207446, 0.1213855, 0.1220417, 0.1226978, 0.1233539, 0.1240101, 0.1246662, 0.1253223, 0.1259937, 0.1266499, 0.1273213, 0.1279927, 0.1286641, 0.1293355, 0.1300069, 0.1306935, 0.1313649, 0.1320516, 0.1327382, 0.1334096, 0.1341115, 0.1347982, 0.1354849, 0.1361868, 0.1368734, 0.1375753, 0.1382773, 0.1389792, 0.1396811, 0.140383, 0.1411002, 0.1418021, 0.1425193, 0.1432364, 0.1439536, 0.1446708, 0.145388, 0.1461204, 0.1468376, 0.14757, 0.1483024, 0.1490349, 0.1497673, 0.1504997, 0.1512322, 0.1519799, 0.1527123, 0.15346, 0.1542077, 0.1549554, 0.1557031, 0.1564508, 0.1572137, 0.1579767, 0.1587243, 0.1594873, 0.1602502, 0.1610132, 0.1617914, 0.1625544, 0.1633326, 0.1640955, 0.1648737, 0.1656519, 0.1664302, 0.1672236, 0.1680018, 0.1687953, 0.1695735, 0.170367, 0.1711604, 0.1719539, 0.1727474, 0.1735561, 0.1743496, 0.1751583, 0.175967, 0.1767758, 0.1775845, 0.1783932, 0.1792172, 0.1800259, 0.1808499, 0.1816739, 0.1824826, 0.1833219, 0.1841459, 0.1849699, 0.1858091, 0.1866331, 0.1874723, 0.1883116, 0.1891508, 0.1900053, 0.1908446, 0.1916838, 0.1925383, 0.1933928, 0.1942473, 0.1951019, 0.1959564, 0.1968261, 0.1976806, 0.1985504, 0.1994202, 0.2002899, 0.2011597, 0.2020294, 0.2028992, 0.2037842, 0.2046693, 0.205539, 0.206424, 0.2073243, 0.2082094, 0.2090944, 0.2099947, 0.2108949, 0.21178, 0.2126802, 0.2135958, 0.2144961, 0.2153964, 0.2163119, 0.2172274, 0.2181277, 0.2190585, 0.2199741, 0.2208896, 0.2218051, 0.2227359, 0.2236667, 0.2245975, 0.2255283, 0.2264591, 0.2273899, 0.228336, 0.2292821, 0.2302129, 0.2311589, 0.232105, 0.2330663, 0.2340124, 0.2349737, 0.2359197, 0.2368811, 0.2378424, 0.2388037, 0.239765, 0.2407416, 0.2417029, 0.2426795, 0.2436561, 0.2446326, 0.2456092, 0.2466011, 0.2475776, 0.2485695, 0.249546, 0.2505379, 0.2515297, 0.2525368, 0.2535286, 0.2545357, 0.2555276, 0.2565347, 0.2575418, 0.2585489, 0.259556, 0.2605783, 0.2615854, 0.2626078, 0.2636301, 0.2646525, 0.2656748, 0.2667124, 0.2677348, 0.2687724, 0.26981, 0.2708324, 0.2718853, 0.2729229, 0.2739605, 0.2750134, 0.276051, 0.2771038, 0.2781567, 0.2792248, 0.2802777, 0.2813306, 0.2823987, 0.2834668, 0.284535, 0.2856031, 0.2866712, 0.2877394, 0.2888228, 0.2899062, 0.2909743, 0.2920577, 0.2931563, 0.2942397, 0.2953231, 0.2964218, 0.2975204, 0.2986191, 0.2997177, 0.3008164, 0.301915, 0.3030289, 0.3041428, 0.3052567, 0.3063706, 0.3074846, 0.3085985, 0.3097124, 0.3108415, 0.3119707, 0.3130999, 0.314229, 0.3153582, 0.3165026, 0.3176318, 0.3187762, 0.3199207, 0.3210651, 0.3222095, 0.3233539, 0.3245136, 0.3256733, 0.3268177, 0.3279774, 0.3291371, 0.330312, 0.3314717, 0.3326467, 0.3338216, 0.3349966, 0.3361715, 0.3373465, 0.3385214, 0.3397116, 0.3408865, 0.3420768, 0.343267, 0.3444724, 0.3456626, 0.3468528, 0.3480583, 0.3492638, 0.3504692, 0.3516747, 0.3528801, 0.3541009, 0.3553063, 0.356527, 0.3577478, 0.3589685, 0.3601892, 0.3614252, 0.3626459, 0.3638819, 0.3651179, 0.3663539, 0.3675898, 0.3688411, 0.3700771, 0.3713283, 0.3725795, 0.3738308, 0.375082, 0.3763333, 0.3775998, 0.378851, 0.3801175, 0.381384, 0.3826505, 0.3839322, 0.3851987, 0.3864805, 0.387747, 0.3890288, 0.3903105, 0.3916075, 0.3928893, 0.3941863, 0.3954681, 0.3967651, 0.3980621, 0.3993744, 0.4006714, 0.4019837, 0.4032807, 0.404593, 0.4059052, 0.4072175, 0.4085451, 0.4098573, 0.4111849, 0.4125124, 0.4138399, 0.4151675, 0.416495, 0.4178378, 0.4191806, 0.4205234, 0.4218662, 0.423209, 0.4245518, 0.4259098, 0.4272526, 0.4286107, 0.4299687, 0.4313268, 0.4326848, 0.4340581, 0.4354314, 0.4367895, 0.4381628, 0.4395514, 0.4409247, 0.442298, 0.4436866, 0.4450752, 0.4464637, 0.4478523, 0.4492409, 0.4506447, 0.4520333, 0.4534371, 0.4548409, 0.4562448, 0.4576486, 0.4590677, 0.4604715, 0.4618906, 0.4633097, 0.4647288, 0.4661631, 0.4675822, 0.4690166, 0.4704356, 0.47187, 0.4733043, 0.4747539, 0.4761883, 0.4776379, 0.4790875, 0.4805371, 0.4819867, 0.4834363, 0.4848859, 0.4863508, 0.4878157, 0.4892805, 0.4907454, 0.4922103, 0.4936904, 0.4951553, 0.4966354, 0.4981155, 0.4995956, 0.501091, 0.5025711, 0.5040665, 0.5055467, 0.507042, 0.5085527, 0.5100481, 0.5115435, 0.5130541, 0.5145647, 0.5160754, 0.517586, 0.5190967, 0.5206226, 0.5221485, 0.5236591, 0.525185, 0.5267262, 0.5282521, 0.529778, 0.5313191, 0.5328603, 0.5344015, 0.5359426, 0.537499, 0.5390402, 0.5405966, 0.542153, 0.5437095, 0.5452659, 0.5468223, 0.548394, 0.5499657, 0.5515373, 0.553109, 0.5546807, 0.5562524, 0.5578393, 0.5594263, 0.5610132, 0.5626001, 0.5641871, 0.565774, 0.5673762, 0.5689784, 0.5705806, 0.5721828, 0.573785, 0.5754025, 0.5770047, 0.5786221, 0.5802396, 0.581857, 0.5834897, 0.5851072, 0.5867399, 0.5883726, 0.5900053, 0.5916381, 0.5932708, 0.5949187, 0.5965667, 0.5982147, 0.5998627, 0.6015106, 0.6031586, 0.6048219, 0.6064851, 0.6081483, 0.6098116, 0.6114748, 0.6131533, 0.6148165, 0.616495, 0.6181735, 0.619852, 0.6215457, 0.6232242, 0.624918, 0.6266117, 0.6283055, 0.6299992, 0.631693, 0.633402, 0.635111, 0.63682, 0.638529, 0.640238, 0.6419471, 0.6436713, 0.6453956, 0.6471199, 0.6488441, 0.6505684, 0.6523079, 0.6540322, 0.6557717, 0.6575113, 0.6592508, 0.6610056, 0.6627451, 0.6644999, 0.6662547, 0.6680095, 0.6697642, 0.6715343, 0.6732891, 0.6750591, 0.6768292, 0.6785992, 0.6803845, 0.6821546, 0.6839399, 0.6857252, 0.6875105, 0.6892958, 0.6910811, 0.6928817, 0.6946822, 0.6964675, 0.6982834, 0.7000839, 0.7018845, 0.7037003, 0.7055161, 0.707332, 0.7091478, 0.7109636, 0.7127947, 0.7146105, 0.7164416, 0.7182727, 0.720119, 0.7219501, 0.7237964, 0.7256275, 0.7274739, 0.7293355, 0.7311818, 0.7330282, 0.7348898, 0.7367514, 0.738613, 0.7404746, 0.7423514, 0.744213, 0.7460899, 0.7479667, 0.7498436, 0.7517205, 0.7536126, 0.7554894, 0.7573816, 0.7592737, 0.7611658, 0.7630732, 0.7649653, 0.7668727, 0.76878, 0.7706874, 0.7725948, 0.7745174, 0.7764248, 0.7783474, 0.7802701, 0.7821927, 0.7841306, 0.7860533, 0.7879911, 0.789929, 0.7918669, 0.7938048, 0.795758, 0.7976959, 0.799649, 0.8016022, 0.8035554, 0.8055238, 0.8074769, 0.8094453, 0.8114137, 0.8133822, 0.8153506, 0.8173342, 0.8193179, 0.8212863, 0.82327, 0.8252689, 0.8272526, 0.8292515, 0.8312352, 0.8332341, 0.8352331, 0.8372473, 0.8392462, 0.8412604, 0.8432746, 0.8452888, 0.847303, 0.8493172, 0.8513466, 0.8533761, 0.8554055, 0.857435, 0.8594644, 0.8614939, 0.8635386, 0.8655833, 0.867628, 0.8696727, 0.8717327, 0.8737774, 0.8758373, 0.8778973, 0.8799573, 0.8820325, 0.8840925, 0.8861677, 0.8882429, 0.8903182, 0.8923934, 0.8944839, 0.8965591, 0.8986496, 0.9007401, 0.9028305, 0.9049363, 0.9070268, 0.9091325, 0.9112383, 0.913344, 0.915465, 0.9175708, 0.9196918, 0.9218128, 0.9239338, 0.9260548, 0.9281758, 0.930312, 0.9324483, 0.9345846, 0.9367208, 0.9388571, 0.9410086, 0.9431601, 0.9453117, 0.9474632, 0.9496147, 0.9517815, 0.953933, 0.9560998, 0.9582666, 0.9604334, 0.9626154, 0.9647822, 0.9669642, 0.9691463, 0.9713283, 0.9735256, 0.9757076, 0.9779049, 0.9801022, 0.9822995, 0.9844968, 0.9867094, 0.988922, 0.9911345, 0.9933471, 0.9955596, 0.9977722, 1.0","Signature":"acsp","Tag Count":"17","Technology":"CRT","Viewing Conditions":"view (0x76696577): 36 bytes","Viewing Conditions Description":"Reference Viewing Condition in IEC61966-2.1","X-Parsed-By":["org.apache.tika.parser.DefaultParser","org.apache.tika.parser.ocr.TesseractOCRParser","org.apache.tika.parser.image.TiffParser"],"XYZ values":"0.964 1 0.825","resourceName":"fake_gazette.tiff","tiff:BitsPerSample":"8","tiff:ImageLength":"640","tiff:ImageWidth":"640","tiff:Orientation":"1","tiff:ResolutionUnit":"Inch","tiff:SamplesPerPixel":"4","tiff:XResolution":"72.0","tiff:YResolution":"72.0"} \ No newline at end of file diff --git a/tests/data/fake_gazette.m4a b/tests/data/fake_gazette.m4a new file mode 100644 index 0000000..3eaf4f0 Binary files /dev/null and b/tests/data/fake_gazette.m4a differ diff --git a/tests/data/fake_gazette.png b/tests/data/fake_gazette.png new file mode 100644 index 0000000..819d38a Binary files /dev/null and b/tests/data/fake_gazette.png differ diff --git a/tests/data/fake_gazette.tiff b/tests/data/fake_gazette.tiff new file mode 100644 index 0000000..91573cf Binary files /dev/null and b/tests/data/fake_gazette.tiff differ diff --git a/tests/data/fake_metadata.json b/tests/data/fake_metadata.json new file mode 100644 index 0000000..9643e96 --- /dev/null +++ b/tests/data/fake_metadata.json @@ -0,0 +1 @@ +{"gazette": "Hi this is a document created to test the text extraction for the Querido Diário project."} diff --git a/tests/text_extraction_tests.py b/tests/text_extraction_tests.py index 5085314..197cfa9 100644 --- a/tests/text_extraction_tests.py +++ b/tests/text_extraction_tests.py @@ -1,13 +1,12 @@ from unittest import TestCase import os -from process.etl.file_transform import * -from process import Gazette +from queridodiario_toolbox.etl.file_transform import * +from queridodiario_toolbox import Gazette class TextExtractionTests(TestCase): - # APACHE_TIKA_JAR_PATH = "/tika-app.jar" - TIKA_PATH = "/usr/local/Cellar/tika/1.24.1_1/libexec/tika-app-1.24.1.jar" + TIKA_PATH = "/tika-app.jar" def tearDown(self): self.clean_txt_file_generated_during_tests() @@ -22,9 +21,26 @@ def clean_txt_file_generated_during_tests(self): def get_files_generated_during_tests(self, root, files): for f in files: - if ".txt" in f and f not in ["fake_content.txt", "fake_gazette.txt"]: + if ".txt" in f and f not in [ + "fake_content.txt", "fake_gazette.txt" + ]: yield f"{root}{f}" + def validate_basic_extract_content(self, gazette, metadata=False): + if metadata: + target = "tests/data/fake_gazette.json" + else: + target = "tests/data/fake_gazette.txt" + + gazette.extract_content(metadata=metadata) + self.assertEqual(gazette.filepath, target) + + gazette.load_content() + self.assertNotEqual(0, len(gazette.content)) + + if not metadata: + self.assertIn("Querido", gazette.content, "Extraction Failed") + def test_extract_text_from_invalid_file(self): with self.assertRaisesRegex(Exception, "No such file"): gazette = Gazette("file/does/not/exist", self.TIKA_PATH) @@ -116,154 +132,66 @@ def test_class_instantiation_with_all_arguments(self): def test_extract_text_from_doc_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.doc", self.TIKA_PATH) - - gazette.extract_content() - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.txt") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) - self.assertIn("Querido", gazette.content, "Extraction Failed") + self.validate_basic_extract_content(gazette) def test_extract_text_from_docx_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.docx", self.TIKA_PATH) - - gazette.extract_content() - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.txt") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) - self.assertIn("Querido", gazette.content, "Extraction Failed") + self.validate_basic_extract_content(gazette) def test_extract_text_from_odt_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.odt", self.TIKA_PATH) - - gazette.extract_content() - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.txt") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) - self.assertIn("Querido", gazette.content, "Extraction Failed") + self.validate_basic_extract_content(gazette) def test_extract_text_from_html_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.html", self.TIKA_PATH) - - gazette.extract_content() - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.txt") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) - self.assertIn("Querido", gazette.content, "Extraction Failed") + self.validate_basic_extract_content(gazette) def test_extract_text_from_pdf_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.pdf", self.TIKA_PATH) - - gazette.extract_content() - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.txt") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) - self.assertIn("Querido", gazette.content, "Extraction Failed") + self.validate_basic_extract_content(gazette) def test_extract_text_from_jpeg_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.jpeg", self.TIKA_PATH) - - gazette.extract_content() - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.txt") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) - self.assertIn("Querido", gazette.content, "Extraction Failed") + self.validate_basic_extract_content(gazette) def test_extract_text_from_png_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.png", self.TIKA_PATH) - - gazette.extract_content() - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.txt") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) - self.assertIn("Querido", gazette.content, "Extraction Failed") + self.validate_basic_extract_content(gazette) def test_extract_text_from_tiff_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.tiff", self.TIKA_PATH) - - gazette.extract_content() - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.txt") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) - self.assertIn("Querido", gazette.content, "Extraction Failed") + self.validate_basic_extract_content(gazette) def test_extract_metadata_from_doc_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.doc", self.TIKA_PATH) - - gazette.extract_content(metadata=True) - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.json") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) + self.validate_basic_extract_content(gazette, metadata=True) def test_extract_metadata_from_docx_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.docx", self.TIKA_PATH) - - gazette.extract_content(metadata=True) - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.json") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) + self.validate_basic_extract_content(gazette, metadata=True) def test_extract_metadata_from_odt_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.odt", self.TIKA_PATH) - - gazette.extract_content(metadata=True) - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.json") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) + self.validate_basic_extract_content(gazette, metadata=True) def test_extract_metadata_from_html_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.html", self.TIKA_PATH) - - gazette.extract_content(metadata=True) - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.json") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) + self.validate_basic_extract_content(gazette, metadata=True) def test_extract_metadata_from_pdf_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.pdf", self.TIKA_PATH) - - gazette.extract_content(metadata=True) - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.json") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) + self.validate_basic_extract_content(gazette, metadata=True) def test_extract_metadata_from_jpeg_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.jpeg", self.TIKA_PATH) - - gazette.extract_content(metadata=True) - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.json") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) + self.validate_basic_extract_content(gazette, metadata=True) def test_extract_metadata_from_png_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.png", self.TIKA_PATH) - - gazette.extract_content(metadata=True) - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.json") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) + self.validate_basic_extract_content(gazette, metadata=True) def test_extract_metadata_from_tiff_should_return_content(self): gazette = Gazette("tests/data/fake_gazette.tiff", self.TIKA_PATH) - - gazette.extract_content(metadata=True) - self.assertEqual(gazette.filepath, "tests/data/fake_gazette.json") - - gazette.load_content() - self.assertNotEqual(0, len(gazette.content)) + self.validate_basic_extract_content(gazette, metadata=True)