From 87dc65288924025770878742ce927d3c74a65a40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20B=C3=B6ving?= Date: Fri, 2 Aug 2024 18:54:33 +0200 Subject: [PATCH 1/8] feat: read entire files in one system call --- src/Init/System/IO.lean | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/src/Init/System/IO.lean b/src/Init/System/IO.lean index cbcc705e7952..fc44763e5377 100644 --- a/src/Init/System/IO.lean +++ b/src/Init/System/IO.lean @@ -473,22 +473,11 @@ partial def Handle.readBinToEnd (h : Handle) : IO ByteArray := do loop (acc ++ buf) loop ByteArray.empty -partial def Handle.readToEnd (h : Handle) : IO String := do - let rec loop (s : String) := do - let line ← h.getLine - if line.isEmpty then - return s - else - loop (s ++ line) - loop "" - -def readBinFile (fname : FilePath) : IO ByteArray := do - let h ← Handle.mk fname Mode.read - h.readBinToEnd - -def readFile (fname : FilePath) : IO String := do - let h ← Handle.mk fname Mode.read - h.readToEnd +def Handle.readToEnd (h : Handle) : IO String := do + let data ← h.readBinToEnd + match String.fromUTF8? data with + | some s => return s + | none => throw <| .userError s!"Tried to read from handle containing non UTF-8 data." partial def lines (fname : FilePath) : IO (Array String) := do let h ← Handle.mk fname Mode.read @@ -594,6 +583,26 @@ end System.FilePath namespace IO +namespace FS + +def readBinFile (fname : FilePath) : IO ByteArray := do + -- Requires metadata so defined after metadata + let mdata ← fname.metadata + let size := mdata.byteSize.toUSize + if size > 0 then + let handle ← IO.FS.Handle.mk fname .read + handle.read mdata.byteSize.toUSize + else + return ByteArray.mkEmpty 0 + +def readFile (fname : FilePath) : IO String := do + let data ← readBinFile fname + match String.fromUTF8? data with + | some s => return s + | none => throw <| .userError s!"Tried to read file '{fname}' containing non UTF-8 data." + +end FS + def withStdin [Monad m] [MonadFinally m] [MonadLiftT BaseIO m] (h : FS.Stream) (x : m α) : m α := do let prev ← setStdin h try x finally discard <| setStdin prev From 174df1960ed69f50ab6fb4388b43ca916dc02e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20B=C3=B6ving?= Date: Fri, 2 Aug 2024 20:13:34 +0200 Subject: [PATCH 2/8] feat: add lean_mk_string_from_bytes to object.h --- src/runtime/object.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/runtime/object.h b/src/runtime/object.h index b5e5a6375817..e5d6181d7909 100644 --- a/src/runtime/object.h +++ b/src/runtime/object.h @@ -238,6 +238,7 @@ inline size_t string_capacity(object * o) { return lean_string_capacity(o); } inline uint32 char_default_value() { return lean_char_default_value(); } inline obj_res alloc_string(size_t size, size_t capacity, size_t len) { return lean_alloc_string(size, capacity, len); } inline obj_res mk_string(char const * s) { return lean_mk_string(s); } +inline obj_res mk_string_from_bytes(char const * s, size_t sz) { return lean_mk_string_from_bytes(s, sz); } LEAN_EXPORT obj_res mk_ascii_string_unchecked(std::string const & s); LEAN_EXPORT obj_res mk_string(std::string const & s); LEAN_EXPORT std::string string_to_std(b_obj_arg o); From 858bdf0b3370dd796d020927d8870a7e1c182cb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20B=C3=B6ving?= Date: Fri, 2 Aug 2024 20:13:49 +0200 Subject: [PATCH 3/8] fix: Handle.getLine for lines containing NULL bytes --- src/runtime/io.cpp | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/src/runtime/io.cpp b/src/runtime/io.cpp index 136612e2fee5..edf49c02a77c 100644 --- a/src/runtime/io.cpp +++ b/src/runtime/io.cpp @@ -485,36 +485,21 @@ extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_write(b_obj_arg h, b_obj_arg } } -/* - Handle.getLine : (@& Handle) → IO Unit - The line returned by `lean_io_prim_handle_get_line` - is truncated at the first '\0' character and the - rest of the line is discarded. */ +/* Handle.getLine : (@& Handle) → IO Unit */ extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_get_line(b_obj_arg h, obj_arg /* w */) { FILE * fp = io_get_handle(h); - const int buf_sz = 64; - char buf_str[buf_sz]; // NOLINT - std::string result; - bool first = true; - while (true) { - char * out = std::fgets(buf_str, buf_sz, fp); - if (out != nullptr) { - if (strlen(buf_str) < buf_sz-1 || buf_str[buf_sz-2] == '\n') { - if (first) { - return io_result_mk_ok(mk_string(out)); - } else { - result.append(out); - return io_result_mk_ok(mk_string(result)); - } - } - result.append(out); - } else if (std::feof(fp)) { - clearerr(fp); - return io_result_mk_ok(mk_string(result)); - } else { - return io_result_mk_error(decode_io_error(errno, nullptr)); - } - first = false; + char* buf = NULL; + size_t n = 0; + ssize_t read = getline(&buf, &n, fp); + if (read != -1) { + obj_res ret = io_result_mk_ok(mk_string_from_bytes(buf, read)); + free(buf); + return ret; + } else if (std::feof(fp)) { + clearerr(fp); + return io_result_mk_ok(mk_string("")); + } else { + return io_result_mk_error(decode_io_error(errno, nullptr)); } } From 253a44a4ccb7dbc069c47ef87f8dd61fc5fb7713 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20B=C3=B6ving?= Date: Fri, 2 Aug 2024 20:41:50 +0200 Subject: [PATCH 4/8] fix: Handle.putStr for strings containing NULL bytes. --- src/runtime/io.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/runtime/io.cpp b/src/runtime/io.cpp index edf49c02a77c..552e0ca062ff 100644 --- a/src/runtime/io.cpp +++ b/src/runtime/io.cpp @@ -506,7 +506,9 @@ extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_get_line(b_obj_arg h, obj_arg /* Handle.putStr : (@& Handle) → (@& String) → IO Unit */ extern "C" LEAN_EXPORT obj_res lean_io_prim_handle_put_str(b_obj_arg h, b_obj_arg s, obj_arg /* w */) { FILE * fp = io_get_handle(h); - if (std::fputs(lean_string_cstr(s), fp) != EOF) { + usize n = lean_string_size(s) - 1; // - 1 to ignore the terminal NULL byte. + usize m = std::fwrite(lean_string_cstr(s), 1, n, fp); + if (m == n) { return io_result_mk_ok(box(0)); } else { return io_result_mk_error(decode_io_error(errno, nullptr)); From aaff43234db1ee97c9f3a162f8c2c9395c57ed12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20B=C3=B6ving?= Date: Fri, 2 Aug 2024 20:44:15 +0200 Subject: [PATCH 5/8] test: NULL byte safety of Handle.putStr and Handle.getLine --- tests/lean/3546.lean | 12 ++++++++++++ tests/lean/3546.lean.expected.out | 1 + 2 files changed, 13 insertions(+) create mode 100644 tests/lean/3546.lean create mode 100644 tests/lean/3546.lean.expected.out diff --git a/tests/lean/3546.lean b/tests/lean/3546.lean new file mode 100644 index 000000000000..631b660168a5 --- /dev/null +++ b/tests/lean/3546.lean @@ -0,0 +1,12 @@ +def test : IO Unit := do + let tmpFile := "3546.tmp" + let firstLine := "foo\u0000bar\n" + let content := firstLine ++ "hello world\nbye" + IO.FS.writeFile tmpFile content + let handle ← IO.FS.Handle.mk tmpFile .read + let firstReadLine ← handle.getLine + let cond := firstLine == firstReadLine && firstReadLine.length == 8 -- paranoid + IO.println cond + IO.FS.removeFile tmpFile + +#eval test diff --git a/tests/lean/3546.lean.expected.out b/tests/lean/3546.lean.expected.out new file mode 100644 index 000000000000..27ba77ddaf61 --- /dev/null +++ b/tests/lean/3546.lean.expected.out @@ -0,0 +1 @@ +true From 13ec35fd56d865f350f02110ae05126d9d68ee92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20B=C3=B6ving?= Date: Tue, 6 Aug 2024 10:09:42 +0200 Subject: [PATCH 6/8] fix: keep trying to read after mdata size --- src/Init/System/IO.lean | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/Init/System/IO.lean b/src/Init/System/IO.lean index fc44763e5377..94a3f74a86e4 100644 --- a/src/Init/System/IO.lean +++ b/src/Init/System/IO.lean @@ -464,14 +464,17 @@ def withFile (fn : FilePath) (mode : Mode) (f : Handle → IO α) : IO α := def Handle.putStrLn (h : Handle) (s : String) : IO Unit := h.putStr (s.push '\n') -partial def Handle.readBinToEnd (h : Handle) : IO ByteArray := do +partial def Handle.readBinToEndInto (h : Handle) (buf : ByteArray) : IO ByteArray := do let rec loop (acc : ByteArray) : IO ByteArray := do let buf ← h.read 1024 if buf.isEmpty then return acc else loop (acc ++ buf) - loop ByteArray.empty + loop buf + +partial def Handle.readBinToEnd (h : Handle) : IO ByteArray := do + h.readBinToEndInto .empty def Handle.readToEnd (h : Handle) : IO String := do let data ← h.readBinToEnd @@ -589,11 +592,13 @@ def readBinFile (fname : FilePath) : IO ByteArray := do -- Requires metadata so defined after metadata let mdata ← fname.metadata let size := mdata.byteSize.toUSize - if size > 0 then - let handle ← IO.FS.Handle.mk fname .read - handle.read mdata.byteSize.toUSize - else - return ByteArray.mkEmpty 0 + let handle ← IO.FS.Handle.mk fname .read + let buf ← + if size > 0 then + handle.read mdata.byteSize.toUSize + else + pure <| ByteArray.mkEmpty 0 + handle.readBinToEndInto buf def readFile (fname : FilePath) : IO String := do let data ← readBinFile fname From 1be6f69388d795a7ae1c3af76b1c1bb91aa89272 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20B=C3=B6ving?= Date: Tue, 6 Aug 2024 10:11:47 +0200 Subject: [PATCH 7/8] chore: use guard_msgs --- tests/lean/3546.lean | 2 ++ tests/lean/3546.lean.expected.out | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/lean/3546.lean b/tests/lean/3546.lean index 631b660168a5..4491b72cd67a 100644 --- a/tests/lean/3546.lean +++ b/tests/lean/3546.lean @@ -9,4 +9,6 @@ def test : IO Unit := do IO.println cond IO.FS.removeFile tmpFile +/-- info: true -/ +#guard_msgs in #eval test diff --git a/tests/lean/3546.lean.expected.out b/tests/lean/3546.lean.expected.out index 27ba77ddaf61..e69de29bb2d1 100644 --- a/tests/lean/3546.lean.expected.out +++ b/tests/lean/3546.lean.expected.out @@ -1 +0,0 @@ -true From 17c0fd1546a822a03bb52650d104c602b05d320c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20B=C3=B6ving?= Date: Tue, 6 Aug 2024 17:19:22 +0200 Subject: [PATCH 8/8] chore: move test --- tests/lean/3546.lean.expected.out | 0 tests/lean/{ => run}/3546.lean | 0 2 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 tests/lean/3546.lean.expected.out rename tests/lean/{ => run}/3546.lean (100%) diff --git a/tests/lean/3546.lean.expected.out b/tests/lean/3546.lean.expected.out deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/lean/3546.lean b/tests/lean/run/3546.lean similarity index 100% rename from tests/lean/3546.lean rename to tests/lean/run/3546.lean