Skip to content

Commit 2cf033a

Browse files
committed
Make tokenize_string an optional parameter for diff methods in pp_diffs.
Remove forward reference to lexer.
1 parent 3f619ba commit 2cf033a

File tree

5 files changed

+27
-21
lines changed

5 files changed

+27
-21
lines changed

lib/pp_diff.ml

+16-9
Original file line numberDiff line numberDiff line change
@@ -126,13 +126,20 @@ let diff_strs old_strs new_strs =
126126
let diffs = List.rev (StringDiff.diff old_strs new_strs) in
127127
shorten_diff_span `Removed (shorten_diff_span `Added diffs);;
128128

129-
(* to be set to Proof_diffs.tokenize_string to allow a forward reference to the lexer *)
130-
let tokenize_string = ref (fun (s : string) -> [s])
129+
(* Default string tokenizer. Makes each character a separate strin.
130+
Whitespace is not ignored. Doesn't handle UTF-8 differences well. *)
131+
let def_tokenize_string s =
132+
let limit = (String.length s) - 1 in
133+
let strs : string list ref = ref [] in
134+
for i = 0 to limit do
135+
strs := (String.make 1 s.[i]) :: !strs
136+
done;
137+
List.rev !strs
131138

132139
(* get the Myers diff of 2 strings *)
133-
let diff_str old_str new_str =
134-
let old_toks = Array.of_list (!tokenize_string old_str)
135-
and new_toks = Array.of_list (!tokenize_string new_str) in
140+
let diff_str ?(tokenize_string=def_tokenize_string) old_str new_str =
141+
let old_toks = Array.of_list (tokenize_string old_str)
142+
and new_toks = Array.of_list (tokenize_string new_str) in
136143
diff_strs old_toks new_toks;;
137144

138145
let get_dinfo = function
@@ -277,18 +284,18 @@ let add_diff_tags which pp diffs =
277284
raise (Diff_Failure "left-over diff info at end of Pp.t, should be impossible");
278285
if has_added || has_removed then wrap_in_bg diff_tag rv else rv;;
279286

280-
let diff_pp o_pp n_pp =
287+
let diff_pp ?(tokenize_string=def_tokenize_string) o_pp n_pp =
281288
let open Pp in
282289
let o_str = string_of_ppcmds o_pp in
283290
let n_str = string_of_ppcmds n_pp in
284-
let diffs = diff_str o_str n_str in
291+
let diffs = diff_str ~tokenize_string o_str n_str in
285292
(add_diff_tags `Removed o_pp diffs, add_diff_tags `Added n_pp diffs);;
286293

287-
let diff_pp_combined ?(show_removed=false) o_pp n_pp =
294+
let diff_pp_combined ?(tokenize_string=def_tokenize_string) ?(show_removed=false) o_pp n_pp =
288295
let open Pp in
289296
let o_str = string_of_ppcmds o_pp in
290297
let n_str = string_of_ppcmds n_pp in
291-
let diffs = diff_str o_str n_str in
298+
let diffs = diff_str ~tokenize_string o_str n_str in
292299
let (_, has_removed) = has_changes diffs in
293300
let added = add_diff_tags `Added n_pp diffs in
294301
if show_removed && has_removed then

lib/pp_diff.mli

+3-6
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,16 @@ Limitations/Possible enhancements:
3333

3434
(** Compute the diff between two Pp.t structures and return
3535
versions of each with diffs highlighted as (old, new) *)
36-
val diff_pp : Pp.t -> Pp.t -> Pp.t * Pp.t
36+
val diff_pp : ?tokenize_string:(string -> string list) -> Pp.t -> Pp.t -> Pp.t * Pp.t
3737

3838
(** Compute the diff between two Pp.t structures and return
3939
a highlighted Pp.t. If [show_removed] is true, show separate lines for
4040
removals and additions, otherwise only show additions *)
41-
val diff_pp_combined : ?show_removed:bool -> Pp.t -> Pp.t -> Pp.t
41+
val diff_pp_combined : ?tokenize_string:(string -> string list) -> ?show_removed:bool -> Pp.t -> Pp.t -> Pp.t
4242

4343
(** Raised if the diff fails *)
4444
exception Diff_Failure of string
4545

46-
(* for dependency injection to allow calling the lexer *)
47-
val tokenize_string : (string -> string list) ref
48-
4946
module StringDiff :
5047
sig
5148
type elem = String.t
@@ -69,7 +66,7 @@ If the strings are not lexable, this routine will raise Diff_Failure.
6966
Therefore you should catch any exceptions. The workaround for now is for the
7067
caller to tokenize the strings itself and then call diff_strs.
7168
*)
72-
val diff_str : string -> string -> StringDiff.elem Diff2.edit list
69+
val diff_str : ?tokenize_string:(string -> string list) -> string -> string -> StringDiff.elem Diff2.edit list
7370

7471
(** Compute the differences between 2 lists of strings, treating the strings
7572
in the lists as indivisible units.

printing/proof_diffs.ml

+2-5
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ let cprintf s = cfprintf !log_out_ch s
8989

9090
module StringMap = Map.Make(String);;
9191

92-
(* placed here so that pp_diff.ml can access the lexer through dependency injection *)
9392
let tokenize_string s =
9493
(* todo: cLexer changes buff as it proceeds. Seems like that should be saved, too.
9594
But I don't understand how it's used--it looks like things get appended to it but
@@ -112,8 +111,6 @@ let tokenize_string s =
112111
CLexer.set_lexer_state st;
113112
raise (Diff_Failure "Input string is not lexable");;
114113

115-
let _ = Pp_diff.tokenize_string := tokenize_string
116-
117114

118115
type hyp_info = {
119116
idents: string list;
@@ -153,7 +150,7 @@ let diff_hyps o_line_idents o_map n_line_idents n_map =
153150
let (o_line, o_pp) = setup old_ids o_map in
154151
let (n_line, n_pp) = setup new_ids n_map in
155152

156-
let hyp_diffs = diff_str o_line n_line in
153+
let hyp_diffs = diff_str ~tokenize_string o_line n_line in
157154
let (has_added, has_removed) = has_changes hyp_diffs in
158155
if show_removed () && has_removed then begin
159156
let o_entry = StringMap.find (List.hd old_ids) o_map in
@@ -303,7 +300,7 @@ let diff_goal_info o_info n_info =
303300
let (o_line_idents, o_hyp_map, o_concl_pp) = o_info in
304301
let (n_line_idents, n_hyp_map, n_concl_pp) = n_info in
305302
let show_removed = Some (show_removed ()) in
306-
let concl_pp = diff_pp_combined ?show_removed o_concl_pp n_concl_pp in
303+
let concl_pp = diff_pp_combined ~tokenize_string ?show_removed o_concl_pp n_concl_pp in
307304

308305
let hyp_diffs_list = diff_hyps o_line_idents o_hyp_map n_line_idents n_hyp_map in
309306
(hyp_diffs_list, concl_pp)

printing/proof_diffs.mli

+3
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,9 @@ the first argument set to None, which will skip the diff.
4343
*)
4444
val diff_goals : ?prev_gs:(goal sigma) -> goal sigma option -> Pp.t
4545

46+
(** Convert a string to a list of token strings using the lexer *)
47+
val tokenize_string : string -> string list
48+
4649
(* Exposed for unit test, don't use these otherwise *)
4750
(* output channel for the test log file *)
4851
val log_out_ch : out_channel ref

test-suite/unit-tests/printing/proof_diffs_test.ml

+3-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@ open Utest
33
open Pp_diff
44
open Proof_diffs
55

6-
let tokenize_string = !Pp_diff.tokenize_string
6+
let tokenize_string = Proof_diffs.tokenize_string
7+
let diff_pp = diff_pp ~tokenize_string
8+
let diff_str = diff_str ~tokenize_string
79

810
let tests = ref []
911
let add_test name test = tests := (mk_test name (TestCase test)) :: !tests

0 commit comments

Comments
 (0)