summaryrefslogtreecommitdiff
path: root/asmcomp/strmatch.ml
blob: 0a258edb87dd0579ff1f30c73e4fc839540c3cf8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
(**************************************************************************)
(*                                                                        *)
(*                                 OCaml                                  *)
(*                                                                        *)
(*             Xavier Leroy, projet Cristal, INRIA Rocquencourt           *)
(*                                                                        *)
(*   Copyright 1996 Institut National de Recherche en Informatique et     *)
(*     en Automatique.                                                    *)
(*                                                                        *)
(*   All rights reserved.  This file is distributed under the terms of    *)
(*   the GNU Lesser General Public License version 2.1, with the          *)
(*   special exception on linking described in the file LICENSE.          *)
(*                                                                        *)
(**************************************************************************)

(* Translation of string matching from closed lambda to C-- *)

open Lambda
open Cmm

module type I = sig
  val string_block_length : Cmm.expression -> Cmm.expression
  val transl_switch :
      Location.t -> Cmm.expression -> int -> int ->
        (int * Cmm.expression) list -> Cmm.expression ->
          Cmm.expression
end

module Make(I:I) = struct

(* Debug *)

  let dbg = false

  let mask =
    let open Nativeint in
    sub (shift_left one 8) one

  let pat_as_string p =
    let rec digits k n p =
      if n <= 0 then k
      else
        let d = Nativeint.to_int (Nativeint.logand mask p) in
        let d = Char.escaped (Char.chr d) in
        digits (d::k) (n-1) (Nativeint.shift_right_logical p  8) in
    let ds = digits [] Arch.size_addr p in
    let ds =
      if Arch.big_endian then ds else List.rev ds in
    String.concat "" ds

  let do_pp_cases chan cases =
    List.iter
      (fun (ps,_) ->
        Printf.fprintf chan "  [%s]\n"
          (String.concat "; " (List.map pat_as_string ps)))
      cases

  let pp_cases chan tag cases =
    Printf.eprintf "%s:\n" tag ;
    do_pp_cases chan cases

  let pp_match chan tag idxs cases =
    Printf.eprintf
      "%s: idx=[%s]\n" tag
      (String.concat "; " (List.map string_of_int idxs)) ;
    do_pp_cases chan cases

(* Utilities *)

  let gen_cell_id () = Ident.create "cell"
  let gen_size_id () = Ident.create "size"

  let mk_let_cell id str ind body =
    let dbg = Debuginfo.none in
    let cell =
      Cop(Cload (Word_int, Asttypes.Mutable),
        [Cop(Cadda,[str;Cconst_int(Arch.size_int*ind)], dbg)],
        dbg) in
    Clet(id, cell, body)

  let mk_let_size id str body =
    let size = I.string_block_length str in
    Clet(id, size, body)

  let mk_cmp_gen cmp_op id nat ifso ifnot =
    let dbg = Debuginfo.none in
    let test =
      Cop (Ccmpi cmp_op, [ Cvar id; Cconst_natpointer nat ], dbg)
    in
    Cifthenelse (test, ifso, ifnot)

  let mk_lt = mk_cmp_gen Clt
  let mk_eq = mk_cmp_gen Ceq

  module IntArg =
    struct
      type t = int
      let compare (x:int) (y:int) =
        if x < y then -1
        else if x > y then 1
        else 0
    end

  let interval m0 n =
    let rec do_rec m =
      if m >= n then []
      else m::do_rec (m+1) in
    do_rec m0


(*****************************************************)
(* Compile strings to a lists of words [native ints] *)
(*****************************************************)

  let pat_of_string str =
    let len = String.length str in
    let n = len / Arch.size_addr + 1 in
    let get_byte i =
      if i < len then int_of_char str.[i]
      else if i < n * Arch.size_addr - 1 then 0
      else n * Arch.size_addr - 1 - len in
    let mk_word ind =
      let w = ref 0n in
      let imin = ind * Arch.size_addr
      and imax = (ind + 1) * Arch.size_addr - 1 in
      if Arch.big_endian then
        for i = imin to imax do
          w := Nativeint.logor (Nativeint.shift_left !w 8)
              (Nativeint.of_int (get_byte i));
        done
      else
        for i = imax downto imin do
          w := Nativeint.logor (Nativeint.shift_left !w 8)
              (Nativeint.of_int (get_byte i));
        done;
      !w in
    let rec mk_words ind  =
      if ind >= n then []
      else mk_word ind::mk_words (ind+1) in
    mk_words 0

(*****************************)
(* Discriminating heuristics *)
(*****************************)

  module IntSet = Set.Make(IntArg)
  module NativeSet = Set.Make(Nativeint)

  let rec add_one sets ps = match sets,ps with
  | [],[] -> []
  | set::sets,p::ps ->
      let sets = add_one sets ps in
      NativeSet.add p set::sets
  | _,_ -> assert false

  let count_arities cases = match cases with
  | [] -> assert false
  | (ps,_)::_ ->
      let sets =
        List.fold_left
          (fun sets (ps,_) -> add_one sets ps)
          (List.map (fun _ -> NativeSet.empty) ps) cases in
      List.map NativeSet.cardinal sets

  let count_arities_first cases =
    let set =
      List.fold_left
        (fun set case -> match case with
        | (p::_,_) -> NativeSet.add p set
        | _ -> assert false)
        NativeSet.empty cases in
    NativeSet.cardinal set

  let count_arities_length cases =
    let set =
      List.fold_left
        (fun set (ps,_) -> IntSet.add (List.length ps) set)
        IntSet.empty cases in
    IntSet.cardinal set

  let best_col =
    let rec do_rec kbest best k = function
      | [] -> kbest
      | x::xs ->
          if x < best then
            do_rec k x (k+1) xs
          else
            do_rec kbest best (k+1) xs in
    let smallest = do_rec (-1) max_int 0 in
    fun cases ->
      let ars = count_arities cases in
      smallest ars

  let swap_list =
    let rec do_rec k xs = match xs with
    | [] -> assert false
    | x::xs ->
        if k <= 0 then [],x,xs
        else
          let xs,mid,ys = do_rec (k-1) xs in
          x::xs,mid,ys in
    fun k xs ->
      let xs,x,ys = do_rec  k xs in
      x::xs @ ys

  let swap k idxs cases =
    if k = 0 then idxs,cases
    else
      let idxs = swap_list k idxs
      and cases =
        List.map
          (fun (ps,act) -> swap_list k ps,act)
          cases in
      if dbg then begin
        pp_match stderr "SWAP" idxs cases
      end ;
      idxs,cases

  let best_first idxs cases = match idxs with
  | []|[_] -> idxs,cases (* optimisation: one column only *)
  | _ ->
      let k = best_col cases in
      swap k idxs cases

(************************************)
(* Divide according to first column *)
(************************************)

  module Divide(O:Set.OrderedType) = struct

    module OMap = Map.Make(O)

    let divide cases =
      let env =
        List.fold_left
          (fun env (p,psact) ->
            let old =
              try OMap.find p env
              with Not_found -> [] in
            OMap.add p ((psact)::old) env)
          OMap.empty cases in
      let r =  OMap.fold (fun key v k -> (key,v)::k) env [] in
      List.rev r (* Now sorted *)
  end

(***************)
(* Compilation *)
(***************)

(* Group by cell *)

    module DivideNative = Divide(Nativeint)

    let by_cell cases =
      DivideNative.divide
        (List.map
           (fun case -> match case with
           | (p::ps),act -> p,(ps,act)
           | [],_ -> assert false)
           cases)

(* Split into two halves *)

    let rec do_split idx env = match env with
    | [] -> assert false
    | (midkey,_ as x)::rem ->
        if idx <= 0 then [],midkey,env
        else
          let lt,midkey,ge = do_split (idx-1) rem in
          x::lt,midkey,ge

    let split_env len env = do_split (len/2) env

(* Switch according to one cell *)

(*
  Emit the switch, here as a comparison tree.
  Argument compile_rec is to be called to compile the rest of patterns,
  as match_on_cell can be called in two different contexts :
  from do_compile_pats and top_compile below.
 *)
    let match_oncell compile_rec str default idx env =
      let id = gen_cell_id () in
      let rec comp_rec env =
        let len = List.length env in
        if len <= 3 then
          List.fold_right
            (fun (key,cases) ifnot ->
              mk_eq id key
                (compile_rec str default cases)
              ifnot)
            env default
        else
          let lt,midkey,ge = split_env len env in
          mk_lt id midkey (comp_rec lt) (comp_rec ge) in
      mk_let_cell id str idx (comp_rec env)

(*
  Recursive 'list of cells' compile function:
  - choose the matched cell and switch on it
  - notice: patterns (and idx) all have the same length
 *)

    let rec do_compile_pats idxs str default cases =
      if dbg then begin
        pp_match stderr "COMPILE" idxs cases
      end ;
      match idxs with
      | [] ->
          begin match cases with
          | [] -> default
          | (_,e)::_ -> e
          end
      | _::_ ->
          let idxs,cases = best_first idxs cases in
          begin match idxs with
          | [] -> assert false
          | idx::idxs ->
              match_oncell
                (do_compile_pats idxs) str default idx (by_cell cases)
          end


(* Group by size *)

    module DivideInt = Divide(IntArg)


    let by_size cases =
      DivideInt.divide
        (List.map
           (fun (ps,_ as case) -> List.length ps,case)
           cases)
(*
  Switch according to pattern size
  Argument from_ind is the starting index, it can be zero
  or one (when the swicth on the cell 0 has already been performed.
  In that latter case pattern len is string length-1 and is corrected.
 *)

    let compile_by_size dbg from_ind str default cases =
      let size_cases =
        List.map
          (fun (len,cases) ->
            let len = len+from_ind in
            let act =
              do_compile_pats
                (interval from_ind len)
                str default  cases in
            (len,act))
          (by_size cases) in
      let id = gen_size_id () in
      let loc = Debuginfo.to_location dbg in
      let switch = I.transl_switch loc (Cvar id) 1 max_int size_cases default in
      mk_let_size id str switch

(*
  Compilation entry point: we choose to switch
  either on size or on first cell, using the
  'least discriminant' heuristics.
 *)
    let top_compile debuginfo str default cases =
      let a_len = count_arities_length cases
      and a_fst = count_arities_first cases in
      if a_len <= a_fst then begin
        if dbg then pp_cases stderr "SIZE" cases ;
        compile_by_size debuginfo 0 str default cases
      end else begin
        if dbg then pp_cases stderr "FIRST COL" cases ;
        let compile_size_rest str default cases =
          compile_by_size debuginfo 1 str default cases in
        match_oncell compile_size_rest str default 0 (by_cell cases)
      end

(* Module entry point *)

    let catch arg k = match arg with
    | Cexit (_e,[]) ->  k arg
    | _ ->
        let e =  next_raise_count () in
        ccatch (e,[],k (Cexit (e,[])),arg)

    let compile dbg str default cases =
(* We do not attempt to really optimise default=None *)
      let cases,default = match cases,default with
      | (_,e)::cases,None
      | cases,Some e -> cases,e
      | [],None -> assert false in
      let cases =
        List.rev_map
          (fun (s,act) -> pat_of_string s,act)
          cases in
      catch default (fun default -> top_compile dbg str default cases)

  end