With a set of quite simple passes moving expressions up and down the tree using static exceptions and matching some patterns, we can end up simplifying some interesting patterns:
The simplest case being
let (a,b) = if c then (1,2) else (3,4) in
a+b
A first pass replace tail allocations and const pointers in branching expressions by a static raises:
let (a,b) =
catch
if c then exit (1,2) else exit (3,4)
with (x,y) -> (x,y)
in
a+b
This form is not better than the previous, but it has some informations moved up in the expression, allowing to recognise some interesting patterns.
Here since the body of the catch never returns, thanks to the previous transformation, we know we can push the let binding inside the staticexception handler:
catch
if c then exit (1,2) else exit (3,4)
with (x,y) ->
let (a,b) = (x,y) in
a+b
And previously existing passes can get rid of the allocation to finaly obtain:
catch
if c then exit (1,2) else exit (3,4)
with (x,y) ->
x+y
let rec read_lines acc =
let res =
try Some (input_line stdin)
with End_of_file -> None
in
match res with
| None -> acc
| Some l -> read_lines (l :: acc)
Is first transformed to
let rec read_lines acc =
let res =
catch
catch
try exit 1 (input_line stdin)
with End_of_file -> exit 2
with 1 x -> Some x
with 2 -> None
in
match res with
| None -> acc
| Some l -> read_lines (l :: acc)
Then the following pattern is recognised
let x = catch ... with ... None in
match x with
| None -> ...
| ...
And the match branch is pushed inside the static exception handler:
let rec read_lines acc =
catch
let res =
catch
try exit 1 (input_line stdin)
with End_of_file -> exit 2
with 1 x -> Some x
in
match res with
| None -> acc
| Some l -> read_lines (l :: acc)
with 2 -> let res = None in acc
And further
let rec read_lines acc =
catch
catch
let res =
try exit 1 (input_line stdin)
with End_of_file -> exit 2
in
match res with
| None -> acc
| Some l -> read_lines (l :: acc)
with 1 x ->
let res = Some x in
let l = field 1 res in
read_lines (l :: acc)
with 2 -> let res = None in acc
And with further simplifications we obtain:
let rec read_lines acc =
catch
catch
try exit 1 (input_line stdin)
with End_of_file -> exit 2
with 1 x ->
read_lines (x :: acc)
with 2 -> acc
Hence, eliminating an allocation and a match.
When those transformation generate useless static exception, a final pass push back expressions at their original position and remove useless static exceptions.
catch
catch
exit 1
with 1 -> exit 2
with 2 -> expr
is converted to
expr
It is easy to play with it to add some transformation: Just create a flambda***.ml file
As an example (augmenting the scope of lets)
let lift_lets tree compilation_unit =
let rec aux = function
| Flet(str1,v1,Flet(str2,v2,def2,body2,d2),body1,d1) ->
Flet(str2,v2,def2,
aux (Flet(str1,v1,body2,body1,d1)),d2)
| e -> e in
Flambdaiter.map aux tree
open Flambdapasses
let lift_let_pass =
{ name = "lift lets";
pass = lift_lets }
let () = Flambdapasses.register_pass Loop 5 lift_let_pass
add it to the OPTSTART variable in the Makefile before optmain.cmo (to ensure that it is linked)
OPTSTART=asmcomp/flambda***.cmo \
driver/optmain.cmo \
Experimental passes that are activated by environment variables. It is to allow testing without passes for which I am not certain of the robustness/interest or if it make simple benchmarking harder... (when dummy for loops are eliminated)
export EXPERIMENTS=true
ocamlopt ...
Lets are pushed up or down to minimize the number of computations. It also helps a bit with cmmgen pattern matchings.
let f b x =
let a = x + 1 in
if b
then 1
else a
is rewritten to
let f b x =
if b
then 1
else let a = x + 1 in a
let f y =
while true do
let x = y + y in
x
done
is rewritten to
let f y =
let x = y + y in
while true do
x
done
For some functions it may be worth adding a test before running it. The code is stable, but it is not obvious when this is a benefit
For instance:
let rec map f = function
| [] -> []
| h::t -> f h :: map f t
let g x l =
let add y = x + y in
map add l
Can be rewritten to
let rec map' f = function
| [] -> []
| h::t -> f h :: map' f t
let map f = function (* annotated with stub: hence always inlined *)
| [] -> []
| l -> map' f l
let g x l =
let add y = x + y in
map add l
Applying others rewriting (but no lambda lifting) leads to
let rec map' f = function
| [] -> []
| h::t -> f h :: map' f t
let g x l =
if l == [] then []
else
let add y = x + y in
map add l
thus avoiding the allocation of the add
closure when the list is empty.
for i = 0 to 100000000 do
let x = (1,(i,i,i,i,i,i)) in
ignore (fst x)
done
In this example,
let x = (1,(i,i,i,i,i,i)) in
ignore (fst x)
is rewritten to
let x_1 = 1 in
let x_2 = (i,i,i,i,i,i) in
let x = (x_1,x_2) in
ignore (x_1)
then the dead code is eliminated
let x_1 = 1 in
ignore (x_1)
For sake of test/benchmarking ignore is volontarily not eliminated
movq $1, %rax cmpq $200000001, %rax jg .L100 .L101: movq $3, %rbx # correspond to 'ignore 1' movq %rax, %rbx addq $2, %rax cmpq $200000001, %rbx jne .L101 .L100: ... |
movq $1, %rbx cmpq $200000001, %rbx jg .L100 .L101: movq $80, %rax call caml_allocN@PLT .L103: leaq 8(%r15), %rax movq $6144, -8(%rax) movq %rbx, (%rax) movq %rbx, 8(%rax) movq %rbx, 16(%rax) movq %rbx, 24(%rax) movq %rbx, 32(%rax) movq %rbx, 40(%rax) leaq 56(%rax), %rdi movq $2048, -8(%rdi) movq $3, (%rdi) movq %rax, 8(%rdi) movq $3, %rax movq %rbx, %rax addq $2, %rbx cmpq $200000001, %rax jne .L101 .L100: |
type a = { mutable x : int; mutable y : int }
let swap a =
let y = a.y in
a.y <- a.x;
a.x <- y
let f x y =
let a = { x; y } in
swap a;
a.x + a.y
after inlining, f
becomes:
let f x y =
let a = { x; y } in
let y = a.y in
a.y <- a.x;
a.x <- y
a.x + a.y
Reference elimination can then take place
let f x y =
let a_1 = x in
let a_2 = y in
let y = a_2 in
a_2 <- a_1;
a_1 <- y
a_1 + a_2
camlElim_ref__f_1031: .cfi_startproc .L101: movq %rbx, %rdi movq %rax, %rbx movq %rdi, %rax leaq -1(%rax, %rbx), %rax ret .cfi_endproc |
camlElim_ref__f_1014: .cfi_startproc subq $8, %rsp .cfi_adjust_cfa_offset 8 .L101: movq %rax, %rdi .L102: subq $24, %r15 movq caml_young_limit@GOTPCREL(%rip), %rax cmpq (%rax), %r15 jb .L103 leaq 8(%r15), %rax movq $2048, -8(%rax) movq %rdi, (%rax) movq %rbx, 8(%rax) movq 8(%rax), %rbx movq (%rax), %rdi movq %rdi, 8(%rax) movq %rbx, (%rax) movq 8(%rax), %rbx movq (%rax), %rax leaq -1(%rax, %rbx), %rax addq $8, %rsp .cfi_adjust_cfa_offset -8 ret .cfi_adjust_cfa_offset 8 .L103: call caml_call_gc@PLT .L104: jmp .L102 .cfi_endproc |
let plus x acc = x + acc
let rec fold f acc = function
| [] -> acc
| h :: t -> fold f (f h acc) t
let truc = fold plus 0
fold
can be specialised to plus here giving
let truc =
let rec fold f acc = function
| [] -> acc
| h :: t -> fold plus (plus h acc) t in
fold plus 0
Further inlined to
let truc =
let rec fold f acc = function
| [] -> acc
| h :: t -> fold plus (h + acc) t in
fold plus 0
camlFold__fold_1484: .cfi_startproc .L101: cmpq $1, %rdi je .L100 movq 8(%rdi), %rdx movq (%rdi), %rax leaq -1(%rax, %rbx), %rbx movq camlFold__plus_1037_closure@GOTPCREL(%rip), %rax |
camlFold__plus_1008: .cfi_startproc .L100: leaq -1(%rax, %rbx), %rax ret .cfi_endproc |
let plus (x,y) = x + y
let f g x =
g (x,x)
let h x = f plus x
is inlined to
let h x = plus (x,x)
then
let h x =
let b = (x,x) in
let (x,y) = b in
x + y
then simplified to
let h x = x + x
camlFun_param__h_1026: .cfi_startproc .L106: leaq -1(%rax, %rax), %rax ret .cfi_endproc |
camlFun_param__h_1014: .cfi_startproc subq $8, %rsp .cfi_adjust_cfa_offset 8 .L105: movq %rax, %rdi movq camlFun_param@GOTPCREL(%rip), %rax movq (%rax), %rbx .L106: subq $24, %r15 movq caml_young_limit@GOTPCREL(%rip), %rax cmpq (%rax), %r15 jb .L107 leaq 8(%r15), %rax movq $2048, -8(%rax) movq %rdi, (%rax) movq %rdi, 8(%rax) movq (%rbx), %rdi addq $8, %rsp .cfi_adjust_cfa_offset -8 jmp *%rdi .cfi_adjust_cfa_offset 8 .L107: call caml_call_gc@PLT .L108: jmp .L106 .cfi_endproc |
let f x =
let g y = y + x in
g x
The point here is to note that after inlining, the closure of g is not allocated anymore.
camlLoc_fun__f_1020: .cfi_startproc .L100: leaq -1(%rax, %rax), %rax ret .cfi_endproc |
camlLoc_fun__f_1008: .cfi_startproc subq $8, %rsp .cfi_adjust_cfa_offset 8 .L101: movq %rax, %rbx .L102: subq $32, %r15 movq caml_young_limit@GOTPCREL(%rip), %rax cmpq (%rax), %r15 jb .L103 leaq 8(%r15), %rax movq $3319, -8(%rax) movq camlLoc_fun__g_1010@GOTPCREL(%rip), %rdi movq %rdi, (%rax) movq $3, 8(%rax) movq %rbx, 16(%rax) movq 16(%rax), %rax leaq -1(%rbx, %rax), %rax addq $8, %rsp .cfi_adjust_cfa_offset -8 ret .cfi_adjust_cfa_offset 8 .L103: call caml_call_gc@PLT .L104: jmp .L102 .cfi_endproc |
let f i =
match Some (i,(i,i)) with
| None -> 0
| Some (i,_) -> i + i
camlMatch_elim__f_1027: .cfi_startproc .L100: addq $2, %rax ret .cfi_endproc |
camlMatch_elim__f_1012: .cfi_startproc subq $8, %rsp .cfi_adjust_cfa_offset 8 .L103: movq %rax, %rbx .L104: subq $16, %r15 movq caml_young_limit@GOTPCREL(%rip), %rax cmpq (%rax), %r15 jb .L105 leaq 8(%r15), %rdi movq $1025, -8(%rdi) movq %rbx, (%rdi) movzbq -8(%rdi), %rax cmpq $1, %rax je .L101 jg .L100 .L102: movq (%rdi), %rax addq $8, %rsp .cfi_adjust_cfa_offset -8 ret .cfi_adjust_cfa_offset 8 .align 4 .L101: movq (%rdi), %rax addq $2, %rax addq $8, %rsp .cfi_adjust_cfa_offset -8 ret .cfi_adjust_cfa_offset 8 .align 4 .L100: movq (%rdi), %rax leaq -1(%rax, %rax), %rax addq $8, %rsp .cfi_adjust_cfa_offset -8 ret .cfi_adjust_cfa_offset 8 .L105: call caml_call_gc@PLT .L106: jmp .L104 .cfi_endproc |