I am merely copy-pasting his changes and comments here, for the sake of completeness of this blog.

#light

open System

let run_time_func (func: unit -> 'a) =

let time0 = System.DateTime.Now

let res = func ()

let diff0 = System.DateTime.Now - time0

(diff0, res)

let map_parallel func items =

let tasks =

seq {

for i in items -> async {

return (func i)

}

}

Async.Run (Async.Parallel tasks)

type ps<'num> = struct

val pos:'num

val speed:'num

new(pos, speed) = { pos = pos; speed = speed }

end

type sa<'num> = struct

val speed:'num

val accel:'num

new(speed, accel) = { speed = speed; accel = accel }

end

let gravity =

-9.81

let inline spring pos =

let k = 100.0 in

-k * pos

let inline drag k pos speed = -k * speed

let inline euler_implicit accel_func pos speed delta =

let speed2 = speed + delta * accel_func pos speed in

ps<_>(pos + delta * speed2, speed2)

let inline adapt (f: 'num -> 'num -> 'num) =

fun pos speed ->

let accel = f pos speed in

sa<_>(speed, accel)

let inline runge_kutta_4 deriv_func pos speed delta =

let half_delta = 0.5 * delta

let sixth_delta = delta / 6.0

let sa1: sa<_> = deriv_func pos speed

let sa2 = deriv_func (pos + half_delta * sa1.speed) (speed + half_delta * sa1.accel)

let sa3 = deriv_func (pos + half_delta * sa2.speed) (speed + half_delta * sa2.accel)

let sa4 = deriv_func (pos + delta * sa3.speed) (speed + delta * sa3.accel)

let pos1 = pos + sixth_delta*(sa1.speed + 2.0 * sa2.speed + 2.0 * sa3.speed + sa4.speed)

let speed1 = speed + sixth_delta*(sa1.accel + 2.0 * sa2.accel + 2.0 * sa3.accel + sa4.accel) in

ps<_>(pos1, speed1)

let inline simulate intg_func t0 t_fin delta pos0 speed0 =

let oc = OptimizedClosures.FastFunc3<_,_,_,_>.Adapt(intg_func)

let rec repeat t0 pos0 speed0 =

if t0 > t_fin then (pos0, speed0)

else

let t1 = t0 + delta

let ps: ps<_> = oc.Invoke(pos0, speed0, delta)

repeat t1 ps.pos ps.speed

repeat t0 pos0 speed0

let initial_states = [ for s in 1..1000 -> (float s * 1.0, 0.0) ]

let t0 = 0.0

let t_fin = 1000.0

let delta = 0.025

let accel = fun pos speed -> (drag 1.0) pos speed + spring pos + gravity

let ef = (euler_implicit accel)

let rk4f = (runge_kutta_4 (adapt accel))

for i in 0..3 do

let (run_time, res) =

run_time_func (fun () ->

initial_states |> List.map(fun (x,y) ->

simulate ef t0 t_fin (0.25*delta) x y))

printfn "Euler single: %f" run_time.TotalSeconds;

let (run_time, res) =

run_time_func (fun () ->

initial_states |> map_parallel(fun (x,y) ->

simulate ef t0 t_fin (0.25*delta) x y))

printfn "Euler multi: %f" run_time.TotalSeconds;

let (run_time, res) =

run_time_func (fun () ->

initial_states |> List.map(fun (x,y) ->

simulate rk4f t0 t_fin delta x y))

printfn "RK4 single: %f" run_time.TotalSeconds;

let (run_time, res) =

run_time_func (fun () ->

initial_states |>

map_parallel(fun (x,y) ->

simulate rk4f t0 t_fin delta x y))

printfn "RK4 multi: %f" run_time.TotalSeconds;

done;

ignore(Console.ReadKey(true))

open System

let run_time_func (func: unit -> 'a) =

let time0 = System.DateTime.Now

let res = func ()

let diff0 = System.DateTime.Now - time0

(diff0, res)

let map_parallel func items =

let tasks =

seq {

for i in items -> async {

return (func i)

}

}

Async.Run (Async.Parallel tasks)

type ps<'num> = struct

val pos:'num

val speed:'num

new(pos, speed) = { pos = pos; speed = speed }

end

type sa<'num> = struct

val speed:'num

val accel:'num

new(speed, accel) = { speed = speed; accel = accel }

end

let gravity =

-9.81

let inline spring pos =

let k = 100.0 in

-k * pos

let inline drag k pos speed = -k * speed

let inline euler_implicit accel_func pos speed delta =

let speed2 = speed + delta * accel_func pos speed in

ps<_>(pos + delta * speed2, speed2)

let inline adapt (f: 'num -> 'num -> 'num) =

fun pos speed ->

let accel = f pos speed in

sa<_>(speed, accel)

let inline runge_kutta_4 deriv_func pos speed delta =

let half_delta = 0.5 * delta

let sixth_delta = delta / 6.0

let sa1: sa<_> = deriv_func pos speed

let sa2 = deriv_func (pos + half_delta * sa1.speed) (speed + half_delta * sa1.accel)

let sa3 = deriv_func (pos + half_delta * sa2.speed) (speed + half_delta * sa2.accel)

let sa4 = deriv_func (pos + delta * sa3.speed) (speed + delta * sa3.accel)

let pos1 = pos + sixth_delta*(sa1.speed + 2.0 * sa2.speed + 2.0 * sa3.speed + sa4.speed)

let speed1 = speed + sixth_delta*(sa1.accel + 2.0 * sa2.accel + 2.0 * sa3.accel + sa4.accel) in

ps<_>(pos1, speed1)

let inline simulate intg_func t0 t_fin delta pos0 speed0 =

let oc = OptimizedClosures.FastFunc3<_,_,_,_>.Adapt(intg_func)

let rec repeat t0 pos0 speed0 =

if t0 > t_fin then (pos0, speed0)

else

let t1 = t0 + delta

let ps: ps<_> = oc.Invoke(pos0, speed0, delta)

repeat t1 ps.pos ps.speed

repeat t0 pos0 speed0

let initial_states = [ for s in 1..1000 -> (float s * 1.0, 0.0) ]

let t0 = 0.0

let t_fin = 1000.0

let delta = 0.025

let accel = fun pos speed -> (drag 1.0) pos speed + spring pos + gravity

let ef = (euler_implicit accel)

let rk4f = (runge_kutta_4 (adapt accel))

for i in 0..3 do

let (run_time, res) =

run_time_func (fun () ->

initial_states |> List.map(fun (x,y) ->

simulate ef t0 t_fin (0.25*delta) x y))

printfn "Euler single: %f" run_time.TotalSeconds;

let (run_time, res) =

run_time_func (fun () ->

initial_states |> map_parallel(fun (x,y) ->

simulate ef t0 t_fin (0.25*delta) x y))

printfn "Euler multi: %f" run_time.TotalSeconds;

let (run_time, res) =

run_time_func (fun () ->

initial_states |> List.map(fun (x,y) ->

simulate rk4f t0 t_fin delta x y))

printfn "RK4 single: %f" run_time.TotalSeconds;

let (run_time, res) =

run_time_func (fun () ->

initial_states |>

map_parallel(fun (x,y) ->

simulate rk4f t0 t_fin delta x y))

printfn "RK4 multi: %f" run_time.TotalSeconds;

done;

ignore(Console.ReadKey(true))

The "simulate" function now uses an optimized closure. Citing Jomo:

Closure optimization can be used when a function will be called many times. It allows F# to do a bit of expensive work up front that will later be amortized across the many calls to the function. Unfortunately, this is not something that you can stick everywhere and just get a benefit (otherwise, the compiler could just inject it for you). You should definitely profile before and after.

let inline simulate intg_func t0 t_fin delta pos0 speed0 =

let oc = OptimizedClosures.FastFunc3<_,_,_,_>.Adapt(intg_func)

let rec repeat t0 pos0 speed0 =

if t0 > t_fin then (pos0, speed0)

else

let t1 = t0 + delta

let ps: ps<_> = oc.Invoke(pos0, speed0, delta)

repeat t1 ps.pos ps.speed

repeat t0 pos0 speed0

let oc = OptimizedClosures.FastFunc3<_,_,_,_>.Adapt(intg_func)

let rec repeat t0 pos0 speed0 =

if t0 > t_fin then (pos0, speed0)

else

let t1 = t0 + delta

let ps: ps<_> = oc.Invoke(pos0, speed0, delta)

repeat t1 ps.pos ps.speed

repeat t0 pos0 speed0

My code made a rather extensive use of tuples, used for the returned values of "euler_implicit", "runge_kutta_4" and "adapt". Jomo has changed that to use structs instead. From what I understand, structs in F# are similar to structs in C#, and local variables of this type can be instantiated on the stack, instead of being created on the heap.

I'm still unsure about when to use structs and when not to, but I have noticed in my experiments that they do not always bring a performance boost. In other words, just replacing all your tuples (or other types I'll introduce later) is not always a good strategy.

The definition of the struct types is shown below.

type ps<'num> = struct

val pos:'num

val speed:'num

new(pos, speed) = { pos = pos; speed = speed }

end

type sa<'num> = struct

val speed:'num

val accel:'num

new(speed, accel) = { speed = speed; accel = accel }

end

This defines a struct "ps" parameterized by a type. "<'num>" declares the type parameter. Since the integrator in this tutorial is still limited to a single dimension, we could also have omitted the type parameter:

type ps = struct

val pos: float

val speed: float

new(pos, speed) = { pos = pos; speed = speed }

end

This defines a struct with two data members named pos and speed, and a constructor taking (oh surprise!) a position and a speed. The parts between curly braces means "create a value of type ps where the pos member is set to the parameter pos, and the speed member is set to the speed parameter".

Finally, Jomo also moved the creation of the integration functions out of the loop in the main program (which I obviously should have thought of...):

let ef = (euler_implicit accel)

let rk4f = (runge_kutta_4 (adapt accel))

let rk4f = (runge_kutta_4 (adapt accel))

let (run_time, res) =

run_time_func (fun () ->

initial_states |> List.map(fun (x,y) ->

simulate ef t0 t_fin (0.25*delta) x y))

run_time_func (fun () ->

initial_states |> List.map(fun (x,y) ->

simulate ef t0 t_fin (0.25*delta) x y))

The performance of the code has now increased to the numbers below.

Implementation | Performance Euler | Performance RK4 |

Managed C++ | 172413 | 326530 |

F# Tutorial 6 | 66225 | 111731 |

F# Tutorial 7 | 129032 | 185185 |

JF single | 153846 | 160000 |

JF multi | 297619 | 318471 |

For the next tutorial, I am considering taking this integrator to the third dimension, or to draw a graph showing the trajectory of a particle. I have yet to decide.

## 2 comments:

Amazing..A lot of things for me to learn.

Glad you like it AddCodPer :)

Post a Comment