# crosswalk_eval.jl
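#
# Evaluation script for the occluded-crosswalk POMDP. Depending on the
# command-line flags it evaluates one of three policies: a deep-correction
# policy built on top of a decomposed single-pedestrian policy, a decomposed
# single-pedestrian policy on its own, or a monolithic DQN policy. Per-episode
# results and summary statistics are written to CSV files in the log directory.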
using ArgParse
s = ArgParseSettings()
@add_arg_table s begin
    "--single"
        help = "evaluate a single-pedestrian policy using utility decomposition"
        action = :store_true
    "--policy"
        help = "path to the policy file to load"
        arg_type = String
        default = joinpath("log", "policy.bson")
    "--logdir"
        help = "directory in which to save the evaluation results"
        arg_type = String
        default = "log"
    "--n_eval"
        help = "number of episodes for evaluation"
        arg_type = Int64
        default = 1000
    "--correction"
        help = "path to the single-pedestrian policy to combine with the correction network"
        arg_type = String
        default = nothing
end
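# Example invocations (file names are illustrative):
#   julia crosswalk_eval.jl --policy log/dqn_policy.bson
#   julia crosswalk_eval.jl --single --policy log/single_policy.bson
#   julia crosswalk_eval.jl --policy log/correction.bson --correction log/single_policy.bson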
parsed_args = parse_args(s)
using Random
using POMDPs
using POMDPSimulators
using POMDPModelTools
using AutomotiveDrivingModels
using AutomotivePOMDPs
using BeliefUpdaters
using Flux
using DeepQLearning
using DeepCorrections
using FileIO
using BSON
using CSV
using DataFrames # provides describe and the DataFrame returned by run_parallel
using StatsBase
include("decomposed_policy.jl")
const N_EVAL = parsed_args["n_eval"]
const MAX_STEPS = 100
const MAX_PEDS = 10
println("Evaluating in environment with $MAX_PEDS pedestrians")
pomdp = OCPOMDP(ΔT = 0.5, p_birth = 0.3, max_peds = MAX_PEDS, no_ped_prob = 0.1)
println("Loading policy...")
if parsed_args["correction"] !== nothing
    # low-fidelity baseline: a single-pedestrian policy combined over pedestrians
    # by utility decomposition (elementwise min over the decomposed values)
    single_policy = BSON.load(parsed_args["correction"])[:policy]
    lowfi_policy = DecPolicy(single_policy, pomdp, (x, y) -> min.(x, y))
    # load the policy file once instead of reading it twice
    correction_data = BSON.load(parsed_args["policy"])
    correction_network = correction_data[:correction]
    problem = correction_data[:problem]
    policy = DeepCorrectionPolicy(problem, correction_network, lowfi_policy, additive_correction, 1.0, ordered_actions(pomdp))
    println("Initialized Deep Correction policy from $(parsed_args["correction"]) and $(parsed_args["policy"])")
elseif parsed_args["single"]
    single_policy = BSON.load(parsed_args["policy"])[:policy]
    policy = DecPolicy(single_policy, pomdp, (x, y) -> min.(x, y))
    println("Initialized Decomposed policy from $(parsed_args["policy"])")
else
    BSON.@load parsed_args["policy"] policy
    println("Initialized DQN policy from $(parsed_args["policy"])")
end
const K = 4
updater = KMarkovUpdater(K)
# helper for KMarkovUpdater: the belief is the stack of the K most recent
# observations (DQN-style frame stacking); this method tells the updater how
# to flatten them into a single array for the network input
POMDPs.convert_o(::Type{Array{Float64}}, o::Array{Array{Float64, 1}}, pomdp::OCPOMDP) = hcat(o...)
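# Build one Sim per evaluation episode, each seeded with its own
# MersenneTwister so results are reproducible across runs.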
simlist = []
for i = 1:N_EVAL
    rng = MersenneTwister(i)  # one deterministic seed per episode
    s0 = initialstate(pomdp, rng)
    o0 = generate_o(pomdp, s0, rng)
    b0 = initialize_belief(updater, fill(o0, K))  # start with K copies of the first observation
    push!(simlist, Sim(pomdp, policy, updater, b0, s0, rng=rng, max_steps=MAX_STEPS))
end
println("Starting Parallel Simulation...")
df = run_parallel(simlist) do sim, hist
    return (n_steps = n_steps(hist),
            reward = discounted_reward(hist),
            collision = undiscounted_reward(hist) < 0.0,
            timeout = undiscounted_reward(hist) == 0.0,
            success = undiscounted_reward(hist) > 0.0)
end
summary_df = describe(df, stats=[:mean, :std])  # per-column mean and standard deviation
println(summary_df)
# append so that repeated evaluation runs accumulate in the same files
CSV.write(joinpath(parsed_args["logdir"], "log.csv"), df, append=true)
CSV.write(joinpath(parsed_args["logdir"], "summary.csv"), summary_df, append=true)
println("Results saved in $(parsed_args["logdir"])")