
Commit

more comments
gareth authored and gareth committed May 18, 2021
1 parent ffa97fc commit c78da89
Showing 2 changed files with 22 additions and 10 deletions.
15 changes: 12 additions & 3 deletions ptha_access/get_PTHA_results.R
@@ -1162,7 +1162,8 @@ estimate_exrate_uncertainty<-function(random_scenarios, event_peak_stage, thresh

local_fun<-function(mw){
.get_mean_and_variance_of_exrate_in_mw_bin_from_random_scenarios(mw, random_scenarios,
-event_peak_stage, threshold_stage, importance_sampling_type)}
+event_peak_stage, threshold_stage, importance_sampling_type)
+}

all_results = lapply(unique_Mw, function(x) local_fun(x))

@@ -1216,7 +1217,7 @@ estimate_exrate_uncertainty<-function(random_scenarios, event_peak_stage, thresh
#' conditional probability of sampling each scenario (within its magnitude-bin)
#' is proportional to event_rates. If provided then this gives the conditional
#' probability, and we compute the variance assuming that basic importance
-#' sampling is used to re-weight the scenarios.
+#' sampling is used to re-weight the scenarios, using the asymptotic variance formula.
#' @param TOL A numerical tolerance used for checking if magnitude-bins are
#' equally spaced. It should be much smaller than the magnitude-bin size (0.1
#' in PTHA18), but allow for minor floating-point variations in event_Mw.
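As a sketch of the formula being referenced (notation mine, inferred from the R code in the next hunk rather than quoted from the package documentation): for a magnitude bin $b$ with scenario rates $r_i$, bin rate $R_b = \sum_i r_i$, within-bin sampling probabilities $p_i$ (built from event_importance_weighted_sampling_probs), exceedance indicators $I_i$, and basic-importance-sampling weights $w_i = (r_i / R_b) / p_i$, the estimator from $n_b$ samples and its variance are

\[
\hat{\lambda}_b = \frac{R_b}{n_b} \sum_{k=1}^{n_b} I_{i_k}\, w_{i_k},
\qquad
\mathrm{Var}\big(\hat{\lambda}_b\big) = \frac{1}{n_b} \sum_i p_i \big( R_b\, I_i\, w_i - \lambda_b \big)^2,
\]

where $\lambda_b = \sum_i r_i I_i$ is the exact exceedance rate in the bin. The sum without the $1/n_b$ factor is the variance_numerator computed below, and choosing $p_i = r_i / R_b$ gives $w_i = 1$, recovering the standard stratified-sampling variance (which is why no separate implementation is needed for that case).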
@@ -1252,11 +1253,15 @@ get_optimal_number_of_samples_per_Mw<-function(event_Mw, event_rates,

k = which(abs(event_Mw - unique_event_Mw[i]) < TOL)

-if(sum(event_rates[k]) == 0){
+if(sum(event_rates[k]) == 0 | sum(event_importance_weighted_sampling_probs[k]) == 0){

variance_numerator[i] = 0

}else{
+# 'Basic-importance-sampling' variance. For the case with:
+# event_importance_weighted_sampling_probs = event_rates
+# this gives the same result as regular stratified sampling, so
+# we do not add a separate implementation.

# Bin-specific weights with standard stratified sampling
scenario_wts_no_importance = event_rates[k] / sum(event_rates[k])
@@ -1278,6 +1283,10 @@
variance_numerator[i] = sum( scenario_wts_importance *
(rate_of_Mw * (event_peak_stage[k] > stage_threshold) * basic_importance_sampling_weights -
true_exceedance_rate)**2 )
+#variance_numerator[i] = sum( scenario_wts_importance *
+# (rate_of_Mw * (event_peak_stage[k] > stage_threshold) * basic_importance_sampling_weights)^2) -
+# true_exceedance_rate^2

}
}
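To make the calculation above concrete, here is a small self-contained R sketch (toy numbers and shortened names of my own; not code from the repository) reproducing the variance_numerator computation for a single magnitude bin:

# Toy illustration of the per-bin variance numerator computed above.
event_rates = c(0.002, 0.001, 0.0005, 0.0001)     # hypothetical scenario rates
event_peak_stage = c(0.5, 1.2, 2.5, 4.0)          # hypothetical peak stages (m)
stage_threshold = 1.0
sampling_probs = event_peak_stage * event_rates   # stage-weighted importance sampling

rate_of_Mw = sum(event_rates)                     # total rate in the bin
wts_no_imp = event_rates / sum(event_rates)       # stratified sampling probabilities
wts_imp = sampling_probs / sum(sampling_probs)    # importance sampling probabilities
bis_weights = wts_no_imp / wts_imp                # basic importance sampling weights
exceeds = (event_peak_stage > stage_threshold)
true_exrate = rate_of_Mw * sum(wts_no_imp * exceeds)

variance_numerator = sum(wts_imp *
    (rate_of_Mw * exceeds * bis_weights - true_exrate)^2)
# The Monte-Carlo variance with n samples in this bin is variance_numerator / n.
print(variance_numerator)

Setting sampling_probs equal to event_rates makes bis_weights identically 1, and the same expression collapses to the regular stratified-sampling variance, as the comment in the diff notes.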

17 changes: 10 additions & 7 deletions ptha_access/test_all.R
@@ -421,10 +421,10 @@ test_random_scenario_sampling<-function(){
#
# Test our estimates of the optimal number of scenarios to sample in each Mw bin
#

+nrand_samples = sum(!is.na(random_scenarios_repeated$inds))
# Simple check of variance_numerator by back-calculation
t0 = ptha18$get_optimal_number_of_samples_per_Mw(event_Mw, event_rates, event_peak_stage,
-stage_threshold=threshold_stage, total_samples=nrow(random_scenarios_repeated))
+stage_threshold=threshold_stage, total_samples=nrand_samples)
rate_with_this_Mw = aggregate(event_rates, by=list(event_Mw), sum)$x
rate_exceeding_with_this_Mw = aggregate(event_rates * (event_peak_stage > threshold_stage),
by=list(event_Mw), sum)$x
@@ -442,22 +442,25 @@ test_random_scenario_sampling<-function(){
# Check that the theoretical variance (assuming we sample per Mw-bin like
# in random_scenarios_repeated) is close to the empirical variances (from
# the above repeated random sampling). This exercises importance-sampling as well
-# as unequal magnitude stratification.
+# as unequal magnitude stratification. Note that for importance-sampling the variance
+# is asymptotic (so it is not exact in finite samples)
#
t0 = ptha18$get_optimal_number_of_samples_per_Mw(event_Mw, event_rates, event_peak_stage,
-stage_threshold=threshold_stage, total_samples=nrow(random_scenarios_repeated),
+stage_threshold=threshold_stage, total_samples=nrand_samples,
event_importance_weighted_sampling_probs=(event_peak_stage*event_rates))
-repeated_random_scenario_counts = as.numeric(table(random_scenarios_repeated$mw))
-expected_variance = sum(t0$variance_numerator/repeated_random_scenario_counts)
+repeated_random_scenario_counts = aggregate(!is.na(random_scenarios_repeated$inds),
+by=list(random_scenarios_repeated$mw), sum)$x
+expected_variance = sum(t0$variance_numerator/repeated_random_scenario_counts, na.rm=TRUE)
empirical_variance = var(est_sd_store[['basic']][,1])
-# Check they agree within a few percent
+# Check they agree within a few percent (the estimate is not exact in finite-samples)
if(abs(empirical_variance - expected_variance) < 0.03*expected_variance){
print('PASS')
}else{
print('FAIL -- the theoretical variance is not close enough to the sampling variance')
}

# Check that the typical 'sample-sd-estimate' is reasonably close to the expected value
+# (again note the estimate is not exact in finite samples)
typical_sd_estimate = median(est_sd_store[['basic']][,2])
expected_sd = sqrt(expected_variance)
if( abs(typical_sd_estimate - expected_sd) < 0.05*expected_sd ){
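For context on the counting change above, a toy R example (mine, not from the repository; the NA semantics are inferred from the diff) showing why the per-bin sample counts moved from table() to an aggregate() over non-NA indices:

# Toy illustration: some repeated draws may have no valid scenario (NA index).
random_scenarios_repeated = data.frame(
    mw   = c(7.2, 7.2, 7.3, 7.3, 7.3),
    inds = c(10,  NA,  4,   7,   NA))

as.numeric(table(random_scenarios_repeated$mw))
# 2 3  -- counts all rows in each Mw bin, including NA draws

aggregate(!is.na(random_scenarios_repeated$inds),
    by=list(random_scenarios_repeated$mw), sum)$x
# 1 2  -- counts only successfully sampled scenarios, consistent with
#        nrand_samples = sum(!is.na(random_scenarios_repeated$inds)) above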

