DistanceDevelopment · LHMarshall · Dec 22, 2023 · Dec 1, 2023 · Dec 1, 2023 · Dec 5, 2023
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -10,3 +10,4 @@ cran-comments.md
 ^codecov\.yml$
 ^\.github$
 ^CRAN-SUBMISSION$
+README.md
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -11,13 +11,13 @@ Description: A simple way of fitting detection functions to distance sampling
     Horvitz-Thompson-like estimator) if survey area information is provided. See
     Miller et al. (2019) <doi:10.18637/jss.v089.i01> for more information on
     methods and <https://examples.distancesampling.org/> for example analyses.
-Version: 1.0.8
+Version: 1.0.9
 URL: https://github.com/DistanceDevelopment/Distance/
 BugReports: https://github.com/DistanceDevelopment/Distance/issues
 Language: en-GB
 Depends:
     R (>= 3.5.0),
-    mrds (>= 2.2.9)
+    mrds (>= 2.3.0)
 Imports:
     dplyr,
     methods,

diff --git a/NEWS b/NEWS
@@ -1,3 +1,13 @@
+Distance 1.0.9
+----------------------
+
+* Changed the default encounter rate estimator for point transect surveys from P3 to P2. (Issue #138)
+* Fixed bug which produced NA's when stratum names came after 'Total' in the alphabet. (Issue #158)
+* Added additional documentation explaining the adjustment term options when covariates are in the model. (Issue #156)
+* Fixed dht bootstrap to work when distbegin and distend are supplied but not distance. (Issue #147)
+* Added a warning for the dht bootstrap when Sample.Label values are not unique across all strata. (Issue #157)
+* Distance 1.0.9 depends on mrds >= 2.3.0 due to re-named documentation page links.
+
 Distance 1.0.8
 ----------------------
 

diff --git a/R/bootdht.R b/R/bootdht.R
@@ -6,7 +6,10 @@
 #'
 #' @param model a model fitted by [`ds`] or a list of models
 #' @param flatfile Data provided in the flatfile format. See [`flatfile`] for
-#' details.
+#' details. Please note, it is a current limitation of bootdht that all
+#' Sample.Label identifiers must be unique across all strata, i.e. transect
+#' ids must not be re-used from one stratum to another. An easy way to achieve
+#' this is to paste together the stratum names and transect ids.
 #' @param convert_units conversion between units for abundance estimation, see
 #' "Units", below. (Defaults to 1, implying all of the units are "correct"
 #' already.) This takes precedence over any unit conversion stored in `model`.
@@ -98,6 +101,13 @@
 #' when running in parallel, so all computations must be made using only its
 #' `ests` and `fit` arguments (i.e., you can not use R objects from elsewhere
 #' in that function, even if they are available to you from the console).
+#' 
+#' Another consequence of the global environment being unavailable inside 
+#' parallel bootstraps is that any starting values in the model object passed 
+#' in to `bootdht` must be hard coded (otherwise you get back 0 successful 
+#' bootstraps). For a worked example showing this, see the camera trap distance 
+#' sampling online example at 
+#' <https://examples.distancesampling.org/Distance-cameratraps/camera-distill.html>.
 #'
 #' @importFrom utils txtProgressBar setTxtProgressBar getTxtProgressBar
 #' @importFrom stats as.formula AIC
@@ -216,7 +226,7 @@ bootdht <- function(model,
     model
   })
 
-  cat(paste0("Performing ", nboot, " bootstraps\n"))
+  message(paste0("Performing ", nboot, " bootstraps\n"))
 
   if(cores > 1 & progress_bar != "none"){
     progress_bar <- "none"

diff --git a/R/bootdht_bootit.R b/R/bootdht_bootit.R
@@ -61,6 +61,11 @@ bootit <- function(bootdat, models, our_resamples, summary_fun,
           models[[i]]$dht$individuals[[est_type]]$Estimate <- ind$Estimate/
                                                               ind$rate
         }else{
+          # Need to re-order ind (in case strata names come after total)
+          label.order <- models[[i]]$dht$individuals[[est_type]]$Label
+          index <- match(label.order, ind$Label)
+          ind <- ind[index,]
+          # Now extract relevant values
           nN <- length(ind$Estimate)-1
           models[[i]]$dht$individuals[[est_type]]$Estimate <-
             c(ind$Estimate[1:nN]/ind$rate[1:nN],
@@ -75,6 +80,11 @@ bootit <- function(bootdat, models, our_resamples, summary_fun,
             models[[i]]$dht$clusters[[est_type]]$Estimate <- cl$Estimate/
                                                              cl$rate
           }else{
+            # Need to re-order cl (in case strata names come after total)
+            label.order <- models[[i]]$dht$clusters[[est_type]]$Label
+            index <- match(label.order, cl$Label)
+            cl <- cl[index,]
+            # Now extract relevant values
             models[[i]]$dht$clusters[[est_type]]$Estimate <-
               c(cl$Estimate[1:nN]/cl$rate[1:nN],
                 sum(cl$Estimate[1:nN]/cl$rate[1:nN]))

diff --git a/R/bootdht_resample_data.R b/R/bootdht_resample_data.R
@@ -18,6 +18,11 @@ bootdht_resample_data <- function(bootdat, our_resamples,
 
   # get all samples per stratum
   samps_per_strata <- unique(bf[, c(stratum_label, sample_label)])
+  # Check that the sampler names are unique across strata
+  # The number of unique sampler names should be the same as the rows in samps_per_strata
+  if(sample_label %in% our_resamples && !nrow(samps_per_strata) == length(unique(samps_per_strata[[sample_label]]))){
+    stop("Cannot bootstrap on samplers within strata as sampler ID values are not unique across strata. Please ensure all Sample.Label values are unique.", call. = FALSE)
+  }
   samps_per_strata <- by(bf[,c(stratum_label, sample_label)],
                          bf[[stratum_label]],
                          function(x) unique(x[[sample_label]]))
@@ -62,10 +67,19 @@ bootdht_resample_data <- function(bootdat, our_resamples,
 
   # concatenate list elements to data.frame
   rr <- do.call(rbind.data.frame, obs)
+
+  # Check if there is a distance column otherwise look for distbegin
+  if("distance" %in% names(rr)){
+   dist_col <- "distance"
+  }else if("distbegin" %in% names(rr)){
+   dist_col <- "distbegin"
+  }else{
+   stop("No distance nor distbegin column in the bootstrap dataset.", call. = FALSE)
+  }
 
   # reset the object IDs to be unique (where there are observations)
-  rr[[obs_label]][is.na(rr[["distance"]])] <- NA
-  rr[[obs_label]][!is.na(rr[["distance"]])] <- 1:length(rr[[obs_label]][!is.na(rr[["distance"]])])
+  rr[[obs_label]][is.na(rr[[dist_col]])] <- NA
+  rr[[obs_label]][!is.na(rr[[dist_col]])] <- 1:length(rr[[obs_label]][!is.na(rr[[dist_col]])])
 
   return(rr)
 }
diff --git a/R/checkdata.R b/R/checkdata.R
@@ -25,10 +25,16 @@ checkdata <- function(data, region.table=NULL, sample.table=NULL,
     }
   }
 
+
+  # Make sure the user has specified either distance or distbegin/distend, not all three (this check must come first because the code below creates the distance column when it is missing)
+  if(!is.null(data$distance) && !is.null(data$distbegin) && !is.null(data$distend)){
+    stop("You can only specify either a 'distance' column or 'distbegin' and 'distend' columns in your data.", call. = FALSE)
+  }
+
   # make sure that the data are in the right format first
   if(is.null(data$distance)){
     if(is.null(data$distend) & is.null(data$distbegin)){
-      stop("Your data must (at least) have a column called 'distance' or 'distbegin' and 'distend'!")
+      stop("Your data must (at least) have a column called 'distance' or 'distbegin' and 'distend'!", call. = FALSE)
     }else{
       data$distance <- (data$distend + data$distbegin)/2
     }

diff --git a/R/dht2.R b/R/dht2.R
@@ -281,7 +281,7 @@ dht2 <- function(ddf, observations=NULL, transects=NULL, geo_strat=NULL,
   }
 
 
-  # what are the stratum labels specicied in strat_formula?
+  # what are the stratum labels specified in strat_formula?
   stratum_labels <- attr(terms(strat_formula), "term.labels")
 
   # TODO: currently break if >1 stratum is defined

diff --git a/R/ds.R b/R/ds.R
@@ -28,16 +28,20 @@
 #' @param adjustment adjustment terms to use; `"cos"` gives cosine (default),
 #' `"herm"` gives Hermite polynomial and `"poly"` gives simple polynomial. A
 #' value of `NULL` indicates that no adjustments are to be fitted.
-#' @param nadj the number of adjustment terms to fit. The default value
-#' (`NULL`) will select via AIC (using a sequential forward selection
-#' algorithm) up to `max.adjustment` adjustments (unless `order` is specified).
-#' A non-negative integer value will cause the specified number of adjustments
-#' to be fitted. The order of adjustment terms used will depend on the `key`
-#' and `adjustment`. For `key="unif"`, adjustments of order 1, 2, 3, ... are
-#' fitted when `adjustment = "cos"` and order 2, 4, 6, ... otherwise. For
-#' `key="hn"` or `"hr"` adjustments of order 2, 3, 4, ... are fitted when
-#' `adjustment = "cos"` and order 4, 6, 8, ... otherwise. See Buckland et al.
-#' (2001, p. 47) for details.
+#' @param nadj the number of adjustment terms to fit. In the absence of 
+#' covariates in the formula, the default value (`NULL`) will select via AIC 
+#' (using a sequential forward selection algorithm) up to `max.adjustment` 
+#' adjustments (unless `order` is specified). When covariates are present 
+#' in the model formula, the default value of `NULL` results in no adjustment 
+#' terms being fitted in the model. A non-negative integer value will cause 
+#' the specified number of adjustments to be fitted. Supplying an integer 
+#' value will allow the use of adjustment terms in addition to specifying 
+#' covariates in the model. The order of adjustment terms used will depend 
+#' on the `key` and `adjustment`. For `key="unif"`, adjustments of order 
+#' 1, 2, 3, ... are fitted when `adjustment = "cos"` and order 2, 4, 6, ... 
+#' otherwise. For `key="hn"` or `"hr"` adjustments of order 2, 3, 4, ... are 
+#' fitted when `adjustment = "cos"` and order 4, 6, 8, ... otherwise. See 
+#' Buckland et al. (2001, p. 47) for details.
 #' @param order order of adjustment terms to fit. The default value (`NULL`)
 #' results in `ds` choosing the orders to use - see `nadj`. Otherwise a scalar
 #' positive integer value can be used to fit a single adjustment term of the
@@ -50,11 +54,15 @@
 #' key is uniform only `"width"` will be used. The other option is `"scale"`:
 #' the scale parameter of the detection
 #' @param cutpoints if the data are binned, this vector gives the cutpoints of
-#' the bins. Ensure that the first element is 0 (or the left truncation
+#' the bins. Supplying a distance column in your data and specifying cutpoints
+#' is the recommended approach for all standard binned analyses.
+#' Ensure that the first element is 0 (or the left truncation
 #' distance) and the last is the distance to the end of the furthest bin.
-#' (Default `NULL`, no binning.) Note that if `data` has columns `distbegin`
-#' and `distend` then these will be used as bins if `cutpoints` is not
-#' specified. If both are specified, `cutpoints` has precedence.
+#' (Default `NULL`, no binning.) If you have provided `distbegin` and `distend`
+#' columns in your data (note this should only be used when your cutpoints 
+#' are not constant across all your data, e.g. planes flying at differing 
+#' altitudes) then do not specify the cutpoints argument as this will cause
+#' the `distbegin` and `distend` columns in your data to be overwritten. 
 #' @param monotonicity should the detection function be constrained for
 #' monotonicity weakly (`"weak"`), strictly (`"strict"`) or not at all
 #' (`"none"` or `FALSE`). See Monotonicity, below. (Default `"strict"`). By
@@ -85,8 +93,9 @@
 #' "Units", below. (Defaults to 1, implying all of the units are "correct"
 #' already.)
 #' @param er_var encounter rate variance estimator to use when abundance
-#' estimates are required. Defaults to "R2" for line transects and "P3" for
-#' point transects. See [`dht2`][dht2] for more information and if more
+#' estimates are required. Defaults to "R2" for line transects and "P2" for
+#' point transects (from version 1.0.9; versions up to 1.0.8 used "P3" by
+#' default for points). See [`dht2`][dht2] for more information and if more
 #' complex options are required.
 #' @param method optimization method to use (any method usable by
 #' [`optim`][stats::optim] or [`optimx`][optimx::optimx]). Defaults to
@@ -96,7 +105,7 @@
 #' @param quiet suppress non-essential messages (useful for bootstraps etc).
 #' Default value `FALSE`.
 #' @param initial_values a `list` of named starting values, see
-#' [`mrds-opt`][mrds::mrds-opt]. Only allowed when AIC term selection is not
+#' [`mrds_opt`][mrds::mrds_opt]. Only allowed when AIC term selection is not
 #' used.
 #' @param max_adjustments maximum number of adjustments to try (default 5) only
 #' used when `order=NULL`.
@@ -147,7 +156,7 @@
 #' <http://examples.distancesampling.org/>.
 #'
 #' Hints and tips on fitting (particularly optimisation issues) are on the
-#' [`mrds-opt`][mrds::mrds-opt] manual page.
+#' [`mrds_opt`][mrds::mrds_opt] manual page.
 #'
 #' @section Clusters/groups:
 #'  Note that if the data contains a column named `size`, cluster size will be
@@ -324,7 +333,7 @@ ds <- function(data, truncation=ifelse(is.null(cutpoints),
              cutpoints=NULL, dht_group=FALSE,
              monotonicity=ifelse(formula==~1, "strict", "none"),
              region_table=NULL, sample_table=NULL, obs_table=NULL,
-             convert_units=1, er_var=ifelse(transect=="line", "R2", "P3"),
+             convert_units=1, er_var=ifelse(transect=="line", "R2", "P2"),
              method="nlminb", quiet=FALSE, debug_level=0,
              initial_values=NULL, max_adjustments=5, er_method=2, dht_se=TRUE,
              optimizer = "both",

diff --git a/man/bootdht.Rd b/man/bootdht.Rd
diff --git a/man/ds.Rd b/man/ds.Rd