Code
<- function (n_tanks = 84, n_obs = 5, n_reps = 100, fixedest = 87)
tanks
{<- as.matrix(rep(n_tanks, n_reps))
temp <- apply(temp, 1, sample, size = n_obs)
temp <- t(apply(temp, 2, tanks.ests, fixedest))
temp
temp }
The function tanks simulates n_reps battles with n_obs serial numbers recorded for each battle. The argument n_tanks is the number of tanks that the Germans had. The argument fixedest is the expert’s best guess.
<- function (n_tanks = 84, n_obs = 5, n_reps = 100, fixedest = 87)
tanks
{<- as.matrix(rep(n_tanks, n_reps))
temp <- apply(temp, 1, sample, size = n_obs)
temp <- t(apply(temp, 2, tanks.ests, fixedest))
temp
temp }
The function tank.ests runs on a sample of observed serial numbers. Each of the vector values is the result of a student supplied estimator. These need to be changed to reflect the class’ ideas.
<- function (x = stop("Argument 'x' is missing"), fixedest = 125)
tanks.ests
{<- length(x)
n <- n %% 2 == 1
nodd <- rep(NA, 16)
ests <- mean(x)
xbar <- median(x)
xmedian <- max(x)
xn <- var(x)
xvar <- sqrt(xvar)
xstddev <- min(x)
x1 <- c(-1,1)%*%range(x)
rng <- sum(x)
xsum <- sort(x)[n-1]
xnm1
1] <- 2 * xbar
ests[2] <- 2 * xmedian
ests[3] <- xbar + xmedian
ests[4] <- sum(x[c(n-1,n)])
ests[5] <- nodd * (xmedian + x[(n+1)/2 + 1]) + !nodd * (xmedian + x[n/2 + 1])
ests[6] <- xn + xmedian
ests[7] <- xbar + 2*xstddev
ests[#
= sort(x)
y = c(NA,y[-n])
z 8] <- xn + mean(y-z, na.rm = TRUE)
ests[= c(0,y[-n])
z 9] <- xn + mean(y-z)
ests[= c(1,y[-n])
z 10] <- xn + mean(y-z)
ests[11] <- (3*xn - x1)/2
ests[12] <- (5*xn - xbar)/3
ests[13] <- 3*xmedian + rng
ests[14] <- xn
ests[15] <- xn * ((n+1)/n) - 1
ests[16] <- fixedest
ests[names(ests) <- c("2*xbar", "2*m", "xbar + m", "x[n]+x[n-1]", "m + next biggest",
"x[n] + m", "xbar + 2s", "x[n]+mean(x -lag(x))", "x[n]+mean(x -lag(x) w/ 0)",
"x[n]+mean(x -lag(x) w/ 1)", "(3x[n]-x[1])/2", "(5*x[n]-xbar)/3", "3m + range(x)",
"x[n]", "UMVUE", fixedest)
ests
#ests[1] <- xn
#ests[2] <- 2 * xbar
#ests[3] <- xn + x1
#ests[4] <- xn + rng/2
#ests[5] <- xbar + xstddev
#ests[6] <- 2 * xmedian
#ests[7] <- xn + xnm1
#ests[8] <- 2*xn - x1
#ests[9] <- xsum
#ests[10] <- xn * ((n+1)/n) - 1
#ests[11] <- fixedest
#names(ests) <- c("x[n]","2*xbar", "x[n]+x[1]", "x[n]+R/2", "xbar+s", "2*m",
# "x[n]+x[n-1]","2x[n]-x[1]","sum(x)", "UMVUE",fixedest)
#ests
}
The function tanks.descriptives computes descriptive statistics for each of the estimators in tanks.ests.
<- function (n = 84, obs = 5, reps = 100, fixedest = 87)
tanks.descriptives
{<- tanks(n, obs, reps, fixedest)
temp <- apply(temp, 2, mean)
means <- sqrt(apply(temp, 2, var))
stddevs <- apply(temp, 2, median)
medians <- means - n
bias <- bias^2 + stddevs^2
mse t(cbind(means, stddevs, medians, bias, mse))
}
The individual estimates computed for the samples from each battle can be plotted. This allows us to compare location and dispersion statistics — center and spread. tanks.plots2 is intended to be an “improved” version of tanks.plots. Both plots use traditional lattice boxplots and there is a ggplot plot as well.
### Load the lattice package
p_load(lattice)
<- function (n = 84, obs = 5, reps = 100, fixedest = 87)
tanks.plots
{<- tanks(n, obs, reps, fixedest)
temp <- attributes(temp)$dimnames[[2]]
tanknames <- dim(temp)
dims <- as.vector(t(temp))
temp <- cbind(temp, rep(1:dims[2], dims[1]))
temp bwplot(factor(temp[, 2], labels = tanknames) ~
1], xlab = "Number of Tanks",
temp[, ylab = "Estimator")
}
<- function (n = 84, obs = 5, reps = 100, fixedest = 87)
tanks.plots2
{<- tanks(n, obs, reps, fixedest)
temp <- attributes(temp)$dimnames[[2]]
tanknames <- dim(temp)
dims <- as.vector(t(temp))
temp <- cbind(temp, rep(1:dims[2], dims[1]))
temp bwplot(factor(temp[, 2], labels = tanknames) ~
1], xlab = "Number of Tanks",
temp[, ylab = "Estimator", panel = function (x ,
vref = n, ... )
y ,
{panel.bwplot(x, y)
panel.abline(v = vref, lty = 2)
vref = n)
},
}
p_load(ggplot2)
p_load(tidyverse)
<- function (n = 84, obs = 5, reps = 100, fixedest = 87)
tanks.plots3
{<- tanks(n, obs, reps, fixedest)
temp <- attributes(temp)$dimnames[[2]]
tanknames <- dim(temp)
dims <- as.vector(t(temp))
temp <- cbind(temp, rep(1:dims[2], dims[1]))
temp <- as_tibble(temp)
temp names(temp) <- c("Estimate", "Estimator")
$Estimator <- factor(temp$Estimator)
tempggplot(temp, aes(x=Estimator, y=Estimate)) +
geom_boxplot(alpha=0.7) +
stat_summary(fun=mean, geom="point", shape=20, size=5, color="red", fill="red") +
theme(legend.position="none") +
scale_fill_brewer(palette="Set1") +
geom_hline(yintercept = n, alpha = 0.5, color = "blue", lty = 1) +
coord_flip()
}
<- function (n = 84, obs = 5, reps = 100, fixedest = 87)
tanks.plots4
{<- tanks(n, obs, reps, fixedest)
temp <- melt(temp)
temp names(temp) <- c("RowID","Estimator","Estimate")
ggplot(temp, aes(x=Estimator, y=Estimate)) +
geom_boxplot(alpha=0.7) +
stat_summary(fun=mean, geom="point", shape=20, size=5, color="red", fill="red") +
theme(legend.position="none") +
scale_fill_brewer(palette="Set1") +
geom_hline(yintercept = n, alpha = 0.5, color = "blue", lty = 1) +
coord_flip()
}
The class’ estimators can be compared using the functions defined above.
### Compute descriptive stats
tanks.descriptives(n = 47, obs = 5, reps = 10000, fixedest = 50)
2*xbar 2*m xbar + m x[n]+x[n-1] m + next biggest x[n] + m
means 48.10448 48.09400 48.09924 48.15300 48.23620 64.11970
stddevs 11.68563 17.03625 13.93933 19.03241 18.61887 12.68711
medians 48.00000 48.00000 48.20000 48.00000 48.00000 65.00000
bias 1.10448 1.09400 1.09924 1.15300 1.23620 17.11970
mse 137.77391 291.43062 195.51321 363.56222 348.19067 454.04680
xbar + 2s x[n]+mean(x -lag(x)) x[n]+mean(x -lag(x) w/ 0)
means 50.569001 48.079050 48.087240
stddevs 9.112276 7.709197 7.562589
medians 51.548247 50.000000 50.400000
bias 3.569001 1.079050 1.087240
mse 95.771347 60.596068 58.374839
x[n]+mean(x -lag(x) w/ 1) (3x[n]-x[1])/2 (5*x[n]-xbar)/3 3m + range(x)
means 47.887240 56.085400 58.77042 104.16640
stddevs 7.562589 9.332868 9.33617 26.77891
medians 50.200000 58.000000 61.13333 104.00000
bias 0.887240 9.085400 11.77042 57.16640
mse 57.979943 169.646910 225.70686 3985.10711
x[n] UMVUE 50
means 40.072700 47.087240 50
stddevs 6.302157 7.562589 0
medians 42.000000 49.400000 50
bias -6.927300 0.087240 3
mse 87.704672 57.200359 9
### Plot the estimates from each of the estimators
tanks.plots(n = 47, obs = 5, reps = 10000, fixedest = 50)
### Plot the estimates from each of the estimators
tanks.plots2(n = 47, obs = 5, reps = 10000, fixedest = 50)
### Plot the estimates from each of the estimators
tanks.plots4(n = 47, obs = 5, reps = 10000, fixedest = 50)
Note that \widehat{N} = X_{(n)} \frac{n+1}{n} - 1 is UMVUE for N when the X_i are i.i.d. DU(1,N).