Model Answers: Computational Testing
10.8 Problem 1
The data is:
x <- c(263.9, 266.2, 266.3, 266.8, 265.0)
Now construct some summary statistics and define some given parameters:
x_bar <- mean(x)    # compute sample mean
sigma <- 1.65       # population standard deviation is given
mu <- 260           # population mean to be tested against
n <- length(x)      # number of samples
Construct the z-statistic:
z <- (x_bar - mu) / (sigma / sqrt(n))
print(z)
## [1] 7.643287
Check if the z-statistic is in the critical range. First, work out what the z-value at the edge of the critical region is:
z_threshold <- qnorm(1 - 0.01, mean = 0, sd = 1)
print(z_threshold)
## [1] 2.326348
Thus, the z-statistic is much greater than the threshold and there is evidence to suggest the cartons are overfilled.
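As an optional cross-check (not part of the original answer), the one-sided p-value corresponding to this z-statistic could be computed directly; for a z-value this large it is essentially zero:
p_value <- pnorm(z, lower.tail = FALSE)  # upper-tail p-value of the observed z-statistic
print(p_value)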
10.9 Problem 2
Parameters given by the problem:
x_bar <- 103.11   # sample mean
s <- 53.5         # sample standard deviation
mu <- 100         # population mean to be tested against
n <- 45           # number of samples
Compute the z-statistic, assuming the large-sample approximation applies:
z <- (x_bar - mu) / (s / sqrt(n))
print(z)
## [1] 0.3899535
Now, work out the thresholds of the critical regions:
z_upper <- qnorm(1 - 0.025, mean = 0, sd = 1)
print(z_upper)
## [1] 1.959964
z_lower <- qnorm(0.025, mean = 0, sd = 1)
print(z_lower)
## [1] -1.959964
The z-statistic is outside the critical regions and therefore we do not reject the null hypothesis.
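Equivalently (a brief sketch, not part of the original answer), the two-sided p-value could be computed; it comes out well above 0.05, consistent with not rejecting:
p_value <- 2 * pnorm(-abs(z))  # two-sided p-value for the observed z-statistic
print(p_value)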
10.10 Problem 3
z_test <- function(x, mu, popvar){
  one_tail_p <- NULL
  z_score <- round((mean(x) - mu) / (popvar / sqrt(length(x))), 3)
  one_tail_p <- round(pnorm(abs(z_score), lower.tail = FALSE), 3)
  cat(" z =", z_score, "\n",
      "one-tailed probability =", one_tail_p, "\n",
      "two-tailed probability =", 2 * one_tail_p)
  return(list(z = z_score, one_p = one_tail_p, two_p = 2 * one_tail_p))
}
x <- rnorm(10, mean = 0, sd = 1)  # generate some artificial data from a N(0, 1)
out <- z_test(x, 0, 1)            # null should not be rejected!
## z = 0.387
## one-tailed probability = 0.349
## two-tailed probability = 0.698
print(out)
## $z
## [1] 0.387
##
## $one_p
## [1] 0.349
##
## $two_p
## [1] 0.698
x <- rnorm(10, mean = 1, sd = 1)  # generate some artificial data from a N(1, 1)
out <- z_test(x, 0, 1)            # null should be rejected!
## z = 2.836
## one-tailed probability = 0.002
## two-tailed probability = 0.004
print(out)
## $z
## [1] 2.836
##
## $one_p
## [1] 0.002
##
## $two_p
## [1] 0.004
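Since the data are drawn with rnorm, the exact numbers above will differ from run to run. A minimal sketch of how to make the example reproducible by fixing the random seed first (the seed value is arbitrary):
set.seed(1)                       # fix the random number generator state
x <- rnorm(10, mean = 0, sd = 1)  # the same artificial data is now produced on every run
out <- z_test(x, 0, 1)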
10.11 Problem 4
Define some parameters
mu <- 5.4      # population mean to be tested against
n <- 5         # number of samples
x_bar <- 5.64  # sample mean
s2 <- 0.05     # sample variance
Compute the t-statistic:
t <- (x_bar - mu) / sqrt(s2 / n)
print(t)
## [1] 2.4
Work out the thresholds of the critical regions:
t_upper <- qt(1 - 0.025, df = n - 1)
print(t_upper)
## [1] 2.776445
t_lower <- qt(0.025, df = n - 1)
print(t_lower)
## [1] -2.776445
The t-statistic is outside of the critical regions so we do not reject the null hypothesis.
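As an extra check (not part of the original solution), the two-sided p-value could also be computed from the t-distribution; it comes out above 0.05, in agreement with not rejecting at the 5% level:
p_value <- 2 * pt(-abs(t), df = n - 1)  # two-sided p-value with n - 1 degrees of freedom
print(p_value)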
10.12 Problem 5
Define the parameters:
x_bar_a <- 88   # sample mean for process A
s2_a <- 4.5     # sample variance for process A
n_a <- 72       # number of samples from process A
x_bar_b <- 79   # sample mean for process B
s2_b <- 4.2     # sample variance for process B
n_b <- 48       # number of samples from process B
mu_a <- 0       # hypothesised difference in means is zero under the null
mu_b <- 0
Compute the z-statistic:
z <- ((x_bar_a - x_bar_b) - (mu_a - mu_b)) / sqrt(s2_a / n_a + s2_b / n_b)
print(z)
## [1] 23.2379
Work out the critical value for a one-sided test at the 5% significance level:
z_upper <- qnorm(1 - 0.05, mean = 0, sd = 1)
print(z_upper)
## [1] 1.644854
Since the z-statistic far exceeds this critical value, there is evidence to support the claim that process \(A\) yields higher pressurisation.
10.13 Problem 6
# Data vectors
x_A <- c(91.50, 94.18, 92.18, 95.39, 91.79, 89.07, 94.72, 89.21)
x_B <- c(89.19, 90.95, 90.46, 93.21, 97.19, 97.04, 91.07, 92.75)

# parameters based on data
x_bar_A <- mean(x_A)
s2_A <- var(x_A)
n_A <- length(x_A)
x_bar_B <- mean(x_B)
s2_B <- var(x_B)
n_B <- length(x_B)
Compute the pooled variance estimator:
s2_p <- ((n_A - 1) * s2_A + (n_B - 1) * s2_B) / (n_A + n_B - 2)
print(s2_p)
## [1] 7.294654
Compute the t-statistic:
t <- (x_bar_A - x_bar_B) / sqrt(s2_p * (1 / n_A + 1 / n_B))
print(t)
## [1] -0.3535909
Work out the critical values:
t_upper <- qt(1 - 0.025, df = n_A + n_B - 2)
print(t_upper)
## [1] 2.144787
t_lower <- qt(0.025, df = n_A + n_B - 2)
print(t_lower)
## [1] -2.144787
Since \(|t|<2.14\) we have no evidence to reject the null hypothesis that the mean yields are equal.
Now, let us use the built-in t.test command:
out <- t.test(x = x_A, y = x_B, paired = FALSE, var.equal = TRUE,
              conf.level = 0.95, mu = 0, alternative = "two.sided")
print(out)
##
## Two Sample t-test
##
## data: x_A and x_B
## t = -0.35359, df = 14, p-value = 0.7289
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.373886 2.418886
## sample estimates:
## mean of x mean of y
## 92.2550 92.7325
The option paired = FALSE means this is an unpaired t-test, var.equal = TRUE forces the estimated variances to be the same (i.e. we are using a pooled variance estimator), and we are testing at the 95% confidence level with an alternative hypothesis that the true difference in means is non-zero.
The p-value for the t-test lies between 0 and 1. In this case, the value is around 0.73, which means the null hypothesis should not be rejected.
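If the individual quantities are needed programmatically, they can be extracted from the returned htest object; a brief sketch:
print(out$statistic)   # the t-statistic
print(out$p.value)     # the two-sided p-value
print(out$conf.int)    # the 95% confidence interval for the difference in means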
10.14 Problem 7
Define parameters:
x <- c(23, 36, 31)           # observed counts for each door
p <- c(1 / 3, 1 / 3, 1 / 3)  # hypothesised probabilities under the null
n <- sum(x)                  # total number of observations
K <- length(x)               # number of categories
Compute the expected counts:
Ex <- p * n
Compute the chi-squared statistic:
chi2 <- sum((x - Ex)^2 / Ex)
print(chi2)
## [1] 2.866667
Compute the critical value from the chi-squared distribution:
chi_upper <- qchisq(1 - 0.05, df = K - 1)
print(chi_upper)
## [1] 5.991465
Thus there is no evidence to reject the null hypothesis. The data provides no reason to suggest a preference for a particular door.
Now, we could have done this in R:
out <- chisq.test(x, p = c(1 / 3, 1 / 3, 1 / 3))
print(out)
##
## Chi-squared test for given probabilities
##
## data: x
## X-squared = 2.8667, df = 2, p-value = 0.2385
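The htest object returned by chisq.test also stores the expected counts and the statistic, which can be compared against the hand calculation above; a brief sketch:
print(out$expected)   # expected counts under the null; should match Ex
print(out$statistic)  # the chi-squared statistic; should match chi2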
10.15 Problem 8
y <- c(0, 1, 2)    # number of accidents per week
x <- c(32, 12, 6)  # observed frequency of each count
You will need the vcdExtra package to use the expand.dft command:
install.packages("vcdExtra")
The expand.dft command allows one to convert the frequency table into a vector of samples:
library(vcdExtra)
samples <- expand.dft(data.frame(y, Frequency = x), freq = "Frequency")
## Warning in type.convert.default(as.character(DF[[i]]), ...): 'as.is' should be
## specified by the caller; using TRUE
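As an aside, the same expansion could be done in base R without vcdExtra; a sketch using rep (samples_alt is just an illustrative name):
samples_alt <- data.frame(y = rep(y, times = x))  # repeat each count according to its frequency
table(samples_alt$y)                              # should reproduce the original frequency table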
Now we can use the fitdistr function in the MASS package to estimate the MLE of the Poisson distribution:
# loading the MASS package
library(MASS)
# fitting a Poisson distribution using maximum-likelihood
lambda_hat <- fitdistr(samples$y, densfun = 'Poisson')
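As a quick sanity check, the maximum-likelihood estimate of a Poisson rate is simply the sample mean, so lambda_hat$estimate should equal mean(samples$y), i.e. 24/50 = 0.48 here:
print(lambda_hat$estimate)  # MLE of the Poisson rate from fitdistr
print(mean(samples$y))      # sample mean; identical for the Poisson MLE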
Let us just solve this directly using R's built-in functions. First compute the expected probabilities under the fitted Poisson distribution, using dpois to evaluate the Poisson probability mass function:
pr <- c(0, 0, 0)
pr[1] <- dpois(0, lambda = lambda_hat$estimate)
pr[2] <- dpois(1, lambda = lambda_hat$estimate)
pr[3] <- 1 - sum(pr[1:2])
Then apply chisq.test:
out <- chisq.test(x, p = pr)
## Warning in chisq.test(x, p = pr): Chi-squared approximation may be incorrect
print(out)
##
## Chi-squared test for given probabilities
##
## data: x
## X-squared = 1.3447, df = 2, p-value = 0.5105
Actually, in this case the answer is wrong(!): we need to remove an additional degree of freedom to account for the use of the MLE. However, we can re-use the values already computed by chisq.test:
chi2 <- out$statistic
print(chi2)
## X-squared
## 1.34466
chi2_lower <- qchisq(1 - 0.01, df = 1)
print(chi2_lower)
## [1] 6.634897
Hence, there is no evidence to reject the null hypothesis. There is no reason to suppose that the Poisson distribution is not a plausible model for the number of accidents per week at this junction.
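For completeness, a p-value under the corrected degrees of freedom could also be computed (a sketch); it comes out well above 0.01, in agreement with the conclusion above:
p_value <- pchisq(chi2, df = 1, lower.tail = FALSE)  # corrected df: 3 categories - 1 - 1 estimated parameter = 1
print(p_value)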