# chp7-handout.pdf
# chp7.pdf
# Chapter 7, Inference for Numerical Data (from Paired Data), Chapter 8, Intro to Regression
# Exam scores by section ----
# Read the exam sheet and summarise `total` within each `section`.
library(googlesheets4)
# dplyr is attached here because the pipelines below already need %>%,
# group_by(), summarise(), select() and filter().
library(dplyr)

url <- "https://docs.google.com/spreadsheets/d/1Le_A8n8LUNnPUQvdawnMw1ULmdLiuLaV-XPABvZFIGg"
exam <- read_sheet(url)

# Raw per-section summary. No NA handling here, so a missing `total` in a
# section would make that section's mean and sd come back as NA.
exam %>%
  group_by(section) %>%
  summarise(n = length(total), mean = mean(total), sd = sd(total))

# Same summary after keeping only rows where both section and total are present.
exam %>%
  select(section, total) %>%
  filter(!is.na(section), !is.na(total)) %>%
  group_by(section) %>%
  summarise(n = length(total), mean = mean(total), sd = sd(total))

# Are the scores of the two classes significantly different?
# - calculate the standard error
# - for a confidence interval, calculate the critical value
# - for a hypothesis test, set up the hypotheses
# Explore the openintro `classData` data set ----
library(openintro)
data(package = "openintro") # list the data sets that ship with openintro

# Quick looks at the data frame.
summary(classData)
head(classData)
head(table(classData))

# Per-lecture count, mean and sd using the vector + grouping-list interface.
aggregate(x = classData$lecture, by = list(classData$lecture), FUN = length)
aggregate(x = classData$m1, by = list(classData$lecture), FUN = mean)
aggregate(x = classData$m1, by = list(classData$lecture), FUN = sd)

# The same three summaries of m1 using the formula interface.
aggregate(m1 ~ lecture, data = classData, FUN = length)
aggregate(m1 ~ lecture, data = classData, FUN = mean)
aggregate(m1 ~ lecture, data = classData, FUN = sd)
# The same summaries with dplyr ----
library(dplyr)

# Overall mean of m1 across all lectures.
summarise(classData, mean = mean(m1))

# Per-lecture n / mean / sd. Note the British spelling: summarise().
group_by(classData, lecture) %>%
  summarise(n = length(m1), mean = mean(m1), sd = sd(m1))

# The American spelling, summarize(), is the same verb.
group_by(classData, lecture) %>%
  summarize(n = length(m1), mean = mean(m1), sd = sd(m1))

# Explicitly show classData as the source of the pipeline.
classData %>%
  group_by(lecture) %>%
  summarize(n = length(m1), mean = mean(m1), sd = sd(m1))
# One-way ANOVA of m1 by lecture, fitted two ways ----
plot(m1 ~ lecture, data = classData)

# Fit with aov(); the outer parentheses print the fit as it is assigned.
(classAov <- aov(m1 ~ lecture, data = classData))
anova(classAov)

# Fit the same formula with lm() and ask for its ANOVA table.
(classLM <- lm(m1 ~ lecture, data = classData))
anova(classLM)

# anova() is mainly a specialised summary():
summary(classAov) # same as anova(classAov)
# ... but summary() of the lm fit is a different report:
summary(classLM)

# Inspect what each fitted object contains and which classes it carries.
names(classAov)
class(classAov)
names(classLM)
class(classLM)

classAov$fitted.values # aov is like regression: class means are predicted
# Residual check and post-hoc comparisons ----
# Everything here uses `classAov`, the aov() fit of m1 ~ lecture. The name
# `class_aov` is never assigned in this script, so referring to it fails with
# "object not found".
hist(classAov$residuals)

# Bonferroni correction (argument name and method spelled out in full rather
# than relying on partial matching).
pairwise.t.test(classData$m1, classData$lecture,
                p.adjust.method = "bonferroni")

# Tukey correction (post-hoc): compute once, print it, then plot the intervals.
(classTukey <- TukeyHSD(classAov))
plot(classTukey)

# Degrees of freedom for the lecture term and the residuals, read from a
# single ANOVA table.
classAnova <- anova(classAov)
group_df <- classAnova["lecture", "Df"]
res_df <- classAnova["Residuals", "Df"]
# Multi-factor ANOVA on the diamonds data ----
# NOTE(review): `diamonds` is the ggplot2 data set, and ggplot2 is not attached
# in the visible part of this script. Load just the data set so the calls
# below can find it; drop this line if ggplot2 is attached elsewhere.
data("diamonds", package = "ggplot2")

# ANOVA table for price against cut, carat, color and clarity.
anova(aov(price ~ cut + carat + color + clarity, data = diamonds))

# Formula plot: price against each of the right-hand-side terms.
plot(price ~ cut + carat + color + clarity, data = diamonds)