library(scales) # percent ggplot scale
library(rstatix) # pipe-ready R functions
library(ggpubr) # Some ggplot based plots
library(lmerTest)
library(patchwork)
library(MASS) # for boxplot
library(ARTool)
library(emmeans)
library(ez)
library(tidyverse)
library(svglite)
source("./device-setup.R")
H_GRAPH_MARGIN = 10
BASE_WIDTH = (FULL_WIDTH - H_GRAPH_MARGIN) / 4
BASE_HEIGHT = (FULL_WIDTH - H_GRAPH_MARGIN) / 4
normalCheck <- function(model) {
res = residuals(model)
qqnorm((res - mean(res)) / sd(res))
abline(0, 1)
print (shapiro.test(res))
}
line_graph <- function(completed_trials, measure, min_value=NA, max_value=NA) {
graph_data <- completed_trials |>
rename(value = measure) |>
group_by(participant, accuracy, keytime, suggestions_type) |>
summarize(
p_value = mean(value),
.groups="drop"
) |>
group_by(accuracy, keytime, suggestions_type) |>
summarize(
error_value = t_error(p_value),
mean_value = mean(p_value),
ci_min = if_else(is.na(min_value), mean_value - error_value, max(min_value, mean_value - error_value)),
ci_max = if_else(is.na(max_value), mean_value + error_value, min(max_value, mean_value + error_value)),
.groups="drop"
)
ggplot(graph_data, aes(
x = accuracy |> factor_to_numeric(),
y = mean_value,
ymin = ci_min,
ymax = ci_max,
color=keytime |> factor_to_numeric(),
group=keytime |> factor_to_numeric()
)) +
scale_x_continuous(breaks=ACCURACY_LEVELS) +
scale_color_continuous(breaks=keytime_LEVELS) +
geom_line() +
geom_pointrange()
}
measured_runs <- read_device_runs(measured_only=TRUE)
measured_trials <- read_device_trials(measured_only=TRUE)
p_data_usage <- measured_trials |>
group_by(participant, device, accuracy, accuracy_numeric) |>
summarize(mean_suggestion_use = mean(total_suggestion_used),
.groups = "drop")
summary_usage <- p_data_usage |>
group_by(accuracy_numeric, device) |>
summarize(
error_suggestion_used = t_error(mean_suggestion_use),
mean_suggestion_used = mean(mean_suggestion_use),
min_suggestion_used = max(0, mean_suggestion_used - error_suggestion_used),
max_suggestion_used = mean_suggestion_used + error_suggestion_used,
.groups = "drop"
)
summary_usage
measured_trials |>
group_by(device, participant) |>
summarize(mean_suggestion_use = mean(total_suggestion_used), .groups="drop_last") |>
summarize(mean_suggestion_used = mean(mean_suggestion_use), .groups = "drop")
measured_trials |>
group_by(accuracy, participant) |>
summarize(mean_suggestion_use = mean(total_suggestion_used), .groups="drop_last") |>
summarize(mean_suggestion_used = mean(mean_suggestion_use), .groups = "drop")
pd <- position_dodge(0.025)
suggestion_usage_plot <- ggplot(
summary_usage,
aes(
x = accuracy_numeric,
y = mean_suggestion_used,
ymin = min_suggestion_used,
ymax = max_suggestion_used,
color = device,
group = device
)
) +
expand_limits(y = c(0)) +
custom_line(position = pd) +
SCALE_X_ACCURACY +
scale_color_manual("Device", values = DEVICE_COLORS) +
scale_y_continuous("Suggestion Usage per Trial", breaks = seq(0, 6, 1)) +
custom_pointrange(position = pd) +
theme(legend.position = "none",
axis.title.x = element_blank())
ggsave(
graph_path("suggestions-usage-devices.pdf"),
units = "mm",
width = BASE_WIDTH,
height = BASE_HEIGHT,
device = cairo_pdf
)
suggestion_usage_plot
p_data_usage |>
group_by(accuracy, device) |>
shapiro_test(mean_suggestion_use)
ggqqplot(p_data_usage, "mean_suggestion_use", ggtheme = theme_bw()) +
facet_grid(accuracy ~ device)
Not normal.
# Comes from https://stackoverflow.com/a/34002020.
# We need to shift it because we have non positive values...
bc <-
boxcox((mean_suggestion_use + 1e-10) ~ device * accuracy, data =
p_data_usage)
lambda <- bc$x[which.max(bc$y)]
powerTransform <-
function(y,
lambda1,
lambda2 = NULL,
method = "boxcox") {
boxcoxTrans <- function(x, lam1, lam2 = NULL) {
# if we set lambda2 to zero, it becomes the one parameter transformation
lam2 <- ifelse(is.null(lam2), 0, lam2)
if (lam1 == 0L) {
log(y + lam2)
} else {
(((y + lam2) ^ lam1) - 1) / lam1
}
}
switch(method,
boxcox = boxcoxTrans(y, lambda1, lambda2),
tukey = y ^ lambda1)
}
p_data_usage <- p_data_usage |>
mutate(transformed_suggestion_use = powerTransform(mean_suggestion_use, lambda))
ggqqplot(p_data_usage, "transformed_suggestion_use", ggtheme = theme_bw()) +
facet_grid(accuracy ~ device)
Boxcox won’t work. ART it is.
m_usage = art(mean_suggestion_use ~ device * accuracy + (1|participant),
data = p_data_usage)
anova(m_usage)
emmeans(artlm(m_usage, "device"), pairwise ~ device)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
device emmean SE df lower.CL upper.CL
desktop 24.4 3.85 74.9 16.7 32.1
tablet 63.4 3.85 74.9 55.7 71.1
phone 75.7 3.85 74.9 68.0 83.3
Results are averaged over the levels of: accuracy
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
desktop - tablet -39.0 4.22 66 -9.248 <.0001
desktop - phone -51.2 4.22 66 -12.153 <.0001
tablet - phone -12.2 4.22 66 -2.905 0.0137
Results are averaged over the levels of: accuracy
Degrees-of-freedom method: kenward-roger
P value adjustment: tukey method for comparing a family of 3 estimates
emmeans(artlm(m_usage, "accuracy"), pairwise ~ accuracy)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
accuracy emmean SE df lower.CL upper.CL
0.1 29.0 4.87 33 19.1 38.9
0.5 50.2 4.87 33 40.3 60.1
0.9 84.3 4.87 33 74.4 94.2
Results are averaged over the levels of: device
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
accuracy0.1 - accuracy0.5 -21.1 6.89 33 -3.068 0.0116
accuracy0.1 - accuracy0.9 -55.3 6.89 33 -8.022 <.0001
accuracy0.5 - accuracy0.9 -34.1 6.89 33 -4.954 0.0001
Results are averaged over the levels of: device
Degrees-of-freedom method: kenward-roger
P value adjustment: tukey method for comparing a family of 3 estimates
Pairwise comparisons are not reliable with ART. Instead, we perform interaction contrasts. See http://depts.washington.edu/acelab/proj/art/index.html.
contrast(
emmeans(
artlm(m_usage, "device:accuracy"),
~ device:accuracy
),
method = "pairwise",
interaction = TRUE
)
device_pairwise accuracy_pairwise estimate SE df t.ratio p.value
desktop - tablet 0.1 - 0.5 23.33 11.6 66 2.015 0.0479
desktop - phone 0.1 - 0.5 36.25 11.6 66 3.131 0.0026
tablet - phone 0.1 - 0.5 12.92 11.6 66 1.116 0.2686
desktop - tablet 0.1 - 0.9 87.00 11.6 66 7.515 <.0001
desktop - phone 0.1 - 0.9 104.17 11.6 66 8.998 <.0001
tablet - phone 0.1 - 0.9 17.17 11.6 66 1.483 0.1429
desktop - tablet 0.5 - 0.9 63.67 11.6 66 5.499 <.0001
desktop - phone 0.5 - 0.9 67.92 11.6 66 5.866 <.0001
tablet - phone 0.5 - 0.9 4.25 11.6 66 0.367 0.7147
Degrees-of-freedom method: kenward-roger
p_data_sks <- measured_trials |>
group_by(participant, accuracy, accuracy_numeric, device) |>
summarize(
p_mean_sks = mean(actual_sks),
.groups="drop"
)
summary_sks <- p_data_sks |>
group_by(accuracy, accuracy_numeric, device) |>
summarize(
error_sks = t_error(p_mean_sks),
mean_sks = mean(p_mean_sks),
min_sks = mean_sks - error_sks,
max_sks = mean_sks + error_sks,
.groups="drop"
)
theoretical_sks <- measured_trials |>
group_by(participant, accuracy_numeric, device) |>
summarize(p_mean_sks = mean(theoretical_sks), .groups="drop") |>
group_by(accuracy_numeric, device) |>
summarize(p_mean_sks = mean(p_mean_sks), .groups="drop") |>
group_by(accuracy_numeric) |>
summarize(
error_sks = t_error(p_mean_sks),
mean_sks = mean(p_mean_sks),
min_sks = mean_sks - error_sks,
max_sks = mean_sks + error_sks,
.groups="drop"
)
pd <- position_dodge(0.025)
ggplot(summary_sks, aes(
x = accuracy_numeric,
y = mean_sks,
ymin = min_sks,
ymax = max_sks,
color = device,
group = device
)) +
geom_line(data=theoretical_sks, color = "red", group = "red") +
geom_pointrange(data=theoretical_sks, color="red", group="red") +
scale_x_continuous(breaks=c(0, 0.1, 0.3, 0.5, 0.7, 0.9, 1)) +
geom_line(position=pd) +
geom_pointrange(position=pd) +
labs(x="Accuracy", y="Saved Keystrokes", color="Device")
p_data_ks <- measured_trials |>
group_by(participant, accuracy, device, accuracy_numeric) |>
summarize(
p_mean_actual_ks = mean(actual_key_saving_no_editing),
p_mean_theoretical_ks = mean(theoretical_key_saving),
.groups="drop"
) |>
group_by(accuracy, device) |>
mutate(
is_outlier = is_outlier(p_mean_actual_ks)
) |>
ungroup()
actual_ks_summary <- p_data_ks |>
group_by(accuracy_numeric, device) |>
summarize(
ks_type = "actual",
error_ks = t_error(p_mean_actual_ks),
mean_ks = mean(p_mean_actual_ks),
min_ks = max(0, mean_ks - error_ks),
max_ks = min(1, mean_ks + error_ks),
.groups="drop"
)
theoretical_ks_summary <- p_data_ks |>
group_by(accuracy_numeric) |>
summarize(
ks_type = "theoretical",
device = NA,
error_ks = t_error(p_mean_theoretical_ks),
mean_ks = mean(p_mean_theoretical_ks),
min_ks = max(0, mean_ks - error_ks),
max_ks = min(1, mean_ks + error_ks),
.groups="drop"
)
ks_summary <- union_all(theoretical_ks_summary, actual_ks_summary)
ks_summary
measured_trials |>
group_by(device, participant) |>
summarize(m_ks = mean(actual_key_saving_no_editing), .groups="drop_last") |>
summarize(mean_ks = mean(m_ks), .groups = "drop")
measured_trials |>
group_by(accuracy, participant) |>
summarize(m_ks = mean(actual_key_saving_no_editing), .groups="drop_last") |>
summarize(mean_ks = mean(m_ks), .groups = "drop")
p_data_ks <- measured_trials |>
group_by(participant, accuracy, device, accuracy_numeric) |>
summarize(
p_mean_actual_ks = mean(actual_key_saving_no_editing),
p_mean_theoretical_ks = mean(theoretical_key_saving),
.groups="drop"
) |>
group_by(accuracy, device) |>
mutate(
is_outlier = is_outlier(p_mean_actual_ks)
) |>
ungroup()
actual_ks_summary <- p_data_ks |>
group_by(accuracy_numeric, device) |>
summarize(
error_ks = t_error(p_mean_actual_ks),
mean_ks = mean(p_mean_actual_ks),
min_ks = max(0, mean_ks - error_ks),
max_ks = min(1, mean_ks + error_ks),
ks_type = "actual",
.groups="drop"
)
theoretical_ks_summary <- p_data_ks |>
group_by(accuracy_numeric) |>
summarize(
device=NA,
error_ks = t_error(p_mean_theoretical_ks),
mean_ks = mean(p_mean_theoretical_ks),
min_ks = max(0, mean_ks - error_ks),
max_ks = min(1, mean_ks + error_ks),
ks_type = "theoretical",
.groups="drop"
)
ks_summary <- union_all(theoretical_ks_summary, actual_ks_summary)
pd <- position_dodge(0.025)
key_stroke_saving_plot <- ggplot(ks_summary, aes(
x = accuracy_numeric,
y = mean_ks,
ymin = min_ks,
ymax = max_ks,
color = device,
group = device
)) +
custom_line(
data=theoretical_ks_summary,
color=THEORETICAL_COLOR,
group="theoretical"
) +
custom_pointrange(
data=theoretical_ks_summary,
color=THEORETICAL_COLOR,
group="theoretical",
shape=17
) +
scale_color_manual("Device", values = DEVICE_COLORS) +
custom_line(data=actual_ks_summary, position=pd) +
custom_pointrange(data=actual_ks_summary, position=pd) +
SCALE_X_ACCURACY +
scale_y_continuous("Keystroke Saving", limits = c(0, 1), labels = percent) +
theme(legend.position = "none",
axis.title.x = element_blank())
ggsave(
graph_path("keystroke-saving-devices.pdf"),
units = "mm",
# Manually add 2 mm because axis labels are longer.
width = BASE_WIDTH + 2,
height = BASE_HEIGHT,
device = cairo_pdf
)
key_stroke_saving_plot
p_data_ks |>
group_by(accuracy, device) |>
shapiro_test(p_mean_actual_ks)
ggqqplot(p_data_ks, "p_mean_actual_ks", ggtheme = theme_bw()) +
facet_grid(accuracy ~ device)
Not normal.
# Comes from https://stackoverflow.com/a/34002020.
# We need to shift it because we have non positive values...
bc <- boxcox((p_mean_actual_ks+1e-10)~device*accuracy, data=p_data_ks)
lambda <- bc$x[which.max(bc$y)]
powerTransform <-
function(y,
lambda1,
lambda2 = NULL,
method = "boxcox") {
boxcoxTrans <- function(x, lam1, lam2 = NULL) {
# if we set lambda2 to zero, it becomes the one parameter transformation
lam2 <- ifelse(is.null(lam2), 0, lam2)
if (lam1 == 0L) {
log(y + lam2)
} else {
(((y + lam2) ^ lam1) - 1) / lam1
}
}
switch(method,
boxcox = boxcoxTrans(y, lambda1, lambda2),
tukey = y ^ lambda1)
}
p_data_ks <- p_data_ks |> mutate(transformed_ks = powerTransform(p_mean_actual_ks, lambda))
ggqqplot(p_data_ks, "transformed_ks", ggtheme = theme_bw()) +
facet_grid(accuracy ~ device)
Boxcox won’t work. ART it is.
m_ks = art(p_mean_actual_ks ~ device * accuracy + (1|participant),
data = p_data_ks)
anova(m_ks)
emmeans(artlm(m_ks, "device"), pairwise ~ device)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
device emmean SE df lower.CL upper.CL
desktop 24.2 3.84 86.4 16.5 31.8
tablet 64.3 3.84 86.4 56.7 71.9
phone 75.0 3.84 86.4 67.4 82.7
Results are averaged over the levels of: accuracy
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
desktop - tablet -40.1 4.65 66 -8.641 <.0001
desktop - phone -50.9 4.65 66 -10.949 <.0001
tablet - phone -10.7 4.65 66 -2.308 0.0616
Results are averaged over the levels of: accuracy
Degrees-of-freedom method: kenward-roger
P value adjustment: tukey method for comparing a family of 3 estimates
emmeans(artlm(m_ks, "accuracy"), pairwise ~ accuracy)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
accuracy emmean SE df lower.CL upper.CL
0.1 26.8 4.25 33 18.1 35.4
0.5 50.3 4.25 33 41.7 59.0
0.9 86.4 4.25 33 77.7 95.0
Results are averaged over the levels of: device
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
accuracy0.1 - accuracy0.5 -23.6 6.01 33 -3.916 0.0012
accuracy0.1 - accuracy0.9 -59.6 6.01 33 -9.911 <.0001
accuracy0.5 - accuracy0.9 -36.1 6.01 33 -5.994 <.0001
Results are averaged over the levels of: device
Degrees-of-freedom method: kenward-roger
P value adjustment: tukey method for comparing a family of 3 estimates
Pairwise comparisons are not reliable with ART. Instead, we perform interaction contrasts. See http://depts.washington.edu/acelab/proj/art/index.html.
contrast(emmeans(artlm(m_ks, "device:accuracy"), ~ device:accuracy), method="pairwise", interaction=TRUE)
device_pairwise accuracy_pairwise estimate SE df t.ratio p.value
desktop - tablet 0.1 - 0.5 16.17 11.8 66 1.367 0.1762
desktop - phone 0.1 - 0.5 28.58 11.8 66 2.417 0.0184
tablet - phone 0.1 - 0.5 12.42 11.8 66 1.050 0.2975
desktop - tablet 0.1 - 0.9 93.92 11.8 66 7.943 <.0001
desktop - phone 0.1 - 0.9 113.08 11.8 66 9.564 <.0001
tablet - phone 0.1 - 0.9 19.17 11.8 66 1.621 0.1098
desktop - tablet 0.5 - 0.9 77.75 11.8 66 6.576 <.0001
desktop - phone 0.5 - 0.9 84.50 11.8 66 7.147 <.0001
tablet - phone 0.5 - 0.9 6.75 11.8 66 0.571 0.5700
Degrees-of-freedom method: kenward-roger
p_data_tsku <- measured_trials |>
group_by(participant, accuracy_numeric, accuracy, device) |>
summarize(
p_mean_ksr = mean((total_final_suggestion_chars / total_chars) / theoretical_key_saving),
.groups="drop"
)
actual_tsku_data <- p_data_tsku |>
group_by(accuracy_numeric, accuracy, device) |>
summarize(
error_ksr = t_error(p_mean_ksr),
mean_ksr = mean(p_mean_ksr),
min_ksr = max(0, mean_ksr - error_ksr),
max_ksr = min(1, mean_ksr + error_ksr),
.groups="drop"
)
actual_tsku_data
measured_trials |>
group_by(device, participant) |>
summarize(p_mean_ksr = mean((total_final_suggestion_chars / total_chars) / theoretical_key_saving), .groups="drop_last") |>
summarize(mean_ksr = mean(p_mean_ksr), .groups = "drop")
measured_trials |>
group_by(accuracy, participant) |>
summarize(p_mean_ksr = mean((total_final_suggestion_chars / total_chars) / theoretical_key_saving), .groups="drop_last") |>
summarize(mean_ksr = mean(p_mean_ksr), .groups = "drop")
theoretical_tsku_data <- tibble(
accuracy_numeric = ACCURACY_LEVELS_NUM,
mean_ksr = 1.0,
min_ksr = 1.0,
max_ksr = 1.0
)
pd <- position_dodge(0.025)
keystroke_saving_ratio_plot <- ggplot(actual_tsku_data, aes(
x = accuracy_numeric,
y = mean_ksr,
ymin = min_ksr,
ymax = max_ksr,
color = device,
group = device
)) +
custom_line(
data=theoretical_tsku_data,
color=THEORETICAL_COLOR,
group="theoretical"
) +
custom_pointrange(
data=theoretical_tsku_data,
color=THEORETICAL_COLOR,
group="theoretical",
shape=17
) +
custom_line(position=pd) +
custom_pointrange(position=pd) +
SCALE_X_ACCURACY +
xlab("") +
scale_color_manual("Device", values = DEVICE_COLORS) +
scale_y_continuous("Keystroke Saving Ratio", limits = c(0, 1), labels = percent)+
theme(legend.position = "none",
axis.title.x = element_blank())
ggsave(
graph_path("keystroke-saving-ratio-devices.pdf"),
units = "mm",
# Manually add 2 mm because axis labels are longer.
width = BASE_WIDTH + 2,
height = BASE_HEIGHT,
device = cairo_pdf
)
keystroke_saving_ratio_plot
m_ksr = art(p_mean_ksr ~ device * accuracy + (1|participant),
data = p_data_tsku)
anova(m_ks)
emmeans(artlm(m_ksr, "device"), pairwise ~ device)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
device emmean SE df lower.CL upper.CL
desktop 25.0 3.94 83.5 17.1 32.8
tablet 63.7 3.94 83.5 55.8 71.5
phone 74.9 3.94 83.5 67.0 82.7
Results are averaged over the levels of: accuracy
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
desktop - tablet -38.7 4.65 66 -8.324 <.0001
desktop - phone -49.9 4.65 66 -10.732 <.0001
tablet - phone -11.2 4.65 66 -2.408 0.0488
Results are averaged over the levels of: accuracy
Degrees-of-freedom method: kenward-roger
P value adjustment: tukey method for comparing a family of 3 estimates
emmeans(artlm(m_ksr, "accuracy"), pairwise ~ accuracy)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
accuracy emmean SE df lower.CL upper.CL
0.1 39.4 5.29 33 28.7 50.2
0.5 41.1 5.29 33 30.3 51.9
0.9 83.0 5.29 33 72.2 93.7
Results are averaged over the levels of: device
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
accuracy0.1 - accuracy0.5 -1.64 7.49 33 -0.219 0.9739
accuracy0.1 - accuracy0.9 -43.53 7.49 33 -5.815 <.0001
accuracy0.5 - accuracy0.9 -41.89 7.49 33 -5.596 <.0001
Results are averaged over the levels of: device
Degrees-of-freedom method: kenward-roger
P value adjustment: tukey method for comparing a family of 3 estimates
actual_graph_data <- measured_trials |>
group_by(participant, accuracy_numeric, device) |>
summarize(
p_value = mean(duration),
.groups="drop"
) |>
group_by(accuracy_numeric, device) |>
summarize(
error_value = t_error(p_value),
mean_value = mean(p_value),
min_value = max(0, mean_value - error_value),
max_value = mean_value + error_value,
.groups="drop"
)
pd <- position_dodge(0.025)
ggplot(actual_graph_data, aes(
x = accuracy_numeric,
y = mean_value,
ymin = min_value,
ymax = max_value,
color = device,
group = device
)) +
scale_x_continuous(breaks=c(0, 0.1, 0.3, 0.5, 0.7, 0.9, 1)) +
scale_color_manual("Device", values = DEVICE_COLORS) +
expand_limits(y = c(0)) +
geom_line(position=pd) +
geom_pointrange(position=pd) +
labs(x="Suggestions Accuracy", y="Trial Duration (seconds)", color="Key Stroke Delay")
p_data_ts <- measured_trials |>
group_by(participant, accuracy, device, accuracy_numeric) |>
summarize(speed = mean(cps) / 5 * 60, .groups = "drop")
summary_ts <- p_data_ts |>
group_by(accuracy_numeric, device) |>
summarize(
error_value = t_error(speed),
mean_value = mean(speed),
min_value = max(0, mean_value - error_value),
max_value = mean_value + error_value,
.groups = "drop"
)
# Also calculating this here because we want the same max between natural entry speed and entry
# speed.
summary_nts <- measured_runs |>
filter(!is.na(run_start_date)) |>
mutate(accuracy_numeric = accuracy |> as.character() |> as.numeric()) |>
group_by(accuracy_numeric, device) |>
summarize(
error_value = t_error(avg_wpm),
mean_value = mean(avg_wpm),
min_value = max(0, mean_value - error_value),
max_value = mean_value + error_value,
.groups = "drop"
)
max_wpm = max(summary_ts$max_value, summary_nts$max_value)
summary_ts
measured_trials |>
group_by(device, participant) |>
summarize(m_es = mean(cps) / 5 * 60, .groups = "drop_last") |>
summarize(mean_wpm = mean(m_es), .groups = "drop")
measured_trials |>
group_by(accuracy, participant) |>
summarize(m_es = mean(cps) / 5 * 60, .groups = "drop_last") |>
summarize(mean_wpm = mean(m_es), .groups = "drop")
pd <- position_dodge(0.025)
entry_speed_plot <- ggplot(summary_ts, aes(
x = accuracy_numeric,
y = mean_value,
ymin = min_value,
ymax = max_value,
color = device,
group = device
)) +
SCALE_X_ACCURACY +
expand_limits(y = c(0, max_wpm)) +
scale_y_continuous(
breaks=seq(0, max_wpm + 20, 20),
labels=paste(seq(0, max_wpm + 20, 20), "wpm")
) +
scale_color_manual("Device", values = DEVICE_COLORS) +
custom_line(position=pd) +
custom_pointrange(position=pd) +
labs(y="Entry Speed") +
guides(color = guide_legend(override.aes = list(linetype = 0))) +
theme(legend.position = "none",
axis.title.x = element_blank())
ggsave(
graph_path("entry-speed-devices.pdf"),
units = "mm",
# Manually add 2 mm because axis labels are even longer.
width = BASE_WIDTH + 3,
height = BASE_HEIGHT,
device = cairo_pdf
)
entry_speed_plot
m_ts = art(speed ~ device * accuracy + (1|participant),
data = p_data_ts)
anova(m_ts)
emmeans(artlm(m_ts, "device"), pairwise ~ device)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
device emmean SE df lower.CL upper.CL
desktop 89.2 3.31 84.7 82.6 95.8
tablet 40.8 3.31 84.7 34.2 47.4
phone 33.5 3.31 84.7 27.0 40.1
Results are averaged over the levels of: accuracy
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
desktop - tablet 48.42 3.94 66 12.291 <.0001
desktop - phone 55.67 3.94 66 14.131 <.0001
tablet - phone 7.25 3.94 66 1.840 0.1645
Results are averaged over the levels of: accuracy
Degrees-of-freedom method: kenward-roger
P value adjustment: tukey method for comparing a family of 3 estimates
emmeans(artlm(m_ts, "accuracy"), pairwise ~ accuracy)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
accuracy emmean SE df lower.CL upper.CL
0.1 37.8 5.95 33 25.7 49.9
0.5 53.1 5.95 33 40.9 65.2
0.9 72.6 5.95 33 60.5 84.7
Results are averaged over the levels of: device
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
accuracy0.1 - accuracy0.5 -15.2 8.42 33 -1.812 0.1814
accuracy0.1 - accuracy0.9 -34.8 8.42 33 -4.139 0.0006
accuracy0.5 - accuracy0.9 -19.6 8.42 33 -2.327 0.0659
Results are averaged over the levels of: device
Degrees-of-freedom method: kenward-roger
P value adjustment: tukey method for comparing a family of 3 estimates
This is the entry speed without suggestions.
ggplot(measured_runs, aes(x = avg_wpm, fill=device)) +
geom_histogram(binwidth = 5) +
facet_grid(rows = "device")
# summary_nts is calculated in the cell above.
pd <- position_dodge(0.025)
ggplot(summary_nts, aes(
x = accuracy_numeric,
y = mean_value,
ymin = min_value,
ymax = max_value,
color = device,
group = device
)) +
SCALE_X_ACCURACY +
expand_limits(y = c(0, max_wpm)) +
scale_y_continuous(
breaks=seq(0, max_wpm + 20, 20),
labels=paste(seq(0, max_wpm + 20, 20), "wpm")
) +
custom_line(position=pd) +
custom_pointrange(position=pd) +
labs(y="Natural Entry Speed") +
guides(color = guide_legend(override.aes = list(linetype = 0)))
Ideally, we would want all three lines to be constant.
summary_nts |>
group_by(device) |>
summarize(
.groups = "drop",
mean_speed = mean(mean_value),
sd_speed = sd(mean_value),
diff = max(mean_value) - min(min(mean_value))
)
ezANOVA(
measured_runs,
avg_wpm,
wid = c("participant"),
within = c("device")
)
$ANOVA
Effect DFn DFd F p p<.05 ges
2 device 2 70 157.1841 1.294993e-26 * 0.7217376
$`Mauchly's Test for Sphericity`
Effect W p p<.05
2 device 0.2995354 1.257817e-09 *
$`Sphericity Corrections`
Effect GGe p[GG] p[GG]<.05 HFe p[HF] p[HF]<.05
2 device 0.5880746 1.217766e-16 * 0.596345 7.671883e-17 *
pairwise.t.test(measured_runs$avg_wpm, measured_runs$device, paired = T, p.adjust = "bonf")
Pairwise comparisons using paired t tests
data: measured_runs$avg_wpm and measured_runs$device
desktop tablet
tablet 2.2e-14 -
phone 2.2e-14 0.00072
P value adjustment method: bonferroni
kses_data <-
left_join(p_data_ks,
measured_runs,
by = c("participant", "device", "accuracy")) |>
select(participant,
device,
accuracy,
avg_wpm,
p_mean_actual_ks) |>
mutate(
acc_label = paste0("accuracy ", accuracy),
accuracy_numeric = as.numeric(accuracy),
device = device |> recode_factor(laptop = "desktop")
)
h_line_data <-
tibble(acc = c(0.1, 0.5, 0.9), x = 110) |> mutate(acc_label = paste("accuracy", acc))
dp1 <- ggplot(kses_data, aes(x = avg_wpm, y = p_mean_actual_ks)) +
geom_point(aes(color = device, shape = device)) +
facet_grid(cols = vars(acc_label)) +
geom_text(
aes(y = acc, x = x),
color = "#404040",
label = "max",
data = h_line_data,
size = 2,
hjust = 0,
vjust = -0.75,
family = "Linux Libertine"
) +
geom_hline(aes(yintercept = acc), size = 0.25, data = h_line_data) +
labs(
x = "Natural Entry Speed (wpm)",
y = "Keystroke Saving",
color = "Device",
shape = "Device"
) +
scale_color_manual(values = DEVICE_COLORS) +
expand_limits(x = 0) +
scale_y_continuous(limits = c(0, 1), labels = percent)
ggsave(
graph_path("keystroke-saving-vs-entry-speed.pdf"),
plot = dp1,
units = "mm",
# Manually add 2 mm because axis labels are even longer.
width = FULL_WIDTH,
height = BASE_HEIGHT,
device=cairo_pdf
)
dp1
m <-
lmer(
p_mean_actual_ks ~ avg_wpm + accuracy_numeric +
(1 + avg_wpm | participant) + (1 + avg_wpm | device),
data = kses_data
)
boundary (singular) fit: see help('isSingular')
qqnorm(residuals(m))
qqline(residuals(m))
shapiro.test(residuals(m))
Shapiro-Wilk normality test
data: residuals(m)
W = 0.91902, p-value = 6.038e-06
Residuals from Linear Mixed Model are not normally distributed. Simple Pearson correlations will do.
for (x in c(0.1,0.5, 0.9)) {
d <- kses_data |> filter(accuracy == x)
message(paste0("------ A = ", x, " ------"))
print(cor.test(d$avg_wpm, d$p_mean_actual_ks, method="kendall"))
}
------ A = 0.1 ------
Kendall's rank correlation tau
data: d$avg_wpm and d$p_mean_actual_ks
z = -2.6829, p-value = 0.007298
alternative hypothesis: true tau is not equal to 0
sample estimates:
tau
-0.3255828
------ A = 0.5 ------
Kendall's rank correlation tau
data: d$avg_wpm and d$p_mean_actual_ks
z = -5.2076, p-value = 1.913e-07
alternative hypothesis: true tau is not equal to 0
sample estimates:
tau
-0.6170425
------ A = 0.9 ------
Kendall's rank correlation tau
data: d$avg_wpm and d$p_mean_actual_ks
z = -4.8098, p-value = 1.511e-06
alternative hypothesis: true tau is not equal to 0
sample estimates:
tau
-0.5616563
ksnes_data <-
left_join(p_data_tsku,
measured_runs,
by = c("participant", "device", "accuracy")) |>
select(participant, device, accuracy, avg_wpm, p_mean_ksr) |>
mutate(
acc_label = paste("accuracy", accuracy),
device = device |> recode_factor(laptop = "desktop")
)
ggplot(ksnes_data,
aes(
color = device,
shape = device,
x = avg_wpm,
y = p_mean_ksr
)) +
geom_point() +
facet_grid(cols = vars(acc_label)) +
labs(
x = "Natural Entry Speed (wpm)",
y = "Keystroke Saving Ratio",
color = "Device",
shape = "Device"
) +
expand_limits(x = 0) +
scale_color_manual(values = DEVICE_COLORS) +
scale_y_continuous(limits = c(0, 1), labels = percent)
ggsave(
graph_path("keystroke-saving-ratio-vs-entry-speed.pdf"),
units = "mm",
# Manually add 2 mm because axis labels are even longer.
width = FULL_WIDTH,
height = BASE_HEIGHT,
device=cairo_pdf
)
for (a in c(0.1, 0.5, 0.9)){
d <- ksnes_data |> filter(accuracy == a)
message(paste0("------ A = ", a, " ------"))
print(cor.test(d$avg_wpm, d$p_mean_ksr, method = "kendall"))
}
------ A = 0.1 ------
Kendall's rank correlation tau
data: d$avg_wpm and d$p_mean_ksr
z = -2.6413, p-value = 0.00826
alternative hypothesis: true tau is not equal to 0
sample estimates:
tau
-0.3207862
------ A = 0.5 ------
Kendall's rank correlation tau
data: d$avg_wpm and d$p_mean_ksr
z = -5.1802, p-value = 2.217e-07
alternative hypothesis: true tau is not equal to 0
sample estimates:
tau
-0.6137949
------ A = 0.9 ------
Kendall's rank correlation tau
data: d$avg_wpm and d$p_mean_ksr
z = -4.7826, p-value = 1.731e-06
alternative hypothesis: true tau is not equal to 0
sample estimates:
tau
-0.5584741
p_data_ts <- measured_trials |>
group_by(participant, accuracy, device) |>
summarize(speed = mean(cps) / 5 * 60, .groups = "drop") |>
left_join(
measured_runs |> select(participant, accuracy, device, avg_wpm),
by = c("participant", "device", "accuracy")
) |>
mutate(
acc_label = paste0("accuracy ", accuracy),
improvement = speed - avg_wpm,
device = recode_factor(device, laptop = "desktop")
)
dp2 <- ggplot(p_data_ts,
aes(
color = device,
shape = device,
x = avg_wpm,
y = improvement
)) +
facet_grid(cols = vars(acc_label)) +
geom_point() +
labs(
y = "Entry Speed Improvement (wpm)",
x = "Natural Entry Speed (wpm)",
color = "Device",
shape = "Device"
) +
expand_limits(x = 0) +
scale_color_manual(values = DEVICE_COLORS) +
theme(legend.position = "bottom")
# scale_y_continuous(
# breaks = seq(-90, 90, 30),
# labels = str_pad(as.character(seq(-90, 90, 30)), 6, side = "left", pad = " ")
# ) +
ggsave(
graph_path("entry-speed-improvement.pdf"),
plot = dp2,
width = FULL_WIDTH*0.6,
height = BASE_HEIGHT*3*0.6,
units = "mm",
device = cairo_pdf
)
dp2
# Export dp1 and dp2 together
entry_speed_comparisons_combined_plot <- dp1 / dp2 + plot_layout(guides = 'collect')
ggsave(
graph_path("entry-speed-comparisons-combined.pdf"),
plot = entry_speed_comparisons_combined_plot,
units = "mm",
# Manually add 2 mm because axis labels are even longer.
width = FULL_WIDTH,
height = (BASE_HEIGHT + 10) * 2,
device = cairo_pdf
)
for (a in c(0.1, 0.5, 0.9)){
d <- p_data_ts |> filter(accuracy == a)
message(paste0("------ A = ", a, " ------"))
print(cor.test(d$avg_wpm, d$improvement, method="kendall"))
}
------ A = 0.1 ------
Kendall's rank correlation tau
data: d$avg_wpm and d$improvement
T = 200, p-value = 0.001491
alternative hypothesis: true tau is not equal to 0
sample estimates:
tau
-0.3650794
------ A = 0.5 ------
Kendall's rank correlation tau
data: d$avg_wpm and d$improvement
T = 259, p-value = 0.1312
alternative hypothesis: true tau is not equal to 0
sample estimates:
tau
-0.1777778
------ A = 0.9 ------
Kendall's rank correlation tau
data: d$avg_wpm and d$improvement
T = 157, p-value = 7.3e-06
alternative hypothesis: true tau is not equal to 0
sample estimates:
tau
-0.5015873
p_data_suggestion_delay <- measured_trials |>
group_by(participant, accuracy, device, accuracy_numeric) |>
summarize(
suggestion_delay = mean(avg_suggestion_delay) * 1000,
# For ezANOVA.
suggestion_delay_num = as.numeric(suggestion_delay),
.groups = "drop"
)
summary_suggestion_delay <- p_data_suggestion_delay |>
group_by(accuracy_numeric, device) |>
summarize(
error_value = t_error(suggestion_delay),
mean_value = mean(suggestion_delay),
min_value = max(150, mean_value - error_value),
max_value = mean_value + error_value,
.groups = "drop"
)
pd <- position_dodge(0.025)
ggplot(
summary_suggestion_delay,
aes(
x = accuracy_numeric,
y = mean_value,
ymin = min_value,
ymax = max_value,
color = device,
group = device
)
) +
SCALE_X_ACCURACY +
expand_limits(y = c(0)) +
custom_line(position = pd) +
custom_pointrange(position = pd) +
labs(y = "Suggestion delay (ms)") +
guides(color = guide_legend(override.aes = list(linetype = 0)))
m_delay = art(suggestion_delay_num ~ device * accuracy + (1|participant),
data = p_data_suggestion_delay)
anova(m_ks, type = 3)
emmeans(artlm(m_delay, "device"), pairwise ~ device)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
device emmean SE df lower.CL upper.CL
desktop 60.0 4.86 95.3 50.4 69.7
tablet 46.7 4.86 95.3 37.0 56.3
phone 56.8 4.86 95.3 47.2 66.5
Results are averaged over the levels of: accuracy
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
desktop - tablet 13.36 6.38 66 2.094 0.0988
desktop - phone 3.22 6.38 66 0.505 0.8691
tablet - phone -10.14 6.38 66 -1.589 0.2574
Results are averaged over the levels of: accuracy
Degrees-of-freedom method: kenward-roger
P value adjustment: tukey method for comparing a family of 3 estimates
emmeans(artlm(m_delay, "accuracy"), pairwise ~ accuracy)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
accuracy emmean SE df lower.CL upper.CL
0.1 50.8 5.35 33 39.9 61.7
0.5 63.4 5.35 33 52.6 74.3
0.9 49.2 5.35 33 38.4 60.1
Results are averaged over the levels of: device
Degrees-of-freedom method: kenward-roger
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
accuracy0.1 - accuracy0.5 -12.64 7.57 33 -1.670 0.2317
accuracy0.1 - accuracy0.9 1.56 7.57 33 0.206 0.9770
accuracy0.5 - accuracy0.9 14.19 7.57 33 1.876 0.1617
Results are averaged over the levels of: device
Degrees-of-freedom method: kenward-roger
P value adjustment: tukey method for comparing a family of 3 estimates
Pairwise comparisons are not reliable with ART. Instead, we perform interaction contrasts. See http://depts.washington.edu/acelab/proj/art/index.html.
contrast(emmeans(artlm(m_delay, "device:accuracy"), ~ device:accuracy), method="pairwise", interaction=TRUE)
device_pairwise accuracy_pairwise estimate SE df t.ratio p.value
desktop - tablet 0.1 - 0.5 -17.42 15.7 66 -1.106 0.2726
desktop - phone 0.1 - 0.5 9.33 15.7 66 0.593 0.5553
tablet - phone 0.1 - 0.5 26.75 15.7 66 1.699 0.0940
desktop - tablet 0.1 - 0.9 16.00 15.7 66 1.016 0.3132
desktop - phone 0.1 - 0.9 4.58 15.7 66 0.291 0.7719
tablet - phone 0.1 - 0.9 -11.42 15.7 66 -0.725 0.4709
desktop - tablet 0.5 - 0.9 33.42 15.7 66 2.123 0.0375
desktop - phone 0.5 - 0.9 -4.75 15.7 66 -0.302 0.7638
tablet - phone 0.5 - 0.9 -38.17 15.7 66 -2.424 0.0181
Degrees-of-freedom method: kenward-roger
measured_trials |>
select(participant, device, accuracy, trial_id, phrase) |>
mutate(letter = strsplit("abcdefghijklmnopqrstuvwxyz", "")) |>
unnest(letter) |>
mutate(n_letter = str_count(phrase, letter)) |>
group_by(letter) |>
summarize(n_letter = sum(n_letter), .groups="drop") |>
mutate(f = n_letter / sum(n_letter)) |>
arrange(desc(f))
measured_trials |>
select(participant, device, accuracy, trial_id, phrase) |>
mutate(letter = strsplit("abcdefghijklmnopqrstuvwxyz", "")) |>
unnest(letter) |>
mutate(n_letter = str_count(phrase, letter)) |>
group_by(letter, accuracy, device) |>
summarize(n_letter = sum(n_letter), .groups="drop") |>
group_by(accuracy, device) |>
mutate(f = n_letter / sum(n_letter)) |>
arrange(desc(f))
We investigate learning in term of speed.
p_data_ks <- measured_trials |>
group_by(participant, accuracy, device, accuracy_numeric, trial_number) |>
summarize(
p_mean_actual_ks = mean(actual_key_saving_no_editing),
p_mean_theoretical_ks = mean(theoretical_key_saving),
.groups="drop"
) |>
group_by(accuracy, device, trial_number) |>
mutate(
is_outlier = is_outlier(p_mean_actual_ks)
) |>
ungroup()
actual_ks_summary <- p_data_ks |>
group_by(accuracy_numeric, device, trial_number) |>
summarize(
error_ks = t_error(p_mean_actual_ks),
mean_ks = mean(p_mean_actual_ks),
min_ks = max(0, mean_ks - error_ks),
max_ks = min(1, mean_ks + error_ks),
ks_type = "actual",
.groups="drop"
)
theoretical_ks_summary <- p_data_ks |>
group_by(accuracy_numeric, trial_number) |>
summarize(
device = NA,
error_ks = t_error(p_mean_theoretical_ks),
mean_ks = mean(p_mean_theoretical_ks),
min_ks = max(0, mean_ks - error_ks),
max_ks = min(1, mean_ks + error_ks),
ks_type = "theoretical",
.groups="drop"
)
ks_summary <- union_all(theoretical_ks_summary, actual_ks_summary)
pd <- position_dodge(0.2)
ggplot(ks_summary, aes(
x = trial_number,
y = mean_ks,
ymin = min_ks,
ymax = max_ks,
color = device,
group = device
)) +
facet_wrap(vars(accuracy_numeric)) +
custom_line(
data=theoretical_ks_summary,
color=THEORETICAL_COLOR,
group="theoretical"
) +
custom_pointrange(
data=theoretical_ks_summary,
color=THEORETICAL_COLOR,
group="theoretical",
shape=17
) +
scale_color_manual("Device", values = DEVICE_COLORS) +
custom_line(data=actual_ks_summary, position=pd) +
custom_pointrange(data=actual_ks_summary, position=pd) +
scale_y_continuous("Keystroke Saving", limits = c(0, 1), labels = percent) +
theme(legend.position = "bottom")
ggsave(
graph_path("learning-keystroke-saving-devices.pdf"),
units = "mm",
# Manually add 2 mm because axis labels are longer.
width = 150,
height = 75,
device = cairo_pdf
)
theme_margin <-
theme(plot.margin = margin(r = 2, b = 2, unit = "mm"))
objective_combined <- (suggestion_usage_plot + theme_margin) +
entry_speed_plot +
key_stroke_saving_plot + keystroke_saving_ratio_plot
ggsave(
plot = objective_combined,
graph_path("objectives-multi.pdf"),
units = "mm",
width = FULL_WIDTH,
height = FULL_WIDTH / GOLDEN_RATIO,
device = cairo_pdf
)
objective_combined