library(pander)
library(ez)
library(ARTool)
library(MASS) # for boxcox
library(emmeans)
library(scales) # for percent scale
library(patchwork)
source("./keytime-setup.R")
# Amount subtracted from FULL_WIDTH before dividing by four when sizing the
# exported figures (ggsave is always called with units = "mm").
H_GRAPH_MARGIN <- 10
# Spacing between facet panels, passed to theme(panel.spacing = ...) below.
PANEL_MARGIN <- unit(4, units = "mm")
# Normality diagnostics for the residuals of a fitted model.
#
# Draws a normal Q-Q plot of the standardized residuals with a reference line
# of intercept 0 and slope 1, then prints a Shapiro-Wilk test of the raw
# residuals.
#
# model: any fitted object supported by residuals().
# Returns (invisibly, via print) the "htest" object from shapiro.test().
normalCheck <- function(model) {
  # The name `res` is deliberate: shapiro.test() records the deparsed argument
  # as the `data:` label of its printed output.
  res <- residuals(model)
  standardized <- (res - mean(res)) / sd(res)
  qqnorm(standardized)
  abline(a = 0, b = 1)
  shapiro_result <- shapiro.test(res)
  print(shapiro_result)
}
# Trials loaded without the measured-only restriction; only used for the
# sanity count further down.
all_trials <- read_keytime_trials(measured_only = FALSE)

# Measured trials. The suggestion-type factor is re-levelled so the bar level
# comes before the inline level, and a display label naming the experiment is
# derived from it.
measured_test_trials <- mutate(
  read_keytime_trials(measured_only = TRUE),
  suggestions_type = fct_relevel(
    suggestions_type,
    BAR_SUGGESTION_TYPE,
    INLINE_SUGGESTION_TYPE
  ),
  suggestions_label = fct_recode(
    suggestions_type,
    "Suggestion Bar (Exp. 2)" = BAR_SUGGESTION_TYPE,
    "Inline Suggestions (Exp. 3)" = INLINE_SUGGESTION_TYPE
  )
)

measured_runs <- read_keytime_runs(measured_only = TRUE)

# Non-practice trials of measured runs, counted by suggestion type and by
# whether the trial itself was measured / blurred.
all_trials |>
  filter(!is_practice, is_run_measured) |>
  count(suggestions_type, is_trial_measured, was_trial_blurred)

# Mean number of suggestions used per trial, one row per participant
# (with that participant's accuracy, keytime and suggestion type).
p_data_usage <- measured_test_trials |>
  group_by(
    participant,
    accuracy, keytime,
    accuracy_numeric, keytime_numeric,
    suggestions_type
  ) |>
  summarise(
    mean_suggestion_use = mean(total_suggestion_used),
    .groups = "drop"
  )
# Line + point-range plot of suggestion use against accuracy, one line per
# keytime level.
#
# filtered_p_data_usage: per-participant usage means (columns accuracy_numeric,
#   keytime_numeric, mean_suggestion_use), already restricted to the rows to
#   plot.
# Returns the ggplot object.
plot_reliance <- function(filtered_p_data_usage) {
  # err_mean_tibble() returns the summary columns that the aes() below reads
  # (data_mean, data_min, data_max).
  usage_by_condition <- summarize(
    group_by(filtered_p_data_usage, accuracy_numeric, keytime_numeric),
    err_mean_tibble(mean_suggestion_use, min_min = 0),
    .groups = "drop"
  )

  # Small horizontal dodge so overlapping keytime series stay readable.
  dodge <- position_dodge(0.025)

  usage_mapping <- aes(
    x = accuracy_numeric,
    y = data_mean,
    ymin = data_min,
    ymax = data_max,
    color = keytime_numeric,
    group = keytime_numeric
  )

  ggplot(data = usage_by_condition, mapping = usage_mapping) +
    SCALE_COLOR_KEY_STROKE +
    SCALE_X_ACCURACY +
    scale_y_continuous("Suggestions Use per Trials", breaks = seq(0, 6, 1)) +
    expand_limits(y = c(0)) +
    custom_line(position = dodge) +
    custom_pointrange(position = dodge) +
    theme(
      legend.position = "none",
      panel.spacing = PANEL_MARGIN
    )
}
# Descriptive statistics (SD and mean of the per-participant usage means),
# printed at four levels of aggregation.
# In every summarize() below, sd_suggestion_use must be computed BEFORE
# mean_suggestion_use: the second line overwrites the column with its own
# mean, so swapping the two would take the SD of a single value.

# Per suggestion type.
p_data_usage |>
group_by(suggestions_type) |>
summarize(
sd_suggestion_use = sd(mean_suggestion_use),
mean_suggestion_use = mean(mean_suggestion_use),
.groups = "drop"
)
# Per suggestion type x accuracy x keytime cell.
p_data_usage |>
group_by(suggestions_type, accuracy, keytime) |>
summarize(
sd_suggestion_use = sd(mean_suggestion_use),
mean_suggestion_use = mean(mean_suggestion_use),
.groups = "drop"
)
# Per suggestion type x keytime.
p_data_usage |>
group_by(suggestions_type, keytime) |>
summarize(
sd_suggestion_use = sd(mean_suggestion_use),
mean_suggestion_use = mean(mean_suggestion_use),
.groups = "drop"
)
# Per suggestion type x accuracy.
p_data_usage |>
group_by(suggestions_type, accuracy) |>
summarize(
sd_suggestion_use = sd(mean_suggestion_use),
mean_suggestion_use = mean(mean_suggestion_use),
.groups = "drop"
)
# Inline-suggestion subset of the per-participant usage means.
p_data_usage_inline <- filter(
  p_data_usage,
  suggestions_type == INLINE_SUGGESTION_TYPE
)
inline_reliance_plot <- plot_reliance(p_data_usage_inline)

# Export: a quarter of (FULL_WIDTH - H_GRAPH_MARGIN) high, 2 mm narrower than
# that wide.
ggsave(
  filename = graph_path("suggestions-usage-inline.pdf"),
  plot = inline_reliance_plot,
  device = cairo_pdf,
  width = (FULL_WIDTH - H_GRAPH_MARGIN) / 4 - 2,
  height = (FULL_WIDTH - H_GRAPH_MARGIN) / 4,
  units = "mm"
)
inline_reliance_plot

# Distribution of usage within each accuracy (rows) x keytime (columns) cell.
ggplot(p_data_usage_inline, aes(x = mean_suggestion_use)) +
  geom_histogram(binwidth = 0.5) +
  facet_grid(cols = vars(keytime), rows = vars(accuracy))
# Between-subjects ANOVA (keytime x accuracy) of suggestion use in the inline
# condition. The data passed in is trial-level; ezANOVA collapses it to cell
# means per participant (see the warning it prints).
model_usage <- ezANOVA(
data = measured_test_trials |> filter(suggestions_type == INLINE_SUGGESTION_TYPE),
wid = c("participant"),
dv = total_suggestion_used,
between = c("keytime", "accuracy"),
# Keep the underlying aov fit: its residuals are plotted right after this call.
return_aov = TRUE
)
Warning: You have removed one or more Ss from the analysis. Refactoring "participant" for ANOVA.
Warning: Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().
Warning: Collapsing data to cell means. *IF* the requested effects are a subset of the full design, you must use the "within_full"
argument, else results may be inaccurate.
Coefficient covariances computed by hccm()
qqnorm(model_usage$aov$residuals);qqline(model_usage$aov$residuals)
model_usage
$ANOVA
Effect DFn DFd F p p<.05 ges
1 keytime 3 587 109.90919 1.753218e-56 * 0.3596789
2 accuracy 4 587 327.74936 5.303566e-148 * 0.6907267
3 keytime:accuracy 12 587 11.29028 1.614947e-20 * 0.1875246
$`Levene's Test for Homogeneity of Variance`
DFn DFd SSn SSd F p p<.05
1 19 587 118.6575 274.5834 13.35074 6.059843e-35 *
$aov
Call:
aov(formula = formula(aov_formula), data = data)
Terms:
keytime accuracy keytime:accuracy Residuals
Sum of Squares 374.7243 1438.9419 148.7057 644.2872
Deg. of Freedom 3 4 12 587
Residual standard error: 1.047661
Estimated effects may be unbalanced
# Aligned Rank Transform model of per-participant suggestion use (inline
# condition), its Type II ANOVA table, and pairwise keytime contrasts.
art_m_usage_inline <- art(
  mean_suggestion_use ~ keytime * accuracy,
  data = p_data_usage_inline
)
anova(art_m_usage_inline, type = 2)
emmeans(
  artlm(art_m_usage_inline, "keytime"),
  pairwise ~ keytime
)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
keytime emmean SE df lower.CL upper.CL
0 171 11.2 587 149 193
50 230 11.0 587 208 251
100 358 11.2 587 336 380
200 455 11.0 587 433 477
Results are averaged over the levels of: accuracy
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
keytime0 - keytime50 -59.0 15.7 587 -3.759 0.0011
keytime0 - keytime100 -187.3 15.8 587 -11.875 <.0001
keytime0 - keytime200 -284.4 15.7 587 -18.141 <.0001
keytime50 - keytime100 -128.3 15.7 587 -8.174 <.0001
keytime50 - keytime200 -225.4 15.6 587 -14.449 <.0001
keytime100 - keytime200 -97.1 15.7 587 -6.196 <.0001
Results are averaged over the levels of: accuracy
P value adjustment: tukey method for comparing a family of 4 estimates
emmeans(artlm(art_m_usage_inline, "accuracy"), pairwise ~ accuracy)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
accuracy emmean SE df lower.CL upper.CL
0.1 89.6 8.59 587 72.7 106
0.3 201.2 8.59 587 184.3 218
0.5 298.5 8.63 587 281.6 315
0.7 429.2 8.46 587 412.6 446
0.9 498.8 8.63 587 481.9 516
Results are averaged over the levels of: keytime
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
accuracy0.1 - accuracy0.3 -111.6 12.2 587 -9.183 <.0001
accuracy0.1 - accuracy0.5 -208.9 12.2 587 -17.161 <.0001
accuracy0.1 - accuracy0.7 -339.6 12.1 587 -28.159 <.0001
accuracy0.1 - accuracy0.9 -409.2 12.2 587 -33.612 <.0001
accuracy0.3 - accuracy0.5 -97.4 12.2 587 -7.997 <.0001
accuracy0.3 - accuracy0.7 -228.0 12.1 587 -18.907 <.0001
accuracy0.3 - accuracy0.9 -297.6 12.2 587 -24.448 <.0001
accuracy0.5 - accuracy0.7 -130.7 12.1 587 -10.812 <.0001
accuracy0.5 - accuracy0.9 -200.3 12.2 587 -16.417 <.0001
accuracy0.7 - accuracy0.9 -69.6 12.1 587 -5.762 <.0001
Results are averaged over the levels of: keytime
P value adjustment: tukey method for comparing a family of 5 estimates
# Suggestion-bar subset of the per-participant usage means.
p_data_usage_bar <- filter(
  p_data_usage,
  suggestions_type == BAR_SUGGESTION_TYPE
)
# Alias used by the ART model below; currently identical to the full bar data.
p_data_usage_bar_common_factors <- p_data_usage_bar
bar_reliance_plot <- plot_reliance(p_data_usage_bar)

# Export: a quarter of (FULL_WIDTH - H_GRAPH_MARGIN) high, 2 mm narrower than
# that wide.
ggsave(
  filename = graph_path("suggestions-usage-bar.pdf"),
  plot = bar_reliance_plot,
  device = cairo_pdf,
  width = (FULL_WIDTH - H_GRAPH_MARGIN) / 4 - 2,
  height = (FULL_WIDTH - H_GRAPH_MARGIN) / 4,
  units = "mm"
)
bar_reliance_plot

# Distribution of usage within each accuracy (rows) x keytime (columns) cell.
ggplot(p_data_usage_bar, aes(x = mean_suggestion_use)) +
  geom_histogram(binwidth = 0.5) +
  facet_grid(cols = vars(keytime), rows = vars(accuracy))
# Between-subjects ANOVA (keytime x accuracy) of suggestion use in the
# suggestion-bar condition, on trial-level data that ezANOVA collapses to cell
# means per participant.
# NOTE(review): only accuracy levels 0.1, 0.5 and 0.9 are kept here, whereas
# the ART model fitted below on p_data_usage_bar_common_factors uses every
# accuracy level - confirm this difference is intended.
model_usage <- ezANOVA(
data = measured_test_trials |>
filter(suggestions_type == BAR_SUGGESTION_TYPE & accuracy %in% c(0.1, 0.5, 0.9)),
wid = c("participant"),
dv = total_suggestion_used,
between = c("keytime", "accuracy"),
# Keep the underlying aov fit: its residuals are plotted right after this call.
return_aov = TRUE
)
Warning: You have removed one or more Ss from the analysis. Refactoring "participant" for ANOVA.
Warning: You have removed one or more levels from variable "accuracy". Refactoring for ANOVA.
Warning: Collapsing data to cell means. *IF* the requested effects are a subset of the full design, you must use the "within_full"
argument, else results may be inaccurate.
Coefficient covariances computed by hccm()
qqnorm(model_usage$aov$residuals);qqline(model_usage$aov$residuals)
model_usage
$ANOVA
Effect DFn DFd F p p<.05 ges
1 keytime 3 348 60.81269 1.237671e-31 * 0.3439385
2 accuracy 2 348 297.02066 5.579782e-76 * 0.6305894
3 keytime:accuracy 6 348 14.22432 1.667218e-14 * 0.1969464
$`Levene's Test for Homogeneity of Variance`
DFn DFd SSn SSd F p p<.05
1 11 348 124.8144 250.0284 15.7929 4.657556e-25 *
$aov
Call:
aov(formula = formula(aov_formula), data = data)
Terms:
keytime accuracy keytime:accuracy Residuals
Sum of Squares 262.8508 855.8752 122.9636 501.3870
Deg. of Freedom 3 2 6 348
Residual standard error: 1.20032
Estimated effects are balanced
# Aligned Rank Transform model of per-participant suggestion use (bar
# condition), its Type II ANOVA table, and pairwise keytime contrasts.
art_m_usage_bar <- art(
  mean_suggestion_use ~ keytime * accuracy,
  data = p_data_usage_bar_common_factors
)
anova(art_m_usage_bar, type = 2)
emmeans(
  artlm(art_m_usage_bar, "keytime"),
  pairwise ~ keytime
)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
keytime emmean SE df lower.CL upper.CL
0 164 11.2 580 142 186
50 232 11.2 580 210 254
100 372 11.2 580 350 394
200 434 11.2 580 412 456
Results are averaged over the levels of: accuracy
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
keytime0 - keytime50 -68.6 15.8 580 -4.332 0.0001
keytime0 - keytime100 -207.8 15.8 580 -13.118 <.0001
keytime0 - keytime200 -270.3 15.8 580 -17.059 <.0001
keytime50 - keytime100 -139.2 15.8 580 -8.786 <.0001
keytime50 - keytime200 -201.6 15.8 580 -12.727 <.0001
keytime100 - keytime200 -62.4 15.8 580 -3.941 0.0005
Results are averaged over the levels of: accuracy
P value adjustment: tukey method for comparing a family of 4 estimates
emmeans(artlm(art_m_usage_bar, "accuracy"), pairwise ~ accuracy)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
accuracy emmean SE df lower.CL upper.CL
0.1 94.2 9.95 580 74.6 114
0.3 215.8 9.95 580 196.2 235
0.5 312.5 9.95 580 293.0 332
0.7 398.7 9.95 580 379.1 418
0.9 481.4 9.95 580 461.8 501
Results are averaged over the levels of: keytime
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
accuracy0.1 - accuracy0.3 -121.6 14.1 580 -8.646 <.0001
accuracy0.1 - accuracy0.5 -218.4 14.1 580 -15.525 <.0001
accuracy0.1 - accuracy0.7 -304.5 14.1 580 -21.647 <.0001
accuracy0.1 - accuracy0.9 -387.2 14.1 580 -27.526 <.0001
accuracy0.3 - accuracy0.5 -96.8 14.1 580 -6.879 <.0001
accuracy0.3 - accuracy0.7 -182.9 14.1 580 -13.002 <.0001
accuracy0.3 - accuracy0.9 -265.6 14.1 580 -18.880 <.0001
accuracy0.5 - accuracy0.7 -86.1 14.1 580 -6.123 <.0001
accuracy0.5 - accuracy0.9 -168.8 14.1 580 -12.001 <.0001
accuracy0.7 - accuracy0.9 -82.7 14.1 580 -5.879 <.0001
Results are averaged over the levels of: keytime
P value adjustment: tukey method for comparing a family of 5 estimates
# Mean actual saved keystrokes per participant.
p_data_sks <- measured_test_trials |>
  group_by(
    participant,
    accuracy, keytime,
    accuracy_numeric, keytime_numeric,
    suggestions_type
  ) |>
  summarise(p_mean_sks = mean(actual_sks), .groups = "drop")

# Mean +/- t_error() per accuracy x keytime x suggestion type.
summary_sks <- p_data_sks |>
  group_by(accuracy_numeric, keytime_numeric, suggestions_type) |>
  summarise(
    error_sks = t_error(p_mean_sks),
    mean_sks = mean(p_mean_sks),
    min_sks = mean_sks - error_sks,
    max_sks = mean_sks + error_sks,
    .groups = "drop"
  )

# Same summary for the theoretical saved keystrokes, aggregated per accuracy
# only. Inside mean() the name theoretical_sks refers to the data column.
theoretical_sks <- measured_test_trials |>
  group_by(participant, accuracy_numeric, suggestions_type) |>
  summarise(p_mean_sks = mean(theoretical_sks), .groups = "drop") |>
  group_by(accuracy_numeric) |>
  summarise(
    error_sks = t_error(p_mean_sks),
    mean_sks = mean(p_mean_sks),
    min_sks = mean_sks - error_sks,
    max_sks = mean_sks + error_sks,
    .groups = "drop"
  )

pd <- position_dodge(0.025)

# Actual saved keystrokes (coloured by keytime) over the theoretical reference
# drawn in red, one facet row per suggestion type.
plot_sks <- ggplot(
  data = summary_sks,
  mapping = aes(
    x = accuracy_numeric,
    y = mean_sks,
    ymin = min_sks,
    ymax = max_sks,
    color = keytime_numeric,
    group = keytime_numeric
  )
) +
  geom_line(data = theoretical_sks, color = "red", group = "red") +
  geom_pointrange(data = theoretical_sks, color = "red", group = "red") +
  scale_color_continuous(breaks = c(0, 50, 100, 200)) +
  scale_x_continuous(breaks = c(0, 0.1, 0.3, 0.5, 0.7, 0.9, 1)) +
  geom_line(position = pd) +
  geom_pointrange(position = pd) +
  facet_grid(rows = vars(suggestions_type)) +
  theme(
    legend.position = "none",
    panel.spacing = PANEL_MARGIN
  ) +
  labs(x = "Accuracy", y = "Saved Keystrokes", color = "Key Stroke Delay")
plot_sks
# Per-participant mean actual and theoretical keystroke saving, with an
# outlier flag computed within each accuracy x keytime cell.
p_data_ks <- measured_test_trials |>
  group_by(
    participant,
    accuracy, keytime,
    accuracy_numeric, keytime_numeric,
    suggestions_type
  ) |>
  summarise(
    p_mean_actual_ks = mean(actual_key_saving_no_editing),
    p_mean_theoretical_ks = mean(theoretical_key_saving),
    .groups = "drop"
  ) |>
  # Grouping only scopes is_outlier() to a cell; it is removed right after.
  group_by(accuracy, keytime) |>
  mutate(is_outlier = is_outlier(p_mean_actual_ks)) |>
  ungroup()
# Plot actual keystroke saving (one line per keytime level) against accuracy,
# over the theoretical keystroke saving drawn as a reference series.
#
# filtered_p_data_ks: per-participant means with columns accuracy_numeric,
#   keytime_numeric, p_mean_actual_ks and p_mean_theoretical_ks, already
#   restricted to the rows to plot.
# Returns the ggplot object. It is returned visibly, like plot_reliance();
# the previous version assigned the plot to a local variable named after the
# function itself, which shadowed the function and made the result invisible.
plot_keystroke_saving <- function(filtered_p_data_ks) {
  # Mean and t_error() margin per accuracy x keytime; bounds clamped to [0, 1].
  actual_ks_summary <- filtered_p_data_ks |>
    group_by(accuracy_numeric, keytime_numeric) |>
    summarize(
      error_ks = t_error(p_mean_actual_ks),
      mean_ks = mean(p_mean_actual_ks),
      min_ks = max(0, mean_ks - error_ks),
      max_ks = min(1, mean_ks + error_ks),
      ks_type = "actual",
      .groups = "drop"
    )

  # Theoretical saving is summarised per accuracy only, so it carries no
  # keytime value.
  theoretical_ks_summary <- filtered_p_data_ks |>
    group_by(accuracy_numeric) |>
    summarize(
      error_ks = t_error(p_mean_theoretical_ks),
      mean_ks = mean(p_mean_theoretical_ks),
      min_ks = max(0, mean_ks - error_ks),
      max_ks = min(1, mean_ks + error_ks),
      ks_type = "theoretical",
      keytime_numeric = NA,
      .groups = "drop"
    )

  # Default plot data; every layer below supplies its own data explicitly.
  ks_summary <- union_all(theoretical_ks_summary, actual_ks_summary)
  pd <- position_dodge(0.025)

  ggplot(
    ks_summary,
    aes(
      x = accuracy_numeric,
      y = mean_ks,
      ymin = min_ks,
      ymax = max_ks,
      color = keytime_numeric,
      group = keytime_numeric
    )
  ) +
    # Theoretical reference first so the measured series are drawn on top.
    custom_line(
      data = theoretical_ks_summary,
      color = THEORETICAL_COLOR,
      group = "theoretical"
    ) +
    custom_pointrange(
      data = theoretical_ks_summary,
      color = THEORETICAL_COLOR,
      group = "theoretical",
      shape = 17
    ) +
    custom_line(data = actual_ks_summary, position = pd) +
    custom_pointrange(data = actual_ks_summary, position = pd) +
    SCALE_COLOR_KEY_STROKE +
    SCALE_X_ACCURACY +
    scale_y_continuous(
      "Keystroke Saving",
      limits = c(0, 1),
      labels = percent
    ) +
    theme(
      legend.position = "none",
      panel.spacing = PANEL_MARGIN
    )
}
# Descriptive statistics (SD and mean of per-participant keystroke saving),
# printed at four levels of aggregation.

# Per suggestion type.
p_data_ks |>
  group_by(suggestions_type) |>
  summarise(
    sd_ks = sd(p_mean_actual_ks),
    mean_ks = mean(p_mean_actual_ks),
    .groups = "drop"
  )

# Per suggestion type x accuracy x keytime cell.
p_data_ks |>
  group_by(suggestions_type, accuracy, keytime) |>
  summarise(
    sd_ks = sd(p_mean_actual_ks),
    mean_ks = mean(p_mean_actual_ks),
    .groups = "drop"
  )

# Per suggestion type x keytime.
p_data_ks |>
  group_by(suggestions_type, keytime) |>
  summarise(
    sd_ks = sd(p_mean_actual_ks),
    mean_ks = mean(p_mean_actual_ks),
    .groups = "drop"
  )

# Per suggestion type x accuracy.
p_data_ks |>
  group_by(suggestions_type, accuracy) |>
  summarise(
    sd_ks = sd(p_mean_actual_ks),
    mean_ks = mean(p_mean_actual_ks),
    .groups = "drop"
  )
# Inline-suggestion subset of the per-participant keystroke-saving means.
p_data_ks_inline <- filter(
  p_data_ks,
  suggestions_type == INLINE_SUGGESTION_TYPE
)
inline_ks_plot <- plot_keystroke_saving(p_data_ks_inline)

# Export as a square whose side is a quarter of (FULL_WIDTH - H_GRAPH_MARGIN).
ggsave(
  filename = graph_path("keystroke-saving-inline.pdf"),
  plot = inline_ks_plot,
  device = cairo_pdf,
  width = (FULL_WIDTH - H_GRAPH_MARGIN) / 4,
  height = (FULL_WIDTH - H_GRAPH_MARGIN) / 4,
  units = "mm"
)
inline_ks_plot

# Distribution within each accuracy (rows) x keytime (columns) cell.
ggplot(p_data_ks_inline, aes(x = p_mean_actual_ks)) +
  geom_histogram(binwidth = 0.05) +
  facet_grid(cols = vars(keytime), rows = vars(accuracy))

# Residual normality check on the untransformed response.
m <- aov(p_mean_actual_ks ~ keytime * accuracy, data = p_data_ks_inline)
pander(normalCheck(m))
Shapiro-Wilk normality test
data: res W = 0.90538, p-value < 2.2e-16
Test statistic | P value |
---|---|
0.9054 | 6.001e-19 * * * |
remove(m)
Not normal.
# Box-Cox profile for the shifted response (+0.5 keeps the values strictly
# positive for the power transform), then a normality re-check on the
# transformed model.
boxcox((p_mean_actual_ks + 0.5) ~ keytime * accuracy, data = p_data_ks_inline)
# Exponent actually applied. It used to be declared as 0.5 while the model
# below hard-coded -0.3, leaving `lambda` unused and contradicting the fit;
# it is now the single source of the exponent, as in the entry-speed analysis.
lambda <- -0.3
m <- aov(
  (p_mean_actual_ks + 0.5)^lambda ~ keytime * accuracy,
  data = p_data_ks_inline
)
pander(normalCheck(m))
Shapiro-Wilk normality test
data: res W = 0.92199, p-value < 2.2e-16
Test statistic | P value |
---|---|
0.922 | 3.512e-17 * * * |
remove(m)
# Between-subjects ANOVA (keytime x accuracy) on the power-transformed
# keystroke saving, (p_mean_actual_ks + 0.5) ^ -0.3 - the same transform as the
# aov fit checked for normality just above. detailed = TRUE adds the SSn/SSd
# columns to the printed table.
ezANOVA(
p_data_ks_inline |> mutate(final_ks = (p_mean_actual_ks + 0.5) ^ -0.3),
dv=final_ks, wid=c(participant),
between=c(keytime, accuracy),
detailed=TRUE
)
Warning: You have removed one or more Ss from the analysis. Refactoring "participant" for ANOVA.
Warning: Data is unbalanced (unequal N per group). Make sure you specified a well-considered value for the type argument to ezANOVA().
Coefficient covariances computed by hccm()
$ANOVA
Effect DFn DFd SSn SSd F p p<.05 ges
1 keytime 3 587 0.7855189 1.395994 110.100646 1.459609e-56 * 0.3600798
2 accuracy 4 587 6.0451693 1.395994 635.481539 1.191109e-211 * 0.8123957
3 keytime:accuracy 12 587 0.2848055 1.395994 9.979795 6.273666e-18 * 0.1694464
$`Levene's Test for Homogeneity of Variance`
DFn DFd SSn SSd F p p<.05
1 19 587 0.2777115 0.6856414 12.51357 1.12417e-32 *
Still not normal, and the variances are not homogeneous either (Levene's test is significant), so we switch to ART.
# Aligned Rank Transform model of keystroke saving (inline condition), its
# Type II ANOVA table, and pairwise keytime contrasts.
m_ks_inline <- art(
  p_mean_actual_ks ~ keytime * accuracy,
  data = p_data_ks_inline
)
anova(m_ks_inline, type = 2)
emmeans(
  artlm(m_ks_inline, "keytime"),
  pairwise ~ keytime
)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
keytime emmean SE df lower.CL upper.CL
0 170 10.8 587 149 191
50 225 10.7 587 204 246
100 357 10.8 587 335 378
200 461 10.7 587 440 482
Results are averaged over the levels of: accuracy
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
keytime0 - keytime50 -55.2 15.2 587 -3.632 0.0017
keytime0 - keytime100 -186.4 15.3 587 -12.217 <.0001
keytime0 - keytime200 -291.2 15.2 587 -19.197 <.0001
keytime50 - keytime100 -131.3 15.2 587 -8.644 <.0001
keytime50 - keytime200 -236.0 15.1 587 -15.637 <.0001
keytime100 - keytime200 -104.8 15.2 587 -6.907 <.0001
Results are averaged over the levels of: accuracy
P value adjustment: tukey method for comparing a family of 4 estimates
emmeans(artlm(m_ks_inline, "accuracy"), pairwise ~ accuracy)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
accuracy emmean SE df lower.CL upper.CL
0.1 83.7 6.74 587 70.4 96.9
0.3 191.4 6.74 587 178.2 204.7
0.5 288.4 6.77 587 275.1 301.7
0.7 424.0 6.64 587 411.0 437.1
0.9 530.2 6.77 587 516.9 543.5
Results are averaged over the levels of: keytime
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
accuracy0.1 - accuracy0.3 -107.8 9.54 587 -11.300 <.0001
accuracy0.1 - accuracy0.5 -204.7 9.55 587 -21.424 <.0001
accuracy0.1 - accuracy0.7 -340.4 9.46 587 -35.962 <.0001
accuracy0.1 - accuracy0.9 -446.5 9.55 587 -46.729 <.0001
accuracy0.3 - accuracy0.5 -96.9 9.55 587 -10.146 <.0001
accuracy0.3 - accuracy0.7 -232.6 9.46 587 -24.577 <.0001
accuracy0.3 - accuracy0.9 -338.7 9.55 587 -35.452 <.0001
accuracy0.5 - accuracy0.7 -135.7 9.48 587 -14.304 <.0001
accuracy0.5 - accuracy0.9 -241.8 9.57 587 -25.255 <.0001
accuracy0.7 - accuracy0.9 -106.1 9.48 587 -11.190 <.0001
Results are averaged over the levels of: keytime
P value adjustment: tukey method for comparing a family of 5 estimates
# Suggestion-bar subset of the per-participant keystroke-saving means.
p_data_ks_bar <- filter(
  p_data_ks,
  suggestions_type == BAR_SUGGESTION_TYPE
)
# Alias used by the bar models below; currently identical to the full bar data.
p_data_ks_bar_common_factors <- p_data_ks_bar
bar_ks_plot <- plot_keystroke_saving(p_data_ks_bar)

# Export as a square whose side is a quarter of (FULL_WIDTH - H_GRAPH_MARGIN).
ggsave(
  filename = graph_path("keystroke-saving-bar.pdf"),
  plot = bar_ks_plot,
  device = cairo_pdf,
  width = (FULL_WIDTH - H_GRAPH_MARGIN) / 4,
  height = (FULL_WIDTH - H_GRAPH_MARGIN) / 4,
  units = "mm"
)
bar_ks_plot

# Overall keystroke saving per suggestion type (mean and t_error() margin).
p_data_ks |>
  group_by(suggestions_type) |>
  summarise(
    error = t_error(p_mean_actual_ks),
    mean = mean(p_mean_actual_ks)
  )

# ART model across both experiments, with suggestion type as a third factor,
# and the bar vs. inline contrast.
m_ks_both <- art(
  p_mean_actual_ks ~ keytime * accuracy * suggestions_type,
  data = p_data_ks
)
anova(m_ks_both, type = 2)
emmeans(
  artlm(m_ks_both, "suggestions_type"),
  pairwise ~ suggestions_type
)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
suggestions_type emmean SE df lower.CL upper.CL
BAR 525 13.7 1167 499 552
INLINE 681 13.6 1167 655 708
Results are averaged over the levels of: keytime, accuracy
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
BAR - INLINE -156 19.3 1167 -8.066 <.0001
Results are averaged over the levels of: keytime, accuracy
# Distribution of keystroke saving within each accuracy (rows) x keytime
# (columns) cell for the suggestion-bar condition.
ggplot(p_data_ks_bar, aes(x = p_mean_actual_ks)) +
  geom_histogram(binwidth = 0.05) +
  facet_grid(cols = vars(keytime), rows = vars(accuracy))

# Residual normality check on the untransformed response.
m <- aov(
  p_mean_actual_ks ~ keytime * accuracy,
  data = p_data_ks_bar_common_factors
)
pander(normalCheck(m))
Shapiro-Wilk normality test
data: res W = 0.89279, p-value < 2.2e-16
Test statistic | P value |
---|---|
0.8928 | 5.069e-20 * * * |
remove(m)
Not normal.
Cf. Wobbrock et al. (CHI 2011), the Aligned Rank Transform paper: http://faculty.washington.edu/wobbrock/pubs/chi-11.06.pdf
# Aligned Rank Transform model of keystroke saving (bar condition), its
# Type II ANOVA table, and pairwise keytime contrasts.
m_ks_bar <- art(
  p_mean_actual_ks ~ keytime * accuracy,
  data = p_data_ks_bar_common_factors
)
anova(m_ks_bar, type = 2)
emmeans(
  artlm(m_ks_bar, "keytime"),
  pairwise ~ keytime
)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
keytime emmean SE df lower.CL upper.CL
0 160 11.1 580 138 182
50 242 11.1 580 220 264
100 369 11.1 580 348 391
200 431 11.1 580 409 452
Results are averaged over the levels of: accuracy
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
keytime0 - keytime50 -82.3 15.8 580 -5.220 <.0001
keytime0 - keytime100 -209.7 15.8 580 -13.306 <.0001
keytime0 - keytime200 -270.8 15.8 580 -17.185 <.0001
keytime50 - keytime100 -127.4 15.8 580 -8.086 <.0001
keytime50 - keytime200 -188.5 15.8 580 -11.965 <.0001
keytime100 - keytime200 -61.1 15.8 580 -3.879 0.0007
Results are averaged over the levels of: accuracy
P value adjustment: tukey method for comparing a family of 4 estimates
emmeans(artlm(m_ks_bar, "accuracy"), pairwise ~ accuracy)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
accuracy emmean SE df lower.CL upper.CL
0.1 91.1 8.54 580 74.4 108
0.3 205.2 8.54 580 188.5 222
0.5 293.1 8.54 580 276.3 310
0.7 403.0 8.54 580 386.3 420
0.9 510.1 8.54 580 493.3 527
Results are averaged over the levels of: keytime
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
accuracy0.1 - accuracy0.3 -114.1 12.1 580 -9.451 <.0001
accuracy0.1 - accuracy0.5 -201.9 12.1 580 -16.729 <.0001
accuracy0.1 - accuracy0.7 -311.9 12.1 580 -25.838 <.0001
accuracy0.1 - accuracy0.9 -418.9 12.1 580 -34.704 <.0001
accuracy0.3 - accuracy0.5 -87.9 12.1 580 -7.278 <.0001
accuracy0.3 - accuracy0.7 -197.8 12.1 580 -16.387 <.0001
accuracy0.3 - accuracy0.9 -304.8 12.1 580 -25.253 <.0001
accuracy0.5 - accuracy0.7 -110.0 12.1 580 -9.108 <.0001
accuracy0.5 - accuracy0.9 -217.0 12.1 580 -17.975 <.0001
accuracy0.7 - accuracy0.9 -107.0 12.1 580 -8.866 <.0001
Results are averaged over the levels of: keytime
P value adjustment: tukey method for comparing a family of 5 estimates
# Per-participant keystroke-saving ratio: the share of final characters that
# came from suggestions, divided by the theoretical keystroke saving, averaged
# over that participant's trials.
p_data_tsku <- measured_test_trials |>
  group_by(
    participant,
    accuracy_numeric,
    keytime_numeric,
    suggestions_type
  ) |>
  summarise(
    p_mean_ks = mean(
      (total_final_suggestion_chars / total_chars) / theoretical_key_saving
    ),
    .groups = "drop"
  )
# Plot the keystroke-saving ratio (one line per keytime level) against
# accuracy, over a constant reference series at 100%.
#
# filtered_p_data_tsku: per-participant ratios with columns accuracy_numeric,
#   keytime_numeric and p_mean_ks, already restricted to the rows to plot.
# Returns the ggplot object. It is returned visibly, like plot_reliance();
# the previous version assigned the plot to a local variable named after the
# function itself, which shadowed the function and made the result invisible.
plot_keystroke_saving_ratio <- function(filtered_p_data_tsku) {
  # Mean and t_error() margin per accuracy x keytime; bounds clamped to [0, 1].
  actual_tsku_data <- filtered_p_data_tsku |>
    group_by(accuracy_numeric, keytime_numeric) |>
    summarize(
      error_ks = t_error(p_mean_ks),
      mean_ks = mean(p_mean_ks),
      min_ks = max(0, mean_ks - error_ks),
      max_ks = min(1, mean_ks + error_ks),
      .groups = "drop"
    )

  # Reference series: a ratio of exactly 1 at every accuracy level, with a
  # zero-width range and no keytime value.
  theoretical_tsku_data <- tibble(
    accuracy_numeric = ACCURACY_LEVELS_NUM,
    keytime_numeric = NA,
    mean_ks = 1.0,
    min_ks = 1.0,
    max_ks = 1.0
  )

  pd <- position_dodge(0.025)

  ggplot(
    actual_tsku_data,
    aes(
      x = accuracy_numeric,
      y = mean_ks,
      ymin = min_ks,
      ymax = max_ks,
      color = keytime_numeric,
      group = keytime_numeric
    )
  ) +
    # Reference first so the measured series are drawn on top.
    custom_line(
      data = theoretical_tsku_data,
      color = THEORETICAL_COLOR,
      group = "theoretical"
    ) +
    custom_pointrange(
      data = theoretical_tsku_data,
      color = THEORETICAL_COLOR,
      group = "theoretical",
      shape = 17
    ) +
    custom_line(position = pd) +
    custom_pointrange(position = pd) +
    SCALE_COLOR_KEY_STROKE +
    SCALE_X_ACCURACY +
    scale_y_continuous(
      "Keystroke Saving Ratio",
      limits = c(0, 1),
      labels = percent
    ) +
    # No trailing empty argument: theme() only tolerates one because of how it
    # collects its dots.
    theme(
      legend.position = "none",
      panel.spacing = PANEL_MARGIN
    )
}
# Keystroke-saving ratio figure for the inline condition, exported as a square
# whose side is a quarter of (FULL_WIDTH - H_GRAPH_MARGIN).
inline_ks_ratio_plot <- plot_keystroke_saving_ratio(
  filter(p_data_tsku, suggestions_type == INLINE_SUGGESTION_TYPE)
)
ggsave(
  filename = graph_path("keystroke-saving-ratio-inline.pdf"),
  plot = inline_ks_ratio_plot,
  device = cairo_pdf,
  width = (FULL_WIDTH - H_GRAPH_MARGIN) / 4,
  height = (FULL_WIDTH - H_GRAPH_MARGIN) / 4,
  units = "mm"
)
inline_ks_ratio_plot

# Same figure for the suggestion-bar condition.
bar_ks_ratio_plot <- plot_keystroke_saving_ratio(
  filter(p_data_tsku, suggestions_type == BAR_SUGGESTION_TYPE)
)
ggsave(
  filename = graph_path("keystroke-saving-ratio-bar.pdf"),
  plot = bar_ks_ratio_plot,
  device = cairo_pdf,
  width = (FULL_WIDTH - H_GRAPH_MARGIN) / 4,
  height = (FULL_WIDTH - H_GRAPH_MARGIN) / 4,
  units = "mm"
)
bar_ks_ratio_plot
# Trial duration: per-participant means first, then mean and t_error() margin
# per accuracy x keytime x suggestion type (lower bound floored at 0).
actual_graph_data <- measured_test_trials |>
  group_by(participant, accuracy_numeric, keytime_numeric, suggestions_type) |>
  summarise(value = mean(duration), .groups = "drop") |>
  group_by(accuracy_numeric, keytime_numeric, suggestions_type) |>
  summarise(
    error_value = t_error(value),
    mean_value = mean(value),
    min_value = max(0, mean_value - error_value),
    max_value = mean_value + error_value,
    .groups = "drop"
  )

pd <- position_dodge(0.025)

# Duration against accuracy, one line per keytime, one panel per suggestion
# type.
ggplot(
  data = actual_graph_data,
  mapping = aes(
    x = accuracy_numeric,
    y = mean_value,
    ymin = min_value,
    ymax = max_value,
    color = keytime_numeric,
    group = keytime_numeric
  )
) +
  scale_x_continuous(breaks = c(0, 0.1, 0.3, 0.5, 0.7, 0.9, 1)) +
  scale_color_continuous(breaks = c(0, 50, 100, 200)) +
  expand_limits(y = c(0)) +
  geom_line(position = pd) +
  geom_pointrange(position = pd) +
  facet_wrap(vars(suggestions_type)) +
  labs(
    x = "Suggestions Accuracy",
    y = "Trial Duration (seconds)",
    color = "Key Stroke Delay"
  )
# Per-participant mean entry speed.
# cps * 60 / 5 is presumably characters/second converted to words/minute at
# 5 characters per word (the axis in plot_entry_speed is labelled "wpm") -
# TODO confirm against the definition of cps.
#
# The result is left ungrouped. A trailing group_by(accuracy, keytime) used to
# follow the summarize() with no grouped operation after it, so the tibble
# silently stayed grouped for every later consumer.
p_data_ts <- measured_test_trials |>
  group_by(
    participant,
    accuracy,
    keytime,
    accuracy_numeric,
    keytime_numeric,
    suggestions_label,
    suggestions_type
  ) |>
  summarize(speed = mean(cps * 60 / 5), .groups = "drop")
# Plot entry speed (one line per keytime level) against accuracy.
#
# filtered_p_data_ts: per-participant speeds with columns accuracy_numeric,
#   keytime_numeric, suggestions_label and speed, already restricted to the
#   rows to plot.
# Returns the ggplot object. It is returned visibly, like plot_reliance();
# the previous version assigned the plot to a local variable named after the
# function itself, which shadowed the function and made the result invisible.
plot_entry_speed <- function(filtered_p_data_ts) {
  # Mean and t_error() margin per condition; lower bound floored at 0.
  summary_ts <- filtered_p_data_ts |>
    group_by(accuracy_numeric, keytime_numeric, suggestions_label) |>
    summarize(
      error_value = t_error(speed),
      mean_value = mean(speed),
      min_value = max(0, mean_value - error_value),
      max_value = mean_value + error_value,
      .groups = "drop"
    )

  pd <- position_dodge(0.025)
  # Axis ticks every 10 units from 0 to 80, labelled with the "wpm" unit.
  speed_breaks <- seq(0, 80, 10)

  ggplot(
    summary_ts,
    aes(
      x = accuracy_numeric,
      y = mean_value,
      ymin = min_value,
      ymax = max_value,
      color = keytime_numeric,
      group = keytime_numeric
    )
  ) +
    SCALE_X_ACCURACY +
    SCALE_COLOR_KEY_STROKE +
    expand_limits(y = c(0)) +
    scale_y_continuous(
      breaks = speed_breaks,
      labels = paste(speed_breaks, "wpm")
    ) +
    custom_line(position = pd) +
    custom_pointrange(position = pd) +
    labs(y = "Entry Speed") +
    guides(color = guide_legend(override.aes = list(linetype = 0))) +
    theme(
      legend.position = "none",
      panel.spacing = PANEL_MARGIN
    )
}
# Descriptive statistics (SD and mean of per-participant entry speed), printed
# at four levels of aggregation.

# Per suggestion type.
p_data_ts |>
  group_by(suggestions_type) |>
  summarise(
    sd_speed = sd(speed),
    mean_speed = mean(speed),
    .groups = "drop"
  )

# Per suggestion type x accuracy x keytime cell.
p_data_ts |>
  group_by(suggestions_type, accuracy, keytime) |>
  summarise(
    sd_speed = sd(speed),
    mean_speed = mean(speed),
    .groups = "drop"
  )

# Per suggestion type x keytime.
p_data_ts |>
  group_by(suggestions_type, keytime) |>
  summarise(
    sd_speed = sd(speed),
    mean_speed = mean(speed),
    .groups = "drop"
  )

# Per suggestion type x accuracy.
p_data_ts |>
  group_by(suggestions_type, accuracy) |>
  summarise(
    sd_speed = sd(speed),
    mean_speed = mean(speed),
    .groups = "drop"
  )
# Inline-suggestion subset of the per-participant entry speeds.
p_data_ts_inline <- filter(
  p_data_ts,
  suggestions_type == INLINE_SUGGESTION_TYPE
)
inline_entry_speed_plot <- plot_entry_speed(p_data_ts_inline)

# Export: a quarter of (FULL_WIDTH - H_GRAPH_MARGIN) high, 1 mm wider than
# that wide.
ggsave(
  filename = graph_path("entry-speed-inline.pdf"),
  plot = inline_entry_speed_plot,
  device = cairo_pdf,
  width = (FULL_WIDTH - H_GRAPH_MARGIN) / 4 + 1,
  height = (FULL_WIDTH - H_GRAPH_MARGIN) / 4,
  units = "mm"
)
inline_entry_speed_plot

# Distribution within each accuracy (rows) x keytime (columns) cell.
ggplot(p_data_ts_inline, aes(x = speed)) +
  geom_histogram(binwidth = 2) +
  facet_grid(cols = vars(keytime), rows = vars(accuracy))

# Residual normality check on the untransformed response.
m <- aov(speed ~ keytime * accuracy, data = p_data_ts_inline)
pander(normalCheck(m))
Shapiro-Wilk normality test
data: res W = 0.9444, p-value = 2.623e-14
Test statistic | P value |
---|---|
0.9444 | 2.623e-14 * * * |
remove(m)
Not normal.
# Box-Cox profile for entry speed, then a normality re-check after raising the
# response to the chosen power.
boxcox(speed ~ keytime * accuracy, data = p_data_ts_inline)
lambda <- 0.15
m <- aov(
  speed^lambda ~ keytime * accuracy,
  data = p_data_ts_inline
)
pander(normalCheck(m))
Shapiro-Wilk normality test
data: res W = 0.97699, p-value = 3.596e-08
Test statistic | P value |
---|---|
0.977 | 3.596e-08 * * * |
remove(m)
Could not normalize it. Switch to ART.
# Aligned Rank Transform model of entry speed (inline condition), its Type II
# ANOVA table, and pairwise keytime contrasts.
m_ts <- art(
  speed ~ keytime * accuracy,
  data = p_data_ts_inline
)
anova(m_ts, type = 2)
emmeans(
  artlm(m_ts, "keytime"),
  pairwise ~ keytime
)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
keytime emmean SE df lower.CL upper.CL
0 456 10.4 587 435 476
50 379 10.3 587 359 399
100 247 10.4 587 226 267
200 137 10.2 587 117 157
Results are averaged over the levels of: accuracy
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
keytime0 - keytime50 76.6 14.6 587 5.252 <.0001
keytime0 - keytime100 209.1 14.7 587 14.261 <.0001
keytime0 - keytime200 318.5 14.6 587 21.846 <.0001
keytime50 - keytime100 132.5 14.6 587 9.079 <.0001
keytime50 - keytime200 241.8 14.5 587 16.670 <.0001
keytime100 - keytime200 109.3 14.6 587 7.500 <.0001
Results are averaged over the levels of: accuracy
P value adjustment: tukey method for comparing a family of 4 estimates
emmeans(artlm(m_ts, "accuracy"), pairwise ~ accuracy)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
accuracy emmean SE df lower.CL upper.CL
0.1 263 14.7 587 234 292
0.3 237 14.7 587 208 266
0.5 290 14.7 587 261 319
0.7 286 14.4 587 257 314
0.9 446 14.7 587 417 475
Results are averaged over the levels of: keytime
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
accuracy0.1 - accuracy0.3 26.03 20.7 587 1.255 0.7191
accuracy0.1 - accuracy0.5 -27.14 20.8 587 -1.306 0.6877
accuracy0.1 - accuracy0.7 -22.69 20.6 587 -1.102 0.8056
accuracy0.1 - accuracy0.9 -182.83 20.8 587 -8.796 <.0001
accuracy0.3 - accuracy0.5 -53.17 20.8 587 -2.558 0.0797
accuracy0.3 - accuracy0.7 -48.71 20.6 587 -2.366 0.1262
accuracy0.3 - accuracy0.9 -208.86 20.8 587 -10.048 <.0001
accuracy0.5 - accuracy0.7 4.46 20.6 587 0.216 0.9995
accuracy0.5 - accuracy0.9 -155.69 20.8 587 -7.475 <.0001
accuracy0.7 - accuracy0.9 -160.15 20.6 587 -7.762 <.0001
Results are averaged over the levels of: keytime
P value adjustment: tukey method for comparing a family of 5 estimates
# Suggestion-bar subset of the per-participant entry speeds.
p_data_ts_bar <- filter(
  p_data_ts,
  suggestions_type == BAR_SUGGESTION_TYPE
)
# Alias used by the bar models below; currently identical to the full bar data.
p_data_ts_bar_common_factors <- p_data_ts_bar
bar_entry_speed_plot <- plot_entry_speed(p_data_ts_bar)

# Export: a quarter of (FULL_WIDTH - H_GRAPH_MARGIN) high, 1 mm wider than
# that wide.
ggsave(
  filename = graph_path("entry-speed-bar.pdf"),
  plot = bar_entry_speed_plot,
  device = cairo_pdf,
  width = (FULL_WIDTH - H_GRAPH_MARGIN) / 4 + 1,
  height = (FULL_WIDTH - H_GRAPH_MARGIN) / 4,
  units = "mm"
)
bar_entry_speed_plot

# Distribution within each accuracy (rows) x keytime (columns) cell.
ggplot(p_data_ts_bar_common_factors, aes(x = speed)) +
  geom_histogram(binwidth = 2) +
  facet_grid(cols = vars(keytime), rows = vars(accuracy))

# Residual normality check on the untransformed response.
m <- aov(
  speed ~ keytime * accuracy,
  data = p_data_ts_bar_common_factors
)
pander(normalCheck(m))
Shapiro-Wilk normality test
data: res W = 0.96035, p-value = 1.218e-11
Test statistic | P value |
---|---|
0.9603 | 1.218e-11 * * * |
remove(m)
Not normal.
# Aligned Rank Transform model of entry speed (bar condition), its Type II
# ANOVA table, and pairwise keytime contrasts.
m_ts_bar <- art(
  speed ~ keytime * accuracy,
  data = p_data_ts_bar_common_factors
)
anova(m_ts_bar, type = 2)
emmeans(
  artlm(m_ts_bar, "keytime"),
  pairwise ~ keytime
)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
keytime emmean SE df lower.CL upper.CL
0 451 10.8 580 430 472
50 361 10.8 580 340 382
100 239 10.8 580 217 260
200 151 10.8 580 130 173
Results are averaged over the levels of: accuracy
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
keytime0 - keytime50 89.6 15.2 580 5.876 <.0001
keytime0 - keytime100 212.2 15.2 580 13.925 <.0001
keytime0 - keytime200 299.5 15.2 580 19.651 <.0001
keytime50 - keytime100 122.7 15.2 580 8.049 <.0001
keytime50 - keytime200 209.9 15.2 580 13.774 <.0001
keytime100 - keytime200 87.2 15.2 580 5.725 <.0001
Results are averaged over the levels of: accuracy
P value adjustment: tukey method for comparing a family of 4 estimates
emmeans(artlm(m_ts_bar, "accuracy"), pairwise ~ accuracy)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
accuracy emmean SE df lower.CL upper.CL
0.1 280 14.4 580 251 308
0.3 237 14.4 580 209 265
0.5 277 14.4 580 248 305
0.7 261 14.4 580 233 290
0.9 448 14.4 580 419 476
Results are averaged over the levels of: keytime
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
accuracy0.1 - accuracy0.3 42.65 20.4 580 2.092 0.2247
accuracy0.1 - accuracy0.5 2.98 20.4 580 0.146 0.9999
accuracy0.1 - accuracy0.7 18.17 20.4 580 0.891 0.9001
accuracy0.1 - accuracy0.9 -168.00 20.4 580 -8.242 <.0001
accuracy0.3 - accuracy0.5 -39.67 20.4 580 -1.946 0.2940
accuracy0.3 - accuracy0.7 -24.48 20.4 580 -1.201 0.7507
accuracy0.3 - accuracy0.9 -210.65 20.4 580 -10.335 <.0001
accuracy0.5 - accuracy0.7 15.19 20.4 580 0.745 0.9457
accuracy0.5 - accuracy0.9 -170.97 20.4 580 -8.388 <.0001
accuracy0.7 - accuracy0.9 -186.17 20.4 580 -9.134 <.0001
Results are averaged over the levels of: keytime
P value adjustment: tukey method for comparing a family of 5 estimates
# Mean speed and its t_error() for each suggestions type.
p_data_ts |>
  group_by(suggestions_type) |>
  summarise(error = t_error(speed), mean = mean(speed))
# Aligned rank transform model of speed with keytime, accuracy,
# suggestions_type and all their interactions, followed by its type 2
# ANOVA table.
m_ts_both <- art(
  speed ~ keytime * accuracy * suggestions_type,
  data = p_data_ts
)
anova(m_ts_both, type = 2)
# Pairwise comparison between suggestions types on the linear model
# extracted for the suggestions_type term.
artlm(m_ts_both, "suggestions_type") |>
  emmeans(pairwise ~ suggestions_type)
NOTE: Results may be misleading due to involvement in interactions
$emmeans
suggestions_type emmean SE df lower.CL upper.CL
BAR 572 14.4 1167 544 601
INLINE 635 14.3 1167 607 663
Results are averaged over the levels of: keytime, accuracy
Confidence level used: 0.95
$contrasts
contrast estimate SE df t.ratio p.value
BAR - INLINE -62.7 20.2 1167 -3.100 0.0020
Results are averaged over the levels of: keytime, accuracy
# Plot the mean of one measure against accuracy, one line per keytime level.
#
# Values are first averaged per participant within each
# accuracy_numeric x keytime_numeric x suggestions_type cell, then
# summarised across participants as mean -/+ t_error(). The interval can
# optionally be clamped to [min_value, max_value].
#
# Args:
#   completed_trials: data frame containing participant, accuracy_numeric,
#     keytime_numeric, suggestions_type and the column named by `measure`.
#   measure: name of the column to plot, given as a string.
#   min_value: lower clamp for the interval, or NA (default) for no clamp.
#   max_value: upper clamp for the interval, or NA (default) for no clamp.
#
# Returns:
#   A ggplot object with a line and a point-range layer. suggestions_type is
#   kept in the plotted data so that callers can facet on it.
line_graph <-
  function(completed_trials,
           measure,
           min_value = NA,
           max_value = NA) {
    graph_data <- completed_trials |>
      # all_of() states explicitly that `measure` is a variable holding a
      # column name. A bare `measure` is deprecated in tidyselect and would
      # pick a data column literally called "measure" if one existed.
      rename(value = all_of(measure)) |>
      # Step 1: one mean per participant and condition.
      group_by(
        participant,
        accuracy_numeric,
        keytime_numeric,
        suggestions_type
      ) |>
      summarize(p_value = mean(value), .groups = "drop") |>
      # Step 2: mean and error across participants for each condition.
      group_by(accuracy_numeric, keytime_numeric, suggestions_type) |>
      summarize(
        error_value = t_error(p_value),
        mean_value = mean(p_value),
        # An NA bound means "do not clamp on that side".
        ci_min = if_else(
          is.na(min_value),
          mean_value - error_value,
          max(min_value, mean_value - error_value)
        ),
        ci_max = if_else(
          is.na(max_value),
          mean_value + error_value,
          min(max_value, mean_value + error_value)
        ),
        .groups = "drop"
      )
    ggplot(
      graph_data,
      aes(
        x = accuracy_numeric,
        y = mean_value,
        ymin = ci_min,
        ymax = ci_max,
        color = keytime_numeric,
        group = keytime_numeric
      )
    ) +
      scale_x_continuous(breaks = ACCURACY_LEVELS_NUM) +
      scale_color_continuous(breaks = keytime_LEVELS_NUM) +
      geom_line() +
      geom_pointrange()
  }
# Run duration in minutes (runs longer than 120 minutes are excluded),
# with one panel per suggestions type.
measured_runs |>
  mutate(minutes = duration / 60) |>
  filter(minutes <= 120) |>
  line_graph("minutes") +
  facet_wrap(vars(suggestions_type)) +
  ylab("Duration (minutes)")
# Mean actual and theoretical key saving per participant, condition and
# trial number.
p_data_ks <- measured_test_trials |>
  group_by(
    participant,
    accuracy,
    keytime,
    accuracy_numeric,
    keytime_numeric,
    suggestions_type,
    trial_number
  ) |>
  summarise(
    p_mean_actual_ks = mean(actual_key_saving_no_editing),
    p_mean_theoretical_ks = mean(theoretical_key_saving),
    .groups = "drop"
  ) |>
  # Outliers are flagged within each accuracy x keytime x trial_number cell.
  group_by(accuracy, keytime, trial_number) |>
  mutate(is_outlier = is_outlier(p_mean_actual_ks)) |>
  ungroup()
# Actual key saving across participants for each accuracy, keytime,
# suggestions type and trial number: mean -/+ t_error(), with the interval
# clamped to [0, 1].
actual_ks_summary <- p_data_ks |>
  group_by(
    accuracy_numeric,
    keytime_numeric,
    suggestions_type,
    trial_number
  ) |>
  summarise(
    error_ks = t_error(p_mean_actual_ks),
    mean_ks = mean(p_mean_actual_ks),
    min_ks = max(0, mean_ks - error_ks),
    max_ks = min(1, mean_ks + error_ks),
    ks_type = "actual",
    .groups = "drop"
  )
# Theoretical key saving across participants for each accuracy, suggestions
# type and trial number: mean -/+ t_error(), with the interval clamped to
# [0, 1]. keytime is not a grouping variable here, so keytime_numeric is
# filled with NA.
theoretical_ks_summary <- p_data_ks |>
  group_by(accuracy_numeric, suggestions_type, trial_number) |>
  summarise(
    error_ks = t_error(p_mean_theoretical_ks),
    mean_ks = mean(p_mean_theoretical_ks),
    min_ks = max(0, mean_ks - error_ks),
    max_ks = min(1, mean_ks + error_ks),
    ks_type = "theoretical",
    keytime_numeric = NA,
    .groups = "drop"
  )
# Stack the theoretical and actual summaries into one table.
ks_summary <- union_all(theoretical_ks_summary, actual_ks_summary)
# Small horizontal dodge applied to the actual key saving layers.
pd <- position_dodge(0.025)
# Key saving by trial number: theoretical values are drawn first in a fixed
# colour with triangle markers, actual values on top, coloured by keytime.
# Panels: suggestions type (rows) x accuracy (columns).
plot_learning <- ggplot(
  ks_summary,
  aes(
    x = trial_number,
    y = mean_ks,
    ymin = min_ks,
    ymax = max_ks,
    color = keytime_numeric,
    group = keytime_numeric
  )
) +
  custom_line(
    data = theoretical_ks_summary,
    color = THEORETICAL_COLOR,
    group = "theoretical"
  ) +
  custom_pointrange(
    data = theoretical_ks_summary,
    color = THEORETICAL_COLOR,
    group = "theoretical",
    shape = 17
  ) +
  custom_line(data = actual_ks_summary, position = pd) +
  custom_pointrange(data = actual_ks_summary, position = pd) +
  facet_grid(
    rows = vars(suggestions_type),
    cols = vars(accuracy_numeric)
  ) +
  SCALE_COLOR_KEY_STROKE +
  scale_y_continuous(
    "Keystroke Saving",
    limits = c(0, 1),
    labels = percent
  ) +
  theme(
    legend.position = "none",
    panel.spacing = PANEL_MARGIN
    # Disabled theme options, kept for reference:
    # strip.text.y = element_blank(),
    # strip.background.y = element_blank(),
    # axis.title.x = element_blank()
  )
# Write the learning plot to a 150 mm x 150 mm PDF, then display it.
ggsave(
  graph_path("amt-learning-keystroke-saving.pdf"),
  plot = plot_learning,
  device = cairo_pdf,
  width = 150,
  height = 150,
  units = "mm"
)
plot_learning
# Extra 2 mm of right and bottom plot margin.
theme_margin <- theme(
  plot.margin = margin(r = 2, b = 2, unit = "mm")
)
# Combine the four inline-suggestion plots; the extra margin is applied
# to the reliance plot only.
inline_plot <-
  (inline_reliance_plot + theme_margin) +
  inline_entry_speed_plot +
  inline_ks_plot +
  inline_ks_ratio_plot
# Save at full width with a golden-ratio aspect, then display.
ggsave(
  graph_path("objective_inline.pdf"),
  plot = inline_plot,
  device = cairo_pdf,
  width = FULL_WIDTH,
  height = FULL_WIDTH / GOLDEN_RATIO,
  units = "mm"
)
inline_plot
# Combine the four suggestion-bar plots; theme_margin is applied to the
# reliance plot only.
bar_plot <-
  (bar_reliance_plot + theme_margin) +
  bar_entry_speed_plot +
  bar_ks_plot +
  bar_ks_ratio_plot
# Save at full width with a golden-ratio aspect, then display.
ggsave(
  graph_path("objective_bar.pdf"),
  plot = bar_plot,
  device = cairo_pdf,
  width = FULL_WIDTH,
  height = FULL_WIDTH / GOLDEN_RATIO,
  units = "mm"
)
bar_plot