Master Informatique – Université de Lille
# Package setup. dplyr and ggplot2 are already attached by the tidyverse
# meta-package; the explicit calls are kept to show where each tool comes from.
# operations on objects
library(tidyverse)
# data frame manipulation
library(dplyr)
# plotting
library(ggplot2)
# Example  <= this is a comment
# Two assignment operators, shown side by side on purpose.
a = 42
b <- 1.21
Deux opérateurs, même résultat.
Je préfère utiliser <-
car l’opération d’affectation n’est pas symétrique.
Cela évite aussi les confusions avec la relation d’égalité.
# Typing a name on its own auto-prints its value.
a
## [1] 42
# print() displays the value explicitly.
print(b)
## [1] 1.21
Les variables sont des vecteurs de taille 1, d’où le [1] affiché devant chaque valeur.
Les entiers et les flottants sont des nombres (type numeric).
# Rebind `a`, then test whether values are numeric.
a <- 666
is.numeric(a)
## [1] TRUE
# A string of digits is not a number...
is.numeric("666")
## [1] FALSE
# ...but it can be converted to one.
as.numeric("666")
## [1] 666
# addition
6 + 4
## [1] 10
# subtraction
6 - 4
## [1] 2
# multiplication
6 * 4
## [1] 24
# division
6 / 4
## [1] 1.5
Tiens, un flottant.
# integer division
6 %/% 4
## [1] 1
# remainder of the integer division
6 %% 4
## [1] 2
# exponentiation
6^4
## [1] 1296
# trigonometry
sin(pi / 6)
## [1] 0.5
cos(pi / 4)
## [1] 0.7071068
# round up
ceiling(1.21)
## [1] 2
# round down
floor(1.21)
## [1] 1
# round to nearest; exact halves go to the even neighbour (1.5 -> 2, 6.5 -> 6)
round(1.2)
## [1] 1
round(1.7)
## [1] 2
round(1.5)
## [1] 2
round(6.5)
## [1] 6
# equality
4 == 2
## [1] FALSE
# inequality
8 != 6 + 2
## [1] FALSE
# strictly greater than
15 > 2^8
## [1] FALSE
# strictly less than (base-2 logarithm on the right)
16 < log(16, base = 2)
## [1] FALSE
# greater than or equal
23 >= 23
## [1] TRUE
# less than or equal
42 <= 666
## [1] TRUE
# negation
!TRUE
## [1] FALSE
# and
FALSE & TRUE
## [1] FALSE
# or
FALSE | TRUE
## [1] TRUE
Fonctionnent sur des vecteurs
# element-wise AND
c(TRUE, FALSE, TRUE, FALSE) & c(TRUE, TRUE, FALSE, FALSE)
## [1] TRUE FALSE FALSE FALSE
# element-wise OR
c(TRUE, FALSE, TRUE, FALSE) | c(TRUE, TRUE, FALSE, FALSE)
## [1] TRUE TRUE TRUE FALSE
&& et || existent aussi, mais ne fonctionnent que sur des vecteurs de taille un, et leur évaluation est paresseuse (court-circuit).
"Chaine"
## [1] "Chaine"
'Autre "chaîne"'
## [1] "Autre \"chaîne\""
"Chaine" == 'Chaine'
## [1] TRUE
is.character("666")
## [1] TRUE
as.character(666)
## [1] "666"
# str_length() and str_sub() are stringr helpers (attached with the tidyverse).
# number of characters
str_length("Chaine")
## [1] 6
# substring from position 2 to position 3, both inclusive
str_sub("Chaine", 2, 3)
## [1] "ha"
# concatenation; the default separator is a single space
paste("ma", "chaine")
## [1] "ma chaine"
# custom separator
paste("ma", "chaine", sep = ".")
## [1] "ma.chaine"
# empty separator (what paste0() does by default)
paste("ma", "chaine", sep = "")
## [1] "machaine"
# constructor
c(1, 2, 3)
## [1] 1 2 3
# mixed types are coerced to the most general one (here: character)
c("Le", "résultat", "est", 666)
## [1] "Le" "résultat" "est" "666"
# repetition
rep(0, 3)
## [1] 0 0 0
rep(c(1, 2), 4)
## [1] 1 2 1 2 1 2 1 2
rep(c(1, 2), each = 4)
## [1] 1 1 1 1 2 2 2 2
# simple sequences, ascending or descending
3:12
## [1] 3 4 5 6 7 8 9 10 11 12
13:5
## [1] 13 12 11 10 9 8 7 6 5
# sequence with an explicit step
seq(from = 12, to = 60, by = 6)
## [1] 12 18 24 30 36 42 48 54 60
# uniform random draws (values differ on every run)
runif(n = 6, min = 0, max = 1)
## [1] 0.3238483 0.4018908 0.8861652 0.3605108 0.5743770 0.9318380
# normal random draws (values differ on every run)
rnorm(n = 6, mean = 0, sd = 1)
## [1] -0.62029631 0.02948394 1.31353973 -0.41090757 -0.29721893 0.62649698
a <- seq(from = 0, to = 20, by = 2)
# a single value (indices start at 1)
a[3]
## [1] 4
# several values
a[c(2, 5, 8)]
## [1] 2 8 14
# several values, by range
a[2:8]
## [1] 2 4 6 8 10 12 14
# several values, by condition
a[a > 12]
## [1] 14 16 18 20
# several values, by exclusion (negative indices drop elements)
a[c(-2, -5, -8)]
## [1] 0 4 6 10 12 16 18 20
# membership
12 %in% a
## [1] TRUE
# arithmetic is element-wise
a + 5
## [1] 5 7 9 11 13 15 17 19 21 23 25
# comparisons are element-wise
a > 12
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE TRUE TRUE TRUE TRUE
# when lengths differ the shorter vector is recycled; R warns when the longer
# length is not a multiple of the shorter one
1:3 + rep(1, 7)
## Warning in 1:3 + rep(1, 7): longer object length is not a multiple of shorter
## object length
## [1] 2 3 4 2 3 4 2
# number of elements
length(10:20)
## [1] 11
# reversed copy
rev(10:20)
## [1] 20 19 18 17 16 15 14 13 12 11 10
# smallest and largest element
min(10:20)
## [1] 10
max(10:20)
## [1] 20
# total
sum(10:20)
## [1] 165
# ascending sort
sort(c(9, 6, 5, 8, 4, 10, 3, 1, 7, 2))
## [1] 1 2 3 4 5 6 7 8 9 10
# NA marks a missing value
b <- c(10, 20, NA, 30, 40, NA, 50)
# locate the missing values
is.na(b)
## [1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE
# arithmetic propagates missing values
b * 2
## [1] 20 40 NA 60 80 NA 100
# aggregates return NA unless missing values are explicitly dropped
min(b)
## [1] NA
min(b, na.rm = TRUE)
## [1] 10
# Add one to `n`.
#   n: a numeric vector.
#   Returns a vector of the same length with every element incremented.
plus1 <- function(n) {
  n + 1
}
plus1(28)
## [1] 29
# The function is vectorised because `+` is
plus1(2:5)
## [1] 3 4 5 6
# Combine two numbers as n + 10 * m.
#   n, m: numeric vectors.
g <- function(n, m) {
  n + 10 * m
}
g(2, 8)
## [1] 82
# Arguments can be passed by name, in any order
g(m = 8, n = 2)
## [1] 82
n <- 2
# Rebind the local parameter `n` and print it; the caller's `n` is untouched
# because the assignment only affects the function's own environment.
h <- function(n) {
  n <- 666
  print(n)
}
print(n)
## [1] 2
h(n)
## [1] 666
# Calling the function did not change the value of the outer n
print(n)
## [1] 2
# if takes a single (scalar) condition
if (1 > 0) {
  print("Test simple")
}
## [1] "Test simple"
# The else branch is optional
if (28 < 2) {
  print("28 < 2")
} else {
  print("28 >= 2")
}
## [1] "28 >= 2"
# vectorised condition: one result per element
ifelse(1:10 < 4, "<4", ">=4")
## [1] "<4" "<4" "<4" ">=4" ">=4" ">=4" ">=4" ">=4" ">=4" ">=4"
# for iterates over the elements of a vector
for (n in 1:5) {
  print(n)
}
## [1] 1
## [1] 2
## [1] 3
## [1] 4
## [1] 5
# while repeats as long as the condition holds
n <- 4
while (n > 0) {
  print(n)
  n <- n - 1
}
## [1] 4
## [1] 3
## [1] 2
## [1] 1
# Load the CSV, stating the column separator and the decimal mark explicitly
# (with these two overrides read.csv2() behaves like read.csv()).
data <- read.csv2("ehkmenus.csv", sep = ",", dec = ".")
# Show only the first 7 rows
head(data, 7)
## X usernumber firsttechnique technique bloc trial target targetDistrib ## 1 80 1 0 0 0 0 Sweater 8 ## 2 138 1 0 0 0 1 Penguin 1 ## 3 200 1 0 0 0 2 Sweater 8 ## 4 244 1 0 0 0 3 Strawberry 2 ## 5 249 1 0 0 0 4 Sweater 8 ## 6 383 1 0 0 0 5 Duck 1 ## 7 388 1 0 0 0 6 Sweater 8 ## time correct modality ## 1 8219 1 1 ## 2 9594 0 0 ## 3 5437 1 1 ## 4 6266 1 1 ## 5 2469 1 1 ## 6 8000 1 1 ## 7 2016 1 1
# Per-column summary of the raw data frame
summary(data)
## X usernumber firsttechnique technique bloc ## Min. : 80 Min. : 1.00 Min. :0.0 Min. :0.0 Min. :0.0 ## 1st Qu.:109947 1st Qu.: 9.75 1st Qu.:0.0 1st Qu.:0.0 1st Qu.:1.0 ## Median :187428 Median :18.50 Median :0.5 Median :0.5 Median :2.5 ## Mean :193952 Mean :18.50 Mean :0.5 Mean :0.5 Mean :2.5 ## 3rd Qu.:296639 3rd Qu.:27.25 3rd Qu.:1.0 3rd Qu.:1.0 3rd Qu.:4.0 ## Max. :361753 Max. :36.00 Max. :1.0 Max. :1.0 Max. :5.0 ## trial target targetDistrib time ## Min. : 0.00 Length:10368 Min. :1 Min. : 657 ## 1st Qu.: 5.75 Class :character 1st Qu.:1 1st Qu.: 1280 ## Median :11.50 Mode :character Median :3 Median : 2016 ## Mean :11.50 Mean :4 Mean : 3002 ## 3rd Qu.:17.25 3rd Qu.:8 3rd Qu.: 3922 ## Max. :23.00 Max. :8 Max. :42390 ## correct modality ## Min. :0.0000 Min. :0.0000 ## 1st Qu.:1.0000 1st Qu.:1.0000 ## Median :1.0000 Median :1.0000 ## Mean :0.9728 Mean :0.8166 ## 3rd Qu.:1.0000 3rd Qu.:1.0000 ## Max. :1.0000 Max. :2.0000
# Identifier-like columns are categorical, not quantitative: store them as
# factors so that summaries count occurrences instead of averaging codes.
data$usernumber <- factor(data$usernumber)
data$target <- factor(data$target)
data$trial <- factor(data$trial)
data$bloc <- factor(data$bloc)
# List the distinct target names
levels(data$target)
## [1] "Artichoke" "Baseball" "Basketball" "Bat" "Bowtie" ## [6] "Broccoli" "Button" "Cards" "Cucumber" "Dog" ## [11] "Dress shirt" "Duck" "Envelope" "Hockey" "Karate" ## [16] "Keyboard" "Kiwi" "Mushroom" "Pear" "Penguin" ## [21] "Pineapple" "Potato" "Pumpkin" "Shirt" "Stamp" ## [26] "Strawberry" "Sweater" "Telephone"
# For a factor, summary() reports the number of rows per level
summary(data$target)
## Artichoke Baseball Basketball Bat Bowtie Broccoli ## 18 18 198 216 432 216 ## Button Cards Cucumber Dog Dress shirt Duck ## 216 216 420 216 18 216 ## Envelope Hockey Karate Keyboard Kiwi Mushroom ## 864 216 216 216 216 198 ## Pear Penguin Pineapple Potato Pumpkin Shirt ## 216 216 1728 864 12 198 ## Stamp Strawberry Sweater Telephone ## 216 432 1728 432
library(dplyr)
# Cleaned copy of `data`: unused columns dropped, numeric codes replaced by
# readable factor labels (labels are assigned in the order of the sorted codes).
data.clean <- data %>%
  # Drop columns
  select(!c(X, targetDistrib)) %>%
  # Relabel factor levels
  mutate(
    technique = factor(technique, labels = c("ExposeHK", "Audio")),
    # NOTE(review): "audio" is lower-case here but "Audio" for `technique`;
    # confirm whether both factors are meant to share the same labels.
    firsttechnique = factor(firsttechnique, labels = c("ExposeHK", "audio")),
    modality = factor(modality, labels = c("Pointer", "Keyboard", "Both"))
  )
summary(data.clean)
## usernumber firsttechnique technique bloc trial ## 1 : 288 ExposeHK:5184 ExposeHK:5184 0:1728 0 : 432 ## 2 : 288 audio :5184 Audio :5184 1:1728 1 : 432 ## 3 : 288 2:1728 2 : 432 ## 4 : 288 3:1728 3 : 432 ## 5 : 288 4:1728 4 : 432 ## 6 : 288 5:1728 5 : 432 ## (Other):8640 (Other):7776 ## target time correct modality ## Pineapple :1728 Min. : 657 Min. :0.0000 Pointer :1977 ## Sweater :1728 1st Qu.: 1280 1st Qu.:1.0000 Keyboard:8315 ## Envelope : 864 Median : 2016 Median :1.0000 Both : 76 ## Potato : 864 Mean : 3002 Mean :0.9728 ## Bowtie : 432 3rd Qu.: 3922 3rd Qu.:1.0000 ## Strawberry: 432 Max. :42390 Max. :1.0000 ## (Other) :4320
# To analyse selection time, discard the trials that ended in an error;
# `correct` is then constant, so the column is dropped as well.
data.time <- data.clean %>%
  filter(correct == 1) %>%
  select(!c(correct))
summary(data.time)
## usernumber firsttechnique technique bloc trial ## 28 : 287 ExposeHK:5031 ExposeHK:5059 0:1663 19 : 428 ## 32 : 287 audio :5055 Audio :5027 1:1684 9 : 425 ## 3 : 286 2:1688 21 : 425 ## 11 : 285 3:1689 20 : 424 ## 16 : 285 4:1676 11 : 423 ## 35 : 285 5:1686 17 : 423 ## (Other):8371 (Other):7538 ## target time modality ## Pineapple:1712 Min. : 657 Pointer :1901 ## Sweater :1711 1st Qu.: 1265 Keyboard:8112 ## Envelope : 845 Median : 1936 Both : 73 ## Potato : 838 Mean : 2869 ## Cucumber : 415 3rd Qu.: 3735 ## Telephone: 413 Max. :42390 ## (Other) :4152
# For a between-subjects analysis, keep only the first condition each
# participant went through. The two factors carry different labels
# ("Audio" vs "audio"), so their integer level codes are compared instead.
data.between <- data.clean %>%
  filter(as.numeric(technique) == as.numeric(firsttechnique)) %>%
  select(!c(firsttechnique))
summary(data.between)
## usernumber technique bloc trial target ## 1 : 144 ExposeHK:2592 0:864 0 : 216 Sweater : 912 ## 2 : 144 Audio :2592 1:864 1 : 216 Pineapple : 816 ## 3 : 144 2:864 2 : 216 Potato : 456 ## 4 : 144 3:864 3 : 216 Envelope : 408 ## 5 : 144 4:864 4 : 216 Strawberry: 228 ## 6 : 144 5:864 5 : 216 Telephone : 228 ## (Other):4320 (Other):3888 (Other) :2136 ## time correct modality ## Min. : 718 Min. :0.0000 Pointer : 840 ## 1st Qu.: 1266 1st Qu.:1.0000 Keyboard:4323 ## Median : 1922 Median :1.0000 Both : 21 ## Mean : 2984 Mean :0.9726 ## 3rd Qu.: 4075 3rd Qu.:1.0000 ## Max. :37891 Max. :1.0000 ##
library(ggplot2)
# Scatter plot: one point per trial, selection time by technique
g.points <- ggplot(data = data.time, aes(x = technique, y = time)) +
  geom_point()
g.points
# Box plot of selection time by technique
g.boxplot <- ggplot(data = data.time, aes(x = technique, y = time)) +
  geom_boxplot()
g.boxplot
# Export the plot. It is named explicitly so the export does not depend on
# whichever plot happened to be created last.
ggsave("graph.pdf", plot = g.boxplot, width = 20, height = 20, units = "cm")
# Mean selection time per technique
data.bars <- data.time %>%
  group_by(technique) %>%
  summarise(mean.time = mean(time))
# Bar chart of the means
g.bars <- ggplot(data = data.bars, aes(x = technique, y = mean.time)) +
  geom_col()
g.bars
# Mean selection time per technique and modality; `.groups = "drop"` returns
# an ungrouped result
data.bars2 <- data.time %>%
  group_by(technique, modality) %>%
  summarise(mean.time = mean(time), .groups = "drop")
# Bars side by side, one colour per modality
g.bars2 <- ggplot(
  data = data.bars2,
  aes(x = technique, y = mean.time, fill = modality)
) +
  geom_col(position = "dodge")
g.bars2
# Same chart with the black-and-white theme
g.bars3 <- ggplot(
  data = data.bars2,
  aes(x = technique, y = mean.time, fill = modality)
) +
  geom_col(position = "dodge") +
  theme_bw()
g.bars3
# Same chart with explicit axis and legend titles.
# NOTE(review): `time` looks like milliseconds (values in the thousands), so
# the axis is labelled in ms rather than s — confirm against the data source.
g.bars4 <- ggplot(
  data = data.bars2,
  aes(x = technique, y = mean.time, fill = modality)
) +
  geom_col(position = "dodge") +
  scale_x_discrete(name = "Technique") +
  scale_y_continuous("Selection time (ms)") +
  labs(fill = "Modality") +
  theme_bw()
g.bars4
# Same chart with a fixed y range.
# NOTE(review): `time` looks like milliseconds (values in the thousands, axis
# limit of 10000), so the axis is labelled in ms rather than s — confirm.
g.bars5 <- ggplot(
  data = data.bars2,
  aes(x = technique, y = mean.time, fill = modality)
) +
  geom_col(position = "dodge") +
  scale_x_discrete(name = "Technique") +
  scale_y_continuous("Selection time (ms)", limits = c(0, 10000)) +
  labs(fill = "Modality") +
  theme_bw()
g.bars5
# Same chart with a qualitative ColorBrewer palette for the modalities.
# NOTE(review): `time` looks like milliseconds (values in the thousands, axis
# limit of 10000), so the axis is labelled in ms rather than s — confirm.
g.bars6 <- ggplot(
  data = data.bars2,
  aes(x = technique, y = mean.time, fill = modality)
) +
  geom_col(position = "dodge") +
  scale_x_discrete(name = "Technique") +
  scale_y_continuous("Selection time (ms)", limits = c(0, 10000)) +
  scale_fill_brewer(type = "qual", palette = "Set1") +
  labs(fill = "Modality") +
  theme_bw()
g.bars6
# Mean selection time per technique with a 95% confidence interval based on
# the Student t distribution. Trials using both modalities are excluded.
data.bars.ci <- data.time %>%
  group_by(technique) %>%
  filter(modality != "Both") %>%
  summarise(
    mean.time = mean(time, na.rm = TRUE),
    # standard deviation
    sd.time = sd(time, na.rm = TRUE),
    # number of observations; counts only non-missing values so that it
    # matches what mean() and sd() actually used
    n.time = sum(!is.na(time))
  ) %>%
  mutate(
    # standard error of the mean
    se.time = sd.time / sqrt(n.time),
    # lower bound
    lower.ci = mean.time - qt(1 - (0.05 / 2), n.time - 1) * se.time,
    # upper bound
    upper.ci = mean.time + qt(1 - (0.05 / 2), n.time - 1) * se.time
  )
# Bar chart of the means with their confidence intervals as error bars.
# The bars use a constant fill, so no fill scale or legend title is needed.
# NOTE(review): `time` looks like milliseconds, so the axis is labelled in ms
# rather than s — confirm against the data source.
g.bars7 <- ggplot(data = data.bars.ci, aes(x = technique, y = mean.time)) +
  geom_col(width = 0.5, fill = "greenyellow") +
  geom_errorbar(
    aes(ymin = lower.ci, ymax = upper.ci),
    linewidth = 1,
    width = 0.2
  ) +
  scale_x_discrete(name = "Technique") +
  scale_y_continuous("Selection time (ms)") +
  theme_bw()
g.bars7
data2 <- read.csv2("ariane.csv", sep = ",", dec = ".")
# One row per answer, plus the share of "clockwise" answers for its signed
# duration.
data2.psychometric <- data2 %>%
  # (2 * (x - 1) - 1) maps 1 to -1 and 2 to +1, giving Speed a sign.
  # NOTE(review): assumes Good.Answer is coded 1/2 — confirm.
  mutate(RelativeDuration = (2 * (Good.Answer - 1) - 1) * Speed) %>%
  group_by(RelativeDuration) %>%
  mutate(
    # 1 when the answer is 2, 0 otherwise
    Clockwise = as.integer(Answer == 2),
    # share of such answers within the group (sum / count)
    RateClockwise = mean(Clockwise)
  ) %>%
  # drop the grouping so later steps work on a plain table
  ungroup() %>%
  select(-c(User, Repetition, Speed, APA, Good.Answer, Answer))
summary(data2.psychometric)
## RelativeDuration Clockwise RateClockwise ## Min. :-630 Min. :0.0000 Min. :0.0200 ## 1st Qu.:-360 1st Qu.:0.0000 1st Qu.:0.1467 ## Median : 0 Median :0.0000 Median :0.3433 ## Mean : 0 Mean :0.4252 Mean :0.4252 ## 3rd Qu.: 360 3rd Qu.:1.0000 3rd Qu.:0.7267 ## Max. : 630 Max. :1.0000 Max. :0.9400
# Rate of "clockwise" answers as a function of the signed duration.
# Line thickness uses `linewidth`; `size` is deprecated for lines since
# ggplot2 3.4.0.
g.psychometric <- ggplot(
  data2.psychometric,
  aes(RelativeDuration, RateClockwise)
) +
  geom_line(color = "blue", linewidth = 1) +
  theme_bw()
# Display the plot
g.psychometric
# Tune the axes: a tick every 90 units on x, percentages every 20% on y,
# no padding around the data, y restricted to [0, 1] without clipping.
g.psychometric.2 <- g.psychometric +
  scale_x_continuous(
    breaks = seq(-630, 630, 90),
    labels = seq(-630, 630, 90),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    labels = scales::percent_format(accuracy = 1),
    breaks = seq(0.0, 1.0, 0.2),
    expand = c(0, 0)
  ) +
  coord_cartesian(ylim = c(0, 1), clip = "off")
g.psychometric.2
# Axis titles
g.psychometric.3 <- g.psychometric.2 +
  xlab("Duration + Direction") +
  ylab("% Clockwise")
g.psychometric.3
# Overlay a logistic regression (binomial GLM) fitted on the 0/1 answers,
# with its 95% confidence band. `linewidth` replaces the deprecated `size`.
g.psychometric.4 <- g.psychometric.3 +
  geom_smooth(
    aes(y = Clockwise),
    method = glm,
    method.args = list(family = "binomial"),
    level = 0.95,
    color = "black",
    linewidth = 0.5
  )
g.psychometric.4
## `geom_smooth()` using formula = 'y ~ x'
# Reference lines at 50% (labelled as chance level) and at 25% / 75%.
# `linewidth` replaces the deprecated `size` for line geoms.
g.psychometric.5 <- g.psychometric.4 +
  geom_hline(yintercept = 0.5, color = "red") +
  geom_hline(yintercept = 0.25, color = "red", linewidth = 0.2) +
  geom_hline(yintercept = 0.75, color = "red", linewidth = 0.2) +
  annotate(
    geom = "text",
    x = 620,
    y = 0.4,
    label = "chance level",
    hjust = 1,
    vjust = 0
  )
g.psychometric.5
## `geom_smooth()` using formula = 'y ~ x'