Behavioral results. Let’s look at how participants performed in the task.
First, let’s load the data. The data import functions are defined in dataProcessing.R, where I convert the raw data into a usable dataframe.
Training Phase
Before the main bandit task, participants completed a training phase in which they were required to match a target stimulus until a learning criterion was met (at least 32 trials and a run of 9 out of 10 correct). The training used the same stimuli and inputs as the main bandit task, and served to familiarize participants and bring them to a similar level of fluency in both the spatial and conceptual domains.
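For concreteness, here is a minimal sketch of that stopping rule (an illustrative reconstruction with a hypothetical helper, not the actual experiment code):
#Illustrative reconstruction of the training criterion (hypothetical helper):
#training ends once at least 32 trials are complete and 9 of the last 10 were correct
criterionMet <- function(correct){ #correct: logical vector with one entry per training trial
return(length(correct) >= 32 && sum(tail(correct, 10)) >= 9)
}
Let’s first look at some of the results.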
Participants had lower accuracy in the conceptual training (\(t(128)=7.5\), \(p<.001\), \(d=0.8\), \(BF>100\)) and required more trials to reach the training criterion (\(Z=-4.1\), \(p<.001\), \(r=-.40\), \(BF>100\)), which is expected, since the conceptual task is intuitively more difficult.
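A sketch of the accuracy comparison just reported, assuming trajDF holds one row per training trial with a trajCorrect indicator (ttestPretty is the reporting helper used throughout this document):
#Per-participant training accuracy, then a paired t-test across tasks
accDF <- ddply(trajDF, ~id+context, plyr::summarize, acc = mean(trajCorrect))
ttestPretty(subset(accDF, context == 'Spatial')$acc, subset(accDF, context == 'Conceptual')$acc, paired=T)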
#Overall Correct choices
dat <- ddply(df, ~id+context, plyr::summarize, trajCorrect = mean(trajCorrect), trajAvgSteps = mean(trajAvgSteps))
trajp1a<- ggplot(dat, aes(x = context, y = trajCorrect, color = context))+
geom_line(aes(group=id),color = 'black', alpha = 0.1)+
geom_boxplot(outlier.shape = NA, fill=NA, color = 'black', width = 0.1)+
geom_quasirandom(alpha = 0.7)+
stat_summary(fun.y=mean, geom='point', shape=23, color = 'black', size =3)+
ylab('P(correct)')+
xlab('')+
scale_color_brewer(palette = "Dark2", name = "") +
theme(legend.position='none')
trajp1a

#Trials until complete
dat <- ddply(trajDF, ~id+context, plyr::summarize, trajTrials = max(trial))
#ttestPretty(subset(dat, context == 'Spatial')$trajTrials, subset(dat, context == 'Conceptual')$trajTrials, paired=T) #Data doesn't look very normal
#ranktestPretty(subset(dat, context == 'Spatial')$trajTrials, subset(dat, context == 'Conceptual')$trajTrials, paired=T) #Is there a meaningful difference in the number of trials needed to finish the training phase? #Note: Bayes factors sometimes display as NA when they are very large
trajComplete <- ggplot(dat, aes(x = context, y = trajTrials, color = context))+
geom_line(aes(group=id),color = 'black', alpha = 0.1)+
geom_quasirandom( alpha = 0.6)+
geom_boxplot(color='black', fill= NA, width =.2, outlier.shape = NA)+
stat_summary(fun.y = mean, geom='point', shape = 23,size=3, color = 'black')+
geom_hline(yintercept = 32, linetype = 'dashed')+
scale_color_brewer(palette = "Dark2", name = "") +
ylab('Trials Until Complete')+
xlab('')+
#coord_cartesian(ylim=c(30,128), )+
scale_y_continuous(breaks=c(32,64, 96, 128), limits = c(32,128))+
theme(legend.position = 'none')
trajComplete

We can also look at the magnitude of errors vs. their frequency of occurrence, which gives us a nice Shepard (1987) style generalization gradient.
#Magnitude of error vs. frequency
gradientDF <- ddply(trajDF, ~context+manhattanError,plyr::summarize, counts = table(manhattanError))
#Normalize into a percentage
gradientDF[gradientDF$context == 'Conceptual','P'] <- gradientDF[gradientDF$context == 'Conceptual','counts']/sum(gradientDF[gradientDF$context == 'Conceptual','counts'])
gradientDF[gradientDF$context == 'Spatial','P'] <- gradientDF[gradientDF$context == 'Spatial','counts']/sum(gradientDF[gradientDF$context == 'Spatial','counts'])
pError<- ggplot(gradientDF, aes(x = manhattanError, y = P, color = context, shape = context))+
geom_line()+
geom_point()+
coord_cartesian(xlim=c(0,5))+
ylab('P(error)')+
xlab('Magnitude of Error (Manhattan distance)')+
scale_color_brewer(palette = "Dark2", name = "Task")+
scale_shape_manual( values = c(16,15),name = "Task")+
theme(legend.position=c(1,1), legend.justification = c(1,1))
pError
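As an illustrative extra step (not part of the original analysis), one could quantify these gradients by fitting Shepard’s exponential law, \(P(\textrm{error}) = a \cdot e^{-b \cdot \textrm{distance}}\), to each task; the starting values below are rough guesses and may need tuning:
#Fit an exponential generalization gradient separately per task
fitGradient <- function(d) nls(P ~ a * exp(-b * manhattanError), data = d, start = list(a = 0.5, b = 1))
lapply(split(gradientDF, gradientDF$context), fitGradient) #one fit per task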

How did accuracy differ for the different options?
spatialCounts <- ddply(subset(trajDF, context == 'Spatial'), .(x, y), plyr::summarize, correct = sum(trajCorrect)/length(trajCorrect))
names(spatialCounts) <- c("X", "Y", "Accuracy")
spatialCounts$task <- 'Spatial'
conceptCounts <- ddply(subset(trajDF, context == 'Conceptual'), .(x, y), plyr::summarize, correct = sum(trajCorrect)/length(trajCorrect)) #use the training data (trajDF), matching the spatial computation above
names(conceptCounts) <- c("X", "Y", "Accuracy")
conceptCounts$task <- 'Conceptual'
trainingAccuracyDF <-rbind(spatialCounts, conceptCounts)
trajheatmap<- ggplot(trainingAccuracyDF, aes(x=X, y = Y, fill=Accuracy)) +
geom_tile()+
scale_fill_distiller(palette = "Spectral", name = 'P(correct)',limits = c(0,1),labels = scales::percent_format(accuracy = 1))+
theme_classic() +
facet_grid(~task)+
coord_equal() +
theme(strip.background=element_blank(), legend.key=element_rect(color=NA), axis.line=element_blank(),axis.text.x=element_blank(),
axis.text.y=element_blank(),axis.ticks=element_blank(), panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),plot.background=element_blank())+
labs(x = '', y = '')
trajheatmap

Performance
Now let’s finally look at performance on the bandit task.
#Construct plotting dataframes
meanDF <- ddply(df, .(id, context, environment, contextOrder), plyr::summarize, meanScore = mean(z))
conceptualScores <- subset(meanDF, context == "Conceptual")
spatialScores <- subset(meanDF, context == "Spatial")
mergedDF <- merge(conceptualScores, spatialScores, by ="id")
joinedDF <- rbind(conceptualScores, spatialScores)
bothTasksDF <- ddply(joinedDF, .(id, environment, contextOrder), plyr::summarize, meanScore = mean(meanScore))
randomDF <- read.csv("../rationalModels/random.csv") #load random model
joinedDF$contextOrder <- factor(joinedDF$contextOrder)
levels(joinedDF$contextOrder)<- c("Spatial First", "Conceptual First")
Overall, participants performed far better than chance in both the Conceptual (\(t(128)=24.6\), \(p<.001\), \(d=2.2\), \(BF>100\)) and the Spatial task (\(t(128)=34.6\), \(p<.001\), \(d=3.0\), \(BF>100\)). Let’s now run a two-way mixed ANOVA to see how our context × environment design influenced performance.
#Two way mixed ANOVA: context is within, environment is between
dd<-ddply(rbind(conceptualScores, spatialScores), ~id+context+environment, summarise, m=mean(meanScore))
dd$id <- factor(dd$id)
res.aov <- aov(m ~ environment*context + Error(id/context), data=dd)
anova_stats(res.aov)
# Now let's replicate via Robust ANOVA
bwtrim(m ~ environment*context, id = id, data=dd, tr = 0.2) #using 20% trimmed means
## Call:
## bwtrim(formula = m ~ environment * context, id = id, data = dd,
## tr = 0.2)
##
## value df1 df2 p.value
## environment 22.0075 1 71.7571 0.0000
## context 36.1917 1 71.4948 0.0000
## environment:context 1.3662 1 71.4948 0.2463
sppba(m ~ environment*context, id = id, data=dd) #Main fixed effect
## Call:
## sppba(formula = m ~ environment * context, id = id, data = dd)
##
## Test statistics:
## [1] -6.345
##
## Test whether the corrresponding population parameters are the same:
## p-value: 0.002
sppbb(m ~ environment*context, id = id, data=dd) #Within-subject effect
## Call:
## sppbb(formula = m ~ environment * context, id = id, data = dd)
##
## Test statistics:
## [1] -4.828
##
## Test whether the corrresponding population parameters are the same:
## p-value: 0
#Now compute Bayes factor
invisible(bf <- anovaBF(m ~ environment*context+id, data=dd, whichRandom="id"))
bf
## Bayes factor analysis
## --------------
## [1] context + id : 453922.7 ±0.88%
## [2] environment + id : 12.66632 ±0.89%
## [3] context + environment + id : 6246489 ±2.69%
## [4] context + environment + context:environment + id : 2391835 ±2.84%
##
## Against denominator:
## m ~ id
## ---
## Bayes factor type: BFlinearModel, JZS
The Bayes factors show strong evidence for effects of both context and environment, while the interaction model is not preferred over the model with only the two main effects.
#Mean performance plots
p1a <- ggplot(joinedDF, aes(x = interaction(context, environment), y = meanScore, color = context))+
geom_boxplot(fill=NA, color = 'black', outlier.shape=NA, width = 0.2)+
geom_line(aes(group=id), color = 'black', alpha = 0.1)+
geom_quasirandom(alpha = 0.7)+
geom_hline(yintercept = mean(randomDF$meanReward), color = 'black', linetype = 'dashed')+
stat_summary(fun.y = mean, shape = 23, geom='point', size = 3, fill = NA, color ='black')+
#facet_grid(~environment)+
ylab("Mean Reward \u00B1SE")+
xlab('')+
annotate('text', x = 1.5, y = 110, label='Rough')+
annotate('text', x = 3.5, y = 110, label='Smooth')+
scale_x_discrete(labels=c("Conceptual", "Spatial", "Conceptual", "Spatial"))+
scale_color_brewer(palette = "Dark2", name = "") +
scale_fill_brewer(palette = "Dark2", name = "") +
geom_signif(y_position = c(95, 97, 103), xmin = c(1,3, 1.5), xmax = c(2,4, 3.5), annotation = c("BF>100","BF = 14", "BF = 12"), color = 'black')+
#geom_signif(comparisons=list( c("Conceptual", "Spatial"), c("Conceptual", "Spatial")), annotations=c("",""), col="black")+ #
theme(text = element_text(size=12, family="sans"),strip.background=element_blank(), legend.key=element_rect(color=NA), legend.position='None')
p1a

Participants earned higher rewards in the Spatial than in the Conceptual task (paired \(t\)-test: \(t(128)=-6.0\), \(p<.001\), \(d=0.5\), \(BF>100\)) and performed better in smooth than in rough environments (two-sample \(t\)-test: \(t(127)=3.1\), \(p=.003\), \(d=0.5\), \(BF=12\)). We also find correlated performance between the two tasks (\(r=.53\), \(p<.001\), \(BF>100\)).
#statistical tests
ttestPretty(conceptualScores$meanScore, mu=mean(randomDF$meanReward)) #compared to chance
ttestPretty(spatialScores$meanScore, mu=mean(randomDF$meanReward))
#T-tests
ttestPretty(conceptualScores$meanScore, spatialScores$meanScore, var.equal = TRUE, paired=T) #context
ttestPretty(subset(bothTasksDF, environment=="Smooth")$meanScore, subset(bothTasksDF, environment=="Rough")$meanScore, var.equal = TRUE) #environment
corTestPretty(conceptualScores$meanScore, spatialScores$meanScore) #correlated performance
ttestPretty(subset(conceptualScores, environment=='Smooth')$meanScore, subset(spatialScores, environment=="Smooth")$meanScore, var.equal = TRUE, paired=T) #context
ttestPretty(subset(conceptualScores, environment=='Rough')$meanScore, subset(spatialScores, environment=="Rough")$meanScore, var.equal = TRUE, paired=T, maxBF = Inf) #context
Correlation between tasks:
#corTestPretty(conceptualScores$meanScore, spatialScores$meanScore)
p1b <- ggplot(mergedDF, aes(x=meanScore.x, y = meanScore.y, color = environment.x, shape= environment.x)) +
geom_abline(slope=1, intercept=0, linetype='dashed') +
geom_point(alpha=0.9, size = 2.5)+
ylab('Spatial Reward')+
xlab('Conceptual Reward')+
xlim(40,100)+
ylim(40,100) +
annotate("text", x = 50, y = 95, label = "'r = .53' *','* ~~'BF > 100'", parse=TRUE, size=5, family="sans") +
theme(legend.position=c(1,0),legend.justification=c(1,0), strip.background=element_blank(), legend.key=element_rect(color=NA), text = element_text(size=12, family="sans"))+
#scale_color_rickandmorty(name="Environment", palette = "schwifty")+
scale_color_manual(name="Environment", values=c("#24325FFF", '#B7E4F9FF'))+
scale_shape_manual(name="Environment", values= c(17,16))
p1b

Order effect
We find an interesting one-directional order effect. Participants performed better on the conceptual task once they had experience with the spatial task (\(t(127)=2.8\), \(p=.006\), \(d=0.5\), \(BF=6.4\)). This was not the case for the spatial task, where performance did not differ whether it was performed first or second (\(t(127)=-1.7\), \(p=.096\), \(d=0.3\), \(BF=.67\)). Thus, experience with spatial search boosted performance on conceptual search, but not vice versa.
df$FirstTask <- ifelse(df$contextOrder==0, 'Spatial First', 'Conceptual First') #which task was performed first?
df$taskOrder <- ifelse((df$FirstTask=="Spatial First" & df$context == "Spatial") |(df$FirstTask=="Conceptual First" & df$context == "Conceptual"), 1, 2 )
df$FirstTask <- factor(df$FirstTask, levels = c("Conceptual First","Spatial First"))
orderDF <- ddply(df, .(id, context, environment, FirstTask, taskOrder), plyr::summarize, meanScore = mean(z))
pOrder <- ggplot(orderDF, aes(x = interaction(context,FirstTask), y = meanScore, fill = context, color = context))+
geom_boxplot( color = 'black', position = position_dodge(width = 1), outlier.shape=NA, width = 0.2, alpha =0)+
geom_quasirandom(alpha = 0.7, dodge.width = 1)+
#geom_line(aes(group=id), color = 'black', alpha = 0.1)+
stat_summary(fun.y = mean, shape = 23, geom='point', size = 3, position = position_dodge(width = 1), color ='black', fill = NA)+
#stat_summary(fun.y=mean, geom='bar', position = position_dodge(width = 1), color='black')+
#stat_summary(fun.data = mean_cl_boot, geom='errorbar', color='black', position = position_dodge(width = 1), width = .2)+
scale_fill_brewer(palette = 'Dark2', name= 'Task')+
scale_color_brewer(palette = 'Dark2', name= 'Task')+
annotate('text', x = 1.5, y = 118, label='Conceptual First')+
annotate('text', x = 3.5, y = 118, label='Spatial First')+
scale_x_discrete(labels=c("Conceptual", "Spatial", "Conceptual", "Spatial"))+
geom_hline(yintercept = mean(randomDF$meanReward), color = 'black', linetype = 'dashed')+
#coord_cartesian(ylim =c(40,120))+
xlab('')+
ylab('Mean Reward ±SE')+
geom_signif(y_position = c(100, 108), xmin = c(1,2), xmax = c(3,4), annotation = c("BF=6.4","BF=0.67"), color = 'black')+
#theme(legend.position=c(0.05,1), legend.justification = c(0,1), legend.direction='horizontal')
theme(legend.position='none')
pOrder
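A sketch of the order-effect tests reported above (two-sample, since task order varied between participants; orderDF is built in the chunk above):
#Conceptual task: spatial-first vs. conceptual-first participants
orderConcept <- subset(orderDF, context == 'Conceptual')
ttestPretty(subset(orderConcept, FirstTask == 'Spatial First')$meanScore, subset(orderConcept, FirstTask == 'Conceptual First')$meanScore, var.equal=T)
#Spatial task: same comparison
orderSpatial <- subset(orderDF, context == 'Spatial')
ttestPretty(subset(orderSpatial, FirstTask == 'Spatial First')$meanScore, subset(orderSpatial, FirstTask == 'Conceptual First')$meanScore, var.equal=T)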

Patterns of Search
Locality of sampling
Let’s look at the patterns in search behavior. First, let’s look at the distance between successive choices.
#compare to random
sampleSize <- 400000
randomDistanceDF <- data.frame(x=sample(0:7, size = sampleSize, replace=TRUE), y=sample(0:7, size = sampleSize, replace=TRUE), environment=c(rep("Rough",sampleSize/2), rep("Smooth", sampleSize/2)), context = rep(c('Conceptual', 'Spatial'), sampleSize/2))
randomDistanceDF <- randomDistanceDF %>%
mutate(distance = abs(x - lag(x, default = NA)) + abs(y - lag(y, default = NA))) #Manhattan distance between successive random choices
#Add classification of choices as stay, near, or far decisions
localityDF <-ddply(df, ~id+trial+context, plyr::summarize, avgDistance=mean(distance, na.rm=T))
localityDF$choiceType <-ifelse(localityDF$avgDistance==0, "Stay", ifelse(localityDF$avgDistance==1, "Near", "Far"))
localityDF$choiceType <- factor(localityDF$choiceType)
choiceProp <- ddply(na.omit(localityDF),.(id,context), function(x) with(x,data.frame(table(choiceType)/length(choiceType),2)))
choiceProp$choiceType <- factor(choiceProp$choiceType, levels=c("Stay", "Near", "Far"))
Let’s first do an ANOVA here.
#Two way mixed ANOVA: context is within, environment is between
dd <-ddply(df, ~id+environment+context, plyr::summarize, avgDistance=mean(distance, na.rm=T))
dd$id <- factor(dd$id)
res.aov <- aov(avgDistance ~ environment*context + Error(id/context), data=dd)
anova_stats(res.aov)
# Now let's replicate via Robust ANOVA
bwtrim(avgDistance ~ environment*context, id = id, data=dd, tr = 0.2) #using 20% trimmed means
## Call:
## bwtrim(formula = avgDistance ~ environment * context, id = id,
## data = dd, tr = 0.2)
##
## value df1 df2 p.value
## environment 0.5550 1 70.3913 0.4588
## context 4.6935 1 68.9977 0.0337
## environment:context 0.8983 1 68.9977 0.3465
sppba(avgDistance ~ environment*context, id = id, data=dd) #Main fixed effect
## Call:
## sppba(formula = avgDistance ~ environment * context, id = id,
## data = dd)
##
## Test statistics:
## [1] 0.1143
##
## Test whether the corrresponding population parameters are the same:
## p-value: 0.408
sppbb(avgDistance ~ environment*context, id = id, data=dd) #Within-subject effect
## Call:
## sppbb(formula = avgDistance ~ environment * context, id = id,
## data = dd)
##
## Test statistics:
## [1] 0.211
##
## Test whether the corrresponding population parameters are the same:
## p-value: 0.064
#Now compute Bayes factor
bf = anovaBF(avgDistance ~ environment*context+id, data=dd, whichRandom="id")
bf
## Bayes factor analysis
## --------------
## [1] environment + id : 0.2475671 ±1.5%
## [2] context + id : 67.10826 ±0.98%
## [3] environment + context + id : 16.90135 ±1.41%
## [4] environment + context + environment:context + id : 5.704376 ±1.75%
##
## Against denominator:
## avgDistance ~ id
## ---
## Bayes factor type: BFlinearModel, JZS
Let’s plot the results.
contextLabels <- c('Conceptual' = 'Conceptual\nTask', 'Spatial' = 'Spatial\nTask', "Rough"="Rough", "Smooth"="Smooth")
p4alt <- ggplot(na.omit(df), aes(x=distance, fill = context, color = context)) +
geom_histogram(aes(y = ..density..*20), position = 'dodge', binwidth=1, color='black')+
stat_density(data = randomDistanceDF, aes(y = ..density..*20), geom="line",color='black', size = .8, bw = 1) +
#geom_density(fill=NA, size = 0.7) +
scale_fill_manual(values=c("#1B9E77", "#D95F02", "Black"), name="") +
scale_color_manual(values = c("#1B9E77", "#D95F02", "Black"), name="") +
ylab("Choices Per Round") +
xlab("Distance Between Choices") +
#xlim(0,6)+
facet_grid(context~environment, labeller = as_labeller(contextLabels))+
scale_x_continuous(breaks = scales::pretty_breaks(n = 5))+
scale_y_continuous(breaks = seq(0, 10, by = 2))+
#ggtitle("Locality of Sampling") +
theme(legend.position='none', strip.background=element_blank(), legend.key=element_rect(color=NA))
p4alt
## Warning: Removed 1 rows containing non-finite values (stat_density).

Let’s try a different version where the differences between tasks are more salient.
contextLabels <- c('Conceptual' = 'Conceptual\nTask', 'Spatial' = 'Spatial\nTask', "Rough"="Rough", "Smooth"="Smooth")
anndf<-data.frame(distance = NA,context = NA,environment = factor("Smooth", levels = c("Rough", "Smooth")), text = 'Random', color = 'black') #for annotation
p4alt <- ggplot(na.omit(df), aes(x=distance, fill = context, color = context)) +
geom_histogram(aes(y = ..density..*20), position = 'identity', binwidth=1, alpha = 0.4)+
stat_density(data = subset(randomDistanceDF, context == 'Conceptual'), aes(y = ..density..*20), geom="line",color='black', size = .8, bw = 1) +
#geom_density(fill=NA, size = 0.7) +
scale_fill_manual(values=c("#1B9E77", "#D95F02", "Black"), name="Task") +
scale_color_manual(values = c("#1B9E77", "#D95F02", "Black"), name="Task") +
ylab("Choices Per Round") +
xlab("Distance Between Choices") +
#xlim(0,6)+
facet_grid(~environment, labeller = as_labeller(contextLabels))+
scale_x_continuous(breaks = scales::pretty_breaks(n = 5))+
scale_y_continuous(breaks = seq(0, 10, by = 2))+
#ggtitle("Locality of Sampling") +
#geom_text(data = anndf, x = 10.5, y = 5, label = "Random", color = 'black', size = 3.5)+
#geom_segment(data = anndf, x = 9, xend = 10, y = 5, yend = 5,colour = "black", size = 1.2)+
theme(legend.position=c(1,1), legend.justification = c(1,1), strip.background=element_blank(), legend.key=element_rect(color=NA))
p4alt
## Warning: Removed 1 rows containing non-finite values (stat_density).

Participants searched over larger distances in the conceptual task than in the spatial task (\(t(128)=-3.7\), \(p<.001\), \(d=0.3\), \(BF=59\)). There were no differences across environments (\(t(127)=-0.3\), \(p=.727\), \(d=0.06\), \(BF=.20\)). Note that each trial began on a randomly selected stimulus, so searching close to the previous selection cannot be attributed to a lack of effort; nevertheless, participants searched far more locally than expected by chance (\(t(128)=-16.2\), \(p<.001\), \(d=1.4\), \(BF>100\)).
#Statistical tests reported above
localityDF <- ddply(df, ~id+context,plyr::summarize, avgDistance=mean(distance, na.rm=T))
ttestPretty(subset(localityDF, context == 'Spatial')$avgDistance, subset(localityDF, context == 'Conceptual')$avgDistance, var.equal=T, paired=T)
localityDF <- ddply(df, ~id+environment,plyr::summarize, avgDistance=mean(distance, na.rm=T))
ttestPretty(subset(localityDF, environment == 'Smooth')$avgDistance, subset(localityDF, environment == 'Rough')$avgDistance, var.equal=T)
localityDF <- ddply( df, ~id, plyr::summarize, avgDistance=mean(distance, na.rm=T))
ttestPretty(na.omit(localityDF$avgDistance), mu = mean(randomDistanceDF$distance, na.rm=T))
Now let’s classify these choices as either Stay (distance = 0), Near (distance = 1), or Far (distance > 1).
#choice prop
localityDF <-ddply(df, ~id+trial+context, plyr::summarize, avgDistance=mean(distance, na.rm=T))
localityDF$distance <-localityDF$avgDistance
randomDistanceDF$id <- 0
randomDistanceDF$context <- 'Random'
localityDF <- rbind(localityDF[,c( "context", "distance", 'id')], randomDistanceDF[,c( "context", "distance", 'id')])
localityDF$choiceType <-ifelse(localityDF$distance==0, "Stay", ifelse(localityDF$distance==1, "Near", "Far"))
localityDF$choiceType <- factor(localityDF$choiceType)
choiceProp <- ddply(na.omit(localityDF),.(id,context),
function(x) with(x,
data.frame(table(choiceType)/length(choiceType),2)))
choiceProp$choiceType <- factor(choiceProp$choiceType, levels=c("Stay", "Near", "Far"))
p4 <- ggplot(na.omit(choiceProp), aes(x=choiceType, y = Freq*20, fill=context, color = context))+
stat_summary(fun.y = mean,geom='bar', position='dodge', color='black') +
stat_summary(fun.data = mean_se, geom = "errorbar", position = position_dodge(width = 0.90), width = 0.2, color='black' ) +
#scale_y_continuous(labels=percent)+
scale_fill_manual(values=c("#1B9E77", "#D95F02", "Black"), name="") +
scale_color_manual(values=c("#1B9E77", "#D95F02", "Black"), name="") +
#scale_fill_rickandmorty()+
ylab('Choices Per Round ±SE')+
xlab("Choice Type")+
#facet_grid(~contextOrder)+
theme(legend.position= c(0.05, 1), legend.justification=c(0,1), strip.background=element_blank(), legend.key=element_rect(color=NA))
p4
## Warning: Removed 3 rows containing missing values (geom_errorbar).

This seems to paint the same picture as the distance histograms before. Participants made more stay choices in the spatial task (\(t(128)=-2.7\), \(p=.007\), \(d=0.3\), \(BF=3.4\)) and more far choices in the conceptual task (\(t(128)=2.8\), \(p=.006\), \(d=0.3\), \(BF=4.1\)). There were no differences in near choices (\(t(128)=-0.4\), \(p=.688\), \(d=0.05\), \(BF=.11\)).
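A sketch of those comparisons, assuming choiceProp (built above) contains one proportion per participant, task, and choice type:
#Paired comparison of choice-type proportions across tasks
for (ct in c('Stay', 'Near', 'Far')){
print(ttestPretty(subset(choiceProp, context == 'Conceptual' & choiceType == ct)$Freq, subset(choiceProp, context == 'Spatial' & choiceType == ct)$Freq, paired=T))
}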
Search Trajectories
We have really rich data about how participants navigated the search space. Let’s first look at the number of steps participants took before making a selection.
df$steps <- sapply(df$trajectories, function(i) length(fromJSON(as.character(i)))) #number of key presses per trial
trajContextDF <- df%>% group_by(id,context) %>% dplyr::summarize(avgSteps=mean(steps, na.rm=T))
trajEnvDF <- df%>% group_by(id,environment) %>% dplyr::summarize(avgSteps=mean(steps, na.rm=T))
levels(df$contextOrder)<- c("Spatial First", "Conceptual First")
#comparing context
ggplot(df, aes(x = context, y = steps, fill = context))+
stat_summary(fun.y = mean, geom = "bar", position = "dodge", color='black') +
stat_summary(fun.data = mean_se, geom = "errorbar", position = position_dodge(width = 0.90), width = 0.2, color = 'black' ) +
theme(legend.position='right', strip.background=element_blank(), legend.key=element_rect(color=NA), legend.background=element_blank(), text = element_text(size=16, family="sans"))+
#coord_cartesian(ylim=c(0,8)) +
xlab("")+
scale_fill_brewer(palette = "Dark2", name="") +
facet_grid(~contextOrder)+
ylab("Mean Number of Steps \u00B1SE")

meanSteps <- df%>% group_by(environment,context) %>% dplyr::summarize(steps=mean(steps, na.rm=T)) #mean trajectory length per cell, used for the dashed lines below
contextLabels <- c('Conceptual' = 'Conceptual\nTask', 'Spatial' = 'Spatial\nTask', "Rough"="Rough", "Smooth"="Smooth")
trajectoryplot <- ggplot(na.omit(df), aes(x=steps, fill = context, color = context)) +
geom_histogram(aes(y = ..density..*20), position = 'dodge', binwidth=1, color='black')+
#stat_density(data = as.data.frame(randomDF), aes(value),geom="line",color='black', size = .8, linetype='dashed') +
#geom_density(fill=NA, size = 0.7) +
scale_fill_manual(values=c("#1B9E77", "#D95F02", "Black"), name="") +
scale_color_manual(values = c("#1B9E77", "#D95F02", "Black"), name="") +
geom_vline(data = meanSteps, aes(xintercept = steps), linetype = 'dashed', size =.7)+
ylab("Choices Per Round") +
xlab("Trajectory Length") +
#xlim(0,6)+
facet_grid(context~environment, labeller = as_labeller(contextLabels))+
scale_x_continuous(breaks = scales::pretty_breaks(n = 5), limits = c(0,20))+
scale_y_continuous(breaks = seq(0, 3, by = 1))+
#ggtitle("Locality of Sampling") +
theme(legend.position='none', strip.background=element_blank(), legend.key=element_rect(color=NA))
trajectoryplot
## Warning: Removed 843 rows containing non-finite values (stat_bin).
## Warning: Removed 8 rows containing missing values (geom_bar).
Participants had longer trajectories in the conceptual task (\(t(128)=-10.7\), \(p<.001\), \(d=1.0\), \(BF>100\)), although there were no differences across environments (\(t(127)=1.3\), \(p=.213\), \(d=0.2\), \(BF=.38\)).
ttestPretty(subset(trajContextDF, context == 'Spatial')$avgSteps, subset(trajContextDF, context == 'Conceptual')$avgSteps, var.equal=T, paired=T)
## [1] "$t(128)=-10.7$, $p<.001$, $d=1.0$, $BF>100$"
ttestPretty(subset(trajEnvDF, environment == 'Smooth')$avgSteps, subset(trajEnvDF, environment == 'Rough')$avgSteps, var.equal=T)
## [1] "$t(127)=1.3$, $p=.213$, $d=0.2$, $BF=.38$"
Attentional Biases
Let’s also look at the trajectories decomposed into the vertical/stripe frequency dimension vs. the horizontal/tilt dimension. The figure below shows the proportion of participant inputs corresponding to each dimension, where we see a higher proportion of inputs given to the vertical/stripe frequency dimension in both tasks, relative to the horizontal/tilt dimension.
#compile total button presses for each dimension (JavaScript key codes: 37 = left, 38 = up, 39 = right, 40 = down)
df$trajLeft <- sapply(df$trajectories, function(i) sum(fromJSON(as.character(i)) == 37))
df$trajUp <- sapply(df$trajectories, function(i) sum(fromJSON(as.character(i)) == 38))
df$trajRight <- sapply(df$trajectories, function(i) sum(fromJSON(as.character(i)) == 39))
df$trajDown <- sapply(df$trajectories, function(i) sum(fromJSON(as.character(i)) == 40))
trajDirDF <- ddply(df, ~id+context+environment, plyr::summarize, horizontal = (sum(trajLeft)+sum(trajRight))/sum(steps), vertical = (sum(trajUp)+sum(trajDown))/sum(steps)) #proportion of inputs on each dimension, per participant and task
trajDirDF <- trajDirDF %>% gather(direction, p, horizontal:vertical, factor_key=TRUE) #wide to long
trajDirDF$direction <- factor(trajDirDF$direction)
levels(trajDirDF$direction)<- c('Horizontal/\nRotation', 'Vertical/\nStripes')
trajDirDF$id <- factor(trajDirDF$id)
#plot
inputdir <- ggplot(trajDirDF, aes(x = direction, y = p, fill = context))+
stat_summary(fun.y = mean, geom = "bar", position = "dodge", color='black') +
stat_summary(fun.data = mean_cl_boot, geom = "errorbar", position = position_dodge(width = 0.90), width = 0.2, color = 'black' ) +
theme(legend.position='right', strip.background=element_blank(), legend.key=element_rect(color=NA), legend.background=element_blank())+
#coord_cartesian(ylim=c(0,8)) +
xlab("Input")+
scale_fill_brewer(palette = "Dark2", name="Task") +
facet_grid(~environment)+
ylab("Proportion of Inputs \u00B1 95% CI") +
theme(legend.position = 'top')
inputdir
We formally define the difference in attention \(\Delta_{\textrm{dim}} = P(\textrm{vertical/stripe frequency}) - P(\textrm{horizontal/tilt})\), where positive values indicate a stronger bias towards the vertical/stripe frequency dimension. Running a two-way mixed ANOVA reveals that attentional bias was influenced by the interaction of task order and task (\(F(1,127) = 8.1\), \(p=.005\), \(\eta^2=.02\), \(BF>100\)).
#add task order into trajDirDF
trajDirDF$FirstTask <- df[match(trajDirDF$id, df$id),"FirstTask"]
diffDF <- ddply(trajDirDF, ~id+environment+context+FirstTask, plyr::summarize, pdiff = diff(p)) #calculate the difference in proportion of key presses over contexts (positive is more vertical, while negative is more horizontal )
#Anova
res.aov <- aov(p ~ context*FirstTask + Error(id/context), data = subset(trajDirDF, direction =='Horizontal/\nRotation'))
anova_stats(res.aov)
#Replication with robust ANOVA
bwtrim(p ~ context*FirstTask, id = id, data=subset(trajDirDF, direction =='Horizontal/\nRotation'), tr = 0.2) #using 20% trimmed means
## Call:
## bwtrim(formula = p ~ context * FirstTask, id = id, data = subset(trajDirDF,
## direction == "Horizontal/\nRotation"), tr = 0.2)
##
## value df1 df2 p.value
## FirstTask 7.1050 1 77.9543 0.0093
## context 29.1966 1 78.6680 0.0000
## FirstTask:context 8.3419 1 78.6680 0.0050
#Bayes factor of ANOVA
invisible(bf <- anovaBF(p ~ context*FirstTask , data=subset(trajDirDF, direction =='Horizontal/\nRotation'), whichRandom="id"))
bf
## Bayes factor analysis
## --------------
## [1] context : 1973.07 ±0%
## [2] FirstTask : 25.80392 ±0%
## [3] context + FirstTask : 74055.3 ±1.28%
## [4] context + FirstTask + context:FirstTask : 248556.1 ±2.33%
##
## Against denominator:
## Intercept only
## ---
## Bayes factor type: BFlinearModel, JZS
pAttentionTaskOrder <- ggplot(diffDF, aes(x = context, y = pdiff, fill = context))+
stat_summary(fun.y = mean, geom='bar', color='black')+
stat_summary(fun.data = mean_cl_boot, geom='errorbar', color = 'black', width = 0.2)+
facet_grid(~FirstTask)+
theme(legend.position='none', strip.background=element_blank(), legend.key=element_rect(color=NA), legend.background=element_blank())+
#coord_cartesian(ylim=c(0,8)) +
xlab("")+
ylab(expression( Delta[dim]))+
scale_fill_brewer(palette = "Dark2", name="Task")
pAttentionTaskOrder

While participants were more biased towards the vertical/stripe frequency dimension in the conceptual task when the conceptual task was performed first (\(t(66)=-6.0\), \(p<.001\), \(d=0.7\), \(BF>100\)), these differences disappeared when the spatial task was performed first (\(t(61)=-1.6\), \(p=.118\), \(d=0.2\), \(BF=.45\)).
ttestPretty(subset(diffDF, FirstTask == 'Conceptual First' & context == 'Spatial')$pdiff- subset(diffDF, FirstTask == 'Conceptual First' & context == 'Conceptual')$pdiff, mu=0)
ttestPretty(subset(diffDF, FirstTask == 'Spatial First' & context == 'Spatial')$pdiff- subset(diffDF, FirstTask == 'Spatial First' & context == 'Conceptual')$pdiff, mu=0)
Does unequal preference for one of the feature dimensions influence performance? The results are presented in the figure below, where each pair of dots is a single participant, and the connecting line shows the change in score and change in attentional bias \(\Delta_{\textrm{dim}}\) across tasks.
perfDF<- ddply(df, ~id+environment+context+FirstTask, plyr::summarize, score = mean(z))
diffDF$score <- perfDF$score #task specific score
pScoreAttention <- ggplot(diffDF, aes(x = pdiff, y = score, color =context))+
geom_point(alpha = 0.8) +
geom_line(aes(group=id), color = 'black', alpha = 0.1)+
facet_grid(~FirstTask)+
scale_color_brewer(palette = "Dark2", name="Task") +
#geom_smooth(method = 'lm')+
xlab(expression( Delta[dim]))+
ylab('Mean Score')+
theme(legend.position='top', strip.background=element_blank(), legend.key=element_rect(color=NA))
pScoreAttention

We find a negative relationship between score and attention for the conceptual task only in the conceptual first order (\(r_{\tau}=-.31\), \(p<.001\), \(BF>100\)), but not in the spatial first order (\(r_{\tau}=-.07\), \(p=.392\), \(BF=.24\)). There were no relationships between score and attention in the spatial task in either order (spatial first: \(r_{\tau}=.03\), \(p=.738\), \(BF=.17\); conceptual first: \(r_{\tau}=-.03\), \(p=.750\), \(BF=.17\)). Thus, strong attentional biases predicted lower score, but only in the conceptual first task order.
#spatial first
corTestPretty(subset(diffDF, FirstTask == 'Spatial First' & context == 'Spatial')$pdiff, subset(diffDF, FirstTask == 'Spatial First' & context == 'Spatial')$score, method = 'kendall')
corTestPretty(subset(diffDF, FirstTask == 'Spatial First' & context == 'Conceptual')$pdiff, subset(diffDF, FirstTask == 'Spatial First' & context == 'Conceptual')$score, method = 'kendall')
corTestPretty(subset(diffDF, FirstTask == 'Conceptual First' & context == 'Spatial')$pdiff, subset(diffDF, FirstTask == 'Conceptual First' & context == 'Spatial')$score, method = 'kendall')
corTestPretty(subset(diffDF, FirstTask == 'Conceptual First' & context == 'Conceptual')$pdiff, subset(diffDF, FirstTask == 'Conceptual First' & context == 'Conceptual')$score, method = 'kendall')
We now look at differences in attentional biases between tasks. We define \(\Delta_{\textrm{task}} = \Delta_{\textrm{dim}}^{\textrm{Spatial}} - \Delta_{\textrm{dim}}^{\textrm{Conceptual}}\). This difference of differences is a bit more difficult to interpret, but recall that \(\Delta_{\textrm{dim}}\) tended to be positive, since participants attended more towards the vertical/stripe frequency dimension in both tasks. Thus, \(\Delta_{\textrm{task}}\) is positive if participants were more biased towards the vertical/stripe frequency dimension in the spatial task. Vice versa, \(\Delta_{\textrm{task}}\) is negative if participants were more biased towards the vertical/stripe frequency dimension in the conceptual task. Let’s now see the relationship between \(\Delta_{\textrm{task}}\) and change in score:
#Compute delta task
diffByContext <- ddply(diffDF, ~id+environment+FirstTask, plyr::summarize, pdiff = diff(pdiff)) #summarize out context; pdiff spatial - pdiff conceptual
#Add score difference
taskperfDF <- ddply(df, ~id+environment+context+FirstTask, plyr::summarize, score = mean(z)) #mean score for each task
taskPerDiffDF <- ddply(taskperfDF, ~id+environment, plyr::summarize, scoreDiff = diff(score)) #spatial score - conceptual score
diffByContext$scoreDiff <- taskPerDiffDF$scoreDiff #add to dataframe
pScoreDiff <- ggplot(diffByContext, aes(x = pdiff, y = scoreDiff, color = FirstTask, fill = FirstTask))+
geom_point(alpha = 0.8) +
#geom_line(aes(group=id), color = 'black', alpha = 0.1)+
#facet_grid(~FirstTask)+
scale_color_brewer(palette = "Dark2", name="") +
scale_fill_brewer(palette = "Dark2", name="") +
geom_smooth(method = 'lm')+
xlab(expression( Delta[task]))+
ylab('Spatial Score - Conceptual Score')+
theme(legend.position='top', strip.background=element_blank(), legend.key=element_rect(color=NA))
pScoreDiff
Looking first at participants in the Conceptual First condition, we find an anecdotal relationship between \(\Delta_{\textrm{task}}\) and difference in score (\(r_{\tau}=-.20\), \(p=.019\), \(BF=2.4\)). The direction of this effect is that participants with a stronger bias towards the vertical/stripe frequency dimension in the conceptual task tended to have a lower score in the conceptual task relative to the spatial task. We find no relationship between \(\Delta_{\textrm{task}}\) and difference in score for the Spatial First condition (\(r_{\tau}=-.04\), \(p=.666\), \(BF=.18\)). An outstanding question is whether this shift in attention is responsible for the transfer effect, or is merely an artifact of participants being more capable of navigating the conceptual domain after prior experience with the spatial task. While these analyses provide further clarification about the role of attention, the exact relationship between attention (as measured by input frequency) and generalization is perhaps outside the scope of our current paper.
corTestPretty(subset(diffByContext, FirstTask == 'Conceptual First')$pdiff, subset(diffByContext, FirstTask == 'Conceptual First')$scoreDiff, method = 'kendall')
corTestPretty(subset(diffByContext, FirstTask == 'Spatial First')$pdiff, subset(diffByContext, FirstTask == 'Spatial First')$scoreDiff, method = 'kendall')
Efficiency
We can also compute the efficiency of each trajectory as \(\text{efficiency} = \frac{\text{Manhattan distance from start to selection}}{\text{steps taken}}\). For example, reaching a target at Manhattan distance 5 in exactly 5 key presses yields an efficiency of 1 (a shortest path), whereas taking 10 presses yields 0.5.
efficiencyDF <- ddply(df, ~id+context, plyr::summarize, efficiency = mean(movement/steps))
efficiencyPlot <- ggplot(efficiencyDF, aes(x = context, y = efficiency, color = context, fill = context))+
geom_boxplot(fill=NA, color = 'black', width = .2, outlier.shape = NA)+
geom_quasirandom(alpha = .7)+
stat_summary(fun.y = mean, geom = "point",color = 'black', fill=NA, shape =23, size = 3 ) +
#coord_cartesian(ylim=c(0,2)) +
xlab('')+
scale_fill_brewer(palette = "Dark2", name="") +
scale_color_brewer(palette = "Dark2", name="") +
theme( legend.position='none', strip.background=element_blank(), legend.background=element_blank(), legend.key=element_rect(color=NA))+
ylab("Efficiency \u00B1SE")
efficiencyPlot
Participants are clearly less efficient in the conceptual task than in the spatial task (\(t(128)=-20.6\), \(p<.001\), \(d=1.9\), \(BF>100\)).
ttestPretty(subset(efficiencyDF, context=='Conceptual')$efficiency, subset(efficiencyDF, context=='Spatial')$efficiency, paired=T)
Now let’s ask what factors influence trajectories. Do longer trajectories obtain higher rewards? Yes, they do (\(r=.21\), \(p<.001\), \(BF>100\)).
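A sketch of that correlation, assuming it is computed over trials (steps and reward z are both trial-level variables in df):
corTestPretty(df$steps, df$z)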
#steps as a function of previous reward
pTrajLengthReward <- ggplot(subset(df,steps<=20), aes(x =steps, y = z, color = context, fill = context))+
#geom_smooth(fill=NA)+
stat_summary(fun.y = mean, geom = "point") +
stat_summary(fun.data = mean_cl_boot, geom = "errorbar") +
coord_cartesian(xlim=c(0,20), ylim=c(0,100)) + #Tukey outlier criterion indicates outliers above 20; min(boxplot.stats(df$steps)$out)
#facet_grid(~environment)+
scale_fill_brewer(palette = "Dark2", name="Task") +
scale_color_brewer(palette = "Dark2", name="Task") +
theme(legend.position=c(1,0.1),legend.justification = c(1,0), strip.background=element_blank(), legend.background=element_blank(), legend.key=element_rect(color=NA))+
ylab("Reward Value ± 95% CI")+
xlab('Trajectory Length')
pTrajLengthReward

We can also look at the entropy of each trajectory (computed over the distribution of directions moved). It seems that participants in the conceptual task had higher entropy (consistent with larger step sizes and lower efficiency), and that lower entropy predicts higher reward.
library(entropy)
#Empirical Shannon entropy (in nats) of the distribution of key presses in a trajectory
myent<-function(x){
return(entropy.empirical(table(x)))
}
df$trajEntropy <- sapply(df$trajectories, function(i) myent(fromJSON(as.character(i))))
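#Quick sanity check (illustrative only): using all four arrow keys equally often
#gives maximal entropy, log(4) ≈ 1.39 nats, while repeating a single key gives zero
myent(c(37, 38, 39, 40)) #≈ 1.386
myent(c(39, 39, 39, 39)) #0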
ggplot(df, aes(x = context, y = trajEntropy, fill = context))+
stat_summary(fun.y = mean,geom='bar', position='dodge', color='black') +
stat_summary(fun.data = mean_se, geom = "errorbar", position = position_dodge(width = 0.90), width = 0.2, color='black' ) +
#scale_y_continuous(labels=percent)+
scale_fill_manual(values=c("#1B9E77", "#D95F02"), name="") +
scale_color_manual(values=c("#1B9E77", "#D95F02"), name="") +
facet_grid(~environment)

ggplot(subset(df, trajEntropy>0), aes(x = trajEntropy, y = z, color = context))+
geom_point(alpha = 0.05)+
geom_smooth(method = 'lm')+
#stat_summary(fun.y = mean, geom = "point") +
#stat_summary(fun.data = mean_cl_boot, geom = "errorbar") +
xlab('Entropy')+ ylab('Reward Value')+
scale_fill_manual(values=c("#1B9E77", "#D95F02"), name="") +
scale_color_manual(values=c("#1B9E77", "#D95F02"), name="") +
facet_grid(~environment)

How were both distance and trajectory length influenced by the previous reward value?
#reward and distance
#corTestPretty(na.omit(df)$distance, na.omit(df)$previousReward)
p5 <- ggplot(na.omit(df), aes(x=distance, y = previousReward, color = context, fill=context)) +
#geom_count(alpha=0.2, show.legend = F, position = position_dodge(width=0.1))+
#scale_size_area(max_size = 5)+
#geom_jitter(alpha=0.05, size=0.5)+
#geom_smooth(method = "lm") +
stat_summary(fun.y = mean, geom = 'line', size=1)+
stat_summary(fun.data = mean_se, geom = 'ribbon', alpha = 0.7, color=NA) +
theme_classic() +
labs(y='Previous Reward Value', x = 'Distance Between Selections')+
scale_x_continuous(breaks = scales::pretty_breaks(n = 5))+
scale_color_brewer(palette = 'Dark2', name="Task")+
scale_fill_brewer( palette = 'Dark2', name="Task")+
#coord_flip()+
theme(legend.position=c(1,1), legend.justification = c(1,1), strip.background=element_blank(), legend.key=element_rect(color=NA), legend.background=element_blank())
p5
It seems that participants moved further away from their previous selection when the reward value was low (\(r=-.66\), \(p<.001\), \(BF>100\)), suggesting basic evidence of generalization behavior.
Let’s run a mixed model on these results.
#Mixed effects modeling
#Previous reward value and distance between selections
prior <- c(set_prior("normal(0,1)", class = "b"),set_prior("normal(0,1)", class = "sd"))
distanceRewardMM <- run_model(brm(distance ~ 0 + intercept + previousReward*context + (1 + previousReward*context|id), data=subset(df, !is.na(df$distance)), prior = prior, cores=4, iter = 4000, warmup = 1000, control = list(adapt_delta = 0.99)), modelName = 'distanceRewardMM') #previousReward*context expands to both main effects plus the interaction
#tab_model(distanceRewardMM) #Really slow!
fixedTerms <- fixef(distanceRewardMM)#Look at fixed terms
#Now generate predictions, removing id as a random effect
xseq <- seq(0,100)
newdat <-data.frame(context = rep(c("Conceptual","Spatial"), each=101), previousReward = rep(xseq,2))
preds <- fitted(distanceRewardMM, re_formula = NA, newdata = newdat, probs = c(0.025, 0.975))
#create new fixed effects dataframe
fixedDF <- data.frame(context = rep(c("Conceptual","Spatial"), each=101), previousReward = rep(xseq,2),
distance = preds[,1], lower = preds[,3], upper = preds[,4] )
p5alt <- ggplot(subset(df, !is.na(df$distance)), aes(previousReward, distance, color = context, fill = context)) +
#geom_hline(yintercept = mean(randomDistanceDF$distance, na.rm=T ), size = 1, color = 'black', linetype='dashed')+
geom_line(data = fixedDF, size = 1)+ #fixed-effect (population-level) predictions
geom_ribbon(data = fixedDF, aes(ymin=lower, ymax = upper), color = NA, alpha = 0.4 )+
stat_summary(fun.y=mean,geom='point', alpha = 0.8)+
#geom_abline(slope = 1, linetype = 'dashed')+
#coord_cartesian(xlim = c(0,100))+
xlim(c(0,100))+
theme_classic()+
scale_color_brewer(palette = 'Dark2', name="Task")+
scale_fill_brewer( palette = 'Dark2', name="Task")+
#facet_grid(~context, labeller = as_labeller(contextLabels) )+
xlab("Previous Reward Value")+
ylab("Distance Between Selections")+
annotate("text", x = 50, y = 8, label = "paste(italic(b)[prevReward] , \" = -0.06, 95% HPD: [-0.06, -0.06]\")", parse = TRUE)+
theme(legend.position=c(0, 0), legend.justification=c(0,0), strip.background=element_blank(), legend.key=element_blank(), legend.background=element_blank())
p5alt
## Warning: Removed 4966 rows containing non-finite values (stat_summary).

At the same time, participants also moved further away from their initial starting point after observing larger reward values (\(r_{\tau}=.18\), \(p<.001\), \(BF>100\)). Note that there was a random starting position at the beginning of each trial, so the starting point is not the same as the previous selection. A small distance from the initial starting point is indicative of random search behavior, utilizing the randomness of the initialization. The trend indicates that participants made a larger effort to search in a directed fashion after observing large reward values.
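A sketch of that rank correlation, assuming it is computed over all trials with a valid starting-point distance (moveDF is a hypothetical intermediate):
moveDF <- subset(df, !is.na(movement) & !is.na(previousReward))
corTestPretty(moveDF$previousReward, moveDF$movement, method = 'kendall')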
prior <- c(set_prior("normal(0,1)", class = "b"),set_prior("normal(0,1)", class = "sd"))
distanceInitialMM <- run_model(brm(movement ~ 0 + intercept + previousReward*context + (1 + previousReward*context|id), data=subset(df, !is.na(df$movement)),
prior = prior,
cores=4, iter = 4000, warmup = 1000, control = list(adapt_delta = 0.99)), modelName = 'distanceInitialMM')
#bayes_R2(distanceInitialMM)
#tab_model(distanceInitialMM)
fixedTerms <- fixef(distanceInitialMM)#Look at fixed terms
#Now generate predictions, removing id as a random effect
xseq <- seq(1,100)
newdat <-data.frame(context = rep(c("Conceptual","Spatial"), each=100), previousReward = rep(xseq,2))
preds <- fitted(distanceInitialMM, re_formula = NA, newdata = newdat, probs = c(0.025, 0.975))
#create new fixed effects dataframe
fixedDF <- data.frame(context = rep(c("Conceptual","Spatial"), each=100), previousReward = rep(xseq,2),
movement = preds[,1], lower = preds[,3], upper = preds[,4] )
p6 <- ggplot(subset(df, !is.na(df$movement)), aes(previousReward, movement, color = context, fill = context)) +
geom_hline(yintercept = mean(randomDistanceDF$distance, na.rm=T ), size = 1, color = 'black', linetype='dashed')+
geom_line(data = fixedDF, size = 1)+ #fixed-effect (population-level) predictions
geom_ribbon(data = fixedDF, aes(ymin=lower, ymax = upper), color = NA, alpha = 0.4 )+
stat_summary(fun.y=mean,geom='point', alpha = 0.8)+
#geom_abline(slope = 1, linetype = 'dashed')+
#coord_cartesian(xlim = c(0,100))+
xlim(c(0,100))+
theme_classic()+
scale_color_brewer(palette = 'Dark2', name="Task")+
scale_fill_brewer( palette = 'Dark2', name="Task")+
#facet_grid(~context, labeller = as_labeller(contextLabels) )+
xlab("Previous Reward Value")+
ylab("Distance From Initial Position")+
annotate("text", x = 50, y = 8, label = "paste(italic(b)[prevReward] , \" = 0.01, 95% HPD: [0.01, 0.01]\")", parse = TRUE)+
theme(legend.position=c(0, 0.7), legend.justification=c(0,1), strip.background=element_blank(), legend.key=element_blank(), legend.background=element_blank())
p6
## Warning: Removed 7288 rows containing non-finite values (stat_summary).
