Behavioral results. Let's look at how participants performed in the task.

#house keeping
rm(list=ls())
#load packages
packages <- c('dplyr','cowplot', 'Rmisc', 'ggbeeswarm', 'brms', 'WRS2', 'BayesFactor','scales',  'plyr', 'reshape2', 'ggridges', 'ggplot2', 'jsonlite', 'MASS', 'gridExtra', 'Hmisc', 'lsr', 'pander', 'ggsignif', 'rstatix', 'sjstats', 'emmeans')
invisible(lapply(packages, require, character.only = TRUE))

theme_set(theme_cowplot(font_size=12))
source('../dataProcessing.R') 
source('../statisticalTests.R')
#Wrapper for brm models such that it saves the full model the first time it is run, otherwise it loads it from disk
run_model <- function(expr, modelName, path='../brmsModels', reuse = TRUE) {
  path <- paste0(path,'/', modelName, ".brm")
  fit <- NULL #so the check below also works when reuse = FALSE
  if (reuse) {
    fit <- suppressWarnings(try(readRDS(path), silent = TRUE))
  }
  if (is.null(fit) || is(fit, "try-error")) {
    fit <- eval(expr)
    saveRDS(fit, file = path)
  }
  fit
}
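#Usage sketch (hypothetical model name and formula; any brm() call can be wrapped this way and is only fit the first time):
#exampleFit <- run_model(brm(meanScore ~ context * environment + (1|id), data = meanDF, cores = 4), modelName = 'exampleContextModel')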

First, let's load the data. The import functions are defined in dataProcessing.R, where the raw data are converted into a usable dataframe.

dataDir <- '../experimentData/full.csv' #
df <- dataImport(dataFile = dataDir ,normalize=F) #These two functions are defined in `dataProcessing.R`
trajDF <- importTrajData(dataFile = dataDir,normalize=F)
#Trim last rounds
df<- subset(df, round<10) #Last round is the bonus round and is not included in the behavioral analysis, since the judgment task may bias subsequent performance

n_rounds = 9 #without bonus round
n_trials = 20 #per round

Demographics

## [1] "Participants: 129"
## [1] "Age: 14.94574 ± 8.724346"
## [1] "Males: 74"
## [1] "Earnings :15.6496899224806 USD ± 0.9952732"
## [1] "Task Duration: 54.15182 mins ± 18.7926"
## [1] "Gap between tasks: 18.2791 hours ± 8.555568"

Training Phase

Before the main bandit task, participants completed a training phase in which they were required to match a target stimulus until a learning criterion was met (at least 32 trials and a run of 9 out of 10 correct). The training task used the same stimuli and inputs as the main bandit task, and served to familiarize participants with both domains and to bring them to a similar level of fluency in the spatial and conceptual tasks. Let's first look at some of the results.
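As an illustration of this criterion (a sketch, not the actual experiment code), a check for whether a participant has finished training might look like this:

#Sketch of the learning criterion: at least 32 trials, and 9 of the last 10 trials correct
criterionMet <- function(correct){ #correct: logical vector with one entry per training trial
  n <- length(correct)
  if (n < 32) return(FALSE)
  any(sapply(32:n, function(t) sum(correct[(t-9):t]) >= 9))
}
criterionMet(c(rep(c(TRUE,FALSE), 15), rep(TRUE, 9))) #example: criterion met after a final run of correct responses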

Participants had lower accuracy in the conceptual training task (\(t(128)=7.5\), \(p<.001\), \(d=0.8\), \(BF>100\)) and required more trials to reach the training criterion (\(Z=-4.1\), \(p<.001\), \(r=-.40\), \(BF>100\)), which is to be expected, since the conceptual mapping is intuitively more difficult.
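These statistics are not produced by a chunk shown here; a sketch of how they could be computed with the helpers from statisticalTests.R (mirroring the commented-out calls further below) is:

#Training accuracy: paired t-test between tasks
trainAcc <- ddply(df, ~id+context, plyr::summarize, trajCorrect = mean(trajCorrect))
ttestPretty(subset(trainAcc, context == 'Spatial')$trajCorrect, subset(trainAcc, context == 'Conceptual')$trajCorrect, paired=T)
#Trials to criterion: paired rank test between tasks
trainTrials <- ddply(trajDF, ~id+context, plyr::summarize, trajTrials = max(trial))
ranktestPretty(subset(trainTrials, context == 'Spatial')$trajTrials, subset(trainTrials, context == 'Conceptual')$trajTrials, paired=T)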

#Overall Correct choices
dat <- ddply(df, ~id+context, plyr::summarize, trajCorrect = mean(trajCorrect), trajAvgSteps = mean(trajAvgSteps))
trajp1a<- ggplot(dat, aes(x = context, y = trajCorrect, color = context))+
  geom_line(aes(group=id),color = 'black', alpha = 0.1)+
  geom_boxplot(outlier.shape = NA, fill=NA, color = 'black', width = 0.1)+
  geom_quasirandom(alpha = 0.7)+
  stat_summary(fun.y=mean, geom='point', shape=23, color = 'black', size =3)+
  ylab('P(correct)')+
  xlab('')+
  scale_color_brewer(palette = "Dark2", name = "") +
  theme(legend.position='none')
trajp1a

#Trials until complete
dat <- ddply(trajDF, ~id+context, plyr::summarize, trajTrials = max(trial))
#ttestPretty(subset(dat, context == 'Spatial')$trajTrials, subset(dat, context == 'Conceptual')$trajTrials, paired=T) #Data doesn't look very normal
#ranktestPretty(subset(dat, context == 'Spatial')$trajTrials, subset(dat, context == 'Conceptual')$trajTrials, paired=T) #Is there a meaningful difference in the number of trials needed to finish the training phase? #Note: sometimes Bayes factors display as NA when they are very large

trajComplete <- ggplot(dat, aes(x = context, y = trajTrials, color = context))+
  geom_line(aes(group=id),color = 'black', alpha = 0.1)+
  geom_quasirandom( alpha = 0.6)+
  geom_boxplot(color='black', fill= NA, width =.2, outlier.shape = NA)+
  stat_summary(fun.y = mean, geom='point', shape = 23,size=3, color = 'black')+
  geom_hline(yintercept = 32, linetype = 'dashed')+
  scale_color_brewer(palette = "Dark2", name = "") +
  ylab('Trials Until Complete')+
  xlab('')+
  #coord_cartesian(ylim=c(30,128), )+
  scale_y_continuous(breaks=c(32,64, 96, 128), limits = c(32,128))+
  theme(legend.position = 'none')
trajComplete

We can also look at the magnitude of errors vs. their frequency of occurrence, which gives us a nice Shepard (1987) style generalization gradient.

#Magnitude of error vs. frequency
gradientDF <- ddply(trajDF, ~context+manhattanError,plyr::summarize, counts = table(manhattanError)) 
#Normalize into a percentage
gradientDF[gradientDF$context == 'Conceptual','P'] <- gradientDF[gradientDF$context == 'Conceptual','counts']/sum(gradientDF[gradientDF$context == 'Conceptual','counts'])
gradientDF[gradientDF$context == 'Spatial','P'] <- gradientDF[gradientDF$context == 'Spatial','counts']/sum(gradientDF[gradientDF$context == 'Spatial','counts'])

pError<- ggplot(gradientDF, aes(x = manhattanError, y = P, color = context, shape = context))+
  geom_line()+
  geom_point()+
  coord_cartesian(xlim=c(0,5))+
  ylab('P(error)')+
  xlab('Magnitude of Error (Manhattan distance)')+
  scale_color_brewer(palette = "Dark2", name = "Task")+
  scale_shape_manual( values = c(16,15),name = "Task")+
  theme(legend.position=c(1,1), legend.justification = c(1,1))

pError

How did training accuracy differ across the different options?

spatialCounts <- ddply(subset(trajDF, context == 'Spatial'), .(x, y), plyr::summarize, correct = sum(trajCorrect)/length(trajCorrect))
names(spatialCounts) <- c("X", "Y", "Accuracy")
spatialCounts$task <- 'Spatial'

conceptCounts <- ddply(subset(trajDF, context == 'Conceptual'), .(x, y), plyr::summarize, correct = sum(trajCorrect)/length(trajCorrect)) #use the training data, matching the Spatial panel above
names(conceptCounts) <- c("X", "Y", "Accuracy")
conceptCounts$task <- 'Conceptual'

trainingAccuracyDF <-rbind(spatialCounts, conceptCounts)
trajheatmap<- ggplot(trainingAccuracyDF, aes(x=X, y = Y, fill=Accuracy)) +
  geom_tile()+
  scale_fill_distiller(palette = "Spectral", name = 'P(correct)',limits = c(0,1),labels = scales::percent_format(accuracy = 1))+
  theme_classic() +
  facet_grid(~task)+
  coord_equal() +
  theme(strip.background=element_blank(), legend.key=element_rect(color=NA), axis.line=element_blank(),axis.text.x=element_blank(),
        axis.text.y=element_blank(),axis.ticks=element_blank(), panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),plot.background=element_blank())+
  labs(x = '', y = '')
trajheatmap

Performance

Now let’s finally look at performance on the bandit task.

#Construct plotting dataframes
meanDF <- ddply(df, .(id, context, environment, contextOrder), plyr::summarize, meanScore = mean(z))
conceptualScores <- subset(meanDF, context == "Conceptual")
spatialScores <- subset(meanDF, context == "Spatial")
mergedDF <- merge(conceptualScores, spatialScores, by ="id")
joinedDF <- rbind(conceptualScores, spatialScores)
bothTasksDF <- ddply(joinedDF, .(id, environment, contextOrder), plyr::summarize, meanScore = mean(meanScore))
randomDF <- read.csv("../rationalModels/random.csv") #load random model

joinedDF$contextOrder <- factor(joinedDF$contextOrder)
levels(joinedDF$contextOrder)<- c("Spatial First", "Conceptual First")

Overall, participants performed far better than chance in both the Conceptual (\(t(128)=24.6\), \(p<.001\), \(d=2.2\), \(BF>100\)) and the Spatial task (\(t(128)=34.6\), \(p<.001\), \(d=3.0\), \(BF>100\)). Let's now run a two-way mixed ANOVA to see how our context x environment design influenced performance.

#Two way mixed ANOVA: context is within, environment is between 
dd<-ddply(rbind(conceptualScores, spatialScores), ~id+context+environment, summarise, m=mean(meanScore))
dd$id <- factor(dd$id)
res.aov <- aov(m ~ environment*context + Error(id/context), data=dd)
anova_stats(res.aov)
# Now let's replicate via Robust ANOVA
bwtrim(m ~ environment*context, id = id, data=dd, tr = 0.2) #using 20% trimmed means
## Call:
## bwtrim(formula = m ~ environment * context, id = id, data = dd, 
##     tr = 0.2)
## 
##                       value df1     df2 p.value
## environment         22.0075   1 71.7571  0.0000
## context             36.1917   1 71.4948  0.0000
## environment:context  1.3662   1 71.4948  0.2463
sppba(m ~ environment*context, id = id, data=dd) #Between-subjects main effect (environment)
## Call:
## sppba(formula = m ~ environment * context, id = id, data = dd)
## 
## Test statistics:
## [1] -6.345
## 
## Test whether the corrresponding population parameters are the same:
## p-value: 0.002
sppbb(m ~ environment*context, id = id, data=dd) #Within-subjects effect (context)
## Call:
## sppbb(formula = m ~ environment * context, id = id, data = dd)
## 
## Test statistics:
## [1] -4.828
## 
## Test whether the corrresponding population parameters are the same:
## p-value: 0
#Now compute Bayes factor
invisible(bf <- anovaBF(m ~ environment*context+id,  data=dd,  whichRandom="id"))
bf
## Bayes factor analysis
## --------------
## [1] context + id                                     : 453922.7 ±0.88%
## [2] environment + id                                 : 12.66632 ±0.89%
## [3] context + environment + id                       : 6246489  ±2.69%
## [4] context + environment + context:environment + id : 2391835  ±2.84%
## 
## Against denominator:
##   m ~ id 
## ---
## Bayes factor type: BFlinearModel, JZS

The Bayes factors corroborate the robust ANOVA: there is strong evidence for effects of both context and environment (the model with both main effects is preferred), whereas adding the context:environment interaction lowers the Bayes factor, so there is no evidence for an interaction.
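We can make the (lack of) evidence for the interaction explicit by dividing the Bayes factor of the full model by that of the two-main-effects model:

bf[4] / bf[3] #BF for adding the context:environment interaction (relative to the two main-effects model); values below 1 favor the simpler model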

#Mean performance plots
p1a <- ggplot(joinedDF, aes(x = interaction(context, environment), y = meanScore, color = context))+
  geom_boxplot(fill=NA, color = 'black', outlier.shape=NA, width = 0.2)+
  geom_line(aes(group=id), color = 'black', alpha = 0.1)+
  geom_quasirandom(alpha = 0.7)+
  geom_hline(yintercept = mean(randomDF$meanReward), color = 'black', linetype = 'dashed')+
  stat_summary(fun.y = mean, shape = 23, geom='point', size = 3, fill = NA, color ='black')+
  #facet_grid(~environment)+
  ylab("Mean Reward \u00B1SE")+
  xlab('')+
  annotate('text', x = 1.5, y = 110, label='Rough')+
  annotate('text', x = 3.5, y = 110, label='Smooth')+
  scale_x_discrete(labels=c("Conceptual", "Spatial", "Conceptual", "Spatial"))+
  scale_color_brewer(palette = "Dark2", name = "") +
  scale_fill_brewer(palette = "Dark2", name = "") +
  geom_signif(y_position = c(95, 97, 103),  xmin = c(1,3, 1.5), xmax = c(2,4, 3.5), annotation = c("BF>100","BF = 14", "BF = 12"), color = 'black')+
  #geom_signif(comparisons=list( c("Conceptual", "Spatial"), c("Conceptual", "Spatial")), annotations=c("",""), col="black")+ # 
  theme(text = element_text(size=12,  family="sans"),strip.background=element_blank(), legend.key=element_rect(color=NA), legend.position='None')
p1a

Participants earned higher rewards in the Spatial than in the Conceptual task (paired \(t\)-test: \(t(128)=-6.0\), \(p<.001\), \(d=0.5\), \(BF>100\)) and more in smooth than in rough environments (two-sample \(t\)-test: \(t(127)=3.1\), \(p=.003\), \(d=0.5\), \(BF=12\)). We also find correlated performance between the two tasks (\(r=.53\), \(p<.001\), \(BF>100\)).

#statistical tests
ttestPretty(conceptualScores$meanScore, mu=mean(randomDF$meanReward)) #compared to chance
ttestPretty(spatialScores$meanScore, mu=mean(randomDF$meanReward))

#T-tests
ttestPretty(conceptualScores$meanScore, spatialScores$meanScore, var.equal = TRUE, paired=T) #context
ttestPretty(subset(bothTasksDF, environment=="Smooth")$meanScore, subset(bothTasksDF, environment=="Rough")$meanScore, var.equal = TRUE) #environment
corTestPretty(conceptualScores$meanScore, spatialScores$meanScore) #correlated performance
ttestPretty(subset(conceptualScores, environment=='Smooth')$meanScore, subset(spatialScores, environment=="Smooth")$meanScore, var.equal = TRUE, paired=T) #context
ttestPretty(subset(conceptualScores, environment=='Rough')$meanScore, subset(spatialScores, environment=="Rough")$meanScore, var.equal = TRUE, paired=T, maxBF = Inf) #context

Correlation between tasks:

#corTestPretty(conceptualScores$meanScore, spatialScores$meanScore)
p1b <- ggplot(mergedDF, aes(x=meanScore.x, y = meanScore.y, color = environment.x, shape= environment.x)) +
  geom_abline(slope=1, intercept=0, linetype='dashed') +
  geom_point(alpha=0.9, size = 2.5)+
  ylab('Spatial Reward')+
  xlab('Conceptual Reward')+
  xlim(40,100)+
  ylim(40,100) +
  annotate("text", x = 50, y = 95, label = "'r = .53' *','* ~~'BF > 100'", parse=TRUE, size=5, family="sans") +
  theme(legend.position=c(1,0),legend.justification=c(1,0), strip.background=element_blank(), legend.key=element_rect(color=NA), text = element_text(size=12,  family="sans"))+
  #scale_color_rickandmorty(name="Environment", palette = "schwifty")+
  scale_color_manual(name="Environment", values=c("#24325FFF", '#B7E4F9FF'))+
  scale_shape_manual(name="Environment", values= c(17,16))
p1b

Order effect

We find an interesting one-directional order effect. Participants performed better on the conceptual task once they had experience with the spatial task (\(t(127)=2.8\), \(p=.006\), \(d=0.5\), \(BF=6.4\)). This was not the case for the spatial task, where performance did not differ depending on whether it was performed first or second (\(t(127)=-1.7\), \(p=.096\), \(d=0.3\), \(BF=.67\)). Thus, experience with spatial search boosted performance on conceptual search, but not vice versa.
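These are between-subject comparisons, since task order varied between participants; a sketch of how they might be computed is shown below (contextOrder == 0 corresponds to 'Spatial First', as in the next chunk):

#Order effects (sketch): compare performance in each task depending on which task came first
orderScores <- ddply(df, ~id+context+contextOrder, plyr::summarize, meanScore = mean(z))
ttestPretty(subset(orderScores, context == 'Conceptual' & contextOrder == 0)$meanScore, subset(orderScores, context == 'Conceptual' & contextOrder == 1)$meanScore, var.equal = TRUE) #conceptual task: spatial first vs. conceptual first
ttestPretty(subset(orderScores, context == 'Spatial' & contextOrder == 1)$meanScore, subset(orderScores, context == 'Spatial' & contextOrder == 0)$meanScore, var.equal = TRUE) #spatial task: performed second vs. first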

df$FirstTask <- ifelse(df$contextOrder==0, 'Spatial First', 'Conceptual First') #which task was performed first?
df$taskOrder <- ifelse((df$FirstTask=="Spatial First" & df$context == "Spatial") |(df$FirstTask=="Conceptual First" & df$context == "Conceptual"), 1, 2 ) 
df$FirstTask <- factor(df$FirstTask, levels = c("Conceptual First","Spatial First"))
orderDF <- ddply(df, .(id, context, environment, FirstTask, taskOrder), plyr::summarize, meanScore = mean(z))

pOrder <- ggplot(orderDF, aes(x = interaction(context,FirstTask), y = meanScore, fill = context, color = context))+
  geom_boxplot( color = 'black', position = position_dodge(width = 1), outlier.shape=NA, width = 0.2, alpha =0)+
  geom_quasirandom(alpha = 0.7, dodge.width = 1)+
  #geom_line(aes(group=id), color = 'black', alpha = 0.1)+
  stat_summary(fun.y = mean, shape = 23, geom='point', size = 3, position = position_dodge(width = 1), color ='black', fill = NA)+
  #stat_summary(fun.y=mean, geom='bar', position = position_dodge(width = 1), color='black')+
  #stat_summary(fun.data = mean_cl_boot, geom='errorbar', color='black', position = position_dodge(width = 1), width = .2)+
  scale_fill_brewer(palette = 'Dark2', name= 'Task')+
  scale_color_brewer(palette = 'Dark2', name= 'Task')+
  annotate('text', x = 1.5, y = 118, label='Conceptual First')+
  annotate('text', x = 3.5, y = 118, label='Spatial First')+
  scale_x_discrete(labels=c("Conceptual", "Spatial", "Conceptual", "Spatial"))+
  geom_hline(yintercept = mean(randomDF$meanReward), color = 'black', linetype = 'dashed')+
  #coord_cartesian(ylim =c(40,120))+
  xlab('')+
  ylab('Mean Reward ±SE')+
  geom_signif(y_position = c(100, 108),  xmin = c(1,2), xmax = c(3,4), annotation = c("BF=6.4","BF=0.67"), color = 'black')+
  #theme(legend.position=c(0.05,1), legend.justification = c(0,1), legend.direction='horizontal')
  theme(legend.position='none')
pOrder

Learning over trials and rounds

Participants improved strongly over trials (Pearson correlation between mean score and trial number: \(r=.88\), \(p<.001\), \(BF>100\)) and to a lesser extent over rounds (\(r=.81\), \(p=.008\), \(BF=5.1\)).
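These correlations are presumably computed on rewards aggregated by trial (and round) number; a sketch using corTestPretty() from statisticalTests.R:

#Sketch (assumed aggregation level): correlate mean reward with trial and round number
trialMeans <- ddply(df, ~trial, plyr::summarize, meanScore = mean(z))
corTestPretty(trialMeans$trial, trialMeans$meanScore)
roundMeans <- ddply(df, ~round, plyr::summarize, meanScore = mean(z))
corTestPretty(roundMeans$round, roundMeans$meanScore)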

#Mean score over trial
trialDF <- ddply(df, .(id, context, environment, trial), plyr::summarize, meanScore=mean(z))
randomDF <- read.csv("../rationalModels/random.csv") #load random model

p3a <- ggplot(trialDF, aes(x=trial, y = meanScore, color = context)) +
  stat_summary(fun.y = mean, geom = 'line')+
  stat_summary(fun.data = mean_se,aes ( fill=context), geom = 'ribbon', alpha = 0.7, color=NA) +
  stat_summary(data = randomDF, aes(x=trial, y = meanReward), fun.y = mean, geom='line', color = 'black', fill=NA, linetype = 'dashed')+
  #geom_hline(yintercept =  50, linetype = 'dashed', color = 'black')+
  facet_grid(~environment)+
  scale_fill_brewer(palette = "Dark2", name="Task" ) +
  scale_color_brewer(palette = "Dark2", name="Task") +
  #coord_cartesian(ylim=c(49,95))+
  ylab("Mean Reward \u00B1SE") + 
  scale_x_continuous(breaks = round(seq(0,20, by = 5),1))+
  xlab("Trial")+
  theme(legend.position=c(1, 0.15), legend.justification=c(1,0), strip.background=element_blank(), legend.key=element_blank(), legend.background=element_blank())
## Warning: Ignoring unknown parameters: fill
p3a

#Mean score over round
roundDF <- ddply(df, .(id, context, environment, round), plyr::summarize, meanScore=mean(z))

pRound <- ggplot(roundDF, aes(x=round, y = meanScore, color = context )) +
  stat_summary(fun.y = mean, geom = 'line')+
  stat_summary(fun.data = mean_se,aes(fill=context), geom = 'ribbon', alpha = 0.7, color=NA) +
  stat_summary(data = randomDF, aes(x=trial, y = meanReward), fun.y = mean, geom='line', color = 'black', fill=NA, linetype = 'dashed')+
  coord_cartesian(xlim=c(0,9))+
  facet_grid(~environment)+
  scale_fill_brewer(palette = "Dark2", name="Task") +
  scale_color_brewer(palette = "Dark2", name="Task") +
  ylab("Mean Reward \u00B1SE") + 
  scale_x_continuous(breaks = round(seq(0,9, by = 2),1))+
  #coord_cartesian(ylim=c(20,50))+
  xlab("Round")+
  theme(legend.position=c(0.05, 0.1), legend.justification=c(0,0), strip.background=element_blank(), legend.background=element_blank(), legend.key=element_rect(color=NA))
## Warning: Ignoring unknown parameters: fill
pRound

Heatmap of clicks

Let's look for patterns in how people searched the input space. Unsurprisingly, there is a preference for corners and edges. Yellow is mapped to random chance (a selection probability of 1/64, since there are 64 options), so orange and red indicate options selected more often than chance, while green and blue indicate options selected less often than chance.

#Heatmap of clicks
#spatial and spatial
spatialCounts <- ddply(subset(df, context == 'Spatial'), .(x, y), nrow)
names(spatialCounts) <- c("X", "Y", "Freq")
spatialCounts$Prob <- spatialCounts$Freq / sum(spatialCounts$Freq)

conceptCounts <- ddply(subset(df, context == 'Conceptual'), .(x, y), nrow)
names(conceptCounts) <- c("X", "Y", "Freq")
conceptCounts$Prob <- conceptCounts$Freq / sum(conceptCounts$Freq)

maxFreq <- max(max(spatialCounts$Prob), max(conceptCounts$Prob))

heatmapSpatial <- ggplot(spatialCounts, aes(x=X, y = Y, fill=Prob)) +
  geom_tile()+
  scale_fill_distiller(palette = "Spectral", name = 'Freq', values=rescale(c(0,1/64,maxFreq)),limits = c(0,maxFreq),labels = scales::percent_format(accuracy = 1))+
  theme_classic() +
  coord_equal() +
  theme(strip.background=element_blank(), legend.key=element_rect(color=NA), axis.line=element_blank(),axis.text.x=element_blank(),
        axis.text.y=element_blank(),axis.ticks=element_blank(), panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),plot.background=element_blank())+
  ggtitle('Spatial Heatmap')
heatmapSpatial

conceptualHeatmap <- ggplot(conceptCounts, aes(x=X, y = Y, fill=Prob)) +
  geom_tile()+
  scale_fill_distiller(palette = "Spectral",name = 'Freq', values=rescale(c(0,1/64,maxFreq)), limits = c(0,maxFreq),labels = scales::percent_format(accuracy = 1))+
  theme_classic() +
  xlab('Rotation')+
  ylab('Stripes')+
  coord_equal() +
  theme(strip.background=element_blank(), legend.key=element_rect(color=NA), axis.line=element_blank(),axis.text.x=element_blank(),
        axis.text.y=element_blank(),axis.ticks=element_blank(), panel.background=element_blank(),panel.border=element_blank(),panel.grid.major=element_blank(),
        panel.grid.minor=element_blank(),plot.background=element_blank())+
  ggtitle('Conceptual Heatmap')
conceptualHeatmap

Reaction times

We can also analyze participant reaction times. Participants were slower in the conceptual task (\(t(128)=8.3\), \(p<.001\), \(d=0.7\), \(BF>100\)), but there were no differences across environments (\(t(256)=0.7\), \(p=.502\), \(d=0.08\), \(BF=.17\)).
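The chunk computing these tests is not shown; a sketch (assuming mean RT per participant, task, and environment; the original may have used a different summary statistic) is:

#Sketch of the RT comparisons
rtSummary <- ddply(subset(df, ts > 0), ~id+context+environment, plyr::summarize, meanRT = mean(ts))
ttestPretty(subset(rtSummary, context == 'Conceptual')$meanRT, subset(rtSummary, context == 'Spatial')$meanRT, paired=T) #task difference
ttestPretty(subset(rtSummary, environment == 'Smooth')$meanRT, subset(rtSummary, environment == 'Rough')$meanRT, var.equal = TRUE) #environment difference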

#Individual participant quartile splits
df$prevRewardValue <- NA
for (pid in unique(df$id)){
  subd <- subset(df, id==pid)
  xs <- quantile(subd$previousReward, probs=0:4/4, na.rm = T)
  df[df$id==pid,'prevRewardValue'] <- cut(subd$previousReward, breaks=xs,labels=c("Q1", "Q2", "Q3", "Q4") )
}

df$prevRewardValue <- factor(df$prevRewardValue)
levels(df$prevRewardValue) <- c("Q1", "Q2", "Q3", "Q4")
colfunc<-colorRampPalette(c("#0D0887FF", "#CC4678FF", "#F0F921FF"))
p11 <- ggplot(subset(df, ts>0 & !is.na(prevRewardValue)), aes(x=ts/1000, y=prevRewardValue, fill = prevRewardValue))+
  geom_density_ridges()+
  xlab('RT in seconds (log scale)')+
  scale_color_manual(values = colfunc(5), name="")+
  scale_fill_manual(values = colfunc(5), name="")+
  scale_x_log10()+
  annotation_logticks(sides='b')+
  facet_grid(~context, labeller = as_labeller(contextLabels))+
  coord_cartesian(xlim = c(0.1,50))+
  theme_classic()+
  scale_y_discrete(expand = c(0.01, 0))+
  theme(legend.position='none', strip.background=element_blank(), legend.key=element_rect(color=NA))+
  ylab('Previous Reward')
p11
## Picking joint bandwidth of 0.0391
## Picking joint bandwidth of 0.0354

There doesn't seem to be any connection between reaction time and previous reward value. Any influence of deliberation on reaction time is likely washed out by the larger influence of trajectory length (the number of steps needed to reach the selected option).
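As a rough check of this claim (not part of the original output), one could compare how strongly log RT correlates with trajectory length versus with the previous reward:

#Sketch: RT should relate more strongly to trajectory length than to previous reward
rtDat <- subset(df, ts > 0 & steps <= 20 & !is.na(previousReward))
corTestPretty(log(rtDat$ts), rtDat$steps)
corTestPretty(log(rtDat$ts), rtDat$previousReward)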

ggplot(subset(df,steps<=20), aes(x =steps,  y = ts/1000, color = context, fill = context))+ #RT in seconds; the log scaling is applied by scale_y_log10() below
  #geom_smooth(fill=NA)+
  stat_summary(fun.y = mean, geom = "point") + 
  stat_summary(fun.data = mean_cl_boot, geom = "errorbar") +
  #coord_cartesian(xlim=c(0,20), ylim=c(4,10)) + #Tukey outlier criterion indicates outliers above 20; min(boxplot.stats(df$steps)$out)
  #facet_grid(~environment)+
  scale_fill_brewer(palette = "Dark2", name="Task") +
  scale_color_brewer(palette = "Dark2", name="Task") +
  theme(legend.position=c(0.05,1),legend.justification = c(0,1), strip.background=element_blank(), legend.background=element_blank(), legend.key=element_rect(color=NA))+
  ylab("RT in seconds (log scale)")+
  xlab('Trajectory Length')+
  scale_y_log10()+
  annotation_logticks(sides='l')

Final Plots

Main text

#completeplot <- plot_grid(p3a, p1b, p4alt, trajectoryplot, pGeneralization, p5,conceptualHeatmap, heatmapSpatial , ncol=2, labels = "auto")
completeplot <- cowplot::plot_grid(p1a, p1b, pOrder ,p3a,  p4alt, p5alt, ncol=2, labels = "auto")
## Warning: Removed 1 rows containing non-finite values (stat_density).
## Warning: Removed 4966 rows containing non-finite values (stat_summary).
completeplot

ggsave('../plots/behavioralplot.pdf',completeplot, width = 10, height = 8, unit='in', useDingbats=F)

SI

#SI plots
trainingRound <- cowplot::plot_grid(trajComplete, trajp1a, trajheatmap, pError,  ncol=2, labels = 'auto')
trainingRound

ggsave(filename = '../plots/TrainingPlots.pdf', trainingRound, width = 9,height = 6, units = 'in', useDingbats=FALSE)
trajectoryPlots <- cowplot::plot_grid(trajectoryplot,pTrajLengthReward,p6, inputdir,pAttentionTaskOrder, pScoreAttention,  ncol=3, labels="auto")
## Warning: Removed 843 rows containing non-finite values (stat_bin).
## Warning: Removed 8 rows containing missing values (geom_bar).
## Warning: Removed 7288 rows containing non-finite values (stat_summary).
trajectoryPlots

ggsave(filename = '../plots/TrajectoryPlots.pdf', trajectoryPlots, width = 12,height = 6, units = 'in', useDingbats=FALSE)
heatmaps <- cowplot::plot_grid(conceptualHeatmap, heatmapSpatial, labels = "auto")
heatmaps

ggsave(filename = '../plots/heatmaps.pdf', heatmaps, width = 8,height = 4, units = 'in', useDingbats=FALSE)