DataSim

Load and process data

data <- read_csv("data/simplified_data.csv") %>%
  mutate(size = size_delta/10,
         color = factor(color),
         contrast = factor(contrast)) %>%
  add_contrast("color", colnames = "color.e") %>%
  add_contrast("contrast", colnames = "contrast.e")

## Rows: 6012 Columns: 5

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): subjectID, contrast
## dbl (2): size_delta, response
## lgl (1): color

## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

7 subjects
color = T/F (within subject)
contrast = +/- (between subject)
size_delta = seq(-80,+80, 10) w/in subject
response = 0/1

ggplot(data, aes(x = size_delta, y = response,
                 color = color)) +
  facet_grid(~subjectID) +
  stat_summary()

## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

Model to get parameter estimate

model <- glmer(response ~ color.e * contrast.e * size +
                 (1 | subjectID), 
               data = data, 
               family = binomial(link = "logit"))

summary(model)

## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: response ~ color.e * contrast.e * size + (1 | subjectID)
##    Data: data
## 
##      AIC      BIC   logLik deviance df.resid 
##   5204.2   5264.6  -2593.1   5186.2     6003 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -8.4600 -0.4786  0.1470  0.4775  6.4352 
## 
## Random effects:
##  Groups    Name        Variance Std.Dev.
##  subjectID (Intercept) 0.03136  0.1771  
## Number of obs: 6012, groups:  subjectID, 7
## 
## Fixed effects:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              0.19934    0.07690   2.592  0.00954 ** 
## color.e                  1.10022    0.07325  15.020  < 2e-16 ***
## contrast.e               0.08069    0.15381   0.525  0.59988    
## size                    -0.02031    0.01046  -1.941  0.05221 .  
## color.e:contrast.e       0.40803    0.14652   2.785  0.00536 ** 
## color.e:size             0.01374    0.02087   0.658  0.51026    
## contrast.e:size          0.82412    0.02095  39.332  < 2e-16 ***
## color.e:contrast.e:size -0.01513    0.04174  -0.362  0.71701    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) color. cntrs. size   clr.:. clr.:s cntr.:
## color.e      0.027                                          
## contrast.e  -0.143  0.006                                   
## size         0.000  0.005  0.043                            
## clr.:cntrs.  0.006 -0.144  0.027  0.252                     
## color.e:siz  0.002  0.000  0.119  0.007  0.088              
## contrst.:sz  0.043  0.252  0.000 -0.222  0.006  0.037       
## clr.:cntr.:  0.119  0.088  0.002  0.037  0.000 -0.224  0.006

pred_data <- predict(model, type = "response")

ggplot(data, aes(x = size_delta, y = pred_data,
                 color = color)) +
  facet_grid(~subjectID) +
  stat_summary()

## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`
## No summary function supplied, defaulting to `mean_se()`

est <- broom.mixed::tidy(model)

Data Exclusions by contrast

included <- count(data, subjectID, color, contrast, size_delta) %>% 
  mutate(n = ifelse(size_delta == 0, n/2, n)) %>%
  group_by(subjectID, contrast) %>%
  summarise(mean = mean(n), .groups = "drop") %>%
  summarise(mean = mean(mean), .groups = "drop") %>%
  pull(mean)

Fake some data

# logit functions

logit <- function(x) { log(x / (1 - x)) }
inv_logit <- function(x) { 1 / (1 + exp(-x)) }

# simulation function

simdata <- function(fixef,
                    subj_n = 100, 
                    trial_n = 24,
                    excluded = 0) {
  add_random(subjectID = subj_n) %>%
    add_random(trial = trial_n) %>%
    add_within(contrast = c("positive", "negative")) %>%
    add_within(color = c(TRUE, FALSE)) %>%
    add_within(size_delta = seq(0, 80, 10)) %>%
    add_within(sign_delta = c("+", "-")) %>%
    add_ranef("subjectID", sub_i = fixef$`sd__(Intercept)`) %>%
    mutate(size_delta = as.character(size_delta) %>% as.integer() *
             ifelse(sign_delta == "+", 1, -1),
           size = size_delta/10,
           color = factor(color),
           contrast = factor(contrast)) %>%
    add_contrast("color", colnames = "color.e") %>%
    add_contrast("contrast", colnames = "contrast.e") %>%
    mutate(
      # calculate gaussian DV
      Y = fixef$`(Intercept)` + sub_i + 
        fixef$color.e * color.e +
        fixef$contrast.e * contrast.e +
        fixef$size * size +
        fixef$`color.e:contrast.e` * color.e * contrast.e +
        fixef$`color.e:size` * color.e * size +
        fixef$`contrast.e:size` * contrast.e * size +
        fixef$`color.e:contrast.e:size` * color.e * contrast.e * size,
      pr = inv_logit(Y), # transform to probability of getting 1
      response = rbinom(nrow(.), 1, pr) # sample from bernoulli distribution
    ) %>%
    select(subjectID, 
           contrast, color, size_delta, 
           color.e, contrast.e, size, 
           response) %>%
    messy(prop = excluded, response)
}

# set parameters
subj_n <- 100
trial_n <- 24
excluded <- (trial_n - included)/trial_n

# fixed and random effects
fixef <- setNames(object = est$estimate, 
                  nm = est$term) %>%
  as.list()

# simulate one dataset
simdata1 <- simdata(fixef = fixef, 
                    subj_n = subj_n, 
                    trial_n = trial_n,
                    excluded = excluded)

Analysis PSE

analyse <- function(simdata1) {
  simplified_PSEs = simdata1 %>% 
    group_by(color, subjectID, contrast) %>% 
    nest() %>% 
    mutate(data = map(data, ~ glm(response ~ size_delta, family=binomial(link='logit'), data = .))) %>%
    # get the coefficients
    mutate(data = map(data, ~ coefficients(.))) %>% 
    # convert coefficients from weird format
    mutate(data = map(data, ~ as_tibble(as.list(.)))) %>% 
    # convert back to a normal table with 1 row per boot * condition * subject
    unnest(data) %>% 
    # make coefficients easier to manage
    rename(intercept = `(Intercept)`) %>% 
    mutate(pse = -intercept/size_delta)
  
  afex::aov_ez(
    id="subjectID", 
    dv="pse", 
    simplified_PSEs, 
    within=c("color", "contrast"), 
    anova_table = list(correction="none", es = "pes")) %>% 
    # use pes to calculate cohen's f
    `$`('anova_table') %>% 
    mutate(cohens_f = sqrt(pes/(1-pes))) %>% 
    # why are the row names so hard to access?
    as.data.frame() %>% 
    rownames_to_column("effect")
}

Power analysis

power_table <- map_df(1:10, ~{ simdata(fixef) %>% analyse() })

ggplot(power_table, aes(x = cohens_f, color = effect)) +
  geom_density()

DataSim

Lisa DeBruine

19/10/2021

Load and process data

Model to get parameter estimate

Data Exclusions by contrast

Fake some data

Analysis PSE

Power analysis