Agricultural Understanding

Author

Franz Wortha

Setup

Load packages and additional functions

corrplot 0.92 loaded

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
**************************************************************************************************
EFA.dimensions 0.1.8.4

Please contact Brian O'Connor at brian.oconnor@ubc.ca if you have questions or suggestions.
**************************************************************************************************
This is lavaan 0.6-18
lavaan is FREE software! Please report any bugs.
Version:  1.1.1
We work hard to write this free software. Please help us get credit by citing: 

Hallquist, M. N. & Wiley, J. F. (2018). MplusAutomation: An R Package for Facilitating Large-Scale Latent Variable Analyses in Mplus. Structural Equation Modeling, 25, 621-638. doi: 10.1080/10705511.2017.1402334.

-- see citation("MplusAutomation").

Attaching package: 'psych'
The following object is masked from 'package:lavaan':

    cor2cov
The following object is masked from 'package:effectsize':

    phi
Loaded custom functions and objects:  calculate_mean_ci cfa_from_list env_init grey_white mean_ci paper_names rename_paper robust_indices split_data wlmsv_cfa

Load data set

df <- read.csv("./agricultural_understanding.csv")
# Filter rows where all items are missing
items <- grep("^f\\d{1,2}", names(df), value = T)
df <- df[rowSums(is.na(df[, items])) != ncol(df[, items]), ]
head(df, 5)
   id geschlecht geburtsjahr alter altersgruppen f1_1 f1_2a f1_2b f1_2c f1_2d
1 AP1          1        1961    58             5    2     4     4     5     4
2 AP2          0        1960    59             5    6     5     5     5     5
3 AP3          1        1954    65             6    5     4     5     5     4
4 AP4          1        1973    46             4    6     5    NA     5     5
5 AP5          0        1971    48             4    5     2     4     4     3
  f1_2e f1_2f f1_2g f1_2h f1_2i f1_2j f1_2k f1_2m f1_2n f2_1a f2_1b f2_1c f2_1d
1     2     1     4     4     4     2     2     1     5     5     2     3     4
2     5     2     3     5     5     5     4     2     2     3     5     3     1
3     5     2     4     5     5     5     5     1     5     5     5     5     5
4     5    NA     5     5     5     5     5    NA     5     5     5     2     3
5     1     1     2     5     5     5     2     1     5     5     4     5     5
  f2_1e f2_1f f2_1g f2_1h f2_1i f2_1j f2_1k f2_1l f2_1m f2_7 f2_9a f2_9b f2_9c
1     5     5     5     5     5     4     3     3     5    3     0     0     0
2     5     5     5     5    NA     5     5     5     5    7     0     0     0
3     5     5     5     5     5     4     5     5     5    7     0     0     0
4     5     5     5     3    NA     4     5     5     3    7     0     0     0
5     5     5     5     5     5     3     4     3     3    4     0     0     0
  f2_9d f2_9e f2_9f f2_9g_a f2_10a f2_10b f2_10c f2_10d f2_10e f2_10f f2_10g
1     0     1     1       0      3      3      3      3      3      4      3
2     0     0     1       1      1      5      5      1      1      1      5
3     0     1     1       0      1      5      5      1      5      1      5
4     0     1     1       0      3      4      5      2      2      3      5
5     0     0     0       0      2      3      4      1      4      2      5
  f2_10h f2_10i f2_10j f2_10k f2_10l f2_10m f2_10n f2_10o f2_10p f2_10q
1      3      4      3      3      3      3      3      3      2      3
2      1      5      5      1      2      5      5      3      1      4
3      2      5      5      1      1      5      1      5      1      4
4      3      4      5      1      1      5      4      5      5      5
5      2      5      5     NA      3      3      2      3      2      2

Prepare and split data

sprintf(
  "Split sizes: E1: %i, E2: %i C: %i",
  nrow(mplus_split$e1),
  nrow(mplus_split$e2),
  nrow(mplus_split$c
  ))
[1] "Split sizes: E1: 685, E2: 685 C: 685"

MAP

map_e1 <- MAP(df_split$e1[, items], verbose = F)

Cases with missing values were found and removed from the data matrix.

There were 477 cases with missing values, and 3458 missing values in total.
cat(sprintf("N Factors (e1): %i %i\n", map_e1$NfactorsMAP, map_e1$NfactorsMAP4))
N Factors (e1): 5 5
map_e2 <- MAP(df_split$e2[, items], verbose = F)

Cases with missing values were found and removed from the data matrix.

There were 482 cases with missing values, and 3133 missing values in total.
cat(sprintf("N Factors (e2): %i %i\n", map_e2$NfactorsMAP, map_e2$NfactorsMAP4))
N Factors (e2): 4 5

Scales

Items are upper-case because they are copied from Excel in that format

ED 1

ev1 <- c(
  "F1_1",
  "F1_2A",
  "F1_2B",
  "F1_2C",
  "F1_2D",
  "F1_2E",
  "F1_2F",
  "F1_2G",
  "F1_2H",
  "F1_2I",
  "F1_2J",
  "F1_2K"
) %>% tolower
# Print names matching publication
sapply(ev1, function(n){paper_names[[n]]})
 f1_1 f1_2a f1_2b f1_2c f1_2d f1_2e f1_2f f1_2g f1_2h f1_2i f1_2j f1_2k 
 "i1" "i2a" "i2b" "i2c" "i2d" "i2e" "i2f" "i2g" "i2h" "i2i" "i2j" "i2k" 

ED II

ev2 <- c(
  "F1_2M",
  "F1_2N",
  "F2_9A",
  "F2_9B",
  "F2_9C",
  "F2_9D",
  "F2_9F"
) %>% tolower
# Print names matching publication
sapply(ev2, function(n){paper_names[[n]]})
f1_2m f1_2n f2_9a f2_9b f2_9c f2_9d f2_9f 
"i2l" "i2m" "i4a" "i4b" "i4c" "i4d" "i4e" 

CD

kv <- c(
  "F2_1E",
  "F2_1G",
  "F2_1H",
  "F2_1I",
  "F2_1J",
  "F2_1K",
  "F2_1L",
  "F2_1M",
  "F2_10I",
  "F2_10J",
  "F2_10M",
  "F2_10O"
) %>% tolower
# Print names matching publication
sapply(kv, function(n){paper_names[[n]]})
 f2_1e  f2_1g  f2_1h  f2_1i  f2_1j  f2_1k  f2_1l  f2_1m f2_10i f2_10j f2_10m 
 "i3a"  "i3b"  "i3c"  "i3d"  "i3e"  "i3f"  "i3g"  "i3h"  "i5g"  "i5h"  "i5i" 
f2_10o 
 "i5j" 

OD

mv1 <- c(
  "F2_10A",
  "F2_10B",
  "F2_10D",
  "F2_10E",
  "F2_10F",
  "F2_10H",
  "F2_10P"
) %>% tolower
# Print names matching publication
sapply(mv1, function(n){paper_names[[n]]})
f2_10a f2_10b f2_10d f2_10e f2_10f f2_10h f2_10p 
 "i5a"  "i5b"  "i5c"  "i5d"  "i5e"  "i5f"  "i5k" 

Initial model fit

Fit on confirmatory subset

scales_final <- list(ev1 = ev1,
                     ev2 = ev2,
                     kv = kv,
                     mv1 = mv1)
model_final <- cfa_from_list(scales_final)

fit_final_c <- wlmsv_cfa(model_final, df_split$c)
fitMeasures(fit_final_c, robust_indices)
         rmsea.scaled rmsea.ci.lower.scaled rmsea.ci.upper.scaled 
                0.050                 0.047                 0.053 
           cfi.scaled            tli.scaled 
                0.898                 0.891 

Fit on full data set

fit_final <- wlmsv_cfa(model_final, df)
fitMeasures(fit_final, robust_indices)
         rmsea.scaled rmsea.ci.lower.scaled rmsea.ci.upper.scaled 
                0.056                 0.054                 0.057 
           cfi.scaled            tli.scaled 
                0.882                 0.874 

Internal consistency

int_cons <- list(
  "ev1" = psych::alpha(df[, ev1]),
  "ev2" = psych::alpha(df[, ev2]),
  "kv" = psych::alpha(df[, kv]),
  "mv" = psych::alpha(df[, mv1], check.keys = T, warnings =
                        F)
)

lapply(int_cons, function(a)
  round(as.numeric(a$total[2]), 2)) %>%
  print
$ev1
[1] 0.86

$ev2
[1] 0.54

$kv
[1] 0.78

$mv
[1] 0.79

Measurement invariance

Weak Invariance

scales_inv <- list(
  ev1 = ev1,
  ev2 = ev2,
  kv = kv[!kv %in% c("f2_1e", "f2_1l")],
  mv1 = mv1
)
model_inv <- cfa_from_list(scales_inv)

m1 <- cfa(
  model_inv,
  df,
  group = "geschlecht",
  ordered = T,
  estimator = "WLSMV",
  parameterization = "theta",
  missing = "pairwise"
)
# Weak invariance
m2 <- cfa(
  model_inv,
  df,
  group = "geschlecht",
  group.equal = "loadings",
  ordered = T,
  estimator = "WLSMV",
  parameterization = "theta",
  missing = "pairwise"
)

lavTestLRT(m1, m2)

Scaled Chi-Squared Difference Test (method = "satorra.2000")

lavaan->lavTestLRT():  
   lavaan NOTE: The "Chisq" column contains standard test statistics, not the 
   robust test that should be reported per model. A robust difference test is 
   a function of two standard (not robust) statistics.
     Df AIC BIC  Chisq Chisq diff Df diff Pr(>Chisq)   
m1 1176         5968.1                                 
m2 1208         6117.7      61.41      32   0.001338 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Partial invariance

partial_loadings <- c(
  "ev1=~f1_2e",
  "ev1=~f1_2f",
  "ev1=~f1_2g",
  "ev1=~f1_2i"
)

m2_adj <- cfa(
  model_inv,
  df,
  group = "geschlecht",
  group.equal = c("loadings"),
  group.partial = partial_loadings,
  ordered = T,
  estimator = "WLSMV",
  parameterization = "theta",
  missing = "pairwise"
)

lavTestLRT(m1, m2_adj)

Scaled Chi-Squared Difference Test (method = "satorra.2000")

lavaan->lavTestLRT():  
   lavaan NOTE: The "Chisq" column contains standard test statistics, not the 
   robust test that should be reported per model. A robust difference test is 
   a function of two standard (not robust) statistics.
         Df AIC BIC  Chisq Chisq diff Df diff Pr(>Chisq)
m1     1176         5968.1                              
m2_adj 1204         6051.8     34.829      28     0.1749

Scalar Invariance

m3 <- cfa(
  model_inv,
  df,
  group = "geschlecht",
  group.equal = c("intercepts", "loadings"),
  group.partial = partial_loadings,
  ordered = T,
  estimator = "WLSMV",
  parameterization = "theta",
  missing = "pairwise",
  meanstructure = T
)

lavTestLRT(m2_adj, m3)

Scaled Chi-Squared Difference Test (method = "satorra.2000")

lavaan->lavTestLRT():  
   lavaan NOTE: The "Chisq" column contains standard test statistics, not the 
   robust test that should be reported per model. A robust difference test is 
   a function of two standard (not robust) statistics.
         Df AIC BIC  Chisq Chisq diff Df diff Pr(>Chisq)    
m2_adj 1204         6051.8                                  
m3     1294         6310.5     338.34      90  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Establish partial invariance

partial_thresholds <- c(
  # f1_1
  "f1_1|t1", "f1_1|t2", "f1_1|t3", "f1_1|t4", "f1_1|t5",
  # f1_2a
  "f1_2a|t1", "f1_2a|t2", "f1_2a|t3", "f1_2a|t4",
  # f1_2b
  "f1_2b|t1", "f1_2b|t2", "f1_2b|t3", "f1_2b|t4",
  # f1_2c
  "f1_2c|t1", "f1_2c|t2", "f1_2c|t3", "f1_2c|t4",
  # f1_2d
  "f1_2d|t1", "f1_2d|t2", "f1_2d|t3", "f1_2d|t4",
  # f1_2e
  "f1_2e|t1", "f1_2e|t2", "f1_2e|t3", #"f1_2e|t4",
  # f1_2f
  "f1_2f|t1", "f1_2f|t2", "f1_2f|t3", #"f1_2f|t4",
  # f1_2g
  "f1_2g|t1", "f1_2g|t2", "f1_2g|t3", #"f1_2g|t4",
  # f1_2h
  "f1_2h|t1", "f1_2h|t2", "f1_2h|t3", #"f1_2h|t4",
  # f1_2i
  "f1_2i|t1", "f1_2i|t2", "f1_2i|t3", #"f1_2i|t4",
  # f1_2j
  "f1_2j|t1", "f1_2j|t2", "f1_2j|t3", #"f1_2j|t4",
  # f1_2k
  "f1_2k|t1", "f1_2k|t2", "f1_2k|t3"#, #"f1_2k|t4"
)
##
m3_adj <- cfa(
  model_inv,
  df,
  group = "geschlecht",
  group.equal = c("thresholds", "loadings"),
  group.partial = c(partial_loadings, partial_thresholds),
  ordered = T,
  estimator = "WLSMV",
  parameterization = "theta",
  missing = "pairwise"
)

lavTestLRT(m2_adj, m3_adj)

Scaled Chi-Squared Difference Test (method = "satorra.2000")

lavaan->lavTestLRT():  
   lavaan NOTE: The "Chisq" column contains standard test statistics, not the 
   robust test that should be reported per model. A robust difference test is 
   a function of two standard (not robust) statistics.
         Df AIC BIC  Chisq Chisq diff Df diff Pr(>Chisq)  
m2_adj 1204         6051.8                                
m3_adj 1252         6099.1     62.831      48    0.07388 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fit measures of partially invariant model

fitMeasures(m3_adj, robust_indices)
         rmsea.scaled rmsea.ci.lower.scaled rmsea.ci.upper.scaled 
                0.052                 0.051                 0.054 
           cfi.scaled            tli.scaled 
                0.894                 0.893 

Sex comparison

# Adjust names of df to match the paper
df_plot <- df %>%
  rename_all(function(x)
    unlist(lapply(names(.), rename_paper)))

# Add scale values from the partially invariant model to the data frame
df_full <- df_plot %>%
  rename(age = alter,
         sex = geschlecht,
         connectedness = `i6`) %>%
  rename_all(str_to_title)
lv3 <- lavPredict(m3_adj)
df_full[df_full$Sex == 1, c("ED I", "ED II", "CD", "OD")] <- lv3$`1`
df_full[df_full$Sex == 0, c("ED I", "ED II", "CD", "OD")] <- lv3$`0`

MANOVA

summary(
  m_sex <- manova(cbind(`ED I`, `ED II`, CD, OD) ~ as.factor(Sex), df_full),
  test = "Pillai"
)
                 Df   Pillai approx F num Df den Df    Pr(>F)    
as.factor(Sex)    1 0.037391   19.907      4   2050 4.272e-16 ***
Residuals      2053                                              
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Effect size

effectsize::eta_squared(m_sex, alternative = "two.sided")
# Effect Size for ANOVA (Type I)

Parameter      | Eta2 (partial) |       95% CI
----------------------------------------------
as.factor(Sex) |           0.04 | [0.02, 0.05]

Posthoc

summary.aov(m_sex)
 Response ED I :
                 Df Sum Sq Mean Sq F value Pr(>F)
as.factor(Sex)    1   1.02 1.02004  2.3162 0.1282
Residuals      2053 904.11 0.44038               

 Response ED II :
                 Df Sum Sq Mean Sq F value    Pr(>F)    
as.factor(Sex)    1  10.36 10.3571  44.023 4.139e-11 ***
Residuals      2053 483.00  0.2353                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

 Response CD :
                 Df Sum Sq Mean Sq F value    Pr(>F)    
as.factor(Sex)    1   31.9  31.935  17.016 3.855e-05 ***
Residuals      2053 3853.0   1.877                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

 Response OD :
                 Df  Sum Sq Mean Sq F value Pr(>F)
as.factor(Sex)    1    0.77 0.76616  1.0759 0.2997
Residuals      2053 1461.99 0.71212               

Effect sizes

effectsize::eta_squared(aov(`ED II` ~ Sex, df_full), alternative = "two.sided")
For one-way between subjects designs, partial eta squared is equivalent
  to eta squared. Returning eta squared.
# Effect Size for ANOVA

Parameter | Eta2 |       95% CI
-------------------------------
Sex       | 0.02 | [0.01, 0.03]
effectsize::eta_squared(aov(`CD` ~ Sex, df_full), alternative = "two.sided")
For one-way between subjects designs, partial eta squared is equivalent
  to eta squared. Returning eta squared.
# Effect Size for ANOVA

Parameter |     Eta2 |       95% CI
-----------------------------------
Sex       | 8.22e-03 | [0.00, 0.02]

Save overview table

scales_by_sex <- df_full %>%
  mutate(Sex = recode(Sex, `0` = "Female", `1` = "Male")) %>%
  group_by(Sex) %>%
  reframe(
    across(c("ED I", "ED II", "CD", "OD"), mean_ci)
  )
# Save results 
write.csv(scales_by_sex, "./scale_means_by_sex.csv")

Plots

Corplot

c_mat <- df_full %>%
  select(c(Age, Sex, `ED I`, `ED II`, CD, OD, Connectedness)) %>%
  cor(use = "pairwise.complete.obs")

p_mat <- df_full %>%
  select(c(Age, Sex, `ED I`, `ED II`, CD, OD, Connectedness)) %>%
  cor.mtest(conf.level = 0.95)

png(file = "./corplot.tiff",
    units = "in",
    width = 5,
    height = 5,
    res=300
    )

corrplot(
  c_mat,
  p.mat = p_mat$p,
  method = 'circle',
  type = 'lower',
  col = grey_white(200),
  tl.col = "black",
  cl.pos = FALSE,
  insig = 'blank',
  addCoef.col = 'black',
  number.cex = 0.8,
  diag = FALSE,
  tl.srt = 45,
  sig.level = .05
)
dev.off()

Model plot

plot_scales <- list(
  `ED I` = unlist(lapply(ev1, rename_paper), use.names = FALSE),
  `ED II` = unlist(lapply(ev2, rename_paper), use.names = FALSE),
  CD = unlist(lapply(kv, rename_paper), use.names = FALSE),
  OD = unlist(lapply(mv1, rename_paper), use.names = FALSE)
)
plot_model <- cfa_from_list(plot_scales)
plot_fit <- invisible(wlmsv_cfa(plot_model, df_plot))

semPlot::semPaths(
  plot_fit,
  "par",
  layout = "tree2",
  edge.color = "black",
  whatLabels = "hide",
  sizeLat = 4,
  sizeMan = 2,
  residuals = FALSE,
  measurementLayout = TRUE,
  filetype = "png",
  width = 16,
  height = 9,
  dpi = 300
)