Difference between revisions of "SMHS BigDataBigSci SEM Ex2"
Line 1: | Line 1: | ||
==[[SMHS_BigDataBigSci_SEM| Structural Equation Modeling (SEM)]] - Hands-on Example 2 (Parkinson’s Disease data) == | ==[[SMHS_BigDataBigSci_SEM| Structural Equation Modeling (SEM)]] - Hands-on Example 2 (Parkinson’s Disease data) == | ||
+ | # Data: PPMI Integrated (imaging, demographics, genetics, clinical and cognitive (UPDRS) data. | ||
+ | # Dinov et al., 2015 | ||
+ | <mark>INSERT CHART HERE</mark> | ||
+ | # install.packages("lavaan") | ||
+ | library(lavaan) | ||
+ | #load data 05_PPMI_top_UPDRS_Integrated_LongFormat1.csv ( dim(myData) 1764 31 ) | ||
+ | # setwd("/dir/") | ||
+ | myData <- read.csv("https://umich.instructure.com/files/330397/download?download_frd=1&verifier=3bYRT9FXgBGMCQv8MNxsclWnMgodiJRYo3ODFtDq",header=TRUE) | ||
+ | |||
+ | # dichotomize the "ResearchGroup" variable | ||
+ | myData$\$$ResearchGroup <- ifelse(myData$\$$ResearchGroup == "Control", 1, 0) | ||
+ | |||
+ | # Data elements: Index FID_IID L_cingulate_gyrus_ComputeArea L_cingulate_gyrus_Volume | ||
+ | R_cingulate_gyrus_ComputeArea R_cingulate_gyrus_Volume L_caudate_ComputeArea | ||
+ | L_caudate_Volume R_caudate_ComputeArea R_caudate_Volume | ||
+ | L_putamen_ComputeArea L_putamen_Volume R_putamen_ComputeArea | ||
+ | R_putamen_Volume L_hippocampus_ComputeArea L_hippocampus_Volume R_hippocampus_ComputeArea | ||
+ | R_hippocampus_Volume cerebellum_ComputeArea | ||
+ | cerebellum_Volume L_fusiform_gyrus_ComputeArea L_fusiform_gyrus_Volume R_fusiform_gyrus_ComputeArea | ||
+ | R_fusiform_gyrus_Volume Sex Weight ResearchGroup Age chr12_rs34637584_GT chr17_rs11868035_GT chr17_rs11012_GT chr17_rs393152_GT | ||
+ | chr17_rs12185268_GT UPDRS_Part_I_Summary_Score_Baseline | ||
+ | UPDRS_Part_I_Summary_Score_Month_03 UPDRS_Part_I_Summary_Score_Month_06 UPDRS_Part_I_Summary_Score_Month_09 UPDRS_Part_I_Summary_Score_Month_12 UPDRS_Part_I_Summary_Score_Month_18 UPDRS_Part_I_Summary_Score_Month_24 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_03 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_06 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_09 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_12 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_18 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_24 UPDRS_Part_III_Summary_Score_Baseline UPDRS_Part_III_Summary_Score_Month_03 UPDRS_Part_III_Summary_Score_Month_06 UPDRS_Part_III_Summary_Score_Month_09 UPDRS_Part_III_Summary_Score_Month_12 UPDRS_Part_III_Summary_Score_Month_18 UPDRS_Part_III_Summary_Score_Month_24 X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_06 X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_12 X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_24 X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_06 X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_12 X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_24 | ||
+ | |||
+ | ====Validation of the measurement model==== | ||
+ | |||
+ | myData<-within(myData, { | ||
+ | L_cingulate_gyrus_ComputeArea <- lm(L_cingulate_gyrus_ComputeArea ~ L_cingulate_gyrus_Volume+R_cingulate_gyrus_ComputeArea+R_cingulate_gyrus_Volume+L_caudate_ComputeArea+L_caudate_Volume+R_caudate_ComputeArea+R_caudate_Volume+L_putamen_ComputeArea+L_putamen_Volume+R_putamen_ComputeArea+R_putamen_Volum e+L_hippocampus_ComputeArea+L_hippocampus_Volume+R_hippocampus_ComputeArea+R_hippocampus_Volume+cerebellum_ComputeArea+cerebellum_Volume+L_fusiform_gyrus_ComputeArea+L_fusiform_gyrus_Volume+R_fusiform_gyrus_ComputeArea+R_fusiform_gyru s_Volume, data=myData)$\$$residuals | ||
+ | Weight <- lm(Weight ~ Sex+ResearchGroup+Age+chr12_rs34637584_GT+chr17_rs11868035_GT+chr17_rs11012_GT+chr17_rs393152_GT+chr17_rs12185268_GT, data=myData)$\$$residuals | ||
+ | UPDRS_Part_I_Summary_Score_Baseline <- lm(UPDRS_Part_I_Summary_Score_Baseline ~ UPDRS_Part_I_Summary_Score_Month_03+UPDRS_Part_I_Summary_Score_Month_06+UPDRS_Part_I_Summary_Score_Month_09+UPDRS_Part_I_Summary_Score_Month_12+UPDRS_Part_I_Summary_Score_Month_18+UPDRS_Part_I_Summary_Score_Month_24+UPDRS_Part_II_Pati ent_Questionnaire_Summary_Score_Baseline+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_03+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_06+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_09+UPDRS_Part_II_Pa tient_Questionnaire_Summary_Score_Month_12+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_18+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_24+UPDRS_Part_III_Summary_Score_Baseline+UPDRS_Part_III_Summary_Score_Month_ 03+UPDRS_Part_III_Summary_Score_Month_06+UPDRS_Part_III_Summary_Score_Month_09+UPDRS_Part_III_Summary_Score_Month_12+UPDRS_Part_III_Summary_Score_Month_18+UPDRS_Part_III_Summary_Score_Month_24+X_Assessment_Non.Motor_Epworth_Sleepiness _Scale_Summary_Score_Baseline+X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_06+X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_12+X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_ Month_24+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_06+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short _Summary_Score_Month_12+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_24, data=myData)$\$$residuals | ||
+ | }) | ||
+ | |||
+ | ====Structural Model==== | ||
+ | |||
+ | # Next, proceed with the structural model including the residuals from data to account for effects of site. | ||
+ | |||
+ | Lavaan model specification: | ||
+ | |||
+ | formula type operator mnemonic | ||
+ | latent variable definition =~ is measured by | ||
+ | regression ~ is regressed on | ||
+ | (residual) (co)variance ~~ is correlated with | ||
+ | Intercept ~ 1 Intercept | ||
+ | |||
+ | For example, | ||
+ | myModel <- | ||
+ | <b># regressions</b> | ||
+ | y1 + y2 <mark>~</mark> f1 + f2 + x1 + x2 | ||
+ | f1 ~ f2 + f3 | ||
+ | f2 ~ f3 + x1 + x2 | ||
+ | |||
+ | <b># latent variable definitions</b> | ||
+ | f1 <mark>=~</mark> y1 + y2 + y3 | ||
+ | f2 =~ y4 + y5 + y6 | ||
+ | f3 =~ y7 + y8 + y9 + y10 | ||
+ | |||
+ | <b># variances and covariances</b> | ||
+ | y1 <mark>~~</mark> y1 | ||
+ | y1 ~~ y2 | ||
+ | f1 ~~ f2 | ||
+ | |||
+ | <b># intercepts</b> | ||
+ | y1 <mark>~</mark> 1 | ||
+ | f1 ~ 1 | ||
+ | model1 <- | ||
+ | ' | ||
+ | # latent variable definitions - defining how the latent variables are “manifested by” a set of observed | ||
+ | # (or manifest) variables, aka “indicators” | ||
+ | # (1) Measurement Model | ||
+ | Imaging =~ L_cingulate_gyrus_ComputeArea+L_cingulate_gyrus_Volume | ||
+ | DemoGeno =~ Weight+Sex+Age | ||
+ | UPDRS =~ UPDRS_Part_I_Summary_Score_Baseline+UPDRS_Part_I_Summary_Score_Month_03 | ||
+ | |||
+ | # (2) Regressions | ||
+ | ResearchGroup ~ Imaging + DemoGeno + UPDRS | ||
+ | ' | ||
+ | model2 <- | ||
+ | ' | ||
+ | # latent variable definitions - defining how the latent variables are “manifested by” a set of observed | ||
+ | # (or manifest) variables, aka “indicators” | ||
+ | # (1) Measurement Model | ||
+ | Imaging =~ L_cingulate_gyrus_ComputeArea+L_cingulate_gyrus_Volume+R_cingulate_gyrus_ComputeArea+R_cingulate_gyrus_Volume+L_caudate_ComputeArea+L_caudate_Volume+R_caudate_ComputeArea+R_caudate_Volume+L_putamen_ComputeArea+L_putamen_Volume+R_putam en_ComputeArea+R_putamen_Volume+L_hippocampus_ComputeArea+L_hippocampus_Volume+R_hippocampus_ComputeArea+R_hippocampus_Volume+cerebellum_ComputeArea+cerebellum_Volume+L_fusiform_gyrus_ComputeArea+L_fusiform_gyrus_Volume+R_fusiform_gyr us_ComputeArea+R_fusiform_gyrus_Volume | ||
+ | DemoGeno =~ Weight+Sex+Age+chr12_rs34637584_GT+chr17_rs11868035_GT+chr17_rs11012_GT+chr17_rs393152_GT+chr17_rs12185268_GT | ||
+ | UPDRS =~ UPDRS_Part_I_Summary_Score_Baseline+UPDRS_Part_I_Summary_Score_Month_03+UPDRS_Part_I_Summary_Score_Month_06+UPDRS_Part_I_Summary_Score_Month_09+UPDRS_Part_I_Summary_Score_Month_12+UPDRS_Part_I_Summary_Score_Month_18+UPDRS_Part_I_Summa ry_Score_Month_24+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_03+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_06+UPDRS_Part_II_Patient_Questionnaire_Sum mary_Score_Month_09+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_12+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_18+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_24+UPDRS_Part_III_Summary_Score_Baseline +UPDRS_Part_III_Summary_Score_Month_03+UPDRS_Part_III_Summary_Score_Month_06+UPDRS_Part_III_Summary_Score_Month_09+UPDRS_Part_III_Summary_Score_Month_12+UPDRS_Part_III_Summary_Score_Month_18+UPDRS_Part_III_Summary_Score_Month_24+X_Ass essment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline+X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_06+X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_12+X_Assessment_Non.Motor_Epw orth_Sleepiness_Scale_Summary_Score_Month_24+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_06+X_Assessment_Non.Motor_ Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_12+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_24 | ||
+ | |||
+ | # (2) Regressions | ||
+ | # ResearchGroup ~ Imaging + DemoGeno + UPDRS | ||
+ | # transform cat variable to numeric: | ||
+ | # myData$\$$ResearchGroup <- ifelse(myData$\$$ResearchGroup == "Control", 0, | ||
+ | # ifelse(myData$\$$ResearchGroup == "PD", 2, 1)) | ||
+ | RG_ranked ~ Imaging + DemoGeno + UPDRS | ||
+ | |||
+ | # (3) Residual Variances | ||
+ | L_insular_cortex_ComputeArea ~~ L_insular_cortex_ComputeArea | ||
+ | L_insular_cortex_Volume ~~ L_insular_cortex_Volume | ||
+ | R_insular_cortex_ComputeArea ~~ R_insular_cortex_ComputeArea | ||
+ | R_insular_cortex_Volume ~~ R_insular_cortex_Volume | ||
+ | L_cingulate_gyrus_ComputeArea ~~ L_cingulate_gyrus_ComputeArea | ||
+ | L_cingulate_gyrus_Volume ~~ L_cingulate_gyrus_Volume | ||
+ | R_cingulate_gyrus_ComputeArea ~~ R_cingulate_gyrus_ComputeArea | ||
+ | R_cingulate_gyrus_Volume ~~ R_cingulate_gyrus_Volume | ||
+ | L_caudate_ComputeArea ~~ L_caudate_ComputeArea | ||
+ | L_caudate_Volume ~~ L_caudate_Volume | ||
+ | R_caudate_ComputeArea ~~ R_caudate_ComputeArea | ||
+ | R_caudate_Volume ~~ R_caudate_Volume | ||
+ | L_putamen_ComputeArea ~~ L_putamen_ComputeArea | ||
+ | L_putamen_Volume ~~ L_putamen_Volume | ||
+ | R_putamen_ComputeArea ~~ R_putamen_ComputeArea | ||
+ | R_putamen_Volume ~~ R_putamen_Volume | ||
+ | L_hippocampus_ComputeArea ~~ L_hippocampus_ComputeArea | ||
+ | L_hippocampus_Volume ~~ L_hippocampus_Volume | ||
+ | R_hippocampus_ComputeArea ~~ R_hippocampus_ComputeArea | ||
+ | R_hippocampus_Volume ~~ R_hippocampus_Volume | ||
+ | cerebellum_ComputeArea ~~ cerebellum_ComputeArea | ||
+ | cerebellum_Volume ~~ cerebellum_Volume | ||
+ | L_fusiform_gyrus_ComputeArea ~~ L_fusiform_gyrus_ComputeArea | ||
+ | L_fusiform_gyrus_Volume ~~ L_fusiform_gyrus_Volume | ||
+ | R_fusiform_gyrus_ComputeArea ~~ R_fusiform_gyrus_ComputeArea | ||
+ | R_fusiform_gyrus_Volume ~~ R_fusiform_gyrus_Volume | ||
+ | R_fusiform_gyrus_ShapeIndex ~~ R_fusiform_gyrus_ShapeIndex | ||
+ | R_fusiform_gyrus_Curvedness ~~ R_fusiform_gyrus_Curvedness | ||
+ | Sex ~~ Sex | ||
+ | Weight ~~ Weight | ||
+ | ResearchGroup ~~ ResearchGroup | ||
+ | VisitID ~~ VisitID | ||
+ | Age ~~ Age | ||
+ | chr12_rs34637584_GT ~~ chr12_rs34637584_GT | ||
+ | chr17_rs11868035_GT ~~ chr17_rs11868035_GT | ||
+ | chr17_rs11012_GT ~~ chr17_rs11012_GT | ||
+ | chr17_rs393152_GT ~~ chr17_rs393152_GT | ||
+ | chr17_rs12185268_GT ~~ chr17_rs12185268_GT | ||
+ | chr17_rs199533_GT ~~ chr17_rs199533_GT | ||
+ | UPDRS_Part_I_Summary_Score_Baseline ~~ UPDRS_Part_I_Summary_Score_Baseline | ||
+ | UPDRS_Part_I_Summary_Score_Month_03 ~~ UPDRS_Part_I_Summary_Score_Month_03 | ||
+ | UPDRS_Part_I_Summary_Score_Month_06 ~~ UPDRS_Part_I_Summary_Score_Month_06 | ||
+ | UPDRS_Part_I_Summary_Score_Month_09 ~~ UPDRS_Part_I_Summary_Score_Month_09 | ||
+ | UPDRS_Part_I_Summary_Score_Month_12 ~~ UPDRS_Part_I_Summary_Score_Month_12 | ||
+ | UPDRS_Part_I_Summary_Score_Month_18 ~~ UPDRS_Part_I_Summary_Score_Month_18 | ||
+ | UPDRS_Part_I_Summary_Score_Month_24 ~~ UPDRS_Part_I_Summary_Score_Month_24 | ||
+ | UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline ~~ UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline | ||
+ | UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_03 ~~ UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_03 | ||
+ | UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_06 ~~ UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_06 | ||
+ | UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_09 ~~ UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_09 | ||
+ | UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_12 ~~ UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_12 | ||
+ | UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_18 ~~ UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_18 | ||
+ | UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_24 ~~ UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_24 | ||
+ | UPDRS_Part_III_Summary_Score_Baseline ~~ UPDRS_Part_III_Summary_Score_Baseline | ||
+ | UPDRS_Part_III_Summary_Score_Month_03 ~~ UPDRS_Part_III_Summary_Score_Month_03 | ||
+ | UPDRS_Part_III_Summary_Score_Month_06 ~~ UPDRS_Part_III_Summary_Score_Month_06 | ||
+ | UPDRS_Part_III_Summary_Score_Month_09 ~~ UPDRS_Part_III_Summary_Score_Month_09 | ||
+ | UPDRS_Part_III_Summary_Score_Month_12 ~~ UPDRS_Part_III_Summary_Score_Month_12 | ||
+ | UPDRS_Part_III_Summary_Score_Month_18 ~~ UPDRS_Part_III_Summary_Score_Month_18 | ||
+ | UPDRS_Part_III_Summary_Score_Month_24 ~~ UPDRS_Part_III_Summary_Score_Month_24 | ||
+ | X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline ~~ X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline | ||
+ | X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_06 ~~ X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_06 | ||
+ | X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_12 ~~ X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_12 | ||
+ | X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_24 ~~ X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_24 | ||
+ | X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline ~~ X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline | ||
+ | X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_06 ~~ X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_06 | ||
+ | X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_12 ~~ X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_12 | ||
+ | X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_24 ~~ X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_24 | ||
+ | |||
+ | # (4) Residual Covariances | ||
+ | Sex ~~ Weight | ||
+ | ' | ||
+ | |||
+ | # confirmatory factor analysis (CFA) | ||
+ | # The baseline is a null model constraining the observed variables to covary with no other variables. | ||
+ | # That is, the covariances are fixed to 0 and only individual variances are estimated. This is represents | ||
+ | # a “reasonable worst-possible fitting model”, against which the new fitted model is compared | ||
+ | # to calculate appropriate model-quality indices (e.g., CFA). | ||
+ | |||
+ | # standardize all variable to avoid huge variations between variable distributions | ||
+ | library("MASS") | ||
+ | # myData <- read.csv("https://umich.instructure.com/files/330397/download?download_frd=1&verifier=3bYRT9FXgBGMCQv8MNxsclWnMgodiJRYo3ODFtDq",header=TRUE) | ||
+ | |||
+ | summary(myData) | ||
+ | myData2<-scale(myData); summary(myData2) | ||
+ | |||
+ | myDF <- data.frame(myData2) | ||
+ | # myDF3 <- subset(myDF, select=c("L_cingulate_gyrus_ComputeArea", "cerebellum_Volume", "Weight", "Sex", "Age", " UPDRS_part_I", "UPDRS_part_II", "UPDRS_part_III", "ResearchGroup")) | ||
+ | |||
+ | myDF3 <- subset(myDF, select=c("R_insular_cortex_ComputeArea", "R_insular_cortex_Volume", "Sex", "Weight", "ResearchGroup", "Age", "chr12_rs34637584_GT", "chr17_rs11868035_GT", "chr17_rs11012_GT")) | ||
+ | |||
+ | model3 <- | ||
+ | ' | ||
+ | # latent variable definitions - defining how the latent variables are “manifested by” a set of observed | ||
+ | # (or manifest) variables, aka “indicators” | ||
+ | # (1) Measurement Model | ||
+ | # Imaging =~ L_cingulate_gyrus_ComputeArea + cerebellum_Volume | ||
+ | Imaging =~ R_insular_cortex_ComputeArea + R_insular_cortex_Volume | ||
+ | DemoGeno =~ Weight+Sex+Age | ||
+ | # UPDRS =~ UPDRS_Part_I_Summary_Score_Baseline+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline | ||
+ | UPDRS =~ UPDRS_part_I +UPDRS_part_II + UPDRS_part_III | ||
+ | # (2) Regressions | ||
+ | ResearchGroup ~ Imaging + DemoGeno + UPDRS | ||
+ | |||
+ | fit3 <- cfa(model3, data= myData2, missing='FIML') # deal with missing values (missing='FIML') | ||
+ | summary(fit3, fit.measures=TRUE) | ||
+ | lavaan (0.5-18) converged normally after 2044 iterations | ||
+ | Number of observations 1764 | ||
+ | Number of missing patterns 3 | ||
+ | Estimator ML | ||
+ | Minimum Function Test Statistic 455.923 | ||
+ | Degrees of freedom 15 | ||
+ | P-value (Chi-square) 0.000 | ||
+ | Model test baseline model: | ||
+ | Minimum Function Test Statistic 2625.020 | ||
+ | Degrees of freedom 28 | ||
+ | P-value 0.000 | ||
+ | User model versus baseline model: | ||
+ | Comparative Fit Index (CFI) 0.830 | ||
+ | Tucker-Lewis Index (TLI) 0.683 | ||
+ | Loglikelihood and Information Criteria: | ||
+ | Loglikelihood user model (H0) -51499.484 | ||
+ | Loglikelihood unrestricted model (H1) -51271.522 | ||
+ | Number of free parameters 29 | ||
+ | Akaike (AIC) 103056.967 | ||
+ | Bayesian (BIC) 103215.752 | ||
+ | Sample-size adjusted Bayesian (BIC) 103123.621 | ||
+ | Root Mean Square Error of Approximation: | ||
+ | RMSEA 0.129 | ||
+ | 90 Percent Confidence Interval 0.119 0.139 | ||
+ | P-value RMSEA <= 0.05 0.000 | ||
+ | Standardized Root Mean Square Residual: | ||
+ | SRMR 0.062 | ||
+ | Parameter estimates: | ||
+ | Information Observed | ||
+ | Standard Errors Standard | ||
+ | <blockquote>Estimate Std.err Z-value P(>|z|)</blockquote> | ||
+ | Latent variables: | ||
+ | Imaging =~ | ||
+ | R_cnglt_gyr_V 1.000 | ||
+ | L_cadt_CmptAr 493.058 | ||
+ | DemoGeno =~ | ||
+ | Weight 1.000 | ||
+ | Sex 24.158 | ||
+ | Age 0.094 | ||
+ | UPDRS =~ | ||
+ | UPDRS_part_I 1.000 | ||
+ | UPDRS_part_II 7.389 | ||
+ | Regressions: | ||
+ | ResearchGroup ~ | ||
+ | Imaging -0.000 | ||
+ | DemoGeno 0.002 | ||
+ | UPDRS -0.323 | ||
+ | Covariances: | ||
+ | Imaging ~~ | ||
+ | DemoGeno 0.001 | ||
+ | UPDRS 0.002 | ||
+ | DemoGeno ~~ | ||
+ | UPDRS 0.000 | ||
+ | Intercepts: | ||
+ | R_cnglt_gyr_V 7895.658 | ||
+ | L_cadt_CmptAr 635.570 | ||
+ | Weight 82.048 | ||
+ | Sex 1.340 | ||
+ | Age 61.073 | ||
+ | UPDRS_part_I 1.126 | ||
+ | UPDRS_part_II 4.905 | ||
+ | ResearchGroup 0.290 | ||
+ | Imaging 0.000 | ||
+ | DemoGeno 0.000 | ||
+ | UPDRS 0.000 | ||
+ | Variances: | ||
+ | R_cnglt_gyr_V 17070159.189 | ||
+ | L_cadt_CmptAr -536243845.090 | ||
+ | Weight 274.912 | ||
+ | Sex 96.664 | ||
+ | Age 105.347 | ||
+ | UPDRS_part_I 2.442 | ||
+ | UPDRS_part_II -0.256 | ||
+ | ResearchGroup 0.149 | ||
+ | Imaging 2206.397 | ||
+ | DemoGeno -0.165 | ||
+ | UPDRS 0.550 | ||
+ | ' | ||
+ | |||
+ | ====Output==== | ||
+ | 3 parts of the Lavaan SEM output | ||
+ | * First six lines are called the header contains the following information: | ||
+ | * lavaan version number | ||
+ | * lavaan converge info (normal or not), and # iterations needed | ||
+ | * the number of observations that were effectively used in the analysis | ||
+ | * the estimator that was used to obtain the parameter values (here: ML) | ||
+ | * the model test statistic, the degrees of freedom, and a corresponding p-value | ||
+ | |||
+ | # Next, is the Model test baseline model and the value for the SRMR | ||
+ | # The last section contains the parameter estimates, standard errors (if the information matrix is expected or observed, and if the standard errors are standard, robust, or based on the bootstrap). Then, it tabulates all free (and fixed) parameters that were included in the model. Typically, first the latent variables are shown, followed by covariances and (residual) variances. The first column (Estimate) contains the (estimated or fixed) parameter value for each model parameter; the second column (Std.err) contains the standard error for each estimated parameter; the third column (Z-value) contains the Wald statistic (which is simply obtained by dividing the parameter value by its standard error), and the last column contains the p-value for testing the null hypothesis that the parameter equals zero in the population. | ||
+ | |||
+ | <b>Note:</b> You can get this type of error <b>“…system is computationally singular: reciprocal condition…”,</b> which indicates that the design matrix is not invertible. Thus, it can't be used to develop a regression model. This is due to linearly dependent columns, i.e. strongly correlated variables. Resolve pairwise covariances (or correlations) of your variables to investigate if there are any variables that can potentially be removed. You're looking for covariances (or correlations) >> 0. We can also automate this variable selection by using a forward stepwise regression. | ||
+ | |||
+ | |||
+ | # Graphical fit model visualization | ||
+ | library(semPlot) | ||
+ | semPaths(fit3) | ||
+ | |||
+ | [[Image:SMHS_BigDataBigSci4.png|500px]] | ||
+ | |||
+ | semPaths(fit3, "std", ask = FALSE, as.expression = "edges", mar = c(3, 1, 5, 1)) | ||
+ | |||
+ | [[Image:SMHS_BigDataBigSci5.png|500px]] | ||
+ | |||
+ | ===sem() vs. cfa()=== | ||
+ | The function <b>sem()</b> is very similar to the function <b>cfa()</b>. As we did not include <b>fit.measures=TRUE</b>, the report only includes the basic chi-square test statistic. The argument <b>standardized=TRUE</b>, reports standardized parameter values. Two extra columns of standardized parameter values are printed. In the first column (labeled, Std.lv), only the latent variables are standardized. In the second column (labeled Std.all), both latent and observed variables are standardized. The latter is often called the `completely standardized solution'. | ||
+ | |||
+ | # remove all objects from the R console (current workspace) | ||
+ | rm(list = ls()) | ||
+ | |||
+ | # library("lavaan") | ||
+ | |||
+ | #load data <b>05_PPMI_top_UPDRS_Integrated_LongFormat1.csv ( dim(myData) 1764 31 ), long format</b> | ||
+ | # myData <- read.csv("https://umich.instructure.com/files/330397/download?download_frd=1&verifier=3bYRT9FXgBGMCQv8MNxsclWnMgodiJRYo3ODFtDq",header=TRUE) | ||
+ | # dichotomize the "ResearchGroup" variable | ||
+ | |||
+ | myData$\$$ResearchGroup <- ifelse(myData$\$$ResearchGroup == "Control", 1, 0) | ||
+ | |||
+ | # library("MASS") | ||
+ | myData2<-scale(myData); head(myData2) | ||
+ | myData2[, 20] <- myData$\$$ResearchGroup; # replace the ResearchGroup by orig binary labels (do not rescale) | ||
+ | attach(myData2) | ||
+ | head(myData2) | ||
+ | |||
+ | # model3 <- | ||
+ | ' | ||
+ | # latent variable definitions - defining how the latent variables are “manifested by” a set of observed | ||
+ | # (or manifest) variables, aka “indicators” | ||
+ | # (1) Measurement Model | ||
+ | # Imaging =~ L_cingulate_gyrus_ComputeArea + cerebellum_Volume | ||
+ | Imaging =~ R_insular_cortex_ComputeArea + R_insular_cortex_Volume | ||
+ | DemoGeno =~ Weight+Sex+Age | ||
+ | # UPDRS =~ UPDRS_Part_I_Summary_Score_Baseline+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline | ||
+ | UPDRS =~ UPDRS_part_I +UPDRS_part_II + UPDRS_part_III | ||
+ | # (2) Regressions | ||
+ | ResearchGroup ~ Imaging + DemoGeno + UPDRS | ||
+ | ' | ||
+ | |||
+ | Here is why we needed to scale the data first before we fit the SEM model: | ||
+ | |||
+ | # using raw data provides unreliable estimates | ||
+ | <font color="red"># some observed variances are (at least) a factor 1000 times larger than others;</font> | ||
+ | fit3 <- sem(model3, data=<b>myData</b>, estimator="MLM") | ||
+ | summary(fit3) | ||
+ | |||
+ | # using scaled data provides stable estimates | ||
+ | fit3 <- sem(model3, data=myData2, estimator="MLM") | ||
+ | summary(fit3) | ||
+ | |||
+ | |||
+ | # report the standardized coefficients of the model | ||
+ | standardizedSolution(fit3) | ||
+ | |||
+ | # variation explained by model components (The R-square value for all endogenous variables) | ||
+ | inspect(fit3, "r2") | ||
+ | |||
+ | Note that all variances are supposed to be positive, however, occasionally, model estimates may generate a residual variance that is negative. This may happen due to random sampling error where a very small true value may sometimes produce a negative estimate, or it can occur when the model is unstable. | ||
+ | |||
+ | # Inspect the fitted values variance-covariance matrix: | ||
+ | fitted(fit3)$\$$cov | ||
+ | |||
+ | # | ||
+ | Model fitting | ||
+ | <b>Name Command</b> | ||
+ | fit CFA to data cfa(model, data=Data) | ||
+ | fit SEM to data sem(model, data=Data) | ||
+ | standardized solution sem(model, data=Data, std.ov=TRUE) | ||
+ | orthogonal factors cfa(model, data=Data, orthogonal=TRUE) | ||
+ | Matrices | ||
+ | <b>Name Command</b> | ||
+ | Factor covariance matrix inspect(fit, "coefficients")$\$$psi | ||
+ | Fitted covariance matrix fitted(fit)$\$$cov | ||
+ | Observed covariance matrix inspect(fit, 'sampstat')$\$$cov | ||
+ | Residual covariance matrix resid(fit)$\$$cov | ||
+ | Factor correlation matrix cov2cor(inspect(fit, "coefficients")$\$$psi) or use covariance command with standardised solution e.g., cfa(..., std.ov=TRUE) | ||
+ | Fit Measures | ||
+ | <b>Name Command</b> | ||
+ | Fit measures: fitMeasures(fit) | ||
+ | Specific fit measures e.g.: fitMeasures(fit)[c('chisq', 'df', 'pvalue', 'cfi', 'rmsea', 'srmr')] | ||
+ | Parameters | ||
+ | <b>Name Command</b> | ||
+ | Parameter information parTable(fit) | ||
+ | Standardised estimates standardizedSolution(fit) or summary(fit, standardized=TRUE) | ||
+ | R-squared | inspect(fit, 'r2') | ||
+ | Compare models | ||
+ | <b>Name Command</b> | ||
+ | Compare fit measures cbind(m1=inspect(m1_fit, 'fit.measures'), m2=inspect(m2_fit, 'fit.measures')) | ||
+ | Chi-square difference test anova(m1_fit, m2_fit) | ||
+ | Model improvement | ||
+ | <b>Name Command</b> | ||
+ | Modification indices mod_ind <- modificationindices(fit) | ||
+ | 10 greatest head(mod_ind[order(mod_ind$\$$mi, decreasing=TRUE), ], 10) | ||
+ | mi > 5 subset(mod_ind[order(mod_ind$\$$mi, decreasing=TRUE), ], mi > 5) | ||
+ | |||
+ | # to account for groupings (gender) | ||
+ | # fit3 <- sem(model3, data=myData, group="Sex", estimator="MLM") | ||
+ | |||
+ | # install.packages("semPlot") | ||
+ | library(semPlot) | ||
+ | |||
+ | # Plot standardized model (numerical): | ||
+ | # semPaths(fit3, what = "est", layout = "tree", title = TRUE, style = "LISREL") | ||
+ | semPaths(fit3) | ||
+ | |||
+ | [[Image:SMHS_BigDataBigSci7.png|500px]] | ||
==See also== | ==See also== | ||
* [[SMHS_BigDataBigSci_SEM| Back to Structural Equation Modeling (SEM)]] | * [[SMHS_BigDataBigSci_SEM| Back to Structural Equation Modeling (SEM)]] | ||
− | * [[ | + | * [[SMHS_BigDataBigSci_SEM_Ex1| Back to SEM Example 1: School Kids Mental Abilities]] |
Revision as of 11:53, 5 May 2016
Contents
Structural Equation Modeling (SEM) - Hands-on Example 2 (Parkinson’s Disease data)
# Data: PPMI Integrated (imaging, demographics, genetics, clinical and cognitive (UPDRS) data. # Dinov et al., 2015
INSERT CHART HERE
# install.packages("lavaan") library(lavaan) #load data 05_PPMI_top_UPDRS_Integrated_LongFormat1.csv ( dim(myData) 1764 31 ) # setwd("/dir/") myData <- read.csv("https://umich.instructure.com/files/330397/download?download_frd=1&verifier=3bYRT9FXgBGMCQv8MNxsclWnMgodiJRYo3ODFtDq",header=TRUE)
# dichotomize the "ResearchGroup" variable myData$\$$ResearchGroup <- ifelse(myData$\$$ResearchGroup == "Control", 1, 0)
# Data elements: Index FID_IID L_cingulate_gyrus_ComputeArea L_cingulate_gyrus_Volume R_cingulate_gyrus_ComputeArea R_cingulate_gyrus_Volume L_caudate_ComputeArea L_caudate_Volume R_caudate_ComputeArea R_caudate_Volume L_putamen_ComputeArea L_putamen_Volume R_putamen_ComputeArea R_putamen_Volume L_hippocampus_ComputeArea L_hippocampus_Volume R_hippocampus_ComputeArea R_hippocampus_Volume cerebellum_ComputeArea cerebellum_Volume L_fusiform_gyrus_ComputeArea L_fusiform_gyrus_Volume R_fusiform_gyrus_ComputeArea R_fusiform_gyrus_Volume Sex Weight ResearchGroup Age chr12_rs34637584_GT chr17_rs11868035_GT chr17_rs11012_GT chr17_rs393152_GT chr17_rs12185268_GT UPDRS_Part_I_Summary_Score_Baseline UPDRS_Part_I_Summary_Score_Month_03 UPDRS_Part_I_Summary_Score_Month_06 UPDRS_Part_I_Summary_Score_Month_09 UPDRS_Part_I_Summary_Score_Month_12 UPDRS_Part_I_Summary_Score_Month_18 UPDRS_Part_I_Summary_Score_Month_24 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_03 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_06 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_09 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_12 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_18 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_24 UPDRS_Part_III_Summary_Score_Baseline UPDRS_Part_III_Summary_Score_Month_03 UPDRS_Part_III_Summary_Score_Month_06 UPDRS_Part_III_Summary_Score_Month_09 UPDRS_Part_III_Summary_Score_Month_12 UPDRS_Part_III_Summary_Score_Month_18 UPDRS_Part_III_Summary_Score_Month_24 X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_06 X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_12 X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_24 X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_06 X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_12 X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_24
Validation of the measurement model
myData<-within(myData, { L_cingulate_gyrus_ComputeArea <- lm(L_cingulate_gyrus_ComputeArea ~ L_cingulate_gyrus_Volume+R_cingulate_gyrus_ComputeArea+R_cingulate_gyrus_Volume+L_caudate_ComputeArea+L_caudate_Volume+R_caudate_ComputeArea+R_caudate_Volume+L_putamen_ComputeArea+L_putamen_Volume+R_putamen_ComputeArea+R_putamen_Volum e+L_hippocampus_ComputeArea+L_hippocampus_Volume+R_hippocampus_ComputeArea+R_hippocampus_Volume+cerebellum_ComputeArea+cerebellum_Volume+L_fusiform_gyrus_ComputeArea+L_fusiform_gyrus_Volume+R_fusiform_gyrus_ComputeArea+R_fusiform_gyru s_Volume, data=myData)$\$$residuals Weight <- lm(Weight ~ Sex+ResearchGroup+Age+chr12_rs34637584_GT+chr17_rs11868035_GT+chr17_rs11012_GT+chr17_rs393152_GT+chr17_rs12185268_GT, data=myData)$\$$residuals UPDRS_Part_I_Summary_Score_Baseline <- lm(UPDRS_Part_I_Summary_Score_Baseline ~ UPDRS_Part_I_Summary_Score_Month_03+UPDRS_Part_I_Summary_Score_Month_06+UPDRS_Part_I_Summary_Score_Month_09+UPDRS_Part_I_Summary_Score_Month_12+UPDRS_Part_I_Summary_Score_Month_18+UPDRS_Part_I_Summary_Score_Month_24+UPDRS_Part_II_Pati ent_Questionnaire_Summary_Score_Baseline+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_03+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_06+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_09+UPDRS_Part_II_Pa tient_Questionnaire_Summary_Score_Month_12+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_18+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_24+UPDRS_Part_III_Summary_Score_Baseline+UPDRS_Part_III_Summary_Score_Month_ 03+UPDRS_Part_III_Summary_Score_Month_06+UPDRS_Part_III_Summary_Score_Month_09+UPDRS_Part_III_Summary_Score_Month_12+UPDRS_Part_III_Summary_Score_Month_18+UPDRS_Part_III_Summary_Score_Month_24+X_Assessment_Non.Motor_Epworth_Sleepiness _Scale_Summary_Score_Baseline+X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_06+X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_12+X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_ Month_24+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_06+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short _Summary_Score_Month_12+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_24, data=myData)$\$$residuals }) ===='"`UNIQ--h-2--QINU`"'Structural Model==== # Next, proceed with the structural model including the residuals from data to account for effects of site. Lavaan model specification: formula type operator mnemonic latent variable definition =~ is measured by regression ~ is regressed on (residual) (co)variance ~~ is correlated with Intercept ~ 1 Intercept For example, myModel <- <b># regressions</b> y1 + y2 <mark>~</mark> f1 + f2 + x1 + x2 f1 ~ f2 + f3 f2 ~ f3 + x1 + x2 <b># latent variable definitions</b> f1 <mark>=~</mark> y1 + y2 + y3 f2 =~ y4 + y5 + y6 f3 =~ y7 + y8 + y9 + y10 <b># variances and covariances</b> y1 <mark>~~</mark> y1 y1 ~~ y2 f1 ~~ f2 <b># intercepts</b> y1 <mark>~</mark> 1 f1 ~ 1 model1 <- ' # latent variable definitions - defining how the latent variables are “manifested by” a set of observed # (or manifest) variables, aka “indicators” # (1) Measurement Model Imaging =~ L_cingulate_gyrus_ComputeArea+L_cingulate_gyrus_Volume DemoGeno =~ Weight+Sex+Age UPDRS =~ UPDRS_Part_I_Summary_Score_Baseline+UPDRS_Part_I_Summary_Score_Month_03 # (2) Regressions ResearchGroup ~ Imaging + DemoGeno + UPDRS ' model2 <- ' # latent variable definitions - defining how the latent variables are “manifested by” a set of observed # (or manifest) variables, aka “indicators” # (1) Measurement Model Imaging =~ L_cingulate_gyrus_ComputeArea+L_cingulate_gyrus_Volume+R_cingulate_gyrus_ComputeArea+R_cingulate_gyrus_Volume+L_caudate_ComputeArea+L_caudate_Volume+R_caudate_ComputeArea+R_caudate_Volume+L_putamen_ComputeArea+L_putamen_Volume+R_putam en_ComputeArea+R_putamen_Volume+L_hippocampus_ComputeArea+L_hippocampus_Volume+R_hippocampus_ComputeArea+R_hippocampus_Volume+cerebellum_ComputeArea+cerebellum_Volume+L_fusiform_gyrus_ComputeArea+L_fusiform_gyrus_Volume+R_fusiform_gyr us_ComputeArea+R_fusiform_gyrus_Volume DemoGeno =~ Weight+Sex+Age+chr12_rs34637584_GT+chr17_rs11868035_GT+chr17_rs11012_GT+chr17_rs393152_GT+chr17_rs12185268_GT UPDRS =~ UPDRS_Part_I_Summary_Score_Baseline+UPDRS_Part_I_Summary_Score_Month_03+UPDRS_Part_I_Summary_Score_Month_06+UPDRS_Part_I_Summary_Score_Month_09+UPDRS_Part_I_Summary_Score_Month_12+UPDRS_Part_I_Summary_Score_Month_18+UPDRS_Part_I_Summa ry_Score_Month_24+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_03+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_06+UPDRS_Part_II_Patient_Questionnaire_Sum mary_Score_Month_09+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_12+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_18+UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_24+UPDRS_Part_III_Summary_Score_Baseline +UPDRS_Part_III_Summary_Score_Month_03+UPDRS_Part_III_Summary_Score_Month_06+UPDRS_Part_III_Summary_Score_Month_09+UPDRS_Part_III_Summary_Score_Month_12+UPDRS_Part_III_Summary_Score_Month_18+UPDRS_Part_III_Summary_Score_Month_24+X_Ass essment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline+X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_06+X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_12+X_Assessment_Non.Motor_Epw orth_Sleepiness_Scale_Summary_Score_Month_24+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_06+X_Assessment_Non.Motor_ Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_12+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_24 # (2) Regressions # ResearchGroup ~ Imaging + DemoGeno + UPDRS # transform cat variable to numeric: # myData$\$$ResearchGroup <- ifelse(myData$\$$ResearchGroup == "Control", 0, # ifelse(myData$\$$ResearchGroup == "PD", 2, 1)) RG_ranked ~ Imaging + DemoGeno + UPDRS
# (3) Residual Variances L_insular_cortex_ComputeArea ~~ L_insular_cortex_ComputeArea L_insular_cortex_Volume ~~ L_insular_cortex_Volume R_insular_cortex_ComputeArea ~~ R_insular_cortex_ComputeArea R_insular_cortex_Volume ~~ R_insular_cortex_Volume L_cingulate_gyrus_ComputeArea ~~ L_cingulate_gyrus_ComputeArea L_cingulate_gyrus_Volume ~~ L_cingulate_gyrus_Volume R_cingulate_gyrus_ComputeArea ~~ R_cingulate_gyrus_ComputeArea R_cingulate_gyrus_Volume ~~ R_cingulate_gyrus_Volume L_caudate_ComputeArea ~~ L_caudate_ComputeArea L_caudate_Volume ~~ L_caudate_Volume R_caudate_ComputeArea ~~ R_caudate_ComputeArea R_caudate_Volume ~~ R_caudate_Volume L_putamen_ComputeArea ~~ L_putamen_ComputeArea L_putamen_Volume ~~ L_putamen_Volume R_putamen_ComputeArea ~~ R_putamen_ComputeArea R_putamen_Volume ~~ R_putamen_Volume L_hippocampus_ComputeArea ~~ L_hippocampus_ComputeArea L_hippocampus_Volume ~~ L_hippocampus_Volume R_hippocampus_ComputeArea ~~ R_hippocampus_ComputeArea R_hippocampus_Volume ~~ R_hippocampus_Volume cerebellum_ComputeArea ~~ cerebellum_ComputeArea cerebellum_Volume ~~ cerebellum_Volume L_fusiform_gyrus_ComputeArea ~~ L_fusiform_gyrus_ComputeArea L_fusiform_gyrus_Volume ~~ L_fusiform_gyrus_Volume R_fusiform_gyrus_ComputeArea ~~ R_fusiform_gyrus_ComputeArea R_fusiform_gyrus_Volume ~~ R_fusiform_gyrus_Volume R_fusiform_gyrus_ShapeIndex ~~ R_fusiform_gyrus_ShapeIndex R_fusiform_gyrus_Curvedness ~~ R_fusiform_gyrus_Curvedness Sex ~~ Sex Weight ~~ Weight ResearchGroup ~~ ResearchGroup VisitID ~~ VisitID Age ~~ Age chr12_rs34637584_GT ~~ chr12_rs34637584_GT chr17_rs11868035_GT ~~ chr17_rs11868035_GT chr17_rs11012_GT ~~ chr17_rs11012_GT chr17_rs393152_GT ~~ chr17_rs393152_GT chr17_rs12185268_GT ~~ chr17_rs12185268_GT chr17_rs199533_GT ~~ chr17_rs199533_GT UPDRS_Part_I_Summary_Score_Baseline ~~ UPDRS_Part_I_Summary_Score_Baseline UPDRS_Part_I_Summary_Score_Month_03 ~~ UPDRS_Part_I_Summary_Score_Month_03 UPDRS_Part_I_Summary_Score_Month_06 ~~ UPDRS_Part_I_Summary_Score_Month_06 UPDRS_Part_I_Summary_Score_Month_09 ~~ UPDRS_Part_I_Summary_Score_Month_09 UPDRS_Part_I_Summary_Score_Month_12 ~~ UPDRS_Part_I_Summary_Score_Month_12 UPDRS_Part_I_Summary_Score_Month_18 ~~ UPDRS_Part_I_Summary_Score_Month_18 UPDRS_Part_I_Summary_Score_Month_24 ~~ UPDRS_Part_I_Summary_Score_Month_24 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline ~~ UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Baseline UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_03 ~~ UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_03 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_06 ~~ UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_06 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_09 ~~ UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_09 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_12 ~~ UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_12 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_18 ~~ UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_18 UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_24 ~~ UPDRS_Part_II_Patient_Questionnaire_Summary_Score_Month_24 UPDRS_Part_III_Summary_Score_Baseline ~~ UPDRS_Part_III_Summary_Score_Baseline UPDRS_Part_III_Summary_Score_Month_03 ~~ UPDRS_Part_III_Summary_Score_Month_03 UPDRS_Part_III_Summary_Score_Month_06 ~~ UPDRS_Part_III_Summary_Score_Month_06 UPDRS_Part_III_Summary_Score_Month_09 ~~ UPDRS_Part_III_Summary_Score_Month_09 UPDRS_Part_III_Summary_Score_Month_12 ~~ UPDRS_Part_III_Summary_Score_Month_12 UPDRS_Part_III_Summary_Score_Month_18 ~~ UPDRS_Part_III_Summary_Score_Month_18 UPDRS_Part_III_Summary_Score_Month_24 ~~ UPDRS_Part_III_Summary_Score_Month_24 X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline ~~ X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Baseline X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_06 ~~ X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_06 X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_12 ~~ X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_12 X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_24 ~~ X_Assessment_Non.Motor_Epworth_Sleepiness_Scale_Summary_Score_Month_24 X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline ~~ X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_06 ~~ X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_06 X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_12 ~~ X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_12 X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_24 ~~ X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Month_24 # (4) Residual Covariances Sex ~~ Weight '
# confirmatory factor analysis (CFA) # The baseline is a null model constraining the observed variables to covary with no other variables. # That is, the covariances are fixed to 0 and only individual variances are estimated. This is represents # a “reasonable worst-possible fitting model”, against which the new fitted model is compared # to calculate appropriate model-quality indices (e.g., CFA).
# standardize all variable to avoid huge variations between variable distributions library("MASS") # myData <- read.csv("https://umich.instructure.com/files/330397/download?download_frd=1&verifier=3bYRT9FXgBGMCQv8MNxsclWnMgodiJRYo3ODFtDq",header=TRUE)
summary(myData) myData2<-scale(myData); summary(myData2)
myDF <- data.frame(myData2) # myDF3 <- subset(myDF, select=c("L_cingulate_gyrus_ComputeArea", "cerebellum_Volume", "Weight", "Sex", "Age", " UPDRS_part_I", "UPDRS_part_II", "UPDRS_part_III", "ResearchGroup"))
myDF3 <- subset(myDF, select=c("R_insular_cortex_ComputeArea", "R_insular_cortex_Volume", "Sex", "Weight", "ResearchGroup", "Age", "chr12_rs34637584_GT", "chr17_rs11868035_GT", "chr17_rs11012_GT"))
model3 <- ' # latent variable definitions - defining how the latent variables are “manifested by” a set of observed # (or manifest) variables, aka “indicators” # (1) Measurement Model # Imaging =~ L_cingulate_gyrus_ComputeArea + cerebellum_Volume Imaging =~ R_insular_cortex_ComputeArea + R_insular_cortex_Volume DemoGeno =~ Weight+Sex+Age # UPDRS =~ UPDRS_Part_I_Summary_Score_Baseline+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline UPDRS =~ UPDRS_part_I +UPDRS_part_II + UPDRS_part_III # (2) Regressions ResearchGroup ~ Imaging + DemoGeno + UPDRS
fit3 <- cfa(model3, data= myData2, missing='FIML') # deal with missing values (missing='FIML') summary(fit3, fit.measures=TRUE) lavaan (0.5-18) converged normally after 2044 iterations Number of observations 1764 Number of missing patterns 3 Estimator ML Minimum Function Test Statistic 455.923 Degrees of freedom 15 P-value (Chi-square) 0.000 Model test baseline model: Minimum Function Test Statistic 2625.020 Degrees of freedom 28 P-value 0.000 User model versus baseline model: Comparative Fit Index (CFI) 0.830 Tucker-Lewis Index (TLI) 0.683 Loglikelihood and Information Criteria: Loglikelihood user model (H0) -51499.484 Loglikelihood unrestricted model (H1) -51271.522 Number of free parameters 29 Akaike (AIC) 103056.967 Bayesian (BIC) 103215.752 Sample-size adjusted Bayesian (BIC) 103123.621 Root Mean Square Error of Approximation: RMSEA 0.129 90 Percent Confidence Interval 0.119 0.139 P-value RMSEA <= 0.05 0.000 Standardized Root Mean Square Residual: SRMR 0.062 Parameter estimates: Information Observed Standard Errors Standard
Estimate Std.err Z-value P(>|z|)
Latent variables: Imaging =~ R_cnglt_gyr_V 1.000 L_cadt_CmptAr 493.058 DemoGeno =~ Weight 1.000 Sex 24.158 Age 0.094 UPDRS =~ UPDRS_part_I 1.000 UPDRS_part_II 7.389 Regressions: ResearchGroup ~ Imaging -0.000 DemoGeno 0.002 UPDRS -0.323 Covariances: Imaging ~~ DemoGeno 0.001 UPDRS 0.002 DemoGeno ~~ UPDRS 0.000 Intercepts: R_cnglt_gyr_V 7895.658 L_cadt_CmptAr 635.570 Weight 82.048 Sex 1.340 Age 61.073 UPDRS_part_I 1.126 UPDRS_part_II 4.905 ResearchGroup 0.290 Imaging 0.000 DemoGeno 0.000 UPDRS 0.000 Variances: R_cnglt_gyr_V 17070159.189 L_cadt_CmptAr -536243845.090 Weight 274.912 Sex 96.664 Age 105.347 UPDRS_part_I 2.442 UPDRS_part_II -0.256 ResearchGroup 0.149 Imaging 2206.397 DemoGeno -0.165 UPDRS 0.550 '
Output
3 parts of the Lavaan SEM output
- First six lines are called the header contains the following information:
- lavaan version number
- lavaan converge info (normal or not), and # iterations needed
- the number of observations that were effectively used in the analysis
- the estimator that was used to obtain the parameter values (here: ML)
- the model test statistic, the degrees of freedom, and a corresponding p-value
- Next, is the Model test baseline model and the value for the SRMR
- The last section contains the parameter estimates, standard errors (if the information matrix is expected or observed, and if the standard errors are standard, robust, or based on the bootstrap). Then, it tabulates all free (and fixed) parameters that were included in the model. Typically, first the latent variables are shown, followed by covariances and (residual) variances. The first column (Estimate) contains the (estimated or fixed) parameter value for each model parameter; the second column (Std.err) contains the standard error for each estimated parameter; the third column (Z-value) contains the Wald statistic (which is simply obtained by dividing the parameter value by its standard error), and the last column contains the p-value for testing the null hypothesis that the parameter equals zero in the population.
Note: You can get this type of error “…system is computationally singular: reciprocal condition…”, which indicates that the design matrix is not invertible. Thus, it can't be used to develop a regression model. This is due to linearly dependent columns, i.e. strongly correlated variables. Resolve pairwise covariances (or correlations) of your variables to investigate if there are any variables that can potentially be removed. You're looking for covariances (or correlations) >> 0. We can also automate this variable selection by using a forward stepwise regression.
# Graphical fit model visualization library(semPlot) semPaths(fit3)
semPaths(fit3, "std", ask = FALSE, as.expression = "edges", mar = c(3, 1, 5, 1))
sem() vs. cfa()
The function sem() is very similar to the function cfa(). As we did not include fit.measures=TRUE, the report only includes the basic chi-square test statistic. The argument standardized=TRUE, reports standardized parameter values. Two extra columns of standardized parameter values are printed. In the first column (labeled, Std.lv), only the latent variables are standardized. In the second column (labeled Std.all), both latent and observed variables are standardized. The latter is often called the `completely standardized solution'.
# remove all objects from the R console (current workspace) rm(list = ls())
# library("lavaan")
#load data 05_PPMI_top_UPDRS_Integrated_LongFormat1.csv ( dim(myData) 1764 31 ), long format # myData <- read.csv("https://umich.instructure.com/files/330397/download?download_frd=1&verifier=3bYRT9FXgBGMCQv8MNxsclWnMgodiJRYo3ODFtDq",header=TRUE) # dichotomize the "ResearchGroup" variable
myData$\$$ResearchGroup <- ifelse(myData$\$$ResearchGroup == "Control", 1, 0)
# library("MASS") myData2<-scale(myData); head(myData2) myData2[, 20] <- myData$\$$ResearchGroup; # replace the ResearchGroup by orig binary labels (do not rescale) attach(myData2) head(myData2) # model3 <- ' # latent variable definitions - defining how the latent variables are “manifested by” a set of observed # (or manifest) variables, aka “indicators” # (1) Measurement Model # Imaging =~ L_cingulate_gyrus_ComputeArea + cerebellum_Volume Imaging =~ R_insular_cortex_ComputeArea + R_insular_cortex_Volume DemoGeno =~ Weight+Sex+Age # UPDRS =~ UPDRS_Part_I_Summary_Score_Baseline+X_Assessment_Non.Motor_Geriatric_Depression_Scale_GDS_Short_Summary_Score_Baseline UPDRS =~ UPDRS_part_I +UPDRS_part_II + UPDRS_part_III # (2) Regressions ResearchGroup ~ Imaging + DemoGeno + UPDRS ' Here is why we needed to scale the data first before we fit the SEM model: # using raw data provides unreliable estimates <font color="red"># some observed variances are (at least) a factor 1000 times larger than others;</font> fit3 <- sem(model3, data=<b>myData</b>, estimator="MLM") summary(fit3) # using scaled data provides stable estimates fit3 <- sem(model3, data=myData2, estimator="MLM") summary(fit3) # report the standardized coefficients of the model standardizedSolution(fit3) # variation explained by model components (The R-square value for all endogenous variables) inspect(fit3, "r2") Note that all variances are supposed to be positive, however, occasionally, model estimates may generate a residual variance that is negative. This may happen due to random sampling error where a very small true value may sometimes produce a negative estimate, or it can occur when the model is unstable. # Inspect the fitted values variance-covariance matrix: fitted(fit3)$\$$cov
# Model fitting Name Command fit CFA to data cfa(model, data=Data) fit SEM to data sem(model, data=Data) standardized solution sem(model, data=Data, std.ov=TRUE) orthogonal factors cfa(model, data=Data, orthogonal=TRUE) Matrices Name Command Factor covariance matrix inspect(fit, "coefficients")$\$$psi Fitted covariance matrix fitted(fit)$\$$cov Observed covariance matrix inspect(fit, 'sampstat')$\$$cov Residual covariance matrix resid(fit)$\$$cov Factor correlation matrix cov2cor(inspect(fit, "coefficients")$\$$psi) or use covariance command with standardised solution e.g., cfa(..., std.ov=TRUE) Fit Measures <b>Name Command</b> Fit measures: fitMeasures(fit) Specific fit measures e.g.: fitMeasures(fit)[c('chisq', 'df', 'pvalue', 'cfi', 'rmsea', 'srmr')] Parameters <b>Name Command</b> Parameter information parTable(fit) Standardised estimates standardizedSolution(fit) or summary(fit, standardized=TRUE) R-squared | inspect(fit, 'r2') Compare models <b>Name Command</b> Compare fit measures cbind(m1=inspect(m1_fit, 'fit.measures'), m2=inspect(m2_fit, 'fit.measures')) Chi-square difference test anova(m1_fit, m2_fit) Model improvement <b>Name Command</b> Modification indices mod_ind <- modificationindices(fit) 10 greatest head(mod_ind[order(mod_ind$\$$mi, decreasing=TRUE), ], 10) mi > 5 subset(mod_ind[order(mod_ind$\$$mi, decreasing=TRUE), ], mi > 5)
# to account for groupings (gender) # fit3 <- sem(model3, data=myData, group="Sex", estimator="MLM")
# install.packages("semPlot") library(semPlot)
# Plot standardized model (numerical): # semPaths(fit3, what = "est", layout = "tree", title = TRUE, style = "LISREL") semPaths(fit3)
See also
- SOCR Home page: http://www.socr.umich.edu
Translate this page: