SOCR Simulated HELP Data Activity

From SOCR
Revision as of 18:46, 12 September 2014 by Dinov (talk | contribs) (R examples)
Jump to: navigation, search

SOCR Simulated HELP Data - SOCR Activity: Simulated Health Evaluation and Linkage to Primary (HELP) Care Dataset

SOCR Simulated HELP Data

See the SOCR Simulated HELP Data first. These data can be copy-pasted using the mouse from the HTML table into a plain text file "help_data.csv".

R examples

These simulated HELP data can be used to demonstrate (using SOCR and R) a number of different statistical, modeling, inferential and data analytic techniques.

R examples

These simulated HELP data can be used to demonstrate (using SOCR and R) a number of different statistical, modeling, inferential and data analytic techniques.

Data I/O, summaries, visualization

# ---- Data I/O and univariate summaries of the MCS score ----
options(digits = 2)  # decimal precision
options(width = 80)  # narrows output to stay in the grey box

# Load the simulated HELP data straight from the SOCR server (needs network
# access).
hemp_sim_data <- read.csv("http://socr.umich.edu/data/SOCR_HELP_SIm_Data_2014.csv")
# attach() is deliberately not called: every column in this script is
# referenced as hemp_sim_data$<column>, so nothing has to be placed on the
# search path (where it could mask, or be masked by, other objects).
summary(hemp_sim_data)
fivenum(hemp_sim_data$mcs)  # Tukey five-number summary of mcs

# na.rm = TRUE drops missing mcs values before each statistic is computed.
mean(hemp_sim_data$mcs, na.rm = TRUE)
median(hemp_sim_data$mcs, na.rm = TRUE)
range(hemp_sim_data$mcs, na.rm = TRUE)
sd(hemp_sim_data$mcs, na.rm = TRUE)
var(hemp_sim_data$mcs, na.rm = TRUE)

# Deciles: 11 equally spaced probabilities from 0 to 1.
quantile(hemp_sim_data$mcs, seq(from = 0, to = 1, length.out = 11), na.rm = TRUE)


# ---- Histogram of MCS with kernel and normal density overlays ----
# Drop missing mcs values once so hist(), density(), mean() and sd() below
# all operate on the same complete vector.
no_mis_hemp_sim_data_mcs <- na.omit(hemp_sim_data$mcs)

# freq = FALSE puts the histogram on the density scale so the two curves
# drawn on top of it are comparable.
hist(no_mis_hemp_sim_data_mcs, main = "", freq = FALSE)
# Dashed curve: kernel density estimate. `main` is not a graphical parameter
# of lines(), so the title that used to be passed here was never drawn and
# has been removed.
lines(density(no_mis_hemp_sim_data_mcs), lty = 2, lwd = 2)
# Solid curve: normal density using the sample mean and standard deviation,
# evaluated on a 100-point grid spanning the observed range.
xvals <- seq(
  from = min(no_mis_hemp_sim_data_mcs),
  to = max(no_mis_hemp_sim_data_mcs),
  length.out = 100
)
lines(
  xvals,
  dnorm(xvals, mean(no_mis_hemp_sim_data_mcs), sd(no_mis_hemp_sim_data_mcs)),
  lwd = 2
)


# ---- Pairwise correlations among mcs, i11 and pcs1 ----
# Selecting the columns by name keeps the variable names as the row/column
# labels of the result. use = "complete.obs" restricts the computation to
# rows where all three variables are present; without it a single NA turns
# the affected correlations into NA.
cor_mat <- cor(hemp_sim_data[, c("mcs", "i11", "pcs1")], use = "complete.obs")
cor_mat
# Rows 2-3 of column 2: i11 with itself, and pcs1 with i11.
cor_mat[c(2, 3), 2]


# ---- Scatterplot of cesd against mcs for male subjects (female == 0) ----
# Each point is drawn as a letter identifying the subject's substance:
# A = alcohol, C = cocaine, H = heroin. The x and y coordinates of every
# letter come from the SAME rows, so each (mcs, cesd) pair belongs to one
# subject. local() keeps the helper variables out of the global workspace.
local({
  # which() drops rows where `female` is NA.
  males <- hemp_sim_data[which(hemp_sim_data$female == 0), ]

  # type = "n" sets up the axes without drawing points; bty = "n" omits the
  # surrounding box.
  plot(males$mcs, males$cesd, xlab = "MCS", ylab = "cesd", type = "n", bty = "n")

  substance_letters <- c(alcohol = "A", cocaine = "C", heroin = "H")
  for (drug in names(substance_letters)) {
    rows <- which(males$substance == drug)
    text(males$mcs[rows], males$cesd[rows], substance_letters[[drug]])
  }

  # Marginal rugs, jittered to separate ties: side = 2 is the left (y) axis,
  # so it shows cesd; side = 3 is the top edge (x direction), so it shows mcs.
  rug(jitter(males$cesd), side = 2)
  rug(jitter(males$mcs), side = 3)
})


# ---- Homelessness by gender: 2x2 table, odds ratio and association tests ----
table(hemp_sim_data$homeless, hemp_sim_data$female)


# Cross-product odds ratio from the four cell counts:
# (n00 * n11) / (n01 * n10), indexed as (homeless, female).
# na.rm = TRUE skips subjects for whom either indicator is missing.
or <- (
  sum(hemp_sim_data$homeless == 0 & hemp_sim_data$female == 0, na.rm = TRUE) *
    sum(hemp_sim_data$homeless == 1 & hemp_sim_data$female == 1, na.rm = TRUE)
) / (
  sum(hemp_sim_data$homeless == 0 & hemp_sim_data$female == 1, na.rm = TRUE) *
    sum(hemp_sim_data$homeless == 1 & hemp_sim_data$female == 0, na.rm = TRUE)
)
or


# Pearson chi-squared test; correct = FALSE turns off the continuity
# correction.
chisq_val <- chisq.test(hemp_sim_data$homeless, hemp_sim_data$female, correct = FALSE)
chisq_val


# Fisher's exact test on the same pair of variables.
fisher.test(hemp_sim_data$homeless, hemp_sim_data$female)


# ---- Two-sample comparisons of age between the two `female` groups ----
# The formulas use plain column names resolved through `data`, instead of
# repeating hemp_sim_data$ inside the formula while also passing `data`.
ttres <- t.test(age ~ female, data = hemp_sim_data)
print(ttres)


# Wilcoxon rank-sum test; correct = FALSE disables the continuity correction
# in the normal approximation.
wilcox.test(age ~ female, data = hemp_sim_data, correct = FALSE)

# Two-sample Kolmogorov-Smirnov test. ks.test(x, y) takes the two samples
# directly, so no `data` argument is passed.
ksres <- ks.test(
  hemp_sim_data$age[hemp_sim_data$female == 0],
  hemp_sim_data$age[hemp_sim_data$female == 1]
)
print(ksres)

Sorting and subsetting

# ---- Derived quantities, sorting and tabulation ----
# Sum over subjects of the item difference f1a - f1t, skipping missing values.
# NOTE(review): presumably a stand-in for a recomputed CESD score; confirm the
# intended item formula.
new_cesd <- sum(hemp_sim_data$f1a - hemp_sim_data$f1t, na.rm = TRUE)
new_cesd
# Mean of the same difference, scaled by 20.
# NOTE(review): 20 looks like the number of questionnaire items; confirm.
impute_mean_cesd <- mean(hemp_sim_data$f1a - hemp_sim_data$f1t, na.rm = TRUE) * 20
# Four smallest cesd values (sort() drops NAs by default).
sort(hemp_sim_data$cesd)[1:4]
# Number of missing drinkstat values.
sum(is.na(hemp_sim_data$drinkstat))

# Supplying an `exclude` that does not contain NA makes table() count missing
# values as their own category when any are present.
table(hemp_sim_data$drinkstat, exclude = "NULL")

# Recode the 0/1 indicator as a labelled factor, then tabulate the original
# indicator. These are two separate statements and must be on separate lines.
gender <- factor(hemp_sim_data$female, c(0, 1), c("male", "Female"))
table(hemp_sim_data$female)

Exploratory data analysis

Graphing and plotting of data (scatterplot, bubble chart, multiple plots, dotplot, etc.)

Bivariate relationship

Contingency tables

Two-sample tests

Survival analysis (Kaplan–Meier plot)

Scatterplot with smooth fit

Regression with prediction intervals

Linear regression with interaction

Regression diagnostics

Fitting stratified regression models

Two-way analysis of variance (ANOVA)

Multiple comparisons

Contrasts

Logistic regression

Poisson regression

Zero-inflated Poisson regression

Negative binomial regression

Lasso model selection

Quantile regression

Ordinal logit regression

Multinomial logit regression

Generalized additive model

Data transformations

General linear model for correlated data

Random effects model

Generalized estimating equations (GEE) model

Generalized linear mixed model

Proportional hazards regression model

Bayesian Poisson regression

Cronbach’s $\alpha$

Factor analysis

Recursive partitioning

Linear discriminant analysis

Hierarchical clustering

ROC curve

Multiple imputation

Propensity score modeling

References




Translate this page:

(default)
Uk flag.gif

Deutsch
De flag.gif

Español
Es flag.gif

Français
Fr flag.gif

Italiano
It flag.gif

Português
Pt flag.gif

日本語
Jp flag.gif

България
Bg flag.gif

الامارات العربية المتحدة
Ae flag.gif

Suomi
Fi flag.gif

इस भाषा में
In flag.gif

Norge
No flag.png

한국어
Kr flag.gif

中文
Cn flag.gif

繁体中文
Cn flag.gif

Русский
Ru flag.gif

Nederlands
Nl flag.gif

Ελληνικά
Gr flag.gif

Hrvatska
Hr flag.gif

Česká republika
Cz flag.gif

Danmark
Dk flag.gif

Polska
Pl flag.png

România
Ro flag.png

Sverige
Se flag.gif