Difference between revisions of "SMHS DataSimulation"

From SOCR
Jump to: navigation, search
(Replace all references to dollar sign ($) by ($\$$))
(Testing section)
Line 51: Line 51:
 
Simulate new data to match the properties/characteristics of observed data
 
Simulate new data to match the properties/characteristics of observed data
  
i2 [0: 184]
+
* i2 [0: 184]
age m=34,sd=12
+
* age m=34,sd=12
treat {0,1}
+
* treat {0,1}
homeless {0,1}
+
* homeless {0,1}
pcs 14-75
+
* pcs 14-75
mcs 7-62
+
* mcs 7-62
cesd 0–60
+
* cesd 0–60
indtot 4-45
+
* indtot 4-45
pss_fr 0-14
+
* pss_fr 0-14
drugrisk 0-21
+
* drugrisk 0-21
sexrisk
+
* sexrisk
satreat (0=no,1=yes)
+
* satreat (0=no,1=yes)
female (0=no,1=yes)
+
* female (0=no,1=yes)
racegrp (black, white, other)
+
* racegrp (black, white, other)
 
   
 
   
 
  # Demographics variables
 
  # Demographics variables
 
 
  Sex <- ifelse(runif(NumSubj)<.5,0,1)
 
  Sex <- ifelse(runif(NumSubj)<.5,0,1)
 
 
  Weight <- as.integer(rnorm(NumSubj, 80,10))
 
  Weight <- as.integer(rnorm(NumSubj, 80,10))
 
 
  Age <- as.integer(rnorm(NumSubj, 62,10))
 
  Age <- as.integer(rnorm(NumSubj, 62,10))
 
 
 
 
   
 
   
 
  # Diagnosis:
 
  # Diagnosis:
 
 
  Dx <- c(rep("PD", 100), rep("HC", 100), rep("SWEDD", 82))
 
  Dx <- c(rep("PD", 100), rep("HC", 100), rep("SWEDD", 82))
 
 
 
 
   
 
   
 
  # Genetics
 
  # Genetics
 
 
  chr12_rs34637584_GT <- c(ifelse(runif(100)<.3,0,1), ifelse(runif(100)<.6,0,1), ifelse(runif(82)<.4,0,1))                              # NumSubj Bernoulli trials
 
  chr12_rs34637584_GT <- c(ifelse(runif(100)<.3,0,1), ifelse(runif(100)<.6,0,1), ifelse(runif(82)<.4,0,1))                              # NumSubj Bernoulli trials
 
 
  chr17_rs11868035_GT <- c(ifelse(runif(100)<.7,0,1), ifelse(runif(100)<.4,0,1), ifelse(runif(82)<.5,0,1))                              # NumSubj Bernoulli trials
 
  chr17_rs11868035_GT <- c(ifelse(runif(100)<.7,0,1), ifelse(runif(100)<.4,0,1), ifelse(runif(82)<.5,0,1))                              # NumSubj Bernoulli trials
+
 
 
 
 
 
  # Clinical          # rpois(NumSubj, 15) + rpois(NumSubj, 6)
 
  # Clinical          # rpois(NumSubj, 15) + rpois(NumSubj, 6)
 
 
  UPDRS_part_I <- c( ifelse(runif(100)<.7,0,1)+ifelse(runif(100)<.7,0,1),
 
  UPDRS_part_I <- c( ifelse(runif(100)<.7,0,1)+ifelse(runif(100)<.7,0,1),
 
 
  ifelse(runif(100)<.6,0,1)+ ifelse(runif(100)<.6,0,1),
 
  ifelse(runif(100)<.6,0,1)+ ifelse(runif(100)<.6,0,1),
 
 
  ifelse(runif(82)<.4,0,1)+ ifelse(runif(82)<.4,0,1) )
 
  ifelse(runif(82)<.4,0,1)+ ifelse(runif(82)<.4,0,1) )
 
 
  UPDRS_part_II <- c(sample.int(20, 100, replace=T), sample.int(14, 100, replace=T),
 
  UPDRS_part_II <- c(sample.int(20, 100, replace=T), sample.int(14, 100, replace=T),
 
 
  sample.int(18, 82, replace=T) )
 
  sample.int(18, 82, replace=T) )
 
 
  UPDRS_part_III <- c(sample.int(30, 100, replace=T), sample.int(20, 100, replace=T),
 
  UPDRS_part_III <- c(sample.int(30, 100, replace=T), sample.int(20, 100, replace=T),
+
    sample.int(25, 82, replace=T) )
            sample.int(25, 82, replace=T) )
+
 
 
 
 
 
  # Time: VisitTime – done automatically below in aggregator
 
  # Time: VisitTime – done automatically below in aggregator
+
 
 
  # Data (putting all components together)
 
  # Data (putting all components together)
 
 
  sim_PD_Data <- cbind(
 
  sim_PD_Data <- cbind(
+
          rep(Cases, each= NumTime),                         # Cases
          rep(Cases, each= NumTime),                                     # Cases
+
          rep(L_caudate_ComputeArea, each= NumTime), # Imaging
+
          rep(Sex, each= NumTime),                           # Demographics
                                          rep(L_caudate_ComputeArea, each= NumTime), # Imaging
+
          rep(Weight, each= NumTime),
+
          rep(Age, each= NumTime),
                                          rep(Sex, each= NumTime),                                         # Demographics
+
          rep(Dx, each= NumTime),                             # Dx
+
          rep(chr12_rs34637584_GT, each= NumTime),           # Genetics
rep(Weight, each= NumTime),
+
          rep(chr17_rs11868035_GT, each= NumTime),
+
          rep(UPDRS_part_I, each= NumTime),                   # Clinical
rep(Age, each= NumTime),
+
          rep(UPDRS_part_II, each= NumTime),
+
          rep(UPDRS_part_III, each= NumTime),
                                          rep(Dx, each= NumTime),                                                           # Dx
+
          rep(c(0,6,12,18), NumSubj)                         # Time
 
                                          rep(chr12_rs34637584_GT, each= NumTime),                     # Genetics
 
 
                                          rep(chr17_rs11868035_GT, each= NumTime),
 
                                               
 
                                          rep(UPDRS_part_I, each= NumTime),                       # Clinical
 
 
rep(UPDRS_part_II, each= NumTime),
 
 
rep(UPDRS_part_III, each= NumTime),
 
 
  rep(c(0,6,12,18), NumSubj)                                         # Time
 
 
 
  )
 
  )
+
 
 
 
  # Assign the column names
 
  # Assign the column names
 
 
  colnames(sim_PD_Data) <- c(
 
  colnames(sim_PD_Data) <- c(
 
  "Cases",
 
  "Cases",
Line 150: Line 114:
 
  "Time"
 
  "Time"
 
  )
 
  )
+
 
 
 
 
 
  # some QC
 
  # some QC
 
 
  summary(sim_PD_Data)
 
  summary(sim_PD_Data)
 
 
  dim(sim_PD_Data)
 
  dim(sim_PD_Data)
 
 
  head(sim_PD_Data)
 
  head(sim_PD_Data)
  

Revision as of 15:16, 20 January 2016

Scientific Methods for Health Sciences - Data Simulation

Importing observed data for exploratory analytics

Using the SOCR Health Evaluation and Linkage to Primary (HELP) Care Dataset we can extract some sample data (00_Tiny_SOCR_HELP_Data_Simmulation.csv).

# Import the observed sample into `data_1`. The CSV file is expected in the
# current working directory.
data_1 <- read.csv(
  "00_Tiny_SOCR_HELP_Data_Simmulation.csv",
  as.is = TRUE, header = TRUE
)
# Alternative ways to load the same file (enable one instead of the call above):
# data_1 <- read.csv(file.choose())
# data_1 <- read.table("00_Tiny_SOCR_HELP_Data_Simmulation.csv", header = TRUE,
#                      sep = ",", row.names = "ID")

# attach() places the columns of data_1 on the search path, so a column can be
# referenced by its bare name, e.g., `age` instead of data_1[["age"]].
attach(data_1)

# Variable legend:
 # i2: maximum number of drinks (standard units) consumed per day in the past 30 days (range 0–184); see also i1
 # treat: randomization group (0=usual care, 1=HELP clinic)
 # pcs: SF-36 Physical Component Score (range 14–75)
 # mcs: SF-36 Mental Component Score (range 7–62)
 # cesd: Center for Epidemiologic Studies Depression scale (range 0–60)
 # indtot: Inventory of Drug Use Consequences (InDUC) total score (range 4–45)
 # pss_fr: perceived social supports (friends, range 0–14); see also dayslink
 # drugrisk: Risk-Assessment Battery (RAB) drug risk score (range 0–21)
 # satreat: any BSAS substance abuse treatment at baseline (0=no, 1=yes)

===Fragment of the data===
<center>
{| class="wikitable" style="text-align:center; " border="1"
|-
! ID ||i2 ||age ||treat ||homeless ||pcs ||mcs ||cesd ||indtot ||pss_fr ||drugrisk ||sexrisk ||satreat ||female ||substance ||racegrp
|-
| 1 ||0 ||25 ||0 ||0 ||49 ||7 ||46 ||37 ||0 ||1 ||6 ||0 ||0 ||cocaine ||black
|-
| 2 ||18 ||31 ||0 ||0 ||48 ||34 ||17 ||48 ||0 ||0 ||11 ||0 ||0 ||alcohol ||white
|-
| 3 ||39 ||36 ||0 ||0 ||76 ||9 ||33 ||41 ||12 ||19 ||4 ||0 ||0 ||heroin ||black
|-
| … || || || || || || || || || || || || || || ||
|-
| 100 ||81 ||22 ||0 ||0 ||37 ||17 ||19 ||30 ||3 ||0 ||10 ||0 ||0 ||alcohol ||other
|}
</center>

===Testing section===

 # Quick look at the imported data: per-column summary statistics.
 summary(data_1)

 # Draw 200 values from a normal distribution with mean 10 and standard
 # deviation 20, then plot their histogram. The argument is spelled `mean`
 # in full rather than relying on partial matching of `m`.
 x.norm <- rnorm(n = 200, mean = 10, sd = 20)
 hist(x.norm, main = "N(10,20) Histogram")

 # Sample mean and standard deviation of the observed `age` column.
 # `[[` extracts the column by name; it is equivalent to the dollar-sign
 # accessor but contains no dollar sign for the wiki renderer to escape.
 mean(data_1[["age"]])
 sd(data_1[["age"]])


Simulate new data to match the properties/characteristics of observed data

  • i2 [0: 184]
  • age m=34,sd=12
  • treat {0,1}
  • homeless {0,1}
  • pcs 14-75
  • mcs 7-62
  • cesd 0–60
  • indtot 4-45
  • pss_fr 0-14
  • drugrisk 0-21
  • sexrisk
  • satreat (0=no,1=yes)
  • female (0=no,1=yes)
  • racegrp (black, white, other)
# Simulate a long-format data set `sim_PD_Data` with one row per subject per
# visit: subject-level variables are repeated NumTime times each and combined
# with the visit time.
#
# Prerequisites: NumSubj, NumTime, Cases and L_caudate_ComputeArea are used
# below but are not assigned anywhere in this section; they must already exist
# when this code runs.

# Number of subjects per diagnosis label and the visit times, declared once so
# every variable below is built from the same sizes.
GroupSize <- c(PD = 100L, HC = 100L, SWEDD = 82L)
VisitTime <- c(0, 6, 12, 18)

# Fail early instead of letting mismatched lengths be recycled silently.
stopifnot(
  NumSubj == sum(GroupSize),
  NumTime == length(VisitTime),
  length(Cases) == NumSubj,
  length(L_caudate_ComputeArea) == NumSubj
)

n_pd <- GroupSize[["PD"]]
n_hc <- GroupSize[["HC"]]
n_swedd <- GroupSize[["SWEDD"]]

# n independent 0/1 draws; each draw is 0 with probability p_zero, else 1.
draw_binary <- function(n, p_zero) {
  ifelse(runif(n) < p_zero, 0, 1)
}

# Demographics variables
Sex <- draw_binary(NumSubj, 0.5)
Weight <- as.integer(rnorm(NumSubj, mean = 80, sd = 10))
Age <- as.integer(rnorm(NumSubj, mean = 62, sd = 10))

# Diagnosis: each label repeated according to its group size
Dx <- rep(names(GroupSize), times = GroupSize)

# Genetics: NumSubj Bernoulli trials, with a different probability per group
chr12_rs34637584_GT <- c(
  draw_binary(n_pd, 0.3),
  draw_binary(n_hc, 0.6),
  draw_binary(n_swedd, 0.4)
)
chr17_rs11868035_GT <- c(
  draw_binary(n_pd, 0.7),
  draw_binary(n_hc, 0.4),
  draw_binary(n_swedd, 0.5)
)

# Clinical
# (alternative noted by the author: rpois(NumSubj, 15) + rpois(NumSubj, 6))
# Part I is the sum of two binary draws, so it takes values in {0, 1, 2}.
UPDRS_part_I <- c(
  draw_binary(n_pd, 0.7) + draw_binary(n_pd, 0.7),
  draw_binary(n_hc, 0.6) + draw_binary(n_hc, 0.6),
  draw_binary(n_swedd, 0.4) + draw_binary(n_swedd, 0.4)
)
# Parts II and III are uniform integer draws from 1..max, with a different
# max per group.
UPDRS_part_II <- c(
  sample.int(20, n_pd, replace = TRUE),
  sample.int(14, n_hc, replace = TRUE),
  sample.int(18, n_swedd, replace = TRUE)
)
UPDRS_part_III <- c(
  sample.int(30, n_pd, replace = TRUE),
  sample.int(20, n_hc, replace = TRUE),
  sample.int(25, n_swedd, replace = TRUE)
)

# Data (putting all components together).
# A data.frame is used rather than cbind(): cbind() on a mix of character (Dx)
# and numeric vectors yields a character matrix, which would turn every
# numeric column into text and make summary() uninformative.
sim_PD_Data <- data.frame(
  Cases = rep(Cases, each = NumTime),                                 # Cases
  L_caudate_ComputeArea = rep(L_caudate_ComputeArea, each = NumTime), # Imaging
  Sex = rep(Sex, each = NumTime),                                     # Demographics
  Weight = rep(Weight, each = NumTime),
  Age = rep(Age, each = NumTime),
  Dx = rep(Dx, each = NumTime),                                       # Dx
  chr12_rs34637584_GT = rep(chr12_rs34637584_GT, each = NumTime),     # Genetics
  chr17_rs11868035_GT = rep(chr17_rs11868035_GT, each = NumTime),
  UPDRS_part_I = rep(UPDRS_part_I, each = NumTime),                   # Clinical
  UPDRS_part_II = rep(UPDRS_part_II, each = NumTime),
  UPDRS_part_III = rep(UPDRS_part_III, each = NumTime),
  Time = rep(VisitTime, times = NumSubj),                             # Time
  stringsAsFactors = FALSE
)

# some QC
summary(sim_PD_Data)
dim(sim_PD_Data)
head(sim_PD_Data)


.....

SMHS DataSimulation Fig1.png


....





Translate this page:

(default)
Uk flag.gif

Deutsch
De flag.gif

Español
Es flag.gif

Français
Fr flag.gif

Italiano
It flag.gif

Português
Pt flag.gif

日本語
Jp flag.gif

България
Bg flag.gif

الامارات العربية المتحدة
Ae flag.gif

Suomi
Fi flag.gif

इस भाषा में
In flag.gif

Norge
No flag.png

한국어
Kr flag.gif

中文
Cn flag.gif

繁体中文
Cn flag.gif

Русский
Ru flag.gif

Nederlands
Nl flag.gif

Ελληνικά
Gr flag.gif

Hrvatska
Hr flag.gif

Česká republika
Cz flag.gif

Danmark
Dk flag.gif

Polska
Pl flag.png

România
Ro flag.png

Sverige
Se flag.gif