SMHS TimeSeriesAnalysis LOS

From SOCR
Revision as of 14:31, 19 May 2016 by Pineaumi (talk | contribs) (Plot Response Residuals Over Time From Model 3)
Jump to: navigation, search

SMHS: Time-series Analysis - Applications

Time series regression studies in environmental epidemiology (London Ozone Study 2002-2006)

A time series regression analysis of a London ozone dataset including daily observations from 1 January 2002 to 31 December 2006. Each day has records of (mean) ozone levels that day, and the total number of deaths that occurred in the city.

Questions

  • Is there an association between day-to-day variation in ozone levels and daily risk of death?
  • Is ozone exposure associated with the outcome is death or other confounders - temperature and relative humidity?
  • Reference: Bhaskaran K, Gasparrini A, Hajat S, Smeeth L, Armstrong B. Time series regression studies in environmental epidemiology. International Journal of Epidemiology. 2013;42(4):1187-1195. doi:10.1093/ije/dyt092. http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3780998/

    Load the Data

    library(foreign)
    #07_LondonOzonPolutionData_2006_TS.csv
    #data <- read.csv("https://umich.instructure.com/files/720873/download?download_frd=1")
    data <- read.dta("https://umich.instructure.com/files/721042/download?download_frd=1")
    
    #Set the Default Action for Missing Data to na.exclude
    options(na.action="na.exclude")
    

    Exploratory Analyses

    #set the plotting parameters for the plot

    oldpar <- par(no.readonly=TRUE)
    par(mex=0.8,mfrow=c(2,1))
    

    #sub-plot for daily deaths, with vertical lines defining years

    plot(data$\$$date,data$\$$numdeaths,pch=".",main="Daily deaths over time",  
       ylab="Daily number of deaths",xlab="Date")
    abline(v=data$\$$date[grep("-01-01",data$\$$date)],col=grey(0.6),lty=2)
    

    #plot for ozone levels

    plot(data$\$$date,data$\$$ozone,pch=".",main="Ozone levels over time",
        ylab="Daily mean ozone level(ug/m3)",xlab="Date")
    abline(v=data$\$$date[grep("-01-01",data$\$$date)],col=grey(0.6),lty=2)
    par(oldpar)
    layout(1)
    

    #descriptive statistics

    summary(data)
    

    #correlations

    cor(data[,2:4])
    #scale exposure
    data$\$$ozone10 <- data$\$$ozone/10
    

    Modelling Seasonality and Long-Term Trend

    #option 1: time-stratified model
    #generate month and year

    data$\$$month <- as.factor(months(data$\$$date,abbr=TRUE))
    data$\$$year <- as.factor(substr(data$\$$date,1,4))
    

    #fit a Poisson model with a stratum for each month nested in year
    #(use of quasi-Poisson family for scaling the standard errors)

    model1 <- glm(numdeaths ~ month/year,data,family=quasipoisson) 
    summary(model1)
    

    #compute predicted number of deaths from this model

    pred1 <- predict(model1,type="response")
    

    #Figure 2a: Three alternative ways of modelling long-term patterns in the data (seasonality and trends)

    plot(data$\$$date,data$\$$numdeaths,ylim=c(100,300),pch=19,cex=0.2,col=grey(0.6),
        main="Time-stratified model (month strata)",ylab="Daily number of deaths", xlab="Date")
    lines(data$\$$date, pred1,lwd=2)
    
    #Option 2: periodic functions model (fourier terms)<br>
    #use function harmonic, in package '''tsModel''' 
    
     install.packages("tsModel"); library(tsModel)
    
    #4 sine-cosine pairs representing different harmonics with period 1 year
    
     data$\$$time <- seq(nrow(data))
    fourier <- harmonic(data$\$$time,nfreq=4,period=365.25)
    
    #fit a Poisson model Fourier terms + linear term for trend <br>
    #(use of quasi-Poisson family for scaling the standard errors)
    
     model2 <- glm(numdeaths ~ fourier +time,data,family=quasipoisson) 
     summary(model2)
    
    #compute predicted number of deaths from this model
    
     pred2 <- predict(model2,type="response")
    
    #Figure 2b
    
     plot(data$\$$date, data$\$$numdeaths,ylim=c(100,300),pch=19,cex=0.2,col=grey(0.6),
         main="Sine-cosine functions (Fourier terms)",ylab="Daily number of deaths", xlab="Date")
     lines(data$\$$date, pred2,lwd=2)
    


    #Option 3: Spline Model: Flexible Spline Functions
    #generate spline terms, use function bs in package splines

    library(splines)

    #A CUBIC B-SPLINE WITH 32 EQUALLY-SPACED KNOTS + 2 BOUNDARY KNOTS
    #Note: the 35 basis variables are set as df, with default knots placement. see ?bs
    #other types of splines can be produced with the function ns. see ?ns

    spl <- bs(data$\$$time,degree=3,df=35)<br>
    #Fit a Poisson Model Fourier Terms + Linear Term for Trend
    
     model3 <- glm(numdeaths ~ spl,data,family=quasipoisson)
     summary(model3)
    
    #compute predicted number of deaths from this model
    
     pred3 <- predict(model3,type="response")
    
    #FIGURE 2C
    
     plot(data$\$$date,data$\$$numdeaths,ylim=c(100,300),pch=19,cex=0.2,col=grey(0.6), 
         main="Flexible cubic spline model",ylab="Daily number of deaths", xlab="Date")
     lines(data$\$$date,pred3,lwd=2)
    

    Plot Response Residuals Over Time From Model 3

    #GENERATE RESIDUALS

    res3 <- residuals(model3,type="response")
    

    #Figure 3: Residual variation in daily deaths after ‘removing’ (i.e. modelling) season and long-term trend.

    plot(data$\$$date,res3,ylim=c(-50,150),pch=19,cex=0.4,col=grey(0.6),
         main="Residuals over time",ylab="Residuals (observed-fitted)",xlab="Date")
     abline(h=1,lty=2,lwd=2)
    
    ==='"`UNIQ--h-3--QINU`"'Estimate ozone-mortality association - controlling for confounders===
    
    #compare the RR (and CI using '''ci.lin''' in package '''Epi''')
    
     install.packages("Epi"); library(Epi)
    
    
    #unadjusted model
    
     model4 <- glm(numdeaths ~ ozone10,data,family=quasipoisson)
     summary(model4)
     (eff4 <- ci.lin(model4,subset="ozone10",Exp=T))
    
    
    #control for seasonality (with spline as in model 3)
    
     model5 <- update(model4, .~. + spl)
     summary(model5)
     (eff5 <- ci.lin(model5,subset="ozone10",Exp=T))
    
    
    #control for temperature - temperature modelled with categorical variables for deciles
     
     cutoffs <- quantile(data$\$$temperature,probs=0:10/10)
    tempdecile <- cut(data$\$$temperature,breaks=cutoffs,include.lowest=TRUE)
     model6 <- update(model5,.~.+tempdecile)
     summary(model6)
     (eff6 <- ci.lin(model6,subset="ozone10",Exp=T))
    
    ==='"`UNIQ--h-4--QINU`"'Build a summary table with effect as percent increase===
    
     tabeff <- rbind(eff4,eff5,eff6)[,5:7]
     tabeff <- (tabeff-1)*100
     dimnames(tabeff) <- list(c("Unadjusted","Plus season/trend","Plus temperature"), c("RR","ci.low","ci.hi"))
     round(tabeff,2)
    
    
    #explore the lagged (delayed) effects
    
    #SINGLE-LAG MODELS
    
    
    #prepare the table with estimates
    
     tablag <- matrix(NA,7+1,3,dimnames=list(paste("Lag",0:7),  c("RR","ci.low","ci.hi")))
    
    
    #iterate
    
     for(i in 0:7) {
         #lag ozone and temperature variables
         ozone10lag <- Lag(data$\$$ozone10,i)
        tempdecilelag <- cut(Lag(data$\$$temperature,i),breaks=cutoffs,   include.lowest=TRUE)
    
    
    #define the transformation for temperature
    
    #lag same as above, but with strata terms instead than linear
     
         mod <- glm(numdeaths ~ ozone10lag + tempdecilelag + spl,data,   family=quasipoisson)
         tablag[i+1,] <- ci.lin(mod,subset="ozone10lag",Exp=T)[5:7]</blockquote>
     }
     tablag
    
    
    #Figure 4A: Modelling lagged (delayed) associations between ozone exposure and survival/death outcome.
    
     plot(0:7,0:7,type="n",ylim=c(0.99,1.03),main="Lag terms modelled one at a time", xlab="Lag (days)",
         ylab="RR and 95%CI per 10ug/m3 ozone increase")</blockquote>
     abline(h=1)
     arrows(0:7,tablag[,2],0:7,tablag[,3],length=0.05,angle=90,code=3)
     points(0:7,tablag[,1],pch=19)
    
    ==='"`UNIQ--h-5--QINU`"'Model Checking===
    
    #generate deviance residuals from unconstrained distributed lag model
    
     res6 <- residuals(model6,type="deviance")
    
    
    #Figure A1: Plot of deviance residuals over time (London data)
    
     plot(data$\$$date,res6,ylim=c(-5,10),pch=19,cex=0.7,col=grey(0.6),
         main="Residuals over time",ylab="Deviance residuals",xlab="Date")
    abline(h=0,lty=2,lwd=2)
    


    #Figure A2a: Residual plot for Model6: the residuals relate to the unconstrained distributed lag model with ozone

    #(lag days 0 to 7 inclusive), adjusted for temperature at the same lags. The spike in the plot of residuals relate to

    #the 2003 European heat wave, and indicate that the current model does not explain the data over this period well.

    pacf(res6,na.action=na.omit,main="From original model")
    


    #Include the 1-Day Lagged Residual in the Model

    model9 <- update(model6,.~.+Lag(res6,1))
    


    #Figure A2b: residuals related to the unconstrained distributed lag model with ozone (lag days 0 to 7 inclusive),

    #adjusted for temperature at the same lags

    pacf(residuals(model9,type="deviance"),na.action=na.omit,   
        main="From model adjusted for residual autocorrelation")
    

    Irish Longitudinal Study on Ageing Example

    The Irish Longitudinal Study on Ageing (TILDA), 2009-2011

    http://www.icpsr.umich.edu/icpsrweb/ICPSR/studies/34315

    Kenny, Rose Anne. The Irish Longitudinal Study on Ageing (TILDA),

    2009-2011. ICPSR34315-v1. Ann Arbor, MI: Inter-university Consortium

    Bibliographic Citation: for Political and Social Research [distributor], 2014-07-16.

    http://doi.org/10.3886/ICPSR34315.v1


    The Irish Longitudinal Study on Ageing (TILDA) is a major inter-institutional initiative led by Trinity College, Dublin, to improve in the quantity and quality of data, research and information related to aging in Ireland. Eligible respondents for this study include individuals aged ≥ 50 and their spouses or partners of any age. Annual interviews on a two yearly basis (N=8,504 people) in Ireland, collecting detailed information on all aspects of their lives, including the economic (pensions, employment, living standards), health (physical, mental, service needs and usage) and social aspects (contact with friends and kin, formal and informal care, social participation). Survey interviews, physical, and biological data are collected along with demographic variables (e.g., age, sex, marital status, household composition, education, and employment), and activities of daily living (ADL), aging, childhood, depression (psychology), education, employment, exercise, eyesight, families, family life, etc.

    # download the RDA data object (ICPSR_34315.zip)
    # load in the data into RStudio
    dataURL <- "https://umich.instructure.com/files/703606/download?download_frd=1"
    load(url(dataURL))
    head(da34315.0001); data_colnames <- colnames(da34315.0001)
    vars <- da34315.0001
    
    vars; head(vars); summary(vars); data_colnames
    
    [1] ”ID" ”HOUSEHOLD”
    [3] "CLUSTER" "STRATUM"
    [5] ”REGION” "CAPIWEIGHT"
    [7] "IN_SCQ" "SCQ_WEIGHT"
    [9] "AGE" "SEX"
    [11] "NML" "CM003"
    ...
    [1673] "HA_WEIGHT" "IN_HA"
    [1675] "SR_HEIGHT_CENTIMETRES" "HEIGHT"
    [1677] "SR_WEIGHT_KILOGRAMMES" "WEIGHT"
    [1679] "COGMMSE" "FRGRIPSTRENGTHD"
    [1681] "FRGRIPSTRENGTHND" "VISUALACUITYLEFT"
    [1683] "VISUALACUITYRIGHT" "BPSEATEDSYSTOLIC1"
    [1685] "BPSEATEDSYSTOLIC2" "BPSEATEDDIASTOLIC1"
    [1687] "BPSEATEDDIASTOLIC2" "BPSEATEDSYSTOLICMEAN"
    [1689] "BPSEATEDDIASTOLICMEAN" "BPHYPERTENSION"
    [1691] "FRBMI" "FRWAIST"
    [1693] "FRHIP" "FRWHR"
    [1695] "WEARGLASSES" "WOREGLASSESDURINGTEST"
    [1697] "BLOODS_CHOL" "BLOODS_HDL"
    [1699] "BLOODS_LDL" "BLOODS_TRIG"
    [1701] "BLOODS_TIMEBETWEENLASTMEALANDASS" "DELAY_HA"
    [1703] "PICMEMSCORE" "PICRECALLSCORE"
    [1705] "PICRECOGSCORE" "VISREASONING"
    [1707] "GRIPTEST1D" "GRIPTEST2D"
    [1709] "GRIPTEST1ND" "GRIPTEST2ND"
    [1711] "GRIPTESTDOMINANT" "GRIPTESTSITTING"
    [1713] "TEMPERATURE" "SCQSOCACT1"
    ...
    [1981] "SOCPROXCHLD4" "SCRFLU"
    [1983] "SCRCHOL" "SCRPROSTATE"
    [1985] "SCRBREASTLUMPS" "SCRMAMMOGRAM"
    [1987] "BEHALC_FREQ_WEEK" "BEHALC_DRINKSPERDAY"
    [1989] "BEHALC_DRINKSPERWEEK" "BEHALC_DOH_LIMIT"
    [1991] "BEHSMOKER" "BEHCAGE"
    # extract some data elements
    df1 <- data.frame(vars)
    
    df_Irish_small <- df1[, c("ID",  "HOUSEHOLD",  "AGE", "SEX"  , "HA_WEIGHT", "HEIGHT" ,                         
       "WEIGHT", "COGMMSE", "FRGRIPSTRENGTHD", "VISUALACUITYLEFT",       
       "VISUALACUITYRIGHT",    "BPSEATEDSYSTOLIC1",      
       "BPSEATEDSYSTOLIC2",       "BPSEATEDDIASTOLIC1",     
       "BPSEATEDDIASTOLIC2",      "BPSEATEDSYSTOLICMEAN",   
       "BPSEATEDDIASTOLICMEAN",   "BPHYPERTENSION",
       "WEARGLASSES",   "WOREGLASSESDURINGTEST",  
       "BLOODS_CHOL",   "BLOODS_HDL",    
       "BLOODS_LDL",  "BLOODS_TRIG",   
       "PICMEMSCORE",   "PICRECALLSCORE",
       "PICRECOGSCORE", "VISREASONING",  
       "TEMPERATURE",  "SOCPROXCHLD4",   "SCRFLU", "SCRCHOL",        "SCRPROSTATE",   
       "SCRBREASTLUMPS", "SCRMAMMOGRAM",  
       "BEHALC_FREQ_WEEK",        "BEHALC_DRINKSPERDAY",    
       "BEHALC_DRINKSPERWEEK",    "BEHALC_DOH_LIMIT",       
       "BEHSMOKER",      "BEHCAGE" )
       ]
    
    summary(df_Irish_small); head(df_Irish_small)
    write.table(df_Irish_small , "data.csv", sep=",")
    

    Applications

    Frailty associations with sustained attention measures5

    Multinomial logistic regression analyses were used to examine frailty as the outcome variable were performed to determine associations between the sustained attention measures and prefrailty or frailty. Binary logistic regression analyses determined significant associations between the sustained attention measures and the individual frailty components. The regression models included age and gender and were also extended to include additional measures of cognitive processing speed (cognitive RT from CRT), executive function (Delta CTT), number of chronic conditions, and number of medications. We also included the quadratic term age2 to allow for any potential nonlinear effects of age on frailty in each regression model. For the independent variables in the multinomial logistic regression models, relative risk (RR) ratios with 95% confidence intervals (CIs) were provided. For the independent variables in the binary logistic regression models, OR with 95% CI were provided.

    Multivariable logistic regression examining the association between social relationships and depression, anxiety, and suicidal ideation6

    5http://psychsocgerontology.oxfordjournals.org/content/early/2013/03/13/geronb.gbt009.full

    6http://www.jad-journal.com/article/S0165-0327%2815%2900145-7/fulltext

    Appendix

    See also




    Translate this page:

    (default)
    Uk flag.gif

    Deutsch
    De flag.gif

    Español
    Es flag.gif

    Français
    Fr flag.gif

    Italiano
    It flag.gif

    Português
    Pt flag.gif

    日本語
    Jp flag.gif

    България
    Bg flag.gif

    الامارات العربية المتحدة
    Ae flag.gif

    Suomi
    Fi flag.gif

    इस भाषा में
    In flag.gif

    Norge
    No flag.png

    한국어
    Kr flag.gif

    中文
    Cn flag.gif

    繁体中文
    Cn flag.gif

    Русский
    Ru flag.gif

    Nederlands
    Nl flag.gif

    Ελληνικά
    Gr flag.gif

    Hrvatska
    Hr flag.gif

    Česká republika
    Cz flag.gif

    Danmark
    Dk flag.gif

    Polska
    Pl flag.png

    România
    Ro flag.png

    Sverige
    Se flag.gif