Difference between revisions of "SMHS LinearModeling MachineLearning"
| Line 84: | Line 84: | ||
kernel ="radial") | kernel ="radial") | ||
summary(svm_model) | summary(svm_model) | ||
| + | |||
| + | Appendix | ||
| + | |||
| + | Example 1: Simulation (subject, day, treatment, observation) | ||
| + | |||
| + | <blockquote><i>Obs ~ Treatment + Day + Subject(Treatment)+ Day*Subject(Treatment)+ ε.</i></blockquote> | ||
| + | |||
| + | This model accounts for: | ||
| + | |||
| + | Response = Obs | ||
| + | |||
| + | Fixed effects: | ||
| + | |||
| + | Treatment (fixed) | ||
| + | |||
| + | Day (fixed) | ||
| + | |||
| + | Treatment*Day interaction | ||
| + | |||
| + | Random Effects: | ||
| + | |||
| + | Subject nested within Treatment (random) | ||
| + | |||
| + | Day crossed with "Subject within Treatment" (random) | ||
| + | |||
| + | mydata <- data.frame( | ||
| + | Subject = c(13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 30, 31, 32, 33, | ||
| + | 34, 35, 36, 37, 38, 39, 40, 62, 63, 64, 65, 13, 14, 15, 16, 17, 18, | ||
| + | 19, 20, 21, 22, 23, 24, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, | ||
| + | 40, 62, 63, 64, 65, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, | ||
| + | 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 62, 63, 64, 65), | ||
| + | Day = c(rep(c("Day1", "Day3", "Day6"), each=28)), | ||
| + | Treatment = c(rep(c("B", "A", "C", "B", "C", "A", "A", "B", "A", "C", "B", "C", | ||
| + | "A", "A", "B", "A", "C", "B", "C", "A", "A"), each = 4)), | ||
| + | Obs = c(6.472687, 7.017110, 6.200715, 6.613928, 6.829968, 7.387583, 7.367293, | ||
| + | 8.018853, 7.527408, 6.746739, 7.296910, 6.983360, 6.816621, 6.571689, | ||
| + | 5.911261, 6.954988, 7.624122, 7.669865, 7.676225, 7.263593, 7.704737, | ||
| + | 7.328716, 7.295610, 5.964180, 6.880814, 6.926342, 6.926342, 7.562293, | ||
| + | 6.677607, 7.023526, 6.441864, 7.020875, 7.478931, 7.495336, 7.427709, | ||
| + | 7.633020, 7.382091, 7.359731, 7.285889, 7.496863, 6.632403, 6.171196, | ||
| + | 6.306012, 7.253833, 7.594852, 6.915225, 7.220147, 7.298227, 7.573612, | ||
| + | 7.366550, 7.560513, 7.289078, 7.287802, 7.155336, 7.394452, 7.465383, | ||
| + | 6.976048, 7.222966, 6.584153, 7.013223, 7.569905, 7.459185, 7.504068, | ||
| + | 7.801867, 7.598728, 7.475841, 7.511873, 7.518384, 6.618589, 5.854754, | ||
| + | 6.125749, 6.962720, 7.540600, 7.379861, 7.344189, 7.362815, 7.805802, | ||
| + | 7.764172, 7.789844, 7.616437, NA, NA, NA, NA)) | ||
| + | |||
| + | install.packages("lme4") | ||
| + | library("lme4", lib.loc="~/R/win-library/3.1") | ||
| + | m1 <- lmer(Obs ~ Treatment * Day + (1 | Subject), mydata) | ||
| + | m1 | ||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
Revision as of 13:44, 3 March 2016
SMHS Linear Modeling - Machine Learning Algorithms
Scientific inference based on fixed and random effect models, assumptions, and mixed effects logistic regression.
Questions:
- How can we tie human intuition and computer-generated results together to obtain a reliable, effective, and efficient decision-support system (one that facilitates forecasting)?
- Niels Bohr – “It is difficult to make predictions, especially about the future” …
- Can we classify the data in an unsupervised manner?
Prediction
For most of the machine learning algorithms (including first-order linear regression), we:
- first generate the model using training data, and then
- predict values for test/new data.
Predictions are made using the R predict function (type ?predict.name for help, where name is the function name corresponding to the algorithm). The first argument of predict is usually the variable storing the fitted model, and the second argument is a matrix or data frame of test data to which the model is applied. predict can be called in two ways: as the generic predict, or as the method-specific predict.name.
Example:
#mydata <- read.table('https://umich.instructure.com/files/330381/download?download_frd=1&verifier=HpfmjfMFaMsk7rIpfPx0tmz960oTW7JA8ZonGvVC',as.is=T, header=T) # 01a_data.txt
# mydata <- read.table('data.txt',as.is=T, header=T)
# (1) First, there are different approaches to split the data (partition the
# data) into training and testing sets.

# TRAINING: 75% of the sample size
sample_size <- floor(0.75 * nrow(mydata))

# Set the seed to make the partition reproducible
set.seed(1234)
train_ind <- sample(seq_len(nrow(mydata)), size = sample_size)
train <- mydata[train_ind, ]

# TESTING DATA: all rows that were not drawn for the training set
test <- mydata[-train_ind, ]

# Fit a linear model on the training data, then predict for the test data
lin.mod <- lm(Weight ~ Height * Team, data = train)
predicted.values <- predict(lin.mod, newdata = test)
Data Modeling/Training
Logistic Regression:
# Logistic regression of the binary indicator (Weight > 200) on Height, Team
# and their interaction, fitted to the training data.
glm_model <- glm(
  ifelse(Weight > 200, 1, 0) ~ Height * Team,
  family = binomial(link = "logit"),
  data = train
)
K-Means Clustering
# Numeric feature matrices (Height, Weight, Age) for the training and test data
train.1 <- cbind(train$Height, train$Weight, train$Age)
test.1 <- cbind(test$Height, test$Weight, test$Age)
# Binary class label for the training rows: 1 when Weight exceeds 200, else 0
Weight.1 <- ifelse(train$Weight > 200, 1, 0)
head(train.1)
# Partition the training observations into 3 clusters
kmeans_model <- kmeans(train.1, 3)
# Scatter plot of the training matrix, colored by assigned cluster
plot(train.1, col = kmeans_model$cluster)
# Mark the 3 cluster centers, one color per cluster
points(kmeans_model$centers, col = 1:3, pch = 8, cex = 2)
<b>K-Nearest Neighbor Classification</b>
# Requires the "class" package; if it is missing run: install.packages("class")
library("class")
# Classify each row of test.1 from its 5 nearest neighbours in train.1,
# using the 0/1 values in Weight.1 as the training class labels.
knn_model <- knn(
  train = train.1,
  test = test.1,
  cl = as.factor(Weight.1),
  k = 5
)
# Show the predicted classes graphically and as a summary
plot(knn_model)
summary(knn_model)
<b>Naïve Bayes Classifier</b>
# install.packages("e1071")
library("e1071")
# Naive Bayes classifier for the binary outcome (Weight > 200) from Height and
# Age. The x/y interface is used because the formula interface does not accept
# interaction terms (Height*Age) and train.1 is a matrix without column names.
nbc_model <- naiveBayes(
  x = train[, c("Height", "Age")],
  y = as.factor(ifelse(train$Weight > 200, 1, 0))
)
<b>Decision Trees (CART)</b>
# install.packages("rpart")
library("rpart")
# Classification tree for the binary outcome (Weight > 200) from Height and
# Age. The model is fitted on the `train` data frame because train.1 is a
# matrix without column names, so Weight/Height/Age cannot be found in it.
cart_model <- rpart(
  as.factor(Weight > 200) ~ Height + Age,
  data = train,
  method = "class"
)
# Draw the tree and label its nodes
plot(cart_model)
text(cart_model)
<b>AdaBoost</b>
# install.packages("ada")
# x is the matrix of features, and y is a vector of 0-1 class labels.
library("ada")
# NOTE(review): Weight is one of the features while the label Weight.1 is
# derived from Weight (> 200) -- confirm this is intended for the demo.
boost_model <- ada(
  x = cbind(train$Height, train$Weight, train$Age),
  y = Weight.1
)
plot(boost_model)
boost_model
Support Vector Machines (SVM)
# install.packages("e1071")
# svm() is provided by the e1071 package
library("e1071")
# Radial-kernel SVM classifier on (Height, Weight, Age) with the 0/1 labels
# in Weight.1 as the response.
svm_model <- svm(
  x = cbind(train$Height, train$Weight, train$Age),
  y = as.factor(Weight.1),
  kernel = "radial"
)
summary(svm_model)
Appendix
Example 1: Simulation (subject, day, treatment, observation)
Obs ~ Treatment + Day + Subject(Treatment)+ Day*Subject(Treatment)+ ε.
This model accounts for:
Response = Obs
Fixed effects:
Treatment (fixed)
Day (fixed)
Treatment*Day interaction
Random Effects:
Subject nested within Treatment (random)
Day crossed with "Subject within Treatment" (random)
# Simulated repeated-measures data: 28 subjects, each observed on 3 days
# (84 rows). Rows are ordered by Day, and within each day by Subject.
mydata <- data.frame(
  # The same 28 subject IDs are repeated for each of the 3 days
  Subject = rep(c(13:24, 29:40, 62:65), times = 3),
  Day = rep(c("Day1", "Day3", "Day6"), each = 28),
  # Subjects are assigned to treatments in consecutive groups of 4; the
  # assignment is the same on every day
  Treatment = rep(
    rep(c("B", "A", "C", "B", "C", "A", "A"), each = 4),
    times = 3
  ),
  # The last four observations (Day6, subjects 62-65) are missing
  Obs = c(6.472687, 7.017110, 6.200715, 6.613928, 6.829968, 7.387583, 7.367293,
          8.018853, 7.527408, 6.746739, 7.296910, 6.983360, 6.816621, 6.571689,
          5.911261, 6.954988, 7.624122, 7.669865, 7.676225, 7.263593, 7.704737,
          7.328716, 7.295610, 5.964180, 6.880814, 6.926342, 6.926342, 7.562293,
          6.677607, 7.023526, 6.441864, 7.020875, 7.478931, 7.495336, 7.427709,
          7.633020, 7.382091, 7.359731, 7.285889, 7.496863, 6.632403, 6.171196,
          6.306012, 7.253833, 7.594852, 6.915225, 7.220147, 7.298227, 7.573612,
          7.366550, 7.560513, 7.289078, 7.287802, 7.155336, 7.394452, 7.465383,
          6.976048, 7.222966, 6.584153, 7.013223, 7.569905, 7.459185, 7.504068,
          7.801867, 7.598728, 7.475841, 7.511873, 7.518384, 6.618589, 5.854754,
          6.125749, 6.962720, 7.540600, 7.379861, 7.344189, 7.362815, 7.805802,
          7.764172, 7.789844, 7.616437, NA, NA, NA, NA)
)

# install.packages("lme4")
# Load lme4 from the default library paths (no machine-specific lib.loc)
library("lme4")
# Mixed model: fixed Treatment, Day and Treatment:Day effects, plus a random
# intercept for each Subject
m1 <- lmer(Obs ~ Treatment * Day + (1 | Subject), data = mydata)
m1
....
- SOCR Home page: http://www.socr.umich.edu
Translate this page: