阅读 20 SEO

【搬砖】构建预后signature 实践

Cox 回归模型

写在最开头:诚心推荐解螺旋麦子的Cox回归模型R教学视频!!!如果你对cox还稀里糊涂的,下面的链接,点!15分钟就能GET超级舒爽的代码~
【Cox回归的R操作:从单因素到多因素一气呵成https://www.bilibili.com/video/av18918951/

最后的效果图

根据麦子老师的课,我整理的代码如下↓

library(survival)
library(plyr)
library(xlsx)

data(lung)
### 1 COX 风险模型
## 1.1 转换数据类型(基线表要改成数值型)
Baseline = lung
## 1.2 单因素Cox回归,用sex
BaSurv <- Surv(time=Baseline$time,event=Baseline$status)
Baseline$BaSurv <- with(Baseline,BaSurv)  # with函数的用法
SexCox <- coxph(BaSurv~sex,data=Baseline) # ties='breslow' 那么结果与SPSS一样
SexSum <- summary(SexCox)
HR <- round(SexSum$coefficients[,2],2)
PValue <- round(SexSum$coefficients[,5],3)
CI <- paste0(round(SexSum$conf.int[,3:4],2),collapse="-")
Unicox <- data.frame("Characteristics" = "Sex",
                    "Hazard ratio" = HR,
                    "CI95" = CI,
                    "p-Value" = PValue)

# 多个单因素回归,自定义函数,使用lapply(向量化处理)
UniCox <- function(x){
 FML <- as.formula(paste0("BaSurv~",x))
 SexCox <- coxph(FML,data=Baseline)
 SexSum <- summary(SexCox)
 HR <- round(SexSum$coefficients[,2],2)
 PValue <- round(SexSum$coefficients[,5],3)
 CI <- paste0(round(SexSum$conf.int[,3:4],2),collapse="-")
 Unicox <- data.frame("Characteristics" = x,
                      "Hazard ratio" = HR,
                      "CI95" = CI,
                      "p-Value" = PValue)
 return(Unicox)
}
UniCox("ph.ecog")
ValNames <- colnames(Baseline)[4:10]
UniVar <- lapply(ValNames,UniCox) #前面填名字,后面填函数
UniVar <- ldply(UniVar)  #把list转换为data.frame
UniVar$Characteristics[UniVar$p.Value < 0.05]

## 1.3 多因素Cox回归分析
fml=as.formula(paste0("BaSurv~",paste0(UniVar$Characteristics[UniVar$p.Value < 0.05],collapse = "+")))
MultiCox <- coxph(fml,data=Baseline)
MultiSum <- summary(MultiCox)
MultiSum$coefficients
MultiSum$conf.int
MHR <- round(MultiSum$coefficients[,2],2)
MPValue <- round(MultiSum$coefficients[,5],3)
MCIL <- round(MultiSum$conf.int[,3],2)
MCIH <- round(MultiSum$conf.int[,4],2)
MCI <- paste0(MCIL,'-',MCIH)
MultiName=as.character(UniVar$Characteristics[UniVar$p.Value < 0.05])
Multicox <- data.frame("Characteristics" = MultiName,
                    "Hazard ratio" = MHR,
                    "CI95" = MCI,
                    "p-Value" = MPValue)
## 1.4 整合表格
Final <- merge.data.frame(UniVar,Multicox,by="Characteristics",all=T,sort = T)
write.xlsx(Final,'CoxOnline.xlsx',col.names = T,row.names = F,showNA = F)

Final。输出到excel就可以制作paper上那种表格啦

回到正题:如何构建预后signature?

参考文章:doi: 10.3389/fonc.2019.00078


doi: 10.3389/fonc.2019.00078

统计学中,多重检验,两两检验的p值需要进行Bonferroni校正。结合这篇文章(https://www.jianshu.com/p/1aeeac34ce51)理解下容错率FDR。

使用library(My.stepwise)

### 2 MUV Cox,逐步回归 stepwise forward
newBaseline <- Baseline
newBaseline$BoneRe.status <- ifelse(newBaseline$BoneRe.status==2,1,0)
Varlist <- colnames(newBaseline[9:26])
My.stepwise.coxph(Time = "time.m", Status = "BoneRe.status", variable.list = Varlist) # 结果只能直接输出
结果

得到含9个gene的signature。
接下来进行可视化。类似下图。


doi: 10.7150/ijbs.45050

(a) risk score 分布

# 散点图:Risk score 分布
ggplot(data=plot.data)+
  geom_point(mapping = aes(x=id,y=data),
             color=ifelse(plot.data$group==0,"blue","red"),
             show.legend = F)+
  labs(x="Patiens",y="Risk Score")
a

(b) PCA

library("factoextra") 
dat_pca <- t(GSE2034_log) 
DatPCA <- prcomp(dat_pca)
fviz_pca_ind(DatPCA, label="none",habillage=group_risk$group,
             addEllipses=TRUE, ellipse.level=0.95,
             palette = c("red","blue"))
b

(c)t-SNE

library(Rtsne)
tsne_out <- Rtsne(
  dat_pca,
  dims = 2, #降维之后的维度,默认值为2
  pca = FALSE, #是否对原始数据进行PCA分析,再用PCA得到的topN主成分进行后续分析
  perplexity = 60, #参数的取值必须小于(nrow(data) - 1 )/ 3
  theta = 0.0,
  max_iter = 1000 # 最大迭代次数
)
tsne_res <- as.data.frame(tsne_out$Y)
colnames(tsne_res) <- c("tSNE1","tSNE2")
head(tsne_res)
Group=group_risk$group
ggplot(tsne_res,aes(tSNE1,tSNE2,color=Group)) + 
  geom_point() + theme_bw() + 
  geom_hline(yintercept = 0,lty=2,col="black") + 
  geom_vline(xintercept = 0,lty=2,col="black") + #lwd
  theme(plot.title = element_text(hjust = 0.5)) + 
  labs(title = "tSNE plot")
c

(d)略。
(e)生存曲线

Baseline$RiskScore = RiskScore
Baseline$Group = ifelse(Baseline$RiskScore>= median(Baseline$RiskScore),
                        "high-risk","low-risk")
ReSurv = Surv(time = Baseline$time.m, event = Baseline$relapse.status)
fit <- survfit(ReSurv ~ Group, data = Baseline) 
ggsurvplot(fit, # 创建的拟合对象
           #surv.median.line = "hv",  # 增加中位生存时间
           #conf.int = TRUE, # 显示置信区间
           pval = TRUE, # 添加P值
           # add.all = TRUE,# 添加总患者生存曲线
           # risk.table = TRUE)
e

(f)ROC

library(pROC)
BoneRe.risk = roc(response = Baseline$BoneRe.status, 
                  predictor = Baseline$RiskScore,ci=T,auc=T)
ggroc(BoneRe.risk,col="red")+
  theme_bw()+
  geom_abline(intercept=1,slope=1)+
  labs(x="Specificity",y="Sensitivity",title = "ROC")+
  annotate("text", x= 0.75, y= 0.75,label="AUC = 0.5726")
f

综上,我的signature 表现不是很好。。。 :)

作者:yadandb

原文链接:https://www.jianshu.com/p/9121d0254292

文章分类
后端
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容, 请发送邮件至 gxwowoo@163.com 举报,一经查实,本站将立刻删除。
相关推荐