geo矩阵的获取有两种方式:
- library(DESeq2)
library(DESeq2)
getwd()
dir.create("G:/r/duqiang_IPF/GSE150910_IPF_Donors_subsets_using_getGEO/")
setwd("G:/r/duqiang_IPF/GSE150910_IPF_Donors_subsets_using_getGEO/")
if(1==!1){
getwd()
Sys.setenv("VROOM_CONNECTION_SIZE"=999999999)
f='GSE150910_eSet.Rdata'
if(!file.exists(f)){
gset <- getGEO('GSE150910', destdir=".",
AnnotGPL = F, ## 注释文件
getGPL = F) ## 平台文件
save(gset,file=f) ## 保存到本地
}
}
load("G:/r/duqiang_IPF/GSE150910_IPF_Donors_subsets/GSE150910_eSet.Rdata")
head(pData(gset[[1]])$"status")
head(pData(gset[[1]])$"age:ch1")
#提取meta信息
meta_all=pData(gset[[1]])
2.在geo官网把文件下载下来,然后读取
expr.150910<-read.table(file="G:/r/duqiang_IPF/GSE150910_IPF_Donors_subsets/GSE150910_gene-level_count_file.csv/GSE150910_gene-level_count_file.csv",
header=TRUE,
sep=",", #Error in scan(file = file, what = what, sep = sep, quote = quote, dec = dec
comment.char="!")
expr.150910=expr.df %>% column_to_rownames(var = "symbol") #列名变成行名
head(expr.150910)[1:6,1:12]
length(rownames(expr.150910))
#提取meta信息
meta_all=pData(gset[[1]])
head(meta_all)
meta.150910=meta_all[,c("race (hispanic;1, black;3,asian;4, white;5, other;6):ch1",
"Sex:ch1",
"age:ch1","diagnosis:ch1",
"ever_smoked:ch1","plate:ch1",
"institution:ch1","sample type:ch1",
"title")]
colnames(meta.150910)=c("race","Sex","age","diagnosis","ever_smoked",
'plate','institution','sample',
"title")
meta.150910=meta.150910 %>% rownames_to_column(var = "GSM_acc")
rownames(meta.150910)=meta.150910$title
head(meta.150910)
#表达矩阵样本顺序和meta信息是否一致
rownames(meta.150910)
colnames(expr.150910)
match(c(1,3,2,6),c(6,2,3,1))
c(6,2,3,1)[match(c(1,3,2,6),c(6,2,3,1))]
length(match(rownames(meta.150910),colnames(expr.150910)))
expr.150910[,match(rownames(meta.150910),colnames(expr.150910))]
expr.150910=expr.150910[,match(rownames(meta.150910),colnames(expr.150910))]
head(expr.150910)
identical(colnames(expr.150910),rownames(meta.150910))
##deseq2 素材准备 包括矩阵和meta信息
table(meta.150910$sample,meta.150910$diagnosis)
my_coldata_explant=meta.150910[meta.150910$sample=="explant" &
meta.150910$diagnosis %in% c("control",'ipf'),]
head(my_coldata_explant)
my_rawcout_explant=expr.150910[,match(rownames(my_coldata_explant),colnames(expr.150910))]
head(my_rawcout_explant)
identical(rownames(my_coldata_explant),colnames(my_rawcout_explant))
开始deseq2
library(DESeq2)
dds <- DESeqDataSetFromMatrix(countData = my_rawcout_explant,#生成deseq对象结构
colData = my_coldata_explant,
design= ~Sex+age+race+plate+institution + diagnosis)
dds <- DESeq(dds)
#设置好 到底是谁比谁
dds@design
table(my_coldata_explant$diagnosis)
dds$diagnosis <- relevel(dds$diagnosis, ref = "control") #dds$group <- factor(dds$group, levels = c("control","ipf"))
resultsNames(dds)
#save(dds,file = "G:/r/duqiang_IPF/GSE150910_IPF_Donors_subsets_using_getGEO/dds_for_explants_ipf_control.rds")
load("G:/r/duqiang_IPF/GSE150910_IPF_Donors_subsets_using_getGEO/dds_for_explants_ipf_control.rds")
res=results(dds,name = "diagnosis_ipf_vs_control")
head(res)
res$genename=rownames(res)
getwd()
openxlsx::write.xlsx(res,file ="G:/r/duqiang_IPF/GSE150910_IPF_Donors_subsets_using_getGEO/degs_explants_for_ipf vs control.xlsx" )
收缩
# or to shrink log fold changes association with condition:
#BiocManager::install('apeglm')
res <- lfcShrink(dds, coef="diagnosis_ipf_vs_control", type="apeglm")
head(res)
length(rownames(res)) #18838
res$genename=rownames(res)
res=as_tibble(res)
res=res %>% filter(abs(log2FoldChange)>1 & pvalue<0.05 )
length(rownames(res)) #1595
openxlsx::write.xlsx(res,file ="G:/r/duqiang_IPF/GSE150910_IPF_Donors_subsets_using_getGEO/degs_explants_for_ipf vs control_filtered.xlsx" )