admin管理员组文章数量:1037775
孟德尔随机分析—如何解决数据存在水平多效性
孟德尔随机分析—如何解决数据存在水平多效性
进行孟德尔随机分析时候,发现结果MR-PRESSO(P-value = 0.628,未检测到显著多效性),而Egger截距显著偏离0(P = 0.0368,检测到水平多效性),在chatGPT的帮助下,使用使用 Lasso 回归( RadialMR 包)减少多效性 SNP,使结果Egger截距,P>0.05,这里记录了解决办法。
You:
这是一段暴露颞下颌关节紊乱(TMD)对结局强直性脊柱炎(AS)的孟德尔随机化代码,请帮我解释。
代码语言:r复制rm(list = ls())
library(dplyr)
library(data.table)
library(vroom)
library(VariantAnnotation)
library(gwasvcf)
library(gwasglue)
library(ieugwasr)
library(plinkbinr)
library(TwoSampleMR)
library(MRPRESSO)
#TMD
load("../../1/R/Rdata/step1.1.TMD_.Rdata")
#结局----
outcome_finn = vroom("../rawData/finngen_R12_M13_ANKYLOSPON_STRICT.gz")
colnames(outcome_finn)
outcome_finn_dat = subset(outcome_finn, outcome_finn$rsids %in% TMD_exposure_dat_clumped$SNP)
dim(outcome_finn_dat)# 20 13
outcome_finn_dat = format_data(dat = outcome_finn_dat,
type = "outcome",
snp_col = "rsids",
beta_col = "beta",
pval_col = "pval",
se_col = "sebeta",
eaf_col = "af_alt",
effect_allele_col = "alt",
other_allele_col = "ref")
outcome_finn_dat = subset(outcome_finn_dat, pval.outcome>5e-06)
dim(outcome_finn_dat)#[1] 19 11
#修改暴露和结局的名称
TMD_exposure_dat_clumped$id.exposure = "TMD"
outcome_finn_dat$id.outcome = "AS"
mr_data_finn = harmonise_data(exposure_dat = TMD_exposure_dat_clumped,
outcome_dat = outcome_finn_dat,
action= 2) #去除回文序列,2去除,1不去除
table(mr_data_finn$mr_keep)#1 19
write.csv(mr_data_finn, file = "data/reAS/mr_TMD_data_finn.csv")
#View(mr_data_finn)
mr_data_finn <- data.table::fread("data/reAS/mr_TMD_data_finn.csv",data.table = F)
#MR-PRESSO异常值检测
presso = mr_presso(BetaOutcome ="beta.outcome",
BetaExposure = "beta.exposure",
SdOutcome ="se.outcome",
SdExposure = "se.exposure",
OUTLIERtest = TRUE,
DISTORTIONtest = TRUE,
data = mr_data_finn,
NbDistribution = 1000,
SignifThreshold = 0.05)
presso$MR-PRESSO results$Distortion Test$Outliers Indices #如果显示NULL,则表示不存在异常值
#mr_data_clean = mr_data_finn[-c(84,136,159,172,255),]
mr_data_clean = mr_data_finn
#去除离群值
#rs = sin$SNP[sin$b < -3|sin$b > 3] #找到离群的SNP
#mr_data_clean = mr_data_finn[!mr_data_finn$SNP %in% rs,] #从原始数据框中移除这些SNP
het_clean = mr_heterogeneity(mr_data_clean) #异质性检验
write.csv(het_clean,file = "data/reAS/het_clean.csv")
sin_clean = mr_singlesnp(mr_data_clean)
pdf(file = "data/reAS/异质性.pdf", width=10, heigh=8)
mr_funnel_plot(singlesnp_results = sin_clean)
dev.off()
#多效性检验
pleio = mr_pleiotropy_test(mr_data_clean)
write.csv(pleio,file = "data/reAS/pleio.csv")
#重新进行mr分析
res_clean = mr(mr_data_clean)
OR_clean = generate_odds_ratios(res_clean)
write.csv(OR_clean, "data/reAS/MRresult.csv")
#MR分析结果可视化
pdf(file = "data/reAS/MR.pdf", width=10, heigh=8)
mr_scatter_plot(res_clean, mr_data_clean)
dev.off()
* * *
ChatGPT:
这段代码是用R语言进行孟德尔随机化(Mendelian Randomization, MR)分析的完整流程,目的是研究颞下颌关节紊乱(TMD)对强直性脊柱炎(AS)的因果关系。下面我帮你详细解释每个部分在做什么:
* * *
1. 加载必要的包和数据:
代码语言:r复制rm(list = ls()) # 清空环境变量
library(dplyr) # 数据处理
library(data.table) # 高效读写数据
library(vroom) # 高速读取大文件
library(VariantAnnotation) # 遗传变异注释
library(gwasvcf) # GWAS数据处理
library(gwasglue) # GWAS数据转换
library(ieugwasr) # MR分析接口
library(plinkbinr) # PLINK格式数据处理
library(TwoSampleMR) # 二样本MR分析的核心包
library(MRPRESSO) # MR-PRESSO用于异常值检测和校正
# 加载TMD(暴露变量)的数据
load("../../1/R/Rdata/step1.1.TMD_.Rdata")
* * *
2. 处理AS(结局变量)数据:
代码语言:r复制outcome_finn = vroom("../rawData/finngen_R12_M13_ANKYLOSPON_STRICT.gz") # 从Finngen读取AS数据
colnames(outcome_finn) # 查看列名
# 筛选TMD暴露中包含的SNP
outcome_finn_dat = subset(outcome_finn, outcome_finn$rsids %in% TMD_exposure_dat_clumped$SNP)
dim(outcome_finn_dat) # 看看筛选后SNP数量,20行13列
# 格式化结局数据,匹配MR分析需求
outcome_finn_dat = format_data(dat = outcome_finn_dat,
type = "outcome",
snp_col = "rsids", # SNP ID
beta_col = "beta", # 效应量
pval_col = "pval", # P值
se_col = "sebeta", # 标准误
eaf_col = "af_alt", # 辅助等位基因频率
effect_allele_col = "alt", # 效应等位基因
other_allele_col = "ref") # 非效应等位基因
# 筛选p值大于5e-06的SNP
outcome_finn_dat = subset(outcome_finn_dat, pval.outcome > 5e-06)
dim(outcome_finn_dat) # 筛选后19个SNP
* * *
3. 给暴露和结局变量命名:
代码语言:r复制TMD_exposure_dat_clumped$id.exposure = "TMD"
outcome_finn_dat$id.outcome = "AS"
* * *
4. 暴露和结局数据匹配(协调化):
代码语言:r复制mr_data_finn = harmonise_data(exposure_dat = TMD_exposure_dat_clumped,
outcome_dat = outcome_finn_dat,
action = 2) # 去除回文序列(即正反向一致的序列)
table(mr_data_finn$mr_keep) # 查看成功匹配的SNP数量,19个
write.csv(mr_data_finn, file = "data/reAS/mr_TMD_data_finn.csv")
* * *
5. 异常值检测(MR-PRESSO):
代码语言:r复制mr_data_finn <- data.table::fread("data/reAS/mr_TMD_data_finn.csv", data.table = F)
presso = mr_presso(BetaOutcome = "beta.outcome",
BetaExposure = "beta.exposure",
SdOutcome = "se.outcome",
SdExposure = "se.exposure",
OUTLIERtest = TRUE,
DISTORTIONtest = TRUE,
data = mr_data_finn,
NbDistribution = 1000, # 随机抽样1000次
SignifThreshold = 0.05) # 显著性阈值0.05
presso$`MR-PRESSO results`$`Distortion Test`$`Outliers Indices` # 查看异常值索引,NULL表示无异常值
mr_data_clean = mr_data_finn # 若存在异常值,可在此移除
* * *
6. 异质性检验和多效性检验:
代码语言:r复制# 异质性检验
het_clean = mr_heterogeneity(mr_data_clean)
write.csv(het_clean, file = "data/reAS/het_clean.csv")
# 单SNP分析和漏斗图
sin_clean = mr_singlesnp(mr_data_clean)
pdf(file = "data/reAS/异质性.pdf", width=10, heigh=8)
mr_funnel_plot(singlesnp_results = sin_clean)
dev.off()
# 多效性检验
pleio = mr_pleiotropy_test(mr_data_clean)
write.csv(pleio, file = "data/reAS/pleio.csv")
* * *
7. MR分析和可视化:
代码语言:r复制# 主MR分析
res_clean = mr(mr_data_clean)
OR_clean = generate_odds_ratios(res_clean)
write.csv(OR_clean, "data/reAS/MRresult.csv")
# 散点图可视化结果
pdf(file = "data/reAS/MR.pdf", width=10, heigh=8)
mr_scatter_plot(res_clean, mr_data_clean)
dev.off()
* * *
总结流程:
- 数据读取与筛选: 从TMD和AS数据集中筛选匹配的SNP。
- 数据格式化与协调: 将暴露和结局变量格式化并匹配。
- 异常值检测(MR-PRESSO): 检查并去除有可能扭曲结果的SNP。
- 异质性和多效性检验: 确认SNP效应是否一致,是否存在多效性问题。
- MR分析和结果输出: 使用TwoSampleMR包执行MR分析,计算比值比(OR)并绘图展示结果。
整个分析流程非常标准,结果文件和图像都被清晰地保存了。如果你需要我进一步解释某部分、帮你改进代码或解释分析结果,告诉我就行!
孟德尔随机分析—如何解决数据存在水平多效性
孟德尔随机分析—如何解决数据存在水平多效性
进行孟德尔随机分析时候,发现结果MR-PRESSO(P-value = 0.628,未检测到显著多效性),而Egger截距显著偏离0(P = 0.0368,检测到水平多效性),在chatGPT的帮助下,使用使用 Lasso 回归( RadialMR 包)减少多效性 SNP,使结果Egger截距,P>0.05,这里记录了解决办法。
You:
这是一段暴露颞下颌关节紊乱(TMD)对结局强直性脊柱炎(AS)的孟德尔随机化代码,请帮我解释。
代码语言:r复制rm(list = ls())
library(dplyr)
library(data.table)
library(vroom)
library(VariantAnnotation)
library(gwasvcf)
library(gwasglue)
library(ieugwasr)
library(plinkbinr)
library(TwoSampleMR)
library(MRPRESSO)
#TMD
load("../../1/R/Rdata/step1.1.TMD_.Rdata")
#结局----
outcome_finn = vroom("../rawData/finngen_R12_M13_ANKYLOSPON_STRICT.gz")
colnames(outcome_finn)
outcome_finn_dat = subset(outcome_finn, outcome_finn$rsids %in% TMD_exposure_dat_clumped$SNP)
dim(outcome_finn_dat)# 20 13
outcome_finn_dat = format_data(dat = outcome_finn_dat,
type = "outcome",
snp_col = "rsids",
beta_col = "beta",
pval_col = "pval",
se_col = "sebeta",
eaf_col = "af_alt",
effect_allele_col = "alt",
other_allele_col = "ref")
outcome_finn_dat = subset(outcome_finn_dat, pval.outcome>5e-06)
dim(outcome_finn_dat)#[1] 19 11
#修改暴露和结局的名称
TMD_exposure_dat_clumped$id.exposure = "TMD"
outcome_finn_dat$id.outcome = "AS"
mr_data_finn = harmonise_data(exposure_dat = TMD_exposure_dat_clumped,
outcome_dat = outcome_finn_dat,
action= 2) #去除回文序列,2去除,1不去除
table(mr_data_finn$mr_keep)#1 19
write.csv(mr_data_finn, file = "data/reAS/mr_TMD_data_finn.csv")
#View(mr_data_finn)
mr_data_finn <- data.table::fread("data/reAS/mr_TMD_data_finn.csv",data.table = F)
#MR-PRESSO异常值检测
presso = mr_presso(BetaOutcome ="beta.outcome",
BetaExposure = "beta.exposure",
SdOutcome ="se.outcome",
SdExposure = "se.exposure",
OUTLIERtest = TRUE,
DISTORTIONtest = TRUE,
data = mr_data_finn,
NbDistribution = 1000,
SignifThreshold = 0.05)
presso$MR-PRESSO results$Distortion Test$Outliers Indices #如果显示NULL,则表示不存在异常值
#mr_data_clean = mr_data_finn[-c(84,136,159,172,255),]
mr_data_clean = mr_data_finn
#去除离群值
#rs = sin$SNP[sin$b < -3|sin$b > 3] #找到离群的SNP
#mr_data_clean = mr_data_finn[!mr_data_finn$SNP %in% rs,] #从原始数据框中移除这些SNP
het_clean = mr_heterogeneity(mr_data_clean) #异质性检验
write.csv(het_clean,file = "data/reAS/het_clean.csv")
sin_clean = mr_singlesnp(mr_data_clean)
pdf(file = "data/reAS/异质性.pdf", width=10, heigh=8)
mr_funnel_plot(singlesnp_results = sin_clean)
dev.off()
#多效性检验
pleio = mr_pleiotropy_test(mr_data_clean)
write.csv(pleio,file = "data/reAS/pleio.csv")
#重新进行mr分析
res_clean = mr(mr_data_clean)
OR_clean = generate_odds_ratios(res_clean)
write.csv(OR_clean, "data/reAS/MRresult.csv")
#MR分析结果可视化
pdf(file = "data/reAS/MR.pdf", width=10, heigh=8)
mr_scatter_plot(res_clean, mr_data_clean)
dev.off()
* * *
ChatGPT:
这段代码是用R语言进行孟德尔随机化(Mendelian Randomization, MR)分析的完整流程,目的是研究颞下颌关节紊乱(TMD)对强直性脊柱炎(AS)的因果关系。下面我帮你详细解释每个部分在做什么:
* * *
1. 加载必要的包和数据:
代码语言:r复制rm(list = ls()) # 清空环境变量
library(dplyr) # 数据处理
library(data.table) # 高效读写数据
library(vroom) # 高速读取大文件
library(VariantAnnotation) # 遗传变异注释
library(gwasvcf) # GWAS数据处理
library(gwasglue) # GWAS数据转换
library(ieugwasr) # MR分析接口
library(plinkbinr) # PLINK格式数据处理
library(TwoSampleMR) # 二样本MR分析的核心包
library(MRPRESSO) # MR-PRESSO用于异常值检测和校正
# 加载TMD(暴露变量)的数据
load("../../1/R/Rdata/step1.1.TMD_.Rdata")
* * *
2. 处理AS(结局变量)数据:
代码语言:r复制outcome_finn = vroom("../rawData/finngen_R12_M13_ANKYLOSPON_STRICT.gz") # 从Finngen读取AS数据
colnames(outcome_finn) # 查看列名
# 筛选TMD暴露中包含的SNP
outcome_finn_dat = subset(outcome_finn, outcome_finn$rsids %in% TMD_exposure_dat_clumped$SNP)
dim(outcome_finn_dat) # 看看筛选后SNP数量,20行13列
# 格式化结局数据,匹配MR分析需求
outcome_finn_dat = format_data(dat = outcome_finn_dat,
type = "outcome",
snp_col = "rsids", # SNP ID
beta_col = "beta", # 效应量
pval_col = "pval", # P值
se_col = "sebeta", # 标准误
eaf_col = "af_alt", # 辅助等位基因频率
effect_allele_col = "alt", # 效应等位基因
other_allele_col = "ref") # 非效应等位基因
# 筛选p值大于5e-06的SNP
outcome_finn_dat = subset(outcome_finn_dat, pval.outcome > 5e-06)
dim(outcome_finn_dat) # 筛选后19个SNP
* * *
3. 给暴露和结局变量命名:
代码语言:r复制TMD_exposure_dat_clumped$id.exposure = "TMD"
outcome_finn_dat$id.outcome = "AS"
* * *
4. 暴露和结局数据匹配(协调化):
代码语言:r复制mr_data_finn = harmonise_data(exposure_dat = TMD_exposure_dat_clumped,
outcome_dat = outcome_finn_dat,
action = 2) # 去除回文序列(即正反向一致的序列)
table(mr_data_finn$mr_keep) # 查看成功匹配的SNP数量,19个
write.csv(mr_data_finn, file = "data/reAS/mr_TMD_data_finn.csv")
* * *
5. 异常值检测(MR-PRESSO):
代码语言:r复制mr_data_finn <- data.table::fread("data/reAS/mr_TMD_data_finn.csv", data.table = F)
presso = mr_presso(BetaOutcome = "beta.outcome",
BetaExposure = "beta.exposure",
SdOutcome = "se.outcome",
SdExposure = "se.exposure",
OUTLIERtest = TRUE,
DISTORTIONtest = TRUE,
data = mr_data_finn,
NbDistribution = 1000, # 随机抽样1000次
SignifThreshold = 0.05) # 显著性阈值0.05
presso$`MR-PRESSO results`$`Distortion Test`$`Outliers Indices` # 查看异常值索引,NULL表示无异常值
mr_data_clean = mr_data_finn # 若存在异常值,可在此移除
* * *
6. 异质性检验和多效性检验:
代码语言:r复制# 异质性检验
het_clean = mr_heterogeneity(mr_data_clean)
write.csv(het_clean, file = "data/reAS/het_clean.csv")
# 单SNP分析和漏斗图
sin_clean = mr_singlesnp(mr_data_clean)
pdf(file = "data/reAS/异质性.pdf", width=10, heigh=8)
mr_funnel_plot(singlesnp_results = sin_clean)
dev.off()
# 多效性检验
pleio = mr_pleiotropy_test(mr_data_clean)
write.csv(pleio, file = "data/reAS/pleio.csv")
* * *
7. MR分析和可视化:
代码语言:r复制# 主MR分析
res_clean = mr(mr_data_clean)
OR_clean = generate_odds_ratios(res_clean)
write.csv(OR_clean, "data/reAS/MRresult.csv")
# 散点图可视化结果
pdf(file = "data/reAS/MR.pdf", width=10, heigh=8)
mr_scatter_plot(res_clean, mr_data_clean)
dev.off()
* * *
总结流程:
- 数据读取与筛选: 从TMD和AS数据集中筛选匹配的SNP。
- 数据格式化与协调: 将暴露和结局变量格式化并匹配。
- 异常值检测(MR-PRESSO): 检查并去除有可能扭曲结果的SNP。
- 异质性和多效性检验: 确认SNP效应是否一致,是否存在多效性问题。
- MR分析和结果输出: 使用TwoSampleMR包执行MR分析,计算比值比(OR)并绘图展示结果。
整个分析流程非常标准,结果文件和图像都被清晰地保存了。如果你需要我进一步解释某部分、帮你改进代码或解释分析结果,告诉我就行!
本文标签: 孟德尔随机分析如何解决数据存在水平多效性
版权声明:本文标题:孟德尔随机分析—如何解决数据存在水平多效性 内容由热心网友自发贡献,该文观点仅代表作者本人, 转载请联系作者并注明出处:http://it.en369.cn/jiaocheng/1748351785a2289116.html, 本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容,一经查实,本站将立刻删除。
发表评论