admin管理员组

文章数量:1037775

孟德尔随机分析—如何解决数据存在水平多效性

孟德尔随机分析—如何解决数据存在水平多效性

进行孟德尔随机分析时候,发现结果MR-PRESSO(P-value = 0.628,未检测到显著多效性),而Egger截距显著偏离0(P = 0.0368,检测到水平多效性),在chatGPT的帮助下,使用使用 Lasso 回归( RadialMR 包)减少多效性 SNP,使结果Egger截距,P>0.05,这里记录了解决办法。

You:

这是一段暴露颞下颌关节紊乱(TMD)对结局强直性脊柱炎(AS)的孟德尔随机化代码,请帮我解释。

代码语言:r复制
rm(list = ls())
library(dplyr)
library(data.table)
library(vroom)
library(VariantAnnotation)
library(gwasvcf)
library(gwasglue)
library(ieugwasr)
library(plinkbinr)
library(TwoSampleMR)
library(MRPRESSO)
#TMD
load("../../1/R/Rdata/step1.1.TMD_.Rdata")


#结局----
outcome_finn = vroom("../rawData/finngen_R12_M13_ANKYLOSPON_STRICT.gz")
colnames(outcome_finn)
outcome_finn_dat = subset(outcome_finn, outcome_finn$rsids %in% TMD_exposure_dat_clumped$SNP)
dim(outcome_finn_dat)# 20 13
outcome_finn_dat = format_data(dat = outcome_finn_dat,
                               type = "outcome",
                               snp_col = "rsids",
                               beta_col = "beta",
                               pval_col = "pval",
                               se_col = "sebeta",
                               eaf_col = "af_alt",
                               effect_allele_col = "alt",
                               other_allele_col = "ref")

outcome_finn_dat = subset(outcome_finn_dat, pval.outcome>5e-06)
dim(outcome_finn_dat)#[1] 19 11

#修改暴露和结局的名称
TMD_exposure_dat_clumped$id.exposure = "TMD"
outcome_finn_dat$id.outcome = "AS"

mr_data_finn = harmonise_data(exposure_dat = TMD_exposure_dat_clumped,
                              outcome_dat = outcome_finn_dat,
                              action= 2)   #去除回文序列,2去除,1不去除
table(mr_data_finn$mr_keep)#1 19
write.csv(mr_data_finn, file = "data/reAS/mr_TMD_data_finn.csv")
#View(mr_data_finn)
mr_data_finn <- data.table::fread("data/reAS/mr_TMD_data_finn.csv",data.table = F)
#MR-PRESSO异常值检测
presso = mr_presso(BetaOutcome ="beta.outcome",
                   BetaExposure = "beta.exposure",
                   SdOutcome ="se.outcome",
                   SdExposure = "se.exposure",
                   OUTLIERtest = TRUE,
                   DISTORTIONtest = TRUE,
                   data = mr_data_finn,
                   NbDistribution = 1000,
                   SignifThreshold = 0.05)
presso$MR-PRESSO results$Distortion Test$Outliers Indices #如果显示NULL,则表示不存在异常值
#mr_data_clean = mr_data_finn[-c(84,136,159,172,255),]
mr_data_clean = mr_data_finn
#去除离群值
#rs = sin$SNP[sin$b < -3|sin$b > 3] #找到离群的SNP
#mr_data_clean = mr_data_finn[!mr_data_finn$SNP %in% rs,] #从原始数据框中移除这些SNP

het_clean = mr_heterogeneity(mr_data_clean) #异质性检验
write.csv(het_clean,file = "data/reAS/het_clean.csv")
sin_clean = mr_singlesnp(mr_data_clean)
pdf(file = "data/reAS/异质性.pdf", width=10, heigh=8)
mr_funnel_plot(singlesnp_results = sin_clean)
dev.off()
#多效性检验
pleio = mr_pleiotropy_test(mr_data_clean)
write.csv(pleio,file = "data/reAS/pleio.csv")
#重新进行mr分析
res_clean = mr(mr_data_clean)
OR_clean = generate_odds_ratios(res_clean)
write.csv(OR_clean, "data/reAS/MRresult.csv")

#MR分析结果可视化
pdf(file = "data/reAS/MR.pdf", width=10, heigh=8)
mr_scatter_plot(res_clean, mr_data_clean)
dev.off()

* * *

ChatGPT:

这段代码是用R语言进行孟德尔随机化(Mendelian Randomization, MR)分析的完整流程,目的是研究颞下颌关节紊乱(TMD)对强直性脊柱炎(AS)的因果关系。下面我帮你详细解释每个部分在做什么:

* * *

1. 加载必要的包和数据:

代码语言:r复制
rm(list = ls())  # 清空环境变量
library(dplyr)   # 数据处理
library(data.table)  # 高效读写数据
library(vroom)   # 高速读取大文件
library(VariantAnnotation) # 遗传变异注释
library(gwasvcf) # GWAS数据处理
library(gwasglue) # GWAS数据转换
library(ieugwasr) # MR分析接口
library(plinkbinr) # PLINK格式数据处理
library(TwoSampleMR) # 二样本MR分析的核心包
library(MRPRESSO) # MR-PRESSO用于异常值检测和校正

# 加载TMD(暴露变量)的数据
load("../../1/R/Rdata/step1.1.TMD_.Rdata")

* * *

2. 处理AS(结局变量)数据:

代码语言:r复制
outcome_finn = vroom("../rawData/finngen_R12_M13_ANKYLOSPON_STRICT.gz")  # 从Finngen读取AS数据
colnames(outcome_finn)  # 查看列名

# 筛选TMD暴露中包含的SNP
outcome_finn_dat = subset(outcome_finn, outcome_finn$rsids %in% TMD_exposure_dat_clumped$SNP)
dim(outcome_finn_dat)  # 看看筛选后SNP数量,20行13列

# 格式化结局数据,匹配MR分析需求
outcome_finn_dat = format_data(dat = outcome_finn_dat,
                               type = "outcome",
                               snp_col = "rsids",       # SNP ID
                               beta_col = "beta",       # 效应量
                               pval_col = "pval",       # P值
                               se_col = "sebeta",       # 标准误
                               eaf_col = "af_alt",      # 辅助等位基因频率
                               effect_allele_col = "alt", # 效应等位基因
                               other_allele_col = "ref") # 非效应等位基因

# 筛选p值大于5e-06的SNP
outcome_finn_dat = subset(outcome_finn_dat, pval.outcome > 5e-06)
dim(outcome_finn_dat)  # 筛选后19个SNP

* * *

3. 给暴露和结局变量命名:

代码语言:r复制
TMD_exposure_dat_clumped$id.exposure = "TMD"
outcome_finn_dat$id.outcome = "AS"

* * *

4. 暴露和结局数据匹配(协调化):

代码语言:r复制
mr_data_finn = harmonise_data(exposure_dat = TMD_exposure_dat_clumped,
                              outcome_dat = outcome_finn_dat,
                              action = 2)  # 去除回文序列(即正反向一致的序列)
table(mr_data_finn$mr_keep)  # 查看成功匹配的SNP数量,19个
write.csv(mr_data_finn, file = "data/reAS/mr_TMD_data_finn.csv")

* * *

5. 异常值检测(MR-PRESSO):

代码语言:r复制
mr_data_finn <- data.table::fread("data/reAS/mr_TMD_data_finn.csv", data.table = F)

presso = mr_presso(BetaOutcome = "beta.outcome",
                   BetaExposure = "beta.exposure",
                   SdOutcome = "se.outcome",
                   SdExposure = "se.exposure",
                   OUTLIERtest = TRUE,
                   DISTORTIONtest = TRUE,
                   data = mr_data_finn,
                   NbDistribution = 1000,  # 随机抽样1000次
                   SignifThreshold = 0.05) # 显著性阈值0.05

presso$`MR-PRESSO results`$`Distortion Test`$`Outliers Indices`  # 查看异常值索引,NULL表示无异常值
mr_data_clean = mr_data_finn  # 若存在异常值,可在此移除

* * *

6. 异质性检验和多效性检验:

代码语言:r复制
# 异质性检验
het_clean = mr_heterogeneity(mr_data_clean)
write.csv(het_clean, file = "data/reAS/het_clean.csv")

# 单SNP分析和漏斗图
sin_clean = mr_singlesnp(mr_data_clean)
pdf(file = "data/reAS/异质性.pdf", width=10, heigh=8)
mr_funnel_plot(singlesnp_results = sin_clean)
dev.off()

# 多效性检验
pleio = mr_pleiotropy_test(mr_data_clean)
write.csv(pleio, file = "data/reAS/pleio.csv")

* * *

7. MR分析和可视化:

代码语言:r复制
# 主MR分析
res_clean = mr(mr_data_clean)
OR_clean = generate_odds_ratios(res_clean)
write.csv(OR_clean, "data/reAS/MRresult.csv")

# 散点图可视化结果
pdf(file = "data/reAS/MR.pdf", width=10, heigh=8)
mr_scatter_plot(res_clean, mr_data_clean)
dev.off()

* * *

总结流程:

  1. 数据读取与筛选: 从TMD和AS数据集中筛选匹配的SNP。
  2. 数据格式化与协调: 将暴露和结局变量格式化并匹配。
  3. 异常值检测(MR-PRESSO): 检查并去除有可能扭曲结果的SNP。
  4. 异质性和多效性检验: 确认SNP效应是否一致,是否存在多效性问题。
  5. MR分析和结果输出: 使用TwoSampleMR包执行MR分析,计算比值比(OR)并绘图展示结果。

整个分析流程非常标准,结果文件和图像都被清晰地保存了。如果你需要我进一步解释某部分、帮你改进代码或解释分析结果,告诉我就行!

孟德尔随机分析—如何解决数据存在水平多效性

孟德尔随机分析—如何解决数据存在水平多效性

进行孟德尔随机分析时候,发现结果MR-PRESSO(P-value = 0.628,未检测到显著多效性),而Egger截距显著偏离0(P = 0.0368,检测到水平多效性),在chatGPT的帮助下,使用使用 Lasso 回归( RadialMR 包)减少多效性 SNP,使结果Egger截距,P>0.05,这里记录了解决办法。

You:

这是一段暴露颞下颌关节紊乱(TMD)对结局强直性脊柱炎(AS)的孟德尔随机化代码,请帮我解释。

代码语言:r复制
rm(list = ls())
library(dplyr)
library(data.table)
library(vroom)
library(VariantAnnotation)
library(gwasvcf)
library(gwasglue)
library(ieugwasr)
library(plinkbinr)
library(TwoSampleMR)
library(MRPRESSO)
#TMD
load("../../1/R/Rdata/step1.1.TMD_.Rdata")


#结局----
outcome_finn = vroom("../rawData/finngen_R12_M13_ANKYLOSPON_STRICT.gz")
colnames(outcome_finn)
outcome_finn_dat = subset(outcome_finn, outcome_finn$rsids %in% TMD_exposure_dat_clumped$SNP)
dim(outcome_finn_dat)# 20 13
outcome_finn_dat = format_data(dat = outcome_finn_dat,
                               type = "outcome",
                               snp_col = "rsids",
                               beta_col = "beta",
                               pval_col = "pval",
                               se_col = "sebeta",
                               eaf_col = "af_alt",
                               effect_allele_col = "alt",
                               other_allele_col = "ref")

outcome_finn_dat = subset(outcome_finn_dat, pval.outcome>5e-06)
dim(outcome_finn_dat)#[1] 19 11

#修改暴露和结局的名称
TMD_exposure_dat_clumped$id.exposure = "TMD"
outcome_finn_dat$id.outcome = "AS"

mr_data_finn = harmonise_data(exposure_dat = TMD_exposure_dat_clumped,
                              outcome_dat = outcome_finn_dat,
                              action= 2)   #去除回文序列,2去除,1不去除
table(mr_data_finn$mr_keep)#1 19
write.csv(mr_data_finn, file = "data/reAS/mr_TMD_data_finn.csv")
#View(mr_data_finn)
mr_data_finn <- data.table::fread("data/reAS/mr_TMD_data_finn.csv",data.table = F)
#MR-PRESSO异常值检测
presso = mr_presso(BetaOutcome ="beta.outcome",
                   BetaExposure = "beta.exposure",
                   SdOutcome ="se.outcome",
                   SdExposure = "se.exposure",
                   OUTLIERtest = TRUE,
                   DISTORTIONtest = TRUE,
                   data = mr_data_finn,
                   NbDistribution = 1000,
                   SignifThreshold = 0.05)
presso$MR-PRESSO results$Distortion Test$Outliers Indices #如果显示NULL,则表示不存在异常值
#mr_data_clean = mr_data_finn[-c(84,136,159,172,255),]
mr_data_clean = mr_data_finn
#去除离群值
#rs = sin$SNP[sin$b < -3|sin$b > 3] #找到离群的SNP
#mr_data_clean = mr_data_finn[!mr_data_finn$SNP %in% rs,] #从原始数据框中移除这些SNP

het_clean = mr_heterogeneity(mr_data_clean) #异质性检验
write.csv(het_clean,file = "data/reAS/het_clean.csv")
sin_clean = mr_singlesnp(mr_data_clean)
pdf(file = "data/reAS/异质性.pdf", width=10, heigh=8)
mr_funnel_plot(singlesnp_results = sin_clean)
dev.off()
#多效性检验
pleio = mr_pleiotropy_test(mr_data_clean)
write.csv(pleio,file = "data/reAS/pleio.csv")
#重新进行mr分析
res_clean = mr(mr_data_clean)
OR_clean = generate_odds_ratios(res_clean)
write.csv(OR_clean, "data/reAS/MRresult.csv")

#MR分析结果可视化
pdf(file = "data/reAS/MR.pdf", width=10, heigh=8)
mr_scatter_plot(res_clean, mr_data_clean)
dev.off()

* * *

ChatGPT:

这段代码是用R语言进行孟德尔随机化(Mendelian Randomization, MR)分析的完整流程,目的是研究颞下颌关节紊乱(TMD)对强直性脊柱炎(AS)的因果关系。下面我帮你详细解释每个部分在做什么:

* * *

1. 加载必要的包和数据:

代码语言:r复制
rm(list = ls())  # 清空环境变量
library(dplyr)   # 数据处理
library(data.table)  # 高效读写数据
library(vroom)   # 高速读取大文件
library(VariantAnnotation) # 遗传变异注释
library(gwasvcf) # GWAS数据处理
library(gwasglue) # GWAS数据转换
library(ieugwasr) # MR分析接口
library(plinkbinr) # PLINK格式数据处理
library(TwoSampleMR) # 二样本MR分析的核心包
library(MRPRESSO) # MR-PRESSO用于异常值检测和校正

# 加载TMD(暴露变量)的数据
load("../../1/R/Rdata/step1.1.TMD_.Rdata")

* * *

2. 处理AS(结局变量)数据:

代码语言:r复制
outcome_finn = vroom("../rawData/finngen_R12_M13_ANKYLOSPON_STRICT.gz")  # 从Finngen读取AS数据
colnames(outcome_finn)  # 查看列名

# 筛选TMD暴露中包含的SNP
outcome_finn_dat = subset(outcome_finn, outcome_finn$rsids %in% TMD_exposure_dat_clumped$SNP)
dim(outcome_finn_dat)  # 看看筛选后SNP数量,20行13列

# 格式化结局数据,匹配MR分析需求
outcome_finn_dat = format_data(dat = outcome_finn_dat,
                               type = "outcome",
                               snp_col = "rsids",       # SNP ID
                               beta_col = "beta",       # 效应量
                               pval_col = "pval",       # P值
                               se_col = "sebeta",       # 标准误
                               eaf_col = "af_alt",      # 辅助等位基因频率
                               effect_allele_col = "alt", # 效应等位基因
                               other_allele_col = "ref") # 非效应等位基因

# 筛选p值大于5e-06的SNP
outcome_finn_dat = subset(outcome_finn_dat, pval.outcome > 5e-06)
dim(outcome_finn_dat)  # 筛选后19个SNP

* * *

3. 给暴露和结局变量命名:

代码语言:r复制
TMD_exposure_dat_clumped$id.exposure = "TMD"
outcome_finn_dat$id.outcome = "AS"

* * *

4. 暴露和结局数据匹配(协调化):

代码语言:r复制
mr_data_finn = harmonise_data(exposure_dat = TMD_exposure_dat_clumped,
                              outcome_dat = outcome_finn_dat,
                              action = 2)  # 去除回文序列(即正反向一致的序列)
table(mr_data_finn$mr_keep)  # 查看成功匹配的SNP数量,19个
write.csv(mr_data_finn, file = "data/reAS/mr_TMD_data_finn.csv")

* * *

5. 异常值检测(MR-PRESSO):

代码语言:r复制
mr_data_finn <- data.table::fread("data/reAS/mr_TMD_data_finn.csv", data.table = F)

presso = mr_presso(BetaOutcome = "beta.outcome",
                   BetaExposure = "beta.exposure",
                   SdOutcome = "se.outcome",
                   SdExposure = "se.exposure",
                   OUTLIERtest = TRUE,
                   DISTORTIONtest = TRUE,
                   data = mr_data_finn,
                   NbDistribution = 1000,  # 随机抽样1000次
                   SignifThreshold = 0.05) # 显著性阈值0.05

presso$`MR-PRESSO results`$`Distortion Test`$`Outliers Indices`  # 查看异常值索引,NULL表示无异常值
mr_data_clean = mr_data_finn  # 若存在异常值,可在此移除

* * *

6. 异质性检验和多效性检验:

代码语言:r复制
# 异质性检验
het_clean = mr_heterogeneity(mr_data_clean)
write.csv(het_clean, file = "data/reAS/het_clean.csv")

# 单SNP分析和漏斗图
sin_clean = mr_singlesnp(mr_data_clean)
pdf(file = "data/reAS/异质性.pdf", width=10, heigh=8)
mr_funnel_plot(singlesnp_results = sin_clean)
dev.off()

# 多效性检验
pleio = mr_pleiotropy_test(mr_data_clean)
write.csv(pleio, file = "data/reAS/pleio.csv")

* * *

7. MR分析和可视化:

代码语言:r复制
# 主MR分析
res_clean = mr(mr_data_clean)
OR_clean = generate_odds_ratios(res_clean)
write.csv(OR_clean, "data/reAS/MRresult.csv")

# 散点图可视化结果
pdf(file = "data/reAS/MR.pdf", width=10, heigh=8)
mr_scatter_plot(res_clean, mr_data_clean)
dev.off()

* * *

总结流程:

  1. 数据读取与筛选: 从TMD和AS数据集中筛选匹配的SNP。
  2. 数据格式化与协调: 将暴露和结局变量格式化并匹配。
  3. 异常值检测(MR-PRESSO): 检查并去除有可能扭曲结果的SNP。
  4. 异质性和多效性检验: 确认SNP效应是否一致,是否存在多效性问题。
  5. MR分析和结果输出: 使用TwoSampleMR包执行MR分析,计算比值比(OR)并绘图展示结果。

整个分析流程非常标准,结果文件和图像都被清晰地保存了。如果你需要我进一步解释某部分、帮你改进代码或解释分析结果,告诉我就行!

本文标签: 孟德尔随机分析如何解决数据存在水平多效性