前面我们推出了两篇付费文章,如果能够运行完全部代码,那么基本上已经完成了服务器的基本配置,再安装其他工具就容易了。这次我们介绍一些宏基因组数据分析相关软件的安装。其实宏基因组软件安装并不难,大部分使用bioconda都可以完成,稍微麻烦的部分在于需要为软件配置数据库。这些数据库的下载地址一般都在国外,下载速度稍慢一些。上次有人反映代码部分无法复制,我们已经将代码放到百度云,想下载代码的读者,留意底部的下载链接即可。
现阶段一些宏基因组数据分析工具仍然基于python2.7,所以这里我们推荐安装miniconda2。当然使用python3也可以,只需要单独创建一个python2.7的虚拟环境即可。
# Download the Miniconda2 installer; -c resumes an interrupted download.
wget -c https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
# Run the installer with bash, as the conda installation guide does, instead of
# relying on whatever shell /bin/sh points to on this machine.
bash Miniconda2-latest-Linux-x86_64.sh
# Re-read the shell startup file so this session picks up any changes the
# installer made to it.
source ~/.bashrc
# Configure channels. Each --add puts the channel at the top of the list, so
# conda-forge (added last) ends up with higher priority than bioconda.
conda config --add channels bioconda
conda config --add channels conda-forge
数据质控软件
# Install the quality-control packages, one conda transaction per package.
# -y answers conda's confirmation prompt automatically.
conda install -y kneaddata
conda install -y fastqc
conda install -y multiqc
安装宏基因组拼接工具
# Install the packages used for the metagenome assembly step, one conda
# transaction per package; -y skips the confirmation prompt.
conda install -y megahit
conda install -y spades
conda install -y quast
安装物种分类工具
# Install the taxonomic classification packages, one conda transaction per
# line; -y skips the confirmation prompt.
conda install -y kraken
conda install -y bracken
conda install -y kraken2
conda install -y krona
conda install -y metaphlan2
conda install -y humann2
# graphlan and export2graphlan are requested together in a single transaction.
conda install -y graphlan export2graphlan #python 2.7
安装prokka,建立虚拟环境
# Create an environment named prokka; no packages are requested at creation.
conda create -n prokka -y
# Switch to it so the install below goes into that environment.
conda activate prokka
conda install -y prokka
安装metawrap
# Create a dedicated Python 2.7 environment for metaWRAP and switch to it.
conda create -y -n metawrap-env python=2.7
conda activate metawrap-env
# Note: ordering is important
# Each --add places the channel at the top of the list, so the channel added
# last (ursky) ends up with the highest priority.
conda config --add channels defaults
conda config --add channels conda-forge
conda config --add channels bioconda
conda config --add channels ursky
# Install the metawrap-mg package, naming the ursky channel explicitly.
conda install -y -c ursky metawrap-mg
# Note: may take a while
# To fix the CONCOCT endless warning messages in metaWRAP=1.2, run
conda install -y blas=2.5=mkl
宏基因组基因功能注释
# Install eggnog-mapper into whichever conda environment is active at this
# point; -y skips the confirmation prompt.
conda install -y eggnog-mapper
metaphlan3
# Create an environment named mpa containing the metaphlan package, using the
# bioconda channel.
# NOTE(review): unlike the other conda commands in this guide there is no -y
# here, so conda will presumably stop and ask for confirmation.
conda create --name mpa -c bioconda metaphlan
conda activate mpa
# Download and install the database for the named index.
# NOTE(review): confirm this index name matches the installed metaphlan version.
metaphlan --install --index mpa_v296_CHOCOPhlAn_201901
kneaddata配置
# Create the directory that every download below installs into
# (-p also creates the parent and is a no-op if the path already exists).
mkdir -p database/kneaddata
# kneaddata_database --download takes three values:
#   <database> <build> <install_location>
kneaddata_database --download human_genome bowtie2 database/kneaddata
kneaddata_database --download human_genome bmtagger database/kneaddata
kneaddata_database --download mouse_C57BL bowtie2 database/kneaddata
# The build (bowtie2) must be given here as well; without it the command
# receives only two of the three required values.
kneaddata_database --download human_transcriptome bowtie2 database/kneaddata
kneaddata_database --download ribosomal_RNA bowtie2 database/kneaddata
humann2
utility_mapping : full = http://huttenhower.sph.harvard.edu/humann2_data/full_mapping_1_1.tar.gz
chocophlan : DEMO = http://huttenhower.sph.harvard.edu/humann2_data/chocophlan/DEMO_chocophlan.v0.1.1.tar.gz
chocophlan : full = http://huttenhower.sph.harvard.edu/humann2_data/chocophlan/full_chocophlan_plus_viral.v0.1.1.tar.gz
uniref : DEMO_diamond = http://huttenhower.sph.harvard.edu/humann2_data/uniprot/uniref_annotated/uniref90_DEMO_diamond.tar.gz
uniref : uniref90_diamond = http://huttenhower.sph.harvard.edu/humann2_data/uniprot/uniref_annotated/uniref90_annotated_1_1.tar.gz
uniref : uniref50_ec_filtered_diamond = http://huttenhower.sph.harvard.edu/humann2_data/uniprot/uniref_ec_filtered/uniref50_ec_filtered_1_1.tar.gz
uniref : uniref50_GO_filtered_rapsearch2 = http://huttenhower.sph.harvard.edu/humann2_data/uniprot/uniref50_GO_filtered/uniref50_GO_filtered_rapsearch2.tar.gz
uniref : uniref50_diamond = http://huttenhower.sph.harvard.edu/humann2_data/uniprot/uniref_annotated/uniref50_annotated_1_1.tar.gz
uniref : uniref90_ec_filtered_diamond = http://huttenhower.sph.harvard.edu/humann2_data/uniprot/uniref_ec_filtered/uniref90_ec_filtered_1_1.tar.gz
metaphlan2
mkdir metaphlan2
# The matching marker file has to be downloaded from Dropbox:
#https://www.dropbox.com/s/nhhx7i7glwdahru/mpa_v20_m200_marker_info.txt.bz2?dl=1
# NOTE(review): nothing here downloads mpa_v20_m200.tar or changes into the
# metaphlan2 directory; the archive is presumably expected in the current
# working directory — confirm.
# Unpack the archive, decompress the .fna file, then build a bowtie2 index
# with the prefix mpa_v20_m200.
tar xvf mpa_v20_m200.tar
bzip2 -d mpa_v20_m200.fna.bz2
bowtie2-build mpa_v20_m200.fna mpa_v20_m200
nt库与nr库
# Download the nt and nr FASTA archives from the NCBI FTP site;
# -c resumes a partially completed download.
wget -c https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nt.gz
wget -c https://ftp.ncbi.nlm.nih.gov/blast/db/FASTA/nr.gz
# If the download is slow, try aspera instead.
metawrap库下载