最新消息:XAMPP默认安装之后是很不安全的,我们只需要点击左方菜单的 "安全"选项,按照向导操作即可完成安全设置。

在CentOS Linux操作系统上安装UCSC基因组浏览器

XAMPP相关 admin 377浏览 0评论

如果是本地安装UCSC人类,小鼠等已有数据镜像,可以参考:

http://genome.ucsc.edu/admin/mirror.html

但如果是想展示部分自己测的NGS数据或者一个新的物种的基因组,就需要定制安装。整个过程比较复杂,直接看代码,照做就可以了。

========================
http://genome.ucsc.edu/admin/jk-install.html

# Download the hgcentral schema dump and load it into a newly created
# "hgcentral" database (each mysql call prompts for wangk's password).
wget http://hgdownload.cse.ucsc.edu/admin/hgcentral.sql
mysql -u wangk -p -e "create database hgcentral"
mysql -u wangk -p hgcentral < hgcentral.sql
# Create the hgFixed database as well.
mysql -u wangk -p -e "create database hgFixed"
# List the tables now present in hgcentral, this time through hgsql.
hgsql 'hgcentral' -e 'show tables;'
# Disabled alternative run as the MySQL root user: grant wangk all privileges
# on hgcentral, create hgFixed, and grant wangk SELECT on hgFixed.
#mysql -u root -p -e "grant all privileges on hgcentral.* to 'wangk'@'localhost'"
#mysql -u root -p -e "create database hgFixed"
#mysql -u root -p -e "grant select on hgFixed.* to 'wangk'@'localhost'"

Human Genome

# Run this whole section from the directory that contains GRCH38P14G.fa:
# the FASTA and the generated chrominfo.tab are referenced by relative path.

# Convert the assembly FASTA to .2bit inside the browser's gbdb tree, create
# the assembly database if it does not exist yet, and load the sequences.
faToTwoBit GRCH38P14G.fa /var/www/ucsc/gbdb/human/H38P14/GRCH38P14G.2bit
hgsql "" -e "create database if not exists GRCH38P14G"
hgLoadSeq GRCH38P14G GRCH38P14G.fa

#genePredToGtf hg18 knownGene knownGene.gtf

# Load grp.sql from the kent source tree. It is addressed by absolute path
# (like chromInfo.sql below) instead of cd-ing into the source tree, because
# a cd here would break the relative GRCH38P14G.fa / chrominfo.tab paths
# used by the following commands.
hgsql GRCH38P14G < /home/wangk/ucsc/kent/src/hg/lib/grp.sql

# Build the chromInfo table: write faSize's per-sequence listing to
# chrominfo.tab, create the table from chromInfo.sql, load the listing, then
# point every row's fileName at the .2bit written above.
faSize -detailed GRCH38P14G.fa > chrominfo.tab
hgsql GRCH38P14G < /home/wangk/ucsc/kent/src/hg/lib/chromInfo.sql
hgsql GRCH38P14G -e 'load data local infile "chrominfo.tab" into table chromInfo;'
hgsql GRCH38P14G -e 'update chromInfo set fileName = "/var/www/ucsc/gbdb/human/H38P14/GRCH38P14G.2bit"'

#http://ucscbrowser.genenetwork.org/FAQ/FAQlicense.html
#http://genomewiki.ucsc.edu/index.php/Building_a_new_genome_database

# Register the assembly in hgcentral.dbDb. The row sets, among others, the
# database name, nibPath (the directory the .2bit was written to), the
# default position, the description page path and the taxonomy id.
hgsql 'hgcentral' -e 'INSERT INTO dbDb (name, description, nibPath, organism, defaultPos, active, orderKey, genome, scientificName, htmlPath, hgNearOk, hgPbOk, sourceName, taxId) VALUES ("GRCH38P14G", "September 26, 2016 (GRCh38.p9)", "/var/www/ucsc/gbdb/human/H38P14/", "Human", "NC_000001:1-20000", 1, 7750, "Human", "Homo sapiens", "/var/www/ucsc/gbdb/human/H38P14/html/description.html", 1, 0, "GRCh38 patch 9 Genome Reference Consortium Human Reference 38", 9606);'

# Make GRCH38P14G the default database for genome "Human", place "Human" in
# clade "mammal", and add the "mammal" clade row itself.
# NOTE(review): the stock hgcentral.sql loaded earlier may already contain
# rows for Human / mammal - check for duplicates before inserting.
hgsql 'hgcentral' -e 'INSERT INTO defaultDb (genome, name) VALUES("Human", "GRCH38P14G");'
hgsql 'hgcentral' -e 'INSERT INTO genomeClade (genome, clade, priority) VALUES("Human", "mammal", 1);'
hgsql 'hgcentral' -e 'INSERT INTO clade (name, label, priority) VALUES("mammal", "Mammal", 5);'



# Disabled alternative: build a GMAP index of the genome and align the
# transcript FASTA (GRCH38P14T.fa) to it, writing GFF output.
#gmap_build -k 7 -d GRCH38P14G GRCH38P14G.fa
#gmap --cross-species -d GRCH38P14G -D /home/wangk/.progs/gmap/share/GRCH38P14G -B 1 -n 1 -A -t 4 -O -f 2 GRCH38P14T.fa > GRCH38P14T.gff

# Reduce the NCBI GFF3 annotation with ucsc_gff.pl (listed at the end of
# this page) and save the result as GRCH38P14G.gff.
perl ucsc_gff.pl GCF_000001405.35_GRCh38.p9_genomic.gff > GRCH38P14G.gff

# Disabled sed/awk one-liners that strip ID suffixes or keep only
# gene/mRNA/CDS rows; they operate on other annotation files.
#sed -e 's/\.mrna[0-9].*$//g' GRCH38P14G.gff | sed -e 's/\.path[0-9].*$//g' | sed -e 's/ID=//g'  > GRCH38P14G.gff.1
#awk '$3 == "gene" || $3 == "mRNA" || $3 == "CDS" {print}' OIKONorway.gff3 > OIKONorway.gff3.1

#  sed -e 's/\.exon[0-9].*$//g' Hr_augustus_step2_20140415.gff3| sed -e 's/\.T[0-9].*$//g' |  sed -e 's/ID=//g' > augustus.gff3
# Load the reduced GFF into table "refgenes" of the assembly database.
ldHgGene GRCH38P14G refgenes GRCH38P14G.gff
# Load a BED file into table "newTrack".
# NOTE(review): newTrack.bed is not produced by any step shown here -
# presumably a placeholder for the user's own track.
hgLoadBed -trimSqlTable GRCH38P14G newTrack newTrack.bed

# Merge the per-sample accepted_hits.bam files of each stage into one BAM.
samtools merge Egg.bam Egg-*/accepted_hits.bam
samtools merge Blastula.bam Blastula-*/accepted_hits.bam

# Coordinate-sort the merged BAMs. The output file is named explicitly with
# -o (samtools >= 1.3): the legacy "samtools sort in.bam PREFIX" form treats
# the second argument as a prefix and would have written Egg.SORTED.bam.bam,
# which the coverage step below would not find.
samtools sort -o Egg.SORTED.bam Egg.bam
samtools sort -o Blastula.SORTED.bam Blastula.bam

# Compute split-aware coverage as bedGraph and convert it to bigWig.
# bedGraphToBigWig requires input sorted by chromosome then start, hence the
# explicit byte-order sort. The output extension is lower-case .bw so that it
# matches the paths registered with hgBbiDbLink below (the filesystem is
# case-sensitive, so Egg.BW and Egg.bw would be different files).
genomeCoverageBed -bg -ibam Egg.SORTED.bam -g chrominfo.tab -split | LC_COLLATE=C sort -k1,1 -k2,2n > Egg.BEDGRAPH
bedGraphToBigWig Egg.BEDGRAPH chrominfo.tab /gbdb/GRCH38P14G/BigWig/Egg.bw

genomeCoverageBed -bg -ibam Blastula.SORTED.bam -g chrominfo.tab -split | LC_COLLATE=C sort -k1,1 -k2,2n > Blastula.BEDGRAPH
bedGraphToBigWig Blastula.BEDGRAPH chrominfo.tab /gbdb/GRCH38P14G/BigWig/Blastula.bw

# Register each bigWig file under a table name in the assembly database.
hgBbiDbLink GRCH38P14G Egg /gbdb/GRCH38P14G/BigWig/Egg.bw
hgBbiDbLink GRCH38P14G Blastula /gbdb/GRCH38P14G/BigWig/Blastula.bw

# Register two BLAT servers for the assembly in hgcentral.blatServers:
# port 20001 with isTrans=0 / canPcr=1 and port 20002 with isTrans=1 /
# canPcr=0. Host and ports match the gfServer commands further down.
hgsql 'hgcentral' -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) VALUES ("GRCH38P14G", "10.10.64.178", 20001, 0, 1);'
hgsql 'hgcentral' -e 'INSERT INTO blatServers (db, host, port, isTrans, canPcr) VALUES ("GRCH38P14G", "10.10.64.178", 20002, 1, 0);'


# Create a trackDb directory for the assembly inside the kent source tree and
# work from the trackDb root, which is passed as "." (hgRoot) below.
mkdir -p /home/wangk/ucsc/kent/src/hg/makeDb/trackDb/GRCH38P14G
cd /home/wangk/ucsc/kent/src/hg/makeDb/trackDb

# Build the trackDb and hgFindSpec tables, with "GRCH38P14G" as the org
# argument.
#hgTrackDb [options] org database trackDb trackDb.sql hgRoot
hgTrackDb -strict GRCH38P14G GRCH38P14G trackDb /home/wangk/ucsc/kent/src/hg/lib/trackDb.sql .
hgFindSpec -strict GRCH38P14G GRCH38P14G hgFindSpec /home/wangk/ucsc/kent/src/hg/lib/hgFindSpec.sql .

# Same two commands with "." as the org argument.
# NOTE(review): this looks like an alternative to the pair above rather than
# an additional step - confirm which form matches the directory layout.
hgTrackDb -strict . GRCH38P14G trackDb /home/wangk/ucsc/kent/src/hg/lib/trackDb.sql .
hgFindSpec -strict . GRCH38P14G hgFindSpec /home/wangk/ucsc/kent/src/hg/lib/hgFindSpec.sql .

# Start the two BLAT servers in the background: port 20001 for untranslated
# queries, port 20002 for translated (-trans) queries.
# The .2bit is given by bare file name, so first change into the directory
# it was written to by faToTwoBit; the preceding trackDb step leaves the
# shell in the kent source tree, where GRCH38P14G.2bit does not exist.
# NOTE(review): that directory is also the nibPath stored in dbDb -
# presumably the browser combines nibPath with this bare file name; confirm.
cd /var/www/ucsc/gbdb/human/H38P14
gfServer -tileSize=7 -canStop start 10.10.64.178 20001 -stepSize=5 GRCH38P14G.2bit &
gfServer -canStop start 10.10.64.178 20002 -trans GRCH38P14G.2bit &


UCSC格式转化:

#ucsc_gff.pl

#!/usr/bin/perl
# ucsc_gff.pl - reduce an NCBI RefSeq GFF3 file to a simplified 9-column GFF.
#
# Usage: perl ucsc_gff.pl input.gff > output.gff
#
# For every transcript-level feature whose attributes start with
# "ID=rnaN;Parent=geneM;" a label "geneM_rnaN" is built, with "_<symbol>"
# appended when a ";gene=<symbol>" attribute is present. That label replaces
# the whole attribute column of the transcript line and of every exon/CDS
# line that follows it, until the next "gene" line resets it.
# Example: ID=rna0;Parent=gene0;...;gene=DDX11L1;...  ->  gene0_rna0_DDX11L1
#
# Also: comment lines and "gene" lines are dropped, feature types outside the
# list below are dropped, and a trailing ".<digits>" version is stripped from
# the sequence name (NC_000001.11 -> NC_000001).
use strict;
use warnings;

die "Usage: perl $0 GCF_000001405.35_GRCh38.p9_genomic.gff > GRCH38P9G.gff\n" unless (@ARGV == 1);

# Feature types (GFF column 3) that are considered at all.
my %is_wanted_type = map { $_ => 1 } qw(gene mRNA transcript exon CDS lnc_RNA ncRNA);

# Label of the transcript currently being emitted; empty means "skip".
my $name = "";

open(my $gff_fh, '<', $ARGV[0]) or die "cannot open $ARGV[0]: $!\n";
while (my $line = <$gff_fh>) {
    chomp $line;
    next if $line =~ /^\#/;

    # Sample input lines (attribute column shortened):
    #NC_000001.11  BestRefSeq  transcript  11874   14409   .  +  .  ID=rna0;Parent=gene0;...;gene=DDX11L1;...
    #NC_000001.11  BestRefSeq  exon        11874   12227   .  +  .  ID=id1;Parent=rna0;...;gene=DDX11L1;...
    #NC_000001.11  Gnomon      CDS         611112  611297  .  -  0  ID=cds7;Parent=rna49;...;gene=LOC105378947;...
    my @fields = split(/\t/, $line);
    next if @fields < 9;    # blank or truncated line: no attribute column

    my ($scaffold, $type, $anno) = @fields[0, 2, 8];
    next unless $is_wanted_type{$type};

    # A new gene starts: forget the previous transcript label so that
    # features not preceded by a recognised transcript line are skipped.
    if ($type eq "gene") {
        $name = "";
        next;
    }

    # Transcript-level line: start a new label. Exon/CDS lines do not match
    # this pattern and therefore inherit the label of the transcript above.
    if ($anno =~ /^ID=(rna\d+);Parent=(gene\d+);/) {
        my ($rna_id, $gene_id) = ($1, $2);
        $name = $gene_id . "_" . $rna_id;
        if ($anno =~ /;gene=([^;\s]+)/) {
            $name .= "_$1";
        }
    }
    next if ($name eq "");

    $scaffold =~ s/\.\d+$//;
    print join("\t", $scaffold, @fields[1 .. 7], $name), "\n";
}
close($gff_fh) or die "cannot close $ARGV[0]: $!\n";

转载请注明:XAMPP中文组官网 » 在CentOS Linux操作系统上安装UCSC基因组浏览器

您必须 登录 才能发表评论!