Commit 53d59b27 authored by Tizian Schulz's avatar Tizian Schulz
Browse files

Merge branch 'dev' into 'master'

Workflow Extension

See merge request !1
parents e10da5e3 3380e3aa
......@@ -2,7 +2,6 @@ CopyG.cpp
Corer
Corer_expRt
Corer_polyRt
makefile
test/MyTest
MyTest*
test/testCoreGraph.*
......@@ -11,3 +10,4 @@ bugRep*
*.txt
long*
small*
test/makefile
......@@ -3,11 +3,38 @@ configfile: 'config.yaml'
from glob import glob
from os.path import basename
def getAccessionNames(sp):
    """Return the genome names listed in a species' genome list file.

    The first ``*.txt`` file (in sorted order) inside ``<sp>/genomeSequences``
    is read line by line. Every non-blank line is reduced to its base name
    (so a download URL or path becomes a bare file name) and everything from
    the last ``.`` onwards is dropped. Names without a dot are kept unchanged.

    Args:
        sp: Species directory that contains the ``genomeSequences`` folder.

    Returns:
        List of names in file order.

    Raises:
        FileNotFoundError: If ``<sp>/genomeSequences`` contains no ``*.txt`` file.
    """
    # glob() returns matches in arbitrary order; sort so the choice is stable.
    listFiles = sorted(glob("%s/genomeSequences/*.txt" % sp))
    if not listFiles:
        raise FileNotFoundError("No genome list (*.txt) found in %s/genomeSequences" % sp)

    accNames = []
    with open(listFiles[0], 'r') as listFile:
        for line in listFile:
            name = basename(line.strip())
            # Blank lines would otherwise end up as empty accession names.
            if not name:
                continue
            stem, dot, _ = name.rpartition('.')
            accNames.append(stem if dot else name)
    return accNames
def getGffFiles(sp):
    """Return the GFF annotation paths expected for the genomes of a species.

    One ``<sp>/annotations/<name>.gff`` path is built for every name returned
    by ``getAccessionNames(sp)``. The earlier implementation, which globbed
    for already present ``*.fasta`` files, is dropped: it returned first and
    made this accession-based list unreachable.

    Args:
        sp: Species directory name.

    Returns:
        List of GFF paths, in the order of the species' genome list.
    """
    return ["%s/annotations/%s.gff" % (sp, a) for a in getAccessionNames(sp)]
def getAssemblies(wcs):
    """Return the genome sequence files that make up a pangenome.

    Paths have the form ``<sp>/genomeSequences/<name>.<suffix>`` with one
    entry per name from ``getAccessionNames(wcs.sp)``. The suffix is ``fas``
    for the species ``arabidopsis`` and ``fasta`` for every other species.
    If the graph type contains ``wotSf`` (in any letter case) the arabidopsis
    ``sf_2`` genome is left out.

    Args:
        wcs: Object exposing the attributes ``sp`` and ``gtype``.

    Returns:
        List of genome sequence paths.
    """
    suffix = "fas" if wcs.sp == "arabidopsis" else "fasta"
    gList = ["%s/genomeSequences/%s.%s" % (wcs.sp, a, suffix) for a in getAccessionNames(wcs.sp)]

    # The graph types requested by the workflow are spelled "...WotSf", so a
    # case-sensitive search for "wotSf" would never trigger the exclusion.
    if "wotsf" in wcs.gtype.lower():
        excluded = "arabidopsis/genomeSequences/sf_2.v7.PR_in_lowercase.fas"
        # Filtering (unlike list.remove) does not fail if the entry is absent.
        gList = [g for g in gList if g != excluded]

    return gList
def listGenomes(sp):
    """Return one FASTA path per accession listed for a species.

    Reads ``<sp>/genomeSequences/accessions.txt`` and maps every non-blank
    line to ``<sp>/genomeSequences/<accession>.fasta``.

    Args:
        sp: Species directory name.

    Returns:
        List of FASTA paths in file order.
    """
    with open("%s/genomeSequences/accessions.txt" % sp, 'r') as accFile:
        # Skip blank lines so that no "<sp>/genomeSequences/.fasta" is produced.
        return ["%s/genomeSequences/%s.fasta" % (sp, a.strip()) for a in accFile if a.strip()]
def getReadSets(acc):
    """Return the local read files that belong to an arabidopsis accession.

    Scans ``arabidopsis/reads/filereport_read_run_PRJEB2457_tsv.txt`` for
    lines containing the capitalized accession name and collects the base
    names of all ``;``-separated entries of the 7th tab-separated column,
    each prefixed with ``arabidopsis/reads/``.

    Args:
        acc: Accession name; it is converted with ``str()`` and capitalized
            before matching.

    Returns:
        List of read file paths below ``arabidopsis/reads/``.
    """
    accTag = str(acc).capitalize()
    readFiles = []
    with open("arabidopsis/reads/filereport_read_run_PRJEB2457_tsv.txt", 'r') as report:
        for line in report:
            if accTag not in line:
                continue
            fields = line.split('\t')
            # Ignore malformed rows that do not reach the 7th column.
            if len(fields) < 7:
                continue
            readFiles.extend("arabidopsis/reads/" + basename(f) for f in fields[6].split(';'))
    return readFiles
rule all:
input:
......@@ -20,26 +47,143 @@ rule all:
expand("listeriaMonocytogenes/graphs/assemblyGraph_k17_Core_m250_d{d}.{gFSuf}", d=config['dlts'], gFSuf=config['gFileSuffixes']),
expand("listeriaMonocytogenes/benchmarks/benchmark_assemblyGraph_k17_Core_m250_d{d}.txt", d=config['dlts'] + [100, 120, 140, 160, \
180, 200, 220, 240]),
expand("{s}/panarooRes/gene_presence_absence.csv", s=config['species']),
expand("{s}/panarooRes/benchmark.txt", s=config['species']),
expand("{s}/sibeliaz/blocks_coords.gff", s=config['species']),
expand("{s}/sibeliaz/benchmark.txt", s=config['species'])
expand("arabidopsis/graphs/assemblyGraph_k21_Core_m18_d60.{gFSuf}", gFSuf=config['gFileSuffixes']),
"arabidopsis/benchmarks/benchmark_assemblyGraph_k21_Core_m18_d60.txt",
expand("arabidopsis/graphs/assemblyGraphWotSf_k21_Core_m17_d60.{gFSuf}", gFSuf=config['gFileSuffixes']),
"arabidopsis/benchmarks/benchmark_assemblyGraphWotSf_k21_Core_m17_d60.txt",
expand("arabidopsis/graphs/readGraph_k21_Core_m17_d60.{gFSuf}", gFSuf=config['gFileSuffixes']),
"arabidopsis/benchmarks/benchmark_readGraph_k21_Core_m17_d60.txt",
expand("{s}/panarooRes/gene_presence_absence.csv", s=config['prokaryotes']),
expand("{s}/panarooRes/benchmark.txt", s=config['prokaryotes']),
expand("{s}/sibeliaz/assemblyGraph/blocks_coords.gff", s=config['prokaryotes'] + config['eukaryotes']),
expand("{s}/sibeliaz/assemblyGraph/benchmark.txt", s=config['prokaryotes'] + config['eukaryotes']),
"arabidopsis/sibeliaz/assemblyGraphWotSf/blocks_coords.gff",
"arabidopsis/sibeliaz/assemblyGraphWotSf/benchmark.txt"
rule annotateGenome:
    # Runs prokka on a single genome FASTA file and writes all of its result
    # files (one per configured suffix) into <sp>/annotations, using the
    # genome id as file prefix.
    threads:
        workflow.cores
    input:
        "{sp}/genomeSequences/{genomeId}.fasta"
    output:
        expand(
            "{sp}/annotations/{genomeId}.{suf}",
            sp="{sp}",
            genomeId="{genomeId}",
            suf=config['prokkaOutputFileSuffixes']
        )
    # Currently disabled restriction of the genome ids:
    # wildcard_constraints:
    #     genomeId="[N,C,L,H,A]+.*"
    shell:
        "mkdir -p {wildcards.sp}/annotations; %s/prokka --force --outdir {wildcards.sp}/annotations --prefix {wildcards.genomeId} --cpus {threads} {input}" %config['prokka_bin']
rule annotatePlantGenome:
    # Predicts genes on a ".fas" genome with augustus (species model
    # "arabidopsis") and stores its standard output as GFF file.
    # NOTE(review): the constraint only admits genome ids made of lowercase
    # letters, while the names in the arabidopsis link list also contain
    # digits, underscores and dots - confirm that this is intended.
    wildcard_constraints:
        genomeId="[a-z]+"
    input:
        "{sp}/genomeSequences/{genomeId}.fas"
    output:
        "{sp}/annotations/{genomeId}.gff"
    shell:
        "mkdir -p {wildcards.sp}/annotations; %s/augustus --species=arabidopsis {input} > {output}" %config['augustus_bin']
rule calculateCore:
    # Runs Corer on a pangenome graph with quorum {qrm} and delta {dlt} and
    # stores the stderr output of "/usr/bin/time -v" as benchmark file.
    # The graph type wildcard carries the complete graph name
    # (e.g. "assemblyGraph"), matching the outputs of buildPangenomeGraph,
    # so no additional "Graph" suffix is appended to it here.
    input:
        expand("{sp}/graphs/{t}_k{k}.{suf}", sp='{sp}', t='{gtype}', k='{k}', suf=config['gFileSuffixes'])
    params:
        qrm = "{qrm}",
        dlt = "{dlt}"
    output:
        grph = expand("{sp}/graphs/{t}_k{k}_Core_m{qrm}_d{dlt}.{gFSuf}", sp='{sp}', t='{gtype}', k='{k}', qrm='{qrm}', dlt='{dlt}', \
            gFSuf=config['gFileSuffixes']),
        bnchmrk = "{sp}/benchmarks/benchmark_{gtype}_k{k}_Core_m{qrm}_d{dlt}.txt"
    shell:
        # Corer receives file prefixes: everything from the first '.' of the
        # expanded input/output lists is cut off.
        "mkdir -p {wildcards.sp}/benchmarks;" +
        " /usr/bin/time -v %s/Corer -q {params.qrm} -d {params.dlt} -i $(echo {input} | cut -d'.' -f1) " %config['corer_bin_dir'] +
        "-o $(echo {output.grph} | cut -d'.' -f1) 2> {output.bnchmrk}"
rule buildPangenomeGraph:
    # Builds a Bifrost graph with k-mer length {k} from the genome list
    # <sp>/<gtype>PangenomeList.txt, which is passed to Bifrost via -r.
    input:
        "{sp}/{gtype}PangenomeList.txt"
    threads:
        8
    output:
        expand("{sp}/graphs/{g}_k{k}.{gFSuf}", sp='{sp}', g='{gtype}', k='{k}', gFSuf=config['gFileSuffixes'])
    wildcard_constraints:
        k = "[0-9]+",
        # Letters only. The former class "[a-z,A-Z]" also matched a comma.
        gtype = "[a-zA-Z]+"
    shell:
        # Bifrost expects an output prefix, hence the file suffix is cut off.
        "mkdir -p {wildcards.sp}/graphs; Bifrost build -r {input} -o $(echo {output} | cut -d'.' -f1) -t {threads} -v -c -k {wildcards.k}"
rule createReadGraphList:
    # Writes the list of per-accession read graph sequence files that form
    # the arabidopsis read pangenome. One file is expected for each entry of
    # the genome link list whose line does not contain "sf"; the accession
    # name is the part of the linked file name before its first underscore.
    input:
        # Blank lines are skipped so that they cannot yield a bogus file name.
        ["arabidopsis/reads/%s.fasta" %basename(l).split('_')[0] for l in open("arabidopsis/genomeSequences/links.txt", 'r') if l.strip() and l.find("sf") < 0]
    output:
        "arabidopsis/readGraphPangenomeList.txt"
    shell:
        # "ls -1" prints one path per line; cutting the 9th column out of
        # "ls -l" depends on the locale's date format and breaks on spaces.
        "ls -1 {input} > {output}"
rule extractGraphSeqs:
    # Converts the read graph (GFA) of one accession into a FASTA file by
    # redirecting the output of scripts/extractGFAseqs.py.
    output:
        "arabidopsis/reads/{acc}.fasta"
    input:
        "arabidopsis/graphs/readGraph_{acc}_k21.gfa"
    shell:
        "python3 scripts/extractGFAseqs.py {input} > {output}"
rule buildGenomeGraph:
    # Builds a Bifrost graph (k = 21) for a single accession from the read
    # files named in arabidopsis/<acc>ReadList.txt, passed to Bifrost via -s.
    threads:
        8
    input:
        "arabidopsis/{acc}ReadList.txt"
    output:
        "arabidopsis/graphs/readGraph_{acc}_k21.gfa"
    shell:
        # The output suffix is cut off because Bifrost is given a prefix.
        "mkdir -p arabidopsis/graphs; Bifrost build -s {input} -o $(echo {output} | cut -d'.' -f1) -t {threads} -v -k 21"
rule createReadList:
    # Writes the paths of all read files of one accession (as determined by
    # getReadSets) into arabidopsis/<acc>ReadList.txt, one path per line.
    input:
        getReadSets
    output:
        "arabidopsis/{acc}ReadList.txt"
    shell:
        # "ls -1" prints one path per line; cutting the 9th column out of
        # "ls -l" depends on the locale's date format and breaks on spaces.
        "ls -1 {input} > {output}"
rule downloadReadSets:
    # Downloads every read file named in the 7th tab-separated column of the
    # ENA file report and moves the downloaded *.fastq.gz files into
    # arabidopsis/reads.
    input:
        "arabidopsis/reads/filereport_read_run_PRJEB2457_tsv.txt"
    threads:
        workflow.cores
    output:
        # The report starts with a column header row ("study_accession ...").
        # Without the filter its 7th field would be declared as an output
        # file ("arabidopsis/reads/fastq_ftp") that is never downloaded.
        # Blank lines are skipped as well.
        ["arabidopsis/reads/" + basename(f) for l in open("arabidopsis/reads/filereport_read_run_PRJEB2457_tsv.txt", 'r') if l.strip() and not l.startswith("study_accession") for f in l.split('\t')[6].split(';')]
    shell:
        "wget $(python3 scripts/getReadLocations.py {input}); mv *.fastq.gz arabidopsis/reads"
rule createAssemblyList:
    # Writes the genome files selected by getAssemblies into the temporary
    # list file <sp>/<gtype>PangenomeList.txt, one path per line.
    input:
        getAssemblies
    output:
        temp("{sp}/{gtype}PangenomeList.txt")
    # Currently disabled restriction of the graph type:
    # wildcard_constraints:
    #     gtype="[a-z,A,P,F,M]+"
    shell:
        # "ls -1" prints one path per line; cutting the 9th column out of
        # "ls -l" depends on the locale's date format and breaks on spaces.
        "ls -1 {input} > {output}"
rule getNCBI_Genomes:
    # Fetches genome FASTA files for a species by handing its accession list
    # to scripts/get_fastas_for_acc_list.py.
    # NOTE(review): only the list is passed to the script, not the requested
    # output path - presumably the script writes every FASTA file into
    # <sp>/genomeSequences itself; confirm against the script.
    threads:
        workflow.cores
    input:
        "{sp}/genomeSequences/accessions.txt"
    output:
        "{sp}/genomeSequences/{acc}.fasta"
    shell:
        "python3 scripts/get_fastas_for_acc_list.py {input}"
rule getArabidopsisGenomes:
    # Downloads every URL listed in arabidopsis/genomeSequences/links.txt
    # with wget and moves the resulting *.fas files into
    # arabidopsis/genomeSequences. The expected outputs are the base names
    # of the listed URLs.
    threads:
        workflow.cores
    input:
        "arabidopsis/genomeSequences/links.txt"
    output:
        expand(
            "arabidopsis/genomeSequences/{g}",
            g=[basename(u.strip()) for u in open("arabidopsis/genomeSequences/links.txt", 'r')]
        )
    shell:
        "for u in $(cat {input}); do wget $u; done; mv *.fas arabidopsis/genomeSequences"
rule runPanaroo:
input:
getGffFiles
......@@ -66,8 +210,8 @@ rule runSibeliaZ:
input:
getAssemblies
output:
res = "{spec}/sibeliaz/blocks_coords.gff",
benchmark = "{spec}/sibeliaz/benchmark.txt"
res = "{sp}/sibeliaz/{gtype}/blocks_coords.gff",
benchmark = "{sp}/sibeliaz/{gtype}/benchmark.txt"
shell:
"mkdir -p {wildcards.spec}/sibeliaz; /usr/bin/time -v %s/sibeliaz -n -o {wildcards.spec}/sibeliaz " %config['sibeliaz_bin'] + \
"mkdir -p {wildcards.sp}/sibeliaz {wildcards.sp}/sibeliaz/{wildcards.gtype}; /usr/bin/time -v %s/sibeliaz -n -o {wildcards.sp}/sibeliaz/{wildcards.gtype} " %config['sibeliaz_bin'] + \
"{input} 2> {output.benchmark}"
\ No newline at end of file
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/bur_0.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/can_0.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/ct_1.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/edi_0.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/hi_0.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/kn_0.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/ler_0.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/mt_0.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/no_0.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/oy_0.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/po_0.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/rsch_4.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/sf_2.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/tsu_0.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/wil_2.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/ws_0.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/wu_0.v7.PR_in_lowercase.fas
http://mtweb.cs.ucl.ac.uk/mus/www/19genomes/fasta/MASKED/zu_0.v7.PR_in_lowercase.fas
study_accession sample_accession experiment_accession run_accession tax_id scientific_name fastq_ftp submitted_ftp sra_ftp
PRJEB2457 SAMEA1027642 ERX011733 ERR031531 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031531/ERR031531_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031531/ERR031531_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031531/Bur_0_bur_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031531
PRJEB2457 SAMEA1027636 ERX011735 ERR031532 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031532/ERR031532_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031532/ERR031532_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031532/Can_0_can_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031532
PRJEB2457 SAMEA1027636 ERX011734 ERR031533 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031533/ERR031533.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031533/ERR031533_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031533/ERR031533_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031533/Can_0_can_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031533
PRJEB2457 SAMEA1027631 ERX011737 ERR031534 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031534/ERR031534_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031534/ERR031534_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031534/Ct_1_ct_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031534
PRJEB2457 SAMEA1027631 ERX011736 ERR031535 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031535/ERR031535.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031535/ERR031535_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031535/ERR031535_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031535/Ct_1_ct_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031535
PRJEB2457 SAMEA1027637 ERX011739 ERR031536 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031536/ERR031536_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031536/ERR031536_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031536/Edi_0_edi_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031536
PRJEB2457 SAMEA1027637 ERX011738 ERR031537 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031537/ERR031537.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031537/ERR031537_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031537/ERR031537_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031537/Edi_0_edi_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031537
PRJEB2457 SAMEA1027635 ERX011740 ERR031538 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031538/ERR031538_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031538/ERR031538_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031538/Hi_0_hi_amplified.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031538
PRJEB2457 SAMEA1027635 ERX011741 ERR031539 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031539/ERR031539_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031539/ERR031539_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031539/Hi_0_hi_nonamp.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031539
PRJEB2457 SAMEA1027635 ERX011742 ERR031540 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031540/ERR031540.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031540/ERR031540_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031540/ERR031540_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031540/Hi_0_hi_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031540
PRJEB2457 SAMEA1027629 ERX011744 ERR031541 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031541/ERR031541.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031541/ERR031541_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031541/ERR031541_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031541/Kn_0_kn_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031541
PRJEB2457 SAMEA1027629 ERX011743 ERR031542 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031542/ERR031542_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031542/ERR031542_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031542/Kn_0_kn_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031542
PRJEB2457 SAMEA1027626 ERX011746 ERR031543 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031543/ERR031543_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031543/ERR031543_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031543/Ler_0_ler_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031543
PRJEB2457 SAMEA1027626 ERX011745 ERR031544 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031544/ERR031544_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031544/ERR031544_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031544/Ler_0_ler_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031544
PRJEB2457 SAMEA1027638 ERX011748 ERR031545 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031545/ERR031545_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031545/ERR031545_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031545/Mt_0_mt_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031545
PRJEB2457 SAMEA1027638 ERX011747 ERR031546 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031546/ERR031546_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031546/ERR031546_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031546/Mt_0_mt_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031546
PRJEB2457 SAMEA1027641 ERX011750 ERR031547 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031547/ERR031547.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031547/ERR031547_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031547/ERR031547_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031547/No_0_no_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031547
PRJEB2457 SAMEA1027641 ERX011749 ERR031548 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031548/ERR031548_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031548/ERR031548_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031548/No_0_no_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031548
PRJEB2457 SAMEA1027627 ERX011752 ERR031549 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031549/ERR031549_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031549/ERR031549_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031549/Oy_0_oy_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031549
PRJEB2457 SAMEA1027627 ERX011751 ERR031550 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031550/ERR031550.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031550/ERR031550_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031550/ERR031550_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031550/Oy_0_oy_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031550
PRJEB2457 SAMEA1027640 ERX011754 ERR031551 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031551/ERR031551_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031551/ERR031551_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031551/Po_0_po_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031551
PRJEB2457 SAMEA1027640 ERX011753 ERR031552 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031552/ERR031552.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031552/ERR031552_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031552/ERR031552_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031552/Po_0_po_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031552
PRJEB2457 SAMEA1027634 ERX011756 ERR031553 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031553/ERR031553_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031553/ERR031553_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031553/Rsch_4_rsch_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031553
PRJEB2457 SAMEA1027634 ERX011755 ERR031554 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031554/ERR031554.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031554/ERR031554_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031554/ERR031554_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031554/Rsch_4_rsch_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031554
PRJEB2457 SAMEA1027630 ERX011758 ERR031555 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031555/ERR031555_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031555/ERR031555_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031555/Tsu_0_tsu_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031555
PRJEB2457 SAMEA1027630 ERX011757 ERR031556 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031556/ERR031556.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031556/ERR031556_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031556/ERR031556_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031556/Tsu_0_tsu_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031556
PRJEB2457 SAMEA1027628 ERX011760 ERR031557 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031557/ERR031557_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031557/ERR031557_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031557/Wil_2_wil2_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031557
PRJEB2457 SAMEA1027628 ERX011759 ERR031558 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031558/ERR031558.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031558/ERR031558_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031558/ERR031558_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031558/Wil_2_wil2_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031558
PRJEB2457 SAMEA1027632 ERX011762 ERR031559 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031559/ERR031559_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031559/ERR031559_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031559/Ws_0_ws_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031559
PRJEB2457 SAMEA1027632 ERX011761 ERR031560 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031560/ERR031560_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031560/ERR031560_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031560/Ws_0_ws_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031560
PRJEB2457 SAMEA1027632 ERX011763 ERR031561 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031561/ERR031561_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031561/ERR031561_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031561/Ws_0_ws_tsl.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031561
PRJEB2457 SAMEA1027633 ERX011765 ERR031562 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031562/ERR031562.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031562/ERR031562_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031562/ERR031562_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031562/Wu_0_wu_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031562
PRJEB2457 SAMEA1027633 ERX011764 ERR031563 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031563/ERR031563_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031563/ERR031563_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031563/Wu_0_wu_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031563
PRJEB2457 SAMEA1027639 ERX011767 ERR031564 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031564/ERR031564_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031564/ERR031564_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031564/Zu_0_zu_phaseI.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031564
PRJEB2457 SAMEA1027639 ERX011766 ERR031565 3702 Arabidopsis thaliana ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031565/ERR031565_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR031/ERR031565/ERR031565_2.fastq.gz ftp.sra.ebi.ac.uk/vol1/run/ERR031/ERR031565/Zu_0_zu_PII.bam ftp.sra.ebi.ac.uk/vol1/err/ERR031/ERR031565
CP007522
CP007755
CP009045
CP010433
CP015407
CP017098
NC_011835
NC_012814
NC_012815
NC_017214
NC_017215
NC_017216
NC_017217
NC_017834
NC_017866
NC_017867
NC_021593
NC_022523
# PLEASE ADJUST THE FOLLOWING PARAMETERS --------------------------------------
#Program binaries that shall be used
corer_bin_dir: ".."
panaroo_bin: "path/to/panaroo/bin"
sibeliaz_bin: "path/to/sibeliaz/bin"
corer_bin_dir: "../src"
panaroo_bin: "/path/to/panaroo/bin"
sibeliaz_bin: "/path/to/sibeliaz/bin"
#Program binaries of gene annotation software
prokka_bin: "/path/to/prokka/bin"
augustus_bin: "/path/to/augustus/bin"
#------------------------------------------------------------------------------
......@@ -15,4 +18,7 @@ dlts: [20, 40, 60, 80]
#Bifrost graph file suffixes
gFileSuffixes: ["gfa", "bfg_colors"]
#Species to analyze
species: ["bifidobacteriumAnimalis", "enterococcusFaecium", "yersiniaPestis", "listeriaMonocytogenes"]
\ No newline at end of file
prokaryotes: ["bifidobacteriumAnimalis", "enterococcusFaecium", "yersiniaPestis", "listeriaMonocytogenes"]
eukaryotes: ["arabidopsis"]
#Output file suffixes of prokka
prokkaOutputFileSuffixes: ["err", "faa", "ffn", "fna", "fsa", "gbk", "gff", "log", "sqn", "tbl", "tsv", "txt"]
\ No newline at end of file
AP019394
AP022341
CP006030
CP011281
CP011828
CP012430
CP012436
CP012440
CP012447
CP012454
CP012460
CP012465
CP012471
CP012522
CP013009
CP013994
CP014449
CP014529
CP016163
CP017787
CP017792
CP017797
CP018065
CP018071
CP018128
CP018828
CP018830
CP019208
CP019970
CP019988
CP019992
CP020484
CP020488
CP021849
CP021885
CP023423
CP023780
CP023784
CP023789
CP023794
CP023799
CP023804
CP023808
CP025022
CP025077
CP025389
CP025392
CP025425
CP025685
CP025754
CP027402
CP027497
CP027501
CP027506
CP027512
CP027517
CP028727
CP030110
CP032308
CP033041
CP033206
CP033376
CP034949
CP035136
CP035220
CP035648
CP035654
CP035660
CP035666
CP036151
CP038995
CP038996
CP039729
CP040236
CP040368
CP040706
CP040740
CP040849
CP040875
CP040878
CP040904
CP040907
CP041257
CP041261
CP041270
CP042834
CP043484
CP043865
CP044264
CP044274
CP045012
CP046077
CP050255
CP050648
CP050650
CP053704
LN999844
LN999987
LR132067
LR134095
LR134105
LR134337
LR135169
LR135170
LR135174
LR135179
LR135181
LR135191
LR135197
LR135203
LR135219
LR135226
LR135235
LR135243
LR135254
LR135258
LR135278
LR135287
LR135293
LR135297
LR135307
LR135317
LR135324
LR135331
LR135339
LR135344
LR135351
LR135357
LR135364
LR135372
LR135384
LR135394
LR135401
LR135408
LR135414
LR135428
LR135435
LR135474
LR135475
LR135482
LR135488
LR135782
LR215968
LR536658
LR607349
LR607355
LR607382
LT598663
LT603678
NC_017022
NC_017960
NC_020207
NC_021994
CM001159
CM001469
CM003382
CP006046
CP006047
CP006858
CP006859
CP006860
CP006861
CP006862
CP006874
CP006940
CP007007
CP007008
CP007009
CP007010
CP007011
CP007017
CP007018
CP007019
CP007020
CP007021
CP007160
CP007167
CP007168
CP007169
CP007170
CP007171
CP007194
CP007195
CP007196
CP007197
CP007198
CP007199
CP007200
CP007210
CP007459
CP007460
CP007461
CP007462
CP007492
CP007525
CP007526
CP007527
CP007538
CP007583
CP007600
CP007684
CP007685
CP007686
CP007687
CP007688
CP007689
CP008768
CP008770
CP008821
CP008836
CP008837
CP009242
CP009258
CP009897
CP010346
CP011004
CP011345
CP011397
CP011398
CP012021
CP013285
CP013286
CP013287
CP013288
CP013289
CP013722
CP013723
CP013724
CP013919
CP014250
CP014252
CP014261
CP014790
CP015508
CP015593
CP016213
CP016629
CP018148
CP018149
CP018685
CP019164
CP019165
CP019167
CP019170
CP019614
CP019615
CP019616
CP019617
CP019618
CP019619
CP019620
CP019621
CP019622
CP019623
CP019624
CP019625
CP020022
CP020774
CP020827
CP020828
CP020830
CP020831
CP020832
CP020833
CP021174
CP021325
CP022020
CP023050
CP023052
CP023321
CP023752
CP023754
CP023861
CP023862
CP025082
CP025219
CP025220
CP025221
CP025222
CP025259
CP025438
CP025440
CP025442
CP025443
CP025560
CP025565
CP025567
CP025568
CP026043
CP027029
CP028183
CP028333
CP028392
CP028393
CP028394
CP028395
CP028396
CP028397
CP028398
CP028399
CP028400
CP028401
CP028402
CP028403
CP028404
CP028405
CP028406
CP028407
CP028408