---
# Filesystem locations for this collection. The last component of work_dir
# (Tifrunner.gnm1.ann2) matches coll_genotype, gnm_ver and ann_ver in
# collection_info.
# NOTE(review): both from_* entries name the same GCF_003086295.2 directory;
# presumably they are resolved relative to work_dir -- confirm with the consumer.
directories:
    work_dir: /usr/local/www/data/private/Arachis/hypogaea/Tifrunner.gnm1.ann2
    from_annot_dir: GCF_003086295.2
    from_genome_dir: GCF_003086295.2
# Filename prefixes for the source annotation and genome files.
# NOTE(review): presumably prepended to the "from" names in the from_to_*
# lists -- confirm with the consumer.
prefixes:
    from_annot_prefix: "arahy."
    from_genome_prefix: "GCF_003086295.2_"
# Identifiers for the collection: taxon names, genotype, genome and annotation
# versions, and the four-character keys assigned to the genome and annotation.
collection_info:
    genus: Arachis
    species: hypogaea
    scientific_name_abbrev: arahy
    coll_genotype: Tifrunner
    gnm_ver: gnm1
    ann_ver: ann2
    genome_key: KYV3
    annot_key: TN8K
# Descriptive metadata for the collection: provenance text, source URL,
# synopses, accessions, dates, contributors, citation and access terms.
# NOTE(review): presumably used to populate the collection README -- confirm
# with the consumer.
readme_info:
    provenance: "The files in this directory originated from GenBank, for RefSeq genome sequence GCF_003086295.2, submitted by the International Peanut Genome Initiative in 2018. The GenBank source is considered the primary repository and authoritative; files in this present directory are derived, and may have changes, as noted below. The files here are held as part of the LegumeInfo and PeanutBase projects, and are made available here for the purpose of reproducibility of analyses at these sites (e.g. gene family alignments and phylogenies, genome browsers, etc.) and for further use by researchers, as that research extends other analyses at the LegumeInfo and PeanutBase projects. If you are conducting research on large-scale data sets for this species, please consider retrieving the data from the primary repositories. If you use the data in the present directory, please respect any usage restrictions in the present and original repositories, and cite the data appropriately."
    source: "https://www.ncbi.nlm.nih.gov/data-hub/genome/GCF_003086295.2"
    synopsis_genome: Genome assembly 1 for Arachis hypogaea, cultivar Tifrunner
    synopsis_annot: GenBank RefSeq annotation for Genome assembly 1 for Arachis hypogaea, cultivar Tifrunner
    # Quoted so the taxon ID stays a string rather than an integer.
    taxid: "3818"
    genotype: Tifrunner
    description_genome: "Genome assembly for Arachis hypogaea cultivar Tifrunner. A total of 48.25x of PACBIO sequence (avg. read length of 11,525) was used to generate the initial MECAT assembly, which was subsequently polished using ARROW. Synteny with the diploid A. duranensis and A. ipaensis, along with 1 genetic map and 2 synthetic maps (provided by David Bertioli) were used to identify misjoins in the raw assembly. A total of 856 breaks were identified. The broken assembly was then scaffolded using HiC data by Rajeev Varshney. Post scaffolding, 6 additional breaks were made to resolve misjoins introduced during the scaffolding procedure. Seven tetrasomic regions were identified and duplicated, as described in the original README file. Additionally, homozygous SNPs and INDELs were corrected in the release sequence using ~40x of Illumina reads (2x250, 800bp insert, library ID ICIH and ICID). See the original README file for additional details."
    chromosome_prefix: chr
    supercontig_prefix: scaffold
    description_annot: "This annotation was produced by GenBank on the RefSeq assembly GCF_003086295.2, Apr 29 2019"
    bioproject: "PRJNA419393"
    # The next three keys carry no value and therefore parse as null.
    sraproject:
    dataset_doi_genome:
    dataset_doi_annot:
    genbank_accession: "GCA_003086295.2"
    # Dates are quoted so they stay strings rather than being typed as dates.
    original_file_creation_date: "2017-11-13"
    local_file_creation_date: "2023-02-17"
    dataset_release_date: "2023-03-03"
    contributors: "The International Peanut Genome Initiative; lead assembly group Jeremy Schmutz, Jerry Jenkins, Jane Grimwood; project leads David Bertioli; Soraya Bertioli; Brian Scheffler; Scott Jackson; Peggy Ozias-Akins"
    publication_doi: "10.1038/s41588-019-0405-z"
    citation: "Bertioli, D.J., Jenkins, J., Clevenger, J. et al. The genome sequence of segmental allotetraploid peanut Arachis hypogaea. Nat Genet 51, 877-884 (2019). https://doi.org/10.1038/s41588-019-0405-z"
    publication_title: "The genome sequence of segmental allotetraploid peanut Arachis hypogaea"
    data_curators: Ethalinda Cannon, Steven Cannon, Jerry Jenkins
    public_access_level: public
    license: open
    keywords: "peanut, Arachis hypogaea, Tifrunner"
# Genome sequence file. Each list entry pairs a source name (from) with a
# target name (to) and a human-readable description.
from_to_genome:
  -
    from: arahy.Tifrunner.gnm1.KYV3_genomic.fna.gz
    to: genome_main.fna
    description: "Primary genome assembly"
# No value is given for this key, so it parses as null (no entries listed).
original_readme_and_usage:
# Annotation-side feature-ID mapping file. Each list entry pairs a source
# name (from) with a target name (to) and a human-readable description.
from_to_annot_as_is:
  -
    from: initial_featid_map.tsv
    to: featid_map.tsv
    description: "Mapping between original and modified annotation feature IDs"
# Genome-side sequence-ID mapping file. Each list entry pairs a source name
# (from) with a target name (to) and a human-readable description.
from_to_genome_as_is:
  -
    from: arahy.initial_seqid_map.tsv
    to: seqid_map.tsv
    description: "Mapping between original and modified sequence IDs"
# CDS and mRNA nucleotide FASTA files, each in a full set and a
# "primary only" set. Each list entry pairs a source name (from) with a
# target name (to) and a human-readable description.
from_to_cds_mrna:
  -
    from: gffread.cds.fna.gz
    to: cds.fna
    description: "cds sequences"
  -
    from: gffread.cds_primary.fna.gz
    to: cds_primary.fna
    description: "cds sequences - primary only"
  -
    from: gffread.mrna.fna.gz
    to: mrna.fna
    description: "mRNA transcript sequences"
  -
    from: gffread.mrna_primary.fna.gz
    to: mrna_primary.fna
    description: "mRNA transcript sequences - primary only"
# Protein FASTA files, in a full set and a "primary only" set. Each list
# entry pairs a source name (from) with a target name (to) and a
# human-readable description.
from_to_protein:
  -
    from: gffread.protein.faa.gz
    to: protein.faa
    description: "Protein sequences"
  -
    from: gffread.protein_primary.faa.gz
    to: protein_primary.faa
    description: "Protein sequences - primary only"
# Gene-model GFF file. Each list entry pairs a source name (from) with a
# target name (to) and a human-readable description.
from_to_gff:
  -
    from: genomic_mod.gff.gz
    to: gene_models_main.gff3
    description: "Gene models - main"