---
directories:
  # Absolute path of the working directory for this collection.
  # NOTE(review): presumably where the renamed files end up — confirm against
  # the script that consumes this config.
  work_dir: /usr/local/www/data/private/Zea/GENUS
  # Directory the "from" files listed further down are read from.
  # NOTE(review): the "26_3" suffix appears to mirror the 26 main + 3 extra
  # annotation sets listed under readme_info — confirm.
  from_pan_dir: out_Zea_26_3
prefixes:
  # No value is set here; a bare key loads as null (not an empty string) in
  # standard YAML loaders. Kept bare on purpose so the consumer sees exactly
  # what it saw before.
  from_pan_prefix:
collection_info:
  # Identifiers for this pan-gene collection: genus name, pan-gene set version,
  # and a short key.
  # NOTE(review): presumably combined by the consumer into collection and file
  # names — confirm against the consuming script.
  genus: Zea
  pan_ver: pan1
  pan_key: KEY4
readme_info:
  # Metadata fields for the collection's README.
  # Fields with no value load as null; fill them in when the information
  # becomes available rather than deleting the keys.
  provenance: "The files in this directory are a product of the MaizeGDB project team, using the Pandagma pan-gene pipeline. The method is described here: https://github.com/legumeinfo/pandagma"
  # NOTE(review): provenance credits MaizeGDB while source points at
  # legumeinfo.org — confirm this is the intended hosting URL.
  source: "https://data.legumeinfo.org"
  synopsis: "Pangene set for Zea mays. This pangene set includes 29 annotation sets."
  # NCBI Taxonomy ID for Zea mays. The previous value (3913) is the ID for
  # Vigna, apparently left over from a copied template. Use 4575 instead if a
  # genus-level (Zea) ID is wanted here.
  # Quoted so the ID stays a string.
  taxid: "4577"
  # 26 main + 3 extra annotation sets = the 29 stated in synopsis/description.
  annotations_main: Zm00001eb,Zm00018ab,Zm00019ab,Zm00020ab,Zm00021ab,Zm00022ab,Zm00023ab,Zm00024ab,Zm00025ab,Zm00026ab,Zm00027ab,Zm00028ab,Zm00029ab,Zm00030ab,Zm00031ab,Zm00032ab,Zm00033ab,Zm00034ab,Zm00035ab,Zm00036ab,Zm00037ab,Zm00038ab,Zm00039ab,Zm00040ab,Zm00041ab,Zm00042ab
  annotations_extra: Zm00001d,Zm00004b,Zm00008a
  description: "Pan-gene set for Zea, spanning 29 annotation sets, calculated using the pandagma pipeline, version 2023-04-03"
  bioproject:
  sraproject:
  dataset_doi_genome:
  dataset_doi_annot:
  genbank_accession:
  # Dates are quoted so they load as plain strings; unquoted ISO dates are
  # converted to date objects by YAML 1.1 loaders.
  original_file_creation_date: "2023-04-03"
  local_file_creation_date: "2023-04-03"
  dataset_release_date: "2023-04-03"
  contributors: Steven Cannon, Ethy Cannon, Margaret Woodhouse
  publication_doi:
  citation:
  publication_title:
  data_curators: Steven Cannon, Ethy Cannon, Margaret Woodhouse
  public_access_level: public
  license: Open
  keywords: "Zea, corn, maize, pan-gene, pangene, orthogroup"
from_to_pan_tsv:
  # Tab-separated pan-gene tables. Each entry pairs a source file name (from)
  # with the name it is given in the collection (to), plus a human-readable
  # description of the file.
  # NOTE(review): "from" names are presumably resolved inside
  # directories.from_pan_dir — confirm against the consuming script.
  - from: 18_syn_pan_aug_extra.clust.tsv
    to: clust.tsv
    description: "Pan-gene sets, in cluster format: ID in first column, followed by tab-separated gene list."
  - from: 18_syn_pan_aug_extra.counts.tsv
    to: counts.tsv
    description: "Matrix of counts of genes per annotation set for each pan-gene set."
  - from: 18_syn_pan_aug_extra.hsh.tsv
    to: hsh.tsv
    description: "Pan-gene sets, in a two-column hash format, with the set ID in the first column and genes in the second."
from_to_pan_fasta:
  # FASTA sequence files (CDS .fna and protein .faa). Each entry pairs a source
  # file name (from) with its name in the collection (to) and a description.
  # Some entries also carry a "strip" regular expression.
  # NOTE(review): "strip" is presumably a pattern removed from sequence IDs
  # while the file is processed — confirm against the consuming script.
  # The pattern is single-quoted so its backslashes are taken literally.
  - from: 21_pan_fasta_clust_rep_cds.fna
    to: inclusive_cds.fna
    description: "CDS pan-gene sequence, inclusive (not filtered by minimum cluster size or annotation-set representation)."
  - from: 21_pan_fasta_clust_rep_prot.faa
    to: inclusive_protein.faa
    description: "Protein pan-gene sequence, inclusive (not filtered by minimum cluster size or annotation-set representation)."
  - from: 23_syn_pan_pctl25_posn_cds.fna
    to: pctl25_named_cds.fna
    strip: '\w+\.pan\d+\.'
    description: "CDS pan-gene sequence, omitting pan-genes smaller than 25% of the mode, with derived pan-gene IDs corresponding with consensus chromosome and ordinal position."
  - from: 23_syn_pan_pctl25_posn_prot.faa
    to: pctl25_named_protein.faa
    strip: '\w+\.pan\d+\.'
    description: "Protein pan-gene sequence, omitting pan-genes smaller than 25% of the mode, with derived pan-gene IDs corresponding with consensus chromosome and ordinal position."
from_to_pan_as_is:
  # Remaining files, each pairing a source file name (from) with its name in
  # the collection (to) and a description.
  # NOTE(review): going by the key name, these are presumably carried over
  # without any content changes — confirm against the consuming script.
  - from: 18_syn_pan_aug_extra_complement.fna
    to: complement.fna
    description: "Complement of genes in this pan-gene set; i.e. not clustered, presumed to be singletons."
  - from: stats.Zea_26_3.txt
    to: stats.txt
    description: "Descriptive statistics about program parameters, input sequences, and pan-gene products."