---
# Dataset-preparation config for the Medicago pan-gene collection (pan1).
# Block structure restored: one key per line, 2-space indentation.

# Filesystem locations.
# NOTE(review): from_pan_dir looks relative to a run directory — confirm
# against the consuming tool.
directories:
  work_dir: /usr/local/www/data/private/Medicago/GENUS
  from_pan_dir: out_Medicago_7_16

# from_pan_prefix is intentionally left empty (parses as null).
prefixes:
  from_pan_prefix:

# Identifiers for this collection.
collection_info:
  genus: Medicago
  pan_ver: pan1
  pan_key: XXQ6

# Descriptive metadata. Keys with no value are left empty as in the original.
# NOTE(review): annotations_main lists 7 entries and annotations_extra lists
# 16 (23 total), while synopsis and description both say 19 annotation sets —
# confirm which count is intended.
readme_info:
  provenance: "The files in this directory are a product of the staff of the SoyBase and LegumeInfo project teams. The method is described here: https://github.com/legumeinfo/pandagma"
  source: "https://data.legumeinfo.org"
  synopsis: "Pangene set for Medicago species, based primarily on M. truncatula but also including M. sativa. This pangene set includes 19 annotation sets."
  taxid: "3877"
  annotations_main: medsa.XinJiangDaYe_1.gnm1.ann1,medsa.XinJiangDaYe_2.gnm1.ann1,medsa.XinJiangDaYe_3.gnm1.ann1,medsa.XinJiangDaYe_4.gnm1.ann1,medtr.A17_HM341.gnm4.ann2,medtr.A17.gnm5.ann1_6,medtr.R108.gnmHiC_1.ann1
  annotations_extra: medsa.XinJiangDaYe_sc.gnm1.ann1,medtr.HM004.gnm1.ann1,medtr.HM010.gnm1.ann1,medtr.HM022.gnm1.ann1,medtr.HM023.gnm1.ann1,medtr.HM034.gnm1.ann1,medtr.HM050.gnm1.ann1,medtr.HM056.gnm1.ann1,medtr.HM058.gnm1.ann1,medtr.HM060.gnm1.ann1,medtr.HM095.gnm1.ann1,medtr.HM125.gnm1.ann1,medtr.HM129.gnm1.ann1,medtr.HM185.gnm1.ann1,medtr.HM324.gnm1.ann1,medtr.R108_HM340.gnm1.ann1
  description: "Pan-gene set for Medicago species, spanning 2 species and 19 annotation sets, calculated using the pandagma pipeline, version 2023-04-03"
  bioproject:
  sraproject:
  dataset_doi_genome:
  dataset_doi_annot:
  genbank_accession:
  original_file_creation_date: 2023-04-03
  local_file_creation_date: 2023-04-03
  dataset_release_date: 2023-04-03
  contributors: Steven Cannon, Hyunoh Lee
  publication_doi:
  citation:
  publication_title:
  data_curators: Steven Cannon
  public_access_level: public
  license: Open
  keywords: "Medicago, barrel medic, alfalfa, lucerne, pan-gene, pangene, orthogroup"

# Tabular outputs: each entry pairs a `from` file name with a `to` file name
# and a human-readable description.
from_to_pan_tsv:
  - from: 18_syn_pan_aug_extra.clust.tsv
    to: clust.tsv
    description: "Pan-gene sets, in cluster format: ID in first column, followed by tab-separated gene list."
  - from: 18_syn_pan_aug_extra.counts.tsv
    to: counts.tsv
    description: "Matrix of counts of genes per annotation set for each pan-gene set."
  - from: 18_syn_pan_aug_extra.hsh.tsv
    to: hsh.tsv
    description: "Pan-gene sets, in a two-column hash format, with the set ID in the first column and genes in the second."

# Sequence (FASTA) outputs, same from/to/description shape.
# `strip` is a single-quoted regex, so its backslashes are literal.
# NOTE(review): presumably the matched prefix is removed from sequence IDs —
# confirm against the consuming tool.
from_to_pan_fasta:
  - from: 21_pan_fasta_clust_rep_cds.fna
    to: inclusive_cds.fna
    description: "CDS pan-gene sequence, inclusive (not filtered by minimum cluster size or annotation-set representation)."
  - from: 21_pan_fasta_clust_rep_prot.faa
    to: inclusive_protein.faa
    description: "Protein pan-gene sequence, inclusive (not filtered by minimum cluster size or annotation-set representation)."
  - from: 23_syn_pan_pctl25_posn_cds.fna
    to: pctl25_named_cds.fna
    strip: '\w+\.pan\d+\.'
    description: "CDS pan-gene sequence, omitting pan-genes smaller than 25% of the mode, with derived pan-gene IDs corresponding with consensus chromosome and ordinal position."
  - from: 23_syn_pan_pctl25_posn_prot.faa
    to: pctl25_named_protein.faa
    strip: '\w+\.pan\d+\.'
    description: "Protein pan-gene sequence, omitting pan-genes smaller than 25% of the mode, with derived pan-gene IDs corresponding with consensus chromosome and ordinal position."

# Remaining outputs, same from/to/description shape.
# NOTE(review): the key name suggests these are transferred unmodified —
# confirm against the consuming tool.
from_to_pan_as_is:
  - from: 18_syn_pan_aug_extra_complement.fna
    to: complement.fna
    description: "Complement of genes in this pan-gene set; i.e. not clustered, presumed to be singletons."
  - from: stats.Medicago_7_16.txt
    to: stats.txt
    description: "Descriptive statistics about program parameters, input sequences, and pan-gene products."