--- file_transformation: - #in this directory - for file in *.f??; do perl -pi -e 's/^>Glyma/>glyma.Fiskeby.gnm1.ann1.Glyma/' $file; done - # Prefix gff3 and strip off the .v1.1 suffixes that Phytozome uses after applying our prefixing - perl -pe 's/^/glyma.Fiskeby.gnm1./ if (! /##/); s/Glyma/glyma.Fiskeby.gnm1.ann1.Glyma/g; s/Name=glyma.Fiskeby.gnm1.ann1.Glyma/Name=Glyma/; s/\.v1\.1//g' \ glyma.Fiskeby.gnm1.ann1.SS25.gene.gff3 >glyma.Fiskeby.gnm1.ann1.SS25.gene_models_main.gff3 - perl -pe 's/^/glyma.Fiskeby.gnm1./ if (! /##/); s/Glyma/glyma.Fiskeby.gnm1.ann1.Glyma/g; s/Name=glyma.Fiskeby.gnm1.ann1.Glyma/Name=Glyma/; s/\.v1\.1//g' \ glyma.Fiskeby.gnm1.ann1.SS25.gene_exons.gff3 >glyma.Fiskeby.gnm1.ann1.SS25.gene_exons.gff3 - #sort gff3 - /usr/local/www/data/about_the_data_store/scripts/gff3sort/gff3sort.pl glyma.Fiskeby.gnm1.ann1.SS25.gene_models_main.gff3 >tmp.gff3.sorted - mv tmp.gff3.sorted glyma.Fiskeby.gnm1.ann1.SS25.gene_models_main.gff - # order start/end coordinates (swap columns 4 and 5 when start > end), then sort - perl -ne '@rows = split("\t", $_); if ($rows[3] > $rows[4]){ $line = $rows[0]."\t".$rows[1]."\t".$rows[2]."\t".$rows[4]."\t".$rows[3]."\t".$rows[5]."\t".$rows[6]."\t".$rows[7]."\t".$rows[8]; print "$line";}else {$line2 = join ("\t", @rows); print "$line2";}' glyma.Fiskeby.gnm1.ann1.SS25.gene_exons.gff3 >tmp.gff3 - mv tmp.gff3 glyma.Fiskeby.gnm1.ann1.SS25.gene_exons.gff3 - /usr/local/www/data/about_the_data_store/scripts/gff3sort/gff3sort.pl glyma.Fiskeby.gnm1.ann1.SS25.gene_exons.gff3 >tmp.gff3.sorted - mv tmp.gff3.sorted glyma.Fiskeby.gnm1.ann1.SS25.gene_exons.gff3 - # Manually: removed the .p suffix from IDs in the protein files. 
changes: - 2021-04-19 Initial repository creation - 2021-05-24 removed .p suffix from glyma.Fiskeby.gnm1.ann1.SS25.protein_primaryTranscriptOnly.faa.gz as well; added fai indexes - 2021-06-30 Change all file names with Fiskeby to FiskebyIII - 2021-06-30 Change Fiskeby to FiskebyIII in all files - perl -pi -e 's/Fiskeby/FiskebyIII/g' * - 2021-07-13 Rename primaryTranscriptOnly to primaryTranscript - adf: 2021-08-11 correct gene names for unanchored scaffolds in glyma.FiskebyIII.gnm1.ann1.SS25.legfed_v1_0.M65K.gfa.tsv - 2021-09-14 WH Add AHRD annotation to the main gene model gff - 2023-05-23 adf: add AHRD with GO/IPR in descriptors - 2024-04-11 sbc: update synopsis to include JGI numbering