--- file_transformation: - #Add a gene feature to the gff3 with ID and Name attributes in the 9th column; for each mRNA feature, add Parent and Name attributes and append .1 to the ID attribute; all other feature lines get .1 appended to the end of the line - cat IGA1002.gene.gff | perl -ne 'if ( /^#/ ) {print;} else{ s/_rc//; s/;$//; s/chr(\d\t)/chr0$1/; s/;Source=.+//; chomp;$line = $_; @rows = split ("\t" , $line); if ($rows[2] =~/mRNA/) {$new_line = $1 if ($rows[8] =~ /ID=(.+)/); $gene_line = $rows[0]."\t".$rows[1]."\t"."gene"."\t".$rows[3]."\t".$rows[4]."\t".$rows[5]."\t".$rows[6]."\t".$rows[7]."\t".$rows[8].";Name=".$new_line; $rows[8] = $rows[8].".1".";Parent=".$new_line.";Name=".$new_line.".1"; $mrna_line =join ("\t", @rows); print "$gene_line\n$mrna_line\n";}else { $other_line =join ("\t", @rows); $other_new = $other_line.".1"; print "$other_new\n";}}' >IGA1002.gene_with_gene_feature.gff3 - #Use Connor's bionorm program to add prefix (glyma.Hefeng25_IGA1002.gnm1.ann1) and sort the gff3. - #Compress and index files - bgzip glyma.Hefeng25_IGA1002.gnm1.ann1.320V.cds.fna glyma.Hefeng25_IGA1002.gnm1.ann1.320V.cds.fna.gz - bgzip glyma.Hefeng25_IGA1002.gnm1.ann1.320V.protein.faa glyma.Hefeng25_IGA1002.gnm1.ann1.320V.protein.faa.gz - bgzip glyma.Hefeng25_IGA1002.gnm1.ann1.320V.gene_models_main.gff3 glyma.Hefeng25_IGA1002.gnm1.ann1.320V.gene_models_main.gff3.gz - tabix -p gff glyma.Hefeng25_IGA1002.gnm1.ann1.320V.gene_models_main.gff3.gz glyma.Hefeng25_IGA1002.gnm1.ann1.320V.gene_models_main.gff3.gz.tbi changes: - 2020-11-04 Initial repository creation - 2021-05-13 Add README, MANIFEST and make repository public - 2021-07-14 adf: added ID attributes and re-sorted due to tabix error (fixes https://github.com/legumeinfo/datastore-issues/issues/42) - 2021-09-13 WH: Add AHRD annotation to the main gene model gff - 2023-05-24 adf: add AHRD with GO/IPR in descriptors