---
file_transformation:
  - #in this directory
  - zcat GCA_020497155.1_ASM2049715v1_genomic.gff.gz | awk 'BEGIN {FS=OFS="\t"} !/^#/ && $3 ~ /gene/ {print $9}' | sed 's/.*ID=\([^;]*\).*locus_tag=\([^;]*\).*/\1\tglyma.Hwangkeum.gnm1.ann1.\2/' > feat_id_map.txt
  - zcat GCA_020497155.1_ASM2049715v1_genomic.gff.gz | awk 'BEGIN {FS=OFS="\t"} !/^#/ && $3 ~ /mRNA/ {print $9}' | sed 's/.*ID=\([^;]*\(\.[0-9][0-9]*\)_[^;]*\).*locus_tag=\([^;]*\).*/\1\tglyma.Hwangkeum.gnm1.ann1.\3\2/' >> feat_id_map.txt
  - zcat GCA_020497155.1_ASM2049715v1_genomic.gff.gz | awk 'BEGIN {FS=OFS="\t"} !/^#/ && $3 ~ /tRNA/ {print $9}' | sed 's/.*ID=\([^;]*\).*locus_tag=\([^;]*\).*/\1\tglyma.Hwangkeum.gnm1.ann1.\2.1/' >> feat_id_map.txt
  - zcat GCA_020497155.1_ASM2049715v1_genomic.gff.gz | ~/datastore-specifications/scripts/apply_seqid_map_gff.pl seq_id_map.txt | ~/datastore-specifications/scripts/apply_featid_map_gff.pl feat_id_map.txt 2> apply_feat_map.err | ~/datastore-specifications/scripts/add_IDs_to_gff_features.pl --clobber CDS --clobber exon 2> add_IDs_to_gff_features.err | bgzip -c -l9 > glyma.Hwangkeum.gnm1.ann1.1G4F.gene_models_main.gff3.gz
  - #use gffread to get cds and protein sequences from gff3 file
  # NOTE(review): the gffread commands below read the uncompressed .gff3 and .fna files, while the step above writes .gff3.gz; presumably the inputs were decompressed first - confirm.
  - gffread -g glyma.Hwangkeum.gnm1.4S83.genome_main.fna -y glyma.Hwangkeum.gnm1.ann1.1G4F.protein.faa glyma.Hwangkeum.gnm1.ann1.1G4F.gene_models_main.gff3
  - gffread -g glyma.Hwangkeum.gnm1.4S83.genome_main.fna -x glyma.Hwangkeum.gnm1.ann1.1G4F.cds.fna glyma.Hwangkeum.gnm1.ann1.1G4F.gene_models_main.gff3

changes:
  - 2022-07-20 Initial repository creation
  - 2022-08-23 Add AHRD annotation by Andrew Farmer
  - 2023-12-08 s/PFAM/Pfam/g in GFF3 file (SH)