# Received revised files received from Qijian Song on 2021-10-27 # Original filenames: ready_imputed_dataset1_5176RILsX29416SNPs_imputed_from_SoySNP50K_SNPs_AB_format_Oct21.txt ready_imputed_dataset1_5176RILsX29416SNPs_imputed_from_SoySNP50K_SNPs_ACGT_vcf_format_Oct21.txt ready_imputed_dataset2_5176RILsx423419SNPs_AB_format_Oct21.txt ready_imputed_dataset2_5176RILsx423419SNPs_ACGT_vcf_format_Oct21.txt # Prepare new names, per Data Store patterns basename=glyma.Wm82.gnm2.div.Song_NAM_2021 collection=Wm82.gnm2.div.Song_NAM_2021 ### Process dataset 1 (three formats) ### # Remove carriage returns # Swap orders of first three columns to be vcf-like # Shorten header names and replace spaces with underscores # Move linkage group column from last column into metadata section nohup cat from_QSong/ready_imputed_dataset1_5176RILsX29416SNPs_imputed_from_SoySNP50K_SNPs_01_format_Oct29.txt | perl -pe 's/\t\t/\t.\t/' | # add dot in empty records perl -pe 's/ +/_/g' | # replace spaces with underscores perl -pe 's/^(\S+\t\S+\t\S+\t\S+\t\S+)\t(\S+\t\S+\t\S+)\t/$2\t$1\t/' | # switch initial column order perl -pe 's/Inferred_linkage_map\S+/LG_cM/; s/Alternative\S+/Non_IA3023_allele/' | # shorten column labels perl -pe 's/SNP_ID_Glyma1.01/ID_Glyma1.01/; s/Position/POS/; s/Wm82a2v1/Wm82a2/g' | # shorten column labels perl -pe 's/Chr/glyma.Wm82.gnm2.Gm/' | # Add chromosome prefix perl -ane 'for $i (0 .. 7){print $F[$i], "\t"}; print $F[$#F], "\t"; for $i (8 .. $#F-1){print $F[$i], "\t"}; print "\n"' | perl -pe 's/\r$//; s/\s+$/\n/' | # fix line endings cat > $basename.imputed1_5176RILs_29kSNPs_01.txt & nohup cat from_QSong/ready_imputed_dataset1_5176RILsX29416SNPs_imputed_from_SoySNP50K_SNPs_AB_format_Oct21.txt | perl -pe 's/\t\t/\t.\t/' | # add dot in empty records perl -pe 's/ +/_/g' | # replace spaces with underscores perl -pe 's/^(\S+\t\S+\t\S+\t\S+\t\S+)\t(\S+\t\S+\t\S+)\t/$2\t$1\t/' | # switch initial column order perl -pe 's/Inferred_linkage_map\S+/LG_cM/; s/Alternative\S+/Non_IA3023_allele/' | # shorten column labels perl -pe 's/SNP_ID_Glyma1.01/ID_Glyma1.01/; s/Position/POS/; s/Wm82a2v1/Wm82a2/g' | # shorten column labels perl -pe 's/Chr/glyma.Wm82.gnm2.Gm/' | # Add chromosome prefix perl -ane 'for $i (0 .. 7){print $F[$i], "\t"}; print $F[$#F], "\t"; for $i (8 .. $#F-1){print $F[$i], "\t"}; print "\n"' | perl -pe 's/\r$//; s/\s+$/\n/' | # fix line endings cat > $basename.imputed1_5176RILs_29kSNPs_AB.txt & nohup cat from_QSong/ready_imputed_dataset1_5176RILsX29416SNPs_imputed_from_SoySNP50K_SNPs_ACGT_vcf_format_Oct21.txt | perl -pe 's/\t\t/\t.\t/' | # add dot in empty records perl -pe 's/ +/_/g' | # replace spaces with underscores perl -pe 's/^(\S+\t\S+\t\S+\t\S+\t\S+)\t(\S+\t\S+\t\S+)\t/$2\t$1\t/' | # switch initial column order perl -pe 's/Inferred_linkage_map\S+/LG_cM/; s/Alternative\S+/Non_IA3023_allele/' | # shorten column labels perl -pe 's/SNP_ID_Glyma1.01/ID_Glyma1.01/; s/Position/POS/; s/Wm82a2v1/Wm82a2/g' | # shorten column labels perl -pe 's/Chr/glyma.Wm82.gnm2.Gm/' | # Add chromosome prefix perl -ane 'for $i (0 .. 7){print $F[$i], "\t"}; print $F[$#F], "\t"; for $i (8 .. $#F-1){print $F[$i], "\t"}; print "\n"' | perl -pe 's/\r$//; s/\s+$/\n/' | # fix line endings cat > $basename.imputed1_5176RILs_29kSNPs_ACGT.txt & ### Process dataset 2 (three formats) ### nohup cat from_QSong/ready_imputed_dataset2_5176RILsx423419SNPs_01_format_Oct29.txt | perl -pe 's/\t\t/\t.\t/' | # add dot in empty records perl -pe 's/ +/_/g; s/^(\S+)\t(\S+)\t(\S+)/$2\t$3\t$1/' | # replace spaces with underscores and switch initial column order perl -pe 's/Linkage_position_\S+/LG_cM/; s/Chr_/#CHROM_/; s/Position/POS/; s/Wma2v1/Wm82a2/g' | # shorten column labels perl -pe 's/SNP_ID_in_SoySNP50K_orSoyNAM6K_Dataset/SNP_ID_in_SoySNP50K_or_SoyNAM6K/' | # shorten column labels perl -pe 's/^Chr/glyma.Wm82.gnm2.Gm/' | # Add chromosome prefix perl -ane 'for $i (0 .. 5){print $F[$i], "\t"}; print $F[$#F], "\t"; for $i (6 .. $#F-1){print $F[$i], "\t"}; print "\n"' | perl -pe 's/\r$//; s/\s+$/\n/' | # fix line endings cat > $basename.imputed2_5176RILs_423kSNPs_01.txt & nohup cat from_QSong/ready_imputed_dataset2_5176RILsx423419SNPs_ACGT_vcf_format_Oct21.txt | perl -pe 's/\t\t/\t.\t/' | # add dot in empty records perl -pe 's/ +/_/g; s/^(\S+)\t(\S+)\t(\S+)/$2\t$3\t$1/' | # replace spaces with underscores and switch initial column order perl -pe 's/Linkage_position_\S+/LG_cM/; s/Chr_/#CHROM_/; s/Position/POS/; s/Wma2v1/Wm82a2/g' | # shorten column labels perl -pe 's/SNP_ID_in_SoySNP50K_orSoyNAM6K_Dataset/SNP_ID_in_SoySNP50K_or_SoyNAM6K/' | # shorten column labels perl -pe 's/Chr/glyma.Wm82.gnm2.Gm/' | # Add chromosome prefix perl -ane 'for $i (0 .. 5){print $F[$i], "\t"}; print $F[$#F], "\t"; for $i (6 .. $#F-1){print $F[$i], "\t"}; print "\n"' | perl -pe 's/\r$//; s/\s+$/\n/' | # fix line endings cat > $basename.imputed2_5176RILs_423kSNPs_ACGT.txt & nohup cat from_QSong/ready_imputed_dataset2_5176RILsx423419SNPs_AB_format_Oct21.txt | perl -pe 's/\t\t/\t.\t/' | # add dot in empty records perl -pe 's/ +/_/g; s/^(\S+)\t(\S+)\t(\S+)/$2\t$3\t$1/' | # replace spaces with underscores and switch initial column order perl -pe 's/Linkage_position_\S+/LG_cM/; s/Chr_/#CHROM_/; s/Position/POS/; s/Wma2v1/Wm82a2/g' | # shorten column labels perl -pe 's/SNP_ID_in_SoySNP50K_orSoyNAM6K_Dataset/SNP_ID_in_SoySNP50K_or_SoyNAM6K/' | # shorten column labels perl -pe 's/Chr/glyma.Wm82.gnm2.Gm/' | # Add chromosome prefix perl -ane 'for $i (0 .. 5){print $F[$i], "\t"}; print $F[$#F], "\t"; for $i (6 .. $#F-1){print $F[$i], "\t"}; print "\n"' | perl -pe 's/\r$//; s/\s+$/\n/' | # fix line endings cat > $basename.imputed2_5176RILs_423kSNPs_AB.txt & 2022-01-10 Sam Hokin split this collection into 2021a containing imputed1 (now imputed) and 2021b containing imputed2 (now imputed)