We have now completed all marker calling and filtering. We will now export the genotype data for linkage mapping into the LepMap format.
Ensure you have a clear working environment
rm(list = ls())
Load packages
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## Warning: package 'tidyr' was built under R version 4.0.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Read in the filtered data
fam_SNP <- readRDS("../1_DataPrep/4_1_FilteredMarkerData_Informative_HC.RData")
Rename parents to add the \(Project_\)Species_$Family_ at the start
fam_SNP <- fam_SNP %>%
rename_at(vars(ends_with("MOT_F1")),
~ str_replace(., "MOT_F1",
paste0("LS_LM_HC_","MOT_F1"))) %>%
rename_at(vars(ends_with("FAT_M1")),
~ str_replace(., "FAT_M1",
paste0("LS_LM_HC_","FAT_M1")))
Select only the relevant columns
LM_SNP <- fam_SNP %>%
select(markerID,starts_with("LS_LM_HC_"))
First, let’s convert genotypes to the required LEPMAP3 format. Genotypes should be coded as follows:
For SNPs
First, let’s move the parents to the first two columns
LM_SNP <- LM_SNP %>% select(markerID,
ends_with("_MOT_F1"),
ends_with("_FAT_M1"),
everything())
Then we can convert
LM_SNP <- LM_SNP %>%
mutate_at(vars(starts_with("LS_LM_HC_")),
function(d) case_when(
d == 0 ~ "1.0 0 0 0 0 0 0 0 0 0",
d == 1 ~ "0 1.0 0 0 0 0 0 0 0 0",
d == 2 ~ "0 0 0 0 1.0 0 0 0 0 0",
d == -1 ~ "1 1 1 1 1 1 1 1 1 1"))
Now that we have our final dataset, it is time to export to LEPMAP3 format.
Add POS column, a placeholder
LM_SNP <- LM_SNP %>%
mutate(position = "POS") %>%
select(markerID,position,everything())
Now harness the immense power of mapply
#make row with family name
row1 <- c("CHR","POS",rep("LS_LM_HC",
times = (ncol(LM_SNP)-2) ))
#make row with individual names
row2 <- colnames(LM_SNP)
#replace CHR and POS in first and second column of second row
row2[1] <- "CHR"
row2[2] <- "POS"
#make row with parent ID
row3 <- c("CHR","POS",0,0,
rep("LS_LM_HC_FAT_M1", times = (ncol(LM_SNP)-4)))
#make row with mother ID
row4 <- c("CHR","POS",0,0,
rep("LS_LM_HC_MOT_F1", times = (ncol(LM_SNP)-4)))
#make row with sex if known, otherwise 0
row5 <- c("CHR","POS",2,1,
rep(0, times = (ncol(LM_SNP)-4) ))
#make row with phenotype and additional info, otherwise 999
row6 <- c("CHR","POS",
rep(999,times = (ncol(LM_SNP)-2) ))
#merge alltogether
temp <- as.data.frame(rbind(row1,row2,row3,row4,row5,row6))
names(temp) <- colnames(LM_SNP)
LM_SNP <- rbind(temp,LM_SNP)
Save the full dataset as a RData file
saveRDS(LM_SNP, file = "../1_DataPrep/5_1_LepMapInput.RData")
And finally save the LEPMAP3 input files. They should be tab delimited .txt files
write.table(LM_SNP,
"../1_DataPrep/5_2_LepMapInput_HC_A.txt",
sep="\t", quote = FALSE,
col.names = FALSE, row.names=FALSE)
Done! Now we are ready to start the linkage mapping stage with LepMap3!!