In this script I produce the final summary information for the linkage maps (e.g., interval between unique position, density of loci, etc) and use this info to produce Table 1.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## Warning: package 'tidyr' was built under R version 4.0.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
my.df <- readRDS(file = "../2_LinMap/10_4_linkageMap.RData")
Make a list with one element per linkage group
my.list <- as.list(seq(1,13,1))
my.list <- lapply(my.list, function(x){
y <- my.df %>% filter(LG == x)
return(y)
})
Separate into male, female and sex averaged linkage maps
my.male <- lapply(my.list, function(x){
y <- x %>%
distinct(male, .keep_all = TRUE) %>%
drop_na(male) %>%
arrange(male) %>%
pull(male)
})
my.female <- lapply(my.list, function(x){
y <- x %>%
distinct(female, .keep_all = TRUE) %>%
drop_na(female) %>%
arrange(female) %>%
pull(female)
})
my.average <- lapply(my.list, function(x){
y <- x %>%
distinct(sex.averaged, .keep_all = TRUE) %>%
arrange(sex.averaged) %>%
pull(sex.averaged)
})
Calculate intervals
my.male <- lapply(my.male, function(x){
z <- vector(mode = "numeric", length(x)-1)
w <- 0
y <- 1
for (i in 1:length(x)-1) {
z[i] <- x[y]-x[w]
w <- w+1
y <- y+1
}
return(z)
})
my.female <- lapply(my.female, function(x){
z <- vector(mode = "numeric", length(x)-1)
w <- 0
y <- 1
for (i in 1:length(x)-1) {
z[i] <- x[y]-x[w]
w <- w+1
y <- y+1
}
return(z)
})
my.average <- lapply(my.average, function(x){
z <- vector(mode = "numeric", length(x)-1)
w <- 0
y <- 1
for (i in 1:length(x)-1) {
z[i] <- x[y]-x[w]
w <- w+1
y <- y+1
}
return(z)
})
Bind into one dataframe per sex
my.male.whole <- do.call(c, my.male)
my.female.whole <- do.call(c, my.female)
my.average.whole <- do.call(c, my.average)
Calculate average non-zero interval for male map
mean(my.male.whole)
## [1] 1.203763
Calculate average non-zero interval for female map
mean(my.female.whole)
## [1] 0.8292114
Calculate average non-zero interval for sex averaged map
mean(my.average.whole)
## [1] 0.4816138
my.male <- lapply(my.male, mean)
my.female <- lapply(my.female, mean)
my.average <- lapply(my.average, mean)
Create starting stable with LG number, and the non-zero interval we calculated
table1 <- tibble("LG"=c("1","2","3","4","5","6",
"7","8","9","10","11","12","13"),
"nonzero.average"=unlist(my.average),
"nonzero.male"=unlist(my.male),
"nonzero.female"=unlist(my.female))
rm(my.male, my.female, my.average, my.list)
Calculate number of SNPs and number of unique positions
nsnps.average <- my.df %>%
group_by(LG) %>%
summarise(nSNPs.average = n(),
unique.average = length(unique(sex.averaged)),
max.average = max(sex.averaged))
nsnps.male <- my.df %>% drop_na(male) %>%
group_by(LG) %>%
summarise(nSNPs.male = n(),
unique.male = length(unique(male)),
max.male = max(male))
nsnps.female <- my.df %>% drop_na(female) %>%
group_by(LG) %>%
summarise(nSNPs.female = n(),
unique.female = length(unique(female)),
max.female = max(female))
Add to table
table1 <- left_join(table1, nsnps.average, by = "LG")
table1 <- left_join(table1, nsnps.male, by = "LG")
table1 <- left_join(table1, nsnps.female, by = "LG")
table1 <- table1 %>%
add_row(LG = "All",
nonzero.average = 0.4823,
nonzero.male = 1.2043,
nonzero.female = 0.8217,
nSNPs.average = sum(table1$nSNPs.average),
unique.average = sum(table1$unique.average),
max.average = sum(table1$max.average),
nSNPs.male = sum(table1$nSNPs.male),
unique.male = sum(table1$unique.male),
max.male = sum(table1$max.male),
nSNPs.female = sum(table1$nSNPs.female),
unique.female = sum(table1$unique.female),
max.female = sum(table1$max.female))
#and add the female to male ratio
table1 <- table1 %>%
mutate("F:M ratio" = max.female/max.male)
Now let’s order these details in a better way
table1 <- table1 %>%
select(LG,
nSNPs.average, nSNPs.male, nSNPs.female,
unique.average, unique.male, unique.female,
max.average, max.male, max.female,
nonzero.average, nonzero.male, nonzero.female,
`F:M ratio`)
To calculate estimated genome coverage I use the equation by Bishop et al. (1983) \(c = 1 - e^{-2dn/L}\), where d
is the average spacing of markers, n
is the number of markers and L
is the legth of the linkage map. In our table these correspond to nSNPs (n), max (L) and nonzero (d).
table2 <- table1 %>%
mutate(
coverage.uniue = (1 - exp(-2*nonzero.average*unique.average/max.average)),
coverage.nSNPs = (1 - exp(-2*nonzero.average*nSNPs.average/max.average)))
write_delim(table1, file = "../2_LinMap/11_Table1.txt",
delim = "\t")
At this stage I do not know how to format within R. I have tried 3 different packages from blogposts I found on the internet and neither works.
write_delim(table2, file = "../2_LinMap/11_Table1_withCoverage.txt",
delim = "\t")