1. Background

In this script I produce the final summary information for the linkage maps (e.g., interval between unique position, density of loci, etc) and use this info to produce Table 1.

2. Load packages and data

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.2.0     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## Warning: package 'tidyr' was built under R version 4.0.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
my.df <- readRDS(file = "../2_LinMap/10_4_linkageMap.RData")

3. Calculate non-zero interval

Make a list with one element per linkage group

my.list <- as.list(seq(1,13,1))

my.list <- lapply(my.list, function(x){
  y <- my.df %>% filter(LG == x)
  return(y)
})

Separate into male, female and sex averaged linkage maps

my.male <- lapply(my.list, function(x){
  y <- x %>%
    distinct(male, .keep_all = TRUE) %>%
    drop_na(male) %>%
    arrange(male) %>%
    pull(male)
})

my.female <- lapply(my.list, function(x){
  y <- x %>%
    distinct(female, .keep_all = TRUE) %>%
    drop_na(female) %>%
    arrange(female) %>%
    pull(female)
})

my.average <- lapply(my.list, function(x){
  y <- x %>%
    distinct(sex.averaged, .keep_all = TRUE) %>%
    arrange(sex.averaged) %>%
    pull(sex.averaged)
})

Calculate intervals

my.male <- lapply(my.male, function(x){
  z <- vector(mode = "numeric", length(x)-1)
  
  w <- 0
  y <- 1
  
  for (i in 1:length(x)-1) {
    
    z[i] <- x[y]-x[w]
    
    w <- w+1
    y <- y+1
  }
  
  return(z)
})

my.female <- lapply(my.female, function(x){
  z <- vector(mode = "numeric", length(x)-1)
  
  w <- 0
  y <- 1
  
  for (i in 1:length(x)-1) {
    
    z[i] <- x[y]-x[w]
    
    w <- w+1
    y <- y+1
  }
  
  return(z)
})

my.average <- lapply(my.average, function(x){
  z <- vector(mode = "numeric", length(x)-1)
  
  w <- 0
  y <- 1
  
  for (i in 1:length(x)-1) {
    
    z[i] <- x[y]-x[w]
    
    w <- w+1
    y <- y+1
  }
  
  return(z)
})

3.1. Calculate average non-zero interval for whole map

Bind into one dataframe per sex

my.male.whole <- do.call(c, my.male)

my.female.whole <- do.call(c, my.female)

my.average.whole <- do.call(c, my.average)

Calculate average non-zero interval for male map

mean(my.male.whole)
## [1] 1.203763

Calculate average non-zero interval for female map

mean(my.female.whole)
## [1] 0.8292114

Calculate average non-zero interval for sex averaged map

mean(my.average.whole)
## [1] 0.4816138

3.2. Calculate average non-zero interval for each LG

my.male <- lapply(my.male, mean)

my.female <- lapply(my.female, mean)

my.average <- lapply(my.average, mean)

4. Build Table 1

Create starting stable with LG number, and the non-zero interval we calculated

table1 <- tibble("LG"=c("1","2","3","4","5","6",
                        "7","8","9","10","11","12","13"),
                 "nonzero.average"=unlist(my.average),
                 "nonzero.male"=unlist(my.male),
                 "nonzero.female"=unlist(my.female))

rm(my.male, my.female, my.average, my.list)

4.1. Number of snps and unique positions per LG

Calculate number of SNPs and number of unique positions

nsnps.average <- my.df %>%
  group_by(LG) %>%
  summarise(nSNPs.average = n(),
            unique.average = length(unique(sex.averaged)),
            max.average = max(sex.averaged))

nsnps.male <- my.df %>% drop_na(male) %>%
  group_by(LG) %>%
  summarise(nSNPs.male = n(),
            unique.male = length(unique(male)),
            max.male = max(male))

nsnps.female <- my.df %>% drop_na(female) %>%
  group_by(LG) %>%
  summarise(nSNPs.female = n(),
            unique.female = length(unique(female)),
            max.female = max(female))

Add to table

table1 <- left_join(table1, nsnps.average, by = "LG")
table1 <- left_join(table1, nsnps.male, by = "LG")
table1 <- left_join(table1, nsnps.female, by = "LG")

4.2. Average and sum by column

table1 <- table1 %>%
  add_row(LG = "All",
          nonzero.average = 0.4823,
          nonzero.male = 1.2043,
          nonzero.female = 0.8217,
          nSNPs.average = sum(table1$nSNPs.average),
          unique.average = sum(table1$unique.average),
          max.average = sum(table1$max.average),
          nSNPs.male = sum(table1$nSNPs.male),
          unique.male = sum(table1$unique.male),
          max.male = sum(table1$max.male),
          nSNPs.female = sum(table1$nSNPs.female),
          unique.female = sum(table1$unique.female),
          max.female = sum(table1$max.female))

#and add the female to male ratio
table1 <- table1 %>%
  mutate("F:M ratio" = max.female/max.male)

Now let’s order these details in a better way

table1 <- table1 %>%
  select(LG,
         nSNPs.average, nSNPs.male, nSNPs.female,
         unique.average, unique.male, unique.female,
         max.average, max.male, max.female,
         nonzero.average, nonzero.male, nonzero.female,
         `F:M ratio`)

4.4. Calculate estimated genome coverage

To calculate estimated genome coverage I use the equation by Bishop et al. (1983) \(c = 1 - e^{-2dn/L}\), where d is the average spacing of markers, n is the number of markers and L is the legth of the linkage map. In our table these correspond to nSNPs (n), max (L) and nonzero (d).

table2 <- table1 %>%
  mutate(
    coverage.uniue = (1 - exp(-2*nonzero.average*unique.average/max.average)),
    coverage.nSNPs = (1 - exp(-2*nonzero.average*nSNPs.average/max.average)))

4.5. Export as publication ready table

write_delim(table1, file = "../2_LinMap/11_Table1.txt",
            delim = "\t")

At this stage I do not know how to format within R. I have tried 3 different packages from blogposts I found on the internet and neither works.

write_delim(table2, file = "../2_LinMap/11_Table1_withCoverage.txt",
            delim = "\t")