Import Data and Use Base R to Subset then Merge

We use the readxl package to import data from the “Data_Complete_%OC” sheet in “4NP Standard Curve and Data.xlsx”.

  ### Import the entire sheet
  percoc <- read_excel("4NP Standard Curve and Data.xlsx",
                      sheet = "Data_Complete_%OC", 
                      skip = 2)
## New names:
## * `Mass of dust per trial (g)` -> `Mass of dust per trial (g)...5`
## * `4NP in trial (mg)` -> `4NP in trial (mg)...6`
## * `Mass released (mg)` -> `Mass released (mg)...7`
## * `%released` -> `%released...8`
## * `Mass of dust per trial (g)` -> `Mass of dust per trial (g)...9`
## * ... and 11 more problems
  names(percoc)
##  [1] "Days"                            "Mass dosed"                     
##  [3] "Mass dust dosed(g)"              "Conc on dust (mg/g)"            
##  [5] "Mass of dust per trial (g)...5"  "4NP in trial (mg)...6"          
##  [7] "Mass released (mg)...7"          "%released...8"                  
##  [9] "Mass of dust per trial (g)...9"  "4NP in trial (mg)...10"         
## [11] "Mass released (mg)...11"         "%released...12"                 
## [13] "Mass of dust per trial (g)...13" "4NP in trial (mg)...14"         
## [15] "Mass released (mg)...15"         "%released...16"                 
## [17] "Mass of dust per trial (g)...17" "4NP in trial (mg)...18"         
## [19] "Mass released (mg)...19"         "%released...20"
  ### Subset columns 1:4 and 5:8 to get the 4% OC data
  percoc.4perc <- percoc[, c(1:4,5:8)]
  names(percoc.4perc) <- c("Days", "Mass_Dosed", "Dust_Massed_Dosed", "Conc_On_Dust", "Mass_Of_Dust_Per_Trial", "FourNP_In_Trial", "Mass_Released", "Perc_Released")
  percoc.4perc$Perc_OC <- 4
  
  ### Subset columns 1:4 and 9:12 to get the 10% OC data
  percoc.10perc <- percoc[, c(1:4,9:12)]
  names(percoc.10perc) <- c("Days", "Mass_Dosed", "Dust_Massed_Dosed", "Conc_On_Dust", "Mass_Of_Dust_Per_Trial", "FourNP_In_Trial", "Mass_Released", "Perc_Released")
  percoc.10perc$Perc_OC <- 10
  
  ### Subset columns 1:4 and 13:16 to get the 50% OC data
  percoc.50perc <- percoc[, c(1:4,13:16)]
  names(percoc.50perc) <- c("Days", "Mass_Dosed", "Dust_Massed_Dosed", "Conc_On_Dust", "Mass_Of_Dust_Per_Trial", "FourNP_In_Trial", "Mass_Released", "Perc_Released")
  percoc.50perc$Perc_OC <- 50
  
  ### Subset columns 1:4 and 17:20 to get the 70% OC data
  percoc.70perc <- percoc[, c(1:4,17:20)]
  names(percoc.70perc) <- c("Days", "Mass_Dosed", "Dust_Massed_Dosed", "Conc_On_Dust", "Mass_Of_Dust_Per_Trial", "FourNP_In_Trial", "Mass_Released", "Perc_Released")
  percoc.70perc$Perc_OC <- 70

  ### Stack the 4, 10, 50, and 70 percent data to create a single data.frame
  percoc <- rbind(percoc.4perc, percoc.10perc, percoc.50perc, percoc.70perc)
  
  ### "Study day" is coded as "Days.Trial" 
  percoc$Sample <- factor(round((percoc$Days - floor(percoc$Days))*10))
  percoc$Days <- floor(percoc$Days)
  
  ### For percentage and proportion to numeric
  percoc$Perc_Released <- as.numeric(percoc$Perc_Released)
  percoc$Prop_Released <- percoc$Perc_Released / 100

  ### Remove/delete the temporary individual percentage data.frames
  rm(percoc.4perc, percoc.10perc, percoc.50perc, percoc.70perc)
  
  ### Write the complete, reformatted data.frame to CSV
  write.csv(percoc, "Four_NP_PercOC.csv", row.names = FALSE)
  
  ### Check to make sure everything is balanced and plot it just for fun
  head(percoc)
## # A tibble: 6 x 11
##    Days Mass_Dosed Dust_Massed_Dos~ Conc_On_Dust Mass_Of_Dust_Pe~
##   <dbl>      <dbl>            <dbl>        <dbl>            <dbl>
## 1     0        0                0           0               NA   
## 2     5       18.0              5.5         3.27             0.88
## 3     5       15.6              5.6         2.79             1.01
## 4     5       22.3              7           3.19             1.4 
## 5    10       18.0              5.5         3.27             1.33
## 6    10       15.6              5.6         2.79             1.21
## # ... with 6 more variables: FourNP_In_Trial <dbl>, Mass_Released <dbl>,
## #   Perc_Released <dbl>, Perc_OC <dbl>, Sample <fct>, Prop_Released <dbl>
  table(percoc$Days, percoc$Perc_OC)
##     
##      4 10 50 70
##   0  1  1  1  1
##   5  3  3  3  3
##   10 3  3  3  3
##   15 3  3  3  3
##   20 3  3  3  3
##   25 3  3  3  3
##   30 3  3  3  3
##   35 3  3  3  3
##   40 3  3  3  3
  ggplot(percoc, aes(x=Days, y=Perc_Released, group=factor(Perc_OC),     
                     colour=factor(Perc_OC), shape=factor(Perc_OC), 
                     linetype=factor(Perc_OC))) +
         geom_point() +
         stat_summary(fun.y="mean", geom="line") +
         #stat_summary(fun.y="mean", geom="point", shape = "-", size=10) +
         theme_bw() + 
         labs(shape = "OC%", linetype = "OC%", colour = "OC%") + 
         ylab("Percent 4-NP Released")

Import Data using Ranges and then Merge

We use the readxl package to import data from select ranges of the “Data_Complete_%OC” sheet in “4NP Standard Curve and Data.xlsx”.

  ### Import the 4% OC data
  percoc.4 <- read_excel("4NP Standard Curve and Data.xlsx",
                      sheet = "Data_Complete_%OC",
                      col_types=c(rep("numeric",4), rep("numeric",4), rep("skip",12)),
                      skip=2)
  names(percoc.4)
## [1] "Days"                       "Mass dosed"                
## [3] "Mass dust dosed(g)"         "Conc on dust (mg/g)"       
## [5] "Mass of dust per trial (g)" "4NP in trial (mg)"         
## [7] "Mass released (mg)"         "%released"
  names(percoc.4) <- c("Days", "Mass_Dosed", "Dust_Massed_Dosed", "Conc_On_Dust", "Mass_Of_Dust_Per_Trial", "FourNP_In_Trial", "Mass_Released", "Perc_Released")
  percoc.4$Perc_OC <- 4
  
  ### Import the 10% OC data
  percoc.10 <- read_excel("4NP Standard Curve and Data.xlsx",
                      sheet = "Data_Complete_%OC",
                      col_types=c(rep("numeric",4), rep("skip",4), rep("numeric",4), rep("skip",8)),
                      skip=2)
  names(percoc.10)
## [1] "Days"                       "Mass dosed"                
## [3] "Mass dust dosed(g)"         "Conc on dust (mg/g)"       
## [5] "Mass of dust per trial (g)" "4NP in trial (mg)"         
## [7] "Mass released (mg)"         "%released"
  names(percoc.10) <- c("Days", "Mass_Dosed", "Dust_Massed_Dosed", "Conc_On_Dust", "Mass_Of_Dust_Per_Trial", "FourNP_In_Trial", "Mass_Released", "Perc_Released")
  percoc.10$Perc_OC <- 10
  
  ### A more efficient way of selecting columns
  selectcols <- rep("skip", 20)
  selectcols[c(1:4, 13:16)] <- "numeric"
  ### Import the 50% OC data
  percoc.50 <- read_excel("4NP Standard Curve and Data.xlsx",
                      sheet = "Data_Complete_%OC",
                      col_types = selectcols,
                      skip=2)
  names(percoc.50)
## [1] "Days"                       "Mass dosed"                
## [3] "Mass dust dosed(g)"         "Conc on dust (mg/g)"       
## [5] "Mass of dust per trial (g)" "4NP in trial (mg)"         
## [7] "Mass released (mg)"         "%released"
  names(percoc.50) <- c("Days", "Mass_Dosed", "Dust_Massed_Dosed", "Conc_On_Dust", "Mass_Of_Dust_Per_Trial", "FourNP_In_Trial", "Mass_Released", "Perc_Released")
  percoc.50$Perc_OC <- 50
  
  ### Import the 70% OC data
  selectcols <- rep("skip", 20)
  selectcols[c(1:4, 17:20)] <- "numeric"
  percoc.70 <- read_excel("4NP Standard Curve and Data.xlsx",
                      sheet = "Data_Complete_%OC",
                      col_types = selectcols,
                      skip=2)
  names(percoc.70)
## [1] "Days"                       "Mass dosed"                
## [3] "Mass dust dosed(g)"         "Conc on dust (mg/g)"       
## [5] "Mass of dust per trial (g)" "4NP in trial (mg)"         
## [7] "Mass released (mg)"         "%released"
  names(percoc.70) <- c("Days", "Mass_Dosed", "Dust_Massed_Dosed", "Conc_On_Dust", "Mass_Of_Dust_Per_Trial", "FourNP_In_Trial", "Mass_Released", "Perc_Released")
  percoc.70$Perc_OC <- 70


  ### Stack the 4, 10, 50, and 70 percent data to create a single data.frame
  percoc2 <- rbind(percoc.4, percoc.10, percoc.50, percoc.70)
  percoc2$Sample <- factor(round((percoc2$Days - floor(percoc2$Days))*10))
  percoc2$Days <- floor(percoc2$Days)
  percoc2$Perc_Released <- as.numeric(percoc2$Perc_Released)
  percoc2$Prop_Released <- percoc2$Perc_Released / 100

  ### Remove/delete the temporary individual percentage data.frames
  rm(percoc.4, percoc.10, percoc.50, percoc.70)
  
  ### Write the complete, reformatted data.frame to CSV
  write.csv(percoc2, "Four_NP_PercOC2.csv", row.names = FALSE)
  
  ### Check to make sure everything is balanced and plot it just for fun
  head(percoc2)
## # A tibble: 6 x 11
##    Days Mass_Dosed Dust_Massed_Dos~ Conc_On_Dust Mass_Of_Dust_Pe~
##   <dbl>      <dbl>            <dbl>        <dbl>            <dbl>
## 1     0        0                0           0               NA   
## 2     5       18.0              5.5         3.27             0.88
## 3     5       15.6              5.6         2.79             1.01
## 4     5       22.3              7           3.19             1.4 
## 5    10       18.0              5.5         3.27             1.33
## 6    10       15.6              5.6         2.79             1.21
## # ... with 6 more variables: FourNP_In_Trial <dbl>, Mass_Released <dbl>,
## #   Perc_Released <dbl>, Perc_OC <dbl>, Sample <fct>, Prop_Released <dbl>
  table(percoc2$Days, percoc2$Perc_OC)
##     
##      4 10 50 70
##   0  1  1  1  1
##   5  3  3  3  3
##   10 3  3  3  3
##   15 3  3  3  3
##   20 3  3  3  3
##   25 3  3  3  3
##   30 3  3  3  3
##   35 3  3  3  3
##   40 3  3  3  3
  ggplot(percoc2, aes(x=Days, y=Perc_Released, group=factor(Perc_OC),     
                     colour=factor(Perc_OC), shape=factor(Perc_OC), 
                     linetype=factor(Perc_OC))) +
         geom_point() +
         stat_summary(fun.y="mean", geom="line") +
         #stat_summary(fun.y="mean", geom="point", shape = "-", size=10) +
         theme_bw() + 
         labs(shape = "OC%", linetype = "OC%", colour = "OC%") + 
         ylab("Percent 4-NP Released")