Using dplyr to transform and summarize data
Scott Gabara
06/11/2020
Using dplyr to transform and summarize data
# Using dplyr to transform and summarize data
# Inspect the structure of the CO2 data frame: classes, column types, attributes
str(CO2)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 84 obs. of 5 variables:
## $ Plant : Ord.factor w/ 12 levels "Qn1"<"Qn2"<"Qn3"<..: 1 1 1 1 1 1 1 2 2 2 ...
## $ Type : Factor w/ 2 levels "Quebec","Mississippi": 1 1 1 1 1 1 1 1 1 1 ...
## $ Treatment: Factor w/ 2 levels "nonchilled","chilled": 1 1 1 1 1 1 1 1 1 1 ...
## $ conc : num 95 175 250 350 500 675 1000 95 175 250 ...
## $ uptake : num 16 30.4 34.8 37.2 35.3 39.2 39.7 13.6 27.3 37.1 ...
## - attr(*, "formula")=Class 'formula' language uptake ~ conc | Plant
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "outer")=Class 'formula' language ~Treatment * Type
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "labels")=List of 2
## ..$ x: chr "Ambient carbon dioxide concentration"
## ..$ y: chr "CO2 uptake rate"
## - attr(*, "units")=List of 2
## ..$ x: chr "(uL/L)"
## ..$ y: chr "(umol/m^2 s)"
# Preview the first 10 rows of CO2
head(CO2, n = 10)
## Plant Type Treatment conc uptake
## 1 Qn1 Quebec nonchilled 95 16.0
## 2 Qn1 Quebec nonchilled 175 30.4
## 3 Qn1 Quebec nonchilled 250 34.8
## 4 Qn1 Quebec nonchilled 350 37.2
## 5 Qn1 Quebec nonchilled 500 35.3
## 6 Qn1 Quebec nonchilled 675 39.2
## 7 Qn1 Quebec nonchilled 1000 39.7
## 8 Qn2 Quebec nonchilled 95 13.6
## 9 Qn2 Quebec nonchilled 175 27.3
## 10 Qn2 Quebec nonchilled 250 37.1
# Show the last rows of CO2 (tail() prints six rows by default)
tail(CO2)
## Plant Type Treatment conc uptake
## 79 Mc3 Mississippi chilled 175 18.0
## 80 Mc3 Mississippi chilled 250 17.9
## 81 Mc3 Mississippi chilled 350 17.9
## 82 Mc3 Mississippi chilled 500 17.9
## 83 Mc3 Mississippi chilled 675 18.9
## 84 Mc3 Mississippi chilled 1000 19.9
library(dplyr) # select(), filter(), arrange(), mutate(), summarise(), group_by()
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# select() keeps or drops columns (use filter() to choose rows)
# Prefix a column name with - to drop it; here conc and Plant are removed
CO2 %>%
  select(-conc, -Plant) %>%
  head()
## Type Treatment uptake
## 1 Quebec nonchilled 16.0
## 2 Quebec nonchilled 30.4
## 3 Quebec nonchilled 34.8
## 4 Quebec nonchilled 37.2
## 5 Quebec nonchilled 35.3
## 6 Quebec nonchilled 39.2
# Keep only the observations whose Type is "Quebec", then print the result
fil_Quebec <- CO2 %>%
  filter(Type == "Quebec")
fil_Quebec
## Plant Type Treatment conc uptake
## 1 Qn1 Quebec nonchilled 95 16.0
## 2 Qn1 Quebec nonchilled 175 30.4
## 3 Qn1 Quebec nonchilled 250 34.8
## 4 Qn1 Quebec nonchilled 350 37.2
## 5 Qn1 Quebec nonchilled 500 35.3
## 6 Qn1 Quebec nonchilled 675 39.2
## 7 Qn1 Quebec nonchilled 1000 39.7
## 8 Qn2 Quebec nonchilled 95 13.6
## 9 Qn2 Quebec nonchilled 175 27.3
## 10 Qn2 Quebec nonchilled 250 37.1
## 11 Qn2 Quebec nonchilled 350 41.8
## 12 Qn2 Quebec nonchilled 500 40.6
## 13 Qn2 Quebec nonchilled 675 41.4
## 14 Qn2 Quebec nonchilled 1000 44.3
## 15 Qn3 Quebec nonchilled 95 16.2
## 16 Qn3 Quebec nonchilled 175 32.4
## 17 Qn3 Quebec nonchilled 250 40.3
## 18 Qn3 Quebec nonchilled 350 42.1
## 19 Qn3 Quebec nonchilled 500 42.9
## 20 Qn3 Quebec nonchilled 675 43.9
## 21 Qn3 Quebec nonchilled 1000 45.5
## 22 Qc1 Quebec chilled 95 14.2
## 23 Qc1 Quebec chilled 175 24.1
## 24 Qc1 Quebec chilled 250 30.3
## 25 Qc1 Quebec chilled 350 34.6
## 26 Qc1 Quebec chilled 500 32.5
## 27 Qc1 Quebec chilled 675 35.4
## 28 Qc1 Quebec chilled 1000 38.7
## 29 Qc2 Quebec chilled 95 9.3
## 30 Qc2 Quebec chilled 175 27.3
## 31 Qc2 Quebec chilled 250 35.0
## 32 Qc2 Quebec chilled 350 38.8
## 33 Qc2 Quebec chilled 500 38.6
## 34 Qc2 Quebec chilled 675 37.5
## 35 Qc2 Quebec chilled 1000 42.4
## 36 Qc3 Quebec chilled 95 15.1
## 37 Qc3 Quebec chilled 175 21.0
## 38 Qc3 Quebec chilled 250 38.1
## 39 Qc3 Quebec chilled 350 34.0
## 40 Qc3 Quebec chilled 500 38.9
## 41 Qc3 Quebec chilled 675 39.6
## 42 Qc3 Quebec chilled 1000 41.4
# Keep only the rows with a conc of at least 500
CO2 %>%
  filter(conc >= 500)
## Plant Type Treatment conc uptake
## 1 Qn1 Quebec nonchilled 500 35.3
## 2 Qn1 Quebec nonchilled 675 39.2
## 3 Qn1 Quebec nonchilled 1000 39.7
## 4 Qn2 Quebec nonchilled 500 40.6
## 5 Qn2 Quebec nonchilled 675 41.4
## 6 Qn2 Quebec nonchilled 1000 44.3
## 7 Qn3 Quebec nonchilled 500 42.9
## 8 Qn3 Quebec nonchilled 675 43.9
## 9 Qn3 Quebec nonchilled 1000 45.5
## 10 Qc1 Quebec chilled 500 32.5
## 11 Qc1 Quebec chilled 675 35.4
## 12 Qc1 Quebec chilled 1000 38.7
## 13 Qc2 Quebec chilled 500 38.6
## 14 Qc2 Quebec chilled 675 37.5
## 15 Qc2 Quebec chilled 1000 42.4
## 16 Qc3 Quebec chilled 500 38.9
## 17 Qc3 Quebec chilled 675 39.6
## 18 Qc3 Quebec chilled 1000 41.4
## 19 Mn1 Mississippi nonchilled 500 30.9
## 20 Mn1 Mississippi nonchilled 675 32.4
## 21 Mn1 Mississippi nonchilled 1000 35.5
## 22 Mn2 Mississippi nonchilled 500 32.4
## 23 Mn2 Mississippi nonchilled 675 31.1
## 24 Mn2 Mississippi nonchilled 1000 31.5
## 25 Mn3 Mississippi nonchilled 500 28.5
## 26 Mn3 Mississippi nonchilled 675 28.1
## 27 Mn3 Mississippi nonchilled 1000 27.8
## 28 Mc1 Mississippi chilled 500 19.5
## 29 Mc1 Mississippi chilled 675 22.2
## 30 Mc1 Mississippi chilled 1000 21.9
## 31 Mc2 Mississippi chilled 500 12.5
## 32 Mc2 Mississippi chilled 675 13.7
## 33 Mc2 Mississippi chilled 1000 14.4
## 34 Mc3 Mississippi chilled 500 17.9
## 35 Mc3 Mississippi chilled 675 18.9
## 36 Mc3 Mississippi chilled 1000 19.9
# Keep only the rows where conc is at least 500 and uptake is at least 40
CO2 %>%
  filter(conc >= 500 & uptake >= 40)
## Plant Type Treatment conc uptake
## 1 Qn2 Quebec nonchilled 500 40.6
## 2 Qn2 Quebec nonchilled 675 41.4
## 3 Qn2 Quebec nonchilled 1000 44.3
## 4 Qn3 Quebec nonchilled 500 42.9
## 5 Qn3 Quebec nonchilled 675 43.9
## 6 Qn3 Quebec nonchilled 1000 45.5
## 7 Qc2 Quebec chilled 1000 42.4
## 8 Qc3 Quebec chilled 1000 41.4
# Use arrange() to sort rows by the values of a column
# First show the original row order for comparison
head(CO2)
## Plant Type Treatment conc uptake
## 1 Qn1 Quebec nonchilled 95 16.0
## 2 Qn1 Quebec nonchilled 175 30.4
## 3 Qn1 Quebec nonchilled 250 34.8
## 4 Qn1 Quebec nonchilled 350 37.2
## 5 Qn1 Quebec nonchilled 500 35.3
## 6 Qn1 Quebec nonchilled 675 39.2
# Sort the rows by ascending uptake and show the first six
head(arrange(CO2, uptake))
## Plant Type Treatment conc uptake
## 1 Mc2 Mississippi chilled 95 7.7
## 2 Qc2 Quebec chilled 95 9.3
## 3 Mc1 Mississippi chilled 95 10.5
## 4 Mn1 Mississippi nonchilled 95 10.6
## 5 Mc3 Mississippi chilled 95 10.6
## 6 Mn3 Mississippi nonchilled 95 11.3
# mutate() adds a new column to the data frame; here C.U holds the ratio
# conc / uptake. Only the first six rows are shown.
head(mutate(CO2, C.U = conc / uptake))
## Plant Type Treatment conc uptake C.U
## 1 Qn1 Quebec nonchilled 95 16.0 5.937500
## 2 Qn1 Quebec nonchilled 175 30.4 5.756579
## 3 Qn1 Quebec nonchilled 250 34.8 7.183908
## 4 Qn1 Quebec nonchilled 350 37.2 9.408602
## 5 Qn1 Quebec nonchilled 500 35.3 14.164306
## 6 Qn1 Quebec nonchilled 675 39.2 17.219388
# summarise() collapses the data into a single row of summary statistics:
# the minimum, maximum and mean of conc
summarise(
  CO2,
  min_conc = min(conc),
  max_conc = max(conc),
  avg_conc = mean(conc)
)
## min_conc max_conc avg_conc
## 1 95 1000 435