# Code
library(palmerpenguins)
library(dplyr)
library(ggplot2)
library(tidyr)
library(DBI)
library(duckdb)
library(palmerpenguins)
library(dplyr)
library(ggplot2)
library(tidyr)
library(DBI)
library(duckdb)
# df <- palmerpenguins::penguins %>% drop_na()
# Query the data from the DuckDB database instead of the in-memory data frame.
con <- DBI::dbConnect(duckdb::duckdb(), dbdir = "my-db.duckdb")
# `df` is a lazy reference to the "penguins" table: the dplyr verbs applied to
# it are run by DuckDB, and rows only reach R when collect() is called.
df <- dplyr::tbl(con, "penguins")

# Mean of every measurement column (names ending in "mm" or "g") for each
# species/sex combination, rendered as a table.
df %>%
  group_by(species, sex) %>%
  summarise(
    across(
      ends_with("mm") | ends_with("g"),
      \(x) mean(x, na.rm = TRUE)
    ),
    # Return an ungrouped result rather than one still grouped by species.
    .groups = "drop"
  ) %>%
  # not required, but illustrates that work has been pushed off to duckdb
  dplyr::collect() %>%
  knitr::kable()
#> | species | sex | bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g |
#> |---|---|---|---|---|---|
#> | Adelie | NA | 37.84000 | 18.32000 | 185.6000 | 3540.000 |
#> | Gentoo | female | 45.56379 | 14.23793 | 212.7069 | 4679.741 |
#> | Adelie | male | 40.39041 | 19.07260 | 192.4110 | 4043.493 |
#> | Chinstrap | male | 51.09412 | 19.25294 | 199.9118 | 3938.971 |
#> | Adelie | female | 37.25753 | 17.62192 | 187.7945 | 3368.836 |
#> | Gentoo | NA | 45.62500 | 14.55000 | 215.7500 | 4587.500 |
#> | Gentoo | male | 49.47377 | 15.71803 | 221.5410 | 5484.836 |
#> | Chinstrap | female | 46.57353 | 17.58824 | 191.7353 | 3527.206 |
# shifting backend to duckdb required refactoring pipe to keep operations
# together
#
# Scatter plot of body mass against bill length with one linear fit per
# species. The filter and the colour lookup are applied to the lazy `df`
# table, so they run in the database before any rows are brought into R.
df %>%
  filter(!is.na(species), !is.na(bill_length_mm), !is.na(body_mass_g)) %>%
  mutate(colour = case_when(
    species == "Adelie" ~ "#ff7400",
    species == "Chinstrap" ~ "#c35ccc",
    species == "Gentoo" ~ "#057275",
    # Fallback for any other species value; not expected for the three
    # species handled above.
    TRUE ~ ""
  )) %>%
  # Materialise the prepared rows locally before handing them to ggplot2.
  dplyr::collect() %>%
  ggplot(aes(x = bill_length_mm, y = body_mass_g, colour = colour)) +
  geom_point() +
  geom_smooth(method = "lm") +
  # `colour` already holds the hex code to draw with, so use an identity scale.
  # Pairing `breaks` with `labels` explicitly ties each hex code to its
  # species; unnamed manual values/labels are assigned in sorted level order
  # ("#057275" first), which swapped the Adelie and Gentoo legend entries.
  scale_colour_identity(
    guide = "legend",
    breaks = c("#ff7400", "#c35ccc", "#057275"),
    labels = c("Adelie", "Chinstrap", "Gentoo")
  ) +
  theme_bw()
# Close the DuckDB connection and shut the database instance down.
DBI::dbDisconnect(con, shutdown = TRUE)