Geography and Language
Geography and Language
Reading a table from a PostgreSQL database with the ‘flights’ dataset (336,776 rows,
19 columns) of the package nytflights13:
# First using RODBC / RODBCDBI
library(DBI)
library(RODBCDBI)
rodbc <- dbConnect(RODBCDBI::ODBC(), dsn = "PostgreSQL")
system.time(rodbc_result <- dbReadTable(rodbc, "flights"))
#> Warning: closing unused RODBC handle 2
#> user system elapsed
#> 19.203 1.356 21.724
library(tibble)
as_tibble(odbc_result)
#> # A tibble: 336,776 × 20
#> row.names year month day dep_time sched_dep_time dep_delay arr_time
#> <chr> <int> <int> <int> <int> <int> <dbl> <int>
#> 1 1 2013 1 1 517 515 2 830
#> 2 2 2013 1 1 533 529 4 850
#> 3 3 2013 1 1 542 540 2 923
#> 4 4 2013 1 1 544 545 -1 1004
#> 5 5 2013 1 1 554 600 -6 812
#> 6 6 2013 1 1 554 558 -4 740
#> 7 7 2013 1 1 555 600 -5 913
#> 8 8 2013 1 1 557 600 -3 709
#> 9 9 2013 1 1 557 600 -3 838
#> 10 10 2013 1 1 558 600 -2 753
#> # ... with 336,766 more rows, and 12 more variables: sched_arr_time <int>,
#> # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
#> # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
#> # minute <dbl>, time_hour <dttm>
identical(dim(rodbc_result), dim(odbc_result))
#> [1] TRUE
rm(rodbc_result, odbc_result, odbc, rodbc)
gc(verbose = FALSE)
#> Warning: closing unused RODBC handle 3
#> used (Mb) gc trigger (Mb) max used (Mb)
#> Ncells 712236 38.1 1770749 94.6 1770749 94.6
#> Vcells 8991012 68.6 27225095 207.8 33776265 257.7
WRITING
Writing the same dataset to the database:
library(nycflights13)
# rodbc does not support writing timestamps natively.
rodbc <- dbConnect(RODBCDBI::ODBC(), dsn = "PostgreSQL")
system.time(dbWriteTable(rodbc, "flights2", as.data.frame(flights[, names(flights) != "time_hour"])))
#> user system elapsed
#> 6.693 3.786 48.423
# Writing
rsqlserver <- dbConnect(RSQLServer::SQLServer(), server = "SQLServer")
system.time(dbWriteTable(rsqlserver, "flights2", as.data.frame(flights)))
#> user system elapsed
#> 645.219 12.287 820.806
# Reading
system.time(dbReadTable(rsqlserver, "flights", as.data.frame(flights)))
#> user system elapsed
#> 5.101 1.289 3.584