10

I'm trying to use the large object (https://www.postgresql.org/docs/10/largeobjects.html) feature of PostgreSQL in R, and I have some trouble writing and reading using {DBI}/{RPostgres}.

Here is what I have tried so far:

# Getting the db
docker run --rm --name some-postgres -e POSTGRES_PASSWORD=mysecretpassword -d -p 5433:5432 postgres
# Attach DBI so dbConnect()/dbGetQuery() can be called unqualified.
library(DBI)

# Connect to the Postgres container started by the docker command above;
# 5433 is the host port published by `-p 5433:5432`.
con <- dbConnect(
  drv = RPostgres::Postgres(),
  host = "localhost", port = 5433,
  dbname = "postgres",
  user = "postgres", password = "mysecretpassword"
)

Creation works:

> dbGetQuery(con, "SELECT lo_create(1234);")
  lo_create
1      1234

But then I have a hard time figuring out how to write an R object to this large object. For example, how would I write mtcars as a large object in Postgres using {DBI} and {RPostgres}?

And then, how do I read it back again in R?

2 Answers 2

2

Consider R's serialize() (the mechanism behind the .RData/.RDS formats) to turn the R object into a raw vector, store it as a large object referenced from an OID column, and use the server-side large object functions of Postgres v10+ to create and retrieve the content. The same approach should also work with a bytea column if you remove the lo_* function calls.

Assuming table structure:

CREATE TABLE my_table(
   ...
   r_object OID
   ...
)

To append the R object:

# PARAMETERISED STATEMENT ($1 IS THE PLACEHOLDER SYNTAX USED BY RPostgres)
sql <- "INSERT INTO my_table(r_object) VALUES (lo_from_bytea(0, $1))"

# SERIALIZE TO A RAW VECTOR: connection = NULL MAKES serialize() RETURN THE
# BYTES INSTEAD OF WRITING THEM TO A CONNECTION (THE ARGUMENT IS MANDATORY)
r_obj <- serialize(mtcars, connection = NULL)

# EXECUTE ACTION, BINDING THE RAW VECTOR WRAPPED IN A LIST SO THAT IT IS SENT
# AS ONE bytea VALUE RATHER THAN AS ONE VALUE PER BYTE
dbExecute(conn, sql, params = list(list(r_obj)))

To retrieve the R object:

sql <- "SELECT lo_get(r_object) AS r_data FROM my_table"
pg_data <- dbGetQuery(conn, sql)

# UNSERIALIZE RETURNED RAW VECTOR
# bytea COLUMNS COME BACK AS A LIST COLUMN (ONE RAW VECTOR PER ROW), SO USE
# [[1]] TO EXTRACT THE RAW VECTOR ITSELF; [1] WOULD RETURN A ONE-ELEMENT LIST,
# WHICH unserialize() REJECTS
mtcars_from_pg <- unserialize(pg_data$r_data[[1]])

Alternatively, you can use Postgres' TEXT type (which has no declared length limit) together with R's dput() (an ASCII representation of R objects), and rebuild the object by running eval() + parse() on the returned dput string.

CREATE TABLE my_table(
   ...
   r_text TEXT
   ...
)
# PREPARED STATEMENT
sql <- "INSERT INTO my_table(r_text) VALUES (?r_obj)"

# BIND PARAMETER OF DPUT STRING OUTPUT
# capture.output() RETURNS ONE STRING PER PRINTED LINE, BUT sqlInterpolate()
# NEEDS A SINGLE VALUE PER PLACEHOLDER, SO COLLAPSE THE LINES FIRST
r_text <- paste(capture.output(dput(mtcars)), collapse = "\n")
query <- DBI::sqlInterpolate(conn, sql, r_obj = r_text)

# EXECUTE ACTION
dbExecute(conn, query)

# RETRIEVE CONTENT
sql <- "SELECT r_text FROM my_table"
pg_data <- dbGetQuery(conn, sql)

# PARSE AND EVALUATE THE RETURNED DPUT STRING TO REBUILD THE OBJECT
# NOTE(review): eval(parse()) EXECUTES WHATEVER R CODE THE COLUMN CONTAINS --
# ONLY USE THIS ON ROWS WHOSE CONTENT YOU TRUST
mtcars_from_pg <- eval(parse(text=pg_data$r_text[1]))
Sign up to request clarification or add additional context in comments.

Comments

0

Just to mirror the answer here on SO from https://github.com/r-dbi/RPostgres/issues/519:

You can upload a LO (large object) using postgresImportLargeObject. Downloading a LO is a bit more involved.

See code below for a full example:

# Path of the binary file used for the upload/download round trip.
file <- "mytestfile.bin"
# generate a file with 800MB filesize using bash
# system(sprintf("dd if=/dev/urandom of=%s bs=1M count=800", file))
input_bytes <- file.size(file)
prettyunits::pretty_bytes(input_bytes)
#> [1] "838.86 MB"

# 1. Create Table =====
# One row per uploaded file; lo_link holds the OID of the large object.
create_sql <- "CREATE TABLE IF NOT EXISTS lo_tester (
    id SERIAL PRIMARY KEY,
    name TEXT,
    lo_link OID
  )"
DBI::dbExecute(con, create_sql)
is.element("lo_tester", DBI::dbListTables(con))
#> [1] TRUE

# 2. Insert Large Object =====
t0 <- Sys.time()
# Import the file and record its OID inside one transaction:
# postgresImportLargeObject() has to run within a transaction, and a failed
# INSERT then rolls the import back as well.
DBI::dbWithTransaction(con, {
  oid <- RPostgres::postgresImportLargeObject(con, file)
  # Bind the values instead of pasting them into the SQL text; a file name
  # containing a single quote would otherwise break the statement.
  DBI::dbExecute(
    con,
    "INSERT INTO lo_tester (name, lo_link) VALUES ($1, $2);",
    params = list(file, oid)
  )
})
elapsed <- as.numeric(difftime(Sys.time(), t0, units = "secs"))
cat(sprintf("Inserting took %0.2fs\n", elapsed))
#> Inserting took 144.86s

# inserted as oid
oid
#> [1] 557400

DBI::dbGetQuery(con, "SELECT * FROM lo_tester;")
#>   id           name lo_link
#> 1  1 mytestfile.bin  557400


# 3. Fetch the data again ====
#' Size in bytes of a large object
#'
#' Opens the object, seeks to its end and reports the resulting position.
#'
#' @param con A DBI connection.
#' @param oid OID of the large object. It is formatted with `%d`, so it has
#'   to be integer-valued.
#' @return The first column of the query result, i.e. the size of the object.
get_oid_size <- function(con, oid) {
  # 262144 (0x40000) opens the object for reading; whence = 2 in lo_lseek()
  # positions relative to the end of the object.
  size_sql <- sprintf("
    WITH lo AS (  SELECT lo_open(%d, 262144) AS fd  ),
      seek AS (  SELECT lo_lseek(fd, 0, 2) AS pos, fd FROM lo  ),
      tell AS (  SELECT lo_tell(fd) AS size, fd FROM seek  )
    SELECT size FROM tell;
  ", oid)

  res <- DBI::dbGetQuery(con, size_sql)
  res[[1]]
}

#' Download a large object to a file, in chunks if necessary
#'
#' Objects no larger than `max_chunk_size` are fetched with a single query;
#' larger ones are fetched piece by piece and appended to `outfile`.
#'
#' @param con A DBI connection.
#' @param oid OID of the large object.
#' @param outfile Path of the file to write.
#' @param max_chunk_size Largest number of bytes fetched per query; must be a
#'   single positive number.
#' @return `outfile`, invisibly.
download_oid <- function(con, oid, outfile, max_chunk_size = 200 * 1024^2) {
  # A non-positive chunk size would make the chunk count infinite or negative.
  stopifnot(
    is.numeric(max_chunk_size),
    length(max_chunk_size) == 1L,
    !is.na(max_chunk_size),
    max_chunk_size > 0
  )

  size <- get_oid_size(con, oid)

  # Small object: one query, one write.
  if (size <= max_chunk_size) {
    r <- DBI::dbGetQuery(
      con,
      "SELECT lo_get($1) AS lo_data",
      params = list(oid)
    )
    writeBin(r$lo_data[[1]], outfile)
    return(invisible(outfile))
  }

  n_chunks <- ceiling(size / max_chunk_size)
  con_out <- file(outfile, "wb")
  # Single point of cleanup: closes the file on normal return and on error.
  on.exit(close(con_out), add = TRUE)

  for (i in seq_len(n_chunks)) {
    offset <- (i - 1) * max_chunk_size
    # Every chunk is full-sized except possibly the last one.
    bytes_to_read <- if (i < n_chunks) max_chunk_size else (size - offset)
    message(
      "Downloading chunk ", i, " of ", n_chunks, " - offset ",
      offset, " bytes_to_read ", bytes_to_read, " - ",
      prettyunits::pretty_bytes(bytes_to_read)
    )

    r <- DBI::dbGetQuery(
      con,
      "SELECT lo_get($1, $2, $3) AS lo_data",
      params = list(oid, offset, bytes_to_read)
    )
    writeBin(r$lo_data[[1]], con_out)
  }

  invisible(outfile)
}
#' Remove a large object from the database
#'
#' @param con A DBI connection.
#' @param oid OID of the large object to unlink.
#' @return `TRUE`, invisibly.
delete_oid <- function(con, oid) {
  unlink_sql <- "SELECT lo_unlink($1);"
  DBI::dbExecute(con, unlink_sql, params = list(oid))
  invisible(TRUE)
}

# Download the object to a new file, then check that the copy matches the
# original (same size, same hash).
outfile <- "mytestfile_downloaded.bin"
download_oid(con, oid, outfile)

file.size(outfile)
#> [1] 838860800
file.size(file)
#> [1] 838860800

rlang::hash_file(outfile)
#> [1] "f9bc115120c045115ecdb1784da09984"
rlang::hash_file(file)
#> [1] "f9bc115120c045115ecdb1784da09984"

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.