Closed
Description
I want to write custom functions to generate queries. In general usage, I have had no problems setting up connections with only the project and then accessing a table with the dataset name:
# Connect with only the project set, then address the table as "dataset.table".
conn <- dbConnect(bigquery(), project = "projname")
bq_table <- tbl(conn, "dataset.tablename")
I've never had any problems with this in general use, even with cross-source tables.
However, when I build a query from a SQL string (via `dbplyr::sql()`) and pass it to `tbl()`, the call fails unless the dataset is also set in the connection. Reprex below.
library(magrittr)
library(tidyverse)
library(dbplyr)
#>
#> Attaching package: 'dbplyr'
#> The following objects are masked from 'package:dplyr':
#>
#> ident, sql
library(bigrquery)
library(DBI)
library(RPostgres)
library(RPostgreSQL)
# Clear the current bigrquery token, then authenticate with a specific email.
bq_deauth()
bq_auth(email = "[email protected]")
#' Build a lazy table that counts the rows of another lazy table.
#'
#' Renders the SQL behind `bq_table`, wraps it in
#' `SELECT COUNT(*) FROM (...)`, and passes the wrapped query to `tbl()` on
#' the same connection. The connection, the SQL string and the `sql` object
#' are printed along the way as debugging output.
#'
#' @param bq_table A lazy table created with `tbl()` on a DBI connection.
#' @return The lazy table for the count query, returned invisibly.
getCount <- function(bq_table) {
  ### extract connection
  # remote_con() is the public accessor; avoids reaching into `$src$con`.
  conn <- dbplyr::remote_con(bq_table)
  print(conn)

  ### build sql string
  # The rendered SQL can span several lines. Turn EVERY line break into a
  # space so the query prints on one line without gluing neighbouring tokens
  # together (e.g. "*" and "FROM").
  inner_sql <- gsub("\n", " ", dbplyr::sql_render(bq_table), fixed = TRUE)
  sql_string <- paste0("SELECT COUNT(*) FROM (", inner_sql, ")")
  print(sql_string)

  ### build query object
  sql_query <- dbplyr::sql(sql_string)
  print(sql_query)

  # Returned invisibly so a top-level call shows only the debugging output.
  invisible(tbl(conn, sql_query))
}
# First attempt: the connection names the project only, no dataset.
conn <- dbConnect(bigquery(), project = "proj1-305622")
# df = tibble(
# A = sample(1:10, 5),
# B = letters[1:5]
# )
#
# dbWriteTable(
# conn,
# name = "dataset.df",
# value = df,
# overwrite = TRUE
# )
# Reference the table by "dataset.table" and run the count helper on it.
df_bq <- tbl(conn, "dataset.df")
df_bq %>%
  getCount()
#> <BigQueryConnection>
#> Billing: proj1-305622
#> [1] "SELECT COUNT(*) FROM (SELECT *FROM `dataset.df`)"
#> <SQL> SELECT COUNT(*) FROM (SELECT *FROM `dataset.df`)
#> Error: dataset is not a string (a length one character vector).
# Second attempt: same project, but the connection also names the dataset.
conn <- dbConnect(bigquery(), project = "proj1-305622", dataset = "dataset")

# Same table reference and helper call as in the first attempt.
df_bq <- tbl(conn, "dataset.df")
df_bq %>%
  getCount()
#> <BigQueryConnection>
#> Dataset: proj1-305622.dataset
#> Billing: proj1-305622
#> [1] "SELECT COUNT(*) FROM (SELECT *FROM `dataset.df`)"
#> <SQL> SELECT COUNT(*) FROM (SELECT *FROM `dataset.df`)
Created on 2021-02-23 by the reprex package (v1.0.0)