0

I'm trying to create using BQ API a table from an external source (google sheet).

project_id = 'XXX'
dataset_name= f'{project_id}.brodmann_dev'
table_name = f'{dataset_name}.test2'
from google.cloud import bigquery
from google.cloud.bigquery.table import Table
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.external_config import GoogleSheetsOptions

source_uri="https://docs.google.com/spreadsheets/d/XXXX"
external_config = bigquery.ExternalConfig(source_format=bigquery.ExternalSourceFormat.GOOGLE_SHEETS)
external_config.source_uri=source_uri
schema = [SchemaField('id', 'STRING', 'NULLABLE', None, (), None)]
table = bigquery.Table(table_name,schema=schema)
table.external_config=external_config
client.delete_table(table, not_found_ok=True) 
client.create_table(table)

The table is created but without any reference to the source uri

1 Answer 1

1
client = bigquery.Client( project=project_id)
project_id = 'XXX'
dataset_id= f'{project_id}.brodmann_dev'
dataset = client.get_dataset(dataset_id)
table_id = "test2"
schema = [
    bigquery.SchemaField("name", "STRING"),
    bigquery.SchemaField("post_abbr", "STRING"),
    bigquery.SchemaField("bad_record", "INTEGER"),
]

table = bigquery.Table(dataset.table(table_id), schema=schema)
client.delete_table(table, not_found_ok=True)
external_config = bigquery.ExternalConfig("GOOGLE_SHEETS")

external_config.source_uris = ['https://docs.google.com/spreadsheets/d/XXX']
external_config.options.skip_leading_rows = 1  
external_config.options.range = ("cat_highway")

# if `schema=None` it works fine.
external_config.autodetect = True   

# if the value is `1` then it throws an error as in the table cell `C3` and `C5` has character. 
external_config.max_bad_records = 2    

# Verify with the csv and json it works (through the error if it's value is False and csv has an extra column).
external_config.ignore_unknown_values = True   
table.external_data_configuration = external_config

table = client.create_table(table)


sql = "SELECT * FROM `{}.{}`".format(dataset_id, table_id)
query_job = client.query(sql)  
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.