From e8a77965baf09b057fe950ecc44a343c39e48742 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Fri, 23 Jan 2015 19:26:47 -0600 Subject: [PATCH 001/146] Add private_key_file kwarg on get_client This kwarg allows you to pass the key file name and get_client handles reading it. The actual key string can still be passed in using private_key. --- README.md | 5 +++-- bigquery/client.py | 21 ++++++++++++++------ bigquery/tests/test_client.py | 37 ++++++++++++++++++++++++++++++++++- 3 files changed, 54 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index d174e38..bc8c933 100644 --- a/README.md +++ b/README.md @@ -21,9 +21,10 @@ project_id = 'project_id' service_account = 'my_id_123@developer.gserviceaccount.com' # PKCS12 or PEM key provided by Google. -key = 'secret_key' +key = 'key.pem' -client = get_client(project_id, service_account=service_account, private_key=key, readonly=True) +client = get_client(project_id, service_account=service_account, + private_key_file=key, readonly=True) # Submit an async query. job_id, _results = client.query('SELECT * FROM dataset.my_table LIMIT 1000') diff --git a/bigquery/client.py b/bigquery/client.py index 6d67527..66a0213 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -46,7 +46,8 @@ def get_client(project_id, credentials=None, service_account=None, - private_key=None, readonly=True, swallow_results=True): + private_key=None, private_key_file=None, readonly=True, + swallow_results=True): """Return a singleton instance of BigQueryClient. Either AssertionCredentials or a service account and private key combination need to be provided in order to authenticate requests to BigQuery. @@ -58,6 +59,9 @@ def get_client(project_id, credentials=None, service_account=None, service_account: the Google API service account name. private_key: the private key associated with the service account in PKCS12 or PEM format. + private_key_file: the name of the file containing the private key + associated with the service account in PKCS12 or PEM + format. readonly: bool indicating if BigQuery access is read-only. Has no effect if credentials are provided. swallow_results: If set to false then return the actual response value @@ -67,9 +71,13 @@ def get_client(project_id, credentials=None, service_account=None, an instance of BigQueryClient. 
""" - if not credentials and not (service_account and private_key): - raise Exception('AssertionCredentials or service account and private' - 'key need to be provided') + if not credentials: + assert service_account and (private_key or private_key_file), \ + 'Must provide AssertionCredentials or service account and key' + + if private_key_file: + with open(private_key_file, 'rb') as key_file: + private_key = key_file.read() bq_service = _get_bq_service(credentials=credentials, service_account=service_account, @@ -83,7 +91,8 @@ def _get_bq_service(credentials=None, service_account=None, private_key=None, readonly=True): """Construct an authorized BigQuery service object.""" - assert credentials or (service_account and private_key) + assert credentials or (service_account and private_key), \ + 'Must provide AssertionCredentials or service account and key' if not credentials: scope = BIGQUERY_SCOPE_READ_ONLY if readonly else BIGQUERY_SCOPE @@ -820,7 +829,7 @@ def _get_all_tables(self, dataset_id, cache=False): projectId=self.project_id, datasetId=dataset_id, pageToken=page_token - ).execute() + ).execute() page_token = res.get('nextPageToken') result['tables'] += res.get('tables', []) self.cache[dataset_id] = (datetime.now(), result) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 6262808..0c65ea3 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -37,7 +37,7 @@ def setUp(self): def test_no_credentials(self): """Ensure an Exception is raised when no credentials are provided.""" - self.assertRaises(Exception, client.get_client, 'foo', 'bar') + self.assertRaises(AssertionError, client.get_client, 'foo') @mock.patch('bigquery.client._credentials') @mock.patch('bigquery.client.build') @@ -99,6 +99,41 @@ def test_initialize_read_write(self, mock_build, mock_return_cred): self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) + @mock.patch('bigquery.client._credentials') + @mock.patch('bigquery.client.build') + @mock.patch('__builtin__.open') + def test_initialize_key_file(self, mock_open, mock_build, + mock_return_cred): + """Ensure that a BigQueryClient is initialized and returned with + read/write permissions using a private key file. + """ + from bigquery.client import BIGQUERY_SCOPE + + mock_cred = mock.Mock() + mock_http = mock.Mock() + mock_cred.return_value.authorize.return_value = mock_http + mock_bq = mock.Mock() + mock_build.return_value = mock_bq + key_file = 'key.pem' + key = 'key' + mock_open.return_value.__enter__.return_value.read.return_value = key + service_account = 'account' + project_id = 'project' + mock_return_cred.return_value = mock_cred + + bq_client = client.get_client( + project_id, service_account=service_account, + private_key_file=key_file, readonly=False) + + mock_open.assert_called_once_with(key_file, 'rb') + mock_return_cred.assert_called_once_with() + mock_cred.assert_called_once_with(service_account, key, + scope=BIGQUERY_SCOPE) + mock_cred.authorize.assert_called_once() + mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http) + self.assertEquals(mock_bq, bq_client.bigquery) + self.assertEquals(project_id, bq_client.project_id) + class TestQuery(unittest.TestCase): From 0944748139049254924b405e3137172975a7dcd3 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Tue, 3 Feb 2015 00:09:40 -0600 Subject: [PATCH 002/146] Fix logging in create_table and delete_table This fixes the logging when an error occurs to make debugging easier. 
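For quick reference before the diff below, a minimal usage sketch of the `private_key_file` keyword that PATCH 001 adds above. This is a hedged example: the project id, service account, and key path are placeholders, not values taken from the patches.

```python
from bigquery import get_client

project_id = 'example-project'                               # placeholder
service_account = 'my_id_123@developer.gserviceaccount.com'  # placeholder

# New route: pass the key file name and let get_client read it.
client = get_client(project_id, service_account=service_account,
                    private_key_file='key.pem', readonly=True)

# Old route still works: read the key yourself and pass the raw string.
with open('key.pem', 'rb') as f:
    client = get_client(project_id, service_account=service_account,
                        private_key=f.read(), readonly=True)
```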
--- bigquery/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 66a0213..68afdc0 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -330,7 +330,7 @@ def create_table(self, dataset, table, schema): except HttpError as e: logging.error(('Cannot create table {0}.{1}\n' 'Http Error: {2}').format(dataset, table, - e.message)) + e.content)) if self.swallow_results: return False else: @@ -362,7 +362,7 @@ def delete_table(self, dataset, table): except HttpError as e: logging.error(('Cannot delete table {0}.{1}\n' 'Http Error: {2}').format(dataset, table, - e.message)) + e.content)) if self.swallow_results: return False else: From f7ca89f481f25bd077a93b9a5484b4715626d786 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Tue, 17 Mar 2015 13:28:10 -0500 Subject: [PATCH 003/146] Bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 521b873..5220cfd 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages from setuptools import setup -VERSION = '0.1.1' +VERSION = '1.0.0' setup_args = dict( name='BigQuery-Python', From 140cce5ab45648c4a47a2517919c9f8f3d2c017e Mon Sep 17 00:00:00 2001 From: ilikedata Date: Tue, 7 Apr 2015 16:31:02 +1000 Subject: [PATCH 004/146] Quoting dictionary keys in schema generation sample --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bc8c933..0359d37 100644 --- a/README.md +++ b/README.md @@ -231,7 +231,7 @@ client.patch_dataset('mydataset', friendly_name="mon Dataset") # friendly_name c ```python from bigquery import schema_from_record -schema_from_record({id:123, posts: [{id:123, text: "tihs is a post"}], username: "bob"}) +schema_from_record({"id":123, "posts": [{"id":123, "text": "tihs is a post"}], "username": "bob"}) ``` # Caveats From ec33a65b58ae4f0ad23138f47448f4126ae06dc3 Mon Sep 17 00:00:00 2001 From: Anant Date: Wed, 22 Apr 2015 09:52:47 -0600 Subject: [PATCH 005/146] Passing the timestamp parser to describe field --- bigquery/schema_builder.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bigquery/schema_builder.py b/bigquery/schema_builder.py index 371903f..3b063f2 100644 --- a/bigquery/schema_builder.py +++ b/bigquery/schema_builder.py @@ -29,7 +29,8 @@ def schema_from_record(record, timestamp_parser=default_timestamp_parser): Returns: schema: list """ - return [describe_field(k, v) for k, v in record.items()] + return [describe_field(k, v, timestamp_parser=timestamp_parser) + for k, v in record.items()] def describe_field(k, v, timestamp_parser=default_timestamp_parser): From 408f6ec9120ce0dbcd85b12e83ad28d05a62ac5d Mon Sep 17 00:00:00 2001 From: Conrad Dean Date: Sat, 2 May 2015 14:54:47 -0400 Subject: [PATCH 006/146] factor querying and job insertion into shared method --- bigquery/client.py | 110 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 83 insertions(+), 27 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 68afdc0..d5a1a74 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -120,17 +120,22 @@ def __init__(self, bq_service, project_id, swallow_results=True): self.swallow_results = swallow_results self.cache = {} - def query(self, query, max_results=None, timeout=0, dry_run=False): - """Submit a query to BigQuery. + def _submit_query_job(self, query_data): + + """ Submit a query job to BigQuery. 
+ + This is similar to BigQueryClient.query, but gives the user + direct access to the query method on the offical BigQuery + python client. + + For fine-grained control over a query job, see: + https://google-api-client-libraries.appspot.com/documentation/bigquery/v2/python/latest/bigquery_v2.jobs.html#query + + Args: - query: BigQuery query string. - max_results: maximum number of rows to return per page of results. - timeout: how long to wait for the query to complete, in seconds, - before the request times out and returns. - dry_run: if True, the query isn't actually run. A valid query will - return an empty response, while an invalid one will return - the same error message it would if it wasn't a dry run. + query_data: query object as per "configuration.query" in + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query Returns: job id and query results if query completed. If dry_run is True, @@ -141,21 +146,15 @@ def query(self, query, max_results=None, timeout=0, dry_run=False): BigQueryTimeoutException on timeout """ - logging.debug('Executing query: %s' % query) + logging.debug('Submitting query job: %s' % query_data) job_collection = self.bigquery.jobs() - query_data = { - 'query': query, - 'timeoutMs': timeout * 1000, - 'dryRun': dry_run, - 'maxResults': max_results, - } try: query_reply = job_collection.query( projectId=self.project_id, body=query_data).execute() except HttpError as e: - if dry_run: + if query_data.get("dryRun", False): return None, json.loads(e.content) raise @@ -166,12 +165,75 @@ def query(self, query, max_results=None, timeout=0, dry_run=False): # raise exceptions if it's not an async query # and job is not completed after timeout - if not job_complete and timeout: + if not job_complete and query_data.get("timeoutMs", False): logging.error('BigQuery job %s timeout' % job_id) raise BigQueryTimeoutException() return job_id, [self._transform_row(row, schema) for row in rows] + def _insert_job(self, body_object): + + """ Submit a job to BigQuery + + Direct proxy to the insert() method of the offical BigQuery + python client. + + Able to submit load, link, query, copy, or extract jobs. + + For more details, see: + https://google-api-client-libraries.appspot.com/documentation/bigquery/v2/python/latest/bigquery_v2.jobs.html#insert + + + Args: + body_object: body object passed to bigquery.jobs().insert() + + Returns: + response of the bigquery.jobs().insert().execute() call + + Raises: + BigQueryTimeoutException on timeout + """ + + logging.debug('Submitting job: %s' % body_object) + + job_collection = self.bigquery.jobs() + + return job_collection.insert( + projectId=self.project_id, + body=body_object + ).execute() + + def query(self, query, max_results=None, timeout=0, dry_run=False): + """Submit a query to BigQuery. + + Args: + query: BigQuery query string. + max_results: maximum number of rows to return per page of results. + timeout: how long to wait for the query to complete, in seconds, + before the request times out and returns. + dry_run: if True, the query isn't actually run. A valid query will + return an empty response, while an invalid one will return + the same error message it would if it wasn't a dry run. + + Returns: + job id and query results if query completed. If dry_run is True, + job id will be None and results will be empty if the query is valid + or a dict containing the response if invalid. 
+ + Raises: + BigQueryTimeoutException on timeout + """ + + logging.debug('Executing query: %s' % query) + + query_data = { + 'query': query, + 'timeoutMs': timeout * 1000, + 'dryRun': dry_run, + 'maxResults': max_results, + } + return self._submit_query_job(query_data) + def get_query_schema(self, job_id): """Retrieve the schema of a query by job id. @@ -534,9 +596,7 @@ def import_data_from_uris( } logging.debug("Creating load job %s" % body) - job_resource = self.bigquery.jobs() \ - .insert(projectId=self.project_id, body=body) \ - .execute() + job_resource = self._insert_job(body) self._raise_insert_exception_if_error(job_resource) return job_resource @@ -620,9 +680,7 @@ def export_data_to_uris( } logging.info("Creating export job %s" % body) - job_resource = self.bigquery.jobs() \ - .insert(projectId=self.project_id, body=body) \ - .execute() + job_resource = self._insert_job(body) self._raise_insert_exception_if_error(job_resource) return job_resource @@ -696,9 +754,7 @@ def write_to_table( } logging.info("Creating write to table job %s" % body) - job_resource = self.bigquery.jobs() \ - .insert(projectId=self.project_id, body=body) \ - .execute() + job_resource = self._insert_job(body) self._raise_insert_exception_if_error(job_resource) return job_resource From 745d67ec1b29f448f6a301e3f5ec146d64a8db5b Mon Sep 17 00:00:00 2001 From: Dylan Roy Date: Thu, 7 May 2015 17:09:13 -0500 Subject: [PATCH 007/146] Added a create view function to client. --- bigquery/client.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/bigquery/client.py b/bigquery/client.py index d5a1a74..2d618c3 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -398,6 +398,46 @@ def create_table(self, dataset, table, schema): else: return {} + def create_view(self, dataset, view, query): + """Create a new view in the dataset. + + Args: + dataset: the dataset to create the view in. + view: the name of view to create. + query: a query that BigQuery executes when the view is referenced. + + Returns: + bool indicating if the view was successfully created or not, + or response from BigQuery if swallow_results is set for False. + """ + + body = { + 'tableReference': { + 'tableId': view, + 'projectId': self.project_id, + 'datasetId': dataset + }, + 'view': { + 'query': query + } + } + + try: + view = self.bigquery.tables().insert( + projectId=self.project_id, + datasetId=dataset, + body=body + ).execute() + if self.swallow_results: + return True + else: + return view + + except HttpError as e: + logging.error(('Cannot create view {0}.{1}\n' + 'Http Error: {2}').format(dataset, view, + e.content)) + def delete_table(self, dataset, table): """Delete a table from the dataset. 
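Before the tests added in the next patch, a short usage sketch of the `create_view` method introduced above. Hedged example: `client` is assumed to come from `get_client()`, and the dataset, view, and table names are placeholders.

```python
# Create a view whose query is evaluated whenever the view is referenced.
created = client.create_view(
    'mydataset',
    'my_view',
    'SELECT resource, COUNT(*) AS hits FROM mydataset.my_table GROUP BY resource')

# Per the docstring: True/False with the default swallow_results=True,
# or the raw tables().insert() response when swallow_results is False.
if not created:
    print('view creation failed; see the error logged by the client')
```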
From 4ba0c49907b4fd01fb2cf0df190a6265a74b898a Mon Sep 17 00:00:00 2001 From: royd Date: Fri, 8 May 2015 10:32:59 -0500 Subject: [PATCH 008/146] added tests for create view --- bigquery/tests/test_client.py | 71 +++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 0c65ea3..5f00563 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1424,6 +1424,77 @@ def test_table_create_success(self): self.mock_tables.insert.return_value.execute.assert_called_with() +class TestCreateView(unittest.TestCase): + + def setUp(self): + self.mock_bq_service = mock.Mock() + self.mock_tables = mock.Mock() + self.mock_bq_service.tables.return_value = self.mock_tables + self.table = 'table' + self.project = 'project' + self.dataset = 'dataset' + self.query = 'SELECT "foo" bar' + self.client = client.BigQueryClient(self.mock_bq_service, self.project) + self.body = { + 'view': {'query': self.query}, + 'tableReference': { + 'tableId': self.table, 'projectId': self.project, + 'datasetId': self.dataset} + } + + def test_view_create_failed(self): + """Ensure that if creating the table fails, False is returned, + or if swallow_results is False an empty dict is returned.""" + + self.mock_tables.insert.return_value.execute.side_effect = ( + HttpError(HttpResponse(404), 'There was an error')) + + actual = self.client.create_view(self.dataset, self.table, + self.query) + + self.assertFalse(actual) + + self.client.swallow_results = False + + actual = self.client.create_view(self.dataset, self.table, + self.query) + + self.assertEqual(actual, {}) + + self.client.swallow_results = True + + self.mock_tables.insert.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=self.body) + + self.mock_tables.insert.return_value.execute.assert_called_with() + + def test_view_create_success(self): + """Ensure that if creating the table succeeds, True is returned, + or if swallow_results is False the actual response is returned.""" + + self.mock_tables.insert.return_value.execute.side_effect = [ + {'status': 'bar'}] + + actual = self.client.create_view(self.dataset, self.table, + self.query) + + self.assertTrue(actual) + + self.client.swallow_results = False + + actual = self.client.create_view(self.dataset, self.table, + self.query) + + self.assertEqual(actual, {'status': 'bar'}) + + self.client.swallow_results = True + + self.mock_tables.insert.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=self.body) + + self.mock_tables.insert.return_value.execute.assert_called_with() + + class TestDeleteTable(unittest.TestCase): def setUp(self): From 92e99af036c822555de85c8d863a1b50b517c864 Mon Sep 17 00:00:00 2001 From: royd Date: Fri, 8 May 2015 10:59:21 -0500 Subject: [PATCH 009/146] updated on HttpError behavior for create view --- bigquery/client.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 2d618c3..2e27c6e 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -400,28 +400,28 @@ def create_table(self, dataset, table, schema): def create_view(self, dataset, view, query): """Create a new view in the dataset. - + Args: dataset: the dataset to create the view in. view: the name of view to create. query: a query that BigQuery executes when the view is referenced. 
- + Returns: bool indicating if the view was successfully created or not, or response from BigQuery if swallow_results is set for False. """ - + body = { - 'tableReference': { - 'tableId': view, - 'projectId': self.project_id, - 'datasetId': dataset - }, + 'tableReference': { + 'tableId': view, + 'projectId': self.project_id, + 'datasetId': dataset + }, 'view': { 'query': query } } - + try: view = self.bigquery.tables().insert( projectId=self.project_id, @@ -432,11 +432,15 @@ def create_view(self, dataset, view, query): return True else: return view - + except HttpError as e: logging.error(('Cannot create view {0}.{1}\n' 'Http Error: {2}').format(dataset, view, e.content)) + if self.swallow_results: + return False + else: + return {} def delete_table(self, dataset, table): """Delete a table from the dataset. From db607ebf3769b488d49961a646351bdf70041abc Mon Sep 17 00:00:00 2001 From: royd Date: Fri, 8 May 2015 11:21:46 -0500 Subject: [PATCH 010/146] mimicing table structure for other tests in view tests --- bigquery/tests/test_client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 5f00563..30a8178 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1433,7 +1433,7 @@ def setUp(self): self.table = 'table' self.project = 'project' self.dataset = 'dataset' - self.query = 'SELECT "foo" bar' + self.query = 'SELECT "bar" foo, "foo" bar' self.client = client.BigQueryClient(self.mock_bq_service, self.project) self.body = { 'view': {'query': self.query}, @@ -1472,8 +1472,8 @@ def test_view_create_success(self): """Ensure that if creating the table succeeds, True is returned, or if swallow_results is False the actual response is returned.""" - self.mock_tables.insert.return_value.execute.side_effect = [ - {'status': 'bar'}] + self.mock_tables.insert.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] actual = self.client.create_view(self.dataset, self.table, self.query) From 079819035c0137ff7962c5cdada364fe7f3c0dbb Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Mon, 11 May 2015 18:03:54 -0500 Subject: [PATCH 011/146] Bump version --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 5220cfd..52d088d 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages from setuptools import setup -VERSION = '1.0.0' +VERSION = '1.1.0' setup_args = dict( name='BigQuery-Python', @@ -16,7 +16,7 @@ author='Tyler Treat', author_email='ttreat31@gmail.com', classifiers=[ - 'Development Status :: 4 - Beta', + 'Development Status :: 5 - Production/Stable', 'Environment :: Web Environment', 'Intended Audience :: Developers', 'Operating System :: OS Independent', From b787883a699d02854f46bbee296461978ac45e29 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Thu, 21 May 2015 16:12:39 +0600 Subject: [PATCH 012/146] Bump version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 52d088d..5b59ac5 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages from setuptools import setup -VERSION = '1.1.0' +VERSION = '1.1.1' setup_args = dict( name='BigQuery-Python', From 79df2b5eeaf2809015e5fa74bafbe563dc7587b2 Mon Sep 17 00:00:00 2001 From: pirsquare Date: Sat, 6 Jun 2015 23:37:00 +0800 Subject: [PATCH 013/146] Join query results returning multiple pages --- bigquery/client.py | 52 ++++++++++---------- 
bigquery/tests/test_client.py | 89 +++++++++++++++++++++++++++++++---- 2 files changed, 105 insertions(+), 36 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 2e27c6e..ec464af 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -121,7 +121,6 @@ def __init__(self, bq_service, project_id, swallow_results=True): self.cache = {} def _submit_query_job(self, query_data): - """ Submit a query job to BigQuery. This is similar to BigQueryClient.query, but gives the user @@ -172,7 +171,6 @@ def _submit_query_job(self, query_data): return job_id, [self._transform_row(row, schema) for row in rows] def _insert_job(self, body_object): - """ Submit a job to BigQuery Direct proxy to the insert() method of the offical BigQuery @@ -243,9 +241,7 @@ def get_query_schema(self, job_id): A list of dictionaries that represent the schema. """ - job_collection = self.bigquery.jobs() - query_reply = self._get_query_results( - job_collection, self.project_id, job_id, offset=0, limit=0) + query_reply = self.get_query_results(job_id, offset=0, limit=0) if not query_reply['jobComplete']: logging.warning('BigQuery job %s not complete' % job_id) @@ -289,38 +285,41 @@ def check_job(self, job_id): included in the query table if it has completed. """ - job_collection = self.bigquery.jobs() - query_reply = self._get_query_results( - job_collection, self.project_id, job_id, offset=0, limit=0) + query_reply = self.get_query_results(job_id, offset=0, limit=0) return (query_reply.get('jobComplete', False), int(query_reply.get('totalRows', 0))) - def get_query_rows(self, job_id, offset=None, limit=None): + def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): """Retrieve a list of rows from a query table by job id. - Args: job_id: The job id that references a BigQuery query. offset: The offset of the rows to pull from BigQuery. limit: The number of rows to retrieve from a query table. - + timeout: Timeout in seconds. Returns: A list of dictionaries that represent table rows. """ - job_collection = self.bigquery.jobs() - query_reply = self._get_query_results( - job_collection, self.project_id, job_id, offset=offset, - limit=limit) - + # Get query results + query_reply = self.get_query_results(job_id, offset=offset, limit=limit, timeout=timeout) if not query_reply['jobComplete']: logging.warning('BigQuery job %s not complete' % job_id) raise UnfinishedQueryException() - schema = query_reply['schema']['fields'] + schema = query_reply["schema"]["fields"] rows = query_reply.get('rows', []) - - return [self._transform_row(row, schema) for row in rows] + page_token = query_reply.get("pageToken") + records = [self._transform_row(row, schema) for row in rows] + + # Append to records if there are multiple pages for query results + while page_token: + query_reply = self.get_query_results(job_id, offset=offset, limit=limit, + page_token=page_token, timeout=timeout) + page_token = query_reply.get("pageToken") + rows = query_reply.get('rows', []) + records += [self._transform_row(row, schema) for row in rows] + return records def check_table(self, dataset, table): """Check to see if a table exists. @@ -1039,27 +1038,26 @@ def _in_range(self, start_time, end_time, time): time <= start_time <= time + ONE_MONTH or \ time <= end_time <= time + ONE_MONTH - def _get_query_results(self, job_collection, project_id, job_id, - offset=None, limit=None): + def get_query_results(self, job_id, offset=None, limit=None, page_token=None, timeout=0): """Execute the query job indicated by the given job id. 
- Args: - job_collection: The collection the job belongs to. - project_id: The project id of the table. job_id: The job id of the query to check. offset: The index the result set should start at. limit: The maximum number of results to retrieve. - + page_token: Page token, returned by a previous call, to request the next page of results. + timeout: Timeout in seconds. Returns: The query reply. """ + job_collection = self.bigquery.jobs() return job_collection.getQueryResults( - projectId=project_id, + projectId=self.project_id, jobId=job_id, startIndex=offset, maxResults=limit, - timeoutMs=0).execute() + pageToken=page_token, + timeoutMs=timeout * 1000).execute() def _transform_row(self, row, schema): """Apply the given schema to the given BigQuery data row. diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 30a8178..b09cba4 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -358,7 +358,6 @@ def test_get_response(self): """Ensure that the query is executed and the query reply is returned. """ - project_id = 'foo' job_id = 'bar' mock_query_job = mock.Mock() @@ -368,14 +367,15 @@ def test_get_response(self): offset = 5 limit = 10 + page_token = "token" + timeout = 1 - actual = self.client._get_query_results(self.mock_job_collection, - project_id, job_id, - offset, limit) + actual = self.client.get_query_results(job_id, offset, limit, page_token, timeout) self.mock_job_collection.getQueryResults.assert_called_once_with( - timeoutMs=0, projectId=project_id, jobId=job_id, - startIndex=offset, maxResults=limit) + projectId=self.project_id, jobId=job_id, startIndex=offset, + maxResults=limit, pageToken=page_token, timeoutMs=1000) + mock_query_job.execute.assert_called_once() self.assertEquals(actual, mock_query_reply) @@ -458,7 +458,7 @@ def test_transform_row_with_nested_repeated(self): self.assertEquals(actual, expected) -@mock.patch('bigquery.client.BigQueryClient._get_query_results') +@mock.patch('bigquery.client.BigQueryClient.get_query_results') class TestCheckJob(unittest.TestCase): def setUp(self): @@ -1175,7 +1175,7 @@ def test_not_inside_range(self): } -@mock.patch('bigquery.client.BigQueryClient._get_query_results') +@mock.patch('bigquery.client.BigQueryClient.get_query_results') class TestGetQuerySchema(unittest.TestCase): def test_query_complete(self, get_query_mock): @@ -1251,7 +1251,7 @@ def test_table_does_not_exist(self): self.mock_tables.get.return_value.execute.assert_called_once_with() -@mock.patch('bigquery.client.BigQueryClient._get_query_results') +@mock.patch('bigquery.client.BigQueryClient.get_query_results') class TestGetQueryRows(unittest.TestCase): def test_query_complete(self, get_query_mock): @@ -1281,6 +1281,77 @@ def test_query_complete(self, get_query_mock): {'foo': 'abc', 'spider': 'xyz'}] self.assertEquals(result_rows, expected_rows) + def test_query_complete_with_page_token(self, get_query_mock): + """Ensure that get_query_rows works with page token.""" + from bigquery.client import BigQueryClient + + page_one_resp = { + "jobComplete": True, + "kind": "bigquery#getQueryResultsResponse", + "pageToken": "TOKEN_TO_PAGE_2", + "schema": { + "fields": [{ + "name": "first_name", + "type": "STRING", + }, { + "name": "last_name", + "type": "STRING", + }] + }, + "rows": [{ + "f": [{ + "v": "foo", + }, { + "v": "bar" + }] + }, { + "f": [{ + "v": "abc", + }, { + "v": "xyz" + }] + }], + "totalRows": "4" + } + + page_two_resp = { + "jobComplete": True, + "kind": "bigquery#getQueryResultsResponse", + "schema": { + 
"fields": [{ + "name": "first_name", + "type": "STRING", + }, { + "name": "last_name", + "type": "STRING", + }] + }, + "rows": [{ + "f": [{ + "v": "the", + }, { + "v": "beatles" + }] + }, { + "f": [{ + "v": "monty", + }, { + "v": "python" + }] + }], + "totalRows": "4" + } + + bq = BigQueryClient(mock.Mock(), 'project') + get_query_mock.side_effect = [page_one_resp, page_two_resp] + result_rows = bq.get_query_rows(job_id=123, offset=0, limit=0) + + expected_rows = [{'first_name': 'foo', 'last_name': 'bar'}, + {'first_name': 'abc', 'last_name': 'xyz'}, + {'first_name': 'the', 'last_name': 'beatles'}, + {'first_name': 'monty', 'last_name': 'python'}] + self.assertEquals(result_rows, expected_rows) + def test_query_incomplete(self, get_query_mock): """Ensure that get_query_rows handles scenarios where the query is not finished. From 606330e5c3161aa4d1db522a7e65cbc95bce5e64 Mon Sep 17 00:00:00 2001 From: pirsquare Date: Sat, 6 Jun 2015 23:38:26 +0800 Subject: [PATCH 014/146] Add dataset helper methods --- bigquery/client.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/bigquery/client.py b/bigquery/client.py index ec464af..89f02a1 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -321,6 +321,33 @@ def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): records += [self._transform_row(row, schema) for row in rows] return records + def check_dataset(self, dataset_id): + """Check to see if a dataset exists. + Args: + dataset: dataset unique id + Returns: + bool indicating if the table exists. + """ + dataset = self.get_dataset(dataset_id) + return bool(dataset) + + def get_dataset(self, dataset_id): + """ + Retrieve a dataset if it exists, otherwise return an empty dict. + Args: + dataset: dataset unique id + Returns: + dictionary containing the dataset object if it exists, otherwise + an empty dictionary + """ + try: + dataset = self.bigquery.datasets().get( + projectId=self.project_id, datasetId=dataset_id).execute() + except HttpError: + dataset = {} + + return dataset + def check_table(self, dataset, table): """Check to see if a table exists. From 1dfbf6a119fc5227e73d8f9686f4c7bb9ddca55c Mon Sep 17 00:00:00 2001 From: pirsquare Date: Sat, 6 Jun 2015 23:43:10 +0800 Subject: [PATCH 015/146] Add example on checking dataset --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 0359d37..62f2556 100644 --- a/README.md +++ b/README.md @@ -225,6 +225,9 @@ client.update_dataset('mydataset', friendly_name="mon Dataset") # description is # Patch dataset client.patch_dataset('mydataset', friendly_name="mon Dataset") # friendly_name changed; description is preserved + +# Check if dataset exists. +exists = client.check_dataset('mydataset') ``` # Creating a schema from a sample record From ca408f84c5d327812032c605f950c1f89724912c Mon Sep 17 00:00:00 2001 From: pirsquare Date: Sun, 7 Jun 2015 05:19:39 +0800 Subject: [PATCH 016/146] Update docstrings for `get_query_rows` and `get_query_results` --- bigquery/client.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index 89f02a1..331940f 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -292,6 +292,10 @@ def check_job(self, job_id): def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): """Retrieve a list of rows from a query table by job id. + This method will append results from multiple pages together. 
If you want + to manually page through results, you can use `get_query_results` + method directly. + Args: job_id: The job id that references a BigQuery query. offset: The offset of the rows to pull from BigQuery. @@ -1066,7 +1070,9 @@ def _in_range(self, start_time, end_time, time): time <= end_time <= time + ONE_MONTH def get_query_results(self, job_id, offset=None, limit=None, page_token=None, timeout=0): - """Execute the query job indicated by the given job id. + """Execute the query job indicated by the given job id. This is direct mapping to + bigquery api https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults + Args: job_id: The job id of the query to check. offset: The index the result set should start at. From d0cd954ff1971280dc308b79e0ac01f68a75ec0c Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Sat, 30 May 2015 11:17:26 -0600 Subject: [PATCH 017/146] Bump minor version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5b59ac5..6a91a69 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages from setuptools import setup -VERSION = '1.1.1' +VERSION = '1.2.0' setup_args = dict( name='BigQuery-Python', From 93c1c3570c21d9500aa2b44c19f35e48259818dc Mon Sep 17 00:00:00 2001 From: scribu Date: Wed, 1 Jul 2015 17:29:18 +0300 Subject: [PATCH 018/146] pass timestamp_parser to schema_from_record --- bigquery/schema_builder.py | 2 +- bigquery/tests/test_schema_builder.py | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/bigquery/schema_builder.py b/bigquery/schema_builder.py index 3b063f2..195fc3d 100644 --- a/bigquery/schema_builder.py +++ b/bigquery/schema_builder.py @@ -75,7 +75,7 @@ def bq_schema_field(name, bq_type, mode): field = bq_schema_field(k, bq_type, mode) if bq_type == "record": try: - field['fields'] = schema_from_record(v) + field['fields'] = schema_from_record(v, timestamp_parser) except InvalidTypeException, e: # recursively construct the key causing the error raise InvalidTypeException("%s.%s" % (k, e.key), e.value) diff --git a/bigquery/tests/test_schema_builder.py b/bigquery/tests/test_schema_builder.py index 2207060..eef1298 100644 --- a/bigquery/tests/test_schema_builder.py +++ b/bigquery/tests/test_schema_builder.py @@ -90,6 +90,33 @@ def test_hierarchical_record(self): self.assertItemsEqual(schema_from_record(record), schema) + def test_hierarchical_record_with_timestamps(self): + record = {"global": "2001-01-01", "user": {"local": "2001-01-01"}} + + schema_with_ts = [ + {"name": "global", "type": "timestamp", "mode": "nullable"}, + {"name": "user", "type": "record", "mode": "nullable", + "fields": [{ + "name": "local", + "type": "timestamp", + "mode": "nullable"}]}] + + schema_without_ts = [ + {"name": "global", "type": "string", "mode": "nullable"}, + {"name": "user", "type": "record", "mode": "nullable", + "fields": [{ + "name": "local", + "type": "string", + "mode": "nullable"}]}] + + self.assertItemsEqual( + schema_from_record(record), + schema_with_ts) + + self.assertItemsEqual( + schema_from_record(record, timestamp_parser=lambda x: False), + schema_without_ts) + def test_repeated_field(self): record = {"ids": [1, 2, 3, 4, 5]} schema = [{"name": "ids", "type": "integer", "mode": "repeated"}] From 7216deb6b198f39eb89d315fda9f0869d43778b9 Mon Sep 17 00:00:00 2001 From: Tobias Macey Date: Tue, 7 Jul 2015 15:45:41 -0400 Subject: [PATCH 019/146] Updated library and unit tests to run on Python 2 and Python 3 --- 
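To make the paging behaviour described in the `get_query_rows` docstring above concrete, here is a hedged sketch contrasting the automatic and manual routes, placed before the Python 2/3 diff below. `client` is assumed to come from `get_client()` and the table name is a placeholder.

```python
# Submit a query and keep the job id.
job_id, _ = client.query('SELECT * FROM mydataset.my_table')

# Automatic route: get_query_rows() follows pageToken internally and
# returns every row of the result set as a list of dicts.
all_rows = client.get_query_rows(job_id, timeout=60)

# Manual route: page through the raw getQueryResults replies yourself.
raw_rows = []
page_token = None
while True:
    reply = client.get_query_results(job_id, limit=500,
                                     page_token=page_token, timeout=60)
    if not reply.get('jobComplete'):
        break  # job still running; poll again later
    raw_rows.extend(reply.get('rows', []))  # rows here are untransformed API rows
    page_token = reply.get('pageToken')
    if not page_token:
        break
```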
bigquery/__init__.py | 7 +- bigquery/client.py | 10 +- bigquery/query_builder.py | 5 +- bigquery/schema_builder.py | 10 +- bigquery/tests/test_client.py | 46 ++++---- bigquery/tests/test_query_builder.py | 163 +++++++++++++++++++++----- bigquery/tests/test_schema_builder.py | 69 +++++------ setup.py | 6 +- 8 files changed, 218 insertions(+), 98 deletions(-) diff --git a/bigquery/__init__.py b/bigquery/__init__.py index 2ae326f..ef22544 100644 --- a/bigquery/__init__.py +++ b/bigquery/__init__.py @@ -1,5 +1,6 @@ -from client import get_client -from client import ( +from __future__ import absolute_import +from .client import get_client +from .client import ( BIGQUERY_SCOPE, BIGQUERY_SCOPE_READ_ONLY, JOB_CREATE_IF_NEEDED, @@ -14,4 +15,4 @@ JOB_ENCODING_ISO_8859_1 ) -from schema_builder import schema_from_record +from .schema_builder import schema_from_record diff --git a/bigquery/client.py b/bigquery/client.py index 331940f..9450a10 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -154,7 +154,7 @@ def _submit_query_job(self, query_data): projectId=self.project_id, body=query_data).execute() except HttpError as e: if query_data.get("dryRun", False): - return None, json.loads(e.content) + return None, json.loads(e.content.decode('utf8')) raise job_id = query_reply['jobReference'].get('jobId') @@ -266,7 +266,7 @@ def get_table_schema(self, dataset, table): projectId=self.project_id, tableId=table, datasetId=dataset).execute() - except HttpError, e: + except HttpError as e: if int(e.resp['status']) == 404: logging.warn('Table %s.%s does not exist', dataset, table) return None @@ -651,7 +651,7 @@ def import_data_from_uris( skip_leading_rows=skip_leading_rows, quote=quote) non_null_values = dict((k, v) for k, v - in all_values.items() + in list(all_values.items()) if v) raise Exception("Parameters field_delimiter, allow_jagged_rows, " "allow_quoted_newlines, quote and " @@ -1048,7 +1048,7 @@ def _filter_tables_by_time(self, tables, start_time, end_time): A list of table names that are inside the time range. 
""" - return [table_name for (table_name, unix_seconds) in tables.iteritems() + return [table_name for (table_name, unix_seconds) in tables.items() if self._in_range(start_time, end_time, unix_seconds)] def _in_range(self, start_time, end_time, time): @@ -1167,7 +1167,7 @@ def _generate_hex_for_uris(self, uris): Returns: string of hexed uris """ - return sha256(":".join(uris) + str(time())).hexdigest() + return sha256((":".join(uris) + str(time())).encode()).hexdigest() def _raise_insert_exception_if_error(self, job): error_http = job.get('error') diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index 942f78e..1cfa72a 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -77,7 +77,7 @@ def _render_select(selections): return 'SELECT *' rendered_selections = [] - for name, options in selections.iteritems(): + for name, options in selections.items(): if not isinstance(options, list): options = [options] @@ -200,7 +200,8 @@ def _render_condition(field, field_type, comparators): if condition == "IN": if isinstance(value, (list, tuple, set)): value = ', '.join( - [_render_condition_value(v, field_type) for v in value] + sorted([_render_condition_value(v, field_type) + for v in value]) ) else: value = _render_condition_value(value, field_type) diff --git a/bigquery/schema_builder.py b/bigquery/schema_builder.py index 195fc3d..09084a7 100644 --- a/bigquery/schema_builder.py +++ b/bigquery/schema_builder.py @@ -1,10 +1,12 @@ +from __future__ import absolute_import __author__ = 'Aneil Mallavarapu (http://github.com/aneilbaboo)' from datetime import datetime +import six import dateutil.parser -from errors import InvalidTypeException +from .errors import InvalidTypeException def default_timestamp_parser(s): @@ -30,7 +32,7 @@ def schema_from_record(record, timestamp_parser=default_timestamp_parser): schema: list """ return [describe_field(k, v, timestamp_parser=timestamp_parser) - for k, v in record.items()] + for k, v in list(record.items())] def describe_field(k, v, timestamp_parser=default_timestamp_parser): @@ -76,7 +78,7 @@ def bq_schema_field(name, bq_type, mode): if bq_type == "record": try: field['fields'] = schema_from_record(v, timestamp_parser) - except InvalidTypeException, e: + except InvalidTypeException as e: # recursively construct the key causing the error raise InvalidTypeException("%s.%s" % (k, e.key), e.value) @@ -100,7 +102,7 @@ def bigquery_type(o, timestamp_parser=default_timestamp_parser): t = type(o) if t == int: return "integer" - elif t == str or t == unicode: + elif (t == six.binary_type and six.PY2) or t == six.text_type: if timestamp_parser and timestamp_parser(o): return "timestamp" else: diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index b09cba4..67c6a55 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1,6 +1,7 @@ import unittest import mock +import six from nose.tools import raises from apiclient.errors import HttpError @@ -101,7 +102,7 @@ def test_initialize_read_write(self, mock_build, mock_return_cred): @mock.patch('bigquery.client._credentials') @mock.patch('bigquery.client.build') - @mock.patch('__builtin__.open') + @mock.patch('__builtin__.open' if six.PY2 else 'builtins.open') def test_initialize_key_file(self, mock_open, mock_build, mock_return_cred): """Ensure that a BigQueryClient is initialized and returned with @@ -295,7 +296,7 @@ def test_query_dry_run_invalid(self): mock_query_job = mock.Mock() mock_query_job.execute.side_effect = HttpError( - 'crap', 
'{"message": "Bad query"}') + 'crap', '{"message": "Bad query"}'.encode('utf8')) self.mock_job_collection.query.return_value = mock_query_job @@ -370,7 +371,8 @@ def test_get_response(self): page_token = "token" timeout = 1 - actual = self.client.get_query_results(job_id, offset, limit, page_token, timeout) + actual = self.client.get_query_results(job_id, offset, limit, + page_token, timeout) self.mock_job_collection.getQueryResults.assert_called_once_with( projectId=self.project_id, jobId=job_id, startIndex=offset, @@ -1042,8 +1044,9 @@ def test_multi_inside_range(self): }, 1370002000, 1370000000) self.assertEqual( - ['Daenerys Targaryen', 'William Shatner', 'Gordon Freeman'], - tables + sorted( + ['Daenerys Targaryen', 'William Shatner', 'Gordon Freeman']), + sorted(tables) ) def test_not_inside_range(self): @@ -1242,7 +1245,7 @@ def test_table_exists(self): def test_table_does_not_exist(self): """Ensure that None is returned if the table doesn't exist.""" self.mock_tables.get.return_value.execute.side_effect = \ - HttpError({'status': "404"}, '{}') + HttpError({'status': "404"}, '{}'.encode('utf8')) self.assertIsNone( self.client.get_table_schema(self.dataset, self.table)) @@ -1394,7 +1397,7 @@ def test_table_does_not_exist(self): """Ensure that if the table does not exist, False is returned.""" self.mock_tables.get.return_value.execute.side_effect = ( - HttpError(HttpResponse(404), 'There was an error')) + HttpError(HttpResponse(404), 'There was an error'.encode('utf8'))) actual = self.client.check_table(self.dataset, self.table) @@ -1447,7 +1450,7 @@ def test_table_create_failed(self): or if swallow_results is False an empty dict is returned.""" self.mock_tables.insert.return_value.execute.side_effect = ( - HttpError(HttpResponse(404), 'There was an error')) + HttpError(HttpResponse(404), 'There was an error'.encode('utf8'))) actual = self.client.create_table(self.dataset, self.table, self.schema) @@ -1518,7 +1521,7 @@ def test_view_create_failed(self): or if swallow_results is False an empty dict is returned.""" self.mock_tables.insert.return_value.execute.side_effect = ( - HttpError(HttpResponse(404), 'There was an error')) + HttpError(HttpResponse(404), 'There was an error'.encode('utf8'))) actual = self.client.create_view(self.dataset, self.table, self.query) @@ -1582,7 +1585,7 @@ def test_delete_table_fail(self): or the actual response is swallow_results is False.""" self.mock_tables.delete.return_value.execute.side_effect = ( - HttpError(HttpResponse(404), 'There was an error')) + HttpError(HttpResponse(404), 'There was an error'.encode('utf8'))) actual = self.client.delete_table(self.dataset, self.table) @@ -1784,7 +1787,7 @@ def test_push_failed_swallow_results_false(self): def test_push_exception(self): """Ensure that if insertAll raises an exception, False is returned.""" - e = HttpError(HttpResponse(404), 'There was an error') + e = HttpError(HttpResponse(404), 'There was an error'.encode('utf8')) self.mock_table_data.insertAll.return_value.execute.side_effect = e actual = self.client.push_rows(self.dataset, self.table, self.rows, @@ -1973,7 +1976,7 @@ def test_get_tables(self): bq = client.BigQueryClient(mock_bq_service, 'project') tables = bq.get_tables('dataset', 'appspot-1', 0, 10000000000) - self.assertItemsEqual(tables, ['2013_06_appspot_1']) + six.assertCountEqual(self, tables, ['2013_06_appspot_1']) def test_get_tables_from_datetimes(self): """Ensure tables falling in the time window, specified with datetimes, @@ -1996,7 +1999,7 @@ def 
test_get_tables_from_datetimes(self): end = datetime(2013, 7, 10) tables = bq.get_tables('dataset', 'appspot-1', start, end) - self.assertItemsEqual(tables, ['2013_06_appspot_1']) + six.assertCountEqual(self, tables, ['2013_06_appspot_1']) # @@ -2027,7 +2030,7 @@ def test_dataset_create_failed(self): """Ensure that if creating the table fails, False is returned.""" self.mock_datasets.insert.return_value.execute.side_effect = \ - HttpError(HttpResponse(404), 'There was an error') + HttpError(HttpResponse(404), 'There was an error'.encode('utf8')) actual = self.client.create_dataset(self.dataset, friendly_name=self.friendly_name, @@ -2096,7 +2099,7 @@ def test_delete_datasets_fail(self): """Ensure that if deleting table fails, False is returned.""" self.mock_datasets.delete.return_value.execute.side_effect = \ - HttpError(HttpResponse(404), 'There was an error') + HttpError(HttpResponse(404), 'There was an error'.encode('utf8')) actual = self.client.delete_dataset(self.dataset) @@ -2254,7 +2257,8 @@ def test_get_datasets(self): bq = client.BigQueryClient(mock_bq_service, 'project') datasets = bq.get_datasets() - self.assertItemsEqual(datasets, FULL_DATASET_LIST_RESPONSE['datasets']) + six.assertCountEqual(self, datasets, + FULL_DATASET_LIST_RESPONSE['datasets']) def test_get_datasets_returns_no_list(self): """Ensure we handle the no datasets case""" @@ -2273,7 +2277,7 @@ def test_get_datasets_returns_no_list(self): bq = client.BigQueryClient(mock_bq_service, 'project') datasets = bq.get_datasets() - self.assertItemsEqual(datasets, []) + six.assertCountEqual(self, datasets, []) class TestUpdateDataset(unittest.TestCase): @@ -2301,12 +2305,12 @@ def test_dataset_update_failed(self): """Ensure that if creating the table fails, False is returned.""" self.mock_datasets.update.return_value.execute.side_effect = \ - HttpError(HttpResponse(404), 'There was an error') + HttpError(HttpResponse(404), 'There was an error'.encode('utf8')) actual = self.client.update_dataset(self.dataset, - friendly_name=self.friendly_name, - description=self.description, - access=self.access) + friendly_name=self.friendly_name, + description=self.description, + access=self.access) self.assertFalse(actual) self.client.swallow_results = False diff --git a/bigquery/tests/test_query_builder.py b/bigquery/tests/test_query_builder.py index b2e2de1..8591c6b 100644 --- a/bigquery/tests/test_query_builder.py +++ b/bigquery/tests/test_query_builder.py @@ -1,5 +1,8 @@ +import six import unittest +unittest.TestCase.maxDiff = None + class TestRenderSelect(unittest.TestCase): @@ -18,11 +21,13 @@ def test_multiple_selects(self): 'ip': {'alias': 'IP'}, 'app_logs': {'alias': 'AppLogs'}}) - expected = 'SELECT status as Status, latency as Latency, ' \ - 'max_log_level as MaxLogLevel, resource as URL, user as ' \ - 'User, ip as IP, start_time as TimeStamp, version_id as ' \ - 'Version, app_logs as AppLogs' - self.assertEqual(expected, result) + expected = ('SELECT status as Status, latency as Latency, ' + 'max_log_level as MaxLogLevel, resource as URL, user as ' + 'User, ip as IP, start_time as TimeStamp, version_id as ' + 'Version, app_logs as AppLogs') + six.assertCountEqual( + self, sorted(expected[len('SELECT '):].split(', ')), + sorted(result[len('SELECT '):].split(', '))) def test_casting(self): """Ensure that render select can handle custom casting.""" @@ -202,14 +207,16 @@ def test_in_comparator(self): } ]) - self.assertEqual(result, "WHERE ((foobar IN (STRING('a'), STRING('b'))" - " AND foobar IN (STRING('c'), STRING('d')) " - "AND 
foobar IN (STRING('e'), STRING('f')) AND" - " foobar IN (STRING('g'))) AND (NOT foobar IN" - " (STRING('h'), STRING('i')) AND NOT foobar " - "IN (STRING('k'), STRING('j')) AND NOT foobar" - " IN (STRING('l'), STRING('m')) AND NOT " - "foobar IN (STRING('n'))))") + six.assertCountEqual(self, result[len('WHERE '):].split(' AND '), + "WHERE ((foobar IN (STRING('a'), STRING('b'))" + " AND foobar IN (STRING('c'), STRING('d')) " + "AND foobar IN (STRING('e'), STRING('f')) AND" + " foobar IN (STRING('g'))) AND (NOT foobar IN" + " (STRING('h'), STRING('i')) AND NOT foobar " + "IN (STRING('j'), STRING('k')) AND NOT foobar" + " IN (STRING('l'), STRING('m')) AND NOT " + "foobar IN (STRING('n'))))" [len('WHERE '):] + .split(' AND ')) class TestRenderOrder(unittest.TestCase): @@ -298,7 +305,14 @@ def test_full_query(self): " WHERE (start_time <= INTEGER('1371566954')) AND " "(start_time >= INTEGER('1371556954')) GROUP BY " "timestamp, status ORDER BY timestamp desc") - self.assertEqual(result, expected_query) + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_empty_conditions(self): """Ensure that render query can handle an empty list of conditions.""" @@ -319,7 +333,14 @@ def test_empty_conditions(self): "resource as url FROM " "[dataset.2013_06_appspot_1] ORDER BY " "timestamp desc") - self.assertEqual(result, expected_query) + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_incorrect_conditions(self): """Ensure that render query can handle incorrectly formatted @@ -348,7 +369,14 @@ def test_incorrect_conditions(self): "resource as url FROM " "[dataset.2013_06_appspot_1] ORDER BY " "timestamp desc") - self.assertEqual(result, expected_query) + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_multiple_condition_values(self): """Ensure that render query can handle conditions with multiple values. 
@@ -393,7 +421,14 @@ def test_multiple_condition_values(self): "((resource CONTAINS STRING('foo') AND resource " "CONTAINS STRING('baz')) AND (NOT resource CONTAINS " "STRING('bar'))) ORDER BY timestamp desc") - self.assertEqual(result, expected_query) + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_negated_condition_value(self): """Ensure that render query can handle conditions with negated values. @@ -420,7 +455,14 @@ def test_negated_condition_value(self): "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (NOT resource " "CONTAINS STRING('foo')) ORDER BY timestamp desc") - self.assertEqual(result, expected_query) + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_multiple_negated_condition_values(self): """Ensure that render query can handle conditions with multiple negated @@ -456,7 +498,14 @@ def test_multiple_negated_condition_values(self): "CONTAINS STRING('foo') AND NOT resource CONTAINS " "STRING('baz') AND NOT resource CONTAINS " "STRING('bar')) ORDER BY timestamp desc") - self.assertEqual(result, expected_query) + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_empty_order(self): """Ensure that render query can handle an empty formatted order.""" @@ -487,7 +536,14 @@ def test_empty_order(self): "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " "INTEGER('1371556954')) ") - self.assertEqual(result, expected_query) + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_incorrect_order(self): """Ensure that render query can handle inccorectly formatted order.""" @@ -518,7 +574,14 @@ def test_incorrect_order(self): "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " "INTEGER('1371556954')) ") - self.assertEqual(result, expected_query) + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = 
result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_empty_select(self): """Ensure that render query corrently handles no selection.""" @@ -574,7 +637,17 @@ def test_no_alias(self): "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " "INTEGER('1371556954')) ORDER BY start_time desc") - self.assertEqual(result, expected_query) + expected_select = (field.strip() for field in + expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = (expected_query[len('SELECT '):].split('FROM')[1] + .strip()) + result_select = (field.strip() for field in + result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1].strip() + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_formatting(self): """Ensure that render query runs with formatting a select.""" @@ -609,7 +682,14 @@ def test_formatting(self): "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " "INTEGER('1371556954')) ORDER BY timestamp desc") - self.assertEqual(result, expected_query) + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_formatting_duplicate_columns(self): """Ensure that render query runs with formatting a select for a @@ -655,7 +735,14 @@ def test_formatting_duplicate_columns(self): "(start_time <= INTEGER('1371566954')) AND " "(start_time >= INTEGER('1371556954')) ORDER BY " "timestamp desc") - self.assertEqual(result, expected_query) + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_sec_to_micro_formatting(self): """Ensure that render query runs sec_to_micro formatting on a @@ -692,7 +779,14 @@ def test_sec_to_micro_formatting(self): "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " "INTEGER('1371556954')) ORDER BY timestamp desc") - self.assertEqual(result, expected_query) + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) def test_no_table_or_dataset(self): """Ensure that render query returns None if there is no dataset or @@ -741,7 +835,15 @@ def test_empty_groupings(self): "resource as url FROM " "[dataset.2013_06_appspot_1] ORDER BY " "timestamp desc") - self.assertEqual(result, expected_query) + 
expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) + def test_multi_tables(self): """Ensure that render query arguments work with multiple tables.""" @@ -775,4 +877,11 @@ def test_multi_tables(self): "<= INTEGER('1371566954')) AND (start_time >= " "INTEGER('1371556954')) GROUP BY timestamp, status " "ORDER BY timestamp desc") - self.assertEqual(result, expected_query) + expected_select = (expected_query[len('SELECT '):] + .split('FROM')[0].strip().split(', ')) + expected_from = expected_query[len('SELECT '):].split('FROM')[1] + result_select = (result[len('SELECT '):].split('FROM')[0] + .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] + six.assertCountEqual(self, expected_select, result_select) + six.assertCountEqual(self, expected_from, result_from) diff --git a/bigquery/tests/test_schema_builder.py b/bigquery/tests/test_schema_builder.py index eef1298..060162b 100644 --- a/bigquery/tests/test_schema_builder.py +++ b/bigquery/tests/test_schema_builder.py @@ -1,7 +1,8 @@ +from six.moves.builtins import object from datetime import datetime import unittest - +import six from bigquery.schema_builder import schema_from_record from bigquery.schema_builder import describe_field from bigquery.schema_builder import bigquery_type @@ -11,48 +12,49 @@ class TestBigQueryTypes(unittest.TestCase): def test_str_is_string(self): - self.assertItemsEqual(bigquery_type("Bob"), 'string') + six.assertCountEqual(self, bigquery_type("Bob"), 'string') def test_unicode_is_string(self): - self.assertItemsEqual(bigquery_type(u"Here is a happy face \u263A"), - 'string') + six.assertCountEqual(self, bigquery_type(u"Here is a happy face \u263A"), + 'string') def test_int_is_integer(self): - self.assertItemsEqual(bigquery_type(123), 'integer') + six.assertCountEqual(self, bigquery_type(123), 'integer') def test_datetime_is_timestamp(self): - self.assertItemsEqual(bigquery_type(datetime.now()), 'timestamp') + six.assertCountEqual(self, bigquery_type(datetime.now()), 'timestamp') def test_isoformat_timestring(self): - self.assertItemsEqual(bigquery_type(datetime.now().isoformat()), - 'timestamp') + six.assertCountEqual(self, bigquery_type(datetime.now().isoformat()), + 'timestamp') def test_timestring_feb_20_1973(self): - self.assertItemsEqual(bigquery_type("February 20th 1973"), 'timestamp') + six.assertCountEqual(self, bigquery_type("February 20th 1973"), + 'timestamp') def test_timestring_thu_1_july_2004_22_30_00(self): - self.assertItemsEqual(bigquery_type("Thu, 1 July 2004 22:30:00"), - 'timestamp') + six.assertCountEqual(self, bigquery_type("Thu, 1 July 2004 22:30:00"), + 'timestamp') def test_today_is_not_timestring(self): - self.assertItemsEqual(bigquery_type("today"), 'string') + six.assertCountEqual(self, bigquery_type("today"), 'string') def test_timestring_next_thursday(self): - self.assertItemsEqual(bigquery_type("February 20th 1973"), 'timestamp') + six.assertCountEqual(self, bigquery_type("February 20th 1973"), 'timestamp') def test_timestring_arbitrary_fn_success(self): - self.assertItemsEqual( - bigquery_type("whatever", timestamp_parser=lambda x: True), + six.assertCountEqual( + self, bigquery_type("whatever", 
timestamp_parser=lambda x: True), 'timestamp') def test_timestring_arbitrary_fn_fail(self): - self.assertItemsEqual( - bigquery_type("February 20th 1973", - timestamp_parser=lambda x: False), + six.assertCountEqual( + self, bigquery_type("February 20th 1973", + timestamp_parser=lambda x: False), 'string') def test_class_instance_is_invalid_type(self): - class SomeClass: + class SomeClass(object): pass self.assertIsNone(bigquery_type(SomeClass())) @@ -61,15 +63,15 @@ def test_list_is_invalid_type(self): self.assertIsNone(bigquery_type([1, 2, 3])) def test_dict_is_record(self): - self.assertItemsEqual(bigquery_type({"a": 1}), 'record') + six.assertCountEqual(self, bigquery_type({"a": 1}), 'record') class TestFieldDescription(unittest.TestCase): def test_simple_string_field(self): - self.assertItemsEqual(describe_field("user", "Bob"), - {"name": "user", "type": "string", "mode": - "nullable"}) + six.assertCountEqual(self, describe_field("user", "Bob"), + {"name": "user", "type": "string", "mode": + "nullable"}) class TestSchemaGenerator(unittest.TestCase): @@ -79,7 +81,7 @@ def test_simple_record(self): schema = [{"name": "username", "type": "string", "mode": "nullable"}, {"name": "id", "type": "integer", "mode": "nullable"}] - self.assertItemsEqual(schema_from_record(record), schema) + six.assertCountEqual(self, schema_from_record(record), schema) def test_hierarchical_record(self): record = {"user": {"username": "Bob", "id": 123}} @@ -87,8 +89,11 @@ def test_hierarchical_record(self): "fields": [{"name": "username", "type": "string", "mode": "nullable"}, {"name": "id", "type": "integer", "mode": "nullable"}]}] - - self.assertItemsEqual(schema_from_record(record), schema) + generated_schema = schema_from_record(record) + schema_fields = schema[0].pop('fields') + generated_fields = generated_schema[0].pop('fields') + six.assertCountEqual(self, schema_fields, generated_fields) + six.assertCountEqual(self, generated_schema, schema) def test_hierarchical_record_with_timestamps(self): record = {"global": "2001-01-01", "user": {"local": "2001-01-01"}} @@ -109,19 +114,17 @@ def test_hierarchical_record_with_timestamps(self): "type": "string", "mode": "nullable"}]}] - self.assertItemsEqual( - schema_from_record(record), - schema_with_ts) + six.assertCountEqual(self, schema_from_record(record), schema_with_ts) - self.assertItemsEqual( - schema_from_record(record, timestamp_parser=lambda x: False), + six.assertCountEqual( + self, schema_from_record(record, timestamp_parser=lambda x: False), schema_without_ts) def test_repeated_field(self): record = {"ids": [1, 2, 3, 4, 5]} schema = [{"name": "ids", "type": "integer", "mode": "repeated"}] - self.assertItemsEqual(schema_from_record(record), schema) + six.assertCountEqual(self, schema_from_record(record), schema) def test_nested_invalid_type_reported_correctly(self): key = "wrong answer" @@ -129,7 +132,7 @@ def test_nested_invalid_type_reported_correctly(self): try: schema_from_record({"a": {"b": [{"c": None}]}}) - except InvalidTypeException, e: + except InvalidTypeException as e: key = e.key value = e.value diff --git a/setup.py b/setup.py index 6a91a69..2ab7020 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages from setuptools import setup -VERSION = '1.2.0' +VERSION = '1.3.0' setup_args = dict( name='BigQuery-Python', @@ -20,10 +20,10 @@ 'Environment :: Web Environment', 'Intended Audience :: Developers', 'Operating System :: OS Independent', - 'Programming Language :: Python', + 'Programming Language :: 
Python2', + 'Programming Language :: Python3', ], ) if __name__ == '__main__': setup(**setup_args) - From 52cd52f1875270add25a137439cb9b78a83bd34d Mon Sep 17 00:00:00 2001 From: Tobias Macey Date: Thu, 9 Jul 2015 08:45:52 -0400 Subject: [PATCH 020/146] Added tox and `wait_for_job` patch Added support for tox to execute tests against multiple python version Added patch to support `job_id` in `wait_for_job` --- .travis.yml | 20 ++++++++++++-------- bigquery/client.py | 20 ++++++++++---------- bigquery/tests/test_client.py | 30 +++++++++++++++++++++++++----- requirements_dev.txt | 2 ++ tox.ini | 12 ++++++++++++ 5 files changed, 61 insertions(+), 23 deletions(-) create mode 100644 tox.ini diff --git a/.travis.yml b/.travis.yml index eb0ba39..d5ee93d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,16 @@ language: python -python: - - "2.7" - install: - - pip install -r requirements.txt - - pip install -r requirements_dev.txt - -script: make test - + - pip install tox +script: tox -e $TOXENV notifications: email: false +env: + - TOXENV=py26 + - TOXENV=py27 + - TOXENV=py32 + - TOXENV=py33 + - TOXENV=py34 + - TOXENV=py35 + - TOXENV=pypy + - TOXENV=pypy3 + - TOXENV=jython \ No newline at end of file diff --git a/bigquery/client.py b/bigquery/client.py index 9450a10..cbada94 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1,21 +1,19 @@ import calendar +import json +import logging from collections import defaultdict from datetime import datetime, timedelta -from time import sleep -from time import time from hashlib import sha256 -import json -import logging +from time import sleep, time +import httplib2 +import six from apiclient.discovery import build from apiclient.errors import HttpError -import httplib2 +from bigquery.errors import (BigQueryTimeoutException, JobExecutingException, + JobInsertException, UnfinishedQueryException) from bigquery.schema_builder import schema_from_record -from bigquery.errors import ( - JobExecutingException, JobInsertException, - UnfinishedQueryException, BigQueryTimeoutException -) BIGQUERY_SCOPE = 'https://www.googleapis.com/auth/bigquery' BIGQUERY_SCOPE_READ_ONLY = 'https://www.googleapis.com/auth/bigquery.readonly' @@ -837,6 +835,7 @@ def wait_for_job(self, job, interval=5, timeout=60): Waits until the job indicated by job_resource is done or has failed Args: job: dict, representing a BigQuery job resource + or str, representing a BigQuery job id interval: optional float polling interval in seconds, default = 5 timeout: optional float timeout in seconds, default = 60 Returns: @@ -848,7 +847,8 @@ def wait_for_job(self, job, interval=5, timeout=60): BigQueryTimeoutException on timeout """ complete = False - job_id = job['jobReference']['jobId'] + job_id = (job if isinstance(job, (six.binary_type, six.text_type)) + else job['jobReference']['jobId']) job_resource = None start_time = time() diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 67c6a55..9712f17 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -588,6 +588,26 @@ def test_wait_job_error_result(self): interval=.01, timeout=.01) + def test_accepts_job_id(self): + """Ensure it accepts a job Id rather than a full job resource""" + + return_values = [{'status': {'state': u'RUNNING'}, + 'jobReference': {'jobId': "testJob"}}, + {'status': {'state': u'DONE'}, + 'jobReference': {'jobId': "testJob"}}] + + def side_effect(*args, **kwargs): + return return_values.pop(0) + + self.api_mock.jobs().get().execute.side_effect = side_effect + + 
job_resource = self.client.wait_for_job("testJob", + interval=.01, + timeout=600) + + self.assertEqual(self.api_mock.jobs().get().execute.call_count, 2) + self.assertIsInstance(job_resource, dict) + class TestImportDataFromURIs(unittest.TestCase): @@ -859,8 +879,8 @@ def test_export(self, mock_generate_hex): body = { "jobReference": { "projectId": self.project_id, - "jobId": "%s-%s-destinationuri" % - (self.dataset_id, self.table_id) + "jobId": "%s-%s-destinationuri" % (self.dataset_id, + self.table_id) }, "configuration": { "extract": { @@ -2308,9 +2328,9 @@ def test_dataset_update_failed(self): HttpError(HttpResponse(404), 'There was an error'.encode('utf8')) actual = self.client.update_dataset(self.dataset, - friendly_name=self.friendly_name, - description=self.description, - access=self.access) + friendly_name=self.friendly_name, + description=self.description, + access=self.access) self.assertFalse(actual) self.client.swallow_results = False diff --git a/requirements_dev.txt b/requirements_dev.txt index a36ba42..a1292b0 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -3,3 +3,5 @@ rednose mock coverage nose-exclude +tox +-r requirements.txt diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..3a3c16f --- /dev/null +++ b/tox.ini @@ -0,0 +1,12 @@ +# Tox (http://tox.testrun.org/) is a tool for running tests +# in multiple virtualenvs. This configuration file will run the +# test suite on all supported python versions. To use it, "pip install tox" +# and then run "tox" from this directory. + +[tox] +envlist = py27, py33, py34, nightly, pypy + +[testenv] +commands = nosetests +deps = -rrequirements_dev.txt +skip_missing_interpreters = True \ No newline at end of file From 00cafe917526dec02323fcd958e749187c5b5f2e Mon Sep 17 00:00:00 2001 From: Tobias Macey Date: Thu, 9 Jul 2015 09:29:41 -0400 Subject: [PATCH 021/146] Added support for numeric job ids in `wait_for_job` --- .travis.yml | 6 +----- bigquery/client.py | 5 +++-- bigquery/tests/test_client.py | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index d5ee93d..f18482d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,12 +5,8 @@ script: tox -e $TOXENV notifications: email: false env: - - TOXENV=py26 - TOXENV=py27 - - TOXENV=py32 - TOXENV=py33 - TOXENV=py34 - - TOXENV=py35 + - TOXENV=nightly - TOXENV=pypy - - TOXENV=pypy3 - - TOXENV=jython \ No newline at end of file diff --git a/bigquery/client.py b/bigquery/client.py index cbada94..0b04ff9 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -847,8 +847,9 @@ def wait_for_job(self, job, interval=5, timeout=60): BigQueryTimeoutException on timeout """ complete = False - job_id = (job if isinstance(job, (six.binary_type, six.text_type)) - else job['jobReference']['jobId']) + job_id = str(job if isinstance(job, + (six.binary_type, six.text_type, int)) + else job['jobReference']['jobId']) job_resource = None start_time = time() diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 9712f17..12ed294 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -602,6 +602,24 @@ def side_effect(*args, **kwargs): self.api_mock.jobs().get().execute.side_effect = side_effect job_resource = self.client.wait_for_job("testJob", + interval=.01, + timeout=5) + + self.assertEqual(self.api_mock.jobs().get().execute.call_count, 2) + self.assertIsInstance(job_resource, dict) + + def test_accepts_integer_job_id(self): + return_values = [{'status': {'state': 
u'RUNNING'}, + 'jobReference': {'jobId': "testJob"}}, + {'status': {'state': u'DONE'}, + 'jobReference': {'jobId': "testJob"}}] + + def side_effect(*args, **kwargs): + return return_values.pop(0) + + self.api_mock.jobs().get().execute.side_effect = side_effect + + job_resource = self.client.wait_for_job(1234567, interval=.01, timeout=600) From 919ae6d82e9e94af6547eacf63636fbf8673e61d Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Thu, 9 Jul 2015 17:56:49 -0500 Subject: [PATCH 022/146] Have tox run with coverage --- tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index 3a3c16f..ce76190 100644 --- a/tox.ini +++ b/tox.ini @@ -7,6 +7,6 @@ envlist = py27, py33, py34, nightly, pypy [testenv] -commands = nosetests +commands = nosetests --logging-level=ERROR -a slow --with-coverage --cover-package=bigquery deps = -rrequirements_dev.txt -skip_missing_interpreters = True \ No newline at end of file +skip_missing_interpreters = True From 508b3ed183a89ac307ade8cc1ca34f29e86ea6d1 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Thu, 9 Jul 2015 18:03:08 -0500 Subject: [PATCH 023/146] Fix setup.py --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 2ab7020..d29d2b9 100644 --- a/setup.py +++ b/setup.py @@ -20,8 +20,7 @@ 'Environment :: Web Environment', 'Intended Audience :: Developers', 'Operating System :: OS Independent', - 'Programming Language :: Python2', - 'Programming Language :: Python3', + 'Programming Language :: Python', ], ) From 451528975fc1775664f3c01c18623fb2fa054b7a Mon Sep 17 00:00:00 2001 From: Takashi Nishibayashi Date: Tue, 21 Jul 2015 15:12:50 +0900 Subject: [PATCH 024/146] Add expiration_time option for create_table --- bigquery/client.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index 0b04ff9..4ccbbac 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -384,13 +384,14 @@ def get_table(self, dataset, table): return table - def create_table(self, dataset, table, schema): + def create_table(self, dataset, table, schema, expiration_time=None): """Create a new table in the dataset. Args: dataset: the dataset to create the table in. table: the name of table to create. schema: table schema dict. + expiration_time: the expiry time in milliseconds since the epoch. 
Returns: bool indicating if the table was successfully created or not, @@ -406,6 +407,9 @@ def create_table(self, dataset, table, schema): } } + if expiration_time is not None: + body['expirationTime'] = expiration_time + try: table = self.bigquery.tables().insert( projectId=self.project_id, From 958a54d3168de36498e42f08296d8e9689d72a3e Mon Sep 17 00:00:00 2001 From: Takashi Nishibayashi Date: Tue, 21 Jul 2015 16:21:37 +0900 Subject: [PATCH 025/146] Add test for expiration_time option --- bigquery/tests/test_client.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 12ed294..79d4001 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1482,6 +1482,7 @@ def setUp(self): 'tableId': self.table, 'projectId': self.project, 'datasetId': self.dataset} } + self.expiration_time = 1437513693000 def test_table_create_failed(self): """Ensure that if creating the table fails, False is returned, @@ -1535,6 +1536,26 @@ def test_table_create_success(self): self.mock_tables.insert.return_value.execute.assert_called_with() + def test_table_create_body_with_expiration_time(self): + """Ensure that if expiration_time has specified, + it passed to the body.""" + + self.mock_tables.insert.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.create_table(self.dataset, self.table, + self.schema, self.expiration_time) + + body = self.body.copy() + body.update({ + 'expirationTime': self.expiration_time + }) + + self.mock_tables.insert.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=body) + + self.mock_tables.insert.return_value.execute.assert_called_with() + class TestCreateView(unittest.TestCase): From 6f43831c5d8759625332b17fb92b9c4b9d8ed9bb Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Tue, 21 Jul 2015 09:18:59 -0500 Subject: [PATCH 026/146] Fix unit tests --- bigquery/tests/test_client.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 79d4001..93af414 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -65,7 +65,7 @@ def test_initialize_readonly(self, mock_build, mock_return_cred): mock_return_cred.assert_called_once_with() mock_cred.assert_called_once_with(service_account, key, scope=BIGQUERY_SCOPE_READ_ONLY) - mock_cred.authorize.assert_called_once() + self.assertTrue(mock_cred.return_value.authorize.called) mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -95,7 +95,7 @@ def test_initialize_read_write(self, mock_build, mock_return_cred): mock_return_cred.assert_called_once_with() mock_cred.assert_called_once_with(service_account, key, scope=BIGQUERY_SCOPE) - mock_cred.authorize.assert_called_once() + self.assertTrue(mock_cred.return_value.authorize.called) mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -130,7 +130,7 @@ def test_initialize_key_file(self, mock_open, mock_build, mock_return_cred.assert_called_once_with() mock_cred.assert_called_once_with(service_account, key, scope=BIGQUERY_SCOPE) - mock_cred.authorize.assert_called_once() + self.assertTrue(mock_cred.return_value.authorize.called) mock_build.assert_called_once_with('bigquery', 'v2', 
http=mock_http) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -378,7 +378,7 @@ def test_get_response(self): projectId=self.project_id, jobId=job_id, startIndex=offset, maxResults=limit, pageToken=page_token, timeoutMs=1000) - mock_query_job.execute.assert_called_once() + mock_query_job.execute.assert_called_once_with() self.assertEquals(actual, mock_query_reply) From f615350e7e9161a7c2582cec086b987e5d7b4e12 Mon Sep 17 00:00:00 2001 From: Takashi Nishibayashi Date: Wed, 22 Jul 2015 10:38:28 +0900 Subject: [PATCH 027/146] Fix pep8 issue --- bigquery/tests/test_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 93af414..9169cb8 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1543,8 +1543,8 @@ def test_table_create_body_with_expiration_time(self): self.mock_tables.insert.return_value.execute.side_effect = [{ 'status': 'foo'}, {'status': 'bar'}] - actual = self.client.create_table(self.dataset, self.table, - self.schema, self.expiration_time) + self.client.create_table(self.dataset, self.table, + self.schema, self.expiration_time) body = self.body.copy() body.update({ From 9d7b0cb789ce075436bb0196387bd87220f6fe11 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Sat, 1 Aug 2015 13:05:41 -0500 Subject: [PATCH 028/146] Pin mock to 1.0.1 and bump setup.py version --- requirements_dev.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index a1292b0..ca3fffb 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,6 +1,6 @@ nose rednose -mock +mock==1.0.1 coverage nose-exclude tox diff --git a/setup.py b/setup.py index d29d2b9..903c368 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages from setuptools import setup -VERSION = '1.3.0' +VERSION = '1.4.0' setup_args = dict( name='BigQuery-Python', From d4c63b3d6a9135317cc983e3c39db41781bbd82b Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Sun, 2 Aug 2015 16:03:40 -0500 Subject: [PATCH 029/146] Bump version to 1.4.1 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 903c368..a396207 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages from setuptools import setup -VERSION = '1.4.0' +VERSION = '1.4.1' setup_args = dict( name='BigQuery-Python', From 2ca1ab433671202db97fa7e69acfe9a27bd73a76 Mon Sep 17 00:00:00 2001 From: scribu Date: Wed, 26 Aug 2015 01:59:41 +0300 Subject: [PATCH 030/146] remove unused pyopenssl dependency --- setup.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index a396207..9878d9d 100644 --- a/setup.py +++ b/setup.py @@ -11,8 +11,11 @@ license='Apache', packages=find_packages(), include_package_data=True, - install_requires=['google-api-python-client', 'pyopenssl', 'httplib2', - 'python-dateutil'], + install_requires=[ + 'google-api-python-client', + 'httplib2', + 'python-dateutil' + ], author='Tyler Treat', author_email='ttreat31@gmail.com', classifiers=[ From b391d403608f3c558bccaef22341fca581380b52 Mon Sep 17 00:00:00 2001 From: scribu Date: Wed, 26 Aug 2015 02:07:03 +0300 Subject: [PATCH 031/146] remove duplicated requirements.txt --- .travis.yml | 1 + requirements.txt | 4 ---- requirements_dev.txt | 1 - 3 files changed, 1 insertion(+), 5 deletions(-) delete mode 100644 
requirements.txt diff --git a/.travis.yml b/.travis.yml index f18482d..9f422c6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,6 @@ language: python install: + - python setup.py develop - pip install tox script: tox -e $TOXENV notifications: diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 00c05ef..0000000 --- a/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -google-api-python-client -httplib2 -pyopenssl -python-dateutil \ No newline at end of file diff --git a/requirements_dev.txt b/requirements_dev.txt index ca3fffb..74162c3 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -4,4 +4,3 @@ mock==1.0.1 coverage nose-exclude tox --r requirements.txt From e30bf51a8eeb88784764039da55be68c74f563aa Mon Sep 17 00:00:00 2001 From: Sarang Shravagi Date: Fri, 4 Sep 2015 11:28:22 +0530 Subject: [PATCH 032/146] Update Table and Patch Table support has been added. --- bigquery/client.py | 99 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 99 insertions(+) diff --git a/bigquery/client.py b/bigquery/client.py index 4ccbbac..9f46a44 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -384,6 +384,105 @@ def get_table(self, dataset, table): return table + + def update_table(self, dataset, table, schema, expiration_time=None): + """Updates information in an existing table. The update method + replaces the entire table resource, whereas the patch method only + replaces fields that are provided in the submitted table resource. + + Args: + dataset: the dataset to update the table in. + table: the name of table to update. + schema: table schema dict. Schema Should have older as well as new fields. + expiration_time: the expiry time in milliseconds since the epoch. + + Returns: + bool indicating if the table was successfully updated or not, + or response from BigQuery if swallow_results is set for False. + """ + + body = { + 'schema': {'fields': schema}, + 'tableReference': { + 'tableId': table, + 'projectId': self.project_id, + 'datasetId': dataset + } + } + + if expiration_time is not None: + body['expirationTime'] = expiration_time + + try: + table = self.bigquery.tables().update( + projectId=self.project_id, + tableId=table, + datasetId=dataset, + body=body + ).execute() + if self.swallow_results: + return True + else: + return table + + except HttpError as e: + logging.error(('Cannot update table {0}.{1}\n' + 'Http Error: {2}').format(dataset, table, + e.content)) + if self.swallow_results: + return False + else: + return {} + + def patch_table(self, dataset, table, schema, expiration_time=None): + """Updates information in an existing dataset. The update method + replaces the entire dataset resource, whereas the patch method only + replaces fields that are provided in the submitted dataset resource. + + Args: + dataset: the dataset to patch the table in. + table: the name of table to patch. + schema: table schema dict. Schema Should have older as well as new fields. + expiration_time: the expiry time in milliseconds since the epoch. + + Returns: + bool indicating if the table was successfully updated or not, + or response from BigQuery if swallow_results is set for False. 
+ """ + + body = { + 'schema': {'fields': schema}, + 'tableReference': { + 'tableId': table, + 'projectId': self.project_id, + 'datasetId': dataset + } + } + + if expiration_time is not None: + body['expirationTime'] = expiration_time + + try: + table = self.bigquery.tables().patch( + projectId=self.project_id, + tableId=table, + datasetId=dataset, + body=body + ).execute() + if self.swallow_results: + return True + else: + return table + + except HttpError as e: + logging.error(('Cannot patch table {0}.{1}\n' + 'Http Error: {2}').format(dataset, table, + e.content)) + if self.swallow_results: + return False + else: + return {} + def create_table(self, dataset, table, schema, expiration_time=None): """Create a new table in the dataset. From 5b7eabc61c3ddd4e64d978af9212f4bfd093c11d Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Wed, 14 Oct 2015 14:17:33 +0530 Subject: [PATCH 033/146] Add more syntax support for SQL query generation 1. Add basic 'HAVING' clause (before the 'ORDER BY' clause). 2. Select source tables in 'DATE_RANGE' format. 3. Add 'BETWEEN' clause in conditions. 4. Modify 'ORDER BY' to accept multiple parameters. --- bigquery/query_builder.py | 52 ++++++++++++++++++++++++++++++++------- 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index 1cfa72a..4550691 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -2,7 +2,7 @@ def render_query(dataset, tables, select=None, conditions=None, - groupings=None, order_by=None): + groupings=None, having=None, order_by=None): """Render a query that will run over the given tables using the specified parameters. @@ -46,12 +46,13 @@ def render_query(dataset, tables, select=None, conditions=None, if None in (dataset, tables): return None - query = "%s %s %s %s %s" % ( + query = "%s %s %s %s %s %s" % ( _render_select(select), _render_sources(dataset, tables), _render_conditions(conditions), _render_groupings(groupings), - _render_order(order_by), + _render_having(having), + _render_order(order_by) ) return query @@ -85,7 +86,7 @@ def _render_select(selections): for options_dict in options: name = original_name alias = options_dict.get('alias') - alias = "as %s" % alias if alias else "" + alias = "AS %s" % alias if alias else "" formatter = options_dict.get('format') if formatter: @@ -133,8 +134,20 @@ def _render_sources(dataset, tables): a string that represents the from part of a query. 
""" - return "FROM " + ", ".join( - ["[%s.%s]" % (dataset, table) for table in tables]) + if isinstance(tables, dict): + if tables['date_range']: + try: + dataset_table = '.'.join([dataset, tables['table']]) + return "FROM (TABLE_DATE_RANGE([{}], TIMESTAMP('{}'),"\ + " TIMESTAMP('{}'))) ".format(dataset_table, + tables['from_date'], + tables['to_date']) + except KeyError as exp: + raise Exception('Missing parameter %s' % (exp)) + + else: + return "FROM " + ", ".join( + ["[%s.%s]" % (dataset, table) for table in tables]) def _render_conditions(conditions): @@ -206,6 +219,12 @@ def _render_condition(field, field_type, comparators): else: value = _render_condition_value(value, field_type) value = "(" + value + ")" + elif condition == "BETWEEN": + if isinstance(value, (tuple, list)): + value = ' AND '.join( + sorted([_render_condition_value(v, field_type) + for v in value]) + ) else: value = _render_condition_value(value, field_type) @@ -242,25 +261,40 @@ def _render_condition_value(value, field_type): value = 1 if value else 0 elif field_type in ("STRING", "INTEGER", "FLOAT"): value = "'%s'" % (value) + elif field_type in ("TIMESTAMP"): + value = "'%s'" % (str(value)) return "%s(%s)" % (field_type, value) +def _render_having(having): + """Render the having part of a query. + + Args: + having: accepts the having query as it is. + + Returns: + a string that represents the having part of a query. + """ + + return "HAVING %s" % (having) if having else "" + + def _render_order(order): """Render the order by part of a query. Args: order: a dictionary with two keys, field and direction. Such that the dictionary should be formatted as - {'field':'TimeStamp, 'direction':'desc'}. + {'fields': ['TimeStamp'], 'direction':'desc'}. Returns: a string that represents the order by part of a query. """ - if not order or 'field' not in order or 'direction' not in order: + if not order or 'fields' not in order or 'direction' not in order: return '' - return "ORDER BY %s %s" % (order['field'], order['direction']) + return "ORDER BY %s %s" % (", ".join(order['fields']), order['direction']) def _render_groupings(fields): From 87d053cb06b200219e8afb7ade79895fe0009e78 Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Wed, 14 Oct 2015 17:53:39 +0530 Subject: [PATCH 034/146] Make the SELECT alias 'AS' to lower case --- bigquery/query_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index 4550691..78f69d1 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -86,7 +86,7 @@ def _render_select(selections): for options_dict in options: name = original_name alias = options_dict.get('alias') - alias = "AS %s" % alias if alias else "" + alias = "as %s" % alias if alias else "" formatter = options_dict.get('format') if formatter: From 9b6493e7e984725289062444ef3d91843eee404c Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Wed, 14 Oct 2015 18:02:44 +0530 Subject: [PATCH 035/146] Added tags to the .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 6f21cde..48f2589 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,6 @@ nosetests.xml .mr.developer.cfg .project .pydevproject + +# Tags +tags From b3ec17fae678c998d094f2a37fe7357d8eff4684 Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Wed, 14 Oct 2015 18:04:56 +0530 Subject: [PATCH 036/146] Modify tests Test cases modified as the ORDER_BY syntax has changed and HAVING clause also has been added. 
--- bigquery/tests/test_query_builder.py | 62 ++++++++++++++-------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/bigquery/tests/test_query_builder.py b/bigquery/tests/test_query_builder.py index 8591c6b..7bc22c2 100644 --- a/bigquery/tests/test_query_builder.py +++ b/bigquery/tests/test_query_builder.py @@ -225,7 +225,7 @@ def test_order(self): """Ensure that render order can work under expected conditions.""" from bigquery.query_builder import _render_order - result = _render_order({'field': 'foo', 'direction': 'desc'}) + result = _render_order({'fields': ['foo'], 'direction': 'desc'}) self.assertEqual(result, "ORDER BY foo desc") @@ -298,13 +298,13 @@ def test_full_query(self): } ], groupings=['timestamp', 'status'], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM [dataset.2013_06_appspot_1]" " WHERE (start_time <= INTEGER('1371566954')) AND " "(start_time >= INTEGER('1371556954')) GROUP BY " - "timestamp, status ORDER BY timestamp desc") + "timestamp, status ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -327,17 +327,18 @@ def test_empty_conditions(self): 'resource': {'alias': 'url'} }, conditions=[], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " - "[dataset.2013_06_appspot_1] ORDER BY " + "[dataset.2013_06_appspot_1] ORDER BY " "timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] result_select = (result[len('SELECT '):].split('FROM')[0] .strip().split(', ')) + result_from = result[len('SELECT '):].split('FROM')[1] six.assertCountEqual(self, expected_select, result_select) six.assertCountEqual(self, expected_from, result_from) @@ -363,11 +364,11 @@ def test_incorrect_conditions(self): 'negate': False}, 'compoorattor': '>=', 'type': 'INTEGER'} ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " - "[dataset.2013_06_appspot_1] ORDER BY " + "[dataset.2013_06_appspot_1] ORDER BY " "timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) @@ -411,7 +412,7 @@ def test_multiple_condition_values(self): 'negate': False}], 'type': 'STRING'} ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " @@ -420,7 +421,7 @@ def test_multiple_condition_values(self): "INTEGER('1371556954')) AND " "((resource CONTAINS STRING('foo') AND resource " "CONTAINS STRING('baz')) AND (NOT resource CONTAINS " - "STRING('bar'))) ORDER BY timestamp desc") + "STRING('bar'))) ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -449,12 +450,12 @@ def test_negated_condition_value(self): 'negate': True}], 'type': 'STRING'} ], - 
order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (NOT resource " - "CONTAINS STRING('foo')) ORDER BY timestamp desc") + "CONTAINS STRING('foo')) ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -490,14 +491,14 @@ def test_multiple_negated_condition_values(self): 'negate': True}], 'type': 'STRING'} ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (NOT resource " "CONTAINS STRING('foo') AND NOT resource CONTAINS " "STRING('baz') AND NOT resource CONTAINS " - "STRING('bar')) ORDER BY timestamp desc") + "STRING('bar')) ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -535,7 +536,7 @@ def test_empty_order(self): "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ") + "INTEGER('1371556954')) ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -573,7 +574,7 @@ def test_incorrect_order(self): "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ") + "INTEGER('1371556954')) ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -601,11 +602,11 @@ def test_empty_select(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT * FROM [dataset.2013_06_appspot_1] " "WHERE (start_time <= INTEGER('1371566954')) AND " - "(start_time >= INTEGER('1371556954')) ORDER BY " + "(start_time >= INTEGER('1371556954')) ORDER BY " "timestamp desc") self.assertEqual(result, expected_query) @@ -631,12 +632,12 @@ def test_no_alias(self): 'negate': False}], 'type': 'INTEGER'} ], - order_by={'field': 'start_time', 'direction': 'desc'}) + order_by={'fields': ['start_time'], 'direction': 'desc'}) expected_query = ("SELECT status , start_time , resource FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ORDER BY start_time desc") + "INTEGER('1371556954')) ORDER BY start_time desc") expected_select = (field.strip() for field in expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) @@ -674,14 +675,14 @@ def test_formatting(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, " "FORMAT_UTC_USEC(INTEGER(start_time)) as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ORDER BY timestamp desc") + 
"INTEGER('1371556954')) ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -725,7 +726,7 @@ def test_formatting_duplicate_columns(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, " "FORMAT_UTC_USEC(INTEGER(start_time)) as timestamp, " @@ -733,7 +734,7 @@ def test_formatting_duplicate_columns(self): "10) as day, resource as url FROM " "[dataset.2013_06_appspot_1] WHERE " "(start_time <= INTEGER('1371566954')) AND " - "(start_time >= INTEGER('1371556954')) ORDER BY " + "(start_time >= INTEGER('1371556954')) ORDER BY " "timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) @@ -771,14 +772,14 @@ def test_sec_to_micro_formatting(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, " "SEC_TO_TIMESTAMP(INTEGER(start_time*1000000)) as " "timestamp, resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ORDER BY timestamp desc") + "INTEGER('1371556954')) ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -812,7 +813,7 @@ def test_no_table_or_dataset(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) self.assertIsNone(result) @@ -829,11 +830,11 @@ def test_empty_groupings(self): 'resource': {'alias': 'url'} }, groupings=[], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " - "[dataset.2013_06_appspot_1] ORDER BY " + "[dataset.2013_06_appspot_1] ORDER BY " "timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) @@ -844,7 +845,6 @@ def test_empty_groupings(self): six.assertCountEqual(self, expected_select, result_select) six.assertCountEqual(self, expected_from, result_from) - def test_multi_tables(self): """Ensure that render query arguments work with multiple tables.""" from bigquery.query_builder import render_query @@ -868,14 +868,14 @@ def test_multi_tables(self): 'type': 'INTEGER'}, ], groupings=['timestamp', 'status'], - order_by={'field': 'timestamp', 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1], " "[dataset.2013_07_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) GROUP BY timestamp, status " + "INTEGER('1371556954')) GROUP BY timestamp, status " "ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) From bfb88ac82ca6e7f79805c076d8a032f9e930be13 Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Thu, 15 Oct 2015 00:02:53 +0530 Subject: [PATCH 037/146] Render conditions in HAVING clause Render 
the conditions for the having clause. Moved the 'GROUP BY' clause defination abouve 'HAVING'. --- bigquery/query_builder.py | 63 ++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index 78f69d1..6a1e37d 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -266,24 +266,63 @@ def _render_condition_value(value, field_type): return "%s(%s)" % (field_type, value) -def _render_having(having): +def _render_groupings(fields): + """Render the group by part of a query. + + Args: + fields: a list of fields to group by. + + Returns: + a string that represents the group by part of a query. + """ + + if not fields: + return "" + + return "GROUP BY " + ", ".join(fields) + + +def _render_having(having_conditions): """Render the having part of a query. Args: - having: accepts the having query as it is. + conditions: a list of dictionary items to filter the rows. + Each dict should be formatted as {'field': 'start_time', + 'value': {'value': 1, 'negate': False}, 'comparator': '>', + 'type': 'FLOAT'} which is represetned as + 'start_time > FLOAT('1')' in the query. Returns: a string that represents the having part of a query. """ + if not having_conditions: + return "" - return "HAVING %s" % (having) if having else "" + rendered_conditions = [] + + for condition in having_conditions: + field = condition.get('field') + field_type = condition.get('type') + comparators = condition.get('comparators') + + if None in (field, field_type, comparators) or not comparators: + logging.warn('Invalid condition passed in: %s' % condition) + continue + + rendered_conditions.append( + _render_condition(field, field_type, comparators)) + + if not rendered_conditions: + return "" + + return "HAVING %s" % (" AND ".join(rendered_conditions)) def _render_order(order): """Render the order by part of a query. Args: - order: a dictionary with two keys, field and direction. + order: a dictionary with two keys, fields and direction. Such that the dictionary should be formatted as {'fields': ['TimeStamp'], 'direction':'desc'}. @@ -295,19 +334,3 @@ def _render_order(order): return '' return "ORDER BY %s %s" % (", ".join(order['fields']), order['direction']) - - -def _render_groupings(fields): - """Render the group by part of a query. - - Args: - fields: a list of fields to group by. - - Returns: - a string that represents the group by part of a query. - """ - - if not fields: - return "" - - return "GROUP BY " + ", ".join(fields) From 1249147cb8251bf5f6eb3c509779197e74a6585e Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Thu, 15 Oct 2015 15:35:35 +0530 Subject: [PATCH 038/146] Modify the BETWEEN condition logic Check the length of the value is 2, when BETWEEN is used. If length is not equal to 2 throw warning. 
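For illustration, a BETWEEN comparator is now expected to carry exactly two values; any other length only logs a warning. A rough sketch of a valid condition after this change (the field name is just an example):

    conditions = [{
        'field': 'start_time',
        'type': 'INTEGER',
        'comparators': [
            # a list, tuple or set containing exactly two values
            {'condition': 'BETWEEN', 'negate': False,
             'value': [1371556954, 1371566954]},
        ],
    }]

The two rendered values are sorted and joined with AND when the query string is built.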
--- bigquery/query_builder.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index 6a1e37d..1a2c9ea 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -220,11 +220,14 @@ def _render_condition(field, field_type, comparators): value = _render_condition_value(value, field_type) value = "(" + value + ")" elif condition == "BETWEEN": - if isinstance(value, (tuple, list)): + if isinstance(value, (tuple, list, set)) and len(value) == 2: value = ' AND '.join( sorted([_render_condition_value(v, field_type) for v in value]) ) + elif isinstance(value, (tuple, list, set)) and len(value) != 2: + logging.warn('Invalid condition passed in: %s' % condition) + value = "(" + value + ")" else: value = _render_condition_value(value, field_type) From ec8554782d59733a14d4f2308d0c6097a343bbe2 Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Thu, 15 Oct 2015 15:39:52 +0530 Subject: [PATCH 039/146] Unit Test to check the BETWEEN operator --- bigquery/tests/test_query_builder.py | 36 ++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/bigquery/tests/test_query_builder.py b/bigquery/tests/test_query_builder.py index 7bc22c2..8cfd684 100644 --- a/bigquery/tests/test_query_builder.py +++ b/bigquery/tests/test_query_builder.py @@ -218,6 +218,42 @@ def test_in_comparator(self): "foobar IN (STRING('n'))))" [len('WHERE '):] .split(' AND ')) + def test_between_comparator(self): + """Ensure that render conditions can handle "BETWEEN" condition.""" + from bigquery.query_builder import _render_conditions + + result = _render_conditions([ + { + 'field': 'foobar', + 'type': 'STRING', + 'comparators': [ + {'condition': 'BETWEEN', 'negate': False, + 'value': ['a', 'b']}, + {'condition': 'BETWEEN', 'negate': False, + 'value': {'c', 'd'}}, + {'condition': 'BETWEEN', 'negate': False, + 'value': ('e', 'f')}, + {'condition': 'BETWEEN', 'negate': True, + 'value': ['h', 'i']}, + {'condition': 'BETWEEN', 'negate': True, + 'value': {'j', 'k'}}, + {'condition': 'BETWEEN', 'negate': True, + 'value': ('l', 'm')} + ] + } + ]) + + six.assertCountEqual(self, result[len('WHERE '):].split(' AND '), + "WHERE ((foobar BETWEEN (STRING('a') AND " + "STRING('b')) AND foobar BETWEEN (STRING('c') " + "AND STRING('d')) AND foobar BETWEEN " + "(STRING('e') AND STRING('f'))) AND (NOT foobar " + "BETWEEN (STRING('h') AND STRING('i')) AND NOT " + "foobar BETWEEN (STRING('j') AND STRING('k')) " + "AND NOT foobar BETWEEN (STRING('l') AND " + "STRING('m'))))" [len('WHERE '):] + .split(' AND ')) + class TestRenderOrder(unittest.TestCase): From 215080602c5e1bdde52c4d4143038f628d965348 Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Thu, 15 Oct 2015 15:41:17 +0530 Subject: [PATCH 040/146] Unit Test for HAVING Clause --- bigquery/tests/test_query_builder.py | 29 ++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/bigquery/tests/test_query_builder.py b/bigquery/tests/test_query_builder.py index 8cfd684..a8e235c 100644 --- a/bigquery/tests/test_query_builder.py +++ b/bigquery/tests/test_query_builder.py @@ -295,6 +295,35 @@ def test_no_fields(self): self.assertEqual(result, "") +class TestRenderHaving(unittest.TestCase): + + def test_mutliple_fields(self): + """Ensure that render having works with multiple fields.""" + from bigquery.query_builder \ + import _render_having + + result = _render_having([ + { + 'field': 'bar', + 'type': 'STRING', + 'comparators': [ + {'condition': '>=', 'negate': False, 'value': '1'} + ] + } + 
]) + + self.assertEqual(result, "HAVING (bar >= STRING('1'))") + + def test_no_fields(self): + """Ensure that render having can work with out any arguments.""" + from bigquery.query_builder \ + import _render_having + + result = _render_having(None) + + self.assertEqual(result, "") + + class TestRenderQuery(unittest.TestCase): def test_full_query(self): From c25ead6677bad1e0c142925e7a3d4203b723b865 Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Thu, 15 Oct 2015 15:41:43 +0530 Subject: [PATCH 041/146] Add Having clause in complete render query --- bigquery/tests/test_query_builder.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/bigquery/tests/test_query_builder.py b/bigquery/tests/test_query_builder.py index a8e235c..7871fa8 100644 --- a/bigquery/tests/test_query_builder.py +++ b/bigquery/tests/test_query_builder.py @@ -363,13 +363,27 @@ def test_full_query(self): } ], groupings=['timestamp', 'status'], + having=[ + { + 'field': 'status', + 'comparators': [ + { + 'condition': '==', + 'value': 1, + 'negate': False + } + ], + 'type': 'INTEGER' + } + ], order_by={'fields': ['timestamp'], 'direction': 'desc'}) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM [dataset.2013_06_appspot_1]" " WHERE (start_time <= INTEGER('1371566954')) AND " "(start_time >= INTEGER('1371556954')) GROUP BY " - "timestamp, status ORDER BY timestamp desc") + "timestamp, status HAVING (status == INTEGER('1')) " + "ORDER BY timestamp desc") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] From cdfbac8887691a4e98c9ebba591d4587c6656c38 Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Thu, 15 Oct 2015 17:40:39 +0530 Subject: [PATCH 042/146] Unit Test for selecting tables in date range --- bigquery/tests/test_query_builder.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/bigquery/tests/test_query_builder.py b/bigquery/tests/test_query_builder.py index 7871fa8..47e9c97 100644 --- a/bigquery/tests/test_query_builder.py +++ b/bigquery/tests/test_query_builder.py @@ -80,6 +80,22 @@ def test_no_dataset(self): self.assertEqual(result, 'FROM [.man], [.pig], [.bro]') + def test_tables_in_date_range(self): + """Ensure that render sources can handle tables in DATE RANGE.""" + from bigquery.query_builder import _render_sources + + tables = { + 'date_range': True, + 'from_date': '2015-08-23', + 'to_date': '2015-10-10', + 'table': 'pets_' + } + + result = _render_sources('animals', tables) + + self.assertEqual(result, "FROM (TABLE_DATE_RANGE([animals.pets_], " + "TIMESTAMP('2015-08-23'), TIMESTAMP('2015-10-10'))) ") + class TestRenderConditions(unittest.TestCase): From 005ae9408239450634b0e69aa9dd683cdddd7edd Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Thu, 15 Oct 2015 18:43:29 +0530 Subject: [PATCH 043/146] Minor error handling in _render_sources --- bigquery/query_builder.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index 1a2c9ea..424c688 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -135,7 +135,7 @@ def _render_sources(dataset, tables): """ if isinstance(tables, dict): - if tables['date_range']: + if tables.get('date_range', False): try: dataset_table = '.'.join([dataset, tables['table']]) return "FROM (TABLE_DATE_RANGE([{}], TIMESTAMP('{}'),"\ @@ -143,7 +143,8 @@ def _render_sources(dataset, tables): 
tables['from_date'], tables['to_date']) except KeyError as exp: - raise Exception('Missing parameter %s' % (exp)) + logging.warn('Missing parameter %s in selecting sources' % + (exp)) else: return "FROM " + ", ".join( From cfb5d295ae9288f2a5e887aa2c3538726110b65d Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Thu, 15 Oct 2015 18:46:30 +0530 Subject: [PATCH 044/146] Updated README.md according to new changes --- README.md | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 62f2556..16e6617 100644 --- a/README.md +++ b/README.md @@ -94,13 +94,32 @@ conditions = [ } ] +grouping = ['Timestamp'] + +having = [ + { + 'field': 'Timestamp', + 'type': 'INTEGER', + 'comparators': [ + { + 'condition': '==', + 'negate': False, + 'value': 1399478981 + } + ] + } +] + +order_by ={'fields': ['Timestamp'], 'direction': 'desc'} + query = render_query( 'dataset', ['table'], select=selects, conditions=conditions, - groupings=['Timestamp'], - order_by={'field': 'Timestamp', 'direction': 'desc'} + groupings=grouping, + having=having, + order_by=order_by ) job_id, _ = client.query(query) From 90d664327fb93efd4cb8301017f5e83578b95eab Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Fri, 16 Oct 2015 16:19:54 +0530 Subject: [PATCH 045/146] Minor bug fix: BETWEEN Operator --- bigquery/query_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index 424c688..fc6d90c 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -228,7 +228,7 @@ def _render_condition(field, field_type, comparators): ) elif isinstance(value, (tuple, list, set)) and len(value) != 2: logging.warn('Invalid condition passed in: %s' % condition) - value = "(" + value + ")" + else: value = _render_condition_value(value, field_type) From 0d576684f12b02a6be854fdfea1387b12fb98a4a Mon Sep 17 00:00:00 2001 From: Shinichi Ishimura Date: Thu, 26 Nov 2015 01:05:56 +0900 Subject: [PATCH 046/146] Add support for JSON key authorization --- README.md | 5 +++++ bigquery/client.py | 20 ++++++++++++++++---- bigquery/tests/test_client.py | 31 +++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 62f2556..cdec34f 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,11 @@ key = 'key.pem' client = get_client(project_id, service_account=service_account, private_key_file=key, readonly=True) +# JSON key provided by Google +json_key = 'key.json' + +client = get_client(project_id, json_key_file=json_key, readonly=True) + # Submit an async query. job_id, _results = client.query('SELECT * FROM dataset.my_table LIMIT 1000') diff --git a/bigquery/client.py b/bigquery/client.py index 4ccbbac..adbff82 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -44,8 +44,9 @@ def get_client(project_id, credentials=None, service_account=None, - private_key=None, private_key_file=None, readonly=True, - swallow_results=True): + private_key=None, private_key_file=None, + json_key=None, json_key_file=None, + readonly=True, swallow_results=True): """Return a singleton instance of BigQueryClient. Either AssertionCredentials or a service account and private key combination need to be provided in order to authenticate requests to BigQuery. @@ -60,6 +61,9 @@ def get_client(project_id, credentials=None, service_account=None, private_key_file: the name of the file containing the private key associated with the service account in PKCS12 or PEM format. 
+ json_key: the JSON key associated with the service account + json_key_file: the name of the JSON key file associated with + the service account readonly: bool indicating if BigQuery access is read-only. Has no effect if credentials are provided. swallow_results: If set to false then return the actual response value @@ -70,13 +74,21 @@ def get_client(project_id, credentials=None, service_account=None, """ if not credentials: - assert service_account and (private_key or private_key_file), \ - 'Must provide AssertionCredentials or service account and key' + assert (service_account and (private_key or private_key_file)) or (json_key or json_key_file), \ + 'Must provide AssertionCredentials or service account and P12 key or JSON key' if private_key_file: with open(private_key_file, 'rb') as key_file: private_key = key_file.read() + if json_key_file: + with open(json_key_file, 'rb') as key_file: + json_key = json.loads(key_file.read()) + + if json_key: + service_account = json_key['client_email'] + private_key = json_key['private_key'] + bq_service = _get_bq_service(credentials=credentials, service_account=service_account, private_key=private_key, diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 9169cb8..4e91d90 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -135,6 +135,37 @@ def test_initialize_key_file(self, mock_open, mock_build, self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) + @mock.patch('bigquery.client._credentials') + @mock.patch('bigquery.client.build') + @mock.patch('__builtin__.open' if six.PY2 else 'builtins.open') + def test_initialize_json_key_file(self, mock_open, mock_build, mock_return_cred): + """Ensure that a BigQueryClient is initialized and returned with + read/write permissions using a JSON key file. 
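# NOTE: illustrative sketch only, not part of this patch. It shows how the
# json_key / json_key_file options documented above might be used; the
# project id and key path are hypothetical placeholders.
from bigquery import get_client

# Point get_client at the JSON key file; it reads client_email and
# private_key out of the file for you.
client = get_client('my-project', json_key_file='key.json', readonly=True)

# Alternatively, load the key yourself and pass the dict.
import json
with open('key.json') as f:
    client = get_client('my-project', json_key=json.load(f), readonly=True)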
+ """ + from bigquery.client import BIGQUERY_SCOPE + import json + + mock_cred = mock.Mock() + mock_http = mock.Mock() + mock_cred.return_value.authorize.return_value = mock_http + mock_bq = mock.Mock() + mock_build.return_value = mock_bq + json_key_file = 'key.json' + json_key = {'client_email': 'mail', 'private_key': 'pkey'} + mock_open.return_value.__enter__.return_value.read.return_value = json.dumps(json_key) + project_id = 'project' + mock_return_cred.return_value = mock_cred + + bq_client = client.get_client(project_id, json_key_file=json_key_file, readonly=False) + + mock_open.assert_called_once_with(json_key_file, 'rb') + mock_return_cred.assert_called_once_with() + mock_cred.assert_called_once_with(json_key['client_email'], json_key['private_key'], scope=BIGQUERY_SCOPE) + self.assertTrue(mock_cred.return_value.authorize.called) + mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http) + self.assertEquals(mock_bq, bq_client.bigquery) + self.assertEquals(project_id, bq_client.project_id) + class TestQuery(unittest.TestCase): From cf68d106d826eeca4c04821419a92cc5c44f8b9c Mon Sep 17 00:00:00 2001 From: root Date: Sun, 29 Nov 2015 15:40:20 -0500 Subject: [PATCH 047/146] Convert TIMESTAMP to Python float() type properly - closes #72 --- bigquery/client.py | 3 +++ bigquery/tests/test_client.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index adbff82..d550c4a 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1146,6 +1146,9 @@ def _transform_row(self, row, schema): elif col_dict['type'] == 'BOOLEAN': row_value = row_value in ('True', 'true', 'TRUE') + + elif col_dict['type'] == 'TIMESTAMP': + row_value = float(row_value) log[col_name] = row_value diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 4e91d90..469e44c 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -434,7 +434,7 @@ def test_transform_row(self): {'name': 'bar', 'type': 'FLOAT'}, {'name': 'baz', 'type': 'STRING'}, {'name': 'qux', 'type': 'BOOLEAN'}, - {'name': 'timestamp', 'type': 'FLOAT'}] + {'name': 'timestamp', 'type': 'TIMESTAMP'}] row = {'f': [{'v': '42'}, {'v': None}, {'v': 'batman'}, {'v': 'True'}, {'v': '1.371145650319132E9'}]} From a88871b6a64b91b88dbb75d91b8ba679349a977b Mon Sep 17 00:00:00 2001 From: puhitaku Date: Tue, 8 Dec 2015 18:57:27 +0900 Subject: [PATCH 048/146] Fix loading json for Python3 --- bigquery/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index d550c4a..860b887 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -82,8 +82,8 @@ def get_client(project_id, credentials=None, service_account=None, private_key = key_file.read() if json_key_file: - with open(json_key_file, 'rb') as key_file: - json_key = json.loads(key_file.read()) + with open(json_key_file, 'r') as key_file: + json_key = json.load(key_file) if json_key: service_account = json_key['client_email'] From 198d35fc2bc967e3eaa642c8621e3ba8c4fe657c Mon Sep 17 00:00:00 2001 From: puhitaku Date: Tue, 8 Dec 2015 20:37:15 +0900 Subject: [PATCH 049/146] Change assert command for change --- bigquery/tests/test_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 469e44c..d8869ee 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -158,7 +158,7 @@ def test_initialize_json_key_file(self, mock_open, 
mock_build, mock_return_cred) bq_client = client.get_client(project_id, json_key_file=json_key_file, readonly=False) - mock_open.assert_called_once_with(json_key_file, 'rb') + mock_open.assert_called_once_with(json_key_file, 'r') mock_return_cred.assert_called_once_with() mock_cred.assert_called_once_with(json_key['client_email'], json_key['private_key'], scope=BIGQUERY_SCOPE) self.assertTrue(mock_cred.return_value.authorize.called) From 7c0167e8c98164aa1d70c71a923aba22c4f08aed Mon Sep 17 00:00:00 2001 From: Matteo Danieli Date: Tue, 22 Dec 2015 15:35:00 +0100 Subject: [PATCH 050/146] Take into account limit argument when returning rows --- bigquery/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 860b887..621c67c 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -327,13 +327,13 @@ def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): records = [self._transform_row(row, schema) for row in rows] # Append to records if there are multiple pages for query results - while page_token: + while limit is None and page_token or len(records) < limit: query_reply = self.get_query_results(job_id, offset=offset, limit=limit, page_token=page_token, timeout=timeout) page_token = query_reply.get("pageToken") rows = query_reply.get('rows', []) records += [self._transform_row(row, schema) for row in rows] - return records + return records[:limit] if limit else records def check_dataset(self, dataset_id): """Check to see if a dataset exists. From d454a3d64fb593ce324c2bcbfea4adc4164e3f12 Mon Sep 17 00:00:00 2001 From: Matteo Danieli Date: Tue, 22 Dec 2015 19:51:32 +0100 Subject: [PATCH 051/146] Fix stopping condition --- bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index 621c67c..1ac3cb3 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -327,7 +327,7 @@ def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): records = [self._transform_row(row, schema) for row in rows] # Append to records if there are multiple pages for query results - while limit is None and page_token or len(records) < limit: + while page_token and (not limit or len(records) < limit): query_reply = self.get_query_results(job_id, offset=offset, limit=limit, page_token=page_token, timeout=timeout) page_token = query_reply.get("pageToken") From 117b916fdedacffff9912ed350616ebc50d22d8f Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Tue, 22 Dec 2015 13:08:17 -0600 Subject: [PATCH 052/146] Bump version to 1.5.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9878d9d..cc13fbc 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages from setuptools import setup -VERSION = '1.4.1' +VERSION = '1.5.0' setup_args = dict( name='BigQuery-Python', From 26241da38659e94a766309fce8037b6f3cefd1a3 Mon Sep 17 00:00:00 2001 From: Antoine Cezar Date: Mon, 18 Jan 2016 11:10:26 +0100 Subject: [PATCH 053/146] Add an update table method to client Fix #66 --- bigquery/client.py | 42 ++++++++++++++++++++ bigquery/tests/test_client.py | 75 +++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) diff --git a/bigquery/client.py b/bigquery/client.py index 1ac3cb3..caf5694 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -442,6 +442,48 @@ def create_table(self, dataset, table, schema, expiration_time=None): else: return {} + def update_table(self, dataset, 
table, schema): + """Update an existing table in the dataset. + + Args: + dataset: the dataset to update the table in. + table: the name of table to update. + schema: table schema dict. + + Returns: + bool indicating if the table was successfully updated or not, + or response from BigQuery if swallow_results is set for False. + """ + + body = { + 'schema': {'fields': schema}, + 'tableReference': { + 'tableId': table, + 'projectId': self.project_id, + 'datasetId': dataset + } + } + + try: + result = self.bigquery.tables().update( + projectId=self.project_id, + datasetId=dataset, + body=body + ).execute() + if self.swallow_results: + return True + else: + return result + + except HttpError as e: + logging.error(('Cannot update table {0}.{1}\n' + 'Http Error: {2}').format(dataset, table, + e.content)) + if self.swallow_results: + return False + else: + return {} + def create_view(self, dataset, view, query): """Create a new view in the dataset. diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index d8869ee..3b89878 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1588,6 +1588,81 @@ def test_table_create_body_with_expiration_time(self): self.mock_tables.insert.return_value.execute.assert_called_with() +class TestUpdateTable(unittest.TestCase): + + def setUp(self): + self.mock_bq_service = mock.Mock() + self.mock_tables = mock.Mock() + self.mock_bq_service.tables.return_value = self.mock_tables + self.table = 'table' + self.schema = [ + {'name': 'foo', 'type': 'STRING', 'mode': 'nullable'}, + {'name': 'bar', 'type': 'FLOAT', 'mode': 'nullable'} + ] + self.project = 'project' + self.dataset = 'dataset' + self.client = client.BigQueryClient(self.mock_bq_service, self.project) + self.body = { + 'schema': {'fields': self.schema}, + 'tableReference': { + 'tableId': self.table, 'projectId': self.project, + 'datasetId': self.dataset} + } + self.expiration_time = 1437513693000 + + def test_table_update_failed(self): + """Ensure that if updating the table fails, False is returned, + or if swallow_results is False an empty dict is returned.""" + + self.mock_tables.update.return_value.execute.side_effect = ( + HttpError(HttpResponse(404), 'There was an error'.encode('utf8'))) + + actual = self.client.update_table(self.dataset, self.table, + self.schema) + + self.assertFalse(actual) + + self.client.swallow_results = False + + actual = self.client.update_table(self.dataset, self.table, + self.schema) + + self.assertEqual(actual, {}) + + self.client.swallow_results = True + + self.mock_tables.update.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=self.body) + + self.mock_tables.update.return_value.execute.assert_called_with() + + def test_table_update_success(self): + """Ensure that if updating the table succeeds, True is returned, + or if swallow_results is False the actual response is returned.""" + + self.mock_tables.update.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.update_table(self.dataset, self.table, + self.schema) + + self.assertTrue(actual) + + self.client.swallow_results = False + + actual = self.client.update_table(self.dataset, self.table, + self.schema) + + self.assertEqual(actual, {'status': 'bar'}) + + self.client.swallow_results = True + + self.mock_tables.update.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=self.body) + + self.mock_tables.update.return_value.execute.assert_called_with() + + class 
TestCreateView(unittest.TestCase): def setUp(self): From 5c6afe74bdcc19c01d429e2cf784d6518fc976fe Mon Sep 17 00:00:00 2001 From: Antoine Cezar Date: Mon, 18 Jan 2016 14:00:38 +0100 Subject: [PATCH 054/146] Add an patch table method to client Fix #66 --- bigquery/client.py | 42 ++++++++++++++++++++ bigquery/tests/test_client.py | 75 +++++++++++++++++++++++++++++++++++ 2 files changed, 117 insertions(+) diff --git a/bigquery/client.py b/bigquery/client.py index caf5694..1642bdb 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -484,6 +484,48 @@ def update_table(self, dataset, table, schema): else: return {} + def patch_table(self, dataset, table, schema): + """Patch an existing table in the dataset. + + Args: + dataset: the dataset to patch the table in. + table: the name of table to patch. + schema: table schema dict. + + Returns: + bool indicating if the table was successfully patched or not, + or response from BigQuery if swallow_results is set for False. + """ + + body = { + 'schema': {'fields': schema}, + 'tableReference': { + 'tableId': table, + 'projectId': self.project_id, + 'datasetId': dataset + } + } + + try: + result = self.bigquery.tables().patch( + projectId=self.project_id, + datasetId=dataset, + body=body + ).execute() + if self.swallow_results: + return True + else: + return result + + except HttpError as e: + logging.error(('Cannot patch table {0}.{1}\n' + 'Http Error: {2}').format(dataset, table, + e.content)) + if self.swallow_results: + return False + else: + return {} + def create_view(self, dataset, view, query): """Create a new view in the dataset. diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 3b89878..a0f1440 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1663,6 +1663,81 @@ def test_table_update_success(self): self.mock_tables.update.return_value.execute.assert_called_with() +class TestPatchTable(unittest.TestCase): + + def setUp(self): + self.mock_bq_service = mock.Mock() + self.mock_tables = mock.Mock() + self.mock_bq_service.tables.return_value = self.mock_tables + self.table = 'table' + self.schema = [ + {'name': 'foo', 'type': 'STRING', 'mode': 'nullable'}, + {'name': 'bar', 'type': 'FLOAT', 'mode': 'nullable'} + ] + self.project = 'project' + self.dataset = 'dataset' + self.client = client.BigQueryClient(self.mock_bq_service, self.project) + self.body = { + 'schema': {'fields': self.schema}, + 'tableReference': { + 'tableId': self.table, 'projectId': self.project, + 'datasetId': self.dataset} + } + self.expiration_time = 1437513693000 + + def test_table_patch_failed(self): + """Ensure that if patching the table fails, False is returned, + or if swallow_results is False an empty dict is returned.""" + + self.mock_tables.patch.return_value.execute.side_effect = ( + HttpError(HttpResponse(404), 'There was an error'.encode('utf8'))) + + actual = self.client.patch_table(self.dataset, self.table, + self.schema) + + self.assertFalse(actual) + + self.client.swallow_results = False + + actual = self.client.patch_table(self.dataset, self.table, + self.schema) + + self.assertEqual(actual, {}) + + self.client.swallow_results = True + + self.mock_tables.patch.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=self.body) + + self.mock_tables.patch.return_value.execute.assert_called_with() + + def test_table_patch_success(self): + """Ensure that if patching the table succeeds, True is returned, + or if swallow_results is False the actual response is returned.""" + + 
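# NOTE: illustrative sketch only, not part of this patch. It shows how the
# update_table / patch_table methods exercised by these tests might be
# called; the client, dataset, table and schema below are hypothetical.
schema = [
    {'name': 'foo', 'type': 'STRING', 'mode': 'nullable'},
    {'name': 'bar', 'type': 'FLOAT', 'mode': 'nullable'},
]
# Replace the table definition wholesale ...
client.update_table('dataset', 'table', schema)
# ... or patch it, updating only the fields supplied.
client.patch_table('dataset', 'table', schema)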
self.mock_tables.patch.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.patch_table(self.dataset, self.table, + self.schema) + + self.assertTrue(actual) + + self.client.swallow_results = False + + actual = self.client.patch_table(self.dataset, self.table, + self.schema) + + self.assertEqual(actual, {'status': 'bar'}) + + self.client.swallow_results = True + + self.mock_tables.patch.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=self.body) + + self.mock_tables.patch.return_value.execute.assert_called_with() + + class TestCreateView(unittest.TestCase): def setUp(self): From 8f365c6dcc6b7606debfcfb5851592176b8a3c3a Mon Sep 17 00:00:00 2001 From: Suren Shrestha Date: Thu, 21 Jan 2016 12:11:15 +1100 Subject: [PATCH 055/146] added udf support to client.write_table and tests fixed --- bigquery/client.py | 11 ++++++++++- bigquery/tests/test_client.py | 5 +++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index 1642bdb..99ff257 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -861,6 +861,7 @@ def write_to_table( query, dataset=None, table=None, + external_udf_uris=[], allow_large_results=None, use_query_cache=None, priority=None, @@ -919,6 +920,14 @@ def write_to_table( if write_disposition: configuration['writeDisposition'] = write_disposition + configuration['userDefinedFunctionResources'] = [] + for external_udf_uri in external_udf_uris: + configuration['userDefinedFunctionResources'].append( + { + "resourceUri": external_udf_uri + } + ) + body = { "configuration": { 'query': configuration @@ -1230,7 +1239,7 @@ def _transform_row(self, row, schema): elif col_dict['type'] == 'BOOLEAN': row_value = row_value in ('True', 'true', 'TRUE') - + elif col_dict['type'] == 'TIMESTAMP': row_value = float(row_value) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index a0f1440..7594598 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1012,6 +1012,7 @@ def setUp(self): self.project_id = 'project' self.dataset_id = 'dataset' self.table_id = 'table' + self.external_udf_uris = ['gs://bucket/external_udf.js'] self.use_query_cache = False self.priority = "INTERACTIVE" self.client = client.BigQueryClient(self.mock_api, @@ -1032,6 +1033,9 @@ def test_write(self): "tableId": self.table_id }, "query": self.query, + "userDefinedFunctionResources": [{ + "resourceUri": self.external_udf_uris[0] + }], "useQueryCache": self.use_query_cache, "priority": self.priority, } @@ -1042,6 +1046,7 @@ def test_write(self): result = self.client.write_to_table(self.query, self.dataset_id, self.table_id, + external_udf_uris=self.external_udf_uris, use_query_cache=False, priority=self.priority) From e5471965374800cde62f9f5559f349c629479a80 Mon Sep 17 00:00:00 2001 From: Suren Shrestha Date: Thu, 21 Jan 2016 12:24:33 +1100 Subject: [PATCH 056/146] Update README.md to include example for UDF support in write_to_table --- README.md | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/README.md b/README.md index cdec34f..0004a7e 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,34 @@ try: except BigQueryTimeoutException: print "Timeout" +# write to permanent table with UDF in query string +external_udf_uris = ["gs://bigquery-sandbox-udf/url_decode.js"] +query = """SELECT requests, title + FROM + urlDecode( + SELECT + title, sum(requests) AS num_requests + FROM + [fh-bigquery:wikipedia.pagecounts_201504] 
+ WHERE language = 'fr' + GROUP EACH BY title + ) + WHERE title LIKE '%ç%' + ORDER BY requests DESC + LIMIT 100 + """ +job = client.write_to_table( + query, + 'dataset', + 'table' + external_udf_uris=external_udf_uris +) + +try: + job_resource = client.wait_for_job(job, timeout=60) + print job_resource +except BigQueryTimeoutException: + print "Timeout" # write to temporary table job = client.write_to_table('SELECT * FROM dataset.original_table LIMIT 100') @@ -176,6 +204,8 @@ try: print job_resource except BigQueryTimeoutException: print "Timeout" + + ``` # Import data from Google cloud storage From 9ed09690f3fd84afc4a0eed771810439a678aa76 Mon Sep 17 00:00:00 2001 From: Suren Shrestha Date: Thu, 21 Jan 2016 12:31:06 +1100 Subject: [PATCH 057/146] docstring for external_udf_uris in client.write_to_table --- bigquery/client.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bigquery/client.py b/bigquery/client.py index 99ff257..e3bcf40 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -875,6 +875,11 @@ def write_to_table( query: required BigQuery query string. dataset: optional string id of the dataset table: optional string id of the table + external_udf_uris: optional list of external UDF URIs + (if given, + URIs must be Google Cloud Storage + and have .js extensions + ) allow_large_results: optional boolean use_query_cache: optional boolean priority: optional string From 970546fcc53f2db0cca33ad19dbde46d7c8fba26 Mon Sep 17 00:00:00 2001 From: Suren Shrestha Date: Fri, 22 Jan 2016 09:06:18 +1100 Subject: [PATCH 058/146] Update README.md to include missing comma --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0004a7e..f5c931f 100644 --- a/README.md +++ b/README.md @@ -187,7 +187,7 @@ query = """SELECT requests, title job = client.write_to_table( query, 'dataset', - 'table' + 'table', external_udf_uris=external_udf_uris ) From 2d3a08b426e3964388d2fff7ee0eaab2ecdb31c7 Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Tue, 2 Feb 2016 15:37:19 +0530 Subject: [PATCH 059/146] Removed un-neccessary tags file --- .gitignore | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitignore b/.gitignore index 48f2589..6f21cde 100644 --- a/.gitignore +++ b/.gitignore @@ -35,6 +35,3 @@ nosetests.xml .mr.developer.cfg .project .pydevproject - -# Tags -tags From 12156cac0f063dfebbc9f968490bf8370abb1813 Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Tue, 2 Feb 2016 17:00:14 +0530 Subject: [PATCH 060/146] Corrected BETWEEN comparator test --- bigquery/tests/test_query_builder.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/bigquery/tests/test_query_builder.py b/bigquery/tests/test_query_builder.py index 47e9c97..df37a3e 100644 --- a/bigquery/tests/test_query_builder.py +++ b/bigquery/tests/test_query_builder.py @@ -260,14 +260,14 @@ def test_between_comparator(self): ]) six.assertCountEqual(self, result[len('WHERE '):].split(' AND '), - "WHERE ((foobar BETWEEN (STRING('a') AND " - "STRING('b')) AND foobar BETWEEN (STRING('c') " - "AND STRING('d')) AND foobar BETWEEN " - "(STRING('e') AND STRING('f'))) AND (NOT foobar " - "BETWEEN (STRING('h') AND STRING('i')) AND NOT " - "foobar BETWEEN (STRING('j') AND STRING('k')) " - "AND NOT foobar BETWEEN (STRING('l') AND " - "STRING('m'))))" [len('WHERE '):] + "WHERE ((foobar BETWEEN STRING('a') AND " + "STRING('b') AND foobar BETWEEN STRING('c') " + "AND STRING('d') AND foobar BETWEEN " + "STRING('e') AND STRING('f')) AND (NOT foobar " + "BETWEEN 
STRING('h') AND STRING('i') AND NOT " + "foobar BETWEEN STRING('j') AND STRING('k') " + "AND NOT foobar BETWEEN STRING('l') AND " + "STRING('m')))" [len('WHERE '):] .split(' AND ')) From bd7ce788911f36242d36679b4ce40faa67ecc946 Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Wed, 3 Feb 2016 08:31:34 +0530 Subject: [PATCH 061/146] Remove duplicate methods 'patch_table', 'update_table' --- bigquery/client.py | 84 ---------------------------------------------- 1 file changed, 84 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 4610703..05072e6 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -541,90 +541,6 @@ def create_table(self, dataset, table, schema, expiration_time=None): else: return {} - def update_table(self, dataset, table, schema): - """Update an existing table in the dataset. - - Args: - dataset: the dataset to update the table in. - table: the name of table to update. - schema: table schema dict. - - Returns: - bool indicating if the table was successfully updated or not, - or response from BigQuery if swallow_results is set for False. - """ - - body = { - 'schema': {'fields': schema}, - 'tableReference': { - 'tableId': table, - 'projectId': self.project_id, - 'datasetId': dataset - } - } - - try: - result = self.bigquery.tables().update( - projectId=self.project_id, - datasetId=dataset, - body=body - ).execute() - if self.swallow_results: - return True - else: - return result - - except HttpError as e: - logging.error(('Cannot update table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, - e.content)) - if self.swallow_results: - return False - else: - return {} - - def patch_table(self, dataset, table, schema): - """Patch an existing table in the dataset. - - Args: - dataset: the dataset to patch the table in. - table: the name of table to patch. - schema: table schema dict. - - Returns: - bool indicating if the table was successfully patched or not, - or response from BigQuery if swallow_results is set for False. - """ - - body = { - 'schema': {'fields': schema}, - 'tableReference': { - 'tableId': table, - 'projectId': self.project_id, - 'datasetId': dataset - } - } - - try: - result = self.bigquery.tables().patch( - projectId=self.project_id, - datasetId=dataset, - body=body - ).execute() - if self.swallow_results: - return True - else: - return result - - except HttpError as e: - logging.error(('Cannot patch table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, - e.content)) - if self.swallow_results: - return False - else: - return {} - def create_view(self, dataset, view, query): """Create a new view in the dataset. From 02212150777d2e7d2852881e2c0b3c05c874c361 Mon Sep 17 00:00:00 2001 From: sagarrakshe Date: Thu, 4 Feb 2016 17:54:03 +0530 Subject: [PATCH 062/146] Fix: Deleted the un-correct code for patch_table Deleted the un-correct code for patch_table and update_table. --- bigquery/client.py | 63 ++++++++++++++++++---------------------------- 1 file changed, 24 insertions(+), 39 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 05072e6..e3bcf40 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -396,20 +396,17 @@ def get_table(self, dataset, table): return table - - def update_table(self, dataset, table, schema, expiration_time=None): - """Updates information in an existing table. The update method - replaces the entire table resource, whereas the patch method only - replaces fields that are provided in the submitted table resource. 
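# NOTE: illustrative sketch only, not part of this patch. It shows a call to
# create_table as defined below; the client, dataset, table and schema are
# hypothetical, and expiration_time is milliseconds since the epoch.
schema = [
    {'name': 'id', 'type': 'INTEGER'},
    {'name': 'name', 'type': 'STRING'},
]
client.create_table('dataset', 'table', schema,
                    expiration_time=1437513693000)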
+ def create_table(self, dataset, table, schema, expiration_time=None): + """Create a new table in the dataset. Args: - dataset: the dataset to update the table in. - table: the name of table to update. - schema: table schema dict. Schema Should have older as well as new fields. + dataset: the dataset to create the table in. + table: the name of table to create. + schema: table schema dict. expiration_time: the expiry time in milliseconds since the epoch. Returns: - bool indicating if the table was successfully updated or not, + bool indicating if the table was successfully created or not, or response from BigQuery if swallow_results is set for False. """ @@ -426,9 +423,8 @@ def update_table(self, dataset, table, schema, expiration_time=None): body['expirationTime'] = expiration_time try: - table = self.bigquery.tables().update( + table = self.bigquery.tables().insert( projectId=self.project_id, - tableId=table, datasetId=dataset, body=body ).execute() @@ -438,7 +434,7 @@ def update_table(self, dataset, table, schema, expiration_time=None): return table except HttpError as e: - logging.error(('Cannot update table {0}.{1}\n' + logging.error(('Cannot create table {0}.{1}\n' 'Http Error: {2}').format(dataset, table, e.content)) if self.swallow_results: @@ -446,16 +442,13 @@ def update_table(self, dataset, table, schema, expiration_time=None): else: return {} - def patch_table(self, dataset, table, schema, expiration_time=None): - """Updates information in an existing dataset. The update method - replaces the entire dataset resource, whereas the patch method only - replaces fields that are provided in the submitted dataset resource. + def update_table(self, dataset, table, schema): + """Update an existing table in the dataset. Args: - dataset: the dataset to patch the table in. - table: the name of table to patch. - schema: table schema dict. Schema Should have older as well as new fields. - expiration_time: the expiry time in milliseconds since the epoch. + dataset: the dataset to update the table in. + table: the name of table to update. + schema: table schema dict. Returns: bool indicating if the table was successfully updated or not, @@ -471,23 +464,19 @@ def patch_table(self, dataset, table, schema, expiration_time=None): } } - if expiration_time is not None: - body['expirationTime'] = expiration_time - try: - table = self.bigquery.tables().patch( + result = self.bigquery.tables().update( projectId=self.project_id, - tableId=table, datasetId=dataset, body=body ).execute() if self.swallow_results: return True else: - return table + return result except HttpError as e: - logging.error(('Cannot patch table {0}.{1}\n' + logging.error(('Cannot update table {0}.{1}\n' 'Http Error: {2}').format(dataset, table, e.content)) if self.swallow_results: @@ -495,17 +484,16 @@ def patch_table(self, dataset, table, schema, expiration_time=None): else: return {} - def create_table(self, dataset, table, schema, expiration_time=None): - """Create a new table in the dataset. + def patch_table(self, dataset, table, schema): + """Patch an existing table in the dataset. Args: - dataset: the dataset to create the table in. - table: the name of table to create. + dataset: the dataset to patch the table in. + table: the name of table to patch. schema: table schema dict. - expiration_time: the expiry time in milliseconds since the epoch. 
Returns: - bool indicating if the table was successfully created or not, + bool indicating if the table was successfully patched or not, or response from BigQuery if swallow_results is set for False. """ @@ -518,11 +506,8 @@ def create_table(self, dataset, table, schema, expiration_time=None): } } - if expiration_time is not None: - body['expirationTime'] = expiration_time - try: - table = self.bigquery.tables().insert( + result = self.bigquery.tables().patch( projectId=self.project_id, datasetId=dataset, body=body @@ -530,10 +515,10 @@ def create_table(self, dataset, table, schema, expiration_time=None): if self.swallow_results: return True else: - return table + return result except HttpError as e: - logging.error(('Cannot create table {0}.{1}\n' + logging.error(('Cannot patch table {0}.{1}\n' 'Http Error: {2}').format(dataset, table, e.content)) if self.swallow_results: From d45009a074f3cedbec8d8ff77d27b99e56917aef Mon Sep 17 00:00:00 2001 From: Andrew Gardner Date: Thu, 11 Feb 2016 15:20:59 +0000 Subject: [PATCH 063/146] Add service_url parameter to get_client and _get_bq_service --- bigquery/client.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index e3bcf40..5db2630 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -8,7 +8,7 @@ import httplib2 import six -from apiclient.discovery import build +from apiclient.discovery import build, DISCOVERY_URI from apiclient.errors import HttpError from bigquery.errors import (BigQueryTimeoutException, JobExecutingException, @@ -43,7 +43,8 @@ JOB_DESTINATION_FORMAT_CSV = JOB_FORMAT_CSV -def get_client(project_id, credentials=None, service_account=None, +def get_client(project_id, credentials=None, + service_url=None, service_account=None, private_key=None, private_key_file=None, json_key=None, json_key_file=None, readonly=True, swallow_results=True): @@ -55,6 +56,12 @@ def get_client(project_id, credentials=None, service_account=None, project_id: the BigQuery project id. credentials: an AssertionCredentials instance to authenticate requests to BigQuery. + service_url: a URI string template pointing to the location of + Google's API discovery service. Requires two parameters + {api} and {apiVersion} that when filled in produce an + absolute URI to the discovery document for that service. + If not set then the default googleapiclient disovery URI + is used. service_account: the Google API service account name. private_key: the private key associated with the service account in PKCS12 or PEM format. 
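# NOTE: illustrative sketch only, not part of this patch. It shows the new
# service_url parameter pointing get_client at a non-default discovery
# endpoint; the URL and credentials below are hypothetical placeholders, and
# the template must keep the {api} and {apiVersion} parameters.
from bigquery import get_client

client = get_client(
    'my-project',
    service_account='svc-account@my-project.iam.gserviceaccount.com',
    private_key_file='key.pem',
    service_url=('https://bigquery-proxy.example.com/discovery/v1/'
                 'apis/{api}/{apiVersion}/rest'),
    readonly=True)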
@@ -77,6 +84,9 @@ def get_client(project_id, credentials=None, service_account=None, assert (service_account and (private_key or private_key_file)) or (json_key or json_key_file), \ 'Must provide AssertionCredentials or service account and P12 key or JSON key' + if service_url is None: + service_url = DISCOVERY_URI + if private_key_file: with open(private_key_file, 'rb') as key_file: private_key = key_file.read() @@ -90,6 +100,7 @@ def get_client(project_id, credentials=None, service_account=None, private_key = json_key['private_key'] bq_service = _get_bq_service(credentials=credentials, + service_url=service_url, service_account=service_account, private_key=private_key, readonly=readonly) @@ -97,7 +108,7 @@ def get_client(project_id, credentials=None, service_account=None, return BigQueryClient(bq_service, project_id, swallow_results) -def _get_bq_service(credentials=None, service_account=None, private_key=None, +def _get_bq_service(credentials=None, service_url=None, service_account=None, private_key=None, readonly=True): """Construct an authorized BigQuery service object.""" @@ -110,7 +121,7 @@ def _get_bq_service(credentials=None, service_account=None, private_key=None, http = httplib2.Http() http = credentials.authorize(http) - service = build('bigquery', 'v2', http=http) + service = build('bigquery', 'v2', http=http, discoveryServiceUrl=service_url) return service From 12231dee45079029199fac813d436c96404cca8e Mon Sep 17 00:00:00 2001 From: Andrew Gardner Date: Thu, 11 Feb 2016 17:18:21 +0000 Subject: [PATCH 064/146] Modify test_client tests to take service_url parameter into acccount --- bigquery/tests/test_client.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 7594598..f7050c6 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -50,6 +50,7 @@ def test_initialize_readonly(self, mock_build, mock_return_cred): mock_cred = mock.Mock() mock_http = mock.Mock() + mock_service_url = mock.Mock() mock_cred.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq @@ -59,14 +60,16 @@ def test_initialize_readonly(self, mock_build, mock_return_cred): mock_return_cred.return_value = mock_cred bq_client = client.get_client( - project_id, service_account=service_account, private_key=key, + project_id, service_url=mock_service_url, + service_account=service_account, private_key=key, readonly=True) mock_return_cred.assert_called_once_with() mock_cred.assert_called_once_with(service_account, key, scope=BIGQUERY_SCOPE_READ_ONLY) self.assertTrue(mock_cred.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http) + mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, + discoveryServiceUrl=mock_service_url) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -80,6 +83,7 @@ def test_initialize_read_write(self, mock_build, mock_return_cred): mock_cred = mock.Mock() mock_http = mock.Mock() + mock_service_url = mock.Mock() mock_cred.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq @@ -89,14 +93,16 @@ def test_initialize_read_write(self, mock_build, mock_return_cred): mock_return_cred.return_value = mock_cred bq_client = client.get_client( - project_id, service_account=service_account, private_key=key, + project_id, service_url=mock_service_url, + 
service_account=service_account, private_key=key, readonly=False) mock_return_cred.assert_called_once_with() mock_cred.assert_called_once_with(service_account, key, scope=BIGQUERY_SCOPE) self.assertTrue(mock_cred.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http) + mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, + discoveryServiceUrl=mock_service_url) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -112,6 +118,7 @@ def test_initialize_key_file(self, mock_open, mock_build, mock_cred = mock.Mock() mock_http = mock.Mock() + mock_service_url = mock.Mock() mock_cred.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq @@ -123,7 +130,8 @@ def test_initialize_key_file(self, mock_open, mock_build, mock_return_cred.return_value = mock_cred bq_client = client.get_client( - project_id, service_account=service_account, + project_id, service_url=mock_service_url, + service_account=service_account, private_key_file=key_file, readonly=False) mock_open.assert_called_once_with(key_file, 'rb') @@ -131,7 +139,8 @@ def test_initialize_key_file(self, mock_open, mock_build, mock_cred.assert_called_once_with(service_account, key, scope=BIGQUERY_SCOPE) self.assertTrue(mock_cred.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http) + mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, + discoveryServiceUrl=mock_service_url) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -147,6 +156,7 @@ def test_initialize_json_key_file(self, mock_open, mock_build, mock_return_cred) mock_cred = mock.Mock() mock_http = mock.Mock() + mock_service_url = mock.Mock() mock_cred.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq @@ -156,13 +166,14 @@ def test_initialize_json_key_file(self, mock_open, mock_build, mock_return_cred) project_id = 'project' mock_return_cred.return_value = mock_cred - bq_client = client.get_client(project_id, json_key_file=json_key_file, readonly=False) + bq_client = client.get_client( + project_id, service_url=mock_service_url, json_key_file=json_key_file, readonly=False) mock_open.assert_called_once_with(json_key_file, 'r') mock_return_cred.assert_called_once_with() mock_cred.assert_called_once_with(json_key['client_email'], json_key['private_key'], scope=BIGQUERY_SCOPE) self.assertTrue(mock_cred.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http) + mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, discoveryServiceUrl=mock_service_url) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) From 3f90480aae5845007c9507ae0d7b194b4fde0225 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Tue, 15 Mar 2016 09:14:54 -0500 Subject: [PATCH 065/146] Bump version to 1.6.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cc13fbc..b0c737b 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import find_packages from setuptools import setup -VERSION = '1.5.0' +VERSION = '1.6.0' setup_args = dict( name='BigQuery-Python', From 117af090b73cb3a8df66107e3c5c4dd4d86f85fc Mon Sep 17 00:00:00 2001 From: nickstanisha Date: Sat, 9 Apr 2016 14:14:21 -0400 Subject: [PATCH 066/146] Changed all docstrings to 
NumPy Style Python Docstrings --- bigquery/__init__.py | 3 + bigquery/client.py | 987 +++++++++++++++++++++++-------------- bigquery/query_builder.py | 233 +++++---- bigquery/schema_builder.py | 66 ++- 4 files changed, 791 insertions(+), 498 deletions(-) diff --git a/bigquery/__init__.py b/bigquery/__init__.py index ef22544..086be47 100644 --- a/bigquery/__init__.py +++ b/bigquery/__init__.py @@ -1,4 +1,7 @@ from __future__ import absolute_import + +__version__ = '1.6.0' + from .client import get_client from .client import ( BIGQUERY_SCOPE, diff --git a/bigquery/client.py b/bigquery/client.py index 5db2630..33e8275 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -52,32 +52,41 @@ def get_client(project_id, credentials=None, AssertionCredentials or a service account and private key combination need to be provided in order to authenticate requests to BigQuery. - Args: - project_id: the BigQuery project id. - credentials: an AssertionCredentials instance to authenticate requests - to BigQuery. - service_url: a URI string template pointing to the location of - Google's API discovery service. Requires two parameters - {api} and {apiVersion} that when filled in produce an - absolute URI to the discovery document for that service. - If not set then the default googleapiclient disovery URI - is used. - service_account: the Google API service account name. - private_key: the private key associated with the service account in - PKCS12 or PEM format. - private_key_file: the name of the file containing the private key - associated with the service account in PKCS12 or PEM - format. - json_key: the JSON key associated with the service account - json_key_file: the name of the JSON key file associated with - the service account - readonly: bool indicating if BigQuery access is read-only. Has no - effect if credentials are provided. - swallow_results: If set to false then return the actual response value - instead of converting to a boolean. - - Returns: - an instance of BigQueryClient. + Parameters + ---------- + project_id : str + The BigQuery project id + credentials : oauth2client.client.SignedJwtAssertionCredentials, optional + AssertionCredentials instance to authenticate requests to BigQuery (optional, + must provide `service_account` and (`private_key` or `private_key_file`) or + (`json_key` or `json_key_file`) if not included + service_url : str, optional + A URI string template pointing to the location of Google's API discovery + service. Requires two parameters {api} and {apiVersion} that when filled in + produce an absolute URI to the discovery document for that service. If not set + then the default googleapiclient discovery URI is used. See `credentials` + service_account : str, optional + The Google API service account name. See `credentials` + private_key : str, optional + The private key associated with the service account in PKCS12 or PEM format. See `credentials` + private_key_file : str, optional + The name of the file containing the private key associated with the service + account in PKCS12 or PEM format. See `credentials` + json_key : dict, optional + The JSON key associated with the service account. See `credentials` + json_key_file : str, optional + The name of the JSON key file associated with the service account. See `credentials`. + readonly : bool + Bool indicating if BigQuery access is read-only. Has no effect if credentials are + provided. Default True. 
+ swallow_results : bool + If set to False, then return the actual response value instead of converting to + boolean. Default True. + + Returns + ------- + BigQueryClient + An instance of the BigQuery client. """ if not credentials: @@ -151,19 +160,23 @@ def _submit_query_job(self, query_data): For fine-grained control over a query job, see: https://google-api-client-libraries.appspot.com/documentation/bigquery/v2/python/latest/bigquery_v2.jobs.html#query + Parameters + ---------- + query_data + query object as per "configuration.query" in + https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query - - Args: - query_data: query object as per "configuration.query" in - https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.query - - Returns: + Returns + ------- + tuple job id and query results if query completed. If dry_run is True, job id will be None and results will be empty if the query is valid or a dict containing the response if invalid. - Raises: - BigQueryTimeoutException on timeout + Raises + ------ + BigQueryTimeoutException + On timeout """ logging.debug('Submitting query job: %s' % query_data) @@ -202,15 +215,17 @@ def _insert_job(self, body_object): For more details, see: https://google-api-client-libraries.appspot.com/documentation/bigquery/v2/python/latest/bigquery_v2.jobs.html#insert + Parameters + ---------- + body_object : body object passed to bigquery.jobs().insert() - Args: - body_object: body object passed to bigquery.jobs().insert() - - Returns: - response of the bigquery.jobs().insert().execute() call + Returns + ------- + response of the bigquery.jobs().insert().execute() call - Raises: - BigQueryTimeoutException on timeout + Raises + ------ + BigQueryTimeoutException on timeout """ logging.debug('Submitting job: %s' % body_object) @@ -225,22 +240,31 @@ def _insert_job(self, body_object): def query(self, query, max_results=None, timeout=0, dry_run=False): """Submit a query to BigQuery. - Args: - query: BigQuery query string. - max_results: maximum number of rows to return per page of results. - timeout: how long to wait for the query to complete, in seconds, - before the request times out and returns. - dry_run: if True, the query isn't actually run. A valid query will - return an empty response, while an invalid one will return - the same error message it would if it wasn't a dry run. - - Returns: - job id and query results if query completed. If dry_run is True, - job id will be None and results will be empty if the query is valid - or a dict containing the response if invalid. - - Raises: - BigQueryTimeoutException on timeout + Parameters + ---------- + query : str + BigQuery query string + max_results : int, optional + The maximum number of rows to return per page of results. + timeout : float, optional + How long to wait for the query to complete, in seconds before + the request times out and returns. + dry_run : bool, optional + If True, the query isn't actually run. A valid query will return an + empty response, while an invalid one will return the same error message + it would if it wasn't a dry run. + + Returns + ------- + tuple + (job id, query results) if the query completed. If dry_run is True, job id + will be None and results will be empty if the query is valid or a ``dict`` containing + the response if invalid. 
+ + Raises + ------ + BigQueryTimeoutException + on timeout """ logging.debug('Executing query: %s' % query) @@ -256,10 +280,15 @@ def query(self, query, max_results=None, timeout=0, dry_run=False): def get_query_schema(self, job_id): """Retrieve the schema of a query by job id. - Args: - job_id: The job_id that references a BigQuery query. - Returns: - A list of dictionaries that represent the schema. + Parameters + ---------- + job_id : str + The job_id that references a BigQuery query + + Returns + ------- + list + A ``list`` of ``dict`` objects that represent the schema. """ query_reply = self.get_query_results(job_id, offset=0, limit=0) @@ -273,13 +302,18 @@ def get_query_schema(self, job_id): def get_table_schema(self, dataset, table): """Return the table schema. - Args: - dataset: the dataset containing the table. - table: the table to get the schema for. - - Returns: - A list of dicts that represent the table schema. If the table - doesn't exist, None is returned. + Parameters + ---------- + dataset : str + The dataset containing the `table`. + table : str + The table to get the schema for + + Returns + ------- + list + A ``list`` of ``dict`` objects that represent the table schema. If + the table doesn't exist, None is returned. """ try: @@ -298,12 +332,17 @@ def get_table_schema(self, dataset, table): def check_job(self, job_id): """Return the state and number of results of a query by job id. - Args: - job_id: The job id of the query to check. - - Returns: - Whether or not the query has completed and the total number of rows - included in the query table if it has completed. + Parameters + ---------- + job_id : str + The job id of the query to check. + + Returns + ------- + tuple + (``bool``, ``int``) Whether or not the query has completed and the + total number of rows included in the query table if it has completed + (else 0) """ query_reply = self.get_query_results(job_id, offset=0, limit=0) @@ -317,13 +356,21 @@ def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): to manually page through results, you can use `get_query_results` method directly. - Args: - job_id: The job id that references a BigQuery query. - offset: The offset of the rows to pull from BigQuery. - limit: The number of rows to retrieve from a query table. - timeout: Timeout in seconds. - Returns: - A list of dictionaries that represent table rows. + Parameters + ---------- + job_id : str + The job id that references a BigQuery query. + offset : int, optional + The offset of the rows to pull from BigQuery + limit : int, optional + The number of rows to retrieve from a query table. + timeout : float, optional + Timeout in seconds. + + Returns + ------- + list + A ``list`` of ``dict`` objects that represent table rows. """ # Get query results @@ -348,22 +395,32 @@ def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): def check_dataset(self, dataset_id): """Check to see if a dataset exists. - Args: - dataset: dataset unique id - Returns: - bool indicating if the table exists. + + Parameters + ---------- + dataset_id : str + Dataset unique id + + Returns + ------- + bool + True if dataset at `dataset_id` exists, else Fasle """ dataset = self.get_dataset(dataset_id) return bool(dataset) def get_dataset(self, dataset_id): - """ - Retrieve a dataset if it exists, otherwise return an empty dict. 
- Args: - dataset: dataset unique id - Returns: - dictionary containing the dataset object if it exists, otherwise - an empty dictionary + """Retrieve a dataset if it exists, otherwise return an empty dict. + + Parameters + ---------- + dataset_id : str + Dataset unique id + + Returns + ------- + dict + Contains dataset object if it exists, else empty """ try: dataset = self.bigquery.datasets().get( @@ -376,27 +433,35 @@ def get_dataset(self, dataset_id): def check_table(self, dataset, table): """Check to see if a table exists. - Args: - dataset: the dataset to check. - table: the name of the table. - - Returns: - bool indicating if the table exists. + Parameters + ---------- + dataset : str + The dataset to check + table : str + The name of the table + + Returns + ------- + bool + True if table exists, else False """ table = self.get_table(dataset, table) return bool(table) def get_table(self, dataset, table): - """ - Retrieve a table if it exists, otherwise return an empty dict. - - Args: - dataset: the dataset that the table is in - table: the name of the table - - Returns: - dictionary containing the table object if it exists, otherwise - an empty dictionary + """ Retrieve a table if it exists, otherwise return an empty dict. + + Parameters + ---------- + dataset : str + The dataset that the table is in + table : str + The name of the table + + Returns + ------- + dict + Containing the table object if it exists, else empty """ try: table = self.bigquery.tables().get( @@ -410,15 +475,22 @@ def get_table(self, dataset, table): def create_table(self, dataset, table, schema, expiration_time=None): """Create a new table in the dataset. - Args: - dataset: the dataset to create the table in. - table: the name of table to create. - schema: table schema dict. - expiration_time: the expiry time in milliseconds since the epoch. - - Returns: - bool indicating if the table was successfully created or not, - or response from BigQuery if swallow_results is set for False. + Parameters + ---------- + dataset : str + The dataset to create the table in + table : str + The name of the table to create + schema : dict + The table schema + expiration_time : float, optional + The expiry time in milliseconds since the epoch. + + Returns + ------- + Union[bool, dict] + If the table was successfully created, or response from BigQuery + if swallow_results is set to False """ body = { @@ -456,14 +528,20 @@ def create_table(self, dataset, table, schema, expiration_time=None): def update_table(self, dataset, table, schema): """Update an existing table in the dataset. - Args: - dataset: the dataset to update the table in. - table: the name of table to update. - schema: table schema dict. - - Returns: + Parameters + ---------- + dataset : str + The dataset to update the table in + table : str + The name of the table to update + schema : dict + Table schema + + Returns + ------- + Union[bool, dict] bool indicating if the table was successfully updated or not, - or response from BigQuery if swallow_results is set for False. + or response from BigQuery if swallow_results is set to False. """ body = { @@ -498,14 +576,20 @@ def update_table(self, dataset, table, schema): def patch_table(self, dataset, table, schema): """Patch an existing table in the dataset. - Args: - dataset: the dataset to patch the table in. - table: the name of table to patch. - schema: table schema dict. - - Returns: - bool indicating if the table was successfully patched or not, - or response from BigQuery if swallow_results is set for False. 
+ Parameters + ---------- + dataset : str + The dataset to patch the table in + table : str + The name of the table to patch + schema : dict + The table schema + + Returns + ------- + Union[bool, dict] + Bool indicating if the table was successfully patched or not, + or response from BigQuery if swallow_results is set to False """ body = { @@ -540,14 +624,20 @@ def patch_table(self, dataset, table, schema): def create_view(self, dataset, view, query): """Create a new view in the dataset. - Args: - dataset: the dataset to create the view in. - view: the name of view to create. - query: a query that BigQuery executes when the view is referenced. - - Returns: + Parameters + ---------- + dataset : str + The dataset to create the view in + view : str + The name of the view to create + query : dict + A query that BigQuery executes when the view is referenced. + + Returns + ------- + Union[bool, dict] bool indicating if the view was successfully created or not, - or response from BigQuery if swallow_results is set for False. + or response from BigQuery if swallow_results is set to False. """ body = { @@ -584,11 +674,16 @@ def create_view(self, dataset, view, query): def delete_table(self, dataset, table): """Delete a table from the dataset. - Args: - dataset: the dataset to delete the table from. - table: the name of the table to delete. + Parameters + ---------- + dataset : str + The dataset to delete the table from. + table : str + The name of the table to delete - Returns: + Returns + ------- + Union[bool, dict] bool indicating if the table was successfully deleted or not, or response from BigQuery if swallow_results is set for False. """ @@ -617,16 +712,21 @@ def get_tables(self, dataset_id, app_id, start_time, end_time): """Retrieve a list of tables that are related to the given app id and are inside the range of start and end times. - Args: - dataset_id: The BigQuery dataset id to consider. - app_id: The appspot name - start_time: The datetime or unix time after which records will be - fetched. - end_time: The datetime or unix time up to which records will be - fetched. - - Returns: - A list of table names. + Parameters + ---------- + dataset_id : str + The BigQuery dataset id to consider. + app_id : str + The appspot name + start_time : Union[datetime, int] + The datetime or unix time after which records will be fetched. + end_time : Union[datetime, int] + The datetime or unix time up to which records will be fetched. + + Returns + ------- + list + A ``list`` of table names. """ if isinstance(start_time, datetime): @@ -660,40 +760,55 @@ def import_data_from_uris( skip_leading_rows=None, ): """ - Imports data into a BigQuery table from cloud storage. 
- Args: - source_uris: required string or list of strings representing - the uris on cloud storage of the form: - gs://bucket/filename - dataset: required string id of the dataset - table: required string id of the table - job: optional string identifying the job (a unique jobid - is automatically generated if not provided) - schema: optional list representing the bigquery schema - source_format: optional string - (one of the JOB_SOURCE_FORMAT_* constants) - create_disposition: optional string - (one of the JOB_CREATE_* constants) - write_disposition: optional string - (one of the JOB_WRITE_* constants) - encoding: optional string default - (one of the JOB_ENCODING_* constants) - ignore_unknown_values: optional boolean - max_bad_records: optional boolean - allow_jagged_rows: optional boolean for csv only - allow_quoted_newlines: optional boolean for csv only - field_delimiter: optional string for csv only - quote: optional string the quote character for csv only - skip_leading_rows: optional int for csv only - - Optional arguments with value None are determined by - BigQuery as described: - https://developers.google.com/bigquery/docs/reference/v2/jobs - - Returns: - dict, a BigQuery job resource - Raises: - JobInsertException on http/auth failures or error in result + Imports data into a BigQuery table from cloud storage. Optional arguments that are not + specified are determined by BigQuery as described: + https://developers.google.com/bigquery/docs/reference/v2/jobs + + Parameters + ---------- + source_urls : list + A ``list`` of ``str`` objects representing the urls on cloud storage + of the form: gs://bucket/filename + dataset : str + String id of the dataset + table : str + String id of the table + job : str, optional + Identifies the job (a unique job id is automatically generated if not provided) + schema : list, optional + Represents the BigQuery schema + source_format : str, optional + One of the JOB_SOURCE_FORMAT_* constants + create_disposition : str, optional + One of the JOB_CREATE_* constants + write_disposition : str, optional + One of the JOB_WRITE_* constants + encoding : str, optional + One of the JOB_ENCODING_* constants + ignore_unknown_values : bool, optional + Whether or not to ignore unknown values + max_bad_records : int, optional + Maximum number of bad records + allow_jagged_rows : bool, optional + For csv only + allow_quoted_newlines : bool, optional + For csv only + field_delimiter : str, optional + For csv only + quote : str, optional + Quote character for csv only + skip_leading_rows : int, optional + For csv only + + Returns + ------- + dict + A BigQuery job response + + Raises + ------ + JobInsertException + on http/auth failures or error in result """ source_uris = source_uris if isinstance(source_uris, list) \ else [source_uris] @@ -795,30 +910,40 @@ def export_data_to_uris( field_delimiter=None, ): """ - Export data from a BigQuery table to cloud storage. 
-        Args:
-            destination_uris: required string or list of strings representing
-                              the uris on cloud storage of the form:
-                              gs://bucket/filename
-            dataset: required string id of the dataset
-            table: required string id of the table
-            job: optional string identifying the job (a unique jobid
-                 is automatically generated if not provided)
-            compression: optional string
-                (one of the JOB_COMPRESSION_* constants)
-            destination_format: optional string
-                (one of the JOB_DESTINATION_FORMAT_* constants)
-            print_header: optional boolean
-            field_delimiter: optional string
-
-        Optional arguments with value None are determined by
-        BigQuery as described:
-        https://developers.google.com/bigquery/docs/reference/v2/jobs
-
-        Returns:
-            dict, a BigQuery job resource
-        Raises:
-            JobInsertException on http/auth failures or error in result
+        Export data from a BigQuery table to cloud storage. Optional arguments that are
+        not specified are determined by BigQuery as described:
+        https://developers.google.com/bigquery/docs/reference/v2/jobs
+
+        Parameters
+        ----------
+        destination_uris : Union[str, list]
+            ``str`` or ``list`` of ``str`` objects representing the URIs on
+            cloud storage of the form: gs://bucket/filename
+        dataset : str
+            String id of the dataset
+        table : str
+            String id of the table
+        job : str, optional
+            String identifying the job (a unique jobid is automatically generated if
+            not provided)
+        compression : str, optional
+            One of the JOB_COMPRESSION_* constants
+        destination_format : str, optional
+            One of the JOB_DESTINATION_FORMAT_* constants
+        print_header : bool, optional
+            Whether or not to print the header
+        field_delimiter : str, optional
+            Character separating fields in delimited file
+
+        Returns
+        -------
+        dict
+            A BigQuery job resource
+
+        Raises
+        ------
+        JobInsertException
+            On http/auth failures or error in result
         """
         destination_uris = destination_uris \
             if isinstance(destination_uris, list) else [destination_uris]

@@ -881,33 +1006,41 @@ def write_to_table(
     ):
         """
         Write query result to table. If dataset or table is not provided,
-        Bigquery will write the result to temporary table.
-        Args:
-            query: required BigQuery query string.
-            dataset: optional string id of the dataset
-            table: optional string id of the table
-            external_udf_uris: optional list of external UDF URIs
-                               (if given,
-                                URIs must be Google Cloud Storage
-                                and have .js extensions
-                               )
-            allow_large_results: optional boolean
-            use_query_cache: optional boolean
-            priority: optional string
-                (one of the JOB_PRIORITY_* constants)
-            create_disposition: optional string
-                (one of the JOB_CREATE_* constants)
-            write_disposition: optional string
-                (one of the JOB_WRITE_* constants)
-
-        Optional arguments with value None are determined by
-        BigQuery as described:
-        https://developers.google.com/bigquery/docs/reference/v2/jobs
-
-        Returns:
-            dict, a BigQuery job resource
-        Raises:
-            JobInsertException on http/auth failures or error in result
+        BigQuery will write the result to a temporary table. Optional arguments
+        that are not specified are determined by BigQuery as described:
+        https://developers.google.com/bigquery/docs/reference/v2/jobs
+
+        Parameters
+        ----------
+        query : str
+            BigQuery query string
+        dataset : str, optional
+            String id of the dataset
+        table : str, optional
+            String id of the table
+        external_udf_uris : list, optional
+            Contains external UDF URIs. If given, URIs must be Google Cloud
+            Storage and have .js extensions.
+ allow_large_results : bool, optional + Whether or not to allow large results + use_query_cache : bool, optional + Whether or not to use query cache + priority : str, optional + One of the JOB_PRIORITY_* constants + create_disposition : str, optional + One of the JOB_CREATE_* constants + write_disposition : str, optional + One of the JOB_WRITE_* constants + + Returns + ------- + dict + A BigQuery job resource + + Raises + ------ + JobInsertException + On http/auth failures or error in result """ configuration = { @@ -958,18 +1091,27 @@ def write_to_table( def wait_for_job(self, job, interval=5, timeout=60): """ Waits until the job indicated by job_resource is done or has failed - Args: - job: dict, representing a BigQuery job resource - or str, representing a BigQuery job id - interval: optional float polling interval in seconds, default = 5 - timeout: optional float timeout in seconds, default = 60 - Returns: - dict, final state of the job_resource, as described here: - https://developers.google.com/resources/api-libraries/documentation - /bigquery/v2/python/latest/bigquery_v2.jobs.html#get - Raises: - JobExecutingException on http/auth failures or error in result - BigQueryTimeoutException on timeout + + Parameters + ---------- + job : Union[dict, str] + ``dict`` representing a BigQuery job resource, or a ``str`` representing + the BigQuery job id + interval : float, optional + Polling interval in seconds, default = 5 + timeout : float, optional + Timeout in seconds, default = 60 + + Returns + ------- + dict + Final state of the job resouce, as described here: + https://developers.google.com/resources/api-libraries/documentation/bigquery/v2/python/latest/bigquery_v2.jobs.html#get + + Raises + ------ + Union[JobExecutingException, BigQueryTimeoutException] + On http/auth failures or timeout """ complete = False job_id = str(job if isinstance(job, @@ -998,13 +1140,20 @@ def wait_for_job(self, job, interval=5, timeout=60): def push_rows(self, dataset, table, rows, insert_id_key=None): """Upload rows to BigQuery table. - Args: - dataset: the dataset to upload to. - table: the name of the table to insert rows into. - rows: list of rows to add to table - insert_id_key: key for insertId in row - - Returns: + Parameters + ---------- + dataset : str + The dataset to upload to + table : str + The name of the table to insert rows into + rows : list + A ``list`` of rows (``dict`` objects) to add to the table + insert_id_key : str, optional + Key for insertId in row + + Returns + ------- + Union[bool, dict] bool indicating if insert succeeded or not, or response from BigQuery if swallow_results is set for False. """ @@ -1061,12 +1210,18 @@ def push_rows(self, dataset, table, rows, insert_id_key=None): def _get_all_tables(self, dataset_id, cache=False): """Retrieve a list of all tables for the dataset. - Args: - dataset_id: the dataset to retrieve table names for. - cache: To use cached value or not. Timeout value - equals CACHE_TIMEOUT. - Returns: - a dictionary of app ids mapped to their table names. + Parameters + ---------- + dataset_id : str + The dataset to retrieve table names for + cache : bool, optional + To use cached value or not (default False). Timeout value equals + CACHE_TIMEOUT. 
+ + Returns + ------- + dict + A ``dict`` of app ids mapped to their table names """ do_fetch = True if cache and self.cache.get(dataset_id): @@ -1095,12 +1250,15 @@ def _get_all_tables(self, dataset_id, cache=False): def _parse_table_list_response(self, list_response): """Parse the response received from calling list on tables. - Args: - list_response: The response found by calling list on a BigQuery - table object. + Parameters + ---------- + list_response + The response found by calling list on a BigQuery table object. - Returns: - The dictionary of dates referenced by table names. + Returns + ------- + dict + Dates referenced by table names """ tables = defaultdict(dict) @@ -1131,12 +1289,15 @@ def _parse_table_name(self, table_id): """Parse a table name in the form of appid_YYYY_MM or YYYY_MM_appid and return a tuple consisting of YYYY-MM and the app id. - Args: - table_id: The table id as listed by BigQuery. + Parameters + ---------- + table_id : str + The table id as listed by BigQuery - Returns: - Tuple containing year/month and app id. Returns None, None if the - table id cannot be parsed. + Returns + ------- + tuple + (year/month, app id), or (None, None) if the table id cannot be parsed. """ # Prefix date @@ -1165,13 +1326,19 @@ def _filter_tables_by_time(self, tables, start_time, end_time): """Filter a table dictionary and return table names based on the range of start and end times in unix seconds. - Args: - tables: The dictionary of dates referenced by table names - start_time: The unix time after which records will be fetched. - end_time: The unix time up to which records will be fetched. - - Returns: - A list of table names that are inside the time range. + Parameters + ---------- + tables : dict + Dates referenced by table names + start_time : int + The unix time after which records will be fetched + end_time : int + The unix time up to which records will be fetched + + Returns + ------- + list + Table names that are inside the time range """ return [table_name for (table_name, unix_seconds) in tables.items() @@ -1180,12 +1347,18 @@ def _filter_tables_by_time(self, tables, start_time, end_time): def _in_range(self, start_time, end_time, time): """Indicate if the given time falls inside of the given range. - Args: - start_time: The unix time for the start of the range. - end_time: The unix time for the end of the range. - time: The unix time to check. - - Returns: + Parameters + ---------- + start_time : int + The unix time for the start of the range + end_time : int + The unix time for the end of the range + time : int + The unix time to check + + Returns + ------- + bool True if the time falls within the range, False otherwise. """ @@ -1199,14 +1372,23 @@ def get_query_results(self, job_id, offset=None, limit=None, page_token=None, ti """Execute the query job indicated by the given job id. This is direct mapping to bigquery api https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults - Args: - job_id: The job id of the query to check. - offset: The index the result set should start at. - limit: The maximum number of results to retrieve. - page_token: Page token, returned by a previous call, to request the next page of results. - timeout: Timeout in seconds. - Returns: - The query reply. + Parameters + ---------- + job_id : str + The job id of the query to check + offset : optional + The index the result set should start at. + limit : int, optional + The maximum number of results to retrieve. 
+ page_token : optional + Page token, returned by previous call, to request the next page of results. + timeout : float, optional + Timeout in seconds + + Returns + ------- + out + The query reply """ job_collection = self.bigquery.jobs() @@ -1221,14 +1403,18 @@ def get_query_results(self, job_id, offset=None, limit=None, page_token=None, ti def _transform_row(self, row, schema): """Apply the given schema to the given BigQuery data row. - Args: - row: A single BigQuery row to transform. - schema: The BigQuery table schema to apply to the row, specifically - the list of field dicts. - - Returns: - Dict containing keys that match the schema and values that match - the row. + Parameters + ---------- + row + A single BigQuery row to transform + schema : list + The BigQuery table schema to apply to the row, specifically + the list of field dicts. + + Returns + ------- + dict + Mapping schema to row """ log = {} @@ -1267,12 +1453,16 @@ def _recurse_on_row(self, col_dict, nested_value): """Apply the schema specified by the given dict to the nested value by recursing on it. - Args: - col_dict: A dict containing the schema to apply to the nested - value. - nested_value: A value nested in a BigQuery row. - Returns: - Dict or list of dicts from applied schema. + Parameters + ---------- + col_dict : dict + The schema to apply to the nested value. + nested_value : A value nested in a BigQuery row. + + Returns + ------- + Union[dict, list] + ``dict`` or ``list`` of ``dict`` objects from applied schema. """ row_value = None @@ -1291,10 +1481,15 @@ def _recurse_on_row(self, col_dict, nested_value): def _generate_hex_for_uris(self, uris): """Given uris, generate and return hex version of it - Args: - uris: A list containing all uris - Returns: - string of hexed uris + Parameters + ---------- + uris : list + Containing all uris + + Returns + ------- + str + Hexed uris """ return sha256((":".join(uris) + str(time())).encode()).hexdigest() @@ -1327,18 +1522,23 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, access=None): """Create a new BigQuery dataset. - Args: - dataset_id: required unique string identifying the dataset with the - project (the referenceId of the dataset, not the - integer id of the dataset) - friendly_name: optional string providing a human readable name - description: optional longer string providing a description - access: optional object indicating access permissions (see - https://developers.google.com/bigquery/docs/reference/v2/ - datasets#resource) - - Returns: - bool indicating if dataset was created or not, or response + Parameters + ---------- + dataset_id : str + Unique ``str`` identifying the dataset with the project (the referenceID + of the dataset, not the integer id of the dataset) + friendly_name: str, optional + A human readable name + description: str, optional + Longer string providing a description + access : list, optional + Indicating access permissions (see + https://developers.google.com/bigquery/docs/reference/v2/datasets#resource) + + Returns + ------- + Union[bool, dict] + ``bool`` indicating if dataset was created or not, or response from BigQuery if swallow_results is set for False """ try: @@ -1365,8 +1565,10 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, def get_datasets(self): """List all datasets in the project. 
-        Returns:
-            a list of dataset resources
+        Returns
+        -------
+        list
+            Dataset resources
         """
         try:
             datasets = self.bigquery.datasets()
@@ -1380,17 +1582,24 @@ def delete_dataset(self, dataset_id, delete_contents=False):
         """Delete a BigQuery dataset.

-        Args:
-            dataset_id: required unique string identifying the dataset with the
-                        project (the referenceId of the dataset)
-            delete_contents: forces deletion of the dataset even when the
-                             dataset contains data
-        Returns:
-            bool indicating if the delete was successful or not, or response
+        Parameters
+        ----------
+        dataset_id : str
+            Unique ``str`` identifying the dataset with the project (the referenceId of the dataset)
+        delete_contents : bool, optional
+            If True, forces the deletion of the dataset even when the dataset contains data
+            (Default = False)
+
+        Returns
+        -------
+        Union[bool, dict]
+            bool indicating if the delete was successful or not, or response
             from BigQuery if swallow_results is set for False
-        Raises:
-            HttpError 404 when dataset with dataset_id does not exist
+        Raises
+        ------
+        HttpError
+            404 when dataset with dataset_id does not exist
         """
         try:
             datasets = self.bigquery.datasets()
@@ -1416,15 +1625,21 @@ def update_dataset(self, dataset_id, friendly_name=None, description=None,
        replaces the entire dataset resource, whereas the patch method only
        replaces fields that are provided in the submitted dataset resource.

-        Args:
-            dataset_id: required unique string identifying the dataset with the
-                        project (the referenceId of the dataset).
-            friendly_name: an optional descriptive name for the dataset.
-            description: an optional description of the dataset.
-            access: an optional object indicating access permissions.
-
-        Returns:
-            bool indicating if the update was successful or not, or response
+        Parameters
+        ----------
+        dataset_id : str
+            Unique ``str`` identifying the dataset with the project (the referenceId of the dataset)
+        friendly_name : str, optional
+            An optional descriptive name for the dataset.
+        description : str, optional
+            An optional description of the dataset.
+        access : list, optional
+            Indicating access permissions
+
+        Returns
+        -------
+        Union[bool, dict]
+            ``bool`` indicating if the update was successful or not, or response
             from BigQuery if swallow_results is set for False.
         """
         try:
@@ -1453,14 +1668,21 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None,
        replaces the entire dataset resource, whereas the patch method only
        replaces fields that are provided in the submitted dataset resource.

-        Args:
-            dataset_id: required unique string identifying the dataset with the
-                        projedct (the referenceId of the dataset).
-            friendly_name: an optional descriptive name for the dataset.
-            description: an optional description of the dataset.
-            access: an optional object indicating access permissions.
-        Returns:
-            bool indicating if the patch was successful or not, or response
+        Parameters
+        ----------
+        dataset_id : str
+            Unique ``str`` identifying the dataset with the project (the referenceId of the dataset)
+        friendly_name : str, optional
+            An optional descriptive name for the dataset.
+        description : str, optional
+            An optional description of the dataset.
+        access : list, optional
+            Indicating access permissions.
+
+        Returns
+        -------
+        Union[bool, dict]
+            ``bool`` indicating if the patch was successful or not, or response
+            from BigQuery if swallow_results is set for False.
""" try: @@ -1484,17 +1706,23 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, def dataset_resource(self, ref_id, friendly_name=None, description=None, access=None): - """See https://developers.google.com/bigquery/docs/reference/v2/ - datasets#resource - - Args: - ref_id: string dataset id (the reference id, not the integer id) - friendly_name: opt string - description: opt string - access: opt list - - Returns: - a dictionary representing a BigQuery dataset resource + """See https://developers.google.com/bigquery/docs/reference/v2/datasets#resource + + Parameters + ---------- + ref_id : str + Dataset id (the reference id, not the integer id) + friendly_name : str, optional + An optional descriptive name for the dataset + description : str, optional + An optional description for the dataset + access : list, optional + Indicating access permissions + + Returns + ------- + dict + Representing BigQuery dataset resource """ data = { "datasetReference": { @@ -1516,18 +1744,25 @@ def schema_from_record(cls, record): """Given a dict representing a record instance to be inserted into BigQuery, calculate the schema. - Args: - record: dict representing a record to be inserted into big query, - where all keys are strings (representing column names in - the record) and all values are of type int, str, unicode, - float,bool, timestamp or dict. A dict value represents a - record, and must conform to the same restrictions as record - - Returns: - a list representing a BigQuery schema - - Note: results are undefined if a different value types are provided for - a repeated field: E.g. - { rfield: [ { x: 1}, {x: "a string"} ] } # undefined! + Parameters + ---------- + record : dict + representing a record to be inserted into big query, + where all keys are ``str`` objects (representing column names in + the record) and all values are of type ``int``, ``str``, ``unicode``, + ``float``, ``bool``, ``datetime``, or ``dict``. A ``dict`` value represents a + record, and must conform to the same restrictions as record + + Returns + ------- + list + BigQuery schema + + Notes + ----- + Results are undefined if a different value type is provided for a repeated + field: E.g. + + >>> { rfield: [ { x: 1}, {x: "a string"} ] } # undefined! """ return schema_from_record(record) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index fc6d90c..cb5e60a 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -6,41 +6,33 @@ def render_query(dataset, tables, select=None, conditions=None, """Render a query that will run over the given tables using the specified parameters. - Args: - dataset: the BigQuery data set to query data from. - tables: the tables in dataset to query. - select: a dictionary of selections for a table. The keys function as - column names and the values function as options to apply to - the select field such as alias and format. For example, - { - 'start_time': { - 'alias': 'StartTime', - 'format': 'INTEGER-FORMAT_UTC_USEC' - } - } - is represented as 'SEC_TO_TIMESTAMP(INTEGER(start_time)) as - StartTime' in a query. Pass None to select all. - conditions: a list of dicts to filter results by. - Each dict should be formatted as the following: - { - 'field': 'foo', - 'type': 'FLOAT', - 'comparators': [ - { - 'condition': '>=', - 'negate': False, - 'value': '1' - } - ] - } - which is rendered as 'foo >= FLOAT('1')' in the query. - groupings: a list of field names to group by. - order_by: a dict with two keys, field and direction. 
- Such that the dictionary should be formatted as - {'field':'TimeStamp, 'direction':'desc'}. - - Returns: - a query string. + Parameters + ---------- + dataset : str + The BigQuery dataset to query data from + tables : Union[dict, list] + The table in `dataset` to query. + select : dict, optional + The keys function as column names and the values function as options to apply to + the select field such as alias and format. For example, select['start_time'] might + have the form {'alias': 'StartTime', 'format': 'INTEGER-FORMAT_UTC_USEC'}, which would + be represented as 'SEC_TO_TIMESTAMP(INTEGER(start_time)) as StartTime' in a query. Pass + `None` to seoect all. + conditions : list, optional + a ``list`` of ``dict`` objects to filter results by. Each dict should have the keys 'field', + 'type', and 'comparators'. The first two map to strings representing the field (e.g. 'foo') + and type (e.g. 'FLOAT'). 'comparators' maps to another ``dict`` containing the keys 'condition', + 'negate', and 'value'. If 'comparators' = {'condition': '>=', 'negate': False, 'value': 1}, this + example will be rdnered as 'foo >= FLOAT('1')' in the query. + ``list`` of field names to group by + order_by : dict, optional + Keys = {'field', 'direction'}. `dict` should be formatted as {'field':'TimeStamp, 'direction':'desc'} + or similar + + Returns + ------- + str + A rendered query """ if None in (dataset, tables): @@ -61,17 +53,19 @@ def render_query(dataset, tables, select=None, conditions=None, def _render_select(selections): """Render the selection part of a query. - Args: - selections: a dictionary of selections for a table. The - keys function as column names and the values function as - options to apply to the select field such as alias and format. - For example {'start_time': {'alias': 'StartTime', 'format': - 'INTEGER-FORMAT_UTC_USEC'}} is represented as - 'SEC_TO_TIMESTAMP(INTEGER(start_time))' in a query. Pass None to - select all. - - Returns: - a string that represents the select part of a query. + Parameters + ---------- + selections : dict + Selections for a table + + Returns + ------- + str + A string for the "select" part of a query + + See Also + -------- + render_query : Further clarification of `selections` dict formatting """ if not selections: @@ -100,15 +94,20 @@ def _render_select(selections): def _format_select(formatter, name): """Modify the query selector by applying any formatters to it. - Args: - formatter: hyphen-delimited formatter string where formatters are - applied inside-out, e.g. the formatter string - SEC_TO_MICRO-INTEGER-FORMAT_UTC_USEC applied to the selector - foo would result in FORMAT_UTC_USEC(INTEGER(foo*1000000)). - name: the name of the selector to apply formatters to. - - Returns: - formatted selector. + Parameters + ---------- + formatter : str + Hyphen-delimited formatter string where formatters are + applied inside-out, e.g. the formatter string + SEC_TO_MICRO-INTEGER-FORMAT_UTC_USEC applied to the selector + foo would result in FORMAT_UTC_USEC(INTEGER(foo*1000000)). + name: str + The name of the selector to apply formatters to. + + Returns + ------- + str + The formatted selector """ for caster in formatter.split('-'): @@ -126,12 +125,17 @@ def _format_select(formatter, name): def _render_sources(dataset, tables): """Render the source part of a query. - Args: - dataset: the data set to fetch log data from. - tables: the tables to fetch log data from. - - Returns: - a string that represents the from part of a query. 
+ Parameters + ---------- + dataset : str + The data set to fetch log data from. + tables : Union[dict, list] + The tables to fetch log data from + + Returns + ------- + str + A string that represents the "from" part of a query. """ if isinstance(tables, dict): @@ -154,15 +158,19 @@ def _render_sources(dataset, tables): def _render_conditions(conditions): """Render the conditions part of a query. - Args: - conditions: a list of dictionary items to filter a table. - Each dict should be formatted as {'field': 'start_time', - 'value': {'value': 1, 'negate': False}, 'comparator': '>', - 'type': 'FLOAT'} which is represetned as - 'start_time > FLOAT('1')' in the query. + Parameters + ---------- + conditions : list + A list of dictionay items to filter a table. + + Returns + ------- + str + A string that represents the "where" part of a query - Returns: - a string that represents the where part of a query. + See Also + -------- + render_query : Further clarification of `conditions` formatting. """ if not conditions: @@ -191,14 +199,18 @@ def _render_conditions(conditions): def _render_condition(field, field_type, comparators): """Render a single query condition. - Args: - field: the field the condition applies to. - field_type: the data type of the field. - comparator: the logic operator to use. - value_dicts: a list of value dicts of the form - {'value': 'foo', 'negate': False} - - Returns: + Parameters + ---------- + field : str + The field the condition applies to + field_type : str + The data type of the field. + comparators : array_like + An iterable of logic operators to use. + + Returns + ------- + str a condition string. """ @@ -252,12 +264,17 @@ def _render_condition(field, field_type, comparators): def _render_condition_value(value, field_type): """Render a query condition value. - Args: - value: the value of the condition. - field_type: the data type of the field. - - Returns: - a value string. + Parameters + ---------- + value : Union[bool, int, float, str, datetime] + The value of the condition + field_type : str + The data type of the field + + Returns + ------- + str + A value string. """ # BigQuery cannot cast strings to booleans, convert to ints @@ -273,11 +290,15 @@ def _render_condition_value(value, field_type): def _render_groupings(fields): """Render the group by part of a query. - Args: - fields: a list of fields to group by. + Parameters + ---------- + fields : list + A list of fields to group by. - Returns: - a string that represents the group by part of a query. + Returns + ------- + str + A string that represents the "group by" part of a query. """ if not fields: @@ -289,15 +310,19 @@ def _render_groupings(fields): def _render_having(having_conditions): """Render the having part of a query. - Args: - conditions: a list of dictionary items to filter the rows. - Each dict should be formatted as {'field': 'start_time', - 'value': {'value': 1, 'negate': False}, 'comparator': '>', - 'type': 'FLOAT'} which is represetned as - 'start_time > FLOAT('1')' in the query. + Parameters + ---------- + having_conditions : list + A ``list`` of ``dict``s to filter the rows - Returns: - a string that represents the having part of a query. + Returns + ------- + str + A string that represents the "having" part of a query. + + See Also + -------- + render_query : Further clarification of `conditions` formatting. """ if not having_conditions: return "" @@ -325,13 +350,17 @@ def _render_having(having_conditions): def _render_order(order): """Render the order by part of a query. 
- Args: - order: a dictionary with two keys, fields and direction. - Such that the dictionary should be formatted as - {'fields': ['TimeStamp'], 'direction':'desc'}. - - Returns: - a string that represents the order by part of a query. + Parameters + ---------- + order : dict + A dictionary with two keys, fields and direction. + Such that the dictionary should be formatted as + {'fields': ['TimeStamp'], 'direction':'desc'}. + + Returns + ------- + str + A string that represents the "order by" part of a query. """ if not order or 'fields' not in order or 'direction' not in order: diff --git a/bigquery/schema_builder.py b/bigquery/schema_builder.py index 09084a7..575b390 100644 --- a/bigquery/schema_builder.py +++ b/bigquery/schema_builder.py @@ -23,13 +23,17 @@ def schema_from_record(record, timestamp_parser=default_timestamp_parser): """Generate a BigQuery schema given an example of a record that is to be inserted into BigQuery. - Args: - record: dict - timestamp_parser: unary function taking a string and return non-NIL if - string represents a date - - Returns: - schema: list + Parameters + ---------- + record : dict + Example of a record that is to be inserted into BigQuery + timestamp_parser : function, optional + Unary function taking a ``str`` and returning and ``bool`` that is + True if the string represents a date + + Returns + ------- + Schema: list """ return [describe_field(k, v, timestamp_parser=timestamp_parser) for k, v in list(record.items())] @@ -41,16 +45,25 @@ def describe_field(k, v, timestamp_parser=default_timestamp_parser): element describing that field. Raise errors if invalid value types are provided. - Args: - k: str/unicode, key representing the column - v: str/unicode/int/float/datetime/object - - Returns: - object describing the field - - Raises: - Exception: if invalid value types are provided. - + Parameters + ---------- + k : Union[str, unicode] + Key representing the column + v : Union[str, unicode, int, float, datetime, object] + Value mapped to by `k` + + Returns + ------- + object + Describing the field + + Raises + ------ + Exception + If invalid value types are provided. + + Examples + -------- >>> describe_field("username", "Bob") {"name": "username", "type": "string", "mode": "nullable"} >>> describe_field("users", [{"username": "Bob"}]) @@ -90,9 +103,22 @@ def bigquery_type(o, timestamp_parser=default_timestamp_parser): one of str/unicode/int/float/datetime/record, where record is a dict containing value which have matching BigQuery types. - Returns: - str or None if no matching type could be found - + Parameters + ---------- + o : object + A Python object + time_stamp_parser : function, optional + Unary function taking a ``str`` and returning and ``bool`` that is + True if the string represents a date + + Returns + ------- + Union[str, None] + Name of the corresponding BigQuery type for `o`, or None if no type + could be found + + Examples + -------- >>> bigquery_type("abc") "string" >>> bigquery_type(123) From 35d784dd6074be2b8bf55164dc82b02ebfedfbc9 Mon Sep 17 00:00:00 2001 From: nickstanisha Date: Sat, 9 Apr 2016 17:00:53 -0400 Subject: [PATCH 067/146] Added Sphinx and Numpydoc tools for automated documentation Using numpydoc allows aesthetically pleasing and easy to read documents to be created using Sphinx. You can see real examples of this with Numpy's own documentation (https://docs.scipy.org/doc/numpy-1.10.0/reference/generated/numpy.mean.html). 
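As a quick illustration of the docstring layout numpydoc expects, here is a minimal
sketch (the function below is a made-up example, not part of this library):

    def add(a, b):
        """Add two numbers.

        Parameters
        ----------
        a : int
            The first addend
        b : int
            The second addend

        Returns
        -------
        int
            The sum of `a` and `b`
        """
        return a + b

With the numpydoc extension enabled in conf.py, Sphinx parses the Parameters and
Returns sections of such docstrings and renders them as cleanly formatted sections
in the generated HTML.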
This docs directory contains the basic file structure necessary for creating automated documentation based off NumPy styled Docstrings. --- docs/Makefile | 216 +++++++++++++++++++++++++ docs/conf.py | 294 ++++++++++++++++++++++++++++++++++ docs/index.rst | 29 ++++ docs/make.bat | 263 ++++++++++++++++++++++++++++++ docs/pages/client.rst | 13 ++ docs/pages/query_builder.rst | 7 + docs/pages/schema_builder.rst | 7 + 7 files changed, 829 insertions(+) create mode 100644 docs/Makefile create mode 100644 docs/conf.py create mode 100644 docs/index.rst create mode 100644 docs/make.bat create mode 100644 docs/pages/client.rst create mode 100644 docs/pages/query_builder.rst create mode 100644 docs/pages/schema_builder.rst diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..3f83b08 --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,216 @@ +# Makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +PAPER = +BUILDDIR = _build + +# User-friendly check for sphinx-build +ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) +$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) +endif + +# Internal variables. +PAPEROPT_a4 = -D latex_paper_size=a4 +PAPEROPT_letter = -D latex_paper_size=letter +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# the i18n builder cannot share the environment and doctrees with the others +I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . + +.PHONY: help +help: + @echo "Please use \`make ' where is one of" + @echo " html to make standalone HTML files" + @echo " dirhtml to make HTML files named index.html in directories" + @echo " singlehtml to make a single large HTML file" + @echo " pickle to make pickle files" + @echo " json to make JSON files" + @echo " htmlhelp to make HTML files and a HTML help project" + @echo " qthelp to make HTML files and a qthelp project" + @echo " applehelp to make an Apple Help Book" + @echo " devhelp to make HTML files and a Devhelp project" + @echo " epub to make an epub" + @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" + @echo " latexpdf to make LaTeX files and run them through pdflatex" + @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" + @echo " text to make text files" + @echo " man to make manual pages" + @echo " texinfo to make Texinfo files" + @echo " info to make Texinfo files and run them through makeinfo" + @echo " gettext to make PO message catalogs" + @echo " changes to make an overview of all changed/added/deprecated items" + @echo " xml to make Docutils-native XML files" + @echo " pseudoxml to make pseudoxml-XML files for display purposes" + @echo " linkcheck to check all external links for integrity" + @echo " doctest to run all doctests embedded in the documentation (if enabled)" + @echo " coverage to run coverage check of the documentation (if enabled)" + +.PHONY: clean +clean: + rm -rf $(BUILDDIR)/* + +.PHONY: html +html: + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 
+ +.PHONY: dirhtml +dirhtml: + $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml + @echo + @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." + +.PHONY: singlehtml +singlehtml: + $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml + @echo + @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." + +.PHONY: pickle +pickle: + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle + @echo + @echo "Build finished; now you can process the pickle files." + +.PHONY: json +json: + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json + @echo + @echo "Build finished; now you can process the JSON files." + +.PHONY: htmlhelp +htmlhelp: + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp + @echo + @echo "Build finished; now you can run HTML Help Workshop with the" \ + ".hhp project file in $(BUILDDIR)/htmlhelp." + +.PHONY: qthelp +qthelp: + $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp + @echo + @echo "Build finished; now you can run "qcollectiongenerator" with the" \ + ".qhcp project file in $(BUILDDIR)/qthelp, like this:" + @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/BigQuery-Python.qhcp" + @echo "To view the help file:" + @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/BigQuery-Python.qhc" + +.PHONY: applehelp +applehelp: + $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp + @echo + @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." + @echo "N.B. You won't be able to view it unless you put it in" \ + "~/Library/Documentation/Help or install it in your application" \ + "bundle." + +.PHONY: devhelp +devhelp: + $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp + @echo + @echo "Build finished." + @echo "To view the help file:" + @echo "# mkdir -p $$HOME/.local/share/devhelp/BigQuery-Python" + @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/BigQuery-Python" + @echo "# devhelp" + +.PHONY: epub +epub: + $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub + @echo + @echo "Build finished. The epub file is in $(BUILDDIR)/epub." + +.PHONY: latex +latex: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." + @echo "Run \`make' in that directory to run these through (pdf)latex" \ + "(use \`make latexpdf' here to do that automatically)." + +.PHONY: latexpdf +latexpdf: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through pdflatex..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: latexpdfja +latexpdfja: + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex + @echo "Running LaTeX files through platex and dvipdfmx..." + $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja + @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." + +.PHONY: text +text: + $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text + @echo + @echo "Build finished. The text files are in $(BUILDDIR)/text." + +.PHONY: man +man: + $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man + @echo + @echo "Build finished. The manual pages are in $(BUILDDIR)/man." + +.PHONY: texinfo +texinfo: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo + @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." + @echo "Run \`make' in that directory to run these through makeinfo" \ + "(use \`make info' here to do that automatically)." 
+ +.PHONY: info +info: + $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo + @echo "Running Texinfo files through makeinfo..." + make -C $(BUILDDIR)/texinfo info + @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." + +.PHONY: gettext +gettext: + $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale + @echo + @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." + +.PHONY: changes +changes: + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes + @echo + @echo "The overview file is in $(BUILDDIR)/changes." + +.PHONY: linkcheck +linkcheck: + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck + @echo + @echo "Link check complete; look for any errors in the above output " \ + "or in $(BUILDDIR)/linkcheck/output.txt." + +.PHONY: doctest +doctest: + $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest + @echo "Testing of doctests in the sources finished, look at the " \ + "results in $(BUILDDIR)/doctest/output.txt." + +.PHONY: coverage +coverage: + $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage + @echo "Testing of coverage in the sources finished, look at the " \ + "results in $(BUILDDIR)/coverage/python.txt." + +.PHONY: xml +xml: + $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml + @echo + @echo "Build finished. The XML files are in $(BUILDDIR)/xml." + +.PHONY: pseudoxml +pseudoxml: + $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml + @echo + @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..a97fc34 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,294 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# BigQuery-Python documentation build configuration file, created by +# sphinx-quickstart on Sat Apr 9 13:11:15 2016. +# +# This file is execfile()d with the current directory set to its +# containing dir. +# +# Note that not all possible configuration values are present in this +# autogenerated file. +# +# All configuration values have a default; values that are commented out +# serve to show the default. + +import sys +import os + +#numpydoc_show_class_members = False + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +#sys.path.insert(0, os.path.abspath('.')) + +sys.path.insert(0, os.path.abspath('../')) +import bigquery + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +#needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.mathjax', + 'numpydoc', + 'sphinx.ext.autosummary' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. +# You can specify multiple suffix as a list of string: +# source_suffix = ['.rst', '.md'] +source_suffix = '.rst' + +# The encoding of source files. +#source_encoding = 'utf-8-sig' + +# The master toctree document. +master_doc = 'index' + +# General information about the project. 
+project = 'BigQuery-Python' +copyright = '2016, Tyler Treat' +author = 'Tyler Treat' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = bigquery.__version__ +# The full version, including alpha/beta/rc tags. +release = bigquery.__version__ + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = None + +# There are two options for replacing |today|: either, you set today to some +# non-false value, then it is used: +#today = '' +# Else, today_fmt is used as the format for a strftime call. +#today_fmt = '%B %d, %Y' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +exclude_patterns = ['_build'] + +# The reST default role (used for this markup: `text`) to use for all +# documents. +#default_role = None + +# If true, '()' will be appended to :func: etc. cross-reference text. +#add_function_parentheses = True + +# If true, the current module name will be prepended to all description +# unit titles (such as .. function::). +#add_module_names = True + +# If true, sectionauthor and moduleauthor directives will be shown in the +# output. They are ignored by default. +#show_authors = False + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# A list of ignored prefixes for module index sorting. +#modindex_common_prefix = [] + +# If true, keep warnings as "system message" paragraphs in the built documents. +#keep_warnings = False + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +html_theme = 'sphinxdoc' + +# Theme options are theme-specific and customize the look and feel of a theme +# further. For a list of options available for each theme, see the +# documentation. +#html_theme_options = {} + +# Add any paths that contain custom themes here, relative to this directory. +#html_theme_path = [] + +# The name for this set of Sphinx documents. If None, it defaults to +# " v documentation". +#html_title = None + +# A shorter title for the navigation bar. Default is the same as html_title. +#html_short_title = None + +# The name of an image file (relative to this directory) to place at the top +# of the sidebar. +#html_logo = None + +# The name of an image file (within the static path) to use as favicon of the +# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 +# pixels large. +#html_favicon = None + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add any extra paths that contain custom files (such as robots.txt or +# .htaccess) here, relative to this directory. These files are copied +# directly to the root of the documentation. 
+#html_extra_path = [] + +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, +# using the given strftime format. +#html_last_updated_fmt = '%b %d, %Y' + +# If true, SmartyPants will be used to convert quotes and dashes to +# typographically correct entities. +#html_use_smartypants = True + +# Custom sidebar templates, maps document names to template names. +#html_sidebars = {} + +# Additional templates that should be rendered to pages, maps page names to +# template names. +#html_additional_pages = {} + +# If false, no module index is generated. +#html_domain_indices = True + +# If false, no index is generated. +#html_use_index = True + +# If true, the index is split into individual pages for each letter. +#html_split_index = False + +# If true, links to the reST sources are added to the pages. +#html_show_sourcelink = True + +# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. +#html_show_sphinx = True + +# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. +#html_show_copyright = True + +# If true, an OpenSearch description file will be output, and all pages will +# contain a tag referring to it. The value of this option must be the +# base URL from which the finished HTML is served. +#html_use_opensearch = '' + +# This is the file name suffix for HTML files (e.g. ".xhtml"). +#html_file_suffix = None + +# Language to be used for generating the HTML full-text search index. +# Sphinx supports the following languages: +# 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' +# 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' +#html_search_language = 'en' + +# A dictionary with options for the search language support, empty by default. +# Now only 'ja' uses this config value +#html_search_options = {'type': 'default'} + +# The name of a javascript file (relative to the configuration directory) that +# implements a search results scorer. If empty, the default will be used. +#html_search_scorer = 'scorer.js' + +# Output file base name for HTML help builder. +htmlhelp_basename = 'BigQuery-Pythondoc' + +# -- Options for LaTeX output --------------------------------------------- + +latex_elements = { +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', + +# Latex figure (float) alignment +#'figure_align': 'htbp', +} + +# Grouping the document tree into LaTeX files. List of tuples +# (source start file, target name, title, +# author, documentclass [howto, manual, or own class]). +latex_documents = [ + (master_doc, 'BigQuery-Python.tex', 'BigQuery-Python Documentation', + 'Tyler Treat', 'manual'), +] + +# The name of an image file (relative to this directory) to place at the top of +# the title page. +#latex_logo = None + +# For "manual" documents, if this is true, then toplevel headings are parts, +# not chapters. +#latex_use_parts = False + +# If true, show page references after internal links. +#latex_show_pagerefs = False + +# If true, show URL addresses after external links. +#latex_show_urls = False + +# Documents to append as an appendix to all manuals. +#latex_appendices = [] + +# If false, no module index is generated. +#latex_domain_indices = True + + +# -- Options for manual page output --------------------------------------- + +# One entry per manual page. List of tuples +# (source start file, name, description, authors, manual section). 
+man_pages = [ + (master_doc, 'bigquery-python', 'BigQuery-Python Documentation', + [author], 1) +] + +# If true, show URL addresses after external links. +#man_show_urls = False + + +# -- Options for Texinfo output ------------------------------------------- + +# Grouping the document tree into Texinfo files. List of tuples +# (source start file, target name, title, author, +# dir menu entry, description, category) +texinfo_documents = [ + (master_doc, 'BigQuery-Python', 'BigQuery-Python Documentation', + author, 'BigQuery-Python', 'One line description of project.', + 'Miscellaneous'), +] + +# Documents to append as an appendix to all manuals. +#texinfo_appendices = [] + +# If false, no module index is generated. +#texinfo_domain_indices = True + +# How to display URL addresses: 'footnote', 'no', or 'inline'. +#texinfo_show_urls = 'footnote' + +# If true, do not generate a @detailmenu in the "Top" node's menu. +#texinfo_no_detailmenu = False diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..0708835 --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,29 @@ +.. BigQuery-Python documentation master file, created by + sphinx-quickstart on Sat Apr 9 13:11:15 2016. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to BigQuery-Python's documentation! +=========================================== + +Content +------- + +.. toctree:: + + pages/client + pages/query_builder + pages/schema_builder + +References +---------- +* `BigQuery-Python Source Code `_ +* `BigQuery API Reference `_ + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` + diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..2b8c095 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,263 @@ +@ECHO OFF + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set BUILDDIR=_build +set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . +set I18NSPHINXOPTS=%SPHINXOPTS% . +if NOT "%PAPER%" == "" ( + set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% + set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% +) + +if "%1" == "" goto help + +if "%1" == "help" ( + :help + echo.Please use `make ^` where ^ is one of + echo. html to make standalone HTML files + echo. dirhtml to make HTML files named index.html in directories + echo. singlehtml to make a single large HTML file + echo. pickle to make pickle files + echo. json to make JSON files + echo. htmlhelp to make HTML files and a HTML help project + echo. qthelp to make HTML files and a qthelp project + echo. devhelp to make HTML files and a Devhelp project + echo. epub to make an epub + echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter + echo. text to make text files + echo. man to make manual pages + echo. texinfo to make Texinfo files + echo. gettext to make PO message catalogs + echo. changes to make an overview over all changed/added/deprecated items + echo. xml to make Docutils-native XML files + echo. pseudoxml to make pseudoxml-XML files for display purposes + echo. linkcheck to check all external links for integrity + echo. doctest to run all doctests embedded in the documentation if enabled + echo. 
coverage to run coverage check of the documentation if enabled + goto end +) + +if "%1" == "clean" ( + for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i + del /q /s %BUILDDIR%\* + goto end +) + + +REM Check if sphinx-build is available and fallback to Python version if any +%SPHINXBUILD% 1>NUL 2>NUL +if errorlevel 9009 goto sphinx_python +goto sphinx_ok + +:sphinx_python + +set SPHINXBUILD=python -m sphinx.__init__ +%SPHINXBUILD% 2> nul +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +:sphinx_ok + + +if "%1" == "html" ( + %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/html. + goto end +) + +if "%1" == "dirhtml" ( + %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. + goto end +) + +if "%1" == "singlehtml" ( + %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. + goto end +) + +if "%1" == "pickle" ( + %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the pickle files. + goto end +) + +if "%1" == "json" ( + %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can process the JSON files. + goto end +) + +if "%1" == "htmlhelp" ( + %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run HTML Help Workshop with the ^ +.hhp project file in %BUILDDIR%/htmlhelp. + goto end +) + +if "%1" == "qthelp" ( + %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; now you can run "qcollectiongenerator" with the ^ +.qhcp project file in %BUILDDIR%/qthelp, like this: + echo.^> qcollectiongenerator %BUILDDIR%\qthelp\BigQuery-Python.qhcp + echo.To view the help file: + echo.^> assistant -collectionFile %BUILDDIR%\qthelp\BigQuery-Python.ghc + goto end +) + +if "%1" == "devhelp" ( + %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. + goto end +) + +if "%1" == "epub" ( + %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The epub file is in %BUILDDIR%/epub. + goto end +) + +if "%1" == "latex" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + if errorlevel 1 exit /b 1 + echo. + echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdf" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf + cd %~dp0 + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. + goto end +) + +if "%1" == "latexpdfja" ( + %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex + cd %BUILDDIR%/latex + make all-pdf-ja + cd %~dp0 + echo. + echo.Build finished; the PDF files are in %BUILDDIR%/latex. 
+ goto end +) + +if "%1" == "text" ( + %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The text files are in %BUILDDIR%/text. + goto end +) + +if "%1" == "man" ( + %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The manual pages are in %BUILDDIR%/man. + goto end +) + +if "%1" == "texinfo" ( + %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. + goto end +) + +if "%1" == "gettext" ( + %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The message catalogs are in %BUILDDIR%/locale. + goto end +) + +if "%1" == "changes" ( + %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes + if errorlevel 1 exit /b 1 + echo. + echo.The overview file is in %BUILDDIR%/changes. + goto end +) + +if "%1" == "linkcheck" ( + %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck + if errorlevel 1 exit /b 1 + echo. + echo.Link check complete; look for any errors in the above output ^ +or in %BUILDDIR%/linkcheck/output.txt. + goto end +) + +if "%1" == "doctest" ( + %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest + if errorlevel 1 exit /b 1 + echo. + echo.Testing of doctests in the sources finished, look at the ^ +results in %BUILDDIR%/doctest/output.txt. + goto end +) + +if "%1" == "coverage" ( + %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage + if errorlevel 1 exit /b 1 + echo. + echo.Testing of coverage in the sources finished, look at the ^ +results in %BUILDDIR%/coverage/python.txt. + goto end +) + +if "%1" == "xml" ( + %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The XML files are in %BUILDDIR%/xml. + goto end +) + +if "%1" == "pseudoxml" ( + %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml + if errorlevel 1 exit /b 1 + echo. + echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. + goto end +) + +:end diff --git a/docs/pages/client.rst b/docs/pages/client.rst new file mode 100644 index 0000000..f21a864 --- /dev/null +++ b/docs/pages/client.rst @@ -0,0 +1,13 @@ +.. _client: + +client +====== + +.. automodule:: bigquery.client + :members: + +:mod:`BigQueryClient` Class +--------------------------- + +.. autoclass:: bigquery.client.BigQueryClient + :members: diff --git a/docs/pages/query_builder.rst b/docs/pages/query_builder.rst new file mode 100644 index 0000000..4053073 --- /dev/null +++ b/docs/pages/query_builder.rst @@ -0,0 +1,7 @@ +.. _query_builder + +query_builder +============= + +.. automodule:: bigquery.query_builder + :members: diff --git a/docs/pages/schema_builder.rst b/docs/pages/schema_builder.rst new file mode 100644 index 0000000..0d16def --- /dev/null +++ b/docs/pages/schema_builder.rst @@ -0,0 +1,7 @@ +.. _schema_builder + +schema_builder +============== + +.. automodule:: bigquery.schema_builder + :members: From 44e0b78a8dd834484cda555a5f8546afabb36375 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Sun, 10 Apr 2016 12:40:25 -0500 Subject: [PATCH 068/146] Add documentation link to readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index f4e2f66..8d42cb3 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,8 @@ Simple Python client for interacting with Google BigQuery. 
This client provides an API for retrieving and inserting BigQuery data by wrapping Google's low-level API client library. It also provides facilities that make it convenient to access data that is tied to an App Engine appspot, such as request logs. +[Documentation](http://tylertreat.github.io/BigQuery-Python/) + # Installation `pip install bigquery-python` From 218b8426bfbfe52a1c00b9d32e8bc4f7b23b31ac Mon Sep 17 00:00:00 2001 From: Takashi Nishibayashi Date: Fri, 15 Apr 2016 03:44:41 +0900 Subject: [PATCH 069/146] Add stream insert options --- bigquery/client.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index 33e8275..8e00e8b 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1137,7 +1137,8 @@ def wait_for_job(self, job, interval=5, timeout=60): return job_resource - def push_rows(self, dataset, table, rows, insert_id_key=None): + def push_rows(self, dataset, table, rows, insert_id_key=None, + skip_invalid_rows=None, ignore_unknown_values=None): """Upload rows to BigQuery table. Parameters @@ -1150,6 +1151,10 @@ def push_rows(self, dataset, table, rows, insert_id_key=None): A ``list`` of rows (``dict`` objects) to add to the table insert_id_key : str, optional Key for insertId in row + skip_invalid_rows : bool, optional + Insert all valid rows of a request, even if invalid rows exist. + ignore_unknown_values : bool, optional + Accept rows that contain values that do not match the schema. Returns ------- @@ -1173,6 +1178,12 @@ def push_rows(self, dataset, table, rows, insert_id_key=None): "rows": rows_data } + if skip_invalid_rows is not None: + data['skipInvalidRows'] = skip_invalid_rows + + if ignore_unknown_values is not None: + data['ignoreUnknownValues'] = ignore_unknown_values + try: response = table_data.insertAll( projectId=self.project_id, From bfbfde63702a9bf79579460b2e9fa0806bc0eaa0 Mon Sep 17 00:00:00 2001 From: Jason Bennett Date: Sun, 17 Apr 2016 14:32:52 -0700 Subject: [PATCH 070/146] Upgrade to latest OAuth library(2.0.2) and Google Python library (1.5.0) --- bigquery/client.py | 67 +++++++++++++++++++---------------- bigquery/tests/test_client.py | 64 ++++++++++++++++----------------- 2 files changed, 67 insertions(+), 64 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 33e8275..31f74d3 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -4,19 +4,19 @@ from collections import defaultdict from datetime import datetime, timedelta from hashlib import sha256 +from io import StringIO from time import sleep, time -import httplib2 import six -from apiclient.discovery import build, DISCOVERY_URI -from apiclient.errors import HttpError - from bigquery.errors import (BigQueryTimeoutException, JobExecutingException, JobInsertException, UnfinishedQueryException) from bigquery.schema_builder import schema_from_record +from googleapiclient.discovery import build, DISCOVERY_URI +from googleapiclient.errors import HttpError +from httplib2 import Http -BIGQUERY_SCOPE = 'https://www.googleapis.com/auth/bigquery' -BIGQUERY_SCOPE_READ_ONLY = 'https://www.googleapis.com/auth/bigquery.readonly' +BIGQUERY_SCOPE = ['https://www.googleapis.com/auth/bigquery'] +BIGQUERY_SCOPE_READ_ONLY = ['https://www.googleapis.com/auth/bigquery.readonly'] CACHE_TIMEOUT = timedelta(seconds=30) @@ -90,56 +90,63 @@ def get_client(project_id, credentials=None, """ if not credentials: - assert (service_account and (private_key or private_key_file)) or (json_key or json_key_file), \ + assert 
(service_account and (private_key or private_key_file)) or ( + json_key or json_key_file), \ 'Must provide AssertionCredentials or service account and P12 key or JSON key' if service_url is None: service_url = DISCOVERY_URI + scope = BIGQUERY_SCOPE_READ_ONLY if readonly else BIGQUERY_SCOPE + if private_key_file: - with open(private_key_file, 'rb') as key_file: - private_key = key_file.read() + credentials = _credentials().from_p12_keyfile(service_account, + private_key_file, + scopes=scope) + + if private_key: + try: + if isinstance(private_key, basestring): + private_key = private_key.decode('utf-8') + except NameError: + # python3 -- private_key is already unicode + pass + credentials = _credentials().from_p12_keyfile_buffer( + service_account, + StringIO(private_key), + scopes=scope) if json_key_file: - with open(json_key_file, 'r') as key_file: - json_key = json.load(key_file) + credentials = _credentials().from_json_keyfile_name(json_key_file, + scopes=scope) if json_key: - service_account = json_key['client_email'] - private_key = json_key['private_key'] + credentials = _credentials().from_json_keyfile_dict(json_key, + scopes=scope) bq_service = _get_bq_service(credentials=credentials, - service_url=service_url, - service_account=service_account, - private_key=private_key, - readonly=readonly) + service_url=service_url) return BigQueryClient(bq_service, project_id, swallow_results) -def _get_bq_service(credentials=None, service_url=None, service_account=None, private_key=None, - readonly=True): +def _get_bq_service(credentials=None, service_url=None): """Construct an authorized BigQuery service object.""" - assert credentials or (service_account and private_key), \ - 'Must provide AssertionCredentials or service account and key' - - if not credentials: - scope = BIGQUERY_SCOPE_READ_ONLY if readonly else BIGQUERY_SCOPE - credentials = _credentials()(service_account, private_key, scope=scope) + assert credentials, 'Must provide ServiceAccountCredentials' - http = httplib2.Http() - http = credentials.authorize(http) - service = build('bigquery', 'v2', http=http, discoveryServiceUrl=service_url) + http = credentials.authorize(Http()) + service = build('bigquery', 'v2', http=http, + discoveryServiceUrl=service_url) return service def _credentials(): """Import and return SignedJwtAssertionCredentials class""" - from oauth2client.client import SignedJwtAssertionCredentials + from oauth2client.service_account import ServiceAccountCredentials - return SignedJwtAssertionCredentials + return ServiceAccountCredentials class BigQueryClient(object): diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index f7050c6..d86c4c8 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -2,18 +2,16 @@ import mock import six -from nose.tools import raises - -from apiclient.errors import HttpError from bigquery import client from bigquery.errors import ( JobInsertException, JobExecutingException, BigQueryTimeoutException ) +from googleapiclient.errors import HttpError +from nose.tools import raises class HttpResponse(object): - def __init__(self, status, reason='There was an error'): """ Args: @@ -24,7 +22,6 @@ def __init__(self, status, reason='There was an error'): class TestGetClient(unittest.TestCase): - def setUp(self): client._bq_client = None @@ -51,7 +48,7 @@ def test_initialize_readonly(self, mock_build, mock_return_cred): mock_cred = mock.Mock() mock_http = mock.Mock() mock_service_url = mock.Mock() - mock_cred.return_value.authorize.return_value = 
mock_http + mock_cred.from_p12_keyfile_buffer.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq key = 'key' @@ -65,9 +62,11 @@ def test_initialize_readonly(self, mock_build, mock_return_cred): readonly=True) mock_return_cred.assert_called_once_with() - mock_cred.assert_called_once_with(service_account, key, - scope=BIGQUERY_SCOPE_READ_ONLY) - self.assertTrue(mock_cred.return_value.authorize.called) + mock_cred.from_p12_keyfile_buffer.assert_called_once_with( + service_account, mock.ANY, + scopes=BIGQUERY_SCOPE_READ_ONLY) + self.assertTrue( + mock_cred.from_p12_keyfile_buffer.return_value.authorize.called) mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, discoveryServiceUrl=mock_service_url) self.assertEquals(mock_bq, bq_client.bigquery) @@ -84,7 +83,7 @@ def test_initialize_read_write(self, mock_build, mock_return_cred): mock_cred = mock.Mock() mock_http = mock.Mock() mock_service_url = mock.Mock() - mock_cred.return_value.authorize.return_value = mock_http + mock_cred.from_p12_keyfile_buffer.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq key = 'key' @@ -98,9 +97,10 @@ def test_initialize_read_write(self, mock_build, mock_return_cred): readonly=False) mock_return_cred.assert_called_once_with() - mock_cred.assert_called_once_with(service_account, key, - scope=BIGQUERY_SCOPE) - self.assertTrue(mock_cred.return_value.authorize.called) + mock_cred.from_p12_keyfile_buffer.assert_called_once_with( + service_account, mock.ANY, scopes=BIGQUERY_SCOPE) + self.assertTrue( + mock_cred.from_p12_keyfile_buffer.return_value.authorize.called) mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, discoveryServiceUrl=mock_service_url) self.assertEquals(mock_bq, bq_client.bigquery) @@ -108,9 +108,7 @@ def test_initialize_read_write(self, mock_build, mock_return_cred): @mock.patch('bigquery.client._credentials') @mock.patch('bigquery.client.build') - @mock.patch('__builtin__.open' if six.PY2 else 'builtins.open') - def test_initialize_key_file(self, mock_open, mock_build, - mock_return_cred): + def test_initialize_key_file(self, mock_build, mock_return_cred): """Ensure that a BigQueryClient is initialized and returned with read/write permissions using a private key file. 
""" @@ -119,12 +117,10 @@ def test_initialize_key_file(self, mock_open, mock_build, mock_cred = mock.Mock() mock_http = mock.Mock() mock_service_url = mock.Mock() - mock_cred.return_value.authorize.return_value = mock_http + mock_cred.from_p12_keyfile.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq key_file = 'key.pem' - key = 'key' - mock_open.return_value.__enter__.return_value.read.return_value = key service_account = 'account' project_id = 'project' mock_return_cred.return_value = mock_cred @@ -134,11 +130,12 @@ def test_initialize_key_file(self, mock_open, mock_build, service_account=service_account, private_key_file=key_file, readonly=False) - mock_open.assert_called_once_with(key_file, 'rb') mock_return_cred.assert_called_once_with() - mock_cred.assert_called_once_with(service_account, key, - scope=BIGQUERY_SCOPE) - self.assertTrue(mock_cred.return_value.authorize.called) + mock_cred.from_p12_keyfile.assert_called_once_with(service_account, + key_file, + scopes=BIGQUERY_SCOPE) + self.assertTrue( + mock_cred.from_p12_keyfile.return_value.authorize.called) mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, discoveryServiceUrl=mock_service_url) self.assertEquals(mock_bq, bq_client.bigquery) @@ -146,34 +143,33 @@ def test_initialize_key_file(self, mock_open, mock_build, @mock.patch('bigquery.client._credentials') @mock.patch('bigquery.client.build') - @mock.patch('__builtin__.open' if six.PY2 else 'builtins.open') - def test_initialize_json_key_file(self, mock_open, mock_build, mock_return_cred): + def test_initialize_json_key_file(self, mock_build, mock_return_cred): """Ensure that a BigQueryClient is initialized and returned with read/write permissions using a JSON key file. 
""" from bigquery.client import BIGQUERY_SCOPE - import json mock_cred = mock.Mock() mock_http = mock.Mock() mock_service_url = mock.Mock() - mock_cred.return_value.authorize.return_value = mock_http + mock_cred.from_json_keyfile_name.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq json_key_file = 'key.json' - json_key = {'client_email': 'mail', 'private_key': 'pkey'} - mock_open.return_value.__enter__.return_value.read.return_value = json.dumps(json_key) project_id = 'project' mock_return_cred.return_value = mock_cred bq_client = client.get_client( - project_id, service_url=mock_service_url, json_key_file=json_key_file, readonly=False) + project_id, service_url=mock_service_url, + json_key_file=json_key_file, readonly=False) - mock_open.assert_called_once_with(json_key_file, 'r') mock_return_cred.assert_called_once_with() - mock_cred.assert_called_once_with(json_key['client_email'], json_key['private_key'], scope=BIGQUERY_SCOPE) - self.assertTrue(mock_cred.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, discoveryServiceUrl=mock_service_url) + mock_cred.from_json_keyfile_name.assert_called_once_with(json_key_file, + scopes=BIGQUERY_SCOPE) + self.assertTrue( + mock_cred.from_json_keyfile_name.return_value.authorize.called) + mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, + discoveryServiceUrl=mock_service_url) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) From 5a9290a3d43e904efb248eda82930de918356436 Mon Sep 17 00:00:00 2001 From: Takashi Nishibayashi Date: Mon, 18 Apr 2016 12:12:52 +0900 Subject: [PATCH 071/146] Add tests --- bigquery/tests/test_client.py | 41 +++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index f7050c6..3cf84c8 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -2108,6 +2108,47 @@ def test_push_success(self): self.mock_table_data.insertAll.return_value.execute.assert_has_calls( execute_calls) + def test_request_data_with_options(self): + """Ensure that insertAll body has optional property only when + the optional parameter of push_rows passed. 
+ """ + expected_body = self.data.copy() + + self.client.push_rows( + self.dataset, self.table, self.rows, + insert_id_key='one') + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, + datasetId=self.dataset, + tableId=self.table, + body=expected_body) + + self.client.push_rows( + self.dataset, self.table, self.rows, + insert_id_key='one', + ignore_unknown_values=False, + skip_invalid_rows=False) + expected_body['ignoreUnknownValues'] = False + expected_body['skipInvalidRows'] = False + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, + datasetId=self.dataset, + tableId=self.table, + body=expected_body) + + self.client.push_rows( + self.dataset, self.table, self.rows, + insert_id_key='one', + ignore_unknown_values=True, + skip_invalid_rows=True) + expected_body['ignoreUnknownValues'] = True + expected_body['skipInvalidRows'] = True + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, + datasetId=self.dataset, + tableId=self.table, + body=expected_body) + class TestGetAllTables(unittest.TestCase): From 6436432cc490e835506a85e9fb7cab216a84deca Mon Sep 17 00:00:00 2001 From: Takashi Nishibayashi Date: Fri, 22 Apr 2016 15:41:13 +0900 Subject: [PATCH 072/146] Use package logger instead of root logger --- bigquery/client.py | 50 ++++++++++++++++++++------------------- bigquery/query_builder.py | 16 ++++++------- 2 files changed, 34 insertions(+), 32 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index a3c64ec..68d3bb5 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1,6 +1,6 @@ import calendar import json -import logging +from logging import getLogger from collections import defaultdict from datetime import datetime, timedelta from hashlib import sha256 @@ -42,6 +42,8 @@ JOB_FORMAT_NEWLINE_DELIMITED_JSON JOB_DESTINATION_FORMAT_CSV = JOB_FORMAT_CSV +logger = getLogger(__name__) + def get_client(project_id, credentials=None, service_url=None, service_account=None, @@ -186,7 +188,7 @@ def _submit_query_job(self, query_data): On timeout """ - logging.debug('Submitting query job: %s' % query_data) + logger.debug('Submitting query job: %s' % query_data) job_collection = self.bigquery.jobs() @@ -206,7 +208,7 @@ def _submit_query_job(self, query_data): # raise exceptions if it's not an async query # and job is not completed after timeout if not job_complete and query_data.get("timeoutMs", False): - logging.error('BigQuery job %s timeout' % job_id) + logger.error('BigQuery job %s timeout' % job_id) raise BigQueryTimeoutException() return job_id, [self._transform_row(row, schema) for row in rows] @@ -235,7 +237,7 @@ def _insert_job(self, body_object): BigQueryTimeoutException on timeout """ - logging.debug('Submitting job: %s' % body_object) + logger.debug('Submitting job: %s' % body_object) job_collection = self.bigquery.jobs() @@ -274,7 +276,7 @@ def query(self, query, max_results=None, timeout=0, dry_run=False): on timeout """ - logging.debug('Executing query: %s' % query) + logger.debug('Executing query: %s' % query) query_data = { 'query': query, @@ -301,7 +303,7 @@ def get_query_schema(self, job_id): query_reply = self.get_query_results(job_id, offset=0, limit=0) if not query_reply['jobComplete']: - logging.warning('BigQuery job %s not complete' % job_id) + logger.warning('BigQuery job %s not complete' % job_id) raise UnfinishedQueryException() return query_reply['schema']['fields'] @@ -330,7 +332,7 @@ def get_table_schema(self, dataset, table): datasetId=dataset).execute() except HttpError 
as e: if int(e.resp['status']) == 404: - logging.warn('Table %s.%s does not exist', dataset, table) + logger.warn('Table %s.%s does not exist', dataset, table) return None raise @@ -383,7 +385,7 @@ def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): # Get query results query_reply = self.get_query_results(job_id, offset=offset, limit=limit, timeout=timeout) if not query_reply['jobComplete']: - logging.warning('BigQuery job %s not complete' % job_id) + logger.warning('BigQuery job %s not complete' % job_id) raise UnfinishedQueryException() schema = query_reply["schema"]["fields"] @@ -524,7 +526,7 @@ def create_table(self, dataset, table, schema, expiration_time=None): return table except HttpError as e: - logging.error(('Cannot create table {0}.{1}\n' + logger.error(('Cannot create table {0}.{1}\n' 'Http Error: {2}').format(dataset, table, e.content)) if self.swallow_results: @@ -572,7 +574,7 @@ def update_table(self, dataset, table, schema): return result except HttpError as e: - logging.error(('Cannot update table {0}.{1}\n' + logger.error(('Cannot update table {0}.{1}\n' 'Http Error: {2}').format(dataset, table, e.content)) if self.swallow_results: @@ -620,7 +622,7 @@ def patch_table(self, dataset, table, schema): return result except HttpError as e: - logging.error(('Cannot patch table {0}.{1}\n' + logger.error(('Cannot patch table {0}.{1}\n' 'Http Error: {2}').format(dataset, table, e.content)) if self.swallow_results: @@ -670,7 +672,7 @@ def create_view(self, dataset, view, query): return view except HttpError as e: - logging.error(('Cannot create view {0}.{1}\n' + logger.error(('Cannot create view {0}.{1}\n' 'Http Error: {2}').format(dataset, view, e.content)) if self.swallow_results: @@ -707,7 +709,7 @@ def delete_table(self, dataset, table): return response except HttpError as e: - logging.error(('Cannot delete table {0}.{1}\n' + logger.error(('Cannot delete table {0}.{1}\n' 'Http Error: {2}').format(dataset, table, e.content)) if self.swallow_results: @@ -900,7 +902,7 @@ def import_data_from_uris( } } - logging.debug("Creating load job %s" % body) + logger.debug("Creating load job %s" % body) job_resource = self._insert_job(body) self._raise_insert_exception_if_error(job_resource) return job_resource @@ -994,7 +996,7 @@ def export_data_to_uris( } } - logging.info("Creating export job %s" % body) + logger.info("Creating export job %s" % body) job_resource = self._insert_job(body) self._raise_insert_exception_if_error(job_resource) return job_resource @@ -1090,7 +1092,7 @@ def write_to_table( } } - logging.info("Creating write to table job %s" % body) + logger.info("Creating write to table job %s" % body) job_resource = self._insert_job(body) self._raise_insert_exception_if_error(job_resource) return job_resource @@ -1139,7 +1141,7 @@ def wait_for_job(self, job, interval=5, timeout=60): # raise exceptions if timeout if not complete: - logging.error('BigQuery job %s timeout' % job_id) + logger.error('BigQuery job %s timeout' % job_id) raise BigQueryTimeoutException() return job_resource @@ -1200,7 +1202,7 @@ def push_rows(self, dataset, table, rows, insert_id_key=None, ).execute() if response.get('insertErrors'): - logging.error('BigQuery insert errors: %s' % response) + logger.error('BigQuery insert errors: %s' % response) if self.swallow_results: return False else: @@ -1212,7 +1214,7 @@ def push_rows(self, dataset, table, rows, insert_id_key=None, return response except HttpError as e: - logging.exception('Problem with BigQuery insertAll') + 
logger.exception('Problem with BigQuery insertAll') if self.swallow_results: return False else: @@ -1573,7 +1575,7 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, else: return response except HttpError as e: - logging.error('Cannot create dataset {0}, {1}'.format(dataset_id, + logger.error('Cannot create dataset {0}, {1}'.format(dataset_id, e)) if self.swallow_results: return False @@ -1594,7 +1596,7 @@ def get_datasets(self): result = request.execute() return result.get('datasets', []) except HttpError as e: - logging.error("Cannot list datasets: {0}".format(e)) + logger.error("Cannot list datasets: {0}".format(e)) return None def delete_dataset(self, dataset_id, delete_contents=False): @@ -1630,7 +1632,7 @@ def delete_dataset(self, dataset_id, delete_contents=False): else: return response except HttpError as e: - logging.error('Cannot delete dataset {0}: {1}'.format(dataset_id, + logger.error('Cannot delete dataset {0}: {1}'.format(dataset_id, e)) if self.swallow_results: return False @@ -1673,7 +1675,7 @@ def update_dataset(self, dataset_id, friendly_name=None, description=None, else: return response except HttpError as e: - logging.error('Cannot update dataset {0}: {1}'.format(dataset_id, + logger.error('Cannot update dataset {0}: {1}'.format(dataset_id, e)) if self.swallow_results: return False @@ -1715,7 +1717,7 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, else: return response except HttpError as e: - logging.error('Cannot patch dataset {0}: {1}'.format(dataset_id, + logger.error('Cannot patch dataset {0}: {1}'.format(dataset_id, e)) if self.swallow_results: return False diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index cb5e60a..fb02896 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -1,4 +1,6 @@ -import logging +from logging import getLogger + +logger = getLogger(__name__) def render_query(dataset, tables, select=None, conditions=None, @@ -131,8 +133,7 @@ def _render_sources(dataset, tables): The data set to fetch log data from. tables : Union[dict, list] The tables to fetch log data from - - Returns +Returns ------- str A string that represents the "from" part of a query. 
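With the module-level loggers introduced by this change, a consuming application can route or silence the library's messages without touching the root logger. A minimal sketch of such configuration (the handler and level chosen here are illustrative, not part of the library):

    import logging

    # getLogger(__name__) inside the package yields 'bigquery.client',
    # 'bigquery.query_builder', etc.; 'bigquery' is their common parent.
    lib_logger = logging.getLogger('bigquery')
    lib_logger.addHandler(logging.StreamHandler())
    lib_logger.setLevel(logging.WARNING)  # e.g. hide the library's debug output
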
@@ -147,8 +148,7 @@ def _render_sources(dataset, tables): tables['from_date'], tables['to_date']) except KeyError as exp: - logging.warn('Missing parameter %s in selecting sources' % - (exp)) + logger.warn('Missing parameter %s in selecting sources' % (exp)) else: return "FROM " + ", ".join( @@ -184,7 +184,7 @@ def _render_conditions(conditions): comparators = condition.get('comparators') if None in (field, field_type, comparators) or not comparators: - logging.warn('Invalid condition passed in: %s' % condition) + logger.warn('Invalid condition passed in: %s' % condition) continue rendered_conditions.append( @@ -239,7 +239,7 @@ def _render_condition(field, field_type, comparators): for v in value]) ) elif isinstance(value, (tuple, list, set)) and len(value) != 2: - logging.warn('Invalid condition passed in: %s' % condition) + logger.warn('Invalid condition passed in: %s' % condition) else: value = _render_condition_value(value, field_type) @@ -335,7 +335,7 @@ def _render_having(having_conditions): comparators = condition.get('comparators') if None in (field, field_type, comparators) or not comparators: - logging.warn('Invalid condition passed in: %s' % condition) + logger.warn('Invalid condition passed in: %s' % condition) continue rendered_conditions.append( From 96789445309a33b4b00ce36c94918c5f7a445922 Mon Sep 17 00:00:00 2001 From: Takashi Nishibayashi Date: Fri, 22 Apr 2016 16:22:21 +0900 Subject: [PATCH 073/146] Revert comment line --- bigquery/query_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index fb02896..c149eb1 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -133,7 +133,7 @@ def _render_sources(dataset, tables): The data set to fetch log data from. tables : Union[dict, list] The tables to fetch log data from -Returns + Returns ------- str A string that represents the "from" part of a query. From 22d3e5801df74bb6d4182343c0f0d34691844b99 Mon Sep 17 00:00:00 2001 From: Takashi Nishibayashi Date: Fri, 22 Apr 2016 16:24:01 +0900 Subject: [PATCH 074/146] Revart unnecesarry change --- bigquery/query_builder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index c149eb1..b6f568b 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -133,6 +133,7 @@ def _render_sources(dataset, tables): The data set to fetch log data from. tables : Union[dict, list] The tables to fetch log data from + Returns ------- str From 7ce159a4002fe75a0eb2165afe11bf100f0e9b88 Mon Sep 17 00:00:00 2001 From: orangain Date: Sat, 23 Apr 2016 14:48:50 +0900 Subject: [PATCH 075/146] Read project_id from JSON key file. A JSON key file provided by Google contains project_id. Now project_id argument of get_client() is optional and read from the JSON key file if json_key or json_key_file is provided. I believe this improve usability of get_client(). --- README.md | 2 +- bigquery/client.py | 16 ++++++++---- bigquery/tests/test_client.py | 48 ++++++++++++++++++++++++++++++++--- 3 files changed, 56 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 8d42cb3..6b4606c 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ client = get_client(project_id, service_account=service_account, # JSON key provided by Google json_key = 'key.json' -client = get_client(project_id, json_key_file=json_key, readonly=True) +client = get_client(json_key_file=json_key, readonly=True) # Submit an async query. 
job_id, _results = client.query('SELECT * FROM dataset.my_table LIMIT 1000') diff --git a/bigquery/client.py b/bigquery/client.py index 68d3bb5..cacb876 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -45,7 +45,7 @@ logger = getLogger(__name__) -def get_client(project_id, credentials=None, +def get_client(project_id=None, credentials=None, service_url=None, service_account=None, private_key=None, private_key_file=None, json_key=None, json_key_file=None, @@ -56,8 +56,8 @@ def get_client(project_id, credentials=None, Parameters ---------- - project_id : str - The BigQuery project id + project_id : str, optional + The BigQuery project id, required unless json_key or json_key_file is provided. credentials : oauth2client.client.SignedJwtAssertionCredentials, optional AssertionCredentials instance to authenticate requests to BigQuery (optional, must provide `service_account` and (`private_key` or `private_key_file`) or @@ -96,6 +96,10 @@ def get_client(project_id, credentials=None, json_key or json_key_file), \ 'Must provide AssertionCredentials or service account and P12 key or JSON key' + if not project_id: + assert json_key or json_key_file, \ + 'Must provide project_id unless json_key or json_key_file is provided' + if service_url is None: service_url = DISCOVERY_URI @@ -119,12 +123,14 @@ def get_client(project_id, credentials=None, scopes=scope) if json_key_file: - credentials = _credentials().from_json_keyfile_name(json_key_file, - scopes=scope) + with open(json_key_file, 'r') as key_file: + json_key = json.load(key_file) if json_key: credentials = _credentials().from_json_keyfile_dict(json_key, scopes=scope) + if not project_id: + project_id = json_key['project_id'] bq_service = _get_bq_service(credentials=credentials, service_url=service_url) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index be1ff0f..ffd7818 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -143,19 +143,23 @@ def test_initialize_key_file(self, mock_build, mock_return_cred): @mock.patch('bigquery.client._credentials') @mock.patch('bigquery.client.build') - def test_initialize_json_key_file(self, mock_build, mock_return_cred): + @mock.patch('__builtin__.open' if six.PY2 else 'builtins.open') + def test_initialize_json_key_file(self, mock_open, mock_build, mock_return_cred): """Ensure that a BigQueryClient is initialized and returned with read/write permissions using a JSON key file. 
""" from bigquery.client import BIGQUERY_SCOPE + import json mock_cred = mock.Mock() mock_http = mock.Mock() mock_service_url = mock.Mock() - mock_cred.from_json_keyfile_name.return_value.authorize.return_value = mock_http + mock_cred.from_json_keyfile_dict.return_value.authorize.return_value = mock_http mock_bq = mock.Mock() mock_build.return_value = mock_bq json_key_file = 'key.json' + json_key = {'client_email': 'mail', 'private_key': 'pkey'} + mock_open.return_value.__enter__.return_value.read.return_value = json.dumps(json_key) project_id = 'project' mock_return_cred.return_value = mock_cred @@ -164,15 +168,51 @@ def test_initialize_json_key_file(self, mock_build, mock_return_cred): json_key_file=json_key_file, readonly=False) mock_return_cred.assert_called_once_with() - mock_cred.from_json_keyfile_name.assert_called_once_with(json_key_file, + mock_cred.from_json_keyfile_dict.assert_called_once_with(json_key, scopes=BIGQUERY_SCOPE) self.assertTrue( - mock_cred.from_json_keyfile_name.return_value.authorize.called) + mock_cred.from_json_keyfile_dict.return_value.authorize.called) mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, discoveryServiceUrl=mock_service_url) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) + @mock.patch('bigquery.client._credentials') + @mock.patch('bigquery.client.build') + @mock.patch('__builtin__.open' if six.PY2 else 'builtins.open') + def test_initialize_json_key_file_without_project_id(self, mock_open, mock_build, + mock_return_cred): + """Ensure that a BigQueryClient is initialized and returned with + read/write permissions using a JSON key file without project_id. + """ + from bigquery.client import BIGQUERY_SCOPE + import json + + mock_cred = mock.Mock() + mock_http = mock.Mock() + mock_service_url = mock.Mock() + mock_cred.from_json_keyfile_dict.return_value.authorize.return_value = mock_http + mock_bq = mock.Mock() + mock_build.return_value = mock_bq + json_key_file = 'key.json' + json_key = {'client_email': 'mail', 'private_key': 'pkey', 'project_id': 'project'} + mock_open.return_value.__enter__.return_value.read.return_value = json.dumps(json_key) + mock_return_cred.return_value = mock_cred + + bq_client = client.get_client( + service_url=mock_service_url, json_key_file=json_key_file, readonly=False) + + mock_open.assert_called_once_with(json_key_file, 'r') + mock_return_cred.assert_called_once_with() + mock_cred.from_json_keyfile_dict.assert_called_once_with(json_key, + scopes=BIGQUERY_SCOPE) + self.assertTrue( + mock_cred.from_json_keyfile_dict.return_value.authorize.called) + mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, + discoveryServiceUrl=mock_service_url) + self.assertEquals(mock_bq, bq_client.bigquery) + self.assertEquals(json_key['project_id'], bq_client.project_id) + class TestQuery(unittest.TestCase): From 890270affac1138e6b568ef28a9cf325f6dfcf45 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Sat, 23 Apr 2016 13:14:07 -0500 Subject: [PATCH 076/146] Bump version to 1.7.0 --- bigquery/__init__.py | 2 +- setup.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/bigquery/__init__.py b/bigquery/__init__.py index 086be47..3a4f000 100644 --- a/bigquery/__init__.py +++ b/bigquery/__init__.py @@ -1,6 +1,6 @@ from __future__ import absolute_import -__version__ = '1.6.0' +__version__ = '1.7.0' from .client import get_client from .client import ( diff --git a/setup.py b/setup.py index b0c737b..acdaf5e 100644 --- a/setup.py +++ 
b/setup.py @@ -1,13 +1,12 @@ from setuptools import find_packages from setuptools import setup - -VERSION = '1.6.0' +from bigquery import __version__ setup_args = dict( name='BigQuery-Python', description='Simple Python client for interacting with Google BigQuery.', url='https://github.com/tylertreat/BigQuery-Python', - version=VERSION, + version=__version__, license='Apache', packages=find_packages(), include_package_data=True, From 4f84b83a9cfa9cf243f784bac273ceea978102e0 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Sat, 23 Apr 2016 13:36:08 -0500 Subject: [PATCH 077/146] Various formatting fixes and fix import issue --- bigquery/client.py | 184 +++++++++++++++++++++++++-------------------- 1 file changed, 101 insertions(+), 83 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index cacb876..9bab750 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -10,13 +10,17 @@ import six from bigquery.errors import (BigQueryTimeoutException, JobExecutingException, JobInsertException, UnfinishedQueryException) -from bigquery.schema_builder import schema_from_record from googleapiclient.discovery import build, DISCOVERY_URI from googleapiclient.errors import HttpError from httplib2 import Http -BIGQUERY_SCOPE = ['https://www.googleapis.com/auth/bigquery'] -BIGQUERY_SCOPE_READ_ONLY = ['https://www.googleapis.com/auth/bigquery.readonly'] +BIGQUERY_SCOPE = [ + 'https://www.googleapis.com/auth/bigquery' +] + +BIGQUERY_SCOPE_READ_ONLY = [ + 'https://www.googleapis.com/auth/bigquery.readonly' +] CACHE_TIMEOUT = timedelta(seconds=30) @@ -57,33 +61,37 @@ def get_client(project_id=None, credentials=None, Parameters ---------- project_id : str, optional - The BigQuery project id, required unless json_key or json_key_file is provided. + The BigQuery project id, required unless json_key or json_key_file is + provided. credentials : oauth2client.client.SignedJwtAssertionCredentials, optional - AssertionCredentials instance to authenticate requests to BigQuery (optional, - must provide `service_account` and (`private_key` or `private_key_file`) or - (`json_key` or `json_key_file`) if not included + AssertionCredentials instance to authenticate requests to BigQuery + (optional, must provide `service_account` and (`private_key` or + `private_key_file`) or (`json_key` or `json_key_file`) if not included service_url : str, optional - A URI string template pointing to the location of Google's API discovery - service. Requires two parameters {api} and {apiVersion} that when filled in - produce an absolute URI to the discovery document for that service. If not set - then the default googleapiclient discovery URI is used. See `credentials` + A URI string template pointing to the location of Google's API + discovery service. Requires two parameters {api} and {apiVersion} that + when filled in produce an absolute URI to the discovery document for + that service. If not set then the default googleapiclient discovery URI + is used. See `credentials` service_account : str, optional The Google API service account name. See `credentials` private_key : str, optional - The private key associated with the service account in PKCS12 or PEM format. See `credentials` + The private key associated with the service account in PKCS12 or PEM + format. See `credentials` private_key_file : str, optional - The name of the file containing the private key associated with the service - account in PKCS12 or PEM format. 
See `credentials` + The name of the file containing the private key associated with the + service account in PKCS12 or PEM format. See `credentials` json_key : dict, optional The JSON key associated with the service account. See `credentials` json_key_file : str, optional - The name of the JSON key file associated with the service account. See `credentials`. + The name of the JSON key file associated with the service account. See + `credentials`. readonly : bool - Bool indicating if BigQuery access is read-only. Has no effect if credentials are - provided. Default True. + Bool indicating if BigQuery access is read-only. Has no effect if + credentials are provided. Default True. swallow_results : bool - If set to False, then return the actual response value instead of converting to - boolean. Default True. + If set to False, then return the actual response value instead of + converting to boolean. Default True. Returns ------- @@ -94,11 +102,13 @@ def get_client(project_id=None, credentials=None, if not credentials: assert (service_account and (private_key or private_key_file)) or ( json_key or json_key_file), \ - 'Must provide AssertionCredentials or service account and P12 key or JSON key' + 'Must provide AssertionCredentials or service account and P12 key\ + or JSON key' if not project_id: assert json_key or json_key_file, \ - 'Must provide project_id unless json_key or json_key_file is provided' + 'Must provide project_id unless json_key or json_key_file is\ + provided' if service_url is None: service_url = DISCOVERY_URI @@ -266,15 +276,15 @@ def query(self, query, max_results=None, timeout=0, dry_run=False): the request times out and returns. dry_run : bool, optional If True, the query isn't actually run. A valid query will return an - empty response, while an invalid one will return the same error message - it would if it wasn't a dry run. + empty response, while an invalid one will return the same error + message it would if it wasn't a dry run. Returns ------- tuple - (job id, query results) if the query completed. If dry_run is True, job id - will be None and results will be empty if the query is valid or a ``dict`` containing - the response if invalid. + (job id, query results) if the query completed. If dry_run is True, + job id will be None and results will be empty if the query is valid + or a ``dict`` containing the response if invalid. Raises ------ @@ -356,8 +366,8 @@ def check_job(self, job_id): ------- tuple (``bool``, ``int``) Whether or not the query has completed and the - total number of rows included in the query table if it has completed - (else 0) + total number of rows included in the query table if it has + completed (else 0) """ query_reply = self.get_query_results(job_id, offset=0, limit=0) @@ -367,8 +377,8 @@ def check_job(self, job_id): def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): """Retrieve a list of rows from a query table by job id. - This method will append results from multiple pages together. If you want - to manually page through results, you can use `get_query_results` + This method will append results from multiple pages together. If you + want to manually page through results, you can use `get_query_results` method directly. 
Parameters @@ -389,7 +399,8 @@ def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): """ # Get query results - query_reply = self.get_query_results(job_id, offset=offset, limit=limit, timeout=timeout) + query_reply = self.get_query_results(job_id, offset=offset, + limit=limit, timeout=timeout) if not query_reply['jobComplete']: logger.warning('BigQuery job %s not complete' % job_id) raise UnfinishedQueryException() @@ -401,8 +412,9 @@ def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): # Append to records if there are multiple pages for query results while page_token and (not limit or len(records) < limit): - query_reply = self.get_query_results(job_id, offset=offset, limit=limit, - page_token=page_token, timeout=timeout) + query_reply = self.get_query_results( + job_id, offset=offset, limit=limit, page_token=page_token, + timeout=timeout) page_token = query_reply.get("pageToken") rows = query_reply.get('rows', []) records += [self._transform_row(row, schema) for row in rows] @@ -533,8 +545,7 @@ def create_table(self, dataset, table, schema, expiration_time=None): except HttpError as e: logger.error(('Cannot create table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, - e.content)) + 'Http Error: {2}').format(dataset, table, e.content)) if self.swallow_results: return False else: @@ -581,8 +592,7 @@ def update_table(self, dataset, table, schema): except HttpError as e: logger.error(('Cannot update table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, - e.content)) + 'Http Error: {2}').format(dataset, table, e.content)) if self.swallow_results: return False else: @@ -629,8 +639,7 @@ def patch_table(self, dataset, table, schema): except HttpError as e: logger.error(('Cannot patch table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, - e.content)) + 'Http Error: {2}').format(dataset, table, e.content)) if self.swallow_results: return False else: @@ -679,8 +688,7 @@ def create_view(self, dataset, view, query): except HttpError as e: logger.error(('Cannot create view {0}.{1}\n' - 'Http Error: {2}').format(dataset, view, - e.content)) + 'Http Error: {2}').format(dataset, view, e.content)) if self.swallow_results: return False else: @@ -716,8 +724,7 @@ def delete_table(self, dataset, table): except HttpError as e: logger.error(('Cannot delete table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, - e.content)) + 'Http Error: {2}').format(dataset, table, e.content)) if self.swallow_results: return False else: @@ -775,21 +782,23 @@ def import_data_from_uris( skip_leading_rows=None, ): """ - Imports data into a BigQuery table from cloud storage. Optional arguments that are not - specified are determined by BigQuery as described: - https://developers.google.com/bigquery/docs/reference/v2/jobs + Imports data into a BigQuery table from cloud storage. 
Optional + arguments that are not specified are determined by BigQuery as + described: + https://developers.google.com/bigquery/docs/reference/v2/jobs Parameters ---------- source_urls : list - A ``list`` of ``str`` objects representing the urls on cloud storage - of the form: gs://bucket/filename + A ``list`` of ``str`` objects representing the urls on cloud + storage of the form: gs://bucket/filename dataset : str String id of the dataset table : str String id of the table job : str, optional - Identifies the job (a unique job id is automatically generated if not provided) + Identifies the job (a unique job id is automatically generated if + not provided) schema : list, optional Represents the BigQuery schema source_format : str, optional @@ -925,8 +934,8 @@ def export_data_to_uris( field_delimiter=None, ): """ - Export data from a BigQuery table to cloud storage. Optional arguments that are - not specified are determined by BigQuery as described: + Export data from a BigQuery table to cloud storage. Optional arguments + that are not specified are determined by BigQuery as described: https://developers.google.com/bigquery/docs/reference/v2/jobs Parameters @@ -939,8 +948,8 @@ def export_data_to_uris( table : str String id of the table job : str, optional - String identifying the job (a unique jobid is automatically generated if - not provided) + String identifying the job (a unique jobid is automatically + generated if not provided) compression : str, optional One of the JOB_COMPRESSION_* constants destination_format : str, optional @@ -1110,8 +1119,8 @@ def wait_for_job(self, job, interval=5, timeout=60): Parameters ---------- job : Union[dict, str] - ``dict`` representing a BigQuery job resource, or a ``str`` representing - the BigQuery job id + ``dict`` representing a BigQuery job resource, or a ``str`` + representing the BigQuery job id interval : float, optional Polling interval in seconds, default = 5 timeout : float, optional @@ -1323,7 +1332,8 @@ def _parse_table_name(self, table_id): Returns ------- tuple - (year/month, app id), or (None, None) if the table id cannot be parsed. + (year/month, app id), or (None, None) if the table id cannot be + parsed. """ # Prefix date @@ -1394,9 +1404,11 @@ def _in_range(self, start_time, end_time, time): time <= start_time <= time + ONE_MONTH or \ time <= end_time <= time + ONE_MONTH - def get_query_results(self, job_id, offset=None, limit=None, page_token=None, timeout=0): - """Execute the query job indicated by the given job id. This is direct mapping to - bigquery api https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults + def get_query_results(self, job_id, offset=None, limit=None, + page_token=None, timeout=0): + """Execute the query job indicated by the given job id. This is direct + mapping to bigquery api + https://cloud.google.com/bigquery/docs/reference/v2/jobs/getQueryResults Parameters ---------- @@ -1407,7 +1419,8 @@ def get_query_results(self, job_id, offset=None, limit=None, page_token=None, ti limit : int, optional The maximum number of results to retrieve. page_token : optional - Page token, returned by previous call, to request the next page of results. + Page token, returned by previous call, to request the next page of + results. 
timeout : float, optional Timeout in seconds @@ -1551,8 +1564,8 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, Parameters ---------- dataset_id : str - Unique ``str`` identifying the dataset with the project (the referenceID - of the dataset, not the integer id of the dataset) + Unique ``str`` identifying the dataset with the project (the + referenceID of the dataset, not the integer id of the dataset) friendly_name: str, optional A human readable name description: str, optional @@ -1581,8 +1594,8 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, else: return response except HttpError as e: - logger.error('Cannot create dataset {0}, {1}'.format(dataset_id, - e)) + logger.error( + 'Cannot create dataset {0}, {1}'.format(dataset_id, e)) if self.swallow_results: return False else: @@ -1611,10 +1624,11 @@ def delete_dataset(self, dataset_id, delete_contents=False): Parameters ---------- dataset_id : str - Unique ``str`` identifying the datset with the project (the referenceId of the dataset) + Unique ``str`` identifying the datset with the project (the + referenceId of the dataset) delete_contents : bool, optional - If True, forces the deletion of the dataset even when the dataset contains data - (Default = False) + If True, forces the deletion of the dataset even when the dataset + contains data (Default = False) Returns ------- @@ -1638,8 +1652,8 @@ def delete_dataset(self, dataset_id, delete_contents=False): else: return response except HttpError as e: - logger.error('Cannot delete dataset {0}: {1}'.format(dataset_id, - e)) + logger.error( + 'Cannot delete dataset {0}: {1}'.format(dataset_id, e)) if self.swallow_results: return False else: @@ -1654,7 +1668,8 @@ def update_dataset(self, dataset_id, friendly_name=None, description=None, Parameters ---------- dataset_id : str - Unique ``str`` identifying the dataset with the project (the referencedId of the dataset) + Unique ``str`` identifying the dataset with the project (the + referencedId of the dataset) friendly_name : str, optional An optional descriptive name for the dataset. description : str, optional @@ -1665,8 +1680,8 @@ def update_dataset(self, dataset_id, friendly_name=None, description=None, Returns ------- Union[bool, dict] - ``bool`` indicating if the update was successful or not, or response - from BigQuery if swallow_results is set for False. + ``bool`` indicating if the update was successful or not, or + response from BigQuery if swallow_results is set for False. """ try: datasets = self.bigquery.datasets() @@ -1681,8 +1696,8 @@ def update_dataset(self, dataset_id, friendly_name=None, description=None, else: return response except HttpError as e: - logger.error('Cannot update dataset {0}: {1}'.format(dataset_id, - e)) + logger.error( + 'Cannot update dataset {0}: {1}'.format(dataset_id, e)) if self.swallow_results: return False else: @@ -1697,7 +1712,8 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, Parameters ---------- dataset_id : str - Unique string idenfitying the dataset with the project (the referenceId of the dataset) + Unique string idenfitying the dataset with the project (the + referenceId of the dataset) friendly_name : str, optional An optional descriptive name for the dataset. 
description : str, optional @@ -1723,8 +1739,7 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, else: return response except HttpError as e: - logger.error('Cannot patch dataset {0}: {1}'.format(dataset_id, - e)) + logger.error('Cannot patch dataset {0}: {1}'.format(dataset_id, e)) if self.swallow_results: return False else: @@ -1732,7 +1747,8 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, def dataset_resource(self, ref_id, friendly_name=None, description=None, access=None): - """See https://developers.google.com/bigquery/docs/reference/v2/datasets#resource + """See + https://developers.google.com/bigquery/docs/reference/v2/datasets#resource Parameters ---------- @@ -1775,9 +1791,10 @@ def schema_from_record(cls, record): record : dict representing a record to be inserted into big query, where all keys are ``str`` objects (representing column names in - the record) and all values are of type ``int``, ``str``, ``unicode``, - ``float``, ``bool``, ``datetime``, or ``dict``. A ``dict`` value represents a - record, and must conform to the same restrictions as record + the record) and all values are of type ``int``, ``str``, + ``unicode``, ``float``, ``bool``, ``datetime``, or ``dict``. A + ``dict`` value represents a record, and must conform to the same + restrictions as record. Returns ------- @@ -1786,9 +1803,10 @@ def schema_from_record(cls, record): Notes ----- - Results are undefined if a different value type is provided for a repeated - field: E.g. + Results are undefined if a different value type is provided for a + repeated field: E.g. >>> { rfield: [ { x: 1}, {x: "a string"} ] } # undefined! """ + from bigquery.schema_builder import schema_from_record return schema_from_record(record) From 7da5061c3e8a51bc1a1ea35da799795b2be34b46 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Sat, 23 Apr 2016 13:56:46 -0500 Subject: [PATCH 078/146] Fix version --- bigquery/__init__.py | 2 +- bigquery/version.py | 1 + setup.py | 9 +++++++-- 3 files changed, 9 insertions(+), 3 deletions(-) create mode 100644 bigquery/version.py diff --git a/bigquery/__init__.py b/bigquery/__init__.py index 3a4f000..b393875 100644 --- a/bigquery/__init__.py +++ b/bigquery/__init__.py @@ -1,6 +1,6 @@ from __future__ import absolute_import -__version__ = '1.7.0' +from .version import __version__ from .client import get_client from .client import ( diff --git a/bigquery/version.py b/bigquery/version.py new file mode 100644 index 0000000..0e1a38d --- /dev/null +++ b/bigquery/version.py @@ -0,0 +1 @@ +__version__ = '1.7.0' diff --git a/setup.py b/setup.py index acdaf5e..fc1c5de 100644 --- a/setup.py +++ b/setup.py @@ -1,12 +1,17 @@ +from distutils.util import convert_path from setuptools import find_packages from setuptools import setup -from bigquery import __version__ + +ns = {} +version_path = convert_path('bigquery/version.py') +with open(version_path) as version_file: + exec(version_file.read(), ns) setup_args = dict( name='BigQuery-Python', description='Simple Python client for interacting with Google BigQuery.', url='https://github.com/tylertreat/BigQuery-Python', - version=__version__, + version=ns['__version__'], license='Apache', packages=find_packages(), include_package_data=True, From c9eb5e4cf10682b88ef60c134ce4724d1f18589c Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Sat, 23 Apr 2016 14:05:44 -0500 Subject: [PATCH 079/146] PEP8 formatting fixes --- bigquery/query_builder.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 
deletions(-) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index b6f568b..8fc403f 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -15,21 +15,24 @@ def render_query(dataset, tables, select=None, conditions=None, tables : Union[dict, list] The table in `dataset` to query. select : dict, optional - The keys function as column names and the values function as options to apply to - the select field such as alias and format. For example, select['start_time'] might - have the form {'alias': 'StartTime', 'format': 'INTEGER-FORMAT_UTC_USEC'}, which would - be represented as 'SEC_TO_TIMESTAMP(INTEGER(start_time)) as StartTime' in a query. Pass - `None` to seoect all. + The keys function as column names and the values function as options to + apply to the select field such as alias and format. For example, + select['start_time'] might have the form + {'alias': 'StartTime', 'format': 'INTEGER-FORMAT_UTC_USEC'}, which + would be represented as 'SEC_TO_TIMESTAMP(INTEGER(start_time)) as + StartTime' in a query. Pass `None` to select all. conditions : list, optional - a ``list`` of ``dict`` objects to filter results by. Each dict should have the keys 'field', - 'type', and 'comparators'. The first two map to strings representing the field (e.g. 'foo') - and type (e.g. 'FLOAT'). 'comparators' maps to another ``dict`` containing the keys 'condition', - 'negate', and 'value'. If 'comparators' = {'condition': '>=', 'negate': False, 'value': 1}, this - example will be rdnered as 'foo >= FLOAT('1')' in the query. + a ``list`` of ``dict`` objects to filter results by. Each dict should + have the keys 'field', 'type', and 'comparators'. The first two map to + strings representing the field (e.g. 'foo') and type (e.g. 'FLOAT'). + 'comparators' maps to another ``dict`` containing the keys 'condition', + 'negate', and 'value'. + If 'comparators' = {'condition': '>=', 'negate': False, 'value': 1}, + this example will be rdnered as 'foo >= FLOAT('1')' in the query. ``list`` of field names to group by order_by : dict, optional - Keys = {'field', 'direction'}. `dict` should be formatted as {'field':'TimeStamp, 'direction':'desc'} - or similar + Keys = {'field', 'direction'}. `dict` should be formatted as + {'field':'TimeStamp, 'direction':'desc'} or similar Returns ------- @@ -149,7 +152,8 @@ def _render_sources(dataset, tables): tables['from_date'], tables['to_date']) except KeyError as exp: - logger.warn('Missing parameter %s in selecting sources' % (exp)) + logger.warn( + 'Missing parameter %s in selecting sources' % (exp)) else: return "FROM " + ", ".join( From bab1c997ea4b40ee84dc341a6bc10022380311e1 Mon Sep 17 00:00:00 2001 From: Takashi Nishibayashi Date: Thu, 28 Apr 2016 15:02:06 +0900 Subject: [PATCH 080/146] Add template_suffix option support --- bigquery/client.py | 9 ++++++++- bigquery/tests/test_client.py | 5 ++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 9bab750..ea5c503 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1162,7 +1162,8 @@ def wait_for_job(self, job, interval=5, timeout=60): return job_resource def push_rows(self, dataset, table, rows, insert_id_key=None, - skip_invalid_rows=None, ignore_unknown_values=None): + skip_invalid_rows=None, ignore_unknown_values=None, + template_suffix=None): """Upload rows to BigQuery table. 
Parameters @@ -1179,6 +1180,9 @@ def push_rows(self, dataset, table, rows, insert_id_key=None, Insert all valid rows of a request, even if invalid rows exist. ignore_unknown_values : bool, optional Accept rows that contain values that do not match the schema. + template_suffix : str, optional + Inserts the rows into an {table}{template_suffix}. + If table {table}{template_suffix} doesn't exist, create from {table}. Returns ------- @@ -1208,6 +1212,9 @@ def push_rows(self, dataset, table, rows, insert_id_key=None, if ignore_unknown_values is not None: data['ignoreUnknownValues'] = ignore_unknown_values + if template_suffix is not None: + data['templateSuffix'] = template_suffix + try: response = table_data.insertAll( projectId=self.project_id, diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index ffd7818..39bf05b 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -2176,9 +2176,12 @@ def test_request_data_with_options(self): self.dataset, self.table, self.rows, insert_id_key='one', ignore_unknown_values=True, - skip_invalid_rows=True) + skip_invalid_rows=True, + template_suffix='20160428' + ) expected_body['ignoreUnknownValues'] = True expected_body['skipInvalidRows'] = True + expected_body['templateSuffix'] = '20160428' self.mock_table_data.insertAll.assert_called_with( projectId=self.project, datasetId=self.dataset, From d51b251242c1004017ab7839bce33e85d1f258ee Mon Sep 17 00:00:00 2001 From: Ruxandra Burtica Date: Mon, 16 May 2016 19:26:24 +0300 Subject: [PATCH 081/146] Added get_all_tables method, returning a list with all tables within a dataset #97 --- bigquery/client.py | 68 +++++++++++++++++++++++++++-------- bigquery/tests/test_client.py | 38 ++++++++++++++++++-- 2 files changed, 89 insertions(+), 17 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index ea5c503..dff7307 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1249,8 +1249,32 @@ def push_rows(self, dataset, table, rows, insert_id_key=None, }] } + def get_all_tables(self, dataset_id): + """Retrieve a list of tables for the dataset. + + Parameters + ---------- + dataset_id : str + The dataset to retrieve table data for. + + Returns + ------- + dict + A ``list`` with all table names + """ + tables_data = self._get_all_tables_for_dataset(dataset_id) + + tables = [] + for table in tables_data['tables']: + table_name = table.get('tableReference', {}).get('tableId') + if table_name: + tables.append(table_name) + return tables + def _get_all_tables(self, dataset_id, cache=False): - """Retrieve a list of all tables for the dataset. + """Retrieve the list of tables for dataset, that respect the formats: + * appid_YYYY_MM + * YYYY_MM_appid Parameters ---------- @@ -1272,23 +1296,39 @@ def _get_all_tables(self, dataset_id, cache=False): do_fetch = False if do_fetch: - result = self.bigquery.tables().list( - projectId=self.project_id, - datasetId=dataset_id).execute() - - page_token = result.get('nextPageToken') - while page_token: - res = self.bigquery.tables().list( - projectId=self.project_id, - datasetId=dataset_id, - pageToken=page_token - ).execute() - page_token = res.get('nextPageToken') - result['tables'] += res.get('tables', []) + result = self._get_all_tables_for_dataset(dataset_id) self.cache[dataset_id] = (datetime.now(), result) return self._parse_table_list_response(result) + def _get_all_tables_for_dataset(self, dataset_id): + """Retrieve a list of all tables for the dataset. 
+ + Parameters + ---------- + dataset_id : str + The dataset to retrieve table names for + + Returns + ------- + dict + A ``dict`` containing tables key with all tables + """ + result = self.bigquery.tables().list( + projectId=self.project_id, + datasetId=dataset_id).execute() + + page_token = result.get('nextPageToken') + while page_token: + res = self.bigquery.tables().list( + projectId=self.project_id, + datasetId=dataset_id, + pageToken=page_token + ).execute() + page_token = res.get('nextPageToken') + result['tables'] += res.get('tables', []) + return result + def _parse_table_list_response(self, list_response): """Parse the response received from calling list on tables. diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 39bf05b..263e2d7 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1290,12 +1290,21 @@ def test_not_inside_range(self): "tableId": "appspot_6_2013_06" } }, + { + "kind": "bigquery#table", + "id": "project:dataset.table_not_matching_naming", + "tableReference": { + "projectId": "project", + "datasetId": "dataset", + "tableId": "table_not_matching_naming" + } + }, { "kind": "bigquery#table", "id": "bad table data" - } + }, ], - "totalItems": 8 + "totalItems": 9 } @@ -2191,7 +2200,7 @@ def test_request_data_with_options(self): class TestGetAllTables(unittest.TestCase): - def test_get_tables(self): + def test_get_all_tables(self): """Ensure get_all_tables fetches table names from BigQuery.""" mock_execute = mock.Mock() @@ -2205,6 +2214,29 @@ def test_get_tables(self): bq = client.BigQueryClient(mock_bq_service, 'project') + expected_result = [ + '2013_05_appspot', '2013_06_appspot_1', '2013_06_appspot_2', + '2013_06_appspot_3', '2013_06_appspot_4', '2013_06_appspot_5', + 'appspot_6_2013_06', 'table_not_matching_naming' + ] + + tables = bq.get_all_tables('dataset') + self.assertEquals(expected_result, tables) + + def test_get_tables(self): + """Ensure _get_all_tables fetches table names from BigQuery.""" + + mock_execute = mock.Mock() + mock_execute.execute.return_value = FULL_TABLE_LIST_RESPONSE + + mock_tables = mock.Mock() + mock_tables.list.return_value = mock_execute + + mock_bq_service = mock.Mock() + mock_bq_service.tables.return_value = mock_tables + + bq = client.BigQueryClient(mock_bq_service, 'project') + expected_result = { 'appspot-3': {'2013_06_appspot_3': 1370044800}, 'appspot-2': {'2013_06_appspot_2': 1370044800}, From 76bb8c2d8270238a243e054a2bfaf59a8819ced8 Mon Sep 17 00:00:00 2001 From: Ruxandra Burtica Date: Mon, 16 May 2016 19:31:07 +0300 Subject: [PATCH 082/146] Updated docstring. #97 --- bigquery/client.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index dff7307..4917e35 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1259,8 +1259,7 @@ def get_all_tables(self, dataset_id): Returns ------- - dict - A ``list`` with all table names + A ``list`` with all table names """ tables_data = self._get_all_tables_for_dataset(dataset_id) From 67af0041815d1a564a3f81c7a73c0218da520b6b Mon Sep 17 00:00:00 2001 From: Ruxandra Burtica Date: Fri, 20 May 2016 13:44:50 +0300 Subject: [PATCH 083/146] Fixed KeyError when there are no tables for a dataset - returning [] in that case. 
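The tables.list response omits the "tables" key entirely when a dataset is
empty, so the lookup has to be defensive. A minimal sketch of the behaviour
this change guarantees (the dataset name is hypothetical):

    # An empty dataset now yields an empty list instead of raising KeyError.
    assert client.get_all_tables('empty_dataset') == []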
--- bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index 4917e35..390ff68 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1264,7 +1264,7 @@ def get_all_tables(self, dataset_id): tables_data = self._get_all_tables_for_dataset(dataset_id) tables = [] - for table in tables_data['tables']: + for table in tables_data.get('tables', []): table_name = table.get('tableReference', {}).get('tableId') if table_name: tables.append(table_name) From 5fa94427230970747211941f1ef844b8f856a8ba Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Sat, 21 May 2016 12:54:05 -0500 Subject: [PATCH 084/146] Bump version to 1.8.0 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index 0e1a38d..b280975 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.7.0' +__version__ = '1.8.0' From 3466050cb182ef5b6553d891c2cf0f3b254a57a9 Mon Sep 17 00:00:00 2001 From: Ruxandra Burtica Date: Tue, 24 May 2016 08:37:45 +0300 Subject: [PATCH 085/146] Added limit to BigQuery. --- README.md | 3 ++- bigquery/query_builder.py | 28 +++++++++++++++++++++++++--- bigquery/version.py | 2 +- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 6b4606c..7cf342a 100644 --- a/README.md +++ b/README.md @@ -126,7 +126,8 @@ query = render_query( conditions=conditions, groupings=grouping, having=having, - order_by=order_by + order_by=order_by, + limit=47 ) job_id, _ = client.query(query) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index 8fc403f..7362148 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -4,7 +4,7 @@ def render_query(dataset, tables, select=None, conditions=None, - groupings=None, having=None, order_by=None): + groupings=None, having=None, order_by=None, limit=None): """Render a query that will run over the given tables using the specified parameters. @@ -33,6 +33,8 @@ def render_query(dataset, tables, select=None, conditions=None, order_by : dict, optional Keys = {'field', 'direction'}. `dict` should be formatted as {'field':'TimeStamp, 'direction':'desc'} or similar + limit : int, optional + Limit the amount of data needed to be returned. Returns ------- @@ -43,13 +45,14 @@ def render_query(dataset, tables, select=None, conditions=None, if None in (dataset, tables): return None - query = "%s %s %s %s %s %s" % ( + query = "%s %s %s %s %s %s %s" % ( _render_select(select), _render_sources(dataset, tables), _render_conditions(conditions), _render_groupings(groupings), _render_having(having), - _render_order(order_by) + _render_order(order_by), + _render_limit(limit) ) return query @@ -372,3 +375,22 @@ def _render_order(order): return '' return "ORDER BY %s %s" % (", ".join(order['fields']), order['direction']) + + +def _render_limit(limit): + """Render the limit part of a query. + + Parameters + ---------- + limit : int, optional + Limit the amount of data needed to be returned. + + Returns + ------- + str + A string that represents the "limit" part of a query. 
+ """ + if not limit: + return '' + + return "LIMIT %s" % limit diff --git a/bigquery/version.py b/bigquery/version.py index 0e1a38d..e8b6b09 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.7.0' +__version__ = '1.8.1' From a9510cf433d937b4bc7159dcd28870560b1af134 Mon Sep 17 00:00:00 2001 From: Jordan Howlett Date: Tue, 24 May 2016 17:15:49 -0400 Subject: [PATCH 086/146] Add support for UseLegacySQL boolean in order to use BigQuerys standard SQL --- bigquery/client.py | 18 ++++++++++++++++-- bigquery/tests/test_client.py | 25 +++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 390ff68..5848b7e 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -262,7 +262,7 @@ def _insert_job(self, body_object): body=body_object ).execute() - def query(self, query, max_results=None, timeout=0, dry_run=False): + def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sql=None): """Submit a query to BigQuery. Parameters @@ -278,6 +278,9 @@ def query(self, query, max_results=None, timeout=0, dry_run=False): If True, the query isn't actually run. A valid query will return an empty response, while an invalid one will return the same error message it would if it wasn't a dry run. + use_legacy_sql : bool, optional. Default True. + If False, the query will use BigQuery's standard SQL (https://cloud.google.com/bigquery/sql-reference/) + Returns ------- @@ -298,8 +301,12 @@ def query(self, query, max_results=None, timeout=0, dry_run=False): 'query': query, 'timeoutMs': timeout * 1000, 'dryRun': dry_run, - 'maxResults': max_results, + 'maxResults': max_results } + + if use_legacy_sql is not None: + query_data['useLegacySql'] = use_legacy_sql + return self._submit_query_job(query_data) def get_query_schema(self, job_id): @@ -1027,6 +1034,7 @@ def write_to_table( priority=None, create_disposition=None, write_disposition=None, + use_legacy_sql=None ): """ Write query result to table. If dataset or table is not provided, @@ -1055,6 +1063,9 @@ def write_to_table( One of the JOB_CREATE_* constants write_disposition : str, optional One of the JOB_WRITE_* constants + use_legacy_sql: + If False, the query will use BigQuery's standard SQL (https://cloud.google.com/bigquery/sql-reference/) + Returns ------- @@ -1084,6 +1095,9 @@ def write_to_table( if use_query_cache is not None: configuration['useQueryCache'] = use_query_cache + if use_legacy_sql is not None: + configuration['useLegacySql'] = use_legacy_sql + if priority: configuration['priority'] = priority diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 263e2d7..bd7d4d6 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -253,6 +253,7 @@ def test_query(self): self.assertEquals(job_id, 'spiderman') self.assertEquals(results, []) + def test_query_max_results_set(self): """Ensure that we retrieve the job id from the query and the maxResults parameter is set. 
@@ -418,6 +419,30 @@ def test_query_with_results(self): self.assertEquals(job_id, 'spiderman') self.assertEquals(results, [{'foo': 10}]) + def test_query_with_using_legacy_sql(self): + """Ensure that use_legacy_sql bool gets used""" + + mock_query_job = mock.Mock() + expected_job_id = 'spiderman' + expected_job_ref = {'jobId': expected_job_id} + + mock_query_job.execute.return_value = { + 'jobReference': expected_job_ref, + 'jobComplete': True + } + + self.mock_job_collection.query.return_value = mock_query_job + + job_id, results = self.client.query(self.query, use_legacy_sql=False) + + self.mock_job_collection.query.assert_called_once_with( + projectId=self.project_id, + body={'query': self.query, 'timeoutMs': 0, 'dryRun': False, + 'maxResults': None, 'useLegacySql': False} + ) + self.assertEquals(job_id, 'spiderman') + self.assertEquals(results, []) + class TestGetQueryResults(unittest.TestCase): From cf6be16ca76358e7ebf075eee4557e4605cd8cbb Mon Sep 17 00:00:00 2001 From: Ruxandra Burtica Date: Fri, 8 Jul 2016 17:17:20 +0300 Subject: [PATCH 087/146] Getting back to the BigQuery-Python version --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index e8b6b09..b280975 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.8.1' +__version__ = '1.8.0' From 8ece369d380a175f99f3e8728f7b82df6699a33e Mon Sep 17 00:00:00 2001 From: Ruxandra Burtica Date: Sun, 10 Jul 2016 10:17:05 +0300 Subject: [PATCH 088/146] Fixed tests. #104 --- bigquery/tests/test_query_builder.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/bigquery/tests/test_query_builder.py b/bigquery/tests/test_query_builder.py index df37a3e..8b77603 100644 --- a/bigquery/tests/test_query_builder.py +++ b/bigquery/tests/test_query_builder.py @@ -399,7 +399,7 @@ def test_full_query(self): " WHERE (start_time <= INTEGER('1371566954')) AND " "(start_time >= INTEGER('1371556954')) GROUP BY " "timestamp, status HAVING (status == INTEGER('1')) " - "ORDER BY timestamp desc") + "ORDER BY timestamp desc ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -427,7 +427,7 @@ def test_empty_conditions(self): expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] ORDER BY " - "timestamp desc") + "timestamp desc ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -464,7 +464,7 @@ def test_incorrect_conditions(self): expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] ORDER BY " - "timestamp desc") + "timestamp desc ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -516,7 +516,7 @@ def test_multiple_condition_values(self): "INTEGER('1371556954')) AND " "((resource CONTAINS STRING('foo') AND resource " "CONTAINS STRING('baz')) AND (NOT resource CONTAINS " - "STRING('bar'))) ORDER BY timestamp desc") + "STRING('bar'))) ORDER BY timestamp desc ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -550,7 +550,7 @@ def 
test_negated_condition_value(self): expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (NOT resource " - "CONTAINS STRING('foo')) ORDER BY timestamp desc") + "CONTAINS STRING('foo')) ORDER BY timestamp desc ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -593,7 +593,7 @@ def test_multiple_negated_condition_values(self): "[dataset.2013_06_appspot_1] WHERE (NOT resource " "CONTAINS STRING('foo') AND NOT resource CONTAINS " "STRING('baz') AND NOT resource CONTAINS " - "STRING('bar')) ORDER BY timestamp desc") + "STRING('bar')) ORDER BY timestamp desc ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -631,7 +631,7 @@ def test_empty_order(self): "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ") + "INTEGER('1371556954')) ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -669,7 +669,7 @@ def test_incorrect_order(self): "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ") + "INTEGER('1371556954')) ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -702,7 +702,7 @@ def test_empty_select(self): expected_query = ("SELECT * FROM [dataset.2013_06_appspot_1] " "WHERE (start_time <= INTEGER('1371566954')) AND " "(start_time >= INTEGER('1371556954')) ORDER BY " - "timestamp desc") + "timestamp desc ") self.assertEqual(result, expected_query) def test_no_alias(self): @@ -777,7 +777,7 @@ def test_formatting(self): "resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ORDER BY timestamp desc") + "INTEGER('1371556954')) ORDER BY timestamp desc ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -830,7 +830,7 @@ def test_formatting_duplicate_columns(self): "[dataset.2013_06_appspot_1] WHERE " "(start_time <= INTEGER('1371566954')) AND " "(start_time >= INTEGER('1371556954')) ORDER BY " - "timestamp desc") + "timestamp desc ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -874,7 +874,7 @@ def test_sec_to_micro_formatting(self): "timestamp, resource as url FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ORDER BY timestamp desc") + "INTEGER('1371556954')) ORDER BY timestamp desc ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -930,7 +930,7 @@ def test_empty_groupings(self): expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM " "[dataset.2013_06_appspot_1] ORDER BY " - "timestamp desc") + "timestamp desc ") expected_select = (expected_query[len('SELECT '):] 
.split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -971,7 +971,7 @@ def test_multi_tables(self): "[dataset.2013_07_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " "INTEGER('1371556954')) GROUP BY timestamp, status " - "ORDER BY timestamp desc") + "ORDER BY timestamp desc ") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] From e1a08ab286da7bb4827efc516e9e173770726a7b Mon Sep 17 00:00:00 2001 From: Ruxandra Burtica Date: Sun, 10 Jul 2016 10:28:16 +0300 Subject: [PATCH 089/146] Added limit tests #104 --- bigquery/tests/test_query_builder.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/bigquery/tests/test_query_builder.py b/bigquery/tests/test_query_builder.py index 8b77603..d9381c5 100644 --- a/bigquery/tests/test_query_builder.py +++ b/bigquery/tests/test_query_builder.py @@ -340,6 +340,27 @@ def test_no_fields(self): self.assertEqual(result, "") +class TestLimit(unittest.TestCase): + + def test_with_limit(self): + """Ensure that render limit works.""" + from bigquery.query_builder \ + import _render_limit + + result = _render_limit(8) + + self.assertEqual(result, "LIMIT 8") + + def test_no_fields(self): + """Ensure that render limit can work without any arguments.""" + from bigquery.query_builder \ + import _render_limit + + result = _render_limit(None) + + self.assertEqual(result, "") + + class TestRenderQuery(unittest.TestCase): def test_full_query(self): @@ -392,14 +413,16 @@ def test_full_query(self): 'type': 'INTEGER' } ], - order_by={'fields': ['timestamp'], 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}, + limit=10) expected_query = ("SELECT status as status, start_time as timestamp, " "resource as url FROM [dataset.2013_06_appspot_1]" " WHERE (start_time <= INTEGER('1371566954')) AND " "(start_time >= INTEGER('1371556954')) GROUP BY " "timestamp, status HAVING (status == INTEGER('1')) " - "ORDER BY timestamp desc ") + "ORDER BY timestamp desc " + "LIMIT 10") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] From 5cbb968e8cfa521aea312d1a8ea8114dbdfc5013 Mon Sep 17 00:00:00 2001 From: Ruxandra Burtica Date: Sun, 10 Jul 2016 10:48:48 +0300 Subject: [PATCH 090/146] Added limit to test_full_query. 
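For reference, a hedged sketch of the call the updated test exercises
(dataset, table and field names are illustrative only):

    from bigquery.query_builder import render_query

    query = render_query(
        'dataset', ['2013_06_appspot_1'],
        order_by={'fields': ['timestamp'], 'direction': 'desc'},
        limit=10)
    # the rendered query ends with "ORDER BY timestamp desc LIMIT 10"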
#104 --- bigquery/tests/test_query_builder.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/bigquery/tests/test_query_builder.py b/bigquery/tests/test_query_builder.py index d9381c5..6e9e9ee 100644 --- a/bigquery/tests/test_query_builder.py +++ b/bigquery/tests/test_query_builder.py @@ -421,8 +421,7 @@ def test_full_query(self): " WHERE (start_time <= INTEGER('1371566954')) AND " "(start_time >= INTEGER('1371556954')) GROUP BY " "timestamp, status HAVING (status == INTEGER('1')) " - "ORDER BY timestamp desc " - "LIMIT 10") + "ORDER BY timestamp desc LIMIT 10") expected_select = (expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) expected_from = expected_query[len('SELECT '):].split('FROM')[1] @@ -755,7 +754,7 @@ def test_no_alias(self): expected_query = ("SELECT status , start_time , resource FROM " "[dataset.2013_06_appspot_1] WHERE (start_time " "<= INTEGER('1371566954')) AND (start_time >= " - "INTEGER('1371556954')) ORDER BY start_time desc") + "INTEGER('1371556954')) ORDER BY start_time desc ") expected_select = (field.strip() for field in expected_query[len('SELECT '):] .split('FROM')[0].strip().split(', ')) @@ -931,7 +930,8 @@ def test_no_table_or_dataset(self): 'negate': False}], 'type': 'INTEGER'}, ], - order_by={'fields': ['timestamp'], 'direction': 'desc'}) + order_by={'fields': ['timestamp'], 'direction': 'desc'}, + limit=10) self.assertIsNone(result) From 73ded9d5b77843685498c60050ad90aa23d14612 Mon Sep 17 00:00:00 2001 From: Ruxandra Burtica Date: Sun, 10 Jul 2016 11:46:34 +0300 Subject: [PATCH 091/146] Added method for getting project IDs. --- bigquery/client.py | 7 +++++++ bigquery/tests/test_client.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/bigquery/client.py b/bigquery/client.py index 390ff68..ca210fc 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -148,6 +148,13 @@ def get_client(project_id=None, credentials=None, return BigQueryClient(bq_service, project_id, swallow_results) +def get_project_ids(bq_service): + """Given the BigQuery service, return all project IDs.""" + projects_request = bq_service.projects().list().execute() + return [project['id'] + for project in projects_request.get('projects', [])] + + def _get_bq_service(credentials=None, service_url=None): """Construct an authorized BigQuery service object.""" diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 263e2d7..ec8a2f4 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -214,6 +214,35 @@ def test_initialize_json_key_file_without_project_id(self, mock_open, mock_build self.assertEquals(json_key['project_id'], bq_client.project_id) +class TestGetProjectIds(unittest.TestCase): + + def test_get_project_ids(self): + mock_bq_service = mock.Mock() + mock_bq_service.projects().list().execute.return_value = { + 'kind': 'bigquery#projectList', + 'projects': [ + { + 'friendlyName': 'Big Query Test', + 'id': 'big-query-test', + 'kind': 'bigquery#project', + 'numericId': '1435372465', + 'projectReference': {'projectId': 'big-query-test'} + }, + { + 'friendlyName': 'Company', + 'id': 'company', + 'kind': 'bigquery#project', + 'numericId': '4263574685796', + 'projectReference': {'projectId': 'company'} + } + ], + 'totalItems': 2 + } + + project_ids = client.get_project_ids(mock_bq_service) + self.assertEqual(project_ids, ['big-query-test', 'company']) + + class TestQuery(unittest.TestCase): def setUp(self): From 82eb0b887a13b03180b1c24f91b9c78ee0248c8c Mon Sep 
17 00:00:00 2001 From: Ruxandra Burtica Date: Sun, 10 Jul 2016 14:58:40 +0300 Subject: [PATCH 092/146] Updated BigQuery to return more information about projects. --- bigquery/client.py | 15 +++++++++++---- bigquery/tests/test_client.py | 14 +++++++++----- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index ca210fc..9139ce9 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -148,11 +148,18 @@ def get_client(project_id=None, credentials=None, return BigQueryClient(bq_service, project_id, swallow_results) -def get_project_ids(bq_service): - """Given the BigQuery service, return all project IDs.""" +def get_projects(bq_service): + """Given the BigQuery service, return data about all projects.""" projects_request = bq_service.projects().list().execute() - return [project['id'] - for project in projects_request.get('projects', [])] + + projects = [] + for project in projects_request.get('projects', []): + project_data = { + 'id': project['id'], + 'name': project['friendlyName'] + } + projects.append(project_data) + return projects def _get_bq_service(credentials=None, service_url=None): diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index ec8a2f4..988eeb4 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -229,18 +229,22 @@ def test_get_project_ids(self): 'projectReference': {'projectId': 'big-query-test'} }, { - 'friendlyName': 'Company', - 'id': 'company', + 'friendlyName': 'BQ Company project', + 'id': 'bq-project', 'kind': 'bigquery#project', 'numericId': '4263574685796', - 'projectReference': {'projectId': 'company'} + 'projectReference': {'projectId': 'bq-project'} } ], 'totalItems': 2 } - project_ids = client.get_project_ids(mock_bq_service) - self.assertEqual(project_ids, ['big-query-test', 'company']) + projects = client.get_projects(mock_bq_service) + expected_projects_data = [ + {'id': 'big-query-test', 'name': 'Big Query Test'}, + {'id': 'bq-project', 'name': 'BQ Company project'} + ] + self.assertEqual(projects, expected_projects_data) class TestQuery(unittest.TestCase): From 8a030d8f5d21784c2ae854516ba0d321e0324be0 Mon Sep 17 00:00:00 2001 From: CK Date: Mon, 1 Aug 2016 11:27:15 +0530 Subject: [PATCH 093/146] allowing write_to_table to accept maximumBillingTire parameter --- bigquery/client.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index f4b5d6a..e6000d8 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1048,7 +1048,8 @@ def write_to_table( priority=None, create_disposition=None, write_disposition=None, - use_legacy_sql=None + use_legacy_sql=None, + maximum_billing_tier=None ): """ Write query result to table. If dataset or table is not provided, @@ -1077,9 +1078,10 @@ def write_to_table( One of the JOB_CREATE_* constants write_disposition : str, optional One of the JOB_WRITE_* constants - use_legacy_sql: + use_legacy_sql: bool, optional If False, the query will use BigQuery's standard SQL (https://cloud.google.com/bigquery/sql-reference/) - + maximum_billing_tier : integer, optional + Limits the billing tier for this job. Queries that have resource usage beyond this tier will fail (without incurring a charge). If unspecified, this will be set to your project default. 
For more information, see https://cloud.google.com/bigquery/pricing#high-compute Returns ------- @@ -1106,6 +1108,9 @@ def write_to_table( if allow_large_results is not None: configuration['allowLargeResults'] = allow_large_results + if maximum_billing_tier is not None: + configuration['maximumBillingTier'] = maximum_billing_tier + if use_query_cache is not None: configuration['useQueryCache'] = use_query_cache From d1d850d68af3fbdeea661ff6ef34309c555e5801 Mon Sep 17 00:00:00 2001 From: CK Date: Wed, 3 Aug 2016 11:48:01 +0530 Subject: [PATCH 094/146] adding unittest and following pep8 standards --- bigquery/client.py | 9 ++++++-- bigquery/tests/test_client.py | 39 +++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index e6000d8..d626117 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1079,9 +1079,14 @@ def write_to_table( write_disposition : str, optional One of the JOB_WRITE_* constants use_legacy_sql: bool, optional - If False, the query will use BigQuery's standard SQL (https://cloud.google.com/bigquery/sql-reference/) + If False, the query will use BigQuery's standard SQL + (https://cloud.google.com/bigquery/sql-reference/) maximum_billing_tier : integer, optional - Limits the billing tier for this job. Queries that have resource usage beyond this tier will fail (without incurring a charge). If unspecified, this will be set to your project default. For more information, see https://cloud.google.com/bigquery/pricing#high-compute + Limits the billing tier for this job. Queries that have resource + usage beyond this tier will fail (without incurring a charge). If + unspecified, this will be set to your project default. For more + information, + see https://cloud.google.com/bigquery/pricing#high-compute Returns ------- diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 13695a8..e462c6b 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1117,6 +1117,7 @@ def setUp(self): self.project_id = 'project' self.dataset_id = 'dataset' self.table_id = 'table' + self.maximum_billing_tier = 1000 self.external_udf_uris = ['gs://bucket/external_udf.js'] self.use_query_cache = False self.priority = "INTERACTIVE" @@ -1162,6 +1163,44 @@ def test_write(self): self.assertEqual(result, expected_result) + def test_write_maxbilltier(self): + """ Ensure that write is working when maximumBillingTier is set""" + expected_result = { + 'status': {'state': u'RUNNING'}, + } + + body = { + "configuration": { + "query": { + "destinationTable": { + "projectId": self.project_id, + "datasetId": self.dataset_id, + "tableId": self.table_id + }, + "query": self.query, + "userDefinedFunctionResources": [{ + "resourceUri": self.external_udf_uris[0] + }], + "useQueryCache": self.use_query_cache, + "priority": self.priority, + "maximumBillingTier": self.maximum_billing_tier + } + } + } + + self.mock_api.jobs().insert().execute.return_value = expected_result + result = self.client.write_to_table( + self.query, self.dataset_id, self.table_id, priority=self.priority, + external_udf_uris=self.external_udf_uris, use_query_cache=False, + maximum_billing_tier=self.maximum_billing_tier) + + self.mock_api.jobs().insert.assert_called_with( + projectId=self.project_id, + body=body + ) + + self.assertEqual(result, expected_result) + def test_write_http_error(self): """ Test write with http error""" expected_result = { From 8b2f6886e9ff38daf8027f8edb2e969f9c3f95f9 Mon Sep 17 00:00:00 
2001 From: Aaron Kavlie Date: Wed, 3 Aug 2016 16:09:07 -0700 Subject: [PATCH 095/146] Add flatten boolean option This allows for turning off flattening of query results for write_to_table, so an exact copy of (all or part of) a table can be created. --- README.md | 5 ----- bigquery/client.py | 7 +++++++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7cf342a..2053e05 100644 --- a/README.md +++ b/README.md @@ -294,11 +294,6 @@ from bigquery import schema_from_record schema_from_record({"id":123, "posts": [{"id":123, "text": "tihs is a post"}], "username": "bob"}) ``` -# Caveats - -BigQuery [flattens](https://developers.google.com/bigquery/docs/data?hl=ja#flatten) results with repeated records, so a result might actually map to multiple rows. This means that the row count may be larger than the actual number of results because BigQuery reports the number of unrolled rows but the returned results are rolled back up. - - # Contributing Requirements to commit here: diff --git a/bigquery/client.py b/bigquery/client.py index d626117..64a93c2 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1044,6 +1044,7 @@ def write_to_table( table=None, external_udf_uris=[], allow_large_results=None, + flatten=None, use_query_cache=None, priority=None, create_disposition=None, @@ -1070,6 +1071,9 @@ def write_to_table( Storage and have .js extensions. allow_large_results : bool, optional Whether or not to allow large results + flatten : bool, optional + Whether or not to flatten nested and repeated fields + in query results use_query_cache : bool, optional Whether or not to use query cache priority : str, optional @@ -1113,6 +1117,9 @@ def write_to_table( if allow_large_results is not None: configuration['allowLargeResults'] = allow_large_results + if flatten is not None: + configuration['flattenResults'] = flatten + if maximum_billing_tier is not None: configuration['maximumBillingTier'] = maximum_billing_tier From 12a2a9d6734d1b17297f84d1e1ef468025036f04 Mon Sep 17 00:00:00 2001 From: Aaron Kavlie Date: Wed, 3 Aug 2016 16:19:04 -0700 Subject: [PATCH 096/146] Move new arg to the end to avoid potential API breakage --- bigquery/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 64a93c2..2f879a7 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1044,13 +1044,13 @@ def write_to_table( table=None, external_udf_uris=[], allow_large_results=None, - flatten=None, use_query_cache=None, priority=None, create_disposition=None, write_disposition=None, use_legacy_sql=None, - maximum_billing_tier=None + maximum_billing_tier=None, + flatten=None ): """ Write query result to table. If dataset or table is not provided, From 6bde6044497e761c3aa313a50b6439ab2f6245a6 Mon Sep 17 00:00:00 2001 From: Aaron Kavlie Date: Wed, 3 Aug 2016 16:21:48 -0700 Subject: [PATCH 097/146] Move parameter description too --- bigquery/client.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 2f879a7..4321ac8 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1071,9 +1071,6 @@ def write_to_table( Storage and have .js extensions. 
allow_large_results : bool, optional Whether or not to allow large results - flatten : bool, optional - Whether or not to flatten nested and repeated fields - in query results use_query_cache : bool, optional Whether or not to use query cache priority : str, optional @@ -1091,6 +1088,9 @@ def write_to_table( unspecified, this will be set to your project default. For more information, see https://cloud.google.com/bigquery/pricing#high-compute + flatten : bool, optional + Whether or not to flatten nested and repeated fields + in query results Returns ------- From 28e94c91aaed9d409536e1c73f22f67f72160bb3 Mon Sep 17 00:00:00 2001 From: Aaron Kavlie Date: Wed, 3 Aug 2016 22:18:49 -0700 Subject: [PATCH 098/146] Add test of flatten option --- bigquery/tests/test_client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index e462c6b..4ffac9b 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1121,6 +1121,7 @@ def setUp(self): self.external_udf_uris = ['gs://bucket/external_udf.js'] self.use_query_cache = False self.priority = "INTERACTIVE" + self.flatten_results = False self.client = client.BigQueryClient(self.mock_api, self.project_id) @@ -1144,6 +1145,7 @@ def test_write(self): }], "useQueryCache": self.use_query_cache, "priority": self.priority, + "flattenResults": self.flatten_results, } } } @@ -1154,6 +1156,7 @@ def test_write(self): self.table_id, external_udf_uris=self.external_udf_uris, use_query_cache=False, + flatten=False, priority=self.priority) self.mock_api.jobs().insert.assert_called_with( From a48ce366ba3ac7ba9a924164bb322d4a88e587ce Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Mon, 12 Sep 2016 12:17:16 -0500 Subject: [PATCH 099/146] Bump version to 1.9.0 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index b280975..e5102d3 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.8.0' +__version__ = '1.9.0' From 651200e7731a99c8420a6c470a32e326c3e01832 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=A3=95=E5=8F=B8=20=E9=87=91=E6=B2=A2?= Date: Tue, 4 Oct 2016 19:18:40 +0900 Subject: [PATCH 100/146] Add the time partitioning --- bigquery/client.py | 8 +++++++- bigquery/tests/test_client.py | 22 ++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index 4321ac8..6e1fe1a 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -520,7 +520,8 @@ def get_table(self, dataset, table): return table - def create_table(self, dataset, table, schema, expiration_time=None): + def create_table(self, dataset, table, schema, + expiration_time=None, time_partitioning=False): """Create a new table in the dataset. Parameters @@ -533,6 +534,8 @@ def create_table(self, dataset, table, schema, expiration_time=None): The table schema expiration_time : float, optional The expiry time in milliseconds since the epoch. + time_partitioning : bool, optional + Create a time partitioning. 
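# Editor's note: a minimal sketch (not part of this patch) of creating a
# day-partitioned table; `client`, the dataset, table and schema are assumptions.
schema = [{'name': 'ts', 'type': 'TIMESTAMP'}, {'name': 'value', 'type': 'FLOAT'}]
client.create_table('mydataset', 'metrics', schema, time_partitioning=True)
# time_partitioning=True asks BigQuery for DAY-based partitioning on the new table.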
Returns ------- @@ -553,6 +556,9 @@ def create_table(self, dataset, table, schema, expiration_time=None): if expiration_time is not None: body['expirationTime'] = expiration_time + if time_partitioning: + body['timePartitioning'] = "DAY" + try: table = self.bigquery.tables().insert( projectId=self.project_id, diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 4ffac9b..69b923c 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1675,6 +1675,7 @@ def setUp(self): 'datasetId': self.dataset} } self.expiration_time = 1437513693000 + self.time_partitioning = True def test_table_create_failed(self): """Ensure that if creating the table fails, False is returned, @@ -1748,6 +1749,27 @@ def test_table_create_body_with_expiration_time(self): self.mock_tables.insert.return_value.execute.assert_called_with() + def test_table_create_body_with_time_partitioning(self): + """Ensure that if time_partitioning has specified, + it passed to the body.""" + + self.mock_tables.insert.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + self.client.create_table(self.dataset, self.table, + self.schema, + time_partitioning=self.time_partitioning) + + body = self.body.copy() + body.update({ + 'timePartitioning': "DAY" + }) + + self.mock_tables.insert.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=body) + + self.mock_tables.insert.return_value.execute.assert_called_with() + class TestUpdateTable(unittest.TestCase): From 35e979001269549d8521d1497c120d4671b4364b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=A3=95=E5=8F=B8=20=E9=87=91=E6=B2=A2?= Date: Wed, 5 Oct 2016 15:10:45 +0900 Subject: [PATCH 101/146] fixed time partitioning issue --- bigquery/client.py | 2 +- bigquery/tests/test_client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 6e1fe1a..0fdc98c 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -557,7 +557,7 @@ def create_table(self, dataset, table, schema, body['expirationTime'] = expiration_time if time_partitioning: - body['timePartitioning'] = "DAY" + body['timePartitioning'] = {'type': 'DAY'} try: table = self.bigquery.tables().insert( diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 69b923c..b740414 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1762,7 +1762,7 @@ def test_table_create_body_with_time_partitioning(self): body = self.body.copy() body.update({ - 'timePartitioning': "DAY" + 'timePartitioning': {'type': 'DAY'} }) self.mock_tables.insert.assert_called_with( From ee3e54f8897905b7ff2160ce88b6609b2602c5db Mon Sep 17 00:00:00 2001 From: nrfk Date: Mon, 17 Oct 2016 20:25:08 +0200 Subject: [PATCH 102/146] Add possibility to decide location (EU or US) of a dataset when creating a new dataset --- bigquery/client.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 0fdc98c..aa589d4 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1655,7 +1655,7 @@ def _raise_executing_exception_if_error(self, job): # DataSet manipulation methods # def create_dataset(self, dataset_id, friendly_name=None, description=None, - access=None): + access=None, location=None): """Create a new BigQuery dataset. 
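# Editor's note: a brief usage sketch (not part of this patch) for the new
# location option; `client` and the dataset id are assumptions.
client.create_dataset('eu_dataset', friendly_name='EU data', location='EU')
# Omitting location leaves the storage region to BigQuery's default.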
Parameters @@ -1670,6 +1670,9 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, access : list, optional Indicating access permissions (see https://developers.google.com/bigquery/docs/reference/v2/datasets#resource) + location : str, optional + Indicating where dataset should be stored: EU or US (see + https://developers.google.com/bigquery/docs/reference/v2/datasets#resource) Returns ------- @@ -1682,7 +1685,8 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, dataset_data = self.dataset_resource(dataset_id, friendly_name=friendly_name, description=description, - access=access) + access=access, + location=location) response = datasets.insert(projectId=self.project_id, body=dataset_data).execute() @@ -1843,7 +1847,7 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, return {} def dataset_resource(self, ref_id, friendly_name=None, description=None, - access=None): + access=None, location=None): """See https://developers.google.com/bigquery/docs/reference/v2/datasets#resource @@ -1857,6 +1861,8 @@ def dataset_resource(self, ref_id, friendly_name=None, description=None, An optional description for the dataset access : list, optional Indicating access permissions + location: str, optional, 'EU' or 'US' + An optional geographical location for the dataset(EU or US) Returns ------- @@ -1875,6 +1881,8 @@ def dataset_resource(self, ref_id, friendly_name=None, description=None, data["description"] = description if access: data["access"] = access + if location: + data["location"] = location return data From a9e37c839bc23899550840a65355ec84eb446ce7 Mon Sep 17 00:00:00 2001 From: Robin Thomas Date: Wed, 26 Oct 2016 12:07:52 -0400 Subject: [PATCH 103/146] add external_udf_uris support to query(), with docstring and test coverage. Change write_to_table code to use the same list comprehension to construct userDefinedFunctionResources. --- bigquery/client.py | 23 +++++++++++++---------- bigquery/tests/test_client.py | 12 +++++++++--- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index aa589d4..927345c 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -276,7 +276,7 @@ def _insert_job(self, body_object): body=body_object ).execute() - def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sql=None): + def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sql=None, external_udf_uris=None): """Submit a query to BigQuery. Parameters @@ -294,6 +294,9 @@ def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sq message it would if it wasn't a dry run. use_legacy_sql : bool, optional. Default True. If False, the query will use BigQuery's standard SQL (https://cloud.google.com/bigquery/sql-reference/) + external_udf_uris : list, optional + Contains external UDF URIs. If given, URIs must be Google Cloud + Storage and have .js extensions. 
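# Editor's note: a hedged usage sketch (not part of this patch); the UDF path,
# query text and `client` instance are assumptions.
job_id, results = client.query(
    'SELECT * FROM mydataset.mytable',
    external_udf_uris=['gs://mybucket/my_udf.js'])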
Returns @@ -321,6 +324,10 @@ def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sq if use_legacy_sql is not None: query_data['useLegacySql'] = use_legacy_sql + if external_udf_uris: + query_data['userDefinedFunctionResources'] = \ + [ {'resourceUri': u} for u in external_udf_uris ] + return self._submit_query_job(query_data) def get_query_schema(self, job_id): @@ -1048,7 +1055,7 @@ def write_to_table( query, dataset=None, table=None, - external_udf_uris=[], + external_udf_uris=None, allow_large_results=None, use_query_cache=None, priority=None, @@ -1073,7 +1080,7 @@ def write_to_table( table : str, optional String id of the table external_udf_uris : list, optional - Contains extternal UDF URIs. If given, URIs must be Google Cloud + Contains external UDF URIs. If given, URIs must be Google Cloud Storage and have .js extensions. allow_large_results : bool, optional Whether or not to allow large results @@ -1144,13 +1151,9 @@ def write_to_table( if write_disposition: configuration['writeDisposition'] = write_disposition - configuration['userDefinedFunctionResources'] = [] - for external_udf_uri in external_udf_uris: - configuration['userDefinedFunctionResources'].append( - { - "resourceUri": external_udf_uri - } - ) + if external_udf_uris: + configuration['userDefinedFunctionResources'] = \ + [ {'resourceUri': u} for u in external_udf_uris ] body = { "configuration": { diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index b740414..94c7d61 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -259,6 +259,7 @@ def setUp(self): self.query = 'foo' self.project_id = 'project' + self.external_udf_uris = ['gs://bucket/external_udf.js'] self.client = client.BigQueryClient(self.mock_bq_service, self.project_id) @@ -276,12 +277,17 @@ def test_query(self): self.mock_job_collection.query.return_value = mock_query_job - job_id, results = self.client.query(self.query) + job_id, results = self.client.query(self.query, external_udf_uris=self.external_udf_uris) self.mock_job_collection.query.assert_called_once_with( projectId=self.project_id, - body={'query': self.query, 'timeoutMs': 0, 'dryRun': False, - 'maxResults': None} + body={ + 'query': self.query, + 'userDefinedFunctionResources': [ {'resourceUri': u} for u in self.external_udf_uris ], + 'timeoutMs': 0, + 'dryRun': False, + 'maxResults': None + } ) self.assertEquals(job_id, 'spiderman') self.assertEquals(results, []) From c0eef0a90fcc8716e426d4a95ab1a28c9bfeeb0a Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Wed, 26 Oct 2016 14:44:49 -0500 Subject: [PATCH 104/146] Bump version to 1.10.0 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index e5102d3..52af183 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.9.0' +__version__ = '1.10.0' From 3559f5b74bc385b78ef329f77d144a805f18c958 Mon Sep 17 00:00:00 2001 From: Trenton Smith Date: Fri, 18 Nov 2016 16:20:04 -0700 Subject: [PATCH 105/146] add NullHandler to library logger --- bigquery/client.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index 927345c..a25f6fa 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1,6 +1,6 @@ import calendar import json -from logging import getLogger +from logging import getLogger, NullHandler from collections import defaultdict from datetime import datetime, timedelta from hashlib import sha256 @@ 
-47,6 +47,7 @@ JOB_DESTINATION_FORMAT_CSV = JOB_FORMAT_CSV logger = getLogger(__name__) +logger.addHandler(logging.NullHandler()) def get_client(project_id=None, credentials=None, From b5a88cb59ff40e1d252566789ef627e1d731778d Mon Sep 17 00:00:00 2001 From: Trenton Smith Date: Fri, 18 Nov 2016 16:25:59 -0700 Subject: [PATCH 106/146] fix typo and add NullHandler to logger in query_builder --- bigquery/client.py | 2 +- bigquery/query_builder.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index a25f6fa..1c58ed1 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -47,7 +47,7 @@ JOB_DESTINATION_FORMAT_CSV = JOB_FORMAT_CSV logger = getLogger(__name__) -logger.addHandler(logging.NullHandler()) +logger.addHandler(NullHandler()) def get_client(project_id=None, credentials=None, diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index 7362148..1054299 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -1,6 +1,7 @@ -from logging import getLogger +from logging import getLogger, NullHandler logger = getLogger(__name__) +logger.addHandler(NullHandler()) def render_query(dataset, tables, select=None, conditions=None, From d09ef1aab7ff66c491e668b1af759d0ebb20fa19 Mon Sep 17 00:00:00 2001 From: Julio David Quintana Date: Wed, 21 Dec 2016 18:18:22 -0600 Subject: [PATCH 107/146] Add ability to choose to use legacy SQL or standard SQL when creating a view. --- bigquery/client.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index 1c58ed1..d669673 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -680,7 +680,7 @@ def patch_table(self, dataset, table, schema): else: return {} - def create_view(self, dataset, view, query): + def create_view(self, dataset, view, query, use_legacy_sql=None): """Create a new view in the dataset. Parameters @@ -710,6 +710,9 @@ def create_view(self, dataset, view, query): } } + if use_legacy_sql is not None: + body['view']['useLegacySql'] = use_legacy_sql + try: view = self.bigquery.tables().insert( projectId=self.project_id, From acaee133eed04f4f06315c9e9e1e66c1dcf2a4ca Mon Sep 17 00:00:00 2001 From: Julio David Quintana Date: Thu, 22 Dec 2016 10:59:26 -0600 Subject: [PATCH 108/146] Add docstring for use_legacy_sql kwarg --- bigquery/client.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bigquery/client.py b/bigquery/client.py index d669673..7e5d92f 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -691,6 +691,9 @@ def create_view(self, dataset, view, query, use_legacy_sql=None): The name of the view to create query : dict A query that BigQuery executes when the view is referenced. 
+ use_legacy_sql : bool, optional + If False, the query will use BigQuery's standard SQL + (https://cloud.google.com/bigquery/sql-reference/) Returns ------- From b2e39e49c5da0be085ead109e3860c9153f361cd Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Tue, 14 Feb 2017 09:08:49 -0600 Subject: [PATCH 109/146] Bump version to 1.11.0 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index 52af183..da77e85 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.10.0' +__version__ = '1.11.0' From bcb358e2546cdfe495e55844676f101ba4619efe Mon Sep 17 00:00:00 2001 From: Ciaran Blewitt Date: Tue, 7 Mar 2017 13:34:08 +1100 Subject: [PATCH 110/146] Add support for long in schema_builder --- bigquery/schema_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/schema_builder.py b/bigquery/schema_builder.py index 575b390..f4ff8ca 100644 --- a/bigquery/schema_builder.py +++ b/bigquery/schema_builder.py @@ -126,7 +126,7 @@ def bigquery_type(o, timestamp_parser=default_timestamp_parser): """ t = type(o) - if t == int: + if t == int or t == long: return "integer" elif (t == six.binary_type and six.PY2) or t == six.text_type: if timestamp_parser and timestamp_parser(o): From df78c3512bbe21d15add1b19079a50d823e35275 Mon Sep 17 00:00:00 2001 From: Ciaran Blewitt Date: Tue, 7 Mar 2017 15:06:15 +1100 Subject: [PATCH 111/146] Replaced check with python3-friendly version --- bigquery/schema_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/schema_builder.py b/bigquery/schema_builder.py index f4ff8ca..c55429a 100644 --- a/bigquery/schema_builder.py +++ b/bigquery/schema_builder.py @@ -126,7 +126,7 @@ def bigquery_type(o, timestamp_parser=default_timestamp_parser): """ t = type(o) - if t == int or t == long: + if isinstance(t, six.integertype): return "integer" elif (t == six.binary_type and six.PY2) or t == six.text_type: if timestamp_parser and timestamp_parser(o): From d098adc35d157a7aa948693817784c189e67f60e Mon Sep 17 00:00:00 2001 From: Ciaran Blewitt Date: Tue, 7 Mar 2017 15:09:08 +1100 Subject: [PATCH 112/146] Fixed typo for six.integer_types --- bigquery/schema_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/schema_builder.py b/bigquery/schema_builder.py index c55429a..35369f6 100644 --- a/bigquery/schema_builder.py +++ b/bigquery/schema_builder.py @@ -126,7 +126,7 @@ def bigquery_type(o, timestamp_parser=default_timestamp_parser): """ t = type(o) - if isinstance(t, six.integertype): + if isinstance(t, six.integer_types): return "integer" elif (t == six.binary_type and six.PY2) or t == six.text_type: if timestamp_parser and timestamp_parser(o): From 9db30711ba7009ab2ee04ae195927c717cafc959 Mon Sep 17 00:00:00 2001 From: Ciaran Blewitt Date: Tue, 7 Mar 2017 16:18:06 +1100 Subject: [PATCH 113/146] Changed schema_builder to check in six.integer_types --- bigquery/schema_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/schema_builder.py b/bigquery/schema_builder.py index 35369f6..65027b8 100644 --- a/bigquery/schema_builder.py +++ b/bigquery/schema_builder.py @@ -126,7 +126,7 @@ def bigquery_type(o, timestamp_parser=default_timestamp_parser): """ t = type(o) - if isinstance(t, six.integer_types): + if t in six.integer_types: return "integer" elif (t == six.binary_type and six.PY2) or t == six.text_type: if timestamp_parser and timestamp_parser(o): From 
3939514d0559a8d33f8b8cf647311922e0124b54 Mon Sep 17 00:00:00 2001 From: cynipe Date: Fri, 28 Apr 2017 13:54:20 +0900 Subject: [PATCH 114/146] Allow to specify nested column as insertId for push_row --- bigquery/client.py | 11 ++++++--- bigquery/tests/test_client.py | 46 +++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 7e5d92f..17a3a89 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -6,6 +6,7 @@ from hashlib import sha256 from io import StringIO from time import sleep, time +from functools import reduce import six from bigquery.errors import (BigQueryTimeoutException, JobExecutingException, @@ -1236,7 +1237,8 @@ def push_rows(self, dataset, table, rows, insert_id_key=None, rows : list A ``list`` of rows (``dict`` objects) to add to the table insert_id_key : str, optional - Key for insertId in row + Key for insertId in row. + You can use dot separated key for nested column. skip_invalid_rows : bool, optional Insert all valid rows of a request, even if invalid rows exist. ignore_unknown_values : bool, optional @@ -1258,8 +1260,11 @@ def push_rows(self, dataset, table, rows, insert_id_key=None, for row in rows: each_row = {} each_row["json"] = row - if insert_id_key in row: - each_row["insertId"] = row[insert_id_key] + if insert_id_key is not None: + keys = insert_id_key.split('.') + val = reduce(lambda d, key: d.get(key) if d else None, keys, row) + if val is not None: + each_row["insertId"] = val rows_data.append(each_row) data = { diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 94c7d61..1315147 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -2325,6 +2325,52 @@ def test_request_data_with_options(self): tableId=self.table, body=expected_body) + def test_insert_id_key_with_nested_column(self): + """Ensure that dot separated insert_id_key properly extracted with nested column value.""" + rows = [ + {'nested': {'col': 'nested_col1'}, 'val': 1}, + {'nested': {'col': 'nested_col2'}, 'val': 2}, + ] + expected_body = self.data.copy() + expected_body['rows'] = [ + {'insertId': 'nested_col1', 'json': {'nested': {'col': 'nested_col1'}, 'val': 1}}, + {'insertId': 'nested_col2', 'json': {'nested': {'col': 'nested_col2'}, 'val': 2}}, + ] + + self.client.push_rows(self.dataset, self.table, rows, + insert_id_key='nested.col') + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, + datasetId=self.dataset, + tableId=self.table, + body=expected_body) + + expected_body = self.data.copy() + expected_body['rows'] = [ + {'insertId': 1, 'json': {'nested': {'col': 'nested_col1'}, 'val': 1}}, + {'insertId': 2, 'json': {'nested': {'col': 'nested_col2'}, 'val': 2}}, + ] + self.client.push_rows(self.dataset, self.table, rows, + insert_id_key='val') + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, + datasetId=self.dataset, + tableId=self.table, + body=expected_body) + + expected_body = self.data.copy() + expected_body['rows'] = [ + {'json': {'nested': {'col': 'nested_col1'}, 'val': 1}}, + {'json': {'nested': {'col': 'nested_col2'}, 'val': 2}}, + ] + self.client.push_rows(self.dataset, self.table, rows, + insert_id_key='no_such.column') + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, + datasetId=self.dataset, + tableId=self.table, + body=expected_body) + class TestGetAllTables(unittest.TestCase): From 0edf54c0cd1de51a1993173fd55cb1f4b0834124 Mon Sep 17 00:00:00 
2001 From: Tyler Treat Date: Fri, 5 May 2017 00:24:45 -0500 Subject: [PATCH 115/146] Bump version to 1.11.1 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index da77e85..522ba08 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.11.0' +__version__ = '1.11.1' From 8ccce603045a24bd0df49a2bf26a946ac84cba6e Mon Sep 17 00:00:00 2001 From: e271828- Date: Sat, 27 May 2017 09:27:37 -0700 Subject: [PATCH 116/146] _parse_table_name failed in the event of a name like _YYYYMMDD_ --- bigquery/client.py | 5 ++++- bigquery/tests/test_client.py | 11 ++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 17a3a89..b8971cd 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1436,6 +1436,8 @@ def _parse_table_name(self, table_id): """Parse a table name in the form of appid_YYYY_MM or YYYY_MM_appid and return a tuple consisting of YYYY-MM and the app id. + Returns (None, None) in the event of a name like _YYYYMMDD_ + Parameters ---------- table_id : str @@ -1463,9 +1465,10 @@ def _parse_table_name(self, table_id): year_month = "-".join(attributes[-2:]) app_id = "-".join(attributes[:-2]) + # Check if date parsed correctly if year_month.count("-") == 1 and all( - [num.isdigit() for num in year_month.split('-')]): + [num.isdigit() for num in year_month.split('-')]) and len(year_month) == 7: return year_month, app_id return None, None diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 1315147..a331387 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1333,6 +1333,15 @@ def test_not_inside_range(self): "kind": "bigquery#tableList", "etag": "\"GSclnjk0zID1ucM3F-xYinOm1oE/cn58Rpu8v8pB4eoJQaiTe11lPQc\"", "tables": [ + { + "kind": "bigquery#table", + "id": "project:dataset.notanappspottable_20130515_0261", + "tableReference": { + "projectId": "project", + "datasetId": "dataset", + "tableId": "notanappspottable_20130515_0261" + } + }, { "kind": "bigquery#table", "id": "project:dataset.2013_05_appspot_1", @@ -2389,7 +2398,7 @@ def test_get_all_tables(self): bq = client.BigQueryClient(mock_bq_service, 'project') expected_result = [ - '2013_05_appspot', '2013_06_appspot_1', '2013_06_appspot_2', + 'notanappspottable_20130515_0261', '2013_05_appspot', '2013_06_appspot_1', '2013_06_appspot_2', '2013_06_appspot_3', '2013_06_appspot_4', '2013_06_appspot_5', 'appspot_6_2013_06', 'table_not_matching_naming' ] From 2e3509fa3ff8afc5206990fb3e347eae83fccde0 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Sun, 28 May 2017 22:20:33 -0500 Subject: [PATCH 117/146] Bump version to 1.11.2 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index 522ba08..6c371de 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.11.1' +__version__ = '1.11.2' From d9e9ac7dc6e732505c13df74fed9f40473a2515d Mon Sep 17 00:00:00 2001 From: e271828- Date: Thu, 8 Jun 2017 18:50:28 -0700 Subject: [PATCH 118/146] document get_table --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2053e05..d53d9a9 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ job_id, _ = client.query(query) # Managing Tables -The BigQuery client provides facilities to manage dataset tables, including creating, deleting, and checking the existence of 
tables. +The BigQuery client provides facilities to manage dataset tables, including creating, deleting, checking the existence, and getting the metadata of tables. ```python # Create a new table. @@ -150,6 +150,10 @@ deleted = client.delete_table('dataset', 'my_table') # Check if a table exists. exists = client.check_table('dataset', 'my_table') + +# Get a table's full metadata. Includes numRows, numBytes, etc. +# See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables +metadata = client.get_table('dataset', 'my_table') ``` There is also functionality for retrieving tables that are associated with a Google App Engine appspot, assuming table names are in the form of appid_YYYY_MM or YYYY_MM_appid. This allows tables between a date range to be selected and queried on. From 503b8a6553b7fc684e784abc9136baad0af17a80 Mon Sep 17 00:00:00 2001 From: tushar Date: Mon, 12 Jun 2017 18:56:38 +0200 Subject: [PATCH 119/146] updated update_table to include tableId= table --- bigquery/client.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bigquery/client.py b/bigquery/client.py index b8971cd..db264a2 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -618,6 +618,7 @@ def update_table(self, dataset, table, schema): try: result = self.bigquery.tables().update( projectId=self.project_id, + tableId= table, datasetId=dataset, body=body ).execute() From b60256e97def3f7bd68ad53f6c21de8fcdf47d89 Mon Sep 17 00:00:00 2001 From: tushar Date: Mon, 12 Jun 2017 18:57:03 +0200 Subject: [PATCH 120/146] update update_table unit test to include tableId --- bigquery/tests/test_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index a331387..60bcc42 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1830,7 +1830,7 @@ def test_table_update_failed(self): self.client.swallow_results = True self.mock_tables.update.assert_called_with( - projectId=self.project, datasetId=self.dataset, body=self.body) + projectId=self.project, tableId=self.table, datasetId=self.dataset, body=self.body) self.mock_tables.update.return_value.execute.assert_called_with() @@ -1856,7 +1856,7 @@ def test_table_update_success(self): self.client.swallow_results = True self.mock_tables.update.assert_called_with( - projectId=self.project, datasetId=self.dataset, body=self.body) + projectId=self.project, tableId=self.table, datasetId=self.dataset, body=self.body) self.mock_tables.update.return_value.execute.assert_called_with() From d6744c37431d1445fd6a419625d140b289a068b9 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Tue, 13 Jun 2017 09:04:41 -0500 Subject: [PATCH 121/146] Bump version to 1.12.0 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index 6c371de..666b2f7 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.11.2' +__version__ = '1.12.0' From d18356a93d5af24cc50eed6bfd7cd3154f313947 Mon Sep 17 00:00:00 2001 From: Vishvajit Pathak Date: Thu, 3 Aug 2017 17:57:56 +0530 Subject: [PATCH 122/146] typo fixes --- bigquery/query_builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index 1054299..b29d0cd 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -29,7 +29,7 @@ def render_query(dataset, tables, select=None, conditions=None, 'comparators' maps to another ``dict`` containing the 
keys 'condition', 'negate', and 'value'. If 'comparators' = {'condition': '>=', 'negate': False, 'value': 1}, - this example will be rdnered as 'foo >= FLOAT('1')' in the query. + this example will be rendered as 'foo >= FLOAT('1')' in the query. ``list`` of field names to group by order_by : dict, optional Keys = {'field', 'direction'}. `dict` should be formatted as @@ -170,7 +170,7 @@ def _render_conditions(conditions): Parameters ---------- conditions : list - A list of dictionay items to filter a table. + A list of dictionary items to filter a table. Returns ------- From 66c18809061eefb646d5a08d2725d2f85059fdd7 Mon Sep 17 00:00:00 2001 From: Vishvajit Pathak Date: Thu, 3 Aug 2017 18:04:50 +0530 Subject: [PATCH 123/146] typos fix --- bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index db264a2..61933e4 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -983,7 +983,7 @@ def export_data_to_uris( Parameters ---------- - destination_urls : Union[str, list] + destination_uris : Union[str, list] ``str`` or ``list`` of ``str`` objects representing the URIs on cloud storage of the form: gs://bucket/filename dataset : str From 55915c0f7134f4f245bb7da4a9c0425910dc3c66 Mon Sep 17 00:00:00 2001 From: Vishvajit Pathak Date: Thu, 3 Aug 2017 18:28:42 +0530 Subject: [PATCH 124/146] typo fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d53d9a9..8171078 100644 --- a/README.md +++ b/README.md @@ -295,7 +295,7 @@ exists = client.check_dataset('mydataset') ```python from bigquery import schema_from_record -schema_from_record({"id":123, "posts": [{"id":123, "text": "tihs is a post"}], "username": "bob"}) +schema_from_record({"id":123, "posts": [{"id":123, "text": "this is a post"}], "username": "bob"}) ``` # Contributing From b34eff532daaa53bb0192a1e2f258d5e47ced51f Mon Sep 17 00:00:00 2001 From: Vishvajit Pathak Date: Thu, 3 Aug 2017 19:01:17 +0530 Subject: [PATCH 125/146] expiration_time changed to int/double --- bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index 61933e4..9af8dcb 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -541,7 +541,7 @@ def create_table(self, dataset, table, schema, The name of the table to create schema : dict The table schema - expiration_time : float, optional + expiration_time : int or double, optional The expiry time in milliseconds since the epoch. time_partitioning : bool, optional Create a time partitioning. From 67c855626a0f6e83f2724387b83c2c0440234a1f Mon Sep 17 00:00:00 2001 From: Yves Bastide Date: Mon, 31 Jul 2017 15:18:18 +0200 Subject: [PATCH 126/146] Add num_retries argument Signed-off-by: Yves Bastide --- bigquery/client.py | 56 +++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 9af8dcb..847c9fb 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -55,7 +55,8 @@ def get_client(project_id=None, credentials=None, service_url=None, service_account=None, private_key=None, private_key_file=None, json_key=None, json_key_file=None, - readonly=True, swallow_results=True): + readonly=True, swallow_results=True, + num_retries=0): """Return a singleton instance of BigQueryClient. Either AssertionCredentials or a service account and private key combination need to be provided in order to authenticate requests to BigQuery. 
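As a minimal usage sketch of the new `num_retries` argument (not part of the patch; the project id and key file below are placeholders), a caller opting into retries for transient API failures might build the client like this:

```python
from bigquery import get_client

# Each underlying request.execute() call made by the client is retried
# up to three times before the error propagates.
client = get_client('my-project',
                    json_key_file='credentials.json',
                    readonly=True,
                    num_retries=3)

job_id, _ = client.query('SELECT * FROM dataset.my_table LIMIT 10')
```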
@@ -94,6 +95,9 @@ def get_client(project_id=None, credentials=None, swallow_results : bool If set to False, then return the actual response value instead of converting to boolean. Default True. + num_retries : int, optional + The number of times to retry the request. Default 0 (no retry). + Returns ------- @@ -147,7 +151,8 @@ def get_client(project_id=None, credentials=None, bq_service = _get_bq_service(credentials=credentials, service_url=service_url) - return BigQueryClient(bq_service, project_id, swallow_results) + return BigQueryClient(bq_service, project_id, swallow_results, + num_retries) def get_projects(bq_service): @@ -185,10 +190,12 @@ def _credentials(): class BigQueryClient(object): - def __init__(self, bq_service, project_id, swallow_results=True): + def __init__(self, bq_service, project_id, swallow_results=True, + num_retries=0): self.bigquery = bq_service self.project_id = project_id self.swallow_results = swallow_results + self.num_retries = num_retries self.cache = {} def _submit_query_job(self, query_data): @@ -226,7 +233,8 @@ def _submit_query_job(self, query_data): try: query_reply = job_collection.query( - projectId=self.project_id, body=query_data).execute() + projectId=self.project_id, body=query_data).execute( + num_retries=self.num_retries) except HttpError as e: if query_data.get("dryRun", False): return None, json.loads(e.content.decode('utf8')) @@ -276,7 +284,7 @@ def _insert_job(self, body_object): return job_collection.insert( projectId=self.project_id, body=body_object - ).execute() + ).execute(num_retries=self.num_retries) def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sql=None, external_udf_uris=None): """Submit a query to BigQuery. @@ -375,7 +383,7 @@ def get_table_schema(self, dataset, table): result = self.bigquery.tables().get( projectId=self.project_id, tableId=table, - datasetId=dataset).execute() + datasetId=dataset).execute(num_retries=self.num_retries) except HttpError as e: if int(e.resp['status']) == 404: logger.warn('Table %s.%s does not exist', dataset, table) @@ -481,7 +489,8 @@ def get_dataset(self, dataset_id): """ try: dataset = self.bigquery.datasets().get( - projectId=self.project_id, datasetId=dataset_id).execute() + projectId=self.project_id, datasetId=dataset_id).execute( + num_retries=self.num_retries) except HttpError: dataset = {} @@ -523,7 +532,7 @@ def get_table(self, dataset, table): try: table = self.bigquery.tables().get( projectId=self.project_id, datasetId=dataset, - tableId=table).execute() + tableId=table).execute(num_retries=self.num_retries) except HttpError: table = {} @@ -573,7 +582,7 @@ def create_table(self, dataset, table, schema, projectId=self.project_id, datasetId=dataset, body=body - ).execute() + ).execute(num_retries=self.num_retries) if self.swallow_results: return True else: @@ -621,7 +630,7 @@ def update_table(self, dataset, table, schema): tableId= table, datasetId=dataset, body=body - ).execute() + ).execute(num_retries=self.num_retries) if self.swallow_results: return True else: @@ -668,7 +677,7 @@ def patch_table(self, dataset, table, schema): projectId=self.project_id, datasetId=dataset, body=body - ).execute() + ).execute(num_retries=self.num_retries) if self.swallow_results: return True else: @@ -723,7 +732,7 @@ def create_view(self, dataset, view, query, use_legacy_sql=None): projectId=self.project_id, datasetId=dataset, body=body - ).execute() + ).execute(num_retries=self.num_retries) if self.swallow_results: return True else: @@ -759,7 +768,7 @@ def 
delete_table(self, dataset, table): projectId=self.project_id, datasetId=dataset, tableId=table - ).execute() + ).execute(num_retries=self.num_retries) if self.swallow_results: return True else: @@ -1212,7 +1221,7 @@ def wait_for_job(self, job, interval=5, timeout=60): sleep(interval) request = self.bigquery.jobs().get(projectId=self.project_id, jobId=job_id) - job_resource = request.execute() + job_resource = request.execute(num_retries=self.num_retries) self._raise_executing_exception_if_error(job_resource) complete = job_resource.get('status').get('state') == u'DONE' elapsed_time = time() - start_time @@ -1288,7 +1297,7 @@ def push_rows(self, dataset, table, rows, insert_id_key=None, datasetId=dataset, tableId=table, body=data - ).execute() + ).execute(num_retries=self.num_retries) if response.get('insertErrors'): logger.error('BigQuery insert errors: %s' % response) @@ -1382,7 +1391,7 @@ def _get_all_tables_for_dataset(self, dataset_id): """ result = self.bigquery.tables().list( projectId=self.project_id, - datasetId=dataset_id).execute() + datasetId=dataset_id).execute(num_retries=self.num_retries) page_token = result.get('nextPageToken') while page_token: @@ -1390,7 +1399,7 @@ def _get_all_tables_for_dataset(self, dataset_id): projectId=self.project_id, datasetId=dataset_id, pageToken=page_token - ).execute() + ).execute(num_retries=self.num_retries) page_token = res.get('nextPageToken') result['tables'] += res.get('tables', []) return result @@ -1553,7 +1562,7 @@ def get_query_results(self, job_id, offset=None, limit=None, startIndex=offset, maxResults=limit, pageToken=page_token, - timeoutMs=timeout * 1000).execute() + timeoutMs=timeout * 1000).execute(num_retries=self.num_retries) def _transform_row(self, row, schema): """Apply the given schema to the given BigQuery data row. 
@@ -1708,7 +1717,8 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, location=location) response = datasets.insert(projectId=self.project_id, - body=dataset_data).execute() + body=dataset_data).execute( + num_retries=self.num_retries) if self.swallow_results: return True else: @@ -1732,7 +1742,7 @@ def get_datasets(self): try: datasets = self.bigquery.datasets() request = datasets.list(projectId=self.project_id) - result = request.execute() + result = request.execute(num_retries=self.num_retries) return result.get('datasets', []) except HttpError as e: logger.error("Cannot list datasets: {0}".format(e)) @@ -1766,7 +1776,7 @@ def delete_dataset(self, dataset_id, delete_contents=False): request = datasets.delete(projectId=self.project_id, datasetId=dataset_id, deleteContents=delete_contents) - response = request.execute() + response = request.execute(num_retries=self.num_retries) if self.swallow_results: return True else: @@ -1810,7 +1820,7 @@ def update_dataset(self, dataset_id, friendly_name=None, description=None, request = datasets.update(projectId=self.project_id, datasetId=dataset_id, body=body) - response = request.execute() + response = request.execute(num_retries=self.num_retries) if self.swallow_results: return True else: @@ -1853,7 +1863,7 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, description, access) request = datasets.patch(projectId=self.project_id, datasetId=dataset_id, body=body) - response = request.execute() + response = request.execute(num_retries=self.num_retries) if self.swallow_results: return True else: From 289ad25b2b415a3d43ff6b75f3b4fbf48ce61a75 Mon Sep 17 00:00:00 2001 From: Yves Bastide Date: Tue, 5 Sep 2017 15:45:31 +0200 Subject: [PATCH 127/146] Fix tests Add `num_retries=0` to `execute.assert_called_with()`. Signed-off-by: Yves Bastide --- bigquery/tests/test_client.py | 77 +++++++++++++++++++++-------------- 1 file changed, 47 insertions(+), 30 deletions(-) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 60bcc42..0bf5a18 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -292,7 +292,6 @@ def test_query(self): self.assertEquals(job_id, 'spiderman') self.assertEquals(results, []) - def test_query_max_results_set(self): """Ensure that we retrieve the job id from the query and the maxResults parameter is set. @@ -520,7 +519,7 @@ def test_get_response(self): projectId=self.project_id, jobId=job_id, startIndex=offset, maxResults=limit, pageToken=page_token, timeoutMs=1000) - mock_query_job.execute.assert_called_once_with() + mock_query_job.execute.assert_called_once_with(num_retries=0) self.assertEquals(actual, mock_query_reply) @@ -1485,7 +1484,8 @@ def test_table_exists(self): expected, self.client.get_table_schema(self.dataset, self.table)) self.mock_tables.get.assert_called_once_with( projectId=self.project, tableId=self.table, datasetId=self.dataset) - self.mock_tables.get.return_value.execute.assert_called_once_with() + self.mock_tables.get.return_value.execute. 
\ + assert_called_once_with(num_retries=0) def test_table_does_not_exist(self): """Ensure that None is returned if the table doesn't exist.""" @@ -1496,7 +1496,8 @@ def test_table_does_not_exist(self): self.client.get_table_schema(self.dataset, self.table)) self.mock_tables.get.assert_called_once_with( projectId=self.project, tableId=self.table, datasetId=self.dataset) - self.mock_tables.get.return_value.execute.assert_called_once_with() + self.mock_tables.get.return_value.execute. \ + assert_called_once_with(num_retries=0) @mock.patch('bigquery.client.BigQueryClient.get_query_results') @@ -1651,7 +1652,8 @@ def test_table_does_not_exist(self): self.mock_tables.get.assert_called_once_with( projectId=self.project, datasetId=self.dataset, tableId=self.table) - self.mock_tables.get.return_value.execute.assert_called_once_with() + self.mock_tables.get.return_value.execute. \ + assert_called_once_with(num_retries=0) def test_table_does_exist(self): """Ensure that if the table does exist, True is returned.""" @@ -1666,7 +1668,8 @@ def test_table_does_exist(self): self.mock_tables.get.assert_called_once_with( projectId=self.project, datasetId=self.dataset, tableId=self.table) - self.mock_tables.get.return_value.execute.assert_called_once_with() + self.mock_tables.get.return_value.execute. \ + assert_called_once_with(num_retries=0) class TestCreateTable(unittest.TestCase): @@ -1716,7 +1719,8 @@ def test_table_create_failed(self): self.mock_tables.insert.assert_called_with( projectId=self.project, datasetId=self.dataset, body=self.body) - self.mock_tables.insert.return_value.execute.assert_called_with() + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=0) def test_table_create_success(self): """Ensure that if creating the table succeeds, True is returned, @@ -1742,7 +1746,8 @@ def test_table_create_success(self): self.mock_tables.insert.assert_called_with( projectId=self.project, datasetId=self.dataset, body=self.body) - self.mock_tables.insert.return_value.execute.assert_called_with() + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=0) def test_table_create_body_with_expiration_time(self): """Ensure that if expiration_time has specified, @@ -1762,7 +1767,8 @@ def test_table_create_body_with_expiration_time(self): self.mock_tables.insert.assert_called_with( projectId=self.project, datasetId=self.dataset, body=body) - self.mock_tables.insert.return_value.execute.assert_called_with() + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=0) def test_table_create_body_with_time_partitioning(self): """Ensure that if time_partitioning has specified, @@ -1783,7 +1789,8 @@ def test_table_create_body_with_time_partitioning(self): self.mock_tables.insert.assert_called_with( projectId=self.project, datasetId=self.dataset, body=body) - self.mock_tables.insert.return_value.execute.assert_called_with() + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=0) class TestUpdateTable(unittest.TestCase): @@ -1830,9 +1837,11 @@ def test_table_update_failed(self): self.client.swallow_results = True self.mock_tables.update.assert_called_with( - projectId=self.project, tableId=self.table, datasetId=self.dataset, body=self.body) + projectId=self.project, tableId=self.table, datasetId=self.dataset, + body=self.body) - self.mock_tables.update.return_value.execute.assert_called_with() + self.mock_tables.update.return_value.execute. 
\ + assert_called_with(num_retries=0) def test_table_update_success(self): """Ensure that if updating the table succeeds, True is returned, @@ -1856,9 +1865,11 @@ def test_table_update_success(self): self.client.swallow_results = True self.mock_tables.update.assert_called_with( - projectId=self.project, tableId=self.table, datasetId=self.dataset, body=self.body) + projectId=self.project, tableId=self.table, datasetId=self.dataset, + body=self.body) - self.mock_tables.update.return_value.execute.assert_called_with() + self.mock_tables.update.return_value.execute. \ + assert_called_with(num_retries=0) class TestPatchTable(unittest.TestCase): @@ -1907,7 +1918,8 @@ def test_table_patch_failed(self): self.mock_tables.patch.assert_called_with( projectId=self.project, datasetId=self.dataset, body=self.body) - self.mock_tables.patch.return_value.execute.assert_called_with() + self.mock_tables.patch.return_value.execute. \ + assert_called_with(num_retries=0) def test_table_patch_success(self): """Ensure that if patching the table succeeds, True is returned, @@ -1933,7 +1945,8 @@ def test_table_patch_success(self): self.mock_tables.patch.assert_called_with( projectId=self.project, datasetId=self.dataset, body=self.body) - self.mock_tables.patch.return_value.execute.assert_called_with() + self.mock_tables.patch.return_value.execute. \ + assert_called_with(num_retries=0) class TestCreateView(unittest.TestCase): @@ -1978,7 +1991,8 @@ def test_view_create_failed(self): self.mock_tables.insert.assert_called_with( projectId=self.project, datasetId=self.dataset, body=self.body) - self.mock_tables.insert.return_value.execute.assert_called_with() + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=0) def test_view_create_success(self): """Ensure that if creating the table succeeds, True is returned, @@ -2004,7 +2018,8 @@ def test_view_create_success(self): self.mock_tables.insert.assert_called_with( projectId=self.project, datasetId=self.dataset, body=self.body) - self.mock_tables.insert.return_value.execute.assert_called_with() + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=0) class TestDeleteTable(unittest.TestCase): @@ -2040,7 +2055,8 @@ def test_delete_table_fail(self): self.mock_tables.delete.assert_called_with( projectId=self.project, datasetId=self.dataset, tableId=self.table) - self.mock_tables.delete.return_value.execute.assert_called_with() + self.mock_tables.delete.return_value.execute. \ + assert_called_with(num_retries=0) def test_delete_table_success(self): """Ensure that if deleting table succeeds, True is returned, @@ -2064,7 +2080,8 @@ def test_delete_table_success(self): self.mock_tables.delete.assert_called_with( projectId=self.project, datasetId=self.dataset, tableId=self.table) - self.mock_tables.delete.return_value.execute.assert_called_with() + self.mock_tables.delete.return_value.execute. 
\ + assert_called_with(num_retries=0) class TestParseTableListReponse(unittest.TestCase): @@ -2200,7 +2217,7 @@ def test_push_failed(self): projectId=self.project, datasetId=self.dataset, tableId=self.table, body=self.data) - execute_calls = [mock.call()] + execute_calls = [mock.call(num_retries=0)] self.mock_table_data.insertAll.return_value.execute.assert_has_calls( execute_calls) @@ -2254,7 +2271,7 @@ def test_push_exception(self): projectId=self.project, datasetId=self.dataset, tableId=self.table, body=self.data) - execute_calls = [mock.call()] + execute_calls = [mock.call(num_retries=0)] self.mock_table_data.insertAll.return_value.execute.assert_has_calls( execute_calls) @@ -2286,7 +2303,7 @@ def test_push_success(self): projectId=self.project, datasetId=self.dataset, tableId=self.table, body=self.data) - execute_calls = [mock.call()] + execute_calls = [mock.call(num_retries=0)] self.mock_table_data.insertAll.return_value.execute.assert_has_calls( execute_calls) @@ -2604,7 +2621,7 @@ def test_dataset_create_failed(self): projectId=self.project, body=self.body) self.mock_datasets.insert.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) def test_dataset_create_success(self): """Ensure that if creating the table fails, False is returned.""" @@ -2633,7 +2650,7 @@ def test_dataset_create_success(self): projectId=self.project, body=self.body) self.mock_datasets.insert.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) class TestDeleteDataset(unittest.TestCase): @@ -2669,7 +2686,7 @@ def test_delete_datasets_fail(self): self.client.swallow_results = True self.mock_datasets.delete.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) def test_delete_datasets_success(self): """Ensure that if deleting table succeeds, True is returned.""" @@ -2694,7 +2711,7 @@ def test_delete_datasets_success(self): deleteContents=False) self.mock_datasets.delete.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) def test_delete_datasets_delete_contents_success(self): """Ensure that if deleting table succeeds, True is returned.""" @@ -2719,7 +2736,7 @@ def test_delete_datasets_delete_contents_success(self): deleteContents=True) self.mock_datasets.delete.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) FULL_DATASET_LIST_RESPONSE = { @@ -2879,7 +2896,7 @@ def test_dataset_update_failed(self): projectId=self.project, datasetId=self.dataset, body=self.body) self.mock_datasets.update.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) def test_dataset_update_success(self): """Ensure that if creating the table fails, False is returned.""" @@ -2908,4 +2925,4 @@ def test_dataset_update_success(self): projectId=self.project, datasetId=self.dataset, body=self.body) self.mock_datasets.update.return_value.execute. \ - assert_called_with() + assert_called_with(num_retries=0) From 188ef7d5c3676d104fa996f2b1729b46c14d2694 Mon Sep 17 00:00:00 2001 From: Yves Bastide Date: Tue, 5 Sep 2017 15:45:47 +0200 Subject: [PATCH 128/146] Add tests with num_retries Maybe too many?.. 
Signed-off-by: Yves Bastide --- bigquery/tests/test_client.py | 215 ++++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 0bf5a18..a5e8161 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -2926,3 +2926,218 @@ def test_dataset_update_success(self): self.mock_datasets.update.return_value.execute. \ assert_called_with(num_retries=0) + + +class TestNumRetries(unittest.TestCase): + + def setUp(self): + client._bq_client = None + + self.mock_bq_service = mock.Mock() + self.mock_tables = mock.Mock() + self.mock_job_collection = mock.Mock() + self.mock_datasets = mock.Mock() + self.mock_table_data = mock.Mock() + self.mock_bq_service.tables.return_value = self.mock_tables + self.mock_bq_service.jobs.return_value = self.mock_job_collection + self.mock_bq_service.datasets.return_value = self.mock_datasets + self.mock_bq_service.tabledata.return_value = self.mock_table_data + + self.project_id = 'project' + self.num_retries = 5 + self.client = client.BigQueryClient(self.mock_bq_service, + self.project_id, + num_retries=self.num_retries) + self.dataset = 'dataset' + self.project = 'project' + self.table = 'table' + self.schema = [ + {'name': 'foo', 'type': 'STRING', 'mode': 'nullable'}, + {'name': 'bar', 'type': 'FLOAT', 'mode': 'nullable'} + ] + self.friendly_name = "friendly name" + self.description = "description" + self.access = [{'userByEmail': "bob@gmail.com"}] + self.query = 'SELECT "bar" foo, "foo" bar' + self.rows = [ + {'one': 'uno', 'two': 'dos'}, {'one': 'ein', 'two': 'zwei'}, + {'two': 'kiwi'}] + self.data = { + "kind": "bigquery#tableDataInsertAllRequest", + "rows": [{'insertId': "uno", 'json': {'one': 'uno', 'two': 'dos'}}, + {'insertId': "ein", 'json': + {'one': 'ein', 'two': 'zwei'}}, + {'json': {'two': 'kiwi'}}] + } + + def test_get_response(self): + job_id = 'bar' + + mock_query_job = mock.Mock() + mock_query_reply = mock.Mock() + mock_query_job.execute.return_value = mock_query_reply + self.mock_job_collection.getQueryResults.return_value = mock_query_job + + offset = 5 + limit = 10 + page_token = "token" + timeout = 1 + + self.client.get_query_results(job_id, offset, limit, + page_token, timeout) + + mock_query_job.execute. \ + assert_called_once_with(num_retries=self.num_retries) + + def test_table_exists(self): + expected = [ + {'type': 'FLOAT', 'name': 'foo', 'mode': 'NULLABLE'}, + {'type': 'INTEGER', 'name': 'bar', 'mode': 'NULLABLE'}, + {'type': 'INTEGER', 'name': 'baz', 'mode': 'NULLABLE'}, + ] + + self.mock_tables.get.return_value.execute.return_value = \ + {'schema': {'fields': expected}} + + self.client.get_table_schema(self.dataset, self.table) + self.mock_tables.get.return_value.execute. \ + assert_called_once_with(num_retries=self.num_retries) + + def test_table_create(self): + self.mock_tables.insert.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + self.client.create_table(self.dataset, self.table, + self.schema) + + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) + + def test_table_update(self): + self.mock_tables.update.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + self.client.update_table(self.dataset, self.table, + self.schema) + + self.mock_tables.update.return_value.execute. 
\ + assert_called_with(num_retries=self.num_retries) + + def test_table_patch(self): + self.mock_tables.patch.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + self.client.patch_table(self.dataset, self.table, + self.schema) + + self.mock_tables.patch.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) + + def test_view_create(self): + body = { + 'view': {'query': self.query}, + 'tableReference': { + 'tableId': self.table, 'projectId': self.project, + 'datasetId': self.dataset + } + } + + self.mock_tables.insert.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.create_view(self.dataset, self.table, + self.query) + + self.assertTrue(actual) + + self.mock_tables.insert.assert_called_with( + projectId=self.project, datasetId=self.dataset, body=body) + + self.mock_tables.insert.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) + + def test_delete_table(self): + self.mock_tables.delete.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.delete_table(self.dataset, self.table) + + self.assertTrue(actual) + + self.mock_tables.delete.assert_called_with( + projectId=self.project, datasetId=self.dataset, tableId=self.table) + + self.mock_tables.delete.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) + + def test_push(self): + self.mock_table_data.insertAll.return_value.execute.return_value = { + 'status': 'foo'} + + actual = self.client.push_rows(self.dataset, self.table, self.rows, + 'one') + + self.assertTrue(actual) + + self.mock_bq_service.tabledata.assert_called_with() + + self.mock_table_data.insertAll.assert_called_with( + projectId=self.project, datasetId=self.dataset, tableId=self.table, + body=self.data) + + execute_calls = [mock.call(num_retries=self.num_retries)] + self.mock_table_data.insertAll.return_value.execute.assert_has_calls( + execute_calls) + + def test_dataset_create(self): + body = { + 'datasetReference': { + 'datasetId': self.dataset, + 'projectId': self.project}, + 'friendlyName': self.friendly_name, + 'description': self.description, + 'access': self.access + } + + self.mock_datasets.insert.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.create_dataset(self.dataset, + self.friendly_name, + self.description, + self.access) + self.assertTrue(actual) + + self.mock_datasets.insert.assert_called_with( + projectId=self.project, body=body) + + self.mock_datasets.insert.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) + + def test_delete_datasets(self): + self.mock_datasets.delete.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.delete_dataset(self.dataset) + + self.assertTrue(actual) + + self.mock_datasets.delete.assert_called_with( + projectId=self.project, datasetId=self.dataset, + deleteContents=False) + + self.mock_datasets.delete.return_value.execute. \ + assert_called_with(num_retries=self.num_retries) + + def test_dataset_update(self): + self.mock_datasets.update.return_value.execute.side_effect = [{ + 'status': 'foo'}, {'status': 'bar'}] + + actual = self.client.update_dataset(self.dataset, + self.friendly_name, + self.description, + self.access) + self.assertTrue(actual) + + self.mock_datasets.update.return_value.execute. 
\ + assert_called_with(num_retries=self.num_retries) From 4a569bd3b8ede0297b3973b15e550c998b03f3e9 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Wed, 20 Sep 2017 10:23:36 -0500 Subject: [PATCH 129/146] Try to fix travis --- .travis.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 9f422c6..1e1c28c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,7 @@ language: python +before_install: + - sudo apt-get update -q + - sudo apt-get install pypy -y install: - python setup.py develop - pip install tox @@ -7,7 +10,6 @@ notifications: email: false env: - TOXENV=py27 - - TOXENV=py33 - TOXENV=py34 - TOXENV=nightly - TOXENV=pypy From d308391cf9508f1568fb9647fd711e436a2978f4 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Mon, 2 Oct 2017 16:25:19 -0500 Subject: [PATCH 130/146] Bump version to 1.13.0 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index 666b2f7..84c54b7 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.12.0' +__version__ = '1.13.0' From 40de946cf1af7d6317666db28a5740aad42c39ea Mon Sep 17 00:00:00 2001 From: Alireza Date: Tue, 30 Jan 2018 15:09:06 +0100 Subject: [PATCH 131/146] feat: Support `IS NULL`\`IS NOT NULL` condition --- bigquery/query_builder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bigquery/query_builder.py b/bigquery/query_builder.py index b29d0cd..435bb73 100644 --- a/bigquery/query_builder.py +++ b/bigquery/query_builder.py @@ -241,6 +241,8 @@ def _render_condition(field, field_type, comparators): else: value = _render_condition_value(value, field_type) value = "(" + value + ")" + elif condition == "IS NULL" or condition == "IS NOT NULL": + return field + " " + condition elif condition == "BETWEEN": if isinstance(value, (tuple, list, set)) and len(value) == 2: value = ' AND '.join( From 2ce1b8d9deb4a35c8d2759ef38ba27859a1d3ce5 Mon Sep 17 00:00:00 2001 From: Tuan Vu Date: Wed, 14 Mar 2018 16:48:22 -0700 Subject: [PATCH 132/146] support a different project_id to run job This supports authenticate to 1 project_id but run jobs in a different project_id. --- bigquery/client.py | 271 ++++++++++++++++++++++++++++++++------------- 1 file changed, 195 insertions(+), 76 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 847c9fb..0c6377e 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -198,6 +198,26 @@ def __init__(self, bq_service, project_id, swallow_results=True, self.num_retries = num_retries self.cache = {} + def _get_project_id(self, project_id=None): + """ Get new project_id + + Default is self.project_id, which is the project client authenticate to. + A new project_id is specified when client wants to authenticate to 1 project, + but run jobs in a different project. + + Parameters + ---------- + project_id : str + BigQuery project_id + + Returns + ------- + project_id: BigQuery project_id + """ + if project_id is None: + project_id = self.project_id + return project_id + def _submit_query_job(self, query_data): """ Submit a query job to BigQuery. 
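Relating to the `IS NULL`/`IS NOT NULL` support added in PATCH 131 above, a minimal sketch of the new comparator (not part of the patch; the field, dataset and table names are made up, and the conditions layout follows the `render_query` docstring):

```python
from bigquery.query_builder import render_query

query = render_query(
    'dataset',
    ['2018_01_appspot'],
    conditions=[{
        'field': 'email',
        'type': 'STRING',
        'comparators': [{'condition': 'IS NOT NULL',
                         'negate': False,
                         'value': None}],
    }],
)
# The rendered WHERE clause should now contain: email IS NOT NULL
```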
@@ -253,6 +273,27 @@ def _submit_query_job(self, query_data): return job_id, [self._transform_row(row, schema) for row in rows] + def _get_job_reference(self, job_id): + """ Get job reference from job_id + For more details, see: + https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#resource + + Parameters + ---------- + job_id: + Id of the job + + Returns + ------- + job_reference: json of job_reference + """ + job_reference = { + "projectId": self.project_id, + "jobId": job_id + } + + return job_reference + def _insert_job(self, body_object): """ Submit a job to BigQuery @@ -362,7 +403,7 @@ def get_query_schema(self, job_id): return query_reply['schema']['fields'] - def get_table_schema(self, dataset, table): + def get_table_schema(self, dataset, table, project_id=None): """Return the table schema. Parameters @@ -371,6 +412,8 @@ def get_table_schema(self, dataset, table): The dataset containing the `table`. table : str The table to get the schema for + project_id: str, optional + The project of the dataset. Returns ------- @@ -380,8 +423,9 @@ def get_table_schema(self, dataset, table): """ try: + project_id = self._get_project_id(project_id) result = self.bigquery.tables().get( - projectId=self.project_id, + projectId=project_id, tableId=table, datasetId=dataset).execute(num_retries=self.num_retries) except HttpError as e: @@ -458,29 +502,33 @@ def get_query_rows(self, job_id, offset=None, limit=None, timeout=0): records += [self._transform_row(row, schema) for row in rows] return records[:limit] if limit else records - def check_dataset(self, dataset_id): + def check_dataset(self, dataset_id, project_id=None): """Check to see if a dataset exists. Parameters ---------- dataset_id : str Dataset unique id + project_id: str, optional + The project the dataset is in Returns ------- bool True if dataset at `dataset_id` exists, else Fasle - """ - dataset = self.get_dataset(dataset_id) + """ + dataset = self.get_dataset(dataset_id, project_id) return bool(dataset) - def get_dataset(self, dataset_id): + def get_dataset(self, dataset_id, project_id=None): """Retrieve a dataset if it exists, otherwise return an empty dict. Parameters ---------- dataset_id : str Dataset unique id + project_id: str, optional + The project the dataset is in Returns ------- @@ -488,15 +536,16 @@ def get_dataset(self, dataset_id): Contains dataset object if it exists, else empty """ try: + project_id = self._get_project_id(project_id) dataset = self.bigquery.datasets().get( - projectId=self.project_id, datasetId=dataset_id).execute( + projectId=project_id, datasetId=dataset_id).execute( num_retries=self.num_retries) except HttpError: dataset = {} return dataset - def check_table(self, dataset, table): + def check_table(self, dataset, table, project_id=None): """Check to see if a table exists. Parameters @@ -505,16 +554,18 @@ def check_table(self, dataset, table): The dataset to check table : str The name of the table + project_id: str, optional + The project the table is in Returns ------- bool True if table exists, else False """ - table = self.get_table(dataset, table) + table = self.get_table(dataset, table, project_id) return bool(table) - def get_table(self, dataset, table): + def get_table(self, dataset, table, project_id=None): """ Retrieve a table if it exists, otherwise return an empty dict. 
Parameters @@ -523,6 +574,8 @@ def get_table(self, dataset, table): The dataset that the table is in table : str The name of the table + project_id: str, optional + The project that the table is in Returns ------- @@ -530,15 +583,16 @@ def get_table(self, dataset, table): Containing the table object if it exists, else empty """ try: + project_id = self._get_project_id(project_id) table = self.bigquery.tables().get( - projectId=self.project_id, datasetId=dataset, + projectId=project_id, datasetId=dataset, tableId=table).execute(num_retries=self.num_retries) except HttpError: table = {} return table - def create_table(self, dataset, table, schema, + def create_table(self, dataset, table, schema, project_id=None, expiration_time=None, time_partitioning=False): """Create a new table in the dataset. @@ -550,6 +604,8 @@ def create_table(self, dataset, table, schema, The name of the table to create schema : dict The table schema + project_id: str, optional + The project to create the table in expiration_time : int or double, optional The expiry time in milliseconds since the epoch. time_partitioning : bool, optional @@ -561,12 +617,13 @@ def create_table(self, dataset, table, schema, If the table was successfully created, or response from BigQuery if swallow_results is set to False """ + project_id = self._get_project_id(project_id) body = { 'schema': {'fields': schema}, 'tableReference': { 'tableId': table, - 'projectId': self.project_id, + 'projectId': project_id, 'datasetId': dataset } } @@ -579,7 +636,7 @@ def create_table(self, dataset, table, schema, try: table = self.bigquery.tables().insert( - projectId=self.project_id, + projectId=project_id, datasetId=dataset, body=body ).execute(num_retries=self.num_retries) @@ -589,14 +646,14 @@ def create_table(self, dataset, table, schema, return table except HttpError as e: - logger.error(('Cannot create table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, e.content)) + logger.error(('Cannot create table {0}.{1}.{2}\n' + 'Http Error: {3}').format(project_id, dataset, table, e.content)) if self.swallow_results: return False else: return {} - def update_table(self, dataset, table, schema): + def update_table(self, dataset, table, schema, project_id=None): """Update an existing table in the dataset. Parameters @@ -607,6 +664,8 @@ def update_table(self, dataset, table, schema): The name of the table to update schema : dict Table schema + project_id: str, optional + The project to update the table in Returns ------- @@ -614,19 +673,20 @@ def update_table(self, dataset, table, schema): bool indicating if the table was successfully updated or not, or response from BigQuery if swallow_results is set to False. 
""" + project_id = self._get_project_id(project_id) body = { 'schema': {'fields': schema}, 'tableReference': { 'tableId': table, - 'projectId': self.project_id, + 'projectId': project_id, 'datasetId': dataset } } try: result = self.bigquery.tables().update( - projectId=self.project_id, + projectId=project_id, tableId= table, datasetId=dataset, body=body @@ -637,14 +697,14 @@ def update_table(self, dataset, table, schema): return result except HttpError as e: - logger.error(('Cannot update table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, e.content)) + logger.error(('Cannot update table {0}.{1}.{2}\n' + 'Http Error: {3}').format(project_id, dataset, table, e.content)) if self.swallow_results: return False else: return {} - def patch_table(self, dataset, table, schema): + def patch_table(self, dataset, table, schema, project_id=None): """Patch an existing table in the dataset. Parameters @@ -655,6 +715,8 @@ def patch_table(self, dataset, table, schema): The name of the table to patch schema : dict The table schema + project_id: str, optional + The project to patch the table in Returns ------- @@ -662,19 +724,20 @@ def patch_table(self, dataset, table, schema): Bool indicating if the table was successfully patched or not, or response from BigQuery if swallow_results is set to False """ + project_id = self._get_project_id(project_id) body = { 'schema': {'fields': schema}, 'tableReference': { 'tableId': table, - 'projectId': self.project_id, + 'projectId': project_id, 'datasetId': dataset } } try: result = self.bigquery.tables().patch( - projectId=self.project_id, + projectId=project_id, datasetId=dataset, body=body ).execute(num_retries=self.num_retries) @@ -684,14 +747,14 @@ def patch_table(self, dataset, table, schema): return result except HttpError as e: - logger.error(('Cannot patch table {0}.{1}\n' - 'Http Error: {2}').format(dataset, table, e.content)) + logger.error(('Cannot patch table {0}.{1}.{2}\n' + 'Http Error: {3}').format(project_id, dataset, table, e.content)) if self.swallow_results: return False else: return {} - def create_view(self, dataset, view, query, use_legacy_sql=None): + def create_view(self, dataset, view, query, project_id=None, use_legacy_sql=None): """Create a new view in the dataset. Parameters @@ -702,6 +765,8 @@ def create_view(self, dataset, view, query, use_legacy_sql=None): The name of the view to create query : dict A query that BigQuery executes when the view is referenced. + project_id: str, optional + The project to create the view in use_legacy_sql : bool, optional If False, the query will use BigQuery's standard SQL (https://cloud.google.com/bigquery/sql-reference/) @@ -712,11 +777,12 @@ def create_view(self, dataset, view, query, use_legacy_sql=None): bool indicating if the view was successfully created or not, or response from BigQuery if swallow_results is set to False. """ + project_id = self._get_project_id(project_id) body = { 'tableReference': { 'tableId': view, - 'projectId': self.project_id, + 'projectId': project_id, 'datasetId': dataset }, 'view': { @@ -729,7 +795,7 @@ def create_view(self, dataset, view, query, use_legacy_sql=None): try: view = self.bigquery.tables().insert( - projectId=self.project_id, + projectId=project_id, datasetId=dataset, body=body ).execute(num_retries=self.num_retries) @@ -746,7 +812,7 @@ def create_view(self, dataset, view, query, use_legacy_sql=None): else: return {} - def delete_table(self, dataset, table): + def delete_table(self, dataset, table, project_id=None): """Delete a table from the dataset. 
Parameters @@ -755,6 +821,8 @@ def delete_table(self, dataset, table): The dataset to delete the table from. table : str The name of the table to delete + project_id: str, optional + String id of the project Returns ------- @@ -764,8 +832,9 @@ def delete_table(self, dataset, table): """ try: + project_id = self._get_project_id(project_id) response = self.bigquery.tables().delete( - projectId=self.project_id, + projectId=project_id, datasetId=dataset, tableId=table ).execute(num_retries=self.num_retries) @@ -782,7 +851,7 @@ def delete_table(self, dataset, table): else: return {} - def get_tables(self, dataset_id, app_id, start_time, end_time): + def get_tables(self, dataset_id, app_id, start_time, end_time, project_id=None): """Retrieve a list of tables that are related to the given app id and are inside the range of start and end times. @@ -796,6 +865,8 @@ def get_tables(self, dataset_id, app_id, start_time, end_time): The datetime or unix time after which records will be fetched. end_time : Union[datetime, int] The datetime or unix time up to which records will be fetched. + project_id: str, optional + String id of the project Returns ------- @@ -809,7 +880,7 @@ def get_tables(self, dataset_id, app_id, start_time, end_time): if isinstance(end_time, datetime): end_time = calendar.timegm(end_time.utctimetuple()) - every_table = self._get_all_tables(dataset_id) + every_table = self._get_all_tables(dataset_id, project_id) app_tables = every_table.get(app_id, {}) return self._filter_tables_by_time(app_tables, start_time, end_time) @@ -820,6 +891,7 @@ def import_data_from_uris( dataset, table, schema=None, + project_id=None, job=None, source_format=None, create_disposition=None, @@ -848,11 +920,13 @@ def import_data_from_uris( String id of the dataset table : str String id of the table - job : str, optional - Identifies the job (a unique job id is automatically generated if - not provided) schema : list, optional Represents the BigQuery schema + project_id: str, optional + String id of the project + job : str, optional + Identifies the job (a unique job id is automatically generated if + not provided) source_format : str, optional One of the JOB_SOURCE_FORMAT_* constants create_disposition : str, optional @@ -889,9 +963,11 @@ def import_data_from_uris( source_uris = source_uris if isinstance(source_uris, list) \ else [source_uris] + project_id = self._get_project_id(project_id) + configuration = { "destinationTable": { - "projectId": self.project_id, + "projectId": project_id, "tableId": table, "datasetId": dataset }, @@ -963,10 +1039,7 @@ def import_data_from_uris( "configuration": { 'load': configuration }, - "jobReference": { - "projectId": self.project_id, - "jobId": job - } + "jobReference": self._get_job_reference(job) } logger.debug("Creating load job %s" % body) @@ -979,6 +1052,7 @@ def export_data_to_uris( destination_uris, dataset, table, + project_id=None, job=None, compression=None, destination_format=None, @@ -999,6 +1073,8 @@ def export_data_to_uris( String id of the dataset table : str String id of the table + project_id: str, optional + String id of the project job : str, optional String identifying the job (a unique jobid is automatically generated if not provided) @@ -1024,9 +1100,11 @@ def export_data_to_uris( destination_uris = destination_uris \ if isinstance(destination_uris, list) else [destination_uris] + project_id = self._get_project_id(project_id) + configuration = { "sourceTable": { - "projectId": self.project_id, + "projectId": project_id, "tableId": table, 
"datasetId": dataset }, @@ -1057,10 +1135,7 @@ def export_data_to_uris( "configuration": { 'extract': configuration }, - "jobReference": { - "projectId": self.project_id, - "jobId": job - } + "jobReference": self._get_job_reference(job_id) } logger.info("Creating export job %s" % body) @@ -1073,6 +1148,7 @@ def write_to_table( query, dataset=None, table=None, + project_id=None, external_udf_uris=None, allow_large_results=None, use_query_cache=None, @@ -1097,6 +1173,8 @@ def write_to_table( String id of the dataset table : str, optional String id of the table + project_id: str, optional + String id of the project external_udf_uris : list, optional Contains external UDF URIs. If given, URIs must be Google Cloud Storage and have .js extensions. @@ -1138,9 +1216,11 @@ def write_to_table( "query": query, } + project_id = self._get_project_id(project_id) + if dataset and table: configuration['destinationTable'] = { - "projectId": self.project_id, + "projectId": project_id, "tableId": table, "datasetId": dataset } @@ -1233,7 +1313,7 @@ def wait_for_job(self, job, interval=5, timeout=60): return job_resource - def push_rows(self, dataset, table, rows, insert_id_key=None, + def push_rows(self, dataset, table, rows, project_id=None, insert_id_key=None, skip_invalid_rows=None, ignore_unknown_values=None, template_suffix=None): """Upload rows to BigQuery table. @@ -1244,6 +1324,8 @@ def push_rows(self, dataset, table, rows, insert_id_key=None, The dataset to upload to table : str The name of the table to insert rows into + project_id: str, optional + The project to upload to rows : list A ``list`` of rows (``dict`` objects) to add to the table insert_id_key : str, optional @@ -1292,8 +1374,9 @@ def push_rows(self, dataset, table, rows, insert_id_key=None, data['templateSuffix'] = template_suffix try: + project_id = self._get_project_id(project_id) response = table_data.insertAll( - projectId=self.project_id, + projectId=project_id, datasetId=dataset, tableId=table, body=data @@ -1325,19 +1408,21 @@ def push_rows(self, dataset, table, rows, insert_id_key=None, }] } - def get_all_tables(self, dataset_id): + def get_all_tables(self, dataset_id, project_id=None): """Retrieve a list of tables for the dataset. Parameters ---------- dataset_id : str The dataset to retrieve table data for. + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- A ``list`` with all table names """ - tables_data = self._get_all_tables_for_dataset(dataset_id) + tables_data = self._get_all_tables_for_dataset(dataset_id, project_id) tables = [] for table in tables_data.get('tables', []): @@ -1346,7 +1431,7 @@ def get_all_tables(self, dataset_id): tables.append(table_name) return tables - def _get_all_tables(self, dataset_id, cache=False): + def _get_all_tables(self, dataset_id, project_id=None, cache=False): """Retrieve the list of tables for dataset, that respect the formats: * appid_YYYY_MM * YYYY_MM_appid @@ -1355,6 +1440,8 @@ def _get_all_tables(self, dataset_id, cache=False): ---------- dataset_id : str The dataset to retrieve table names for + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset cache : bool, optional To use cached value or not (default False). Timeout value equals CACHE_TIMEOUT. 
@@ -1371,24 +1458,28 @@ def _get_all_tables(self, dataset_id, cache=False): do_fetch = False if do_fetch: - result = self._get_all_tables_for_dataset(dataset_id) + result = self._get_all_tables_for_dataset(dataset_id, project_id) self.cache[dataset_id] = (datetime.now(), result) return self._parse_table_list_response(result) - def _get_all_tables_for_dataset(self, dataset_id): + def _get_all_tables_for_dataset(self, dataset_id, project_id=None): """Retrieve a list of all tables for the dataset. Parameters ---------- dataset_id : str The dataset to retrieve table names for + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- dict A ``dict`` containing tables key with all tables """ + project_id = self._get_project_id(project_id) + result = self.bigquery.tables().list( projectId=self.project_id, datasetId=dataset_id).execute(num_retries=self.num_retries) @@ -1682,7 +1773,7 @@ def _raise_executing_exception_if_error(self, job): # # DataSet manipulation methods # - def create_dataset(self, dataset_id, friendly_name=None, description=None, + def create_dataset(self, dataset_id, project_id=None, friendly_name=None, description=None, access=None, location=None): """Create a new BigQuery dataset. @@ -1691,6 +1782,8 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, dataset_id : str Unique ``str`` identifying the dataset with the project (the referenceID of the dataset, not the integer id of the dataset) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset friendly_name: str, optional A human readable name description: str, optional @@ -1708,15 +1801,19 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, ``bool`` indicating if dataset was created or not, or response from BigQuery if swallow_results is set for False """ - try: + project_id = self._get_project_id(project_id) + + try: datasets = self.bigquery.datasets() - dataset_data = self.dataset_resource(dataset_id, + dataset_data = self.dataset_resource(dataset_id, + project_id=project_id, friendly_name=friendly_name, description=description, access=access, - location=location) + location=location + ) - response = datasets.insert(projectId=self.project_id, + response = datasets.insert(projectId=project_id, body=dataset_data).execute( num_retries=self.num_retries) if self.swallow_results: @@ -1731,31 +1828,40 @@ def create_dataset(self, dataset_id, friendly_name=None, description=None, else: return {} - def get_datasets(self): + def get_datasets(self, project_id=None): """List all datasets in the project. + + Parameters + ---------- + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- list Dataset resources """ - try: + project_id = self._get_project_id(project_id) + + try: datasets = self.bigquery.datasets() - request = datasets.list(projectId=self.project_id) + request = datasets.list(projectId=project_id) result = request.execute(num_retries=self.num_retries) return result.get('datasets', []) except HttpError as e: logger.error("Cannot list datasets: {0}".format(e)) return None - def delete_dataset(self, dataset_id, delete_contents=False): + def delete_dataset(self, dataset_id, project_id=None, delete_contents=False): """Delete a BigQuery dataset. 
Parameters ---------- dataset_id : str - Unique ``str`` identifying the datset with the project (the + Unique ``str`` identifying the dataset with the project (the referenceId of the dataset) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset delete_contents : bool, optional If True, forces the deletion of the dataset even when the dataset contains data (Default = False) @@ -1771,9 +1877,11 @@ def delete_dataset(self, dataset_id, delete_contents=False): HttpError 404 when dataset with dataset_id does not exist """ - try: + project_id = self._get_project_id(project_id) + + try: datasets = self.bigquery.datasets() - request = datasets.delete(projectId=self.project_id, + request = datasets.delete(projectId=project_id, datasetId=dataset_id, deleteContents=delete_contents) response = request.execute(num_retries=self.num_retries) @@ -1789,7 +1897,7 @@ def delete_dataset(self, dataset_id, delete_contents=False): else: return {} - def update_dataset(self, dataset_id, friendly_name=None, description=None, + def update_dataset(self, dataset_id, project_id=None, friendly_name=None, description=None, access=None): """Updates information in an existing dataset. The update method replaces the entire dataset resource, whereas the patch method only @@ -1800,6 +1908,8 @@ def update_dataset(self, dataset_id, friendly_name=None, description=None, dataset_id : str Unique ``str`` identifying the dataset with the project (the referencedId of the dataset) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset friendly_name : str, optional An optional descriptive name for the dataset. description : str, optional @@ -1813,11 +1923,13 @@ def update_dataset(self, dataset_id, friendly_name=None, description=None, ``bool`` indicating if the update was successful or not, or response from BigQuery if swallow_results is set for False. """ - try: + project_id = self._get_project_id(project_id) + + try: datasets = self.bigquery.datasets() - body = self.dataset_resource(dataset_id, friendly_name, + body = self.dataset_resource(dataset_id, project_id, friendly_name, description, access) - request = datasets.update(projectId=self.project_id, + request = datasets.update(projectId=project_id, datasetId=dataset_id, body=body) response = request.execute(num_retries=self.num_retries) @@ -1833,7 +1945,7 @@ def update_dataset(self, dataset_id, friendly_name=None, description=None, else: return {} - def patch_dataset(self, dataset_id, friendly_name=None, description=None, + def patch_dataset(self, dataset_id, project_id=None, friendly_name=None, description=None, access=None): """Updates information in an existing dataset. The update method replaces the entire dataset resource, whereas the patch method only @@ -1844,6 +1956,8 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, dataset_id : str Unique string idenfitying the dataset with the project (the referenceId of the dataset) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset friendly_name : str, optional An optional descriptive name for the dataset. description : str, optional @@ -1857,11 +1971,13 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, ``bool`` indicating if the patch was successful or not, or response from BigQuery if swallow_results is set for False. 
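The dataset methods get the same treatment; a brief sketch (not part of the patch; placeholder ids and key file) of creating and then removing a dataset owned by a project other than the one the client authenticated to:

```python
from bigquery import get_client

client = get_client('auth-project', json_key_file='key.json', readonly=False)

client.create_dataset('analytics_dataset',
                      project_id='other-project',
                      friendly_name='Analytics',
                      description='Illustrative dataset')

client.delete_dataset('analytics_dataset',
                      project_id='other-project',
                      delete_contents=True)
```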
""" - try: + project_id = self._get_project_id(project_id) + + try: datasets = self.bigquery.datasets() - body = self.dataset_resource(dataset_id, friendly_name, + body = self.dataset_resource(dataset_id, project_id, friendly_name, description, access) - request = datasets.patch(projectId=self.project_id, + request = datasets.patch(projectId=project_id, datasetId=dataset_id, body=body) response = request.execute(num_retries=self.num_retries) if self.swallow_results: @@ -1875,7 +1991,7 @@ def patch_dataset(self, dataset_id, friendly_name=None, description=None, else: return {} - def dataset_resource(self, ref_id, friendly_name=None, description=None, + def dataset_resource(self, ref_id, project_id=None, friendly_name=None, description=None, access=None, location=None): """See https://developers.google.com/bigquery/docs/reference/v2/datasets#resource @@ -1884,6 +2000,8 @@ def dataset_resource(self, ref_id, friendly_name=None, description=None, ---------- ref_id : str Dataset id (the reference id, not the integer id) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset friendly_name : str, optional An optional descriptive name for the dataset description : str, optional @@ -1898,10 +2016,11 @@ def dataset_resource(self, ref_id, friendly_name=None, description=None, dict Representing BigQuery dataset resource """ + project_id = self._get_project_id(project_id) data = { "datasetReference": { "datasetId": ref_id, - "projectId": self.project_id + "projectId": project_id } } if friendly_name: From 855be4e7c0fe4744072fb542a71e2d793263be3c Mon Sep 17 00:00:00 2001 From: Tuan Vu Date: Wed, 14 Mar 2018 21:44:08 -0700 Subject: [PATCH 133/146] update client and test_client to support a different project_id to run job --- bigquery/client.py | 155 ++++++++++++++++++---------------- bigquery/tests/test_client.py | 16 ++-- 2 files changed, 90 insertions(+), 81 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 0c6377e..b9d4e51 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -421,9 +421,9 @@ def get_table_schema(self, dataset, table, project_id=None): A ``list`` of ``dict`` objects that represent the table schema. If the table doesn't exist, None is returned. """ + project_id = self._get_project_id(project_id) - try: - project_id = self._get_project_id(project_id) + try: result = self.bigquery.tables().get( projectId=project_id, tableId=table, @@ -535,8 +535,9 @@ def get_dataset(self, dataset_id, project_id=None): dict Contains dataset object if it exists, else empty """ - try: - project_id = self._get_project_id(project_id) + project_id = self._get_project_id(project_id) + + try: dataset = self.bigquery.datasets().get( projectId=project_id, datasetId=dataset_id).execute( num_retries=self.num_retries) @@ -582,8 +583,8 @@ def get_table(self, dataset, table, project_id=None): dict Containing the table object if it exists, else empty """ - try: - project_id = self._get_project_id(project_id) + project_id = self._get_project_id(project_id) + try: table = self.bigquery.tables().get( projectId=project_id, datasetId=dataset, tableId=table).execute(num_retries=self.num_retries) @@ -592,8 +593,9 @@ def get_table(self, dataset, table, project_id=None): return table - def create_table(self, dataset, table, schema, project_id=None, - expiration_time=None, time_partitioning=False): + def create_table(self, dataset, table, schema, + expiration_time=None, time_partitioning=False, + project_id=None): """Create a new table in the dataset. 
Parameters @@ -603,13 +605,13 @@ def create_table(self, dataset, table, schema, project_id=None, table : str The name of the table to create schema : dict - The table schema - project_id: str, optional - The project to create the table in + The table schema expiration_time : int or double, optional The expiry time in milliseconds since the epoch. time_partitioning : bool, optional Create a time partitioning. + project_id: str, optional + The project to create the table in Returns ------- @@ -754,7 +756,7 @@ def patch_table(self, dataset, table, schema, project_id=None): else: return {} - def create_view(self, dataset, view, query, project_id=None, use_legacy_sql=None): + def create_view(self, dataset, view, query, use_legacy_sql=None, project_id=None): """Create a new view in the dataset. Parameters @@ -764,12 +766,12 @@ def create_view(self, dataset, view, query, project_id=None, use_legacy_sql=None view : str The name of the view to create query : dict - A query that BigQuery executes when the view is referenced. - project_id: str, optional - The project to create the view in + A query that BigQuery executes when the view is referenced. use_legacy_sql : bool, optional If False, the query will use BigQuery's standard SQL (https://cloud.google.com/bigquery/sql-reference/) + project_id: str, optional + The project to create the view in Returns ------- @@ -830,9 +832,9 @@ def delete_table(self, dataset, table, project_id=None): bool indicating if the table was successfully deleted or not, or response from BigQuery if swallow_results is set for False. """ + project_id = self._get_project_id(project_id) - try: - project_id = self._get_project_id(project_id) + try: response = self.bigquery.tables().delete( projectId=project_id, datasetId=dataset, @@ -890,8 +892,7 @@ def import_data_from_uris( source_uris, dataset, table, - schema=None, - project_id=None, + schema=None, job=None, source_format=None, create_disposition=None, @@ -904,6 +905,7 @@ def import_data_from_uris( field_delimiter=None, quote=None, skip_leading_rows=None, + project_id=None, ): """ Imports data into a BigQuery table from cloud storage. Optional @@ -921,9 +923,7 @@ def import_data_from_uris( table : str String id of the table schema : list, optional - Represents the BigQuery schema - project_id: str, optional - String id of the project + Represents the BigQuery schema job : str, optional Identifies the job (a unique job id is automatically generated if not provided) @@ -949,6 +949,8 @@ def import_data_from_uris( Quote character for csv only skip_leading_rows : int, optional For csv only + project_id: str, optional + String id of the project Returns ------- @@ -1051,13 +1053,13 @@ def export_data_to_uris( self, destination_uris, dataset, - table, - project_id=None, + table, job=None, compression=None, destination_format=None, print_header=None, field_delimiter=None, + project_id=None, ): """ Export data from a BigQuery table to cloud storage. 
Optional arguments @@ -1072,9 +1074,7 @@ def export_data_to_uris( dataset : str String id of the dataset table : str - String id of the table - project_id: str, optional - String id of the project + String id of the table job : str, optional String identifying the job (a unique jobid is automatically generated if not provided) @@ -1086,6 +1086,8 @@ def export_data_to_uris( Whether or not to print the header field_delimiter : str, optional Character separating fields in delimited file + project_id: str, optional + String id of the project Returns ------- @@ -1135,7 +1137,7 @@ def export_data_to_uris( "configuration": { 'extract': configuration }, - "jobReference": self._get_job_reference(job_id) + "jobReference": self._get_job_reference(job) } logger.info("Creating export job %s" % body) @@ -1147,8 +1149,7 @@ def write_to_table( self, query, dataset=None, - table=None, - project_id=None, + table=None, external_udf_uris=None, allow_large_results=None, use_query_cache=None, @@ -1157,7 +1158,8 @@ def write_to_table( write_disposition=None, use_legacy_sql=None, maximum_billing_tier=None, - flatten=None + flatten=None, + project_id=None, ): """ Write query result to table. If dataset or table is not provided, @@ -1172,9 +1174,7 @@ def write_to_table( dataset : str, optional String id of the dataset table : str, optional - String id of the table - project_id: str, optional - String id of the project + String id of the table external_udf_uris : list, optional Contains external UDF URIs. If given, URIs must be Google Cloud Storage and have .js extensions. @@ -1200,6 +1200,8 @@ def write_to_table( flatten : bool, optional Whether or not to flatten nested and repeated fields in query results + project_id: str, optional + String id of the project Returns ------- @@ -1313,9 +1315,9 @@ def wait_for_job(self, job, interval=5, timeout=60): return job_resource - def push_rows(self, dataset, table, rows, project_id=None, insert_id_key=None, + def push_rows(self, dataset, table, rows, insert_id_key=None, skip_invalid_rows=None, ignore_unknown_values=None, - template_suffix=None): + template_suffix=None, project_id=None): """Upload rows to BigQuery table. Parameters @@ -1323,9 +1325,7 @@ def push_rows(self, dataset, table, rows, project_id=None, insert_id_key=None, dataset : str The dataset to upload to table : str - The name of the table to insert rows into - project_id: str, optional - The project to upload to + The name of the table to insert rows into rows : list A ``list`` of rows (``dict`` objects) to add to the table insert_id_key : str, optional @@ -1338,6 +1338,8 @@ def push_rows(self, dataset, table, rows, project_id=None, insert_id_key=None, template_suffix : str, optional Inserts the rows into an {table}{template_suffix}. If table {table}{template_suffix} doesn't exist, create from {table}. + project_id: str, optional + The project to upload to Returns ------- @@ -1345,7 +1347,7 @@ def push_rows(self, dataset, table, rows, project_id=None, insert_id_key=None, bool indicating if insert succeeded or not, or response from BigQuery if swallow_results is set for False. 
""" - + project_id = self._get_project_id(project_id) table_data = self.bigquery.tabledata() rows_data = [] @@ -1373,8 +1375,7 @@ def push_rows(self, dataset, table, rows, project_id=None, insert_id_key=None, if template_suffix is not None: data['templateSuffix'] = template_suffix - try: - project_id = self._get_project_id(project_id) + try: response = table_data.insertAll( projectId=project_id, datasetId=dataset, @@ -1431,7 +1432,7 @@ def get_all_tables(self, dataset_id, project_id=None): tables.append(table_name) return tables - def _get_all_tables(self, dataset_id, project_id=None, cache=False): + def _get_all_tables(self, dataset_id, cache=False, project_id=None): """Retrieve the list of tables for dataset, that respect the formats: * appid_YYYY_MM * YYYY_MM_appid @@ -1439,12 +1440,12 @@ def _get_all_tables(self, dataset_id, project_id=None, cache=False): Parameters ---------- dataset_id : str - The dataset to retrieve table names for - project_id: str - Unique ``str`` identifying the BigQuery project contains the dataset + The dataset to retrieve table names for cache : bool, optional To use cached value or not (default False). Timeout value equals CACHE_TIMEOUT. + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- @@ -1773,17 +1774,15 @@ def _raise_executing_exception_if_error(self, job): # # DataSet manipulation methods # - def create_dataset(self, dataset_id, project_id=None, friendly_name=None, description=None, - access=None, location=None): + def create_dataset(self, dataset_id, friendly_name=None, description=None, + access=None, location=None, project_id=None): """Create a new BigQuery dataset. Parameters ---------- dataset_id : str Unique ``str`` identifying the dataset with the project (the - referenceID of the dataset, not the integer id of the dataset) - project_id: str - Unique ``str`` identifying the BigQuery project contains the dataset + referenceID of the dataset, not the integer id of the dataset) friendly_name: str, optional A human readable name description: str, optional @@ -1794,6 +1793,8 @@ def create_dataset(self, dataset_id, project_id=None, friendly_name=None, descri location : str, optional Indicating where dataset should be stored: EU or US (see https://developers.google.com/bigquery/docs/reference/v2/datasets#resource) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- @@ -1852,19 +1853,19 @@ def get_datasets(self, project_id=None): logger.error("Cannot list datasets: {0}".format(e)) return None - def delete_dataset(self, dataset_id, project_id=None, delete_contents=False): + def delete_dataset(self, dataset_id, delete_contents=False, project_id=None): """Delete a BigQuery dataset. 
Parameters ---------- dataset_id : str Unique ``str`` identifying the dataset with the project (the - referenceId of the dataset) - project_id: str + referenceId of the dataset) Unique ``str`` identifying the BigQuery project contains the dataset delete_contents : bool, optional If True, forces the deletion of the dataset even when the dataset contains data (Default = False) + project_id: str, optional Returns ------- @@ -1897,8 +1898,8 @@ def delete_dataset(self, dataset_id, project_id=None, delete_contents=False): else: return {} - def update_dataset(self, dataset_id, project_id=None, friendly_name=None, description=None, - access=None): + def update_dataset(self, dataset_id, friendly_name=None, description=None, + access=None, project_id=None): """Updates information in an existing dataset. The update method replaces the entire dataset resource, whereas the patch method only replaces fields that are provided in the submitted dataset resource. @@ -1907,15 +1908,15 @@ def update_dataset(self, dataset_id, project_id=None, friendly_name=None, descri ---------- dataset_id : str Unique ``str`` identifying the dataset with the project (the - referencedId of the dataset) - project_id: str - Unique ``str`` identifying the BigQuery project contains the dataset + referencedId of the dataset) friendly_name : str, optional An optional descriptive name for the dataset. description : str, optional An optional description of the dataset. access : list, optional Indicating access permissions + project_id: str, optional + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- @@ -1927,8 +1928,12 @@ def update_dataset(self, dataset_id, project_id=None, friendly_name=None, descri try: datasets = self.bigquery.datasets() - body = self.dataset_resource(dataset_id, project_id, friendly_name, - description, access) + body = self.dataset_resource(dataset_id, + friendly_name=friendly_name, + description=description, + access=access, + project_id=project_id) + request = datasets.update(projectId=project_id, datasetId=dataset_id, body=body) @@ -1945,8 +1950,8 @@ def update_dataset(self, dataset_id, project_id=None, friendly_name=None, descri else: return {} - def patch_dataset(self, dataset_id, project_id=None, friendly_name=None, description=None, - access=None): + def patch_dataset(self, dataset_id, friendly_name=None, description=None, + access=None, project_id=None): """Updates information in an existing dataset. The update method replaces the entire dataset resource, whereas the patch method only replaces fields that are provided in the submitted dataset resource. @@ -1955,15 +1960,15 @@ def patch_dataset(self, dataset_id, project_id=None, friendly_name=None, descrip ---------- dataset_id : str Unique string idenfitying the dataset with the project (the - referenceId of the dataset) - project_id: str - Unique ``str`` identifying the BigQuery project contains the dataset + referenceId of the dataset) friendly_name : str, optional An optional descriptive name for the dataset. description : str, optional An optional description of the dataset. access : list, optional Indicating access permissions. 
+ project_id: str, optional + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- @@ -1975,8 +1980,11 @@ def patch_dataset(self, dataset_id, project_id=None, friendly_name=None, descrip try: datasets = self.bigquery.datasets() - body = self.dataset_resource(dataset_id, project_id, friendly_name, - description, access) + body = self.dataset_resource(dataset_id, + friendly_name=friendly_name, + description=description, + access=access, + project_id=project_id) request = datasets.patch(projectId=project_id, datasetId=dataset_id, body=body) response = request.execute(num_retries=self.num_retries) @@ -1991,17 +1999,15 @@ def patch_dataset(self, dataset_id, project_id=None, friendly_name=None, descrip else: return {} - def dataset_resource(self, ref_id, project_id=None, friendly_name=None, description=None, - access=None, location=None): + def dataset_resource(self, ref_id, friendly_name=None, description=None, + access=None, location=None, project_id=None): """See https://developers.google.com/bigquery/docs/reference/v2/datasets#resource Parameters ---------- ref_id : str - Dataset id (the reference id, not the integer id) - project_id: str - Unique ``str`` identifying the BigQuery project contains the dataset + Dataset id (the reference id, not the integer id) friendly_name : str, optional An optional descriptive name for the dataset description : str, optional @@ -2010,6 +2016,8 @@ def dataset_resource(self, ref_id, project_id=None, friendly_name=None, descript Indicating access permissions location: str, optional, 'EU' or 'US' An optional geographical location for the dataset(EU or US) + project_id: str + Unique ``str`` identifying the BigQuery project contains the dataset Returns ------- @@ -2017,6 +2025,7 @@ def dataset_resource(self, ref_id, project_id=None, friendly_name=None, descript Representing BigQuery dataset resource """ project_id = self._get_project_id(project_id) + data = { "datasetReference": { "datasetId": ref_id, diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index a5e8161..b581830 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -2904,18 +2904,18 @@ def test_dataset_update_success(self): self.mock_datasets.update.return_value.execute.side_effect = [{ 'status': 'foo'}, {'status': 'bar'}] - actual = self.client.update_dataset(self.dataset, - self.friendly_name, - self.description, - self.access) + actual = self.client.update_dataset(self.dataset, + friendly_name=self.friendly_name, + description=self.description, + access=self.access) self.assertTrue(actual) self.client.swallow_results = False - actual = self.client.update_dataset(self.dataset, - self.friendly_name, - self.description, - self.access) + actual = self.client.update_dataset(self.dataset, + friendly_name=self.friendly_name, + description=self.description, + access=self.access) self.assertEqual(actual, {'status': 'bar'}) From 1617ad653e3c9bc41706b4512b12b6fb50132ac9 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Fri, 16 Mar 2018 20:27:55 -0500 Subject: [PATCH 134/146] Bump version --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index 84c54b7..e4f2ad4 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.13.0' +__version__ = '1.14.0' From 700eb9dac5c6cf0bc4fd36078d083abcf828b0bd Mon Sep 17 00:00:00 2001 From: Juan Sandoval Date: Tue, 24 Apr 2018 12:11:06 -0500 Subject: [PATCH 135/146] Remove OAuth cache 
discovery from google client library. (#1) This fix the warning: ImportError: file_cache is unavailable when using oauth2client >= 4.0.0 And allow us to continue using the latest OAuth library versions --- bigquery/client.py | 9 +++++-- bigquery/tests/test_client.py | 45 +++++++++++++++++++++++++++-------- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index b9d4e51..d76cec7 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -175,8 +175,13 @@ def _get_bq_service(credentials=None, service_url=None): assert credentials, 'Must provide ServiceAccountCredentials' http = credentials.authorize(Http()) - service = build('bigquery', 'v2', http=http, - discoveryServiceUrl=service_url) + service = build( + 'bigquery', + 'v2', + http=http, + discoveryServiceUrl=service_url, + cache_discovery=False + ) return service diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index b581830..9af4f0c 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -67,8 +67,13 @@ def test_initialize_readonly(self, mock_build, mock_return_cred): scopes=BIGQUERY_SCOPE_READ_ONLY) self.assertTrue( mock_cred.from_p12_keyfile_buffer.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, - discoveryServiceUrl=mock_service_url) + mock_build.assert_called_once_with( + 'bigquery', + 'v2', + http=mock_http, + discoveryServiceUrl=mock_service_url, + cache_discovery=False + ) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -101,8 +106,13 @@ def test_initialize_read_write(self, mock_build, mock_return_cred): service_account, mock.ANY, scopes=BIGQUERY_SCOPE) self.assertTrue( mock_cred.from_p12_keyfile_buffer.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, - discoveryServiceUrl=mock_service_url) + mock_build.assert_called_once_with( + 'bigquery', + 'v2', + http=mock_http, + discoveryServiceUrl=mock_service_url, + cache_discovery=False + ) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -136,8 +146,13 @@ def test_initialize_key_file(self, mock_build, mock_return_cred): scopes=BIGQUERY_SCOPE) self.assertTrue( mock_cred.from_p12_keyfile.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, - discoveryServiceUrl=mock_service_url) + mock_build.assert_called_once_with( + 'bigquery', + 'v2', + http=mock_http, + discoveryServiceUrl=mock_service_url, + cache_discovery=False + ) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -172,8 +187,13 @@ def test_initialize_json_key_file(self, mock_open, mock_build, mock_return_cred) scopes=BIGQUERY_SCOPE) self.assertTrue( mock_cred.from_json_keyfile_dict.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 'v2', http=mock_http, - discoveryServiceUrl=mock_service_url) + mock_build.assert_called_once_with( + 'bigquery', + 'v2', + http=mock_http, + discoveryServiceUrl=mock_service_url, + cache_discovery=False + ) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(project_id, bq_client.project_id) @@ -208,8 +228,13 @@ def test_initialize_json_key_file_without_project_id(self, mock_open, mock_build scopes=BIGQUERY_SCOPE) self.assertTrue( mock_cred.from_json_keyfile_dict.return_value.authorize.called) - mock_build.assert_called_once_with('bigquery', 
'v2', http=mock_http, - discoveryServiceUrl=mock_service_url) + mock_build.assert_called_once_with( + 'bigquery', + 'v2', + http=mock_http, + discoveryServiceUrl=mock_service_url, + cache_discovery=False + ) self.assertEquals(mock_bq, bq_client.bigquery) self.assertEquals(json_key['project_id'], bq_client.project_id) From da151c3b16bbd0a8bc757efe7221f40c8b1c6e61 Mon Sep 17 00:00:00 2001 From: rhoboro Date: Thu, 17 Jan 2019 12:19:09 +0900 Subject: [PATCH 136/146] fix get_all_tables with different project_id --- bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index d76cec7..6bfab16 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1487,7 +1487,7 @@ def _get_all_tables_for_dataset(self, dataset_id, project_id=None): project_id = self._get_project_id(project_id) result = self.bigquery.tables().list( - projectId=self.project_id, + projectId=project_id, datasetId=dataset_id).execute(num_retries=self.num_retries) page_token = result.get('nextPageToken') From fb47d0459b93646e859464a2a2313e7a5e58a059 Mon Sep 17 00:00:00 2001 From: rhoboro Date: Thu, 17 Jan 2019 14:25:34 +0900 Subject: [PATCH 137/146] fix paging too --- bigquery/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/client.py b/bigquery/client.py index 6bfab16..537e23a 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -1493,7 +1493,7 @@ def _get_all_tables_for_dataset(self, dataset_id, project_id=None): page_token = result.get('nextPageToken') while page_token: res = self.bigquery.tables().list( - projectId=self.project_id, + projectId=project_id, datasetId=dataset_id, pageToken=page_token ).execute(num_retries=self.num_retries) From 8df1c772e93f6335f6a1e8b1db1997a8592f0951 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Wed, 16 Jan 2019 23:34:29 -0600 Subject: [PATCH 138/146] Bump version to 1.14.1 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index e4f2ad4..c162747 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.14.0' +__version__ = '1.14.1' From 24cc8c18822e1478920b3144186e8672c5f4dc22 Mon Sep 17 00:00:00 2001 From: sleepless-se Date: Thu, 14 Feb 2019 01:27:24 +0800 Subject: [PATCH 139/146] It was invalid json format. add a comma --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8171078..009f125 100644 --- a/README.md +++ b/README.md @@ -173,7 +173,7 @@ The client provides an API for inserting data into a BigQuery table. The last pa ```python # Insert data into table. rows = [ - {'one': 'ein', 'two': 'zwei'} + {'one': 'ein', 'two': 'zwei'}, {'id': 'NzAzYmRiY', 'one': 'uno', 'two': 'dos'}, {'id': 'NzAzYmRiY', 'one': 'ein', 'two': 'zwei'} # duplicate entry ] From 1491e1bdc0fb1b8a8fa8fd87255032c5834f10dc Mon Sep 17 00:00:00 2001 From: Yves Bastide Date: Tue, 30 Jul 2019 13:18:36 +0200 Subject: [PATCH 140/146] Fix client.patch_table tableId is a required argument of the patch method. Also, there's no need to pass a tableReference in the body. 
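For illustration, the corrected call inside ``BigQueryClient.patch_table`` ends up looking roughly like the sketch below (``project_id``, ``dataset``, ``table`` and ``schema`` are the method's existing arguments; the shape mirrors the diff that follows):

```python
# Schema-only body: no tableReference block is needed any more.
body = {'schema': {'fields': schema}}

result = self.bigquery.tables().patch(
    projectId=project_id,
    datasetId=dataset,
    tableId=table,   # tableId is a required argument of tables().patch
    body=body,
).execute(num_retries=self.num_retries)
```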
Signed-off-by: Yves Bastide --- bigquery/client.py | 6 +----- bigquery/tests/test_client.py | 9 ++++----- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 537e23a..125d048 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -735,17 +735,13 @@ def patch_table(self, dataset, table, schema, project_id=None): body = { 'schema': {'fields': schema}, - 'tableReference': { - 'tableId': table, - 'projectId': project_id, - 'datasetId': dataset - } } try: result = self.bigquery.tables().patch( projectId=project_id, datasetId=dataset, + tableId=table, body=body ).execute(num_retries=self.num_retries) if self.swallow_results: diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 9af4f0c..5d36aa9 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -1913,9 +1913,6 @@ def setUp(self): self.client = client.BigQueryClient(self.mock_bq_service, self.project) self.body = { 'schema': {'fields': self.schema}, - 'tableReference': { - 'tableId': self.table, 'projectId': self.project, - 'datasetId': self.dataset} } self.expiration_time = 1437513693000 @@ -1941,7 +1938,8 @@ def test_table_patch_failed(self): self.client.swallow_results = True self.mock_tables.patch.assert_called_with( - projectId=self.project, datasetId=self.dataset, body=self.body) + projectId=self.project, datasetId=self.dataset, + tableId=self.table, body=self.body) self.mock_tables.patch.return_value.execute. \ assert_called_with(num_retries=0) @@ -1968,7 +1966,8 @@ def test_table_patch_success(self): self.client.swallow_results = True self.mock_tables.patch.assert_called_with( - projectId=self.project, datasetId=self.dataset, body=self.body) + projectId=self.project, datasetId=self.dataset, + tableId=self.table, body=self.body) self.mock_tables.patch.return_value.execute. \ assert_called_with(num_retries=0) From 5cc95ba35913b68bcf19210534e41e708e7e8384 Mon Sep 17 00:00:00 2001 From: Yves Bastide Date: Tue, 30 Jul 2019 14:22:51 +0200 Subject: [PATCH 141/146] Fix Travis and tox envlist Replace python 3.3 and 3.4 with 3.5 and 3.6. Signed-off-by: Yves Bastide --- .travis.yml | 3 ++- tox.ini | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1e1c28c..ba3cdc8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,7 @@ notifications: email: false env: - TOXENV=py27 - - TOXENV=py34 + - TOXENV=py35 + - TOXENV=py36 - TOXENV=nightly - TOXENV=pypy diff --git a/tox.ini b/tox.ini index ce76190..58dadc9 100644 --- a/tox.ini +++ b/tox.ini @@ -4,7 +4,7 @@ # and then run "tox" from this directory. [tox] -envlist = py27, py33, py34, nightly, pypy +envlist = py27, py35, py36, nightly, pypy [testenv] commands = nosetests --logging-level=ERROR -a slow --with-coverage --cover-package=bigquery From 8ebf84f6310b5bfd26de18b9dce50c7f37ff9b94 Mon Sep 17 00:00:00 2001 From: Ege U Date: Tue, 10 Dec 2019 16:14:06 +0300 Subject: [PATCH 142/146] Dry runs return bytes processed, and cache hit now --- bigquery/client.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index 125d048..eedafc2 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -243,7 +243,7 @@ def _submit_query_job(self, query_data): ------- tuple job id and query results if query completed. 
If dry_run is True, - job id will be None and results will be empty if the query is valid + job id will be None and results will be [cacheHit and totalBytesProcessed] if the query is valid or a dict containing the response if invalid. Raises @@ -269,13 +269,17 @@ def _submit_query_job(self, query_data): schema = query_reply.get('schema', {'fields': None})['fields'] rows = query_reply.get('rows', []) job_complete = query_reply.get('jobComplete', False) + cache_hit = query_reply['cacheHit'] + total_bytes_processed = query_reply['totalBytesProcessed'] # raise exceptions if it's not an async query # and job is not completed after timeout if not job_complete and query_data.get("timeoutMs", False): logger.error('BigQuery job %s timeout' % job_id) raise BigQueryTimeoutException() - + + if query_data.get("dryRun", True): + return job_id, [cache_hit, total_bytes_processed] return job_id, [self._transform_row(row, schema) for row in rows] def _get_job_reference(self, job_id): @@ -345,8 +349,8 @@ def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sq How long to wait for the query to complete, in seconds before the request times out and returns. dry_run : bool, optional - If True, the query isn't actually run. A valid query will return an - empty response, while an invalid one will return the same error + If True, the query isn't actually run. A valid query will return + cache hit, and total bytes processed, while an invalid one will return the same error message it would if it wasn't a dry run. use_legacy_sql : bool, optional. Default True. If False, the query will use BigQuery's standard SQL (https://cloud.google.com/bigquery/sql-reference/) @@ -359,7 +363,7 @@ def query(self, query, max_results=None, timeout=0, dry_run=False, use_legacy_sq ------- tuple (job id, query results) if the query completed. If dry_run is True, - job id will be None and results will be empty if the query is valid + job id will be None and results will be [cacheHit and totalBytesProcessed] if the query is valid or a ``dict`` containing the response if invalid. 
Raises From 01f38be5947df8ae5a9936703181a8062c5fc48c Mon Sep 17 00:00:00 2001 From: Ege U Date: Tue, 10 Dec 2019 16:32:44 +0300 Subject: [PATCH 143/146] Rewrote the tests --- bigquery/tests/test_client.py | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/bigquery/tests/test_client.py b/bigquery/tests/test_client.py index 5d36aa9..1f2d247 100644 --- a/bigquery/tests/test_client.py +++ b/bigquery/tests/test_client.py @@ -297,7 +297,9 @@ def test_query(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, - 'jobComplete': True + 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -329,6 +331,8 @@ def test_query_max_results_set(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -357,6 +361,8 @@ def test_query_timeout_set(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -382,6 +388,8 @@ def test_sync_query_timeout(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': False, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -400,6 +408,8 @@ def test_async_query_timeout(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, 'jobComplete': False, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -409,14 +419,18 @@ def test_async_query_timeout(self): self.assertEquals(results, []) def test_query_dry_run_valid(self): - """Ensure that None and an empty list is returned from the query when + """Ensure that None and [cacheHit, totalBytesProcessed] is returned from the query when dry_run is True and the query is valid. 
""" mock_query_job = mock.Mock() - mock_query_job.execute.return_value = {'jobReference': {}, - 'jobComplete': True} + mock_query_job.execute.return_value = { + 'jobReference': {}, + 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 + } self.mock_job_collection.query.return_value = mock_query_job @@ -428,7 +442,7 @@ def test_query_dry_run_valid(self): 'dryRun': True} ) self.assertIsNone(job_id) - self.assertEqual([], results) + self.assertEqual([False, 0], results) def test_query_dry_run_invalid(self): """Ensure that None and a dict is returned from the query when dry_run @@ -468,6 +482,8 @@ def test_query_with_results(self): 'schema': {'fields': [{'name': 'foo', 'type': 'INTEGER'}]}, 'rows': [{'f': [{'v': 10}]}], 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -491,7 +507,9 @@ def test_query_with_using_legacy_sql(self): mock_query_job.execute.return_value = { 'jobReference': expected_job_ref, - 'jobComplete': True + 'jobComplete': True, + 'cacheHit': False, + 'totalBytesProcessed': 0 } self.mock_job_collection.query.return_value = mock_query_job @@ -873,7 +891,7 @@ def test_json_job_body_constructed_correctly(self): body = { "jobReference": { "projectId": self.project_id, - "jobId": "job" + "jobId": "job", }, "configuration": { "load": { From 0d2c801745c48732f20c9002d2a6026995875540 Mon Sep 17 00:00:00 2001 From: Tyler Treat Date: Tue, 10 Dec 2019 17:48:32 -0600 Subject: [PATCH 144/146] Bump version to 1.15.0 --- bigquery/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery/version.py b/bigquery/version.py index c162747..1c19d78 100644 --- a/bigquery/version.py +++ b/bigquery/version.py @@ -1 +1 @@ -__version__ = '1.14.1' +__version__ = '1.15.0' From df42f83b637fbe4a70eac200ae05ea8a5f775316 Mon Sep 17 00:00:00 2001 From: Rahul Kumar Gupta <67097571+rahulshivan05@users.noreply.github.com> Date: Thu, 1 Oct 2020 09:06:03 +0530 Subject: [PATCH 145/146] Update requirements_dev.txt --- requirements_dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_dev.txt b/requirements_dev.txt index 74162c3..1040dea 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -1,6 +1,6 @@ nose rednose -mock==1.0.1 +mock==4.0.2 coverage nose-exclude tox From 77a7b1b1f3c3cbe50ce0db20b2ebc39012fbca78 Mon Sep 17 00:00:00 2001 From: Tim Gates Date: Wed, 24 Nov 2021 06:50:37 +1100 Subject: [PATCH 146/146] docs: fix simple typo, offical -> official There is a small typo in bigquery/client.py. Should read `official` rather than `offical`. --- bigquery/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bigquery/client.py b/bigquery/client.py index eedafc2..bb4d50a 100644 --- a/bigquery/client.py +++ b/bigquery/client.py @@ -227,7 +227,7 @@ def _submit_query_job(self, query_data): """ Submit a query job to BigQuery. This is similar to BigQueryClient.query, but gives the user - direct access to the query method on the offical BigQuery + direct access to the query method on the official BigQuery python client. For fine-grained control over a query job, see: @@ -306,7 +306,7 @@ def _get_job_reference(self, job_id): def _insert_job(self, body_object): """ Submit a job to BigQuery - Direct proxy to the insert() method of the offical BigQuery + Direct proxy to the insert() method of the official BigQuery python client. Able to submit load, link, query, copy, or extract jobs.