diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py index 648b69dea7..e1e8129ca3 100644 --- a/bigframes/_config/bigquery_options.py +++ b/bigframes/_config/bigquery_options.py @@ -127,6 +127,11 @@ def application_name(self) -> Optional[str]: The recommended format is ``"application-name/major.minor.patch_version"`` or ``"(gpn:PartnerName;)"`` for official Google partners. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.bigquery.application_name = "my-app/1.0.0" # doctest: +SKIP + Returns: None or str: Application name as a string if exists; otherwise None. @@ -145,6 +150,13 @@ def application_name(self, value: Optional[str]): def credentials(self) -> Optional[google.auth.credentials.Credentials]: """The OAuth2 credentials to use for this client. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import google.auth + >>> credentials, project = google.auth.default() # doctest: +SKIP + >>> bpd.options.bigquery.credentials = credentials # doctest: +SKIP + Returns: None or google.auth.credentials.Credentials: google.auth.credentials.Credentials if exists; otherwise None. @@ -163,6 +175,11 @@ def location(self) -> Optional[str]: For more information, see https://cloud.google.com/bigquery/docs/locations BigQuery locations. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.bigquery.location = "US" # doctest: +SKIP + Returns: None or str: Default location as a string; otherwise None. @@ -179,6 +196,11 @@ def location(self, value: Optional[str]): def project(self) -> Optional[str]: """Google Cloud project ID to use for billing and as the default project. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.bigquery.project = "my-project" # doctest: +SKIP + Returns: None or str: Google Cloud project ID as a string; otherwise None. @@ -206,6 +228,11 @@ def bq_connection(self) -> Optional[str]: If this option isn't provided, or project or location aren't provided, session will use its default project/location/connection_id as default connection. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.bigquery.bq_connection = "my-project.us.my-connection" # doctest: +SKIP + Returns: None or str: Name of the BigQuery connection as a string; otherwise None. @@ -228,6 +255,11 @@ def skip_bq_connection_check(self) -> bool: necessary permissions set up to support BigQuery DataFrames operations, then a runtime error will be reported. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.bigquery.skip_bq_connection_check = True # doctest: +SKIP + Returns: bool: A boolean value, where True indicates a BigQuery connection is @@ -300,6 +332,12 @@ def use_regional_endpoints(self) -> bool: does not promise any guarantee on the request remaining within the location during transit. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.bigquery.location = "europe-west3" # doctest: +SKIP + >>> bpd.options.bigquery.use_regional_endpoints = True # doctest: +SKIP + Returns: bool: A boolean value, where True indicates that regional endpoints @@ -339,6 +377,11 @@ def kms_key_name(self) -> Optional[str]: For more information, see https://cloud.google.com/bigquery/docs/customer-managed-encryption#assign_role Assign the Encrypter/Decrypter. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.bigquery.kms_key_name = "projects/my-project/locations/us/keyRings/my-ring/cryptoKeys/my-key" # doctest: +SKIP + Returns: None or str: Name of the customer managed encryption key as a string; otherwise None. @@ -356,6 +399,11 @@ def kms_key_name(self, value: str): def ordering_mode(self) -> Literal["strict", "partial"]: """Controls whether total row order is always maintained for DataFrame/Series. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.bigquery.ordering_mode = "partial" # doctest: +SKIP + Returns: Literal: A literal string value of either strict or partial ordering mode. @@ -432,7 +480,14 @@ def requests_transport_adapters( @property def enable_polars_execution(self) -> bool: - """If True, will use polars to execute some simple query plans locally.""" + """If True, will use polars to execute some simple query plans locally. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.bigquery.enable_polars_execution = True # doctest: +SKIP + + """ return self._enable_polars_execution @enable_polars_execution.setter diff --git a/bigframes/_config/compute_options.py b/bigframes/_config/compute_options.py index 7810ee897f..027566ae07 100644 --- a/bigframes/_config/compute_options.py +++ b/bigframes/_config/compute_options.py @@ -28,30 +28,30 @@ class ComputeOptions: >>> import bigframes.pandas as bpd >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") - >>> bpd.options.compute.maximum_bytes_billed = 500 + >>> bpd.options.compute.maximum_bytes_billed = 500 # doctest: +SKIP >>> df.to_pandas() # this should fail # doctest: +SKIP google.api_core.exceptions.InternalServerError: 500 Query exceeded limit for bytes billed: 500. 10485760 or higher required. - >>> bpd.options.compute.maximum_bytes_billed = None # reset option + >>> bpd.options.compute.maximum_bytes_billed = None # reset option # doctest: +SKIP To add multiple extra labels to a query configuration, use the `assign_extra_query_labels` method with keyword arguments: - >>> bpd.options.compute.assign_extra_query_labels(test1=1, test2="abc") - >>> bpd.options.compute.extra_query_labels + >>> bpd.options.compute.assign_extra_query_labels(test1=1, test2="abc") # doctest: +SKIP + >>> bpd.options.compute.extra_query_labels # doctest: +SKIP {'test1': 1, 'test2': 'abc'} Alternatively, you can add labels individually by directly accessing the `extra_query_labels` dictionary: - >>> bpd.options.compute.extra_query_labels["test3"] = False - >>> bpd.options.compute.extra_query_labels + >>> bpd.options.compute.extra_query_labels["test3"] = False # doctest: +SKIP + >>> bpd.options.compute.extra_query_labels # doctest: +SKIP {'test1': 1, 'test2': 'abc', 'test3': False} To remove a label from the configuration, use the `del` keyword on the desired label key: - >>> del bpd.options.compute.extra_query_labels["test1"] - >>> bpd.options.compute.extra_query_labels + >>> del bpd.options.compute.extra_query_labels["test1"] # doctest: +SKIP + >>> bpd.options.compute.extra_query_labels # doctest: +SKIP {'test2': 'abc', 'test3': False} """ @@ -63,6 +63,11 @@ class ComputeOptions: their operations to resume. The default value is 0. Set the value to None to turn off the guard. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.compute.ai_ops_confirmation_threshold = 100 # doctest: +SKIP + Returns: Optional[int]: Number of rows. """ @@ -73,6 +78,11 @@ class ComputeOptions: When set to True, the operation automatically fails without asking for user inputs. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.compute.ai_ops_threshold_autofail = True # doctest: +SKIP + Returns: bool: True if the guard is enabled. """ @@ -85,6 +95,10 @@ class ComputeOptions: 10 GB for potentially faster execution; BigQuery will raise an error if this limit is exceeded. Setting to True removes this result size limit. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.compute.allow_large_results = True # doctest: +SKIP Returns: bool | None: True if results > 10 GB are enabled. @@ -97,6 +111,10 @@ class ComputeOptions: query engine to handle. However this comes at the cost of increase cost and latency. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.compute.enable_multi_query_execution = True # doctest: +SKIP Returns: bool | None: True if enabled. @@ -121,6 +139,11 @@ class ComputeOptions: default. See `maximum_bytes_billed`: https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJobConfig#google_cloud_bigquery_job_QueryJobConfig_maximum_bytes_billed. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.compute.maximum_bytes_billed = 1000 # doctest: +SKIP + Returns: int | None: Number of bytes, if set. """ @@ -136,6 +159,11 @@ class ComputeOptions: of rows to be downloaded exceeds this limit, a ``bigframes.exceptions.MaximumResultRowsExceeded`` exception is raised. + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.compute.maximum_result_rows = 1000 # doctest: +SKIP + Returns: int | None: Number of rows, if set. """ diff --git a/bigframes/_config/experiment_options.py b/bigframes/_config/experiment_options.py index ee54e017fe..811d6b8bd4 100644 --- a/bigframes/_config/experiment_options.py +++ b/bigframes/_config/experiment_options.py @@ -31,6 +31,13 @@ def __init__(self): @property def semantic_operators(self) -> bool: + """Deprecated. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.experiments.semantic_operators = True # doctest: +SKIP + """ return self._semantic_operators @semantic_operators.setter @@ -44,6 +51,13 @@ def semantic_operators(self, value: bool): @property def ai_operators(self) -> bool: + """If True, allow using the AI operators. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.experiments.ai_operators = True # doctest: +SKIP + """ return self._ai_operators @ai_operators.setter diff --git a/bigframes/_config/sampling_options.py b/bigframes/_config/sampling_options.py index 107142c3ba..9746e01f31 100644 --- a/bigframes/_config/sampling_options.py +++ b/bigframes/_config/sampling_options.py @@ -31,6 +31,11 @@ class SamplingOptions: Download size threshold in MB. Default 500. If value set to None, the download size won't be checked. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.sampling.max_download_size = 1000 # doctest: +SKIP """ enable_downsampling: bool = False @@ -40,6 +45,11 @@ class SamplingOptions: If max_download_size is exceeded when downloading data (e.g., to_pandas()), the data will be downsampled if enable_downsampling is True, otherwise, an error will be raised. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.sampling.enable_downsampling = True # doctest: +SKIP """ sampling_method: Literal["head", "uniform"] = "uniform" @@ -50,6 +60,11 @@ class SamplingOptions: the beginning. It is fast and requires minimal computations to perform the downsampling.; "uniform": This algorithm returns uniform random samples of the data. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.sampling.sampling_method = "head" # doctest: +SKIP """ random_state: Optional[int] = None @@ -58,6 +73,11 @@ class SamplingOptions: If provided, the uniform method may take longer to execute and require more computation. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.sampling.random_state = 42 # doctest: +SKIP """ def with_max_download_size(self, max_rows: Optional[int]) -> SamplingOptions: diff --git a/third_party/bigframes_vendored/pandas/core/config_init.py b/third_party/bigframes_vendored/pandas/core/config_init.py index 194ec4a8a7..9ffd1ed59f 100644 --- a/third_party/bigframes_vendored/pandas/core/config_init.py +++ b/third_party/bigframes_vendored/pandas/core/config_init.py @@ -29,13 +29,13 @@ class DisplayOptions: >>> import bigframes.pandas as bpd >>> df = bpd.read_gbq("bigquery-public-data.ml_datasets.penguins") - >>> bpd.options.display.repr_mode = "deferred" - >>> df.head(20) # will no longer run the job + >>> bpd.options.display.repr_mode = "deferred" # doctest: +SKIP + >>> df.head(20) # will no longer run the job # doctest: +SKIP Computation deferred. Computation will process 28.9 kB Users can also get a dry run of the job by accessing the query_job property before they've run the job. This will return a dry run instance of the job they can inspect. - >>> df.query_job.total_bytes_processed + >>> df.query_job.total_bytes_processed # doctest: +SKIP 28947 User can execute the job by calling .to_pandas() @@ -44,21 +44,21 @@ class DisplayOptions: Reset repr_mode option - >>> bpd.options.display.repr_mode = "head" + >>> bpd.options.display.repr_mode = "head" # doctest: +SKIP Can also set the progress_bar option to see the progress bar in terminal, - >>> bpd.options.display.progress_bar = "terminal" + >>> bpd.options.display.progress_bar = "terminal" # doctest: +SKIP notebook, - >>> bpd.options.display.progress_bar = "notebook" + >>> bpd.options.display.progress_bar = "notebook" # doctest: +SKIP or just remove it. Setting to default value "auto" will detect and show progress bar automatically. - >>> bpd.options.display.progress_bar = "auto" + >>> bpd.options.display.progress_bar = "auto" # doctest: +SKIP """ # Options borrowed from pandas. @@ -67,6 +67,11 @@ class DisplayOptions: Maximum number of columns to display. Default 20. If `max_columns` is exceeded, switch to truncate view. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.max_columns = 50 # doctest: +SKIP """ max_rows: int = 10 @@ -74,6 +79,11 @@ class DisplayOptions: Maximum number of rows to display. Default 10. If `max_rows` is exceeded, switch to truncate view. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.max_rows = 50 # doctest: +SKIP """ precision: int = 6 @@ -81,6 +91,11 @@ class DisplayOptions: Controls the floating point output precision. Defaults to 6. See :attr:`pandas.options.display.precision`. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.precision = 2 # doctest: +SKIP """ # Options unique to BigQuery DataFrames. @@ -90,6 +105,11 @@ class DisplayOptions: Valid values are `auto`, `notebook`, and `terminal`. Set to `None` to remove progress bars. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = "terminal" # doctest: +SKIP """ repr_mode: Literal["head", "deferred", "anywidget"] = "head" @@ -105,6 +125,11 @@ class DisplayOptions: Instead, estimated bytes processed will be shown. DataFrame and Series objects can still be computed with methods that explicitly execute and download results. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.repr_mode = "deferred" # doctest: +SKIP """ max_colwidth: Optional[int] = 50 @@ -113,12 +138,22 @@ class DisplayOptions: When the column overflows, a "..." placeholder is embedded in the output. A 'None' value means unlimited. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.max_colwidth = 20 # doctest: +SKIP """ max_info_columns: int = 100 """ Used in DataFrame.info method to decide if information in each column will be printed. Default 100. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.max_info_columns = 50 # doctest: +SKIP """ max_info_rows: Optional[int] = 200_000 @@ -130,6 +165,11 @@ class DisplayOptions: For large frames, this can be quite slow. max_info_rows and max_info_cols limit this null check only to frames with smaller dimensions than specified. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.max_info_rows = 100 # doctest: +SKIP """ memory_usage: bool = True @@ -138,19 +178,39 @@ class DisplayOptions: df.info() is called. Default True. Valid values True, False. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.memory_usage = False # doctest: +SKIP """ blob_display: bool = True """ If True, display the blob content in notebook DataFrame preview. Default True. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.blob_display = True # doctest: +SKIP """ blob_display_width: Optional[int] = None """ Width in pixels that the blob constrained to. Default None.. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.blob_display_width = 100 # doctest: +SKIP """ blob_display_height: Optional[int] = None """ Height in pixels that the blob constrained to. Default None.. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.blob_display_height = 100 # doctest: +SKIP """