@@ -16,10 +16,8 @@
 
 from __future__ import absolute_import
 
-from multiprocessing import Process
 import os
 import pathlib
-from pathlib import Path
 import re
 import shutil
 from typing import Dict, List
@@ -42,7 +42,6 @@
     "third_party",
    "noxfile.py",
    "setup.py",
-    os.path.join("scripts", "benchmark"),
 ]
 
 DEFAULT_PYTHON_VERSION = "3.10"
@@ -686,7 +683,7 @@ def notebook(session: nox.Session):
686683 "seaborn" ,
687684 )
688685
689- notebooks_list = list (Path ("notebooks/" ).glob ("*/*.ipynb" ))
686+ notebooks_list = list (pathlib . Path ("notebooks/" ).glob ("*/*.ipynb" ))
690687
691688 denylist = [
692689 # Regionalized testing is manually added later.
@@ -698,7 +695,7 @@ def notebook(session: nox.Session):
         # With the notebooks_fill_params.py script, we are able to find and
         # replace the PROJECT_ID parameter, but not the others.
         #
-        # TODO(ashleyxu): Test these notebooks by replacing parameters with
+        # TODO(b/357904266): Test these notebooks by replacing parameters with
         # appropriate values and omitting cleanup logic that may break
         # our test infrastructure.
         "notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb",  # Needs DATASET.
@@ -748,17 +745,6 @@ def notebook(session: nox.Session):
     for nb in notebooks + list(notebooks_reg):
         assert os.path.exists(nb), nb
 
-    # TODO(shobs): For some reason --retries arg masks exceptions occurred in
-    # notebook failures, and shows unhelpful INTERNALERROR. Investigate that
-    # and enable retries if we can find a way to surface the real exception
-    # bacause the notebook is running against real GCP and something may fail
-    # due to transient issues.
-    pytest_command = [
-        "py.test",
-        "--nbmake",
-        "--nbmake-timeout=900",  # 15 minutes
-    ]
-
     try:
         # Populate notebook parameters and make a backup so that the notebooks
         # are runnable.
@@ -767,22 +753,23 @@ def notebook(session: nox.Session):
             CURRENT_DIRECTORY / "scripts" / "notebooks_fill_params.py",
             *notebooks,
         )
-
-        # Run notebooks in parallel session.run's, since each notebook
-        # takes an environment variable for performance logging
-        processes = []
         for notebook in notebooks:
-            process = Process(
-                target=session.run,
-                args=(*pytest_command, notebook),
-                kwargs={"env": {LOGGING_NAME_ENV_VAR: os.path.basename(notebook)}},
+            session.run(
+                "python",
+                "scripts/run_and_publish_benchmark.py",
+                "--notebook",
+                f"--benchmark-path={notebook}",
             )
-            process.start()
-            processes.append(process)
-
-        for process in processes:
-            process.join()
 
+        for notebook, regions in notebooks_reg.items():
+            for region in regions:
+                session.run(
+                    "python",
+                    "scripts/run_and_publish_benchmark.py",
+                    "--notebook",
+                    f"--benchmark-path={notebook}",
+                    f"--region={region}",
+                )
     finally:
         # Prevent our notebook changes from getting checked in to git
         # accidentally.
@@ -791,116 +778,37 @@ def notebook(session: nox.Session):
             CURRENT_DIRECTORY / "scripts" / "notebooks_restore_from_backup.py",
             *notebooks,
         )
-
-    # Additionally run regionalized notebooks in parallel session.run's.
-    # Each notebook takes a different region via env param.
-    processes = []
-    for notebook, regions in notebooks_reg.items():
-        for region in regions:
-            process = Process(
-                target=session.run,
-                args=(*pytest_command, notebook),
-                kwargs={
-                    "env": {
-                        "BIGQUERY_LOCATION": region,
-                        LOGGING_NAME_ENV_VAR: os.path.basename(notebook),
-                    }
-                },
-            )
-            process.start()
-            processes.append(process)
-
-    for process in processes:
-        process.join()
-
-    # when the environment variable is set as it is above,
-    # notebooks output a .bytesprocessed and .slotmillis report
-    # collect those reports and print a summary
-    _print_performance_report("notebooks/")
+        session.run(
+            "python",
+            "scripts/run_and_publish_benchmark.py",
+            "--notebook",
+            "--publish-benchmarks=notebooks/",
+        )
 
 
 @nox.session(python=DEFAULT_PYTHON_VERSION)
 def benchmark(session: nox.Session):
     session.install("-e", ".[all]")
-    base_path = os.path.join("scripts", "benchmark")
-
-    benchmark_script_list = list(Path(base_path).rglob("*.py"))
-    # Run benchmarks in parallel session.run's, since each benchmark
-    # takes an environment variable for performance logging
-    processes = []
-    for benchmark in benchmark_script_list:
-        process = Process(
-            target=session.run,
-            args=("python", benchmark),
-            kwargs={"env": {LOGGING_NAME_ENV_VAR: benchmark.as_posix()}},
-        )
-        process.start()
-        processes.append(process)
-
-    for process in processes:
-        process.join()
-
-    # when the environment variable is set as it is above,
-    # notebooks output a .bytesprocessed and .slotmillis report
-    # collect those reports and print a summary
-    _print_performance_report(base_path)
+    base_path = os.path.join("tests", "benchmark")
 
+    benchmark_script_list = list(pathlib.Path(base_path).rglob("*.py"))
 
-def _print_performance_report(path: str):
-    """Add an informational report about http queries, bytes
-    processed, and slot time to the testlog output for purposes
-    of measuring bigquery-related performance changes.
-
-    Looks specifically for output files in subfolders of the
-    passed path. (*/*.bytesprocessed and */*.slotmillis)
-    """
-    print("---BIGQUERY USAGE REPORT---")
-    results_dict = {}
-    bytes_reports = sorted(Path(path).rglob("*.bytesprocessed"))
-    for bytes_report in bytes_reports:
-        with open(bytes_report, "r") as bytes_file:
-            filename = bytes_report.relative_to(path).with_suffix("")
-            lines = bytes_file.read().splitlines()
-            query_count = len(lines)
-            total_bytes = sum([int(line) for line in lines])
-            results_dict[filename] = [query_count, total_bytes]
-        os.remove(bytes_report)
-
-    millis_reports = sorted(Path(path).rglob("*.slotmillis"))
-    for millis_report in millis_reports:
-        with open(millis_report, "r") as millis_file:
-            filename = millis_report.relative_to(path).with_suffix("")
-            lines = millis_file.read().splitlines()
-            total_slot_millis = sum([int(line) for line in lines])
-            results_dict[filename] += [total_slot_millis]
-        os.remove(millis_report)
-
-    cumulative_queries = 0
-    cumulative_bytes = 0
-    cumulative_slot_millis = 0
-    for name, results in results_dict.items():
-        if len(results) != 3:
-            raise IOError(
-                "Mismatch in performance logging output. "
-                "Expected one .bytesprocessed and one .slotmillis "
-                "file for each notebook."
+    try:
+        for benchmark in benchmark_script_list:
+            if benchmark.name in ("__init__.py", "utils.py"):
+                continue
+            session.run(
+                "python",
+                "scripts/run_and_publish_benchmark.py",
+                f"--benchmark-path={benchmark}",
             )
-        query_count, total_bytes, total_slot_millis = results
-        cumulative_queries += query_count
-        cumulative_bytes += total_bytes
-        cumulative_slot_millis += total_slot_millis
-        print(
-            f"{name} - query count: {query_count},"
-            f" bytes processed sum: {total_bytes},"
-            f" slot millis sum: {total_slot_millis}"
+    finally:
+        session.run(
+            "python",
+            "scripts/run_and_publish_benchmark.py",
+            f"--publish-benchmarks={base_path}",
         )
 
-    print(
-        f"---total queries: {cumulative_queries}, "
-        f"total bytes: {cumulative_bytes}, "
-        f"total slot millis: {cumulative_slot_millis}---"
-    )
-
 
 @nox.session(python="3.10")
 def release_dry_run(session):