diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 01d34ee0cb018..21a1a4f9fbc16 100644 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -3140,10 +3140,10 @@ function exec_samplecode_test() { cd ${PADDLE_ROOT}/tools if [ "$1" = "cpu" ] ; then - python sampcd_processor.py --debug cpu; example_error=$? + python sampcd_processor.py --debug --mode cpu; example_error=$? elif [ "$1" = "gpu" ] ; then SAMPLE_CODE_EXEC_THREADS=${SAMPLE_CODE_EXEC_THREADS:-2} - python sampcd_processor.py --threads=${SAMPLE_CODE_EXEC_THREADS} --debug gpu; example_error=$? + python sampcd_processor.py --threads=${SAMPLE_CODE_EXEC_THREADS} --debug --mode gpu; example_error=$? fi if [ "$example_error" != "0" ];then echo "Code instance execution failed" >&2 diff --git a/tools/sampcd_processor.py b/tools/sampcd_processor.py index f1464a222f5db..ab53cc6121c05 100644 --- a/tools/sampcd_processor.py +++ b/tools/sampcd_processor.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,486 +13,440 @@ # limitations under the License. """ please make sure to run in the tools path -usage: python sample_test.py {cpu or gpu} +usage: python sampcd_processor.py --mode {cpu or gpu} {cpu or gpu}: running in cpu version or gpu version for example, you can run cpu version testing like this: - python sampcd_processor.py cpu + python sampcd_processor.py --mode cpu """ + +import functools import logging import multiprocessing import os import platform +import queue import re -import shutil -import subprocess import sys +import threading import time +import typing -from sampcd_processor_utils import ENV_KEY_TEST_CAPACITY # noqa: F401 +import xdoctest from sampcd_processor_utils import ( - API_DIFF_SPEC_FN, - extract_code_blocks_from_docstr, - get_full_api_from_pr_spec, - get_incrementapi, + TEST_TIMEOUT, + DocTester, + TestResult, + logger, parse_args, run_doctest, ) -from sampcd_processor_xdoctest import Xdoctester - -logger = logging.getLogger() -if logger.handlers: - console = logger.handlers[ - 0 - ] # we assume the first handler is the one we want to configure -else: - console = logging.StreamHandler(stream=sys.stderr) - logger.addHandler(console) -console.setFormatter(logging.Formatter("%(message)s")) - -RUN_ON_DEVICE = 'cpu' -SAMPLE_CODE_TEST_CAPACITY = set() -GPU_ID = 0 -whl_error = [] -SAMPLECODE_TEMPDIR = 'samplecode_temp' -ENV_KEY_CODES_FRONTEND = 'CODES_INSERTED_INTO_FRONTEND' -SUMMARY_INFO = { - 'success': [], - 'failed': [], - 'skiptest': [], - 'nocodes': [], - # ... required not-match + +XDOCTEST_CONFIG = { + "global_exec": r"\n".join( + [ + "import paddle", + "paddle.device.set_device('cpu')", + "paddle.set_default_dtype('float32')", + "paddle.disable_static()", + ] + ), + "default_runtime_state": {"IGNORE_WHITESPACE": True}, } -def find_all(srcstr, substr): - """ - to find all desired substring in the source string - and return their starting indices as a list - - Args: - srcstr(str): the parent string - substr(str): substr - - Returns: - list: a list of the indices of the substrings - found - """ - indices = [] - gotone = srcstr.find(substr) - while gotone != -1: - indices.append(gotone) - gotone = srcstr.find(substr, gotone + 1) - return indices - - -def find_last_future_line_end(cbstr): - """ - find the last `__future__` line. - - Args: - docstr(str): docstring - Return: - index of the line end or None. - """ - pat = re.compile('__future__.*\n') - lastmo = None - it = re.finditer(pat, cbstr) - while True: - try: - lastmo = next(it) - except StopIteration: - break - if lastmo: - return lastmo.end() - else: - return None - - -def get_test_capacity(): - """ - collect capacities and set to SAMPLE_CODE_TEST_CAPACITY - """ - global SAMPLE_CODE_TEST_CAPACITY # write - global ENV_KEY_TEST_CAPACITY, RUN_ON_DEVICE # readonly - if ENV_KEY_TEST_CAPACITY in os.environ: - for r in os.environ[ENV_KEY_TEST_CAPACITY].split(','): - rr = r.strip().lower() - if r: - SAMPLE_CODE_TEST_CAPACITY.add(rr) - if 'cpu' not in SAMPLE_CODE_TEST_CAPACITY: - SAMPLE_CODE_TEST_CAPACITY.add('cpu') - - if RUN_ON_DEVICE: - SAMPLE_CODE_TEST_CAPACITY.add(RUN_ON_DEVICE) - - -def is_required_match(requirestr, cbtitle='not-specified'): - """ - search the required instruction in the code-block, and check it match the current running environment. - - environment values of equipped: cpu, gpu, xpu, distributed, skip - the 'skip' is the special flag to skip the test, so is_required_match will return False directly. - - Args: - requirestr(str): the required string. - cbtitle(str): the title of the code-block. - returns: - True - yes, matched - False - not match - None - skipped # trick - """ - global SAMPLE_CODE_TEST_CAPACITY, RUN_ON_DEVICE # readonly - requires = {'cpu'} - if requirestr: - for r in requirestr.split(','): - rr = r.strip().lower() - if rr: - requires.add(rr) - else: - requires.add(RUN_ON_DEVICE) - if 'skip' in requires or 'skiptest' in requires: - logger.info('%s: skipped', cbtitle) - return None - - if all( - k in SAMPLE_CODE_TEST_CAPACITY - for k in requires - if k not in ['skip', 'skiptest'] - ): - return True +def _patch_global_state(debug, verbose): + # patch xdoctest global_state + from xdoctest import global_state - logger.info( - '%s: the equipments [%s] not match the required [%s].', - cbtitle, - ','.join(SAMPLE_CODE_TEST_CAPACITY), - ','.join(requires), - ) - return False - - -def insert_codes_into_codeblock(codeblock, apiname='not-specified'): - """ - insert some codes in the frontend and backend into the code-block. - """ - global ENV_KEY_CODES_FRONTEND, GPU_ID, RUN_ON_DEVICE # readonly - inserted_codes_f = '' - inserted_codes_b = '' - if ( - ENV_KEY_CODES_FRONTEND in os.environ - and os.environ[ENV_KEY_CODES_FRONTEND] - ): - inserted_codes_f = os.environ[ENV_KEY_CODES_FRONTEND] - else: - cpu_str = '\nimport os\nos.environ["CUDA_VISIBLE_DEVICES"] = ""\n' - gpu_str = ( - '\nimport os\nos.environ["CUDA_VISIBLE_DEVICES"] = "{}"\n'.format( - GPU_ID - ) - ) - if 'required' in codeblock and codeblock['required']: - if codeblock['required'] == 'cpu': - inserted_codes_f = cpu_str - elif codeblock['required'] == 'gpu': - inserted_codes_f = gpu_str - else: - if RUN_ON_DEVICE == "cpu": - inserted_codes_f = cpu_str - elif RUN_ON_DEVICE == "gpu": - inserted_codes_f = gpu_str - inserted_codes_b = '\nprint("{}\'s sample code (name:{}, id:{}) is executed successfully!")'.format( - apiname, codeblock['name'], codeblock['id'] + _debug_xdoctest = debug and verbose > 2 + global_state.DEBUG = _debug_xdoctest + global_state.DEBUG_PARSER = global_state.DEBUG_PARSER and _debug_xdoctest + global_state.DEBUG_CORE = global_state.DEBUG_CORE and _debug_xdoctest + global_state.DEBUG_RUNNER = global_state.DEBUG_RUNNER and _debug_xdoctest + global_state.DEBUG_DOCTEST = global_state.DEBUG_DOCTEST and _debug_xdoctest + + +def _patch_tensor_place(): + from xdoctest import checker + + pattern_tensor = re.compile( + r""" + (Tensor\(.*?place=) # Tensor start + (.*?) # Place=(XXX) + (\,.*?\)) + """, + re.X | re.S, ) - cb = codeblock['codes'] - last_future_line_end = find_last_future_line_end(cb) - if last_future_line_end: - return ( - cb[:last_future_line_end] - + inserted_codes_f - + cb[last_future_line_end:] - + inserted_codes_b + _check_output = checker.check_output + + def check_output(got, want, runstate=None): + if not want: # nocover + return True + + return _check_output( + got=pattern_tensor.sub(r'\1Place(cpu)\3', got), + want=pattern_tensor.sub(r'\1Place(cpu)\3', want), + runstate=runstate, ) - else: - return inserted_codes_f + cb + inserted_codes_b - - -def is_ps_wrapped_codeblock(codeblock): - """If the codeblock is wrapped by PS1(>>> ), - we skip test and use xdoctest instead. - """ - codes = codeblock['codes'] - match_obj = re.search(r"\n>>>\s?", "\n" + codes) - return match_obj is not None - - -def sampcd_extract_to_file(srccom, name, htype="def", hname=""): - """ - Extract sample codes from __doc__, and write them to files. - - Args: - srccom(str): the source comment of some API whose - example codes will be extracted and run. - name(str): the name of the API. - htype(str): the type of hint banners, def/class/method. - hname(str): the name of the hint banners , e.t. def hname. - - Returns: - sample_code_filenames(list of str) - """ - global GPU_ID, RUN_ON_DEVICE, SAMPLECODE_TEMPDIR # readonly - global SUMMARY_INFO # update - - codeblocks = extract_code_blocks_from_docstr(srccom) - if len(codeblocks) == 0: - SUMMARY_INFO['nocodes'].append(name) - # detect sample codes using >>> to format and consider this situation as wrong - logger.info(htype + " name:" + name) - logger.info("-----------------------") - if srccom.find("Examples:") != -1: - logger.info("----example code check----") - if srccom.find(">>>") != -1: - logger.warning( - r"""Deprecated sample code style: - Examples: - >>>codeline - >>>codeline - -Please use '.. code-block:: python' to format the sample code.""" + + checker.check_output = check_output + + +def _patch_float_precision(digits): + from xdoctest import checker + + pattern_number = re.compile( + r""" + (?: + (?<=[\s*\[\(\'\"\:]) # number starts + (?: # int/float or complex-real + (?: + [+-]? + (?: + (?: \d*\.\d+) | (?: \d+\.?) # int/float + ) ) - return [] - else: - logger.error( - "Error: No sample code found! Please check if the API comment contais string 'Examples:' correctly" + (?:[Ee][+-]?\d+)? ) - return [] + (?: # complex-imag + (?: + (?: + [+-]? + (?: + (?: \d*\.\d+) | (?: \d+\.?) + ) + ) + (?:[Ee][+-]?\d+)? + ) + (?:[Jj]) + )? + ) + """, + re.X | re.S, + ) - sample_code_filenames = [] - for y, cb in enumerate(codeblocks): - if is_ps_wrapped_codeblock(cb): - SUMMARY_INFO['skiptest'].append("{}-{}".format(name, cb['id'])) - logger.info( - '{}\' code block (name:{}, id:{}) is wrapped by PS1(>>> ), which will be tested by xdoctest.'.format( - name, cb['name'], cb['id'] + _check_output = checker.check_output + + def _sub_number(match_obj, digits): + match_str = match_obj.group() + + if 'j' in match_str or 'J' in match_str: + try: + match_num = complex(match_str) + except ValueError: + return match_str + + return ( + str( + complex( + round(match_num.real, digits), + round(match_num.imag, digits), + ) ) + .strip('(') + .strip(')') ) - continue - - matched = is_required_match(cb['required'], name) - # matched has three states: - # True - please execute it; - # None - no sample code found; - # False - it need other special equipment or environment. - # so, the following conditional statements are intentionally arranged. - if matched: - tfname = os.path.join( - SAMPLECODE_TEMPDIR, - '{}_example{}'.format( - name, - '.py' if len(codeblocks) == 1 else f'_{y + 1}.py', - ), + else: + try: + return str(round(float(match_str), digits)) + except ValueError: + return match_str + + sub_number = functools.partial(_sub_number, digits=digits) + + def check_output(got, want, runstate=None): + if not want: # nocover + return True + + return _check_output( + got=pattern_number.sub(sub_number, got), + want=pattern_number.sub(sub_number, want), + runstate=runstate, + ) + + checker.check_output = check_output + + +class Directive: + """Base class of global direvtives just for `xdoctest`.""" + + pattern: typing.Pattern + + def parse_directive(self, docstring: str) -> typing.Tuple[str, typing.Any]: + pass + + +class TimeoutDirective(Directive): + pattern = re.compile( + r""" + (?: + (?: + \s*\>{3}\s*\#\s*x?doctest\:\s* ) - with open(tfname, 'w') as tempf: - sampcd = insert_codes_into_codeblock(cb, name) - tempf.write(sampcd) - sample_code_filenames.append(tfname) - elif matched is None: - logger.info( - '{}\' code block (name:{}, id:{}) is skipped.'.format( - name, cb['name'], cb['id'] - ) + (?P[\+\-]) + (?: + TIMEOUT ) - SUMMARY_INFO['skiptest'].append("{}-{}".format(name, cb['id'])) - elif not matched: - logger.info( - '{}\' code block (name:{}, id:{}) required({}) not match capacity({}).'.format( - name, - cb['name'], - cb['id'], - cb['required'], - SAMPLE_CODE_TEST_CAPACITY, - ) + \( + (?P