From 7bec4fa3dd770e4d5f66e66d5f58da0314dbbf69 Mon Sep 17 00:00:00 2001 From: Agriya Khetarpal <74401230+agriyakhetarpal@users.noreply.github.com> Date: Fri, 20 Dec 2024 08:26:00 +0530 Subject: [PATCH] GH-45071: [Packaging][Docs] Fix NumPy v2 include directory for Emscripten, and update Pyodide-related documentation (#45072) ### Rationale for this change This change would allow building PyArrow correctly with NumPy 1.X and NumPy 2.X, since we are trying to do the latter for pyodide/pyodide#4925. This PR closes gh-45071. ### What changes are included in this PR? This PR - issues a correction for the NumPy header files when building under Emscripten - updates Pyodide-related build instructions ### Are these changes tested? Yes, working here: https://github.com/agriyakhetarpal/pyodide-numpy-2.0-rebuilds/actions/runs/12399351376/job/34619554658#step:8:4547 via https://github.com/agriyakhetarpal/pyodide/commit/b651698ab31b9fe277437543dd483eb19aaa2462 that applies a subset of the changes as a patch (the CI job is failing for unrelated reasons, please ignore). ### Are there any user-facing changes? Yes, users trying to build a WASM wheel via Pyodide are now requested to use newer Pyodide and Emscripten versions, and the latest stable version of `pyodide-build` available. 
* GitHub Issue: #45071 Authored-by: Agriya Khetarpal <74401230+agriyakhetarpal@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- ci/docker/conda-python-emscripten.dockerfile | 6 +++--- docs/source/developers/cpp/emscripten.rst | 16 ++++++++-------- python/CMakeLists.txt | 11 ++++++++++- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/ci/docker/conda-python-emscripten.dockerfile b/ci/docker/conda-python-emscripten.dockerfile index 8ad705c920ba8..47ff550cd59ca 100644 --- a/ci/docker/conda-python-emscripten.dockerfile +++ b/ci/docker/conda-python-emscripten.dockerfile @@ -27,14 +27,14 @@ ARG required_python_min="(3,12)" # fail if python version < 3.12 RUN echo "check PYTHON>=${required_python_min}" && python -c "import sys;sys.exit(0 if sys.version_info>=${required_python_min} else 1)" -# install selenium and pyodide-build and recent python +# install selenium and recent pyodide-build and recent python # needs to be a login shell so ~/.profile is read SHELL ["/bin/bash", "--login", "-c", "-o", "pipefail"] RUN python -m pip install --no-cache-dir selenium==${selenium_version} && \ - python -m pip install --no-cache-dir --upgrade pyodide-build==${pyodide_version} - + python -m pip install --no-cache-dir --upgrade "pyodide-build>=${pyodide_version}" + # install pyodide dist directory to /pyodide RUN pyodide_dist_url="https://github.com/pyodide/pyodide/releases/download/${pyodide_version}/pyodide-${pyodide_version}.tar.bz2" && \ wget -q "${pyodide_dist_url}" -O- | tar -xj -C / diff --git a/docs/source/developers/cpp/emscripten.rst b/docs/source/developers/cpp/emscripten.rst index b4c563aae1a3b..bfa0c5bc35021 100644 --- a/docs/source/developers/cpp/emscripten.rst +++ b/docs/source/developers/cpp/emscripten.rst @@ -33,7 +33,9 @@ activate it using the commands below (see https://emscripten.org/docs/getting_st git clone https://github.com/emscripten-core/emsdk.git cd emsdk # replace with the desired EMSDK version. - # e.g. 
for Pyodide 0.24, you need EMSDK version 3.1.45 + # e.g. for Pyodide 0.26, you need EMSDK version 3.1.58 + # the versions can be found in the Makefile.envs file in the Pyodide repo: + # https://github.com/pyodide/pyodide/blob/10b484cfe427e076c929a55dc35cfff01ea8d3bc/Makefile.envs ./emsdk install <version> ./emsdk activate <version> source ./emsdk_env.sh @@ -46,8 +48,8 @@ versions of emsdk tools. .. code:: shell # install Pyodide build tools. - # e.g. for version 0.24 of Pyodide: - pip install pyodide-build==0.24 + # e.g., for version 0.26 of Pyodide, pyodide-build 0.26 and later work + pip install "pyodide-build>=0.26" Then build with the ``ninja-release-emscripten`` CMake preset, like below: @@ -69,8 +71,7 @@ go to ``arrow/python`` and run pyodide build It should make a wheel targeting the currently enabled version of -Pyodide (i.e. the version corresponding to the currently installed -``pyodide-build``) in the ``dist`` subdirectory. +Pyodide in the ``dist`` subdirectory. Manual Build @@ -85,9 +86,8 @@ you will need to override. In particular you will need: #. ``CMAKE_TOOLCHAIN_FILE`` set by using ``emcmake cmake`` instead of just ``cmake``. -#. You will quite likely need to set ``ARROW_ENABLE_THREADING`` to ``OFF`` - for builds targeting single threaded Emscripten environments such as - Pyodide. +#. You will need to set ``ARROW_ENABLE_THREADING`` to ``OFF`` for builds + targeting single-threaded Emscripten environments such as Pyodide. #. ``ARROW_FLIGHT`` and anything else that uses network probably won't work. diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index c39a1129ac17a..80d1cd31ac231 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -166,8 +166,17 @@ if($ENV{PYODIDE}) # modules (at least under Pyodide it does). 
set(Python3_INCLUDE_DIR $ENV{PYTHONINCLUDE}) set(Python3_LIBRARY $ENV{CPYTHONLIB}) - set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include) set(Python3_EXECUTABLE) + execute_process(COMMAND ${Python3_EXECUTABLE} -c + "import numpy; print(numpy.__version__)" + OUTPUT_VARIABLE PYODIDE_NUMPY_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE) + string(REGEX MATCH "^([0-9]+)" PYODIDE_NUMPY_MAJOR_VERSION ${PYODIDE_NUMPY_VERSION}) + if(PYODIDE_NUMPY_MAJOR_VERSION GREATER_EQUAL 2) + set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/_core/include) + else() + set(Python3_NumPy_INCLUDE_DIR $ENV{NUMPY_LIB}/core/include) + endif() set(ENV{_PYTHON_SYSCONFIGDATA_NAME} $ENV{SYSCONFIG_NAME}) # we set the c and cxx compiler manually to bypass pywasmcross # which is pyodide's way of messing with C++ build parameters.