From 02f59daf078af5b54c020a04a4db9b02253e2f64 Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Fri, 7 Nov 2025 17:45:02 +0100 Subject: [PATCH] gnu: python-pytorch-for-r-torch: Update to 2.7.1. * gnu/packages/patches/python-pytorch-for-r-torch-fix-codegen.patch, gnu/packages/patches/python-pytorch-for-r-torch-system-libraries.patch: Update. * gnu/packages/patches/python-pytorch-for-r-torch-without-kineto.patch: New file. * gnu/local.mk (dist_patch_DATA): Record it. * gnu/packages/machine-learning.scm (python-pytorch-for-r-torch): Update to 2.7.1. [source]: Use new patch. [arguments]: Remove phase 'fix-aten-vec; copy and adjust 'use-system-libraries phase from python-pytorch. [inputs]: Inherit all from python-pytorch; replace gloo with gloo-for-r-torch. [native-inputs]: Inherit all from python-pytorch. [propagated-inputs]: Inherit all from python-pytorch. Change-Id: Ib2cf511fc34f609bbc7e92971720b00c4523419f --- gnu/local.mk | 1 + gnu/packages/machine-learning.scm | 89 +++-- ...thon-pytorch-for-r-torch-fix-codegen.patch | 59 ++-- ...pytorch-for-r-torch-system-libraries.patch | 332 ++++++++++-------- ...n-pytorch-for-r-torch-without-kineto.patch | 64 ++++ 5 files changed, 353 insertions(+), 192 deletions(-) create mode 100644 gnu/packages/patches/python-pytorch-for-r-torch-without-kineto.patch diff --git a/gnu/local.mk b/gnu/local.mk index ed4881a83b5..bf605632252 100644 --- a/gnu/local.mk +++ b/gnu/local.mk @@ -2176,6 +2176,7 @@ dist_patch_DATA = \ %D%/packages/patches/python-pytorch-fix-codegen.patch \ %D%/packages/patches/python-pytorch-for-r-torch-fix-codegen.patch \ %D%/packages/patches/python-pytorch-for-r-torch-system-libraries.patch \ + %D%/packages/patches/python-pytorch-for-r-torch-without-kineto.patch \ %D%/packages/patches/python-pytorch-runpath.patch \ %D%/packages/patches/python-pytorch-system-libraries.patch \ %D%/packages/patches/python-pytorch-without-kineto.patch \ diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm 
index 68ff3c460a4..9dd69f742a6 100644 --- a/gnu/packages/machine-learning.scm +++ b/gnu/packages/machine-learning.scm @@ -4909,7 +4909,7 @@ in the audio domain.") (delete 'disable-avx-dependencies))))) (supported-systems '("x86_64-linux")))) -(define %python-pytorch-for-r-torch-version "2.0.1") +(define %python-pytorch-for-r-torch-version "2.7.1") (define %python-pytorch-for-r-torch-src (origin @@ -4921,11 +4921,11 @@ in the audio domain.") %python-pytorch-for-r-torch-version)) (sha256 (base32 - "0iirrn687i7sfv0p0i7dn89x3rf13a7l8y1y5h190h51yjxpxqxa")) + "0734kfm66hsqdzgs2s4wj5yagvifijbgb0c5wfmp3qcdrraa9x57")) (patches (search-patches "python-pytorch-for-r-torch-system-libraries.patch" "python-pytorch-runpath.patch" - "python-pytorch-without-kineto.patch" + "python-pytorch-for-r-torch-without-kineto.patch" ;; Some autogeneration scripts depend on the ;; compile PyTorch library. Therefore, we create ;; dummy versions which are regenerated later. @@ -4952,34 +4952,73 @@ in the audio domain.") (name "python-pytorch") (version %python-pytorch-for-r-torch-version) (source %python-pytorch-for-r-torch-src) + (inputs + (modify-inputs (package-inputs python-pytorch) + (replace "gloo" gloo-for-r-torch))) (arguments (substitute-keyword-arguments (package-arguments python-pytorch) ((#:phases phases) #~(modify-phases #$phases - ;; See https://github.com/pytorch/pytorch/issues/61244 - (add-after 'unpack 'fix-aten-vec + (replace 'use-system-libraries (lambda _ + (for-each + (lambda (file) + ;; Check whether the files exist for the + ;; python-pytorch-for-r-torch package + (when (file-exists? 
file) + (substitute* file + (("\"miniz\\.h\"") "") + (("") "")))) + '("caffe2/serialize/crc.cc" + "caffe2/serialize/inline_container.cc" + "torch/csrc/inductor/aoti_package/model_package_loader.cpp")) + + (substitute* "aten/src/ATen/native/vulkan/api/Allocator.h" + (("") + "")) + ;; Fix missing header for std::for_each in Vulkan API + (substitute* "aten/src/ATen/native/vulkan/api/QueryPool.cpp" + (("#include " all) + (string-append all "\n#include "))) + ;; For Vulkan + (substitute* "CMakeLists.txt" + (("append_cxx_flag.*-Werror=(return-type|range-loop-construct).*") "")) (substitute* - '("aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h" - "aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h") - (("map\\(const __") "map(__")))))))) - (native-inputs - (modify-inputs (package-native-inputs python-pytorch) - (replace "ideep-pytorch" ideep-pytorch-for-r-torch))) - (inputs - (modify-inputs (package-inputs python-pytorch) - (prepend foxi) - (prepend qnnpack) - (replace "qnnpack-pytorch" qnnpack-pytorch-for-r-torch) - (replace "oneapi-dnnl" oneapi-dnnl-for-r-torch) - (replace "xnnpack" xnnpack-for-r-torch))) - (propagated-inputs - (modify-inputs (package-propagated-inputs python-pytorch) - (append python-filelock - python-jinja2 - python-networkx - python-opt-einsum - python-sympy))))) + (cons* + "torch/csrc/Module.cpp" + (map + (lambda (name) + (string-append + "torch/utils/benchmark/utils/valgrind_wrapper/" + name)) + '("compat_bindings.cpp" "timer_callgrind_template.cpp"))) + (("") "")) + (setenv "USE_VULKAN" "1") + ;; Tell 'setup.py' to let 'CMakeLists.txt' know that we + ;; want to use "system libraries" instead of the bundled + ;; ones. 
+ (setenv "USE_SYSTEM_LIBS" "1") + ;; For oneDNN + (setenv "USE_MKLDNN" "1") + ;; Only works with CUPTI + (setenv "USE_KINETO" "0") + ;; Prevent CMake error by disabling explicitly + (setenv "USE_ITT" "0") + ;; Disable on unsupported systems + (if #$(not (member + (or (%current-target-system) + (%current-system)) + (package-transitive-supported-systems qnnpack))) + (setenv "USE_QNNPACK" "0")) + (substitute* '("requirements.txt" "setup.py") + (("sympy>=1\\.13\\.3") + "sympy>=1.13.1")))) + (replace 'skip-nccl-call + (lambda _ + ;; Comment-out `checkout_nccl()` invocation in build_pytorch(). + (substitute* "tools/build_pytorch_libs.py" + (("^[[:blank:]]*checkout_nccl\\(\\)" all) + (string-append "# " all "\n pass"))))))))))) (define-public python-pytorch-geometric (package diff --git a/gnu/packages/patches/python-pytorch-for-r-torch-fix-codegen.patch b/gnu/packages/patches/python-pytorch-for-r-torch-fix-codegen.patch index 8515e5ab13a..3862339b141 100644 --- a/gnu/packages/patches/python-pytorch-for-r-torch-fix-codegen.patch +++ b/gnu/packages/patches/python-pytorch-for-r-torch-fix-codegen.patch @@ -6,7 +6,7 @@ is later corrected. codegen_external.py is patched to avoid duplicate functions and add the static keyword as in the existing generated file. 
diff --git a/tools/gen_flatbuffers.sh b/tools/gen_flatbuffers.sh -index cc0263dbbf..ac34e84b82 100644 +index cc0263dbb..ac34e84b8 100644 --- a/tools/gen_flatbuffers.sh +++ b/tools/gen_flatbuffers.sh @@ -1,13 +1,13 @@ @@ -32,10 +32,10 @@ index cc0263dbbf..ac34e84b82 100644 -c "$ROOT/torch/csrc/jit/serialization/mobile_bytecode.fbs" echo '// @generated' >> "$ROOT/torch/csrc/jit/serialization/mobile_bytecode_generated.h" diff --git a/torch/csrc/jit/tensorexpr/codegen_external.py b/torch/csrc/jit/tensorexpr/codegen_external.py -index 120520b139..0c8587f02d 100644 +index 5dcf1b284..0e20b0c10 100644 --- a/torch/csrc/jit/tensorexpr/codegen_external.py +++ b/torch/csrc/jit/tensorexpr/codegen_external.py -@@ -16,9 +16,14 @@ def gen_external(native_functions_path, tags_path, external_path): +@@ -21,9 +21,14 @@ def gen_external(native_functions_path, tags_path, external_path): native_functions = parse_native_yaml(native_functions_path, tags_path) func_decls = [] func_registrations = [] @@ -51,7 +51,7 @@ index 120520b139..0c8587f02d 100644 args = schema.arguments # Only supports extern calls for functions with out variants if not schema.is_out_fn(): -@@ -48,7 +53,7 @@ def gen_external(native_functions_path, tags_path, external_path): +@@ -63,7 +68,7 @@ def gen_external(native_functions_path, tags_path, external_path): # print(tensor_decls, name, arg_names) func_decl = f"""\ @@ -61,7 +61,7 @@ index 120520b139..0c8587f02d 100644 void** buf_data, int64_t* buf_ranks, diff --git a/torchgen/decompositions/gen_jit_decompositions.py b/torchgen/decompositions/gen_jit_decompositions.py -index 7cfbb803f9..2e69bb1868 100644 +index b42948045..e1cfc73a5 100644 --- a/torchgen/decompositions/gen_jit_decompositions.py +++ b/torchgen/decompositions/gen_jit_decompositions.py @@ -1,8 +1,12 @@ @@ -76,9 +76,9 @@ index 7cfbb803f9..2e69bb1868 100644 +else: + decomposition_table = {} - # from torchgen.code_template import CodeTemplate -@@ -85,7 +89,7 @@ def write_decomposition_util_file(path: str) -> 
None: + # from torchgen.code_template import CodeTemplate +@@ -86,7 +90,7 @@ def write_decomposition_util_file(path: str) -> None: def main() -> None: @@ -88,40 +88,41 @@ index 7cfbb803f9..2e69bb1868 100644 write_decomposition_util_file(str(upgrader_path)) diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py b/torchgen/operator_versions/gen_mobile_upgraders.py -index e5287cffc5..57f3c38096 100644 +index 845034cb7..a1c5767c2 100644 --- a/torchgen/operator_versions/gen_mobile_upgraders.py +++ b/torchgen/operator_versions/gen_mobile_upgraders.py -@@ -2,10 +2,12 @@ - import os +@@ -6,10 +6,13 @@ import os from enum import Enum + from operator import itemgetter from pathlib import Path +import sys - from typing import Any, Dict, List + from typing import Any -import torch -from torch.jit.generate_bytecode import generate_upgraders_bytecode +if len(sys.argv) < 2 or sys.argv[1] != "dummy": + import torch + from torch.jit.generate_bytecode import generate_upgraders_bytecode - ++ from torchgen.code_template import CodeTemplate from torchgen.operator_versions.gen_mobile_upgraders_constant import ( -@@ -262,7 +264,10 @@ def construct_register_size(register_size_from_yaml: int) -> str: + MOBILE_UPGRADERS_HEADER_DESCRIPTION, +@@ -263,7 +266,10 @@ def construct_register_size(register_size_from_yaml: int) -> str: def construct_version_maps( - upgrader_bytecode_function_to_index_map: Dict[str, Any] + upgrader_bytecode_function_to_index_map: dict[str, Any], ) -> str: - version_map = torch._C._get_operator_version_map() + if len(sys.argv) < 2 or sys.argv[1] != "dummy": + version_map = torch._C._get_operator_version_map() + else: + version_map = {} - sorted_version_map_ = sorted(version_map.items(), key=lambda item: item[0]) # type: ignore[no-any-return] - sorted_version_map = {name: lst for name, lst in sorted_version_map_} + sorted_version_map_ = sorted(version_map.items(), key=itemgetter(0)) # type: ignore[no-any-return] + sorted_version_map = dict(sorted_version_map_) 
+ +@@ -375,7 +381,10 @@ def sort_upgrader(upgrader_list: list[dict[str, Any]]) -> list[dict[str, Any]]: -@@ -379,7 +384,10 @@ def sort_upgrader(upgrader_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]: def main() -> None: - - upgrader_list = generate_upgraders_bytecode() + if len(sys.argv) < 2 or sys.argv[1] != "dummy": + upgrader_list = generate_upgraders_bytecode() @@ -131,16 +132,24 @@ index e5287cffc5..57f3c38096 100644 for up in sorted_upgrader_list: print("after sort upgrader : ", next(iter(up))) diff --git a/torchgen/shape_functions/gen_jit_shape_functions.py b/torchgen/shape_functions/gen_jit_shape_functions.py -index c6336a6951..34e394d818 100644 +index 56a3d8bf0..ffd0785fd 100644 --- a/torchgen/shape_functions/gen_jit_shape_functions.py +++ b/torchgen/shape_functions/gen_jit_shape_functions.py -@@ -18,16 +18,20 @@ you are in the root directory of the Pytorch git repo""" +@@ -1,6 +1,7 @@ + #!/usr/bin/env python3 + import os + import sys ++import importlib + from importlib.util import module_from_spec, spec_from_file_location + from itertools import chain + from pathlib import Path +@@ -18,17 +19,21 @@ you are in the root directory of the Pytorch git repo""" if not file_path.exists(): - raise Exception(err_msg) + raise Exception(err_msg) # noqa: TRY002 --spec = importlib.util.spec_from_file_location(module_name, file_path) +-spec = spec_from_file_location(module_name, file_path) -assert spec is not None --module = importlib.util.module_from_spec(spec) +-module = module_from_spec(spec) -sys.modules[module_name] = module -assert spec.loader is not None -assert module is not None @@ -148,6 +157,7 @@ index c6336a6951..34e394d818 100644 - -bounded_compute_graph_mapping = module.bounded_compute_graph_mapping -shape_compute_graph_mapping = module.shape_compute_graph_mapping +- +if len(sys.argv) < 2 or sys.argv[1] != "dummy": + spec = importlib.util.spec_from_file_location(module_name, file_path) + assert spec is not None @@ -159,9 +169,10 @@ index 
c6336a6951..34e394d818 100644 + + bounded_compute_graph_mapping = module.bounded_compute_graph_mapping + shape_compute_graph_mapping = module.shape_compute_graph_mapping ++ +else: + bounded_compute_graph_mapping = {} + shape_compute_graph_mapping = {} - SHAPE_HEADER = r""" + /** diff --git a/gnu/packages/patches/python-pytorch-for-r-torch-system-libraries.patch b/gnu/packages/patches/python-pytorch-for-r-torch-system-libraries.patch index e5d647f70d2..39e46db00f1 100644 --- a/gnu/packages/patches/python-pytorch-for-r-torch-system-libraries.patch +++ b/gnu/packages/patches/python-pytorch-for-r-torch-system-libraries.patch @@ -1,16 +1,14 @@ Patch build files to also system libraries instead of bundled ones for the libraries not supported or working only by specifying USE_SYSTEM_LIBS. This includes using the clog, cpuinfo, fbgemm, foxi, fp16, fxdiv, googletest, -ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool, qnnpack, +httplib, ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool, qnnpack-pytorch, tensorpipe, valgrind and xnnpack packages. -For QNNPACK, two versions were bundled and are required: The upstream one and -an internal fork (now in the package qnnpack-pytorch). 
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt -index 96fc297..7f27b66 100644 +index 085af373e..3287429b4 100644 --- a/aten/src/ATen/CMakeLists.txt +++ b/aten/src/ATen/CMakeLists.txt -@@ -362,9 +362,9 @@ if(AT_NNPACK_ENABLED) +@@ -468,9 +468,9 @@ if(AT_NNPACK_ENABLED) list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below endif() @@ -21,13 +19,13 @@ index 96fc297..7f27b66 100644 + list(APPEND ATen_CPU_DEPENDENCY_LIBS DNNL::dnnl) +endif(USE_MKLDNN) - list(APPEND ATen_CPU_DEPENDENCY_LIBS cpuinfo) - + if(USE_MKLDNN_ACL) + list(APPEND ATen_CPU_INCLUDE ${ACL_INCLUDE_DIRS}) diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt -index 221e3f3..417f601 100644 +index d2d23b7ab..1a7e5a042 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt -@@ -110,9 +110,6 @@ if(NOT MSVC AND USE_XNNPACK) +@@ -91,9 +91,6 @@ if(NOT MSVC AND USE_XNNPACK) if(NOT TARGET fxdiv) set(FXDIV_BUILD_TESTS OFF CACHE BOOL "") set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "") @@ -37,7 +35,7 @@ index 221e3f3..417f601 100644 endif() endif() -@@ -975,7 +972,6 @@ elseif(USE_CUDA) +@@ -1135,7 +1132,6 @@ if(USE_XPU) endif() if(NOT MSVC AND USE_XNNPACK) @@ -45,15 +43,26 @@ index 221e3f3..417f601 100644 endif() # ========================================================== -@@ -1314,6 +1310,7 @@ target_link_libraries(torch_cpu PUBLIC c10) +@@ -1254,8 +1250,8 @@ endif() + target_include_directories(torch_cpu PRIVATE + ${TORCH_ROOT}/third_party/cpp-httplib) + +-target_include_directories(torch_cpu PRIVATE +- ${TORCH_ROOT}/third_party/nlohmann/include) ++find_package(httplib REQUIRED) ++target_link_libraries(torch_cpu PUBLIC httplib::httplib) + + install(DIRECTORY "${TORCH_SRC_DIR}/csrc" + DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch +@@ -1494,6 +1490,7 @@ target_link_libraries(torch_cpu PUBLIC c10) target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS}) target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS}) 
target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS}) +target_link_libraries(torch_cpu PRIVATE miniz clog) - target_include_directories(torch_cpu INTERFACE $) - target_include_directories(torch_cpu PRIVATE ${Caffe2_CPU_INCLUDE}) - target_include_directories(torch_cpu SYSTEM PRIVATE "${Caffe2_DEPENDENCY_INCLUDE}") -@@ -1570,7 +1567,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK) + if(USE_MPI) + target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX) + endif() +@@ -1728,7 +1725,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK) add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}") add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}") target_link_libraries(static_runtime_bench torch_library benchmark) @@ -61,8 +70,8 @@ index 221e3f3..417f601 100644 + target_link_libraries(static_runtime_test torch_library gtest_main gtest) endif() - if(BUILD_TENSOREXPR_BENCHMARK) -@@ -1601,7 +1598,7 @@ if(BUILD_MOBILE_TEST) + if(BUILD_MOBILE_BENCHMARK) +@@ -1747,7 +1744,7 @@ if(BUILD_MOBILE_TEST) foreach(test_src ${ATen_MOBILE_TEST_SRCS}) get_filename_component(test_name ${test_src} NAME_WE) add_executable(${test_name} "${test_src}") @@ -71,32 +80,61 @@ index 221e3f3..417f601 100644 target_include_directories(${test_name} PRIVATE $) target_include_directories(${test_name} PRIVATE $) target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE}) -@@ -1622,13 +1619,13 @@ if(BUILD_TEST) +@@ -1768,7 +1765,7 @@ if(BUILD_TEST) if(NOT MSVC) add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp) # TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR) -- target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main) -+ target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main gtest) +- target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main nlohmann) ++ target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 
sleef gtest_main gtest nlohmann) if(USE_FBGEMM) target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm) endif() +@@ -1782,7 +1779,7 @@ if(BUILD_TEST) + endif() else() add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}") -- target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main) -+ target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main gtest) +- target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main) ++ target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main gtest) endif() target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $) target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $) -@@ -1645,7 +1642,7 @@ if(BUILD_TEST) +@@ -1799,7 +1796,7 @@ if(BUILD_TEST) foreach(test_src ${Caffe2_CPU_TEST_SRCS}) get_filename_component(test_name ${test_src} NAME_WE) add_executable(${test_name} "${test_src}") - target_link_libraries(${test_name} torch_library gtest_main) + target_link_libraries(${test_name} torch_library gtest_main gtest) - target_include_directories(${test_name} PRIVATE $) - target_include_directories(${test_name} PRIVATE $) - target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) -@@ -1703,7 +1700,7 @@ if(BUILD_TEST) + if(NOT MSVC) + target_link_libraries(${test_name} stdc++) + endif() +@@ -1823,7 +1820,7 @@ if(BUILD_TEST) + add_executable(${test_name} "${test_src}") + find_library(metal NAMES Metal) + find_library(foundation NAMES Foundation) +- target_link_libraries(${test_name} torch_library gtest_main ${metal} ${foundation}) ++ target_link_libraries(${test_name} torch_library gtest_main gtest ${metal} ${foundation}) + target_include_directories(${test_name} PRIVATE $) + target_include_directories(${test_name} PRIVATE $) + target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) +@@ -1843,7 +1840,7 @@ if(BUILD_TEST) + foreach(test_src ${Caffe2_GPU_TEST_SRCS}) + 
get_filename_component(test_name ${test_src} NAME_WE) + add_executable(${test_name} "${test_src}") +- target_link_libraries(${test_name} torch_library gtest_main) ++ target_link_libraries(${test_name} torch_library gtest_main gtest) + if(USE_CUDNN AND ${test_name} MATCHES "cudnn") + target_link_libraries(${test_name} torch::cudnn) + endif() +@@ -1865,7 +1862,7 @@ if(BUILD_TEST) + foreach(test_src ${Caffe2_XPU_TEST_SRCS}) + get_filename_component(test_name ${test_src} NAME_WE) + add_executable(${test_name} "${test_src}") +- target_link_libraries(${test_name} torch_library gtest_main) ++ target_link_libraries(${test_name} torch_library gtest_main gtest) + target_include_directories(${test_name} PRIVATE $) + target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) + add_test(NAME ${test_name} COMMAND $) +@@ -1880,7 +1877,7 @@ if(BUILD_TEST) foreach(test_src ${Caffe2_VULKAN_TEST_SRCS}) get_filename_component(test_name ${test_src} NAME_WE) add_executable(${test_name} "${test_src}") @@ -105,23 +143,32 @@ index 221e3f3..417f601 100644 target_include_directories(${test_name} PRIVATE $) target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) add_test(NAME ${test_name} COMMAND $) +@@ -1899,7 +1896,7 @@ if(BUILD_TEST) + foreach(test_src ${Caffe2_HIP_TEST_SRCS}) + get_filename_component(test_name ${test_src} NAME_WE) + add_executable(${test_name} "${test_src}") +- target_link_libraries(${test_name} torch_library gtest_main) ++ target_link_libraries(${test_name} torch_library gtest_main gtest) + target_include_directories(${test_name} PRIVATE $) + target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE}) + target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS}) diff --git a/caffe2/serialize/CMakeLists.txt b/caffe2/serialize/CMakeLists.txt -index 1552b59..67e1a9a 100644 +index ebbff0f29..dcded2590 100644 --- a/caffe2/serialize/CMakeLists.txt +++ b/caffe2/serialize/CMakeLists.txt @@ -2,7 +2,6 @@ file(GLOB tmp 
*_test.cc) set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp}) list(APPEND Caffe2_CPU_SRCS -- ${PROJECT_SOURCE_DIR}/third_party/miniz-2.1.0/miniz.c +- ${PROJECT_SOURCE_DIR}/third_party/miniz-3.0.2/miniz.c ${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc ${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc ${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake -index 8c0e3c2..d65576a 100644 +index be45936a8..bb1aa1cc1 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake -@@ -298,7 +298,7 @@ endif() +@@ -276,7 +276,7 @@ endif() # --- [ PocketFFT set(AT_POCKETFFT_ENABLED 0) if(NOT AT_MKL_ENABLED) @@ -130,27 +177,7 @@ index 8c0e3c2..d65576a 100644 if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}") message(FATAL_ERROR "pocketfft directory not found, expected ${POCKETFFT_INCLUDE_DIR}") elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h") -@@ -501,19 +501,6 @@ if(USE_QNNPACK) - set(QNNPACK_BUILD_TESTS OFF CACHE BOOL "") - set(QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "") - set(QNNPACK_LIBRARY_TYPE "static" CACHE STRING "") -- add_subdirectory( -- "${QNNPACK_SOURCE_DIR}" -- "${CONFU_DEPENDENCIES_BINARY_DIR}/QNNPACK") -- -- # TODO: See https://github.com/pytorch/pytorch/issues/56285 -- if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU") -- target_compile_options(qnnpack PRIVATE -Wno-deprecated-declarations) -- endif() -- -- # We build static versions of QNNPACK and pthreadpool but link -- # them into a shared library for Caffe2, so they need PIC. 
-- set_property(TARGET qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON) -- set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON) - - if(QNNPACK_CUSTOM_THREADPOOL) - target_compile_definitions( -@@ -562,13 +549,6 @@ if(USE_PYTORCH_QNNPACK) +@@ -460,15 +460,6 @@ if(USE_PYTORCH_QNNPACK) set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "") set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "") set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "") @@ -161,10 +188,33 @@ index 8c0e3c2..d65576a 100644 - # them into a shared library for Caffe2, so they need PIC. - set_property(TARGET pytorch_qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON) - set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON) +- # QNNPACK depends on gemmlowp headers +- target_include_directories(pytorch_qnnpack PRIVATE "${CAFFE2_THIRD_PARTY_ROOT}/gemmlowp") + endif() - if(PYTORCH_QNNPACK_CUSTOM_THREADPOOL) - target_compile_definitions( -@@ -750,11 +730,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST) + list(APPEND Caffe2_DEPENDENCY_LIBS pytorch_qnnpack) +@@ -558,16 +549,15 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK) + list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod) + elseif(NOT TARGET XNNPACK AND USE_SYSTEM_XNNPACK) + add_library(XNNPACK SHARED IMPORTED) +- add_library(microkernels-prod SHARED IMPORTED) ++ add_library(microkernels-prod INTERFACE IMPORTED) + find_library(XNNPACK_LIBRARY XNNPACK) +- find_library(microkernels-prod_LIBRARY microkernels-prod) + set_property(TARGET XNNPACK PROPERTY IMPORTED_LOCATION "${XNNPACK_LIBRARY}") +- set_property(TARGET microkernels-prod PROPERTY IMPORTED_LOCATION "${microkernels-prod_LIBRARY}") +- if(NOT XNNPACK_LIBRARY or NOT microkernels-prod_LIBRARY) ++ set_property(TARGET microkernels-prod PROPERTY INTERFACE_LINK_LIBRARIES XNNPACK) ++ if(NOT XNNPACK_LIBRARY) + message(FATAL_ERROR "Cannot find XNNPACK") + endif() + message("-- Found XNNPACK: ${XNNPACK_LIBRARY}") +- list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK 
microkernels-prod) ++ list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK) + endif() + + # ---[ Vulkan deps +@@ -650,11 +640,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST) # this shouldn't be necessary anymore. get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES) set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "") @@ -176,9 +226,9 @@ index 8c0e3c2..d65576a 100644 # We will not need to test benchmark lib itself. set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.") -@@ -829,16 +804,6 @@ if(USE_FBGEMM) - else() - set(FBGEMM_LIBRARY_TYPE "static" CACHE STRING "") +@@ -732,16 +717,6 @@ if(USE_FBGEMM) + if(USE_ASAN) + set(USE_SANITIZER "address,undefined" CACHE STRING "-fsanitize options for FBGEMM") endif() - add_subdirectory("${FBGEMM_SOURCE_DIR}") - set_property(TARGET fbgemm_generic PROPERTY POSITION_INDEPENDENT_CODE ON) @@ -190,44 +240,39 @@ index 8c0e3c2..d65576a 100644 - target_compile_options_if_supported(asmjit -Wno-deprecated-copy) - target_compile_options_if_supported(asmjit -Wno-unused-but-set-variable) - endif() + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + target_compile_options_if_supported(asmjit -Wno-extra-semi) + target_compile_options_if_supported(fbgemm -Wno-extra-semi) +@@ -829,7 +804,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16) + "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16") endif() - - if(USE_FBGEMM) -@@ -1001,7 +966,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16) - "${FP16_SOURCE_DIR}" - "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16") elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16) - add_library(fp16 STATIC "/usr/include/fp16.h") + add_library(fp16 STATIC "#FP16_INCLUDE_DIR") set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C) endif() list(APPEND Caffe2_DEPENDENCY_LIBS fp16) -@@ -1395,7 +1360,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE) - - # Tensorpipe uses cuda_add_library - torch_update_find_cuda_flags() -- add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe) - - 
list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe) - if(USE_CUDA) -@@ -1551,7 +1515,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) - set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17) +@@ -1170,10 +1145,9 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE) + message(WARNING "Archived TensorPipe forces CMake compatibility mode") + set(CMAKE_POLICY_VERSION_MINIMUM 3.5) endif() - endif() -- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL) - - add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE}) - if(NOT USE_SYSTEM_ONNX) -@@ -1582,7 +1545,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) +- add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe) + # Suppress warning to unblock libnop comiplation by clang-17 + # See https://github.com/pytorch/pytorch/issues/151316 + target_compile_options_if_supported(tensorpipe -Wno-missing-template-arg-list-after-template-kw) + if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0") + unset(CMAKE_POLICY_VERSION_MINIMUM) + endif() +@@ -1340,7 +1314,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) endif() set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY}) message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}") - list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx) + list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx onnx_optimizer) endif() - include_directories(${FOXI_INCLUDE_DIRS}) - list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) -@@ -1752,9 +1715,8 @@ if(NOT INTERN_BUILD_MOBILE) + # Recover the build shared libs option. 
+ set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS}) +@@ -1500,9 +1474,8 @@ if(NOT INTERN_BUILD_MOBILE) endif() if(USE_MKLDNN) include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake) @@ -235,10 +280,10 @@ index 8c0e3c2..d65576a 100644 + if(DNNL_FOUND) set(AT_MKLDNN_ENABLED 1) - include_directories(AFTER SYSTEM ${MKLDNN_INCLUDE_DIR}) - if(BUILD_CAFFE2_OPS) - list(APPEND Caffe2_DEPENDENCY_LIBS caffe2::mkldnn) - endif(BUILD_CAFFE2_OPS) -@@ -1819,7 +1781,7 @@ endif() + else() + message(WARNING "MKLDNN could not be found.") + caffe2_update_option(USE_MKLDNN OFF) +@@ -1583,7 +1556,7 @@ endif() # set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) @@ -247,7 +292,7 @@ index 8c0e3c2..d65576a 100644 # Disable compiler feature checks for `fmt`. # -@@ -1828,7 +1790,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) +@@ -1592,7 +1565,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know # `fmt` is compatible with a superset of the compilers that PyTorch is, it # shouldn't be too bad to just disable the checks. @@ -256,7 +301,7 @@ index 8c0e3c2..d65576a 100644 list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only) set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE) diff --git a/cmake/External/nnpack.cmake b/cmake/External/nnpack.cmake -index a41343c..6075bdd 100644 +index 8a4a310d6..f413d2e61 100644 --- a/cmake/External/nnpack.cmake +++ b/cmake/External/nnpack.cmake @@ -40,7 +40,7 @@ endif() @@ -268,7 +313,7 @@ index a41343c..6075bdd 100644 message(STATUS "Brace yourself, we are building NNPACK") set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party) -@@ -114,6 +114,5 @@ endif() +@@ -94,6 +94,5 @@ endif() # (4) Catch-all: not supported. 
############################################################################## @@ -278,7 +323,7 @@ index a41343c..6075bdd 100644 +set(NNPACK_FOUND TRUE) +set(USE_NNPACK ON) diff --git a/cmake/public/mkldnn.cmake b/cmake/public/mkldnn.cmake -index 50404d3..ca067f0 100644 +index 87935625f..9f8fa3df8 100644 --- a/cmake/public/mkldnn.cmake +++ b/cmake/public/mkldnn.cmake @@ -4,7 +4,7 @@ if(CPU_AARCH64) @@ -290,105 +335,93 @@ index 50404d3..ca067f0 100644 if(NOT TARGET caffe2::mkldnn) add_library(caffe2::mkldnn INTERFACE IMPORTED) -@@ -15,7 +15,7 @@ set_property( +@@ -15,4 +15,4 @@ set_property( ${MKLDNN_INCLUDE_DIR}) set_property( TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES - ${MKLDNN_LIBRARIES}) + DNNL::dnnl) - if(BUILD_ONEDNN_GRAPH) - if(NOT TARGET caffe2::dnnl_graph) - add_library(caffe2::dnnl_graph INTERFACE IMPORTED) diff --git a/setup.py b/setup.py -index 34b2854..5db117f 100644 +index 61ee9363f..3691cc35c 100644 --- a/setup.py +++ b/setup.py -@@ -418,13 +418,9 @@ def build_deps(): +@@ -508,13 +508,9 @@ def build_deps(): # Windows has very poor support for them. 
sym_files = [ - 'tools/shared/_utils_internal.py', -- 'torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h', -- 'torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h', + "tools/shared/_utils_internal.py", +- "torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h", +- "torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h", ] orig_files = [ - 'torch/_utils_internal.py', -- 'third_party/valgrind-headers/callgrind.h', -- 'third_party/valgrind-headers/valgrind.h', + "torch/_utils_internal.py", +- "third_party/valgrind-headers/callgrind.h", +- "third_party/valgrind-headers/valgrind.h", ] for sym_file, orig_file in zip(sym_files, orig_files): same = False diff --git a/test/cpp/c10d/CMakeLists.txt b/test/cpp/c10d/CMakeLists.txt -index 89c6b91..0c60d08 100644 +index 5b423241d..e069accd6 100644 --- a/test/cpp/c10d/CMakeLists.txt +++ b/test/cpp/c10d/CMakeLists.txt -@@ -16,14 +16,14 @@ function(c10d_add_test test_src) - add_test(NAME ${test_name} COMMAND $) +@@ -26,17 +26,17 @@ function(c10d_add_test test_src) + endif() endfunction() --c10d_add_test(FileStoreTest.cpp torch_cpu gtest_main) --c10d_add_test(TCPStoreTest.cpp torch_cpu gtest_main) -+c10d_add_test(FileStoreTest.cpp torch_cpu gtest_main gtest) -+c10d_add_test(TCPStoreTest.cpp torch_cpu gtest_main gtest) - if(INSTALL_TEST) - install(TARGETS FileStoreTest DESTINATION bin) - install(TARGETS TCPStoreTest DESTINATION bin) - endif() +-c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF) +-c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST}) +-c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST}) ++c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF) ++c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST}) ++c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST}) 
if(NOT WIN32) -- c10d_add_test(HashStoreTest.cpp torch_cpu gtest_main) -+ c10d_add_test(HashStoreTest.cpp torch_cpu gtest_main gtest) - if(INSTALL_TEST) - install(TARGETS HashStoreTest DESTINATION bin) - endif() -@@ -31,11 +31,11 @@ endif() +- c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST}) ++ c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST}) + endif() if(USE_CUDA) if(USE_GLOO AND USE_C10D_GLOO) -- c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu c10d_cuda_test gtest_main) -+ c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu c10d_cuda_test gtest_main gtest) - if(INSTALL_TEST) - install(TARGETS ProcessGroupGlooTest DESTINATION bin) - endif() -- c10d_add_test(ProcessGroupGlooAsyncTest.cpp torch_cpu c10d_cuda_test gtest_main) -+ c10d_add_test(ProcessGroupGlooAsyncTest.cpp torch_cpu c10d_cuda_test gtest_main gtest) +- c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST}) +- c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST}) ++ c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST}) ++ c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST}) endif() if(USE_NCCL AND USE_C10D_NCCL) # NCCL is a private dependency of libtorch, but the tests include some -@@ -44,10 +44,10 @@ if(USE_CUDA) +@@ -45,10 +45,10 @@ if(USE_CUDA) # a private dependency of the tests as well. 
c10d_add_test( ProcessGroupNCCLTest.cpp -- torch_cpu c10d_cuda_test gtest_main __caffe2_nccl) -+ torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl) +- LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST}) ++ LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST}) c10d_add_test( ProcessGroupNCCLErrorsTest.cpp -- torch_cpu c10d_cuda_test gtest_main __caffe2_nccl) -+ torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl) +- LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST}) ++ LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST}) if(INSTALL_TEST) - install(TARGETS ProcessGroupNCCLTest DESTINATION bin) - install(TARGETS ProcessGroupNCCLErrorsTest DESTINATION bin) -@@ -61,7 +61,7 @@ if(USE_CUDA) + install(TARGETS c10d_cuda_test DESTINATION lib) + endif() +@@ -60,14 +60,14 @@ if(USE_CUDA) # a private dependency of the tests as well. 
c10d_add_test( ProcessGroupUCCTest.cpp -- torch_cpu c10d_cuda_test gtest_main __caffe2_ucc) -+ torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc) +- LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_ucc INSTALL_TEST ${INSTALL_TEST}) ++ LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc INSTALL_TEST ${INSTALL_TEST}) if(INSTALL_TEST) - install(TARGETS ProcessGroupUCCTest DESTINATION bin) install(TARGETS c10d_cuda_test DESTINATION lib) -@@ -69,7 +69,7 @@ if(USE_CUDA) + endif() endif() else() if(USE_GLOO AND USE_C10D_GLOO) -- c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu gtest_main) -+ c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu gtest_main gtest) +- c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF) ++ c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF) endif() endif() diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt -index 7dff706..90b1003 100644 +index 9c409e078..6cddd8de4 100644 --- a/test/cpp/tensorexpr/CMakeLists.txt +++ b/test/cpp/tensorexpr/CMakeLists.txt -@@ -54,7 +54,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE}) +@@ -51,7 +51,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE}) # pthreadpool header. For some build environment we need add the dependency # explicitly. 
if(USE_PTHREADPOOL) @@ -396,4 +429,17 @@ index 7dff706..90b1003 100644 + target_link_libraries(test_tensorexpr PRIVATE pthreadpool) endif() if(USE_CUDA) - target_link_libraries(test_tensorexpr PRIVATE + target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA) +diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt +index 8b8ebdc6e..034b5e56c 100644 +--- a/torch/CMakeLists.txt ++++ b/torch/CMakeLists.txt +@@ -82,8 +82,6 @@ set(TORCH_PYTHON_LINK_LIBRARIES + Python::Module + pybind::pybind11 + opentelemetry::api +- httplib +- nlohmann + shm + fmt::fmt-header-only + ATEN_CPU_FILES_GEN_LIB) diff --git a/gnu/packages/patches/python-pytorch-for-r-torch-without-kineto.patch b/gnu/packages/patches/python-pytorch-for-r-torch-without-kineto.patch new file mode 100644 index 00000000000..1b10f18d5a6 --- /dev/null +++ b/gnu/packages/patches/python-pytorch-for-r-torch-without-kineto.patch @@ -0,0 +1,64 @@ +Even when building without Kineto, the <ActivityType.h> header is still +imported and the ActivityType type is used. This patch was copied from +https://github.com/pytorch/pytorch/pull/111048 and adapted. + +diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h +index c4efd7785..2caef1f1e 100644 +--- a/torch/csrc/profiler/kineto_shim.h ++++ b/torch/csrc/profiler/kineto_shim.h +@@ -12,7 +12,55 @@ + #undef USE_KINETO + #endif + ++#ifdef USE_KINETO + #include <ActivityType.h> ++#else ++namespace libkineto { ++// copied from header ++/* ++ * Copyright (c) Meta Platforms, Inc. and affiliates. ++ * All rights reserved. ++ * ++ * This source code is licensed under the BSD-style license found in the ++ * LICENSE file in the root directory of this source tree. ++ */ ++ ++// Note : All activity types are not enabled by default.
Please add them ++// at correct position in the enum ++enum class ActivityType { ++ // Activity types enabled by default ++ CPU_OP = 0, // cpu side ops ++ USER_ANNOTATION, ++ GPU_USER_ANNOTATION, ++ GPU_MEMCPY, ++ GPU_MEMSET, ++ CONCURRENT_KERNEL, // on-device kernels ++ EXTERNAL_CORRELATION, ++ CUDA_RUNTIME, // host side cuda runtime events ++ CUDA_DRIVER, // host side cuda driver events ++ CPU_INSTANT_EVENT, // host side point-like events ++ PYTHON_FUNCTION, ++ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API. ++ ++ // Optional Activity types ++ CUDA_SYNC, // synchronization events between runtime and kernels ++ GLOW_RUNTIME, // host side glow runtime events ++ MTIA_RUNTIME, // host side MTIA runtime events ++ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics ++ MTIA_CCP_EVENTS, // MTIA ondevice CCP events ++ HPU_OP, // HPU host side runtime event ++ XPU_RUNTIME, // host side xpu runtime events ++ MTIA_WORKLOADD, ++ ++ PRIVATEUSE1_RUNTIME, ++ PRIVATEUSE1_DRIVER, ++ ++ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it. ++ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC, ++}; ++} ++ ++#endif + + #include + #include