diff --git a/CMakeLists.txt b/CMakeLists.txt index 79c7938337..f2affb10d5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -8,7 +8,7 @@ include(mooncake-common/FindJsonCpp.cmake) include(mooncake-common/FindGLOG.cmake) include(mooncake-common/common.cmake) # unit test -if (BUILD_UNIT_TESTS) +if(BUILD_UNIT_TESTS) enable_testing() endif() @@ -16,7 +16,8 @@ option(WITH_TE "build mooncake transfer engine and sample code" ON) option(WITH_STORE "build mooncake store library and sample code" ON) option(WITH_STORE_GO "build Go bindings for mooncake store" OFF) option(WITH_P2P_STORE "build p2p store library and sample code" OFF) -option(WITH_RUST_EXAMPLE "build the Rust interface and sample code for the transfer engine" OFF) +option(WITH_RUST_EXAMPLE + "build the Rust interface and sample code for the transfer engine" OFF) option(WITH_STORE_RUST "build the Rust bindings for the Mooncake Store" ON) option(WITH_EP "build mooncake with expert parallelism support" OFF) option(USE_NOF "build mooncake store with NoF SSD pool support" OFF) @@ -24,40 +25,35 @@ option(USE_NOF "build mooncake store with NoF SSD pool support" OFF) include(${CMAKE_CURRENT_SOURCE_DIR}/mooncake-common/SetupPython.cmake) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extern/pybind11) execute_process( - COMMAND ${PYTHON_EXECUTABLE} -c "import sys; print(sys.path[-1])" - OUTPUT_VARIABLE PYTHON_SYS_PATH -) + COMMAND ${PYTHON_EXECUTABLE} -c "import sys; print(sys.path[-1])" + OUTPUT_VARIABLE PYTHON_SYS_PATH) string(STRIP ${PYTHON_SYS_PATH} PYTHON_SYS_PATH) -if (USE_ETCD) +if(USE_ETCD) add_compile_definitions(USE_ETCD) - if (USE_ETCD_LEGACY) + if(USE_ETCD_LEGACY) add_compile_definitions(USE_ETCD_LEGACY) - message(STATUS "etcd as metadata server support is enabled (etcd-cpp-api-v3)") + message( + STATUS "etcd as metadata server support is enabled (etcd-cpp-api-v3)") else() message(STATUS "etcd as metadata server support is enabled (go package)") endif() endif() option(STORE_USE_ETCD "build mooncake store with etcd" OFF) -if (STORE_USE_ETCD) +if(STORE_USE_ETCD) add_compile_definitions(STORE_USE_ETCD) endif() option(STORE_USE_REDIS "build mooncake store with redis" OFF) -if (STORE_USE_REDIS) +if(STORE_USE_REDIS) add_compile_definitions(STORE_USE_REDIS) endif() -option(STORE_USE_K8S_LEASE "build mooncake store with K8s Lease leader election" OFF) -if (STORE_USE_K8S_LEASE) - if (STORE_USE_ETCD) - message(FATAL_ERROR "STORE_USE_K8S_LEASE and STORE_USE_ETCD cannot be enabled together because both build Go c-shared HA backends.") - endif() - if (USE_ETCD AND NOT USE_ETCD_LEGACY) - message(FATAL_ERROR "STORE_USE_K8S_LEASE cannot be enabled with non-legacy USE_ETCD because both build Go c-shared libraries in the same process.") - endif() +option(STORE_USE_K8S_LEASE + "build mooncake store with K8s Lease leader election" OFF) +if(STORE_USE_K8S_LEASE) add_compile_definitions(STORE_USE_K8S_LEASE) endif() -if (USE_NOF) +if(USE_NOF) add_compile_definitions(USE_NOF) else() message(STATUS "USE_NOF=OFF, NoF SSD pool support is disabled") @@ -69,23 +65,22 @@ option(STORE_USE_JEMALLOC "Use jemalloc in mooncake store master" OFF) add_compile_definitions(ASIO_SEPARATE_COMPILATION ASIO_DYN_LINK) add_subdirectory(mooncake-common) -include_directories(mooncake-common/etcd) -include_directories(mooncake-common/k8s-lease) +include_directories(mooncake-common/ha-wrapper) include_directories(mooncake-common/include) -if (WITH_TE) +if(WITH_TE) add_subdirectory(mooncake-transfer-engine) include_directories(mooncake-transfer-engine/include) endif() -if (WITH_STORE) +if(WITH_STORE) message(STATUS "Mooncake Store will be built") add_subdirectory(mooncake-store) include_directories(mooncake-store/include) endif() -if (WITH_STORE_RUST) - if (NOT WITH_STORE) +if(WITH_STORE_RUST) + if(NOT WITH_STORE) message(FATAL_ERROR "WITH_STORE_RUST=ON requires WITH_STORE=ON") endif() message(STATUS "Mooncake Store Rust bindings will be built") @@ -93,28 +88,34 @@ if (WITH_STORE_RUST) endif() option(EP_USE_IDE "Enable intelligent indexing for IDEs" OFF) -if (WITH_EP) - if (EP_USE_IDE) +if(WITH_EP) + if(EP_USE_IDE) message(WARNING "EP_USE_IDE enabled. DO NOT USE IN PRODUCTION!") add_subdirectory(mooncake-ep) include_directories(mooncake-ep/include) add_subdirectory(mooncake-pg) include_directories(mooncake-pg/include) - else () - message(STATUS "WITH_EP enabled: building Mooncake EP and PG Python extensions") + else() + message( + STATUS "WITH_EP enabled: building Mooncake EP and PG Python extensions") if(USE_CUDA) find_package(CUDAToolkit REQUIRED) message(STATUS "Detected CUDA version: ${CUDAToolkit_VERSION}") endif() - # EP_TORCH_VERSIONS: semicolon-separated list of PyTorch versions to build for. - # Can be set via -DEP_TORCH_VERSIONS="2.9.1;2.8.0" or the EP_TORCH_VERSIONS env var. - # Empty means build with the currently-installed torch. + # EP_TORCH_VERSIONS: semicolon-separated list of PyTorch versions to build + # for. Can be set via -DEP_TORCH_VERSIONS="2.9.1;2.8.0" or the + # EP_TORCH_VERSIONS env var. Empty means build with the currently-installed + # torch. if(NOT EP_TORCH_VERSIONS) set(EP_TORCH_VERSIONS "$ENV{EP_TORCH_VERSIONS}") endif() - set(EP_TORCH_VERSIONS "${EP_TORCH_VERSIONS}" CACHE STRING - "PyTorch versions for EP/PG extensions, semicolon-separated (empty = use currently-installed torch)") + set(EP_TORCH_VERSIONS + "${EP_TORCH_VERSIONS}" + CACHE + STRING + "PyTorch versions for EP/PG extensions, semicolon-separated (empty = use currently-installed torch)" + ) # TORCH_CUDA_ARCH_LIST forwarded to the torch CUDA extension build. if(NOT TORCH_CUDA_ARCH_LIST) @@ -123,69 +124,72 @@ if (WITH_EP) if(NOT TORCH_CUDA_ARCH_LIST) set(TORCH_CUDA_ARCH_LIST "8.0;9.0") endif() - set(TORCH_CUDA_ARCH_LIST "${TORCH_CUDA_ARCH_LIST}" CACHE STRING - "CUDA arch list for EP/PG extension builds (e.g. \"8.0;9.0\")") + set(TORCH_CUDA_ARCH_LIST + "${TORCH_CUDA_ARCH_LIST}" + CACHE STRING + "CUDA arch list for EP/PG extension builds (e.g. \"8.0;9.0\")") # Staging directory: EP/PG .so files are placed here during make and later # injected into the wheel AFTER auditwheel, so patchelf never touches the # CUDA fatbins (which would cause cudaErrorInvalidKernelImage at runtime). set(EP_PG_STAGING_DIR "${CMAKE_BINARY_DIR}/ep_pg_staging") - # Convert semicolon-separated lists to pipe-separated strings so they survive - # CMake's COMMAND list-splitting (semicolons are CMake list separators). + # Convert semicolon-separated lists to pipe-separated strings so they + # survive CMake's COMMAND list-splitting (semicolons are CMake list + # separators). string(REPLACE ";" "|" _ep_torch_versions_pipe "${EP_TORCH_VERSIONS}") string(REPLACE ";" "|" _torch_cuda_arch_list_pipe "${TORCH_CUDA_ARCH_LIST}") - add_custom_target(mooncake_ep_ext ALL + add_custom_target( + mooncake_ep_ext ALL COMMAND ${CMAKE_COMMAND} -E make_directory "${EP_PG_STAGING_DIR}" - COMMAND ${CMAKE_COMMAND} - "-DSOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR}/mooncake-ep" + COMMAND + ${CMAKE_COMMAND} "-DSOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR}/mooncake-ep" "-DEP_CUDA_MAJOR=${CUDAToolkit_VERSION_MAJOR}" "-DEP_CUDA_MINOR=${CUDAToolkit_VERSION_MINOR}" "-DEP_TORCH_VERSIONS=${_ep_torch_versions_pipe}" "-DTORCH_CUDA_ARCH_LIST=${_torch_cuda_arch_list_pipe}" "-DSTAGING_DIR=${EP_PG_STAGING_DIR}" "-DENGINE_SO_PATH=$" - "-DEP_USE_MUSA=$,1,0>" - -P "${CMAKE_CURRENT_SOURCE_DIR}/mooncake-ep/BuildEpExt.cmake" + "-DEP_USE_MUSA=$,1,0>" -P + "${CMAKE_CURRENT_SOURCE_DIR}/mooncake-ep/BuildEpExt.cmake" COMMENT "Building Mooncake EP Python extension(s)" DEPENDS engine - VERBATIM - ) + VERBATIM) - add_custom_target(mooncake_pg_ext ALL + add_custom_target( + mooncake_pg_ext ALL COMMAND ${CMAKE_COMMAND} -E make_directory "${EP_PG_STAGING_DIR}" - COMMAND ${CMAKE_COMMAND} - "-DSOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR}/mooncake-pg" + COMMAND + ${CMAKE_COMMAND} "-DSOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR}/mooncake-pg" "-DEP_CUDA_MAJOR=${CUDAToolkit_VERSION_MAJOR}" "-DEP_CUDA_MINOR=${CUDAToolkit_VERSION_MINOR}" "-DEP_TORCH_VERSIONS=${_ep_torch_versions_pipe}" "-DTORCH_CUDA_ARCH_LIST=${_torch_cuda_arch_list_pipe}" "-DSTAGING_DIR=${EP_PG_STAGING_DIR}" "-DENGINE_SO_PATH=$" - "-DEP_USE_MUSA=$,1,0>" - -P "${CMAKE_CURRENT_SOURCE_DIR}/mooncake-pg/BuildPgExt.cmake" + "-DEP_USE_MUSA=$,1,0>" -P + "${CMAKE_CURRENT_SOURCE_DIR}/mooncake-pg/BuildPgExt.cmake" COMMENT "Building Mooncake PG Python extension(s)" DEPENDS engine mooncake_ep_ext - VERBATIM - ) - endif () + VERBATIM) + endif() endif() add_subdirectory(mooncake-integration) -if (WITH_STORE_GO AND WITH_STORE) +if(WITH_STORE_GO AND WITH_STORE) add_custom_target(build_store_go DEPENDS mooncake_store transfer_engine) add_custom_command( - TARGET build_store_go - COMMAND bash build.sh ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR} ${USE_ETCD} ${USE_REDIS} ${USE_HTTP} ${USE_ETCD_LEGACY} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/mooncake-store/go - ) + TARGET build_store_go + COMMAND bash build.sh ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR} + ${USE_ETCD} ${USE_REDIS} ${USE_HTTP} ${USE_ETCD_LEGACY} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/mooncake-store/go) set_property(TARGET build_store_go PROPERTY EXCLUDE_FROM_ALL FALSE) message(STATUS "Mooncake Store Go bindings will be built") endif() -if (WITH_P2P_STORE) +if(WITH_P2P_STORE) add_subdirectory(mooncake-p2p-store) message(STATUS "P2P Store will be built") endif() diff --git a/mooncake-common/CMakeLists.txt b/mooncake-common/CMakeLists.txt index dade5b984a..4c78ead1d7 100644 --- a/mooncake-common/CMakeLists.txt +++ b/mooncake-common/CMakeLists.txt @@ -1,14 +1,18 @@ -if ((USE_ETCD AND NOT USE_ETCD_LEGACY) OR STORE_USE_ETCD) - add_subdirectory(etcd) -endif() - -if (STORE_USE_K8S_LEASE) - add_subdirectory(k8s-lease) +if((USE_ETCD AND NOT USE_ETCD_LEGACY) + OR STORE_USE_ETCD + OR STORE_USE_K8S_LEASE) + add_subdirectory(ha-wrapper) + set(HA_WRAPPER_INCLUDE + ${HA_WRAPPER_INCLUDE} + PARENT_SCOPE) + set(HA_WRAPPER_LIB + ${HA_WRAPPER_LIB} + PARENT_SCOPE) endif() include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) add_subdirectory(src) -if (BUILD_UNIT_TESTS) +if(BUILD_UNIT_TESTS) add_subdirectory(tests) endif() diff --git a/mooncake-common/etcd/CMakeLists.txt b/mooncake-common/etcd/CMakeLists.txt deleted file mode 100644 index 570e8bbf17..0000000000 --- a/mooncake-common/etcd/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libetcd_wrapper.so - COMMAND bash -c "go mod tidy" && bash -c "go build -buildmode=c-shared -o ${CMAKE_CURRENT_BINARY_DIR}/libetcd_wrapper.so etcd_wrapper.go" && cp ${CMAKE_CURRENT_BINARY_DIR}/libetcd_wrapper.h ${CMAKE_CURRENT_SOURCE_DIR} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "Building Go shared library" - DEPENDS etcd_wrapper.go go.mod go.sum build.sh -) - -set(ETCD_WRAPPER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/libetcd_wrapper.h) -set(ETCD_WRAPPER_LIB ${CMAKE_CURRENT_BINARY_DIR}/libetcd_wrapper.so) - -add_custom_target( - build_etcd_wrapper - DEPENDS ${ETCD_WRAPPER_LIB} -) - -install( - FILES ${ETCD_WRAPPER_LIB} - DESTINATION lib -) diff --git a/mooncake-common/etcd/build.sh b/mooncake-common/etcd/build.sh deleted file mode 100755 index cf94d82a34..0000000000 --- a/mooncake-common/etcd/build.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash -go mod init github.com/kvcache-ai/Mooncake/mooncake-common/etcd -go mod tidy -go build -o libetcd_wrapper.so -buildmode=c-shared etcd_wrapper.go diff --git a/mooncake-common/etcd/go.mod b/mooncake-common/etcd/go.mod deleted file mode 100644 index ef5b3503f1..0000000000 --- a/mooncake-common/etcd/go.mod +++ /dev/null @@ -1,28 +0,0 @@ -module github.com/kvcache-ai/Mooncake/mooncake-common/etcd - -go 1.25.0 - -toolchain go1.25.10 - -require ( - go.etcd.io/etcd/api/v3 v3.5.21 - go.etcd.io/etcd/client/v3 v3.5.21 -) - -require ( - github.com/coreos/go-semver v0.3.0 // indirect - github.com/coreos/go-systemd/v22 v22.3.2 // indirect - github.com/gogo/protobuf v1.3.2 // indirect - github.com/golang/protobuf v1.5.4 // indirect - go.etcd.io/etcd/client/pkg/v3 v3.5.21 // indirect - go.uber.org/atomic v1.7.0 // indirect - go.uber.org/multierr v1.6.0 // indirect - go.uber.org/zap v1.17.0 // indirect - golang.org/x/net v0.55.0 // indirect - golang.org/x/sys v0.39.0 // indirect - golang.org/x/text v0.32.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect - google.golang.org/grpc v1.79.3 // indirect - google.golang.org/protobuf v1.36.10 // indirect -) diff --git a/mooncake-common/etcd/go.sum b/mooncake-common/etcd/go.sum deleted file mode 100644 index 888b82dc64..0000000000 --- a/mooncake-common/etcd/go.sum +++ /dev/null @@ -1,108 +0,0 @@ -github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= -github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/coreos/go-semver v0.3.0 h1:wkHLiw0WNATZnSG7epLsujiMCgPAc9xhjJ4tgnAxmfM= -github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd/v22 v22.3.2 h1:D9/bQk5vlXQFZ6Kwuu6zaiXJ9oTPe68++AzAJc1DzSI= -github.com/coreos/go-systemd/v22 v22.3.2/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= -github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= -github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= -github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= -github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= -github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= -github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= -github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= -github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= -github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= -github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I= -github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= -go.etcd.io/etcd/api/v3 v3.5.21 h1:A6O2/JDb3tvHhiIz3xf9nJ7REHvtEFJJ3veW3FbCnS8= -go.etcd.io/etcd/api/v3 v3.5.21/go.mod h1:c3aH5wcvXv/9dqIw2Y810LDXJfhSYdHQ0vxmP3CCHVY= -go.etcd.io/etcd/client/pkg/v3 v3.5.21 h1:lPBu71Y7osQmzlflM9OfeIV2JlmpBjqBNlLtcoBqUTc= -go.etcd.io/etcd/client/pkg/v3 v3.5.21/go.mod h1:BgqT/IXPjK9NkeSDjbzwsHySX3yIle2+ndz28nVsjUs= -go.etcd.io/etcd/client/v3 v3.5.21 h1:T6b1Ow6fNjOLOtM0xSoKNQt1ASPCLWrF9XMHcH9pEyY= -go.etcd.io/etcd/client/v3 v3.5.21/go.mod h1:mFYy67IOqmbRf/kRUvsHixzo3iG+1OF2W2+jVIQRAnU= -go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= -go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= -go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= -go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8= -go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0= -go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs= -go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18= -go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE= -go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8= -go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew= -go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI= -go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA= -go.uber.org/atomic v1.7.0 h1:ADUqmZGgLDDfbSL9ZmPxKTybcoEYHgpYfELNoN+7hsw= -go.uber.org/atomic v1.7.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= -go.uber.org/multierr v1.6.0 h1:y6IPFStTAIT5Ytl7/XYmHvzXQ7S3g/IeZW9hyZ5thw4= -go.uber.org/multierr v1.6.0/go.mod h1:cdWPpRnG4AhwMwsgIHip0KRBQjJy5kYEpYjJxpXp9iU= -go.uber.org/zap v1.17.0 h1:MTjgFu6ZLKvY6Pvaqk97GlxNBuMpV4Hy/3P6tRGlI2U= -go.uber.org/zap v1.17.0/go.mod h1:MXVU+bhUf/A7Xi2HNOnopQOrmycQ5Ih87HtOu4q5SSo= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU= -golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= -golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= -golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= -golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= -gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= -google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 h1:fCvbg86sFXwdrl5LgVcTEvNC+2txB5mgROGmRL5mrls= -google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:+rXWjjaukWZun3mLfjmVnQi18E1AsFbDN9QdJ5YXLto= -google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 h1:gRkg/vSppuSQoDjxyiGfN4Upv/h/DQmIR10ZU8dh4Ww= -google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= -google.golang.org/grpc v1.79.3 h1:sybAEdRIEtvcD68Gx7dmnwjZKlyfuc61Dyo9pGXXkKE= -google.golang.org/grpc v1.79.3/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= -google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= -google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/mooncake-common/ha-wrapper/CMakeLists.txt b/mooncake-common/ha-wrapper/CMakeLists.txt new file mode 100644 index 0000000000..8553968fbb --- /dev/null +++ b/mooncake-common/ha-wrapper/CMakeLists.txt @@ -0,0 +1,27 @@ +add_custom_command( + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libmooncake_ha_wrapper.so + ${CMAKE_CURRENT_BINARY_DIR}/libmooncake_ha_wrapper.h + COMMAND bash -c "go mod tidy" + COMMAND + bash -c + "go build -buildmode=c-shared -o ${CMAKE_CURRENT_BINARY_DIR}/libmooncake_ha_wrapper.so etcd_wrapper.go k8s_lease_wrapper.go" + COMMAND + ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_BINARY_DIR}/libmooncake_ha_wrapper.h + ${CMAKE_CURRENT_SOURCE_DIR}/libmooncake_ha_wrapper.h + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMENT "Building unified Go HA wrapper" + DEPENDS etcd_wrapper.go k8s_lease_wrapper.go go.mod) + +set(HA_WRAPPER_INCLUDE + ${CMAKE_CURRENT_BINARY_DIR} + PARENT_SCOPE) +set(HA_WRAPPER_LIB + ${CMAKE_CURRENT_BINARY_DIR}/libmooncake_ha_wrapper.so + PARENT_SCOPE) + +add_custom_target(build_ha_wrapper + DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libmooncake_ha_wrapper.so) + +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libmooncake_ha_wrapper.so + DESTINATION lib) diff --git a/mooncake-common/etcd/etcd_wrapper.go b/mooncake-common/ha-wrapper/etcd_wrapper.go similarity index 100% rename from mooncake-common/etcd/etcd_wrapper.go rename to mooncake-common/ha-wrapper/etcd_wrapper.go diff --git a/mooncake-common/ha-wrapper/go.mod b/mooncake-common/ha-wrapper/go.mod new file mode 100644 index 0000000000..6e7fbd24bb --- /dev/null +++ b/mooncake-common/ha-wrapper/go.mod @@ -0,0 +1,68 @@ +module github.com/kvcache-ai/Mooncake/mooncake-common/ha-wrapper + +go 1.25.0 + +toolchain go1.25.10 + +require ( + go.etcd.io/etcd/api/v3 v3.5.21 + go.etcd.io/etcd/client/v3 v3.5.21 + k8s.io/api v0.34.3 + k8s.io/apimachinery v0.34.3 + k8s.io/client-go v0.34.3 +) + +require ( + github.com/coreos/go-semver v0.3.0 // indirect + github.com/coreos/go-systemd/v22 v22.3.2 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/emicklei/go-restful/v3 v3.12.2 // indirect + github.com/fxamacker/cbor/v2 v2.9.0 // indirect + github.com/go-logr/logr v1.4.2 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.20.2 // indirect + github.com/go-openapi/swag v0.23.0 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/gnostic-models v0.7.0 // indirect + github.com/google/go-cmp v0.7.0 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/josharian/intern v1.0.0 // indirect + github.com/json-iterator/go v1.1.12 // indirect + github.com/mailru/easyjson v0.7.7 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/onsi/ginkgo/v2 v2.22.0 // indirect + github.com/onsi/gomega v1.36.1 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/spf13/pflag v1.0.6 // indirect + github.com/x448/float16 v0.8.4 // indirect + go.etcd.io/etcd/client/pkg/v3 v3.5.21 // indirect + go.opentelemetry.io/otel v1.35.0 // indirect + go.uber.org/multierr v1.11.0 // indirect + go.uber.org/zap v1.27.0 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/net v0.38.0 // indirect + golang.org/x/oauth2 v0.27.0 // indirect + golang.org/x/sys v0.31.0 // indirect + golang.org/x/term v0.30.0 // indirect + golang.org/x/text v0.23.0 // indirect + golang.org/x/time v0.9.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250303144028-a0af3efb3deb // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250303144028-a0af3efb3deb // indirect + google.golang.org/grpc v1.72.1 // indirect + google.golang.org/protobuf v1.36.5 // indirect + gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect + k8s.io/klog/v2 v2.130.1 // indirect + k8s.io/kube-openapi v0.0.0-20250710124328-f3f2b991d03b // indirect + k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 // indirect + sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect + sigs.k8s.io/randfill v1.0.0 // indirect + sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect + sigs.k8s.io/yaml v1.6.0 // indirect +) diff --git a/mooncake-common/k8s-lease/k8s_lease_wrapper.go b/mooncake-common/ha-wrapper/k8s_lease_wrapper.go similarity index 82% rename from mooncake-common/k8s-lease/k8s_lease_wrapper.go rename to mooncake-common/ha-wrapper/k8s_lease_wrapper.go index b67caca52a..225df5b73a 100644 --- a/mooncake-common/k8s-lease/k8s_lease_wrapper.go +++ b/mooncake-common/ha-wrapper/k8s_lease_wrapper.go @@ -6,6 +6,9 @@ package main #include // Trampoline to invoke C/C++ callback safely from Go via cgo. +#ifndef MOONCAKE_K8S_CALLBACK_TRAMPOLINES +#define MOONCAKE_K8S_CALLBACK_TRAMPOLINES + typedef void (*holder_change_cb_t)(void* ctx, const char* holder, size_t holderSize, int64_t leaseTransitions); @@ -15,6 +18,8 @@ static inline void call_holder_change_cb(holder_change_cb_t func, void* ctx, int64_t leaseTransitions) { func(ctx, holder, holderSize, leaseTransitions); } + +#endif // MOONCAKE_K8S_CALLBACK_TRAMPOLINES */ import "C" @@ -54,36 +59,36 @@ type watchState struct { } var ( - globalClient kubernetes.Interface - clientMutex sync.Mutex - initClientFn = initClient + k8sGlobalClient kubernetes.Interface + k8sClientMutex sync.Mutex + k8sInitClientFn = initK8sClient - elections = make(map[string]*electionState) - electionMutex sync.Mutex + k8sElections = make(map[string]*electionState) + k8sElectionMutex sync.Mutex - watches = make(map[string]*watchState) - watchMutex sync.Mutex + k8sWatches = make(map[string]*watchState) + k8sWatchMutex sync.Mutex ) func electionKey(namespace, leaseName string) string { return namespace + "/" + leaseName } -func ensureClientInitialized() error { - clientMutex.Lock() - initialized := globalClient != nil - clientMutex.Unlock() +func ensureK8sClientInitialized() error { + k8sClientMutex.Lock() + initialized := k8sGlobalClient != nil + k8sClientMutex.Unlock() if initialized { return nil } - return initClientFn() + return k8sInitClientFn() } -// initClient creates the K8s clientset from in-cluster config or KUBECONFIG. -func initClient() error { - clientMutex.Lock() - defer clientMutex.Unlock() - if globalClient != nil { +// initK8sClient creates the K8s clientset from in-cluster config or KUBECONFIG. +func initK8sClient() error { + k8sClientMutex.Lock() + defer k8sClientMutex.Unlock() + if k8sGlobalClient != nil { return nil } @@ -107,22 +112,22 @@ func initClient() error { if err != nil { return fmt.Errorf("failed to create k8s clientset: %w", err) } - globalClient = client + k8sGlobalClient = client return nil } // runElection starts a leader election goroutine for the given namespace/leaseName. func runElection(namespace, leaseName, identity string, leaseDurationSec, renewDeadlineSec, retryPeriodSec int) error { - if err := ensureClientInitialized(); err != nil { + if err := ensureK8sClientInitialized(); err != nil { return err } key := electionKey(namespace, leaseName) - electionMutex.Lock() - if _, exists := elections[key]; exists { - electionMutex.Unlock() + k8sElectionMutex.Lock() + if _, exists := k8sElections[key]; exists { + k8sElectionMutex.Unlock() return fmt.Errorf("election already running for %s", key) } @@ -132,15 +137,15 @@ func runElection(namespace, leaseName, identity string, elected: make(chan struct{}), lost: make(chan struct{}), } - elections[key] = state - electionMutex.Unlock() + k8sElections[key] = state + k8sElectionMutex.Unlock() lock := &resourcelock.LeaseLock{ LeaseMeta: metav1.ObjectMeta{ Name: leaseName, Namespace: namespace, }, - Client: globalClient.CoordinationV1(), + Client: k8sGlobalClient.CoordinationV1(), LockConfig: resourcelock.ResourceLockConfig{ Identity: identity, }, @@ -165,18 +170,18 @@ func runElection(namespace, leaseName, identity string, OnStoppedLeading: func() { close(state.lost) // Auto-cleanup: remove from map so the same key can be reused. - electionMutex.Lock() - if elections[key] == state { - delete(elections, key) + k8sElectionMutex.Lock() + if k8sElections[key] == state { + delete(k8sElections, key) } - electionMutex.Unlock() + k8sElectionMutex.Unlock() }, }, }) if err != nil { - electionMutex.Lock() - delete(elections, key) - electionMutex.Unlock() + k8sElectionMutex.Lock() + delete(k8sElections, key) + k8sElectionMutex.Unlock() cancel() return fmt.Errorf("failed to create leader elector: %w", err) } @@ -187,14 +192,14 @@ func runElection(namespace, leaseName, identity string, // getHolder reads the current Lease holder identity and transitions. func getHolder(namespace, leaseName string) (string, int64, error) { - if err := ensureClientInitialized(); err != nil { + if err := ensureK8sClientInitialized(); err != nil { return "", 0, err } ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - lease, err := globalClient.CoordinationV1().Leases(namespace).Get(ctx, leaseName, metav1.GetOptions{}) + lease, err := k8sGlobalClient.CoordinationV1().Leases(namespace).Get(ctx, leaseName, metav1.GetOptions{}) if err != nil { return "", 0, fmt.Errorf("failed to get lease: %w", err) } @@ -221,7 +226,7 @@ func getHolder(namespace, leaseName string) (string, int64, error) { //export K8sLeaseInit func K8sLeaseInit(errMsg **C.char) C.int { - if err := ensureClientInitialized(); err != nil { + if err := ensureK8sClientInitialized(); err != nil { *errMsg = C.CString(err.Error()) return -1 } @@ -256,9 +261,9 @@ func K8sLeaseWaitElected( ) C.int { key := electionKey(C.GoString(ns), C.GoString(leaseName)) - electionMutex.Lock() - state, exists := elections[key] - electionMutex.Unlock() + k8sElectionMutex.Lock() + state, exists := k8sElections[key] + k8sElectionMutex.Unlock() if !exists { *errMsg = C.CString("no election running for " + key) @@ -290,9 +295,9 @@ func K8sLeaseWaitLost( ) C.int { key := electionKey(C.GoString(ns), C.GoString(leaseName)) - electionMutex.Lock() - state, exists := elections[key] - electionMutex.Unlock() + k8sElectionMutex.Lock() + state, exists := k8sElections[key] + k8sElectionMutex.Unlock() if !exists { // Already cleaned up by OnStoppedLeading — election is over. @@ -315,9 +320,9 @@ func K8sLeaseCancelElection( ) C.int { key := electionKey(C.GoString(ns), C.GoString(leaseName)) - electionMutex.Lock() - state, exists := elections[key] - electionMutex.Unlock() + k8sElectionMutex.Lock() + state, exists := k8sElections[key] + k8sElectionMutex.Unlock() if !exists { // Idempotent — no error if no election @@ -374,27 +379,27 @@ func K8sLeaseWatchHolder( *errMsg = C.CString("callback function is nil") return -1 } - if err := ensureClientInitialized(); err != nil { + if err := ensureK8sClientInitialized(); err != nil { *errMsg = C.CString(err.Error()) return -1 } - watchMutex.Lock() - if _, exists := watches[key]; exists { - watchMutex.Unlock() + k8sWatchMutex.Lock() + if _, exists := k8sWatches[key]; exists { + k8sWatchMutex.Unlock() *errMsg = C.CString("watch already running for " + key) return -1 } ctx, cancel := context.WithCancel(context.Background()) - watches[key] = &watchState{cancel: cancel} - watchMutex.Unlock() + k8sWatches[key] = &watchState{cancel: cancel} + k8sWatchMutex.Unlock() go func() { defer func() { - watchMutex.Lock() - delete(watches, key) - watchMutex.Unlock() + k8sWatchMutex.Lock() + delete(k8sWatches, key) + k8sWatchMutex.Unlock() }() for { @@ -404,7 +409,7 @@ func K8sLeaseWatchHolder( default: } - watcher, err := globalClient.CoordinationV1().Leases(nsStr).Watch(ctx, metav1.ListOptions{ + watcher, err := k8sGlobalClient.CoordinationV1().Leases(nsStr).Watch(ctx, metav1.ListOptions{ FieldSelector: "metadata.name=" + ln, }) if err != nil { @@ -475,9 +480,9 @@ func K8sLeaseCancelWatch( ) C.int { key := electionKey(C.GoString(ns), C.GoString(leaseName)) - watchMutex.Lock() - state, exists := watches[key] - watchMutex.Unlock() + k8sWatchMutex.Lock() + state, exists := k8sWatches[key] + k8sWatchMutex.Unlock() if !exists { // Idempotent @@ -491,7 +496,7 @@ func K8sLeaseCancelWatch( // patchPodLabel sets or removes a label on a pod using a JSON merge patch. // If value is non-nil, the label is set; if nil, the label is removed. func patchPodLabel(namespace, podName, labelKey string, value interface{}) error { - if err := ensureClientInitialized(); err != nil { + if err := ensureK8sClientInitialized(); err != nil { return err } @@ -510,7 +515,7 @@ func patchPodLabel(namespace, podName, labelKey string, value interface{}) error ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) defer cancel() - _, err = globalClient.CoreV1().Pods(namespace).Patch( + _, err = k8sGlobalClient.CoreV1().Pods(namespace).Patch( ctx, podName, types.MergePatchType, patchBytes, metav1.PatchOptions{}) if err != nil { return fmt.Errorf("failed to patch pod label: %w", err) @@ -545,5 +550,3 @@ func K8sRemovePodLabel( } return 0 } - -func main() {} diff --git a/mooncake-common/k8s-lease/k8s_lease_wrapper_test.go b/mooncake-common/ha-wrapper/k8s_lease_wrapper_test.go similarity index 94% rename from mooncake-common/k8s-lease/k8s_lease_wrapper_test.go rename to mooncake-common/ha-wrapper/k8s_lease_wrapper_test.go index 47b50e0276..59d27e347d 100644 --- a/mooncake-common/k8s-lease/k8s_lease_wrapper_test.go +++ b/mooncake-common/ha-wrapper/k8s_lease_wrapper_test.go @@ -14,21 +14,21 @@ import ( "k8s.io/utils/ptr" ) -// swapClient replaces globalClient and returns the old one. +// swapClient replaces k8sGlobalClient and returns the old one. func swapClient(newClient kubernetes.Interface) kubernetes.Interface { - clientMutex.Lock() - defer clientMutex.Unlock() - old := globalClient - globalClient = newClient + k8sClientMutex.Lock() + defer k8sClientMutex.Unlock() + old := k8sGlobalClient + k8sGlobalClient = newClient return old } -// swapInitClientFn replaces initClientFn and returns the old one. +// swapInitClientFn replaces k8sInitClientFn and returns the old one. func swapInitClientFn(newFn func() error) func() error { - clientMutex.Lock() - defer clientMutex.Unlock() - old := initClientFn - initClientFn = newFn + k8sClientMutex.Lock() + defer k8sClientMutex.Unlock() + old := k8sInitClientFn + k8sInitClientFn = newFn return old } @@ -77,9 +77,9 @@ func TestPatchPodLabelAutoInitializesClient(t *testing.T) { initCalls := 0 oldInitFn := swapInitClientFn(func() error { initCalls++ - clientMutex.Lock() - globalClient = fakeClient - clientMutex.Unlock() + k8sClientMutex.Lock() + k8sGlobalClient = fakeClient + k8sClientMutex.Unlock() return nil }) defer swapInitClientFn(oldInitFn) @@ -89,7 +89,7 @@ func TestPatchPodLabelAutoInitializesClient(t *testing.T) { t.Fatalf("patchPodLabel(set) failed: %v", err) } if initCalls != 1 { - t.Fatalf("initClientFn calls = %d, want 1", initCalls) + t.Fatalf("k8sInitClientFn calls = %d, want 1", initCalls) } pod, err := fakeClient.CoreV1().Pods("default").Get( @@ -105,7 +105,7 @@ func TestPatchPodLabelAutoInitializesClient(t *testing.T) { t.Fatalf("patchPodLabel(remove) failed: %v", err) } if initCalls != 1 { - t.Fatalf("initClientFn calls after second patch = %d, want 1", initCalls) + t.Fatalf("k8sInitClientFn calls after second patch = %d, want 1", initCalls) } pod, err = fakeClient.CoreV1().Pods("default").Get( diff --git a/mooncake-common/k8s-lease/CMakeLists.txt b/mooncake-common/k8s-lease/CMakeLists.txt deleted file mode 100644 index ed555f17b0..0000000000 --- a/mooncake-common/k8s-lease/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libk8s_lease_wrapper.so - ${CMAKE_CURRENT_BINARY_DIR}/libk8s_lease_wrapper.h - COMMAND bash -c "go mod tidy" && bash -c "go build -buildmode=c-shared -o ${CMAKE_CURRENT_BINARY_DIR}/libk8s_lease_wrapper.so k8s_lease_wrapper.go" && cp ${CMAKE_CURRENT_BINARY_DIR}/libk8s_lease_wrapper.h ${CMAKE_CURRENT_SOURCE_DIR} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - COMMENT "Building K8s Lease Go shared library" - DEPENDS k8s_lease_wrapper.go -) - -set(K8S_LEASE_WRAPPER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/libk8s_lease_wrapper.h) -set(K8S_LEASE_WRAPPER_LIB ${CMAKE_CURRENT_BINARY_DIR}/libk8s_lease_wrapper.so) - -add_custom_target( - build_k8s_lease_wrapper - DEPENDS ${K8S_LEASE_WRAPPER_LIB} -) - -install( - FILES ${K8S_LEASE_WRAPPER_LIB} - DESTINATION lib -) diff --git a/mooncake-common/k8s-lease/cmd/envtest-server/main.go b/mooncake-common/k8s-lease/cmd/envtest-server/main.go deleted file mode 100644 index efda29de61..0000000000 --- a/mooncake-common/k8s-lease/cmd/envtest-server/main.go +++ /dev/null @@ -1,61 +0,0 @@ -// envtest-server starts a real kube-apiserver + etcd via envtest, writes the -// KUBECONFIG path to stdout, and blocks until SIGTERM or SIGINT. This lets -// C++ tests launch it as a subprocess and talk to a real K8s API without a -// full cluster. -package main - -import ( - "fmt" - "os" - "os/signal" - "path/filepath" - "syscall" - - "k8s.io/client-go/tools/clientcmd" - clientcmdapi "k8s.io/client-go/tools/clientcmd/api" - "sigs.k8s.io/controller-runtime/pkg/envtest" -) - -func main() { - env := &envtest.Environment{} - - cfg, err := env.Start() - if err != nil { - fmt.Fprintf(os.Stderr, "envtest start failed: %v\n", err) - os.Exit(1) - } - - // Write a KUBECONFIG file that points at the envtest kube-apiserver. - kubeconfigPath := filepath.Join(os.TempDir(), fmt.Sprintf("envtest-kubeconfig-%d", os.Getpid())) - kubeconfig := clientcmdapi.NewConfig() - kubeconfig.Clusters["envtest"] = &clientcmdapi.Cluster{ - Server: cfg.Host, - CertificateAuthorityData: cfg.CAData, - } - kubeconfig.AuthInfos["envtest"] = &clientcmdapi.AuthInfo{ - ClientCertificateData: cfg.CertData, - ClientKeyData: cfg.KeyData, - } - kubeconfig.Contexts["envtest"] = &clientcmdapi.Context{ - Cluster: "envtest", - AuthInfo: "envtest", - } - kubeconfig.CurrentContext = "envtest" - - if err := clientcmd.WriteToFile(*kubeconfig, kubeconfigPath); err != nil { - fmt.Fprintf(os.Stderr, "failed to write kubeconfig: %v\n", err) - env.Stop() - os.Exit(1) - } - - // Print the kubeconfig path — the parent process reads this from stdout. - fmt.Println(kubeconfigPath) - - // Block until SIGTERM or SIGINT. - sigCh := make(chan os.Signal, 1) - signal.Notify(sigCh, syscall.SIGTERM, syscall.SIGINT) - <-sigCh - - os.Remove(kubeconfigPath) - env.Stop() -} diff --git a/mooncake-common/k8s-lease/go.mod b/mooncake-common/k8s-lease/go.mod deleted file mode 100644 index 4bfc205264..0000000000 --- a/mooncake-common/k8s-lease/go.mod +++ /dev/null @@ -1,60 +0,0 @@ -module github.com/kvcache-ai/Mooncake/mooncake-common/k8s-lease - -go 1.24.0 - -require ( - k8s.io/api v0.34.3 - k8s.io/apimachinery v0.34.3 - k8s.io/client-go v0.34.3 - k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 - sigs.k8s.io/controller-runtime v0.22.5 -) - -require ( - github.com/beorn7/perks v1.0.1 // indirect - github.com/cespare/xxhash/v2 v2.3.0 // indirect - github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect - github.com/emicklei/go-restful/v3 v3.12.2 // indirect - github.com/evanphx/json-patch/v5 v5.9.11 // indirect - github.com/fxamacker/cbor/v2 v2.9.0 // indirect - github.com/go-logr/logr v1.4.3 // indirect - github.com/go-openapi/jsonpointer v0.21.0 // indirect - github.com/go-openapi/jsonreference v0.20.2 // indirect - github.com/go-openapi/swag v0.23.0 // indirect - github.com/gogo/protobuf v1.3.2 // indirect - github.com/google/gnostic-models v0.7.0 // indirect - github.com/google/go-cmp v0.7.0 // indirect - github.com/google/uuid v1.6.0 // indirect - github.com/josharian/intern v1.0.0 // indirect - github.com/json-iterator/go v1.1.12 // indirect - github.com/mailru/easyjson v0.7.7 // indirect - github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect - github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_golang v1.23.2 // indirect - github.com/prometheus/client_model v0.6.2 // indirect - github.com/prometheus/common v0.66.1 // indirect - github.com/prometheus/procfs v0.16.1 // indirect - github.com/spf13/pflag v1.0.9 // indirect - github.com/x448/float16 v0.8.4 // indirect - go.yaml.in/yaml/v2 v2.4.3 // indirect - go.yaml.in/yaml/v3 v3.0.4 // indirect - golang.org/x/net v0.47.0 // indirect - golang.org/x/oauth2 v0.30.0 // indirect - golang.org/x/sys v0.38.0 // indirect - golang.org/x/term v0.37.0 // indirect - golang.org/x/text v0.31.0 // indirect - golang.org/x/time v0.9.0 // indirect - google.golang.org/protobuf v1.36.8 // indirect - gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect - gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect - k8s.io/apiextensions-apiserver v0.34.3 // indirect - k8s.io/klog/v2 v2.130.1 // indirect - k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 // indirect - sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect - sigs.k8s.io/randfill v1.0.0 // indirect - sigs.k8s.io/structured-merge-diff/v6 v6.3.2-0.20260122202528-d9cc6641c482 // indirect - sigs.k8s.io/yaml v1.6.0 // indirect -) diff --git a/mooncake-common/k8s-lease/k8s_lease_integration_test.go b/mooncake-common/k8s-lease/k8s_lease_integration_test.go deleted file mode 100644 index 5db713fe14..0000000000 --- a/mooncake-common/k8s-lease/k8s_lease_integration_test.go +++ /dev/null @@ -1,571 +0,0 @@ -//go:build integration - -package main - -import ( - "context" - "fmt" - "os" - "sync" - "testing" - "time" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/rest" - "k8s.io/client-go/tools/leaderelection" - "k8s.io/client-go/tools/leaderelection/resourcelock" - - "sigs.k8s.io/controller-runtime/pkg/envtest" -) - -var ( - testEnv *envtest.Environment - testConfig *rest.Config -) - -type electionStateNoRelease struct { - cancel context.CancelFunc - elected chan struct{} - lost chan struct{} -} - -func TestMain(m *testing.M) { - testEnv = &envtest.Environment{} - - var err error - testConfig, err = testEnv.Start() - if err != nil { - fmt.Fprintf(os.Stderr, "failed to start envtest: %v\n", err) - os.Exit(1) - } - - // Set up global client for the wrapper - client, err := kubernetes.NewForConfig(testConfig) - if err != nil { - fmt.Fprintf(os.Stderr, "failed to create clientset: %v\n", err) - testEnv.Stop() - os.Exit(1) - } - clientMutex.Lock() - globalClient = client - clientMutex.Unlock() - - code := m.Run() - - testEnv.Stop() - os.Exit(code) -} - -func runElectionWithoutRelease(namespace, leaseName, identity string, - leaseDurationSec, renewDeadlineSec, retryPeriodSec int) (*electionStateNoRelease, error) { - if err := ensureClientInitialized(); err != nil { - return nil, err - } - - ctx, cancel := context.WithCancel(context.Background()) - state := &electionStateNoRelease{ - cancel: cancel, - elected: make(chan struct{}), - lost: make(chan struct{}), - } - - lock := &resourcelock.LeaseLock{ - LeaseMeta: metav1.ObjectMeta{ - Name: leaseName, - Namespace: namespace, - }, - Client: globalClient.CoordinationV1(), - LockConfig: resourcelock.ResourceLockConfig{ - Identity: identity, - }, - } - - le, err := leaderelection.NewLeaderElector(leaderelection.LeaderElectionConfig{ - Lock: lock, - LeaseDuration: time.Duration(leaseDurationSec) * time.Second, - RenewDeadline: time.Duration(renewDeadlineSec) * time.Second, - RetryPeriod: time.Duration(retryPeriodSec) * time.Second, - ReleaseOnCancel: false, - Callbacks: leaderelection.LeaderCallbacks{ - OnStartedLeading: func(ctx context.Context) { - close(state.elected) - <-ctx.Done() - }, - OnStoppedLeading: func() { - close(state.lost) - }, - }, - }) - if err != nil { - cancel() - return nil, fmt.Errorf("failed to create leader elector: %w", err) - } - - go le.Run(ctx) - return state, nil -} - -// TestSingleLeaderElection verifies a single candidate becomes leader. -func TestSingleLeaderElection(t *testing.T) { - ns := "default" - lease := "single-election-test" - identity := "node-1:8080" - - err := runElection(ns, lease, identity, 5, 4, 1) - if err != nil { - t.Fatalf("runElection failed: %v", err) - } - - // Wait for elected - key := electionKey(ns, lease) - electionMutex.Lock() - state := elections[key] - electionMutex.Unlock() - - select { - case <-state.elected: - // success - case <-time.After(15 * time.Second): - t.Fatal("timed out waiting for election") - } - - // Verify holder via getHolder - holder, transitions, err := getHolder(ns, lease) - if err != nil { - t.Fatalf("getHolder failed: %v", err) - } - if holder != identity { - t.Errorf("expected holder %q, got %q", identity, holder) - } - // First election — transitions should be 0 or 1 - if transitions < 0 { - t.Errorf("expected non-negative transitions, got %d", transitions) - } - - // Cancel the election - electionMutex.Lock() - state = elections[key] - electionMutex.Unlock() - state.cancel() - - select { - case <-state.lost: - // success - case <-time.After(10 * time.Second): - t.Fatal("timed out waiting for election loss after cancel") - } -} - -// TestLeaderEpoch verifies leaseTransitions increments across elections. -func TestLeaderEpoch(t *testing.T) { - ns := "default" - lease := "epoch-test" - - // First election - err := runElection(ns, lease, "node-epoch-1:8080", 5, 4, 1) - if err != nil { - t.Fatalf("first runElection failed: %v", err) - } - - key := electionKey(ns, lease) - electionMutex.Lock() - state1 := elections[key] - electionMutex.Unlock() - - select { - case <-state1.elected: - case <-time.After(15 * time.Second): - t.Fatal("timed out on first election") - } - - _, trans1, _ := getHolder(ns, lease) - - // Cancel first election and wait for loss - state1.cancel() - select { - case <-state1.lost: - case <-time.After(10 * time.Second): - t.Fatal("timed out waiting for first election loss") - } - - // Wait for lease to expire / be released - time.Sleep(2 * time.Second) - - // Second election - err = runElection(ns, lease, "node-epoch-2:8080", 5, 4, 1) - if err != nil { - t.Fatalf("second runElection failed: %v", err) - } - - electionMutex.Lock() - state2 := elections[key] - electionMutex.Unlock() - - select { - case <-state2.elected: - case <-time.After(15 * time.Second): - t.Fatal("timed out on second election") - } - - _, trans2, _ := getHolder(ns, lease) - if trans2 <= trans1 { - t.Errorf("expected transitions to increment: first=%d, second=%d", trans1, trans2) - } - - state2.cancel() - select { - case <-state2.lost: - case <-time.After(10 * time.Second): - t.Fatal("timed out waiting for second election loss") - } -} - -// TestSequentialLeadershipHandoff tests that a second candidate can acquire -// leadership after the first one releases it. -func TestSequentialLeadershipHandoff(t *testing.T) { - ns := "default" - lease := "two-candidate-test" - - err1 := runElection(ns, lease, "candidate-a:8080", 5, 4, 1) - if err1 != nil { - t.Fatalf("first runElection failed: %v", err1) - } - - key := electionKey(ns, lease) - electionMutex.Lock() - stateA := elections[key] - electionMutex.Unlock() - - // Wait for first candidate to win - select { - case <-stateA.elected: - case <-time.After(15 * time.Second): - t.Fatal("timed out waiting for first candidate") - } - - // Verify holder is candidate-a - holder, _, err := getHolder(ns, lease) - if err != nil { - t.Fatalf("getHolder failed: %v", err) - } - if holder != "candidate-a:8080" { - t.Errorf("expected candidate-a, got %q", holder) - } - - // Cancel candidate-a - stateA.cancel() - select { - case <-stateA.lost: - case <-time.After(10 * time.Second): - t.Fatal("timed out waiting for candidate-a loss") - } - - // Wait for lease to expire - time.Sleep(2 * time.Second) - - // Start candidate-b - err2 := runElection(ns, lease, "candidate-b:8080", 5, 4, 1) - if err2 != nil { - t.Fatalf("second runElection failed: %v", err2) - } - - electionMutex.Lock() - stateB := elections[key] - electionMutex.Unlock() - - select { - case <-stateB.elected: - case <-time.After(15 * time.Second): - t.Fatal("timed out waiting for candidate-b") - } - - holder, _, err = getHolder(ns, lease) - if err != nil { - t.Fatalf("getHolder after takeover failed: %v", err) - } - if holder != "candidate-b:8080" { - t.Errorf("expected candidate-b, got %q", holder) - } - - stateB.cancel() - select { - case <-stateB.lost: - case <-time.After(10 * time.Second): - t.Fatal("timed out waiting for candidate-b loss") - } -} - -// TestConcurrentCandidateElection starts two candidates simultaneously and -// verifies that exactly one wins leadership. -func TestConcurrentCandidateElection(t *testing.T) { - ns := "default" - lease := "concurrent-election-test" - - type result struct { - identity string - elected bool - } - - candidates := []string{"candidate-a:8080", "candidate-b:8080"} - results := make(chan result, len(candidates)) - - lock := func(identity string) *resourcelock.LeaseLock { - return &resourcelock.LeaseLock{ - LeaseMeta: metav1.ObjectMeta{ - Name: lease, - Namespace: ns, - }, - Client: globalClient.CoordinationV1(), - LockConfig: resourcelock.ResourceLockConfig{ - Identity: identity, - }, - } - } - - var wg sync.WaitGroup - for _, id := range candidates { - wg.Add(1) - go func(identity string) { - defer wg.Done() - - // Short timeout: enough for one to acquire, but the loser - // times out before the winner's lease could expire. - ctx, cancel := context.WithTimeout(context.Background(), 8*time.Second) - defer cancel() - - elected := make(chan struct{}) - le, err := leaderelection.NewLeaderElector(leaderelection.LeaderElectionConfig{ - Lock: lock(identity), - LeaseDuration: 5 * time.Second, - RenewDeadline: 3 * time.Second, - RetryPeriod: 1 * time.Second, - ReleaseOnCancel: true, - Callbacks: leaderelection.LeaderCallbacks{ - OnStartedLeading: func(ctx context.Context) { - close(elected) - <-ctx.Done() - }, - OnStoppedLeading: func() {}, - }, - }) - if err != nil { - t.Errorf("NewLeaderElector(%s): %v", identity, err) - return - } - - go le.Run(ctx) - - select { - case <-elected: - results <- result{identity, true} - // Keep holding until context expires (8s total). - // Winner does NOT release early, so loser cannot - // re-acquire within its own 8s window. - <-ctx.Done() - case <-ctx.Done(): - results <- result{identity, false} - } - }(id) - } - - wg.Wait() - close(results) - - winners := 0 - for r := range results { - if r.elected { - winners++ - t.Logf("winner: %s", r.identity) - } - } - - if winners != 1 { - t.Fatalf("expected exactly 1 winner, got %d", winners) - } -} - -// TestCancelElection tests that cancelling an election makes WaitLost return. -func TestCancelElection(t *testing.T) { - ns := "default" - lease := "cancel-test" - - err := runElection(ns, lease, "cancel-node:8080", 5, 4, 1) - if err != nil { - t.Fatalf("runElection failed: %v", err) - } - - key := electionKey(ns, lease) - electionMutex.Lock() - state := elections[key] - electionMutex.Unlock() - - // Wait for elected - select { - case <-state.elected: - case <-time.After(15 * time.Second): - t.Fatal("timed out waiting for election") - } - - // Cancel - state.cancel() - - // WaitLost should return promptly - select { - case <-state.lost: - // success - case <-time.After(10 * time.Second): - t.Fatal("WaitLost did not return after cancel") - } -} - -// TestGetHolderDuringElection verifies getHolder works while election is active. -func TestGetHolderDuringElection(t *testing.T) { - ns := "default" - lease := "active-get-holder-test" - identity := "active-node:8080" - - err := runElection(ns, lease, identity, 5, 4, 1) - if err != nil { - t.Fatalf("runElection failed: %v", err) - } - - key := electionKey(ns, lease) - electionMutex.Lock() - state := elections[key] - electionMutex.Unlock() - - select { - case <-state.elected: - case <-time.After(15 * time.Second): - t.Fatal("timed out waiting for election") - } - - // Concurrent getHolder calls during active election - var wg sync.WaitGroup - for i := 0; i < 5; i++ { - wg.Add(1) - go func() { - defer wg.Done() - holder, _, err := getHolder(ns, lease) - if err != nil { - t.Errorf("getHolder during election failed: %v", err) - return - } - if holder != identity { - t.Errorf("expected %q, got %q", identity, holder) - } - }() - } - wg.Wait() - - state.cancel() - <-state.lost -} - -// TestGetHolderReturnsEmptyAfterLeaderDeath verifies that after a leader stops -// renewing its lease without releasing it, getHolder returns an empty holder -// once the lease expires. This is the integration-level counterpart to the -// unit test TestGetHolderReturnsEmptyForExpiredLease. -func TestGetHolderReturnsEmptyAfterLeaderDeath(t *testing.T) { - ns := "default" - lease := "expired-leader-test" - identity := "doomed-leader:8080" - - // Acquire leadership without ReleaseOnCancel so canceling simulates a dead - // leader that stops renewing and leaves the old holder until expiry. - state, err := runElectionWithoutRelease(ns, lease, identity, 5, 4, 1) - if err != nil { - t.Fatalf("runElection failed: %v", err) - } - - select { - case <-state.elected: - case <-time.After(15 * time.Second): - t.Fatal("timed out waiting for election") - } - - // Verify holder while active. - holder, _, err := getHolder(ns, lease) - if err != nil { - t.Fatalf("getHolder (active) failed: %v", err) - } - if holder != identity { - t.Fatalf("expected active holder %q, got %q", identity, holder) - } - - // Simulate leader death: stop renewing without explicitly releasing. - state.cancel() - select { - case <-state.lost: - case <-time.After(10 * time.Second): - t.Fatal("timed out waiting for loss") - } - - // Wait for the lease to expire (leaseDuration=5s, add margin). - time.Sleep(7 * time.Second) - - // After expiry, getHolder must return empty holder so that the - // supervisor will attempt acquisition. - holder, _, err = getHolder(ns, lease) - if err != nil { - t.Fatalf("getHolder (expired) failed: %v", err) - } - if holder != "" { - t.Errorf("expected empty holder after lease expiry, got %q", holder) - } -} - -// TestFailoverAfterLeaderDeath verifies that a new candidate can acquire -// leadership after the previous leader dies and its lease expires. -func TestFailoverAfterLeaderDeath(t *testing.T) { - ns := "default" - lease := "failover-test" - - // First leader acquires without ReleaseOnCancel so canceling leaves the - // old holder in place until the lease naturally expires. - state1, err := runElectionWithoutRelease(ns, lease, "leader-1:8080", 5, 4, 1) - if err != nil { - t.Fatalf("first runElection failed: %v", err) - } - - select { - case <-state1.elected: - case <-time.After(15 * time.Second): - t.Fatal("timed out waiting for first election") - } - - // Simulate crash: cancel without release, wait for expiry. - state1.cancel() - <-state1.lost - time.Sleep(7 * time.Second) - - // Second candidate should be able to acquire. - err = runElection(ns, lease, "leader-2:8080", 5, 4, 1) - if err != nil { - t.Fatalf("second runElection failed: %v", err) - } - - key := electionKey(ns, lease) - electionMutex.Lock() - state2 := elections[key] - electionMutex.Unlock() - - select { - case <-state2.elected: - // success — failover worked - case <-time.After(15 * time.Second): - t.Fatal("second candidate failed to acquire after leader death") - } - - holder, _, err := getHolder(ns, lease) - if err != nil { - t.Fatalf("getHolder after failover failed: %v", err) - } - if holder != "leader-2:8080" { - t.Errorf("expected new leader %q, got %q", "leader-2:8080", holder) - } - - state2.cancel() - <-state2.lost -} diff --git a/mooncake-p2p-store/build.sh b/mooncake-p2p-store/build.sh index 774e6ba688..44356b58a2 100644 --- a/mooncake-p2p-store/build.sh +++ b/mooncake-p2p-store/build.sh @@ -61,7 +61,7 @@ if [ "$USE_ETCD" = "ON" ]; then if [ "$USE_ETCD_LEGACY" = "ON" ]; then EXT_LDFLAGS+=" -letcd-cpp-api -lprotobuf -lgrpc++ -lgrpc" else - EXT_LDFLAGS+=" -L$BUILD_DIR/mooncake-common/etcd -letcd_wrapper" + EXT_LDFLAGS+=" -L$BUILD_DIR/mooncake-common/ha-wrapper -lmooncake_ha_wrapper" fi fi diff --git a/mooncake-store/CMakeLists.txt b/mooncake-store/CMakeLists.txt index 5cf0f8a4ea..49fc1805e9 100644 --- a/mooncake-store/CMakeLists.txt +++ b/mooncake-store/CMakeLists.txt @@ -4,74 +4,77 @@ project(MooncakeStore VERSION 2.0.0) set(MOONCAKE_STORE_VERSION ${PROJECT_VERSION}) # Read display version from mooncake-wheel/pyproject.toml -set(PYPROJECT_TOML_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../mooncake-wheel/pyproject.toml") +set(PYPROJECT_TOML_PATH + "${CMAKE_CURRENT_SOURCE_DIR}/../mooncake-wheel/pyproject.toml") if(EXISTS "${PYPROJECT_TOML_PATH}") - file(READ "${PYPROJECT_TOML_PATH}" PYPROJECT_CONTENT) - string(REGEX MATCH "version = \"([^\"]+)\"" _ "${PYPROJECT_CONTENT}") - set(MOONCAKE_DISPLAY_VERSION_BASE "${CMAKE_MATCH_1}") + file(READ "${PYPROJECT_TOML_PATH}" PYPROJECT_CONTENT) + string(REGEX MATCH "version = \"([^\"]+)\"" _ "${PYPROJECT_CONTENT}") + set(MOONCAKE_DISPLAY_VERSION_BASE "${CMAKE_MATCH_1}") endif() if(NOT MOONCAKE_DISPLAY_VERSION_BASE) - message(WARNING "Could not read display version from ${PYPROJECT_TOML_PATH}; falling back to MOONCAKE_STORE_VERSION") - set(MOONCAKE_DISPLAY_VERSION_BASE "${MOONCAKE_STORE_VERSION}") + message( + WARNING + "Could not read display version from ${PYPROJECT_TOML_PATH}; falling back to MOONCAKE_STORE_VERSION" + ) + set(MOONCAKE_DISPLAY_VERSION_BASE "${MOONCAKE_STORE_VERSION}") endif() # Get short git commit hash for display find_package(Git QUIET) if(GIT_FOUND) - execute_process( - COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - OUTPUT_VARIABLE GIT_SHORT_HASH - OUTPUT_STRIP_TRAILING_WHITESPACE - ERROR_QUIET - ) + execute_process( + COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE GIT_SHORT_HASH + OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET) endif() if(NOT GIT_SHORT_HASH) - set(GIT_SHORT_HASH "unknown") + set(GIT_SHORT_HASH "unknown") endif() -set(MOONCAKE_DISPLAY_VERSION "${MOONCAKE_DISPLAY_VERSION_BASE} (git: ${GIT_SHORT_HASH})") +set(MOONCAKE_DISPLAY_VERSION + "${MOONCAKE_DISPLAY_VERSION_BASE} (git: ${GIT_SHORT_HASH})") configure_file(include/version.h.in include/version.h @ONLY) -if (STORE_USE_ETCD) - set(ETCD_WRAPPER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/../mooncake-common/etcd/) - set(ETCD_WRAPPER_LIB ${CMAKE_CURRENT_BINARY_DIR}/../mooncake-common/etcd/libetcd_wrapper.so) +if((USE_ETCD AND NOT USE_ETCD_LEGACY) + OR STORE_USE_ETCD + OR STORE_USE_K8S_LEASE) + set(ETCD_WRAPPER_INCLUDE ${HA_WRAPPER_INCLUDE}) + set(ETCD_WRAPPER_LIB ${HA_WRAPPER_LIB}) + set(K8S_LEASE_WRAPPER_INCLUDE ${HA_WRAPPER_INCLUDE}) + set(K8S_LEASE_WRAPPER_LIB ${HA_WRAPPER_LIB}) endif() -if (STORE_USE_K8S_LEASE) - set(K8S_LEASE_WRAPPER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/../mooncake-common/k8s-lease/) - set(K8S_LEASE_WRAPPER_LIB ${CMAKE_CURRENT_BINARY_DIR}/../mooncake-common/k8s-lease/libk8s_lease_wrapper.so) +if(STORE_USE_REDIS) + find_path(MOONCAKE_STORE_HIREDIS_INCLUDE_DIR hiredis/hiredis.h REQUIRED) + find_library(MOONCAKE_STORE_HIREDIS_LIBRARY hiredis REQUIRED) + message(STATUS "Redis HA backend: Enabled") endif() -if (STORE_USE_REDIS) - find_path(MOONCAKE_STORE_HIREDIS_INCLUDE_DIR hiredis/hiredis.h REQUIRED) - find_library(MOONCAKE_STORE_HIREDIS_LIBRARY hiredis REQUIRED) - message(STATUS "Redis HA backend: Enabled") -endif() - -if (NOT STORE_USE_ETCD AND NOT STORE_USE_REDIS AND NOT STORE_USE_K8S_LEASE) - message(STATUS "Store HA backends are disabled") +if(NOT STORE_USE_ETCD + AND NOT STORE_USE_REDIS + AND NOT STORE_USE_K8S_LEASE) + message(STATUS "Store HA backends are disabled") endif() # Add include directories include_directories( - ${CMAKE_CURRENT_SOURCE_DIR}/include/cachelib_memory_allocator/include - ${CMAKE_CURRENT_SOURCE_DIR}/include/cachelib_memory_allocator/fake_include - ${CMAKE_CURRENT_SOURCE_DIR}/include/cachelib_memory_allocator/ - ${CMAKE_CURRENT_SOURCE_DIR}/include/mooncake-store/proto/ - ${CMAKE_CURRENT_SOURCE_DIR}/include/ - ${CMAKE_CURRENT_BINARY_DIR}/include/ - ${CMAKE_CURRENT_SOURCE_DIR}/../mooncake-transfer-engine/include - ${ETCD_WRAPPER_INCLUDE} -) + ${CMAKE_CURRENT_SOURCE_DIR}/include/cachelib_memory_allocator/include + ${CMAKE_CURRENT_SOURCE_DIR}/include/cachelib_memory_allocator/fake_include + ${CMAKE_CURRENT_SOURCE_DIR}/include/cachelib_memory_allocator/ + ${CMAKE_CURRENT_SOURCE_DIR}/include/mooncake-store/proto/ + ${CMAKE_CURRENT_SOURCE_DIR}/include/ + ${CMAKE_CURRENT_BINARY_DIR}/include/ + ${CMAKE_CURRENT_SOURCE_DIR}/../mooncake-transfer-engine/include + ${ETCD_WRAPPER_INCLUDE}) # Add subdirectories add_subdirectory(src) -if (BUILD_UNIT_TESTS) - add_subdirectory(tests) +if(BUILD_UNIT_TESTS) + add_subdirectory(tests) endif() -if (BUILD_BENCHMARK) - add_subdirectory(benchmarks) +if(BUILD_BENCHMARK) + add_subdirectory(benchmarks) endif() diff --git a/mooncake-store/go/build.sh b/mooncake-store/go/build.sh index 52304cc7e3..a66992b292 100755 --- a/mooncake-store/go/build.sh +++ b/mooncake-store/go/build.sh @@ -54,7 +54,7 @@ if [ "$USE_ETCD" = "ON" ]; then if [ "$USE_ETCD_LEGACY" = "ON" ]; then CGO_LDFLAGS+=" -letcd-cpp-api -lprotobuf -lgrpc++ -lgrpc" else - CGO_LDFLAGS+=" -L${BUILD_DIR}/mooncake-common/etcd -letcd_wrapper" + CGO_LDFLAGS+=" -L${BUILD_DIR}/mooncake-common/ha-wrapper -lmooncake_ha_wrapper" fi fi diff --git a/mooncake-store/include/types.h b/mooncake-store/include/types.h index e2ef20afbc..4e2067ad8e 100644 --- a/mooncake-store/include/types.h +++ b/mooncake-store/include/types.h @@ -17,7 +17,7 @@ #include "ylt/struct_pack.hpp" #ifdef STORE_USE_ETCD -#include "libetcd_wrapper.h" +#include "libmooncake_ha_wrapper.h" #endif namespace mooncake { diff --git a/mooncake-store/rust/README.md b/mooncake-store/rust/README.md index 65d4912525..48e4551f1b 100644 --- a/mooncake-store/rust/README.md +++ b/mooncake-store/rust/README.md @@ -51,7 +51,7 @@ directory, add the relevant build outputs to `LD_LIBRARY_PATH`: ```bash export LD_LIBRARY_PATH=$MOONCAKE_BUILD_DIR/mooncake-common:\ -$MOONCAKE_BUILD_DIR/mooncake-common/etcd:\ +$MOONCAKE_BUILD_DIR/mooncake-common/ha-wrapper:\ $MOONCAKE_BUILD_DIR/mooncake-store/src:\ $MOONCAKE_BUILD_DIR/mooncake-store/src/cachelib_memory_allocator:\ $MOONCAKE_BUILD_DIR/mooncake-transfer-engine/src:\ diff --git a/mooncake-store/rust/build.rs b/mooncake-store/rust/build.rs index 5622d9550a..d8588f8c11 100644 --- a/mooncake-store/rust/build.rs +++ b/mooncake-store/rust/build.rs @@ -257,7 +257,7 @@ fn main() { build_dir.join("mooncake-transfer-engine/src"), build_dir.join("mooncake-transfer-engine/src/common/base"), build_dir.join("mooncake-asio"), - build_dir.join("mooncake-common/etcd"), + build_dir.join("mooncake-common/ha-wrapper"), ] { push_existing_dir(&mut search_dirs, dir); } @@ -272,7 +272,7 @@ fn main() { default_build_dir.join("mooncake-transfer-engine/src/common/base"), default_build_dir.join("mooncake-asio"), default_build_dir.join("mooncake-common"), - default_build_dir.join("mooncake-common/etcd"), + default_build_dir.join("mooncake-common/ha-wrapper"), PathBuf::from("/usr/local/lib"), PathBuf::from("/usr/lib/x86_64-linux-gnu"), PathBuf::from("/lib/x86_64-linux-gnu"), diff --git a/mooncake-store/src/CMakeLists.txt b/mooncake-store/src/CMakeLists.txt index e2c2830c38..29cbaae3bb 100644 --- a/mooncake-store/src/CMakeLists.txt +++ b/mooncake-store/src/CMakeLists.txt @@ -155,7 +155,7 @@ if(STORE_USE_K8S_LEASE) k8s_lease_helper.cpp PROPERTIES OBJECT_DEPENDS - "${CMAKE_BINARY_DIR}/mooncake-common/k8s-lease/libk8s_lease_wrapper.h") + "${CMAKE_BINARY_DIR}/mooncake-common/ha-wrapper/libmooncake_ha_wrapper.h") endif() if(USE_NOF) @@ -260,11 +260,10 @@ target_link_libraries( ${SPDK_STATIC_LIBS} asio_shared PRIVATE transfer_engine) -if(STORE_USE_ETCD) - add_dependencies(mooncake_store build_etcd_wrapper) -endif() -if(STORE_USE_K8S_LEASE) - add_dependencies(mooncake_store build_k8s_lease_wrapper) +if((USE_ETCD AND NOT USE_ETCD_LEGACY) + OR STORE_USE_ETCD + OR STORE_USE_K8S_LEASE) + add_dependencies(mooncake_store build_ha_wrapper) endif() if(URING_LIB AND URING_INCLUDE) @@ -302,8 +301,10 @@ target_link_libraries( ${MASTER_EXTRA_LIBS} asio_shared) -if(STORE_USE_ETCD) - add_dependencies(mooncake_master build_etcd_wrapper) +if((USE_ETCD AND NOT USE_ETCD_LEGACY) + OR STORE_USE_ETCD + OR STORE_USE_K8S_LEASE) + add_dependencies(mooncake_master build_ha_wrapper) endif() # Client server binary diff --git a/mooncake-store/src/etcd_helper.cpp b/mooncake-store/src/etcd_helper.cpp index ba096c720e..58d8b62c5a 100644 --- a/mooncake-store/src/etcd_helper.cpp +++ b/mooncake-store/src/etcd_helper.cpp @@ -1,7 +1,7 @@ #include "etcd_helper.h" #ifdef STORE_USE_ETCD -#include "libetcd_wrapper.h" +#include "libmooncake_ha_wrapper.h" #endif #include diff --git a/mooncake-store/src/k8s_lease_helper.cpp b/mooncake-store/src/k8s_lease_helper.cpp index 301fc31de6..1e80379f35 100644 --- a/mooncake-store/src/k8s_lease_helper.cpp +++ b/mooncake-store/src/k8s_lease_helper.cpp @@ -1,7 +1,7 @@ #include "k8s_lease_helper.h" #ifdef STORE_USE_K8S_LEASE -#include "libk8s_lease_wrapper.h" +#include "libmooncake_ha_wrapper.h" #endif #include diff --git a/mooncake-transfer-engine/src/CMakeLists.txt b/mooncake-transfer-engine/src/CMakeLists.txt index 1bd77e1d80..ee558e8ab9 100644 --- a/mooncake-transfer-engine/src/CMakeLists.txt +++ b/mooncake-transfer-engine/src/CMakeLists.txt @@ -29,10 +29,8 @@ if(USE_ETCD) target_link_libraries(transfer_engine PUBLIC etcd-cpp-api) endif() else() - add_dependencies(transfer_engine build_etcd_wrapper) - set(ETCD_WRAPPER_LIB - ${CMAKE_CURRENT_BINARY_DIR}/../../mooncake-common/etcd/libetcd_wrapper.so - ) + add_dependencies(transfer_engine build_ha_wrapper) + set(ETCD_WRAPPER_LIB ${HA_WRAPPER_LIB}) target_link_libraries(transfer_engine PRIVATE ${ETCD_WRAPPER_LIB}) endif() endif() @@ -97,8 +95,8 @@ if(USE_HIP) target_include_directories(transfer_engine PRIVATE ${HIP_INCLUDE_DIRS}) target_link_libraries(transfer_engine PUBLIC hip::host rt) - # Optional dmabuf MR registration path (requires hsa-runtime64 and a - # kernel with CONFIG_PCI_P2PDMA + CONFIG_DMABUF_MOVE_NOTIFY). + # Optional dmabuf MR registration path (requires hsa-runtime64 and a kernel + # with CONFIG_PCI_P2PDMA + CONFIG_DMABUF_MOVE_NOTIFY). option(USE_HIP_DMABUF "Enable HIP dmabuf RDMA MR registration" ON) if(USE_HIP_DMABUF) find_package(hsa-runtime64 CONFIG) @@ -107,7 +105,8 @@ if(USE_HIP) target_link_libraries(transfer_engine PUBLIC hsa-runtime64::hsa-runtime64) message(STATUS "HIP dmabuf MR registration enabled (hsa-runtime64 found)") else() - message(STATUS "HIP dmabuf MR registration disabled (hsa-runtime64 not found)") + message( + STATUS "HIP dmabuf MR registration disabled (hsa-runtime64 not found)") endif() else() message(STATUS "HIP dmabuf MR registration disabled (USE_HIP_DMABUF=OFF)") diff --git a/mooncake-transfer-engine/src/transfer_metadata_plugin.cpp b/mooncake-transfer-engine/src/transfer_metadata_plugin.cpp index 4df028d239..0ce00c44e1 100644 --- a/mooncake-transfer-engine/src/transfer_metadata_plugin.cpp +++ b/mooncake-transfer-engine/src/transfer_metadata_plugin.cpp @@ -40,7 +40,7 @@ #ifdef USE_ETCD_LEGACY #include #else -#include +#include #endif #endif // USE_ETCD diff --git a/mooncake-transfer-engine/tent/include/tent/metastore/etcd.h b/mooncake-transfer-engine/tent/include/tent/metastore/etcd.h index f3c52b6a59..6f5b09a655 100644 --- a/mooncake-transfer-engine/tent/include/tent/metastore/etcd.h +++ b/mooncake-transfer-engine/tent/include/tent/metastore/etcd.h @@ -16,7 +16,7 @@ #define TENT_ETCD_H #include -#include +#include #include "tent/runtime/metastore.h" @@ -44,4 +44,4 @@ class EtcdMetaStore : public MetaStore { } // namespace tent } // namespace mooncake -#endif // TENT_ETCD_H \ No newline at end of file +#endif // TENT_ETCD_H diff --git a/mooncake-transfer-engine/tent/src/metastore/CMakeLists.txt b/mooncake-transfer-engine/tent/src/metastore/CMakeLists.txt index 9755a01a81..384a669d73 100644 --- a/mooncake-transfer-engine/tent/src/metastore/CMakeLists.txt +++ b/mooncake-transfer-engine/tent/src/metastore/CMakeLists.txt @@ -2,37 +2,38 @@ cmake_minimum_required(VERSION 3.12) project(tent_metastore LANGUAGES CXX) function(add_metastore_module name) - set(options) - set(oneValueArgs) - set(multiValueArgs DEPS) - cmake_parse_arguments(RES "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + set(options) + set(oneValueArgs) + set(multiValueArgs DEPS) + cmake_parse_arguments(RES "${options}" "${oneValueArgs}" "${multiValueArgs}" + ${ARGN}) - set(SRCS ${name}.cpp) + set(SRCS ${name}.cpp) - add_library(metastore_${name} STATIC ${SRCS}) - if(RES_DEPS) - target_link_libraries(metastore_${name} PUBLIC ${RES_DEPS} tent_common) - endif() + add_library(metastore_${name} STATIC ${SRCS}) + if(RES_DEPS) + target_link_libraries(metastore_${name} PUBLIC ${RES_DEPS} tent_common) + endif() endfunction() -if (USE_ETCD) - message(STATUS "Etcd metastore support: Enabled") - add_metastore_module(etcd DEPS ${CMAKE_BINARY_DIR}/mooncake-common/etcd/libetcd_wrapper.so) - add_dependencies(metastore_etcd build_etcd_wrapper) +if(USE_ETCD) + message(STATUS "Etcd metastore support: Enabled") + add_metastore_module(etcd DEPS ${HA_WRAPPER_LIB}) + add_dependencies(metastore_etcd build_ha_wrapper) endif() find_package(CURL) if(CURL_FOUND) - message(STATUS "Http metastore support: Enabled") - add_metastore_module(http DEPS CURL::libcurl) + message(STATUS "Http metastore support: Enabled") + add_metastore_module(http DEPS CURL::libcurl) else() - message(STATUS "Http metastore support: Disabled") + message(STATUS "Http metastore support: Disabled") endif() find_library(HIREDIS_LIB hiredis) if(HIREDIS_LIB) - message(STATUS "Redis metastore support: Enabled") - add_metastore_module(redis DEPS ${HIREDIS_LIB}) + message(STATUS "Redis metastore support: Enabled") + add_metastore_module(redis DEPS ${HIREDIS_LIB}) else() - message(STATUS "Redis metastore support: Disabled") -endif() \ No newline at end of file + message(STATUS "Redis metastore support: Disabled") +endif() diff --git a/scripts/ascend/dependencies_openeuler.sh b/scripts/ascend/dependencies_openeuler.sh index 0d6b83f957..f2ef05758a 100644 --- a/scripts/ascend/dependencies_openeuler.sh +++ b/scripts/ascend/dependencies_openeuler.sh @@ -74,7 +74,7 @@ echo "This script installs (same scope as root dependencies.sh):" echo " - openEuler/RHEL system packages (dnf/yum)" echo " - Git submodules (pybind11, yalantinglibs, ...)" echo " - yalantinglibs (from extern/ submodule)" -echo " - Go ${GOVER} (for USE_ETCD / libetcd_wrapper.so)" +echo " - Go ${GOVER} (for USE_ETCD / libmooncake_ha_wrapper.so)" echo "Run scripts/ascend/dependencies_ascend_installation.sh afterward for Ascend extras." echo @@ -307,4 +307,4 @@ echo "Next steps (example NPU release build):" echo " source /usr/local/Ascend/cann-9.0.0/set_env.sh" echo " mkdir -p build && cd build" echo " cmake .. -DUSE_ASCEND_DIRECT=ON -DUSE_CUDA=OFF -DWITH_EP=OFF -DUSE_HTTP=ON -DUSE_ETCD=ON -DSTORE_USE_ETCD=ON" -echo " cmake --build . -j\$(nproc) && cmake --install ." \ No newline at end of file +echo " cmake --build . -j\$(nproc) && cmake --install ." diff --git a/scripts/build_wheel.sh b/scripts/build_wheel.sh index e6bc964f16..06a2d0710d 100755 --- a/scripts/build_wheel.sh +++ b/scripts/build_wheel.sh @@ -17,7 +17,7 @@ BUILD_DIR_ABS="$(pwd)/${BUILD_DIR}" echo "Building wheel for Python ${PYTHON_VERSION} with output directory ${OUTPUT_DIR}" # Ensure LD_LIBRARY_PATH includes /usr/local/lib -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${BUILD_DIR_ABS}/mooncake-common:${BUILD_DIR_ABS}/mooncake-common/etcd:${BUILD_DIR_ABS}/mooncake-common/k8s-lease:/usr/local/lib +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:${BUILD_DIR_ABS}/mooncake-common:${BUILD_DIR_ABS}/mooncake-common/ha-wrapper:/usr/local/lib echo "Cleaning wheel-build directory" rm -rf mooncake-wheel/mooncake_transfer_engine* @@ -62,16 +62,10 @@ if [ -f ${BUILD_DIR}/mooncake-common/src/libmooncake_common.so ]; then cp ${BUILD_DIR}/mooncake-common/src/libmooncake_common.so mooncake-wheel/mooncake/libmooncake_common.so fi -# Copy libtransfer_engine.so to mooncake directory (only when USE_ETCD is set) -if [ -f ${BUILD_DIR}/mooncake-common/etcd/libetcd_wrapper.so ]; then - echo "Copying libetcd_wrapper.so..." - cp ${BUILD_DIR}/mooncake-common/etcd/libetcd_wrapper.so mooncake-wheel/mooncake/libetcd_wrapper.so -fi - -# Copy libk8s_lease_wrapper.so to mooncake directory (only when STORE_USE_K8S_LEASE is set) -if [ -f ${BUILD_DIR}/mooncake-common/k8s-lease/libk8s_lease_wrapper.so ]; then - echo "Copying libk8s_lease_wrapper.so..." - cp ${BUILD_DIR}/mooncake-common/k8s-lease/libk8s_lease_wrapper.so mooncake-wheel/mooncake/libk8s_lease_wrapper.so +# Copy unified Go HA wrapper (when Go-based etcd or k8s support is enabled) +if [ -f ${BUILD_DIR}/mooncake-common/ha-wrapper/libmooncake_ha_wrapper.so ]; then + echo "Copying libmooncake_ha_wrapper.so..." + cp ${BUILD_DIR}/mooncake-common/ha-wrapper/libmooncake_ha_wrapper.so mooncake-wheel/mooncake/libmooncake_ha_wrapper.so fi # Copy libtransfer_engine.so to mooncake directory (only when BUILD_SHARED_LIBS is set)