From 09c0b6662f334802dcd0f47e20c752c8c57738d3 Mon Sep 17 00:00:00 2001 From: Ji Bin Date: Sat, 4 Nov 2023 22:28:36 +0800 Subject: [PATCH] Promote version v2.3.2 Signed-off-by: Ji Bin --- .github/workflows/main.yml | 8 ++- milvus_binary/env.sh | 2 +- milvus_binary/patches/knowhere-f4c1757.patch | 34 ++++++++++ .../milvus-v2.3.2/0001-fix-for-gettid.patch | 25 ++++++++ ...002-link-with-CoreServices-for-macos.patch | 28 ++++++++ src/milvus/__init__.py | 2 +- src/milvus/data/config.yaml.template | 64 ++++++++++++++++--- 7 files changed, 151 insertions(+), 12 deletions(-) create mode 100644 milvus_binary/patches/knowhere-f4c1757.patch create mode 100644 milvus_binary/patches/milvus-v2.3.2/0001-fix-for-gettid.patch create mode 100644 milvus_binary/patches/milvus-v2.3.2/0002-link-with-CoreServices-for-macos.patch diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 792b789..9db412c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -45,7 +45,7 @@ jobs: restore-keys: linux-ccache- - name: Install build requires run: | - yum -y install rh-python38 patch + yum -y install rh-python38 rh-git227-git-all patch - name: Build Wheel run: | # devtoolset 11 @@ -54,10 +54,16 @@ jobs: # python 3.8 export PATH=/opt/rh/rh-python38/root/usr/local/bin:/opt/rh/rh-python38/root/usr/bin${PATH:+:${PATH}} export LD_LIBRARY_PATH=/opt/rh/rh-python38/root/usr/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} + # git + export PATH=/opt/rh/rh-git227/root/usr/bin${PATH:+:${PATH}} + export LD_LIBRARY_PATH=/opt/rh/httpd24/root/usr/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}} python3 -m pip install --user -U pip python3 -m pip install --user build wheel 'setuptools>64.0' + # install cmake + python3 -m pip install cmake==3.27.7 # install patchelf yum -y install patchelf + yum -y install perl-IPC-Cmd python3 -m build -w -n - uses: actions/upload-artifact@v3 with: diff --git a/milvus_binary/env.sh b/milvus_binary/env.sh index 705dd58..b1884ac 100644 --- 
a/milvus_binary/env.sh +++ b/milvus_binary/env.sh @@ -1,5 +1,5 @@ MILVUS_REPO="https://github.com/milvus-io/milvus.git" -MILVUS_VERSION="v2.3.1" +MILVUS_VERSION="v2.3.2" BUILD_PROXY= BUILD_FORCE=NO diff --git a/milvus_binary/patches/knowhere-f4c1757.patch b/milvus_binary/patches/knowhere-f4c1757.patch new file mode 100644 index 0000000..bef5484 --- /dev/null +++ b/milvus_binary/patches/knowhere-f4c1757.patch @@ -0,0 +1,34 @@ +From b731a16cb656bfe964a6253e6d7e3a28c30045ac Mon Sep 17 00:00:00 2001 +From: Ji Bin +Date: Sun, 5 Nov 2023 18:27:32 +0800 +Subject: [PATCH] fix for gettid + +Signed-off-by: Ji Bin +--- + include/knowhere/comp/thread_pool.h | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/include/knowhere/comp/thread_pool.h b/include/knowhere/comp/thread_pool.h +index a00920d..60546f5 100644 +--- a/include/knowhere/comp/thread_pool.h ++++ b/include/knowhere/comp/thread_pool.h +@@ -13,6 +13,7 @@ + + #include + #include ++#include + + #include + #include +@@ -35,7 +36,7 @@ class ThreadPool { + std::thread + newThread(folly::Func&& func) override { + return folly::NamedThreadFactory::newThread([&, func = std::move(func)]() mutable { +- if (setpriority(PRIO_PROCESS, gettid(), 19) != 0) { ++ if (setpriority(PRIO_PROCESS, syscall(SYS_gettid), 19) != 0) { + LOG_KNOWHERE_ERROR_ << "Failed to set priority of knowhere thread. 
Error is: " + << std::strerror(errno); + } else { +-- +2.42.1 + diff --git a/milvus_binary/patches/milvus-v2.3.2/0001-fix-for-gettid.patch b/milvus_binary/patches/milvus-v2.3.2/0001-fix-for-gettid.patch new file mode 100644 index 0000000..1d12634 --- /dev/null +++ b/milvus_binary/patches/milvus-v2.3.2/0001-fix-for-gettid.patch @@ -0,0 +1,25 @@ +From 4e4d7cd34d61fd671560eefde1674f6ec76a59ff Mon Sep 17 00:00:00 2001 +From: Ji Bin +Date: Sun, 5 Nov 2023 18:49:29 +0800 +Subject: [PATCH 1/2] fix for gettid + +Signed-off-by: Ji Bin +--- + internal/core/thirdparty/knowhere/CMakeLists.txt | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/internal/core/thirdparty/knowhere/CMakeLists.txt b/internal/core/thirdparty/knowhere/CMakeLists.txt +index cc0729a84..aad2b3de7 100644 +--- a/internal/core/thirdparty/knowhere/CMakeLists.txt ++++ b/internal/core/thirdparty/knowhere/CMakeLists.txt +@@ -41,6 +41,7 @@ FetchContent_Declare( + GIT_TAG ${KNOWHERE_VERSION} + SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/knowhere-src + BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/knowhere-build ++ PATCH_COMMAND patch -p1 < ${CMAKE_SOURCE_DIR}/../../../patches/knowhere-f4c1757.patch + DOWNLOAD_DIR ${THIRDPARTY_DOWNLOAD_PATH} ) + + FetchContent_GetProperties( knowhere ) +-- +2.42.1 + diff --git a/milvus_binary/patches/milvus-v2.3.2/0002-link-with-CoreServices-for-macos.patch b/milvus_binary/patches/milvus-v2.3.2/0002-link-with-CoreServices-for-macos.patch new file mode 100644 index 0000000..e53056c --- /dev/null +++ b/milvus_binary/patches/milvus-v2.3.2/0002-link-with-CoreServices-for-macos.patch @@ -0,0 +1,28 @@ +From 9ec041db457b54188d61c47740238eaab338d952 Mon Sep 17 00:00:00 2001 +From: Ji Bin +Date: Sun, 5 Nov 2023 22:22:51 +0800 +Subject: [PATCH 2/2] link with CoreServices for macos + +Signed-off-by: Ji Bin +--- + internal/core/src/storage/azure-blob-storage/CMakeLists.txt | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/internal/core/src/storage/azure-blob-storage/CMakeLists.txt 
b/internal/core/src/storage/azure-blob-storage/CMakeLists.txt +index 91c2cc347..4441a2fae 100644 +--- a/internal/core/src/storage/azure-blob-storage/CMakeLists.txt ++++ b/internal/core/src/storage/azure-blob-storage/CMakeLists.txt +@@ -25,5 +25,10 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -Wno-return-type - + add_library(blob-chunk-manager SHARED AzureBlobChunkManager.cpp) + target_link_libraries(blob-chunk-manager PRIVATE Azure::azure-identity Azure::azure-storage-blobs) + ++if (APPLE) ++ find_library(CORESERVICES_LIBRARY CoreServices) ++ target_link_libraries(blob-chunk-manager PRIVATE ${CORESERVICES_LIBRARY}) ++endif (APPLE) ++ + install(TARGETS blob-chunk-manager DESTINATION "${CMAKE_INSTALL_LIBDIR}") + +-- +2.42.1 + diff --git a/src/milvus/__init__.py b/src/milvus/__init__.py index 6fdde34..aafd98b 100644 --- a/src/milvus/__init__.py +++ b/src/milvus/__init__.py @@ -21,7 +21,7 @@ import json import hashlib -__version__ = '2.3.1' +__version__ = '2.3.2' LOGGERS = {} diff --git a/src/milvus/data/config.yaml.template b/src/milvus/data/config.yaml.template index 65c21e1..add8e56 100644 --- a/src/milvus/data/config.yaml.template +++ b/src/milvus/data/config.yaml.template @@ -45,9 +45,19 @@ etcd: metastore: # Default value: etcd - # Valid values: [etcd, mysql] + # Valid values: [etcd, tikv] type: etcd +# Related configuration of tikv, used to store Milvus metadata. +# Notice that when TiKV is enabled for metastore, you still need to have etcd for service discovery. +# TiKV is a good option when the metadata size requires better horizontal scalability. +tikv: + # Note that the default pd port of tikv is 2379, which conflicts with etcd. 
+ endpoints: 127.0.0.1:2389 + rootPath: by-dev # The root path where data is stored + metaSubPath: meta # metaRootPath = rootPath + '/' + metaSubPath + kvSubPath: kv # kvRootPath = rootPath + '/' + kvSubPath + localStorage: path: {{ local_storage_dir }} # please adjust in embedded Milvus: /tmp/milvus/data/ @@ -84,11 +94,13 @@ minio: region: "" # Cloud whether use virtual host bucket mode useVirtualHost: false + # timeout for request time in milliseconds + requestTimeoutMs: 3000 # Milvus supports four MQ: rocksmq(based on RockDB), natsmq(embedded nats-server), Pulsar and Kafka. # You can change your mq by setting mq.type field. # If you don't set mq.type field as default, there is a note about enabling priority if we config multiple mq in this file. -# 1. standalone(local) mode: rocksmq(default) > natsmq > Pulsar > Kafka +# 1. standalone(local) mode: rocksmq(default) > Pulsar > Kafka # 2. cluster mode: Pulsar(default) > Kafka (rocksmq and natsmq is unsupported in cluster mode) mq: # Default value: "default" @@ -99,11 +111,12 @@ mq: pulsar: address: localhost # Address of pulsar port: 6650 # Port of Pulsar - webport: 80 # Web port of pulsar, if you connect direcly without proxy, should use 8080 + webport: 80 # Web port of pulsar, if you connect directly without proxy, should use 8080 maxMessageSize: 5242880 # 5 * 1024 * 1024 Bytes, Maximum size of each message in pulsar. tenant: public namespace: default requestTimeout: 60 # pulsar client global request timeout in seconds + enableClientMetrics: false # Whether to register pulsar client metrics into milvus metrics path. 
# If you want to enable kafka, needs to comment the pulsar configs # kafka: @@ -112,6 +125,7 @@ pulsar: # saslPassword: # saslMechanisms: PLAIN # securityProtocol: SASL_SSL +# readTimeout: 10 # read message timeout in seconds rocksmq: # The path where the message is stored in rocksmq @@ -157,6 +171,9 @@ rootCoord: importTaskExpiration: 900 # (in seconds) Duration after which an import task will expire (be killed). Default 900 seconds (15 minutes). importTaskRetention: 86400 # (in seconds) Milvus will keep the record of import tasks for at least `importTaskRetention` seconds. Default 86400, seconds (24 hours). enableActiveStandby: false + # can specify ip for example + # ip: 127.0.0.1 + ip: # if not specify address, will use the first unicastable address as local ip port: {{ root_coord_port(int): 53100 }} grpc: serverMaxSendSize: 536870912 @@ -183,11 +200,16 @@ proxy: ginLogging: false maxTaskNum: 1024 # max task number of proxy task queue accessLog: - localPath: {{ system_log_path }} + enable: false filename: milvus_access_log.log # Log filename, leave empty to disable file log. + localPath: {{ system_log_path }} + # maxSize: 64 # max size of a single log file to trigger rotation. 
http: enabled: true # Whether to enable the http server debug_mode: false # Whether to enable http server debug mode + # can specify ip for example + # ip: 127.0.0.1 + ip: # if not specify address, will use the first unicastable address as local ip port: {{ proxy_port(int): 19530 }} internalPort: {{ proxy_internal_port(int): 19529 }} grpc: @@ -214,6 +236,9 @@ queryCoord: heartbeatAvailableInterval: 10000 # 10s, Only QueryNodes which fetched heartbeats within the duration are available loadTimeoutSeconds: 600 checkHandoffInterval: 5000 + # can specify ip for example + # ip: 127.0.0.1 + ip: # if not specify address, will use the first unicastable address as local ip port: {{ query_coord_port(int): 19531 }} grpc: serverMaxSendSize: 536870912 @@ -248,8 +273,9 @@ queryNode: enableDisk: false # enable querynode load disk index, and search on disk index maxDiskUsagePercentage: 95 cache: - enabled: true - memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024 + enabled: true # deprecated, TODO: remove it + memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024 # deprecated, TODO: remove it + readAheadPolicy: willneed # The read ahead policy of chunk cache, options: `normal, random, sequential, willneed, dontneed` grouping: enabled: true maxNQ: 1000 @@ -281,6 +307,9 @@ queryNode: enableCrossUserGrouping: false # false by default Enable Cross user grouping when using user-task-polling policy. (close it if task of any user can not merge others). maxPendingTaskPerUser: 1024 # 50 by default, max pending task in scheduler per user. 
+ # can specify ip for example + # ip: 127.0.0.1 + ip: # if not specify address, will use the first unicastable address as local ip port: {{ query_node_port(int): 21123 }} grpc: serverMaxSendSize: 536870912 @@ -302,6 +331,9 @@ indexNode: buildParallel: 1 enableDisk: true # enable index node build disk vector index maxDiskUsagePercentage: 95 + # can specify ip for example + # ip: 127.0.0.1 + ip: # if not specify address, will use the first unicastable address as local ip port: {{ index_node_port(int): 21121 }} grpc: serverMaxSendSize: 536870912 @@ -316,7 +348,7 @@ dataCoord: balanceInterval: 360 #The interval for the channelBalancer on datacoord to check balance status segment: maxSize: 512 # Maximum size of a segment in MB - diskSegmentMaxSize: 2048 # Maximun size of a segment in MB for collection which has Disk index + diskSegmentMaxSize: 2048 # Maximum size of a segment in MB for collection which has Disk index sealProportion: 0.23 # The time of the assignment expiration in ms # Warning! this parameter is an expert variable and closely related to data integrity. Without specific @@ -345,7 +377,7 @@ dataCoord: compaction: enableAutoCompaction: true rpcTimeout: 10 # compaction rpc request timeout in seconds - maxParallelTaskNum: 100 # max parallel compaction task number + maxParallelTaskNum: 10 # max parallel compaction task number indexBasedCompaction: true enableGarbageCollection: true @@ -354,6 +386,9 @@ dataCoord: missingTolerance: 3600 # file meta missing tolerance duration in seconds, 3600 dropTolerance: 10800 # file belongs to dropped entity tolerance duration in seconds. 10800 enableActiveStandby: false + # can specify ip for example + # ip: 127.0.0.1 + ip: # if not specify address, will use the first unicastable address as local ip port: {{ data_coord_port(int): 13333 }} grpc: serverMaxSendSize: 536870912 @@ -371,6 +406,9 @@ dataNode: insertBufSize: 16777216 # Max buffer size to flush for a single segment. 
deleteBufBytes: 67108864 # Max buffer size to flush del for a single channel syncPeriod: 600 # The period to sync segments if buffer is not empty. + # can specify ip for example + # ip: 127.0.0.1 + ip: # if not specify address, will use the first unicastable address as local ip port: {{ data_node_port(int): 21124 }} grpc: serverMaxSendSize: 536870912 @@ -384,6 +422,11 @@ dataNode: watermarkCluster: 0.5 # memory watermark for cluster, upon reaching this watermark, segments will be synced. timetick: byRPC: true + channel: + # specify the size of global work pool of all channels + # if this parameter <= 0, will set it as the maximum number of CPUs that can be executing + # suggest to set it bigger on large collection numbers to avoid blocking + workPoolSize: -1 # Configures the system log output. log: @@ -425,6 +468,7 @@ common: rootCoordTimeTick: rootcoord-timetick rootCoordStatistics: rootcoord-statistics rootCoordDml: rootcoord-dml + replicateMsg: replicate-msg rootCoordDelta: rootcoord-delta search: search searchResult: searchResult @@ -489,6 +533,7 @@ common: threshold: info: 500 # minimum milliseconds for printing durations in info level warn: 1000 # minimum milliseconds for printing durations in warn level + ttMsgEnabled: true # Whether the instance sends ts messages; set to false to disable sending # QuotaConfig, configurations of Milvus quota and limits. # By default, we enable: @@ -624,7 +669,8 @@ trace: # Fractions >= 1 will always sample. Fractions < 0 are treated as zero. sampleFraction: 0 jaeger: - url: # when exporter is jaeger should set the jaeger's URL + url: # "http://127.0.0.1:14268/api/traces" + # when exporter is jaeger should set the jaeger's URL autoIndex: params: