milvus-io
diff --git a/‎configs/milvus.yaml‎
Lines changed: 1 addition & 0 deletions b/‎configs/milvus.yaml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎internal/querycoordv2/assign/OWNERS‎
Lines changed: 5 additions & 0 deletions b/‎internal/querycoordv2/assign/OWNERS‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎internal/querycoordv2/assign/assign_policy.go‎
Lines changed: 79 additions & 0 deletions b/‎internal/querycoordv2/assign/assign_policy.go‎
Lines changed: 79 additions & 0 deletions
diff --git a/‎internal/querycoordv2/assign/assign_policy_components.go‎
Lines changed: 164 additions & 0 deletions b/‎internal/querycoordv2/assign/assign_policy_components.go‎
Lines changed: 164 additions & 0 deletions
diff --git a/‎internal/querycoordv2/assign/assign_policy_factory.go‎
Lines changed: 134 additions & 0 deletions b/‎internal/querycoordv2/assign/assign_policy_factory.go‎
Lines changed: 134 additions & 0 deletions
@@ -432,6 +432,7 @@ queryCoord:
   checkNodeSessionInterval: 60 # the interval(in seconds) of check querynode cluster session
   gracefulStopTimeout: 5 # seconds. force stop node without graceful stop
   enableStoppingBalance: true # whether enable stopping balance
+  stoppingBalanceAssignPolicy: ScoreBased # assign policy for stopping balance, options: RoundRobin, RowCount, ScoreBased
   channelExclusiveNodeFactor: 4 # the least node number for enable channel's exclusive mode
   collectionObserverInterval: 200 # the interval of collection observer
   checkExecutedFlagInterval: 100 # the interval of check executed flag to force to pull dist
 
@@ -0,0 +1,5 @@
+reviewers:
+  - weiliu1031
+
+approvers:
+  - maintainers
@@ -0,0 +1,79 @@
+// Licensed to the LF AI & Data foundation under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package assign
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
+	"github.com/milvus-io/milvus/internal/querycoordv2/meta"
+)
+
+// SegmentAssignPlan describes one planned placement of a segment onto a node
+// within a replica.
+type SegmentAssignPlan struct {
+	Segment *meta.Segment
+	Replica *meta.Replica
+
+	// From is the source node ID (-1 if empty); To is the destination node ID.
+	From, To int64
+
+	// Scores recorded for the source node, the destination node and the segment.
+	// How they are computed is not defined in this file.
+	FromScore, ToScore, SegmentScore int64
+
+	LoadPriority commonpb.LoadPriority
+}
+
+func (segPlan *SegmentAssignPlan) String() string {
+	return fmt.Sprintf("SegmentPlan:[collectionID: %d, replicaID: %d, segmentID: %d, from: %d, to: %d, fromScore: %d, toScore: %d, segmentScore: %d]\n",
+		segPlan.Segment.CollectionID, segPlan.Replica.GetID(), segPlan.Segment.ID, segPlan.From, segPlan.To, segPlan.FromScore, segPlan.ToScore, segPlan.SegmentScore)
+}
+
+// ChannelAssignPlan describes one planned placement of a DM channel onto a node
+// within a replica.
+type ChannelAssignPlan struct {
+	Channel *meta.DmChannel
+	Replica *meta.Replica
+
+	// From is the source node ID; To is the destination node ID.
+	From, To int64
+
+	// Scores recorded for the source node, the destination node and the channel.
+	// How they are computed is not defined in this file.
+	FromScore, ToScore, ChannelScore int64
+}
+
+func (chanPlan *ChannelAssignPlan) String() string {
+	return fmt.Sprintf("ChannelPlan:[collectionID: %d, channel: %s, replicaID: %d, from: %d, to: %d]\n",
+		chanPlan.Channel.CollectionID, chanPlan.Channel.ChannelName, chanPlan.Replica.GetID(), chanPlan.From, chanPlan.To)
+}
+
+// AssignPolicy is the single contract for placing both the segments and the channels of a collection onto nodes.
+// It lets different balancers share one resource-assignment abstraction instead of each carrying its own logic.
+type AssignPolicy interface {
+	// AssignSegment plans where the given segments of collectionID should go among the candidate nodes.
+	// It returns the resulting SegmentAssignPlan list; how forceAssign is honored is up to the implementation.
+	AssignSegment(ctx context.Context, collectionID int64, segments []*meta.Segment, nodes []int64, forceAssign bool) []SegmentAssignPlan
+
+	// AssignChannel plans where the given channels of collectionID should go among the candidate nodes.
+	// It returns the resulting ChannelAssignPlan list; how forceAssign is honored is up to the implementation.
+	AssignChannel(ctx context.Context, collectionID int64, channels []*meta.DmChannel, nodes []int64, forceAssign bool) []ChannelAssignPlan
+}
+
+// AssignPolicyConfig groups settings shared by assignment policies; nothing in this file reads it yet.
+type AssignPolicyConfig struct {
+	// BatchSize is the maximum number of resources to assign in one batch.
+	BatchSize int
+
+	// EnableBenefitCheck turns on benefit evaluation before an assignment is made.
+	EnableBenefitCheck bool
+}
@@ -0,0 +1,164 @@
+// Licensed to the LF AI & Data foundation under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package assign
+
+import (
+	"context"
+	"math"
+
+	"github.com/blang/semver/v4"
+	"github.com/samber/lo"
+
+	"github.com/milvus-io/milvus/internal/querycoordv2/meta"
+	"github.com/milvus-io/milvus/internal/querycoordv2/params"
+	"github.com/milvus-io/milvus/internal/querycoordv2/session"
+)
+
+// nodeFilter narrows a candidate node list down to the nodes eligible for an assignment.
+type nodeFilter interface {
+	// FilterNodes returns the subset of nodes that pass the filter; the filters in this file relax their checks when forceAssign is true.
+	FilterNodes(ctx context.Context, nodes []int64, forceAssign bool) []int64
+}
+
+// segmentScoreCalculator assigns an integer score to a segment.
+type segmentScoreCalculator interface {
+	// CalculateScore returns the score of the given segment (for example, its row count).
+	CalculateScore(segment *meta.Segment) int64
+}
+
+// channelScoreCalculator assigns an integer score to a DM channel.
+type channelScoreCalculator interface {
+	// CalculateScore returns the score of the given channel.
+	CalculateScore(channel *meta.DmChannel) int64
+}
+
+// nodeScoreCalculator computes how loaded a node currently is.
+type nodeScoreCalculator interface {
+	// CalculateScore reports the current load score of nodeID with respect to
+	// the collection identified by collectionID.
+	CalculateScore(ctx context.Context, nodeID, collectionID int64) float64
+}
+
+// assignmentStrategy decides which candidate node should receive a resource.
+type assignmentStrategy interface {
+	// SelectNode picks the best node among candidates for a resource whose score is resourceScore.
+	// It returns the chosen node ID together with the updated node items.
+	SelectNode(candidates []*NodeItem, resourceScore int64) (selectedNode int64, updatedNodeInfo []*NodeItem)
+}
+
+// benefitEvaluator decides whether an assignment is worth performing.
+type benefitEvaluator interface {
+	// HasEnoughBenefit reports whether assigning a resource from the source
+	// node to the target node is beneficial enough.
+	//   - sourceScore: current score of the source node
+	//   - targetScore: current score of the target node
+	//   - resourceScore: score of the resource to be assigned
+	HasEnoughBenefit(sourceScore, targetScore float64, resourceScore int64) bool
+}
+
+// ============================================================================
+// Common Component Implementations
+// These are reusable implementations that can be shared across different policies
+// ============================================================================
+
+// commonSegmentNodeFilter is a reusable nodeFilter for segment assignment, backed by a session.NodeManager.
+// Unless forceAssign is set, it drops nodes that are unknown to the manager or not in the normal state.
+type commonSegmentNodeFilter struct {
+	nodeManager *session.NodeManager
+}
+
+// newCommonSegmentNodeFilter builds the segment node filter on top of the
+// given node manager and returns it as a nodeFilter.
+func newCommonSegmentNodeFilter(nodeManager *session.NodeManager) nodeFilter {
+	filter := new(commonSegmentNodeFilter)
+	filter.nodeManager = nodeManager
+	return filter
+}
+
+// FilterNodes returns the nodes eligible to receive segments.
+//
+// When forceAssign is true the input slice is returned unchanged. Otherwise
+// only nodes that the node manager knows about and that are in
+// session.NodeStateNormal are kept. ctx is currently unused.
+func (f *commonSegmentNodeFilter) FilterNodes(ctx context.Context, nodes []int64, forceAssign bool) []int64 {
+	if forceAssign {
+		return nodes
+	}
+
+	isNormal := func(nodeID int64, _ int) bool {
+		nodeInfo := f.nodeManager.Get(nodeID)
+		if nodeInfo == nil {
+			return false
+		}
+		return nodeInfo.GetState() == session.NodeStateNormal
+	}
+	return lo.Filter(nodes, isNormal)
+}
+
+// commonChannelNodeFilter is a reusable nodeFilter for channel assignment, backed by a session.NodeManager.
+// It always applies the SQN filter; unless forceAssign is set it also drops nodes with version < 2.4 and non-normal nodes.
+type commonChannelNodeFilter struct {
+	nodeManager *session.NodeManager
+}
+
+// newCommonChannelNodeFilter builds the channel node filter on top of the
+// given node manager and returns it as a nodeFilter.
+func newCommonChannelNodeFilter(nodeManager *session.NodeManager) nodeFilter {
+	filter := new(commonChannelNodeFilter)
+	filter.nodeManager = nodeManager
+	return filter
+}
+
+// FilterNodes filters nodes for channel assignment considering SQN, version, and state
+func (f *commonChannelNodeFilter) FilterNodes(ctx context.Context, nodes []int64, forceAssign bool) []int64 {
+	// Filter SQN if streaming service is enabled
+	nodes = filterSQNIfStreamingServiceEnabled(nodes)
+
+	if forceAssign {
+		return nodes
+	}
+
+	// Version range filter: require version > 2.3.x
+	versionRangeFilter := semver.MustParseRange(">2.3.x")
+	return lo.Filter(nodes, func(node int64, _ int) bool {
+		info := f.nodeManager.Get(node)
+		// Balance channel to qn with version < 2.4 is not allowed since l0 segment supported
+		// If watch channel on qn with version < 2.4, it may cause delete data loss
+		return info != nil && info.GetState() == session.NodeStateNormal && versionRangeFilter(info.Version())
+	})
+}
+
+// commonScoreBasedBenefitEvaluator is a stateless, reusable benefit evaluator
+// for score-based policies; its thresholds are read from params.Params.QueryCoordCfg on each call.
+type commonScoreBasedBenefitEvaluator struct{}
+
+// HasEnoughBenefit reports whether moving a resource of score resourceScore
+// from a node scored sourceScore to a node scored targetScore is worthwhile.
+//
+// Two rules are applied in order:
+//  1. The current gap |sourceScore - targetScore| must be at least
+//     targetScore * ScoreUnbalanceTolerationFactor, otherwise the move is rejected.
+//  2. If the move would leave the target with a higher score than the source
+//     (the balance is reversed), the new gap multiplied by
+//     ReverseUnbalanceTolerationFactor must stay below the current gap.
+func (e *commonScoreBasedBenefitEvaluator) HasEnoughBenefit(sourceScore float64, targetScore float64, resourceScore int64) bool {
+	currentGap := math.Abs(sourceScore - targetScore)
+	minGap := targetScore * params.Params.QueryCoordCfg.ScoreUnbalanceTolerationFactor.GetAsFloat()
+	if currentGap < minGap {
+		// Nodes are already balanced within tolerance.
+		return false
+	}
+
+	delta := float64(resourceScore)
+	sourceAfter, targetAfter := sourceScore-delta, targetScore+delta
+
+	reversed := targetAfter > sourceAfter
+	if !reversed {
+		return true
+	}
+
+	// The move flips which node is heavier; accept it only when the flipped
+	// gap is sufficiently smaller than the gap we started with.
+	reversedGap := math.Abs(sourceAfter - targetAfter)
+	tooLarge := reversedGap*params.Params.QueryCoordCfg.ReverseUnbalanceTolerationFactor.GetAsFloat() >= currentGap
+	return !tooLarge
+}
+
+// HasEnoughBenefitForNodes is a convenience wrapper around HasEnoughBenefit
+// that takes the scores from the priorities of two NodeItems.
+// scoreChanges is converted with int64(), which discards its fractional part.
+func (e *commonScoreBasedBenefitEvaluator) HasEnoughBenefitForNodes(sourceNode *NodeItem, targetNode *NodeItem, scoreChanges float64) bool {
+	sourcePriority := float64(sourceNode.getPriority())
+	targetPriority := float64(targetNode.getPriority())
+	return e.HasEnoughBenefit(sourcePriority, targetPriority, int64(scoreChanges))
+}
@@ -0,0 +1,134 @@
+// Licensed to the LF AI & Data foundation under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package assign
+
+import (
+	"sync"
+
+	"go.uber.org/zap"
+
+	"github.com/milvus-io/milvus/internal/querycoordv2/meta"
+	"github.com/milvus-io/milvus/internal/querycoordv2/session"
+	"github.com/milvus-io/milvus/internal/querycoordv2/task"
+	"github.com/milvus-io/milvus/pkg/v2/log"
+)
+
+const (
+	// PolicyTypeRoundRobin selects the simple round-robin assign policy.
+	PolicyTypeRoundRobin = "round_robin"
+	// PolicyTypeRowCount selects the row count/channel count-based priority queue assign policy.
+	PolicyTypeRowCount = "row_count"
+	// PolicyTypeScoreBased selects the comprehensive score-based assign policy with benefit evaluation.
+	PolicyTypeScoreBased = "score_based"
+)
+
+// AssignPolicyFactory creates assign policy instances on demand and caches them in policyMap, keyed by policy type.
+// Callers can therefore request a different policy type on each call, e.g. after a configuration change.
+type AssignPolicyFactory struct {
+	policyMap  map[string]AssignPolicy
+	policyLock sync.RWMutex
+
+	// Dependencies handed to the policy constructors
+	scheduler   task.Scheduler
+	nodeManager *session.NodeManager
+	dist        *meta.DistributionManager
+	meta        *meta.Meta
+	targetMgr   meta.TargetManagerInterface
+}
+
+// Process-wide factory singleton and the sync.Once that guards its one-time initialization.
+var (
+	globalPolicyFactory *AssignPolicyFactory
+	policyFactoryOnce   sync.Once
+)
+
+// InitGlobalAssignPolicyFactory builds the global assign policy factory from
+// the given dependencies. The construction is guarded by a sync.Once, so only
+// the first call has any effect; later calls (and their arguments) are ignored.
+// It is meant to be invoked once during server startup.
+func InitGlobalAssignPolicyFactory(
+	scheduler task.Scheduler,
+	nodeManager *session.NodeManager,
+	dist *meta.DistributionManager,
+	meta *meta.Meta,
+	targetMgr meta.TargetManagerInterface,
+) {
+	initialize := func() {
+		factory := NewAssignPolicyFactory(scheduler, nodeManager, dist, meta, targetMgr)
+		globalPolicyFactory = factory
+		log.Info("Global assign policy factory initialized")
+	}
+	policyFactoryOnce.Do(initialize)
+}
+
+// GetGlobalAssignPolicyFactory returns the global assign policy factory singleton without any locking.
+// The result is nil until InitGlobalAssignPolicyFactory has run, and again after a test reset.
+func GetGlobalAssignPolicyFactory() *AssignPolicyFactory {
+	return globalPolicyFactory
+}
+
+// ResetGlobalAssignPolicyFactoryForTest clears the global factory and re-arms
+// the initialization guard so InitGlobalAssignPolicyFactory can run again.
+// It performs no synchronization and is intended for tests only.
+func ResetGlobalAssignPolicyFactoryForTest() {
+	globalPolicyFactory, policyFactoryOnce = nil, sync.Once{}
+}
+
+// NewAssignPolicyFactory returns a factory with an empty policy cache that
+// will pass the given dependencies to the policies it creates.
+func NewAssignPolicyFactory(
+	scheduler task.Scheduler,
+	nodeManager *session.NodeManager,
+	dist *meta.DistributionManager,
+	meta *meta.Meta,
+	targetMgr meta.TargetManagerInterface,
+) *AssignPolicyFactory {
+	factory := &AssignPolicyFactory{policyMap: make(map[string]AssignPolicy)}
+	factory.scheduler = scheduler
+	factory.nodeManager = nodeManager
+	factory.dist = dist
+	factory.meta = meta
+	factory.targetMgr = targetMgr
+	return factory
+}
+
+// GetPolicy returns the assign policy for policyType, creating and caching it
+// on first use.
+//
+// policyType may be one of the PolicyType* constants or one of the spellings
+// documented for queryCoord.stoppingBalanceAssignPolicy in milvus.yaml
+// (RoundRobin, RowCount, ScoreBased). Any other value falls back to the
+// score-based policy and is logged as a warning the first time it is seen.
+// All names that resolve to the same policy share a single cached instance.
+// The method is safe for concurrent use.
+func (f *AssignPolicyFactory) GetPolicy(policyType string) AssignPolicy {
+	// Fast path: the policy (or an alias of it) is already cached.
+	f.policyLock.RLock()
+	cached, ok := f.policyMap[policyType]
+	f.policyLock.RUnlock()
+	if ok {
+		return cached
+	}
+
+	f.policyLock.Lock()
+	defer f.policyLock.Unlock()
+
+	// Another goroutine may have populated the entry between the two locks.
+	if cached, ok := f.policyMap[policyType]; ok {
+		return cached
+	}
+	// Tolerate a zero-value factory that was not built by NewAssignPolicyFactory.
+	if f.policyMap == nil {
+		f.policyMap = make(map[string]AssignPolicy)
+	}
+
+	canonical, known := canonicalAssignPolicyType(policyType)
+	if !known {
+		log.Warn("Unknown assign policy type, using default",
+			zap.String("requested", policyType),
+			zap.String("default", canonical))
+	}
+
+	policy, ok := f.policyMap[canonical]
+	if !ok {
+		log.Info("Creating new assign policy", zap.String("type", canonical))
+		switch canonical {
+		case PolicyTypeRoundRobin:
+			policy = newRoundRobinAssignPolicy(f.nodeManager, f.scheduler, f.targetMgr)
+		case PolicyTypeRowCount:
+			policy = newRowCountBasedAssignPolicy(f.nodeManager, f.scheduler, f.dist)
+		default:
+			policy = newScoreBasedAssignPolicy(f.nodeManager, f.scheduler, f.dist, f.meta)
+		}
+		f.policyMap[canonical] = policy
+	}
+
+	// Remember the requested spelling as an alias so later lookups hit the
+	// fast path and the fallback warning is emitted only once per name.
+	f.policyMap[policyType] = policy
+	return policy
+}
+
+// canonicalAssignPolicyType maps a requested policy name to one of the
+// PolicyType* constants. The second result is false when the name is not
+// recognized, in which case PolicyTypeScoreBased is returned as the default.
+func canonicalAssignPolicyType(policyType string) (string, bool) {
+	switch policyType {
+	case PolicyTypeRoundRobin, "RoundRobin":
+		return PolicyTypeRoundRobin, true
+	case PolicyTypeRowCount, "RowCount":
+		return PolicyTypeRowCount, true
+	case PolicyTypeScoreBased, "ScoreBased":
+		return PolicyTypeScoreBased, true
+	default:
+		return PolicyTypeScoreBased, false
+	}
+}