diff --git a/gpuallocator/besteffort_policy.go b/gpuallocator/besteffort_policy.go
index 4c0f0bc..95eb48b 100644
--- a/gpuallocator/besteffort_policy.go
+++ b/gpuallocator/besteffort_policy.go
@@ -82,11 +82,20 @@ func (p *bestEffortPolicy) Allocate(available []*Device, required []*Device, siz
 	// Find the highest scoring GPU set in the highest scoring GPU partition.
 	bestSet := filteredBestPartition[0]
 	bestScore = calculateGPUSetScore(bestSet)
+	// Score of the devices that stay available once bestSet is taken; used to break ties.
+	bestLeftScore := calculateGPUSetScore(getDiffSet(available, bestSet))
 	for i := 1; i < len(filteredBestPartition); i++ {
 		score := calculateGPUSetScore(filteredBestPartition[i])
-		if score > bestScore {
+		if score < bestScore {
+			// A lower-scoring set can never be selected, so skip computing its leftover score.
+			continue
+		}
+		// Among equally scored sets, prefer the one whose leftover devices score higher.
+		leftScore := calculateGPUSetScore(getDiffSet(available, filteredBestPartition[i]))
+		if score > bestScore || (score == bestScore && bestLeftScore < leftScore) {
 			bestSet = filteredBestPartition[i]
 			bestScore = score
+			bestLeftScore = leftScore
 		}
 	}
 
@@ -114,6 +123,18 @@ func gpuSetContainsAll(gpuSet []*Device, gpuSubset []*Device) bool {
 	return true
 }
 
+// getDiffSet returns the devices in 'gpuSet' that are not in 'gpuSubset' (i.e. gpuSet - gpuSubset).
+// The relative order of 'gpuSet' is preserved; the result is nil when no devices remain.
+func getDiffSet(gpuSet []*Device, gpuSubset []*Device) []*Device {
+	var rt []*Device
+	for _, gpu := range gpuSet {
+		if !gpuSetContains(gpuSubset, gpu) {
+			rt = append(rt, gpu)
+		}
+	}
+	return rt
+}
+
 // Check to see if 'gpuPartition' has at least one set containing all 'gpuSubset' devices and no padding.
 func gpuPartitionContainsSetWithAll(gpuPartition [][]*Device, gpuSubset []*Device) bool {
 	for _, gpuSet := range gpuPartition {
diff --git a/gpuallocator/besteffort_test.go b/gpuallocator/besteffort_test.go
index 24699c6..dd90641 100644
--- a/gpuallocator/besteffort_test.go
+++ b/gpuallocator/besteffort_test.go
@@ -118,6 +118,14 @@ func TestBestEffortAllocate(t *testing.T) {
 			1,
 			[]int{},
 		},
+		{
+			"Left Score Must Max",
+			New8xA10Node().Devices(),
+			[]int{0, 1, 2, 3, 4, 5},
+			[]int{},
+			2,
+			[]int{4, 5},
+		},
 	}
 
 	RunPolicyAllocTests(t, policy, tests)
diff --git a/gpuallocator/common_test.go b/gpuallocator/common_test.go
index 51c1377..ac23ac0 100644
--- a/gpuallocator/common_test.go
+++ b/gpuallocator/common_test.go
@@ -407,6 +407,57 @@ func NewDGX1VoltaNode() TestNode {
 	return node
 }
 
+// New8xA10Node returns a test node with 8 GPUs split into two groups (0-3 and 4-7).
+// Within a group, AddLink is called with a single NVLink for both orderings of every
+// pair of GPUs; no links are added between the two groups.
+func New8xA10Node() TestNode {
+	node := TestNode{
+		NewTestGPU(0),
+		NewTestGPU(1),
+		NewTestGPU(2),
+		NewTestGPU(3),
+		NewTestGPU(4),
+		NewTestGPU(5),
+		NewTestGPU(6),
+		NewTestGPU(7),
+	}
+
+	// NVLinks
+	node.AddLink(0, 1, nvml.SingleNVLINKLink)
+	node.AddLink(0, 2, nvml.SingleNVLINKLink)
+	node.AddLink(0, 3, nvml.SingleNVLINKLink)
+
+	node.AddLink(1, 0, nvml.SingleNVLINKLink)
+	node.AddLink(1, 2, nvml.SingleNVLINKLink)
+	node.AddLink(1, 3, nvml.SingleNVLINKLink)
+
+	node.AddLink(2, 0, nvml.SingleNVLINKLink)
+	node.AddLink(2, 1, nvml.SingleNVLINKLink)
+	node.AddLink(2, 3, nvml.SingleNVLINKLink)
+
+	node.AddLink(3, 0, nvml.SingleNVLINKLink)
+	node.AddLink(3, 1, nvml.SingleNVLINKLink)
+	node.AddLink(3, 2, nvml.SingleNVLINKLink)
+
+	node.AddLink(4, 5, nvml.SingleNVLINKLink)
+	node.AddLink(4, 6, nvml.SingleNVLINKLink)
+	node.AddLink(4, 7, nvml.SingleNVLINKLink)
+
+	node.AddLink(5, 4, nvml.SingleNVLINKLink)
+	node.AddLink(5, 6, nvml.SingleNVLINKLink)
+	node.AddLink(5, 7, nvml.SingleNVLINKLink)
+
+	node.AddLink(6, 4, nvml.SingleNVLINKLink)
+	node.AddLink(6, 5, nvml.SingleNVLINKLink)
+	node.AddLink(6, 7, nvml.SingleNVLINKLink)
+
+	node.AddLink(7, 4, nvml.SingleNVLINKLink)
+	node.AddLink(7, 5, nvml.SingleNVLINKLink)
+	node.AddLink(7, 6, nvml.SingleNVLINKLink)
+
+	return node
+}
+
 func NewDGX2VoltaNode() TestNode {
 	return nil
 }