Skip to content

Commit 54dc223

Browse files
authored
Merge pull request #22 from 8onlichtman/complete_greedy.py
Complete greedy.py
2 parents aa962f6 + 964feea commit 54dc223

File tree

3 files changed

+186
-42
lines changed

3 files changed

+186
-42
lines changed

prtpy/binners.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,13 @@ def add_item_to_bin(self, bins:BinsArray, item: Any, bin_index: int)->BinsArray:
7575
Return the bins after the addition.
7676
"""
7777
return bins
78-
78+
79+
def remove_item_from_bin(self, bins: BinsArray, bin_index: int, item_index: int) -> BinsArray:
    """
    Remove the item at position `item_index` from the bin at `bin_index`.

    The bins are modified in place: the item's value is subtracted from the
    sum of that bin, and the item is deleted from the bin's contents.

    NOTE(review): `bins` is unpacked as a `(sums, lists)` pair, so this
    presumably works only for binners that keep the bin contents -- confirm
    against the enclosing class.

    :param bins: a pair `(sums, lists)`; `sums[i]` is the sum of bin i, `lists[i]` holds its items.
    :param bin_index: index of the bin from which the item is removed.
    :param item_index: position of the item inside that bin.
    :return: the same `bins` object, after the removal.
    :raises IndexError: if `bin_index` or `item_index` is out of range; in that case nothing is modified.
    """
    sums, lists = bins
    contents = lists[bin_index]
    item = contents[item_index]
    # Subtract the item's *value* rather than the raw item, so that the sums stay
    # correct when the binner uses a custom `valueof` (e.g. items that are keys or tuples).
    sums[bin_index] -= self.valueof(item)
    del contents[item_index]
    return bins
7985

8086
@abstractmethod
8187
def sort_by_ascending_sum(self, bins:BinsArray):
@@ -99,6 +105,8 @@ def numbins(self, bins: BinsArray) -> int:
99105
"""
100106
return None
101107

108+
109+
102110
@abstractmethod
103111
def sums(self, bins: BinsArray) -> Tuple[float]:
104112
"""

prtpy/objectives.py

+44-1
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,6 @@ def lower_bound(self, sums:list, sum_of_remaining_items:float, are_sums_in_ascen
144144
25.0
145145
>>> MinimizeDifference.lower_bound([10,20,30,40,50], sum_of_remaining_items=45)
146146
15.0
147-
148147
>>> MinimizeDifference.lower_bound([10,20,30,40,50], sum_of_remaining_items=200)
149148
0.0
150149
>>> MinimizeDifference.lower_bound([0,0,0,0,0], sum_of_remaining_items=54)
@@ -156,6 +155,50 @@ def lower_bound(self, sums:list, sum_of_remaining_items:float, are_sums_in_ascen
156155

157156

158157

158+
class MinimizeTheDistanceFromAvg(Objective):
    """
    Objective: minimize the total amount by which bin sums exceed the average bin sum.

    Only bins whose sum is larger than the average contribute to the value;
    bins at or below the average contribute nothing.
    """

    def value_to_minimize(self, sums: list, are_sums_in_ascending_order: bool = False) -> float:
        """
        Return the sum, over all bins above the average, of (bin sum - average).

        :param sums: the current sums of the bins.
        :param are_sums_in_ascending_order: accepted for interface compatibility; not used here.
        :return: the total excess above the average; 0.0 when there are no bins.

        >>> MinimizeDistAvg.value_to_minimize([10, 20, 30])
        10.0
        >>> MinimizeDistAvg.value_to_minimize([])
        0.0
        """
        numbins = len(sums)
        if numbins == 0:
            # No bins: nothing can exceed the average (and the average itself is undefined).
            return 0.0
        avg = sum(sums) / numbins
        return sum(s - avg for s in sums if s > avg)

    def __str__(self) -> str:
        return "minimize-the-distance-from-avg"

    def lower_bound(self, sums: list, sum_of_remaining_items: float, are_sums_in_ascending_order: bool = False) -> float:
        """
        Return an optimistic (lower) bound on the final value, given the current
        sums and the total value of the items that are not yet assigned.

        First we compute the final average, including the remaining items.
        Then we pour the remaining value into the bins that have not reached the
        average yet (such bins do not contribute to the final value, since only
        bins above the average are counted). Bins that are already above the
        average contribute their excess. Whatever is left after this filling is
        spread over all the bins, and the residue is added to the bound.

        NOTE(review): the deficits are truncated with int() and the leftover is
        treated as whole units, so this bound appears to assume integer item
        values -- confirm before relying on it with fractional items.

        :param sums: the current sums of the bins.
        :param sum_of_remaining_items: total value of the items not yet placed in a bin.
        :param are_sums_in_ascending_order: accepted for interface compatibility; not used here.
        :return: the lower bound; 0.0 when there are no bins.

        >>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=5)
        28.0
        >>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=20)
        22.0
        >>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=45)
        12.0
        >>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=200)
        0.0
        >>> MinimizeDistAvg.lower_bound([0,0,0,0,0], sum_of_remaining_items=54)
        0.8
        >>> MinimizeDistAvg.lower_bound([], sum_of_remaining_items=54)
        0.0
        """
        numbins = len(sums)
        if numbins == 0:
            # Guard against division by zero when there are no bins.
            return 0.0
        remaining = sum_of_remaining_items
        avg = (sum(sums) + remaining) / numbins
        diff_from_avg = 0
        for s in sums:
            if s < avg and remaining > 0:
                # Fill this bin towards the average (whole units only).
                remaining = remaining - min(remaining, int(avg - s))
            if s > avg:
                # This bin is already above the final average; its excess cannot shrink.
                diff_from_avg = diff_from_avg + (s - avg)
        return diff_from_avg + ((remaining % numbins) / numbins)


MinimizeDistAvg = MinimizeTheDistanceFromAvg()
199+
200+
201+
159202
if __name__ == "__main__":
160203
import doctest
161204

prtpy/partitioning/complete_greedy.py

+133-40
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,28 @@
1010
explains how to have a set with a custom key.
1111
1212
Programmer: Erel Segal-Halevi
13+
Eitan Lichtman added Minimize Distance from Avg Objective
1314
"""
14-
15+
import math
1516
from typing import List, Tuple, Callable, Iterator, Any
1617
import numpy as np
1718
import logging, time
19+
1820
from prtpy import objectives as obj, Binner, BinsArray
1921

2022
logger = logging.getLogger(__name__)
2123

2224

23-
2425
def anytime(
25-
binner: Binner, numbins: int, items: List[any],
26+
binner: Binner, numbins: int, items: List[any], relative_value: List[any] = None,
2627
objective: obj.Objective = obj.MinimizeDifference,
27-
use_lower_bound: bool = True, # Prune branches whose lower bound (= optimistic value) is at least as large as the current minimum.
28-
use_fast_lower_bound: bool = True, # A faster lower bound, that does not create the branch at all. Useful for min-max and max-min objectives.
29-
use_heuristic_3: bool = False, # An improved stopping condition, applicable for min-max only. Not very useful in experiments.
30-
use_set_of_seen_states: bool = True,
28+
use_lower_bound: bool = True,
29+
# Prune branches whose lower bound (= optimistic value) is at least as large as the current minimum.
30+
use_fast_lower_bound: bool = True,
31+
# A faster lower bound, that does not create the branch at all. Useful for the min-max, max-min and min-dist-avg objectives.
32+
use_heuristic_3: bool = False,
33+
# An improved stopping condition, applicable for min-max only. Not very useful in experiments.
34+
use_set_of_seen_states: bool = True,
3135
time_limit: float = np.inf,
3236
) -> Iterator:
3337
"""
@@ -41,6 +45,10 @@ def anytime(
4145
Bin #0: [6, 5, 4], sum=15.0
4246
Bin #1: [8, 7], sum=15.0
4347
48+
>>> printbins(anytime(BinnerKeepingContents(), 2, [4,5,6,7,8], [0.3,0.7], objective=obj.MinimizeDistAvg))
49+
Bin #0: [5, 4], sum=9.0
50+
Bin #1: [8, 7, 6], sum=21.0
51+
4452
The following examples are based on:
4553
Walter (2013), 'Comparing the minimum completion times of two longest-first scheduling-heuristics'.
4654
>>> walter_numbers = [46, 39, 27, 26, 16, 13, 10]
@@ -56,6 +64,56 @@ def anytime(
5664
Bin #0: [46, 10], sum=56.0
5765
Bin #1: [27, 16, 13], sum=56.0
5866
Bin #2: [39, 26], sum=65.0
67+
>>> printbins(anytime(BinnerKeepingContents(), 3, walter_numbers, objective=obj.MinimizeDistAvg))
68+
Bin #0: [39, 16], sum=55.0
69+
Bin #1: [46, 13], sum=59.0
70+
Bin #2: [27, 26, 10], sum=63.0
71+
>>> printbins(anytime(BinnerKeepingContents(), 3, walter_numbers,[0.2,0.4,0.4], objective=obj.MinimizeDistAvg))
72+
Bin #0: [27, 10], sum=37.0
73+
Bin #1: [39, 16, 13], sum=68.0
74+
Bin #2: [46, 26], sum=72.0
75+
76+
>>> printbins(anytime(BinnerKeepingContents(), 5, [460000000, 390000000, 270000000, 260000000, 160000000, 130000000, 100000000],[0.2,0.4,0.1,0.15,0.15], objective=obj.MinimizeDistAvg))
77+
Bin #0: [390000000], sum=390000000.0
78+
Bin #1: [460000000, 130000000, 100000000], sum=690000000.0
79+
Bin #2: [160000000], sum=160000000.0
80+
Bin #3: [260000000], sum=260000000.0
81+
Bin #4: [270000000], sum=270000000.0
82+
83+
84+
>>> printbins(anytime(BinnerKeepingContents(), 10, [115268834, 22638149, 35260669, 68111031, 13376625, 20835125, 179398684, 69888000, 94462800, 5100340, 27184906, 305371, 272847, 545681, 1680746, 763835, 763835], [0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1], objective=obj.MinimizeDistAvg))
85+
Bin #0: [13376625, 5100340, 1680746, 763835, 763835, 545681, 305371, 272847], sum=22809280.0
86+
Bin #1: [20835125], sum=20835125.0
87+
Bin #2: [22638149], sum=22638149.0
88+
Bin #3: [27184906], sum=27184906.0
89+
Bin #4: [35260669], sum=35260669.0
90+
Bin #5: [68111031], sum=68111031.0
91+
Bin #6: [69888000], sum=69888000.0
92+
Bin #7: [94462800], sum=94462800.0
93+
Bin #8: [115268834], sum=115268834.0
94+
Bin #9: [179398684], sum=179398684.0
95+
96+
97+
>>> printbins(anytime(BinnerKeepingContents(), 3, walter_numbers,[0.1,0.9,0], objective=obj.MinimizeDistAvg))
98+
Bin #0: [16], sum=16.0
99+
Bin #1: [46, 39, 27, 26, 13, 10], sum=161.0
100+
Bin #2: [], sum=0.0
101+
102+
103+
>>> printbins(anytime(BinnerKeepingContents(), 5, [2,2,5,5,5,5,9], objective=obj.MinimizeDistAvg))
104+
Bin #0: [5], sum=5.0
105+
Bin #1: [5], sum=5.0
106+
Bin #2: [5, 2], sum=7.0
107+
Bin #3: [5, 2], sum=7.0
108+
Bin #4: [9], sum=9.0
109+
>>> printbins(anytime(BinnerKeepingContents(), 3, [1,1,1,1], objective=obj.MinimizeDistAvg))
110+
Bin #0: [1], sum=1.0
111+
Bin #1: [1], sum=1.0
112+
Bin #2: [1, 1], sum=2.0
113+
>>> printbins(anytime(BinnerKeepingContents(), 3, [1,1,1,1,1], objective=obj.MinimizeDistAvg))
114+
Bin #0: [1], sum=1.0
115+
Bin #1: [1, 1], sum=2.0
116+
Bin #2: [1, 1], sum=2.0
59117
60118
Compare results with and without the lower bound:
61119
>>> random_numbers = np.random.randint(1, 2**48-1, 10, dtype=np.int64)
@@ -95,24 +153,34 @@ def anytime(
95153
end_time = start_time + time_limit
96154

97155
sorted_items = sorted(items, key=binner.valueof, reverse=True)
98-
sums_of_remaining_items = [sum(map(binner.valueof, sorted_items[i:])) for i in range(numitems)] + [0] # For Heuristic 3
156+
sums_of_remaining_items = [sum(map(binner.valueof, sorted_items[i:])) for i in range(numitems)] + [
157+
0] # For Heuristic 3
158+
from prtpy import BinnerKeepingContents, BinnerKeepingSums, printbins
99159
best_bins, best_objective_value = None, np.inf
100160

101-
global_lower_bound = objective.lower_bound(np.zeros(numbins), sums_of_remaining_items[0], are_sums_in_ascending_order=True)
102161

103-
logger.info("\nComplete Greedy %s Partitioning of %d items into %d parts. Lower bound: %s", objective, numitems, numbins, global_lower_bound)
162+
global_lower_bound = objective.lower_bound(np.zeros(numbins), sums_of_remaining_items[0],
163+
are_sums_in_ascending_order=True)
164+
165+
logger.info("\nComplete Greedy %s Partitioning of %d items into %d parts. Lower bound: %s", objective, numitems,
166+
numbins, global_lower_bound)
104167

105168
# Create a stack whose elements are a partition and the current depth.
106169
# Initially, it contains a single tuple: an empty partition with depth 0.
107-
first_bins = binner.new_bins(numbins)
170+
# If the input has relative values for each bin -
171+
# we pad each bin with an initial sum, so that all bins are evened out with the bin that has the highest relative value
172+
# (at the end of the algorithm we will remove these sums).
173+
first_bins = binner.new_bins(numbins)
174+
if (relative_value):
175+
for i in range(numbins):
176+
binner.add_item_to_bin(first_bins, (max(relative_value) * sum(items) - relative_value[i] * sum(items)), i)
108177
first_vertex = (first_bins, 0)
109178
stack: List[Tuple[BinsArray, int]] = [first_vertex]
110179
if use_set_of_seen_states:
111180
seen_states = set(tuple(binner.sums(first_bins)))
112-
113181
# For logging and profiling:
114-
complete_partitions_checked = 0
115-
intermediate_partitions_checked = 1
182+
complete_partitions_checked = 0
183+
intermediate_partitions_checked = 1
116184

117185
times_fast_lower_bound_activated = 0
118186
times_lower_bound_activated = 0
@@ -134,108 +202,133 @@ def anytime(
134202
if new_objective_value < best_objective_value:
135203
best_bins, best_objective_value = current_bins, new_objective_value
136204
logger.info(" Found a better solution: %s, with value %s", current_bins, best_objective_value)
137-
if new_objective_value<=global_lower_bound:
205+
if new_objective_value <= global_lower_bound:
138206
logger.info(" Solution matches global lower bound - stopping")
139207
break
140208
continue
141-
142209
# Heuristic 3: "If the sum of the remaining unassigned integers plus the smallest current subset sum is <= the largest subset sum, all remaining integers are assigned to the subset with the smallest sum, terminating that branch of the tree."
143210
# Note that this heuristic is valid only for the objective "minimize largest sum"!
144-
if use_heuristic_3 and objective==obj.MinimizeLargestSum:
211+
if use_heuristic_3 and objective == obj.MinimizeLargestSum:
145212
if sums_of_remaining_items[depth] + current_sums[0] <= current_sums[-1]:
146213
new_bins = binner.copy_bins(current_bins)
147-
for i in range(depth,numitems):
214+
for i in range(depth, numitems):
148215
binner.add_item_to_bin(new_bins, sorted_items[i], 0)
149-
binner.sort_by_ascending_sum(new_bins)
216+
binner.sort_by_ascending_sum(new_bins)
150217
new_depth = numitems
151218
stack.append((new_bins, new_depth))
152219
logger.debug(" Heuristic 3 activated")
153-
times_heuristic_3_activated+=1
220+
times_heuristic_3_activated += 1
154221
continue
155-
156222
next_item = sorted_items[depth]
157-
sum_of_remaining_items = sums_of_remaining_items[depth+1]
223+
sum_of_remaining_items = sums_of_remaining_items[depth + 1]
158224

159225
previous_bin_sum = None
160226

161227
# We want to insert the next item to the bin with the *smallest* sum first.
162228
# But, since we use a stack, we have to insert it to the bin with the *largest* sum first,
163229
# so that it is pushed deeper into the stack.
164230
# Therefore, we proceed in reverse, by *descending* order of sum.
165-
for bin_index in reversed(range(numbins)):
231+
for bin_index in reversed(range(numbins)):
166232

167233
# Heuristic 1: "If there are two subsets with the same sum, the current number is assigned to only one."
168234
current_bin_sum = current_sums[bin_index]
169235
if current_bin_sum == previous_bin_sum:
170-
continue
236+
continue
171237
previous_bin_sum = current_bin_sum
172238

173239
# Fast-lower-bound heuristic - before creating the new vertex.
174-
# Currently implemented only for two objectives: min-max and max-min.
240+
# Currently implemented only for three objectives: min-max, max-min and min-dist-avg.
175241
if use_fast_lower_bound:
176-
if objective==obj.MinimizeLargestSum:
242+
if objective == obj.MinimizeLargestSum:
177243
# "If an assignment to a subset creates a subset sum that equals or exceeds the largest subset sum in the best complete solution found so far, that branch is pruned from the tree."
178244
fast_lower_bound = max(current_bin_sum + binner.valueof(next_item), current_sums[-1])
179-
elif objective==obj.MaximizeSmallestSum:
245+
elif objective == obj.MaximizeSmallestSum:
180246
# An adaptation of the above heuristic to maximizing the smallest sum.
181-
if bin_index==0:
182-
new_smallest_sum = min(current_sums[0]+binner.valueof(next_item), current_sums[1])
247+
if bin_index == 0:
248+
new_smallest_sum = min(current_sums[0] + binner.valueof(next_item), current_sums[1])
183249
else:
184250
new_smallest_sum = current_sums[0]
185-
fast_lower_bound = -(new_smallest_sum+sum_of_remaining_items)
251+
fast_lower_bound = -(new_smallest_sum + sum_of_remaining_items)
252+
elif objective == obj.MinimizeDistAvg:
253+
if relative_value:
254+
fast_lower_bound = 0
255+
for i in range (numbins):
256+
# For each bin: we take off the sum that we added in the beginning of the algorithm (max(relative_value) * sum(items) - relative_value[i] * sum(items))
257+
# Then we check if the difference between the bin's sum and the relative AVG for bin i: (sum(items)*relative_value[i])
258+
# is positive and contributes to our final difference or negative and we will not add anything to our difference.
259+
fast_lower_bound = fast_lower_bound + max((current_sums[i]-(max(relative_value) * sum(items) - relative_value[i] * sum(items)))-sum(items)*relative_value[i],0)
260+
else:
261+
fast_lower_bound = 0
262+
avg = sum(items) / numbins
263+
for i in range (numbins):
264+
fast_lower_bound = fast_lower_bound + max(current_sums[i]-avg,0)
186265
else:
187266
fast_lower_bound = -np.inf
188267
if fast_lower_bound >= best_objective_value:
189268
times_fast_lower_bound_activated += 1
190269
continue
191270

192271
new_bins = binner.add_item_to_bin(binner.copy_bins(current_bins), next_item, bin_index)
193-
binner.sort_by_ascending_sum(new_bins)
272+
if not relative_value:
273+
binner.sort_by_ascending_sum(new_bins)
194274
new_sums = tuple(binner.sums(new_bins))
195275

196276
# Lower-bound heuristic.
197277
if use_lower_bound:
198-
lower_bound = objective.lower_bound(new_sums, sum_of_remaining_items, are_sums_in_ascending_order=True)
278+
lower_bound = objective.lower_bound(new_sums, sum_of_remaining_items, are_sums_in_ascending_order=False)
199279
if lower_bound >= best_objective_value:
200280
logger.debug(" Lower bound %f too large", lower_bound)
201281
times_lower_bound_activated += 1
202282
continue
203-
if use_set_of_seen_states:
283+
if use_set_of_seen_states:
204284
if new_sums in seen_states:
205285
logger.debug(" State %s already seen", new_sums)
206286
times_seen_state_skipped += 1
207287
continue
208-
seen_states.add(new_sums) # should be after if use_lower_bound
288+
seen_states.add(new_sums) # should be after if use_lower_bound
209289

210290
new_vertex = (new_bins, depth + 1)
211291
stack.append(new_vertex)
212292
intermediate_partitions_checked += 1
213293

214-
logger.info("Checked %d out of %d complete partitions, and %d intermediate partitions.", complete_partitions_checked, numbins**numitems, intermediate_partitions_checked)
215-
logger.info(" Heuristics: fast lower bound = %d, lower bound = %d, seen state = %d, heuristic 3 = %d.", times_fast_lower_bound_activated, times_lower_bound_activated, times_seen_state_skipped, times_heuristic_3_activated)
294+
logger.info("Checked %d out of %d complete partitions, and %d intermediate partitions.",
295+
complete_partitions_checked, numbins ** numitems, intermediate_partitions_checked)
296+
logger.info(" Heuristics: fast lower bound = %d, lower bound = %d, seen state = %d, heuristic 3 = %d.",
297+
times_fast_lower_bound_activated, times_lower_bound_activated, times_seen_state_skipped,
298+
times_heuristic_3_activated)
216299

300+
301+
if (relative_value):
302+
# For each bin we remove the value that we added in the beginning of the algorithm.
303+
for i in range(numbins):
304+
binner.remove_item_from_bin(best_bins, i, 0)
305+
306+
for i in range(numbins):
307+
binner.sums(best_bins)[i] = math.floor(binner.sums(best_bins)[i])
217308
return best_bins
218309

219310

220311
if __name__ == "__main__":
221312
import doctest, sys
313+
222314
(failures, tests) = doctest.testmod(report=True, optionflags=doctest.FAIL_FAST)
223315
print("{} failures, {} tests".format(failures, tests))
224-
if failures>0:
316+
if failures > 0:
225317
sys.exit()
226-
318+
227319
# DEMO
228320
logger.setLevel(logging.INFO)
229321
logger.addHandler(logging.StreamHandler())
230322

231323
from prtpy import BinnerKeepingContents, BinnerKeepingSums
232-
anytime(BinnerKeepingContents(), 2, [4,5,6,7,8], objective=obj.MinimizeLargestSum)
324+
325+
anytime(BinnerKeepingContents(), 2, [4, 5, 6, 7, 8], objective=obj.MinimizeLargestSum)
233326

234327
walter_numbers = [46, 39, 27, 26, 16, 13, 10]
235328
anytime(BinnerKeepingContents(), 3, walter_numbers, objective=obj.MaximizeSmallestSum)
236329
anytime(BinnerKeepingContents(), 3, walter_numbers, objective=obj.MinimizeLargestSum)
237330

238-
random_numbers = np.random.randint(1, 2**16-1, 15, dtype=np.int64)
331+
random_numbers = np.random.randint(1, 2 ** 16 - 1, 15, dtype=np.int64)
239332
anytime(BinnerKeepingSums(), 3, random_numbers, objective=obj.MaximizeSmallestSum)
240333
anytime(BinnerKeepingSums(), 3, random_numbers, objective=obj.MinimizeLargestSum)
241334
anytime(BinnerKeepingSums(), 3, random_numbers, objective=obj.MinimizeDifference)

0 commit comments

Comments
 (0)