Faster multi-process handling and clean up

This commit is contained in:
Sebastian Seedorf
2018-02-21 14:01:22 +01:00
parent db0becbbe7
commit fd23752f17

View File

@@ -1,178 +1,168 @@
import numpy as np import numpy as np
import threading
import queue
import time
INPUT_FILE = "data/medium.in" INPUT_FILE = "data/medium.in"
POPULATION = 50 POPULATION = 50
MUTATION_AMOUNT = 2000 MUTATION_AMOUNT = 200#1000000
ITERATIONS = 30 ITERATIONS = 100
THREAD_COUNT = 20
data = [line for line in open(INPUT_FILE)] def mutation(entries):
params = list(map(int, data[0].split(" ")))
data = [[0 if x=="T" else 1 for x in line] for line in data[1:]]
data = np.array(data)[:, :-1]
clusters = np.arange(params[0]*params[1]).reshape((1, params[0], params[1]))
clusters = np.repeat(clusters, POPULATION, axis=0)+1
print(params)
print(data)
print(clusters[0])
values = {} def mutation_entry(entry, args):
first = True HEIGHT, WIDTH, MIN_PIECES, MAX_SIZE, _ = args
def get_fitness(vals, clean=False): def expand_hztl(entry, x, y, direction):
fit = 0 val = entry[y, x]
for key, val in vals.items(): ynew = y+direction
if key == 0: oval = entry[ynew, x]
continue # left
size = sum(val) nx = x
if size <= params[3] and min(val) >= params[2]: while nx > 0 and val == entry[y, nx-1]:
fit += size entry[ynew, nx] = val
if clean: nx -= 1
continue nval = entry[ynew, nx] if nx != x else oval
size_diff = params[3]-size entry[ynew, nx] = val
if size_diff < 0: if nval != 0:
fit += 1-size_diff**2 while nx > 0 and nval == entry[ynew, nx-1]:
elif size_diff > 0: nx -= 1
fit += np.exp(-abs(size-params[3])) entry[ynew, nx] = 0
return fit # right
nx = x
while nx < WIDTH-1 and val == entry[y, nx+1]:
entry[ynew, nx] = val
nx += 1
nval = entry[ynew, nx] if nx != x else oval
entry[ynew, nx] = val
if nval != 0:
while nx < WIDTH-1 and nval == entry[ynew, nx+1]:
nx += 1
entry[ynew, nx] = 0
# Grow the cluster found at (y, x) sideways into the neighbouring column
# x+direction. The vertical run of cells in column x that carry the same value as
# (y, x) is copied into that column, and whatever cluster got cut at either end of
# the copied run has the rest of its run in the new column reset to 0.
# `entry` is modified in place and nothing is returned; HEIGHT is read from the
# enclosing scope.
# NOTE(review): indentation was lost in this rendering; the comments below assume
# that the two statements following each `while` both belong to its body.
def expand_vert(entry, x, y, direction):
val = entry[y, x]
xnew = x+direction
# remember what was at (y, xnew): the upward pass may overwrite it before the downward pass reads it
oval = entry[y, xnew]
# upward pass (towards row 0): copy the run of `val` above (y, x) into column xnew
ny = y
while ny > 0 and val == entry[ny-1, x]:
entry[ny, xnew] = val
ny -= 1
# value that is displaced at the upper end of the copied run
nval = entry[ny, xnew] if ny != y else oval
entry[ny, xnew] = val
if nval != 0:
# clear the remainder of the displaced cluster's run above the copied cells
while ny > 0 and nval == entry[ny-1, xnew]:
ny -= 1
entry[ny, xnew] = 0
# downward pass (towards row HEIGHT-1): same procedure for the run below (y, x)
ny = y
while ny < HEIGHT-1 and val == entry[ny+1, x]:
entry[ny, xnew] = val
ny += 1
# (y, xnew) already holds `val` after the upward pass, hence the fallback to the saved `oval`
nval = entry[ny, xnew] if ny != y else oval
entry[ny, xnew] = val
if nval != 0:
# clear the remainder of the displaced cluster's run below the copied cells
while ny < HEIGHT-1 and nval == entry[ny+1, xnew]:
ny += 1
entry[ny, xnew] = 0
def get_left_bound(clust, y, x): for _ in range(np.random.random_integers(MUTATION_AMOUNT)):
val = clust[y, x] y = np.random.random_integers(HEIGHT)-1
while x > 0 and val == clust[y, x-1]: x = np.random.random_integers(WIDTH)-1
x -= 1 if entry[y, x] == 0: # create new cluser
return (y, x) entry[y, x] = np.amax(entry)+1
z = np.random.random()
if z < 0.25: # expand to top
if y > 0:
expand_hztl(entry, x, y, -1)
elif z < 0.5: # expand to left
if x > 0:
expand_vert(entry, x, y, -1)
elif z < 0.75: # expand to bottom
if y < HEIGHT-1:
expand_hztl(entry, x, y, 1)
else: # expand to right
if x < WIDTH-1:
expand_vert(entry, x, y, 1)
return entry
sub_arr, args = entries
for idx in range(sub_arr.shape[0]):
sub_arr[idx,:,:] = mutation_entry(sub_arr[idx,:,:], args)
return sub_arr
def get_right_bound(clust, y, x): def get_fitnesses(entries):
val = clust[y, x] def get_fitness(entry, args):
while x+1 < clust.shape[1] and val == clust[y, x+1]: HEIGHT, WIDTH, MIN_PIECES, MAX_SIZE, (data, data_inv) = args
x += 1
return (y, x)
def get_fitness_per_cluster(cluster):
    """Score a single cluster.

    Args:
        cluster: indexable pair ``[size, min_count]``. ``cluster[0]`` is
            compared against ``MAX_SIZE`` and ``cluster[1]`` against
            ``MIN_PIECES`` (both read from the enclosing scope).

    Returns:
        ``[a, b]`` where ``a`` is the clean score (the size when the cluster
        satisfies both limits, otherwise 0) and ``b`` is the fitness score
        used for selection.
    """
    size = cluster[0]

    # Clean score: only clusters that satisfy both limits count.
    a = size if size <= MAX_SIZE and cluster[1] >= MIN_PIECES else 0

    size_diff = MAX_SIZE - size
    if size_diff < 0:
        # Oversized: linear penalty growing with the excess.
        b = MAX_SIZE / 8 + size_diff
    elif size_diff > 0:
        # Undersized: reward decays geometrically with the missing cells.
        # The base must be a float: NumPy refuses to raise an integer to a
        # negative integer power, and `size_diff` is a NumPy integer when
        # this function is driven by np.apply_along_axis.
        b = np.power(3.0, -size_diff) * MAX_SIZE / 8
    else:
        # Exactly MAX_SIZE: one point above the clean score.
        b = a + 1
    return [a, b]  # a = clean score ; b = fitness score
def get_top_bound(clust, y, x): mname, mcount = np.unique(entry*data, return_counts=True)
val = clust[y, x] mdict = dict(zip(mname, mcount))
while y > 0 and val == clust[y-1, x]: tname, tcount = np.unique(entry*data_inv, return_counts=True)
y -= 1 tdict = dict(zip(tname, tcount))
return (y, x) c = np.array([[mdict.get(key) or 0, tdict.get(key) or 0] for key in (mdict.keys() | tdict.keys()) if key != 0])
c = np.vstack((np.sum(c, axis=1), np.min(c, axis=1))).T
return sum(np.apply_along_axis(get_fitness_per_cluster, 1, c))
sub_arr, args = entries
fitnesses = np.zeros((sub_arr.shape[0], 2))
for idx in range(sub_arr.shape[0]):
x = get_fitness(sub_arr[idx,:,:], args)
#print(x)
fitnesses[idx] = get_fitness(sub_arr[idx,:,:], args)
return fitnesses
def get_bottom_bound(clust, y, x): if __name__ == '__main__':
val = clust[y, x] import numpy as np
while y+1 < clust.shape[0] and val == clust[y+1, x]: import multiprocessing
y += 1
return (y, x)
def thread_map(func, array, data=None):
    """Apply ``func`` to slices of ``array`` in parallel worker processes.

    Despite the name this uses a ``multiprocessing.Pool``, not threads.

    Args:
        func: picklable callable that receives one ``(sub_array, args)``
            tuple, where ``args`` is
            ``(HEIGHT, WIDTH, MIN_PIECES, MAX_SIZE, data)``, and returns an
            array.
        array: array that is split along its first axis into at most
            ``cpu_count()`` chunks.
        data: optional extra payload forwarded unchanged to every call.

    Returns:
        The per-chunk results concatenated along the first axis, in the
        original order.
    """
    n_chunks = min(multiprocessing.cpu_count(), len(array))
    args = (HEIGHT, WIDTH, MIN_PIECES, MAX_SIZE, data)
    chunks = [(sub_arr, args) for sub_arr in np.array_split(array, n_chunks)]

    # No point in starting more workers than there are chunks to process.
    pool = multiprocessing.Pool(processes=n_chunks)
    try:
        individual_results = pool.map(func, chunks)
    finally:
        # Always shut the workers down, even when func raised in a worker,
        # so failed iterations do not leak processes.
        pool.close()
        pool.join()
    return np.concatenate(individual_results)
def set_area(clust, y1, x1, y2, x2, value): data = [line for line in open(INPUT_FILE)]
for y in range(y1, y2+1): params = list(map(int, data[0].split(" ")))
for x in range(x1, x2+1): HEIGHT = params[0]
clust[y, x] = value WIDTH = params[1]
return clust MIN_PIECES = params[2]
MAX_SIZE = params[3]
data = [[0 if x=="T" else 1 for x in line] for line in data[1:]]
data = np.array(data)[:, :-1]
data_inv = 1-data
clusters = np.arange(HEIGHT*WIDTH).reshape((1, HEIGHT, WIDTH))
clusters = np.repeat(clusters, POPULATION, axis=0)+1
for iteration in range(ITERATIONS):
def mutation(clust): # mutation
for _ in range(np.random.random_integers(MUTATION_AMOUNT)): print(iteration, "Mutation")
y = np.random.random_integers(params[0])-1 clusters = thread_map(mutation, clusters)
x = np.random.random_integers(params[1])-1 # get fitness
z = np.random.random() print(iteration, "Get Fitness")
if z < 0.2: fitnesses = thread_map(get_fitnesses, clusters, (data, data_inv))
if y > 0: # expand to top max_idx = np.argmax(fitnesses[:, 0])
yn = y-1 print(clusters[max_idx])
_, inner_left = get_left_bound(clust, y, x) print(iteration, max(fitnesses[:, 0]))
_, outer_left = get_left_bound(clust, yn, inner_left) # selection
_, inner_right = get_right_bound(clust, y, x) print(iteration, "Selection")
_, outer_right = get_right_bound(clust, yn, inner_right) z_exp = [np.exp(i) for i in fitnesses[:, 1]]
clust = set_area(clust, yn, outer_left, yn, outer_right, 0) sum_z_exp = sum(z_exp)
clust = set_area(clust, yn, inner_left, yn, inner_right, clust[y, x]) softmax = [i / sum_z_exp for i in z_exp]
elif z < 0.4: idx = np.random.choice(POPULATION, POPULATION, p=softmax)
if x > 0: # expand to left clusters = clusters[idx, :, :]
xn = x-1
inner_top, _ = get_top_bound(clust, y, x)
outer_top, _ = get_top_bound(clust, inner_top, xn)
inner_bot, _ = get_bottom_bound(clust, y, x)
outer_bot, _ = get_bottom_bound(clust, inner_bot, xn)
clust = set_area(clust, outer_top, xn, outer_bot, xn, 0)
clust = set_area(clust, inner_top, xn, inner_bot, xn, clust[y, x])
elif z < 0.6:
if y < params[0]-1: # expand to bottom
yn = y+1
_, inner_left = get_left_bound(clust, y, x)
_, outer_left = get_left_bound(clust, yn, inner_left)
_, inner_right = get_right_bound(clust, y, x)
_, outer_right = get_right_bound(clust, yn, inner_right)
clust = set_area(clust, yn, outer_left, yn, outer_right, 0)
clust = set_area(clust, yn, inner_left, yn, inner_right, clust[y, x])
elif z < 0.8:
if x < params[1]-1: # expand to right
xn = x+1
inner_top, _ = get_top_bound(clust, y, x)
outer_top, _ = get_top_bound(clust, inner_top, xn)
inner_bot, _ = get_bottom_bound(clust, y, x)
outer_bot, _ = get_bottom_bound(clust, inner_bot, xn)
clust = set_area(clust, outer_top, xn, outer_bot, xn, 0)
clust = set_area(clust, inner_top, xn, inner_bot, xn, clust[y, x])
else:
pass#clust[y, x] = np.amax(clust)+1
return clust
def myfunc(a, b):
    """Tally one occurrence of index ``b`` (0 or 1) for key ``a`` in ``values``.

    While the module-level flag ``first`` is set, the call only clears the
    flag and records nothing.
    """
    global first, values
    if first:
        first = False
        return
    # Create the two-slot counter on first sight of `a`, then bump slot `b`.
    values.setdefault(a, [0, 0])[b] += 1
# Old driver script: mutate the population once, then repeat
# fitness -> selection -> report -> mutation for ITERATIONS rounds and finish
# with a final evaluation that uses clean=True.
# NOTE(review): indentation was lost in this rendering, so the nesting of the
# statements below has to be inferred from the section comments.
# Element-wise wrapper: calls myfunc for every pair of cells when applied to two arrays.
vfunc = np.vectorize(myfunc)
# mutation (initial pass over every individual)
for i in range(POPULATION):
if i % 20 == 0:
print("mutation", i)
clusters[i] = mutation(clusters[i])
for iteration in range(ITERATIONS):
# calc fitness: reset the module-level tally state, fill it via vfunc, then score it
fitnesses = np.zeros((POPULATION, ))
for i, cluster in enumerate(clusters):
if i % 20 == 0:
print("fitness", i, iteration)
values = {}
first = True
vfunc(cluster, data)
fitnesses[i] = get_fitness(values)
# select: draw POPULATION indices with probabilities given by a softmax over the fitnesses
z_exp = [np.exp(i) for i in fitnesses]
sum_z_exp = sum(z_exp)
softmax = [i / sum_z_exp for i in z_exp]
idx = np.random.choice(POPULATION, POPULATION, p=softmax)
clusters = clusters[idx, :, :]
# print best
# NOTE(review): `clusters` was already resampled above while `fitnesses` still refers to
# the pre-selection order, so clusters[max_idx] is not necessarily the best individual.
max_idx = np.argmax(fitnesses)
print(clusters[max_idx])
print(iteration, max(fitnesses))
# mutation
for i in range(POPULATION):
clusters[i] = mutation(clusters[i])
# final evaluation of the population, this time with clean=True
fitnesses = np.zeros((POPULATION, ))
for i, cluster in enumerate(clusters):
values = {}
first = True
vfunc(cluster, data)
fitnesses[i] = get_fitness(values, clean=True)
max_idx = np.argmax(fitnesses)
print(clusters[max_idx])
print(max(fitnesses))