From fd23752f17891b1e760f3e240a47290b597d88f7 Mon Sep 17 00:00:00 2001 From: Sebastian Seedorf Date: Wed, 21 Feb 2018 14:01:22 +0100 Subject: [PATCH] Faster multi-process handling and clean up --- __main__.py | 310 +++++++++++++++++++++++++--------------------------- 1 file changed, 150 insertions(+), 160 deletions(-) diff --git a/__main__.py b/__main__.py index 3882931..839cea7 100644 --- a/__main__.py +++ b/__main__.py @@ -1,178 +1,168 @@ import numpy as np -import threading -import queue -import time INPUT_FILE = "data/medium.in" POPULATION = 50 -MUTATION_AMOUNT = 2000 -ITERATIONS = 30 -THREAD_COUNT = 20 +MUTATION_AMOUNT = 200#1000000 +ITERATIONS = 100 -data = [line for line in open(INPUT_FILE)] -params = list(map(int, data[0].split(" "))) -data = [[0 if x=="T" else 1 for x in line] for line in data[1:]] -data = np.array(data)[:, :-1] -clusters = np.arange(params[0]*params[1]).reshape((1, params[0], params[1])) -clusters = np.repeat(clusters, POPULATION, axis=0)+1 -print(params) -print(data) -print(clusters[0]) +def mutation(entries): -values = {} -first = True + def mutation_entry(entry, args): + HEIGHT, WIDTH, MIN_PIECES, MAX_SIZE, _ = args -def get_fitness(vals, clean=False): - fit = 0 - for key, val in vals.items(): - if key == 0: - continue - size = sum(val) - if size <= params[3] and min(val) >= params[2]: - fit += size - if clean: - continue - size_diff = params[3]-size - if size_diff < 0: - fit += 1-size_diff**2 - elif size_diff > 0: - fit += np.exp(-abs(size-params[3])) - return fit + def expand_hztl(entry, x, y, direction): + val = entry[y, x] + ynew = y+direction + oval = entry[ynew, x] + # left + nx = x + while nx > 0 and val == entry[y, nx-1]: + entry[ynew, nx] = val + nx -= 1 + nval = entry[ynew, nx] if nx != x else oval + entry[ynew, nx] = val + if nval != 0: + while nx > 0 and nval == entry[ynew, nx-1]: + nx -= 1 + entry[ynew, nx] = 0 + # right + nx = x + while nx < WIDTH-1 and val == entry[y, nx+1]: + entry[ynew, nx] = val + nx += 1 + nval = entry[ynew, nx] if nx != x else oval + entry[ynew, nx] = val + if nval != 0: + while nx < WIDTH-1 and nval == entry[ynew, nx+1]: + nx += 1 + entry[ynew, nx] = 0 + def expand_vert(entry, x, y, direction): + val = entry[y, x] + xnew = x+direction + oval = entry[y, xnew] + # left + ny = y + while ny > 0 and val == entry[ny-1, x]: + entry[ny, xnew] = val + ny -= 1 + nval = entry[ny, xnew] if ny != y else oval + entry[ny, xnew] = val + if nval != 0: + while ny > 0 and nval == entry[ny-1, xnew]: + ny -= 1 + entry[ny, xnew] = 0 + # right + ny = y + while ny < HEIGHT-1 and val == entry[ny+1, x]: + entry[ny, xnew] = val + ny += 1 + nval = entry[ny, xnew] if ny != y else oval + entry[ny, xnew] = val + if nval != 0: + while ny < HEIGHT-1 and nval == entry[ny+1, xnew]: + ny += 1 + entry[ny, xnew] = 0 -def get_left_bound(clust, y, x): - val = clust[y, x] - while x > 0 and val == clust[y, x-1]: - x -= 1 - return (y, x) + for _ in range(np.random.random_integers(MUTATION_AMOUNT)): + y = np.random.random_integers(HEIGHT)-1 + x = np.random.random_integers(WIDTH)-1 + if entry[y, x] == 0: # create new cluser + entry[y, x] = np.amax(entry)+1 + z = np.random.random() + if z < 0.25: # expand to top + if y > 0: + expand_hztl(entry, x, y, -1) + elif z < 0.5: # expand to left + if x > 0: + expand_vert(entry, x, y, -1) + elif z < 0.75: # expand to bottom + if y < HEIGHT-1: + expand_hztl(entry, x, y, 1) + else: # expand to right + if x < WIDTH-1: + expand_vert(entry, x, y, 1) + return entry + sub_arr, args = entries + for idx in range(sub_arr.shape[0]): + sub_arr[idx,:,:] = mutation_entry(sub_arr[idx,:,:], args) + return sub_arr -def get_right_bound(clust, y, x): - val = clust[y, x] - while x+1 < clust.shape[1] and val == clust[y, x+1]: - x += 1 - return (y, x) +def get_fitnesses(entries): + def get_fitness(entry, args): + HEIGHT, WIDTH, MIN_PIECES, MAX_SIZE, (data, data_inv) = args + def get_fitness_per_cluster(cluster): + fit = 0 + size = cluster[0] + a = 0 + b = 0 + if size <= MAX_SIZE and cluster[1] >= MIN_PIECES: + a = size + size_diff = MAX_SIZE-size + if size_diff < 0: + b = MAX_SIZE/8+size_diff + elif size_diff > 0: + b = np.power(3, -size_diff)*MAX_SIZE/8 + else: + b = a+1 + return [a, b] # a = clean score ; b = fitness score -def get_top_bound(clust, y, x): - val = clust[y, x] - while y > 0 and val == clust[y-1, x]: - y -= 1 - return (y, x) + mname, mcount = np.unique(entry*data, return_counts=True) + mdict = dict(zip(mname, mcount)) + tname, tcount = np.unique(entry*data_inv, return_counts=True) + tdict = dict(zip(tname, tcount)) + c = np.array([[mdict.get(key) or 0, tdict.get(key) or 0] for key in (mdict.keys() | tdict.keys()) if key != 0]) + c = np.vstack((np.sum(c, axis=1), np.min(c, axis=1))).T + return sum(np.apply_along_axis(get_fitness_per_cluster, 1, c)) + sub_arr, args = entries + fitnesses = np.zeros((sub_arr.shape[0], 2)) + for idx in range(sub_arr.shape[0]): + x = get_fitness(sub_arr[idx,:,:], args) + #print(x) + fitnesses[idx] = get_fitness(sub_arr[idx,:,:], args) + return fitnesses -def get_bottom_bound(clust, y, x): - val = clust[y, x] - while y+1 < clust.shape[0] and val == clust[y+1, x]: - y += 1 - return (y, x) +if __name__ == '__main__': + import numpy as np + import multiprocessing + def thread_map(func, array, data=None): + chunks = [(sub_arr, (HEIGHT, WIDTH, MIN_PIECES, MAX_SIZE, data)) for sub_arr in np.array_split(array, min(multiprocessing.cpu_count(), len(array)))] + pool = multiprocessing.Pool() + individual_results = pool.map(func, chunks) + pool.close() + pool.join() + return np.concatenate(individual_results) -def set_area(clust, y1, x1, y2, x2, value): - for y in range(y1, y2+1): - for x in range(x1, x2+1): - clust[y, x] = value - return clust + data = [line for line in open(INPUT_FILE)] + params = list(map(int, data[0].split(" "))) + HEIGHT = params[0] + WIDTH = params[1] + MIN_PIECES = params[2] + MAX_SIZE = params[3] + data = [[0 if x=="T" else 1 for x in line] for line in data[1:]] + data = np.array(data)[:, :-1] + data_inv = 1-data + clusters = np.arange(HEIGHT*WIDTH).reshape((1, HEIGHT, WIDTH)) + clusters = np.repeat(clusters, POPULATION, axis=0)+1 - -def mutation(clust): - for _ in range(np.random.random_integers(MUTATION_AMOUNT)): - y = np.random.random_integers(params[0])-1 - x = np.random.random_integers(params[1])-1 - z = np.random.random() - if z < 0.2: - if y > 0: # expand to top - yn = y-1 - _, inner_left = get_left_bound(clust, y, x) - _, outer_left = get_left_bound(clust, yn, inner_left) - _, inner_right = get_right_bound(clust, y, x) - _, outer_right = get_right_bound(clust, yn, inner_right) - clust = set_area(clust, yn, outer_left, yn, outer_right, 0) - clust = set_area(clust, yn, inner_left, yn, inner_right, clust[y, x]) - elif z < 0.4: - if x > 0: # expand to left - xn = x-1 - inner_top, _ = get_top_bound(clust, y, x) - outer_top, _ = get_top_bound(clust, inner_top, xn) - inner_bot, _ = get_bottom_bound(clust, y, x) - outer_bot, _ = get_bottom_bound(clust, inner_bot, xn) - clust = set_area(clust, outer_top, xn, outer_bot, xn, 0) - clust = set_area(clust, inner_top, xn, inner_bot, xn, clust[y, x]) - elif z < 0.6: - if y < params[0]-1: # expand to bottom - yn = y+1 - _, inner_left = get_left_bound(clust, y, x) - _, outer_left = get_left_bound(clust, yn, inner_left) - _, inner_right = get_right_bound(clust, y, x) - _, outer_right = get_right_bound(clust, yn, inner_right) - clust = set_area(clust, yn, outer_left, yn, outer_right, 0) - clust = set_area(clust, yn, inner_left, yn, inner_right, clust[y, x]) - elif z < 0.8: - if x < params[1]-1: # expand to right - xn = x+1 - inner_top, _ = get_top_bound(clust, y, x) - outer_top, _ = get_top_bound(clust, inner_top, xn) - inner_bot, _ = get_bottom_bound(clust, y, x) - outer_bot, _ = get_bottom_bound(clust, inner_bot, xn) - clust = set_area(clust, outer_top, xn, outer_bot, xn, 0) - clust = set_area(clust, inner_top, xn, inner_bot, xn, clust[y, x]) - else: - pass#clust[y, x] = np.amax(clust)+1 - return clust - - - - - - -def myfunc(a, b): - global first, values - if first: - first = False - return - if a not in values: - values[a] = [0, 0] - values[a][b] += 1 -vfunc = np.vectorize(myfunc) - -# mutation -for i in range(POPULATION): - if i % 20 == 0: - print("mutation", i) - clusters[i] = mutation(clusters[i]) - -for iteration in range(ITERATIONS): - # calc fitness - fitnesses = np.zeros((POPULATION, )) - for i, cluster in enumerate(clusters): - if i % 20 == 0: - print("fitness", i, iteration) - values = {} - first = True - vfunc(cluster, data) - fitnesses[i] = get_fitness(values) - # select - z_exp = [np.exp(i) for i in fitnesses] - sum_z_exp = sum(z_exp) - softmax = [i / sum_z_exp for i in z_exp] - idx = np.random.choice(POPULATION, POPULATION, p=softmax) - clusters = clusters[idx, :, :] - - # print best - max_idx = np.argmax(fitnesses) - print(clusters[max_idx]) - print(iteration, max(fitnesses)) - - # mutation - for i in range(POPULATION): - clusters[i] = mutation(clusters[i]) - -fitnesses = np.zeros((POPULATION, )) -for i, cluster in enumerate(clusters): - values = {} - first = True - vfunc(cluster, data) - fitnesses[i] = get_fitness(values, clean=True) -max_idx = np.argmax(fitnesses) -print(clusters[max_idx]) -print(max(fitnesses)) \ No newline at end of file + for iteration in range(ITERATIONS): + # mutation + print(iteration, "Mutation") + clusters = thread_map(mutation, clusters) + # get fitness + print(iteration, "Get Fitness") + fitnesses = thread_map(get_fitnesses, clusters, (data, data_inv)) + max_idx = np.argmax(fitnesses[:, 0]) + print(clusters[max_idx]) + print(iteration, max(fitnesses[:, 0])) + # selection + print(iteration, "Selection") + z_exp = [np.exp(i) for i in fitnesses[:, 1]] + sum_z_exp = sum(z_exp) + softmax = [i / sum_z_exp for i in z_exp] + idx = np.random.choice(POPULATION, POPULATION, p=softmax) + clusters = clusters[idx, :, :]