From fd23752f17891b1e760f3e240a47290b597d88f7 Mon Sep 17 00:00:00 2001
From: Sebastian Seedorf <seedorf96@samoa.zedat.fu-berlin.de>
Date: Wed, 21 Feb 2018 14:01:22 +0100
Subject: [PATCH] Faster multi-process handling and clean up

---
 __main__.py | 310 +++++++++++++++++++++++++---------------------------
 1 file changed, 150 insertions(+), 160 deletions(-)

diff --git a/__main__.py b/__main__.py
index 3882931..839cea7 100644
--- a/__main__.py
+++ b/__main__.py
@@ -1,178 +1,168 @@
 import numpy as np
-import threading
-import queue
-import time
 
 INPUT_FILE = "data/medium.in"
 POPULATION = 50
-MUTATION_AMOUNT = 2000
-ITERATIONS = 30
-THREAD_COUNT = 20
+MUTATION_AMOUNT = 200#1000000
+ITERATIONS = 100
 
-data = [line for line in open(INPUT_FILE)]
-params = list(map(int, data[0].split(" ")))
-data = [[0 if x=="T" else 1 for x in line] for line in data[1:]]
-data = np.array(data)[:, :-1]
-clusters = np.arange(params[0]*params[1]).reshape((1, params[0], params[1]))
-clusters = np.repeat(clusters, POPULATION, axis=0)+1
-print(params)
-print(data)
-print(clusters[0])
+def mutation(entries):
 
-values = {}
-first = True
+    def mutation_entry(entry, args):
+        HEIGHT, WIDTH, MIN_PIECES, MAX_SIZE, _ = args
 
-def get_fitness(vals, clean=False):
-    fit = 0
-    for key, val in vals.items():
-        if key == 0:
-            continue
-        size = sum(val)
-        if size <= params[3] and min(val) >= params[2]:
-            fit += size
-        if clean:
-            continue
-        size_diff = params[3]-size
-        if size_diff < 0:
-            fit += 1-size_diff**2
-        elif size_diff > 0:
-            fit += np.exp(-abs(size-params[3]))
-    return fit
+        def expand_hztl(entry, x, y, direction):  # grow the cluster at (y, x) into row y+direction, in place
+            val = entry[y, x]  # id of the cluster being grown
+            ynew = y+direction  # target row; the callers check the bounds before calling
+            oval = entry[ynew, x]  # target-row value under x before anything is overwritten
+            # left: copy val under the run of val cells to the left of x in row y
+            nx = x
+            while nx > 0 and val == entry[y, nx-1]:
+                entry[ynew, nx] = val
+                nx -= 1
+            nval = entry[ynew, nx] if nx != x else oval  # id displaced at the left end of the run
+            entry[ynew, nx] = val
+            if nval != 0:
+                while nx > 0 and nval == entry[ynew, nx-1]:  # zero the displaced id's cells further left in the target row
+                    nx -= 1
+                    entry[ynew, nx] = 0
+            # right: the same procedure mirrored to the right of x
+            nx = x
+            while nx < WIDTH-1 and val == entry[y, nx+1]:
+                entry[ynew, nx] = val
+                nx += 1
+            nval = entry[ynew, nx] if nx != x else oval  # oval is needed here: the left pass already overwrote column x
+            entry[ynew, nx] = val
+            if nval != 0:
+                while nx < WIDTH-1 and nval == entry[ynew, nx+1]:  # zero the displaced id's cells further right in the target row
+                    nx += 1
+                    entry[ynew, nx] = 0
 
+        def expand_vert(entry, x, y, direction):  # grow the cluster at (y, x) into column x+direction, in place
+            val = entry[y, x]  # id of the cluster being grown
+            xnew = x+direction  # target column; the callers check the bounds before calling
+            oval = entry[y, xnew]  # target-column value beside y before anything is overwritten
+            # up: copy val beside the run of val cells above y in column x
+            ny = y
+            while ny > 0 and val == entry[ny-1, x]:
+                entry[ny, xnew] = val
+                ny -= 1
+            nval = entry[ny, xnew] if ny != y else oval  # id displaced at the top end of the run
+            entry[ny, xnew] = val
+            if nval != 0:
+                while ny > 0 and nval == entry[ny-1, xnew]:  # zero the displaced id's cells further up in the target column
+                    ny -= 1
+                    entry[ny, xnew] = 0
+            # down: the same procedure mirrored below y
+            ny = y
+            while ny < HEIGHT-1 and val == entry[ny+1, x]:
+                entry[ny, xnew] = val
+                ny += 1
+            nval = entry[ny, xnew] if ny != y else oval  # oval is needed here: the upward pass already overwrote row y
+            entry[ny, xnew] = val
+            if nval != 0:
+                while ny < HEIGHT-1 and nval == entry[ny+1, xnew]:  # zero the displaced id's cells further down in the target column
+                    ny += 1
+                    entry[ny, xnew] = 0
 
-def get_left_bound(clust, y, x):
-    val = clust[y, x]
-    while x > 0 and val == clust[y, x-1]:
-        x -= 1
-    return (y, x)
+        for _ in range(np.random.randint(1, MUTATION_AMOUNT+1)):  # 1..MUTATION_AMOUNT steps; randint replaces the deprecated random_integers
+            y = np.random.randint(HEIGHT)  # uniform row index in [0, HEIGHT)
+            x = np.random.randint(WIDTH)  # uniform column index in [0, WIDTH)
+            if entry[y, x] == 0:  # create new cluster with a fresh, unused id
+                entry[y, x] = np.amax(entry)+1
+            z = np.random.random()  # picks one of the four directions with equal probability
+            if z < 0.25:  # expand to top
+                if y > 0:
+                    expand_hztl(entry, x, y, -1)
+            elif z < 0.5:  # expand to left
+                if x > 0:
+                    expand_vert(entry, x, y, -1)
+            elif z < 0.75:  # expand to bottom
+                if y < HEIGHT-1:
+                    expand_hztl(entry, x, y, 1)
+            else:  # expand to right
+                if x < WIDTH-1:
+                    expand_vert(entry, x, y, 1)
+        return entry
 
+    population, settings = entries  # (chunk of individuals, shared parameter tuple)
+    for individual in population:  # each item is a 2-D view into the chunk
+        individual[...] = mutation_entry(individual, settings)  # mutated in place; result written back to the same slot
+    return population
 
-def get_right_bound(clust, y, x):
-    val = clust[y, x]
-    while x+1 < clust.shape[1] and val == clust[y, x+1]:
-        x += 1
-    return (y, x)
+def get_fitnesses(entries):
+    def get_fitness(entry, args):
+        HEIGHT, WIDTH, MIN_PIECES, MAX_SIZE, (data, data_inv) = args
 
+        def get_fitness_per_cluster(cluster):
+            """Return [clean score, fitness score] for one [total size, smaller class count] row."""
+            size = cluster[0]
+            # Clean score: the cluster's size if it satisfies both constraints, else 0.
+            a = size if size <= MAX_SIZE and cluster[1] >= MIN_PIECES else 0
+            size_diff = MAX_SIZE-size
+            if size_diff < 0:
+                b = MAX_SIZE/8+size_diff
+            elif size_diff > 0:
+                # Float base: NumPy raises ValueError for integer ** negative integer.
+                b = np.power(3.0, -size_diff)*MAX_SIZE/8
+            else:
+                b = a+1
+            # Floats, so apply_along_axis cannot pick an integer buffer and truncate b.
+            return [float(a), float(b)]  # a = clean score ; b = fitness score
 
-def get_top_bound(clust, y, x):
-    val = clust[y, x]
-    while y > 0 and val == clust[y-1, x]:
-        y -= 1
-    return (y, x)
+        # Count cells per cluster id twice: where data is 1 and where data_inv is 1.
+        mdict = dict(zip(*np.unique(entry*data, return_counts=True)))
+        tdict = dict(zip(*np.unique(entry*data_inv, return_counts=True)))
+        ids = [key for key in (mdict.keys() | tdict.keys()) if key != 0]  # id 0 is the masked-out/unassigned bucket
+        c = np.array([[mdict.get(key, 0), tdict.get(key, 0)] for key in ids])
+        c = np.vstack((np.sum(c, axis=1), np.min(c, axis=1))).T  # rows of [total size, smaller of the two counts]
+        return sum(np.apply_along_axis(get_fitness_per_cluster, 1, c))
 
+    sub_arr, args = entries
+    # One row per individual: column 0 = clean score, column 1 = selection fitness.
+    fitnesses = np.zeros((sub_arr.shape[0], 2))
+    for idx, entry in enumerate(sub_arr):
+        # Score each individual exactly once; a leftover debug call used to evaluate it twice.
+        fitnesses[idx] = get_fitness(entry, args)
+    return fitnesses
 
-def get_bottom_bound(clust, y, x):
-    val = clust[y, x]
-    while y+1 < clust.shape[0] and val == clust[y+1, x]:
-        y += 1
-    return (y, x)
+if __name__ == '__main__':
+    import numpy as np
+    import multiprocessing
 
+    def thread_map(func, array, data=None):
+        """Split array into per-process chunks, apply func to each chunk in a pool, and concatenate the results."""
+        chunks = [(sub_arr, (HEIGHT, WIDTH, MIN_PIECES, MAX_SIZE, data)) for sub_arr in np.array_split(array, min(multiprocessing.cpu_count(), len(array)))]
+        # On fork-based platforms every worker starts with a copy of the parent's NumPy RNG state; reseed each worker so chunks do not mutate identically.
+        with multiprocessing.Pool(initializer=np.random.seed) as pool:  # the context manager also releases the pool if map() raises
+            individual_results = pool.map(func, chunks)
+        return np.concatenate(individual_results)
 
-def set_area(clust, y1, x1, y2, x2, value):
-    for y in range(y1, y2+1):
-        for x in range(x1, x2+1):
-            clust[y, x] = value
-    return clust
+    with open(INPUT_FILE) as input_file:  # close the handle deterministically
+        data = input_file.read().splitlines()
+    # Header line: row count, column count, MIN_PIECES, MAX_SIZE.
+    params = list(map(int, data[0].split()))
+    HEIGHT, WIDTH, MIN_PIECES, MAX_SIZE = params[:4]
+    # 0 marks a "T" cell, 1 any other; splitlines() already dropped the line ending,
+    # so no trailing column has to be sliced off (also safe without a final newline).
+    data = np.array([[0 if x=="T" else 1 for x in line] for line in data[1:]])
+    data_inv = 1-data
+    clusters = np.arange(HEIGHT*WIDTH).reshape((1, HEIGHT, WIDTH))
+    clusters = np.repeat(clusters, POPULATION, axis=0)+1
 
-
-def mutation(clust):
-    for _ in range(np.random.random_integers(MUTATION_AMOUNT)):
-        y = np.random.random_integers(params[0])-1
-        x = np.random.random_integers(params[1])-1
-        z = np.random.random()
-        if z < 0.2:
-            if y > 0:  # expand to top
-                yn = y-1
-                _, inner_left = get_left_bound(clust, y, x)
-                _, outer_left = get_left_bound(clust, yn, inner_left)
-                _, inner_right = get_right_bound(clust, y, x)
-                _, outer_right = get_right_bound(clust, yn, inner_right)
-                clust = set_area(clust, yn, outer_left, yn, outer_right, 0)
-                clust = set_area(clust, yn, inner_left, yn, inner_right, clust[y, x])
-        elif z < 0.4:
-            if x > 0:  # expand to left
-                xn = x-1
-                inner_top, _ = get_top_bound(clust, y, x)
-                outer_top, _ = get_top_bound(clust, inner_top, xn)
-                inner_bot, _ = get_bottom_bound(clust, y, x)
-                outer_bot, _ = get_bottom_bound(clust, inner_bot, xn)
-                clust = set_area(clust, outer_top, xn, outer_bot, xn, 0)
-                clust = set_area(clust, inner_top, xn, inner_bot, xn, clust[y, x])
-        elif z < 0.6:
-            if y < params[0]-1:  # expand to bottom
-                yn = y+1
-                _, inner_left = get_left_bound(clust, y, x)
-                _, outer_left = get_left_bound(clust, yn, inner_left)
-                _, inner_right = get_right_bound(clust, y, x)
-                _, outer_right = get_right_bound(clust, yn, inner_right)
-                clust = set_area(clust, yn, outer_left, yn, outer_right, 0)
-                clust = set_area(clust, yn, inner_left, yn, inner_right, clust[y, x])
-        elif z < 0.8:
-            if x < params[1]-1:  # expand to right
-                xn = x+1
-                inner_top, _ = get_top_bound(clust, y, x)
-                outer_top, _ = get_top_bound(clust, inner_top, xn)
-                inner_bot, _ = get_bottom_bound(clust, y, x)
-                outer_bot, _ = get_bottom_bound(clust, inner_bot, xn)
-                clust = set_area(clust, outer_top, xn, outer_bot, xn, 0)
-                clust = set_area(clust, inner_top, xn, inner_bot, xn, clust[y, x])
-        else:
-            pass#clust[y, x] = np.amax(clust)+1
-    return clust
-
-
-
-
-
-
-def myfunc(a, b):
-    global first, values
-    if first:
-        first = False
-        return
-    if a not in values:
-        values[a] = [0, 0]
-    values[a][b] += 1
-vfunc = np.vectorize(myfunc)
-
-# mutation
-for i in range(POPULATION):
-    if i % 20 == 0:
-        print("mutation", i)
-    clusters[i] = mutation(clusters[i])
-
-for iteration in range(ITERATIONS):
-    # calc fitness
-    fitnesses = np.zeros((POPULATION, ))
-    for i, cluster in enumerate(clusters):
-        if i % 20 == 0:
-            print("fitness", i, iteration)
-        values = {}
-        first = True
-        vfunc(cluster, data)
-        fitnesses[i] = get_fitness(values)
-    # select
-    z_exp = [np.exp(i) for i in fitnesses]
-    sum_z_exp = sum(z_exp)
-    softmax = [i / sum_z_exp for i in z_exp]
-    idx = np.random.choice(POPULATION, POPULATION, p=softmax)
-    clusters = clusters[idx, :, :]
-
-    # print best
-    max_idx = np.argmax(fitnesses)
-    print(clusters[max_idx])
-    print(iteration, max(fitnesses))
-
-    # mutation
-    for i in range(POPULATION):
-        clusters[i] = mutation(clusters[i])
-
-fitnesses = np.zeros((POPULATION, ))
-for i, cluster in enumerate(clusters):
-    values = {}
-    first = True
-    vfunc(cluster, data)
-    fitnesses[i] = get_fitness(values, clean=True)
-max_idx = np.argmax(fitnesses)
-print(clusters[max_idx])
-print(max(fitnesses))
\ No newline at end of file
+    for iteration in range(ITERATIONS):
+        # mutation
+        print(iteration, "Mutation")
+        clusters = thread_map(mutation, clusters)
+        # get fitness (column 0 = clean score, column 1 = selection fitness)
+        print(iteration, "Get Fitness")
+        fitnesses = thread_map(get_fitnesses, clusters, (data, data_inv))
+        max_idx = np.argmax(fitnesses[:, 0])
+        print(clusters[max_idx])
+        print(iteration, max(fitnesses[:, 0]))
+        # selection: resample the population with softmax probabilities over column 1
+        print(iteration, "Selection")
+        # Shift by the maximum before exponentiating: same probabilities, but exp() cannot overflow to inf (which would turn p into NaN).
+        z_exp = np.exp(fitnesses[:, 1] - np.max(fitnesses[:, 1]))
+        softmax = z_exp / np.sum(z_exp)
+        idx = np.random.choice(POPULATION, POPULATION, p=softmax)
+        clusters = clusters[idx, :, :]