Faster multi-process handling and clean up

2018-02-21 14:01:22 +01:00
parent db0becbbe7
commit fd23752f17
1 changed files with 150 additions and 160 deletions
--- a/main.py
+++ b/main.py
@@ -1,178 +1,168 @@
 import numpy as np
 import threading
 import queue
 import time
 INPUT_FILE = "data/medium.in"
 POPULATION = 50
-MUTATION_AMOUNT = 2000
+MUTATION_AMOUNT = 200#1000000
-ITERATIONS = 30
+ITERATIONS = 100
 THREAD_COUNT = 20
-data = [line for line in open(INPUT_FILE)]
+def mutation(entries):
 params = list(map(int, data[0].split(" ")))
 data = [[0 if x=="T" else 1 for x in line] for line in data[1:]]
 data = np.array(data)[:, :-1]
 clusters = np.arange(params[0]*params[1]).reshape((1, params[0], params[1]))
 clusters = np.repeat(clusters, POPULATION, axis=0)+1
 print(params)
 print(data)
 print(clusters[0])
-values = {}
+    def mutation_entry(entry, args):
-first = True
+        HEIGHT, WIDTH, MIN_PIECES, MAX_SIZE, _ = args
-def get_fitness(vals, clean=False):
+        def expand_hztl(entry, x, y, direction):
-    fit = 0
+            val = entry[y, x]
-    for key, val in vals.items():
+            ynew = y+direction
-        if key == 0:
+            oval = entry[ynew, x]
-            continue
+            # left
-        size = sum(val)
+            nx = x
-        if size <= params[3] and min(val) >= params[2]:
+            while nx > 0 and val == entry[y, nx-1]:
-            fit += size
+                entry[ynew, nx] = val
-        if clean:
+                nx -= 1
-            continue
+            nval = entry[ynew, nx] if nx != x else oval
-        size_diff = params[3]-size
+            entry[ynew, nx] = val
-        if size_diff < 0:
+            if nval != 0:
-            fit += 1-size_diff**2
+                while nx > 0 and nval == entry[ynew, nx-1]:
-        elif size_diff > 0:
+                    nx -= 1
-            fit += np.exp(-abs(size-params[3]))
+                    entry[ynew, nx] = 0
-    return fit
+            # right
            nx = x
            while nx < WIDTH-1 and val == entry[y, nx+1]:
                entry[ynew, nx] = val
                nx += 1
            nval = entry[ynew, nx] if nx != x else oval
            entry[ynew, nx] = val
            if nval != 0:
                while nx < WIDTH-1 and nval == entry[ynew, nx+1]:
                    nx += 1
                    entry[ynew, nx] = 0
        def expand_vert(entry, x, y, direction):
            """Grow the cluster at (y, x) sideways into column ``x+direction``.

            The label found at ``entry[y, x]`` is copied into the neighbouring
            column for every row of the contiguous vertical run of that label
            in column ``x`` that contains row ``y``. ``entry`` is modified in
            place and nothing is returned.

            entry     -- grid of cluster labels indexed as ``entry[row, col]``;
                         0 is treated as "no cluster" (assumes a 2-D array --
                         TODO confirm against the caller).
            x, y      -- column and row of the cell whose cluster is expanded.
            direction -- column offset of the target column; the callers pass
                         -1 (left) or +1 (right) and check beforehand that the
                         target column is inside the grid.

            ``HEIGHT`` is read from the enclosing ``mutation_entry`` scope.
            """
            val = entry[y, x]
            xnew = x+direction
            # Remember the target cell's original label: the upward pass
            # overwrites entry[y, xnew] before the downward pass needs it.
            oval = entry[y, xnew]
            # upward pass: follow the run of `val` in column x towards row 0
            ny = y
            while ny > 0 and val == entry[ny-1, x]:
                entry[ny, xnew] = val
                ny -= 1
            # Label that occupied the target column at the run's topmost row
            # before it is overwritten below.
            nval = entry[ny, xnew] if ny != y else oval
            entry[ny, xnew] = val
            if nval != 0:
                # Clear the cells of that displaced label which continue
                # further up in the target column.
                while ny > 0 and nval == entry[ny-1, xnew]:
                    ny -= 1
                    entry[ny, xnew] = 0
            # downward pass: same procedure towards row HEIGHT-1
            ny = y
            while ny < HEIGHT-1 and val == entry[ny+1, x]:
                entry[ny, xnew] = val
                ny += 1
            # When the run does not extend below y, entry[y, xnew] already
            # holds `val`, so the saved original label is used instead.
            nval = entry[ny, xnew] if ny != y else oval
            entry[ny, xnew] = val
            if nval != 0:
                # Clear the cells of the displaced label which continue
                # further down in the target column.
                while ny < HEIGHT-1 and nval == entry[ny+1, xnew]:
                    ny += 1
                    entry[ny, xnew] = 0
-def get_left_bound(clust, y, x):
+        for _ in range(np.random.random_integers(MUTATION_AMOUNT)):
-    val = clust[y, x]
+            y = np.random.random_integers(HEIGHT)-1
-    while x > 0 and val == clust[y, x-1]:
+            x = np.random.random_integers(WIDTH)-1
-        x -= 1
+            if entry[y, x] == 0:  # create new cluster
-    return (y, x)
+                entry[y, x] = np.amax(entry)+1
            z = np.random.random()
            if z < 0.25:  # expand to top
                if y > 0:
                    expand_hztl(entry, x, y, -1)
            elif z < 0.5:  # expand to left
                if x > 0:
                    expand_vert(entry, x, y, -1)
            elif z < 0.75:  # expand to bottom
                if y < HEIGHT-1:
                    expand_hztl(entry, x, y, 1)
            else:  # expand to right
                if x < WIDTH-1:
                    expand_vert(entry, x, y, 1)
        return entry
    sub_arr, args = entries
    for idx in range(sub_arr.shape[0]):
        sub_arr[idx,:,:] = mutation_entry(sub_arr[idx,:,:], args)
    return sub_arr
-def get_right_bound(clust, y, x):
+def get_fitnesses(entries):
-    val = clust[y, x]
+    def get_fitness(entry, args):
-    while x+1 < clust.shape[1] and val == clust[y, x+1]:
+        HEIGHT, WIDTH, MIN_PIECES, MAX_SIZE, (data, data_inv) = args
        x += 1
    return (y, x)
        def get_fitness_per_cluster(cluster):
            """Score a single cluster.

            cluster -- two-element row ``[size, min_count]`` where ``size`` is
                       the number of cells in the cluster and ``min_count`` is
                       the smaller of its two per-ingredient cell counts.

            Returns ``[a, b]`` as floats:
              a -- "clean" score: the cluster size when the cluster is valid
                   (``size <= MAX_SIZE`` and ``min_count >= MIN_PIECES``),
                   otherwise 0.
              b -- fitness score used for selection: linearly penalised when
                   the cluster is too large, exponentially decaying with the
                   number of missing cells when it is too small, and ``a + 1``
                   when the size equals ``MAX_SIZE`` exactly.

            ``MAX_SIZE`` and ``MIN_PIECES`` are read from the enclosing scope.
            """
            size = cluster[0]
            is_valid = size <= MAX_SIZE and cluster[1] >= MIN_PIECES
            a = size if is_valid else 0
            size_diff = MAX_SIZE-size
            if size_diff < 0:
                # Too large: size_diff is negative, so this lowers the score.
                b = MAX_SIZE/8+size_diff
            elif size_diff > 0:
                # Too small. The base must be a float: np.power with an integer
                # base and a negative integer exponent raises ValueError.
                b = np.power(3.0, -size_diff)*MAX_SIZE/8
            else:
                b = a+1
            # Return floats in every branch so the element type of the result
            # does not depend on which branch produced it.
            return [float(a), float(b)]  # a = clean score ; b = fitness score
-def get_top_bound(clust, y, x):
+        mname, mcount = np.unique(entry*data, return_counts=True)
-    val = clust[y, x]
+        mdict = dict(zip(mname, mcount))
-    while y > 0 and val == clust[y-1, x]:
+        tname, tcount = np.unique(entry*data_inv, return_counts=True)
-        y -= 1
+        tdict = dict(zip(tname, tcount))
-    return (y, x)
+        c = np.array([[mdict.get(key) or 0, tdict.get(key) or 0] for key in (mdict.keys() | tdict.keys()) if key != 0])
        c = np.vstack((np.sum(c, axis=1), np.min(c, axis=1))).T
        return sum(np.apply_along_axis(get_fitness_per_cluster, 1, c))
    sub_arr, args = entries
    fitnesses = np.zeros((sub_arr.shape[0], 2))
    for idx in range(sub_arr.shape[0]):
        x = get_fitness(sub_arr[idx,:,:], args)
        #print(x)
        fitnesses[idx] = get_fitness(sub_arr[idx,:,:], args)
    return fitnesses
-def get_bottom_bound(clust, y, x):
+if __name__ == '__main__':
-    val = clust[y, x]
+    import numpy as np
-    while y+1 < clust.shape[0] and val == clust[y+1, x]:
+    import multiprocessing
        y += 1
    return (y, x)
    def thread_map(func, array, data=None):
        """Apply ``func`` to ``array`` in parallel worker processes.

        ``array`` is split along its first axis into at most
        ``multiprocessing.cpu_count()`` chunks. Each worker receives one
        ``(sub_array, (HEIGHT, WIDTH, MIN_PIECES, MAX_SIZE, data))`` tuple and
        must return an array; the per-chunk results are concatenated in order.

        func  -- picklable callable taking one such tuple.
        array -- array to split along axis 0.
        data  -- optional extra payload forwarded unchanged to every worker.

        Any exception raised by a worker propagates to the caller after the
        pool has been shut down.
        """
        chunk_count = min(multiprocessing.cpu_count(), len(array))
        worker_args = (HEIGHT, WIDTH, MIN_PIECES, MAX_SIZE, data)
        chunks = [(sub_arr, worker_args) for sub_arr in np.array_split(array, chunk_count)]
        pool = multiprocessing.Pool()
        try:
            individual_results = pool.map(func, chunks)
        finally:
            # Always release the worker processes, even when a worker raised;
            # otherwise a failed call would leave the pool's processes behind.
            pool.close()
            pool.join()
        return np.concatenate(individual_results)
-def set_area(clust, y1, x1, y2, x2, value):
+    data = [line for line in open(INPUT_FILE)]
-    for y in range(y1, y2+1):
+    params = list(map(int, data[0].split(" ")))
-        for x in range(x1, x2+1):
+    HEIGHT = params[0]
-            clust[y, x] = value
+    WIDTH = params[1]
-    return clust
+    MIN_PIECES = params[2]
    MAX_SIZE = params[3]
    data = [[0 if x=="T" else 1 for x in line] for line in data[1:]]
    data = np.array(data)[:, :-1]
    data_inv = 1-data
    clusters = np.arange(HEIGHT*WIDTH).reshape((1, HEIGHT, WIDTH))
    clusters = np.repeat(clusters, POPULATION, axis=0)+1
-
+    for iteration in range(ITERATIONS):
-def mutation(clust):
+        # mutation
-    for _ in range(np.random.random_integers(MUTATION_AMOUNT)):
+        print(iteration, "Mutation")
-        y = np.random.random_integers(params[0])-1
+        clusters = thread_map(mutation, clusters)
-        x = np.random.random_integers(params[1])-1
+        # get fitness
-        z = np.random.random()
+        print(iteration, "Get Fitness")
-        if z < 0.2:
+        fitnesses = thread_map(get_fitnesses, clusters, (data, data_inv))
-            if y > 0:  # expand to top
+        max_idx = np.argmax(fitnesses[:, 0])
-                yn = y-1
+        print(clusters[max_idx])
-                _, inner_left = get_left_bound(clust, y, x)
+        print(iteration, max(fitnesses[:, 0]))
-                _, outer_left = get_left_bound(clust, yn, inner_left)
+        # selection
-                _, inner_right = get_right_bound(clust, y, x)
+        print(iteration, "Selection")
-                _, outer_right = get_right_bound(clust, yn, inner_right)
+        z_exp = [np.exp(i) for i in fitnesses[:, 1]]
-                clust = set_area(clust, yn, outer_left, yn, outer_right, 0)
+        sum_z_exp = sum(z_exp)
-                clust = set_area(clust, yn, inner_left, yn, inner_right, clust[y, x])
+        softmax = [i / sum_z_exp for i in z_exp]
-        elif z < 0.4:
+        idx = np.random.choice(POPULATION, POPULATION, p=softmax)
-            if x > 0:  # expand to left
+        clusters = clusters[idx, :, :]
                xn = x-1
                inner_top, _ = get_top_bound(clust, y, x)
                outer_top, _ = get_top_bound(clust, inner_top, xn)
                inner_bot, _ = get_bottom_bound(clust, y, x)
                outer_bot, _ = get_bottom_bound(clust, inner_bot, xn)
                clust = set_area(clust, outer_top, xn, outer_bot, xn, 0)
                clust = set_area(clust, inner_top, xn, inner_bot, xn, clust[y, x])
        elif z < 0.6:
            if y < params[0]-1:  # expand to bottom
                yn = y+1
                _, inner_left = get_left_bound(clust, y, x)
                _, outer_left = get_left_bound(clust, yn, inner_left)
                _, inner_right = get_right_bound(clust, y, x)
                _, outer_right = get_right_bound(clust, yn, inner_right)
                clust = set_area(clust, yn, outer_left, yn, outer_right, 0)
                clust = set_area(clust, yn, inner_left, yn, inner_right, clust[y, x])
        elif z < 0.8:
            if x < params[1]-1:  # expand to right
                xn = x+1
                inner_top, _ = get_top_bound(clust, y, x)
                outer_top, _ = get_top_bound(clust, inner_top, xn)
                inner_bot, _ = get_bottom_bound(clust, y, x)
                outer_bot, _ = get_bottom_bound(clust, inner_bot, xn)
                clust = set_area(clust, outer_top, xn, outer_bot, xn, 0)
                clust = set_area(clust, inner_top, xn, inner_bot, xn, clust[y, x])
        else:
            pass#clust[y, x] = np.amax(clust)+1
    return clust
 def myfunc(a, b):
    """Count one occurrence of value ``b`` for key ``a`` in the global ``values``.

    ``values`` maps each key ``a`` to a two-slot list of counters; ``b`` is
    used as the index (0 or 1) of the counter to increment. The function is
    wrapped with ``np.vectorize`` and applied element-wise to a cluster grid
    and the ingredient grid.

    The very first call after ``first`` has been set to True is ignored.
    NOTE(review): presumably this compensates for the extra probing call that
    ``np.vectorize`` makes on the first element when no ``otypes`` are given
    -- confirm.
    """
    global first, values
    if first:
        # Swallow the first invocation and clear the flag for later calls.
        first = False
        return
    if a not in values:
        # First time this key is seen: start both counters at zero.
        values[a] = [0, 0]
    values[a][b] += 1
 vfunc = np.vectorize(myfunc)
 # mutation
 for i in range(POPULATION):
    if i % 20 == 0:
        print("mutation", i)
    clusters[i] = mutation(clusters[i])
 for iteration in range(ITERATIONS):
    # calc fitness
    fitnesses = np.zeros((POPULATION, ))
    for i, cluster in enumerate(clusters):
        if i % 20 == 0:
            print("fitness", i, iteration)
        values = {}
        first = True
        vfunc(cluster, data)
        fitnesses[i] = get_fitness(values)
    # select
    z_exp = [np.exp(i) for i in fitnesses]
    sum_z_exp = sum(z_exp)
    softmax = [i / sum_z_exp for i in z_exp]
    idx = np.random.choice(POPULATION, POPULATION, p=softmax)
    clusters = clusters[idx, :, :]
    # print best
    max_idx = np.argmax(fitnesses)
    print(clusters[max_idx])
    print(iteration, max(fitnesses))
    # mutation
    for i in range(POPULATION):
        clusters[i] = mutation(clusters[i])
 fitnesses = np.zeros((POPULATION, ))
 for i, cluster in enumerate(clusters):
    values = {}
    first = True
    vfunc(cluster, data)
    fitnesses[i] = get_fitness(values, clean=True)
 max_idx = np.argmax(fitnesses)
 print(clusters[max_idx])
 print(max(fitnesses))