numpy 这个递归函数能否转化为具有类似性能的迭代函数?

[0 0 7 7 0 0
 0 0 7 0 0 0
 0 0 0 0 0 7
 0 6 6 0 0 7
 0 0 4 4 0 0]


[0 0 1 1 0 0
 0 0 1 0 0 0
 0 0 0 0 0 3
 0 2 2 0 0 3
 0 0 2 2 0 0]


[0 0 1 1 0 0
 0 0 1 0 0 0
 0 0 0 0 0 4
 0 2 2 0 0 4
 0 0 3 3 0 0]

我大约一年前问过这个问题(asked this about a year ago),并且一直在使用被采纳答案中的解决方案,但我现在正在优化代码的运行时间,因此重新审视这个问题。
对于我通常使用的数据大小,链接中的解决方案大约需要1分30秒才能运行完。我写了下面的递归算法,作为普通Python运行大约需要30秒,而使用numba的JIT后只需1-2秒(旁注:我很不喜欢这个adjacent函数,任何能让它不那么混乱、同时仍然与numba兼容的建议都非常欢迎):

def adjacent(idx, shape):
    """Return the in-bounds axis-aligned neighbours of ``idx``.

    Args:
        idx: Index tuple of length 2 or 3.
        shape: Shape tuple of the array ``idx`` points into. A shape with
            more than two entries selects the 3-D branch; anything else is
            treated as 2-D.

    Returns:
        A list of index tuples (same length as ``idx``), one for every
        neighbour that differs from ``idx`` by exactly one step along one
        axis and still lies inside ``shape``. For each axis the ``+1``
        neighbour is listed before the ``-1`` neighbour.
    """
    coords = []
    if len(shape) > 2:
        if idx[0] < shape[0] - 1:
            coords.append((idx[0] + 1, idx[1], idx[2]))
        if idx[0] > 0:
            coords.append((idx[0] - 1, idx[1], idx[2]))
        if idx[1] < shape[1] - 1:
            coords.append((idx[0], idx[1] + 1, idx[2]))
        if idx[1] > 0:
            coords.append((idx[0], idx[1] - 1, idx[2]))
        if idx[2] < shape[2] - 1:
            coords.append((idx[0], idx[1], idx[2] + 1))
        if idx[2] > 0:
            coords.append((idx[0], idx[1], idx[2] - 1))
    else:
        # 2-D case: without this branch a 2-D shape yields no neighbours and
        # a 3-D shape gets 2-tuples mixed into its neighbour list.
        if idx[0] < shape[0] - 1:
            coords.append((idx[0] + 1, idx[1]))
        if idx[0] > 0:
            coords.append((idx[0] - 1, idx[1]))
        if idx[1] < shape[1] - 1:
            coords.append((idx[0], idx[1] + 1))
        if idx[1] > 0:
            coords.append((idx[0], idx[1] - 1))
    return coords

def apply_label(labels, decoded_image, current_label, idx):
    """Recursively flood-fill ``current_label`` starting at ``idx``.

    Writes ``current_label`` into ``labels[idx]`` and then recurses into
    every neighbour returned by ``adjacent`` that is still unlabelled
    (``labels == 0``) and has the same value in ``decoded_image`` as ``idx``.
    ``labels`` is modified in place; nothing is returned.
    """
    labels[idx] = current_label
    group_value = decoded_image[idx]
    for neighbor in adjacent(idx, labels.shape):
        # Skip neighbours that already carry a label.
        if labels[neighbor] != 0:
            continue
        if decoded_image[neighbor] == group_value:
            apply_label(labels, decoded_image, current_label, neighbor)

def label_image(decoded_image):
    """Label connected groups of equal values using the recursive fill.

    Every element with a value ``>= 0`` that has not been labelled yet
    starts a new group, which is filled via ``apply_label``.

    Returns:
        A tuple ``(labels, current_label)``: a ``uint32`` array shaped like
        ``decoded_image`` holding the group number of each element (0 where
        no label was assigned), and the number of groups created.
    """
    labels = np.zeros_like(decoded_image, dtype=np.uint32)
    current_label = 0
    candidates = np.where(decoded_image >= 0)
    for seed in zip(*candidates):
        if labels[seed] != 0:
            # Already absorbed into an earlier group.
            continue
        current_label += 1
        apply_label(labels, decoded_image, current_label, seed)
    return labels, current_label


def label_image(decoded_image):
    """Label connected groups of equal positive values, iteratively.

    Works for any number of dimensions. Elements are scanned in the order
    produced by ``np.where(decoded_image > 0)``; each unlabelled element
    seeds a new group that is flood-filled with an explicit stack instead
    of recursion, so group size is not limited by the call stack.

    Args:
        decoded_image: Array whose positive entries are to be grouped.
            Two elements belong to the same group when they are connected
            through axis-aligned neighbours that all share the same value.

    Returns:
        A ``uint32`` array shaped like ``decoded_image`` with the group
        number of every positive element and 0 everywhere else.
    """
    shape = decoded_image.shape
    labels = np.zeros_like(decoded_image, dtype=np.uint32)
    current_label = 0
    for idx in zip(*np.where(decoded_image > 0)):
        if labels[idx] == 0:
            current_label += 1
            group_value = decoded_image[idx]
            stack = [idx]
            while stack:
                top = stack.pop()
                labels[top] = current_label
                for i in range(0, len(shape)):
                    if top[i] > 0:
                        neighbor = list(top)
                        neighbor[i] -= 1
                        neighbor = tuple(neighbor)
                        if decoded_image[neighbor] == group_value and labels[neighbor] == 0:
                            # Same value and not yet labelled: visit later.
                            stack.append(neighbor)
                    if top[i] < shape[i] - 1:
                        neighbor = list(top)
                        neighbor[i] += 1
                        neighbor = tuple(neighbor)
                        if decoded_image[neighbor] == group_value and labels[neighbor] == 0:
                            stack.append(neighbor)
    return labels


def label_image_2d(decoded_image):
    """Label connected groups of equal positive values in a 2-D array.

    Uses an explicit stack for the flood fill. Candidates are popped and
    rejected if they are already labelled or hold a different value than
    the seed; accepted ones are labelled and their in-bounds 4-neighbours
    are pushed.

    Returns:
        A ``uint32`` array shaped like ``decoded_image``; positive elements
        carry their group number, all others are 0.
    """
    rows, cols = decoded_image.shape
    labels = np.zeros_like(decoded_image, dtype=np.uint32)
    current_label = 0
    for seed in zip(*np.where(decoded_image > 0)):
        if labels[seed] != 0:
            # Seed already belongs to an earlier group.
            continue
        current_label += 1
        group_value = decoded_image[seed]
        pending = [seed]
        while pending:
            x, y = pending.pop()
            if labels[x, y] == 0 and decoded_image[x, y] == group_value:
                labels[x, y] = current_label
                if x > 0:
                    pending.append((x - 1, y))
                if x + 1 < rows:
                    pending.append((x + 1, y))
                if y > 0:
                    pending.append((x, y - 1))
                if y + 1 < cols:
                    pending.append((x, y + 1))
    return labels

def label_image_3d(decoded_image):
    """Label connected groups of equal positive values in a 3-D array.

    Uses an explicit stack for the flood fill. Candidates are popped and
    rejected if they are already labelled or hold a different value than
    the seed; accepted ones are labelled and their in-bounds 6-neighbours
    are pushed.

    Returns:
        A ``uint32`` array shaped like ``decoded_image``; positive elements
        carry their group number, all others are 0.
    """
    size_x, size_y, size_z = decoded_image.shape
    labels = np.zeros_like(decoded_image, dtype=np.uint32)
    current_label = 0
    for seed in zip(*np.where(decoded_image > 0)):
        if labels[seed] != 0:
            # Seed already belongs to an earlier group.
            continue
        current_label += 1
        group_value = decoded_image[seed]
        pending = [seed]
        while pending:
            x, y, z = pending.pop()
            if labels[x, y, z] == 0 and decoded_image[x, y, z] == group_value:
                labels[x, y, z] = current_label
                if x > 0:
                    pending.append((x - 1, y, z))
                if x + 1 < size_x:
                    pending.append((x + 1, y, z))
                if y > 0:
                    pending.append((x, y - 1, z))
                if y + 1 < size_y:
                    pending.append((x, y + 1, z))
                if z > 0:
                    pending.append((x, y, z - 1))
                if z + 1 < size_z:
                    pending.append((x, y, z + 1))
    return labels

def label_image(decoded_image):
    """Dispatch to the 2-D or 3-D labelling routine based on the input rank.

    Arrays with two dimensions go to ``label_image_2d``; anything else must
    have exactly three dimensions (checked with an assertion) and goes to
    ``label_image_3d``.
    """
    rank = len(decoded_image.shape)
    if rank != 2:
        assert rank == 3
        return label_image_3d(decoded_image)
    return label_image_2d(decoded_image)

还请注意,迭代解决方案不受堆栈限制:np.full((100,100,100), 1)在迭代解决方案中工作正常,但在递归解决方案中失败(如果使用numba,则会出现segfaults)。

# Call label_image on 20x20x20 arrays that are uniformly filled with i, for
# i = 1 .. 9999, so every call labels one single group spanning the array.
# NOTE(review): this appears to be the workload behind the "about 5x faster"
# figure quoted in the surrounding text -- confirm against the original post.
for i in range(1, 10000):
    label_image(np.full((20,20,20), i))

迭代解决方案似乎快了好几倍(在我的机器上大约快5倍,见下文)。你或许也可以优化递归解决方案,使其达到相当的速度,例如避免创建临时的coords列表,或者将np.where的条件改为> 0。
我不清楚numba对经过zip处理的np.where优化得如何。为了进一步优化,您可以考虑(并通过基准测试验证)使用显式嵌套的for x in range(0, w): for y in range(0, h):循环。

  • 将zip(*np.where(...))改为带continue的显式循环。
  • 将decoded_image[idx]存储在一个局部变量中(理想情况下应该没有影响,但也没什么坏处)。
  • 重用堆栈(stack)。这可以避免不必要的(重新)分配和GC压力。还可以考虑为堆栈提供初始容量(分别为w*h和w*h*l)。
def label_image_2d(decoded_image):
    """Label connected groups of equal positive values in a 2-D array.

    Scans the array with explicit nested loops and flood-fills each new
    group with a single stack that is reused across groups (it is always
    empty again when a fill finishes).

    Args:
        decoded_image: 2-D array; elements with a value ``<= 0`` are never
            labelled.

    Returns:
        A ``uint32`` array shaped like ``decoded_image``; positive elements
        carry their group number, all others are 0.
    """
    w, h = decoded_image.shape
    labels = np.zeros_like(decoded_image, dtype=np.uint32)
    current_label = 0
    stack = []
    for sx in range(0, w):
        for sy in range(0, h):
            start = (sx, sy)
            image_label = decoded_image[start]
            if image_label <= 0 or labels[start] != 0:
                # Background, or already part of an earlier group.
                continue
            current_label += 1
            stack.append(start)
            while stack:
                x, y = stack.pop()
                if decoded_image[x, y] != image_label or labels[x, y] != 0:
                    continue # already visited or not part of this group
                labels[x, y] = current_label
                if x > 0: stack.append((x-1, y))
                if x+1 < w: stack.append((x+1, y))
                if y > 0: stack.append((x, y-1))
                if y+1 < h: stack.append((x, y+1))
    return labels

def label_image_3d(decoded_image):
    """Label connected groups of equal positive values in a 3-D array.

    Scans the array with explicit nested loops and flood-fills each new
    group with a single stack that is reused across groups (it is always
    empty again when a fill finishes).

    Args:
        decoded_image: 3-D array; elements with a value ``<= 0`` are never
            labelled.

    Returns:
        A ``uint32`` array shaped like ``decoded_image``; positive elements
        carry their group number, all others are 0.
    """
    w, h, l = decoded_image.shape
    labels = np.zeros_like(decoded_image, dtype=np.uint32)
    current_label = 0
    stack = []
    for sx in range(0, w):
        for sy in range(0, h):
            for sz in range(0, l):
                start = (sx, sy, sz)
                image_label = decoded_image[start]
                if image_label <= 0 or labels[start] != 0:
                    # Background, or already part of an earlier group.
                    continue
                current_label += 1
                stack.append(start)
                while stack:
                    x, y, z = stack.pop()
                    if decoded_image[x, y, z] != image_label or labels[x, y, z] != 0:
                        continue # already visited or not part of this group
                    labels[x, y, z] = current_label
                    if x > 0: stack.append((x-1, y, z))
                    if x+1 < w: stack.append((x+1, y, z))
                    if y > 0: stack.append((x, y-1, z))
                    if y+1 < h: stack.append((x, y+1, z))
                    if z > 0: stack.append((x, y, z-1))
                    if z+1 < l: stack.append((x, y, z+1))
    return labels


import numpy as np
import timeit

import rec
import iter_old
import iter_new
import merge

# Benchmark: time every implementation's label_image on four workloads.
shape = (100, 100, 100)
n = 20  # timeit repetitions per workload
for module in [rec, iter_old, iter_new, merge]:
    # Print the module so each group of timings is attributed.
    print(module)

    label_image = module.label_image
    # Trigger compilation of 2d & 3d functions
    label_image(np.zeros((1, 1)))
    label_image(np.zeros((1, 1, 1)))

    i = 0
    def test_full():
        # Use a different fill value on every call.
        global i
        i += 1
        label_image(np.full(shape, i))
    print("single group:", timeit.timeit(test_full, number=n))
    print("random (few groups):", timeit.timeit(
        lambda: label_image(np.random.randint(low = 1, high = 10, size = shape)),
        number=n))
    print("random (many groups):", timeit.timeit(
        lambda: label_image(np.random.randint(low = 1, high = 400, size = shape)),
        number=n))
    # NOTE(review): the np.arange argument was lost in SOURCE. It is
    # reconstructed here so that every element has a unique positive value
    # (each element is its own group) -- confirm against the original post.
    print("only groups:", timeit.timeit(
        lambda: label_image(np.arange(1, np.prod(shape) + 1).reshape(shape)),
        number=n))


<module 'rec' from '...'>
single group: 32.39212468900041
random (few groups): 14.648884047001047
random (many groups): 13.304533919001187
only groups: 13.513677138000276
<module 'iter_old' from '...'>
single group: 10.287227957000141
random (few groups): 17.37535468200076
random (many groups): 14.506630064999626
only groups: 13.132202609998785
<module 'iter_new' from '...'>
single group: 7.388022166000155
random (few groups): 11.585243002000425
random (many groups): 9.560101995000878
only groups: 8.693653742000606
<module 'merge' from '...'>
single group: 14.657021331999204
random (few groups): 14.146574055999736
random (many groups): 13.412314713001251
only groups: 12.642367746000673


<module 'iter_old' from '...'>
single group: 3.5357716739999887
random (few groups): 4.931695729999774
random (many groups): 3.4671142009992764
only groups: 3.3023930709987326
<module 'iter_new' from '...'>
single group: 2.45903080700009
random (few groups): 2.907660342001691
random (many groups): 2.309699692999857
only groups: 2.052835552000033
<module 'merge' from '...'>
single group: 3.7620838259990705
random (few groups): 3.3524249689999124
random (many groups): 3.126650959999097
only groups: 2.9456547739991947




这个视频(This video)解释了该算法。

def adjacent(idx, shape):
    """Return the already-visited axis-aligned neighbours of ``idx``.

    Only the ``-1`` neighbour along each axis is returned, i.e. the
    elements that precede ``idx`` in a row-major scan.

    Args:
        idx: Index tuple of length 2 or 3.
        shape: Shape tuple of the array; only its length is used, to choose
            between the 3-D branch (more than two entries) and the 2-D
            branch.

    Returns:
        A list of index tuples with the same length as ``idx``.
    """
    coords = []
    if len(shape) > 2:
        if idx[0] > 0:
            coords.append((idx[0] - 1, idx[1], idx[2]))
        if idx[1] > 0:
            coords.append((idx[0], idx[1] - 1, idx[2]))
        if idx[2] > 0:
            coords.append((idx[0], idx[1], idx[2] - 1))
    else:
        # 2-D case: without this branch a 2-D shape yields no neighbours and
        # a 3-D shape gets 2-tuples mixed into its neighbour list.
        if idx[0] > 0:
            coords.append((idx[0] - 1, idx[1]))
        if idx[1] > 0:
            coords.append((idx[0], idx[1] - 1))
    return coords

def merge_classes(labels, mergetable):
    """Rewrite ``labels`` in place so merged classes share one label.

    ``mergetable[c]`` is either -1 (class ``c`` is not merged into another
    class) or the class that ``c`` was merged into. Merge targets can
    themselves have been merged later, so every chain is followed to its
    end; replacing a label by its direct target only would leave parts of
    one group with different labels.

    Args:
        labels: Integer array of class numbers; modified in place.
        mergetable: 1-D sequence of merge targets indexed by class number.
            Classes beyond its length are left unchanged.
    """
    table_size = len(mergetable)
    # Resolve every chain once up front instead of once per element.
    # np.array() copies, so the caller's table is not modified.
    resolved = np.array(mergetable)
    for class_num in range(table_size):
        target = resolved[class_num]
        if target == -1:
            continue
        # An acyclic chain has fewer than table_size links; the bound keeps
        # a malformed (cyclic) table from hanging this loop.
        for _ in range(table_size):
            parent = resolved[target]
            if parent == -1 or parent == target:
                break
            target = parent
        resolved[class_num] = target
    for idx in np.ndindex(labels.shape):
        class_num = labels[idx]
        if class_num < table_size:
            merge_target = resolved[class_num]
            if merge_target != -1:
                labels[idx] = merge_target

def add_to_merge_table(mergetable, class1, class2):
    """Record that ``class1`` and ``class2`` are the same group.

    ``mergetable[c]`` holds -1 when class ``c`` is the representative of
    its group, otherwise the class it was merged into. The representatives
    of both classes are looked up and the larger one is linked to the
    smaller one. Linking the representatives (rather than overwriting the
    entry of the larger class) keeps merges recorded earlier for that
    class intact.

    Args:
        mergetable: 1-D ``int32`` array of merge targets.
        class1: First class number.
        class2: Second class number.

    Returns:
        The merge table to use from now on. This is a new, larger array
        when the table had to grow to hold the larger class, otherwise the
        array that was passed in (updated in place).
    """
    # identify smallest element
    lo_class = min(class1, class2)
    hi_class = max(class1, class2)
    # Does the merge table require expansion?
    while len(mergetable) <= hi_class:
        # max(..., 1) lets an empty table grow instead of looping forever.
        new_mergetable = np.full(max(len(mergetable) * 2, 1), -1, dtype=np.int32)
        new_mergetable[:len(mergetable)] = mergetable
        mergetable = new_mergetable
    # Follow both chains to their representatives.
    while mergetable[lo_class] != -1:
        lo_class = mergetable[lo_class]
    while mergetable[hi_class] != -1:
        hi_class = mergetable[hi_class]
    # Already in the same group: linking a class to itself would create a
    # cycle, so do nothing.
    if lo_class != hi_class:
        mergetable[max(lo_class, hi_class)] = min(lo_class, hi_class)
    return mergetable

def label_image(decoded_image):
    """Label connected groups of equal values with a scan-and-merge pass.

    Every element is visited once in ``np.ndindex`` order. It copies the
    label of a neighbour from ``adjacent`` that has the same value; if two
    such neighbours disagree, the smaller label is kept and the pair is
    recorded via ``add_to_merge_table``. Elements without a matching
    labelled neighbour open a new class. Finally ``merge_classes`` rewrites
    the labels according to the recorded merges.

    Returns:
        A tuple ``(labels, current_label)``: the ``uint32`` label array and
        the number of classes opened during the scan (counted before
        merging).
    """
    labels = np.zeros_like(decoded_image, dtype=np.uint32)
    # -1 marks "not merged into another class".
    mergetable = np.full(8, -1, dtype=np.int32)
    current_label = 0
    shape = labels.shape
    for idx in np.ndindex(shape):
        pixel_value = decoded_image[idx]
        own_label = labels[idx]
        for neighbor in adjacent(idx, shape):
            neighbor_label = labels[neighbor]
            # Only labelled neighbours holding the same value are relevant.
            if neighbor_label == 0 or decoded_image[neighbor] != pixel_value:
                continue
            if own_label == 0:
                # This pixel has no class yet: copy the neighbour's.
                own_label = neighbor_label
                labels[idx] = own_label
            elif neighbor_label != own_label:
                # Conflicting classes: record the merge, keep the smaller.
                mergetable = add_to_merge_table(mergetable, neighbor_label, own_label)
                own_label = min(own_label, neighbor_label)
                labels[idx] = own_label
        if own_label == 0:
            current_label += 1
            labels[idx] = current_label
    merge_classes(labels, mergetable)
    return labels, current_label

