scipy 检查点是否位于n维单纯形内的最快方法

wf82jlnq  于 2023-08-05  发布在  其他
关注(0)|答案(1)|浏览(89)

我有一些非常大的数据集,分析管道的一部分是确定,如标题所述,每个点是否被n维度的单纯形约束。如果可能的话,我正在试图找到一种方法来快速计算,而无需并行化。其中一个障碍是数据集的维度不同,因此解决方案需要应用于任何维度,而不是固定在2D或3D。
然而,为了简单起见,我使用了2D的例子,因为它们很容易表示,但在理论上,数学应该成立。

重心坐标

我最初的想法是使用重心坐标,从笛卡尔坐标转换而来,就像done here一样,但我对这个方法的实现至少可以说是不可信的:

import numpy as np
import matplotlib.pyplot as plt

def is_in_simplex(point, T_inv, simplex):
    first_n = np.matmul(
        T_inv, (point - simplex[-1])
    )
    last = 1 - np.sum(first_n)
    bary = np.concatenate((first_n, [last]))
    return np.all((bary <= 1) & (bary >= 0))

# setup

simplex = np.array([[0, 0,], [8, 8,], [10, 3]])
rng = np.random.default_rng()
test_points = rng.random((10, 2))*10

# Maths starts here

T = np.array(simplex[:-1] - simplex[-1]).T
T_inv = np.linalg.inv(T)
within = np.vectorize(is_in_simplex, excluded={1, 2})(test_points, T_inv, simplex)

# drawing

polygon = np.concatenate([simplex, np.array([simplex[0]])])
print()
plt.plot(*polygon.T)
plt.scatter(*test_points.T)
for i, p in enumerate(test_points, 0):
    print(f"{i}\t{p}\t{test_points[i]}\t{within[i]}")
    plt.annotate(i, p)

字符串
其输出是:

0   [4.15391239 4.85852344] [4.15391239 4.85852344] [ True  True]
1   [5.24829898 9.22879891] [5.24829898 9.22879891] [ True False]
2   [3.31255765 0.75891285] [3.31255765 0.75891285] [ True  True]
3   [3.67468612 1.30045647] [3.67468612 1.30045647] [ True  True]
4   [9.95049042 5.932782  ] [9.95049042 5.932782  ] [False  True]
5   [8.42621723 6.35824573] [8.42621723 6.35824573] [False  True]
6   [4.19569122 3.41275362] [4.19569122 3.41275362] [ True  True]
7   [1.57324033 8.00273677] [1.57324033 8.00273677] [False False]
8   [1.9183791  0.54945207] [1.9183791  0.54945207] [ True  True]
9   [0.52448473 7.77920839] [0.52448473 7.77920839] [False  True]


x1c 0d1x的数据
第一列是索引,第二列是笛卡尔坐标,第三列 * 应该是 * 前两个重心坐标(应该假设它们加为1),第四列 * 应该 * 显示点是否位于单纯形内。
正如你可能已经注意到的,有几件事是错误的。点3、5和6应该被标记为在单形内,但是它们的重心坐标完全错误。由于它们被单形约束,重心坐标应大于0,但总和为1。is_in_simplex()的输出是一个数组,而每个点都应该是一个布尔值。
不包括RNG、打印和绘图,10个点需要0.0383秒,100个点需要0.0487秒,1,000个点需要0.0994秒,10,000个点需要0.523秒。

线性规划

另一种方法是使用线性规划,但有些东西是关闭的,因为我的时间远远大于那些reported here(第二个答案,我用它作为起点)。

import numpy as np
from scipy.optimize import linprog
import time

def vectorizable(point, simplexT, coeffs):
    b = np.r_[point, np.ones(1)]
    lp = linprog(coeffs, A_eq = simplexT, b_eq = b)
    return lp.success

dims = 2
rng = np.random.default_rng()
test_points = rng.random((10, dims))*10
simplex = np.array([[0, 0,], [8, 8,], [10, 3]])
coeffs = np.zeros(len(simplex))
simplex_T = np.r_[simplex.T,np.ones((1,len(simplex)))]

start_time = time.time()
in_simplex = np.vectorize(vectorizable,
                        excluded={1, 2},
                        signature="(n) -> ()")(test_points, simplex_T, coeffs)

print(f"----- {time.time() - start_time} seconds -----")

polygon = np.concatenate([simplex, np.array([simplex[0]])])
print()
plt.plot(*polygon.T)
plt.scatter(*test_points.T)
for i, p in enumerate(test_points, 0):
    print(f"{i}\t{p}\t{in_simplex[i]}")
    plt.annotate(i, p)


这一次,我得到了想要的结果:

----- 0.019016504287719727 seconds -----

0   [5.90479358 5.75174668] True
1   [0.51156474 0.86088186] False
2   [9.22371526 4.025967  ] True
3   [9.35307399 5.38630723] False
4   [2.83575442 5.66318545] False
5   [7.89786072 6.06068206] True
6   [0.09838826 1.38358132] False
7   [3.19776368 9.73562359] False
8   [9.9122709  0.76862067] False
9   [4.52352281 6.2259428 ] False



对于10、100和1,000个点,定时或多或少处于相同的数量级。然而,当我跳到10,000点时,我突然看到4到8秒之间的任何地方,这太慢了,只有当我增加维度时才增加到几十秒和分钟。
如前所述,我希望尽可能避免并行化。任何关于重心部分的帮助/建议都将非常感谢,特别是如果它可以工作,将比线性规划方法更快。有什么方法可以加速LP方法吗?
谢啦,谢啦

whlutmcx

whlutmcx1#

线性代数方法(我不认为这里需要LP)。使用一个矩阵乘法进行超平面half-space测试,然后进行一些max()和sign()后处理。
您可以通过在任何半空间测试之前执行直线修剪来变得更聪明,并在任何一个半空间测试失败时将矩阵乘法分区并停止。当一些重要部分的点在单纯形之外测试时,它帮助最大。在最极端的情况下,如果单纯形中不包含任何点(尝试半径=1.1),则非分区算法需要约0.6秒,而50个分区需要约0.01秒。

from time import monotonic

import numpy as np
from numpy.random import default_rng
from scipy.spatial import ConvexHull

def make_homogeneous(test_points: np.ndarray) -> np.ndarray:
    """
    Pre-process an array of (p, ndim) test points into a homogeneous
    transformation matrix of size (ndim+1, p). This only needs to be
    done once for a given point cloud.
    """
    return np.vstack((
        test_points.T,
        np.ones(shape=test_points.shape[0], dtype=test_points.dtype),
    ))

def test_hull(
    hull: ConvexHull, test_homogeneous: np.ndarray,
    n_partitions: int = 1, trim: bool = True,
) -> np.ndarray:
    """
    Vectorized test of whether each test point falls within the simplex.

    :param hull: Hull defining the simplex. Number of dimensions (i.e. hull.equations.shape[1]-1)
                 must be equal to number of dimensions in the test point cloud.
    :param test_homogeneous:
                 Test point cloud, in homogeneous transformation matrix format (from
                 make_homoegeneous()).
    :param n_partitions:
                 Number of inner product partitions. If the number of points falling inside of the
                 simplex is high, partitioning will not help and this should be left as 1 (non-
                 partitioned). If the number of points falling inside of the simplex is low, set
                 this on the order of ~ 1% of the number of hull faces.
    :param trim: Whether to perform a rectilinear trim before any dot products.
    :return: A boolean array with length as the number of test points: true for "inside", false for
             "outside". Values exactly on the simplex boundary are treated as "true" (inside the
             simplex) due to `<= 0` below.
    """

    # m: number of hull faces (to be partitioned)
    # n: 1 + ndim
    # p: number of test points
    m, n = hull.equations.shape
    n, p = test_homogeneous.shape
    partition_size = m // n_partitions

    if trim:
        extents0 = hull.points.min(axis=0)[:, np.newaxis]
        extents1 = hull.points.max(axis=0)[:, np.newaxis]
        inside = (
            (test_homogeneous[:3, :] >= extents0).all(axis=0) &
            (test_homogeneous[:3, :] <= extents1).all(axis=0)
        )
        test_subset = test_homogeneous[:, inside]
        # print(f'Trimmed to {np.count_nonzero(inside)}/{p} points')
    else:
        inside = np.ones(shape=p, dtype=bool)
        test_subset = test_homogeneous

    for i in range(0, m, partition_size):
        hull_subset = hull.equations[i: i + partition_size, :]
        product = hull_subset @ test_subset

        inside_subset = product.max(axis=0) <= 0
        # inside_subset = (product < 0).all(axis=0)  # Equivalent, marginally slower?

        inside[inside] = inside_subset
        if not inside_subset.any():
            break

        test_subset = test_subset[:, inside_subset]

    return inside

def cube_test() -> None:
    """
    Unit test for a cube-shaped hull (2 hull facets per cube side, for 12 facets total)
    """
    hull = ConvexHull([
        [-1, -1, -1],
        [ 1, -1, -1],
        [-1,  1, -1],
        [ 1,  1, -1],
        [-1, -1,  1],
        [ 1, -1,  1],
        [-1,  1,  1],
        [ 1,  1,  1],
    ])
    in_points = np.array([[ 0. ,  0. ,  0. ],
                          [ 0.9,  0. ,  0.2],
                          [-0.5, -0.2, -0.3],
                          [-0.9,  0.4,  0.6],
                          [ 0.1,  0.1,  0.1]])
    bound_points = np.array([[ 1. ,  1. ,  1. ],
                             [-1. , -1. , -1. ],
                             [ 0.5,  0. ,  1. ],
                             [ 1. ,  1. ,  0. ],
                             [ 0. ,  0. ,  1. ]])
    out_points = np.array([[ 2. ,  0. ,  0. ],
                           [ 1. ,  1. ,  1.5],
                           [ 0. ,  0. , -2. ],
                           [ 0. ,  1.1,  0. ],
                           [-1.1,  0. ,  1.2]])
    assert np.all(test_hull(hull, make_homogeneous(in_points)))
    assert np.all(test_hull(hull, make_homogeneous(bound_points)))
    assert not np.any(test_hull(hull, make_homogeneous(out_points)))

    assert np.all(test_hull(hull, make_homogeneous(in_points), n_partitions=4))
    assert np.all(test_hull(hull, make_homogeneous(bound_points), n_partitions=4))
    assert not np.any(test_hull(hull, make_homogeneous(out_points), n_partitions=4))

def random_hemisphere(
    rand, n: int, radius: float = 1,
    centre: tuple[float, float, float] = (0, 0, 0),
    theta_limit=np.pi/2,
) -> np.ndarray:
    """
    Generate a 3D hemisphere with randomly-distributed vertices. The "cut" face of the hemisphere is
    not guaranteed to be exact when processed as a convex hull.
    """
    phi = rand.uniform(low=0, high=2*np.pi, size=n)
    theta = rand.uniform(low=0, high=theta_limit, size=n)
    cx = np.sin(theta)*np.cos(phi)
    cy = np.sin(theta)*np.sin(phi)
    cz = np.cos(theta)
    return radius*np.stack((cx, cy, cz), axis=1) + centre

def hemisphere_test() -> None:
    """
    Unit test for a hemisphere-shaped ("bowl") convex hull.
    """
    rand = default_rng(seed=0)

    centre = 5, 10, 12  # not the barycentre: only the centre of the "cut" face

    hull = ConvexHull(random_hemisphere(rand, n=100, radius=2, centre=centre))

    test_in = make_homogeneous(random_hemisphere(rand, n=150, radius=1.5, centre=centre, theta_limit=np.pi*0.4))
    indicators = test_hull(hull, test_in)
    assert np.all(indicators)

    test_close = make_homogeneous(random_hemisphere(rand, n=150, radius=1.975, centre=centre, theta_limit=np.pi*0.45))
    indicators = test_hull(hull, test_close)
    mean = indicators.mean()
    assert 0.48 <= mean <= 0.52  # 0.5067 for this seed

    test_out = make_homogeneous(random_hemisphere(rand, n=150, radius=2.1, centre=centre))
    indicators = test_hull(hull, test_out)
    assert not np.any(indicators)

def time_test() -> None:
    """
    Output timings for non-partitioned and partitioned configurations of a hemisphere hull test
    """
    rand = default_rng(seed=0)
    n = 10_000
    hull_pts = random_hemisphere(rand, n=n)
    test_pts = random_hemisphere(rand, n=n, radius=0.9999)

    t0 = monotonic()
    homogeneous = make_homogeneous(test_pts)
    t1 = monotonic()
    hull = ConvexHull(hull_pts)
    t2 = monotonic()
    print('n =', n)
    print(f'make_homogeneous: {t1-t0:.4f} s')
    print(f'ConvexHull:       {t2-t1:.4f} s')

    t3 = monotonic()
    indicators = test_hull(hull, homogeneous)
    t4 = monotonic()
    print(f'test_hull(part=   1): {t4-t3:.4f} s, {np.count_nonzero(indicators)} inside')

    for part in (5, 10, 20, 50, 100, 200, 500, 1_000):
        t5 = monotonic()
        indicators = test_hull(hull, homogeneous, n_partitions=part)
        t6 = monotonic()
        print(f'test_hull(part={part:4}): {t6-t5:.4f} s, {np.count_nonzero(indicators)} inside')

if __name__ == '__main__':
    cube_test()
    hemisphere_test()
    time_test()

个字符

相关问题