numpy python -使用数组而不是itertools组合创建组合数组

我写了一个 Python 脚本，其中需要一个函数：给定可用的最小值和最大值，以及组合中应包含的数字个数，随机生成一个数字组合，使这些数字的平均值等于输入的平均值。
这个函数成了我的性能瓶颈，因为它必须对分析中的每个月循环一次，而整个分析又要在几千次模拟中重复执行。
我需要能够运行大量模拟，因此想找到一种方法来改写它，利用数组和 numpy 内置的运算来代替 for 循环。
这是我目前编写的函数：

def deal_months(ccc, num_deals, min, max):
    """Pick, for every month, a random combination of deal values with a given mean.

    Args:
        ccc: Sequence of target means, one per month.
        num_deals: Sequence of deal counts, one per month; must have the
            same length as ``ccc``. Only combination sizes 1 to 4 are built.
        min: Smallest value a single deal may take (inclusive).
        max: Largest value a single deal may take (inclusive).

    Returns:
        A list with one entry per month: a tuple of ``num_deals`` values from
        ``min..max`` whose mean equals the month's ``ccc``, or ``0`` when the
        month has no deals or no combination matches.

    Raises:
        Exception: If ``ccc`` and ``num_deals`` differ in length.
    """
    if len(ccc) != len(num_deals):
        raise Exception("ccc is not the same length as num_deals")

    values = list(range(min, max + 1))
    # pools[k] holds every non-decreasing tuple of k + 1 values.
    pools = [
        list(combinations_with_replacement(values, size))
        for size in (1, 2, 3, 4)
    ]

    picks = []
    for target_mean, deal_count in zip(ccc, num_deals):
        candidates = []
        if deal_count != 0:
            # Re-scan the whole pool for this month and keep the combinations
            # whose mean matches the target.
            candidates = [
                combo
                for combo in pools[deal_count - 1]
                if sum(combo) / deal_count == target_mean
            ]
        if candidates:
            picks.append(candidates[random.randint(0, len(candidates) - 1)])
        else:
            picks.append(0)
    return picks

这是用于预测业务模型中的收入，ccc和num_deals应该是一个值的列表或数组，其长度等于分析中的总月数。
在模型中，ccc 服从均值为 8、标准差为 3 的正态分布，并四舍五入为整数；num_deals 取 0 到 4 之间的整数；函数的 min 参数为 0，max 参数为 36。

即使不使用任何 numpy 的向量化技巧，仅靠预先计算并缓存（记忆化）所有结果，就已经可以大幅缩短运行时间（假设你要对大量样本 N 重复这个过程）：

from itertools import combinations_with_replacement
import random
import timeit

# Number of simulated months used for the benchmark.
N = 10000

# Target means: normal draws (mean 8, standard deviation 3) rounded to integers.
ccc = list(map(round, (random.normalvariate(8, 3) for _ in range(N))))
# Deal counts: uniform integers from 0 to 4 inclusive.
num_deals = [random.randint(0, 4) for _ in range(N)]

# first benchmark your solution:
def deal_months(ccc, num_deals, min=0, max=36):
    """Baseline sampler: re-filter all combinations for every month.

    Args:
        ccc: Sequence of target means, one per month.
        num_deals: Sequence of deal counts, one per month; must have the
            same length as ``ccc``. Only combination sizes 1 to 4 are built.
        min: Smallest value a single deal may take (inclusive).
        max: Largest value a single deal may take (inclusive).

    Returns:
        A list with one entry per month: a tuple of ``num_deals`` values from
        ``min..max`` whose sum equals ``ccc * num_deals``, or ``0`` when the
        month has no deals or no combination matches.

    Raises:
        Exception: If ``ccc`` and ``num_deals`` differ in length.
    """
    if len(ccc) != len(num_deals):
        raise Exception("ccc is not the same length as num_deals")

    values = list(range(min, max + 1))
    # pools[k] holds every non-decreasing tuple of k + 1 values.
    pools = [
        list(combinations_with_replacement(values, size))
        for size in (1, 2, 3, 4)
    ]

    picks = []
    for target_mean, deal_count in zip(ccc, num_deals):
        candidates = []
        if deal_count != 0:
            # This per-month scan over the full pool is what makes the
            # baseline slow.
            candidates = [
                combo
                for combo in pools[deal_count - 1]
                if sum(combo) == target_mean * deal_count
            ]
        if candidates:
            picks.append(candidates[random.randint(0, len(candidates) - 1)])
        else:
            picks.append(0)
    return picks

%timeit deal_months(ccc, num_deals)
# 10.2 s ± 71.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


from collections import defaultdict

# now compare with mine:
def deal_months_2(ccc, num_deals, min=0, max=36):
    """Sample deal combinations using a table grouped by (mean, deal count).

    Every combination of 1 to 4 values from ``min..max`` with an integer mean
    is bucketed once under ``(mean, size)``; each month is then a single
    dictionary lookup followed by a random choice.

    Args:
        ccc: Sequence of target means, one per month.
        num_deals: Sequence of deal counts, one per month; must have the
            same length as ``ccc``.
        min: Smallest value a single deal may take (inclusive).
        max: Largest value a single deal may take (inclusive).

    Returns:
        A list with one entry per month: a randomly chosen matching tuple, or
        ``0`` when no combination is stored for that ``(mean, count)`` pair.

    Raises:
        Exception: If ``ccc`` and ``num_deals`` differ in length.
    """
    if len(ccc) != len(num_deals):
        raise Exception("ccc is not the same length as num_deals")

    values = list(range(min, max + 1))
    table = defaultdict(list)
    for size in (1, 2, 3, 4):
        for combo in combinations_with_replacement(values, size):
            total = sum(combo)
            # Only combinations with an integer mean can ever be requested.
            if total % size == 0:
                table[(total // size, size)].append(combo)

    picks = []
    for target_mean, deal_count in zip(ccc, num_deals):
        candidates = table[(target_mean, deal_count)]
        picks.append(random.choice(candidates) if candidates else 0)
    return picks
    
%timeit deal_months_2(ccc, num_deals)
# 14.4 ms ± 576 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

请注意，大部分时间都花在预计算上；一旦预计算完成，即使进行大量采样，其开销也微不足道：

def get_all_matching_combos(min=0, max=36, max_deals=4):
    """Precompute all deal combinations, grouped by (mean, number of deals).

    Args:
        min: Smallest value a single deal may take (inclusive).
        max: Largest value a single deal may take (inclusive).
        max_deals: Largest combination size to precompute. Defaults to 4,
            the limit that was previously hard-coded.

    Returns:
        A ``defaultdict(list)`` mapping ``(mean, n)`` to the list of
        non-decreasing ``n``-tuples of values from ``min..max`` whose mean is
        exactly the integer ``mean``. Because it is a ``defaultdict``,
        indexing a missing key inserts and returns an empty list.
    """
    integers = list(range(min, max + 1))
    all_matching_combos = defaultdict(list)
    for n in range(1, max_deals + 1):
        for c in combinations_with_replacement(integers, n):
            mean, rest = divmod(sum(c), n)
            # A non-zero remainder means the mean is not an integer, so the
            # combination can never match an integer target.
            if rest:
                continue
            all_matching_combos[(mean, n)].append(c)
    return all_matching_combos

%timeit get_all_matching_combos()
# 11.9 ms ± 12.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

def sample(ccc, num_deals, all_matching_combos=get_all_matching_combos()):
    """Draw one random deal combination per month from a precomputed table.

    Args:
        ccc: Sequence of target means, one per month.
        num_deals: Sequence of deal counts, one per month; must have the
            same length as ``ccc``.
        all_matching_combos: Mapping from ``(mean, n)`` to a list of candidate
            tuples. The default table is built once, when this function is
            defined, and is shared by every call that does not pass its own.

    Returns:
        A list with one entry per month: a randomly chosen tuple from the
        table, or ``0`` when the table has no candidates for that month.

    Raises:
        Exception: If ``ccc`` and ``num_deals`` differ in length.
    """
    if len(ccc) != len(num_deals):
        raise Exception("ccc is not the same length as num_deals")
    final_combos = []
    for c, n in zip(ccc, num_deals):
        # Use .get() rather than indexing: the default table is a defaultdict,
        # and indexing a missing key would insert an empty list, so the shared
        # table would grow on every miss. .get() also works for plain dicts.
        matching_combos = all_matching_combos.get((c, n))
        final_combos.append(random.choice(matching_combos) if matching_combos else 0)
    return final_combos

# Number of simulated months used to time the sampling step.
N = 10000
# Target means: normal draws (mean 8, standard deviation 3) rounded to integers.
ccc = list(map(round, (random.normalvariate(8, 3) for _ in range(N))))
# Deal counts: uniform integers from 0 to 4 inclusive.
num_deals = [random.randint(0, 4) for _ in range(N)]
%timeit sample(ccc, num_deals)
# 2.53 ms ± 7.63 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)

你也可以通过设置随机种子来确保相同的结果，让自己相信我的魔法等同于你的原始代码：

def deal_months(ccc, num_deals, min=0, max=36):
    """Baseline sampler with a fixed random seed, for reproducible comparison.

    Args:
        ccc: Sequence of target means, one per month.
        num_deals: Sequence of deal counts, one per month; must have the
            same length as ``ccc``. Only combination sizes 1 to 4 are built.
        min: Smallest value a single deal may take (inclusive).
        max: Largest value a single deal may take (inclusive).

    Returns:
        A list with one entry per month: a tuple of ``num_deals`` values from
        ``min..max`` whose sum equals ``ccc * num_deals``, or ``0`` when the
        month has no deals or no combination matches.

    Raises:
        Exception: If ``ccc`` and ``num_deals`` differ in length.

    Note:
        Reseeds the global ``random`` generator with 0 before sampling.
    """
    if len(ccc) != len(num_deals):
        raise Exception("ccc is not the same length as num_deals")

    values = list(range(min, max + 1))
    # pools[k] holds every non-decreasing tuple of k + 1 values.
    pools = [
        list(combinations_with_replacement(values, size))
        for size in (1, 2, 3, 4)
    ]

    picks = []
    random.seed(0)  # fixed seed so repeated runs draw the same indices
    for target_mean, deal_count in zip(ccc, num_deals):
        candidates = []
        if deal_count != 0:
            candidates = [
                combo
                for combo in pools[deal_count - 1]
                if sum(combo) == target_mean * deal_count
            ]
        if candidates:
            picks.append(candidates[random.randint(0, len(candidates) - 1)])
        else:
            picks.append(0)
    return picks

def deal_months_2(ccc, num_deals, min=0, max=36):
    """Table-based sampler with a fixed random seed, for reproducible comparison.

    Every combination of 1 to 4 values from ``min..max`` with an integer mean
    is bucketed once under ``(mean, size)``; each month is then a single
    dictionary lookup followed by a random choice.

    Args:
        ccc: Sequence of target means, one per month.
        num_deals: Sequence of deal counts, one per month; must have the
            same length as ``ccc``.
        min: Smallest value a single deal may take (inclusive).
        max: Largest value a single deal may take (inclusive).

    Returns:
        A list with one entry per month: a randomly chosen matching tuple, or
        ``0`` when no combination is stored for that ``(mean, count)`` pair.

    Raises:
        Exception: If ``ccc`` and ``num_deals`` differ in length.

    Note:
        Reseeds the global ``random`` generator with 0 before sampling.
    """
    if len(ccc) != len(num_deals):
        raise Exception("ccc is not the same length as num_deals")

    values = list(range(min, max + 1))
    table = defaultdict(list)
    for size in (1, 2, 3, 4):
        for combo in combinations_with_replacement(values, size):
            total = sum(combo)
            # Only combinations with an integer mean can ever be requested.
            if total % size == 0:
                table[(total // size, size)].append(combo)

    picks = []
    random.seed(0)  # fixed seed so repeated runs draw the same choices
    for target_mean, deal_count in zip(ccc, num_deals):
        candidates = table[(target_mean, deal_count)]
        picks.append(random.choice(candidates) if candidates else 0)
    return picks

# Equivalence check on a smaller sample: both samplers reseed the random
# generator with 0, so they are expected to return identical results.
N = 1000
ccc = list(map(round, (random.normalvariate(8, 3) for _ in range(N))))
num_deals = [random.randint(0, 4) for _ in range(N)]
d, d2 = deal_months(ccc, num_deals), deal_months_2(ccc, num_deals)
assert d == d2

numpy python -使用数组而不是itertools组合创建组合数组

1条答案

相关问题

热门标签

最新问答