def merge_sets(sets):
for i in range(len(sets) - 1, 0, -1):
for j in range(i - 1, -1, -1):
if sets[i] & sets[j]: # check for intersection (non-empty -> True)
sets[j] |= sets[i] # merge i-th set into j-th set
sets.pop(i) # remove i-th set
break # terminate inner loop and continue with next i
return sets
tuples = [(10,22), (10,20), (10,69), (34,18), (18,17), (89,990), (86,80), (174,175), (543,542)]
sets = [set(t) for t in tuples]
merged_sets = merge_sets(sets)
merged_tuples = [tuple(s) for s in merged_sets]
# merged_tuples: [(20, 69, 22, 10), (17, 34, 18), (89, 990), (80, 86), (174, 175), (542, 543)]
def merge_sets(sets):
length = len(sets)
for j in range(length - 2, -1, -1): # loop from length-2 to 0
# mark indices from length-1 to j+1 for which the sets intersect:
merge_into_j = [i for i in range(length - 1, j, -1) if sets[j] & sets[i]]
for i in merge_into_j: # merge_into_j contains indices in RTL direction
sets[j] |= sets[i] # merge i-th set into j-th set
sets.pop(i) # remove i-th set
length -= len(merge_into_j)
return sets
from collections import defaultdict
tuples = [(10, 22), (10, 20), (10, 69), (34, 18), (18, 17),
(89, 990), (86, 80), (174, 175), (543, 542)]
values_to_index = defaultdict(int)
# This part is a single pass over the elements
for i, t in enumerate(tuples):
for v in t:
values_to_index[v] |= (1 << i)
# Merging the sets is done with integer operations
# Instead of popping elements, it seems simpler to construct
# a new list, since that will be shorter than the input,
# reducing the number of searches
merged_sets = []
for s in values_to_index.values():
# To avoid unnecessary checks for disjointness, keep track of the
# elements that merge pre-existing sets and clean up as you go
merges = []
for i in range(len(merged_sets)):
if merged_sets[i] & s:
merged_sets[i] |= s
merges.append(i)
if len(merges) == 0:
merged_sets.append(s)
elif len(merges) > 1:
k = merges[0]
for i in merges[:0:-1]:
merged_sets[k] |= merged_sets.pop(i)
# Now we have a list of integers telling us which set each tuple goes to
output = [set() for _ in range(len(merged_sets))]
for output_index, bits in enumerate(merged_sets):
while bits:
b = bits & (~bits + 1)
output[output_index].update(tuples[b.bit_length() - 1])
bits ^= b
result = [tuple(s) for s in output]
3条答案
按热度按时间zpgglvta1#
[在这个问题的最初版本中,这个版本后来被编辑过],听起来像是你想要一个 * 传递闭包 *,但实际上你还需要一个 * 对称闭包 *,这样你就可以合并
(10,22)
、(10,20)
和(10,69)
。这段代码很简单。它首先将元组转换成集合,这样我们就可以方便地检查共享元素(集合交集)。最后,我们将集合转换回元组。
函数
merge_sets
比较所有集合,每当两个集合相交时,我们就合并它们并继续。有三点值得进一步说明:
[(1, 2), (3, 4), (2, 3)]
(从左到右遍历)或[(2, 3), (3, 4), (1, 2)]
(从右到左遍历,如我的代码所示)。)因为我们合并集合的方向与循环前进的方向相同(此处:从右到左这个选择在算法上是任意的,但请参见下一个要点),我们可以简单地继续循环,而不必在每次合并后在更早的点重新启动它们。list
元素比从左边移除要快(为了从左边快速移除,我们可以使用collections.deque
)。map
)来解决的原因是,我们处理的是可变对象(我们将修改列表以及包含的项)。即使使用传统的循环,我们也需要小心地以正确的方式修改项(通过在循环前进的方向上合并它们;参见第一个要点)。改进的解决方案:
因为在“最坏”情况下,所有元组都可能是不同的,所以似乎没有办法对所有元组进行比较(或它们的对应集合),导致n*(n+1)/2次比较。然而,我们可以通过减少重复的部分比较的次数来稍微加快速度。例如,如果集合i合并到集合j中,但是在中间有另一个集合m,它与集合i相交,当到达集合m时,我们必须将集合m与新的集合j进行比较。但是由于上述循环的工作方式,我们已经比较了来自集合j和集合m的那些元素,如果它们最初来自集合i。
为了避免这样的部分重复,我们简单地将j移到外部循环(仍然从右到左遍历),然后 * 标记 * 与之相交的集合j右边的集合,只有在这之后,我们才将它们合并到集合j中。
True
。*4ktjp1zp2#
我想知道你是否可以通过Lover of Structure's answer背后的思想,移动O(n^2)部分来操作整数来获得加速。下面是一个尝试。在这个版本中,我不需要假设输入是一个集合列表:
位迭代器由https://stackoverflow.com/a/8898977/2988730提供。
hmmo2u0o3#
将“兼容”元组链接在一起,即那些共享至少一个元素的元组。每个链将是原始数据的一个连接组件。
如果结果需要按元组长度降序排序,请修改函数的return语句,如下所示
return sorted(ccs, key=len, reverse=True)