在R?中生成黄石序列(A098550)的快速方法

x759pob2  于 2023-02-01  发布在  其他
关注(0)|答案(2)|浏览(134)

我刚刚在YouTube上看了一段来自Numberphile的关于黄石序列的视频(A098550),它是基于一个以1和2开头的序列,后面的项是由规则生成的:
1.无重复项
1.总是选择最小的整数
1.广义耦合系数(a_n,a_(n-1))= 1
1.广义相对密度(a_n,a_(n-2))〉1
前15个任期为:1 2 3 4 9 8 15 14 5 6 25 12 35 16 7
R中的Q&D方法可能是这样的,但可以理解的是,在尝试生成更长的序列时,这种方法会变得非常慢。它还对序列中可能的最大数量做了一些假设(如信息:10,000个项目的序列永远不会高于5000)。
我们能做些什么来加快速度呢?

library(DescTools)
a <- c(1, 2, 3)
p <- length(a)

# all natural numbers
all_ints <- 1:5000

for (n in p:1000) {
    # rule 1 - remove all number that are in sequence already
    next_a_set <- all_ints[which(!all_ints %in% a)]

    # rule 3 - search the remaining set for numbers that have gcd == 1
    next_a_option <- next_a_set[which(
        sapply(
            next_a_set,
            function(x) GCD(a[n], x)
        ) == 1
    )]

    # rule 4 - search the remaining number for gcd > 1
    next_a <- next_a_option[which(
        sapply(
            next_a_option,
            function(x) GCD(a[n - 1], x)
        ) > 1
    )]

    # select the lowest
    a <- c(a, min(next_a))
    n <- n + 1
}
cedebl8k

cedebl8k1#

下面是一个比你的版本快20倍的版本,并附有对更改的评论:

# Set a to the final length from the start.
a <- c(1, 2, 3, rep(NA, 997))
p <- 3

# Define a vectorized gcd() function.  We'll be testing
# lots of gcds at once.  This uses the Euclidean algorithm.
gcd <- function(x, y) { # vectorized gcd

  while (any(y != 0)) {
    x1 <- ifelse(y == 0, x, y)
    y <- ifelse(y == 0, 0, x %% y)
    x <- x1
  }
  x
}

# Guess at a reasonably large vector to work from,
# but we'll grow it later if not big enough.
allnum <- 1:1000

# Keep a logical record of what has been used
used <- c(rep(TRUE, 3), rep(FALSE, length(allnum) - 3))

for (n in p:1000) {
  # rule 1 - remove all number that are in sequence already
  # nothing to do -- used already records that.
  
  repeat {
    # rule 3 - search the remaining set for numbers that have gcd == 1

    keep <- !used & gcd(a[n], allnum) == 1

    # rule 4 - search the remaining number for gcd > 1
    keep <- keep & gcd(a[n-1], allnum) > 1
  
    # If we found anything, break out of this loop
    if (any(keep))
      break
    
    # Otherwise, make the set of possible values twice as big,
    # and try again
    allnum <- seq_len(2*length(allnum))
    used <- c(used, rep(FALSE, length(used)))
  }
  # select the lowest
  newval <- which.max(keep)
  # Assign into the appropriate place
  a[n+1] <- newval
  # Record that it has been used
  used[newval] <- TRUE  
}

如果你分析它,你会发现它大部分时间都花在gcd()函数上,你可以用C或C++重写它,这样做会更快。

bkkx9g8r

bkkx9g8r2#

这里最大的变化是预分配和限制搜索尚未使用的号码。

library(numbers)
N <- 5e3
a <- integer(N)
a[1:3] <- 1:3
b <- logical(N) # which numbers have been used already?
b[1:3] <- TRUE
NN <- 1:N

system.time({
  for (n in 4:N) {
    a1 <- a[n - 1L]
    a2 <- a[n - 2L]
    for (k in NN[!b]) {
      if (GCD(k, a1) == 1L & GCD(k, a2) > 1L) {
        a[n] <- k
        b[k] <- TRUE
        break
      }
    }
    if (!a[n]) {
      a <- a[1:(n - 1L)]
      break
    }
  }
})
#>    user  system elapsed 
#>    1.28    0.00    1.28
length(a)
#> [1] 1137

有关快速C++算法,请参见here

相关问题