我正在运行下面的代码片段(std::time对象只是用于基准测试),它以给定的顺序从u8
的向量中获取u8
元素,并以此顺序创建一个包含这些对象的新向量。
for idx in cur_prefix_ref.iter() {
let now = std::time::Instant::now();
let elapsed_first = now.elapsed();
unsafe {
val = *data.get_unchecked(*idx as usize).get_unchecked(j);
}
let elapsed_second = now.elapsed();
new_add.push(val);
if val == 0 {
zero_tot += 1;
} else if val == 1 {
one_tot += 1;
}
if (ct == 0) || (ct_extra == fm_gap) {
occ_positions[0].push(zero_tot);
occ_positions[1].push(one_tot);
ct_extra = 0;
}
ct += 1;
ct_extra += 1;
let elapsed_third = now.elapsed();
elapse1.push(elapsed_first);
elapse2.push(elapsed_second);
elapse3.push(elapsed_third);
}
在我的完整代码中,这个内部循环最终运行了数亿次,所以我试图尽可能地优化它。根据be基准测试,我似乎花费了绝大部分的循环时间来查找Vec<Vec<u8>>
中的值,在val = *data.get_unchecked(*idx as usize).get_unchecked(j);
行上。参见下面,它对来自该循环的不同迭代的一些elapsed_first,elapsed_second,elapsed_third
次进行基准测试(每个列表的第i^个元素来自同一次运行):
First: [27ns, 23ns, 21ns, 24ns, 27ns, 23ns, 28ns, 23ns, 26ns, 23ns, 21ns, 22ns, 27ns, 27ns, 28ns, 23ns, 25ns, 24ns, 26ns, 25ns, 22ns, 24ns, 24ns, 28ns, 28ns, 28ns, 26ns, 22ns, 22ns, 21ns]
Second: [538ns, 695ns, 550ns, 486ns, 627ns, 615ns, 562ns, 570ns, 661ns, 521ns, 617ns, 358ns, 444ns, 560ns, 540ns, 471ns, 656ns, 336ns, 233ns, 209ns, 433ns, 373ns, 1.427µs, 542ns, 708ns, 288ns, 304ns, 608ns, 297ns, 252ns]
Third: [612ns, 736ns, 587ns, 525ns, 665ns, 658ns, 608ns, 614ns, 701ns, 560ns, 656ns, 395ns, 482ns, 606ns, 578ns, 510ns, 696ns, 374ns, 270ns, 246ns, 470ns, 416ns, 1.47µs, 583ns, 751ns, 327ns, 348ns, 645ns, 334ns, 289ns]
我一直在试图理解为什么这个简单的向量查找是迄今为止花费最多时间的比特相比,其他一切,但仍然没有弄清楚。任何帮助是非常感谢!
编辑:这里是完整的函数,这段代码来自:
pub fn spaced_pbwt(vcf: &VCFData, pbwt_cols: &Vec<SiteRow>, fm_gap: u32) -> SpacedPbwt {
let now = std::time::Instant::now();
let data_positions: Vec<u32> = vcf.positions.clone();
let mut pbwt_positions: Vec<u32> = Vec::new();
let mut insert_positions: Vec<u32> = Vec::new();
let data: &Vec<Vec<u8>> = &vcf.vcf_data;
let mut col_set: HashSet<u32> = HashSet::new();
let mut n: usize = 0;
for col in pbwt_cols {
let pos = col.position;
col_set.insert(pos);
n += 1;
}
let m = data.len();
let n_full = data[0].len();
let n_other = n_full-n;
let mut is_pbwt_col :Vec<u8> = Vec::with_capacity(n_full+1);
let mut pbwt_positions: Vec<u32> = Vec::new();
let mut inserted_positions: Vec<u32> = Vec::new();
let mut prefixes : Vec<Vec<u32>> = Vec::with_capacity(n+1);
let mut divergences : Vec<Vec<u32>> = Vec::with_capacity(n+1);
let mut binaries: Vec<Vec<u8>> = Vec::with_capacity(n_full+1);
let cur_prefix : Vec<u32> = Vec::from_iter(0..m as u32);
let cur_divergence : Vec<u32> = vec![0; m];
let mut j: usize = 0;
let mut j_pbwt = 0;
let mut count_vec: Vec<u32> = Vec::new();
let mut occ_vec : Vec<Vec<Vec<u32>>> = Vec::new();
prefixes.push(cur_prefix);
divergences.push(cur_divergence);
let mut cur_prefix_ref: &Vec<u32> = &(prefixes[prefixes.len()-1]);
let mut cur_divergence_ref: &Vec<u32> = &divergences[divergences.len()-1];
let mut ct: i32 = 0;
let mut ct_extra: u32 = 0;
let mut zero_tot: u32 = 0;
let mut one_tot: u32 = 0;
let mut occ_positions: Vec<Vec<u32>> = vec![Vec::new(),Vec::new()];
let mut new_add: Vec<u8> = Vec::with_capacity(m);
let mut a: Vec<u32> = Vec::with_capacity(m);
let mut b: Vec<u32> = Vec::with_capacity(m);
let mut d: Vec<u32> = Vec::with_capacity(m);
let mut e: Vec<u32> = Vec::with_capacity(m);
let mut bin_values: Vec<u8> = Vec::with_capacity(m);
let mut elapse1 = Vec::new();
let mut elapse2 = Vec::new();
let mut elapse3 = Vec::new();
for col in &vcf.positions {
if !col_set.contains(&col) {
ct = 0;
ct_extra = 0;
zero_tot = 0;
one_tot = 0;
occ_positions = vec![Vec::new(),Vec::new()];
new_add = Vec::with_capacity(m);
let mut val: u8;
for idx in cur_prefix_ref.iter() {
let now = std::time::Instant::now();
let elapsed_first = now.elapsed();
unsafe {
val = *data.get_unchecked(*idx as usize).get_unchecked(j);
}
let elapsed_second = now.elapsed();
new_add.push(val);
if val == 0 {
zero_tot += 1;
} else if val == 1 {
one_tot += 1;
}
if (ct == 0) || (ct_extra == fm_gap) {
occ_positions[0].push(zero_tot);
occ_positions[1].push(one_tot);
ct_extra = 0;
}
ct += 1;
ct_extra += 1;
let elapsed_third = now.elapsed();
elapse1.push(elapsed_first);
elapse2.push(elapsed_second);
elapse3.push(elapsed_third);
}
binaries.push(new_add);
is_pbwt_col.push(0);
inserted_positions.push(*col);
count_vec.push(zero_tot);
occ_vec.push(occ_positions);
} else {
a = Vec::with_capacity(m);
b = Vec::with_capacity(m);
d = Vec::with_capacity(m);
e = Vec::with_capacity(m);
bin_values = Vec::with_capacity(m);
let mut p: u32 = j_pbwt+1;
let mut q: u32 = j_pbwt+1;
occ_positions = vec![Vec::new(),Vec::new()];
ct = 0;
ct_extra = 0;
zero_tot = 0;
one_tot = 0;
let mut cur_allele: u8;
for (idx,start_point) in
cur_prefix_ref.iter().zip(cur_divergence_ref.iter()) {
let idx_val = *idx;
unsafe {
cur_allele = *data.get_unchecked(idx_val as usize).get_unchecked(j);
}
bin_values.push(cur_allele);
let st = *start_point;
if st > p {
p = st;
}
if st > q {
q = st;
}
if cur_allele == 0 {
a.push(idx_val);
d.push(p);
p = 0;
zero_tot += 1;
}
if cur_allele == 1 {
b.push(idx_val);
e.push(q);
q = 0;
one_tot += 1;
}
if (ct == 0) || (ct_extra == fm_gap) {
occ_positions[0].push(zero_tot);
occ_positions[1].push(one_tot);
ct_extra = 0;
}
ct += 1;
ct_extra += 1;
}
let mut new_prefix = a;
new_prefix.append(&mut b);
let mut new_divergence = d;
new_divergence.append(&mut e);
prefixes.push(new_prefix);
divergences.push(new_divergence);
binaries.push(bin_values);
cur_prefix_ref = &(prefixes[prefixes.len()-1]);
cur_divergence_ref = &divergences[divergences.len()-1];
count_vec.push(zero_tot);
occ_vec.push(occ_positions);
is_pbwt_col.push(1);
pbwt_positions.push(*col);
}
j += 1;
}
let elapsed = now.elapsed();
println!("Calc Time: {:.4?}", elapsed);
println!("First: {:?}", &elapse1[500..530]);
println!("Second: {:?}", &elapse2[500..530]);
println!("Third: {:?}", &elapse3[500..530]);
return SpacedPbwt {
num_samples: m as u32,
num_pbwt_sites: n as u32,
num_inserted_sites: n_other as u32,
num_total_sites: n_full as u32,
pbwt_positions: pbwt_positions,
inserted_positions: inserted_positions,
all_positions: data_positions,
pbwt_col_flags: is_pbwt_col,
bin_pbwt: binaries,
count: count_vec,
occ_list: occ_vec,
fm_gap: fm_gap,
};
}
编辑编辑:
下面是该文件的修改版本,每个人都应该能够在自己的机器上运行,并且表现出我所关心的行为。它只使用rand
crate作为依赖项:
use rand::{seq::IteratorRandom, thread_rng}; // 0.6.1
use rand::distributions::{Distribution, Uniform};
use std::collections::HashSet;
pub fn spaced_pbwt(data: &Vec<Vec<u8>>, fm_gap: u32) -> () {
let now = std::time::Instant::now();
let m = data.len();
let n = data[0].len();
let half_n = n/2;
let mut rng = thread_rng();
let sample: Vec<u32> = (0u32..n as u32).collect();
let perm = sample.iter().choose_multiple(&mut rng, half_n);
let mut cols_to_permute: Vec<u32> = Vec::new();
for i in perm {
cols_to_permute.push(*i);
}
let mut col_set: HashSet<u32> = HashSet::new();
let mut n: usize = 0;
for col in &cols_to_permute {
col_set.insert(*col);
n += 1;
}
let m = data.len();
let n_full = data[0].len();
let n_other = n_full-n;
let mut is_pbwt_col :Vec<u8> = Vec::with_capacity(n_full+1);
let mut pbwt_positions: Vec<u32> = Vec::new();
let mut inserted_positions: Vec<u32> = Vec::new();
let mut prefixes : Vec<Vec<u32>> = Vec::with_capacity(n+1);
let mut divergences : Vec<Vec<u32>> = Vec::with_capacity(n+1);
let mut binaries: Vec<Vec<u8>> = Vec::with_capacity(n_full+1);
let cur_prefix : Vec<u32> = Vec::from_iter(0..m as u32);
let cur_divergence : Vec<u32> = vec![0; m];
let mut j: usize = 0;
let mut j_pbwt = 0;
let mut count_vec: Vec<u32> = Vec::new();
let mut occ_vec : Vec<Vec<Vec<u32>>> = Vec::new();
prefixes.push(cur_prefix);
divergences.push(cur_divergence);
let mut cur_prefix_ref: &Vec<u32> = &(prefixes[prefixes.len()-1]);
let mut cur_divergence_ref: &Vec<u32> = &divergences[divergences.len()-1];
let mut ct: i32 = 0;
let mut ct_extra: u32 = 0;
let mut zero_tot: u32 = 0;
let mut one_tot: u32 = 0;
let mut occ_positions: Vec<Vec<u32>> = vec![Vec::new(),Vec::new()];
let mut new_add: Vec<u8> = Vec::with_capacity(m);
let mut a: Vec<u32> = Vec::with_capacity(m);
let mut b: Vec<u32> = Vec::with_capacity(m);
let mut d: Vec<u32> = Vec::with_capacity(m);
let mut e: Vec<u32> = Vec::with_capacity(m);
let mut bin_values: Vec<u8> = Vec::with_capacity(m);
let mut elapse1 = Vec::new();
let mut elapse2 = Vec::new();
let mut elapse3 = Vec::new();
for col in 0..n {
if !col_set.contains(&(col as u32)) {
ct = 0;
ct_extra = 0;
zero_tot = 0;
one_tot = 0;
occ_positions = vec![Vec::new(),Vec::new()];
new_add = Vec::with_capacity(m);
let mut val: u8;
for idx in cur_prefix_ref.iter() {
let now = std::time::Instant::now();
let elapsed_first = now.elapsed();
unsafe {
val = *data.get_unchecked(*idx as usize).get_unchecked(j);
}
let elapsed_second = now.elapsed();
new_add.push(val);
if val == 0 {
zero_tot += 1;
} else if val == 1 {
one_tot += 1;
}
if (ct == 0) || (ct_extra == fm_gap) {
occ_positions[0].push(zero_tot);
occ_positions[1].push(one_tot);
ct_extra = 0;
}
ct += 1;
ct_extra += 1;
let elapsed_third = now.elapsed();
elapse1.push(elapsed_first);
elapse2.push(elapsed_second);
elapse3.push(elapsed_third);
}
binaries.push(new_add);
is_pbwt_col.push(0);
inserted_positions.push(col as u32);
count_vec.push(zero_tot);
occ_vec.push(occ_positions);
} else {
a = Vec::with_capacity(m);
b = Vec::with_capacity(m);
d = Vec::with_capacity(m);
e = Vec::with_capacity(m);
bin_values = Vec::with_capacity(m);
let mut p: u32 = j_pbwt+1;
let mut q: u32 = j_pbwt+1;
occ_positions = vec![Vec::new(),Vec::new()];
ct = 0;
ct_extra = 0;
zero_tot = 0;
one_tot = 0;
let mut cur_allele: u8;
for (idx,start_point) in
cur_prefix_ref.iter().zip(cur_divergence_ref.iter()) {
let idx_val = *idx;
unsafe {
cur_allele = *data.get_unchecked(idx_val as usize).get_unchecked(j);
}
bin_values.push(cur_allele);
let st = *start_point;
if st > p {
p = st;
}
if st > q {
q = st;
}
if cur_allele == 0 {
a.push(idx_val);
d.push(p);
p = 0;
zero_tot += 1;
}
if cur_allele == 1 {
b.push(idx_val);
e.push(q);
q = 0;
one_tot += 1;
}
if (ct == 0) || (ct_extra == fm_gap) {
occ_positions[0].push(zero_tot);
occ_positions[1].push(one_tot);
ct_extra = 0;
}
ct += 1;
ct_extra += 1;
}
let mut new_prefix = a;
new_prefix.append(&mut b);
let mut new_divergence = d;
new_divergence.append(&mut e);
prefixes.push(new_prefix);
divergences.push(new_divergence);
binaries.push(bin_values);
cur_prefix_ref = &(prefixes[prefixes.len()-1]);
cur_divergence_ref = &divergences[divergences.len()-1];
count_vec.push(zero_tot);
occ_vec.push(occ_positions);
is_pbwt_col.push(1);
pbwt_positions.push(col as u32);
}
j += 1;
}
let elapsed = now.elapsed();
println!("Calc Time: {:.4?}", elapsed);
println!("First: {:?}", &elapse1[500..530]);
println!("Second: {:?}", &elapse2[500..530]);
println!("Third: {:?}", &elapse3[500..530]);
}
fn main() {
let m = 4000;
let n = 50000;
let step: Uniform<u8> = Uniform::new(0,2);
let mut rng = rand::thread_rng();
let mut data = Vec::new();
for _ in 0..m {
let choices: Vec<u8> = step.sample_iter(&mut rng).take(n).collect();
data.push(choices);
}
let fm = 2;
spaced_pbwt(&data,fm);
}
1条答案
按热度按时间agyaoht71#
当我运行您的代码(在
i7-7700HQ
上)时,我得到了这些数字这和你的结果有很大的不同比例。既然你说了,有一个C程序运行得更快,应该不是你的系统有问题。
接下来我会想到的是你需要一个
cargo clean
并重新编译整个程序。有时候(我在夜间编译器上)我遇到了一个问题,这使得重新编译的二进制文件变慢,可能是因为一些代码布局问题,编译器的东西idk。一个干净的构建通常会修复它。接下来,您可以尝试使用链接时间优化。将以下代码添加到您的
Cargo.toml
:然后使用以下命令运行配置文件
第三,使用单个数组,就像一些评论说的那样。
ndarray
板条箱非常适合这个。对我来说,它把时间降到了一个三个三个一个