using System;
/// <summary>
/// Minimal demo: splits a masked 15-character value into three groups
/// (first 4 characters, next 6, remainder) and prints them separated by spaces.
/// </summary>
public class Program
{
    /// <summary>
    /// Writes the grouped form of the sample value to standard output.
    /// </summary>
    public static void Main()
    {
        const string masked = "***********4123";

        // Spans are views over the original string, so slicing copies nothing.
        ReadOnlySpan<char> chars = masked.AsSpan();
        ReadOnlySpan<char> head = chars.Slice(0, 4);
        ReadOnlySpan<char> middle = chars.Slice(4, 6);
        ReadOnlySpan<char> tail = chars.Slice(10);

        Console.WriteLine($"{head} {middle} {tail}");
    }
}
using BenchmarkDotNet.Attributes;
using Bogus;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace RegexBenchmark
{
/// <summary>
/// BenchmarkDotNet suite comparing four ways of inserting a space after the 4th and
/// the 10th character of each input string: the static <see cref="Regex"/> API, a
/// pre-built compiled <see cref="Regex"/>, span slicing with string interpolation,
/// and chained <c>string.Insert</c> calls.
/// </summary>
// The 'false' argument turns off the GC generation columns in the report;
// allocated bytes are still measured.
[MemoryDiagnoser(false)]
public class Benchmark
{
    // Three capture groups: the first 4 asterisks, the next 6 asterisks, the remainder.
    // Shared by both regex benchmarks so the two variants cannot drift apart.
    private const string Pattern = @"(\*{4})(\*{6})(.*)";

    // Re-emits the three captured groups separated by single spaces.
    private const string Replacement = "$1 $2 $3";

    // 11 asterisks followed by 4 '#' placeholders, which Bogus' ReplaceNumbers
    // fills with random digits.
    private const string InputTemplate = "***********####";

    /// <summary>Number of input strings processed per benchmark invocation.</summary>
    [Params(1000, 10_000)]
    public int N = 1000;

    private readonly Regex _regex = new(Pattern, RegexOptions.Compiled);

    // Populated in Setup(); starts as an empty array so the field is never null.
    private string[] _inputs = Array.Empty<string>();

    /// <summary>
    /// Generates <see cref="N"/> random inputs once per parameter value, so input
    /// generation is excluded from the measured time.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        var faker = new Faker();
        _inputs = Enumerable.Range(0, N)
            .Select(_ => faker.Random.ReplaceNumbers(InputTemplate))
            .ToArray();
    }

    /// <summary>
    /// Formats every input through the static <c>Regex.Replace</c> overload, passing
    /// the pattern as a string on each call.
    /// </summary>
    /// <returns>The formatted strings, in input order.</returns>
    [Benchmark]
    public string[] RegexVersionUncompiled()
    {
        string[] result = new string[N];
        for (int i = 0; i < N; i++)
        {
            result[i] = Regex.Replace(_inputs[i], Pattern, Replacement);
        }

        return result;
    }

    /// <summary>
    /// Formats every input with the cached <see cref="Regex"/> instance that was
    /// constructed with <see cref="RegexOptions.Compiled"/>.
    /// </summary>
    /// <returns>The formatted strings, in input order.</returns>
    [Benchmark]
    public string[] RegexVersionCompiled()
    {
        string[] result = new string[N];
        for (int i = 0; i < N; i++)
        {
            result[i] = _regex.Replace(_inputs[i], Replacement);
        }

        return result;
    }

    /// <summary>
    /// Formats every input by slicing it as a span at indices 4 and 10 and
    /// interpolating the three pieces with spaces in between.
    /// </summary>
    /// <remarks>
    /// Marked as the baseline so the report contains Ratio / Alloc Ratio columns
    /// expressed relative to this method.
    /// </remarks>
    /// <returns>The formatted strings, in input order.</returns>
    [Benchmark(Baseline = true)]
    public string[] SpanVersion()
    {
        string[] result = new string[N];
        for (int i = 0; i < N; i++)
        {
            var strSpan = _inputs[i].AsSpan();
            result[i] = $"{strSpan[..4]} {strSpan[4..10]} {strSpan[10..]}";
        }

        return result;
    }

    /// <summary>
    /// Formats every input with two chained <c>string.Insert</c> calls.
    /// </summary>
    /// <returns>The formatted strings, in input order.</returns>
    [Benchmark]
    public string[] StringInsertVersion()
    {
        string[] result = new string[N];
        for (int i = 0; i < N; i++)
        {
            // The second index is 11, not 10: the first insert shifts every
            // character after position 4 one place to the right.
            result[i] = _inputs[i].Insert(4, " ").Insert(11, " ");
        }

        return result;
    }
}
}
有趣的是:当我打开GC列的显示时,Regex似乎对垃圾收集的压力较小:
| Method | N | Mean | Error | StdDev | Ratio | RatioSD | Gen0 | Gen1 | Allocated | Alloc Ratio |
|----------------------- |------ |------------:|----------:|----------:|------:|--------:|---------:|---------:|----------:|------------:|
| RegexVersionUncompiled | 1000 | 329.92 us | 6.402 us | 8.547 us | 8.55 | 0.29 | 7.3242 | 1.4648 | 62.52 KB | 1.00 |
| RegexVersionCompiled | 1000 | 244.21 us | 4.637 us | 4.962 us | 6.34 | 0.17 | 7.5684 | 1.7090 | 62.52 KB | 1.00 |
| SpanVersion | 1000 | 38.60 us | 0.717 us | 0.670 us | 1.00 | 0.00 | 7.6294 | 1.8921 | 62.52 KB | 1.00 |
| StringInsertVersion | 1000 | 32.69 us | 0.302 us | 0.267 us | 0.85 | 0.02 | 14.3433 | 3.5400 | 117.21 KB | 1.87 |
| | | | | | | | | | | |
| RegexVersionUncompiled | 10000 | 3,242.25 us | 61.809 us | 66.135 us | 7.50 | 0.09 | 74.2188 | 70.3125 | 625.03 KB | 1.00 |
| RegexVersionCompiled | 10000 | 2,431.65 us | 47.894 us | 44.800 us | 5.64 | 0.13 | 74.2188 | 70.3125 | 625.03 KB | 1.00 |
| SpanVersion | 10000 | 431.01 us | 5.069 us | 4.741 us | 1.00 | 0.00 | 76.1719 | 75.6836 | 625.02 KB | 1.00 |
| StringInsertVersion | 10000 | 429.69 us | 7.117 us | 5.943 us | 1.00 | 0.02 | 142.5781 | 142.0898 | 1171.9 KB | 1.87 |
3条答案
按热度按时间b91juud31#
使用捕获组。
转换为具有“{第一组} {第二组} {剩余}”格式的新值。
Demo @ .NET Fiddle
y0u0uwnf2#
如果您不坚持使用Regex,您可以:
输出:
实际应用:https://dotnetfiddle.net/kKoRJb
注意,对于生产,我会为输入添加健全性检查。
另外:如果您选择使用正则表达式,请考虑将其预编译并缓存。我没有将此方案与正则表达式方案进行基准对比,因此如果性能在这里很关键,您可能需要自己测试一下。
更新
所以,我很感兴趣,做了一个小基准:
基于这段代码,我从 Yong Shun 的回答中“窃取”了 Regex 版本,从 Hossein Sabziani 的回答中“窃取”了 String.Insert 版本:
有趣的是:当我打开GC列的显示时,Regex似乎对垃圾收集的压力较小:
考虑到扩展性,我可能仍会选择Span解决方案:
lstz6jyr3#
您可以使用 String.Insert(Int32, String) 在指定索引处插入字符串: