在 Delphi 中删除字符串中的非数字字符

ee7vknir  于 2023-01-20  发布在  其他
关注(0)|答案(1)|浏览(277)

我有三个函数可以成功地从给定字符串中删除所有非数字字符:
第一个函数循环遍历输入字符串的字符,如果当前字符是数字,则将其添加到作为函数结果返回的新字符串中。

function RemoveNonNumericChars(const s: string): string;
var
  Ch: Char;
begin
  // Start from an empty result and grow it by one character for every
  // character of s that falls in the range '0'..'9'; everything else is skipped.
  Result := '';
  for Ch in s do
    case Ch of
      '0'..'9':
        Result := Result + Ch;
    end;
end;

第二个函数从右到左遍历输入字符串中的字符,如果当前字符不是数字,就使用 Delete 函数将其从字符串中删除。

function RemoveNonNumericChars(const s: string): string;
var
  Idx: Integer;
begin
  // Work on a copy of the input and strip characters from it in place.
  Result := s;
  // Scan from the last character towards the first so that a deletion only
  // moves characters that have already been examined.
  Idx := Length(Result);
  while Idx >= 1 do
  begin
    if (Result[Idx] < '0') or (Result[Idx] > '9') then
      Delete(Result, Idx, 1);
    Dec(Idx);
  end;
end;

第三个函数使用正则表达式将所有非数字字符替换为空,从而将它们删除。TRegEx来自System.RegularExpressions单元。

function RemoveNonNumericChars(const s: string): string;
begin
  // Replace every character matched by the negated class [^0-9] with the
  // empty string, which leaves only the characters 0-9 in their original order.
  Result := TRegEx.Replace(s, '[^0-9]', '');
end;

这三个函数都能满足我的需要,但我想知道 Delphi 中是否有内置函数来实现这个功能...或者有比我现在的方法更好的方法。在Delphi中,从字符串中删除非数字字符的最好和/或最快的方法是什么?

qnzebej0

qnzebej01#

您的前两种方法都很慢,因为它们在循环中不断改变字符串的长度;此外,这三种方法都只能识别阿拉伯数字(0–9)。
要解决性能问题,请预分配最大结果长度:

function RemoveNonDigits(const S: string): string;
var
  Kept: Integer;
  Ch: Char;
begin
  // Reserve room for the worst case (every character is kept) so the result
  // is resized only twice: once here and once when it is trimmed at the end.
  SetLength(Result, Length(S));
  Kept := 0;
  for Ch in S do
  begin
    if not CharInSet(Ch, ['0'..'9']) then
      Continue;
    Inc(Kept);
    Result[Kept] := Ch;
  end;
  // Cut the buffer down to the number of characters actually written.
  SetLength(Result, Kept);
end;

要支持非阿拉伯数字,请使用 System.Character 单元提供的 IsDigit 函数(下面的代码通过 Char 的辅助方法 S[i].IsDigit 调用它):

function RemoveNonDigits(const S: string): string;
var
  SrcIdx, Kept: Integer;
begin
  // Allocate the largest possible result up front; it is trimmed at the end.
  SetLength(Result, Length(S));
  Kept := 0;
  SrcIdx := 1;
  while SrcIdx <= Length(S) do
  begin
    // IsDigit is the Char helper method, so the test is not limited to '0'..'9'.
    if S[SrcIdx].IsDigit then
    begin
      Inc(Kept);
      Result[Kept] := S[SrcIdx];
    end;
    Inc(SrcIdx);
  end;
  SetLength(Result, Kept);
end;

为了进一步优化,正如 Stefan Glienke 所建议的,可以绕过 RTL 的字符串处理机制,通过指针直接写入每个字符,但代价是损失一些代码可读性:

function RemoveNonDigits(const S: string): string;
var
  Dest: PChar;
  Kept: Integer;
  Ch: Char;
begin
  // Allocate the largest possible result, then fill it through a raw character
  // pointer instead of indexing Result for every write.
  SetLength(Result, Length(S));
  Dest := PChar(Result);
  Kept := 0;
  for Ch in S do
    if CharInSet(Ch, ['0'..'9']) then
    begin
      // Dest[Kept] is the next free slot of the preallocated buffer.
      Dest[Kept] := Ch;
      Inc(Kept);
    end;
  // Trim the buffer to the number of characters actually written.
  SetLength(Result, Kept);
end;

基准

为了好玩,我用长度小于100的随机输入字符串做了一个非常粗略的基准测试,其中每个字符是数字的概率约为24%(42 个候选字符中有 10 个是数字):

program Benchmark;

{$APPTYPE CONSOLE}

{$R *.res}

uses
  System.SysUtils, System.RegularExpressions, Windows;

// Benchmark candidate OP1: returns the characters of s that lie in '0'..'9',
// in their original order. The result starts empty and is extended by one
// string concatenation for every character that is kept.
function OP1(const s: string): string;
begin
  Result := '';
  for var i := 1 to Length(s) do
  begin
    if s[i] in ['0'..'9'] then
      Result := Result + s[i];
  end;
end;

// Benchmark candidate OP2: copies s into Result and then removes, with Delete,
// every character that is not in '0'..'9'.
function OP2(const s: string): string;
begin
  Result := s;
  // Iterate from the last index down to 1, so a Delete at index i only moves
  // characters at higher indices, which have already been examined.
  for var i := Length(Result) downto 1 do
  begin
    if not(Result[i] in ['0'..'9']) then
      Delete(Result, i, 1);
  end;
end;

// Benchmark candidate OP3: returns s with every match of the pattern '[^0-9]'
// replaced by the empty string. A TRegEx is created from the pattern on every
// call.
function OP3(const s: string): string;
begin
  var RegEx := TRegEx.Create('[^0-9]');
  Result := RegEx.Replace(s, '');
end;

// Benchmark candidate AR1: returns the characters of S that are in '0'..'9'.
// Result is first sized to S.Length, kept characters are stored by index, and
// Result is finally shortened to the number of characters stored.
function AR1(const S: string): string;
begin
  SetLength(Result, S.Length);
  // Number of characters written so far; also the index of the last one.
  var LActualLength := 0;
  for var i := 1 to S.Length do
    if CharInSet(S[i],  ['0'..'9']) then
    begin
      Inc(LActualLength);
      Result[LActualLength] := S[i];
    end;
  SetLength(Result, LActualLength);
end;

// Benchmark candidate AR2: same filtering as AR1, but kept characters are
// written through a PChar that walks over the preallocated Result instead of
// being assigned via Result[index].
function AR2(const S: string): string;
begin
  SetLength(Result, S.Length);
  // Write cursor into Result's character data.
  var ResChr := PChar(Result);
  // Count of characters written; used for the final SetLength.
  var LActualLength := 0;
  for var i := 1 to S.Length do
    if CharInSet(S[i],  ['0'..'9']) then
    begin
      Inc(LActualLength);
      ResChr^ := S[i];
      Inc(ResChr);
    end;
  SetLength(Result, LActualLength);
end;

// Benchmark candidate AR3: like AR2, but without a separate counter. The final
// length is the distance between the write cursor and the start of Result.
function AR3(const S: string): string;
begin
  SetLength(Result, S.Length);
  // Write cursor into Result's character data.
  var ResChr := PChar(Result);
  for var i := 1 to S.Length do
    if CharInSet(S[i],  ['0'..'9']) then
    begin
      ResChr^ := S[i];
      Inc(ResChr);
    end;
  // Pointer difference = number of characters written.
  SetLength(Result, ResChr - PChar(Result));
end;

function RandomInputString: string;
var
  Len, Idx: Integer;
begin
  // Random length in 0..99.
  Len := Random(100);
  SetLength(Result, Len);
  // Each character is one of the 42 consecutive code points starting at '0'
  // ('0'..'Y'), so 10 of the 42 possible values are digits.
  for Idx := 1 to Len do
    Result[Idx] := Char(Ord('0') + Random(42));
end;

type
  // Signature shared by all benchmark candidates (OP1..OP3, AR1..AR3).
  TDigitFilter = function(const S: string): string;

// Calls Filter once for every element of Inputs and returns the elapsed
// QueryPerformanceCounter ticks. The filtered strings are discarded.
function MeasureTicks(Filter: TDigitFilter; const Inputs: TArray<string>): Int64;
var
  StartTicks: Int64;
begin
  QueryPerformanceCounter(StartTicks);
  for var i := 0 to High(Inputs) do
    Filter(Inputs[i]);
  QueryPerformanceCounter(Result);
  Dec(Result, StartTicks);
end;

// Prints one result line: the candidate's name followed by calls per second.
procedure Report(const Name: string; Calls, Ticks, TicksPerSecond: Int64);
begin
  Writeln(Name, ': ', Round(Calls / (Ticks / TicksPerSecond)));
end;

// Benchmark driver: builds N random input strings, times every candidate over
// the same inputs, then prints the throughput of each candidate.
begin

  Randomize;

  const N = 1000000;

  // All candidates are timed against this one shared set of inputs.
  var Data := TArray<string>(nil);
  SetLength(Data, N);
  for var i := 0 to N - 1 do
    Data[i] := RandomInputString;

  // Counter ticks per second, used to convert tick counts into rates.
  var f: Int64;
  QueryPerformanceFrequency(f);

  // Every candidate is invoked through the same function-pointer call in
  // MeasureTicks, so the harness overhead is identical for all of them.
  var cOP1 := MeasureTicks(OP1, Data);
  var cOP2 := MeasureTicks(OP2, Data);
  var cOP3 := MeasureTicks(OP3, Data);
  var cAR1 := MeasureTicks(AR1, Data);
  var cAR2 := MeasureTicks(AR2, Data);
  var cAR3 := MeasureTicks(AR3, Data);

  Writeln('Computations per second:');
  Report('OP1', N, cOP1, f);
  Report('OP2', N, cOP2, f);
  Report('OP3', N, cOP3, f);
  Report('AR1', N, cAR1, f);
  Report('AR2', N, cAR2, f);
  Report('AR3', N, cAR3, f);

  // Keep the console window open until the user presses Enter.
  Readln;

end.

结果:

Computations per second:
OP1: 1398134
OP2: 875116
OP3: 39162
AR1: 3406172
AR2: 4063260
AR3: 4032343

正如您所看到的,至少在这个测试中,正则表达式是明显最慢的方法,预分配带来了很大的提升,而避免 _UniqueStringU 调用似乎只带来了相对较小的改进。
但是,即使使用非常慢的 RegEx 方法,在我这台已有 13 年历史的电脑上,每秒也能完成约 40000 次调用。

相关问题