Delphi -比较和标记字符串差异

smtd7mpg  于 2023-05-06  发布在  其他
关注(0)|答案(2)|浏览(169)

我需要做的是比较两个字符串,并用开始/结束标记来标记变化的差异。示例:

this is string number one.
this string is string number two.

输出将是

this [is|string is] string number [one|two].

我已经想了很久了。我发现了一些我相信能帮助我做到这一点的东西,但我无法做到这一点。
http://www.angusj.com/delphi/textdiff.html
我有80%的工作在这里,但我不知道如何让它做的正是我想让它做的。任何帮助将不胜感激。

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, Diff, StdCtrls;

type
  TForm2 = class(TForm)
    Edit1: TEdit;
    Edit2: TEdit;
    Button1: TButton;
    Memo1: TMemo;
    Diff: TDiff;
    procedure Button1Click(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

var
  Form2: TForm2;
  s1,s2:string;

implementation

{$R *.dfm}

procedure TForm2.Button1Click(Sender: TObject);
var
  i: Integer;
  lastKind: TChangeKind;

  procedure AddCharToStr(var s: string; c: char; kind, lastkind: TChangeKind);
  begin
    if (kind  lastkind) AND (lastkind = ckNone) and (kind  ckNone) then s:=s+'[';
    if (kind  lastkind) AND (lastkind  ckNone) and (kind = ckNone) then s:=s+']';
    case kind of         
      ckNone: s := s  + c;
      ckAdd: s := s + c;         
      ckDelete: s := s  + c;
      ckModify: s := s + '|' + c;
    end;
  end;

begin
  Diff.Execute(pchar(edit1.text), pchar(edit2.text), length(edit1.text), length(edit2.text));

  //now, display the diffs ...
  lastKind := ckNone;
  s1 := ''; s2 := '';
  form2.caption:= inttostr(diff.Count);
  for i := 0 to Diff.count-1 do
  begin

    with Diff.Compares[i] do
    begin
      //show changes to first string (with spaces for adds to align with second string)
      if Kind = ckAdd then 
      begin 
        AddCharToStr(s1,' ',Kind, lastKind); 
      end
      else 
      AddCharToStr(s1,chr1,Kind,lastKind);
      if Kind = ckDelete then 
      begin 
        AddCharToStr(s2,' ',Kind, lastKind)
      end
      else AddCharToStr(s2,chr2,Kind,lastKind);

      lastKind := Kind;
    end;
  end;
    memo1.Lines.Add(s1);
    memo1.Lines.Add(s2);
end;

end.

我从www.example.com上获取了basicdemo 1angusj.com,并对其进行了修改,以达到目前的效果。

3pmvbmvn

3pmvbmvn1#

要解决您所描述的问题,您基本上必须执行类似于在DNA或蛋白质数据biological sequence alignment中执行的操作。如果你只有两个字符串(或一个常量引用字符串),它可以通过基于dynamic programming的成对比对算法来处理,比如Needleman Wunsch algorithm * 和相关算法。(多序列比对变得更加复杂。)
[* 编辑:链接应该是:http://en.wikipedia.org/wiki/Needleman -Wunsch_algorithm ]

编辑二:

由于您似乎对在单词而不是字符级别上进行比较感兴趣,因此可以(1)将输入字符串拆分为字符串数组,其中每个数组元素表示一个单词,然后(2)在这些单词级别上进行对齐。这样做的好处是,对齐的搜索空间变得更小,因此您希望它总体上更快。我相应地改编并“Delphied”了维基百科文章中的伪代码示例:

program AlignWords; 

 {$APPTYPE CONSOLE}

 function MaxChoice (C1, C2, C3: integer): integer;
begin
  Result:= C1;
  if C2 > Result then Result:= C2;
  if C3 > Result then Result:= C3;
end;

 function WordSim (const S1, S2: String): integer; overload;
//Case-sensitive!
var i, l1, l2, minL: integer;
begin
  l1:= length(S1);
  l2:= length(S2);
  Result:= l1-l2;
  if Result > 0 then Result:= -Result;
  if (S1='') or (S2='') then exit;
  minL:= l1;
  if l2 < l1 then minL:= l2;
  for i := 1 to minL do if S1[i]<>S2[i] then dec(Result);
end;

 procedure AlignWordsNW (const A, B: Array of String; GapChar: Char; const Delimiter: ShortString; GapPenalty: integer; out AlignmentA, AlignmentB: string);
// Needleman-Wunsch alignment
// GapPenalty should be a negative value!
var
  F: array of array of integer;
  i, j,
  Choice1, Choice2, Choice3,
  Score, ScoreDiag, ScoreUp, ScoreLeft :integer;
  function GapChars (const S: String): String;
  var i: integer;
  begin
    assert (length(S)>0);
    Result:='';
    for i := 0 to length(S) - 1 do Result:=Result + GapChar;
  end;
begin
  SetLength (F, length(A)+1, length(B)+1);
  for i := 0 to length(A) do F[i,0]:= GapPenaltyi;
  for j := 0 to length(B) do F[0,j]:= GapPenaltyj;
  for i:=1 to length(A) do begin
    for j:= 1 to length(B) do begin
      Choice1:= F[i-1,j-1] + WordSim(A[i-1], B[j-1]);
      Choice2:= F[i-1, j] + GapPenalty;
      Choice3:= F[i, j-1] + GapPenalty;
      F[i,j]:= maxChoice (Choice1, Choice2, Choice3);
    end;
  end;
  AlignmentA:= '';
  AlignmentB:= '';
  i:= length(A);
  j:= length(B);
  while (i > 0) and (j > 0) do begin
    Score := F[i,j];
    ScoreDiag:= F[i-1,j-1];
    ScoreUp:= F[i,j-1];
    ScoreLeft:= F[i-1,j];
    if Score = ScoreDiag + WordSim(A[i-1], B[j-1]) then begin
      AlignmentA:= A[i-1] + Delimiter + AlignmentA;
      AlignmentB:= B[j-1] + Delimiter + AlignmentB;
      dec (i);
      dec (j);
    end else if Score = ScoreLeft + GapPenalty then begin
      AlignmentA:= A[i-1] + Delimiter + AlignmentA;
      AlignmentB:= GapChars (A[i-1]) + Delimiter + AlignmentB;
      dec(i);
    end else begin
      assert (Score = ScoreUp + GapPenalty);
      AlignmentA:= GapChars(B[j-1]) + Delimiter + AlignmentA;
      AlignmentB:= B[j-1] + Delimiter + AlignmentB;
      dec (j);
    end;
  end;
  while (i > 0) do begin
    AlignmentA:= A[i-1] + Delimiter + AlignmentA;
    AlignmentB:= GapChars(A[i-1]) + Delimiter + AlignmentB;
    dec(i);
  end;
  while (j > 0) do begin
    AlignmentA:= GapChars(B[j-1]) + Delimiter + AlignmentA;
    AlignmentB:= B[j-1] + Delimiter + AlignmentB;
    dec(j);
  end;
end;

 Type
  TStringArray = Array Of String;

 Var
  as1, as2: TStringArray;
  s1, s2: string;

 BEGIN
    as1:= TStringArray.create ('this','is','string','number','one.');
    as2:= TStringArray.Create ('this','string','is','string','number','two.');

 AlignWordsNW (as1, as2, '-',' ',-1, s1,s2);
writeln (s1);
writeln (s2);

 END.

本例的输出为

this ------ is string number ---- one. 
this string is string number two. ----

它并不完美,但你明白了。从这类输出中,您应该能够做您想做的事情。请注意,您可能需要调整GapPenalty和相似度评分函数WordSim以满足您的需要。

7y4bm7vi

7y4bm7vi2#

有一个Object Pascal Diff Engine可用,可能会有所帮助。您可能希望将每个“单词”分成单独的行进行比较,或者修改算法以逐个单词进行比较。

相关问题