检测两个字符串之间的差异

发布于 2021-01-31 15:56:57

我有两根弦

string a = "foo bar";
string b = "bar foo";

我想检测从a到的更改 b我必须更改哪些字符a才能使用b

我认为必须对每个字符进行一次迭代,并检测它是否被添加,删除或保持相等。这是我意想不到的结果

'f' Remove
'o' Remove
'o' Remove
' ' Remove
'b' Equal
'a' Equal
'r' Equal
' ' Add
'f' Add
'o' Add
'o' Add

类和结果的枚举:

public enum Operation { Add,Equal,Remove };
public class Difference
{
    public Operation op { get; set; }
    public char c { get; set; }
}

这是我的解决方案,但是“删除”案例对我来说尚不清楚

public static List<Difference> CalculateDifferences(string left, string right)
{
    int count = 0;
    List<Difference> result = new List<Difference>();
    foreach (char ch in left)
    {
        int index = right.IndexOf(ch, count);
        if (index == count)
        {
            count++;
            result.Add(new Difference() { c = ch, op = Operation.Equal });
        }
        else if (index > count)
        {
            string add = right.Substring(count, index - count);
            result.AddRange(add.Select(x => new Difference() { c = x, op = Operation.Add }));
            count += add.Length;
        }
        else
        {
            //Remove?
        }
    }
    return result;
}

删除的字符的代码看起来如何?


更新-添加了更多示例

范例1:

string a = "foobar";
string b = "fooar";

预期结果:

'f' Equal
'o' Equal
'o' Equal
'b' Remove
'a' Equal
'r' Equal

范例2:

string a = "asdfghjk";
string b = "wsedrftr";

预期结果:

'a' Remove
'w' Add
's' Equal
'e' Add
'd' Equal
'r' Add
'f' Equal
'g' Remove
'h' Remove
'j' Remove
'k' Remove
't' Add
'r' Add

更新:

这是Dmitry和Ingen的答案之间的 比较https
://dotnetfiddle.net/MJQDAO

关注者
0
被浏览
129
1 个回答
  • 面试哥
    面试哥 2021-01-31
    为面试而生,有面试问题,就找面试哥。

    您正在寻找 (最小)编辑距离 / (最小)编辑顺序 。您可以在此处找到该过程的 理论

    https://web.stanford.edu/class/cs124/lec/med.pdf

    让我们实现(最简单的)Levenstein距离/序列算法(有关详细信息,请参见https://en.wikipedia.org/wiki/Levenshtein_distance)。让我们从
    帮助器 类开始(我对您的实现做了一些更改):

      public enum EditOperationKind : byte {
        None,    // Nothing to do
        Add,     // Add new character
        Edit,    // Edit character into character (including char into itself)
        Remove,  // Delete existing character
      };
    
      public struct EditOperation {
        public EditOperation(char valueFrom, char valueTo, EditOperationKind operation) {
          ValueFrom = valueFrom;
          ValueTo = valueTo;
    
          Operation = valueFrom == valueTo ? EditOperationKind.None : operation;
        }
    
        public char ValueFrom { get; }
        public char ValueTo {get ;}
        public EditOperationKind Operation { get; }
    
        public override string ToString() {
          switch (Operation) {
            case EditOperationKind.None:
              return $"'{ValueTo}' Equal";
            case EditOperationKind.Add:
              return $"'{ValueTo}' Add";
            case EditOperationKind.Remove:
              return $"'{ValueFrom}' Remove";
            case EditOperationKind.Edit:
              return $"'{ValueFrom}' to '{ValueTo}' Edit";
            default:
              return "???";
          }
        }
      }
    

    从提供的示例中可以看到,我们没有任何 编辑 操作,但是 添加+ remove ;这就是为什么我已经把editCost = 2insertCost = 1int removeCost = 1(在的情况下, 领带insert + removeedit我们提出insert + remove)。现在我们准备实现Levenstein算法:

    public static EditOperation[] EditSequence(
      string source, string target, 
      int insertCost = 1, int removeCost = 1, int editCost = 2) {
    
      if (null == source)
        throw new ArgumentNullException("source");
      else if (null == target)
        throw new ArgumentNullException("target");
    
      // Forward: building score matrix
    
      // Best operation (among insert, update, delete) to perform 
      EditOperationKind[][] M = Enumerable
        .Range(0, source.Length + 1)
        .Select(line => new EditOperationKind[target.Length + 1])
        .ToArray();
    
      // Minimum cost so far
      int[][] D = Enumerable
        .Range(0, source.Length + 1)
        .Select(line => new int[target.Length + 1])
        .ToArray();
    
      // Edge: all removes
      for (int i = 1; i <= source.Length; ++i) {
        M[i][0] = EditOperationKind.Remove;
        D[i][0] = removeCost * i;
      }
    
      // Edge: all inserts 
      for (int i = 1; i <= target.Length; ++i) {
        M[0][i] = EditOperationKind.Add;
        D[0][i] = insertCost * i;
      }
    
      // Having fit N - 1, K - 1 characters let's fit N, K
      for (int i = 1; i <= source.Length; ++i)
        for (int j = 1; j <= target.Length; ++j) {
          // here we choose the operation with the least cost
          int insert = D[i][j - 1] + insertCost;
          int delete = D[i - 1][j] + removeCost;
          int edit = D[i - 1][j - 1] + (source[i - 1] == target[j - 1] ? 0 : editCost);
    
          int min = Math.Min(Math.Min(insert, delete), edit);
    
          if (min == insert) 
            M[i][j] = EditOperationKind.Add;
          else if (min == delete)
            M[i][j] = EditOperationKind.Remove;
          else if (min == edit)
            M[i][j] = EditOperationKind.Edit;
    
          D[i][j] = min;
        }
    
      // Backward: knowing scores (D) and actions (M) let's building edit sequence
      List<EditOperation> result = 
        new List<EditOperation>(source.Length + target.Length);
    
      for (int x = target.Length, y = source.Length; (x > 0) || (y > 0);) {
        EditOperationKind op = M[y][x];
    
        if (op == EditOperationKind.Add) {
          x -= 1;
          result.Add(new EditOperation('\0', target[x], op));
        }
        else if (op == EditOperationKind.Remove) {
          y -= 1;
          result.Add(new EditOperation(source[y], '\0', op));
        }
        else if (op == EditOperationKind.Edit) {
          x -= 1;
          y -= 1;
          result.Add(new EditOperation(source[y], target[x], op));
        }
        else // Start of the matching (EditOperationKind.None)
          break;
      }
    
      result.Reverse();
    
      return result.ToArray();
    }
    

    演示:

    var sequence = EditSequence("asdfghjk", "wsedrftr");
    
    Console.Write(string.Join(Environment.NewLine, sequence));
    

    结果:

    'a' Remove
    'w' Add
    's' Equal
    'e' Add
    'd' Equal
    'r' Add
    'f' Equal
    'g' Remove
    'h' Remove
    'j' Remove
    'k' Remove
    't' Add
    'r' Add
    


知识点
面圈网VIP题库

面圈网VIP题库全新上线,海量真题题库资源。 90大类考试,超10万份考试真题开放下载啦

去下载看看