unity3d 如何充分利用BFS实现亚毫秒级执行?

pzfprimi  于 2022-12-23  发布在  其他
关注(0)|答案(2)|浏览(122)

我有一个BFS的实现,工作得很好,但似乎得到真正的CPU沉重,即使在低深度(亚毫秒的深度4,但10毫秒的深度10).我很有信心,这个算法应该运行亚毫秒事件在深度100,但我不太确定我错过了什么.以下是代码:

using System.Collections.Generic;
using System.Linq;
using UnityEngine;

public class VisionGraph : MonoBehaviour
{
    public Transform Ground;
    public int Height;
    public int Width;
    private MeshFilter mf;
    public Vector3[] Vertices;
    public float precision;
    public Vector3 SelectedVertex;

    // Start is called before the first frame update
    private void Start()
    {
        mf = Ground.GetComponent<MeshFilter>();
        Matrix4x4 localToWorld = transform.localToWorldMatrix;

        Vector3 world_max = localToWorld.MultiplyPoint3x4(mf.mesh.vertices[0]);

        Width = Mathf.RoundToInt(world_max.z * (1 / precision));
        int maxIndex = Mathf.RoundToInt((world_max.z * (1 / precision)) * (Height * (1 / precision)) * world_max.x);
        Vertices = new Vector3[maxIndex];
        
        //This is the graph initialization. 
        //Indices increment by 1 while actual coordinates increments by `precision`. 
        //Indices are then reversed to coordinates in BFS with `position/precision`.
        int xInd, yInd, zInd; xInd = yInd = zInd = 0;
        float x, y, z; x = y = z = 0;

        int index = 0;

        while (index < Vertices.Length - 1)
        {
            index = (yInd * (Width * Width)) + (zInd * Width) + xInd;

            Debug.Log(index + " " + maxIndex);
            Vertices[index] = new(x, y, z);
            x += precision;
            xInd++;

            if (x > world_max.x)
            {
                x = 0;
                xInd = 0;
                z += precision;
                zInd++;

                if (z > world_max.z)
                {
                    z = 0;
                    zInd = 0;
                    y += precision;
                    yInd++;
                }
            }
        }

        SelectedVertex = Vertices[600];
    }

    private void OnDrawGizmos()
    {
        // Needs to be turned into retrieve index from position.
        // but i'm not sure how to clamp the continuous position to `precision` steps.

        SelectedVertex = Vertices.Where(v => Vector3.Distance(v, SelectedVertex) <= precision - 0.0001 ).FirstOrDefault();

        var watch = System.Diagnostics.Stopwatch.StartNew();
        List<Vector3Int> closeVertices = BFS(SelectedVertex, 10); // second param is the search depth

        watch.Stop();
        Debug.Log(watch.ElapsedMilliseconds);
        foreach (var vert in closeVertices)
        {
            var index = (vert.y * (Width * Width)) + (vert.z * Width) + vert.x;
            if (index >= Vertices.Length) continue;
            Gizmos.color = Color.red;
            Gizmos.DrawSphere(Vertices[index], 0.1f);
        }
    }

    private List<Vector3Int> BFS(Vector3 start, int depth)
    {
        Vector3Int startIndex = new((int)(start.x / precision), (int)(start.y / precision), (int)(start.z / precision));

        Dictionary<Vector3Int, bool> closedList = new();
        List<Vector3Int> queue = new() { startIndex };

        while (queue.Count > 0)
        {
            Vector3Int v = queue[^1];
            queue.RemoveAt(queue.Count-1);

            Vector3Int[] neighbors = new[]
            {
                v + Vector3Int.left,
                v + Vector3Int.right,
                v + Vector3Int.up,
                v + Vector3Int.down,
                v + Vector3Int.forward,
                v + Vector3Int.back,
            };
            foreach (Vector3Int n in neighbors)
            {
                if (n.x < 0 || n.y < 0 || n.z < 0) continue; // this will alos include the high limit of the grid but i dont "need" it at this point of the tests

                //For every implementation of graph search algorithms I make, this always seem to bee the weak part.
                if ((n - startIndex).sqrMagnitude > depth*depth || queue.Any(vert => vert == n) || closedList.ContainsKey(n)) continue;

                queue.Insert(0, n);
            }
            closedList.Add(v, true); 
        }

        return closedList.Keys.ToList();
    }
}

closedList上使用字典是一个很糟糕的尝试,试图减少列表搜索时间,它曾经是closedList.Any(vert => vert == n),但我没有看到每一个的使用有很大的变化。我真的很高兴,如果有人能指出是什么真正减缓了这一点。
第二个问题:你是如何在BFS中实现多线程的呢?队列和闭列表都是动态的,有没有一个解决方案可以在NativeLists中解决这个问题呢?谢谢你的时间。如果有什么不清楚的地方,请告诉我。

vuktfyat

vuktfyat1#

首先,这可能适合,也可能不适合您的情况,但作为对我自己的一种锻炼,我认为看看给定代码的执行速度有多快会很有趣。
我在测试中创建了三个方法。
1.第一个是你逐字逐句的方法。
1.第二种方法是你的代码但是“重新调整”了,但是从循环中取出了neighbours数组创建,并添加了Queue<Vector3Int>而不是List<Vector3Int>,因为我怀疑插入到List<T>的元素0可能是原始代码中问题的一个重要部分,还将Dictionary<Vector3Int, bool>替换为HashSet<Vector3Int>。我还用Contains(v)替换了Any(v => v == n)。结果Contains明显更快。
1.然后,我创建了第三个方法,我进一步添加了第二个HashSet,以便能够快速查找仍在队列中的项目。
然后我使用StopWatch对每个方法运行测试以记录时间。我还再次运行了最后一个方法,但那次是从Task.Run开始的。
以下是两种“优化”方法:

private List<Vector3Int> BFS_Optimised ( Vector3 start, int depth )
{
    Vector3Int startIndex = new((int)(start.x / precision), (int)(start.y / precision), (int)(start.z / precision));
    HashSet<Vector3Int> closedList = new ();
    Queue<Vector3Int> queue = new ();
    queue.Enqueue(startIndex);
    var dSquared = depth * depth;

    Vector3Int[] neighbors =
        new[] { Vector3Int.left, Vector3Int.right, Vector3Int.up, Vector3Int.down, Vector3Int.forward, Vector3Int.back };

    while ( queue.Count > 0 )
    {
        var v = queue.Dequeue();
        for ( int i = 0; i < 6; ++i )
        {
            var n = v + neighbors[i];
            if ( n.x < 0 || n.y < 0 || n.z < 0 )
                continue; // this will alos include the high limit of the grid but i dont "need" it at this point of the tests
            if ( ( n - startIndex ).sqrMagnitude > dSquared 
                    || closedList.Contains ( n ) 
                    ||  queue.Contains ( n ) ) // queue.Any(v => v == n ) ) //
                    continue;
            queue.Enqueue ( n );
        }
        closedList.Add ( v );
    }
    return closedList.ToList ( );
}

private List<Vector3Int> BFS_Optimised2 ( Vector3 start, int depth )
{
    Vector3Int startIndex = new((int)(start.x / precision), (int)(start.y / precision), (int)(start.z / precision));
    HashSet<Vector3Int> closedList = new ();
    Queue<Vector3Int> queue = new ();
    queue.Enqueue ( startIndex );
    HashSet<Vector3Int> qHash = new ( ) { startIndex };
    var dSquared = depth * depth;

    Vector3Int[] neighbors = 
            new[] { Vector3Int.left, Vector3Int.right, Vector3Int.up, Vector3Int.down, Vector3Int.forward, Vector3Int.back };

    while ( queue.Count > 0 )
    {
        var v = queue.Dequeue();
        qHash.Remove ( v );
        for ( int i = 0; i < 6; i++ )
        {
            var n = v + neighbors[i];
            if ( n.x < 0 || n.y < 0 || n.z < 0 )
                continue; // this will alos include the high limit of the grid but i dont "need" it at this point of the tests
            if ( ( n - startIndex ).sqrMagnitude > dSquared 
                    || closedList.Contains ( n ) 
                    || qHash.Contains ( n ) )
                continue;
            queue.Enqueue ( n );
            qHash.Add ( n );
        }
        closedList.Add ( v );
    }
    return closedList.ToList ( );
}

下面是测试(请原谅测试的粗糙和缺乏深度):

async void Run ( )
{
    var iterations = 100;
    var d = 10;
    var v = new Vector3 ( 10, 10, 10 );

    List<Vector3Int> r1 = default;
    List<Vector3Int> r2 = default;
    List<Vector3Int> r3 = default;
    List<Vector3Int> r4 = default;

    Debug.Log ( "Waiting ... " );
    await Task.Delay ( 2000 );
    Debug.Log ( "Run ... " );

    Stopwatch sw = new();
    sw.Start ( );
    for ( int i = 0; i < iterations; i++ )
        r1 = BFS ( v, d );
    sw.Stop ( );
    var t1 = sw.Elapsed.TotalMilliseconds;

    sw.Restart ( );
    for ( int i = 0; i < iterations; i++ )
        r2 = BFS_Optimised ( v, d );
    sw.Stop ( );
    var t2 = sw.Elapsed.TotalMilliseconds;

    sw.Restart ( );
    for ( int i = 0; i < iterations; i++ )
        r3 = BFS_Optimised2 ( v, d );
    sw.Stop ( );
    var t3 = sw.Elapsed.TotalMilliseconds;

    sw.Restart ( );
    r4 = await Task.Run ( ( ) => BFS_Optimised2 ( v, d ) );
    sw.Stop ( );
    var t4 = sw.Elapsed.TotalMilliseconds;


    StringBuilder sb = new();
    sb.AppendLine ( $"Original : {t1} ms [{r1.Count}] ({r1 [ 0 ]}) .. ({r1 [ ^1 ]})" );
    sb.AppendLine ( $"Optimised : {t2} ms [{r2.Count}] ({r2 [ 0 ]}) .. ({r2 [ ^1 ]})" );
    sb.AppendLine ( $"Optimised2 : {t3} ms [{r3.Count}] ({r3 [ 0 ]}) .. ({r3 [ ^1 ]})" );
    sb.AppendLine ( $"Optimised2 Task.Run : {t4} ms [{r4.Count}] ({r4 [ 0 ]}) .. ({r4 [ ^1 ]})" );
    Debug.Log ( sb.ToString ( ) );
}

以下是结果:

Original : 10701.7465 ms [4169] ((10, 10, 10)) .. ((15, 5, 3))
Optimised : 1830.9519 ms [4169] ((10, 10, 10)) .. ((15, 5, 3))
Optimised2 : 209.1559 ms [4169] ((10, 10, 10)) .. ((15, 5, 3))
Optimised2 Task.Run : 17.7353 ms [4169] ((10, 10, 10)) .. ((15, 5, 3))
x7yiwoj4

x7yiwoj42#

我不能一眼就看出你的代码在做什么(没有文档,我的C#也很生疏)但是你的BFS有惊人的代码量(一个BFS通常只需要几行)为什么每次通过while循环都需要创建一个新的大型数据结构呢?这是昂贵的,据我回忆,C#使用了一些困扰的垃圾收集器,它将一直与您斗争
你有一个名为queue的变量,但它不是一个队列,而是一个列表。这是令人困惑的,而且可能是不正确的。
就我所知,你是在动态地创建一个节点的邻居。糟糕的想法!你应该用邻接矩阵(最简单的)或节点邻接列表(更复杂,但对于大型图来说内存效率很高)来建模图。然后你可以使用节点索引来查找邻居,而不需要一遍又一遍地创建它们。
为了进行比较,下面是一个BFS的C++代码,它使用一真实的队列,并且只关心节点索引。

void cPathFinder::breadth(
        std::function<void(int v, int p)> visitor)
    {
        if (!nodeCount())
            throw std::runtime_error("breadth called on empty graph");

        std::vector<bool> visited(nodeCount(), false);
        std::queue<int> Q;

        visited[myStart] = true;
        Q.push(myStart);

        while (Q.size())
        {
            int v = Q.front();
            Q.pop();
            for (int w : adjacent(v))
            {
                if (!visited[w])
                {
                    // reached a new node
                    visitor(w, v);
                    visited[w] = true;
                    Q.push(w);
                }
            }
        }
    }

相关问题