javascript 将路径数组转换为数据结构

3mpgtkmj  于 2023-03-11  发布在  Java
关注(0)|答案(4)|浏览(133)

我有一个如下的路径数组:

/doc/data/main.js
/doc/data/xl.js
/doc/data/dandu/sdasa.js
/mnt/data/la.js

我试着构建以下结构:

{
  "directories": {
    "/doc/data": {
      "directories": {
        "dandu": {
          "files": {
            "sdasa.js": 1
          }
        }
      },
      "files": {
        "main.js": 1,
        "xl.js": 1
      }
    },
    "/mnt/data": {
      "directories": {},
      "files": {
        "la.js": 1
      }
    }
  },
  "files": {}
}

请忽略示例中文件的值。我将在将来为它分配更复杂的数据。当前值为1。
从以前的topic中,我发现我可以使用以下函数来获得类似的结果:

/**
 * Builds a nested object tree from the outer-scope `paths` array.
 *
 * Every non-empty "/"-separated segment (directories and the file name alike)
 * becomes one nesting level. The function takes no arguments; it reads
 * `paths` from the enclosing scope.
 *
 * @returns {Object} Nested plain objects keyed by path segment.
 */
var parsePathArray = function() {
    const tree = {};
    for (let index = 0; index < paths.length; index++) {
        // Empty segments (e.g. the one produced by a leading slash) are skipped.
        const segments = paths[index].split('/').filter((segment) => segment !== "");
        let cursor = tree;
        for (const segment of segments) {
            if (typeof cursor[segment] === 'undefined') {
                cursor[segment] = {};
            }
            cursor = cursor[segment];
        }
    }
    return tree;
};

这个解决方案的主要问题是它拆分了每一级目录,但我不想拆分每一级目录,而是只保留至少包含一个文件的目录。例如,在我的示例中/doc没有文件(只有目录/data),所以应当继续向下,把它与下一级合并为/doc/data。我尝试稍微修改一下函数,但它不起作用:

// Fragment from the question (reported there as not working). It relies on
// `split` and `this.files`, both of which are defined outside this snippet.
var str = '';
for (var j = 0; j < split.length; j++) {
    // While this is not the last segment and `this.files` has no entry for the
    // accumulated prefix, extend the prefix with the current segment.
    if (j < split.length - 1 && typeof this.files[str] === 'undefined') {
        str += '/' + split[j];
        continue;
    }
    // Otherwise, for a non-empty prefix: create its entry if it is missing and
    // descend into it. Note that `str` is not reset after descending.
    if (str !== '') {
        if (typeof this.files[str] === 'undefined')
            this.files[str] = {};
        this.files = this.files[str];
    }
}

将这些字符串转换为该数据结构的最佳方法是什么?

jecbmhm3

jecbmhm31#

这是我想到的解决方案。它的工作原理是一次构建一个路径,然后将其与现有的数据结构进行比较。它还应该自己处理文件,因为你最初的帖子似乎暗示这是必要的。我决定最后将其分为两个函数,因为这可能会更容易解释。

代码:

// Sample input: paths with and without a leading slash, at varying depths,
// plus one bare file name with no directory part.
const paths = [
    '/doc/data/main.js',
    'doc/data/xl.js',
    '/etc/further/owy.js',
    '/etc/further/abc.js',
    'etc/mma.js',
    '/mnt/data/it.js',
    '/mnt/data/path/is/long/la.js',
    'mnt/data/path/is/la.js',
    '/doc/data/dandu/sdasa.js',
    '/etc/i/j/k/l/thing.js',
    '/etc/i/j/areallylongname.js',
    'thing.js'
];

/**
 * Converts a list of file paths into a `{ directories, files }` structure in
 * which consecutive directory segments are merged into one key until a
 * directory that already exists in the structure is reached.
 *
 * Paths are normalised to start with "/" and processed from the shallowest to
 * the deepest, so parent directories are registered before their children.
 *
 * @param {Iterable<string>} paths - File paths, with or without a leading slash.
 * @returns {{directories: Object, files: Object}} The assembled structure.
 */
function buildStructure(paths) {
    const structure = { directories: {}, files: {} };

    const depthOf = (path) => path.split('/').length;

    const normalised = [...paths].map((path) => (path.charAt(0) === '/' ? path : `/${path}`));
    normalised.sort((left, right) => depthOf(left) - depthOf(right));

    for (const fullPath of normalised) {
        // Drop the empty segment in front of the leading slash.
        const nodes = fullPath.split('/').slice(1);
        const file = nodes.pop();

        // A bare file name has no directory nodes and is stored on the root itself.
        const start = nodes[0] ? structure.directories : structure;
        const target = findDirectory(start, '', [...nodes]);

        target.files = { ...(target.files || {}), [file]: 1 };
    }

    return structure;
}

/**
 * Walks `nodes` starting at `pointer` and returns the object that represents
 * the target directory, creating it when it does not exist yet.
 *
 * Segments are accumulated into a path string. When the accumulated path is
 * already a key of the current object, the walk moves into that entry (and,
 * if more segments remain, into its `directories` object, which is created on
 * demand) and the accumulation starts over.
 *
 * @param {Object} pointer - Object to start searching in.
 * @param {string} subPath - Path accumulated so far ('' when starting).
 * @param {string[]} nodes - Remaining directory segments; consumed (emptied) by this call.
 * @returns {Object} The directory object that files should be attached to.
 */
function findDirectory(pointer, subPath, nodes) {
    let current = pointer;
    let pending = subPath;

    while (nodes.length > 0) {
        const candidate = `${pending}/${nodes.shift()}`;
        const existing = current[candidate];

        if (!existing) {
            // Not a known directory yet: keep growing the path.
            pending = candidate;
            continue;
        }

        current = existing;
        if (nodes.length >= 1) {
            current.directories = current.directories || {};
            current = current.directories;
        }
        pending = '';
    }

    // Whatever is left over is a brand-new directory key.
    if (pending) {
        current[pending] = {};
        current = current[pending];
    }
    return current;
}

// Build the structure from the sample `paths` and print it.
const structure = buildStructure(paths);
console.log(structure);
.as-console-wrapper { min-height: 100%!important; top: 0; }

说明:

这比我开始工作时想象的要复杂得多(也有趣得多)。一旦开始连接目录,操作的顺序就真的很重要了。
从buildStructure开始:我们先映射(map)路径数组,为所有缺少前导斜杠的条目补上斜杠,然后根据它们引用的目录数对其进行排序,这样我们就可以确定我们是从结构的顶部向下工作的。
将每个路径分隔成一个节点数组,然后弹出文件字符串,剩下的内容如下:

const nodes = ['doc', 'data'];
const file = 'main.js';

现在我们必须通过findDirectory来传递这些节点,以找到/创建文件的位置,变量pointer用于跟踪我们在structure对象中的位置,并且我们对指针所做的任何更改都将在结构中复制,因为它们共享引用等式。
findDirectory函数递归地处理每个节点,逐渐地将路径构建回它的全长。每当我们构建出一个已经存在于structure目录中的路径时,我们就移动到它的内部,并重新开始构建路径,以尝试找到下一个路径。如果我们找不到它,那么我们就得到了一个全新的目录。目标是当我们退出函数时总是停在正确的目录中——如果需要的话,会在此过程中创建它。
为简化起见,假设我们只有两条路径要记录:

const paths = [
  'doc/data/main.js',
  'doc/data/dandu/sdasa.js'
];

对于第一条路径,findDirectory将执行三次遍历,每次遍历时将为其提供以下参数:

pointer = structure.directories > same > same

subPath = '' > '/doc' > '/doc/data'

nodes = ['doc', 'data'] > ['data'] > []

我们没有找到匹配的路径,所以当函数退出时,它会在structure.directories上创建该目录。现在,第二个路径将执行四次遍历:

pointer = 
  structure.directories > 
  same > 
  structure.directories./doc/data.directories > 
  same

subPath = '' > '/doc' > '' > '/dandu' 

nodes = ['doc', 'data', 'dandu'] > ['data', 'dandu'] > ['dandu'] > []

正如你所看到的,在第二次传递中,我们创建了字符串/doc/data,它确实存在于structure.directories中。所以我们进入它,因为有更多的节点要处理,我们在那里创建了一个新的目录对象,并输入它。如果没有更多的节点要处理,我们就知道我们已经到达了正确的层次,这是不必要的。从这里开始,这是一个简单的情况,重新建立路径,并重复这个过程。
一旦我们找到了正确的目录,我们就可以直接把文件放在指针上,它将被注册到结构中,一旦我们移动到下一个路径,指针将再次指向structure.directories
如果没有要处理的节点(仅文件名),则将整个structure对象传递给findDirectory,文件将进入对象的顶层。
希望这能很好地解释问题,并对你有用。我很喜欢在这方面的工作,并会很高兴对如何改进它的任何建议。

enxuqcxy

enxuqcxy2#

这个挑战其实并不那么简单。不过,这种方法把问题分解为若干易于阅读和理解、因而也易于维护的子任务,以达到OP的目标...

// Sample input: unsorted paths, with and without leading slashes, including
// root-level files and files that differ only by extension.
const pathList = [
  '/doc/data/main.js',
  '/doc/data/fame.js',
  '/doc/data/fame.es',
  '/doc/data/xl.js',
  '/doc/data/dandu/sdasa.js',

  '/mnt/data/la.js',
  '/mnt/la.es',

  'foo/bar/baz/biz/foo.js',
  'foo/bar/baz/biz/bar.js',
  '/foo/bar.js',
  '/foo/bar/baz/foo.js',
  'foo/bar/baz/bar.js',
  'foo/bar/baz/biz.js',

  '/foobar.js',
  'bazbiz.js',

  '/etc/further/owy.js',
  '/etc/further/abc.js',
  'etc/mma.js',
  '/etc/i/j/k/l/thing.js',
  '/etc/i/j/areallylongname.js'
];

/**
 * Splits a raw path into its directory part and its file name.
 *
 * Leading slashes are removed first. A path without any remaining slash is
 * treated as a root-level file and gets an empty `pathName`.
 *
 * @param {string} path - Raw path, e.g. '/doc/data/main.js'.
 * @returns {{pathName: string, fileName: string}} Separated path and file name.
 */
function createSeparatedPathAndFileData(path) {
  const normalizedPath = path.replace(/^\/+/, '');

  // Splitting on the last slash with a capture group yields
  // [pathName, fileName, ''] or, when there is no slash, [fileName].
  const partials = normalizedPath.split(/\/([^/]*)$/);
  const [pathName, fileName] = partials.length === 1 ? ['', partials[0]] : partials;

  return { pathName, fileName };
}

/**
 * Sort comparator for file data items: orders by `pathName`, then by the file
 * name without its extension, then by the extension.
 *
 * A file name without a dot has no extension; it is compared as an empty
 * string so that such names neither throw nor get compared against the text
 * "undefined".
 *
 * @param {{pathName: string, fileName: string}} a - First item.
 * @param {{pathName: string, fileName: string}} b - Second item.
 * @returns {number} Negative, zero or positive, as expected by `Array.prototype.sort`.
 */
function compareByPathAndFileNameAndExtension(a, b) {
  const regXSplit = (/\.([^.]*)$/); // split for filename and captured file extension.

  // Without a dot, `split` returns a single element; default the extension to ''.
  const [aName, aExtension = ''] = a.fileName.split(regXSplit);
  const [bName, bExtension = ''] = b.fileName.split(regXSplit);

  return (
       a.pathName.localeCompare(b.pathName)
    || aName.localeCompare(bName)
    || aExtension.localeCompare(bExtension)
  );
}

/**
 * Returns the part of `pathName` that follows the directory prefix `root`.
 *
 * `pathName` must start with `root` followed by a slash. The prefix is tested
 * directly instead of splitting on it, so a path in which `root/` occurs a
 * second time further down (e.g. root 'foo', path 'foo/bar/foo/baz') is still
 * recognised as a descendant of `root`.
 *
 * @param {string} root - Directory path without a trailing slash.
 * @param {string} pathName - Directory path to test against `root`.
 * @returns {string|null} The remainder after `root/`, or null when `pathName`
 *   is not located below `root`.
 */
function getRightPathPartial(root, pathName) {
  const prefix = `${ root }/`;

  return pathName.startsWith(prefix)
    ? pathName.slice(prefix.length)
    : null; // null || string.
}

/**
 * Derives the partial list for `pathName` from the partial list of the
 * previous item by walking that list back, one partial at a time, until the
 * remaining partials joined with '/' form a prefix of `pathName`.
 *
 * When nothing of the previous list remains, `pathName` becomes the single
 * partial of the result.
 *
 * @param {string[]} previousPartials - Partials of the preceding item (not modified).
 * @param {string} pathName - Directory path of the current item.
 * @returns {string[]} A new list of partials for `pathName`.
 */
function getPathPartials(previousPartials, pathName) {
  const remaining = Array.from(previousPartials);
  let tail;

  while (true) {
    if (tail) {
      break;
    }
    // Stop on an exhausted list or on an empty-string partial.
    if (!remaining.pop()) {
      break;
    }
    if (remaining.length < 1) {
      break;
    }
    tail = getRightPathPartial(remaining.join('/'), pathName);
  }

  if (remaining.length === 0) {
    remaining.push(pathName);
    return remaining;
  }
  return tail ? remaining.concat(tail) : remaining;
}

/**
 * `Array.prototype.map` callback that adds a `pathPartials` list to the
 * current file data item, derived from the item directly before it in the
 * list. The item is modified in place and returned.
 *
 * - First item: its own `pathName` is the only partial.
 * - Same `pathName` as the previous item: a copy of the previous partials.
 * - `pathName` directly below the previous one: previous partials plus the
 *   remainder.
 * - Otherwise: the previous partials are walked back via `getPathPartials`.
 *
 * @param {{pathName: string, fileName: string}} fileData - Current item.
 * @param {number} idx - Index of the current item.
 * @param {Array<Object>} list - The list being mapped; the previous item is
 *   expected to carry its `pathPartials` already.
 * @returns {Object} `fileData`, now with `pathPartials`.
 */
function createPathPartialDataFromCurrentAndPreviousItem(fileData, idx, list) {
  const predecessor = list[idx - 1];

  if (!predecessor) {
    // initialize partials by adding path name.
    fileData.pathPartials = [fileData.pathName];
    return fileData;
  }

  const currentPathName = fileData.pathName;

  if (predecessor.pathName === currentPathName) {
    // duplicate/copy path partials.
    fileData.pathPartials = [].concat(predecessor.pathPartials);
    return fileData;
  }

  // Try an instant match first, then fall back to working back the root path.
  const rightPartial = getRightPathPartial(predecessor.pathName, currentPathName);

  fileData.pathPartials = rightPartial
    ? predecessor.pathPartials.concat(rightPartial)
    : getPathPartials(predecessor.pathPartials, currentPathName);

  return fileData;
}

/**
 * Tells whether `index` has no own enumerable properties yet.
 *
 * @param {Object} index - Index object to inspect.
 * @returns {boolean} True when `index` is still empty.
 */
function isUnassignedIndex(index) {
  const ownKeys = Object.keys(index);
  return ownKeys.length === 0;
}
/**
 * Gives `index` fresh, empty `directories` and `files` containers
 * (overwriting existing ones) and returns it.
 *
 * @param {Object} index - Object to initialise; modified in place.
 * @returns {Object} The same `index` object.
 */
function assignInitialIndexProperties(index) {
  const blankContainers = {
    directories: {},
    files: {}
  };
  return Object.assign(index, blankContainers);
}

/**
 * `Array.prototype.reduce` callback that registers one file data item in the
 * index, creating the directory entry for each of its path partials on the
 * way down.
 *
 * The item's `pathPartials` are only read, never consumed, so the same list
 * of items can be reused after the reduction. Directory lookups use an
 * own-property check so that names such as 'constructor' are not mistaken
 * for existing entries.
 *
 * @param {Object} index - Index being assembled; initialised here when empty.
 * @param {{pathPartials: string[], fileName: string}} fileData - Item to add.
 * @returns {Object} The same `index` object.
 */
function assignFileDataToIndex(index, fileData) {
  if (isUnassignedIndex(index)) {
    assignInitialIndexProperties(index);
  }
  const { pathPartials, fileName } = fileData;

  let subIndex = index;

  for (const path of pathPartials) {
    // An empty partial marks a root-level file: stop descending.
    if (!path) {
      break;
    }
    const { directories } = subIndex;

    if (!Object.prototype.hasOwnProperty.call(directories, path)) {
      directories[path] = assignInitialIndexProperties({});
    }
    subIndex = directories[path];
  }
  subIndex.files[fileName] = 1;

  return index;
}

// Demonstration: log the input and then the result of each pipeline stage.
// Every call rebuilds the pipeline from `pathList`; the commented-out lines
// show the stages that are not yet applied at that step.
console.log(
  'input :: path list ...',
  pathList
  //.map(createSeparatedPathAndFileData)
  //.sort(compareByPathAndFileNameAndExtension)
  //.map(createPathPartialDataFromCurrentAndPreviousItem)
  //.reduce(assignFileDataToIndex, {})
);
// Stage 1: split each path into `pathName` and `fileName`.
console.log(
  '1st :: create separated path and file data from the original list ...',
  pathList
    .map(createSeparatedPathAndFileData)
  //.sort(compareByPathAndFileNameAndExtension)
  //.map(createPathPartialDataFromCurrentAndPreviousItem)
  //.reduce(assignFileDataToIndex, {})
);
// Stage 2: sort by path name, file name and extension.
console.log(
  '2nd :: sort previous data by comparing path- and file-names and its extensions ...',
  pathList
    .map(createSeparatedPathAndFileData)
    .sort(compareByPathAndFileNameAndExtension)
  //.map(createPathPartialDataFromCurrentAndPreviousItem)
  //.reduce(assignFileDataToIndex, {})
);
// Stage 3: derive `pathPartials` for each item from its predecessor.
console.log(
  '3rd :: create partial path data from current/previous items of the sorted list ...',
  pathList
    .map(createSeparatedPathAndFileData)
    .sort(compareByPathAndFileNameAndExtension)
    .map(createPathPartialDataFromCurrentAndPreviousItem)
  //.reduce(assignFileDataToIndex, {})
);
// Stage 4: reduce the items into the final index object.
console.log(
  '4th :: output :: assemble final index from before created list of partial path data ...',
  pathList
    .map(createSeparatedPathAndFileData)
    .sort(compareByPathAndFileNameAndExtension)
    .map(createPathPartialDataFromCurrentAndPreviousItem)
    .reduce(assignFileDataToIndex, {})
);
.as-console-wrapper { min-height: 100%!important; top: 0; }

...从上面的日志中可以看到,这些任务是...

清理和(重新)构建/Map

1.通过移除前导斜线的可能序列来净化/规范化每条路径。
2.构建文件数据项的列表,对于每个项,该列表包含对应路径项的pathName和fileName,其中路径项使用的是其净化/规范化后的形式。
例如,'/doc/data/dandu/sdasa.js'Map到...

{
  "pathName": "doc/data/dandu",
  "fileName": "sdasa.js"
}

分类

排序是通过比较两个当前Map的文件数据项的特性来完成的,方法如下...
1.按pathName进行比较
2.按fileName比较,无扩展名
3.按文件扩展名比较
因此原始文件列表看起来像这样...

[
  '/doc/data/main.js',
  '/doc/data/fame.js',
  '/doc/data/fame.es',
  '/doc/data/dandu/sdasa.js',
  'foo/bar/baz/biz/bar.js',
  '/foo/bar.js',
  'foo/bar/baz/biz.js',
  '/foobar.js'
]

......将被(清理/规范化Map并)排序为类似于......

[{
  "pathName": "",
  "fileName": "foobar.js"
}, {
  "pathName": "doc/data",
  "fileName": "fame.es"
}, {
  "pathName": "doc/data",
  "fileName": "fame.js"
}, {
  "pathName": "doc/data",
  "fileName": "main.js"
}, {
  "pathName": "doc/data/dandu",
  "fileName": "sdasa.js"
}, {
  "pathName": "foo",
  "fileName": "bar.js"
}, {
  "pathName": "foo/bar/baz",
  "fileName": "biz.js"
}, {
  "pathName": "foo/bar/baz/biz",
  "fileName": "bar.js"
}]

排序是最基本的,因为后面的算法依赖于整齐排序/对齐的pathName

拆分为路径片段并将其聚类

为了让这个任务保持*极其简单*,它是由一个map过程来完成的,这个过程不仅使用当前处理的项,而且使用这个项的前一个兄弟(或前驱)。
通过将当前pathName与之前的pathName拆分,将构建额外的pathPartials列表。
例如,'foo/bar/baz'将被其前一个pathName('foo')拆分。因此,'bar/baz'已经是一个聚集的部分路径,该路径将用于通过将该部分连接到其前一个兄弟的pathPartials列表(此时为['foo'])来创建当前文件数据项的pathPartials列表。因此,前者的结果将是['foo', 'bar/baz']。
对于'foo/bar/baz/biz'也是如此,它以前的路径名为'foo/bar/baz',以前的部分列表为['foo', 'bar/baz'],拆分结果为'biz',新的部分列表为['foo', 'bar/baz', 'biz']
上面的排序文件数据列表Map到这个新列表中...

[{
  "pathName": "",
  "fileName": "foobar.js",
  "pathPartials": [
    ""
  ]
}, {
  "pathName": "doc/data",
  "fileName": "fame.es",
  "pathPartials": [
    "doc/data"
  ]
}, {
  "pathName": "doc/data",
  "fileName": "fame.js",
  "pathPartials": [
    "doc/data"
  ]
}, {
  "pathName": "doc/data",
  "fileName": "main.js",
  "pathPartials": [
    "doc/data"
  ]
}, {
  "pathName": "doc/data/dandu",
  "fileName": "sdasa.js",
  "pathPartials": [
    "doc/data",
    "dandu"
  ]
}, {
  "pathName": "foo",
  "fileName": "bar.js",
  "pathPartials": [
    "foo"
  ]
}, {
  "pathName": "foo/bar/baz",
  "fileName": "biz.js",
  "pathPartials": [
    "foo",
    "bar/baz"
  ]
}, {
  "pathName": "foo/bar/baz/biz",
  "fileName": "bar.js",
  "pathPartials": [
    "foo",
    "bar/baz",
    "biz"
  ]
}]

汇编最终索引

最后一步是一个简单的列表缩减任务,因为此时已经完成了正确地拆分和聚类项的每个路径部分的最困难部分。

cpjpxq1n

cpjpxq1n3#

我的方法循环遍历每个文件(路径),并在遍历目录并最终遍历文件时递归地构建目录结构。
递归函数接受currentNode,以便知道当前已经有什么,这样我们就可以确保是向结构中添加内容,而不是覆盖任何内容。
它还接受currentPath,currentPath会逐步去掉第一个元素,这样你就只处理剩下的元素,我们也就知道什么时候完成了(只剩下一个元素时)。
这可能不是最快的方法,但它似乎比其他任何答案都容易理解得多。

// Sample input: relative file paths (no leading slash).
const files = [
  "doc/data/main.js",
  "doc/data/xl.js",
  "doc/data/dandu/sdasa.js",
  "mnt/data/la.js"
]

/**
 * Builds a nested `{ directories, files }` tree from a list of file paths,
 * one level per directory segment.
 *
 * Empty segments are ignored, so '/doc/a.js', 'doc/a.js' and 'doc//a.js' all
 * end up in the same place and a leading slash does not create a directory
 * named ''. Paths with no segments at all (e.g. '') are skipped. Existing
 * directories are detected with an own-property check, so a directory called
 * 'constructor' is handled like any other name.
 *
 * Every step returns new objects; nodes of the tree built so far are copied,
 * not mutated.
 *
 * @param {string[]} files - File paths separated by '/'.
 * @returns {{directories: Object, files: Object}} The root directory node.
 */
const toTree = (files) => {
  const emptyDirectory = () => ({
    directories: {},
    files: {},
  })

  const diveIn = (currentNode, currentPath) => {
    const [fileOrDirName, ...remainingPath] = currentPath

    // If there's only one section left, it's a file:
    // copy the node and add the file to it
    if (remainingPath.length === 0) {
      return {
        ...currentNode,
        files: {
          ...currentNode.files,
          [fileOrDirName]: 1,
        }
      }
    }

    // Otherwise it's a directory: reuse it if it already exists,
    // or start from a brand new, empty one, and keep going
    const alreadyExists = Object.prototype.hasOwnProperty.call(
      currentNode.directories,
      fileOrDirName
    )
    const directory = alreadyExists
      ? currentNode.directories[fileOrDirName]
      : emptyDirectory()

    return {
      ...currentNode,
      directories: {
        ...currentNode.directories,
        [fileOrDirName]: diveIn(directory, remainingPath),
      }
    }
  }

  // Fold every path into the tree, each step producing the
  // root node that the next path builds on
  return files.reduce((rootDirectory, file) => {
    const splitPath = file.split("/").filter((segment) => segment !== "")

    return splitPath.length === 0
      ? rootDirectory
      : diveIn(rootDirectory, splitPath)
  }, emptyDirectory())
}

// Build the tree from the sample `files` and print it.
console.log(toTree(files))
fjaof16o

fjaof16o4#

你可以用一个递归函数来完成它,记住这只是一个可能的解决方案,可能不是最好的。

/**
 * Registers one file path in `structure`, a map from directory path to
 * `{ files, directories }`, where `directories` is again such a map.
 *
 * The shortest leading portion of the folder path that is already a key
 * decides where the file goes:
 * - no known portion: the whole folder path becomes a new key;
 * - the whole folder path is known: the file is added to that entry;
 * - only a leading portion is known: the rest of the path is handled
 *   recursively inside that entry's `directories`.
 *
 * Files without any folder share the '' key.
 *
 * @param {string} path - File path separated by '/'.
 * @param {Object} [structure] - Map to add to; a new one is created when omitted.
 * @returns {Object} The map that was passed in (or the newly created one).
 */
const workPath = (path, structure) => {
    const target = structure || {};
    const hasEntry = (key) => Object.prototype.hasOwnProperty.call(target, key);

    const folders = path.split("/");
    const file = folders.pop();

    const addFile = (entry) => {
        entry.files = entry.files || {};
        entry.files[file] = 1;
    };

    // Check whether any leading portion of the folder path is already registered
    let breakPoint = null;
    let tempPath;
    for (let i = 0; i < folders.length; i++) {
        const candidate = folders.slice(0, i + 1).join("/");

        if (hasEntry(candidate)) {
            breakPoint = i;
            tempPath = candidate;
            break;
        }
    }

    if (breakPoint === null) {
        // No portion is registered yet: register the whole folder path.
        // With no folders at all the key is '' and may already exist from an
        // earlier root-level file, so it must not be replaced.
        const foldersPath = folders.join("/");
        if (!hasEntry(foldersPath)) {
            target[foldersPath] = {};
        }
        addFile(target[foldersPath]);
    } else if (breakPoint === folders.length - 1) {
        // The whole folder path is registered (index 0 is a valid match too):
        // just add the file to it.
        addFile(target[tempPath]);
    } else {
        // Only a leading portion is registered: continue with the rest of
        // the path inside that entry's sub-directories.
        const subPath = `${folders.slice(breakPoint + 1).join("/")}/${file}`;

        target[tempPath]["directories"] = workPath(subPath, target[tempPath]["directories"]);
    }

    return target;
};

/**
 * Folds every path of `array` into one structure by calling `workPath` for
 * each path in order, starting from an empty object.
 *
 * @param {Iterable<string>} array - File paths to convert.
 * @returns {Object} The structure returned by the last `workPath` call, or an
 *   empty object when there are no paths.
 */
const convert = array => [...array].reduce(
    (structure, path) => workPath(path, structure),
    {}
);

“convert”函数需要所有路径的数组。
请记住,此解决方案不考虑其中没有文件的条目。

相关问题