Linux:用 C 删除重复下载产生的同名文件

anauzrmj  于 2022-11-22  发布在  Linux
关注(0)|答案(2)|浏览(149)

当我们在网上下载一些相同的文件时,文件名变成(2),(3)...
example
我想用 C 删除这些文件。首先,我想找到这些文件并把它们打印出来。我写了下面这些代码,但它不能正常工作。

/*
 * Builds the path of the Downloads directory under the user's home, then for
 * every entry whose name does not start with '.', re-reads the directory and
 * prints each differently named entry whose name contains that entry's name
 * as a substring.
 */
int main(){

        const char *path;
        DIR *dir;
        struct dirent* entry;
        if((path=getenv("HOME"))==NULL){// use $HOME; otherwise fall back to the passwd entry of the current user
                path = getpwuid(getuid())->pw_dir;
        }
        const char *downloads = "/Downloads";
        // NOTE(review): path points at a string this code did not allocate
        // (the environment or the passwd entry), so appending to it writes
        // past the end of that string.
        strcat(path,downloads); // intended to form ~/Downloads
        if(chdir(path)!=0){
                perror("chdir()");
                return -1;
        }
        if((dir=opendir(path))==NULL){ // open the directory for the outer scan
                perror("open");
                return 1;
        }
        // outer scan: each entry in turn is the name being searched for
        while((entry=readdir(dir))!=NULL){
                struct dirent *cmpentry;
                DIR *cmpdir;
                // a second stream on the same directory is opened for every
                // outer entry; NOTE(review): it is never passed to closedir()
                if((cmpdir=opendir(path))==NULL){
                        perror("opendir");
                        return -1;
                }

                // inner scan: print every other name that contains entry's name
                while((cmpentry=readdir(cmpdir))!=NULL){
                        if((entry->d_name[0]!='.')&&strcmp(entry->d_name,cmpentry->d_name)!=0){
                                char *ptr=strstr(cmpentry->d_name,entry->d_name);
                                if(ptr!=NULL)
                                        printf("%s\n",cmpentry->d_name);
                          
                                }
                        }
                }
        }

我该怎么修?

guicsvcw

guicsvcw1#

一系列问题......

  1. path 没有足够的空间供 strcat 使用,因此会产生 UB(未定义行为)
  2. 无需使用 chdir
  3. 没有调用 closedir,因此对于较大的目录,文件描述符会被耗尽。
  4. 没有跳过 "." 和 ".." 条目
  5. 仅使用 strcmp 和 strstr 是不够的,会出现重复匹配和/或漏匹配。
  6. 反复打开同一个目录既缓慢又浪费。最好只读取目录一次,并将条目保存在数组中。
    一些修正:
    1. 将目录条目保存到数组中
    2. 使用一个辅助结构体(例如下面的 struct root),将文件名拆分为各个组成部分(例如 foo(1).pdf --> foo、(1) 和 .pdf)
    3. 添加了文件长度和文件内容的比较
    下面是原始代码,并标注了错误:
// main -- original program, unchanged, with its defects annotated
//
// For every directory entry whose name does not start with '.', re-reads the
// directory and prints each differently named entry whose name contains that
// entry's name as a substring.
int
main()
{

    const char *path;
    DIR *dir;
    struct dirent *entry;

    // get HOME path (fall back to the passwd entry of the current user)
    if ((path = getenv("HOME")) == NULL) {
        path = getpwuid(getuid())->pw_dir;
    }
    const char *downloads = "/Downloads";

    // make ~/Downloads
// NOTE/BUG: not enough space in path -- the string it points at has no room
// for the appended text
// NOTE/BUG: path is a const -- it is declared const char *, yet strcat writes
// through it
    strcat(path, downloads);
// NOTE/BUG: no need to chdir as opendir is enough
    if (chdir(path) != 0) {
        perror("chdir()");
        return -1;
    }

    // open directory
// NOTE/BUG: no closedir for this
    if ((dir = opendir(path)) == NULL) {
        perror("open");
        return 1;
    }

    // outer scan: each entry in turn is the name being searched for
    while ((entry = readdir(dir)) != NULL) {
// NOTE/BUG: no check for "." or ".."
        struct dirent *cmpentry;
        DIR *cmpdir;

// NOTE/BUG: no closedir for this -- one stream is opened per outer entry
        if ((cmpdir = opendir(path)) == NULL) {
            perror("opendir");
            return -1;
        }

        // inner scan: print every other name that contains entry's name
        while ((cmpentry = readdir(cmpdir)) != NULL) {
// NOTE/BUG: strcmp sense is inverted
// NOTE/BUG: strcmp wrong
            if ((entry->d_name[0] != '.') &&
                strcmp(entry->d_name, cmpentry->d_name) != 0) {
                char *ptr = strstr(cmpentry->d_name, entry->d_name);

                if (ptr != NULL)
                    printf("%s\n", cmpentry->d_name);
            }
        }
    }
}

在上面的代码中,我使用了cpp条件语句来表示旧代码和新代码:

#if 0
// old code
#else
// new code
#endif

#if 1
// new code
#endif

注意:用 unifdef -k 处理该文件,即可清除这些条件编译语句
下面是重构后的代码,它带有注解:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <pwd.h>
#include <string.h>
#include <dirent.h>
#include <ctype.h>
#include <sys/stat.h>

// dbgprt -- printf-style debug output to stderr
// Compiled in only when DEBUG is defined; otherwise it expands to an empty
// statement and its arguments are not evaluated.
// Uses the standard C99 variadic form (__VA_ARGS__) rather than the GNU-only
// named-variadic syntax.
#ifdef DEBUG
#define dbgprt(...) \
    fprintf(stderr,__VA_ARGS__)
#else
#define dbgprt(...) \
    do { } while (0)
#endif

// filename parsing control
// One parsed directory entry: the raw entry plus the pieces of its filename.
struct root {
    struct dirent root_ent;             // copy of the raw directory entry
    off_t root_size;                    // file size supplied by the caller of rootof
    int root_paren;                     // 1=name (minus suffix) ends in "(digits)", e.g. "(1)"
    int root_dup;                       // 1=is a duplicate (set by main, not by rootof)
    char *root_suf;                     // suffix/extension including the dot (e.g. ".pdf"); NULL if the name has no '.'
    char root_core[256];                // root/core/base name: no suffix, no trailing "(digits)"
};

// rootshow -- show root struct contents
//
// Debug helper: prints every parsed field of root via dbgprt, so it produces
// output only in DEBUG builds.
//
// root -- entry to display
// who -- label naming the caller; shown at the end of the line
void
rootshow(const struct root *root,const char *who)
{
    // root_suf is NULL for names without an extension; handing NULL to "%s"
    // is undefined behavior, so substitute a printable placeholder
    const char *suf = (root->root_suf != NULL) ? root->root_suf : "(null)";

    dbgprt("rootshow: d_name='%s' root_dup=%d root_paren=%d root_core='%s' root_suf='%s' (from %s)\n",
        root->root_ent.d_name,
        root->root_dup,root->root_paren,
        root->root_core,suf,who);

    // keeps suf referenced when dbgprt expands to nothing
    (void) suf;
}

// parenpos -- find a trailing "(digits)" group in a filename stem
// RETURNS: index of the '(' that starts the group, or -1 if the stem does
// not end in "(digits)"
static int
parenpos(const char *stem)
{
    size_t len = strlen(stem);

    // an empty stem (e.g. the part of ".bashrc" before its dot) has no last
    // character to inspect
    if ((len == 0) || (stem[len - 1] != ')'))
        return -1;

    // walk left over the digits that precede the ")"
    // (cast: <ctype.h> functions require an unsigned char value)
    size_t pos = len - 1;
    while ((pos > 0) && isdigit((unsigned char) stem[pos - 1]))
        --pos;

    // at least one digit is required -- rejects "()" and "x)"
    if (pos == (len - 1))
        return -1;

    // the digits must be introduced by "("
    if ((pos == 0) || (stem[pos - 1] != '('))
        return -1;

    return (int) (pos - 1);
}

// rootof -- split up filenames into components
//
// Fills *root from a directory entry:
//   root_ent   copy of *ent
//   root_size  size, as supplied by the caller
//   root_suf   text from the last '.' to the end (e.g. ".pdf"), NULL if the
//              name has no '.'
//   root_paren 1 if what precedes the suffix ends in "(digits)"
//   root_core  name with the suffix and any trailing "(digits)" removed
//              (e.g. "foo(1).pdf" --> "foo")
//
// root_suf is a separate heap copy that this function never frees. It must
// not point into *ent, which belongs to readdir() and may be overwritten or
// released later, nor into *root itself, which the caller may relocate
// (e.g. with realloc).
//
// Exits the program if the suffix copy cannot be allocated.
void
rootof(struct root *root,struct dirent *ent,off_t size)
{
    char tail[256];

    memset(root,0,sizeof(*root));

    // keep our own copy of the directory entry and remember the file size
    root->root_ent = *ent;
    root->root_size = size;

    // scratch copy of the filename that can be cut up in place
    snprintf(tail,sizeof(tail),"%s",ent->d_name);

    // remember and strip the extension
    char *dot = strrchr(tail,'.');
    if (dot != NULL) {
        size_t suflen = strlen(dot) + 1;

        root->root_suf = malloc(suflen);
        if (root->root_suf == NULL) {
            perror("malloc");
            exit(1);
        }
        memcpy(root->root_suf,dot,suflen);

        *dot = 0;
    }

    // get root/base (e.g. "foo.pdf" --> "foo")
    strcpy(root->root_core,tail);

    // strip a trailing "(1)" if there is one (e.g. "foo(1)" --> "foo")
    int lparen = parenpos(tail);
    if (lparen >= 0) {
        tail[lparen] = 0;
        root->root_paren = 1;
        strcpy(root->root_core,tail);
    }

#if DEBUG
    rootshow(root,"rootof");
#endif
}

// fullpath -- get full path (e.g. dir/tail)
//
// Writes dir, a single "/" and tail into path as one NUL-terminated string.
// No length is passed in, so the caller must supply a buffer of at least
// strlen(dir) + strlen(tail) + 2 bytes.
void
fullpath(char *path,const char *dir,const char *tail)
{
    size_t dirlen = strlen(dir);
    size_t taillen = strlen(tail);
    char *out = path;

    // directory part
    memcpy(out,dir,dirlen);
    out += dirlen;

    // separator
    *out++ = '/';

    // filename part, including its terminating NUL
    memcpy(out,tail,taillen + 1);
}

// dirload -- load up directory into list
struct root *
dirload(const char *path,int *countp)
{
    char file[1024];
    struct root *list = NULL;
    int count = 0;
    int cap = 0;

    // open directory
    DIR *dirp = opendir(path);
    if (dirp == NULL) {
        perror("open");
        exit(1);
    }

    while (1) {
        struct dirent *ent = readdir(dirp);
        if (ent == NULL)
            break;

        // skip over "." and ".."
        const char *tail = ent->d_name;
        if (tail[0] == '.') {
            if (tail[1] == 0)
                continue;
            if ((tail[1] == '.') && (tail[2] == 0))
                continue;
        }

        // optional -- only ordinary files
#if 1
        if (ent->d_type != DT_REG)
            continue;
#endif

        // enlarge array
        if (count >= cap) {
            cap += 10;
            list = realloc(list,sizeof(*list) * cap);
            if (list == NULL) {
                perror("realloc");
                exit(1);
            }
        }

        // get file size
        struct stat st;
        fullpath(file,path,ent->d_name);
        if (stat(file,&st) < 0) {
            perror(file);
            exit(1);
        }

        // parse the filename
        rootof(&list[count],ent,st.st_size);
        ++count;
    }

    closedir(dirp);

    // return count to caller
    *countp = count;

    return list;
}

// fileopen -- build dir/tail in file[] and open it read-only
// RETURNS: open file descriptor (exits the program on any failure)
static int
fileopen(char *file,size_t size,const char *dir,const char *tail)
{
    int len = snprintf(file,size,"%s/%s",dir,tail);
    if ((len < 0) || ((size_t) len >= size)) {
        fprintf(stderr,"filematch: path too long: %s/%s\n",dir,tail);
        exit(1);
    }

    int fd = open(file,O_RDONLY);
    if (fd < 0) {
        perror(file);
        exit(1);
    }

    return fd;
}

// readfull -- read len bytes, retrying after short reads
// RETURNS: bytes read (less than len only at end of file), -1 on error
static ssize_t
readfull(int fd,char *buf,size_t len)
{
    size_t got = 0;

    while (got < len) {
        ssize_t xlen = read(fd,buf + got,len - got);
        if (xlen < 0)
            return -1;
        if (xlen == 0)
            break;
        got += (size_t) xlen;
    }

    return (ssize_t) got;
}

// filematch -- compare the file contents
//
// dir -- directory that holds both files
// lhs, rhs -- entries to compare (root_ent.d_name and root_size are used)
//
// RETURNS: 1=match, 0=mismatch
//
// Files whose recorded sizes differ are a mismatch without being opened.
// A file that turns out shorter than its recorded size is reported as a
// mismatch. Exits the program if a file cannot be opened or read.
int
filematch(const char *dir,const struct root *lhs,const struct root *rhs)
{
    char lhsfile[1024];
    char lhsbuf[4096];

    char rhsfile[1024];
    char rhsbuf[4096];

    // file sizes must match
    if (lhs->root_size != rhs->root_size)
        return 0;

    int fdlhs = fileopen(lhsfile,sizeof(lhsfile),dir,lhs->root_ent.d_name);
    int fdrhs = fileopen(rhsfile,sizeof(rhsfile),dir,rhs->root_ent.d_name);

    int match = 1;

    off_t resid = lhs->root_size;
    while (resid > 0) {
        // next chunk: a full buffer or whatever is left
        size_t want = sizeof(lhsbuf);
        if (resid < (off_t) want)
            want = (size_t) resid;

        // get LHS chunk
        ssize_t lhsgot = readfull(fdlhs,lhsbuf,want);
        if (lhsgot < 0) {
            perror(lhsfile);
            exit(1);
        }

        // get RHS chunk
        ssize_t rhsgot = readfull(fdrhs,rhsbuf,want);
        if (rhsgot < 0) {
            perror(rhsfile);
            exit(1);
        }

        // a file ended early (it shrank after its size was recorded), so the
        // two cannot be declared identical
        if (((size_t) lhsgot != want) || ((size_t) rhsgot != want)) {
            match = 0;
            break;
        }

        // they must match
        if (memcmp(lhsbuf,rhsbuf,want) != 0) {
            match = 0;
            break;
        }

        resid -= (off_t) want;
    }

    close(fdlhs);
    close(fdrhs);

    return match;
}

// main -- report downloaded duplicates (e.g. "foo(1).pdf" vs "foo.pdf")
//
// usage: prog [directory]
// The directory defaults to Downloads under the user's home directory.
//
// For every file without a "(N)" marker, prints each file that has the same
// core name and suffix plus a "(N)" marker and identical contents.
//
// RETURNS: 0 on success, 1 if the directory to scan cannot be determined
int
main(int argc,char **argv)
{
    char path[1024];

    // skip over program name
    --argc;
    ++argv;

    // find the directory
    if (argc > 0) {
        // argv comes from the user -- make sure it fits before copying
        if (strlen(*argv) >= sizeof(path)) {
            fprintf(stderr,"main: directory name too long\n");
            return 1;
        }
        strcpy(path,*argv);
    }
    else {
        // get HOME path
        const char *home = getenv("HOME");
        if (home == NULL) {
            struct passwd *pw = getpwuid(getuid());
            if (pw == NULL) {
                fprintf(stderr,"main: unable to find home directory\n");
                return 1;
            }
            home = pw->pw_dir;
        }

        // make ~/Downloads (sizeof counts the "/Downloads" text plus the NUL)
        if (strlen(home) + sizeof("/Downloads") > sizeof(path)) {
            fprintf(stderr,"main: home directory name too long\n");
            return 1;
        }
        fullpath(path,home,"Downloads");
    }

#if DEBUG
    setlinebuf(stdout);
    setlinebuf(stderr);
#endif

    int count = 0;
    struct root *list = dirload(path,&count);

    for (int lhsidx = 0;  lhsidx < count;  ++lhsidx) {
        struct root *lhs = &list[lhsidx];

        // must _not_ have "(1)" -- this is the candidate original
        if (lhs->root_paren)
            continue;

        rootshow(lhs,"LHS");

        for (int rhsidx = 0;  rhsidx < count;  ++rhsidx) {
            // skip over the same entry
            if (rhsidx == lhsidx)
                continue;

            struct root *rhs = &list[rhsidx];

            rootshow(rhs,"RHS");

            // file types must match
            if (rhs->root_ent.d_type != lhs->root_ent.d_type)
                continue;

            // must have "(1)" -- this is the candidate copy
            if (! rhs->root_paren)
                continue;

            // both entries must have [or _not_ have] a suffix
            if ((lhs->root_suf == NULL) != (rhs->root_suf == NULL))
                continue;

            // suffix must match
            if ((lhs->root_suf != NULL) &&
                (strcmp(lhs->root_suf,rhs->root_suf) != 0))
                continue;

            // core must match
            if (strcmp(lhs->root_core,rhs->root_core) != 0)
                continue;

            // contents must match (checked last: it reads both files)
            if (! filematch(path,lhs,rhs))
                continue;

            printf("%s is dup of %s\n",
                rhs->root_ent.d_name,lhs->root_ent.d_name);

            // mark it as a removable duplicate
            rhs->root_dup = 1;
        }
    }

    // release the array returned by dirload
    free(list);

    return 0;
}

下面是一个测试perl脚本:

#!/usr/bin/perl
# dotest -- test program
#
# Builds a scratch directory of test files one file at a time and runs the
# program under test against it after each addition.

# run the whole test sequence with the command-line arguments, then exit
# with success status
master(@ARGV);
exit(0);

# master -- master control
#
# arguments:
#   optional first element -- name of the program under test, relative to
#   $ENV{PWD} (default: "duptest")
#
# Sets the globals $xfile (program to run), $pwd and $tstdir (scratch
# directory), then feeds the test files to dotest one at a time, in order.
sub master
{
    my(@argv) = @_;

    # program under test, made relative to the current directory
    my $prog = shift(@argv) // "duptest";
    $pwd = $ENV{PWD};
    $xfile = "$pwd/$prog";

    $tstdir = "/tmp/testdir";

    # [filename, contents] -- each call to dotest adds one more file
    my @cases = (
        ["abc","xyz"],

        ["abc.pdf","jkl"],
        ["abc(1).pdf","jkl"],

        ["abc(2)","xyz"],
        ["abc(3)","xx"],
        ["abc(3)","xzy"],

        ["def","blah"],
        ["def(3)","blah"],
        ["def.pdf","blah"],
    );

    foreach my $case (@cases) {
        dotest(@$case);
    }
}

# dotest -- add one file to the test set and run the program under test
#
# arguments:
#   $file -- name of the file to add (relative to $tstdir)
#   $body -- contents to write to that file
#
# Recreates $tstdir from scratch with every file added so far (kept in the
# global @allfiles), runs "$xfile $tstdir" and echoes each line it prints.
# Dies if the directory cannot be prepared, a file cannot be written, or the
# program fails in any way.
sub dotest
{
    my($file,$body) = @_;

    printf("\n");
    printf("%s\n","-" x 80);

    # start from an empty test directory
    system("rm","-fr",$tstdir) == 0 or
        die("dotest: unable to remove '$tstdir'\n");
    system("mkdir","-p",$tstdir) == 0 or
        die("dotest: unable to create '$tstdir'\n");

    push(@allfiles,[$file,$body]);

    # (re)create every file added so far, in the order it was added
    foreach my $pair (@allfiles) {
        my($tail,$text) = @$pair;
        printf("dotest: FILE %s '%s'\n",$tail,$text);

        my $path = sprintf("%s/%s",$tstdir,$tail);

        # three-argument open: the filename is never parsed for a mode
        open(my $xfdst,">",$path) or
            die("dotest: unable to open '$path' -- $!\n");
        print($xfdst $text);
        close($xfdst) or
            die("dotest: unable to write '$path' -- $!\n");
    }

    my @xfiles = (`$xfile $tstdir`);

    # $? is nonzero for a nonzero exit status _and_ for death by signal
    # (e.g. a segfault), which "$? >> 8" alone would report as success
    die("dotest: program aborted\n")
        if ($? != 0);

    foreach my $line (@xfiles) {
        chomp($line);
        printf("dotest: XDUP %s\n",$line);
    }
}

下面是测试程序的输出:

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: XDUP abc(1).pdf is dup of abc.pdf

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: FILE def 'blah'
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: FILE def 'blah'
dotest: FILE def(3) 'blah'
dotest: XDUP def(3) is dup of def
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc

--------------------------------------------------------------------------------
dotest: FILE abc 'xyz'
dotest: FILE abc.pdf 'jkl'
dotest: FILE abc(1).pdf 'jkl'
dotest: FILE abc(2) 'xyz'
dotest: FILE abc(3) 'xx'
dotest: FILE abc(3) 'xzy'
dotest: FILE def 'blah'
dotest: FILE def(3) 'blah'
dotest: FILE def.pdf 'blah'
dotest: XDUP def(3) is dup of def
dotest: XDUP abc(1).pdf is dup of abc.pdf
dotest: XDUP abc(2) is dup of abc
ikfrs5lh

ikfrs5lh2#

readdir() 不像 ls 那样按排序后的顺序返回文件,而是按照条目在目录中的存放顺序返回。下面是你的程序的一个可以运行的版本,但它的结果并不正确,不是你想要的效果。请自行修正。

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <dirent.h>
#include <errno.h>
#include <unistd.h>
#include <pwd.h>

/*
 * main -- list entries of ~/Downloads whose names contain another entry's name
 *
 * For every entry whose name does not start with '.', the directory is read
 * a second time and each differently named entry that contains that name as
 * a substring is printed.
 *
 * Returns 0 on success, -1 if the directory cannot be determined, entered or
 * re-opened, and 1 if it cannot be opened for the outer scan.  Diagnostics
 * go to stdout.
 */
int main(){
    int m,n;
    char path[256];
    const char *home;
    struct passwd *pw;
    DIR *dir,*cmpdir;
    struct dirent entry,*pe;

    /* get HOME path; getenv() may return NULL, so test it before any use */
    home=getenv("HOME");
    if(home==NULL){
        pw=getpwuid(getuid());
        if(pw==NULL){
            fprintf(stdout,"cannot determine home directory\n");
            return -1;
        }
        home=pw->pw_dir;
    }

    /* make ~/Downloads without overrunning path[] */
    n=snprintf(path,sizeof(path),"%s%s",home,"/Downloads");
    if(n<0||(size_t)n>=sizeof(path)){
        fprintf(stdout,"home directory path too long\n");
        return -1;
    }

    if(chdir(path)!=0){
        m=errno;fprintf(stdout,"%d %s\n",m,strerror(m));
        return -1;
    }
    if((dir=opendir(path))==NULL){ /* open directory */
        m=errno;fprintf(stdout,"%d %s\n",m,strerror(m));
        return 1;
    }

    /* outer scan: each non-hidden entry in turn is the name searched for */
    while((pe=readdir(dir))!=NULL){
        /* names starting with '.' (this covers "." and "..") are skipped */
        if(pe->d_name[0]=='.')
            continue;

        /* copy the entry: pe is reused by the inner scan below */
        entry=*pe;

        /* inner scan runs once per outer entry on its own stream */
        if((cmpdir=opendir(path))==NULL){
            m=errno;fprintf(stdout,"%d %s\n",m,strerror(m));
            closedir(dir);
            return -1;
        }
        while((pe=readdir(cmpdir))!=NULL){
            /* never report an entry against itself */
            if(strcmp(entry.d_name,pe->d_name)==0)
                continue;
            if(strstr(pe->d_name,entry.d_name)!=NULL)
                fprintf(stdout,"%s\n",pe->d_name);
        }
        /* release the stream before the next outer entry opens another */
        closedir(cmpdir);
    }

    closedir(dir);
    return 0;
}

相关问题