LCOV - code coverage report
Current view: top level - src/server/protocol1 - bedup.c (source / functions) Hit Total Coverage
Test: burp-coverage-clean.info Lines: 232 453 51.2 %
Date: 2016-11-02 Functions: 14 22 63.6 %

          Line data    Source code
       1             : #include "../../burp.h"
       2             : #include "../../alloc.h"
       3             : #include "../../conf.h"
       4             : #include "../../conffile.h"
       5             : #include "../../handy.h"
       6             : #include "../../fsops.h"
       7             : #include "../../fzp.h"
       8             : #include "../../lock.h"
       9             : #include "../../log.h"
      10             : #include "../../prepend.h"
      11             : #include "../../strlist.h"
      12             : #include "bedup.h"
      13             : 
      14             : #include <uthash.h>
      15             : 
      // Lock file names. level_exclusion() refuses to dedup files with these
      // names when they sit at the top level of a storage directory.
      #define LOCKFILE_NAME           "lockfile"
      #define BEDUP_LOCKFILE_NAME     "lockfile.bedup"

      // NOTE(review): presumably the default for the 'maxlinks' limit that is
      // passed down to check_files() - its use is outside this view.
      #define DEF_MAX_LINKS           10000

      // Mode switches read by check_files(): replace duplicates with
      // hardlinks, or delete them. With neither set, duplicates are only
      // counted.
      static int makelinks=0;
      static int deletedups=0;

      // Running totals updated by check_files(): bytes saved (or saveable)
      // and number of duplicates found.
      static uint64_t savedbytes=0;
      static uint64_t count=0;
      // NOTE(review): not referenced in the visible part of the file.
      static int ccount=0;

      // Locks that sighandler() releases before exiting.
      static struct lock *locklist=NULL;

      // When set, check_files() prints the path of every duplicate it finds.
      static int verbose=0;
      31             : 
      /* One remembered file. Entries that share the same size are chained
         together through 'next' (see struct mystruct). */
      struct file
      {
              // Full path. NULL marks an entry that must be skipped.
              char *path;
              // Device, inode and link count, taken from lstat().
              dev_t dev;
              ino_t ino;
              nlink_t nlink;
              // Cached checksums. Zero means "not calculated yet".
              uint64_t full_cksum;
              uint64_t part_cksum;
              // Next file of the same size, or NULL.
              struct file *next;
      };

      typedef struct file file_t;
      44             : 
      45             : struct mystruct
      46             : {
      47             :         off_t st_size;
      48             :         file_t *files;
      49             :         UT_hash_handle hh;
      50             : };
      51             : 
      52             : struct mystruct *myfiles=NULL;
      53             : 
      54           4 : static struct mystruct *find_key(off_t st_size)
      55             : {
      56             :         struct mystruct *s;
      57             : 
      58           4 :         HASH_FIND_INT(myfiles, &st_size, s);
      59           4 :         return s;
      60             : }
      61             : 
      62           2 : static int add_file(struct mystruct *s, struct file *f)
      63             : {
      64             :         struct file *newfile;
      65           2 :         if(!(newfile=(struct file *)malloc_w(sizeof(struct file), __func__)))
      66             :                 return -1;
      67             :         memcpy(newfile, f, sizeof(struct file));
      68           2 :         f->path=NULL;
      69           2 :         newfile->next=s->files;
      70           2 :         s->files=newfile;
      71             :         return 0;
      72             : }
      73             : 
      74           2 : static int add_key(off_t st_size, struct file *f)
      75             : {
      76             :         struct mystruct *s;
      77             : 
      78           2 :         if(!(s=(struct mystruct *)malloc_w(sizeof(struct mystruct), __func__)))
      79             :                 return -1;
      80           2 :         s->st_size=st_size;
      81           2 :         s->files=NULL;
      82           2 :         if(add_file(s, f)) return -1;
      83             : //printf("HASH ADD %d\n", st_size);
      84           2 :         HASH_ADD_INT(myfiles, st_size, s);
      85             :         return 0;
      86             : }
      87             : 
      88             : static void file_free_content(struct file *file)
      89             : {
      90           2 :         if(!file) return;
      91           2 :         free_w(&file->path);
      92             : }
      93             : 
      /* Free a struct file together with its contents.
         Does nothing if file or *file is NULL. */
      static void file_free(struct file **file)
      {
              if(file && *file)
              {
                      file_free_content(*file);
                      free_v((void **)file);
              }
      }
     100             : 
     101           2 : static void files_free(struct file **files)
     102             : {
     103             :         struct file *f;
     104             :         struct file *fhead;
     105           2 :         if(!files || !*files) return;
     106             :         fhead=*files;
     107           4 :         while(fhead)
     108             :         {
     109           2 :                 f=fhead;
     110           2 :                 fhead=fhead->next;
     111           2 :                 file_free(&f);
     112             :         }
     113             : }
     114             : 
     115             : static void mystruct_free_content(struct mystruct *mystruct)
     116             : {
     117           2 :         if(!mystruct) return;
     118           2 :         files_free(&mystruct->files);
     119             : }
     120             : 
     /* Free a hash entry together with its file list.
        Does nothing if mystruct or *mystruct is NULL. The entry must already
        have been removed from the hash table by the caller. */
     static void mystruct_free(struct mystruct **mystruct)
     {
             if(mystruct && *mystruct)
             {
                     mystruct_free_content(*mystruct);
                     free_v((void **)mystruct);
             }
     }
     127             : 
     128           2 : static void mystruct_delete_all(void)
     129             : {
     130             :         struct mystruct *tmp;
     131             :         struct mystruct *mystruct;
     132             : 
     133           4 :         HASH_ITER(hh, myfiles, mystruct, tmp)
     134             :         {
     135           2 :                 HASH_DEL(myfiles, mystruct);
     136           2 :                 mystruct_free(&mystruct);
     137             :         }
     138           2 :         myfiles=NULL;
     139           2 : }
     140             : 
     141             : #define FULL_CHUNK      4096
     142             : 
     143           2 : static int full_match(struct file *o, struct file *n,
     144             :         struct fzp **ofp, struct fzp **nfp)
     145             : {
     146             :         size_t ogot;
     147             :         size_t ngot;
     148           2 :         unsigned int i=0;
     149             :         static char obuf[FULL_CHUNK];
     150             :         static char nbuf[FULL_CHUNK];
     151             : 
     152           2 :         if(*ofp) fzp_seek(*ofp, 0, SEEK_SET);
     153           0 :         else if(!(*ofp=fzp_open(o->path, "rb")))
     154             :         {
     155             :                 // Blank this entry so that it can be ignored from
     156             :                 // now on.
     157           0 :                 free_w(&o->path);
     158             :                 return 0;
     159             :         }
     160             : 
     161           2 :         if(*nfp) fzp_seek(*nfp, 0, SEEK_SET);
     162           0 :         else if(!(*nfp=fzp_open(n->path, "rb"))) return 0;
     163             : 
     164             :         while(1)
     165             :         {
     166           2 :                 ogot=fzp_read(*ofp, obuf, FULL_CHUNK);
     167           2 :                 ngot=fzp_read(*nfp, nbuf, FULL_CHUNK);
     168           2 :                 if(ogot!=ngot) return 0;
     169          20 :                 for(i=0; i<ogot; i++)
     170          20 :                         if(obuf[i]!=nbuf[i]) return 0;
     171           2 :                 if(ogot<FULL_CHUNK) break;
     172             :         }
     173             : 
     174             :         return 1;
     175             : }
     176             : 
     177             : #define PART_CHUNK      1024
     178             : 
     179           4 : static int get_part_cksum(struct file *f, struct fzp **fzp)
     180             : {
     181             :         MD5_CTX md5;
     182           4 :         int got=0;
     183             :         static char buf[PART_CHUNK];
     184             :         unsigned char checksum[MD5_DIGEST_LENGTH+1];
     185             : 
     186           4 :         if(*fzp) fzp_seek(*fzp, 0, SEEK_SET);
     187           4 :         else if(!(*fzp=fzp_open(f->path, "rb")))
     188             :         {
     189           0 :                 f->part_cksum=0;
     190           0 :                 return 0;
     191             :         }
     192             : 
     193           4 :         if(!MD5_Init(&md5))
     194             :         {
     195           0 :                 logp("MD5_Init() failed\n");
     196           0 :                 return -1;
     197             :         }
     198             : 
     199           4 :         got=fzp_read(*fzp, buf, PART_CHUNK);
     200             : 
     201           4 :         if(!MD5_Update(&md5, buf, got))
     202             :         {
     203           0 :                 logp("MD5_Update() failed\n");
     204           0 :                 return -1;
     205             :         }
     206             : 
     207           4 :         if(!MD5_Final(checksum, &md5))
     208             :         {
     209           0 :                 logp("MD5_Final() failed\n");
     210           0 :                 return -1;
     211             :         }
     212             : 
     213           4 :         memcpy(&(f->part_cksum), checksum, sizeof(unsigned));
     214             : 
     215             :         // Try for a bit of efficiency - no need to calculate the full checksum
     216             :         // again if we already read the whole file.
     217           4 :         if(got<PART_CHUNK) f->full_cksum=f->part_cksum;
     218             : 
     219             :         return 0;
     220             : }
     221             : 
     222           0 : static int get_full_cksum(struct file *f, struct fzp **fzp)
     223             : {
     224           0 :         size_t s=0;
     225             :         MD5_CTX md5;
     226             :         static char buf[FULL_CHUNK];
     227             :         unsigned char checksum[MD5_DIGEST_LENGTH+1];
     228             : 
     229           0 :         if(*fzp) fzp_seek(*fzp, 0, SEEK_SET);
     230           0 :         else if(!(*fzp=fzp_open(f->path, "rb")))
     231             :         {
     232           0 :                 f->full_cksum=0;
     233           0 :                 return 0;
     234             :         }
     235             : 
     236           0 :         if(!MD5_Init(&md5))
     237             :         {
     238           0 :                 logp("MD5_Init() failed\n");
     239           0 :                 return -1;
     240             :         }
     241             : 
     242           0 :         while((s=fzp_read(*fzp, buf, FULL_CHUNK))>0)
     243             :         {
     244           0 :                 if(!MD5_Update(&md5, buf, s))
     245             :                 {
     246           0 :                         logp("MD5_Update() failed\n");
     247           0 :                         return -1;
     248             :                 }
     249           0 :                 if(s<FULL_CHUNK) break;
     250             :         }
     251             : 
     252           0 :         if(!MD5_Final(checksum, &md5))
     253             :         {
     254           0 :                 logp("MD5_Final() failed\n");
     255           0 :                 return -1;
     256             :         }
     257             : 
     258           0 :         memcpy(&(f->full_cksum), checksum, sizeof(unsigned));
     259             : 
     260           0 :         return 0;
     261             : }
     262             : 
     263             : /* Make it atomic by linking to a temporary file, then moving it into place. */
     264           1 : static int do_hardlink(struct file *o, struct file *n, const char *ext)
     265             : {
     266           1 :         int ret=-1;
     267           1 :         char *tmppath=NULL;
     268           1 :         if(!(tmppath=prepend(o->path, ext)))
     269             :         {
     270           0 :                 log_out_of_memory(__func__);
     271             :                 goto end;
     272             :         }
     273           1 :         if(link(n->path, tmppath))
     274             :         {
     275           0 :                 logp("Could not hardlink %s to %s: %s\n", tmppath, n->path,
     276           0 :                         strerror(errno));
     277             :                 goto end;
     278             :         }
     279           1 :         if((ret=do_rename(tmppath, o->path)))
     280             :                 goto end;
     281           1 :         ret=0;
     282             : end:
     283           1 :         free_w(&tmppath);
     284           1 :         return ret;
     285             : }
     286             : 
     287             : static void reset_old_file(struct file *oldfile, struct file *newfile,
     288             :         struct stat *info)
     289             : {
     290             :         //printf("reset %s with %s %d\n", oldfile->path, newfile->path,
     291             :         //      info->st_nlink);
     292           0 :         oldfile->nlink=info->st_nlink;
     293           0 :         free_w(&oldfile->path);
     294           0 :         oldfile->path=newfile->path;
     295           0 :         newfile->path=NULL;
     296             : }
     297             : 
     298           2 : static int check_files(struct mystruct *find, struct file *newfile,
     299           0 :         struct stat *info, const char *ext, unsigned int maxlinks)
     300             : {
     301           2 :         int found=0;
     302           2 :         struct fzp *nfp=NULL;
     303           2 :         struct fzp *ofp=NULL;
     304           2 :         struct file *f=NULL;
     305             : 
     306           2 :         for(f=find->files; f; f=f->next)
     307             :         {
     308             : //printf("  against: '%s'\n", f->path);
     309           2 :                 if(!f->path)
     310             :                 {
     311             :                         // If the full_match() function fails to open oldfile
     312             :                         // (which could happen if burp deleted some old
     313             :                         // directories), it will free path and set it to NULL.
     314             :                         // Skip entries like this.
     315           0 :                         continue;
     316             :                 }
     317           2 :                 if(newfile->dev!=f->dev)
     318             :                 {
     319             :                         // Different device.
     320           0 :                         continue;
     321             :                 }
     322           2 :                 if(newfile->ino==f->ino)
     323             :                 {
     324             :                         // Same device, same inode, therefore these two files
     325             :                         // are hardlinked to each other already.
     326             :                         found++;
     327             :                         break;
     328             :                 }
     329           2 :                 if((!newfile->part_cksum && get_part_cksum(newfile, &nfp))
     330           2 :                   || (!f->part_cksum && get_part_cksum(f, &ofp)))
     331             :                 {
     332             :                         // Some error with md5sums Give up.
     333             :                         return -1;
     334             :                 }
     335           2 :                 if(newfile->part_cksum!=f->part_cksum)
     336             :                 {
     337           0 :                         fzp_close(&ofp);
     338           0 :                         continue;
     339             :                 }
     340             :                 //printf("  %s, %s\n", find->files->path, newfile->path);
     341             :                 //printf("  part cksum matched\n");
     342             : 
     343           2 :                 if((!newfile->full_cksum && get_full_cksum(newfile, &nfp))
     344           2 :                   || (!f->full_cksum && get_full_cksum(f, &ofp)))
     345             :                 {
     346             :                         // Some error with md5sums Give up.
     347             :                         return -1;
     348             :                 }
     349           2 :                 if(newfile->full_cksum!=f->full_cksum)
     350             :                 {
     351           0 :                         fzp_close(&ofp);
     352           0 :                         continue;
     353             :                 }
     354             : 
     355             :                 //printf("  full cksum matched\n");
     356           2 :                 if(!full_match(newfile, f, &nfp, &ofp))
     357             :                 {
     358           0 :                         fzp_close(&ofp);
     359           0 :                         continue;
     360             :                 }
     361             :                 //printf("  full match\n");
     362             :                 //printf("%s, %s\n", find->files->path, newfile->path);
     363             : 
     364             :                 // If there are already enough links to this file, replace
     365             :                 // our memory of it with the new file so that files later on
     366             :                 // can link to the new one. 
     367           2 :                 if(f->nlink>=maxlinks)
     368             :                 {
     369             :                         // Just need to reset the path name and the number
     370             :                         // of links, and pretend that it was found otherwise
     371             :                         // NULL newfile will get added to the memory.
     372           0 :                         reset_old_file(f, newfile, info);
     373           0 :                         found++;
     374           0 :                         break;
     375             :                 }
     376             : 
     377           2 :                 found++;
     378           2 :                 count++;
     379             : 
     380           2 :                 if(verbose) printf("%s\n", newfile->path);
     381             : 
     382             :                 // Now hardlink it.
     383           2 :                 if(makelinks)
     384             :                 {
     385           1 :                         switch(do_hardlink(newfile, f, ext))
     386             :                         {
     387             :                                 case 0:
     388           1 :                                         f->nlink++;
     389             :                                         // Only count bytes as saved if we
     390             :                                         // removed the last link.
     391           1 :                                         if(newfile->nlink==1)
     392           1 :                                                 savedbytes+=info->st_size;
     393             :                                         break;
     394             :                                 case -1:
     395             :                                         // On error, replace the memory of the
     396             :                                         // old file with the one that we just
     397             :                                         // found. It might work better when
     398             :                                         // someone later tries to link to the
     399             :                                         // new one instead of the old one.
     400           0 :                                         reset_old_file(f, newfile, info);
     401           0 :                                         count--;
     402           0 :                                         break;
     403             :                                 default:
     404             :                                         // Abandon all hope.
     405             :                                         // This could happen if renaming the
     406             :                                         // hardlink failed in such a way that
     407             :                                         // the target file was unlinked without
     408             :                                         // being replaced - ie, if the max
     409             :                                         // number of hardlinks is being hit.
     410             :                                         return -1;
     411             :                         }
     412             :                 }
     413           1 :                 else if(deletedups)
     414             :                 {
     415           0 :                         if(unlink(newfile->path))
     416             :                         {
     417           0 :                                 logp("Could not delete %s: %s\n",
     418           0 :                                         newfile->path, strerror(errno));
     419             :                         }
     420             :                         else
     421             :                         {
     422             :                                 // Only count bytes as saved if we removed the
     423             :                                 // last link.
     424           0 :                                 if(newfile->nlink==1)
     425           0 :                                         savedbytes+=info->st_size;
     426             :                         }
     427             :                 }
     428             :                 else
     429             :                 {
     430             :                         // To be able to tell how many bytes
     431             :                         // are saveable.
     432           1 :                         savedbytes+=info->st_size;
     433             :                 }
     434             : 
     435             :                 break;
     436             :         }
     437           2 :         fzp_close(&nfp);
     438           2 :         fzp_close(&ofp);
     439             : 
     440           2 :         if(found)
     441             :         {
     442           2 :                 free_w(&newfile->path);
     443           2 :                 return 0;
     444             :         }
     445             : 
     446           0 :         if(add_file(find, newfile)) return -1;
     447             : 
     448           0 :         return 0;
     449             : }
     450             : 
     /* Decide whether basedir looks like a protocol 1 storage directory,
        which is taken to be the case if it contains a 'current' symlink.
        Returns 1 if it does, 0 if it does not, -1 if out of memory. */
     static int looks_like_protocol1(const char *basedir)
     {
             int ret;
             char *tmp=prepend_s(basedir, "current");

             if(!tmp)
             {
                     log_out_of_memory(__func__);
                     return -1;
             }
             ret=is_lnk(tmp)>0?1:0;
             free_w(&tmp);
             return ret;
     }
     472             : 
     /* Read the target of the symlink 'lnk' inside basedir into real (a
        buffer of r bytes) and strip one trailing slash from it.
        The result of readlink_w_in_dir() is not checked, so real is expected
        to hold a NUL-terminated string on entry (the caller in this file
        passes static buffers that start out empty).
        Always returns 0. */
     static int get_link(const char *basedir, const char *lnk, char real[], size_t r)
     {
             size_t len;

             readlink_w_in_dir(basedir, lnk, real, r);
             // Strip any trailing slash. Check the length first: if real is
             // empty, indexing with strlen(real)-1 would read far outside
             // the buffer.
             len=strlen(real);
             if(len && real[len-1]=='/')
                     real[len-1]='\0';
             return 0;
     }
     481             : 
     482           0 : static int level_exclusion(int level, const char *fname,
     483             :         const char *working, const char *finishing)
     484             : {
     485           0 :         if(level==0)
     486             :         {
     487             :                 /* Be careful not to try to dedup the lockfiles.
     488             :                    The lock actually gets lost if you open one to do a
     489             :                    checksum
     490             :                    and then close it. This caused me major headaches to
     491             :                    figure out. */
     492           0 :                 if(!strcmp(fname, LOCKFILE_NAME)
     493           0 :                   || !strcmp(fname, BEDUP_LOCKFILE_NAME))
     494             :                         return 1;
     495             : 
     496             :                 /* Skip places where backups are going on. */
     497           0 :                 if(!strcmp(fname, working)
     498           0 :                   || !strcmp(fname, finishing))
     499             :                         return 1;
     500             : 
     501           0 :                 if(!strcmp(fname, "deleteme"))
     502             :                         return 1;
     503             :         }
     504           0 :         else if(level==1)
     505             :         {
     506             :                 // Do not dedup stuff that might be appended to later.
     507           0 :                 if(!strncmp(fname, "log", strlen("log"))
     508           0 :                   || !strncmp(fname, "verifylog", strlen("verifylog"))
     509           0 :                   || !strncmp(fname, "restorelog", strlen("restorelog")))
     510             :                         return 1;
     511             :         }
     512           0 :         return 0;
     513             : }
     514             : 
     515             : // Return 0 for directory processed, -1 for error, 1 for not processed.
     516           2 : static int process_dir(const char *oldpath, const char *newpath,
     517             :         const char *ext, unsigned int maxlinks, int burp_mode, int level)
     518             : {
     519           2 :         int ret=-1;
     520           2 :         DIR *dirp=NULL;
     521           2 :         char *path=NULL;
     522             :         struct stat info;
     523           2 :         struct dirent *dirinfo=NULL;
     524             :         struct file newfile;
     525           2 :         struct mystruct *find=NULL;
     526             :         static char working[256]="";
     527             :         static char finishing[256]="";
     528             : 
     529           2 :         newfile.path=NULL;
     530             : 
     531           2 :         if(!(path=prepend_s(oldpath, newpath))) goto end;
     532             : 
     533           2 :         if(burp_mode && level==0)
     534             :         {
     535           0 :                 if(get_link(path, "working", working, sizeof(working))
     536           0 :                   || get_link(path, "finishing", finishing, sizeof(finishing)))
     537             :                         goto end;
     538           0 :                 if(!looks_like_protocol1(path))
     539             :                 {
     540           0 :                         logp("%s does not look like a protocol 1 storage directory - skipping\n", path);
     541           0 :                         ret=1;
     542           0 :                         goto end;
     543             :                 }
     544             :         }
     545             : 
     546           2 :         if(!(dirp=opendir(path)))
     547             :         {
     548           0 :                 logp("Could not opendir '%s': %s\n", path, strerror(errno));
     549           0 :                 ret=1;
     550           0 :                 goto end;
     551             :         }
     552          10 :         while((dirinfo=readdir(dirp)))
     553             :         {
     554           8 :                 if(!strcmp(dirinfo->d_name, ".")
     555           6 :                   || !strcmp(dirinfo->d_name, ".."))
     556           4 :                         continue;
     557             : 
     558             :                 //printf("try %s\n", dirinfo->d_name);
     559             : 
     560           4 :                 if(burp_mode
     561           0 :                   && level_exclusion(level, dirinfo->d_name,
     562             :                         working, finishing))
     563           0 :                                 continue;
     564             : 
     565           4 :                 free_w(&newfile.path);
     566           4 :                 if(!(newfile.path=prepend_s(path, dirinfo->d_name)))
     567             :                         goto end;
     568             : 
     569           8 :                 if(lstat(newfile.path, &info))
     570           0 :                         continue;
     571             : 
     572           4 :                 if(S_ISDIR(info.st_mode))
     573             :                 {
     574           0 :                         if(process_dir(path, dirinfo->d_name, ext, maxlinks,                                 burp_mode, level+1))
     575             :                                         goto end;
     576           0 :                         continue;
     577             :                 }
     578           4 :                 else if(!S_ISREG(info.st_mode)
     579           4 :                   || !info.st_size) // ignore zero-length files
     580           0 :                         continue;
     581             : 
     582           4 :                 newfile.dev=info.st_dev;
     583           4 :                 newfile.ino=info.st_ino;
     584           4 :                 newfile.nlink=info.st_nlink;
     585           4 :                 newfile.full_cksum=0;
     586           4 :                 newfile.part_cksum=0;
     587           4 :                 newfile.next=NULL;
     588             : 
     589           4 :                 if((find=find_key(info.st_size)))
     590             :                 {
     591             :                         //printf("check %d: %s\n", info.st_size, newfile.path);
     592           2 :                         if(check_files(find, &newfile, &info, ext, maxlinks))
     593             :                                 goto end;
     594             :                 }
     595             :                 else
     596             :                 {
     597             :                         //printf("add: %s\n", newfile.path);
     598           2 :                         if(add_key(info.st_size, &newfile))
     599             :                                 goto end;
     600             :                 }
     601             :         }
     602             :         ret=0;
     603             : end:
     604           2 :         closedir(dirp);
     605           2 :         free_w(&newfile.path);
     606           2 :         free_w(&path);
     607           2 :         return ret;
     608             : }
     609             : 
     610           0 : static void sighandler(__attribute__ ((unused)) int signum)
     611             : {
     612           0 :         locks_release_and_free(&locklist);
     613           0 :         exit(1);
     614             : }
     615             : 
     616           0 : static int is_regular_file(const char *clientconfdir, const char *file)
     617             : {
     618             :         struct stat statp;
     619           0 :         char *fullpath=NULL;
     620           0 :         if(!(fullpath=prepend_s(clientconfdir, file)))
     621             :                 return 0;
     622           0 :         if(lstat(fullpath, &statp))
     623             :         {
     624           0 :                 free_w(&fullpath);
     625           0 :                 return 0;
     626             :         }
     627           0 :         free_w(&fullpath);
     628           0 :         return S_ISREG(statp.st_mode);
     629             : }
     630             : 
     631           0 : static int in_group(struct strlist *grouplist, const char *dedup_group)
     632             : {
     633             :         struct strlist *g;
     634             : 
     635           0 :         for(g=grouplist; g; g=g->next)
     636           0 :                 if(!strcmp(g->path, dedup_group)) return 1;
     637             : 
     638             :         return 0;
     639             : }
     640             : 
     641           0 : static int iterate_over_clients(struct conf **globalcs,
     642             :         struct strlist *grouplist, const char *ext, unsigned int maxlinks)
     643             : {
     644           0 :         int ret=0;
     645           0 :         DIR *dirp=NULL;
     646           0 :         struct conf **cconfs=NULL;
     647           0 :         struct dirent *dirinfo=NULL;
     648           0 :         const char *globalclientconfdir=get_string(globalcs[OPT_CLIENTCONFDIR]);
     649             : 
     650           0 :         signal(SIGABRT, &sighandler);
     651           0 :         signal(SIGTERM, &sighandler);
     652           0 :         signal(SIGINT, &sighandler);
     653             : 
     654           0 :         if(!(cconfs=confs_alloc())) return -1;
     655           0 :         if(confs_init(cconfs)) return -1;
     656             : 
     657           0 :         if(!(dirp=opendir(globalclientconfdir)))
     658             :         {
     659           0 :                 logp("Could not opendir '%s': %s\n",
     660           0 :                         globalclientconfdir, strerror(errno));
     661           0 :                 return 0;
     662             :         }
     663           0 :         while((dirinfo=readdir(dirp)))
     664             :         {
     665           0 :                 char *lockfile=NULL;
     666           0 :                 char *lockfilebase=NULL;
     667           0 :                 char *client_lockdir=NULL;
     668           0 :                 struct lock *lock=NULL;
     669             : 
     670           0 :                 if(dirinfo->d_ino==0
     671             :                 // looks_like...() also avoids '.' and '..'.
     672           0 :                   || looks_like_tmp_or_hidden_file(dirinfo->d_name)
     673           0 :                   || !is_regular_file(globalclientconfdir, dirinfo->d_name))
     674           0 :                         continue;
     675             : 
     676           0 :                 confs_free_content(cconfs);
     677           0 :                 if(confs_init(cconfs)) return -1;
     678             : 
     679           0 :                 if(set_string(cconfs[OPT_CNAME], dirinfo->d_name))
     680             :                         return -1;
     681             : 
     682           0 :                 if(conf_load_clientconfdir(globalcs, cconfs))
     683             :                 {
     684           0 :                         logp("could not load config for client %s\n",
     685             :                                 dirinfo->d_name);
     686           0 :                         return 0;
     687             :                 }
     688             : 
     689           0 :                 if(grouplist)
     690             :                 {
     691           0 :                         const char *dedup_group=
     692           0 :                                 get_string(cconfs[OPT_DEDUP_GROUP]);
     693           0 :                         if(!dedup_group
     694           0 :                           || !in_group(grouplist, dedup_group))
     695           0 :                                 continue;
     696             :                 }
     697             : 
     698           0 :                 if(!(client_lockdir=get_string(cconfs[OPT_CLIENT_LOCKDIR])))
     699           0 :                         client_lockdir=get_string(cconfs[OPT_DIRECTORY]);
     700             : 
     701           0 :                 if(!(lockfilebase=prepend_s(client_lockdir, dirinfo->d_name))
     702           0 :                  || !(lockfile=prepend_s(lockfilebase, BEDUP_LOCKFILE_NAME)))
     703             :                 {
     704           0 :                         free_w(&lockfilebase);
     705           0 :                         free_w(&lockfile);
     706           0 :                         ret=-1;
     707           0 :                         break;
     708             :                 }
     709           0 :                 free_w(&lockfilebase);
     710             : 
     711           0 :                 if(!(lock=lock_alloc_and_init(lockfile)))
     712             :                 {
     713             :                         ret=-1;
     714             :                         break;
     715             :                 }
     716           0 :                 lock_get(lock);
     717           0 :                 free_w(&lockfile);
     718             : 
     719           0 :                 if(lock->status!=GET_LOCK_GOT)
     720             :                 {
     721           0 :                         logp("Could not get %s\n", lock->path);
     722           0 :                         continue;
     723             :                 }
     724           0 :                 logp("Got %s\n", lock->path);
     725             : 
     726             :                 // Remember that we got that lock.
     727           0 :                 lock_add_to_list(&locklist, lock);
     728             : 
     729           0 :                 switch(process_dir(get_string(cconfs[OPT_DIRECTORY]),
     730             :                         dirinfo->d_name,
     731             :                         ext, maxlinks, 1 /* burp mode */, 0 /* level */))
     732             :                 {
     733           0 :                         case 0: ccount++;
     734           0 :                         case 1: continue;
     735             :                         default: ret=-1; break;
     736             :                 }
     737             :                 break;
     738             :         }
     739           0 :         closedir(dirp);
     740             : 
     741           0 :         locks_release_and_free(&locklist);
     742             : 
     743           0 :         confs_free(&cconfs);
     744             : 
     745           0 :         return ret;
     746             : }
     747             : 
     748             : static char *get_config_path(void)
     749             : {
     750             :         static char path[256]="";
     751             :         snprintf(path, sizeof(path), "%s", SYSCONFDIR "/burp.conf");
     752             :         return path;
     753             : }
     754             : 
     755           2 : static int usage(void)
     756             : {
     757           2 :         logfmt("\nUsage: %s [options]\n", prog);
     758           2 :         logfmt("\n");
     759           2 :         logfmt(" Options:\n");
     760           2 :         logfmt("  -c <path>                Path to config file (default: %s).\n", get_config_path());
     761           2 :         logfmt("  -g <list of group names> Only run on the directories of clients that\n");
     762           2 :         logfmt("                           are in one of the groups specified.\n");
     763           2 :         logfmt("                           The list is comma-separated. To put a client in a\n");
     764           2 :         logfmt("                           group, use the 'dedup_group' option in the client\n");
     765           2 :         logfmt("                           configuration file on the server.\n");
     766           2 :         logfmt("  -h|-?                    Print this text and exit.\n");
     767           2 :         logfmt("  -d                       Delete any duplicate files found.\n");
     768           2 :         logfmt("                           (non-burp mode only)\n");
     769           2 :         logfmt("  -l                       Hard link any duplicate files found.\n");
     770           2 :         logfmt("  -m <number>              Maximum number of hard links to a single file.\n");
     771           2 :         logfmt("                           (non-burp mode only - in burp mode, use the\n");
     772           2 :         logfmt("                           max_hardlinks option in the configuration file)\n");
     773           2 :         logfmt("                           The default is %d. On ext3, the maximum number\n", DEF_MAX_LINKS);
     774           2 :         logfmt("                           of links possible is 32000, but space is needed\n");
     775           2 :         logfmt("                           for the normal operation of burp.\n");
     776           2 :         logfmt("  -n <list of directories> Non-burp mode. Deduplicate any (set of) directories.\n");
     777           2 :         logfmt("  -v                       Print duplicate paths.\n");
     778           2 :         logfmt("  -V                       Print version and exit.\n");
     779           2 :         logfmt("\n");
     780           2 :         logfmt("By default, %s will read %s and deduplicate client storage\n", prog, get_config_path());
     781           2 :         logfmt("directories using special knowledge of the structure.\n");
     782           2 :         logfmt("\n");
     783           2 :         logfmt("With '-n', this knowledge is turned off and you have to specify the directories\n");
     784           2 :         logfmt("to deduplicate on the command line. Running with '-n' is therefore dangerous\n");
     785           2 :         logfmt("if you are deduplicating burp storage directories.\n\n");
     786           2 :         return 1;
     787             : }
     788             : 
     789          13 : int run_bedup(int argc, char *argv[])
     790             : {
     791          13 :         int i=1;
     792          13 :         int ret=0;
     793          13 :         int option=0;
     794          13 :         int nonburp=0;
     795          13 :         unsigned int maxlinks=DEF_MAX_LINKS;
     796          13 :         char *groups=NULL;
     797          13 :         char ext[16]="";
     798          13 :         int givenconfigfile=0;
     799          13 :         const char *configfile=NULL;
     800             : 
     801          13 :         configfile=get_config_path();
     802          13 :         snprintf(ext, sizeof(ext), ".bedup.%d", getpid());
     803             : 
     804          28 :         while((option=getopt(argc, argv, "c:dg:hlm:nvV?"))!=-1)
     805             :         {
     806          18 :                 switch(option)
     807             :                 {
     808             :                         case 'c':
     809           1 :                                 configfile=optarg;
     810           1 :                                 givenconfigfile=1;
     811           1 :                                 break;
     812             :                         case 'd':
     813           2 :                                 deletedups=1;
     814           2 :                                 break;
     815             :                         case 'g':
     816           1 :                                 groups=optarg;
     817           1 :                                 break;
     818             :                         case 'l':
     819           2 :                                 makelinks=1;
     820           2 :                                 break;
     821             :                         case 'm':
     822           4 :                                 maxlinks=atoi(optarg);
     823           2 :                                 break;
     824             :                         case 'n':
     825           6 :                                 nonburp=1;
     826           6 :                                 break;
     827             :                         case 'V':
     828           1 :                                 logfmt("%s-%s\n", prog, VERSION);
     829           1 :                                 return 0;
     830             :                         case 'v':
     831           1 :                                 verbose=1;
     832           1 :                                 break;
     833             :                         case 'h':
     834             :                         case '?':
     835           2 :                                 return usage();
     836             :                 }
     837             :         }
     838             : 
     839          10 :         if(nonburp && givenconfigfile)
     840             :         {
     841           1 :                 logp("-n and -c options are mutually exclusive\n");
     842           1 :                 return 1;
     843             :         }
     844           9 :         if(nonburp && groups)
     845             :         {
     846           1 :                 logp("-n and -g options are mutually exclusive\n");
     847           1 :                 return 1;
     848             :         }
     849           8 :         if(!nonburp && maxlinks!=DEF_MAX_LINKS)
     850             :         {
     851           1 :                 logp("-m option is specified via the configuration file in burp mode (max_hardlinks=)\n");
     852           1 :                 return 1;
     853             :         }
     854           7 :         if(deletedups && makelinks)
     855             :         {
     856           1 :                 logp("-d and -l options are mutually exclusive\n");
     857           1 :                 return 1;
     858             :         }
     859           6 :         if(deletedups && !nonburp)
     860             :         {
     861           1 :                 logp("-d option requires -n option\n");
     862           1 :                 return 1;
     863             :         }
     864             : 
     865           5 :         if(optind>=argc)
     866             :         {
     867           1 :                 if(nonburp)
     868             :                 {
     869           1 :                         logp("No directories found after options\n");
     870           1 :                         return 1;
     871             :                 }
     872             :         }
     873             :         else
     874             :         {
     875           4 :                 if(!nonburp)
     876             :                 {
     877           1 :                         logp("Do not specify extra arguments.\n");
     878           1 :                         return 1;
     879             :                 }
     880             :         }
     881             : 
     882           3 :         if(maxlinks<2)
     883             :         {
     884           1 :                 logp("The argument to -m needs to be greater than 1.\n");
     885           1 :                 return 1;
     886             :         }
     887             : 
     888           2 :         if(nonburp)
     889             :         {
     890             :                 // Read directories from command line.
     891           2 :                 for(i=optind; i<argc; i++)
     892             :                 {
     893             :                         // Strip trailing slashes, for tidiness.
     894           2 :                         if(argv[i][strlen(argv[i])-1]=='/')
     895           0 :                                 argv[i][strlen(argv[i])-1]='\0';
     896           2 :                         if(process_dir("", argv[i], ext, maxlinks,
     897             :                                 0 /* not burp mode */, 0 /* level */))
     898             :                         {
     899             :                                 ret=1;
     900             :                                 break;
     901             :                         }
     902             :                 }
     903             :         }
     904             :         else
     905             :         {
     906           0 :                 struct conf **globalcs=NULL;
     907           0 :                 struct strlist *grouplist=NULL;
     908           0 :                 struct lock *globallock=NULL;
     909             : 
     910           0 :                 if(groups)
     911             :                 {
     912           0 :                         char *tok=NULL;
     913           0 :                         if((tok=strtok(groups, ",\n")))
     914             :                         {
     915             :                                 do
     916             :                                 {
     917           0 :                                         if(strlist_add(&grouplist, tok, 1))
     918             :                                         {
     919           0 :                                                 log_out_of_memory(__func__);
     920           0 :                                                 return -1;
     921             :                                         }
     922           0 :                                 } while((tok=strtok(NULL, ",\n")));
     923             :                         }
     924           0 :                         if(!grouplist)
     925             :                         {
     926           0 :                                 logp("unable to read list of groups\n");
     927           0 :                                 return -1;
     928             :                         }
     929             :                 }
     930             : 
     931             :                 // Read directories from config files, and get locks.
     932           0 :                 if(!(globalcs=confs_alloc())) return -1;
     933           0 :                 if(confs_init(globalcs)) return -1;
     934           0 :                 if(conf_load_global_only(configfile, globalcs)) return 1;
     935           0 :                 if(get_e_burp_mode(globalcs[OPT_BURP_MODE])!=BURP_MODE_SERVER)
     936             :                 {
     937           0 :                         logp("%s is not a server config file\n", configfile);
     938           0 :                         confs_free(&globalcs);
     939           0 :                         return 1;
     940             :                 }
     941           0 :                 logp("Dedup clients from %s\n",
     942           0 :                         get_string(globalcs[OPT_CLIENTCONFDIR]));
     943           0 :                 maxlinks=get_int(globalcs[OPT_MAX_HARDLINKS]);
     944           0 :                 if(grouplist)
     945             :                 {
     946           0 :                         struct strlist *g=NULL;
     947           0 :                         logp("in dedup groups:\n");
     948           0 :                         for(g=grouplist; g; g=g->next)
     949           0 :                                 logp("%s\n", g->path);
     950             :                 }
     951             :                 else
     952             :                 {
     953           0 :                         char *lockpath=NULL;
     954           0 :                         const char *opt_lockfile=confs_get_lockfile(globalcs);
     955             :                         // Only get the global lock when doing a global run.
     956             :                         // If you are doing individual groups, you are likely
     957             :                         // to want to do many different dedup jobs and a
     958             :                         // global lock would get in the way.
     959           0 :                         if(!(lockpath=prepend(opt_lockfile, ".bedup"))
     960           0 :                           || !(globallock=lock_alloc_and_init(lockpath)))
     961           0 :                                 return 1;
     962           0 :                         lock_get(globallock);
     963           0 :                         if(globallock->status!=GET_LOCK_GOT)
     964             :                         {
     965           0 :                                 logp("Could not get lock %s (%d)\n", lockpath,
     966             :                                         globallock->status);
     967           0 :                                 free_w(&lockpath);
     968           0 :                                 return 1;
     969             :                         }
     970           0 :                         logp("Got %s\n", lockpath);
     971             :                 }
     972           0 :                 ret=iterate_over_clients(globalcs, grouplist, ext, maxlinks);
     973           0 :                 confs_free(&globalcs);
     974             : 
     975           0 :                 lock_release(globallock);
     976           0 :                 lock_free(&globallock);
     977           0 :                 strlists_free(&grouplist);
     978             :         }
     979             : 
     980           2 :         if(!nonburp)
     981             :         {
     982           0 :                 logp("%d client storages scanned\n", ccount);
     983             :         }
     984           2 :         logp("%" PRIu64 " duplicate %s found\n",
     985           2 :                 count, count==1?"file":"files");
     986           6 :         logp("%" PRIu64 " bytes %s%s\n",
     987           3 :                 savedbytes, (makelinks || deletedups)?"saved":"saveable",
     988             :                         bytes_to_human(savedbytes));
     989           2 :         mystruct_delete_all();
     990           2 :         return ret;
     991             : }

Generated by: LCOV version 1.10