LCOV - code coverage report
Current view: top level - src/server - bedup.c (source / functions) Hit Total Coverage
Test: burp-coverage-clean.info Lines: 244 463 52.7 %
Date: 2022-08-30 22:36:43 Functions: 16 24 66.7 %

          Line data    Source code
       1             : #include "../burp.h"
       2             : #include "../alloc.h"
       3             : #include "../conf.h"
       4             : #include "../conffile.h"
       5             : #include "../handy.h"
       6             : #include "../fsops.h"
       7             : #include "../fzp.h"
       8             : #include "../lock.h"
       9             : #include "../log.h"
      10             : #include "../md5.h"
      11             : #include "../prepend.h"
      12             : #include "../strlist.h"
      13             : #include "bedup.h"
      14             : 
      15             : #include <uthash.h>
      16             : 
      17             : #define LOCKFILE_NAME           "lockfile" // Top-level name skipped by level_exclusion().
      18             : #define BEDUP_LOCKFILE_NAME     "lockfile.bedup" // Also skipped by level_exclusion() at level 0.
      19             : 
      20             : #define DEF_MAX_LINKS           10000 // Initial value of maxlinks.
      21             : 
      22             : static int makelinks=0; // Non-zero: check_files() hardlinks duplicates to the remembered file.
      23             : static int deletedups=0; // Non-zero (and makelinks zero): check_files() unlinks duplicates.
      24             : 
      25             : static uint64_t savedbytes=0; // Running total of st_size for duplicates, added up in check_files().
      26             : static uint64_t count=0; // Number of duplicate files found by check_files().
      27             : static int ccount=0; // Not referenced in the visible part of this file.
      28             : 
      29             : static struct lock *locklist=NULL; // Released by sighandler() via locks_release_and_free().
      30             : 
      31             : static int verbose=0; // Non-zero: check_files() prints the path of each duplicate.
      32             : 
      33             : static unsigned int maxlinks=DEF_MAX_LINKS; // Link-count threshold compared against nlink in check_files().
      34             : static char ext[16]=""; // Suffix passed to prepend() to build the temporary link path in do_hardlink().
      35             : 
      36             : typedef struct file file_t;
      37             : 
      38             : struct file // One remembered file; entries of equal size are chained through 'next'.
      39             : {
      40             :         char *path; // Heap string released with free_w(); NULL marks an entry to be skipped.
      41             :         dev_t dev; // st_dev from lstat().
      42             :         ino_t ino; // st_ino from lstat().
      43             :         nlink_t nlink; // st_nlink from lstat(); incremented after a successful hardlink.
      44             :         uint64_t full_cksum; // Leading bytes of the whole-file md5; 0 means not computed yet.
      45             :         uint64_t part_cksum; // Leading bytes of the md5 of the first PART_CHUNK bytes; 0 means not computed yet.
      46             :         file_t *next; // Next entry in the same list, or NULL.
      47             : };
      48             : 
      49             : struct mystruct // Hash table entry grouping the files recorded under one size key.
      50             : {
      51             :         off_t st_size; // Hash key: the file size.
      52             :         file_t *files; // Head of the list of files recorded under this key.
      53             :         UT_hash_handle hh; // uthash bookkeeping handle.
      54             : };
      55             : 
      56             : struct mystruct *myfiles=NULL; // Head of the uthash table; reset to NULL by mystruct_delete_all().
      57             : 
      58           9 : static struct mystruct *find_key(off_t st_size)
      59             : {
      60             :         struct mystruct *s;
      61             : 
      62           9 :         HASH_FIND_INT(myfiles, &st_size, s);
      63           9 :         return s;
      64             : }
      65             : 
      66           3 : static int add_file(struct mystruct *s, struct file *f)
      67             : {
      68             :         struct file *newfile;
      69           3 :         if(!(newfile=(struct file *)malloc_w(sizeof(struct file), __func__)))
      70             :                 return -1;
      71           3 :         memcpy(newfile, f, sizeof(struct file));
      72           3 :         f->path=NULL;
      73           3 :         newfile->next=s->files;
      74           3 :         s->files=newfile;
      75             :         return 0;
      76             : }
      77             : 
      78           3 : static int add_key(off_t st_size, struct file *f)
      79             : {
      80             :         struct mystruct *s;
      81             : 
      82           3 :         if(!(s=(struct mystruct *)malloc_w(sizeof(struct mystruct), __func__)))
      83             :                 return -1;
      84           3 :         s->st_size=st_size;
      85           3 :         s->files=NULL;
      86           3 :         if(add_file(s, f)) return -1;
      87             : //printf("HASH ADD %d\n", st_size);
      88           3 :         HASH_ADD_INT(myfiles, st_size, s);
      89             :         return 0;
      90             : }
      91             : 
      92             : static void file_free_content(struct file *file)
      93             : {
      94             :         if(!file) return;
      95           3 :         free_w(&file->path);
      96             : }
      97             : 
      98           3 : static void file_free(struct file **file)
      99             : {
     100           3 :         if(!file || !*file) return;
     101           6 :         file_free_content(*file);
     102           3 :         free_v((void **)file);
     103             : }
     104             : 
     105           3 : static void files_free(struct file **files)
     106             : {
     107             :         struct file *f;
     108             :         struct file *fhead;
     109           3 :         if(!files || !*files) return;
     110             :         fhead=*files;
     111           6 :         while(fhead)
     112             :         {
     113           3 :                 f=fhead;
     114           3 :                 fhead=fhead->next;
     115           3 :                 file_free(&f);
     116             :         }
     117             : }
     118             : 
     119             : static void mystruct_free_content(struct mystruct *mystruct)
     120             : {
     121             :         if(!mystruct) return;
     122           3 :         files_free(&mystruct->files);
     123             : }
     124             : 
     125           3 : static void mystruct_free(struct mystruct **mystruct)
     126             : {
     127           3 :         if(!mystruct || !*mystruct) return;
     128           6 :         mystruct_free_content(*mystruct);
     129           3 :         free_v((void **)mystruct);
     130             : }
     131             : 
     132           3 : static void mystruct_delete_all(void)
     133             : {
     134             :         struct mystruct *tmp;
     135             :         struct mystruct *mystruct;
     136             : 
     137           6 :         HASH_ITER(hh, myfiles, mystruct, tmp)
     138             :         {
     139           3 :                 HASH_DEL(myfiles, mystruct);
     140           3 :                 mystruct_free(&mystruct);
     141             :         }
     142           3 :         myfiles=NULL;
     143           3 : }
     144             : 
     145             : #define FULL_CHUNK      4096
     146             : 
     147           6 : static int full_match(struct file *o, struct file *n,
     148             :         struct fzp **ofp, struct fzp **nfp)
     149             : {
     150             :         size_t ogot;
     151             :         size_t ngot;
     152           6 :         unsigned int i=0;
     153             :         static char obuf[FULL_CHUNK];
     154             :         static char nbuf[FULL_CHUNK];
     155             : 
     156           6 :         if(*ofp) fzp_seek(*ofp, 0, SEEK_SET);
     157           0 :         else if(!(*ofp=fzp_open(o->path, "rb")))
     158             :         {
     159             :                 // Blank this entry so that it can be ignored from
     160             :                 // now on.
     161           0 :                 free_w(&o->path);
     162             :                 return 0;
     163             :         }
     164             : 
     165           6 :         if(*nfp) fzp_seek(*nfp, 0, SEEK_SET);
     166           3 :         else if(!(*nfp=fzp_open(n->path, "rb"))) return 0;
     167             : 
     168             :         while(1)
     169             :         {
     170           6 :                 ogot=fzp_read(*ofp, obuf, FULL_CHUNK);
     171           6 :                 ngot=fzp_read(*nfp, nbuf, FULL_CHUNK);
     172           6 :                 if(ogot!=ngot) return 0;
     173          60 :                 for(i=0; i<ogot; i++)
     174          60 :                         if(obuf[i]!=nbuf[i]) return 0;
     175           6 :                 if(ogot<FULL_CHUNK) break;
     176             :         }
     177             : 
     178             :         return 1;
     179             : }
     180             : 
     181             : #define PART_CHUNK      1024
     182             : 
     183           9 : static int get_part_cksum(struct file *f, struct fzp **fzp)
     184             : {
     185           9 :         struct md5 *md5=NULL;
     186           9 :         int ret=-1;
     187           9 :         int got=0;
     188             :         static char buf[PART_CHUNK];
     189             :         unsigned char checksum[MD5_DIGEST_LENGTH+1];
     190             : 
     191           9 :         if(*fzp) fzp_seek(*fzp, 0, SEEK_SET);
     192           9 :         else if(!(*fzp=fzp_open(f->path, "rb")))
     193             :         {
     194           0 :                 f->part_cksum=0;
     195           0 :                 return 0;
     196             :         }
     197             : 
     198           9 :         if(!(md5=md5_alloc(__func__)))
     199             :                 goto end;
     200           9 :         if(!md5_init(md5))
     201             :         {
     202           0 :                 logp("md5_init() failed\n");
     203           0 :                 goto end;
     204             :         }
     205             : 
     206           9 :         got=fzp_read(*fzp, buf, PART_CHUNK);
     207             : 
     208           9 :         if(!md5_update(md5, buf, got))
     209             :         {
     210           0 :                 logp("md5_update() failed\n");
     211           0 :                 goto end;
     212             :         }
     213             : 
     214           9 :         if(!md5_final(md5, checksum))
     215             :         {
     216           0 :                 logp("md5_final() failed\n");
     217           0 :                 goto end;
     218             :         }
     219             : 
     220           9 :         memcpy(&(f->part_cksum), checksum, sizeof(unsigned));
     221             : 
     222             :         // Try for a bit of efficiency - no need to calculate the full checksum
     223             :         // again if we already read the whole file.
     224           9 :         if(got<PART_CHUNK) f->full_cksum=f->part_cksum;
     225             : 
     226             :         ret=0;
     227             : end:
     228           9 :         md5_free(&md5);
     229           9 :         return ret;
     230             : }
     231             : 
     232           0 : static int get_full_cksum(struct file *f, struct fzp **fzp)
     233             : {
     234           0 :         size_t s=0;
     235           0 :         int ret=-1;
     236           0 :         struct md5 *md5=NULL;
     237             :         static char buf[FULL_CHUNK];
     238             :         unsigned char checksum[MD5_DIGEST_LENGTH+1];
     239             : 
     240           0 :         if(*fzp) fzp_seek(*fzp, 0, SEEK_SET);
     241           0 :         else if(!(*fzp=fzp_open(f->path, "rb")))
     242             :         {
     243           0 :                 f->full_cksum=0;
     244           0 :                 return 0;
     245             :         }
     246             : 
     247           0 :         if(!(md5=md5_alloc(__func__)))
     248             :                 goto end;
     249           0 :         if(!md5_init(md5))
     250             :         {
     251           0 :                 logp("md5_init() failed\n");
     252           0 :                 goto end;
     253             :         }
     254             : 
     255           0 :         while((s=fzp_read(*fzp, buf, FULL_CHUNK))>0)
     256             :         {
     257           0 :                 if(!md5_update(md5, buf, s))
     258             :                 {
     259           0 :                         logp("md5_update() failed\n");
     260           0 :                         goto end;
     261             :                 }
     262           0 :                 if(s<FULL_CHUNK) break;
     263             :         }
     264             : 
     265           0 :         if(!md5_final(md5, checksum))
     266             :         {
     267           0 :                 logp("md5_final() failed\n");
     268           0 :                 goto end;
     269             :         }
     270             : 
     271           0 :         memcpy(&(f->full_cksum), checksum, sizeof(unsigned));
     272             : 
     273           0 :         ret=0;
     274             : end:
     275           0 :         md5_free(&md5);
     276           0 :         return ret;
     277             : }
     278             : 
     279             : /* Make it atomic by linking to a temporary file, then moving it into place. */
     280           3 : static int do_hardlink(struct file *o, struct file *n)
     281             : {
     282           3 :         int ret=-1;
     283           3 :         char *tmppath=NULL;
     284           3 :         if(!(tmppath=prepend(o->path, ext)))
     285             :         {
     286           0 :                 log_out_of_memory(__func__);
     287             :                 goto end;
     288             :         }
     289           3 :         if(link(n->path, tmppath))
     290             :         {
     291           0 :                 logp("Could not hardlink %s to %s: %s\n", tmppath, n->path,
     292           0 :                         strerror(errno));
     293             :                 goto end;
     294             :         }
     295           3 :         if((ret=do_rename(tmppath, o->path)))
     296             :         {
     297             :                 // 'man 2 rename', says it should be safe to unlink tmppath:
     298             :                 // "If newpath exists but the operation fails for some reason,
     299             :                 // rename() guarantees to leave an instance of newpath in
     300             :                 // place."
     301           0 :                 if(unlink(tmppath))
     302           0 :                         logp("Could not unlink %s\n", tmppath);
     303             :                 goto end;
     304             :         }
     305             :         ret=0;
     306             : end:
     307           3 :         free_w(&tmppath);
     308           3 :         return ret;
     309             : }
     310             : 
     311           2 : static void reset_old_file(struct file *oldfile, struct file *newfile,
     312             :         struct stat *info)
     313             : {
     314             :         //printf("reset %s with %s %d\n", oldfile->path, newfile->path,
     315             :         //      info->st_nlink);
     316             :         struct file *next;
     317             : 
     318           2 :         next=oldfile->next;
     319           2 :         free_w(&oldfile->path);
     320           2 :         memcpy(oldfile, newfile, sizeof(struct file));
     321           2 :         oldfile->next=next;
     322           2 :         newfile->path=NULL;
     323           2 : }
     324             : 
     325           6 : static int check_files(struct mystruct *find, struct file *newfile,
     326             :         struct stat *info)
     327             : {
     328           6 :         int found=0;
     329           6 :         struct fzp *nfp=NULL;
     330           6 :         struct fzp *ofp=NULL;
     331           6 :         struct file *f=NULL;
     332             : 
     333           6 :         for(f=find->files; f; f=f->next)
     334             :         {
     335             : //printf("  against: '%s'\n", f->path);
     336           6 :                 if(!f->path)
     337             :                 {
     338             :                         // If the full_match() function fails to open oldfile
     339             :                         // (which could happen if burp deleted some old
     340             :                         // directories), it will free path and set it to NULL.
     341             :                         // Skip entries like this.
     342           0 :                         continue;
     343             :                 }
     344           6 :                 if(newfile->dev!=f->dev)
     345             :                 {
     346             :                         // Different device.
     347           0 :                         continue;
     348             :                 }
     349           6 :                 if(newfile->ino==f->ino)
     350             :                 {
     351             :                         // Same device, same inode, therefore these two files
     352             :                         // are hardlinked to each other already.
     353             :                         found++;
     354             :                         break;
     355             :                 }
     356           6 :                 if(newfile->nlink>=maxlinks) {
     357             :                         // This new file file has enough links. Just leave it
     358             :                         // as it is to avoid undoing all these hardlinks.
     359             :                         found++;
     360             :                         break;
     361             :                 }
     362           6 :                 if((!newfile->part_cksum && get_part_cksum(newfile, &nfp))
     363           6 :                   || (!f->part_cksum && get_part_cksum(f, &ofp)))
     364             :                 {
     365             :                         // Some error with md5sums Give up.
     366             :                         return -1;
     367             :                 }
     368           6 :                 if(newfile->part_cksum!=f->part_cksum)
     369             :                 {
     370           0 :                         fzp_close(&ofp);
     371           0 :                         continue;
     372             :                 }
     373             :                 //printf("  %s, %s\n", find->files->path, newfile->path);
     374             :                 //printf("  part cksum matched\n");
     375             : 
     376           6 :                 if((!newfile->full_cksum && get_full_cksum(newfile, &nfp))
     377           6 :                   || (!f->full_cksum && get_full_cksum(f, &ofp)))
     378             :                 {
     379             :                         // Some error with md5sums Give up.
     380             :                         return -1;
     381             :                 }
     382           6 :                 if(newfile->full_cksum!=f->full_cksum)
     383             :                 {
     384           0 :                         fzp_close(&ofp);
     385           0 :                         continue;
     386             :                 }
     387             : 
     388             :                 //printf("  full cksum matched\n");
     389           6 :                 if(!full_match(newfile, f, &nfp, &ofp))
     390             :                 {
     391           0 :                         fzp_close(&ofp);
     392           0 :                         continue;
     393             :                 }
     394             :                 //printf("  full match\n");
     395             :                 //printf("%s, %s\n", find->files->path, newfile->path);
     396             : 
     397             :                 // If there are already enough links to this file, replace
     398             :                 // our memory of it with the new file so that files later on
     399             :                 // can link to the new one. 
     400           6 :                 if(f->nlink>=maxlinks)
     401             :                 {
     402             :                         // Just need to reset the path name and the number
     403             :                         // of links, and pretend that it was found otherwise
     404             :                         // NULL newfile will get added to the memory.
     405           2 :                         reset_old_file(f, newfile, info);
     406           2 :                         found++;
     407             :                         break;
     408             :                 }
     409             : 
     410           4 :                 found++;
     411           4 :                 count++;
     412             : 
     413           4 :                 if(verbose) printf("%s\n", newfile->path);
     414             : 
     415             :                 // Now hardlink it.
     416           4 :                 if(makelinks)
     417             :                 {
     418           3 :                         if(do_hardlink(newfile, f))
     419             :                         {
     420           0 :                                 count--;
     421             :                                 return -1;
     422             :                         }
     423           3 :                         f->nlink++;
     424             :                         // Only count bytes as saved if we
     425             :                         // removed the last link.
     426           3 :                         if(newfile->nlink==1)
     427           3 :                                 savedbytes+=info->st_size;
     428             :                 }
     429           1 :                 else if(deletedups)
     430             :                 {
     431           0 :                         if(unlink(newfile->path))
     432             :                         {
     433           0 :                                 logp("Could not delete %s: %s\n",
     434           0 :                                         newfile->path, strerror(errno));
     435             :                         }
     436             :                         else
     437             :                         {
     438             :                                 // Only count bytes as saved if we removed the
     439             :                                 // last link.
     440           0 :                                 if(newfile->nlink==1)
     441           0 :                                         savedbytes+=info->st_size;
     442             :                         }
     443             :                 }
     444             :                 else
     445             :                 {
     446             :                         // To be able to tell how many bytes
     447             :                         // are saveable.
     448           1 :                         savedbytes+=info->st_size;
     449             :                 }
     450             : 
     451             :                 break;
     452             :         }
     453           6 :         fzp_close(&nfp);
     454           6 :         fzp_close(&ofp);
     455             : 
     456           6 :         if(found)
     457             :         {
     458           6 :                 free_w(&newfile->path);
     459             :                 return 0;
     460             :         }
     461             : 
     462           0 :         if(add_file(find, newfile)) return -1;
     463             : 
     464             :         return 0;
     465             : }
     466             : 
     467           0 : static int looks_like_ours(const char *basedir)
     468             : {
     469           0 :         int ret=-1;
     470           0 :         char *tmp=NULL;
     471           0 :         if(!(tmp=prepend_s(basedir, "current")))
     472             :         {
     473           0 :                 log_out_of_memory(__func__);
     474           0 :                 goto end;
     475             :         }
     476             :         // If there is a 'current' symlink here, we think it looks like a
     477             :         // one of our storage directories.
     478           0 :         if(is_lnk_lstat(tmp)>0)
     479             :         {
     480             :                 ret=1;
     481             :                 goto end;
     482             :         }
     483           0 :         ret=0;
     484             : end:
     485           0 :         free_w(&tmp);
     486           0 :         return ret;
     487             : }
     488             : 
     489           0 : static int get_link(const char *basedir, const char *lnk, char real[], size_t r)
     490             : {
     491           0 :         readlink_w_in_dir(basedir, lnk, real, r);
     492             :         // Strip any trailing slash.
     493           0 :         if(real[strlen(real)-1]=='/')
     494           0 :                 real[strlen(real)-1]='\0';
     495           0 :         return 0;
     496             : }
     497             : 
     498           0 : static int level_exclusion(int level, const char *fname,
     499             :         const char *working, const char *finishing)
     500             : {
     501           0 :         if(level==0)
     502             :         {
     503             :                 /* Be careful not to try to dedup the lockfiles.
     504             :                    The lock actually gets lost if you open one to do a
     505             :                    checksum
     506             :                    and then close it. This caused me major headaches to
     507             :                    figure out. */
     508           0 :                 if(!strcmp(fname, LOCKFILE_NAME)
     509           0 :                   || !strcmp(fname, BEDUP_LOCKFILE_NAME))
     510             :                         return 1;
     511             : 
     512             :                 /* Skip places where backups are going on. */
     513           0 :                 if(!strcmp(fname, working)
     514           0 :                   || !strcmp(fname, finishing))
     515             :                         return 1;
     516             : 
     517           0 :                 if(!strcmp(fname, "deleteme"))
     518             :                         return 1;
     519             :         }
     520           0 :         else if(level==1)
     521             :         {
     522             :                 // Do not dedup stuff that might be appended to later.
     523           0 :                 if(!strncmp(fname, "log", strlen("log"))
     524           0 :                   || !strncmp(fname, "verifylog", strlen("verifylog"))
     525           0 :                   || !strncmp(fname, "restorelog", strlen("restorelog")))
     526             :                         return 1;
     527             :         }
     528           0 :         return 0;
     529             : }
     530             : 
     531             : // Return 0 for directory processed, -1 for error, 1 for not processed.
     532           3 : static int process_dir(const char *oldpath, const char *newpath,
     533             :         int burp_mode, int level)
     534             : {
     535           3 :         int ret=-1;
     536           3 :         DIR *dirp=NULL;
     537           3 :         char *path=NULL;
     538             :         struct stat info;
     539           3 :         struct dirent *dirinfo=NULL;
     540             :         struct file newfile;
     541           3 :         struct mystruct *find=NULL;
     542             :         static char working[256]="";
     543             :         static char finishing[256]="";
     544             : 
     545           3 :         newfile.path=NULL;
     546             : 
     547           3 :         if(!(path=prepend_s(oldpath, newpath))) goto end;
     548             : 
     549           3 :         if(burp_mode && level==0)
     550             :         {
     551           0 :                 if(get_link(path, "working", working, sizeof(working))
     552           0 :                   || get_link(path, "finishing", finishing, sizeof(finishing)))
     553             :                         goto end;
     554           0 :                 if(!looks_like_ours(path))
     555             :                 {
     556           0 :                         logp("%s does not look like one of our storage directories - skipping\n", path);
     557           0 :                         ret=1;
     558           0 :                         goto end;
     559             :                 }
     560             :         }
     561             : 
     562           3 :         if(!(dirp=opendir(path)))
     563             :         {
     564           0 :                 logp("Could not opendir '%s': %s\n", path, strerror(errno));
     565           0 :                 ret=1;
     566           0 :                 goto end;
     567             :         }
     568          18 :         while((dirinfo=readdir(dirp)))
     569             :         {
     570          15 :                 if(!strcmp(dirinfo->d_name, ".")
     571          12 :                   || !strcmp(dirinfo->d_name, ".."))
     572           6 :                         continue;
     573             : 
     574             :                 //printf("try %s\n", dirinfo->d_name);
     575             : 
     576           9 :                 if(burp_mode
     577           0 :                   && level_exclusion(level, dirinfo->d_name,
     578             :                         working, finishing))
     579           0 :                                 continue;
     580             : 
     581           9 :                 free_w(&newfile.path);
     582           9 :                 if(!(newfile.path=prepend_s(path, dirinfo->d_name)))
     583             :                         goto end;
     584             : 
     585          18 :                 if(lstat(newfile.path, &info))
     586           0 :                         continue;
     587             : 
     588           9 :                 if(S_ISDIR(info.st_mode))
     589             :                 {
     590           0 :                         if(process_dir(path, dirinfo->d_name,
     591             :                                 burp_mode, level+1))
     592             :                                         goto end;
     593           0 :                         continue;
     594             :                 }
     595           9 :                 else if(!S_ISREG(info.st_mode)
     596           9 :                   || !info.st_size) // ignore zero-length files
     597           0 :                         continue;
     598             : 
     599           9 :                 newfile.dev=info.st_dev;
     600           9 :                 newfile.ino=info.st_ino;
     601           9 :                 newfile.nlink=info.st_nlink;
     602           9 :                 newfile.full_cksum=0;
     603           9 :                 newfile.part_cksum=0;
     604           9 :                 newfile.next=NULL;
     605             : 
     606           9 :                 if((find=find_key(info.st_size)))
     607             :                 {
     608             :                         //printf("check %d: %s\n", info.st_size, newfile.path);
     609           6 :                         if(check_files(find, &newfile, &info))
     610             :                                 goto end;
     611             :                 }
     612             :                 else
     613             :                 {
     614             :                         //printf("add: %s\n", newfile.path);
     615           3 :                         if(add_key(info.st_size, &newfile))
     616             :                                 goto end;
     617             :                 }
     618             :         }
     619             :         ret=0;
     620             : end:
     621           3 :         if(dirp) closedir(dirp);
     622           3 :         free_w(&newfile.path);
     623           3 :         free_w(&path);
     624           3 :         return ret;
     625             : }
     626             : 
     627           0 : static void sighandler(__attribute__ ((unused)) int signum)
     628             : {
     629           0 :         locks_release_and_free(&locklist);
     630           0 :         exit(1);
     631             : }
     632             : 
     633           0 : static int is_regular_file(const char *clientconfdir, const char *file)
     634             : {
     635             :         struct stat statp;
     636           0 :         char *fullpath=NULL;
     637           0 :         if(!(fullpath=prepend_s(clientconfdir, file)))
     638             :                 return 0;
     639           0 :         if(lstat(fullpath, &statp))
     640             :         {
     641           0 :                 free_w(&fullpath);
     642           0 :                 return 0;
     643             :         }
     644           0 :         free_w(&fullpath);
     645           0 :         return S_ISREG(statp.st_mode);
     646             : }
     647             : 
     648             : static int in_group(struct strlist *grouplist, const char *dedup_group)
     649             : {
     650             :         struct strlist *g;
     651             : 
     652           0 :         for(g=grouplist; g; g=g->next)
     653           0 :                 if(!strcmp(g->path, dedup_group)) return 1;
     654             : 
     655             :         return 0;
     656             : }
     657             : 
     658           0 : static int iterate_over_clients(struct conf **globalcs,
     659             :         struct strlist *grouplist)
     660             : {
     661           0 :         int ret=0;
     662           0 :         DIR *dirp=NULL;
     663           0 :         struct conf **cconfs=NULL;
     664           0 :         struct dirent *dirinfo=NULL;
     665           0 :         const char *globalclientconfdir=get_string(globalcs[OPT_CLIENTCONFDIR]);
     666             : 
     667           0 :         if(!(cconfs=confs_alloc())) return -1;
     668           0 :         if(confs_init(cconfs)) return -1;
     669             : 
     670           0 :         if(!(dirp=opendir(globalclientconfdir)))
     671             :         {
     672           0 :                 logp("Could not opendir '%s': %s\n",
     673           0 :                         globalclientconfdir, strerror(errno));
     674           0 :                 return 0;
     675             :         }
     676           0 :         while((dirinfo=readdir(dirp)))
     677             :         {
     678           0 :                 char *lockfile=NULL;
     679           0 :                 char *lockfilebase=NULL;
     680           0 :                 char *client_lockdir=NULL;
     681           0 :                 struct lock *lock=NULL;
     682             : 
     683           0 :                 if(dirinfo->d_ino==0
     684           0 :                   || !cname_valid(dirinfo->d_name)
     685           0 :                   || !is_regular_file(globalclientconfdir, dirinfo->d_name))
     686           0 :                         continue;
     687             : 
     688           0 :                 confs_free_content(cconfs);
     689           0 :                 if(confs_init(cconfs)) return -1;
     690             : 
     691           0 :                 if(set_string(cconfs[OPT_CNAME], dirinfo->d_name))
     692             :                         return -1;
     693             : 
     694           0 :                 if(conf_load_clientconfdir(globalcs, cconfs))
     695             :                 {
     696           0 :                         logp("could not load config for client %s\n",
     697             :                                 dirinfo->d_name);
     698           0 :                         return 0;
     699             :                 }
     700             : 
     701           0 :                 if(grouplist)
     702             :                 {
     703           0 :                         const char *dedup_group=
     704           0 :                                 get_string(cconfs[OPT_DEDUP_GROUP]);
     705           0 :                         if(!dedup_group
     706           0 :                           || !in_group(grouplist, dedup_group))
     707           0 :                                 continue;
     708             :                 }
     709             : 
     710           0 :                 if(!(client_lockdir=get_string(cconfs[OPT_CLIENT_LOCKDIR])))
     711           0 :                         client_lockdir=get_string(cconfs[OPT_DIRECTORY]);
     712             : 
     713           0 :                 if(!(lockfilebase=prepend_s(client_lockdir, dirinfo->d_name))
     714           0 :                  || !(lockfile=prepend_s(lockfilebase, BEDUP_LOCKFILE_NAME)))
     715             :                 {
     716           0 :                         free_w(&lockfilebase);
     717           0 :                         free_w(&lockfile);
     718           0 :                         ret=-1;
     719           0 :                         break;
     720             :                 }
     721           0 :                 free_w(&lockfilebase);
     722             : 
     723           0 :                 if(!(lock=lock_alloc_and_init(lockfile)))
     724             :                 {
     725             :                         ret=-1;
     726             :                         break;
     727             :                 }
     728           0 :                 lock_get(lock);
     729           0 :                 free_w(&lockfile);
     730             : 
     731           0 :                 if(lock->status!=GET_LOCK_GOT)
     732             :                 {
     733           0 :                         logp("Could not get %s\n", lock->path);
     734           0 :                         continue;
     735             :                 }
     736           0 :                 logp("Got %s\n", lock->path);
     737             : 
     738             :                 // Remember that we got that lock.
     739           0 :                 lock_add_to_list(&locklist, lock);
     740             : 
     741           0 :                 switch(process_dir(get_string(cconfs[OPT_DIRECTORY]),
     742             :                         dirinfo->d_name,
     743             :                         1 /* burp mode */, 0 /* level */))
     744             :                 {
     745           0 :                         case 0: ccount++;
     746           0 :                         case 1: continue;
     747             :                         default: ret=-1; break;
     748             :                 }
     749             :                 break;
     750             :         }
     751           0 :         closedir(dirp);
     752             : 
     753           0 :         locks_release_and_free(&locklist);
     754             : 
     755           0 :         confs_free(&cconfs);
     756             : 
     757           0 :         return ret;
     758             : }
     759             : 
     760           0 : static int process_from_conf(const char *configfile, char **groups)
     761             : {
     762           0 :         int ret=-1;
     763           0 :         struct conf **globalcs=NULL;
     764           0 :         struct strlist *grouplist=NULL;
     765           0 :         struct lock *globallock=NULL;
     766             : 
     767           0 :         signal(SIGABRT, &sighandler);
     768           0 :         signal(SIGTERM, &sighandler);
     769           0 :         signal(SIGINT, &sighandler);
     770             : 
     771           0 :         if(*groups)
     772             :         {
     773           0 :                 char *tok=NULL;
     774           0 :                 if((tok=strtok(*groups, ",\n")))
     775             :                 {
     776             :                         do
     777             :                         {
     778           0 :                                 if(strlist_add(&grouplist, tok, 1))
     779             :                                 {
     780           0 :                                         log_out_of_memory(__func__);
     781           0 :                                         goto end;
     782             :                                 }
     783           0 :                         } while((tok=strtok(NULL, ",\n")));
     784             :                 }
     785           0 :                 if(!grouplist)
     786             :                 {
     787           0 :                         logp("unable to read list of groups\n");
     788           0 :                         goto end;
     789             :                 }
     790             :         }
     791             : 
     792             :         // Read directories from config files, and get locks.
     793           0 :         if(!(globalcs=confs_alloc())
     794           0 :           || confs_init(globalcs)
     795           0 :           || conf_load_global_only(configfile, globalcs))
     796             :                 goto end;
     797             : 
     798           0 :         if(get_e_burp_mode(globalcs[OPT_BURP_MODE])!=BURP_MODE_SERVER)
     799             :         {
     800           0 :                 logp("%s is not a server config file\n", configfile);
     801           0 :                 goto end;
     802             :         }
     803           0 :         logp("Dedup clients from %s\n",
     804           0 :                 get_string(globalcs[OPT_CLIENTCONFDIR]));
     805           0 :         maxlinks=get_int(globalcs[OPT_MAX_HARDLINKS]);
     806           0 :         if(grouplist)
     807             :         {
     808           0 :                 struct strlist *g=NULL;
     809           0 :                 logp("in dedup groups:\n");
     810           0 :                 for(g=grouplist; g; g=g->next)
     811           0 :                         logp("%s\n", g->path);
     812             :         }
     813             :         else
     814             :         {
     815           0 :                 char *lockpath=NULL;
     816           0 :                 const char *opt_lockfile=confs_get_lockfile(globalcs);
     817             :                 // Only get the global lock when doing a global run.
     818             :                 // If you are doing individual groups, you are likely
     819             :                 // to want to do many different dedup jobs and a
     820             :                 // global lock would get in the way.
     821           0 :                 if(!(lockpath=prepend(opt_lockfile, ".bedup"))
     822           0 :                   || !(globallock=lock_alloc_and_init(lockpath)))
     823             :                         goto end;
     824           0 :                 lock_get(globallock);
     825           0 :                 if(globallock->status!=GET_LOCK_GOT)
     826             :                 {
     827           0 :                         logp("Could not get lock %s (%d)\n", lockpath,
     828             :                                 globallock->status);
     829           0 :                         free_w(&lockpath);
     830           0 :                         goto end;
     831             :                 }
     832           0 :                 logp("Got %s\n", lockpath);
     833             :         }
     834           0 :         ret=iterate_over_clients(globalcs, grouplist);
     835             : end:
     836           0 :         confs_free(&globalcs);
     837           0 :         lock_release(globallock);
     838           0 :         lock_free(&globallock);
     839           0 :         strlists_free(&grouplist);
     840           0 :         return ret;
     841             : }
     842             : 
     843           3 : static int process_from_command_line(int argc, char *argv[])
     844             : {
     845             :         int i;
     846           6 :         for(i=optind; i<argc; i++)
     847             :         {
     848             :                 // Strip trailing slashes, for tidiness.
     849           3 :                 if(argv[i][strlen(argv[i])-1]=='/')
     850           0 :                         argv[i][strlen(argv[i])-1]='\0';
     851           3 :                 if(process_dir("", argv[i],
     852             :                         0 /* not burp mode */, 0 /* level */))
     853             :                                 return 1;
     854             :         }
     855             :         return  0;
     856             : }
     857             : 
     858           2 : static int usage(void)
     859             : {
     860           2 :         logfmt("\nUsage: %s [options]\n", prog);
     861           2 :         logfmt("\n");
     862           2 :         logfmt(" Options:\n");
     863           2 :         logfmt("  -c <path>                Path to config file (default: %s).\n", config_default_path());
     864           2 :         logfmt("  -g <list of group names> Only run on the directories of clients that\n");
     865           2 :         logfmt("                           are in one of the groups specified.\n");
     866           2 :         logfmt("                           The list is comma-separated. To put a client in a\n");
     867           2 :         logfmt("                           group, use the 'dedup_group' option in the client\n");
     868           2 :         logfmt("                           configuration file on the server.\n");
     869           2 :         logfmt("  -h|-?                    Print this text and exit.\n");
     870           2 :         logfmt("  -d                       Delete any duplicate files found.\n");
     871           2 :         logfmt("                           (non-%s mode only)\n", PACKAGE_TARNAME);
     872           2 :         logfmt("  -l                       Hard link any duplicate files found.\n");
     873           2 :         logfmt("  -m <number>              Maximum number of hard links to a single file.\n");
     874           2 :         logfmt("                           (non-%s mode only - in burp mode, use the\n", PACKAGE_TARNAME);
     875           2 :         logfmt("                           max_hardlinks option in the configuration file)\n");
     876           2 :         logfmt("                           The default is %d. On ext3, the maximum number\n", DEF_MAX_LINKS);
     877           2 :         logfmt("                           of links possible is 32000, but space is needed\n");
     878           2 :         logfmt("                           for the normal operation of %s.\n", PACKAGE_TARNAME);
     879           2 :         logfmt("  -n <list of directories> Non-%s mode. Deduplicate any (set of) directories.\n", PACKAGE_TARNAME);
     880           2 :         logfmt("  -v                       Print duplicate paths.\n");
     881           2 :         logfmt("  -V                       Print version and exit.\n");
     882           2 :         logfmt("\n");
     883           2 :         logfmt("By default, %s will read %s and deduplicate client storage\n", prog, config_default_path());
     884           2 :         logfmt("directories using special knowledge of the structure.\n");
     885           2 :         logfmt("\n");
     886           2 :         logfmt("With '-n', this knowledge is turned off and you have to specify the directories\n");
     887           2 :         logfmt("to deduplicate on the command line. Running with '-n' is therefore dangerous\n");
     888           2 :         logfmt("if you are deduplicating %s storage directories.\n\n", PACKAGE_TARNAME);
     889           2 :         return 1;
     890             : }
     891             : 
     892          14 : int run_bedup(int argc, char *argv[])
     893             : {
     894          14 :         int ret=0;
     895          14 :         int option=0;
     896          14 :         int nonburp=0;
     897          14 :         char *groups=NULL;
     898          14 :         int givenconfigfile=0;
     899          14 :         const char *configfile=NULL;
     900             : 
     901          14 :         configfile=config_default_path();
     902          14 :         snprintf(ext, sizeof(ext), ".bedup.%d", getpid());
     903             : 
     904          46 :         while((option=getopt(argc, argv, "c:dg:hlm:nvV?"))!=-1)
     905             :         {
     906          21 :                 switch(option)
     907             :                 {
     908             :                         case 'c':
     909           1 :                                 configfile=optarg;
     910           1 :                                 givenconfigfile=1;
     911           1 :                                 break;
     912             :                         case 'd':
     913           2 :                                 deletedups=1;
     914           2 :                                 break;
     915             :                         case 'g':
     916           1 :                                 groups=optarg;
     917           1 :                                 break;
     918             :                         case 'l':
     919           3 :                                 makelinks=1;
     920           3 :                                 break;
     921             :                         case 'm':
     922           6 :                                 maxlinks=atoi(optarg);
     923           3 :                                 break;
     924             :                         case 'n':
     925           7 :                                 nonburp=1;
     926           7 :                                 break;
     927             :                         case 'V':
     928           1 :                                 logfmt("%s-%s\n", prog, PACKAGE_VERSION);
     929           1 :                                 return 0;
     930             :                         case 'v':
     931           1 :                                 verbose=1;
     932           1 :                                 break;
     933             :                         case 'h':
     934             :                         case '?':
     935           2 :                                 return usage();
     936             :                 }
     937             :         }
     938             : 
     939          11 :         if(nonburp && givenconfigfile)
     940             :         {
     941           1 :                 logp("-n and -c options are mutually exclusive\n");
     942           1 :                 return 1;
     943             :         }
     944          10 :         if(nonburp && groups)
     945             :         {
     946           1 :                 logp("-n and -g options are mutually exclusive\n");
     947           1 :                 return 1;
     948             :         }
     949           9 :         if(!nonburp && maxlinks!=DEF_MAX_LINKS)
     950             :         {
     951           1 :                 logp("-m option is specified via the configuration file in %s mode (max_hardlinks=)\n", PACKAGE_TARNAME);
     952           1 :                 return 1;
     953             :         }
     954           8 :         if(deletedups && makelinks)
     955             :         {
     956           1 :                 logp("-d and -l options are mutually exclusive\n");
     957           1 :                 return 1;
     958             :         }
     959           7 :         if(deletedups && !nonburp)
     960             :         {
     961           1 :                 logp("-d option requires -n option\n");
     962           1 :                 return 1;
     963             :         }
     964             : 
     965           6 :         if(optind>=argc)
     966             :         {
     967           1 :                 if(nonburp)
     968             :                 {
     969           1 :                         logp("No directories found after options\n");
     970           1 :                         return 1;
     971             :                 }
     972             :         }
     973             :         else
     974             :         {
     975           5 :                 if(!nonburp)
     976             :                 {
     977           1 :                         logp("Do not specify extra arguments.\n");
     978           1 :                         return 1;
     979             :                 }
     980             :         }
     981             : 
     982           4 :         if(maxlinks<2)
     983             :         {
     984           1 :                 logp("The argument to -m needs to be greater than 1.\n");
     985           1 :                 return 1;
     986             :         }
     987             : 
     988           3 :         if(nonburp)
     989             :         {
     990             :                 // Read directories from command line.
     991           3 :                 if(process_from_command_line(argc, argv))
     992           0 :                         ret=1;
     993             :         }
     994             :         else
     995             :         {
     996           0 :                 if(process_from_conf(configfile, &groups))
     997           0 :                         ret=1;
     998             :         }
     999             : 
    1000           3 :         if(!nonburp)
    1001             :         {
    1002           0 :                 logp("%d client storages scanned\n", ccount);
    1003             :         }
    1004           3 :         logp("%" PRIu64 " duplicate %s found\n",
    1005           3 :                 count, count==1?"file":"files");
    1006           9 :         logp("%" PRIu64 " bytes %s%s\n",
    1007           4 :                 savedbytes, (makelinks || deletedups)?"saved":"saveable",
    1008             :                         bytes_to_human(savedbytes));
    1009           3 :         mystruct_delete_all();
    1010           3 :         return ret;
    1011             : }

Generated by: LCOV version 1.13