Line data Source code
1 : #include "../../burp.h"
2 : #include "../../alloc.h"
3 : #include "../../conf.h"
4 : #include "../../conffile.h"
5 : #include "../../handy.h"
6 : #include "../../fsops.h"
7 : #include "../../fzp.h"
8 : #include "../../lock.h"
9 : #include "../../log.h"
10 : #include "../../prepend.h"
11 : #include "../../strlist.h"
12 :
13 : #include <uthash.h>
14 :
15 : #define LOCKFILE_NAME "lockfile" // Skipped at level 0 by level_exclusion().
16 : #define BEDUP_LOCKFILE_NAME "lockfile.bedup" // Per-client lock taken by iterate_over_clients(); also skipped at level 0.
17 :
18 : #define DEF_MAX_LINKS 10000 // Initial value of maxlinks in run_bedup(); -m overrides it.
19 :
20 : static int makelinks=0; // Set by -l: replace duplicates with hard links.
21 : static int deletedups=0; // Set by -d: unlink duplicates.
22 :
23 : static uint64_t savedbytes=0; // Running total of bytes saved (or saveable in report-only mode).
24 : static uint64_t count=0; // Number of duplicate files found.
25 : static int ccount=0; // Number of client storage directories that process_dir() handled.
26 :
27 : static struct lock *locklist=NULL; // Locks obtained so far; released in sighandler() and at the end of iterate_over_clients().
28 :
29 : static int verbose=0; // Set by -v: print the path of every duplicate found.
30 :
31 : typedef struct file file_t;
32 :
33 : struct file // One remembered file; entries of the same size are chained through 'next'.
34 : {
35 : char *path; // NULL marks an entry that check_files() must skip.
36 : dev_t dev; // st_dev from lstat(), used to avoid comparing across devices.
37 : ino_t ino; // st_ino from lstat(); equal dev+ino means already hardlinked.
38 : nlink_t nlink; // Link count, compared against maxlinks in check_files().
39 : uint64_t full_cksum; // Taken from the MD5 of the whole file; 0 means not yet computed.
40 : uint64_t part_cksum; // Taken from the MD5 of the first PART_CHUNK bytes; 0 means not yet computed.
41 : file_t *next; // Next file in the same size bucket.
42 : };
43 :
44 : struct mystruct // One uthash bucket: every remembered file of a given size.
45 : {
46 : off_t st_size; // Hash key: the file size in bytes.
47 : file_t *files; // Singly linked list of files with this size (newest first, see add_file()).
48 : UT_hash_handle hh; // Makes this structure hashable by uthash.
49 : };
50 :
51 : struct mystruct *myfiles=NULL; // Head of the uthash table, keyed on st_size.
52 :
53 0 : struct mystruct *find_key(off_t st_size)
54 : {
55 : struct mystruct *s;
56 :
57 0 : HASH_FIND_INT(myfiles, &st_size, s);
58 0 : return s;
59 : }
60 :
61 0 : static int add_file(struct mystruct *s, struct file *f)
62 : {
63 : struct file *newfile;
64 0 : if(!(newfile=(struct file *)malloc_w(sizeof(struct file), __func__)))
65 0 : return -1;
66 0 : memcpy(newfile, f, sizeof(struct file));
67 0 : f->path=NULL;
68 0 : newfile->next=s->files;
69 0 : s->files=newfile;
70 0 : return 0;
71 : }
72 :
73 0 : static int add_key(off_t st_size, struct file *f)
74 : {
75 : struct mystruct *s;
76 :
77 0 : if(!(s=(struct mystruct *)malloc_w(sizeof(struct mystruct), __func__)))
78 0 : return -1;
79 0 : s->st_size=st_size;
80 0 : s->files=NULL;
81 0 : if(add_file(s, f)) return -1;
82 : //printf("HASH ADD %d\n", st_size);
83 0 : HASH_ADD_INT(myfiles, st_size, s);
84 0 : return 0;
85 : }
86 :
87 : #define FULL_CHUNK 4096
88 :
89 0 : static int full_match(struct file *o, struct file *n,
90 : struct fzp **ofp, struct fzp **nfp)
91 : {
92 : size_t ogot;
93 : size_t ngot;
94 0 : unsigned int i=0;
95 : static char obuf[FULL_CHUNK];
96 : static char nbuf[FULL_CHUNK];
97 :
98 0 : if(*ofp) fzp_seek(*ofp, 0, SEEK_SET);
99 0 : else if(!(*ofp=fzp_open(o->path, "rb")))
100 : {
101 : // Blank this entry so that it can be ignored from
102 : // now on.
103 0 : free_w(&o->path);
104 0 : return 0;
105 : }
106 :
107 0 : if(*nfp) fzp_seek(*nfp, 0, SEEK_SET);
108 0 : else if(!(*nfp=fzp_open(n->path, "rb"))) return 0;
109 :
110 : while(1)
111 : {
112 0 : ogot=fzp_read(*ofp, obuf, FULL_CHUNK);
113 0 : ngot=fzp_read(*nfp, nbuf, FULL_CHUNK);
114 0 : if(ogot!=ngot) return 0;
115 0 : for(i=0; i<ogot; i++)
116 0 : if(obuf[i]!=nbuf[i]) return 0;
117 0 : if(ogot<FULL_CHUNK) break;
118 : }
119 :
120 0 : return 1;
121 : }
122 :
123 : #define PART_CHUNK 1024
124 :
125 0 : static int get_part_cksum(struct file *f, struct fzp **fzp)
126 : {
127 : MD5_CTX md5;
128 0 : int got=0;
129 : static char buf[PART_CHUNK];
130 : unsigned char checksum[MD5_DIGEST_LENGTH+1];
131 :
132 0 : if(*fzp) fzp_seek(*fzp, 0, SEEK_SET);
133 0 : else if(!(*fzp=fzp_open(f->path, "rb")))
134 : {
135 0 : f->part_cksum=0;
136 0 : return 0;
137 : }
138 :
139 0 : if(!MD5_Init(&md5))
140 : {
141 0 : logp("MD5_Init() failed\n");
142 0 : return -1;
143 : }
144 :
145 0 : got=fzp_read(*fzp, buf, PART_CHUNK);
146 :
147 0 : if(!MD5_Update(&md5, buf, got))
148 : {
149 0 : logp("MD5_Update() failed\n");
150 0 : return -1;
151 : }
152 :
153 0 : if(!MD5_Final(checksum, &md5))
154 : {
155 0 : logp("MD5_Final() failed\n");
156 0 : return -1;
157 : }
158 :
159 0 : memcpy(&(f->part_cksum), checksum, sizeof(unsigned));
160 :
161 : // Try for a bit of efficiency - no need to calculate the full checksum
162 : // again if we already read the whole file.
163 0 : if(got<PART_CHUNK) f->full_cksum=f->part_cksum;
164 :
165 0 : return 0;
166 : }
167 :
168 0 : static int get_full_cksum(struct file *f, struct fzp **fzp)
169 : {
170 0 : size_t s=0;
171 : MD5_CTX md5;
172 : static char buf[FULL_CHUNK];
173 : unsigned char checksum[MD5_DIGEST_LENGTH+1];
174 :
175 0 : if(*fzp) fzp_seek(*fzp, 0, SEEK_SET);
176 0 : else if(!(*fzp=fzp_open(f->path, "rb")))
177 : {
178 0 : f->full_cksum=0;
179 0 : return 0;
180 : }
181 :
182 0 : if(!MD5_Init(&md5))
183 : {
184 0 : logp("MD5_Init() failed\n");
185 0 : return -1;
186 : }
187 :
188 0 : while((s=fzp_read(*fzp, buf, FULL_CHUNK))>0)
189 : {
190 0 : if(!MD5_Update(&md5, buf, s))
191 : {
192 0 : logp("MD5_Update() failed\n");
193 0 : return -1;
194 : }
195 0 : if(s<FULL_CHUNK) break;
196 : }
197 :
198 0 : if(!MD5_Final(checksum, &md5))
199 : {
200 0 : logp("MD5_Final() failed\n");
201 0 : return -1;
202 : }
203 :
204 0 : memcpy(&(f->full_cksum), checksum, sizeof(unsigned));
205 :
206 0 : return 0;
207 : }
208 :
209 : /* Make it atomic by linking to a temporary file, then moving it into place. */
210 0 : static int do_hardlink(struct file *o, struct file *n, const char *ext)
211 : {
212 0 : int ret=-1;
213 0 : char *tmppath=NULL;
214 0 : if(!(tmppath=prepend(o->path, ext)))
215 : {
216 0 : log_out_of_memory(__func__);
217 0 : goto end;
218 : }
219 0 : if(link(n->path, tmppath))
220 : {
221 : logp("Could not hardlink %s to %s: %s\n", tmppath, n->path,
222 0 : strerror(errno));
223 0 : goto end;
224 : }
225 0 : if((ret=do_rename(tmppath, o->path)))
226 0 : goto end;
227 0 : ret=0;
228 : end:
229 0 : free_w(&tmppath);
230 0 : return ret;
231 : }
232 :
233 0 : static void reset_old_file(struct file *oldfile, struct file *newfile,
234 : struct stat *info)
235 : {
236 : //printf("reset %s with %s %d\n", oldfile->path, newfile->path,
237 : // info->st_nlink);
238 0 : oldfile->nlink=info->st_nlink;
239 0 : free_w(&oldfile->path);
240 0 : oldfile->path=newfile->path;
241 0 : newfile->path=NULL;
242 0 : }
243 :
244 0 : static int check_files(struct mystruct *find, struct file *newfile,
245 : struct stat *info, const char *ext, unsigned int maxlinks)
246 : {
247 0 : int found=0;
248 0 : struct fzp *nfp=NULL;
249 0 : struct fzp *ofp=NULL;
250 0 : struct file *f=NULL;
251 :
252 0 : for(f=find->files; f; f=f->next)
253 : {
254 : //printf(" against: '%s'\n", f->path);
255 0 : if(!f->path)
256 : {
257 : // If the full_match() function fails to open oldfile
258 : // (which could happen if burp deleted some old
259 : // directories), it will free path and set it to NULL.
260 : // Skip entries like this.
261 0 : continue;
262 : }
263 0 : if(newfile->dev!=f->dev)
264 : {
265 : // Different device.
266 0 : continue;
267 : }
268 0 : if(newfile->ino==f->ino)
269 : {
270 : // Same device, same inode, therefore these two files
271 : // are hardlinked to each other already.
272 0 : found++;
273 0 : break;
274 : }
275 0 : if((!newfile->part_cksum && get_part_cksum(newfile, &nfp))
276 0 : || (!f->part_cksum && get_part_cksum(f, &ofp)))
277 : {
278 : // Some error with md5sums Give up.
279 0 : return -1;
280 : }
281 0 : if(newfile->part_cksum!=f->part_cksum)
282 : {
283 0 : fzp_close(&ofp);
284 0 : continue;
285 : }
286 : //printf(" %s, %s\n", find->files->path, newfile->path);
287 : //printf(" part cksum matched\n");
288 :
289 0 : if((!newfile->full_cksum && get_full_cksum(newfile, &nfp))
290 0 : || (!f->full_cksum && get_full_cksum(f, &ofp)))
291 : {
292 : // Some error with md5sums Give up.
293 0 : return -1;
294 : }
295 0 : if(newfile->full_cksum!=f->full_cksum)
296 : {
297 0 : fzp_close(&ofp);
298 0 : continue;
299 : }
300 :
301 : //printf(" full cksum matched\n");
302 0 : if(!full_match(newfile, f, &nfp, &ofp))
303 : {
304 0 : fzp_close(&ofp);
305 0 : continue;
306 : }
307 : //printf(" full match\n");
308 : //printf("%s, %s\n", find->files->path, newfile->path);
309 :
310 : // If there are already enough links to this file, replace
311 : // our memory of it with the new file so that files later on
312 : // can link to the new one.
313 0 : if(f->nlink>=maxlinks)
314 : {
315 : // Just need to reset the path name and the number
316 : // of links, and pretend that it was found otherwise
317 : // NULL newfile will get added to the memory.
318 0 : reset_old_file(f, newfile, info);
319 0 : found++;
320 0 : break;
321 : }
322 :
323 0 : found++;
324 0 : count++;
325 :
326 0 : if(verbose) printf("%s\n", newfile->path);
327 :
328 : // Now hardlink it.
329 0 : if(makelinks)
330 : {
331 0 : switch(do_hardlink(newfile, f, ext))
332 : {
333 : case 0:
334 0 : f->nlink++;
335 : // Only count bytes as saved if we
336 : // removed the last link.
337 0 : if(newfile->nlink==1)
338 0 : savedbytes+=info->st_size;
339 0 : break;
340 : case -1:
341 : // On error, replace the memory of the
342 : // old file with the one that we just
343 : // found. It might work better when
344 : // someone later tries to link to the
345 : // new one instead of the old one.
346 0 : reset_old_file(f, newfile, info);
347 0 : count--;
348 0 : break;
349 : default:
350 : // Abandon all hope.
351 : // This could happen if renaming the
352 : // hardlink failed in such a way that
353 : // the target file was unlinked without
354 : // being replaced - ie, if the max
355 : // number of hardlinks is being hit.
356 0 : return -1;
357 : }
358 : }
359 0 : else if(deletedups)
360 : {
361 0 : if(unlink(newfile->path))
362 : {
363 : logp("Could not delete %s: %s\n",
364 0 : newfile->path, strerror(errno));
365 : }
366 : else
367 : {
368 : // Only count bytes as saved if we removed the
369 : // last link.
370 0 : if(newfile->nlink==1)
371 0 : savedbytes+=info->st_size;
372 : }
373 : }
374 : else
375 : {
376 : // To be able to tell how many bytes
377 : // are saveable.
378 0 : savedbytes+=info->st_size;
379 : }
380 :
381 0 : break;
382 : }
383 0 : fzp_close(&nfp);
384 0 : fzp_close(&ofp);
385 :
386 0 : if(found)
387 : {
388 0 : free_w(&newfile->path);
389 0 : return 0;
390 : }
391 :
392 0 : if(add_file(find, newfile)) return -1;
393 :
394 0 : return 0;
395 : }
396 :
/* Decide whether basedir looks like a protocol 1 storage directory, which is
   taken to be the case when it contains a symlink called 'current'.
   Returns 1 if it does, 0 if it does not, -1 when out of memory. */
static int looks_like_protocol1(const char *basedir)
{
	int verdict=-1;
	struct stat statp;
	char *current=prepend_s(basedir, "current");

	if(!current)
		log_out_of_memory(__func__);
	else if(!lstat(current, &statp) && S_ISLNK(statp.st_mode))
		verdict=1;
	else
		verdict=0;

	free_w(&current);
	return verdict;
}
419 :
/* Read the target of the symlink basedir/lnk into real, which has room for
   r bytes, and strip one trailing slash from it.
   When the link cannot be read, real is set to the empty string - this is
   not treated as an error.
   Returns 0 on success, -1 when out of memory or when r is 0. */
static int get_link(const char *basedir, const char *lnk, char real[], size_t r)
{
	ssize_t len=0;
	char *tmp=NULL;

	// There has to be room for at least the terminating '\0'.
	if(!r) return -1;

	if(!(tmp=prepend_s(basedir, lnk)))
	{
		log_out_of_memory(__func__);
		return -1;
	}
	// readlink() does not terminate the string, so leave room to do it.
	if((len=readlink(tmp, real, r-1))<0) len=0;
	real[len]='\0';
	free_w(&tmp);
	// Strip any trailing slash. An empty result (missing link) has
	// nothing to strip, and must not be indexed at -1.
	if(len>0 && real[len-1]=='/') real[len-1]='\0';
	return 0;
}
436 :
437 0 : static int level_exclusion(int level, const char *fname,
438 : const char *working, const char *finishing)
439 : {
440 0 : if(level==0)
441 : {
442 : /* Be careful not to try to dedup the lockfiles.
443 : The lock actually gets lost if you open one to do a
444 : checksum
445 : and then close it. This caused me major headaches to
446 : figure out. */
447 0 : if(!strcmp(fname, LOCKFILE_NAME)
448 0 : || !strcmp(fname, BEDUP_LOCKFILE_NAME))
449 0 : return 1;
450 :
451 : /* Skip places where backups are going on. */
452 0 : if(!strcmp(fname, working)
453 0 : || !strcmp(fname, finishing))
454 0 : return 1;
455 :
456 0 : if(!strcmp(fname, "deleteme"))
457 0 : return 1;
458 : }
459 0 : else if(level==1)
460 : {
461 : // Do not dedup stuff that might be appended to later.
462 0 : if(!strncmp(fname, "log", strlen("log"))
463 0 : || !strncmp(fname, "verifylog", strlen("verifylog"))
464 0 : || !strncmp(fname, "restorelog", strlen("restorelog")))
465 0 : return 1;
466 : }
467 0 : return 0;
468 : }
469 :
470 : // Return 0 for directory processed, -1 for error, 1 for not processed.
471 0 : static int process_dir(const char *oldpath, const char *newpath,
472 : const char *ext, unsigned int maxlinks, int burp_mode, int level)
473 : {
474 0 : int ret=-1;
475 0 : DIR *dirp=NULL;
476 0 : char *path=NULL;
477 : struct stat info;
478 0 : struct dirent *dirinfo=NULL;
479 : struct file newfile;
480 0 : struct mystruct *find=NULL;
481 : static char working[256]="";
482 : static char finishing[256]="";
483 :
484 0 : newfile.path=NULL;
485 :
486 0 : if(!(path=prepend_s(oldpath, newpath))) goto end;
487 :
488 0 : if(burp_mode && level==0)
489 : {
490 0 : if(get_link(path, "working", working, sizeof(working))
491 0 : || get_link(path, "finishing", finishing, sizeof(finishing)))
492 0 : goto end;
493 0 : if(!looks_like_protocol1(path))
494 : {
495 0 : logp("%s does not look like a protocol 1 storage directory - skipping\n", path);
496 0 : ret=1;
497 0 : goto end;
498 : }
499 : }
500 :
501 0 : if(!(dirp=opendir(path)))
502 : {
503 0 : logp("Could not opendir '%s': %s\n", path, strerror(errno));
504 0 : ret=1;
505 0 : goto end;
506 : }
507 0 : while((dirinfo=readdir(dirp)))
508 : {
509 0 : if(!strcmp(dirinfo->d_name, ".")
510 0 : || !strcmp(dirinfo->d_name, ".."))
511 0 : continue;
512 :
513 : //printf("try %s\n", dirinfo->d_name);
514 :
515 0 : if(burp_mode
516 0 : && level_exclusion(level, dirinfo->d_name,
517 0 : working, finishing))
518 0 : continue;
519 :
520 0 : free_w(&newfile.path);
521 0 : if(!(newfile.path=prepend_s(path, dirinfo->d_name)))
522 0 : goto end;
523 :
524 0 : if(lstat(newfile.path, &info))
525 0 : continue;
526 :
527 0 : if(S_ISDIR(info.st_mode))
528 : {
529 0 : if(process_dir(path, dirinfo->d_name, ext, maxlinks, burp_mode, level+1))
530 0 : goto end;
531 0 : continue;
532 : }
533 0 : else if(!S_ISREG(info.st_mode)
534 0 : || !info.st_size) // ignore zero-length files
535 0 : continue;
536 :
537 0 : newfile.dev=info.st_dev;
538 0 : newfile.ino=info.st_ino;
539 0 : newfile.nlink=info.st_nlink;
540 0 : newfile.full_cksum=0;
541 0 : newfile.part_cksum=0;
542 0 : newfile.next=NULL;
543 :
544 0 : if((find=find_key(info.st_size)))
545 : {
546 : //printf("check %d: %s\n", info.st_size, newfile.path);
547 0 : if(check_files(find, &newfile, &info, ext, maxlinks))
548 0 : goto end;
549 : }
550 : else
551 : {
552 : //printf("add: %s\n", newfile.path);
553 0 : if(add_key(info.st_size, &newfile))
554 0 : goto end;
555 : }
556 : }
557 0 : ret=0;
558 : end:
559 0 : closedir(dirp);
560 0 : free_w(&newfile.path);
561 0 : free_w(&path);
562 0 : return ret;
563 : }
564 :
565 0 : static void sighandler(int signum)
566 : {
567 0 : locks_release_and_free(&locklist);
568 0 : exit(1);
569 : }
570 :
/* Check whether clientconfdir/file is a regular file. Symlinks are not
   followed, because lstat() is used.
   Returns non-zero if it is, 0 if it is not, or if the path could not be
   built or examined. */
static int is_regular_file(const char *clientconfdir, const char *file)
{
	int regular=0;
	struct stat statp;
	char *fullpath=prepend_s(clientconfdir, file);

	if(!fullpath)
		return 0;
	if(!lstat(fullpath, &statp))
		regular=S_ISREG(statp.st_mode);
	free_w(&fullpath);
	return regular;
}
585 :
586 0 : static int in_group(struct strlist *grouplist, const char *dedup_group)
587 : {
588 : struct strlist *g;
589 :
590 0 : for(g=grouplist; g; g=g->next)
591 0 : if(!strcmp(g->path, dedup_group)) return 1;
592 :
593 0 : return 0;
594 : }
595 :
596 0 : static int iterate_over_clients(struct conf **globalcs,
597 : struct strlist *grouplist, const char *ext, unsigned int maxlinks)
598 : {
599 0 : int ret=0;
600 0 : DIR *dirp=NULL;
601 0 : struct conf **cconfs=NULL;
602 0 : struct dirent *dirinfo=NULL;
603 0 : const char *globalclientconfdir=get_string(globalcs[OPT_CLIENTCONFDIR]);
604 :
605 0 : signal(SIGABRT, &sighandler);
606 0 : signal(SIGTERM, &sighandler);
607 0 : signal(SIGINT, &sighandler);
608 :
609 0 : if(!(cconfs=confs_alloc())) return -1;
610 0 : if(confs_init(cconfs)) return -1;
611 :
612 0 : if(!(dirp=opendir(globalclientconfdir)))
613 : {
614 : logp("Could not opendir '%s': %s\n",
615 0 : globalclientconfdir, strerror(errno));
616 0 : return 0;
617 : }
618 0 : while((dirinfo=readdir(dirp)))
619 : {
620 0 : char *lockfile=NULL;
621 0 : char *lockfilebase=NULL;
622 0 : char *client_lockdir=NULL;
623 0 : struct lock *lock=NULL;
624 :
625 0 : if(dirinfo->d_ino==0
626 : // looks_like...() also avoids '.' and '..'.
627 0 : || looks_like_tmp_or_hidden_file(dirinfo->d_name)
628 0 : || !is_regular_file(globalclientconfdir, dirinfo->d_name))
629 0 : continue;
630 :
631 0 : confs_free_content(cconfs);
632 0 : if(confs_init(cconfs)) return -1;
633 :
634 0 : if(set_string(cconfs[OPT_CNAME], dirinfo->d_name))
635 0 : return -1;
636 :
637 0 : if(conf_load_clientconfdir(globalcs, cconfs))
638 : {
639 : logp("could not load config for client %s\n",
640 0 : dirinfo->d_name);
641 0 : return 0;
642 : }
643 :
644 0 : if(grouplist)
645 : {
646 : const char *dedup_group=
647 0 : get_string(cconfs[OPT_DEDUP_GROUP]);
648 0 : if(!dedup_group
649 0 : || !in_group(grouplist, dedup_group))
650 0 : continue;
651 : }
652 :
653 0 : if(!(client_lockdir=get_string(cconfs[OPT_CLIENT_LOCKDIR])))
654 0 : client_lockdir=get_string(cconfs[OPT_DIRECTORY]);
655 :
656 0 : if(!(lockfilebase=prepend_s(client_lockdir, dirinfo->d_name))
657 0 : || !(lockfile=prepend_s(lockfilebase, BEDUP_LOCKFILE_NAME)))
658 : {
659 0 : free_w(&lockfilebase);
660 0 : free_w(&lockfile);
661 0 : ret=-1;
662 0 : break;
663 : }
664 0 : free_w(&lockfilebase);
665 :
666 0 : if(!(lock=lock_alloc_and_init(lockfile)))
667 : {
668 0 : ret=-1;
669 0 : break;
670 : }
671 0 : lock_get(lock);
672 0 : free_w(&lockfile);
673 :
674 0 : if(lock->status!=GET_LOCK_GOT)
675 : {
676 0 : logp("Could not get %s\n", lock->path);
677 0 : continue;
678 : }
679 0 : logp("Got %s\n", lock->path);
680 :
681 : // Remember that we got that lock.
682 0 : lock_add_to_list(&locklist, lock);
683 :
684 0 : switch(process_dir(get_string(cconfs[OPT_DIRECTORY]),
685 : dirinfo->d_name,
686 0 : ext, maxlinks, 1 /* burp mode */, 0 /* level */))
687 : {
688 0 : case 0: ccount++;
689 0 : case 1: continue;
690 0 : default: ret=-1; break;
691 : }
692 0 : break;
693 : }
694 0 : closedir(dirp);
695 :
696 0 : locks_release_and_free(&locklist);
697 :
698 0 : confs_free(&cconfs);
699 :
700 0 : return ret;
701 : }
702 :
703 0 : static char *get_config_path(void)
704 : {
705 : static char path[256]="";
706 0 : snprintf(path, sizeof(path), "%s", SYSCONFDIR "/burp.conf");
707 0 : return path;
708 : }
709 :
710 0 : static int usage(void)
711 : {
712 0 : printf("\nUsage: %s [options]\n", prog);
713 0 : printf("\n");
714 0 : printf(" Options:\n");
715 0 : printf(" -c <path> Path to config file (default: %s).\n", get_config_path());
716 0 : printf(" -g <list of group names> Only run on the directories of clients that\n");
717 0 : printf(" are in one of the groups specified.\n");
718 0 : printf(" The list is comma-separated. To put a client in a\n");
719 0 : printf(" group, use the 'dedup_group' option in the client\n");
720 0 : printf(" configuration file on the server.\n");
721 0 : printf(" -h|-? Print this text and exit.\n");
722 0 : printf(" -d Delete any duplicate files found.\n");
723 0 : printf(" (non-burp mode only)\n");
724 0 : printf(" -l Hard link any duplicate files found.\n");
725 0 : printf(" -m <number> Maximum number of hard links to a single file.\n");
726 0 : printf(" (non-burp mode only - in burp mode, use the\n");
727 0 : printf(" max_hardlinks option in the configuration file)\n");
728 0 : printf(" The default is %d. On ext3, the maximum number\n", DEF_MAX_LINKS);
729 0 : printf(" of links possible is 32000, but space is needed\n");
730 0 : printf(" for the normal operation of burp.\n");
731 0 : printf(" -n <list of directories> Non-burp mode. Deduplicate any (set of) directories.\n");
732 0 : printf(" -v Print duplicate paths.\n");
733 0 : printf(" -V Print version and exit.\n");
734 0 : printf("\n");
735 0 : printf("By default, %s will read %s and deduplicate client storage\n", prog, get_config_path());
736 0 : printf("directories using special knowledge of the structure.\n");
737 0 : printf("\n");
738 0 : printf("With '-n', this knowledge is turned off and you have to specify the directories\n");
739 0 : printf("to deduplicate on the command line. Running with '-n' is therefore dangerous\n");
740 0 : printf("if you are deduplicating burp storage directories.\n\n");
741 0 : return 1;
742 : }
743 :
744 0 : int run_bedup(int argc, char *argv[])
745 : {
746 0 : int i=1;
747 0 : int ret=0;
748 0 : int option=0;
749 0 : int nonburp=0;
750 0 : unsigned int maxlinks=DEF_MAX_LINKS;
751 0 : char *groups=NULL;
752 0 : char ext[16]="";
753 0 : int givenconfigfile=0;
754 0 : const char *configfile=NULL;
755 :
756 0 : configfile=get_config_path();
757 0 : snprintf(ext, sizeof(ext), ".bedup.%d", getpid());
758 :
759 0 : while((option=getopt(argc, argv, "c:dg:hlm:nvV?"))!=-1)
760 : {
761 0 : switch(option)
762 : {
763 : case 'c':
764 0 : configfile=optarg;
765 0 : givenconfigfile=1;
766 0 : break;
767 : case 'd':
768 0 : deletedups=1;
769 0 : break;
770 : case 'g':
771 0 : groups=optarg;
772 0 : break;
773 : case 'l':
774 0 : makelinks=1;
775 0 : break;
776 : case 'm':
777 0 : maxlinks=atoi(optarg);
778 0 : break;
779 : case 'n':
780 0 : nonburp=1;
781 0 : break;
782 : case 'V':
783 0 : printf("%s-%s\n", prog, VERSION);
784 0 : return 0;
785 : case 'v':
786 0 : verbose=1;
787 0 : break;
788 : case 'h':
789 : case '?':
790 0 : return usage();
791 : }
792 : }
793 :
794 0 : if(nonburp && givenconfigfile)
795 : {
796 0 : logp("-n and -c options are mutually exclusive\n");
797 0 : return 1;
798 : }
799 0 : if(nonburp && groups)
800 : {
801 0 : logp("-n and -g options are mutually exclusive\n");
802 0 : return 1;
803 : }
804 0 : if(!nonburp && maxlinks!=DEF_MAX_LINKS)
805 : {
806 0 : logp("-m option is specified via the configuration file in burp mode (max_hardlinks=)\n");
807 0 : return 1;
808 : }
809 0 : if(deletedups && makelinks)
810 : {
811 0 : logp("-d and -l options are mutually exclusive\n");
812 0 : return 1;
813 : }
814 0 : if(deletedups && !nonburp)
815 : {
816 0 : logp("-d option requires -n option\n");
817 0 : return 1;
818 : }
819 :
820 0 : if(optind>=argc)
821 : {
822 0 : if(nonburp)
823 : {
824 0 : logp("No directories found after options\n");
825 0 : return 1;
826 : }
827 : }
828 : else
829 : {
830 0 : if(!nonburp)
831 : {
832 0 : logp("Do not specify extra arguments.\n");
833 0 : return 1;
834 : }
835 : }
836 :
837 0 : if(maxlinks<2)
838 : {
839 0 : logp("The argument to -m needs to be greater than 1.\n");
840 0 : return 1;
841 : }
842 :
843 0 : if(nonburp)
844 : {
845 : // Read directories from command line.
846 0 : for(i=optind; i<argc; i++)
847 : {
848 : // Strip trailing slashes, for tidiness.
849 0 : if(argv[i][strlen(argv[i])-1]=='/')
850 0 : argv[i][strlen(argv[i])-1]='\0';
851 0 : if(process_dir("", argv[i], ext, maxlinks,
852 0 : 0 /* not burp mode */, 0 /* level */))
853 : {
854 0 : ret=1;
855 0 : break;
856 : }
857 : }
858 : }
859 : else
860 : {
861 0 : struct conf **globalcs=NULL;
862 0 : struct strlist *grouplist=NULL;
863 0 : struct lock *globallock=NULL;
864 :
865 0 : if(groups)
866 : {
867 0 : char *tok=NULL;
868 0 : if((tok=strtok(groups, ",\n")))
869 : {
870 0 : do
871 : {
872 0 : if(strlist_add(&grouplist, tok, 1))
873 : {
874 0 : log_out_of_memory(__func__);
875 0 : return -1;
876 : }
877 : } while((tok=strtok(NULL, ",\n")));
878 : }
879 0 : if(!grouplist)
880 : {
881 0 : logp("unable to read list of groups\n");
882 0 : return -1;
883 : }
884 : }
885 :
886 : // Read directories from config files, and get locks.
887 0 : if(!(globalcs=confs_alloc())) return -1;
888 0 : if(confs_init(globalcs)) return -1;
889 0 : if(conf_load_global_only(configfile, globalcs)) return 1;
890 0 : if(get_e_burp_mode(globalcs[OPT_BURP_MODE])!=BURP_MODE_SERVER)
891 : {
892 0 : logp("%s is not a server config file\n", configfile);
893 0 : confs_free(&globalcs);
894 0 : return 1;
895 : }
896 : logp("Dedup clients from %s\n",
897 0 : get_string(globalcs[OPT_CLIENTCONFDIR]));
898 0 : maxlinks=get_int(globalcs[OPT_MAX_HARDLINKS]);
899 0 : if(grouplist)
900 : {
901 0 : struct strlist *g=NULL;
902 0 : logp("in dedup groups:\n");
903 0 : for(g=grouplist; g; g=g->next)
904 0 : logp("%s\n", g->path);
905 : }
906 : else
907 : {
908 0 : char *lockpath=NULL;
909 0 : const char *opt_lockfile=confs_get_lockfile(globalcs);
910 : // Only get the global lock when doing a global run.
911 : // If you are doing individual groups, you are likely
912 : // to want to do many different dedup jobs and a
913 : // global lock would get in the way.
914 0 : if(!(lockpath=prepend(opt_lockfile, ".bedup"))
915 0 : || !(globallock=lock_alloc_and_init(lockpath)))
916 0 : return 1;
917 0 : lock_get(globallock);
918 0 : if(globallock->status!=GET_LOCK_GOT)
919 : {
920 : logp("Could not get lock %s (%d)\n", lockpath,
921 0 : globallock->status);
922 0 : free_w(&lockpath);
923 0 : return 1;
924 : }
925 0 : logp("Got %s\n", lockpath);
926 : }
927 0 : ret=iterate_over_clients(globalcs, grouplist, ext, maxlinks);
928 0 : confs_free(&globalcs);
929 :
930 0 : lock_release(globallock);
931 0 : lock_free(&globallock);
932 0 : strlists_free(&grouplist);
933 : }
934 :
935 0 : if(!nonburp)
936 : {
937 0 : logp("%d client storages scanned\n", ccount);
938 : }
939 : logp("%llu duplicate %s found\n",
940 0 : count, count==1?"file":"files");
941 : logp("%llu bytes %s%s\n",
942 0 : savedbytes, (makelinks || deletedups)?"saved":"saveable",
943 0 : bytes_to_human(savedbytes));
944 0 : return ret;
945 : }
|