Line data Source code
1 : #include "../../burp.h"
2 : #include "../../alloc.h"
3 : #include "../../conf.h"
4 : #include "../../conffile.h"
5 : #include "../../handy.h"
6 : #include "../../fsops.h"
7 : #include "../../fzp.h"
8 : #include "../../lock.h"
9 : #include "../../log.h"
10 : #include "../../prepend.h"
11 : #include "../../strlist.h"
12 : #include "bedup.h"
13 :
14 : #include <uthash.h>
15 :
/* Names of the per-client lock files; level_exclusion() skips both. */
#define LOCKFILE_NAME		"lockfile"
#define BEDUP_LOCKFILE_NAME	"lockfile.bedup"

/* Default for the maximum number of hard links to one file (-m). */
#define DEF_MAX_LINKS		10000

/* Action flags, set from the command line in run_bedup(). */
static int makelinks=0;		/* -l: hard link duplicates. */
static int deletedups=0;	/* -d: delete duplicates. */

/* Running totals reported at the end of run_bedup(). */
static uint64_t savedbytes=0;	/* Bytes saved, or saveable on a dry run. */
static uint64_t count=0;	/* Duplicate files found. */
static int ccount=0;		/* Client storages scanned. */

/* Per-client locks currently held; released on exit or on a signal. */
static struct lock *locklist=NULL;

/* -v: print the path of each duplicate found. */
static int verbose=0;
31 :
typedef struct file file_t;

/*
 * One file seen during the scan. Entries are chained through 'next' inside
 * the struct mystruct bucket for their size.
 */
struct file
{
	char *path;		/* Heap-allocated path; NULL marks an entry to skip. */
	dev_t dev;		/* Device, copied from st_dev. */
	ino_t ino;		/* Inode, copied from st_ino. */
	nlink_t nlink;		/* Link count, copied from st_nlink. */
	uint64_t full_cksum;	/* Whole-file checksum; 0 means not computed. */
	uint64_t part_cksum;	/* First-chunk checksum; 0 means not computed. */
	file_t *next;		/* Next entry in the same bucket. */
};
44 :
45 : struct mystruct
46 : {
47 : off_t st_size;
48 : file_t *files;
49 : UT_hash_handle hh;
50 : };
51 :
52 : struct mystruct *myfiles=NULL;
53 :
54 4 : static struct mystruct *find_key(off_t st_size)
55 : {
56 : struct mystruct *s;
57 :
58 4 : HASH_FIND_INT(myfiles, &st_size, s);
59 4 : return s;
60 : }
61 :
62 2 : static int add_file(struct mystruct *s, struct file *f)
63 : {
64 : struct file *newfile;
65 2 : if(!(newfile=(struct file *)malloc_w(sizeof(struct file), __func__)))
66 : return -1;
67 : memcpy(newfile, f, sizeof(struct file));
68 2 : f->path=NULL;
69 2 : newfile->next=s->files;
70 2 : s->files=newfile;
71 : return 0;
72 : }
73 :
74 2 : static int add_key(off_t st_size, struct file *f)
75 : {
76 : struct mystruct *s;
77 :
78 2 : if(!(s=(struct mystruct *)malloc_w(sizeof(struct mystruct), __func__)))
79 : return -1;
80 2 : s->st_size=st_size;
81 2 : s->files=NULL;
82 2 : if(add_file(s, f)) return -1;
83 : //printf("HASH ADD %d\n", st_size);
84 2 : HASH_ADD_INT(myfiles, st_size, s);
85 : return 0;
86 : }
87 :
88 : static void file_free_content(struct file *file)
89 : {
90 2 : if(!file) return;
91 2 : free_w(&file->path);
92 : }
93 :
/* Free one file entry along with its path, via free_v() on the pointer. */
static void file_free(struct file **file)
{
	if(file && *file)
	{
		file_free_content(*file);
		free_v((void **)file);
	}
}
100 :
101 2 : static void files_free(struct file **files)
102 : {
103 : struct file *f;
104 : struct file *fhead;
105 2 : if(!files || !*files) return;
106 : fhead=*files;
107 4 : while(fhead)
108 : {
109 2 : f=fhead;
110 2 : fhead=fhead->next;
111 2 : file_free(&f);
112 : }
113 : }
114 :
115 : static void mystruct_free_content(struct mystruct *mystruct)
116 : {
117 2 : if(!mystruct) return;
118 2 : files_free(&mystruct->files);
119 : }
120 :
/* Free a bucket along with its file list, via free_v() on the pointer. */
static void mystruct_free(struct mystruct **mystruct)
{
	if(mystruct && *mystruct)
	{
		mystruct_free_content(*mystruct);
		free_v((void **)mystruct);
	}
}
127 :
128 2 : static void mystruct_delete_all(void)
129 : {
130 : struct mystruct *tmp;
131 : struct mystruct *mystruct;
132 :
133 4 : HASH_ITER(hh, myfiles, mystruct, tmp)
134 : {
135 2 : HASH_DEL(myfiles, mystruct);
136 2 : mystruct_free(&mystruct);
137 : }
138 2 : myfiles=NULL;
139 2 : }
140 :
141 : #define FULL_CHUNK 4096
142 :
143 2 : static int full_match(struct file *o, struct file *n,
144 : struct fzp **ofp, struct fzp **nfp)
145 : {
146 : size_t ogot;
147 : size_t ngot;
148 2 : unsigned int i=0;
149 : static char obuf[FULL_CHUNK];
150 : static char nbuf[FULL_CHUNK];
151 :
152 2 : if(*ofp) fzp_seek(*ofp, 0, SEEK_SET);
153 0 : else if(!(*ofp=fzp_open(o->path, "rb")))
154 : {
155 : // Blank this entry so that it can be ignored from
156 : // now on.
157 0 : free_w(&o->path);
158 : return 0;
159 : }
160 :
161 2 : if(*nfp) fzp_seek(*nfp, 0, SEEK_SET);
162 0 : else if(!(*nfp=fzp_open(n->path, "rb"))) return 0;
163 :
164 : while(1)
165 : {
166 2 : ogot=fzp_read(*ofp, obuf, FULL_CHUNK);
167 2 : ngot=fzp_read(*nfp, nbuf, FULL_CHUNK);
168 2 : if(ogot!=ngot) return 0;
169 20 : for(i=0; i<ogot; i++)
170 20 : if(obuf[i]!=nbuf[i]) return 0;
171 2 : if(ogot<FULL_CHUNK) break;
172 : }
173 :
174 : return 1;
175 : }
176 :
177 : #define PART_CHUNK 1024
178 :
179 4 : static int get_part_cksum(struct file *f, struct fzp **fzp)
180 : {
181 : MD5_CTX md5;
182 4 : int got=0;
183 : static char buf[PART_CHUNK];
184 : unsigned char checksum[MD5_DIGEST_LENGTH+1];
185 :
186 4 : if(*fzp) fzp_seek(*fzp, 0, SEEK_SET);
187 4 : else if(!(*fzp=fzp_open(f->path, "rb")))
188 : {
189 0 : f->part_cksum=0;
190 0 : return 0;
191 : }
192 :
193 4 : if(!MD5_Init(&md5))
194 : {
195 0 : logp("MD5_Init() failed\n");
196 0 : return -1;
197 : }
198 :
199 4 : got=fzp_read(*fzp, buf, PART_CHUNK);
200 :
201 4 : if(!MD5_Update(&md5, buf, got))
202 : {
203 0 : logp("MD5_Update() failed\n");
204 0 : return -1;
205 : }
206 :
207 4 : if(!MD5_Final(checksum, &md5))
208 : {
209 0 : logp("MD5_Final() failed\n");
210 0 : return -1;
211 : }
212 :
213 4 : memcpy(&(f->part_cksum), checksum, sizeof(unsigned));
214 :
215 : // Try for a bit of efficiency - no need to calculate the full checksum
216 : // again if we already read the whole file.
217 4 : if(got<PART_CHUNK) f->full_cksum=f->part_cksum;
218 :
219 : return 0;
220 : }
221 :
222 0 : static int get_full_cksum(struct file *f, struct fzp **fzp)
223 : {
224 0 : size_t s=0;
225 : MD5_CTX md5;
226 : static char buf[FULL_CHUNK];
227 : unsigned char checksum[MD5_DIGEST_LENGTH+1];
228 :
229 0 : if(*fzp) fzp_seek(*fzp, 0, SEEK_SET);
230 0 : else if(!(*fzp=fzp_open(f->path, "rb")))
231 : {
232 0 : f->full_cksum=0;
233 0 : return 0;
234 : }
235 :
236 0 : if(!MD5_Init(&md5))
237 : {
238 0 : logp("MD5_Init() failed\n");
239 0 : return -1;
240 : }
241 :
242 0 : while((s=fzp_read(*fzp, buf, FULL_CHUNK))>0)
243 : {
244 0 : if(!MD5_Update(&md5, buf, s))
245 : {
246 0 : logp("MD5_Update() failed\n");
247 0 : return -1;
248 : }
249 0 : if(s<FULL_CHUNK) break;
250 : }
251 :
252 0 : if(!MD5_Final(checksum, &md5))
253 : {
254 0 : logp("MD5_Final() failed\n");
255 0 : return -1;
256 : }
257 :
258 0 : memcpy(&(f->full_cksum), checksum, sizeof(unsigned));
259 :
260 0 : return 0;
261 : }
262 :
263 : /* Make it atomic by linking to a temporary file, then moving it into place. */
264 1 : static int do_hardlink(struct file *o, struct file *n, const char *ext)
265 : {
266 1 : int ret=-1;
267 1 : char *tmppath=NULL;
268 1 : if(!(tmppath=prepend(o->path, ext)))
269 : {
270 0 : log_out_of_memory(__func__);
271 : goto end;
272 : }
273 1 : if(link(n->path, tmppath))
274 : {
275 0 : logp("Could not hardlink %s to %s: %s\n", tmppath, n->path,
276 0 : strerror(errno));
277 : goto end;
278 : }
279 1 : if((ret=do_rename(tmppath, o->path)))
280 : goto end;
281 1 : ret=0;
282 : end:
283 1 : free_w(&tmppath);
284 1 : return ret;
285 : }
286 :
287 : static void reset_old_file(struct file *oldfile, struct file *newfile,
288 : struct stat *info)
289 : {
290 : //printf("reset %s with %s %d\n", oldfile->path, newfile->path,
291 : // info->st_nlink);
292 0 : oldfile->nlink=info->st_nlink;
293 0 : free_w(&oldfile->path);
294 0 : oldfile->path=newfile->path;
295 0 : newfile->path=NULL;
296 : }
297 :
298 2 : static int check_files(struct mystruct *find, struct file *newfile,
299 0 : struct stat *info, const char *ext, unsigned int maxlinks)
300 : {
301 2 : int found=0;
302 2 : struct fzp *nfp=NULL;
303 2 : struct fzp *ofp=NULL;
304 2 : struct file *f=NULL;
305 :
306 2 : for(f=find->files; f; f=f->next)
307 : {
308 : //printf(" against: '%s'\n", f->path);
309 2 : if(!f->path)
310 : {
311 : // If the full_match() function fails to open oldfile
312 : // (which could happen if burp deleted some old
313 : // directories), it will free path and set it to NULL.
314 : // Skip entries like this.
315 0 : continue;
316 : }
317 2 : if(newfile->dev!=f->dev)
318 : {
319 : // Different device.
320 0 : continue;
321 : }
322 2 : if(newfile->ino==f->ino)
323 : {
324 : // Same device, same inode, therefore these two files
325 : // are hardlinked to each other already.
326 : found++;
327 : break;
328 : }
329 2 : if((!newfile->part_cksum && get_part_cksum(newfile, &nfp))
330 2 : || (!f->part_cksum && get_part_cksum(f, &ofp)))
331 : {
332 : // Some error with md5sums Give up.
333 : return -1;
334 : }
335 2 : if(newfile->part_cksum!=f->part_cksum)
336 : {
337 0 : fzp_close(&ofp);
338 0 : continue;
339 : }
340 : //printf(" %s, %s\n", find->files->path, newfile->path);
341 : //printf(" part cksum matched\n");
342 :
343 2 : if((!newfile->full_cksum && get_full_cksum(newfile, &nfp))
344 2 : || (!f->full_cksum && get_full_cksum(f, &ofp)))
345 : {
346 : // Some error with md5sums Give up.
347 : return -1;
348 : }
349 2 : if(newfile->full_cksum!=f->full_cksum)
350 : {
351 0 : fzp_close(&ofp);
352 0 : continue;
353 : }
354 :
355 : //printf(" full cksum matched\n");
356 2 : if(!full_match(newfile, f, &nfp, &ofp))
357 : {
358 0 : fzp_close(&ofp);
359 0 : continue;
360 : }
361 : //printf(" full match\n");
362 : //printf("%s, %s\n", find->files->path, newfile->path);
363 :
364 : // If there are already enough links to this file, replace
365 : // our memory of it with the new file so that files later on
366 : // can link to the new one.
367 2 : if(f->nlink>=maxlinks)
368 : {
369 : // Just need to reset the path name and the number
370 : // of links, and pretend that it was found otherwise
371 : // NULL newfile will get added to the memory.
372 0 : reset_old_file(f, newfile, info);
373 0 : found++;
374 0 : break;
375 : }
376 :
377 2 : found++;
378 2 : count++;
379 :
380 2 : if(verbose) printf("%s\n", newfile->path);
381 :
382 : // Now hardlink it.
383 2 : if(makelinks)
384 : {
385 1 : switch(do_hardlink(newfile, f, ext))
386 : {
387 : case 0:
388 1 : f->nlink++;
389 : // Only count bytes as saved if we
390 : // removed the last link.
391 1 : if(newfile->nlink==1)
392 1 : savedbytes+=info->st_size;
393 : break;
394 : case -1:
395 : // On error, replace the memory of the
396 : // old file with the one that we just
397 : // found. It might work better when
398 : // someone later tries to link to the
399 : // new one instead of the old one.
400 0 : reset_old_file(f, newfile, info);
401 0 : count--;
402 0 : break;
403 : default:
404 : // Abandon all hope.
405 : // This could happen if renaming the
406 : // hardlink failed in such a way that
407 : // the target file was unlinked without
408 : // being replaced - ie, if the max
409 : // number of hardlinks is being hit.
410 : return -1;
411 : }
412 : }
413 1 : else if(deletedups)
414 : {
415 0 : if(unlink(newfile->path))
416 : {
417 0 : logp("Could not delete %s: %s\n",
418 0 : newfile->path, strerror(errno));
419 : }
420 : else
421 : {
422 : // Only count bytes as saved if we removed the
423 : // last link.
424 0 : if(newfile->nlink==1)
425 0 : savedbytes+=info->st_size;
426 : }
427 : }
428 : else
429 : {
430 : // To be able to tell how many bytes
431 : // are saveable.
432 1 : savedbytes+=info->st_size;
433 : }
434 :
435 : break;
436 : }
437 2 : fzp_close(&nfp);
438 2 : fzp_close(&ofp);
439 :
440 2 : if(found)
441 : {
442 2 : free_w(&newfile->path);
443 2 : return 0;
444 : }
445 :
446 0 : if(add_file(find, newfile)) return -1;
447 :
448 0 : return 0;
449 : }
450 :
/*
 * Decide whether basedir looks like a protocol 1 backup directory.
 * Returns 1 if it does, 0 if it does not, -1 if out of memory.
 */
static int looks_like_protocol1(const char *basedir)
{
	int ret;
	char *current=prepend_s(basedir, "current");

	if(!current)
	{
		log_out_of_memory(__func__);
		return -1;
	}
	// If there is a 'current' symlink here, we think it looks like a
	// protocol 1 backup.
	ret=(is_lnk(current)>0)?1:0;
	free_w(&current);
	return ret;
}
472 :
/*
 * Read the target of the symlink 'lnk' inside 'basedir' into real[] (of size
 * r) and strip one trailing slash from it.
 * Always returns 0: the result of readlink_w_in_dir() is not checked, so a
 * missing link is not treated as an error here.
 *
 * NOTE(review): this relies on real[] being NUL-terminated when the link
 * cannot be read - the callers in this file pass zero-initialised static
 * buffers; confirm against readlink_w_in_dir().
 */
static int get_link(const char *basedir, const char *lnk, char real[], size_t r)
{
	size_t len;

	readlink_w_in_dir(basedir, lnk, real, r);
	// Strip any trailing slash. An empty result must not be indexed at
	// len-1, as that would touch the byte before the buffer.
	len=strlen(real);
	if(len>0 && real[len-1]=='/')
		real[len-1]='\0';
	return 0;
}
481 :
482 0 : static int level_exclusion(int level, const char *fname,
483 : const char *working, const char *finishing)
484 : {
485 0 : if(level==0)
486 : {
487 : /* Be careful not to try to dedup the lockfiles.
488 : The lock actually gets lost if you open one to do a
489 : checksum
490 : and then close it. This caused me major headaches to
491 : figure out. */
492 0 : if(!strcmp(fname, LOCKFILE_NAME)
493 0 : || !strcmp(fname, BEDUP_LOCKFILE_NAME))
494 : return 1;
495 :
496 : /* Skip places where backups are going on. */
497 0 : if(!strcmp(fname, working)
498 0 : || !strcmp(fname, finishing))
499 : return 1;
500 :
501 0 : if(!strcmp(fname, "deleteme"))
502 : return 1;
503 : }
504 0 : else if(level==1)
505 : {
506 : // Do not dedup stuff that might be appended to later.
507 0 : if(!strncmp(fname, "log", strlen("log"))
508 0 : || !strncmp(fname, "verifylog", strlen("verifylog"))
509 0 : || !strncmp(fname, "restorelog", strlen("restorelog")))
510 : return 1;
511 : }
512 0 : return 0;
513 : }
514 :
515 : // Return 0 for directory processed, -1 for error, 1 for not processed.
516 2 : static int process_dir(const char *oldpath, const char *newpath,
517 : const char *ext, unsigned int maxlinks, int burp_mode, int level)
518 : {
519 2 : int ret=-1;
520 2 : DIR *dirp=NULL;
521 2 : char *path=NULL;
522 : struct stat info;
523 2 : struct dirent *dirinfo=NULL;
524 : struct file newfile;
525 2 : struct mystruct *find=NULL;
526 : static char working[256]="";
527 : static char finishing[256]="";
528 :
529 2 : newfile.path=NULL;
530 :
531 2 : if(!(path=prepend_s(oldpath, newpath))) goto end;
532 :
533 2 : if(burp_mode && level==0)
534 : {
535 0 : if(get_link(path, "working", working, sizeof(working))
536 0 : || get_link(path, "finishing", finishing, sizeof(finishing)))
537 : goto end;
538 0 : if(!looks_like_protocol1(path))
539 : {
540 0 : logp("%s does not look like a protocol 1 storage directory - skipping\n", path);
541 0 : ret=1;
542 0 : goto end;
543 : }
544 : }
545 :
546 2 : if(!(dirp=opendir(path)))
547 : {
548 0 : logp("Could not opendir '%s': %s\n", path, strerror(errno));
549 0 : ret=1;
550 0 : goto end;
551 : }
552 10 : while((dirinfo=readdir(dirp)))
553 : {
554 8 : if(!strcmp(dirinfo->d_name, ".")
555 6 : || !strcmp(dirinfo->d_name, ".."))
556 4 : continue;
557 :
558 : //printf("try %s\n", dirinfo->d_name);
559 :
560 4 : if(burp_mode
561 0 : && level_exclusion(level, dirinfo->d_name,
562 : working, finishing))
563 0 : continue;
564 :
565 4 : free_w(&newfile.path);
566 4 : if(!(newfile.path=prepend_s(path, dirinfo->d_name)))
567 : goto end;
568 :
569 8 : if(lstat(newfile.path, &info))
570 0 : continue;
571 :
572 4 : if(S_ISDIR(info.st_mode))
573 : {
574 0 : if(process_dir(path, dirinfo->d_name, ext, maxlinks, burp_mode, level+1))
575 : goto end;
576 0 : continue;
577 : }
578 4 : else if(!S_ISREG(info.st_mode)
579 4 : || !info.st_size) // ignore zero-length files
580 0 : continue;
581 :
582 4 : newfile.dev=info.st_dev;
583 4 : newfile.ino=info.st_ino;
584 4 : newfile.nlink=info.st_nlink;
585 4 : newfile.full_cksum=0;
586 4 : newfile.part_cksum=0;
587 4 : newfile.next=NULL;
588 :
589 4 : if((find=find_key(info.st_size)))
590 : {
591 : //printf("check %d: %s\n", info.st_size, newfile.path);
592 2 : if(check_files(find, &newfile, &info, ext, maxlinks))
593 : goto end;
594 : }
595 : else
596 : {
597 : //printf("add: %s\n", newfile.path);
598 2 : if(add_key(info.st_size, &newfile))
599 : goto end;
600 : }
601 : }
602 : ret=0;
603 : end:
604 2 : closedir(dirp);
605 2 : free_w(&newfile.path);
606 2 : free_w(&path);
607 2 : return ret;
608 : }
609 :
610 0 : static void sighandler(__attribute__ ((unused)) int signum)
611 : {
612 0 : locks_release_and_free(&locklist);
613 0 : exit(1);
614 : }
615 :
/*
 * Report whether clientconfdir/file is a regular file. Symlinks are not
 * followed, because lstat() is used.
 * Returns non-zero for a regular file, 0 otherwise (including when the path
 * cannot be built or stat'ed).
 */
static int is_regular_file(const char *clientconfdir, const char *file)
{
	int regular=0;
	struct stat statp;
	char *fullpath=prepend_s(clientconfdir, file);

	if(!fullpath)
		return 0;
	if(!lstat(fullpath, &statp))
		regular=S_ISREG(statp.st_mode);
	free_w(&fullpath);
	return regular;
}
630 :
631 0 : static int in_group(struct strlist *grouplist, const char *dedup_group)
632 : {
633 : struct strlist *g;
634 :
635 0 : for(g=grouplist; g; g=g->next)
636 0 : if(!strcmp(g->path, dedup_group)) return 1;
637 :
638 : return 0;
639 : }
640 :
641 0 : static int iterate_over_clients(struct conf **globalcs,
642 : struct strlist *grouplist, const char *ext, unsigned int maxlinks)
643 : {
644 0 : int ret=0;
645 0 : DIR *dirp=NULL;
646 0 : struct conf **cconfs=NULL;
647 0 : struct dirent *dirinfo=NULL;
648 0 : const char *globalclientconfdir=get_string(globalcs[OPT_CLIENTCONFDIR]);
649 :
650 0 : signal(SIGABRT, &sighandler);
651 0 : signal(SIGTERM, &sighandler);
652 0 : signal(SIGINT, &sighandler);
653 :
654 0 : if(!(cconfs=confs_alloc())) return -1;
655 0 : if(confs_init(cconfs)) return -1;
656 :
657 0 : if(!(dirp=opendir(globalclientconfdir)))
658 : {
659 0 : logp("Could not opendir '%s': %s\n",
660 0 : globalclientconfdir, strerror(errno));
661 0 : return 0;
662 : }
663 0 : while((dirinfo=readdir(dirp)))
664 : {
665 0 : char *lockfile=NULL;
666 0 : char *lockfilebase=NULL;
667 0 : char *client_lockdir=NULL;
668 0 : struct lock *lock=NULL;
669 :
670 0 : if(dirinfo->d_ino==0
671 : // looks_like...() also avoids '.' and '..'.
672 0 : || looks_like_tmp_or_hidden_file(dirinfo->d_name)
673 0 : || !is_regular_file(globalclientconfdir, dirinfo->d_name))
674 0 : continue;
675 :
676 0 : confs_free_content(cconfs);
677 0 : if(confs_init(cconfs)) return -1;
678 :
679 0 : if(set_string(cconfs[OPT_CNAME], dirinfo->d_name))
680 : return -1;
681 :
682 0 : if(conf_load_clientconfdir(globalcs, cconfs))
683 : {
684 0 : logp("could not load config for client %s\n",
685 : dirinfo->d_name);
686 0 : return 0;
687 : }
688 :
689 0 : if(grouplist)
690 : {
691 0 : const char *dedup_group=
692 0 : get_string(cconfs[OPT_DEDUP_GROUP]);
693 0 : if(!dedup_group
694 0 : || !in_group(grouplist, dedup_group))
695 0 : continue;
696 : }
697 :
698 0 : if(!(client_lockdir=get_string(cconfs[OPT_CLIENT_LOCKDIR])))
699 0 : client_lockdir=get_string(cconfs[OPT_DIRECTORY]);
700 :
701 0 : if(!(lockfilebase=prepend_s(client_lockdir, dirinfo->d_name))
702 0 : || !(lockfile=prepend_s(lockfilebase, BEDUP_LOCKFILE_NAME)))
703 : {
704 0 : free_w(&lockfilebase);
705 0 : free_w(&lockfile);
706 0 : ret=-1;
707 0 : break;
708 : }
709 0 : free_w(&lockfilebase);
710 :
711 0 : if(!(lock=lock_alloc_and_init(lockfile)))
712 : {
713 : ret=-1;
714 : break;
715 : }
716 0 : lock_get(lock);
717 0 : free_w(&lockfile);
718 :
719 0 : if(lock->status!=GET_LOCK_GOT)
720 : {
721 0 : logp("Could not get %s\n", lock->path);
722 0 : continue;
723 : }
724 0 : logp("Got %s\n", lock->path);
725 :
726 : // Remember that we got that lock.
727 0 : lock_add_to_list(&locklist, lock);
728 :
729 0 : switch(process_dir(get_string(cconfs[OPT_DIRECTORY]),
730 : dirinfo->d_name,
731 : ext, maxlinks, 1 /* burp mode */, 0 /* level */))
732 : {
733 0 : case 0: ccount++;
734 0 : case 1: continue;
735 : default: ret=-1; break;
736 : }
737 : break;
738 : }
739 0 : closedir(dirp);
740 :
741 0 : locks_release_and_free(&locklist);
742 :
743 0 : confs_free(&cconfs);
744 :
745 0 : return ret;
746 : }
747 :
748 : static char *get_config_path(void)
749 : {
750 : static char path[256]="";
751 : snprintf(path, sizeof(path), "%s", SYSCONFDIR "/burp.conf");
752 : return path;
753 : }
754 :
755 2 : static int usage(void)
756 : {
757 2 : logfmt("\nUsage: %s [options]\n", prog);
758 2 : logfmt("\n");
759 2 : logfmt(" Options:\n");
760 2 : logfmt(" -c <path> Path to config file (default: %s).\n", get_config_path());
761 2 : logfmt(" -g <list of group names> Only run on the directories of clients that\n");
762 2 : logfmt(" are in one of the groups specified.\n");
763 2 : logfmt(" The list is comma-separated. To put a client in a\n");
764 2 : logfmt(" group, use the 'dedup_group' option in the client\n");
765 2 : logfmt(" configuration file on the server.\n");
766 2 : logfmt(" -h|-? Print this text and exit.\n");
767 2 : logfmt(" -d Delete any duplicate files found.\n");
768 2 : logfmt(" (non-burp mode only)\n");
769 2 : logfmt(" -l Hard link any duplicate files found.\n");
770 2 : logfmt(" -m <number> Maximum number of hard links to a single file.\n");
771 2 : logfmt(" (non-burp mode only - in burp mode, use the\n");
772 2 : logfmt(" max_hardlinks option in the configuration file)\n");
773 2 : logfmt(" The default is %d. On ext3, the maximum number\n", DEF_MAX_LINKS);
774 2 : logfmt(" of links possible is 32000, but space is needed\n");
775 2 : logfmt(" for the normal operation of burp.\n");
776 2 : logfmt(" -n <list of directories> Non-burp mode. Deduplicate any (set of) directories.\n");
777 2 : logfmt(" -v Print duplicate paths.\n");
778 2 : logfmt(" -V Print version and exit.\n");
779 2 : logfmt("\n");
780 2 : logfmt("By default, %s will read %s and deduplicate client storage\n", prog, get_config_path());
781 2 : logfmt("directories using special knowledge of the structure.\n");
782 2 : logfmt("\n");
783 2 : logfmt("With '-n', this knowledge is turned off and you have to specify the directories\n");
784 2 : logfmt("to deduplicate on the command line. Running with '-n' is therefore dangerous\n");
785 2 : logfmt("if you are deduplicating burp storage directories.\n\n");
786 2 : return 1;
787 : }
788 :
789 13 : int run_bedup(int argc, char *argv[])
790 : {
791 13 : int i=1;
792 13 : int ret=0;
793 13 : int option=0;
794 13 : int nonburp=0;
795 13 : unsigned int maxlinks=DEF_MAX_LINKS;
796 13 : char *groups=NULL;
797 13 : char ext[16]="";
798 13 : int givenconfigfile=0;
799 13 : const char *configfile=NULL;
800 :
801 13 : configfile=get_config_path();
802 13 : snprintf(ext, sizeof(ext), ".bedup.%d", getpid());
803 :
804 28 : while((option=getopt(argc, argv, "c:dg:hlm:nvV?"))!=-1)
805 : {
806 18 : switch(option)
807 : {
808 : case 'c':
809 1 : configfile=optarg;
810 1 : givenconfigfile=1;
811 1 : break;
812 : case 'd':
813 2 : deletedups=1;
814 2 : break;
815 : case 'g':
816 1 : groups=optarg;
817 1 : break;
818 : case 'l':
819 2 : makelinks=1;
820 2 : break;
821 : case 'm':
822 4 : maxlinks=atoi(optarg);
823 2 : break;
824 : case 'n':
825 6 : nonburp=1;
826 6 : break;
827 : case 'V':
828 1 : logfmt("%s-%s\n", prog, VERSION);
829 1 : return 0;
830 : case 'v':
831 1 : verbose=1;
832 1 : break;
833 : case 'h':
834 : case '?':
835 2 : return usage();
836 : }
837 : }
838 :
839 10 : if(nonburp && givenconfigfile)
840 : {
841 1 : logp("-n and -c options are mutually exclusive\n");
842 1 : return 1;
843 : }
844 9 : if(nonburp && groups)
845 : {
846 1 : logp("-n and -g options are mutually exclusive\n");
847 1 : return 1;
848 : }
849 8 : if(!nonburp && maxlinks!=DEF_MAX_LINKS)
850 : {
851 1 : logp("-m option is specified via the configuration file in burp mode (max_hardlinks=)\n");
852 1 : return 1;
853 : }
854 7 : if(deletedups && makelinks)
855 : {
856 1 : logp("-d and -l options are mutually exclusive\n");
857 1 : return 1;
858 : }
859 6 : if(deletedups && !nonburp)
860 : {
861 1 : logp("-d option requires -n option\n");
862 1 : return 1;
863 : }
864 :
865 5 : if(optind>=argc)
866 : {
867 1 : if(nonburp)
868 : {
869 1 : logp("No directories found after options\n");
870 1 : return 1;
871 : }
872 : }
873 : else
874 : {
875 4 : if(!nonburp)
876 : {
877 1 : logp("Do not specify extra arguments.\n");
878 1 : return 1;
879 : }
880 : }
881 :
882 3 : if(maxlinks<2)
883 : {
884 1 : logp("The argument to -m needs to be greater than 1.\n");
885 1 : return 1;
886 : }
887 :
888 2 : if(nonburp)
889 : {
890 : // Read directories from command line.
891 2 : for(i=optind; i<argc; i++)
892 : {
893 : // Strip trailing slashes, for tidiness.
894 2 : if(argv[i][strlen(argv[i])-1]=='/')
895 0 : argv[i][strlen(argv[i])-1]='\0';
896 2 : if(process_dir("", argv[i], ext, maxlinks,
897 : 0 /* not burp mode */, 0 /* level */))
898 : {
899 : ret=1;
900 : break;
901 : }
902 : }
903 : }
904 : else
905 : {
906 0 : struct conf **globalcs=NULL;
907 0 : struct strlist *grouplist=NULL;
908 0 : struct lock *globallock=NULL;
909 :
910 0 : if(groups)
911 : {
912 0 : char *tok=NULL;
913 0 : if((tok=strtok(groups, ",\n")))
914 : {
915 : do
916 : {
917 0 : if(strlist_add(&grouplist, tok, 1))
918 : {
919 0 : log_out_of_memory(__func__);
920 0 : return -1;
921 : }
922 0 : } while((tok=strtok(NULL, ",\n")));
923 : }
924 0 : if(!grouplist)
925 : {
926 0 : logp("unable to read list of groups\n");
927 0 : return -1;
928 : }
929 : }
930 :
931 : // Read directories from config files, and get locks.
932 0 : if(!(globalcs=confs_alloc())) return -1;
933 0 : if(confs_init(globalcs)) return -1;
934 0 : if(conf_load_global_only(configfile, globalcs)) return 1;
935 0 : if(get_e_burp_mode(globalcs[OPT_BURP_MODE])!=BURP_MODE_SERVER)
936 : {
937 0 : logp("%s is not a server config file\n", configfile);
938 0 : confs_free(&globalcs);
939 0 : return 1;
940 : }
941 0 : logp("Dedup clients from %s\n",
942 0 : get_string(globalcs[OPT_CLIENTCONFDIR]));
943 0 : maxlinks=get_int(globalcs[OPT_MAX_HARDLINKS]);
944 0 : if(grouplist)
945 : {
946 0 : struct strlist *g=NULL;
947 0 : logp("in dedup groups:\n");
948 0 : for(g=grouplist; g; g=g->next)
949 0 : logp("%s\n", g->path);
950 : }
951 : else
952 : {
953 0 : char *lockpath=NULL;
954 0 : const char *opt_lockfile=confs_get_lockfile(globalcs);
955 : // Only get the global lock when doing a global run.
956 : // If you are doing individual groups, you are likely
957 : // to want to do many different dedup jobs and a
958 : // global lock would get in the way.
959 0 : if(!(lockpath=prepend(opt_lockfile, ".bedup"))
960 0 : || !(globallock=lock_alloc_and_init(lockpath)))
961 0 : return 1;
962 0 : lock_get(globallock);
963 0 : if(globallock->status!=GET_LOCK_GOT)
964 : {
965 0 : logp("Could not get lock %s (%d)\n", lockpath,
966 : globallock->status);
967 0 : free_w(&lockpath);
968 0 : return 1;
969 : }
970 0 : logp("Got %s\n", lockpath);
971 : }
972 0 : ret=iterate_over_clients(globalcs, grouplist, ext, maxlinks);
973 0 : confs_free(&globalcs);
974 :
975 0 : lock_release(globallock);
976 0 : lock_free(&globallock);
977 0 : strlists_free(&grouplist);
978 : }
979 :
980 2 : if(!nonburp)
981 : {
982 0 : logp("%d client storages scanned\n", ccount);
983 : }
984 2 : logp("%" PRIu64 " duplicate %s found\n",
985 2 : count, count==1?"file":"files");
986 6 : logp("%" PRIu64 " bytes %s%s\n",
987 3 : savedbytes, (makelinks || deletedups)?"saved":"saveable",
988 : bytes_to_human(savedbytes));
989 2 : mystruct_delete_all();
990 2 : return ret;
991 : }
|