Line data Source code
1 : #include "../../burp.h"
2 : #include "../../alloc.h"
3 : #include "../../conf.h"
4 : #include "../../conffile.h"
5 : #include "../../handy.h"
6 : #include "../../fsops.h"
7 : #include "../../fzp.h"
8 : #include "../../lock.h"
9 : #include "../../log.h"
10 : #include "../../prepend.h"
11 : #include "../../strlist.h"
12 :
13 : #include <uthash.h>
14 :
15 : #define LOCKFILE_NAME "lockfile"
16 : #define BEDUP_LOCKFILE_NAME "lockfile.bedup"
17 :
18 : #define DEF_MAX_LINKS 10000
19 :
// Operating mode, set from the command line in run_bedup():
// -l sets makelinks, -d sets deletedups, -v sets verbose.
static int makelinks=0;
static int deletedups=0;
static int verbose=0;

// Running totals, reported at the end of run_bedup().
static uint64_t count=0;	// Duplicate files found.
static uint64_t savedbytes=0;	// Bytes saved (or saveable).
static int ccount=0;		// Client storages scanned.

// Locks obtained on client directories. Released when the scan finishes
// or from the signal handler.
static struct lock *locklist=NULL;
// One file seen during the scan. Files that share a size key are chained
// together through 'next'.
typedef struct file
{
	// Full path. Set to NULL once the entry is blanked or the path has
	// been handed over to another entry.
	char *path;
	// Identity and link count, as reported by lstat().
	dev_t dev;
	ino_t ino;
	nlink_t nlink;
	// Checksums. Zero means 'not computed yet'.
	uint64_t full_cksum;
	uint64_t part_cksum;
	struct file *next;
} file_t;
43 :
44 : struct mystruct
45 : {
46 : off_t st_size;
47 : file_t *files;
48 : UT_hash_handle hh;
49 : };
50 :
51 : struct mystruct *myfiles=NULL;
52 :
53 4 : struct mystruct *find_key(off_t st_size)
54 : {
55 : struct mystruct *s;
56 :
57 4 : HASH_FIND_INT(myfiles, &st_size, s);
58 4 : return s;
59 : }
60 :
61 2 : static int add_file(struct mystruct *s, struct file *f)
62 : {
63 : struct file *newfile;
64 2 : if(!(newfile=(struct file *)malloc_w(sizeof(struct file), __func__)))
65 : return -1;
66 : memcpy(newfile, f, sizeof(struct file));
67 2 : f->path=NULL;
68 2 : newfile->next=s->files;
69 2 : s->files=newfile;
70 : return 0;
71 : }
72 :
73 2 : static int add_key(off_t st_size, struct file *f)
74 : {
75 : struct mystruct *s;
76 :
77 2 : if(!(s=(struct mystruct *)malloc_w(sizeof(struct mystruct), __func__)))
78 : return -1;
79 2 : s->st_size=st_size;
80 2 : s->files=NULL;
81 2 : if(add_file(s, f)) return -1;
82 : //printf("HASH ADD %d\n", st_size);
83 6 : HASH_ADD_INT(myfiles, st_size, s);
84 : return 0;
85 : }
86 :
87 : static void file_free_content(struct file *file)
88 : {
89 2 : if(!file) return;
90 2 : free_w(&file->path);
91 : }
92 :
// Free a file entry together with its contents. NULL (or a pointer to NULL)
// is ignored.
static void file_free(struct file **file)
{
	if(file && *file)
	{
		file_free_content(*file);
		free_v((void **)file);
	}
}
99 :
100 2 : static void files_free(struct file **files)
101 : {
102 : struct file *f;
103 : struct file *fhead;
104 2 : if(!files || !*files) return;
105 : fhead=*files;
106 4 : while(fhead)
107 : {
108 2 : f=fhead;
109 2 : fhead=fhead->next;
110 2 : file_free(&f);
111 : }
112 : }
113 :
114 : static void mystruct_free_content(struct mystruct *mystruct)
115 : {
116 2 : if(!mystruct) return;
117 2 : files_free(&mystruct->files);
118 : }
119 :
// Free a hash entry together with its contents. NULL (or a pointer to NULL)
// is ignored.
static void mystruct_free(struct mystruct **mystruct)
{
	if(mystruct && *mystruct)
	{
		mystruct_free_content(*mystruct);
		free_v((void **)mystruct);
	}
}
126 :
127 2 : static void mystruct_delete_all(void)
128 : {
129 : struct mystruct *tmp;
130 : struct mystruct *mystruct;
131 :
132 4 : HASH_ITER(hh, myfiles, mystruct, tmp)
133 : {
134 2 : HASH_DEL(myfiles, mystruct);
135 2 : mystruct_free(&mystruct);
136 : }
137 2 : myfiles=NULL;
138 2 : }
139 :
140 : #define FULL_CHUNK 4096
141 :
142 2 : static int full_match(struct file *o, struct file *n,
143 : struct fzp **ofp, struct fzp **nfp)
144 : {
145 : size_t ogot;
146 : size_t ngot;
147 2 : unsigned int i=0;
148 : static char obuf[FULL_CHUNK];
149 : static char nbuf[FULL_CHUNK];
150 :
151 2 : if(*ofp) fzp_seek(*ofp, 0, SEEK_SET);
152 0 : else if(!(*ofp=fzp_open(o->path, "rb")))
153 : {
154 : // Blank this entry so that it can be ignored from
155 : // now on.
156 0 : free_w(&o->path);
157 : return 0;
158 : }
159 :
160 2 : if(*nfp) fzp_seek(*nfp, 0, SEEK_SET);
161 0 : else if(!(*nfp=fzp_open(n->path, "rb"))) return 0;
162 :
163 : while(1)
164 : {
165 2 : ogot=fzp_read(*ofp, obuf, FULL_CHUNK);
166 2 : ngot=fzp_read(*nfp, nbuf, FULL_CHUNK);
167 2 : if(ogot!=ngot) return 0;
168 20 : for(i=0; i<ogot; i++)
169 20 : if(obuf[i]!=nbuf[i]) return 0;
170 2 : if(ogot<FULL_CHUNK) break;
171 : }
172 :
173 : return 1;
174 : }
175 :
176 : #define PART_CHUNK 1024
177 :
178 4 : static int get_part_cksum(struct file *f, struct fzp **fzp)
179 : {
180 : MD5_CTX md5;
181 4 : int got=0;
182 : static char buf[PART_CHUNK];
183 : unsigned char checksum[MD5_DIGEST_LENGTH+1];
184 :
185 4 : if(*fzp) fzp_seek(*fzp, 0, SEEK_SET);
186 4 : else if(!(*fzp=fzp_open(f->path, "rb")))
187 : {
188 0 : f->part_cksum=0;
189 0 : return 0;
190 : }
191 :
192 4 : if(!MD5_Init(&md5))
193 : {
194 0 : logp("MD5_Init() failed\n");
195 0 : return -1;
196 : }
197 :
198 4 : got=fzp_read(*fzp, buf, PART_CHUNK);
199 :
200 4 : if(!MD5_Update(&md5, buf, got))
201 : {
202 0 : logp("MD5_Update() failed\n");
203 0 : return -1;
204 : }
205 :
206 4 : if(!MD5_Final(checksum, &md5))
207 : {
208 0 : logp("MD5_Final() failed\n");
209 0 : return -1;
210 : }
211 :
212 4 : memcpy(&(f->part_cksum), checksum, sizeof(unsigned));
213 :
214 : // Try for a bit of efficiency - no need to calculate the full checksum
215 : // again if we already read the whole file.
216 4 : if(got<PART_CHUNK) f->full_cksum=f->part_cksum;
217 :
218 : return 0;
219 : }
220 :
221 0 : static int get_full_cksum(struct file *f, struct fzp **fzp)
222 : {
223 0 : size_t s=0;
224 : MD5_CTX md5;
225 : static char buf[FULL_CHUNK];
226 : unsigned char checksum[MD5_DIGEST_LENGTH+1];
227 :
228 0 : if(*fzp) fzp_seek(*fzp, 0, SEEK_SET);
229 0 : else if(!(*fzp=fzp_open(f->path, "rb")))
230 : {
231 0 : f->full_cksum=0;
232 0 : return 0;
233 : }
234 :
235 0 : if(!MD5_Init(&md5))
236 : {
237 0 : logp("MD5_Init() failed\n");
238 0 : return -1;
239 : }
240 :
241 0 : while((s=fzp_read(*fzp, buf, FULL_CHUNK))>0)
242 : {
243 0 : if(!MD5_Update(&md5, buf, s))
244 : {
245 0 : logp("MD5_Update() failed\n");
246 0 : return -1;
247 : }
248 0 : if(s<FULL_CHUNK) break;
249 : }
250 :
251 0 : if(!MD5_Final(checksum, &md5))
252 : {
253 0 : logp("MD5_Final() failed\n");
254 0 : return -1;
255 : }
256 :
257 0 : memcpy(&(f->full_cksum), checksum, sizeof(unsigned));
258 :
259 0 : return 0;
260 : }
261 :
262 : /* Make it atomic by linking to a temporary file, then moving it into place. */
263 1 : static int do_hardlink(struct file *o, struct file *n, const char *ext)
264 : {
265 1 : int ret=-1;
266 1 : char *tmppath=NULL;
267 1 : if(!(tmppath=prepend(o->path, ext)))
268 : {
269 0 : log_out_of_memory(__func__);
270 : goto end;
271 : }
272 1 : if(link(n->path, tmppath))
273 : {
274 : logp("Could not hardlink %s to %s: %s\n", tmppath, n->path,
275 0 : strerror(errno));
276 : goto end;
277 : }
278 1 : if((ret=do_rename(tmppath, o->path)))
279 : goto end;
280 1 : ret=0;
281 : end:
282 1 : free_w(&tmppath);
283 1 : return ret;
284 : }
285 :
286 : static void reset_old_file(struct file *oldfile, struct file *newfile,
287 : struct stat *info)
288 : {
289 : //printf("reset %s with %s %d\n", oldfile->path, newfile->path,
290 : // info->st_nlink);
291 0 : oldfile->nlink=info->st_nlink;
292 0 : free_w(&oldfile->path);
293 0 : oldfile->path=newfile->path;
294 0 : newfile->path=NULL;
295 : }
296 :
297 2 : static int check_files(struct mystruct *find, struct file *newfile,
298 0 : struct stat *info, const char *ext, unsigned int maxlinks)
299 : {
300 2 : int found=0;
301 2 : struct fzp *nfp=NULL;
302 2 : struct fzp *ofp=NULL;
303 2 : struct file *f=NULL;
304 :
305 2 : for(f=find->files; f; f=f->next)
306 : {
307 : //printf(" against: '%s'\n", f->path);
308 2 : if(!f->path)
309 : {
310 : // If the full_match() function fails to open oldfile
311 : // (which could happen if burp deleted some old
312 : // directories), it will free path and set it to NULL.
313 : // Skip entries like this.
314 : continue;
315 : }
316 2 : if(newfile->dev!=f->dev)
317 : {
318 : // Different device.
319 : continue;
320 : }
321 2 : if(newfile->ino==f->ino)
322 : {
323 : // Same device, same inode, therefore these two files
324 : // are hardlinked to each other already.
325 : found++;
326 : break;
327 : }
328 6 : if((!newfile->part_cksum && get_part_cksum(newfile, &nfp))
329 4 : || (!f->part_cksum && get_part_cksum(f, &ofp)))
330 : {
331 : // Some error with md5sums Give up.
332 : return -1;
333 : }
334 2 : if(newfile->part_cksum!=f->part_cksum)
335 : {
336 0 : fzp_close(&ofp);
337 0 : continue;
338 : }
339 : //printf(" %s, %s\n", find->files->path, newfile->path);
340 : //printf(" part cksum matched\n");
341 :
342 4 : if((!newfile->full_cksum && get_full_cksum(newfile, &nfp))
343 4 : || (!f->full_cksum && get_full_cksum(f, &ofp)))
344 : {
345 : // Some error with md5sums Give up.
346 : return -1;
347 : }
348 2 : if(newfile->full_cksum!=f->full_cksum)
349 : {
350 0 : fzp_close(&ofp);
351 0 : continue;
352 : }
353 :
354 : //printf(" full cksum matched\n");
355 2 : if(!full_match(newfile, f, &nfp, &ofp))
356 : {
357 0 : fzp_close(&ofp);
358 0 : continue;
359 : }
360 : //printf(" full match\n");
361 : //printf("%s, %s\n", find->files->path, newfile->path);
362 :
363 : // If there are already enough links to this file, replace
364 : // our memory of it with the new file so that files later on
365 : // can link to the new one.
366 2 : if(f->nlink>=maxlinks)
367 : {
368 : // Just need to reset the path name and the number
369 : // of links, and pretend that it was found otherwise
370 : // NULL newfile will get added to the memory.
371 0 : reset_old_file(f, newfile, info);
372 0 : found++;
373 0 : break;
374 : }
375 :
376 2 : found++;
377 2 : count++;
378 :
379 2 : if(verbose) printf("%s\n", newfile->path);
380 :
381 : // Now hardlink it.
382 2 : if(makelinks)
383 : {
384 1 : switch(do_hardlink(newfile, f, ext))
385 : {
386 : case 0:
387 1 : f->nlink++;
388 : // Only count bytes as saved if we
389 : // removed the last link.
390 1 : if(newfile->nlink==1)
391 1 : savedbytes+=info->st_size;
392 : break;
393 : case -1:
394 : // On error, replace the memory of the
395 : // old file with the one that we just
396 : // found. It might work better when
397 : // someone later tries to link to the
398 : // new one instead of the old one.
399 0 : reset_old_file(f, newfile, info);
400 0 : count--;
401 0 : break;
402 : default:
403 : // Abandon all hope.
404 : // This could happen if renaming the
405 : // hardlink failed in such a way that
406 : // the target file was unlinked without
407 : // being replaced - ie, if the max
408 : // number of hardlinks is being hit.
409 : return -1;
410 : }
411 : }
412 1 : else if(deletedups)
413 : {
414 0 : if(unlink(newfile->path))
415 : {
416 : logp("Could not delete %s: %s\n",
417 0 : newfile->path, strerror(errno));
418 : }
419 : else
420 : {
421 : // Only count bytes as saved if we removed the
422 : // last link.
423 0 : if(newfile->nlink==1)
424 0 : savedbytes+=info->st_size;
425 : }
426 : }
427 : else
428 : {
429 : // To be able to tell how many bytes
430 : // are saveable.
431 1 : savedbytes+=info->st_size;
432 : }
433 :
434 : break;
435 : }
436 2 : fzp_close(&nfp);
437 2 : fzp_close(&ofp);
438 :
439 2 : if(found)
440 : {
441 2 : free_w(&newfile->path);
442 2 : return 0;
443 : }
444 :
445 0 : if(add_file(find, newfile)) return -1;
446 :
447 0 : return 0;
448 : }
449 :
// Decide whether 'basedir' looks like a protocol 1 storage directory.
// Returns 1 if it does, 0 if it does not, -1 if out of memory.
static int looks_like_protocol1(const char *basedir)
{
	int ret;
	struct stat statp;
	char *tmp=prepend_s(basedir, "current");

	if(!tmp)
	{
		log_out_of_memory(__func__);
		return -1;
	}
	// If there is a 'current' symlink here, we think it looks like a
	// protocol 1 backup.
	ret=(!lstat(tmp, &statp) && S_ISLNK(statp.st_mode))?1:0;
	free_w(&tmp);
	return ret;
}
472 :
// Read the target of the symlink 'lnk' inside 'basedir' into 'real', which
// has room for 'r' bytes. One trailing slash is stripped from the target.
// If the link does not exist or cannot be read, 'real' is set to the empty
// string.
// Returns 0 on success, -1 if out of memory or if 'real' has no room at all.
static int get_link(const char *basedir, const char *lnk, char real[], size_t r)
{
	ssize_t len=0;
	size_t reallen=0;
	char *tmp=NULL;

	// Need room for the terminating nul at the very least.
	if(!r) return -1;

	if(!(tmp=prepend_s(basedir, lnk)))
	{
		log_out_of_memory(__func__);
		return -1;
	}
	if((len=readlink(tmp, real, r-1))<0) len=0;
	real[len]='\0';
	free_w(&tmp);
	// Strip any trailing slash. Guard against the empty string, where
	// there is no last character to look at.
	reallen=strlen(real);
	if(reallen && real[reallen-1]=='/') real[reallen-1]='\0';
	return 0;
}
489 :
490 0 : static int level_exclusion(int level, const char *fname,
491 : const char *working, const char *finishing)
492 : {
493 0 : if(level==0)
494 : {
495 : /* Be careful not to try to dedup the lockfiles.
496 : The lock actually gets lost if you open one to do a
497 : checksum
498 : and then close it. This caused me major headaches to
499 : figure out. */
500 0 : if(!strcmp(fname, LOCKFILE_NAME)
501 0 : || !strcmp(fname, BEDUP_LOCKFILE_NAME))
502 : return 1;
503 :
504 : /* Skip places where backups are going on. */
505 0 : if(!strcmp(fname, working)
506 0 : || !strcmp(fname, finishing))
507 : return 1;
508 :
509 0 : if(!strcmp(fname, "deleteme"))
510 : return 1;
511 : }
512 0 : else if(level==1)
513 : {
514 : // Do not dedup stuff that might be appended to later.
515 0 : if(!strncmp(fname, "log", strlen("log"))
516 0 : || !strncmp(fname, "verifylog", strlen("verifylog"))
517 0 : || !strncmp(fname, "restorelog", strlen("restorelog")))
518 : return 1;
519 : }
520 0 : return 0;
521 : }
522 :
523 : // Return 0 for directory processed, -1 for error, 1 for not processed.
524 2 : static int process_dir(const char *oldpath, const char *newpath,
525 : const char *ext, unsigned int maxlinks, int burp_mode, int level)
526 : {
527 2 : int ret=-1;
528 2 : DIR *dirp=NULL;
529 2 : char *path=NULL;
530 : struct stat info;
531 2 : struct dirent *dirinfo=NULL;
532 : struct file newfile;
533 2 : struct mystruct *find=NULL;
534 : static char working[256]="";
535 : static char finishing[256]="";
536 :
537 2 : newfile.path=NULL;
538 :
539 2 : if(!(path=prepend_s(oldpath, newpath))) goto end;
540 :
541 2 : if(burp_mode && level==0)
542 : {
543 0 : if(get_link(path, "working", working, sizeof(working))
544 0 : || get_link(path, "finishing", finishing, sizeof(finishing)))
545 : goto end;
546 0 : if(!looks_like_protocol1(path))
547 : {
548 0 : logp("%s does not look like a protocol 1 storage directory - skipping\n", path);
549 0 : ret=1;
550 0 : goto end;
551 : }
552 : }
553 :
554 2 : if(!(dirp=opendir(path)))
555 : {
556 0 : logp("Could not opendir '%s': %s\n", path, strerror(errno));
557 0 : ret=1;
558 0 : goto end;
559 : }
560 10 : while((dirinfo=readdir(dirp)))
561 : {
562 8 : if(!strcmp(dirinfo->d_name, ".")
563 6 : || !strcmp(dirinfo->d_name, ".."))
564 : continue;
565 :
566 : //printf("try %s\n", dirinfo->d_name);
567 :
568 4 : if(burp_mode
569 4 : && level_exclusion(level, dirinfo->d_name,
570 0 : working, finishing))
571 : continue;
572 :
573 4 : free_w(&newfile.path);
574 4 : if(!(newfile.path=prepend_s(path, dirinfo->d_name)))
575 : goto end;
576 :
577 8 : if(lstat(newfile.path, &info))
578 : continue;
579 :
580 4 : if(S_ISDIR(info.st_mode))
581 : {
582 0 : if(process_dir(path, dirinfo->d_name, ext, maxlinks, burp_mode, level+1))
583 : goto end;
584 : continue;
585 : }
586 4 : else if(!S_ISREG(info.st_mode)
587 4 : || !info.st_size) // ignore zero-length files
588 : continue;
589 :
590 4 : newfile.dev=info.st_dev;
591 4 : newfile.ino=info.st_ino;
592 4 : newfile.nlink=info.st_nlink;
593 4 : newfile.full_cksum=0;
594 4 : newfile.part_cksum=0;
595 4 : newfile.next=NULL;
596 :
597 4 : if((find=find_key(info.st_size)))
598 : {
599 : //printf("check %d: %s\n", info.st_size, newfile.path);
600 2 : if(check_files(find, &newfile, &info, ext, maxlinks))
601 : goto end;
602 : }
603 : else
604 : {
605 : //printf("add: %s\n", newfile.path);
606 2 : if(add_key(info.st_size, &newfile))
607 : goto end;
608 : }
609 : }
610 : ret=0;
611 : end:
612 2 : closedir(dirp);
613 2 : free_w(&newfile.path);
614 2 : free_w(&path);
615 2 : return ret;
616 : }
617 :
618 0 : static void sighandler(int signum)
619 : {
620 0 : locks_release_and_free(&locklist);
621 0 : exit(1);
622 : }
623 :
// Check whether clientconfdir/file is a regular file (symlinks are not
// followed). Returns non-zero if it is, 0 if it is not, or if it could not
// be checked.
static int is_regular_file(const char *clientconfdir, const char *file)
{
	int regular=0;
	struct stat statp;
	char *fullpath=prepend_s(clientconfdir, file);

	if(fullpath)
	{
		if(!lstat(fullpath, &statp))
			regular=S_ISREG(statp.st_mode);
		free_w(&fullpath);
	}
	return regular;
}
638 :
639 0 : static int in_group(struct strlist *grouplist, const char *dedup_group)
640 : {
641 : struct strlist *g;
642 :
643 0 : for(g=grouplist; g; g=g->next)
644 0 : if(!strcmp(g->path, dedup_group)) return 1;
645 :
646 : return 0;
647 : }
648 :
649 0 : static int iterate_over_clients(struct conf **globalcs,
650 : struct strlist *grouplist, const char *ext, unsigned int maxlinks)
651 : {
652 0 : int ret=0;
653 0 : DIR *dirp=NULL;
654 0 : struct conf **cconfs=NULL;
655 0 : struct dirent *dirinfo=NULL;
656 0 : const char *globalclientconfdir=get_string(globalcs[OPT_CLIENTCONFDIR]);
657 :
658 0 : signal(SIGABRT, &sighandler);
659 0 : signal(SIGTERM, &sighandler);
660 0 : signal(SIGINT, &sighandler);
661 :
662 0 : if(!(cconfs=confs_alloc())) return -1;
663 0 : if(confs_init(cconfs)) return -1;
664 :
665 0 : if(!(dirp=opendir(globalclientconfdir)))
666 : {
667 : logp("Could not opendir '%s': %s\n",
668 0 : globalclientconfdir, strerror(errno));
669 0 : return 0;
670 : }
671 0 : while((dirinfo=readdir(dirp)))
672 : {
673 0 : char *lockfile=NULL;
674 0 : char *lockfilebase=NULL;
675 0 : char *client_lockdir=NULL;
676 0 : struct lock *lock=NULL;
677 :
678 0 : if(dirinfo->d_ino==0
679 : // looks_like...() also avoids '.' and '..'.
680 0 : || looks_like_tmp_or_hidden_file(dirinfo->d_name)
681 0 : || !is_regular_file(globalclientconfdir, dirinfo->d_name))
682 0 : continue;
683 :
684 0 : confs_free_content(cconfs);
685 0 : if(confs_init(cconfs)) return -1;
686 :
687 0 : if(set_string(cconfs[OPT_CNAME], dirinfo->d_name))
688 : return -1;
689 :
690 0 : if(conf_load_clientconfdir(globalcs, cconfs))
691 : {
692 : logp("could not load config for client %s\n",
693 0 : dirinfo->d_name);
694 0 : return 0;
695 : }
696 :
697 0 : if(grouplist)
698 : {
699 : const char *dedup_group=
700 0 : get_string(cconfs[OPT_DEDUP_GROUP]);
701 0 : if(!dedup_group
702 0 : || !in_group(grouplist, dedup_group))
703 : continue;
704 : }
705 :
706 0 : if(!(client_lockdir=get_string(cconfs[OPT_CLIENT_LOCKDIR])))
707 0 : client_lockdir=get_string(cconfs[OPT_DIRECTORY]);
708 :
709 0 : if(!(lockfilebase=prepend_s(client_lockdir, dirinfo->d_name))
710 0 : || !(lockfile=prepend_s(lockfilebase, BEDUP_LOCKFILE_NAME)))
711 : {
712 0 : free_w(&lockfilebase);
713 0 : free_w(&lockfile);
714 0 : ret=-1;
715 0 : break;
716 : }
717 0 : free_w(&lockfilebase);
718 :
719 0 : if(!(lock=lock_alloc_and_init(lockfile)))
720 : {
721 : ret=-1;
722 : break;
723 : }
724 0 : lock_get(lock);
725 0 : free_w(&lockfile);
726 :
727 0 : if(lock->status!=GET_LOCK_GOT)
728 : {
729 0 : logp("Could not get %s\n", lock->path);
730 0 : continue;
731 : }
732 0 : logp("Got %s\n", lock->path);
733 :
734 : // Remember that we got that lock.
735 0 : lock_add_to_list(&locklist, lock);
736 :
737 0 : switch(process_dir(get_string(cconfs[OPT_DIRECTORY]),
738 : dirinfo->d_name,
739 : ext, maxlinks, 1 /* burp mode */, 0 /* level */))
740 : {
741 0 : case 0: ccount++;
742 : case 1: continue;
743 : default: ret=-1; break;
744 : }
745 : break;
746 : }
747 0 : closedir(dirp);
748 :
749 0 : locks_release_and_free(&locklist);
750 :
751 0 : confs_free(&cconfs);
752 :
753 0 : return ret;
754 : }
755 :
756 : static char *get_config_path(void)
757 : {
758 : static char path[256]="";
759 : snprintf(path, sizeof(path), "%s", SYSCONFDIR "/burp.conf");
760 : return path;
761 : }
762 :
763 2 : static int usage(void)
764 : {
765 2 : logf("\nUsage: %s [options]\n", prog);
766 2 : logf("\n");
767 2 : logf(" Options:\n");
768 2 : logf(" -c <path> Path to config file (default: %s).\n", get_config_path());
769 2 : logf(" -g <list of group names> Only run on the directories of clients that\n");
770 2 : logf(" are in one of the groups specified.\n");
771 2 : logf(" The list is comma-separated. To put a client in a\n");
772 2 : logf(" group, use the 'dedup_group' option in the client\n");
773 2 : logf(" configuration file on the server.\n");
774 2 : logf(" -h|-? Print this text and exit.\n");
775 2 : logf(" -d Delete any duplicate files found.\n");
776 2 : logf(" (non-burp mode only)\n");
777 2 : logf(" -l Hard link any duplicate files found.\n");
778 2 : logf(" -m <number> Maximum number of hard links to a single file.\n");
779 2 : logf(" (non-burp mode only - in burp mode, use the\n");
780 2 : logf(" max_hardlinks option in the configuration file)\n");
781 2 : logf(" The default is %d. On ext3, the maximum number\n", DEF_MAX_LINKS);
782 2 : logf(" of links possible is 32000, but space is needed\n");
783 2 : logf(" for the normal operation of burp.\n");
784 2 : logf(" -n <list of directories> Non-burp mode. Deduplicate any (set of) directories.\n");
785 2 : logf(" -v Print duplicate paths.\n");
786 2 : logf(" -V Print version and exit.\n");
787 2 : logf("\n");
788 2 : logf("By default, %s will read %s and deduplicate client storage\n", prog, get_config_path());
789 2 : logf("directories using special knowledge of the structure.\n");
790 2 : logf("\n");
791 2 : logf("With '-n', this knowledge is turned off and you have to specify the directories\n");
792 2 : logf("to deduplicate on the command line. Running with '-n' is therefore dangerous\n");
793 2 : logf("if you are deduplicating burp storage directories.\n\n");
794 2 : return 1;
795 : }
796 :
797 13 : int run_bedup(int argc, char *argv[])
798 : {
799 13 : int i=1;
800 13 : int ret=0;
801 13 : int option=0;
802 13 : int nonburp=0;
803 13 : unsigned int maxlinks=DEF_MAX_LINKS;
804 13 : char *groups=NULL;
805 13 : char ext[16]="";
806 13 : int givenconfigfile=0;
807 13 : const char *configfile=NULL;
808 :
809 13 : configfile=get_config_path();
810 13 : snprintf(ext, sizeof(ext), ".bedup.%d", getpid());
811 :
812 28 : while((option=getopt(argc, argv, "c:dg:hlm:nvV?"))!=-1)
813 : {
814 18 : switch(option)
815 : {
816 : case 'c':
817 1 : configfile=optarg;
818 1 : givenconfigfile=1;
819 1 : break;
820 : case 'd':
821 2 : deletedups=1;
822 2 : break;
823 : case 'g':
824 1 : groups=optarg;
825 1 : break;
826 : case 'l':
827 2 : makelinks=1;
828 2 : break;
829 : case 'm':
830 4 : maxlinks=atoi(optarg);
831 2 : break;
832 : case 'n':
833 6 : nonburp=1;
834 6 : break;
835 : case 'V':
836 1 : logf("%s-%s\n", prog, VERSION);
837 1 : return 0;
838 : case 'v':
839 1 : verbose=1;
840 1 : break;
841 : case 'h':
842 : case '?':
843 2 : return usage();
844 : }
845 : }
846 :
847 10 : if(nonburp && givenconfigfile)
848 : {
849 1 : logp("-n and -c options are mutually exclusive\n");
850 1 : return 1;
851 : }
852 9 : if(nonburp && groups)
853 : {
854 1 : logp("-n and -g options are mutually exclusive\n");
855 1 : return 1;
856 : }
857 8 : if(!nonburp && maxlinks!=DEF_MAX_LINKS)
858 : {
859 1 : logp("-m option is specified via the configuration file in burp mode (max_hardlinks=)\n");
860 1 : return 1;
861 : }
862 7 : if(deletedups && makelinks)
863 : {
864 1 : logp("-d and -l options are mutually exclusive\n");
865 1 : return 1;
866 : }
867 6 : if(deletedups && !nonburp)
868 : {
869 1 : logp("-d option requires -n option\n");
870 1 : return 1;
871 : }
872 :
873 5 : if(optind>=argc)
874 : {
875 1 : if(nonburp)
876 : {
877 1 : logp("No directories found after options\n");
878 1 : return 1;
879 : }
880 : }
881 : else
882 : {
883 4 : if(!nonburp)
884 : {
885 1 : logp("Do not specify extra arguments.\n");
886 1 : return 1;
887 : }
888 : }
889 :
890 3 : if(maxlinks<2)
891 : {
892 1 : logp("The argument to -m needs to be greater than 1.\n");
893 1 : return 1;
894 : }
895 :
896 2 : if(nonburp)
897 : {
898 : // Read directories from command line.
899 2 : for(i=optind; i<argc; i++)
900 : {
901 : // Strip trailing slashes, for tidiness.
902 2 : if(argv[i][strlen(argv[i])-1]=='/')
903 0 : argv[i][strlen(argv[i])-1]='\0';
904 2 : if(process_dir("", argv[i], ext, maxlinks,
905 2 : 0 /* not burp mode */, 0 /* level */))
906 : {
907 : ret=1;
908 : break;
909 : }
910 : }
911 : }
912 : else
913 : {
914 0 : struct conf **globalcs=NULL;
915 0 : struct strlist *grouplist=NULL;
916 0 : struct lock *globallock=NULL;
917 :
918 0 : if(groups)
919 : {
920 0 : char *tok=NULL;
921 0 : if((tok=strtok(groups, ",\n")))
922 : {
923 0 : do
924 : {
925 0 : if(strlist_add(&grouplist, tok, 1))
926 : {
927 0 : log_out_of_memory(__func__);
928 0 : return -1;
929 : }
930 : } while((tok=strtok(NULL, ",\n")));
931 : }
932 0 : if(!grouplist)
933 : {
934 0 : logp("unable to read list of groups\n");
935 0 : return -1;
936 : }
937 : }
938 :
939 : // Read directories from config files, and get locks.
940 0 : if(!(globalcs=confs_alloc())) return -1;
941 0 : if(confs_init(globalcs)) return -1;
942 0 : if(conf_load_global_only(configfile, globalcs)) return 1;
943 0 : if(get_e_burp_mode(globalcs[OPT_BURP_MODE])!=BURP_MODE_SERVER)
944 : {
945 0 : logp("%s is not a server config file\n", configfile);
946 0 : confs_free(&globalcs);
947 0 : return 1;
948 : }
949 : logp("Dedup clients from %s\n",
950 0 : get_string(globalcs[OPT_CLIENTCONFDIR]));
951 0 : maxlinks=get_int(globalcs[OPT_MAX_HARDLINKS]);
952 0 : if(grouplist)
953 : {
954 0 : struct strlist *g=NULL;
955 0 : logp("in dedup groups:\n");
956 0 : for(g=grouplist; g; g=g->next)
957 0 : logp("%s\n", g->path);
958 : }
959 : else
960 : {
961 0 : char *lockpath=NULL;
962 0 : const char *opt_lockfile=confs_get_lockfile(globalcs);
963 : // Only get the global lock when doing a global run.
964 : // If you are doing individual groups, you are likely
965 : // to want to do many different dedup jobs and a
966 : // global lock would get in the way.
967 0 : if(!(lockpath=prepend(opt_lockfile, ".bedup"))
968 0 : || !(globallock=lock_alloc_and_init(lockpath)))
969 0 : return 1;
970 0 : lock_get(globallock);
971 0 : if(globallock->status!=GET_LOCK_GOT)
972 : {
973 : logp("Could not get lock %s (%d)\n", lockpath,
974 0 : globallock->status);
975 0 : free_w(&lockpath);
976 0 : return 1;
977 : }
978 0 : logp("Got %s\n", lockpath);
979 : }
980 0 : ret=iterate_over_clients(globalcs, grouplist, ext, maxlinks);
981 0 : confs_free(&globalcs);
982 :
983 0 : lock_release(globallock);
984 0 : lock_free(&globallock);
985 0 : strlists_free(&grouplist);
986 : }
987 :
988 2 : if(!nonburp)
989 : {
990 0 : logp("%d client storages scanned\n", ccount);
991 : }
992 : logp("%llu duplicate %s found\n",
993 2 : count, count==1?"file":"files");
994 : logp("%llu bytes %s%s\n",
995 1 : savedbytes, (makelinks || deletedups)?"saved":"saveable",
996 4 : bytes_to_human(savedbytes));
997 2 : mystruct_delete_all();
998 2 : return ret;
999 : }
|