Line data Source code
1 : #include "../burp.h"
2 : #include "../alloc.h"
3 : #include "../conf.h"
4 : #include "../conffile.h"
5 : #include "../handy.h"
6 : #include "../fsops.h"
7 : #include "../fzp.h"
8 : #include "../lock.h"
9 : #include "../log.h"
10 : #include "../md5.h"
11 : #include "../prepend.h"
12 : #include "../strlist.h"
13 : #include "bedup.h"
14 :
15 : #include <uthash.h>
16 :
17 : #define LOCKFILE_NAME "lockfile"
18 : #define BEDUP_LOCKFILE_NAME "lockfile.bedup"
19 :
20 : #define DEF_MAX_LINKS 10000
21 :
22 : static int makelinks=0;
23 : static int deletedups=0;
24 :
25 : static uint64_t savedbytes=0;
26 : static uint64_t count=0;
27 : static int ccount=0;
28 :
29 : static struct lock *locklist=NULL;
30 :
31 : static int verbose=0;
32 :
33 : static unsigned int maxlinks=DEF_MAX_LINKS;
34 : static char ext[16]="";
35 :
typedef struct file file_t;

/* One regular file found during the scan. Files of the same size are chained
   together through 'next'. */
struct file
{
	// Heap allocated path. Entries whose path is NULL are skipped.
	char *path;
	// Identity and link count, taken from lstat().
	dev_t dev;
	ino_t ino;
	nlink_t nlink;
	// Checksums. Zero means 'not calculated yet'.
	uint64_t full_cksum;
	uint64_t part_cksum;
	struct file *next;
};
48 :
49 : struct mystruct
50 : {
51 : off_t st_size;
52 : file_t *files;
53 : UT_hash_handle hh;
54 : };
55 :
56 : struct mystruct *myfiles=NULL;
57 :
58 9 : static struct mystruct *find_key(off_t st_size)
59 : {
60 : struct mystruct *s;
61 :
62 9 : HASH_FIND_INT(myfiles, &st_size, s);
63 9 : return s;
64 : }
65 :
66 3 : static int add_file(struct mystruct *s, struct file *f)
67 : {
68 : struct file *newfile;
69 3 : if(!(newfile=(struct file *)malloc_w(sizeof(struct file), __func__)))
70 : return -1;
71 3 : memcpy(newfile, f, sizeof(struct file));
72 3 : f->path=NULL;
73 3 : newfile->next=s->files;
74 3 : s->files=newfile;
75 : return 0;
76 : }
77 :
78 3 : static int add_key(off_t st_size, struct file *f)
79 : {
80 : struct mystruct *s;
81 :
82 3 : if(!(s=(struct mystruct *)malloc_w(sizeof(struct mystruct), __func__)))
83 : return -1;
84 3 : s->st_size=st_size;
85 3 : s->files=NULL;
86 3 : if(add_file(s, f)) return -1;
87 : //printf("HASH ADD %d\n", st_size);
88 3 : HASH_ADD_INT(myfiles, st_size, s);
89 : return 0;
90 : }
91 :
92 : static void file_free_content(struct file *file)
93 : {
94 : if(!file) return;
95 3 : free_w(&file->path);
96 : }
97 :
/* Free one file entry together with its content. Nothing happens when either
   the handle or the entry it points at is NULL. */
static void file_free(struct file **file)
{
	if(file && *file)
	{
		file_free_content(*file);
		free_v((void **)file);
	}
}
104 :
105 3 : static void files_free(struct file **files)
106 : {
107 : struct file *f;
108 : struct file *fhead;
109 3 : if(!files || !*files) return;
110 : fhead=*files;
111 6 : while(fhead)
112 : {
113 3 : f=fhead;
114 3 : fhead=fhead->next;
115 3 : file_free(&f);
116 : }
117 : }
118 :
119 : static void mystruct_free_content(struct mystruct *mystruct)
120 : {
121 : if(!mystruct) return;
122 3 : files_free(&mystruct->files);
123 : }
124 :
/* Free a hash entry and everything it contains. Nothing happens when either
   the handle or the entry it points at is NULL. */
static void mystruct_free(struct mystruct **mystruct)
{
	if(mystruct && *mystruct)
	{
		mystruct_free_content(*mystruct);
		free_v((void **)mystruct);
	}
}
131 :
132 3 : static void mystruct_delete_all(void)
133 : {
134 : struct mystruct *tmp;
135 : struct mystruct *mystruct;
136 :
137 6 : HASH_ITER(hh, myfiles, mystruct, tmp)
138 : {
139 3 : HASH_DEL(myfiles, mystruct);
140 3 : mystruct_free(&mystruct);
141 : }
142 3 : myfiles=NULL;
143 3 : }
144 :
145 : #define FULL_CHUNK 4096
146 :
147 6 : static int full_match(struct file *o, struct file *n,
148 : struct fzp **ofp, struct fzp **nfp)
149 : {
150 : size_t ogot;
151 : size_t ngot;
152 6 : unsigned int i=0;
153 : static char obuf[FULL_CHUNK];
154 : static char nbuf[FULL_CHUNK];
155 :
156 6 : if(*ofp) fzp_seek(*ofp, 0, SEEK_SET);
157 0 : else if(!(*ofp=fzp_open(o->path, "rb")))
158 : {
159 : // Blank this entry so that it can be ignored from
160 : // now on.
161 0 : free_w(&o->path);
162 : return 0;
163 : }
164 :
165 6 : if(*nfp) fzp_seek(*nfp, 0, SEEK_SET);
166 3 : else if(!(*nfp=fzp_open(n->path, "rb"))) return 0;
167 :
168 : while(1)
169 : {
170 6 : ogot=fzp_read(*ofp, obuf, FULL_CHUNK);
171 6 : ngot=fzp_read(*nfp, nbuf, FULL_CHUNK);
172 6 : if(ogot!=ngot) return 0;
173 60 : for(i=0; i<ogot; i++)
174 60 : if(obuf[i]!=nbuf[i]) return 0;
175 6 : if(ogot<FULL_CHUNK) break;
176 : }
177 :
178 : return 1;
179 : }
180 :
181 : #define PART_CHUNK 1024
182 :
183 9 : static int get_part_cksum(struct file *f, struct fzp **fzp)
184 : {
185 9 : struct md5 *md5=NULL;
186 9 : int ret=-1;
187 9 : int got=0;
188 : static char buf[PART_CHUNK];
189 : unsigned char checksum[MD5_DIGEST_LENGTH+1];
190 :
191 9 : if(*fzp) fzp_seek(*fzp, 0, SEEK_SET);
192 9 : else if(!(*fzp=fzp_open(f->path, "rb")))
193 : {
194 0 : f->part_cksum=0;
195 0 : return 0;
196 : }
197 :
198 9 : if(!(md5=md5_alloc(__func__)))
199 : goto end;
200 9 : if(!md5_init(md5))
201 : {
202 0 : logp("md5_init() failed\n");
203 0 : goto end;
204 : }
205 :
206 9 : got=fzp_read(*fzp, buf, PART_CHUNK);
207 :
208 9 : if(!md5_update(md5, buf, got))
209 : {
210 0 : logp("md5_update() failed\n");
211 0 : goto end;
212 : }
213 :
214 9 : if(!md5_final(md5, checksum))
215 : {
216 0 : logp("md5_final() failed\n");
217 0 : goto end;
218 : }
219 :
220 9 : memcpy(&(f->part_cksum), checksum, sizeof(unsigned));
221 :
222 : // Try for a bit of efficiency - no need to calculate the full checksum
223 : // again if we already read the whole file.
224 9 : if(got<PART_CHUNK) f->full_cksum=f->part_cksum;
225 :
226 : ret=0;
227 : end:
228 9 : md5_free(&md5);
229 9 : return ret;
230 : }
231 :
232 0 : static int get_full_cksum(struct file *f, struct fzp **fzp)
233 : {
234 0 : size_t s=0;
235 0 : int ret=-1;
236 0 : struct md5 *md5=NULL;
237 : static char buf[FULL_CHUNK];
238 : unsigned char checksum[MD5_DIGEST_LENGTH+1];
239 :
240 0 : if(*fzp) fzp_seek(*fzp, 0, SEEK_SET);
241 0 : else if(!(*fzp=fzp_open(f->path, "rb")))
242 : {
243 0 : f->full_cksum=0;
244 0 : return 0;
245 : }
246 :
247 0 : if(!(md5=md5_alloc(__func__)))
248 : goto end;
249 0 : if(!md5_init(md5))
250 : {
251 0 : logp("md5_init() failed\n");
252 0 : goto end;
253 : }
254 :
255 0 : while((s=fzp_read(*fzp, buf, FULL_CHUNK))>0)
256 : {
257 0 : if(!md5_update(md5, buf, s))
258 : {
259 0 : logp("md5_update() failed\n");
260 0 : goto end;
261 : }
262 0 : if(s<FULL_CHUNK) break;
263 : }
264 :
265 0 : if(!md5_final(md5, checksum))
266 : {
267 0 : logp("md5_final() failed\n");
268 0 : goto end;
269 : }
270 :
271 0 : memcpy(&(f->full_cksum), checksum, sizeof(unsigned));
272 :
273 0 : ret=0;
274 : end:
275 0 : md5_free(&md5);
276 0 : return ret;
277 : }
278 :
279 : /* Make it atomic by linking to a temporary file, then moving it into place. */
280 3 : static int do_hardlink(struct file *o, struct file *n)
281 : {
282 3 : int ret=-1;
283 3 : char *tmppath=NULL;
284 3 : if(!(tmppath=prepend(o->path, ext)))
285 : {
286 0 : log_out_of_memory(__func__);
287 : goto end;
288 : }
289 3 : if(link(n->path, tmppath))
290 : {
291 0 : logp("Could not hardlink %s to %s: %s\n", tmppath, n->path,
292 0 : strerror(errno));
293 : goto end;
294 : }
295 3 : if((ret=do_rename(tmppath, o->path)))
296 : {
297 : // 'man 2 rename', says it should be safe to unlink tmppath:
298 : // "If newpath exists but the operation fails for some reason,
299 : // rename() guarantees to leave an instance of newpath in
300 : // place."
301 0 : if(unlink(tmppath))
302 0 : logp("Could not unlink %s\n", tmppath);
303 : goto end;
304 : }
305 : ret=0;
306 : end:
307 3 : free_w(&tmppath);
308 3 : return ret;
309 : }
310 :
311 2 : static void reset_old_file(struct file *oldfile, struct file *newfile,
312 : struct stat *info)
313 : {
314 : //printf("reset %s with %s %d\n", oldfile->path, newfile->path,
315 : // info->st_nlink);
316 : struct file *next;
317 :
318 2 : next=oldfile->next;
319 2 : free_w(&oldfile->path);
320 2 : memcpy(oldfile, newfile, sizeof(struct file));
321 2 : oldfile->next=next;
322 2 : newfile->path=NULL;
323 2 : }
324 :
325 6 : static int check_files(struct mystruct *find, struct file *newfile,
326 : struct stat *info)
327 : {
328 6 : int found=0;
329 6 : struct fzp *nfp=NULL;
330 6 : struct fzp *ofp=NULL;
331 6 : struct file *f=NULL;
332 :
333 6 : for(f=find->files; f; f=f->next)
334 : {
335 : //printf(" against: '%s'\n", f->path);
336 6 : if(!f->path)
337 : {
338 : // If the full_match() function fails to open oldfile
339 : // (which could happen if burp deleted some old
340 : // directories), it will free path and set it to NULL.
341 : // Skip entries like this.
342 0 : continue;
343 : }
344 6 : if(newfile->dev!=f->dev)
345 : {
346 : // Different device.
347 0 : continue;
348 : }
349 6 : if(newfile->ino==f->ino)
350 : {
351 : // Same device, same inode, therefore these two files
352 : // are hardlinked to each other already.
353 : found++;
354 : break;
355 : }
356 6 : if(newfile->nlink>=maxlinks) {
357 : // This new file file has enough links. Just leave it
358 : // as it is to avoid undoing all these hardlinks.
359 : found++;
360 : break;
361 : }
362 6 : if((!newfile->part_cksum && get_part_cksum(newfile, &nfp))
363 6 : || (!f->part_cksum && get_part_cksum(f, &ofp)))
364 : {
365 : // Some error with md5sums Give up.
366 : return -1;
367 : }
368 6 : if(newfile->part_cksum!=f->part_cksum)
369 : {
370 0 : fzp_close(&ofp);
371 0 : continue;
372 : }
373 : //printf(" %s, %s\n", find->files->path, newfile->path);
374 : //printf(" part cksum matched\n");
375 :
376 6 : if((!newfile->full_cksum && get_full_cksum(newfile, &nfp))
377 6 : || (!f->full_cksum && get_full_cksum(f, &ofp)))
378 : {
379 : // Some error with md5sums Give up.
380 : return -1;
381 : }
382 6 : if(newfile->full_cksum!=f->full_cksum)
383 : {
384 0 : fzp_close(&ofp);
385 0 : continue;
386 : }
387 :
388 : //printf(" full cksum matched\n");
389 6 : if(!full_match(newfile, f, &nfp, &ofp))
390 : {
391 0 : fzp_close(&ofp);
392 0 : continue;
393 : }
394 : //printf(" full match\n");
395 : //printf("%s, %s\n", find->files->path, newfile->path);
396 :
397 : // If there are already enough links to this file, replace
398 : // our memory of it with the new file so that files later on
399 : // can link to the new one.
400 6 : if(f->nlink>=maxlinks)
401 : {
402 : // Just need to reset the path name and the number
403 : // of links, and pretend that it was found otherwise
404 : // NULL newfile will get added to the memory.
405 2 : reset_old_file(f, newfile, info);
406 2 : found++;
407 : break;
408 : }
409 :
410 4 : found++;
411 4 : count++;
412 :
413 4 : if(verbose) printf("%s\n", newfile->path);
414 :
415 : // Now hardlink it.
416 4 : if(makelinks)
417 : {
418 3 : if(do_hardlink(newfile, f))
419 : {
420 0 : count--;
421 : return -1;
422 : }
423 3 : f->nlink++;
424 : // Only count bytes as saved if we
425 : // removed the last link.
426 3 : if(newfile->nlink==1)
427 3 : savedbytes+=info->st_size;
428 : }
429 1 : else if(deletedups)
430 : {
431 0 : if(unlink(newfile->path))
432 : {
433 0 : logp("Could not delete %s: %s\n",
434 0 : newfile->path, strerror(errno));
435 : }
436 : else
437 : {
438 : // Only count bytes as saved if we removed the
439 : // last link.
440 0 : if(newfile->nlink==1)
441 0 : savedbytes+=info->st_size;
442 : }
443 : }
444 : else
445 : {
446 : // To be able to tell how many bytes
447 : // are saveable.
448 1 : savedbytes+=info->st_size;
449 : }
450 :
451 : break;
452 : }
453 6 : fzp_close(&nfp);
454 6 : fzp_close(&ofp);
455 :
456 6 : if(found)
457 : {
458 6 : free_w(&newfile->path);
459 : return 0;
460 : }
461 :
462 0 : if(add_file(find, newfile)) return -1;
463 :
464 : return 0;
465 : }
466 :
/* Decide whether basedir looks like one of our storage directories, which is
   taken to be the case when it contains a 'current' symlink.
   Returns 1 if it does, 0 if it does not, -1 if out of memory. */
static int looks_like_ours(const char *basedir)
{
	int ours;
	char *current=prepend_s(basedir, "current");

	if(!current)
	{
		log_out_of_memory(__func__);
		return -1;
	}
	ours=is_lnk_lstat(current)>0;
	free_w(&current);
	return ours;
}
488 :
/* Read the target of the symlink 'lnk' inside basedir into real (a buffer of
   r bytes) and strip one trailing slash from it, if there is one.
   The result of readlink_w_in_dir() is not checked here.
   Always returns 0. */
static int get_link(const char *basedir, const char *lnk, char real[], size_t r)
{
	size_t len;

	readlink_w_in_dir(basedir, lnk, real, r);

	// Strip any trailing slash. The buffer may hold an empty string, so
	// make sure there is a last character before looking at it.
	len=strlen(real);
	if(len && real[len-1]=='/')
		real[len-1]='\0';
	return 0;
}
497 :
498 0 : static int level_exclusion(int level, const char *fname,
499 : const char *working, const char *finishing)
500 : {
501 0 : if(level==0)
502 : {
503 : /* Be careful not to try to dedup the lockfiles.
504 : The lock actually gets lost if you open one to do a
505 : checksum
506 : and then close it. This caused me major headaches to
507 : figure out. */
508 0 : if(!strcmp(fname, LOCKFILE_NAME)
509 0 : || !strcmp(fname, BEDUP_LOCKFILE_NAME))
510 : return 1;
511 :
512 : /* Skip places where backups are going on. */
513 0 : if(!strcmp(fname, working)
514 0 : || !strcmp(fname, finishing))
515 : return 1;
516 :
517 0 : if(!strcmp(fname, "deleteme"))
518 : return 1;
519 : }
520 0 : else if(level==1)
521 : {
522 : // Do not dedup stuff that might be appended to later.
523 0 : if(!strncmp(fname, "log", strlen("log"))
524 0 : || !strncmp(fname, "verifylog", strlen("verifylog"))
525 0 : || !strncmp(fname, "restorelog", strlen("restorelog")))
526 : return 1;
527 : }
528 0 : return 0;
529 : }
530 :
531 : // Return 0 for directory processed, -1 for error, 1 for not processed.
532 3 : static int process_dir(const char *oldpath, const char *newpath,
533 : int burp_mode, int level)
534 : {
535 3 : int ret=-1;
536 3 : DIR *dirp=NULL;
537 3 : char *path=NULL;
538 : struct stat info;
539 3 : struct dirent *dirinfo=NULL;
540 : struct file newfile;
541 3 : struct mystruct *find=NULL;
542 : static char working[256]="";
543 : static char finishing[256]="";
544 :
545 3 : newfile.path=NULL;
546 :
547 3 : if(!(path=prepend_s(oldpath, newpath))) goto end;
548 :
549 3 : if(burp_mode && level==0)
550 : {
551 0 : if(get_link(path, "working", working, sizeof(working))
552 0 : || get_link(path, "finishing", finishing, sizeof(finishing)))
553 : goto end;
554 0 : if(!looks_like_ours(path))
555 : {
556 0 : logp("%s does not look like one of our storage directories - skipping\n", path);
557 0 : ret=1;
558 0 : goto end;
559 : }
560 : }
561 :
562 3 : if(!(dirp=opendir(path)))
563 : {
564 0 : logp("Could not opendir '%s': %s\n", path, strerror(errno));
565 0 : ret=1;
566 0 : goto end;
567 : }
568 18 : while((dirinfo=readdir(dirp)))
569 : {
570 15 : if(!strcmp(dirinfo->d_name, ".")
571 12 : || !strcmp(dirinfo->d_name, ".."))
572 6 : continue;
573 :
574 : //printf("try %s\n", dirinfo->d_name);
575 :
576 9 : if(burp_mode
577 0 : && level_exclusion(level, dirinfo->d_name,
578 : working, finishing))
579 0 : continue;
580 :
581 9 : free_w(&newfile.path);
582 9 : if(!(newfile.path=prepend_s(path, dirinfo->d_name)))
583 : goto end;
584 :
585 18 : if(lstat(newfile.path, &info))
586 0 : continue;
587 :
588 9 : if(S_ISDIR(info.st_mode))
589 : {
590 0 : if(process_dir(path, dirinfo->d_name,
591 : burp_mode, level+1))
592 : goto end;
593 0 : continue;
594 : }
595 9 : else if(!S_ISREG(info.st_mode)
596 9 : || !info.st_size) // ignore zero-length files
597 0 : continue;
598 :
599 9 : newfile.dev=info.st_dev;
600 9 : newfile.ino=info.st_ino;
601 9 : newfile.nlink=info.st_nlink;
602 9 : newfile.full_cksum=0;
603 9 : newfile.part_cksum=0;
604 9 : newfile.next=NULL;
605 :
606 9 : if((find=find_key(info.st_size)))
607 : {
608 : //printf("check %d: %s\n", info.st_size, newfile.path);
609 6 : if(check_files(find, &newfile, &info))
610 : goto end;
611 : }
612 : else
613 : {
614 : //printf("add: %s\n", newfile.path);
615 3 : if(add_key(info.st_size, &newfile))
616 : goto end;
617 : }
618 : }
619 : ret=0;
620 : end:
621 3 : if(dirp) closedir(dirp);
622 3 : free_w(&newfile.path);
623 3 : free_w(&path);
624 3 : return ret;
625 : }
626 :
627 0 : static void sighandler(__attribute__ ((unused)) int signum)
628 : {
629 0 : locks_release_and_free(&locklist);
630 0 : exit(1);
631 : }
632 :
/* Check whether 'file' inside 'clientconfdir' is a regular file. Symlinks
   are not followed.
   Returns non-zero if it is, 0 if it is not, or if it could not be
   examined. */
static int is_regular_file(const char *clientconfdir, const char *file)
{
	int regular=0;
	struct stat statp;
	char *fullpath=prepend_s(clientconfdir, file);

	if(!fullpath)
		return 0;
	if(!lstat(fullpath, &statp))
		regular=S_ISREG(statp.st_mode);
	free_w(&fullpath);
	return regular;
}
647 :
648 : static int in_group(struct strlist *grouplist, const char *dedup_group)
649 : {
650 : struct strlist *g;
651 :
652 0 : for(g=grouplist; g; g=g->next)
653 0 : if(!strcmp(g->path, dedup_group)) return 1;
654 :
655 : return 0;
656 : }
657 :
658 0 : static int iterate_over_clients(struct conf **globalcs,
659 : struct strlist *grouplist)
660 : {
661 0 : int ret=0;
662 0 : DIR *dirp=NULL;
663 0 : struct conf **cconfs=NULL;
664 0 : struct dirent *dirinfo=NULL;
665 0 : const char *globalclientconfdir=get_string(globalcs[OPT_CLIENTCONFDIR]);
666 :
667 0 : if(!(cconfs=confs_alloc())) return -1;
668 0 : if(confs_init(cconfs)) return -1;
669 :
670 0 : if(!(dirp=opendir(globalclientconfdir)))
671 : {
672 0 : logp("Could not opendir '%s': %s\n",
673 0 : globalclientconfdir, strerror(errno));
674 0 : return 0;
675 : }
676 0 : while((dirinfo=readdir(dirp)))
677 : {
678 0 : char *lockfile=NULL;
679 0 : char *lockfilebase=NULL;
680 0 : char *client_lockdir=NULL;
681 0 : struct lock *lock=NULL;
682 :
683 0 : if(dirinfo->d_ino==0
684 0 : || !cname_valid(dirinfo->d_name)
685 0 : || !is_regular_file(globalclientconfdir, dirinfo->d_name))
686 0 : continue;
687 :
688 0 : confs_free_content(cconfs);
689 0 : if(confs_init(cconfs)) return -1;
690 :
691 0 : if(set_string(cconfs[OPT_CNAME], dirinfo->d_name))
692 : return -1;
693 :
694 0 : if(conf_load_clientconfdir(globalcs, cconfs))
695 : {
696 0 : logp("could not load config for client %s\n",
697 : dirinfo->d_name);
698 0 : return 0;
699 : }
700 :
701 0 : if(grouplist)
702 : {
703 0 : const char *dedup_group=
704 0 : get_string(cconfs[OPT_DEDUP_GROUP]);
705 0 : if(!dedup_group
706 0 : || !in_group(grouplist, dedup_group))
707 0 : continue;
708 : }
709 :
710 0 : if(!(client_lockdir=get_string(cconfs[OPT_CLIENT_LOCKDIR])))
711 0 : client_lockdir=get_string(cconfs[OPT_DIRECTORY]);
712 :
713 0 : if(!(lockfilebase=prepend_s(client_lockdir, dirinfo->d_name))
714 0 : || !(lockfile=prepend_s(lockfilebase, BEDUP_LOCKFILE_NAME)))
715 : {
716 0 : free_w(&lockfilebase);
717 0 : free_w(&lockfile);
718 0 : ret=-1;
719 0 : break;
720 : }
721 0 : free_w(&lockfilebase);
722 :
723 0 : if(!(lock=lock_alloc_and_init(lockfile)))
724 : {
725 : ret=-1;
726 : break;
727 : }
728 0 : lock_get(lock);
729 0 : free_w(&lockfile);
730 :
731 0 : if(lock->status!=GET_LOCK_GOT)
732 : {
733 0 : logp("Could not get %s\n", lock->path);
734 0 : continue;
735 : }
736 0 : logp("Got %s\n", lock->path);
737 :
738 : // Remember that we got that lock.
739 0 : lock_add_to_list(&locklist, lock);
740 :
741 0 : switch(process_dir(get_string(cconfs[OPT_DIRECTORY]),
742 : dirinfo->d_name,
743 : 1 /* burp mode */, 0 /* level */))
744 : {
745 0 : case 0: ccount++;
746 0 : case 1: continue;
747 : default: ret=-1; break;
748 : }
749 : break;
750 : }
751 0 : closedir(dirp);
752 :
753 0 : locks_release_and_free(&locklist);
754 :
755 0 : confs_free(&cconfs);
756 :
757 0 : return ret;
758 : }
759 :
760 0 : static int process_from_conf(const char *configfile, char **groups)
761 : {
762 0 : int ret=-1;
763 0 : struct conf **globalcs=NULL;
764 0 : struct strlist *grouplist=NULL;
765 0 : struct lock *globallock=NULL;
766 :
767 0 : signal(SIGABRT, &sighandler);
768 0 : signal(SIGTERM, &sighandler);
769 0 : signal(SIGINT, &sighandler);
770 :
771 0 : if(*groups)
772 : {
773 0 : char *tok=NULL;
774 0 : if((tok=strtok(*groups, ",\n")))
775 : {
776 : do
777 : {
778 0 : if(strlist_add(&grouplist, tok, 1))
779 : {
780 0 : log_out_of_memory(__func__);
781 0 : goto end;
782 : }
783 0 : } while((tok=strtok(NULL, ",\n")));
784 : }
785 0 : if(!grouplist)
786 : {
787 0 : logp("unable to read list of groups\n");
788 0 : goto end;
789 : }
790 : }
791 :
792 : // Read directories from config files, and get locks.
793 0 : if(!(globalcs=confs_alloc())
794 0 : || confs_init(globalcs)
795 0 : || conf_load_global_only(configfile, globalcs))
796 : goto end;
797 :
798 0 : if(get_e_burp_mode(globalcs[OPT_BURP_MODE])!=BURP_MODE_SERVER)
799 : {
800 0 : logp("%s is not a server config file\n", configfile);
801 0 : goto end;
802 : }
803 0 : logp("Dedup clients from %s\n",
804 0 : get_string(globalcs[OPT_CLIENTCONFDIR]));
805 0 : maxlinks=get_int(globalcs[OPT_MAX_HARDLINKS]);
806 0 : if(grouplist)
807 : {
808 0 : struct strlist *g=NULL;
809 0 : logp("in dedup groups:\n");
810 0 : for(g=grouplist; g; g=g->next)
811 0 : logp("%s\n", g->path);
812 : }
813 : else
814 : {
815 0 : char *lockpath=NULL;
816 0 : const char *opt_lockfile=confs_get_lockfile(globalcs);
817 : // Only get the global lock when doing a global run.
818 : // If you are doing individual groups, you are likely
819 : // to want to do many different dedup jobs and a
820 : // global lock would get in the way.
821 0 : if(!(lockpath=prepend(opt_lockfile, ".bedup"))
822 0 : || !(globallock=lock_alloc_and_init(lockpath)))
823 : goto end;
824 0 : lock_get(globallock);
825 0 : if(globallock->status!=GET_LOCK_GOT)
826 : {
827 0 : logp("Could not get lock %s (%d)\n", lockpath,
828 : globallock->status);
829 0 : free_w(&lockpath);
830 0 : goto end;
831 : }
832 0 : logp("Got %s\n", lockpath);
833 : }
834 0 : ret=iterate_over_clients(globalcs, grouplist);
835 : end:
836 0 : confs_free(&globalcs);
837 0 : lock_release(globallock);
838 0 : lock_free(&globallock);
839 0 : strlists_free(&grouplist);
840 0 : return ret;
841 : }
842 :
843 3 : static int process_from_command_line(int argc, char *argv[])
844 : {
845 : int i;
846 6 : for(i=optind; i<argc; i++)
847 : {
848 : // Strip trailing slashes, for tidiness.
849 3 : if(argv[i][strlen(argv[i])-1]=='/')
850 0 : argv[i][strlen(argv[i])-1]='\0';
851 3 : if(process_dir("", argv[i],
852 : 0 /* not burp mode */, 0 /* level */))
853 : return 1;
854 : }
855 : return 0;
856 : }
857 :
858 2 : static int usage(void)
859 : {
860 2 : logfmt("\nUsage: %s [options]\n", prog);
861 2 : logfmt("\n");
862 2 : logfmt(" Options:\n");
863 2 : logfmt(" -c <path> Path to config file (default: %s).\n", config_default_path());
864 2 : logfmt(" -g <list of group names> Only run on the directories of clients that\n");
865 2 : logfmt(" are in one of the groups specified.\n");
866 2 : logfmt(" The list is comma-separated. To put a client in a\n");
867 2 : logfmt(" group, use the 'dedup_group' option in the client\n");
868 2 : logfmt(" configuration file on the server.\n");
869 2 : logfmt(" -h|-? Print this text and exit.\n");
870 2 : logfmt(" -d Delete any duplicate files found.\n");
871 2 : logfmt(" (non-%s mode only)\n", PACKAGE_TARNAME);
872 2 : logfmt(" -l Hard link any duplicate files found.\n");
873 2 : logfmt(" -m <number> Maximum number of hard links to a single file.\n");
874 2 : logfmt(" (non-%s mode only - in burp mode, use the\n", PACKAGE_TARNAME);
875 2 : logfmt(" max_hardlinks option in the configuration file)\n");
876 2 : logfmt(" The default is %d. On ext3, the maximum number\n", DEF_MAX_LINKS);
877 2 : logfmt(" of links possible is 32000, but space is needed\n");
878 2 : logfmt(" for the normal operation of %s.\n", PACKAGE_TARNAME);
879 2 : logfmt(" -n <list of directories> Non-%s mode. Deduplicate any (set of) directories.\n", PACKAGE_TARNAME);
880 2 : logfmt(" -v Print duplicate paths.\n");
881 2 : logfmt(" -V Print version and exit.\n");
882 2 : logfmt("\n");
883 2 : logfmt("By default, %s will read %s and deduplicate client storage\n", prog, config_default_path());
884 2 : logfmt("directories using special knowledge of the structure.\n");
885 2 : logfmt("\n");
886 2 : logfmt("With '-n', this knowledge is turned off and you have to specify the directories\n");
887 2 : logfmt("to deduplicate on the command line. Running with '-n' is therefore dangerous\n");
888 2 : logfmt("if you are deduplicating %s storage directories.\n\n", PACKAGE_TARNAME);
889 2 : return 1;
890 : }
891 :
892 14 : int run_bedup(int argc, char *argv[])
893 : {
894 14 : int ret=0;
895 14 : int option=0;
896 14 : int nonburp=0;
897 14 : char *groups=NULL;
898 14 : int givenconfigfile=0;
899 14 : const char *configfile=NULL;
900 :
901 14 : configfile=config_default_path();
902 14 : snprintf(ext, sizeof(ext), ".bedup.%d", getpid());
903 :
904 46 : while((option=getopt(argc, argv, "c:dg:hlm:nvV?"))!=-1)
905 : {
906 21 : switch(option)
907 : {
908 : case 'c':
909 1 : configfile=optarg;
910 1 : givenconfigfile=1;
911 1 : break;
912 : case 'd':
913 2 : deletedups=1;
914 2 : break;
915 : case 'g':
916 1 : groups=optarg;
917 1 : break;
918 : case 'l':
919 3 : makelinks=1;
920 3 : break;
921 : case 'm':
922 6 : maxlinks=atoi(optarg);
923 3 : break;
924 : case 'n':
925 7 : nonburp=1;
926 7 : break;
927 : case 'V':
928 1 : logfmt("%s-%s\n", prog, PACKAGE_VERSION);
929 1 : return 0;
930 : case 'v':
931 1 : verbose=1;
932 1 : break;
933 : case 'h':
934 : case '?':
935 2 : return usage();
936 : }
937 : }
938 :
939 11 : if(nonburp && givenconfigfile)
940 : {
941 1 : logp("-n and -c options are mutually exclusive\n");
942 1 : return 1;
943 : }
944 10 : if(nonburp && groups)
945 : {
946 1 : logp("-n and -g options are mutually exclusive\n");
947 1 : return 1;
948 : }
949 9 : if(!nonburp && maxlinks!=DEF_MAX_LINKS)
950 : {
951 1 : logp("-m option is specified via the configuration file in %s mode (max_hardlinks=)\n", PACKAGE_TARNAME);
952 1 : return 1;
953 : }
954 8 : if(deletedups && makelinks)
955 : {
956 1 : logp("-d and -l options are mutually exclusive\n");
957 1 : return 1;
958 : }
959 7 : if(deletedups && !nonburp)
960 : {
961 1 : logp("-d option requires -n option\n");
962 1 : return 1;
963 : }
964 :
965 6 : if(optind>=argc)
966 : {
967 1 : if(nonburp)
968 : {
969 1 : logp("No directories found after options\n");
970 1 : return 1;
971 : }
972 : }
973 : else
974 : {
975 5 : if(!nonburp)
976 : {
977 1 : logp("Do not specify extra arguments.\n");
978 1 : return 1;
979 : }
980 : }
981 :
982 4 : if(maxlinks<2)
983 : {
984 1 : logp("The argument to -m needs to be greater than 1.\n");
985 1 : return 1;
986 : }
987 :
988 3 : if(nonburp)
989 : {
990 : // Read directories from command line.
991 3 : if(process_from_command_line(argc, argv))
992 0 : ret=1;
993 : }
994 : else
995 : {
996 0 : if(process_from_conf(configfile, &groups))
997 0 : ret=1;
998 : }
999 :
1000 3 : if(!nonburp)
1001 : {
1002 0 : logp("%d client storages scanned\n", ccount);
1003 : }
1004 3 : logp("%" PRIu64 " duplicate %s found\n",
1005 3 : count, count==1?"file":"files");
1006 9 : logp("%" PRIu64 " bytes %s%s\n",
1007 4 : savedbytes, (makelinks || deletedups)?"saved":"saveable",
1008 : bytes_to_human(savedbytes));
1009 3 : mystruct_delete_all();
1010 3 : return ret;
1011 : }
|