Line data Source code
1 : #include "../../burp.h"
2 : #include "../../alloc.h"
3 : #include "../../conf.h"
4 : #include "../../conffile.h"
5 : #include "../../handy.h"
6 : #include "../../fsops.h"
7 : #include "../../fzp.h"
8 : #include "../../lock.h"
9 : #include "../../log.h"
10 : #include "../../prepend.h"
11 : #include "../../strlist.h"
12 : #include "bedup.h"
13 :
14 : #include <uthash.h>
15 :
// Names of the lock files that level_exclusion() refuses to deduplicate.
#define LOCKFILE_NAME		"lockfile"
#define BEDUP_LOCKFILE_NAME	"lockfile.bedup"

// Initial value of 'maxlinks'.
#define DEF_MAX_LINKS		10000

// Actions selected on the command line: -l sets makelinks, -d sets
// deletedups. With neither set, duplicates are only reported.
static int makelinks=0;
static int deletedups=0;

// Totals that run_bedup() logs when it finishes.
static uint64_t savedbytes=0;	// Bytes saved, or saveable when only reporting.
static uint64_t count=0;	// Number of duplicate files found.
static int ccount=0;		// Number of client storages scanned.

// Locks obtained on client storage directories.
static struct lock *locklist=NULL;

// Set by -v: print the path of each duplicate.
static int verbose=0;

// A file that already has this many links is not linked to any more.
static unsigned int maxlinks=DEF_MAX_LINKS;

// Suffix for the temporary link made by do_hardlink(). run_bedup() sets it
// to ".bedup.<pid>".
static char ext[16]="";
34 :
typedef struct file file_t;

// One remembered file. Entries form a singly linked list hanging off a
// struct mystruct.
struct file
{
	char *path;		// Heap-allocated path. NULL marks an entry to skip.
	dev_t dev;		// st_dev from lstat().
	ino_t ino;		// st_ino from lstat().
	nlink_t nlink;		// st_nlink from lstat(), bumped when linked to.
	uint64_t full_cksum;	// Whole-file checksum, 0 until computed.
	uint64_t part_cksum;	// Checksum of the first chunk, 0 until computed.
	file_t *next;		// Next entry in the same list.
};
47 :
48 : struct mystruct
49 : {
50 : off_t st_size;
51 : file_t *files;
52 : UT_hash_handle hh;
53 : };
54 :
55 : struct mystruct *myfiles=NULL;
56 :
57 9 : static struct mystruct *find_key(off_t st_size)
58 : {
59 : struct mystruct *s;
60 :
61 9 : HASH_FIND_INT(myfiles, &st_size, s);
62 9 : return s;
63 : }
64 :
65 3 : static int add_file(struct mystruct *s, struct file *f)
66 : {
67 : struct file *newfile;
68 3 : if(!(newfile=(struct file *)malloc_w(sizeof(struct file), __func__)))
69 : return -1;
70 3 : memcpy(newfile, f, sizeof(struct file));
71 3 : f->path=NULL;
72 3 : newfile->next=s->files;
73 3 : s->files=newfile;
74 : return 0;
75 : }
76 :
77 3 : static int add_key(off_t st_size, struct file *f)
78 : {
79 : struct mystruct *s;
80 :
81 3 : if(!(s=(struct mystruct *)malloc_w(sizeof(struct mystruct), __func__)))
82 : return -1;
83 3 : s->st_size=st_size;
84 3 : s->files=NULL;
85 3 : if(add_file(s, f)) return -1;
86 : //printf("HASH ADD %d\n", st_size);
87 3 : HASH_ADD_INT(myfiles, st_size, s);
88 : return 0;
89 : }
90 :
91 : static void file_free_content(struct file *file)
92 : {
93 : if(!file) return;
94 3 : free_w(&file->path);
95 : }
96 :
// Free a struct file together with its contents.
// Does nothing when file or *file is NULL.
static void file_free(struct file **file)
{
	if(file && *file)
	{
		file_free_content(*file);
		free_v((void **)file);
	}
}
103 :
104 3 : static void files_free(struct file **files)
105 : {
106 : struct file *f;
107 : struct file *fhead;
108 3 : if(!files || !*files) return;
109 : fhead=*files;
110 6 : while(fhead)
111 : {
112 3 : f=fhead;
113 3 : fhead=fhead->next;
114 3 : file_free(&f);
115 : }
116 : }
117 :
118 : static void mystruct_free_content(struct mystruct *mystruct)
119 : {
120 : if(!mystruct) return;
121 3 : files_free(&mystruct->files);
122 : }
123 :
// Free a bucket together with its file list.
// Does nothing when mystruct or *mystruct is NULL.
static void mystruct_free(struct mystruct **mystruct)
{
	if(mystruct && *mystruct)
	{
		mystruct_free_content(*mystruct);
		free_v((void **)mystruct);
	}
}
130 :
131 3 : static void mystruct_delete_all(void)
132 : {
133 : struct mystruct *tmp;
134 : struct mystruct *mystruct;
135 :
136 6 : HASH_ITER(hh, myfiles, mystruct, tmp)
137 : {
138 3 : HASH_DEL(myfiles, mystruct);
139 3 : mystruct_free(&mystruct);
140 : }
141 3 : myfiles=NULL;
142 3 : }
143 :
144 : #define FULL_CHUNK 4096
145 :
146 6 : static int full_match(struct file *o, struct file *n,
147 : struct fzp **ofp, struct fzp **nfp)
148 : {
149 : size_t ogot;
150 : size_t ngot;
151 6 : unsigned int i=0;
152 : static char obuf[FULL_CHUNK];
153 : static char nbuf[FULL_CHUNK];
154 :
155 6 : if(*ofp) fzp_seek(*ofp, 0, SEEK_SET);
156 0 : else if(!(*ofp=fzp_open(o->path, "rb")))
157 : {
158 : // Blank this entry so that it can be ignored from
159 : // now on.
160 0 : free_w(&o->path);
161 : return 0;
162 : }
163 :
164 6 : if(*nfp) fzp_seek(*nfp, 0, SEEK_SET);
165 3 : else if(!(*nfp=fzp_open(n->path, "rb"))) return 0;
166 :
167 : while(1)
168 : {
169 6 : ogot=fzp_read(*ofp, obuf, FULL_CHUNK);
170 6 : ngot=fzp_read(*nfp, nbuf, FULL_CHUNK);
171 6 : if(ogot!=ngot) return 0;
172 60 : for(i=0; i<ogot; i++)
173 60 : if(obuf[i]!=nbuf[i]) return 0;
174 6 : if(ogot<FULL_CHUNK) break;
175 : }
176 :
177 : return 1;
178 : }
179 :
180 : #define PART_CHUNK 1024
181 :
182 9 : static int get_part_cksum(struct file *f, struct fzp **fzp)
183 : {
184 : MD5_CTX md5;
185 9 : int got=0;
186 : static char buf[PART_CHUNK];
187 : unsigned char checksum[MD5_DIGEST_LENGTH+1];
188 :
189 9 : if(*fzp) fzp_seek(*fzp, 0, SEEK_SET);
190 9 : else if(!(*fzp=fzp_open(f->path, "rb")))
191 : {
192 0 : f->part_cksum=0;
193 0 : return 0;
194 : }
195 :
196 9 : if(!MD5_Init(&md5))
197 : {
198 0 : logp("MD5_Init() failed\n");
199 0 : return -1;
200 : }
201 :
202 9 : got=fzp_read(*fzp, buf, PART_CHUNK);
203 :
204 9 : if(!MD5_Update(&md5, buf, got))
205 : {
206 0 : logp("MD5_Update() failed\n");
207 0 : return -1;
208 : }
209 :
210 9 : if(!MD5_Final(checksum, &md5))
211 : {
212 0 : logp("MD5_Final() failed\n");
213 0 : return -1;
214 : }
215 :
216 9 : memcpy(&(f->part_cksum), checksum, sizeof(unsigned));
217 :
218 : // Try for a bit of efficiency - no need to calculate the full checksum
219 : // again if we already read the whole file.
220 9 : if(got<PART_CHUNK) f->full_cksum=f->part_cksum;
221 :
222 : return 0;
223 : }
224 :
225 0 : static int get_full_cksum(struct file *f, struct fzp **fzp)
226 : {
227 0 : size_t s=0;
228 : MD5_CTX md5;
229 : static char buf[FULL_CHUNK];
230 : unsigned char checksum[MD5_DIGEST_LENGTH+1];
231 :
232 0 : if(*fzp) fzp_seek(*fzp, 0, SEEK_SET);
233 0 : else if(!(*fzp=fzp_open(f->path, "rb")))
234 : {
235 0 : f->full_cksum=0;
236 0 : return 0;
237 : }
238 :
239 0 : if(!MD5_Init(&md5))
240 : {
241 0 : logp("MD5_Init() failed\n");
242 0 : return -1;
243 : }
244 :
245 0 : while((s=fzp_read(*fzp, buf, FULL_CHUNK))>0)
246 : {
247 0 : if(!MD5_Update(&md5, buf, s))
248 : {
249 0 : logp("MD5_Update() failed\n");
250 0 : return -1;
251 : }
252 0 : if(s<FULL_CHUNK) break;
253 : }
254 :
255 0 : if(!MD5_Final(checksum, &md5))
256 : {
257 0 : logp("MD5_Final() failed\n");
258 0 : return -1;
259 : }
260 :
261 0 : memcpy(&(f->full_cksum), checksum, sizeof(unsigned));
262 :
263 0 : return 0;
264 : }
265 :
266 : /* Make it atomic by linking to a temporary file, then moving it into place. */
267 3 : static int do_hardlink(struct file *o, struct file *n)
268 : {
269 3 : int ret=-1;
270 3 : char *tmppath=NULL;
271 3 : if(!(tmppath=prepend(o->path, ext)))
272 : {
273 0 : log_out_of_memory(__func__);
274 : goto end;
275 : }
276 3 : if(link(n->path, tmppath))
277 : {
278 0 : logp("Could not hardlink %s to %s: %s\n", tmppath, n->path,
279 0 : strerror(errno));
280 : goto end;
281 : }
282 3 : if((ret=do_rename(tmppath, o->path)))
283 : {
284 : // 'man 2 rename', says it should be safe to unlink tmppath:
285 : // "If newpath exists but the operation fails for some reason,
286 : // rename() guarantees to leave an instance of newpath in
287 : // place."
288 0 : if(unlink(tmppath))
289 0 : logp("Could not unlink %s\n", tmppath);
290 : goto end;
291 : }
292 : ret=0;
293 : end:
294 3 : free_w(&tmppath);
295 3 : return ret;
296 : }
297 :
298 2 : static void reset_old_file(struct file *oldfile, struct file *newfile,
299 : struct stat *info)
300 : {
301 : //printf("reset %s with %s %d\n", oldfile->path, newfile->path,
302 : // info->st_nlink);
303 : struct file *next;
304 :
305 2 : next=oldfile->next;
306 2 : free_w(&oldfile->path);
307 2 : memcpy(oldfile, newfile, sizeof(struct file));
308 2 : oldfile->next=next;
309 2 : newfile->path=NULL;
310 2 : }
311 :
312 6 : static int check_files(struct mystruct *find, struct file *newfile,
313 : struct stat *info)
314 : {
315 6 : int found=0;
316 6 : struct fzp *nfp=NULL;
317 6 : struct fzp *ofp=NULL;
318 6 : struct file *f=NULL;
319 :
320 6 : for(f=find->files; f; f=f->next)
321 : {
322 : //printf(" against: '%s'\n", f->path);
323 6 : if(!f->path)
324 : {
325 : // If the full_match() function fails to open oldfile
326 : // (which could happen if burp deleted some old
327 : // directories), it will free path and set it to NULL.
328 : // Skip entries like this.
329 0 : continue;
330 : }
331 6 : if(newfile->dev!=f->dev)
332 : {
333 : // Different device.
334 0 : continue;
335 : }
336 6 : if(newfile->ino==f->ino)
337 : {
338 : // Same device, same inode, therefore these two files
339 : // are hardlinked to each other already.
340 : found++;
341 : break;
342 : }
343 6 : if(newfile->nlink>=maxlinks) {
344 : // This new file file has enough links. Just leave it
345 : // as it is to avoid undoing all these hardlinks.
346 : found++;
347 : break;
348 : }
349 6 : if((!newfile->part_cksum && get_part_cksum(newfile, &nfp))
350 6 : || (!f->part_cksum && get_part_cksum(f, &ofp)))
351 : {
352 : // Some error with md5sums Give up.
353 : return -1;
354 : }
355 6 : if(newfile->part_cksum!=f->part_cksum)
356 : {
357 0 : fzp_close(&ofp);
358 0 : continue;
359 : }
360 : //printf(" %s, %s\n", find->files->path, newfile->path);
361 : //printf(" part cksum matched\n");
362 :
363 6 : if((!newfile->full_cksum && get_full_cksum(newfile, &nfp))
364 6 : || (!f->full_cksum && get_full_cksum(f, &ofp)))
365 : {
366 : // Some error with md5sums Give up.
367 : return -1;
368 : }
369 6 : if(newfile->full_cksum!=f->full_cksum)
370 : {
371 0 : fzp_close(&ofp);
372 0 : continue;
373 : }
374 :
375 : //printf(" full cksum matched\n");
376 6 : if(!full_match(newfile, f, &nfp, &ofp))
377 : {
378 0 : fzp_close(&ofp);
379 0 : continue;
380 : }
381 : //printf(" full match\n");
382 : //printf("%s, %s\n", find->files->path, newfile->path);
383 :
384 : // If there are already enough links to this file, replace
385 : // our memory of it with the new file so that files later on
386 : // can link to the new one.
387 6 : if(f->nlink>=maxlinks)
388 : {
389 : // Just need to reset the path name and the number
390 : // of links, and pretend that it was found otherwise
391 : // NULL newfile will get added to the memory.
392 2 : reset_old_file(f, newfile, info);
393 2 : found++;
394 : break;
395 : }
396 :
397 4 : found++;
398 4 : count++;
399 :
400 4 : if(verbose) printf("%s\n", newfile->path);
401 :
402 : // Now hardlink it.
403 4 : if(makelinks)
404 : {
405 3 : if(do_hardlink(newfile, f))
406 : {
407 0 : count--;
408 : return -1;
409 : }
410 3 : f->nlink++;
411 : // Only count bytes as saved if we
412 : // removed the last link.
413 3 : if(newfile->nlink==1)
414 3 : savedbytes+=info->st_size;
415 : }
416 1 : else if(deletedups)
417 : {
418 0 : if(unlink(newfile->path))
419 : {
420 0 : logp("Could not delete %s: %s\n",
421 0 : newfile->path, strerror(errno));
422 : }
423 : else
424 : {
425 : // Only count bytes as saved if we removed the
426 : // last link.
427 0 : if(newfile->nlink==1)
428 0 : savedbytes+=info->st_size;
429 : }
430 : }
431 : else
432 : {
433 : // To be able to tell how many bytes
434 : // are saveable.
435 1 : savedbytes+=info->st_size;
436 : }
437 :
438 : break;
439 : }
440 6 : fzp_close(&nfp);
441 6 : fzp_close(&ofp);
442 :
443 6 : if(found)
444 : {
445 6 : free_w(&newfile->path);
446 : return 0;
447 : }
448 :
449 0 : if(add_file(find, newfile)) return -1;
450 :
451 : return 0;
452 : }
453 :
// Guess whether basedir is a protocol 1 storage directory.
// If there is a 'current' symlink in it, we think it is.
// Returns 1 if it looks like one, 0 if not, -1 if out of memory.
static int looks_like_protocol1(const char *basedir)
{
	int ret;
	char *current=prepend_s(basedir, "current");

	if(!current)
	{
		log_out_of_memory(__func__);
		return -1;
	}
	ret=(is_lnk_lstat(current)>0)?1:0;
	free_w(&current);
	return ret;
}
475 :
// Read the link 'lnk' inside basedir into real[] (r bytes) using
// readlink_w_in_dir(), then strip one trailing slash, if any.
// The result of readlink_w_in_dir() is not checked.
// NOTE(review): this assumes real[] is always a terminated string afterwards
// - confirm against readlink_w_in_dir().
// Always returns 0.
static int get_link(const char *basedir, const char *lnk, char real[], size_t r)
{
	size_t len;

	readlink_w_in_dir(basedir, lnk, real, r);
	len=strlen(real);
	// Guard against an empty string: indexing with len-1 would wrap
	// around and read outside the buffer.
	if(len && real[len-1]=='/')
		real[len-1]='\0';
	return 0;
}
484 :
485 0 : static int level_exclusion(int level, const char *fname,
486 : const char *working, const char *finishing)
487 : {
488 0 : if(level==0)
489 : {
490 : /* Be careful not to try to dedup the lockfiles.
491 : The lock actually gets lost if you open one to do a
492 : checksum
493 : and then close it. This caused me major headaches to
494 : figure out. */
495 0 : if(!strcmp(fname, LOCKFILE_NAME)
496 0 : || !strcmp(fname, BEDUP_LOCKFILE_NAME))
497 : return 1;
498 :
499 : /* Skip places where backups are going on. */
500 0 : if(!strcmp(fname, working)
501 0 : || !strcmp(fname, finishing))
502 : return 1;
503 :
504 0 : if(!strcmp(fname, "deleteme"))
505 : return 1;
506 : }
507 0 : else if(level==1)
508 : {
509 : // Do not dedup stuff that might be appended to later.
510 0 : if(!strncmp(fname, "log", strlen("log"))
511 0 : || !strncmp(fname, "verifylog", strlen("verifylog"))
512 0 : || !strncmp(fname, "restorelog", strlen("restorelog")))
513 : return 1;
514 : }
515 0 : return 0;
516 : }
517 :
518 : // Return 0 for directory processed, -1 for error, 1 for not processed.
519 3 : static int process_dir(const char *oldpath, const char *newpath,
520 : int burp_mode, int level)
521 : {
522 3 : int ret=-1;
523 3 : DIR *dirp=NULL;
524 3 : char *path=NULL;
525 : struct stat info;
526 3 : struct dirent *dirinfo=NULL;
527 : struct file newfile;
528 3 : struct mystruct *find=NULL;
529 : static char working[256]="";
530 : static char finishing[256]="";
531 :
532 3 : newfile.path=NULL;
533 :
534 3 : if(!(path=prepend_s(oldpath, newpath))) goto end;
535 :
536 3 : if(burp_mode && level==0)
537 : {
538 0 : if(get_link(path, "working", working, sizeof(working))
539 0 : || get_link(path, "finishing", finishing, sizeof(finishing)))
540 : goto end;
541 0 : if(!looks_like_protocol1(path))
542 : {
543 0 : logp("%s does not look like a protocol 1 storage directory - skipping\n", path);
544 0 : ret=1;
545 0 : goto end;
546 : }
547 : }
548 :
549 3 : if(!(dirp=opendir(path)))
550 : {
551 0 : logp("Could not opendir '%s': %s\n", path, strerror(errno));
552 0 : ret=1;
553 0 : goto end;
554 : }
555 18 : while((dirinfo=readdir(dirp)))
556 : {
557 15 : if(!strcmp(dirinfo->d_name, ".")
558 12 : || !strcmp(dirinfo->d_name, ".."))
559 6 : continue;
560 :
561 : //printf("try %s\n", dirinfo->d_name);
562 :
563 9 : if(burp_mode
564 0 : && level_exclusion(level, dirinfo->d_name,
565 : working, finishing))
566 0 : continue;
567 :
568 9 : free_w(&newfile.path);
569 9 : if(!(newfile.path=prepend_s(path, dirinfo->d_name)))
570 : goto end;
571 :
572 18 : if(lstat(newfile.path, &info))
573 0 : continue;
574 :
575 9 : if(S_ISDIR(info.st_mode))
576 : {
577 0 : if(process_dir(path, dirinfo->d_name,
578 : burp_mode, level+1))
579 : goto end;
580 0 : continue;
581 : }
582 9 : else if(!S_ISREG(info.st_mode)
583 9 : || !info.st_size) // ignore zero-length files
584 0 : continue;
585 :
586 9 : newfile.dev=info.st_dev;
587 9 : newfile.ino=info.st_ino;
588 9 : newfile.nlink=info.st_nlink;
589 9 : newfile.full_cksum=0;
590 9 : newfile.part_cksum=0;
591 9 : newfile.next=NULL;
592 :
593 9 : if((find=find_key(info.st_size)))
594 : {
595 : //printf("check %d: %s\n", info.st_size, newfile.path);
596 6 : if(check_files(find, &newfile, &info))
597 : goto end;
598 : }
599 : else
600 : {
601 : //printf("add: %s\n", newfile.path);
602 3 : if(add_key(info.st_size, &newfile))
603 : goto end;
604 : }
605 : }
606 : ret=0;
607 : end:
608 3 : if(dirp) closedir(dirp);
609 3 : free_w(&newfile.path);
610 3 : free_w(&path);
611 3 : return ret;
612 : }
613 :
614 0 : static void sighandler(__attribute__ ((unused)) int signum)
615 : {
616 0 : locks_release_and_free(&locklist);
617 0 : exit(1);
618 : }
619 :
// Tell whether clientconfdir/file is a regular file, without following
// symlinks.
// Returns non-zero if it is, 0 if it is not, cannot be lstat()ed, or the path
// could not be built.
static int is_regular_file(const char *clientconfdir, const char *file)
{
	int regular=0;
	struct stat statp;
	char *fullpath=prepend_s(clientconfdir, file);

	if(!fullpath)
		return 0;
	if(!lstat(fullpath, &statp))
		regular=S_ISREG(statp.st_mode);
	free_w(&fullpath);
	return regular;
}
634 :
635 : static int in_group(struct strlist *grouplist, const char *dedup_group)
636 : {
637 : struct strlist *g;
638 :
639 0 : for(g=grouplist; g; g=g->next)
640 0 : if(!strcmp(g->path, dedup_group)) return 1;
641 :
642 : return 0;
643 : }
644 :
645 0 : static int iterate_over_clients(struct conf **globalcs,
646 : struct strlist *grouplist)
647 : {
648 0 : int ret=0;
649 0 : DIR *dirp=NULL;
650 0 : struct conf **cconfs=NULL;
651 0 : struct dirent *dirinfo=NULL;
652 0 : const char *globalclientconfdir=get_string(globalcs[OPT_CLIENTCONFDIR]);
653 :
654 0 : if(!(cconfs=confs_alloc())) return -1;
655 0 : if(confs_init(cconfs)) return -1;
656 :
657 0 : if(!(dirp=opendir(globalclientconfdir)))
658 : {
659 0 : logp("Could not opendir '%s': %s\n",
660 0 : globalclientconfdir, strerror(errno));
661 0 : return 0;
662 : }
663 0 : while((dirinfo=readdir(dirp)))
664 : {
665 0 : char *lockfile=NULL;
666 0 : char *lockfilebase=NULL;
667 0 : char *client_lockdir=NULL;
668 0 : struct lock *lock=NULL;
669 :
670 0 : if(dirinfo->d_ino==0
671 0 : || !cname_valid(dirinfo->d_name)
672 0 : || !is_regular_file(globalclientconfdir, dirinfo->d_name))
673 0 : continue;
674 :
675 0 : confs_free_content(cconfs);
676 0 : if(confs_init(cconfs)) return -1;
677 :
678 0 : if(set_string(cconfs[OPT_CNAME], dirinfo->d_name))
679 : return -1;
680 :
681 0 : if(conf_load_clientconfdir(globalcs, cconfs))
682 : {
683 0 : logp("could not load config for client %s\n",
684 : dirinfo->d_name);
685 0 : return 0;
686 : }
687 :
688 0 : if(grouplist)
689 : {
690 0 : const char *dedup_group=
691 0 : get_string(cconfs[OPT_DEDUP_GROUP]);
692 0 : if(!dedup_group
693 0 : || !in_group(grouplist, dedup_group))
694 0 : continue;
695 : }
696 :
697 0 : if(!(client_lockdir=get_string(cconfs[OPT_CLIENT_LOCKDIR])))
698 0 : client_lockdir=get_string(cconfs[OPT_DIRECTORY]);
699 :
700 0 : if(!(lockfilebase=prepend_s(client_lockdir, dirinfo->d_name))
701 0 : || !(lockfile=prepend_s(lockfilebase, BEDUP_LOCKFILE_NAME)))
702 : {
703 0 : free_w(&lockfilebase);
704 0 : free_w(&lockfile);
705 0 : ret=-1;
706 0 : break;
707 : }
708 0 : free_w(&lockfilebase);
709 :
710 0 : if(!(lock=lock_alloc_and_init(lockfile)))
711 : {
712 : ret=-1;
713 : break;
714 : }
715 0 : lock_get(lock);
716 0 : free_w(&lockfile);
717 :
718 0 : if(lock->status!=GET_LOCK_GOT)
719 : {
720 0 : logp("Could not get %s\n", lock->path);
721 0 : continue;
722 : }
723 0 : logp("Got %s\n", lock->path);
724 :
725 : // Remember that we got that lock.
726 0 : lock_add_to_list(&locklist, lock);
727 :
728 0 : switch(process_dir(get_string(cconfs[OPT_DIRECTORY]),
729 : dirinfo->d_name,
730 : 1 /* burp mode */, 0 /* level */))
731 : {
732 0 : case 0: ccount++;
733 0 : case 1: continue;
734 : default: ret=-1; break;
735 : }
736 : break;
737 : }
738 0 : closedir(dirp);
739 :
740 0 : locks_release_and_free(&locklist);
741 :
742 0 : confs_free(&cconfs);
743 :
744 0 : return ret;
745 : }
746 :
747 0 : static int process_from_conf(const char *configfile, char **groups)
748 : {
749 0 : int ret=-1;
750 0 : struct conf **globalcs=NULL;
751 0 : struct strlist *grouplist=NULL;
752 0 : struct lock *globallock=NULL;
753 :
754 0 : signal(SIGABRT, &sighandler);
755 0 : signal(SIGTERM, &sighandler);
756 0 : signal(SIGINT, &sighandler);
757 :
758 0 : if(*groups)
759 : {
760 0 : char *tok=NULL;
761 0 : if((tok=strtok(*groups, ",\n")))
762 : {
763 : do
764 : {
765 0 : if(strlist_add(&grouplist, tok, 1))
766 : {
767 0 : log_out_of_memory(__func__);
768 0 : goto end;
769 : }
770 0 : } while((tok=strtok(NULL, ",\n")));
771 : }
772 0 : if(!grouplist)
773 : {
774 0 : logp("unable to read list of groups\n");
775 0 : goto end;
776 : }
777 : }
778 :
779 : // Read directories from config files, and get locks.
780 0 : if(!(globalcs=confs_alloc())
781 0 : || confs_init(globalcs)
782 0 : || conf_load_global_only(configfile, globalcs))
783 : goto end;
784 :
785 0 : if(get_e_burp_mode(globalcs[OPT_BURP_MODE])!=BURP_MODE_SERVER)
786 : {
787 0 : logp("%s is not a server config file\n", configfile);
788 0 : goto end;
789 : }
790 0 : logp("Dedup clients from %s\n",
791 0 : get_string(globalcs[OPT_CLIENTCONFDIR]));
792 0 : maxlinks=get_int(globalcs[OPT_MAX_HARDLINKS]);
793 0 : if(grouplist)
794 : {
795 0 : struct strlist *g=NULL;
796 0 : logp("in dedup groups:\n");
797 0 : for(g=grouplist; g; g=g->next)
798 0 : logp("%s\n", g->path);
799 : }
800 : else
801 : {
802 0 : char *lockpath=NULL;
803 0 : const char *opt_lockfile=confs_get_lockfile(globalcs);
804 : // Only get the global lock when doing a global run.
805 : // If you are doing individual groups, you are likely
806 : // to want to do many different dedup jobs and a
807 : // global lock would get in the way.
808 0 : if(!(lockpath=prepend(opt_lockfile, ".bedup"))
809 0 : || !(globallock=lock_alloc_and_init(lockpath)))
810 : goto end;
811 0 : lock_get(globallock);
812 0 : if(globallock->status!=GET_LOCK_GOT)
813 : {
814 0 : logp("Could not get lock %s (%d)\n", lockpath,
815 : globallock->status);
816 0 : free_w(&lockpath);
817 0 : goto end;
818 : }
819 0 : logp("Got %s\n", lockpath);
820 : }
821 0 : ret=iterate_over_clients(globalcs, grouplist);
822 : end:
823 0 : confs_free(&globalcs);
824 0 : lock_release(globallock);
825 0 : lock_free(&globallock);
826 0 : strlists_free(&grouplist);
827 0 : return ret;
828 : }
829 :
830 3 : static int process_from_command_line(int argc, char *argv[])
831 : {
832 : int i;
833 6 : for(i=optind; i<argc; i++)
834 : {
835 : // Strip trailing slashes, for tidiness.
836 3 : if(argv[i][strlen(argv[i])-1]=='/')
837 0 : argv[i][strlen(argv[i])-1]='\0';
838 3 : if(process_dir("", argv[i],
839 : 0 /* not burp mode */, 0 /* level */))
840 : return 1;
841 : }
842 : return 0;
843 : }
844 :
845 2 : static int usage(void)
846 : {
847 2 : logfmt("\nUsage: %s [options]\n", prog);
848 2 : logfmt("\n");
849 2 : logfmt(" Options:\n");
850 2 : logfmt(" -c <path> Path to config file (default: %s).\n", config_default_path());
851 2 : logfmt(" -g <list of group names> Only run on the directories of clients that\n");
852 2 : logfmt(" are in one of the groups specified.\n");
853 2 : logfmt(" The list is comma-separated. To put a client in a\n");
854 2 : logfmt(" group, use the 'dedup_group' option in the client\n");
855 2 : logfmt(" configuration file on the server.\n");
856 2 : logfmt(" -h|-? Print this text and exit.\n");
857 2 : logfmt(" -d Delete any duplicate files found.\n");
858 2 : logfmt(" (non-%s mode only)\n", PACKAGE_TARNAME);
859 2 : logfmt(" -l Hard link any duplicate files found.\n");
860 2 : logfmt(" -m <number> Maximum number of hard links to a single file.\n");
861 2 : logfmt(" (non-%s mode only - in burp mode, use the\n", PACKAGE_TARNAME);
862 2 : logfmt(" max_hardlinks option in the configuration file)\n");
863 2 : logfmt(" The default is %d. On ext3, the maximum number\n", DEF_MAX_LINKS);
864 2 : logfmt(" of links possible is 32000, but space is needed\n");
865 2 : logfmt(" for the normal operation of %s.\n", PACKAGE_TARNAME);
866 2 : logfmt(" -n <list of directories> Non-%s mode. Deduplicate any (set of) directories.\n", PACKAGE_TARNAME);
867 2 : logfmt(" -v Print duplicate paths.\n");
868 2 : logfmt(" -V Print version and exit.\n");
869 2 : logfmt("\n");
870 2 : logfmt("By default, %s will read %s and deduplicate client storage\n", prog, config_default_path());
871 2 : logfmt("directories using special knowledge of the structure.\n");
872 2 : logfmt("\n");
873 2 : logfmt("With '-n', this knowledge is turned off and you have to specify the directories\n");
874 2 : logfmt("to deduplicate on the command line. Running with '-n' is therefore dangerous\n");
875 2 : logfmt("if you are deduplicating %s storage directories.\n\n", PACKAGE_TARNAME);
876 2 : return 1;
877 : }
878 :
879 14 : int run_bedup(int argc, char *argv[])
880 : {
881 14 : int ret=0;
882 14 : int option=0;
883 14 : int nonburp=0;
884 14 : char *groups=NULL;
885 14 : int givenconfigfile=0;
886 14 : const char *configfile=NULL;
887 :
888 14 : configfile=config_default_path();
889 14 : snprintf(ext, sizeof(ext), ".bedup.%d", getpid());
890 :
891 46 : while((option=getopt(argc, argv, "c:dg:hlm:nvV?"))!=-1)
892 : {
893 21 : switch(option)
894 : {
895 : case 'c':
896 1 : configfile=optarg;
897 1 : givenconfigfile=1;
898 1 : break;
899 : case 'd':
900 2 : deletedups=1;
901 2 : break;
902 : case 'g':
903 1 : groups=optarg;
904 1 : break;
905 : case 'l':
906 3 : makelinks=1;
907 3 : break;
908 : case 'm':
909 6 : maxlinks=atoi(optarg);
910 3 : break;
911 : case 'n':
912 7 : nonburp=1;
913 7 : break;
914 : case 'V':
915 1 : logfmt("%s-%s\n", prog, PACKAGE_VERSION);
916 1 : return 0;
917 : case 'v':
918 1 : verbose=1;
919 1 : break;
920 : case 'h':
921 : case '?':
922 2 : return usage();
923 : }
924 : }
925 :
926 11 : if(nonburp && givenconfigfile)
927 : {
928 1 : logp("-n and -c options are mutually exclusive\n");
929 1 : return 1;
930 : }
931 10 : if(nonburp && groups)
932 : {
933 1 : logp("-n and -g options are mutually exclusive\n");
934 1 : return 1;
935 : }
936 9 : if(!nonburp && maxlinks!=DEF_MAX_LINKS)
937 : {
938 1 : logp("-m option is specified via the configuration file in %s mode (max_hardlinks=)\n", PACKAGE_TARNAME);
939 1 : return 1;
940 : }
941 8 : if(deletedups && makelinks)
942 : {
943 1 : logp("-d and -l options are mutually exclusive\n");
944 1 : return 1;
945 : }
946 7 : if(deletedups && !nonburp)
947 : {
948 1 : logp("-d option requires -n option\n");
949 1 : return 1;
950 : }
951 :
952 6 : if(optind>=argc)
953 : {
954 1 : if(nonburp)
955 : {
956 1 : logp("No directories found after options\n");
957 1 : return 1;
958 : }
959 : }
960 : else
961 : {
962 5 : if(!nonburp)
963 : {
964 1 : logp("Do not specify extra arguments.\n");
965 1 : return 1;
966 : }
967 : }
968 :
969 4 : if(maxlinks<2)
970 : {
971 1 : logp("The argument to -m needs to be greater than 1.\n");
972 1 : return 1;
973 : }
974 :
975 3 : if(nonburp)
976 : {
977 : // Read directories from command line.
978 3 : if(process_from_command_line(argc, argv))
979 0 : ret=1;
980 : }
981 : else
982 : {
983 0 : if(process_from_conf(configfile, &groups))
984 0 : ret=1;
985 : }
986 :
987 3 : if(!nonburp)
988 : {
989 0 : logp("%d client storages scanned\n", ccount);
990 : }
991 3 : logp("%" PRIu64 " duplicate %s found\n",
992 3 : count, count==1?"file":"files");
993 9 : logp("%" PRIu64 " bytes %s%s\n",
994 4 : savedbytes, (makelinks || deletedups)?"saved":"saveable",
995 : bytes_to_human(savedbytes));
996 3 : mystruct_delete_all();
997 3 : return ret;
998 : }
|