Line data Source code
1 : #include "../../../burp.h"
2 : #include "../../../alloc.h"
3 : #include "../../../asfd.h"
4 : #include "../../../lock.h"
5 : #include "../../../log.h"
6 : #include "../../../prepend.h"
7 : #include "../../../protocol2/blist.h"
8 : #include "../../../protocol2/blk.h"
9 : #include "candidate.h"
10 : #include "champ_chooser.h"
11 : #include "hash.h"
12 : #include "incoming.h"
13 : #include "scores.h"
14 : #include "sparse.h"
15 :
// Log how many seconds have been spent waiting for the sparse lock.
static void try_lock_msg(int seconds)
{
	logp("Unable to get sparse lock for %d seconds.\n",
		seconds);
}
20 :
21 45 : static int try_to_get_lock(struct lock *lock)
22 : {
23 : // Sleeping for 1800*2 seconds makes 1 hour.
24 : // This should be super generous.
25 45 : int lock_tries=0;
26 45 : int lock_tries_max=1800;
27 45 : int sleeptime=2;
28 :
29 : while(1)
30 : {
31 45 : lock_get(lock);
32 45 : switch(lock->status)
33 : {
34 : case GET_LOCK_GOT:
35 45 : logp("locked: sparse index\n");
36 45 : return 0;
37 : case GET_LOCK_NOT_GOT:
38 0 : lock_tries++;
39 0 : if(lock_tries>lock_tries_max)
40 : {
41 0 : try_lock_msg(lock_tries_max*sleeptime);
42 0 : logp("Giving up.\n");
43 0 : return -1;
44 : }
45 : // Log every 10 seconds.
46 0 : if(lock_tries%(10/sleeptime))
47 : {
48 0 : try_lock_msg(lock_tries*sleeptime);
49 : }
50 0 : sleep(sleeptime);
51 0 : continue;
52 : case GET_LOCK_ERROR:
53 : default:
54 0 : logp("Unable to get global sparse lock.\n");
55 0 : return -1;
56 : }
57 : }
58 : // Never reached.
59 : return -1;
60 : }
61 :
// Derive a lock file path from sparse_path (via prepend_n() with "lock" and
// a "." separator), allocate a lock on it and wait until it is acquired.
// Returns the lock on success. On any failure the lock pointer is handed to
// lock_free() before being returned (presumably leaving it NULL - callers in
// this file treat a NULL return as failure).
struct lock *try_to_get_sparse_lock(const char *sparse_path)
{
	struct lock *lock=NULL;
	int failed=1;
	char *lockfile=prepend_n(sparse_path, "lock", strlen("lock"), ".");

	if(lockfile)
	{
		lock=lock_alloc_and_init(lockfile);
		if(lock)
			failed=try_to_get_lock(lock);
	}
	if(failed)
		lock_free(&lock);

	// The path string is only needed while setting up the lock.
	free_w(&lockfile);
	return lock;
}
73 :
74 1 : static int load_existing_sparse(const char *datadir, struct scores *scores)
75 : {
76 1 : int ret=-1;
77 : struct stat statp;
78 1 : struct lock *lock=NULL;
79 1 : char *sparse_path=NULL;
80 1 : if(!(sparse_path=prepend_s(datadir, "sparse"))) goto end;
81 : // Best not let other things mess with the sparse lock while we are
82 : // trying to read it.
83 1 : if(!(lock=try_to_get_sparse_lock(sparse_path)))
84 : goto end;
85 2 : if(lstat(sparse_path, &statp))
86 : {
87 : ret=0;
88 : goto end;
89 : }
90 1 : if(candidate_load(NULL, sparse_path, scores))
91 : goto end;
92 1 : ret=0;
93 : end:
94 1 : free_w(&sparse_path);
95 1 : lock_release(lock);
96 1 : lock_free(&lock);
97 1 : return ret;
98 : }
99 :
100 1 : struct scores *champ_chooser_init(const char *datadir)
101 : {
102 1 : struct scores *scores=NULL;
103 1 : if(!(scores=scores_alloc())
104 1 : || load_existing_sparse(datadir, scores))
105 : goto error;
106 1 : return scores;
107 : error:
108 0 : scores_free(&scores);
109 0 : return NULL;
110 : }
111 :
// Tear down the champ chooser: free the candidates, delete all sparse
// entries, then free the given scores.
void champ_chooser_free(struct scores **scores)
{
	candidates_free();
	sparse_delete_all();

	scores_free(scores);
}
118 :
119 4096 : static int already_got_block(struct asfd *asfd, struct blk *blk)
120 : {
121 : static struct hash_weak *hash_weak;
122 :
123 : // If already got, need to overwrite the references.
124 4096 : if((hash_weak=hash_weak_find(blk->fingerprint)))
125 : {
126 : static struct hash_strong *hash_strong;
127 0 : if((hash_strong=hash_strong_find(
128 0 : hash_weak, blk->md5sum)))
129 : {
130 0 : blk->savepath=hash_strong->savepath;
131 : //printf("FOUND: %s %s\n", blk->weak, blk->strong);
132 : //printf("F");
133 0 : blk->got=BLK_GOT;
134 0 : asfd->in->got++;
135 : return 0;
136 : }
137 : else
138 : {
139 : // printf("COLLISION: %s %s\n", blk->weak, blk->strong);
140 : // collisions++;
141 : }
142 : }
143 :
144 4096 : blk->got=BLK_NOT_GOT;
145 : //printf(".");
146 : return 0;
147 : }
148 :
149 : #define CHAMPS_MAX 10
150 :
151 1 : int deduplicate(struct asfd *asfd, const char *directory, struct scores *scores)
152 : {
153 : struct blk *blk;
154 1 : struct incoming *in=asfd->in;
155 : struct candidate *champ;
156 1 : struct candidate *champ_last=NULL;
157 1 : int count=0;
158 1 : int blk_count=0;
159 :
160 1 : if(!in) return 0;
161 :
162 1 : incoming_found_reset(in);
163 1 : count=0;
164 2 : while(count!=CHAMPS_MAX
165 1 : && (champ=candidates_choose_champ(in, champ_last, scores)))
166 : {
167 : // printf("Got champ: %s %d\n", champ->path, *(champ->score));
168 0 : switch(hash_load(champ->path, directory))
169 : {
170 : case HASH_RET_OK:
171 0 : count++;
172 0 : champ_last=champ;
173 0 : break;
174 : case HASH_RET_PERM:
175 : return -1;
176 : case HASH_RET_TEMP:
177 0 : champ->deleted=1;
178 0 : break;
179 : }
180 : }
181 :
182 1 : blk_count=0;
183 4097 : for(blk=asfd->blist->blk_to_dedup; blk; blk=blk->next)
184 : {
185 : //printf("try: %lu\n", blk->index);
186 4096 : blk_count++;
187 :
188 4096 : if(blk_is_zero_length(blk))
189 : {
190 : //printf("got: %s %s\n", blk->weak, blk->strong);
191 0 : blk->got=BLK_GOT;
192 0 : in->got++;
193 0 : continue;
194 : }
195 :
196 : // If already got, this function will set blk->save_path
197 : // to be the location of the already got block.
198 4096 : if(already_got_block(asfd, blk)) return -1;
199 :
200 : //printf("after agb: %lu %d\n", blk->index, blk->got);
201 : }
202 :
203 1 : logp("%s: %04d/%04zu - %04d/%04d\n",
204 1 : asfd->desc, count, candidates_len, in->got, blk_count);
205 :
206 : // Start the incoming array again.
207 1 : in->size=0;
208 : // Destroy the deduplication hash table.
209 1 : hash_delete_all();
210 :
211 1 : asfd->blist->blk_to_dedup=NULL;
212 :
213 1 : return 0;
214 : }
|