Line data Source code
1 : #include "../../../burp.h"
2 : #include "../../../alloc.h"
3 : #include "../../../asfd.h"
4 : #include "../../../lock.h"
5 : #include "../../../log.h"
6 : #include "../../../prepend.h"
7 : #include "../../../protocol2/blist.h"
8 : #include "../../../protocol2/blk.h"
9 : #include "candidate.h"
10 : #include "champ_chooser.h"
11 : #include "hash.h"
12 : #include "incoming.h"
13 : #include "scores.h"
14 : #include "sparse.h"
15 :
// Log that the sparse lock could not be obtained within the given number
// of seconds.
static void try_lock_msg(int seconds)
{
	logp("Unable to get sparse lock for %d seconds.\n",
		seconds);
}
20 :
21 35 : static int try_to_get_lock(struct lock *lock)
22 : {
23 : // Sleeping for 1800*2 seconds makes 1 hour.
24 : // This should be super generous.
25 35 : int lock_tries=0;
26 35 : int lock_tries_max=1800;
27 35 : int sleeptime=2;
28 :
29 : while(1)
30 : {
31 35 : lock_get(lock);
32 35 : switch(lock->status)
33 : {
34 : case GET_LOCK_GOT:
35 35 : logp("Got sparse lock\n");
36 35 : return 0;
37 : case GET_LOCK_NOT_GOT:
38 0 : lock_tries++;
39 : if(lock_tries>lock_tries_max)
40 : {
41 : try_lock_msg(lock_tries_max*sleeptime);
42 : return -1;
43 : }
44 : // Log every 10 seconds.
45 : if(lock_tries%(10/sleeptime))
46 : {
47 0 : try_lock_msg(lock_tries_max*sleeptime);
48 0 : logp("Giving up.\n");
49 0 : return -1;
50 : }
51 : sleep(sleeptime);
52 : continue;
53 : case GET_LOCK_ERROR:
54 : default:
55 0 : logp("Unable to get global sparse lock.\n");
56 0 : return -1;
57 : }
58 : }
59 : // Never reached.
60 : return -1;
61 : }
62 :
// Build the lock file path from sparse_path and the "lock" suffix, allocate
// a lock object for it, and try to acquire it.
// Returns the lock on success. On any failure, lock_free() is called on the
// lock pointer and that pointer is returned (presumably NULL after
// lock_free() - confirm against lock.c).
struct lock *try_to_get_sparse_lock(const char *sparse_path)
{
	int got=0;
	struct lock *lock=NULL;
	char *lockfile=prepend_n(sparse_path, "lock", strlen("lock"), ".");

	if(lockfile
	  && (lock=lock_alloc_and_init(lockfile))
	  && !try_to_get_lock(lock))
		got=1;

	if(!got)
		lock_free(&lock);

	// The path string is released here whether or not the lock was
	// acquired.
	free_w(&lockfile);
	return lock;
}
74 :
75 1 : static int load_existing_sparse(const char *datadir, struct scores *scores)
76 : {
77 1 : int ret=-1;
78 : struct stat statp;
79 1 : struct lock *lock=NULL;
80 1 : char *sparse_path=NULL;
81 1 : if(!(sparse_path=prepend_s(datadir, "sparse"))) goto end;
82 : // Best not let other things mess with the sparse lock while we are
83 : // trying to read it.
84 1 : if(!(lock=try_to_get_sparse_lock(sparse_path)))
85 : goto end;
86 2 : if(lstat(sparse_path, &statp))
87 : {
88 : ret=0;
89 : goto end;
90 : }
91 1 : if(candidate_load(NULL, sparse_path, scores))
92 : goto end;
93 1 : ret=0;
94 : end:
95 1 : free_w(&sparse_path);
96 1 : lock_release(lock);
97 1 : lock_free(&lock);
98 1 : return ret;
99 : }
100 :
101 1 : struct scores *champ_chooser_init(const char *datadir)
102 : {
103 1 : struct scores *scores=NULL;
104 2 : if(!(scores=scores_alloc())
105 1 : || load_existing_sparse(datadir, scores))
106 : goto error;
107 1 : return scores;
108 : error:
109 0 : scores_free(&scores);
110 0 : return NULL;
111 : }
112 :
// Tear down the champ chooser: free the candidates, delete all sparse
// entries, then free the scores object passed in.
void champ_chooser_free(struct scores **scores)
{
	// Candidates first, then the sparse entries, then the scores.
	candidates_free();
	sparse_delete_all();

	scores_free(scores);
}
119 :
120 4096 : static int already_got_block(struct asfd *asfd, struct blk *blk)
121 : {
122 : static struct hash_weak *hash_weak;
123 :
124 : // If already got, need to overwrite the references.
125 4096 : if((hash_weak=hash_weak_find(blk->fingerprint)))
126 : {
127 : static struct hash_strong *hash_strong;
128 0 : if((hash_strong=hash_strong_find(
129 0 : hash_weak, blk->md5sum)))
130 : {
131 0 : blk->savepath=hash_strong->savepath;
132 : //printf("FOUND: %s %s\n", blk->weak, blk->strong);
133 : //printf("F");
134 0 : blk->got=BLK_GOT;
135 0 : asfd->in->got++;
136 : return 0;
137 : }
138 : else
139 : {
140 : // printf("COLLISION: %s %s\n", blk->weak, blk->strong);
141 : // collisions++;
142 : }
143 : }
144 :
145 4096 : blk->got=BLK_NOT_GOT;
146 : //printf(".");
147 : return 0;
148 : }
149 :
150 : #define CHAMPS_MAX 10
151 :
152 1 : int deduplicate(struct asfd *asfd, const char *directory, struct scores *scores)
153 : {
154 : struct blk *blk;
155 1 : struct incoming *in=asfd->in;
156 : struct candidate *champ;
157 1 : struct candidate *champ_last=NULL;
158 1 : int count=0;
159 1 : int blk_count=0;
160 :
161 1 : if(!in) return 0;
162 :
163 1 : incoming_found_reset(in);
164 1 : count=0;
165 2 : while(count!=CHAMPS_MAX
166 1 : && (champ=candidates_choose_champ(in, champ_last, scores)))
167 : {
168 : // printf("Got champ: %s %d\n", champ->path, *(champ->score));
169 0 : switch(hash_load(champ->path, directory))
170 : {
171 : case HASH_RET_OK:
172 0 : count++;
173 0 : champ_last=champ;
174 0 : break;
175 : case HASH_RET_PERM:
176 : return -1;
177 : case HASH_RET_TEMP:
178 0 : champ->deleted=1;
179 0 : break;
180 : }
181 : }
182 :
183 1 : blk_count=0;
184 4097 : for(blk=asfd->blist->blk_to_dedup; blk; blk=blk->next)
185 : {
186 : //printf("try: %lu\n", blk->index);
187 4096 : blk_count++;
188 :
189 4096 : if(blk_is_zero_length(blk))
190 : {
191 : //printf("got: %s %s\n", blk->weak, blk->strong);
192 0 : blk->got=BLK_GOT;
193 0 : in->got++;
194 0 : continue;
195 : }
196 :
197 : // If already got, this function will set blk->save_path
198 : // to be the location of the already got block.
199 4096 : if(already_got_block(asfd, blk)) return -1;
200 :
201 : //printf("after agb: %lu %d\n", blk->index, blk->got);
202 : }
203 :
204 : logp("%s: %04d/%04zu - %04d/%04d\n",
205 1 : asfd->desc, count, candidates_len, in->got, blk_count);
206 :
207 : // Start the incoming array again.
208 1 : in->size=0;
209 : // Destroy the deduplication hash table.
210 1 : hash_delete_all();
211 :
212 1 : asfd->blist->blk_to_dedup=NULL;
213 :
214 1 : return 0;
215 : }
|