rpm  5.4.15
rpmbf.c
Go to the documentation of this file.
1 
5 #include "system.h"
6 #include <math.h>
7 
8 #include <rpmiotypes.h>
9 #include <rpmio.h> /* for *Pool methods */
10 #include <rpmlog.h>
11 
12 #define _RPMBF_INTERNAL
13 #include <rpmbf.h>
14 
15 #include "debug.h"
16 
17 /* Any pair of 32 bit hashes can be used. lookup3.c generates pairs, will do. */
18 #define _JLU3_jlu32lpair 1
19 #include "lookup3.c"
20 
21 /*@unchecked@*/
22 int _rpmbf_debug = 0;
23 
24 /*@-mustmod@*/ /* XXX splint on crack */
25 static void rpmbfFini(void * _bf)
26  /*@globals fileSystem @*/
27  /*@modifies *_bf, fileSystem @*/
28 {
29  rpmbf bf = (rpmbf) _bf;
30 
31  bf->bits = PBM_FREE(bf->bits);
32 }
33 /*@=mustmod@*/
34 
35 /*@unchecked@*/ /*@only@*/ /*@null@*/
37 
38 static rpmbf rpmbfGetPool(/*@null@*/ rpmioPool pool)
39  /*@globals _rpmbfPool, fileSystem @*/
40  /*@modifies pool, _rpmbfPool, fileSystem @*/
41 {
42  rpmbf bf;
43 
44  if (_rpmbfPool == NULL) {
45  _rpmbfPool = rpmioNewPool("bf", sizeof(*bf), -1, _rpmbf_debug,
46  NULL, NULL, rpmbfFini);
47  pool = _rpmbfPool;
48  }
49  return (rpmbf) rpmioGetPool(pool, sizeof(*bf));
50 }
51 
52 rpmbf rpmbfNew(size_t m, size_t k, unsigned flags)
53 {
54  static size_t nestimate = 1024; /* XXX default estimated population. */
55  rpmbf bf = rpmbfGetPool(_rpmbfPool);
56 
57  if (k == 0) k = 16;
58  if (m == 0) m = (3 * nestimate * k) / 2;
59 
60  bf->k = k;
61  bf->m = m;
62  bf->n = 0;
63  bf->bits = (unsigned char *) PBM_ALLOC(bf->m-1);
64 
65  return rpmbfLink(bf);
66 }
67 
68 int rpmbfAdd(rpmbf bf, const void * _s, size_t ns)
69 {
70  const char * s = (const char *) _s;
71  rpmuint32_t h0 = 0;
72  rpmuint32_t h1 = 0;
73 
74  if (bf == NULL) return -1;
75 
76  if (ns == 0) ns = strlen(s);
77  jlu32lpair(s, ns, &h0, &h1);
78 
79  for (ns = 0; ns < bf->k; ns++) {
80  rpmuint32_t h = h0 + ns * h1;
81  rpmuint32_t ix = (h % bf->m);
82  PBM_SET(ix, bf);
83  }
84  bf->n++;
85 if (_rpmbf_debug)
86 fprintf(stderr, "<-- %s(%p,\"%s\") bf{%u,%u}[%u]\n", __FUNCTION__, bf, s, (unsigned)bf->m, (unsigned)bf->k, (unsigned)bf->n);
87  return 0;
88 }
89 
90 int rpmbfChk(rpmbf bf, const void * _s, size_t ns)
91 {
92  const char * s = (const char *) _s;
93  rpmuint32_t h0 = 0;
94  rpmuint32_t h1 = 0;
95  int rc = 1;
96 
97  if (bf == NULL) return -1;
98 
99  if (ns == 0) ns = strlen(s);
100  jlu32lpair(s, ns, &h0, &h1);
101 
102  for (ns = 0; ns < bf->k; ns++) {
103  rpmuint32_t h = h0 + ns * h1;
104  rpmuint32_t ix = (h % bf->m);
105  if (PBM_ISSET(ix, bf))
106  continue;
107  rc = 0;
108  break;
109  }
110 if (_rpmbf_debug)
111 fprintf(stderr, "<-- %s(%p,\"%s\") bf{%u,%u}[%u] rc %d\n", __FUNCTION__, bf, s, (unsigned)bf->m, (unsigned)bf->k, (unsigned)bf->n, rc);
112  return rc;
113 }
114 
116 {
117  static size_t nbw = (__PBM_NBITS/8);
118  __pbm_bits * bits;
119  size_t nw;
120 
121  if (bf == NULL) return -1;
122 
123  bits = __PBM_BITS(bf);
124  nw = (__PBM_IX(bf->m-1) + 1);
125  memset(bits, 0, nw * nbw);
126  bf->n = 0;
127 if (_rpmbf_debug)
128 fprintf(stderr, "<-- %s(%p) bf{%u,%u}[%u]\n", __FUNCTION__, bf, (unsigned)bf->m, (unsigned)bf->k, (unsigned)bf->n);
129  return 0;
130 }
131 
132 int rpmbfDel(rpmbf bf, const void * _s, size_t ns)
133 {
134  const char * s = (const char *) _s;
135  rpmuint32_t h0 = 0;
136  rpmuint32_t h1 = 0;
137 
138  if (bf == NULL) return -1;
139 
140  if (ns == 0) ns = strlen(s);
141 assert(ns > 0);
142  jlu32lpair(s, ns, &h0, &h1);
143 
144  for (ns = 0; ns < bf->k; ns++) {
145  rpmuint32_t h = h0 + ns * h1;
146  rpmuint32_t ix = (h % bf->m);
147  PBM_CLR(ix, bf);
148  }
149  if (bf->n != 0)
150  bf->n--;
151 if (_rpmbf_debug)
152 fprintf(stderr, "<-- %s(%p,\"%s\") bf{%u,%u}[%u]\n", __FUNCTION__, bf, s, (unsigned)bf->m, (unsigned)bf->k, (unsigned)bf->n);
153  return 0;
154 }
155 
157 {
158  __pbm_bits * abits;
159  __pbm_bits * bbits;
160  size_t nw;
161  size_t i;
162 
163  if (a == NULL || b == NULL) return -1;
164 
165  abits = __PBM_BITS(a);
166  bbits = __PBM_BITS(b);
167  nw = (__PBM_IX(a->m-1) + 1);
168 
169  if (!(a->m == b->m && a->k == b->k))
170  return -1;
171  for (i = 0; i < nw; i++)
172  abits[i] &= bbits[i];
173  a->n = 1; /* XXX what is population estimate? */
174 if (_rpmbf_debug)
175 fprintf(stderr, "<-- %s(%p,%p) bf{%u,%u}[%u]\n", __FUNCTION__, a, b, (unsigned)a->m, (unsigned)a->k, (unsigned)a->n);
176  return 0;
177 }
178 
179 int rpmbfUnion(rpmbf a, const rpmbf b)
180 {
181  __pbm_bits * abits;
182  __pbm_bits * bbits;
183  size_t nw;
184  size_t i;
185 
186  if (a == NULL || b == NULL) return -1;
187 
188  abits = __PBM_BITS(a);
189  bbits = __PBM_BITS(b);
190  nw = (__PBM_IX(a->m-1) + 1);
191 
192  if (!(a->m == b->m && a->k == b->k))
193  return -1;
194  for (i = 0; i < nw; i++)
195  abits[i] |= bbits[i];
196  a->n += b->n;
197 if (_rpmbf_debug)
198 fprintf(stderr, "<-- %s(%p,%p) bf{%u,%u}[%u]\n", __FUNCTION__, a, b, (unsigned)a->m, (unsigned)a->k, (unsigned)a->n);
199  return 0;
200 }
201 
202 void rpmbfParams(size_t n, double e, size_t * mp, size_t * kp)
203 {
204  static size_t _nmin = 10;
205  static size_t _n = 10000;
206  static size_t _nmax = 1 * 1000 * 1000 * 1000;
207  static double _emin = 1.0e-10;
208  static double _e = 1.0e-4;
209  static double _emax = 1.0e-2;
210  size_t m;
211  size_t k;
212 
213  /* XXX sanity checks on (n,e) args. */
214  if (!(n >= _nmin && _n <= _nmax))
215  n = _n;
216  if (!(e >= _emin && _e <= _emax))
217  e = _e;
218 
219  m = (size_t)((n * log(e)) / (log(1.0 / pow(2.0, log(2.0)))) + 0.5);
220  k = (size_t) ((m * log(2.0)) / n);
221  if (mp) *mp = m;
222  if (kp) *kp = k;
223 if (_rpmbf_debug)
224 fprintf(stderr, "<-- %s(%u, %g) m %u k %u\n", __FUNCTION__, (unsigned)n, e, (unsigned)m, (unsigned)k);
225 }
const bson * b
Definition: bson.h:280
int _rpmbf_debug
Definition: rpmbf.c:22
rpmbf rpmbfNew(size_t m, size_t k, unsigned flags)
Create a Bloom filter.
Definition: rpmbf.c:52
struct rpmbf_s * rpmbf
Definition: rpmbf.h:17
void rpmbfParams(size_t n, double e, size_t *mp, size_t *kp)
Return optimal {m, k} for given n and e.
Definition: rpmbf.c:202
Yet Another syslog(3) API clone.
unsigned int rpmuint32_t
Definition: rpmiotypes.h:28
rpmioItem rpmioGetPool(rpmioPool pool, size_t size)
Get unused item from pool, or alloc a new item.
Definition: rpmmalloc.c:220
int rpmbfDel(rpmbf bf, const void *_s, size_t ns)
Delete item from a Bloom filter.
Definition: rpmbf.c:132
int rpmbfUnion(rpmbf a, const rpmbf b)
Return union of two Bloom filters.
Definition: rpmbf.c:179
const char const bson int mongo_write_concern int flags
Definition: mongo.h:485
unsigned int __pbm_bits
Definition: rpmbf.h:19
static rpmbf rpmbfGetPool(rpmioPool pool)
Definition: rpmbf.c:38
const char const int i
Definition: bson.h:778
int rpmbfIntersect(rpmbf a, const rpmbf b)
Return intersection of two Bloom filters.
Definition: rpmbf.c:156
rpmioPool rpmioNewPool(const char *name, size_t size, int limit, int flags, char *(*dbg)(void *item), void(*init)(void *item), void(*fini)(void *item))
Create a memory pool.
Definition: rpmmalloc.c:109
rpmioPool _rpmbfPool
Definition: rpmbf.c:36
int rpmbfClr(rpmbf bf)
Clear a Bloom filter, discarding all set memberships.
Definition: rpmbf.c:115
int rpmbfAdd(rpmbf bf, const void *_s, size_t ns)
Add item to a Bloom filter.
Definition: rpmbf.c:68
static void rpmbfFini(void *_bf)
Definition: rpmbf.c:25
rpmbf rpmbfLink(rpmbf bf)
Reference a Bloom filter instance.
int rpmbfChk(rpmbf bf, const void *_s, size_t ns)
Check for item in a Bloom filter.
Definition: rpmbf.c:90
const char * ns
Definition: mongo.h:326