1
2 """PyDbLite.py
3
4 In-memory database management, with selection by list comprehension
5 or generator expression
6
7 Fields are untyped : they can store anything that can be pickled.
8 Selected records are returned as dictionaries. Each record is
9 identified by a unique id and has a version number incremented
10 at every record update, to detect concurrent access
11
12 Syntax :
13 from PyDbLite import Base
14 db = Base('dummy')
15 # create new base with field names
16 db.create('name','age','size')
17 # existing base
18 db.open()
19 # insert new record
20 db.insert(name='homer',age=23,size=1.84)
21 # records are dictionaries with a unique integer key __id__
22 # selection by list comprehension
23 res = [ r for r in db if 30 > r['age'] >= 18 and r['size'] < 2 ]
24 # or generator expression
25 for r in (r for r in db if r['name'] in ('homer','marge') ):
26 # simple selection (equality test)
27 res = db(age=30)
28 # delete a record or a list of records
29 db.delete(one_record)
30 db.delete(list_of_records)
31 # delete a record by its id
32 del db[rec_id]
33 # direct access by id
34 record = db[rec_id] # the record such that record['__id__'] == rec_id
35 # create an index on a field
36 db.create_index('age')
37 # access by index
38 records = db._age[23] # returns the list of records with age == 23
39 # update
40 db.update(record,age=24)
41 # add and drop fields
42 db.add_field('new_field')
43 db.drop_field('name')
44 # save changes on disk
45 db.commit()
46 """
47
48 import os
49 import cPickle
50 import bisect
51
52
53 try:
54 set([])
55 except NameError:
56 from sets import Set as set
57
59 """Class used for indexing a base on a field
60 The instance of Index is an attribute of the Base instance"""
61
63 self.db = db
64 self.field = field
65
67 return iter(self.db.indices[self.field])
68
70 return self.db.indices[self.field].keys()
71
73 """Lookup by key : return the list of records where
74 field value is equal to this key, or an empty list"""
75 ids = self.db.indices[self.field].get(key,[])
76 return [ self.db.records[_id] for _id in ids ]
77
79
82
def create(self, *fields, **kw):
    """Create a new base with the specified field names

    Parameters :
    - fields : the names of the fields of the new base
    - mode (keyword argument, optional) : used only if something already
      exists at the path self.name
        - if mode is not given : raise IOError
        - if mode = 'open' : open the existing base, ignore the fields
        - if mode = 'override' : erase the existing base and create a
          new one with the specified fields

    Returns self, or the result of self.open() if an existing base
    was opened

    Raises :
    - IOError if self.name exists and is not a file, or is an existing
      file and no mode was given
    - ValueError if self.name is an existing file and mode is not one of
      the values listed above
    """
    self.mode = mode = kw.get("mode", None)
    if os.path.exists(self.name):
        if not os.path.isfile(self.name):
            raise IOError("%s exists and is not a file" % self.name)
        elif mode is None:
            raise IOError("Base %s already exists" % self.name)
        elif mode == "open":
            return self.open()
        elif mode == "override":
            os.remove(self.name)
        else:
            # an unrecognised mode used to fall through and silently
            # replace the existing base ; refuse it explicitly instead
            raise ValueError("Invalid value given for 'mode' : %r" % (mode,))
    # start from an empty base and write it out immediately
    self.fields = list(fields)
    self.records = {}
    self.next_id = 0
    self.indices = {}
    self.commit()
    return self
106
108 """Create an index on the specified field names
109
110 An index on a field is a mapping between the values taken by the field
111 and the sorted list of the ids of the records whose field is equal to
112 this value
113
114 For each indexed field, an attribute of self is created, an instance
115 of the class Index (see above). Its name is the field name, with the
116 prefix _ to avoid name conflicts
117 """
118 reset = False
119 for f in fields:
120 if not f in self.fields:
121 raise NameError,"%s is not a field name" %f
122
123 if self.mode == "open" and f in self.indices:
124 continue
125 reset = True
126 self.indices[f] = {}
127 for _id,record in self.records.iteritems():
128
129 bisect.insort(self.indices[f].setdefault(record[f],[]),
130 _id)
131
132
133 setattr(self,'_'+f,Index(self,f))
134 if reset:
135 self.commit()
136
138 """Open an existing database and load its content into memory"""
139 _in = open(self.name)
140 self.fields = cPickle.load(_in)
141 self.next_id = cPickle.load(_in)
142 self.records = cPickle.load(_in)
143 self.indices = cPickle.load(_in)
144 for f in self.indices.keys():
145 setattr(self,'_'+f,Index(self,f))
146 _in.close()
147 self.mode = "open"
148 return self
149
151 """Write the database to a file"""
152 out = open(self.name,'wb')
153 cPickle.dump(self.fields,out)
154 cPickle.dump(self.next_id,out)
155 cPickle.dump(self.records,out)
156 cPickle.dump(self.indices,out)
157 out.close()
158
160 """Insert a record in the database
161 Parameters can be positional or keyword arguments. If positional
162 they must be in the same order as in the create() method
163 If some of the fields are missing the value is set to None
164 Returns the record identifier
165 """
166 if args:
167 kw = dict([(f,arg) for f,arg in zip(self.fields,args)])
168
169 record = dict([(f,None) for f in self.fields])
170
171 for (k,v) in kw.iteritems():
172 record[k]=v
173
174 record['__id__'] = self.next_id
175
176 record['__version__'] = 0
177
178 self.records[self.next_id] = record
179
180 for ix in self.indices.keys():
181 bisect.insort(self.indices[ix].setdefault(record[ix],[]),
182 self.next_id)
183
184 self.next_id += 1
185 return record['__id__']
186
188 """Remove a single record, or the records in an iterable
189 Before starting deletion, test if all records are in the base
190 and don't have twice the same __id__
191 Return the number of deleted items
192 """
193 if isinstance(removed,dict):
194
195 removed = [removed]
196 else:
197
198 removed = [ r for r in removed ]
199 if not removed:
200 return 0
201 _ids = [ r['__id__'] for r in removed ]
202 _ids.sort()
203 keys = set(self.records.keys())
204
205 if not set(_ids).issubset(keys):
206 missing = list(set(_ids).difference(keys))
207 raise IndexError,'Delete aborted. Records with these ids' \
208 ' not found in the base : %s' %str(missing)
209
210 for i in range(len(_ids)-1):
211 if _ids[i] == _ids[i+1]:
212 raise IndexError,"Delete aborted. Duplicate id : %s" %_ids[i]
213 deleted = len(removed)
214 while removed:
215 r = removed.pop()
216 _id = r['__id__']
217
218 for indx in self.indices.keys():
219 pos = bisect.bisect(self.indices[indx][r[indx]],_id)-1
220 del self.indices[indx][r[indx]][pos]
221 if not self.indices[indx][r[indx]]:
222 del self.indices[indx][r[indx]]
223
224 del self.records[_id]
225 return deleted
226
def update(self, record, **kw):
    """Update the record with new keys and values and update indices

    Parameters :
    - record : the record dictionary to modify ; it is changed in place
      and must hold the keys "__id__" and "__version__"
    - kw : the new values, as field=value keyword arguments

    For every indexed field whose value changes, the record id is moved
    from the id list of the old value to the id list of the new one.
    The record version number is incremented by 1 on every call
    """
    _id = record["__id__"]
    for indx, index in self.indices.items():
        # only indexed fields that are actually being changed matter
        if indx not in kw:
            continue
        old_value = record[indx]
        new_value = kw[indx]
        if old_value == new_value:
            continue
        # remove id for the old value ; the id lists are kept sorted, so
        # bisect gives the position just after _id
        ids = index[old_value]
        del ids[bisect.bisect(ids, _id) - 1]
        if not ids:
            # no record left with this value : drop the empty entry
            del index[old_value]
        # insert id for the new value, keeping the list sorted
        bisect.insort(index.setdefault(new_value, []), _id)
    # update record values
    record.update(kw)
    # increment version number
    record["__version__"] += 1
246
248 if field in self.fields + ["__id__","__version__"]:
249 raise ValueError,"Field %s already defined" %field
250 for r in self:
251 r[field] = default
252 self.fields.append(field)
253 self.commit()
254
256 if field in ["__id__","__version__"]:
257 raise ValueError,"Can't delete field %s" %field
258 self.fields.remove(field)
259 for r in self:
260 del r[field]
261 if field in self.indices:
262 del self.indices[field]
263 self.commit()
264
266 """Selection by field values
267 db(key=value) returns the list of records where r[key] = value"""
268 for key in kw:
269 if not key in self.fields:
270 raise ValueError,"Field %s not in the database" %key
271 def sel_func(r):
272 for key in kw:
273 if not r[key] == kw[key]:
274 return False
275 return True
276 return [ r for r in self if sel_func(r) ]
277
279 """Direct access by record id"""
280 return self.records[record_id]
281
283 return len(self.records)
284
286 """Delete by record id"""
287 self.delete(self[record_id])
288
290 """Iteration on the records"""
291 return self.records.itervalues()
292
293 if __name__ == '__main__':
294
295 import random
296 import datetime
297 names = ['pierre','claire','simon','camille','jean',
298 'florence','marie-anne']
299 db = Base('PyDbLite_test')
300 db.create('name','age','size','birth',mode="override")
301 for i in range(1000):
302 db.insert(name=unicode(random.choice(names)),
303 age=random.randint(7,47),size=random.uniform(1.10,1.95),
304 birth=datetime.date(1990,10,10))
305 db.create_index('age')
306 db.commit()
307
308 print 'Record #20 :',db[20]
309 print '\nRecords with age=30 :'
310 for rec in db._age[30]:
311 print '%-10s | %2s | %s' %(rec['name'],rec['age'],round(rec['size'],2))
312
313 print "\nSame with __call__"
314 for rec in db(age=30):
315 print '%-10s | %2s | %s' %(rec['name'],rec['age'],round(rec['size'],2))
316 print db._age[30] == db(age=30)
317
318 db.insert(name=unicode(random.choice(names)))
319 print '\nNumber of records with 30 <= age < 33 :',
320 print sum([1 for r in db if 33 > r['age'] >= 30])
321
322 print db.delete([])
323
324 d = db.delete([r for r in db if 32> r['age'] >= 30 and r['name']==u'pierre'])
325 print "\nDeleting %s records with name == 'pierre' and 30 <= age < 32" %d
326 print '\nAfter deleting records '
327 for rec in db._age[30]:
328 print '%-10s | %2s | %s' %(rec['name'],rec['age'],round(rec['size'],2))
329 print '\n',sum([1 for r in db]),'records in the database'
330 print '\nMake pierre uppercase for age > 27'
331 for record in ([r for r in db if r['name']=='pierre' and r['age'] >27]) :
332 db.update(record,name=u"Pierre")
333 print len([r for r in db if r['name']==u'Pierre']),'Pierre'
334 print len([r for r in db if r['name']==u'pierre']),'pierre'
335 print len([r for r in db if r['name'] in [u'pierre',u'Pierre']]),'p/Pierre'
336 print 'is unicode :',isinstance(db[20]['name'],unicode)
337 db.commit()
338 db.open()
339 print '\nSame operation after commit + open'
340 print len([r for r in db if r['name']==u'Pierre']),'Pierre'
341 print len([r for r in db if r['name']==u'pierre']),'pierre'
342 print len([r for r in db if r['name'] in [u'pierre',u'Pierre']]),'p/Pierre'
343 print 'is unicode :',isinstance(db[20]['name'],unicode)
344
345 print "\nDeleting record #20"
346 del db[20]
347 if not 20 in db:
348 print "record 20 removed"
349
350 print db[21]
351 db.drop_field('name')
352 print db[21]
353 db.add_field('adate',datetime.date.today())
354 print db[21]
355
356 k = db._age.keys()[0]
357 print "key",k
358 print k in db._age
359 db.delete(db._age[k])
360 print db._age[k]
361 print k in db._age
362
363