Package spade :: Module dblite
[hide private]
[frames] | no frames]

Source Code for Module spade.dblite

  1  ## {{{ http://code.activestate.com/recipes/496770/ (r4) 
  2  """PyDbLite.py 
  3   
  4  In-memory database management, with selection by list comprehension  
  5  or generator expression 
  6   
  7  Fields are untyped : they can store anything that can be pickled. 
  8  Selected records are returned as dictionaries. Each record is  
  9  identified by a unique id and has a version number incremented 
 10  at every record update, to detect concurrent access 
 11   
 12  Syntax : 
 13      from PyDbLite import Base 
 14      db = Base('dummy') 
 15      # create new base with field names 
 16      db.create('name','age','size') 
 17      # existing base 
 18      db.open() 
 19      # insert new record 
 20      db.insert(name='homer',age=23,size=1.84) 
 21      # records are dictionaries with a unique integer key __id__ 
 22      # selection by list comprehension 
 23      res = [ r for r in db if 30 > r['age'] >= 18 and r['size'] < 2 ] 
 24      # or generator expression 
 25      for r in (r for r in db if r['name'] in ('homer','marge') ): 
 26      # simple selection (equality test) 
 27      res = db(age=30) 
 28      # delete a record or a list of records 
 29      db.delete(one_record) 
 30      db.delete(list_of_records) 
 31      # delete a record by its id 
 32      del db[rec_id] 
 33      # direct access by id 
 34      record = db[rec_id] # the record such that record['__id__'] == rec_id 
 35      # create an index on a field 
 36      db.create_index('age') 
 37      # access by index 
 38      records = db._age[23] # returns the list of records with age == 23 
 39      # update 
 40      db.update(record,age=24) 
 41      # add and drop fields 
 42      db.add_field('new_field') 
 43      db.drop_field('name') 
 44      # save changes on disk 
 45      db.commit() 
 46  """ 
 47   
 48  import os 
 49  import cPickle 
 50  import bisect 
 51   
 52  # compatibility with Python 2.3 
 53  try: 
 54      set([]) 
 55  except NameError: 
 56      from sets import Set as set 
 57       
class Index:
    """Accessor for the index built on one field of a Base instance.

    Instances are stored as attributes of the Base instance. They hold no
    data of their own: every call reads db.indices[field], a dictionary
    mapping each value taken by the field to a list of record ids.
    """

    def __init__(self, db, field):
        # database object (instance of Base)
        self.db = db
        # name of the indexed field
        self.field = field

    def _mapping(self):
        """Return the value -> record ids dictionary of the indexed field"""
        return self.db.indices[self.field]

    def __iter__(self):
        """Iterate on the values taken by the indexed field"""
        return iter(self._mapping())

    def keys(self):
        """Return the values taken by the indexed field"""
        return self._mapping().keys()

    def __getitem__(self, key):
        """Lookup by key : return the list of records whose field value
        is equal to this key, or an empty list if there is none"""
        records = self.db.records
        return [records[rec_id] for rec_id in self._mapping().get(key, [])]
77
class Base:
    """In-memory table of untyped records, saved to a single file with pickle.

    Records are dictionaries holding one key per field plus two bookkeeping
    keys : __id__ (the record identifier) and __version__ (incremented at
    every update()).

    Attributes, all set by create() or open() except name :
    - name : path of the file used by open() and commit()
    - mode : None, "open" or "override" (see create())
    - fields : list of field names
    - records : dictionary mapping each __id__ to its record
    - next_id : identifier that the next inserted record will receive
    - indices : for each indexed field, a dictionary mapping a field value
      to the sorted list of the ids of the records having this value
    """

    def __init__(self, basename):
        """basename is the path of the file where the base is stored"""
        self.name = basename

    def create(self, *fields, **kw):
        """Create a new base with specified field names
        A keyword argument mode can be specified ; it is used if a file
        with the base name already exists
        - if mode = 'open' : open the existing base, ignore the fields
        - if mode = 'override' : erase the existing base and create a
        new one with the specified fields

        Raises IOError if the path exists and is not a file, or if it is
        a file and no mode was given
        Returns the base itself
        """
        mode = kw.get("mode", None)
        self.mode = mode
        if os.path.exists(self.name):
            if not os.path.isfile(self.name):
                raise IOError("%s exists and is not a file" % self.name)
            elif mode is None:
                raise IOError("Base %s already exists" % self.name)
            elif mode == "open":
                return self.open()
            elif mode == "override":
                os.remove(self.name)
        self.fields = list(fields)
        self.records = {}
        self.next_id = 0
        self.indices = {}
        self.commit()
        return self

    def create_index(self, *fields):
        """Create an index on the specified field names

        An index on a field is a mapping between the values taken by the field
        and the sorted list of the ids of the records whose field is equal to
        this value

        For each indexed field, an attribute of self is created, an instance
        of the class Index (see above). Its name is the field name, with the
        prefix _ to avoid name conflicts

        Raises NameError if one of the names is not a field of the base ; the
        names are all checked before any index is built
        The base is committed if at least one index was (re)built
        """
        for f in fields:
            if f not in self.fields:
                raise NameError("%s is not a field name" % f)
        reset = False
        for f in fields:
            # an index loaded by open() is already up to date
            if self.mode == "open" and f in self.indices:
                continue
            reset = True
            index = {}
            self.indices[f] = index
            for _id in self.records:
                # use bisect to keep the list of ids sorted
                value = self.records[_id][f]
                bisect.insort(index.setdefault(value, []), _id)
            # create a new attribute of self, used to find the records
            # by this index
            setattr(self, '_' + f, Index(self, f))
        if reset:
            self.commit()

    def open(self):
        """Open an existing database and load its content into memory

        The file is read in binary mode, the mode used by commit() to
        write it. The attributes of the base are only replaced once the
        whole file has been read, and the file is closed even if reading
        fails.

        NOTE : unpickling can execute arbitrary code, only open files
        coming from a trusted source

        Returns the base itself
        """
        _in = open(self.name, 'rb')
        try:
            fields = cPickle.load(_in)
            next_id = cPickle.load(_in)
            records = cPickle.load(_in)
            indices = cPickle.load(_in)
        finally:
            _in.close()
        self.fields = fields
        self.next_id = next_id
        self.records = records
        self.indices = indices
        # recreate the attributes giving access to the saved indices
        for f in self.indices.keys():
            setattr(self, '_' + f, Index(self, f))
        self.mode = "open"
        return self

    def commit(self):
        """Write the database to a file

        The fields, the next id, the records and the indices are pickled
        in this order, which is the order expected by open()
        """
        out = open(self.name, 'wb')
        try:
            cPickle.dump(self.fields, out)
            cPickle.dump(self.next_id, out)
            cPickle.dump(self.records, out)
            cPickle.dump(self.indices, out)
        finally:
            out.close()

    def insert(self, *args, **kw):
        """Insert a record in the database
        Parameters can be positional or keyword arguments, or both. If
        positional they must be in the same order as in the create() method ;
        if a field receives both a positional and a keyword value, the
        positional value is used
        If some of the fields are missing the value is set to None
        The record is only inserted in memory, see commit()
        Returns the record identifier
        """
        values = dict(kw)
        # positional values are matched to the field names in order
        values.update(dict(zip(self.fields, args)))
        # initialize all fields to None, then set the supplied values
        record = dict.fromkeys(self.fields)
        record.update(values)
        # add the key __id__ : record identifier
        record['__id__'] = self.next_id
        # add the key __version__ : version number
        record['__version__'] = 0
        # create an entry in the dictionary self.records, indexed by __id__
        self.records[self.next_id] = record
        # update index
        for ix in self.indices.keys():
            bisect.insort(self.indices[ix].setdefault(record[ix], []),
                self.next_id)
        # increment the next __id__ to attribute
        self.next_id += 1
        return record['__id__']

    def _unindex(self, field, value, _id):
        """Remove _id from the list of ids stored for value in the index
        on field, and forget the value if no record has it any more"""
        ids = self.indices[field][value]
        # ids is sorted : bisect returns the position just after _id
        del ids[bisect.bisect(ids, _id) - 1]
        if not ids:
            del self.indices[field][value]

    def delete(self, removed):
        """Remove a single record, or the records in an iterable
        Before starting deletion, test if all records are in the base
        and don't have twice the same __id__ ; raise IndexError otherwise
        Return the number of deleted items
        """
        if isinstance(removed, dict):
            # remove a single record
            removed = [removed]
        else:
            # copy the iterable, the copy is consumed below
            removed = list(removed)
        if not removed:
            return 0
        _ids = [r['__id__'] for r in removed]
        _ids.sort()
        # check if the records are in the base ; _ids is sorted, so
        # comparing with the last missing id is enough to skip duplicates
        missing = []
        for _id in _ids:
            if _id not in self.records:
                if not missing or missing[-1] != _id:
                    missing.append(_id)
        if missing:
            raise IndexError('Delete aborted. Records with these ids'
                ' not found in the base : %s' % str(missing))
        # raise exception if duplicate ids (adjacent in the sorted list)
        for _id, following in zip(_ids, _ids[1:]):
            if _id == following:
                raise IndexError("Delete aborted. Duplicate id : %s" % _id)
        deleted = len(removed)
        while removed:
            r = removed.pop()
            _id = r['__id__']
            # remove id from indices
            for indx in self.indices.keys():
                self._unindex(indx, r[indx], _id)
            # remove record from self.records
            del self.records[_id]
        return deleted

    def update(self, record, **kw):
        """Update the record with new keys and values and update indices
        The version number of the record is incremented"""
        _id = record["__id__"]
        for indx in self.indices.keys():
            # only the indexed fields whose value changes need reindexing
            if indx not in kw or record[indx] == kw[indx]:
                continue
            # remove id for the old value
            self._unindex(indx, record[indx], _id)
            # insert new value
            bisect.insort(self.indices[indx].setdefault(kw[indx], []), _id)
        # update record values
        record.update(kw)
        # increment version number
        record["__version__"] += 1

    def add_field(self, field, default=None):
        """Add a field to the base and commit
        The same default object is set in every existing record
        Raises ValueError if the name is already used"""
        if field in self.fields + ["__id__", "__version__"]:
            raise ValueError("Field %s already defined" % field)
        for r in self:
            r[field] = default
        self.fields.append(field)
        self.commit()

    def drop_field(self, field):
        """Remove a field from the base and from all the records, together
        with its index and the matching _<field> attribute if the field was
        indexed, then commit
        Raises ValueError for the reserved keys __id__ and __version__, or
        if field is not a field of the base"""
        if field in ["__id__", "__version__"]:
            raise ValueError("Can't delete field %s" % field)
        self.fields.remove(field)
        for r in self:
            del r[field]
        if field in self.indices:
            del self.indices[field]
            # the Index attribute would raise KeyError on any access now
            self.__dict__.pop('_' + field, None)
        self.commit()

    def __call__(self, **kw):
        """Selection by field values
        db(key=value) returns the list of records where r[key] = value
        Raises ValueError if a key is not a field of the base"""
        for key in kw:
            if key not in self.fields:
                raise ValueError("Field %s not in the database" % key)

        def sel_func(r):
            for key in kw:
                if not r[key] == kw[key]:
                    return False
            return True

        return [r for r in self if sel_func(r)]

    def __getitem__(self, record_id):
        """Direct access by record id"""
        return self.records[record_id]

    def __len__(self):
        """Number of records in the base"""
        return len(self.records)

    def __delitem__(self, record_id):
        """Delete by record id"""
        self.delete(self[record_id])

    def __iter__(self):
        """Iteration on the records"""
        records = self.records
        try:
            return records.itervalues()
        except AttributeError:
            # dictionaries without itervalues() : values() is a view
            return iter(records.values())


if __name__ == '__main__':
    # test on a 1000 record base
    # every print below takes one single string, so that the output is the
    # same whether print is a statement or a function
    import random
    import datetime
    names = ['pierre', 'claire', 'simon', 'camille', 'jean',
             'florence', 'marie-anne']
    row = '%-10s | %2s | %s'
    db = Base('PyDbLite_test')
    db.create('name', 'age', 'size', 'birth', mode="override")
    for i in range(1000):
        db.insert(name=unicode(random.choice(names)),
             age=random.randint(7, 47), size=random.uniform(1.10, 1.95),
             birth=datetime.date(1990, 10, 10))
    db.create_index('age')
    db.commit()

    print('Record #20 : %s' % (db[20],))
    print('\nRecords with age=30 :')
    for rec in db._age[30]:
        print(row % (rec['name'], rec['age'], round(rec['size'], 2)))

    print("\nSame with __call__")
    for rec in db(age=30):
        print(row % (rec['name'], rec['age'], round(rec['size'], 2)))
    print(db._age[30] == db(age=30))

    db.insert(name=unicode(random.choice(names)))  # missing fields
    print('\nNumber of records with 30 <= age < 33 : %s'
        % sum([1 for r in db if 33 > r['age'] >= 30]))

    print(db.delete([]))

    d = db.delete([r for r in db
        if 32 > r['age'] >= 30 and r['name'] == u'pierre'])
    print("\nDeleting %s records with name == 'pierre' and 30 <= age < 32" % d)
    print('\nAfter deleting records ')
    for rec in db._age[30]:
        print(row % (rec['name'], rec['age'], round(rec['size'], 2)))
    print('\n%s records in the database' % sum([1 for r in db]))
    print('\nMake pierre uppercase for age > 27')
    for record in [r for r in db if r['name'] == 'pierre' and r['age'] > 27]:
        db.update(record, name=u"Pierre")
    print('%s Pierre' % len([r for r in db if r['name'] == u'Pierre']))
    print('%s pierre' % len([r for r in db if r['name'] == u'pierre']))
    print('%s p/Pierre'
        % len([r for r in db if r['name'] in [u'pierre', u'Pierre']]))
    print('is unicode : %s' % isinstance(db[20]['name'], unicode))
    db.commit()
    db.open()
    print('\nSame operation after commit + open')
    print('%s Pierre' % len([r for r in db if r['name'] == u'Pierre']))
    print('%s pierre' % len([r for r in db if r['name'] == u'pierre']))
    print('%s p/Pierre'
        % len([r for r in db if r['name'] in [u'pierre', u'Pierre']]))
    print('is unicode : %s' % isinstance(db[20]['name'], unicode))

    print("\nDeleting record #20")
    del db[20]
    # iterating on db yields records, so test the id against the record keys
    if 20 not in db.records:
        print("record 20 removed")

    print(db[21])
    db.drop_field('name')
    print(db[21])
    db.add_field('adate', datetime.date.today())
    print(db[21])

    k = list(db._age.keys())[0]
    print("key %s" % (k,))
    print(k in db._age)
    db.delete(db._age[k])
    print(db._age[k])
    print(k in db._age)
## end of http://code.activestate.com/recipes/496770/ }}}