Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

#!/usr/bin/env python3 

 

import toml 

import re, itertools 

import pandas as pd 

from pathlib import Path 

from nonstdlib import plural, indices_from_str 

from copy import deepcopy 

from .util import * 

 

# Data Structures 

# =============== 

# `config` 

# A direct reflection of the TOML input file. Arbitrary parameters can be  

# specified on a per-experiment, per-plate, per-row, per-column, or per-well  

# basis.  

# 

# `wells` 

# Dictionary where the keys are (row, col) well indices and the values are  

# dictionaries containing arbitrary information about said well. This is  

# basically a version of the `config` data structure where all messy well  

# names have been resolved to simple indices, and all the parameters have  

# been resolved on a per-well basis. 

# 

# `table` 

# A pandas DataFrame derived from the `wells` data structure. Each row  

# represents a well, and each column represents one of the fields in the well  

# dictionaries. Columns identifying the plate and well are also added. 

 

def load(toml_path, data_loader=None, merge_cols=None,
        path_guess=None, path_required=False):
    """
    Load a plate layout from the given TOML file, and optionally merge it
    with the data files it refers to.

    Arguments:
        toml_path: The layout file to parse.
        data_loader: Callable taking a data-file path and returning a
            DataFrame; if given, the data for each unique path is loaded.
        merge_cols: Dict mapping label columns to data columns to merge on
            (in addition to 'path', which is always used).
        path_guess: Format string used to guess a data path from the TOML
            file name, e.g. '{0.stem}.csv'.
        path_required: If true, every well must have a data path.

    Returns:
        Just the labels table, or (labels, data), or the merged table,
        depending on which optional arguments were given.

    Raises:
        ConfigError: For any problem with the layout itself; the offending
            TOML path is attached to the exception.
    """
    try:
        ## Parse the TOML file:
        config, paths, concats = config_from_toml(toml_path, path_guess)
        labels = table_from_config(config, paths)
        labels = pd.concat([labels, *concats], sort=False)

        if path_required or data_loader:
            if 'path' not in labels or labels['path'].isnull().any():
                # `missing_path_error` explains *why* no path could be found.
                # It can legitimately be None (e.g. nulls introduced by a
                # concatenated file), so fall back to a generic error rather
                # than raising None, which would be a TypeError.
                raise paths.missing_path_error or ConfigError(
                        "Analysis requires a data file, but none was specified.")

        if len(labels) == 0:
            raise ConfigError("No wells defined.")

        ## Load the data associated with each well:
        if data_loader is None:
            if merge_cols is not None:
                raise ValueError("Specified columns to merge, but no function to load data!")
            return labels

        # `DataFrame.append()` was deprecated and then removed by pandas;
        # collect the per-path frames and concatenate them in one call.
        frames = []
        for path in labels['path'].unique():
            df = data_loader(path)
            df['path'] = path
            frames.append(df)
        data = pd.concat(frames, sort=False) if frames else pd.DataFrame()

        ## Merge the labels and the data into a single data frame:
        if merge_cols is None:
            return labels, data

        def check_merge_cols(cols, known_cols, attrs):
            # Reject any requested merge column that isn't actually present.
            unknown_cols = set(cols) - set(known_cols)
            if unknown_cols:
                quoted_join = lambda it: ', '.join(f"'{x}'" for x in it)
                raise ValueError(f"Cannot merge on {quoted_join(unknown_cols)}. Allowed {attrs} of the `merge_cols` dict: {quoted_join(known_cols)}.")
            return list(cols)

        # `table_from_wells()` names the column-index column 'col_j' (not
        # 'col_i'), so that is the name callers must be allowed to merge on.
        left_ok = 'well', 'well0', 'row', 'col', 'row_i', 'col_j', 'plate'
        left_on = check_merge_cols(merge_cols.keys(), left_ok, 'keys')
        right_on = check_merge_cols(merge_cols.values(), data.columns, 'values')

        return pd.merge(
                labels, data,
                left_on=left_on + ['path'],
                right_on=right_on + ['path'],
        )
    except ConfigError as err:
        # Record which layout file the problem came from, for error messages.
        err.toml_path = toml_path
        raise

 

def config_from_toml(toml_path, path_guess=None):
    """
    Parse the given TOML file into a `configdict`, resolving all the
    `meta.*` machinery (includes, concatenations, alerts, paths).

    Returns a (config, paths, concats) tuple, where `paths` is a
    `PathManager` and `concats` is a list of already-loaded tables to be
    appended to this layout's table.
    """
    toml_path = Path(toml_path).resolve()
    config = configdict(toml.load(str(toml_path)))

    def resolved_meta_paths(key):
        # Yield each path listed under `meta.<key>`, resolved relative to
        # the TOML file.  A bare string counts as a one-element list.
        entries = config.meta.get(key, [])
        if isinstance(entries, str):
            entries = [entries]
        for entry in entries:
            yield resolve_path(toml_path, entry)

    # Synthesize any available path information.
    paths = PathManager(
            config.meta.get('path'),
            config.meta.get('paths'),
            toml_path,
            path_guess,
    )

    # Pull in defaults from any included files.  Values already present in
    # this file are kept (recursive_merge doesn't overwrite).
    for include_path in reversed(list(resolved_meta_paths('include'))):
        included, *_ = config_from_toml(include_path)
        recursive_merge(config, included)

    # Load any files whose tables should be concatenated onto this one.
    concats = [
            load(p, path_guess=path_guess)
            for p in resolved_meta_paths('concat')
    ]

    # Print out any messages contained in the file.
    if 'alert' in config.meta:
        try:
            print(f"{toml_path.relative_to(Path.cwd())}:")
        except ValueError:
            # The TOML file isn't below the working directory; show the
            # absolute path instead.
            print(f"{toml_path}:")
        print(config.meta['alert'])

    config.pop('meta', None)
    return config, paths, concats

 

def table_from_config(config, paths):
    """
    Build a DataFrame (one row per well) from a parsed config, consulting
    `paths` for the data-file index columns.
    """
    config = configdict(config)

    if config.plates:
        paths.check_named_plates(config.plates)

        tables = []
        for name, plate_config in config.plates.items():
            # Copy to avoid infinite recursion.
            merged = recursive_merge(plate_config.copy(), config)
            wells = wells_from_config(merged)

            # Only look up the index when there are wells to attach it to;
            # see the single-plate branch below for why.
            index = paths.get_index_for_named_plate(name) if wells else {}
            tables.append(table_from_wells(wells, index))

        # Make an effort to keep the columns in a reasonable order. I don't
        # know why `pd.concat()` doesn't do this on its own...
        cols = tables[-1].columns
        return pd.concat(tables, sort=False)[cols]

    else:
        wells = wells_from_config(config)
        # Getting the index can raise errors we might not care about if there
        # aren't any wells (e.g. it doesn't matter if a path doesn't exist if
        # it won't be associated with any wells). Skipping the call is a bit
        # of a hacky way to avoid these errors, but it works.
        index = paths.get_index_for_only_plate() if wells else {}
        return table_from_wells(wells, index)

 

def wells_from_config(config, label=None):
    """
    Resolve the given config into a dictionary of wells.

    The keys of the returned dict are (row, col) indices and the values are
    dicts of the parameters that apply to that well.  Wells are introduced
    by [well], [block], [row], [col], [irow], and [icol] blocks; parameters
    are merged into each well in order of precedence: [well], [block],
    [row/col/irow/icol], then top-level parameters.
    """
    config = configdict(config)
    wells = {}

    def iter_wells(config):
        # A single key can name several wells, separated by commas.
        for key in config:
            for well in key.split(','):
                yield well, config[key]

    def iter_rows(config):
        def row_range(a, b):
            # Expand 'A-C' style ranges into individual row names.
            A, B = i_from_row(a), i_from_row(b)
            yield from [
                    row_from_i(x)
                    for x in range(A, B+1)
            ]

        for key in config:
            for row in indices_from_str(key, cast=str, range=row_range):
                yield row, config[key]

    def iter_cols(config):
        def col_range(a, b):
            # Expand '1-3' style ranges into individual column names.
            A, B = j_from_col(a), j_from_col(b)
            yield from [
                    col_from_j(x)
                    for x in range(A, B+1)
            ]

        for key in config:
            for col in indices_from_str(key, cast=str, range=col_range):
                yield col, config[key]

    ## Create and fill in wells defined by 'well' blocks.
    for well, params in iter_wells(config.wells):
        ij = ij_from_well(well)
        if ij in wells:
            raise ConfigError(f"[well.{well}] defined more than once.")
        wells[ij] = deepcopy(params)

    ## Create new wells implied by any 'block' blocks:
    blocks = {}
    # Raw string: '\d' in a plain literal is an invalid escape sequence (a
    # warning in recent Pythons).  `fullmatch()` rejects trailing junk
    # (e.g. '2x2!') that `match()` would have silently ignored.
    pattern = re.compile(r'(\d+)x(\d+)')

    for size in config.blocks:
        match = pattern.fullmatch(size)
        if not match:
            raise ConfigError(f"Unknown [block] size '{size}', expected 'WxH' (where W and H are both positive integers).")

        width, height = map(int, match.groups())
        if width == 0:
            raise ConfigError(f"[block.{size}] has no width. No wells defined.")
        if height == 0:
            raise ConfigError(f"[block.{size}] has no height. No wells defined.")

        for top_left, params in iter_wells(config.blocks[size]):
            for ij in iter_ij_in_block(top_left, width, height):
                wells.setdefault(ij, {})
                blocks.setdefault(ij, [])
                blocks[ij].append(deepcopy(params))

    ## Create new wells implied by any 'row' & 'col' blocks.

    def simplify_keys(dim):
        # Normalize row/col names (e.g. 'A', '1') to integer indices, merging
        # parameters for keys that resolve to the same index.
        before = config.get(dim, {})
        after = {}

        callbacks = {
                'row': (iter_rows, i_from_row),
                'col': (iter_cols, j_from_col),
                'irow': (iter_rows, i_from_row),
                'icol': (iter_cols, j_from_col),
        }
        iter_keys, index_getter = callbacks[dim]

        for b, params in iter_keys(before):
            a = index_getter(b)
            after.setdefault(a, {})
            recursive_merge(after[a], params)

        return after

    def sanity_check(dim1, *dim2s):
        # A [row]-like block with no [col]-like blocks (or vice versa) can't
        # define any wells; complain rather than silently produce nothing.
        if config.get(dim1) \
                and not wells \
                and not blocks \
                and not any(config.get(x) for x in dim2s):
            raise ConfigError(f"Found {plural(config[dim1]):? [{dim1}] block/s}, but no [{'/'.join(dim2s)}] blocks. No wells defined.")

    rows = simplify_keys('row')
    cols = simplify_keys('col')
    irows = simplify_keys('irow')
    icols = simplify_keys('icol')

    sanity_check('row', 'col', 'icol')
    sanity_check('col', 'row', 'irow')
    for ij in itertools.product(rows, cols):
        wells.setdefault(ij, {})

    sanity_check('irow', 'col')
    for ii, j in itertools.product(irows, cols):
        ij = interleave(ii, j), j
        wells.setdefault(ij, {})

    sanity_check('icol', 'row')
    for i, jj in itertools.product(rows, icols):
        ij = i, interleave(i, jj)
        wells.setdefault(ij, {})

    ## Fill in any wells created above.
    for ij in wells:
        i, j = ij
        ii = interleave(i, j)
        jj = interleave(j, i)

        # Merge in order of precedence: [block], [row/col], top-level.
        # [well] is already accounted for.
        for block in blocks.get(ij, {}):
            recursive_merge(wells[ij], block)

        recursive_merge(wells[ij], rows.get(i, {}))
        recursive_merge(wells[ij], cols.get(j, {}))
        recursive_merge(wells[ij], irows.get(ii, {}))
        recursive_merge(wells[ij], icols.get(jj, {}))
        recursive_merge(wells[ij], config.user)

    return wells

 

def table_from_wells(wells, index):
    """
    Convert the `wells` dict into a DataFrame with one row per well.

    Every key/value pair from `index` (e.g. plate name, data path) is added
    to every row, alongside the standard well-identifying columns.
    """
    records = []
    user_cols = []

    # Pad well numbers to at least two digits (a 12-column plate), more if
    # the layout is wider than that.
    widest_col = max([12] + [j for _, j in wells])
    digits = len(str(widest_col + 1))

    for i, j in wells:
        row, col = row_col_from_ij(i, j)
        well = well_from_ij(i, j)
        well0 = well0_from_well(well, digits=digits)

        # Track user-defined columns in first-seen order.
        for key in wells[i, j]:
            if key not in user_cols:
                user_cols.append(key)

        record = dict(wells[i, j])
        record.update(index)
        record.update({
                'well': well,
                'well0': well0,
                'row': row, 'col': col,
                'row_i': i, 'col_j': j,
        })
        records.append(record)

    # Make an effort to put the columns in a reasonable order:
    columns = ['well', 'well0', 'row', 'col', 'row_i', 'col_j']
    columns += list(index) + user_cols

    return pd.DataFrame(records, columns=columns)

 

 

def recursive_merge(config, defaults, overwrite=False):
    """
    Merge `defaults` into `config`, descending into nested dicts.

    By default, values already present in `config` win; pass
    ``overwrite=True`` to let `defaults` replace them.  `config` is
    modified in place, but also returned for convenience.
    """
    for key, default in defaults.items():
        if not isinstance(default, dict):
            if overwrite or key not in config:
                config[key] = default
        elif isinstance(config.get(key, {}), dict):
            # Both sides are dicts (or the key is absent): merge recursively.
            recursive_merge(config.setdefault(key, {}), default, overwrite)
        elif overwrite:
            # A dict default is clobbering a non-dict value: copy it so the
            # caller's defaults can't later be mutated through `config`.
            config[key] = deepcopy(default)

    return config

 

def resolve_path(parent_path, child_path):
    """
    Interpret `child_path` relative to the directory containing
    `parent_path`, unless `child_path` is already absolute.
    """
    child = Path(child_path)
    if child.is_absolute():
        return child
    return Path(parent_path).parent / child

 

class PathManager:
    """
    Keep track of the data file(s) associated with a layout, and build the
    'path'/'plate' index columns for each table of wells.
    """

    def __init__(self, path, paths, toml_path, path_guess=None):
        # `path`: value of `meta.path` (single-plate layouts).
        # `paths`: value of `meta.paths` (str template or dict, named plates).
        # `toml_path`: the layout file; relative paths resolve against it.
        # `path_guess`: format string for guessing a path from the TOML file.
        self.path = path
        self.paths = paths
        self.toml_path = Path(toml_path)
        self.path_guess = path_guess
        # Set lazily when no path can be found; raised later only if the
        # caller actually needs data files.
        self.missing_path_error = None

    def __str__(self):
        attrs = {
                'path': self.path,
                'paths': self.paths,
                'toml_path': self.toml_path,
                'path_guess': self.path_guess,
        }
        return str(attrs)

    def check_overspecified(self):
        # Providing both `meta.path` and `meta.paths` is ambiguous.
        if self.path and self.paths:
            raise ConfigError(f"Both `meta.path` and `meta.paths` specified; ambiguous.")

    def check_named_plates(self, names):
        self.check_overspecified()

        if self.path is not None:
            raise ConfigError(f"`meta.path` specified with one or more [plate] blocks ({', '.join(names)}). Did you mean to use `meta.paths`?")

        if isinstance(self.paths, dict) and set(names) != set(self.paths):
            raise ConfigError(f"The keys in `meta.paths` ({', '.join(sorted(self.paths))}) don't match the [plate] blocks ({', '.join(sorted(names))})")

    def get_index_for_only_plate(self):
        # If there is only one plate:
        # - Have `paths`: Ambiguous, complain.
        # - Have `path`: Use it, complain if non-existent
        # - Have extension: Guess path from stem, complain if non-existent.
        # - Don't have anything: Don't put path in the index

        def index_from(path):
            path = resolve_path(self.toml_path, path)
            if not path.exists():
                raise ConfigError(f"'{path}' does not exist")
            return {'path': path}

        self.check_overspecified()

        if self.paths is not None:
            raise ConfigError(f"`meta.paths` ({self.paths if isinstance(self.paths, str) else ', '.join(self.paths)}) specified without any [plate] blocks. Did you mean to use `meta.path`?")

        if self.path is not None:
            return index_from(self.path)

        if self.path_guess:
            return index_from(self.path_guess.format(self.toml_path))

        # No path at all: defer the error until someone actually needs one.
        self.missing_path_error = ConfigError(f"Analysis requires a data file, but none was specified and none could be inferred. Did you mean to set `meta.path`?")
        return {}

    def get_index_for_named_plate(self, name):
        # If there are multiple plates:
        # - Have `path`: Ambiguous, complain.
        # - `paths` is string: Format with name, complain if non-existent or
        #   if formatting didn't change the path.
        # - `paths` is dict: Make sure the keys match the plates in the
        #   config.  Look up the path, complain if non-existent.
        # - Don't have `paths`: Put the name in the index without a path.

        def index_from(path):
            path = resolve_path(self.toml_path, path)
            if not path.exists():
                raise ConfigError(f"'{path}' for plate '{name}' does not exist")
            return {'plate': name, 'path': path}

        if self.paths is None:
            # No paths at all: defer the error, but still label the plate.
            self.missing_path_error = ConfigError(f"Analysis requires a data file for each plate, but none were specified. Did you mean to set `meta.paths`?")
            return {'plate': name}

        if isinstance(self.paths, str):
            return index_from(self.paths.format(name))

        if isinstance(self.paths, dict):
            if name not in self.paths:
                raise ConfigError(f"No data file path specified for plate '{name}'")
            return index_from(self.paths[name])

        raise ConfigError(f"Expected `meta.paths` to be dict or str, got {type(self.paths)}: {self.paths}")

 

class configdict(dict):
    """
    A dict subclass that exposes the well-known layout tables as attributes.

    Accessing a special attribute (e.g. `cfg.rows`) creates the
    corresponding singular key (e.g. 'row') on demand; every other key in
    the dict is considered a user-defined parameter.
    """
    # Map attribute names (plural) to the TOML keys they stand for (singular).
    special = {
            'meta': 'meta',
            'plates': 'plate',
            'rows': 'row',
            'irows': 'irow',
            'cols': 'col',
            'icols': 'icol',
            'blocks': 'block',
            'wells': 'well',
    }

    def __init__(self, config):
        self.update(config)

    def __getattr__(self, key):
        if key not in self.special:
            # Mirror the original behavior: unknown attributes are None.
            return None
        return self.setdefault(self.special[key], {})

    def __setattr__(self, key, value):
        # All real state lives in the dict itself; assignments to
        # non-special attributes are deliberately ignored.
        if key in self.special:
            self[self.special[key]] = value

    @property
    def user(self):
        # Everything that isn't one of the special singular keys.
        return {
                k: v for k, v in self.items()
                if k not in self.special.values()
        }