Source code for gcmpy.joint_degree

# construct joint degree distributions for gcmpy
#
# Copyright (C) 2021 Peter Mann
#
# This file is part of gcmpy, generalised configuration model networks in Python.
#
# gcmpy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# gcmpy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gcmpy. If not, see <http://www.gnu.org/licenses/gpl.html>.

import ast
import random
from collections import Counter
from itertools import product
from typing import Callable, Iterator, List, Tuple

import numpy as np

from .types import _JDD, _JDS, _JOINT_DEGREE, _COVER

class JDD_Interface(object):
    '''Joint degree distribution interface.

    Subclasses define how ``self._jdd`` is created, and all implementations
    should do so upon construction. ``self._jdd`` maps the string form of a
    joint degree (for example ``"(2, 1)"``) to its probability.

    :param modulo: list of integers for number of nodes in each motif'''

    def __init__(self, modulo):
        self._jdd : _JDD = {}
        self._modulo : List[int] = modulo

    @staticmethod
    def _joint_degree_key(joint_degree)->str:
        '''Build the dictionary key used in self._jdd for one joint degree.

        NumPy scalars are converted to native Python numbers first; otherwise
        their repr (e.g. ``np.int64(1)`` under NumPy >= 2) ends up in the key
        and ``ast.literal_eval`` in :meth:`sample_JDS` cannot parse it.

        :param joint_degree: iterable of motif counts
        :return key: ``str`` of the tuple of native values'''
        native = tuple(
            k.item() if isinstance(k, np.generic) else k for k in joint_degree
        )
        return str(native)

    def _sample_JDS(self, N : int)->_JDS:
        '''Samples N keys from self._jdd, with replacement, weighted by
        their stored probability. Intended to be a private method, only to
        be called by sample_JDS.

        The values of self._jdd are handed to ``np.random.choice`` as ``p``,
        so they are expected to form a normalised distribution.

        :param N: number of samples
        :return jds: numpy array of sampled (string) keys'''
        keys = np.array(list(self._jdd.keys()))
        weights = np.array(list(self._jdd.values()))
        picked = np.random.choice(len(keys), N, replace=True, p=weights)
        return keys[picked]

    def sample_JDS(self, N : int)->_JDS:
        '''Sample a joint degree sequence and pad it so that, for every
        topology i, the column sum is divisible by modulo[i] (the number of
        vertices in that motif). This ensures the JDS is graphic.

        Padding adds one motif at a time to randomly chosen entries. Each
        adjusted entry keeps the sequence type it was parsed as, so the
        returned list is homogeneous.

        :param N: number of samples
        :returns jds: joint degree sequence (list of length N)'''
        # raw joint degree sequence, parsed back from the string keys
        jds = [ast.literal_eval(str(key)) for key in self._sample_JDS(N)]

        # column sums are taken once, before any padding is applied
        ntops = [sum(column) for column in zip(*jds)]
        for i, ntop in enumerate(ntops):
            remainder = ntop % self._modulo[i]
            if remainder == 0:
                continue
            # round up to the next multiple of the motif size
            for _ in range(self._modulo[i] - remainder):
                j = random.randrange(0, len(jds))
                bumped = list(jds[j])
                bumped[i] += 1
                jds[j] = type(jds[j])(bumped)
        return jds

    def normalise_jdd(self):
        '''Normalises self._jdd probability distribution in place.

        Raises ZeroDivisionError if the distribution is non-empty and its
        values sum to zero.'''
        summation = sum(self._jdd.values())
        for key in self._jdd:
            self._jdd[key] /= summation

    def convert_jds_to_jdd(self, jds : _JDS):
        '''Convert a joint degree sequence to self._jdd, replacing whatever
        was stored before. Each distinct joint degree is assigned its
        relative frequency in the sequence.

        :param jds: joint degree sequence (rows may be tuples, lists or
            NumPy rows)'''
        n_samples = len(jds)
        counts = Counter(self._joint_degree_key(e) for e in jds)
        self._jdd = {key: count / n_samples for key, count in counts.items()}
class JDD_manual(JDD_Interface):
    '''Joint degree distribution supplied directly by the caller.

    The given mapping is stored as-is; it is not copied, validated or
    normalised here.

    :param jdd: joint degree distribution
    :param modulo: list of integers for number of nodes in each motif'''

    def __init__(self, jdd : _JDD, modulo : List[int])->None:
        super().__init__(modulo)
        # overwrite the empty distribution created by the base constructor
        self._jdd = jdd
class JDD_empirical_data(JDD_Interface):
    '''Joint degree distribution built from an observed joint degree
    sequence: each distinct joint degree receives its relative frequency.

    :param jds: joint degree sequence
    :param modulo: list of integers for number of nodes in each motif'''

    def __init__(self, jds : _JDS, modulo : List[int])->None:
        super().__init__(modulo)
        # the inherited converter fills self._jdd from the sequence
        self.convert_jds_to_jdd(jds)
class JDD_joint_function(JDD_Interface):
    '''Multivariate function to evaluate the probability of given joint
    degree from an analytical source. Note, callable self._fp must accept a
    joint degree tuple and return a float.

    :param fp: callback
    :param modulo: list of ints for number of vertices in each motif
    :param hi_lo_degree_bounds: list of tuples (int,int) for kmin,kmax per
        topology; both bounds are inclusive
    :param use_sampling: bool to use sampling or direct approach
    :param n_samples: number of samples if not direct'''

    def __init__(self,
                 fp : Callable,
                 modulo : List[int],
                 hi_lo_degree_bounds : List[Tuple[int,int]],
                 use_sampling : bool = False,
                 n_samples : int = 100000):
        super().__init__(modulo)
        self._fp = fp
        self._hi_lo_degree_bounds = hi_lo_degree_bounds

        if use_sampling:
            # coerce so that float-valued counts such as 1e5 remain usable
            self._n_samples = int(n_samples)
            self._create_jdd_by_sampling()
        else:
            self._create_jdd_directly()

    def _create_jdd_by_sampling(self)->None:
        '''Sampling algorithm for joint degree distribution function not
        defined. Should be overridden for each implementation.

        :raises NotImplementedError: always, in this base implementation'''
        raise NotImplementedError(
            "_create_jdd_by_sampling must be overridden by a subclass"
        )

    def _create_jdd_directly(self)->None:
        '''Evaluates probability directly by generating all possible joint
        degrees within the (inclusive) bounds of every topology.

        The result is normalised, as JDD_marginals does, so that evaluating
        self._fp on a truncated support still yields a distribution that
        sums to one. Normalisation is skipped when every value is zero.'''
        # possible degrees in each dimension
        ks = [range(kmin, kmax + 1) for kmin, kmax in self._hi_lo_degree_bounds]

        # evaluate the callback on every joint degree of the grid
        for jd in product(*ks):
            self._jdd[str(jd)] = self._fp(jd)

        if sum(self._jdd.values()) > 0:
            self.normalise_jdd()
class JDD_marginals(JDD_Interface):
    '''Merge uncorrelated marginals in each topology from analytical data
    together to create self._jdd.

    If using a direct method, all possible joint degree tuples are
    evaluated; however, for large variance in the allowed degrees this
    method is slow. Instead, we can choose to sample the analytical
    functions by setting use_sampling, which draws n_samples weighted
    samples from each marginal function.

    :param arr_fp: array of callbacks, one per topology
    :param modulo: list of ints for number of vertices in each motif
    :param hi_lo_degree_bounds: list of tuples (int,int) for kmin,kmax per
        topology; both bounds are inclusive
    :param use_sampling: bool to use sampling or direct approach
    :param n_samples: number of samples if not direct'''

    def __init__(self,
                 arr_fp : List[Callable],
                 modulo : List[int],
                 hi_lo_degree_bounds : List[Tuple[int,int]],
                 use_sampling : bool = False,
                 n_samples : int = 100000):
        super().__init__(modulo)
        self._arr_fp = arr_fp
        self._hi_lo_degree_bounds = hi_lo_degree_bounds

        if use_sampling:
            # random.choices needs an integer k; a float such as 1e5 raises
            self._n_samples = int(n_samples)
            self._create_jdd_by_sampling()
        else:
            self._create_jdd_directly()

    def _generate_all_joint_degrees(self)->List[_JOINT_DEGREE]:
        '''Generate all possible joint degrees from the range of min/max
        degree of each motif. kmax is included, matching the range used by
        the sampling path.

        :return jd: list of joint degrees (tuples)'''
        ks = [range(kmin, kmax + 1) for kmin, kmax in self._hi_lo_degree_bounds]
        return list(product(*ks))

    def _evaluate_prob_of_joint_degree(self, joint_degree : _JOINT_DEGREE)->float:
        '''Evaluate the joint probability of a joint degree as the product
        of the marginal callbacks, one per dimension.

        :param joint_degree: joint degree
        :returns prod: probability of joint degree'''
        prod : float = 1.0
        for i, deg in enumerate(joint_degree):
            prod *= self._arr_fp[i](deg)
        return prod

    def _create_jdd_directly(self)->None:
        '''Create self._jdd by enumerating the probability of all possible
        joint degrees from the analytical functions, then normalise.'''
        self._jdd = {
            str(jd): self._evaluate_prob_of_joint_degree(jd)
            for jd in self._generate_all_joint_degrees()
        }
        self.normalise_jdd()

    def _draw_from_analytical_joint(self)->np.ndarray:
        '''Draw `self._n_samples' from a marginal distribution
        `self._arr_fp' along each dimension.

        :returns jds: samples from marginal distributions, one column per
            topology'''
        columns = []
        for i, (kmin, kmax) in enumerate(self._hi_lo_degree_bounds):
            ks = list(range(kmin, kmax + 1))           # possible degrees
            pks = [self._arr_fp[i](k) for k in ks]     # degree weights
            # sample this dimension independently of the others
            columns.append(random.choices(ks, pks, k=self._n_samples))
        return np.column_stack(columns)

    def _create_jdd_by_sampling(self)->None:
        '''Draws jds samples from analytical marginal functions and converts
        to a joint degree distribution.'''
        # tolist() yields native ints, so the string keys built by
        # convert_jds_to_jdd stay parseable by ast.literal_eval
        self.convert_jds_to_jdd(self._draw_from_analytical_joint().tolist())
class JDD_split_K_model(JDD_Interface):
    '''An overall degree distribution is split with probabilities of
    creating each motif.

    :param fp: callback giving the probability of an overall degree k
    :param modulo: list of ints for number of vertices in each motif
    :param probs: list of floats, one per topology, used to weight how an
        overall degree is partitioned among the motifs
    :param kmin: smallest overall degree considered (inclusive)
    :param kmax: upper limit of the overall degree (exclusive, see
        create_jdd)'''

    def __init__(self, fp : Callable, modulo : List[int], probs : List[float],
                 kmin : int, kmax : int)->None:
        super().__init__(modulo)
        self._fp = fp
        self._probs = probs
        self._kmin = kmin
        self._kmax = kmax
        self.create_jdd()

    def create_jdd(self)->None:
        '''Creates self._jdd using the split degree model.

        Overall degrees are taken from range(kmin, kmax), i.e. kmax itself
        is not evaluated.'''
        # NOTE(review): other classes in this module treat kmax as
        # inclusive -- confirm whether the half-open range is intended.
        for k in range(self._kmin, self._kmax):
            self.resolve_degree(k, self._fp(k))
        self.normalise_jdd()

    def get_valid_joint_degrees(self, remaining_degree : int,
                                topology : int)->Iterator[_JOINT_DEGREE]:
        '''Yields joint degrees (as lists) by recursion. Only an ordered
        list of cliques are currently supported.

        Column t (1-based) consumes t edges per motif, so every yielded list
        satisfies sum(t * count_t) == the degree passed to the outermost call.

        :param remaining_degree: current free edges that can be partitioned
        :param topology: column index (1-based) of joint degree tuple
        :returns: generator over joint degrees'''
        if topology == 1:
            # the first column absorbs whatever degree is left
            yield [remaining_degree]
            return
        for count in range(0, remaining_degree // topology + 1):
            leftover = remaining_degree - count * topology
            for row in self.get_valid_joint_degrees(leftover, topology - 1):
                yield row + [count]

    def calc_prob_of_joint_degree(self, jd : _JOINT_DEGREE)->float:
        '''Calculates the (unnormalised) probability of a joint degree from
        the input params, as the product over columns i of
        probs[i] ** ((i+1) * jd[i]).

        :param jd: joint degree
        :returns probability: float value'''
        prod : float = 1.0
        for i, degree in enumerate(jd):
            prod *= pow(self._probs[i], (i + 1) * degree)
        return prod

    def resolve_degree(self, k : int, prob_overall_k : float)->None:
        '''Creates all valid joint degrees for overall k and their
        probability and updates self._jdd.

        Keys are written in tuple form ("(a, b)"), the same format produced
        by convert_jds_to_jdd, so a distribution never mixes list-style and
        tuple-style keys.

        Raises ZeroDivisionError if every candidate joint degree has zero
        weight.

        :param k: overall degree
        :param prob_overall_k: float value'''
        # all partitions of k among the topologies
        valid_tuples : List[_JOINT_DEGREE] = list(
            self.get_valid_joint_degrees(k, len(self._probs))
        )

        # weight of each partition, normalised to unity
        weights = [self.calc_prob_of_joint_degree(jd) for jd in valid_tuples]
        total = sum(weights)

        # store each partition scaled by the probability of overall degree k
        for jd, weight in zip(valid_tuples, weights):
            self._jdd[str(tuple(jd))] = prob_overall_k * (weight / total)
class JDD_delta_model(JDD_split_K_model):
    '''A distribution of single edges apart from a specified target degree.

    Every overall degree k other than target_k is placed entirely in the
    first column of the joint degree, with zeros for all other motif type
    counts; only k == target_k is split among the motifs.

    :param fp: callback giving the probability of an overall degree k
    :param modulo: list of ints for number of vertices in each motif
    :param probs: list of floats, one per topology
    :param target_k: the overall degree that is split among motifs
    :param kmin: smallest overall degree considered (inclusive)
    :param kmax: upper limit of the overall degree (exclusive)'''

    def __init__(self, fp : Callable, modulo : List[int], probs : List[float],
                 target_k : int, kmin : int, kmax : int)->None:
        # assigned before the parent constructor runs, because that
        # constructor calls create_jdd, which reads self._target_k
        self._target_k = target_k
        super().__init__(fp, modulo, probs, kmin, kmax)

    def create_jdd(self)->None:
        '''Creates self._jdd using the degree delta model.'''
        for k in range(self._kmin, self._kmax):
            joint_degree = [0] * len(self._modulo)
            if k == self._target_k:
                # the target degree is partitioned by the split-K rule
                self.resolve_degree(k, self._fp(k))
            else:
                # all k edges are ordinary edges (first column)
                joint_degree[0] = k
                self._jdd[str(tuple(joint_degree))] = self._fp(k)
        self.normalise_jdd()
class JDD_clique_cover(JDD_Interface):
    '''Creates self._jdd from a list of cliques in the network. The sizes of
    the cliques can be obtained from the self._modulo member.

    :param C: clique cover, an iterable of cliques, each a sized collection
        of hashable node ids'''

    def __init__(self, C : _COVER):
        ''':param C: clique cover'''
        self._cover = C
        # distinct clique sizes, ascending; empty cliques carry no nodes
        super().__init__(sorted(set(len(c) for c in C if len(c) > 0)))
        self.create_jdd()

    def create_jdd(self)->None:
        '''Count, for every node, how many cliques of each size it belongs
        to, and convert the resulting joint degree sequence to self._jdd.

        One column is created per clique size that actually occurs, in
        ascending size order. Building only the needed columns avoids
        deleting all-zero columns afterwards, which removed the wrong
        columns (or raised IndexError) when sizes were not contiguous.
        Node ids are mapped to rows through a dictionary, so they need not
        be contiguous integers. An empty cover yields an empty self._jdd.'''
        sizes = sorted(set(len(c) for c in self._cover if len(c) > 0))
        column_of = {size: col for col, size in enumerate(sizes)}

        # node id -> joint degree row, created on first sight of the node
        rows = {}
        for clique in self._cover:
            if len(clique) == 0:
                continue
            col = column_of[len(clique)]
            for node in clique:
                if node not in rows:
                    rows[node] = [0] * len(sizes)
                rows[node][col] += 1

        # convert jds to jdd
        self.convert_jds_to_jdd(list(rows.values()))