Cope 2.5.0
My personal "standard library" of all the generally useful code I've written for various projects over the years
Loading...
Searching...
No Matches
Namespaces | Functions | Variables
data.py File Reference

Namespaces

namespace  Cope
 
namespace  Cope.experimental
 
namespace  Cope.experimental.data
 

Functions

def Cope.experimental.data._cast2dataframe (func)
 
def Cope.experimental.data.installLibs (libs=['pandas', 'numpy', 'imblearn', 'ipywidgets', 'seaborn', 'scipy', 'matplotlib'])
 
def Cope.experimental.data.addVerbose (func)
 
def Cope.experimental.data._cleaning_func (**decorator_kwargs)
 
def Cope.experimental.data.insertSample (df, sample, index=-1)
 
def Cope.experimental.data.ensureIterable (obj, useList=False)
 
def Cope.experimental.data.ensureNotIterable (obj, emptyBecomes=None)
 
def Cope.experimental.data.getOutliers (data, zscore=None)
 
def Cope.experimental.data.normalizePercentage (p, error='Percentage is of the wrong type(int or float expected)')
 
def Cope.experimental.data.isiterable (obj, includeStr=False)
 
def Cope.experimental.data.sort_dict_by_value_length (d)
 
pd.DataFrame Cope.experimental.data.timeFeatures (df)
 
pd.DataFrame Cope.experimental.data.catagorical (df, time=False)
 
pd.DataFrame Cope.experimental.data.quantitative (df, time=True)
 
def Cope.experimental.data.isTimeFeature (pd.Series s)
 
def Cope.experimental.data.isCatagorical (pd.Series s, time=False)
 
def Cope.experimental.data.isQuantatative (pd.Series s, time=True)
 
def Cope.experimental.data.missingSummary (df, thresh=.6)
 
def Cope.experimental.data.significantCorrelations (df, thresh=.5)
 
def Cope.experimental.data.getNiceTypesTable (df, types=None)
 
def Cope.experimental.data.percentCountPlot (data, feature, target=None, ax=None, title='Percentage of values used in {}')
 
def Cope.experimental.data.column_entropy (pd.Series column, base=e)
 
def Cope.experimental.data.pretty_2_column_array (a, limit=30, paren=None)
 
def Cope.experimental.data.pretty_counts (pd.Series s, paren=False)
 
def Cope.experimental.data.meanConfInterval (data, confidence=0.95, mean=False)
 
def Cope.experimental.data.showOutliers (data, column, zscore, **snsArgs)
 
def Cope.experimental.data.interactWithOutliers (df, feature=None, step=.2)
 
def Cope.experimental.data.handle_outliers (col, Union['remove', 'constrain'] method='remove', zscore=3, log=...)
 
def Cope.experimental.data.handle_missing (col, Union[pd.Series, 'remove', 'mean', 'median', 'mode', 'random', 'balanced_random', Any] method, missing_value=np.nan, log=...)
 
def Cope.experimental.data.query (pd.DataFrame df, str column, str query, Union[pd.Series, 'remove', 'new', 'mean', 'median', 'mode', 'random', 'balanced_random', Any] method, true=1, false=0, verbose=False)
 
def Cope.experimental.data.remove (col, val, log=...)
 
def Cope.experimental.data.bin (col, Union['frequency', 'width', Tuple, List] method, amt=5, log=...)
 
def Cope.experimental.data.rescale (df, return_scaler=False, log=...)
 
def Cope.experimental.data.convert_time (df_or_col, str col=None, Union['timestamp'] method='timestamp', verbose=False)
 
def Cope.experimental.data.convert_numeric (df, str col=None, Union['assign', 'one_hot_encode'] method='one_hot_encode', returnAssignments=False, skip=[], verbose=False)
 
def Cope.experimental.data.split (*data, amt=.2, Union['random', 'chunk', 'head', 'tail'] method='random', target=[], splitTargets=False, seed=42)
 
def Cope.experimental.data.explore (data, target=None, stats=None, additionalStats=[], missing=True, corr=.55, entropy=None, start='Description', startFeature=None, startx=None, starty=None, startHue=None, alpha=None)
 
def Cope.experimental.data.suggestedCleaning (df, target)
 
def Cope.experimental.data._cleanColumn (df, args, column, verbose, ignoreWarnings=False)
 
pd.DataFrame Cope.experimental.data.clean (pd.DataFrame df, Dict[str, Dict[str, Any]] config, bool verbose=False, str split=None)
 
def Cope.experimental.data.resample (X, y, Union['oversample', 'undersample', 'mixed'] method='oversample', seed=None)
 
def Cope.experimental.data.evaluateQuantitative (test, testPredictions, train=None, trainPredictions=None, accuracy=3, explain=False, compact=False, line=False, log=...)
 
def Cope.experimental.data.evaluateCatagorical (test, testPredictions, train=None, trainPredictions=None, accuracy=3, curve=False, confusion=False, explain=False, compact=False)
 
def Cope.experimental.data.evaluate (catagorical, test, testPredictions, train=None, trainPredictions=None, accuracy=3, curve=False, confusion=False, explain=False, compact=False, line=False)
 
def Cope.experimental.data.importances (tree, names=None, rtn=False, graph=True, best=.01)
 
def Cope.experimental.data.saveStats (file, name, model, testY, predY, trainY=None, trainPredY=None, notes='', new=False, show=True, save=True)
 
def Cope.experimental.data.plot_history (history)
 

Variables

 Cope.experimental.data.try :
 
pass Cope.experimental.data.except
 
 Cope.experimental.data.else :
 
str Cope.experimental.data.MODE_SELECTION = 'random'
 
int Cope.experimental.data.SMALL_DATASET = 1000
 
int Cope.experimental.data.HIGH_CARDINALITY = 50
 
float Cope.experimental.data.ALERT_MISSING = .55
 
float Cope.experimental.data.OUTLIER_THRESHOLD = .5
 
bool Cope.experimental.data.CONTINUOUS_UPDATE_SLIDER = False
 
list Cope.experimental.data._catagoricalTypes = ['bool', 'bool_', 'object', 'object_', 'Interval', 'bool8', 'category']
 
list Cope.experimental.data._quantitativeTypes = ['number']
 
list Cope.experimental.data._timeTypes = ['datetimetz', 'timedelta', 'datetime']
 
 Cope.experimental.data.todo = lambda *a: print('TODO: ', *a)
 
 Cope.experimental.data.transform
 
def Cope.experimental.data.quickSummary = explore
 
def Cope.experimental.data.evaluateQ = evaluateQuantitative
 
def Cope.experimental.data.evaluateC = evaluateCatagorical