Source code for graphid.util.util_misc

import ubelt as ub  # NOQA
import itertools as it
import numpy as np



[docs]
def randn(mean=0, std=1, shape=[], a_max=None, a_min=None, rng=None):
    a = (rng.randn(*shape) * std) + mean
    if a_max is not None or a_min is not None:
        a = np.clip(a, a_min, a_max)
    return a




[docs]
def aslist(sequence):
    """
    Ensures that the sequence object is a Python list.
    Handles, numpy arrays, and python sequences (e.g. tuples, and iterables).

    Args:
        sequence (sequence): a list-like object

    Returns:
        list: list_ - `sequence` as a Python list

    Example:
        >>> s1 = [1, 2, 3]
        >>> s2 = (1, 2, 3)
        >>> assert aslist(s1) is s1
        >>> assert aslist(s2) is not s2
        >>> aslist(np.array([[1, 2], [3, 4], [5, 6]]))
        [[1, 2], [3, 4], [5, 6]]
        >>> aslist(range(3))
        [0, 1, 2]
    """
    if isinstance(sequence, list):
        return sequence
    elif isinstance(sequence, np.ndarray):
        list_ = sequence.tolist()
    else:
        list_ = list(sequence)
    return list_




[docs]
class classproperty(property):
    """
    Decorates a method turning it into a classattribute

    References:
        https://stackoverflow.com/questions/1697501/python-staticmethod-with-property
    """
    def __get__(self, cls, owner):
        return classmethod(self.fget).__get__(None, owner)()




[docs]
def estarmap(func, iter_, **kwargs):
    """
    Eager version of it.starmap from itertools

    Note this is inefficient and should only be used when prototyping and
    debugging.
    """
    return [func(*arg, **kwargs) for arg in iter_]




[docs]
def delete_dict_keys(dict_, key_list):
    r"""
    Removes items from a dictionary inplace. Keys that do not exist are
    ignored.

    Args:
        dict_ (dict): dict like object with a __del__ attribute
        key_list (list): list of keys that specify the items to remove

    Example:
        >>> dict_ = {'bread': 1, 'churches': 1, 'cider': 2, 'very small rocks': 2}
        >>> key_list = ['duck', 'bread', 'cider']
        >>> delete_dict_keys(dict_, key_list)
        >>> result = ub.urepr(dict_, nl=False)
        >>> print(result)
        {'churches': 1, 'very small rocks': 2}

    """
    invalid_keys = set(key_list) - set(dict_.keys())
    valid_keys = set(key_list) - invalid_keys
    for key in valid_keys:
        del dict_[key]
    return dict_




[docs]
def flag_None_items(list_):
    return [item is None for item in list_]




[docs]
def where(flag_list):
    """ takes flags returns indexes of True values """
    return [index for index, flag in enumerate(flag_list) if flag]




[docs]
def delete_items_by_index(list_, index_list, copy=False):
    """
    Remove items from ``list_`` at positions specified in ``index_list``
    The original ``list_`` is preserved if ``copy`` is True

    Args:
        list_ (list):
        index_list (list):
        copy (bool): preserves original list if True

    Example:
        >>> list_ = [8, 1, 8, 1, 6, 6, 3, 4, 4, 5, 6]
        >>> index_list = [2, -1]
        >>> result = delete_items_by_index(list_, index_list)
        >>> print(result)
        [8, 1, 1, 6, 6, 3, 4, 4, 5]
    """
    if copy:
        list_ = list_[:]
    # Rectify negative indicies
    index_list_ = [(len(list_) + x if x < 0 else x) for x in index_list]
    # Remove largest indicies first
    index_list_ = sorted(index_list_, reverse=True)
    for index in index_list_:
        del list_[index]
    return list_




[docs]
def make_index_lookup(list_, dict_factory=dict):
    r"""
    Args:
        list_ (list): assumed to have unique items

    Returns:
        dict: mapping from item to index

    Example:
        >>> list_ = [5, 3, 8, 2]
        >>> idx2_item = make_index_lookup(list_)
        >>> result = ub.urepr(idx2_item, nl=False, sort=1)
        >>> assert list(ub.take(idx2_item, list_)) == list(range(len(list_)))
        >>> print(result)
        {2: 3, 3: 1, 5: 0, 8: 2}
    """
    return dict_factory(zip(list_, range(len(list_))))




[docs]
def cprint(text, color=None):
    """ provides some color to terminal output

    Args:
        text (str):
        color (str):

    Ignore:
        assert color in ['', 'yellow', 'blink', 'lightgray', 'underline',
        'darkyellow', 'blue', 'darkblue', 'faint', 'fuchsia', 'black', 'white',
        'red', 'brown', 'turquoise', 'bold', 'darkred', 'darkgreen', 'reset',
        'standout', 'darkteal', 'darkgray', 'overline', 'purple', 'green', 'teal',
        'fuscia']

    Example0:
        >>> import pygments.console
        >>> msg_list = list(pygments.console.codes.keys())
        >>> color_list = list(pygments.console.codes.keys())
        >>> [cprint(text, color) for text, color in zip(msg_list, color_list)]

    Example1:
        >>> import pygments.console
        >>> print('line1')
        >>> cprint('line2', 'red')
        >>> cprint('line3', 'blue')
        >>> cprint('line4', 'magenta')
        >>> cprint('line5', 'reset')
        >>> cprint('line5', 'magenta')
        >>> print('line6')
    """
    if False and ub.WIN32:
        # Ignore colors on windows. Seems to cause a recursion error
        print(text)
    else:
        try:
            if color is None:
                print(ub.color_text(text, 'blue'))
            else:
                print(ub.color_text(text, color))
        except RecursionError:
            print(text)




[docs]
def ensure_iterable(obj):
    """
    Args:
        obj (scalar or iterable):

    Returns:
        it3erable: obj if it was iterable otherwise [obj]

    Timeit:
        %timeit util.ensure_iterable([1])
        %timeit util.ensure_iterable(1)
        %timeit util.ensure_iterable(np.array(1))
        %timeit util.ensure_iterable([1])
        %timeit [1]


    Example:
        >>> obj_list = [3, [3], '3', (3,), [3,4,5]]
        >>> result = [ensure_iterable(obj) for obj in obj_list]
        >>> result = str(result)
        >>> print(result)
        [[3], [3], ['3'], (3,), [3, 4, 5]]
    """
    if ub.iterable(obj):
        return obj
    else:
        return [obj]




[docs]
def highlight_regex(str_, pat, reflags=0, color='red'):
    """
    FIXME Use pygments instead
    """
    import re
    matches = list(re.finditer(pat, str_, flags=reflags))
    colored = str_
    for match in reversed(matches):
        start = match.start()
        end = match.end()
        colored_part = ub.color_text(colored[start:end], color)
        colored = colored[:start] + colored_part + colored[end:]
    return colored




[docs]
def regex_word(w):
    return r'\b%s\b' % (w,)




[docs]
def setdiff(list1, list2):
    """
    returns list1 elements that are not in list2. preserves order of list1

    Args:
        list1 (list):
        list2 (list):

    Returns:
        list: new_list

    Example:
        >>> list1 = ['featweight_rowid', 'feature_rowid', 'config_rowid', 'featweight_forground_weight']
        >>> list2 = [u'featweight_rowid']
        >>> new_list = setdiff(list1, list2)
        >>> result = ub.urepr(new_list, nl=False)
        >>> print(result)
        ['feature_rowid', 'config_rowid', 'featweight_forground_weight']
    """
    set2 = set(list2)
    return [item for item in list1 if item not in set2]




[docs]
def all_dict_combinations(varied_dict):
    """
    all_dict_combinations

    Args:
        varied_dict (dict):  a dict with lists of possible parameter settings

    Returns:
        list: dict_list a list of dicts correpsonding to all combinations of params settings

    Example:
        >>> varied_dict = {'logdist_weight': [0.0, 1.0], 'pipeline_root': ['vsmany'], 'sv_on': [True, False, None]}
        >>> dict_list = all_dict_combinations(varied_dict)
        >>> result = str(ub.urepr(dict_list))
        >>> print(result)
        [
            {'logdist_weight': 0.0, 'pipeline_root': 'vsmany', 'sv_on': True},
            {'logdist_weight': 0.0, 'pipeline_root': 'vsmany', 'sv_on': False},
            {'logdist_weight': 0.0, 'pipeline_root': 'vsmany', 'sv_on': None},
            {'logdist_weight': 1.0, 'pipeline_root': 'vsmany', 'sv_on': True},
            {'logdist_weight': 1.0, 'pipeline_root': 'vsmany', 'sv_on': False},
            {'logdist_weight': 1.0, 'pipeline_root': 'vsmany', 'sv_on': None},
        ]
    """
    #tups_list = [[(key, val) for val in val_list]
    #             if isinstance(val_list, (list, tuple))
    #             else [(key, val_list)]
    #             for (key, val_list) in six.iteritems(varied_dict)]
    tups_list = [[(key, val) for val in val_list]
                 if isinstance(val_list, (list))
                 #if isinstance(val_list, (list, tuple))
                 else [(key, val_list)]
                 for (key, val_list) in iteritems_sorted(varied_dict)]
    dict_list = [dict(tups) for tups in it.product(*tups_list)]
    #dict_list = [{key: val for (key, val) in tups} for tups in it.product(*tups_list)]
    #from collections import OrderedDict
    #dict_list = [OrderedDict([(key, val) for (key, val) in tups]) for tups in it.product(*tups_list)]
    return dict_list




[docs]
def iteritems_sorted(dict_):
    """ change to iteritems ordered """
    if isinstance(dict_, ub.odict):
        return dict_.items()
    else:
        return iter(sorted(dict_.items()))




[docs]
def partial_order(list_, part):
    list_items = set(list_)
    part_items = set(part)
    begin = [p for p in part if p in list_items]
    end = [item for item in list_ if item not in part_items]
    return begin + end




[docs]
def replace_nones(list_, repl=-1):
    r"""
    Recursively removes Nones in all lists and sublists and replaces them with
    the repl variable

    Args:
        list_ (list):
        repl (obj): replacement value

    Returns:
        list

    Example:
        >>> list_ = [None, 0, 1, 2]
        >>> repl = -1
        >>> repl_list = replace_nones(list_, repl)
        >>> result = str(repl_list)
        >>> print(result)
        [-1, 0, 1, 2]

    """
    repl_list = [
        repl if item is None else (
            replace_nones(item, repl) if isinstance(item, list) else item
        )
        for item in list_
    ]
    return repl_list




[docs]
def take_percentile_parts(arr, front=None, mid=None, back=None):
    """
    Take parts from front, back, or middle of a list

    Example:
        >>> arr = list(range(20))
        >>> front = 3
        >>> mid = 3
        >>> back = 3
        >>> result = take_percentile_parts(arr, front, mid, back)
        >>> print(result)
        [0, 1, 2, 9, 10, 11, 17, 18, 19]
    """
    slices = []
    if front:
        slices += [snapped_slice(len(arr), 0.0, front)]
    if mid:
        slices += [snapped_slice(len(arr), 0.5, mid)]
    if back:
        slices += [snapped_slice(len(arr), 1.0, back)]
    parts = list(ub.flatten([arr[sl] for sl in slices]))
    return parts




[docs]
def snapped_slice(size, frac, n):
    r"""
    Creates a slice spanning `n` items in a list of length `size` at position
    `frac`.

    Args:
        size (int): length of the list
        frac (float): position in the range [0, 1]
        n (int): number of items in the slice

    Returns:
        slice: slice object that best fits the criteria

    SeeAlso:
        take_percentile_parts

    Example:

    Example:
        >>> # DISABLE_DOCTEST
        >>> print(snapped_slice(0, 0, 10))
        >>> print(snapped_slice(1, 0, 10))
        >>> print(snapped_slice(100, 0, 10))
        >>> print(snapped_slice(9, 0, 10))
        >>> print(snapped_slice(100, 1, 10))
        pass
    """
    from math import floor, ceil
    if size < n:
        n = size
    start = int(size * frac - ceil(n / 2)) + 1
    stop  = int(size * frac + floor(n / 2)) + 1
    # slide to the front or the back
    buf = 0
    if stop >= size:
        buf = (size - stop)
    elif start < 0:
        buf = 0 - start
    stop += buf
    start += buf
    assert stop <= size, 'out of bounds [%r, %r]' % (stop, start)
    sl = slice(start, stop)
    return sl




[docs]
def get_timestamp(format_='iso', use_second=False, delta_seconds=None,
                  isutc=False, timezone=False):
    """
    get_timestamp

    Args:
        format_ (str): (tag, printable, filename, other)
        use_second (bool):
        delta_seconds (None):

    Returns:
        str: stamp

    Example:
        >>> format_ = 'printable'
        >>> use_second = False
        >>> delta_seconds = None
        >>> stamp = get_timestamp(format_, use_second, delta_seconds)
        >>> print(stamp)
        >>> assert len(stamp) == len('15:43:04 2015/02/24')
    """
    # TODO: time.timezone
    import time
    import datetime
    if format_ == 'int':
        if isutc:
            stamp = int(time.mktime(time.gmtime()))
        else:
            stamp = int(time.mktime(time.localtime()))
        return stamp
    if isutc:
        now = datetime.datetime.utcnow()
    else:
        now = datetime.datetime.now()
    if delta_seconds is not None:
        now += datetime.timedelta(seconds=delta_seconds)
    if format_ == 'iso':
        # ISO 8601
        #utcnow = datetime.datetime.utcnow()
        #utcnow.isoformat()
        localOffsetHour = time.timezone // 3600
        utc_offset = '-' + str(localOffsetHour) if localOffsetHour < 0 else '+' + str(localOffsetHour)
        stamp = time.strftime('%Y-%m-%dT%H%M%S') + utc_offset
        return stamp
    if format_ == 'tag':
        time_tup = (now.year - 2000, now.month, now.day)
        stamp = '%02d%02d%02d' % time_tup
    elif format_ == 'printable':
        time_tup = (now.hour, now.minute, now.second, now.year, now.month, now.day)
        time_format = '%02d:%02d:%02d %02d/%02d/%02d'
        stamp = time_format % time_tup
    else:
        if use_second:
            time_tup = (now.year, now.month, now.day, now.hour, now.minute, now.second)
            time_formats = {
                'filename': 'ymd_hms-%04d-%02d-%02d_%02d-%02d-%02d',
                'comment': '# (yyyy-mm-dd hh:mm:ss) %04d-%02d-%02d %02d:%02d:%02d'}
        else:
            time_tup = (now.year, now.month, now.day, now.hour, now.minute)
            time_formats = {
                'filename': 'ymd_hm-%04d-%02d-%02d_%02d-%02d',
                'comment': '# (yyyy-mm-dd hh:mm) %04d-%02d-%02d %02d:%02d'}
        stamp = time_formats[format_] % time_tup
    if timezone:
        if isutc:
            stamp += '_UTC'
        else:
            from pytz import reference
            localtime = reference.LocalTimezone()
            tzname = localtime.tzname(now)
            stamp += '_' + tzname
    return stamp




[docs]
def isect(list1, list2):
    """
    returns list1 elements that are also in list2. preserves order of list1

    intersect_ordered

    Args:
        list1 (list):
        list2 (list):

    Returns:
        list: new_list

    Example:
        >>> list1 = ['featweight_rowid', 'feature_rowid', 'config_rowid', 'featweight_forground_weight']
        >>> list2 = [u'featweight_rowid']
        >>> result = isect(list1, list2)
        >>> print(result)
        ['featweight_rowid']
    """
    set2 = set(list2)
    return [item for item in list1 if item in set2]




[docs]
def safe_extreme(arr, op, fill=np.nan, finite=False, nans=True):
    """
    Applies an exterme operation to an 1d array (typically max/min) but ensures
    a value is always returned even in operations without identities. The
    default identity must be specified using the `fill` argument.

    Args:
        arr (ndarray): 1d array to take extreme of
        op (func): vectorized operation like np.max to apply to array
        fill (float): return type if arr has no elements (default = nan)
        finite (bool): if True ignores non-finite values (default = False)
        nans (bool): if False ignores nans (default = True)
    """
    if arr is None:
        extreme = fill
    else:
        arr = np.asarray(arr)
        if finite:
            arr = arr.compress(np.isfinite(arr))
        if not nans:
            arr = arr.compress(np.logical_not(np.isnan(arr)))
        if len(arr) == 0:
            extreme =  fill
        else:
            extreme = op(arr)
    return extreme




[docs]
def safe_argmax(arr, fill=np.nan, finite=False, nans=True):
    """
    Doctest:
        >>> assert safe_argmax([np.nan, np.nan], nans=False) == 0
        >>> assert safe_argmax([-100, np.nan], nans=False) == 0
        >>> assert safe_argmax([np.nan, -100], nans=False) == 1
        >>> assert safe_argmax([-100, 0], nans=False) == 1
        >>> assert np.isnan(safe_argmax([]))
    """
    if len(arr) == 0:
        return fill
    extreme = safe_max(arr, fill=fill, finite=finite, nans=nans)
    if np.isnan(extreme):
        arg_extreme = np.where(np.isnan(arr))[0][0]
    else:
        arg_extreme = np.where(arr == extreme)[0][0]
    return arg_extreme




[docs]
def safe_max(arr, fill=np.nan, finite=False, nans=True):
    r"""
    Args:
        arr (ndarray): 1d array to take max of
        fill (float): return type if arr has no elements (default = nan)
        finite (bool): if True ignores non-finite values (default = False)
        nans (bool): if False ignores nans (default = True)

    Example:
        >>> arrs = [[], [np.nan], [-np.inf, np.nan, np.inf], [np.inf], [np.inf, 1], [0, 1]]
        >>> arrs = [np.array(arr) for arr in arrs]
        >>> fill = np.nan
        >>> results1 = [safe_max(arr, fill, finite=False, nans=True) for arr in arrs]
        >>> results2 = [safe_max(arr, fill, finite=True, nans=True) for arr in arrs]
        >>> results3 = [safe_max(arr, fill, finite=True, nans=False) for arr in arrs]
        >>> results4 = [safe_max(arr, fill, finite=False, nans=False) for arr in arrs]
        >>> results = [results1, results2, results3, results4]
        >>> result = ('results = %s' % (ub.urepr(results, nl=1, sv=1),))
        >>> print(result)
        results = [
            [nan, nan, nan, inf, inf, 1],
            [nan, nan, nan, nan, 1.0, 1],
            [nan, nan, nan, nan, 1.0, 1],
            [nan, nan, inf, inf, inf, 1],
        ]
    """
    return safe_extreme(arr, np.max, fill, finite, nans)




[docs]
def safe_min(arr, fill=np.nan, finite=False, nans=True):
    """
    Example:
        >>> arrs = [[], [np.nan], [-np.inf, np.nan, np.inf], [np.inf], [np.inf, 1], [0, 1]]
        >>> arrs = [np.array(arr) for arr in arrs]
        >>> fill = np.nan
        >>> results1 = [safe_min(arr, fill, finite=False, nans=True) for arr in arrs]
        >>> results2 = [safe_min(arr, fill, finite=True, nans=True) for arr in arrs]
        >>> results3 = [safe_min(arr, fill, finite=True, nans=False) for arr in arrs]
        >>> results4 = [safe_min(arr, fill, finite=False, nans=False) for arr in arrs]
        >>> results = [results1, results2, results3, results4]
        >>> result = ('results = %s' % (ub.urepr(results, nl=1, sv=1),))
        >>> print(result)
        results = [
            [nan, nan, nan, inf, 1.0, 0],
            [nan, nan, nan, nan, 1.0, 0],
            [nan, nan, nan, nan, 1.0, 0],
            [nan, nan, -inf, inf, 1.0, 0],
        ]
    """
    return safe_extreme(arr, np.min, fill, finite, nans)




[docs]
def stats_dict(list_, axis=None, use_nan=False, use_sum=False,
               use_median=False, size=False):
    """
    Args:
        list_ (listlike): values to get statistics of
        axis (int): if `list_` is ndarray then this specifies the axis

    Returns:
        OrderedDict: stats: dictionary of common numpy statistics
            (min, max, mean, std, nMin, nMax, shape)

    Examples0:
        >>> # xdoctest: +IGNORE_WHITESPACE
        >>> import numpy as np
        >>> axis = 0
        >>> np.random.seed(0)
        >>> list_ = np.random.rand(10, 2).astype(np.float32)
        >>> stats = stats_dict(list_, axis, use_nan=False)
        >>> result = str(ub.urepr(stats, nl=1, precision=4, with_dtype=True))
        >>> print(result)
        {
            'mean': np.array([0.5206, 0.6425], dtype=np.float32),
            'std': np.array([0.2854, 0.2517], dtype=np.float32),
            'max': np.array([0.9637, 0.9256], dtype=np.float32),
            'min': np.array([0.0202, 0.0871], dtype=np.float32),
            'nMin': np.array([1, 1], dtype=np.int32),
            'nMax': np.array([1, 1], dtype=np.int32),
            'shape': (10, 2),
        }

    Examples1:
        >>> import numpy as np
        >>> axis = 0
        >>> rng = np.random.RandomState(0)
        >>> list_ = rng.randint(0, 42, size=100).astype(np.float32)
        >>> list_[4] = np.nan
        >>> stats = stats_dict(list_, axis, use_nan=True)
        >>> result = str(ub.urepr(stats, precision=1, sk=True))
        >>> print(result)
        {mean: 20.0, std: 13.2, max: 41.0, min: 0.0, nMin: 7, nMax: 3, shape: (100,), num_nan: 1,}
    """
    datacast = np.float32
    # Assure input is in numpy format
    if isinstance(list_, np.ndarray):
        nparr = list_
    elif isinstance(list_, list):
        nparr = np.array(list_)
    else:
        nparr = np.array(list(list_))
    # Check to make sure stats are feasible
    if len(nparr) == 0:
        stats = ub.odict([('empty_list', True)])
        if size:
            stats['size'] = 0
    else:
        if use_nan:
            min_val = np.nanmin(nparr, axis=axis)
            max_val = np.nanmax(nparr, axis=axis)
            mean_ = np.nanmean(nparr, axis=axis)
            std_  = np.nanstd(nparr, axis=axis)
        else:
            min_val = nparr.min(axis=axis)
            max_val = nparr.max(axis=axis)
            mean_ = nparr.mean(axis=axis)
            std_  = nparr.std(axis=axis)
        # number of entries with min/max val
        nMin = np.sum(nparr == min_val, axis=axis)
        nMax = np.sum(nparr == max_val, axis=axis)
        stats = ub.odict([
            ('mean',  datacast(mean_)),
            ('std',   datacast(std_)),
            ('max',   (max_val)),
            ('min',   (min_val)),
            ('nMin',  np.int32(nMin)),
            ('nMax',  np.int32(nMax)),
        ])
        if size:
            stats['size'] = nparr.size
        else:
            stats['shape'] = nparr.shape
        if use_median:
            stats['med'] = np.nanmedian(nparr)
        if use_nan:
            stats['num_nan'] = np.isnan(nparr).sum()
        if use_sum:
            sumfunc = np.nansum if use_nan else np.sum
            stats['sum'] = sumfunc(nparr, axis=axis)
    return stats