"""
Source code for graphid.util.util_numpy
"""

import numpy as np
import ubelt as ub  # NOQA


def iter_reduce_ufunc(ufunc, arr_iter, out=None):
    """
    Reduce an iterable of arrays with ``ufunc`` from left to right using
    constant memory.

    Args:
        ufunc (callable): binary numpy ufunc (e.g. ``np.maximum``)
        arr_iter (Iterator[ndarray]): iterator of equally-shaped arrays
        out (ndarray, optional): preallocated accumulator. If given, the
            reduction is written into it and this same array is returned.

    Returns:
        ndarray or None: the reduced array, or None if ``arr_iter`` is empty

    Example:
        >>> arrs = [np.array([0, 5]), np.array([4, 1])]
        >>> print(iter_reduce_ufunc(np.maximum, iter(arrs)).tolist())
        [4, 5]
    """
    # Seed the accumulator with the first array, if there is one
    try:
        first = next(arr_iter)
    except StopIteration:
        return None
    if out is None:
        # No buffer supplied: copy so the caller's first array is not mutated
        out = first.copy()
    else:
        out[:] = first
    # Fold each remaining array into the accumulator in-place
    for arr in arr_iter:
        ufunc(out, arr, out=out)
    return out
def isect_flags(arr, other):
    """
    Boolean mask marking which elements of ``arr`` appear in ``other``.

    Args:
        arr (ndarray): array of values to test
        other (iterable): candidate values to match against

    Returns:
        ndarray: boolean array with the same shape as ``arr``, True where the
            corresponding element of ``arr`` equals some item of ``other``.

    Example:
        >>> arr = np.array([
        >>>     [1, 2, 3, 4],
        >>>     [5, 6, 3, 4],
        >>>     [1, 1, 3, 4],
        >>> ])
        >>> other = np.array([1, 4, 6])
        >>> mask = isect_flags(arr, other)
        >>> print(mask)
        [[ True False False  True]
         [False  True False  True]
         [ True  True False  True]]
    """
    # Accumulate membership in-place. Starting from an all-False mask means an
    # empty ``other`` yields an all-False result instead of crashing (the
    # previous reduce-based implementation raised AttributeError on None).
    flags = np.zeros(arr.shape, dtype=bool)
    for item in other:
        np.logical_or(flags, arr == item, out=flags)
    return flags
def atleast_nd(arr, n, front=False):
    r"""
    View inputs as arrays with at least n dimensions.

    TODO: Submit as a PR to numpy

    Args:
        arr (array_like): One array-like object.  Non-array inputs are
            converted to arrays.  Arrays that already have n or more
            dimensions are preserved.
        n (int): number of dimensions to ensure
        front (bool): if True new dimensions are added to the front of the
            array, otherwise they are added to the back. Defaults to False.

    Returns:
        ndarray: An array with ``a.ndim >= n``.  Copies are avoided where
            possible, and views with three or more dimensions are returned.
            For example, a 1-D array of shape ``(N,)`` becomes a view of shape
            ``(1, N, 1)``, and a 2-D array of shape ``(M, N)`` becomes a view
            of shape ``(M, N, 1)``.

    See Also:
        ensure_shape, np.atleast_1d, np.atleast_2d, np.atleast_3d

    Example:
        >>> n = 2
        >>> arr = np.array([1, 1, 1])
        >>> arr_ = atleast_nd(arr, n)
        >>> result = ub.urepr(arr_.tolist(), nl=0)
        >>> print(result)
        [[1], [1], [1]]

    Example:
        >>> n = 4
        >>> arr1 = [1, 1, 1]
        >>> arr2 = np.array(0)
        >>> arr3 = np.array([[[[[1]]]]])
        >>> arr1_ = atleast_nd(arr1, n)
        >>> arr2_ = atleast_nd(arr2, n)
        >>> arr3_ = atleast_nd(arr3, n)
        >>> result1 = ub.urepr(arr1_.tolist(), nl=0)
        >>> result2 = ub.urepr(arr2_.tolist(), nl=0)
        >>> result3 = ub.urepr(arr3_.tolist(), nl=0)
        >>> result = '\n'.join([result1, result2, result3])
        >>> print(result)
        [[[[1]]], [[[1]]], [[[1]]]]
        [[[[0]]]]
        [[[[[1]]]]]
    """
    arr_ = np.asanyarray(arr)
    ndims = len(arr_.shape)
    if n is not None and ndims < n:
        # Build an index expander that inserts the missing axes via None
        # (np.newaxis) either before or after the existing data (Ellipsis).
        if front:
            expander = (None,) * (n - ndims) + (Ellipsis,)
        else:
            expander = (Ellipsis,) + (None,) * (n - ndims)
        arr_ = arr_[expander]
    return arr_
def apply_grouping(items, groupxs, axis=0):
    """
    Take groups of items out of an array using precomputed index groups
    (the companion of ``group_indices``).

    Args:
        items (ndarray): array of values to partition
        groupxs (list of ndarrays): per-group index arrays
        axis (int): axis of ``items`` along which indices are taken

    Returns:
        list of ndarrays: one array of items per group

    SeeAlso:
        group_indices
        invert_apply_grouping

    Example:
        >>> # xdoctest: +IGNORE_WHITESPACE
        >>> idx2_groupid = np.array([2, 1, 2, 1, 2, 1, 2, 3, 3, 3, 3])
        >>> items = np.array([1, 8, 5, 5, 8, 6, 7, 5, 3, 0, 9])
        >>> (keys, groupxs) = group_indices(idx2_groupid)
        >>> grouped_items = apply_grouping(items, groupxs)
        >>> result = str(grouped_items)
        >>> print(result)
        [array([8, 5, 6]), array([1, 5, 8, 7]), array([5, 3, 0, 9])]
    """
    # SHOULD DO A CONTIGUOUS CHECK HERE
    #items_ = np.ascontiguousarray(items)
    grouped = []
    for xs in groupxs:
        grouped.append(items.take(xs, axis=axis))
    return grouped
def group_indices(idx2_groupid, assume_sorted=False):
    r"""
    Find the indices belonging to each unique group id.

    Args:
        idx2_groupid (ndarray): numpy array of group ids (must be numeric)
        assume_sorted (bool): if True, skip the argsort because the ids are
            already in sorted order. Defaults to False.

    Returns:
        tuple (ndarray, list of ndarrays): (keys, groupxs) where ``keys`` are
            the unique group ids in sorted order and ``groupxs[i]`` holds the
            indices into ``idx2_groupid`` that map to ``keys[i]``.

    SeeAlso:
        apply_grouping

    References:
        http://stackoverflow.com/questions/4651683/
        numpy-grouping-using-itertools-groupby-performance

    Example:
        >>> idx2_groupid = np.array([2, 1, 2, 1, 2, 1, 2, 3, 3, 3, 3])
        >>> (keys, groupxs) = group_indices(idx2_groupid)
        >>> print(keys.tolist())
        [1, 2, 3]
        >>> print([xs.tolist() for xs in groupxs])
        [[1, 3, 5], [0, 2, 4, 6], [7, 8, 9, 10]]
    """
    # Bring equal group ids next to each other
    if assume_sorted:
        sortx = np.arange(len(idx2_groupid))
        sorted_ids = idx2_groupid
    else:
        sortx = idx2_groupid.argsort()
        sorted_ids = idx2_groupid.take(sortx)
    # Bools do not support subtraction, so cast them to small ints first
    if sorted_ids.dtype.kind == 'b':
        casted_ids = sorted_ids.astype(np.int8)
    else:
        casted_ids = sorted_ids
    n_items = idx2_groupid.size
    # Nonzero first-differences mark group boundaries; the sentinel ones at
    # both ends ensure the first and last groups are bounded.
    bounds = np.ones(n_items + 1, casted_ids.dtype)
    np.subtract(casted_ids[1:], casted_ids[:-1], out=bounds[1:n_items])
    edges = np.flatnonzero(bounds)
    # Slice the sorted index array between consecutive boundaries
    groupxs = [sortx[lo:hi] for lo, hi in zip(edges[:-1], edges[1:])]
    # One representative id per group gives the unique keys
    keys = sorted_ids[edges[:-1]]
    return keys, groupxs
def group_items(item_list, groupid_list, assume_sorted=False, axis=None):
    """
    Group items into a dictionary keyed by their corresponding group ids.

    Args:
        item_list (ndarray): values to be grouped
        groupid_list (ndarray): group id for each item (must be numeric)
        assume_sorted (bool): forwarded to ``group_indices``
        axis (int, optional): forwarded to ``apply_grouping``

    Returns:
        dict: mapping from each unique group id to its array of items
    """
    unique_ids, index_groups = group_indices(
        groupid_list, assume_sorted=assume_sorted)
    value_groups = apply_grouping(item_list, index_groups, axis=axis)
    return dict(zip(unique_ids, value_groups))
if __name__ == '__main__':
    """
    CommandLine:
        python -m graphid.util.util_numpy all
    """
    # Run this module's doctests when executed directly
    import xdoctest
    xdoctest.doctest_module(__file__)