import ubelt as ub
import operator as op
[docs]
def sortedby(item_list, key_list, reverse=False):
""" sorts ``item_list`` using key_list
Args:
list_ (list): list to sort
key_list (list): list to sort by
reverse (bool): sort order is descending (largest first)
if reverse is True else acscending (smallest first)
Returns:
list : ``list_`` sorted by the values of another ``list``. defaults to
ascending order
SeeAlso:
sortedby2
Examples:
>>> list_ = [1, 2, 3, 4, 5]
>>> key_list = [2, 5, 3, 1, 5]
>>> result = sortedby(list_, key_list, reverse=True)
>>> print(result)
[5, 2, 3, 1, 4]
"""
assert len(item_list) == len(key_list), (
'Expected same len. Got: %r != %r' % (len(item_list), len(key_list)))
sorted_list = [item for (key, item) in
sorted(list(zip(key_list, item_list)), reverse=reverse)]
return sorted_list
[docs]
def grouping_delta(old, new, pure=True):
"""
Finds what happened to the old groups to form the new groups.
Args:
old (set of frozensets): old grouping
new (set of frozensets): new grouping
pure (bool): hybrids are separated from pure merges and splits if
pure is True, otherwise hybrid cases are grouped in merges and
splits.
Returns:
dict: delta: dictionary of changes containing the merges, splits,
unchanged, and hybrid cases. Except for unchanged, case a subdict
with new and old keys. For splits / merges, one of these contains
nested sequences to indicate what the split / merge is. Also
reports elements added and removed between old and new if the
flattened sets are not the same.
Notes:
merges - which old groups were merged into a single new group.
splits - which old groups were split into multiple new groups.
hybrid - which old groups had split/merge actions applied.
unchanged - which old groups are the same as new groups.
Example:
>>> # xdoc: +IGNORE_WHITESPACE
>>> old = [
>>> [20, 21, 22, 23], [1, 2], [12], [13, 14], [3, 4], [5, 6,11],
>>> [7], [8, 9], [10], [31, 32], [33, 34, 35], [41, 42, 43, 44, 45]
>>> ]
>>> new = [
>>> [20, 21], [22, 23], [1, 2], [12, 13, 14], [4], [5, 6, 3], [7, 8],
>>> [9, 10, 11], [31, 32, 33, 34, 35], [41, 42, 43, 44], [45],
>>> ]
>>> delta = grouping_delta(old, new)
>>> assert set(old[0]) in delta['splits']['old']
>>> assert set(new[3]) in delta['merges']['new']
>>> assert set(old[1]) in delta['unchanged']
>>> result = ub.urepr(delta, nl=2, sort=True, nobr=1, sk=True)
>>> print(result)
hybrid: {
merges: [{{10}, {11}, {9}}, {{3}, {5, 6}}, {{4}}, {{7}, {8}}],
new: {{3, 5, 6}, {4}, {7, 8}, {9, 10, 11}},
old: {{10}, {3, 4}, {5, 6, 11}, {7}, {8, 9}},
splits: [{{10}}, {{11}, {5, 6}}, {{3}, {4}}, {{7}}, {{8}, {9}}],
},
items: {
added: {},
removed: {},
},
merges: {
new: [{12, 13, 14}, {31, 32, 33, 34, 35}],
old: [{{12}, {13, 14}}, {{31, 32}, {33, 34, 35}}],
},
splits: {
new: [{{20, 21}, {22, 23}}, {{41, 42, 43, 44}, {45}}],
old: [{20, 21, 22, 23}, {41, 42, 43, 44, 45}],
},
unchanged: {
{1, 2},
},
Example:
>>> old = [
>>> [1, 2, 3], [4], [5, 6, 7, 8, 9], [10, 11, 12]
>>> ]
>>> new = [
>>> [1], [2], [3, 4], [5, 6, 7], [8, 9, 10, 11, 12]
>>> ]
>>> # every case here is hybrid
>>> pure_delta = grouping_delta(old, new, pure=True)
>>> assert len(list(ub.flatten(pure_delta['merges'].values()))) == 0
>>> assert len(list(ub.flatten(pure_delta['splits'].values()))) == 0
>>> delta = grouping_delta(old, new, pure=False)
>>> delta = order_dict_by(delta, ['unchanged', 'splits', 'merges'])
>>> result = ub.urepr(delta, nl=2, sort=True, sk=True)
>>> print(result)
{
items: {
added: {},
removed: {},
},
merges: [
[{3}, {4}],
[{10, 11, 12}, {8, 9}],
],
splits: [
[{1}, {2}, {3}],
[{5, 6, 7}, {8, 9}],
],
unchanged: {},
}
Example:
>>> delta = grouping_delta([[1, 2, 3]], [])
>>> assert len(delta['items']['removed']) == 3
>>> delta = grouping_delta([], [[1, 2, 3]])
>>> assert len(delta['items']['added']) == 3
>>> delta = grouping_delta([[1]], [[1, 2, 3]])
>>> assert len(delta['items']['added']) == 2
>>> assert len(delta['unchanged']) == 1
"""
_old = {frozenset(_group) for _group in old}
_new = {frozenset(_group) for _group in new}
_new_items = set(ub.flatten(_new))
_old_items = set(ub.flatten(_old))
if _new_items != _old_items:
_added = _new_items - _old_items
_removed = _old_items - _new_items
# Make the sets items the same
_old = {frozenset(_group - _removed) for _group in _old}
_new = {frozenset(_group - _added) for _group in _new}
_old = {_group for _group in _old if _group}
_new = {_group for _group in _new if _group}
else:
_added = {}
_removed = {}
# assert _new_items == _old_items, 'new and old sets must be the same'
# Find the groups that are exactly the same
unchanged = _new.intersection(_old)
new_sets = _new.difference(unchanged)
old_sets = _old.difference(unchanged)
# connected compoment lookups
old_conn = {p: frozenset(ps) for ps in _old for p in ps}
new_conn = {t: frozenset(ts) for ts in _new for t in ts}
# How many old sets can be merged into perfect pieces?
# For each new sets, find if it can be made via merging old sets
old_merges = []
new_merges = []
for ts in new_sets:
ccs = set([old_conn.get(t, frozenset()) for t in ts])
if frozenset.union(*ccs) == ts:
# This is a pure merge
old_merges.append(ccs)
new_merges.append(ts)
# How many oldictions can be split into perfect pieces?
new_splits = []
old_splits = []
for ps in old_sets:
ccs = set([new_conn.get(p, frozenset()) for p in ps])
if frozenset.union(*ccs) == ps:
# This is a pure merge
new_splits.append(ccs)
old_splits.append(ps)
old_merges_flat = list(ub.flatten(old_merges))
new_splits_flat = list(ub.flatten(new_splits))
old_hybrid = frozenset(map(frozenset, old_sets)).difference(
set(old_splits + old_merges_flat))
new_hybrid = frozenset(map(frozenset, new_sets)).difference(
set(new_merges + new_splits_flat))
breakup_hybrids = True
if breakup_hybrids:
# First split each hybrid
lookup = {a: n for n, items in enumerate(new_hybrid) for a in items}
hybrid_splits = []
for items in old_hybrid:
nids = list(ub.take(lookup, items))
split_part = list(ub.group_items(items, nids).values())
hybrid_splits.append(set(map(frozenset, split_part)))
# And then merge them into new groups
hybrid_merge_parts = list(ub.flatten(hybrid_splits))
part_nids = [lookup[next(iter(aids))] for aids in hybrid_merge_parts]
hybrid_merges = list(map(set, ub.group_items(hybrid_merge_parts,
part_nids).values()))
if pure:
delta = ub.odict()
delta['unchanged'] = unchanged
delta['splits'] = ub.odict([
('old', old_splits),
('new', new_splits),
])
delta['merges'] = ub.odict([
('old', old_merges),
('new', new_merges),
])
delta['hybrid'] = ub.odict([
('old', old_hybrid),
('new', new_hybrid),
('splits', hybrid_splits),
('merges', hybrid_merges),
])
else:
# Incorporate hybrid partial cases with pure splits and merges
new_splits2 = [s for s in hybrid_splits if len(s) > 1]
old_merges2 = [m for m in hybrid_merges if len(m) > 1]
all_new_splits = new_splits + new_splits2
all_old_merges = old_merges + old_merges2
# Don't bother differentiating old and new
# old_splits2 = [frozenset(ub.flatten(s)) for s in new_splits2]
# new_merges2 = [frozenset(ub.flatten(m)) for m in old_merges2]
# all_old_splits = old_splits + old_splits2
# all_new_merges = new_merges + new_merges2
splits = all_new_splits
merges = all_old_merges
# Sort by split and merge sizes
splits = sortedby(splits, [len(list(ub.flatten(_))) for _ in splits])
merges = sortedby(merges, [len(list(ub.flatten(_))) for _ in merges])
splits = [sortedby(_, list(map(len, _))) for _ in splits]
merges = [sortedby(_, list(map(len, _))) for _ in merges]
delta = ub.odict()
delta['unchanged'] = unchanged
delta['splits'] = splits
delta['merges'] = merges
delta['items'] = ub.odict([
('added', _added),
('removed', _removed),
])
return delta
[docs]
def order_dict_by(dict_, key_order):
r"""
Reorders items in a dictionary according to a custom key order
Args:
dict_ (dict_): a dictionary
key_order (list): custom key order
Returns:
OrderedDict: sorted_dict
Example:
>>> dict_ = {1: 1, 2: 2, 3: 3, 4: 4}
>>> key_order = [4, 2, 3, 1]
>>> sorted_dict = order_dict_by(dict_, key_order)
>>> result = ('sorted_dict = %s' % (ub.urepr(sorted_dict, nl=False),))
>>> print(result)
>>> assert result == 'sorted_dict = {4: 4, 2: 2, 3: 3, 1: 1}'
"""
import itertools as it
dict_keys = set(dict_.keys())
other_keys = dict_keys - set(key_order)
key_order = it.chain(key_order, other_keys)
sorted_dict = ub.odict(
(key, dict_[key]) for key in key_order if key in dict_keys
)
return sorted_dict
[docs]
def group_pairs(pair_list):
"""
Groups a list of items using the first element in each pair as the item and
the second element as the groupid.
Args:
pair_list (list): list of 2-tuples (item, groupid)
Returns:
dict: groupid_to_items: maps a groupid to a list of items
"""
# Initialize dict of lists
groupid_to_items = ub.ddict(list)
# Insert each item into the correct group
for item, groupid in pair_list:
groupid_to_items[groupid].append(item)
return groupid_to_items
[docs]
def sort_dict(dict_, part='keys', key=None, reverse=False):
"""
sorts a dictionary by its values or its keys
Args:
dict_ (dict_): a dictionary
part (str): specifies to sort by keys or values
key (Optional[func]): a function that takes specified part
and returns a sortable value
reverse (bool): (Defaults to False) - True for descinding order. False
for ascending order.
Returns:
OrderedDict: sorted dictionary
Example:
>>> dict_ = {'a': 3, 'c': 2, 'b': 1}
>>> results = []
>>> results.append(sort_dict(dict_, 'keys'))
>>> results.append(sort_dict(dict_, 'vals'))
>>> results.append(sort_dict(dict_, 'vals', lambda x: -x))
>>> result = ub.urepr(results)
>>> print(result)
[
{'a': 3, 'b': 1, 'c': 2},
{'b': 1, 'c': 2, 'a': 3},
{'a': 3, 'c': 2, 'b': 1},
]
"""
if part == 'keys':
index = 0
elif part in {'vals', 'values'}:
index = 1
else:
raise ValueError('Unknown method part=%r' % (part,))
if key is None:
_key = op.itemgetter(index)
else:
def _key(item):
return key(item[index])
sorted_items = sorted(dict_.items(), key=_key, reverse=reverse)
sorted_dict = ub.odict(sorted_items)
return sorted_dict