Source code for graphid.core.annot_inference

import copy
import logging
import collections
import ubelt as ub
import numpy as np
import pandas as pd
import networkx as nx
import itertools as it
from graphid import util
from graphid.core import state as const
from graphid.core import mixin_viz
from graphid.core import mixin_helpers
from graphid.core import mixin_dynamic
from graphid.core import mixin_priority
from graphid.core import mixin_loops
from graphid.core import mixin_callbacks
from graphid.core import mixin_simulation
from graphid.core import mixin_invariants
from graphid.core import mixin_redundancy
from graphid.core.state import POSTV, NEGTV, INCMP, UNREV, UNKWN
from graphid.core.state import UNINFERABLE
from graphid.core.state import SAME, DIFF, NULL


def _rectify_decision(evidence_decision, meta_decision):
    """
    If the evidence decision is not explicitly set, then the meta decision is
    used to make a guess. Raises a ValueError if the decisions are in
    incompatible states.
    """
    # Default to the decision based on the visual evidence
    decision = evidence_decision
    # Overwrite the evidence decision with the meta decision if necessary
    if meta_decision == SAME:
        if decision in UNINFERABLE:
            decision = POSTV
        elif decision == NEGTV:
            raise ValueError('evidence=negative and meta=same')
    elif meta_decision == DIFF:
        if decision in UNINFERABLE:
            decision = NEGTV
        elif decision == POSTV:
            raise ValueError('evidence=positive and meta=diff')
    return decision

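# Illustrative sketch (added commentary, not in the original source): how the
# evidence and meta decisions combine, assuming UNREV is among the UNINFERABLE
# states imported from graphid.core.state above.
#
#   _rectify_decision(UNREV, SAME)   # -> POSTV, meta fills in missing evidence
#   _rectify_decision(NEGTV, NULL)   # -> NEGTV, evidence stands on its own
#   _rectify_decision(POSTV, DIFF)   # raises ValueError (contradiction)
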
class Consistency(object):

    def is_consistent(infr, cc):
        """
        Determines if a PCC contains inconsistencies

        Args:
            cc (set): nodes in a PCC

        Returns:
            bool: True unless the cc contains any internal negative edges

        Example:
            >>> from graphid import demo
            >>> infr = demo.demodata_infr(num_pccs=1, p_incon=1)
            >>> assert not infr.is_consistent(next(infr.positive_components()))
            >>> infr = demo.demodata_infr(num_pccs=1, p_incon=0)
            >>> assert infr.is_consistent(next(infr.positive_components()))
        """
        return len(cc) <= 2 or not any(util.edges_inside(infr.neg_graph, cc))

    def positive_components(infr, graph=None):
        """
        Generates the positive connected components (PCCs) in the graph.
        These will contain both consistent and inconsistent PCCs.

        Yields:
            set: nodes within the PCC
        """
        pos_graph = infr.pos_graph
        if graph is None or graph is infr.graph:
            ccs = pos_graph.connected_components()
        else:
            unique_labels = {
                pos_graph.node_label(node) for node in graph.nodes()}
            ccs = (pos_graph.connected_to(node) for node in unique_labels)
        for cc in ccs:
            yield cc

    def inconsistent_components(infr, graph=None):
        """
        Generates inconsistent PCCs.
        These PCCs contain internal negative edges indicating an error exists.
        """
        for cc in infr.positive_components(graph):
            if not infr.is_consistent(cc):
                yield cc

    def consistent_components(infr, graph=None):
        """
        Generates consistent PCCs.
        These PCCs contain no internal negative edges.

        Yields:
            set: nodes within the PCC
        """
        # Find PCCs without any negative edges
        for cc in infr.positive_components(graph):
            if infr.is_consistent(cc):
                yield cc

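# Usage sketch (added commentary, not in the original source): the Consistency
# mixin is typically used to walk over PCCs, e.g. with a demo graph:
#
#   from graphid import demo
#   infr = demo.demodata_infr(num_pccs=4, p_incon=0.5)
#   good = list(infr.consistent_components())
#   bad = list(infr.inconsistent_components())
#   assert len(good) + len(bad) == len(list(infr.positive_components()))
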
class Feedback(object):

    def _check_edge(infr, edge):
        aid1, aid2 = edge
        if aid1 not in infr.aids_set:
            raise ValueError('aid1=%r is not part of the graph' % (aid1,))
        if aid2 not in infr.aids_set:
            raise ValueError('aid2=%r is not part of the graph' % (aid2,))

    def add_feedback_from(infr, items, verbose=None, **kwargs):
        """
        Args:
            items (List[Edge]): each edge is a dictionary with aid1, aid2,
                evidence_decision, meta_decision, etc...
        """
        if verbose is None:
            verbose = infr.verbose > 5
        if isinstance(items, pd.DataFrame):
            if list(items.index.names) == ['aid1', 'aid2']:
                for edge, data in items.iterrows():
                    infr.add_feedback(edge=edge, verbose=verbose, **data)
            else:
                raise ValueError(
                    'Cannot interpret pd.DataFrame without edge index')
        else:
            # Dangerous if item length > 3
            for item in items:
                args = []
                if len(item) == 1:
                    # Case where items=[edge1, edge2]
                    if isinstance(item[0], int) or len(item[0]) != 2:
                        raise ValueError('invalid edge')
                if len(item) == 2:
                    # Case where items=[(edge1, state), (edge2, state)]
                    if ub.iterable(item[0]):
                        edge = item[0]
                        args = item[1:]
                    else:
                        edge = item
                else:
                    raise ValueError('invalid edge')
                # Case where items=[(u, v, state), (u, v, state)]
                if len(item) > 3:
                    raise ValueError('pass in data as a dataframe or '
                                     'use kwargs')
                infr.add_feedback(edge, *args, verbose=verbose, **kwargs)

    def edge_decision(infr, edge):
        """
        Gets a decision on an edge, either explicitly or implicitly
        """
        evidence_decision = infr.get_edge_attr(
            edge, 'evidence_decision', on_missing='default', default=UNREV)
        meta_decision = infr.get_edge_attr(
            edge, 'meta_decision', on_missing='default', default=NULL)
        decision = _rectify_decision(evidence_decision, meta_decision)
        return decision

    def edge_decision_from(infr, edges):
        """
        Gets a decision for multiple edges
        """
        edges = list(edges)
        evidence_decisions = infr.gen_edge_values(
            'evidence_decision', edges, on_missing='default', default=UNREV)
        meta_decisions = infr.gen_edge_values(
            'meta_decision', edges, on_missing='default', default=NULL)
        for ed, md in zip(evidence_decisions, meta_decisions):
            yield _rectify_decision(ed, md)

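    # Sketch (added, not in the original source): edge_decision_from is the
    # batch form of edge_decision; both funnel through _rectify_decision, so
    # an unreviewed edge whose meta_decision is SAME reports POSTV.
    #
    #   edges = list(infr.edges())
    #   decisions = dict(zip(edges, infr.edge_decision_from(edges)))
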
    def add_node_feedback(infr, aid, **attrs):
        infr.print('Setting aid={} {}'.format(aid, ub.urepr(attrs)))
        for key, value in attrs.items():
            infr.set_node_attrs(key, {aid: value})

    def add_feedback(infr, edge, evidence_decision=None, tags=None,
                     user_id=None, meta_decision=None, confidence=None,
                     timestamp_c1=None, timestamp_c2=None, timestamp_s1=None,
                     timestamp=None, verbose=None, priority=None):
        """
        Primary method for adding feedback and review edges to the graph.

        Args:
            edge (tuple): an undirected edge represented as a pair of aids
            evidence_decision (str): decision made based on the visual
                evidence between the two photos. Can be POSTV, NEGTV, INCMP,
                or UNKWN. Note: POSTV etc... are the variables, not the
                strings.
            tags (list of str): additional information to specify
            user_id (str): who is doing this review. This can identify a
                human or algorithm reviewer (e.g. 'user:joncrall' or
                'algo:vamp').
            meta_decision (str): decision made based on external knowledge.
                Perhaps the photographer knows that two animals are the same
                because all photos are of the same animal. This constrains
                the identity problem, but does not impact the computer vision
                learning algorithms, which aren't given the info needed to
                make this sort of decision.
            confidence (str): how sure the user is of this decision.
            timestamp_c1 (int): time that the review client started
            timestamp_c2 (int): time that the review client ended
            timestamp_s1 (int): time that the review server started
            timestamp (int): time that the review server ended
            verbose (bool): verbosity
            priority (float, optional): the priority assigned to this edge
                before review. This is only relevant for the termination
                criterion.

        Notes:
            If `infr.params['inference.enabled']` is True, then the edge is
            inserted into the graph and its properties are updated
            dynamically. Otherwise it is only added to the internal feedback
            dictionary and the `apply_feedback_edges` method must be called.

        Example:
            >>> from graphid import demo
            >>> infr = demo.demodata_infr(num_pccs=5)
            >>> infr.add_feedback((5, 6), POSTV)
            >>> infr.add_feedback((5, 6), NEGTV, tags=['photobomb'])
            >>> infr.add_feedback((1, 2), INCMP)
            >>> print(ub.urepr(infr.internal_feedback, nl=3, sk=1))
            >>> assert len(infr.external_feedback) == 0
            >>> assert len(infr.internal_feedback) == 2
            >>> assert len(infr.internal_feedback[(5, 6)]) == 2
            >>> assert len(infr.internal_feedback[(1, 2)]) == 1
        """
        prev_verbose = infr.verbose
        if verbose is not None:
            infr.verbose = verbose
        edge = aid1, aid2 = util.e_(*edge)

        if not infr.has_edge(edge):
            if infr.params['allow_unseen_nodes']:
                # Allow new aids
                if not infr.graph.has_node(aid1):
                    infr.add_aids([aid1])
                if not infr.graph.has_node(aid2):
                    infr.add_aids([aid2])
            infr._check_edge(edge)
            infr.graph.add_edge(aid1, aid2)

        if evidence_decision is None:
            evidence_decision = UNREV
        if meta_decision is None:
            meta_decision = const.META_DECISION.CODE.NULL
        if confidence is None:
            confidence = const.CONFIDENCE.CODE.UNKNOWN
        if timestamp is None:
            timestamp = ub.timestamp()

        msg = 'add_feedback ({}, {}), '.format(aid1, aid2)
        loc = locals()
        msg += ', '.join([
            str(val) for key, val in (
                (key, loc[key]) for key in [
                    'evidence_decision', 'tags', 'user_id', 'confidence',
                    'meta_decision'])
            if val is not None
        ])
        infr.print(msg, 2, color='white')

        if meta_decision == NULL:
            # TODO: check the previous meta_decision and use that if it is
            # consistent with the evidence decision.
            pass

        decision = _rectify_decision(evidence_decision, meta_decision)

        if decision == UNREV:
            # Unreviewing an edge deletes anything not yet committed
            if edge in infr.external_feedback:
                raise ValueError('External edge reviews cannot be undone')
            if edge in infr.internal_feedback:
                del infr.internal_feedback[edge]
            # Remove the edge from the queue if it is in there.
            if infr.queue:
                if edge in infr.queue:
                    del infr.queue[edge]

        # Keep track of sequential reviews and set properties on global graph
        num_reviews = infr.get_edge_attr(edge, 'num_reviews', default=0)
        review_id = next(infr.review_counter)

        feedback_item = {
            'tags': tags,
            'evidence_decision': evidence_decision,
            'meta_decision': meta_decision,
            'timestamp_c1': timestamp_c1,
            'timestamp_c2': timestamp_c2,
            'timestamp_s1': timestamp_s1,
            'timestamp': timestamp,
            'confidence': confidence,
            'user_id': user_id,
            'num_reviews': num_reviews + 1,
            'review_id': review_id,
        }
        infr.internal_feedback[edge].append(feedback_item)
        infr.set_edge_attr(edge, feedback_item)

        if infr.test_mode:
            prev_decision = infr._get_current_decision(edge)
            infr._dynamic_test_callback(edge, decision, prev_decision,
                                        user_id)

        # must happen after the dynamic test callback
        infr.set_edge_attr(edge, {'decision': decision})

        if infr.params['inference.enabled']:
            assert infr.dirty is False, (
                'need to recompute before dynamic inference continues')
            # Dynamically update the priority queue based on the new edge
            if decision == POSTV:
                action = infr._positive_decision(edge)
            elif decision == NEGTV:
                action = infr._negative_decision(edge)
            elif decision in UNINFERABLE:
                # incomparable and unreviewed have the same inference
                # structure
                action = infr._uninferable_decision(edge, decision)
            else:
                raise AssertionError('Unknown decision=%r' % (decision,))
            if infr.test_mode:
                infr.test_state['action'] = action
            if False:
                infr._print_debug_ccs()
        else:
            action = None
            infr.dirty = True
            infr._add_review_edge(edge, decision)

        if infr.params['inference.enabled'] and infr.refresh:
            # only add to the criteria if this wasn't requested as a fix edge
            if priority is not None and priority <= 1.0:
                meaningful = bool({'merge', 'split'} & set(action))
                infr.refresh.add(meaningful, user_id, decision)

        if infr.test_mode:
            infr.metrics_list.append(infr.measure_metrics())

        infr.verbose = prev_verbose

    def _print_debug_ccs(infr):
        assert all([ub.allsame(infr.node_labels(*cc))
                    for cc in infr.positive_components()])
        sorted_ccs = sorted([
            set(cc) for cc in infr.pos_graph.connected_components()
        ])
        msg = '[' + ', '.join([
            repr(cc) if infr.is_consistent(cc) else
            ub.color_text(repr(cc), 'red')
            for cc in sorted_ccs]) + ']'
        print(msg)

    @util.classproperty
    def feedback_keys(Infr):
        """ edge attribute keys used for feedback """
        return Infr.feedback_data_keys + ['num_reviews', 'review_id']

    @util.classproperty
    def feedback_data_keys(Infr):
        """ edge attribute keys used for feedback """
        return [
            'evidence_decision', 'tags', 'user_id', 'meta_decision',
            'timestamp_c1', 'timestamp_c2', 'timestamp_s1', 'timestamp',
            'confidence'
        ]

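    # Note (added commentary): feedback_keys is feedback_data_keys plus the
    # two bookkeeping fields, so a minimal sanity check looks like:
    #
    #   extra = (set(AnnotInference.feedback_keys)
    #            - set(AnnotInference.feedback_data_keys))
    #   assert extra == {'num_reviews', 'review_id'}
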
    def apply_feedback_edges(infr):
        """
        Transforms the feedback dictionaries into nx graph edge attributes.
        """
        infr.print('apply_feedback_edges', 1)
        # Transforms dictionary feedback into numpy array
        edges = []
        attr_lists = {key: [] for key in infr.feedback_keys}
        for edge, vals in infr.all_feedback_items():
            # hack for feedback rectification
            feedback_item = infr._rectify_feedback_item(vals)
            feedback_item['review_id'] = next(infr.review_counter)
            feedback_item['num_reviews'] = len(vals)
            # if feedback_item['decision'] == 'unknown':
            #     continue
            set1 = set(feedback_item.keys())
            set2 = set(attr_lists.keys())
            if set1 != set2:
                raise AssertionError('Bad feedback keys')
            for key, val in feedback_item.items():
                attr_lists[key].append(val)
            edges.append(edge)

        assert ub.allsame(list(map(len, attr_lists.values())))
        assert len(edges) == len(next(iter(attr_lists.values())))

        # Put pair orders in context of the graph
        infr.print('_set_feedback_edges(nEdges=%d)' % (len(edges),), 3)
        # Ensure edges exist
        for edge in edges:
            if not infr.graph.has_edge(*edge):
                infr.graph.add_edge(*edge)

        # take evidence_decision and meta_decision into account
        decisions = [
            _rectify_decision(ed, md)
            for ed, md in zip(attr_lists['evidence_decision'],
                              attr_lists['meta_decision'])
        ]

        for state, es in ub.group_items(edges, decisions).items():
            infr._add_review_edges_from(es, state)

        for key, val_list in attr_lists.items():
            infr.set_edge_attrs(key, ub.dzip(edges, val_list))

        if infr.params['inference.enabled']:
            infr.apply_nondynamic_update()

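    # Workflow sketch (added, hedged): with dynamic inference disabled,
    # feedback mainly accumulates in the feedback dictionaries and this method
    # pushes it into the graph in one batch, as described in the add_feedback
    # notes above.
    #
    #   infr.params['inference.enabled'] = False
    #   infr.add_feedback((1, 2), POSTV)
    #   infr.add_feedback((3, 4), NEGTV)
    #   infr.apply_feedback_edges()   # writes edge attrs and review graphs
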
    def _rectify_feedback(infr, feedback):
        return {edge: infr._rectify_feedback_item(vals)
                for edge, vals in feedback.items()}

    def _rectify_feedback_item(infr, vals):
        """ uses a most-recent-review-wins strategy """
        return vals[-1]

    def all_feedback_items(infr):
        for edge, vals in infr.external_feedback.items():
            yield edge, vals
        for edge, vals in infr.internal_feedback.items():
            yield edge, vals

    def all_feedback(infr):
        all_feedback = ub.ddict(list)
        all_feedback.update(infr.all_feedback_items())
        return all_feedback

    def clear_feedback(infr, edges=None):
        """ Delete all edge properties related to feedback """
        if edges is None:
            edges = infr.graph.edges()
        edges = list(edges)
        infr.print('clear_feedback len(edges) = %r' % (len(edges)), 2)
        infr.external_feedback = ub.ddict(list)
        infr.internal_feedback = ub.ddict(list)

        # Kill all feedback and remove edge labels, but leave graph edges
        # alone
        keys = infr.feedback_keys + ['inferred_state']
        util.nx_delete_edge_attr(infr.graph, keys, edges)

        # Move reviewed edges back into the unreviewed graph
        for key in (POSTV, NEGTV, INCMP):
            subgraph = infr.review_graphs[key]
            prev_edges = list(ub.compress(edges,
                                          list(subgraph.has_edges(edges))))
            subgraph.remove_edges_from(prev_edges)
            infr.review_graphs[UNREV].add_edges_from(prev_edges)

        infr.pos_redun_nids.clear()
        infr.neg_redun_metagraph.clear()
        infr.nid_to_errors.clear()

        if __debug__:
            infr.assert_disjoint_invariant()

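    # Sketch (added): clear_feedback keeps the graph topology but forgets the
    # reviews, while clear_edges below also drops the edges themselves.
    #
    #   n_edges = infr.graph.number_of_edges()
    #   infr.clear_feedback()
    #   assert infr.graph.number_of_edges() == n_edges   # edges remain
    #   assert len(infr.internal_feedback) == 0          # reviews are gone
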
    def clear_edges(infr):
        """ Removes all edges from the graph """
        for graph in infr.review_graphs.values():
            graph.remove_edges_from(list(graph.edges()))
        infr.graph.remove_edges_from(list(infr.graph.edges()))
        infr.pos_redun_nids.clear()
        infr.neg_redun_metagraph.clear()
        infr.nid_to_errors.clear()

    def reset(infr, state='empty'):
        """
        Removes all edges from the graph and resets name labels.

        Example:
            >>> from graphid.core.annot_inference import *  # NOQA
            >>> from graphid import demo
            >>> infr = demo.demodata_infr(num_pccs=5)
            >>> assert len(list(infr.edges())) > 0
            >>> infr.reset(state='empty')
            >>> assert len(list(infr.edges())) == 0
        """
        infr.clear_edges()
        infr.clear_feedback()
        if state == 'empty':
            # Remove all edges, and component names
            infr.clear_name_labels()
        elif state == 'orig':
            raise NotImplementedError('unused')
            infr.reset_name_labels()
        else:
            raise ValueError('Unknown state=%r' % (state,))

    def reset_name_labels(infr):
        """ Resets all annotation node name labels to their initial values """
        infr.print('reset_name_labels', 1)
        orig_names = infr.get_node_attrs('orig_name_label')
        infr.set_node_attrs('name_label', orig_names)

    def clear_name_labels(infr):
        """ Sets all annotation node name labels to be unknown """
        infr.print('clear_name_labels()', 1)
        # make distinct names for all nodes
        distinct_names = {
            node: -aid for node, aid in infr.get_node_attrs('aid').items()
        }
        infr.set_node_attrs('name_label', distinct_names)

class NameRelabel(object):

    def node_label(infr, aid):
        return infr.pos_graph.node_label(aid)

    def node_labels(infr, *aids):
        return infr.pos_graph.node_labels(*aids)

    def _next_nid(infr):
        if getattr(infr, 'nid_counter', None) is None:
            nids = nx.get_node_attributes(infr.graph, 'name_label')
            infr.nid_counter = max(nids)
        infr.nid_counter += 1
        new_nid = infr.nid_counter
        return new_nid

    def _rectify_names(infr, old_names, new_labels):
        """
        Finds the best assignment of old names based on the new groups each is
        assigned to.

        old_names  = [None, None, None, 1, 2, 3, 3, 4, 4, 4, 5, None]
        new_labels = [   1,    2,    2, 3, 4, 5, 5, 6, 3, 3, 7,    7]
        """
        infr.print('rectifying name lists', 3)
        newlabel_to_oldnames = ub.group_items(old_names, new_labels)
        unique_newlabels = list(newlabel_to_oldnames.keys())
        grouped_oldnames_ = list(ub.take(newlabel_to_oldnames,
                                         unique_newlabels))
        # Mark annots that are unknown and still grouped by themselves
        still_unknown = [len(g) == 1 and g[0] is None
                         for g in grouped_oldnames_]
        # Remove Nones for the name rectifier
        grouped_oldnames = [
            [n for n in oldgroup if n is not None]
            for oldgroup in grouped_oldnames_]
        new_names = util.name_rectifier.find_consistent_labeling(
            grouped_oldnames, verbose=infr.verbose >= 3, extra_prefix=None)
        unknown_labels = list(ub.compress(unique_newlabels, still_unknown))

        new_flags = [n is None for n in new_names]
        label_to_name = ub.dzip(unique_newlabels, new_names)
        needs_assign = list(ub.compress(unique_newlabels, new_flags))
        return label_to_name, needs_assign, unknown_labels

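    # Worked example (added commentary, not in the original source), using the
    # arrays in the docstring above:
    #
    #   label_to_name, needs_assign, unknown_labels = infr._rectify_names(
    #       old_names, new_labels)
    #
    # New label 4 groups only old name 2 and new label 5 groups {3, 3}, so
    # both keep their old names.  New label 1 groups a single unnamed (None)
    # annot and lands in `unknown_labels`; new label 2 groups only Nones, so
    # it gets no old name and shows up in `needs_assign`.
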
    def _rectified_relabel(infr, cc_subgraphs):
        """
        Reuses as many names as possible
        """
        # Determine which names can be reused
        infr.print('grouping names for rectification', 3)
        grouped_oldnames_ = [
            list(nx.get_node_attributes(subgraph, 'name_label').values())
            for count, subgraph in enumerate(cc_subgraphs)
        ]
        # Make sure negatives don't get priority
        grouped_oldnames = [
            [n for n in group if len(group) == 1 or n > 0]
            for group in grouped_oldnames_
        ]
        infr.print('begin rectification of %d grouped old names' % (
            len(grouped_oldnames)), 2)
        new_labels = util.name_rectifier.find_consistent_labeling(
            grouped_oldnames, verbose=infr.verbose >= 3)
        infr.print('done rectifying new names', 2)
        new_flags = [
            not isinstance(n, int) and n.startswith('_extra_name')
            for n in new_labels
        ]

        for idx in util.where(new_flags):
            new_labels[idx] = infr._next_nid()

        for idx, label in enumerate(new_labels):
            if label < 0 and len(grouped_oldnames[idx]) > 1:
                # Remove negative ids for grouped items
                new_labels[idx] = infr._next_nid()
        return new_labels

    def relabel_using_reviews(infr, graph=None, rectify=True):
        """
        Relabels nodes in the graph based on positive connected components

        This will change the 'name_label' of the nodes to be consistent while
        preserving any existing names as best as possible. If rectify=False,
        this will be faster, but the old names may not be preserved and each
        PCC will be assigned an arbitrary name.

        Note:
            if something messes up you can call infr.reset_labels_to_ibeis()
            to reset node labels to their original values --- this will
            almost always put the graph in an inconsistent state --- but then
            you can call this with rectify=True to fix everything up.

        Args:
            graph (nx.Graph, optional): only edges in `graph` are relabeled,
                defaults to the current graph.
            rectify (bool, optional): if True, names attempt to remain
                consistent, otherwise there are no restrictions on name
                labels other than that they are distinct.

        Example:
            >>> from graphid import demo, util
            >>> infr = demo.demodata_infr(num_pccs=5, pos_redun=1)
            >>> names0 = set(infr.get_node_attrs('name_label').values())
            >>> infr.relabel_using_reviews(rectify=True)
            >>> names1 = set(infr.get_node_attrs('name_label').values())
            >>> assert names0 == names1
            >>> # wont change because its the entire graph
            >>> #infr.relabel_using_reviews(rectify=False)
            >>> #names2 = set(infr.get_node_attrs('name_label').values())
        """
        infr.print('relabel_using_reviews', 2)
        if graph is None:
            graph = infr.graph

        # Get subgraphs and check consistency
        cc_subgraphs = []
        num_inconsistent = 0
        for cc in infr.positive_components(graph=graph):
            cc_subgraphs.append(infr.graph.subgraph(cc))
            if not infr.is_consistent(cc):
                num_inconsistent += 1

        infr.print('num_inconsistent = %r' % (num_inconsistent,), 2)
        if infr.verbose >= 2:
            cc_sizes = list(map(len, cc_subgraphs))
            pcc_size_hist = ub.dict_hist(cc_sizes)
            pcc_size_stats = util.stats_dict(cc_sizes)
            if len(pcc_size_hist) < 8:
                infr.print('PCC size hist = %s' % (ub.urepr(pcc_size_hist),))
            infr.print('PCC size stats = %s' % (ub.urepr(pcc_size_stats),))

        if rectify:
            # Rectified relabeling, preserves grouping and labeling if
            # possible
            new_labels = infr._rectified_relabel(cc_subgraphs)
        else:
            # Arbitrary relabeling, only preserves grouping
            if graph is infr.graph:
                # Use union-find labels
                new_labels = {
                    count: infr.node_label(next(iter(subgraph.nodes())))
                    for count, subgraph in enumerate(cc_subgraphs)
                }
            else:
                new_labels = {count: infr._next_nid()
                              for count, subgraph in enumerate(cc_subgraphs)}

        for count, subgraph in enumerate(cc_subgraphs):
            new_nid = new_labels[count]
            node_to_newlabel = ub.dzip(subgraph.nodes(), [new_nid])
            infr.set_node_attrs('name_label', node_to_newlabel)

        num_names = len(cc_subgraphs)
        infr.print('done relabeling', 3)
        return num_names, num_inconsistent

class MiscHelpers(object):

    def _rectify_nids(infr, aids, nids):
        if aids is None:
            raise ValueError('aids cannot be None')
        if nids is None:
            nids = [-aid for aid in aids]
        elif not ub.iterable(nids):
            nids = [nids] * len(aids)
        return nids

    def remove_aids(infr, aids):
        """
        Remove annotations from the graph.

        Returns:
            dict: split: indicates which PCCs were split by this action.

        Note:
            This may cause unintended splits!

        CommandLine:
            xdoctest -m graphid.core.annot_inference MiscHelpers.remove_aids

        Example:
            >>> from graphid import demo, util
            >>> infr = demo.demodata_infr(num_pccs=5, pos_redun=1)
            >>> infr.refresh_candidate_edges()
            >>> infr.pin_node_layout()
            >>> before = infr.copy()
            >>> aids = infr.aids[::5]
            >>> splits = infr.remove_aids(aids)
            >>> assert len(splits['old']) > 0
            >>> infr.assert_invariants()
            >>> # xdoc: +REQUIRES(--show)
            >>> util.qtensure()
            >>> after = infr
            >>> before.show(fnum=1, pnum=(1, 2, 1), pickable=True)
            >>> after.show(fnum=1, pnum=(1, 2, 2), pickable=True)
        """
        infr.print('remove_aids len(aids)={}'.format(len(aids)), level=3)

        # Determine which edges are going to be removed
        remove_edges = util.edges_outgoing(infr.graph, aids)
        old_groups = list(infr.positive_components())

        # Remove from tertiary bookkeeping structures
        remove_idxs = list(ub.take(util.make_index_lookup(infr.aids), aids))
        util.delete_items_by_index(infr.orig_name_labels, remove_idxs)
        util.delete_items_by_index(infr.aids, remove_idxs)
        infr.aids_set = set(infr.aids)

        # Remove from secondary bookkeeping structures
        util.delete_dict_keys(infr.external_feedback, remove_edges)
        util.delete_dict_keys(infr.internal_feedback, remove_edges)

        # Remove from core bookkeeping structures
        infr.graph.remove_nodes_from(aids)
        for graph in infr.review_graphs.values():
            graph.remove_nodes_from(aids)
        infr.queue.delete_items(remove_edges)

        # TODO: should refactor to perform a dynamic step, but in this case
        # it is less work to use a bazooka to shoot a fly.
        infr.apply_nondynamic_update()

        # I'm unsure if relabeling is necessary
        infr.relabel_using_reviews()

        new_groups = list(infr.positive_components())
        # print('old_groups = {!r}'.format(old_groups))
        # print('new_groups = {!r}'.format(new_groups))
        delta = util.grouping_delta(old_groups, new_groups)
        # print(ub.urepr(delta, nl=2))
        splits = delta['splits']
        n_old = len(splits['old'])
        n_new = len(list(ub.flatten(splits['new'])))
        infr.print(
            'removing {} aids split {} old PCCs into {} new PCCs'.format(
                len(aids), n_old, n_new))
        return splits

    def add_aids(infr, aids, nids=None):
        """
        CommandLine:
            python -m graphid.core.annot_inference MiscHelpers.add_aids

        Doctest:
            >>> aids_ = [1, 2, 3, 4, 5, 6, 7, 9]
            >>> infr = AnnotInference(aids=aids_, autoinit=True)
            >>> aids = [2, 22, 7, 9, 8]
            >>> nids = None
            >>> infr.add_aids(aids, nids)
            >>> result = infr.aids
            >>> print(result)
            >>> assert len(infr.graph) == len(infr.aids)
            [1, 2, 3, 4, 5, 6, 7, 9, 22, 8]
        """
        if aids is None:
            raise ValueError('aids cannot be None')
        nids = infr._rectify_nids(aids, nids)
        assert len(aids) == len(nids), 'must correspond'

        if infr.aids is None:
            nids = infr._rectify_nids(aids, nids)
            # Set object attributes
            infr.aids = aids
            infr.aids_set = set(infr.aids)
            infr.orig_name_labels = nids
        else:
            aid_to_idx = util.make_index_lookup(infr.aids)
            orig_idxs = list(ub.take(aid_to_idx, aids, None))
            new_flags = util.flag_None_items(orig_idxs)
            new_aids = list(ub.compress(aids, new_flags))
            new_nids = list(ub.compress(nids, new_flags))
            # Extend object attributes
            infr.aids.extend(new_aids)
            infr.orig_name_labels.extend(new_nids)
            infr.aids_set.update(new_aids)
            infr.update_node_attributes(new_aids, new_nids)

        if infr.graph is not None:
            infr.graph.add_nodes_from(aids)
            for subgraph in infr.review_graphs.values():
                subgraph.add_nodes_from(aids)
            nids = set(infr.pos_graph.node_labels(*aids))
            infr.neg_metagraph.add_nodes_from(nids)

    def update_node_attributes(infr, aids=None, nids=None):
        if aids is None:
            aids = infr.aids
            nids = infr.orig_name_labels
        assert aids is not None, 'must have aids'
        assert nids is not None, 'must have nids'
        node_to_aid = {aid: aid for aid in aids}
        node_to_nid = {aid: nid for aid, nid in zip(aids, nids)}
        assert len(node_to_nid) == len(node_to_aid)
        infr.graph.add_nodes_from(aids)
        for subgraph in infr.review_graphs.values():
            subgraph.add_nodes_from(aids)
        infr.set_node_attrs('aid', node_to_aid)
        infr.set_node_attrs('name_label', node_to_nid)
        infr.set_node_attrs('orig_name_label', node_to_nid)

    # TODO: deprecate these, they will always be identity I think
    # this is also taken care of by relabel_using_reviews

    def initialize_graph(infr, graph=None):
        """
        Constructs the internal networkx graph objects
        """
        infr.print('initialize_graph', 1)
        if graph is None:
            infr.graph = infr._graph_cls()
        else:
            infr.graph = graph
        infr.review_graphs[POSTV] = util.DynConnGraph()
        infr.review_graphs[NEGTV] = infr._graph_cls()
        infr.review_graphs[INCMP] = infr._graph_cls()
        infr.review_graphs[UNKWN] = infr._graph_cls()
        infr.review_graphs[UNREV] = infr._graph_cls()

        if graph is not None:
            for u, v, d in graph.edges(data=True):
                evidence_decision = d.get('evidence_decision', UNREV)
                meta_decision = d.get('meta_decision', NULL)
                decision = _rectify_decision(evidence_decision, meta_decision)
                if decision in {POSTV, NEGTV, INCMP, UNREV, UNKWN}:
                    infr.review_graphs[decision].add_edge(u, v)
                else:
                    raise ValueError('Unknown decision=%r' % (decision,))

        infr.update_node_attributes()

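    # Minimal sketch (added, hedged): initialize_graph is what splits a
    # pre-annotated graph into the per-decision review graphs, e.g. when
    # constructing from an existing nx.Graph whose edges carry an
    # 'evidence_decision' attribute.
    #
    #   G = nx.Graph()
    #   G.add_edge(1, 2, evidence_decision=POSTV)
    #   infr = AnnotInference.from_netx(G)
    #   assert infr.pos_graph.has_edge(1, 2)
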
    def print(infr, msg, level=1, color=None):
        if color is None:
            color = 'turquoise' if ub.WIN32 else 'blue'
        RECORD_LOGS = True
        INFER_CALLER = True
        if INFER_CALLER:
            from xdoctest.dynamic_analysis import get_parent_frame
            # Record the name of the calling function
            parent_name = get_parent_frame().f_code.co_name
            msg = '[{}] '.format(parent_name) + msg
        if RECORD_LOGS:
            # Append the message to an internal log deque
            infr.logs.append((msg, color))
            if len(infr.logs) == infr.logs.maxlen:
                infr.log_index = max(infr.log_index - 1, 0)
        if infr.verbose >= level:
            # Print the message to stdout
            loglevel = logging.INFO
            util.cprint('[infr] ' + msg, color)
        else:
            loglevel = logging.DEBUG
        if infr.logger:
            # Send the message to a python logger
            infr.logger.log(loglevel, msg)

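    # Note (added commentary): print() always records into the bounded log
    # deque and, depending on `verbose` and `logger`, also mirrors to stdout
    # or a python logger, so recent activity can be replayed without enabling
    # verbosity:
    #
    #   infr.print('hello', level=1)
    #   recent = infr.latest_logs()   # e.g. ['[<caller name>] hello']
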
    def latest_logs(infr, colored=False):
        index = infr.log_index
        infr.log_index = len(infr.logs)
        if colored:
            return [infr.logs[x] for x in range(index, len(infr.logs))]
        else:
            return [infr.logs[x][0] for x in range(index, len(infr.logs))]

    def dump_logs(infr):
        print('--- <LOG DUMP> ---')
        for msg, color in infr.logs:
            util.cprint('[infr] ' + msg, color)
        print(r'--- </LOG DUMP> ---')

class AltConstructors(object):
    _graph_cls = util.NiceGraph
    # _graph_cls = nx.Graph
    # _graph_cls = nx.DiGraph

    @classmethod
    def from_pairs(AnnotInference, aid_pairs, attrs=None, verbose=False):
        G = AnnotInference._graph_cls()
        if any(a1 == a2 for a1, a2 in aid_pairs):
            raise AssertionError('cannot have self-edges')
        G.add_edges_from(aid_pairs)
        if attrs is not None:
            for key in attrs.keys():
                values = ub.dzip(aid_pairs, attrs[key])
                nx.set_edge_attributes(G, name=key, values=values)
        infr = AnnotInference.from_netx(G, verbose=verbose)
        return infr

    @classmethod
    def from_netx(AnnotInference, G, verbose=False, infer=True):
        """
        Creates an AnnotInference object from a networkx graph
        """
        aids = list(G.nodes())
        nids = [-a for a in aids]
        infr = AnnotInference(aids, nids, autoinit=False, verbose=verbose)
        infr.initialize_graph(graph=G)
        # hack
        orig_name_labels = [infr.pos_graph.node_label(a) for a in aids]
        infr.orig_name_labels = orig_name_labels
        infr.set_node_attrs('orig_name_label',
                            ub.dzip(aids, orig_name_labels))
        if infer:
            infr.apply_nondynamic_update()
        return infr

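    # Construction sketch (added): from_pairs is a thin wrapper that builds
    # the graph and defers to from_netx, so the two are interchangeable here.
    #
    #   infr = AnnotInference.from_pairs([(1, 2), (2, 3)])
    #   assert infr.graph.number_of_edges() == 2
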
    def status(infr, extended=False):
        """
        Returns information about the state of the graph.

        Args:
            extended (bool): if True, adds in extra information that requires
                an O(|E|) amount of computation, otherwise only O(1) stats
                that are dynamically tracked are returned.

        Returns:
            dict: a dictionary containing status information. Each of the
                keys represents the following information:

                nNodes: number of nodes in the graph
                nEdges: number of edges in the graph
                nCCs: number of positive connected components
                nPostvEdges: number of edges labeled as positive
                nNegtvEdges: number of edges labeled as negative
                nIncmpEdges: number of edges labeled as incomparable
                nUnrevEdges: number of edges labeled as unreviewed
                nPosRedunCCs: the number of PCCs which are currently
                    k-positive-redundant, i.e. we are confident those PCCs
                    are the same individual.
                nNegRedunPairs: the number of PCC pairs which are currently
                    k-negative-redundant, i.e. we are confident those PCCs
                    are different individuals.
                nInconsistentCCs: the number of inconsistent PCCs that need
                    to be fixed, i.e. the number of PCCs with internal
                    negative edges.

                If extended is True, then the following keys are also present

                nNegEdgesWithin: number of negative edges inside PCCs
                nNegEdgesBetween: number of negative edges between PCCs
                nIncompEdgesWithin: number of incomparable edges inside PCCs
                nIncompEdgesBetween: number of incomparable edges between PCCs
                nUnrevEdgesWithin: number of unreviewed edges inside PCCs
                nUrevEdgesBetween: number of unreviewed edges between PCCs

        Example:
            >>> from graphid import demo
            >>> infr = demo.demodata_infr(num_pccs=5, p_incon=0.5, pcc_size=10)
            >>> print(ub.urepr(infr.status(extended=True)))
            {
                'nNodes': 50,
                'nEdges': 93,
                'nCCs': 5,
                'nPostvEdges': 66,
                'nNegtvEdges': 10,
                'nIncmpEdges': 2,
                'nUnrevEdges': 15,
                'nPosRedunCCs': 1,
                'nNegRedunPairs': 2,
                'nInconsistentCCs': 3,
                'nNegEdgesWithin': 4,
                'nNegEdgesBetween': 6,
                'nIncompEdgesWithin': 0,
                'nIncompEdgesBetween': 2,
                'nUnrevEdgesWithin': 15,
                'nUrevEdgesBetween': 0,
            }
        """
        status_dict = ub.odict([
            ('nNodes', len(infr.aids)),
            ('nEdges', infr.graph.number_of_edges()),
            ('nCCs', infr.pos_graph.number_of_components()),
            ('nPostvEdges', infr.pos_graph.number_of_edges()),
            ('nNegtvEdges', infr.neg_graph.number_of_edges()),
            ('nIncmpEdges', infr.incomp_graph.number_of_edges()),
            ('nUnrevEdges', infr.unreviewed_graph.number_of_edges()),
            ('nPosRedunCCs', len(infr.pos_redun_nids)),
            ('nNegRedunPairs', infr.neg_redun_metagraph.number_of_edges()),
            ('nInconsistentCCs', len(infr.nid_to_errors)),
            # ('nUnkwnEdges', infr.unknown_graph.number_of_edges()),
        ])
        if extended:
            def count_within_between(edges):
                n_within = 0
                n_between = 0
                for u, v in edges:
                    nid1, nid2 = infr.pos_graph.node_labels(u, v)
                    if nid1 == nid2:
                        n_within += 1
                    else:
                        n_between += 1
                return n_within, n_between

            a, b = count_within_between(infr.neg_graph.edges())
            status_dict['nNegEdgesWithin'] = a
            status_dict['nNegEdgesBetween'] = b

            a, b = count_within_between(infr.incomp_graph.edges())
            status_dict['nIncompEdgesWithin'] = a
            status_dict['nIncompEdgesBetween'] = b

            a, b = count_within_between(infr.unreviewed_graph.edges())
            status_dict['nUnrevEdgesWithin'] = a
            status_dict['nUrevEdgesBetween'] = b

        return status_dict

class AnnotInference(ub.NiceRepr,
                     # Old internal stuffs
                     AltConstructors,
                     MiscHelpers,
                     Feedback,
                     NameRelabel,
                     Consistency,
                     # New annot_inference algorithm stuffs
                     mixin_dynamic.NonDynamicUpdate,
                     mixin_dynamic.Recovery,
                     mixin_dynamic.DynamicUpdate,
                     mixin_redundancy.Redundancy,
                     mixin_priority.Priority,
                     # General helpers
                     mixin_invariants.AssertInvariants,
                     mixin_helpers.DummyEdges,
                     mixin_helpers.Convenience,
                     mixin_helpers.AttrAccess,
                     # Simulation and Loops
                     mixin_simulation.SimulationHelpers,
                     mixin_loops.InfrReviewers,
                     mixin_loops.InfrLoops,
                     # For matching and candidate edge callbacks
                     mixin_callbacks.InfrCallbacks,
                     mixin_callbacks.InfrCandidates,
                     # Visualization
                     mixin_viz.GraphVisualization,
                     ):
    """
    class for maintaining the state of an identification

    CommandLine:
        python -m graphid.core.annot_inference AnnotInference
        python -m graphid.core.annot_inference AnnotInference --show

    Example:
        >>> from graphid.core import AnnotInference
        >>> import pytest
        >>> infr = AnnotInference()
        >>> print('infr = {}'.format(infr))
        infr = <AnnotInference(nNodes=0, nEdges=0, nCCs=0)>
        >>> infr.add_aids(list(range(1, 6)))
        >>> print('infr = {}'.format(infr))
        infr = <AnnotInference(nNodes=5, nEdges=0, nCCs=5)>
        >>> # Add some feedback
        >>> infr.params['allow_unseen_nodes'] = False
        >>> infr.add_feedback((1, 2), POSTV)
        >>> infr.add_feedback((1, 3), INCMP)
        >>> infr.add_feedback((1, 4), NEGTV)
        >>> with pytest.raises(ValueError):
        >>>     infr.add_feedback((1, 10), NEGTV)
        >>> with pytest.raises(ValueError):
        >>>     infr.add_feedback((11, 12), NEGTV)
        >>> print('infr = {}'.format(infr))
        infr = <AnnotInference(nNodes=5, nEdges=3, nCCs=4)>
        >>> # xdoc: +REQUIRES(--show)
        >>> infr.show_graph()
        >>> util.show_if_requested()
    """

    def __getstate__(self):
        state = self.__dict__.copy()
        # Dont pickle generators
        state['_gen'] = None
        state['logger'] = None
        return state

    def __init__(infr, aids=[], nids=None, autoinit=True, verbose=False):
        """
        Ignore:
            pass
        """
        # infr.verbose = verbose
        infr.name = None
        infr.verbose = verbose

        # setup logging
        infr.logger = None
        infr.logs = collections.deque(maxlen=10000)
        infr.log_index = 0

        # If not dirty, new feedback should dynamically maintain a consistent
        # state. If dirty, it means we need to recompute connected components
        # before we can continue with dynamic review.
        infr.dirty = False

        infr.readonly = False

        infr.aids = None
        infr.aids_set = None
        infr.orig_name_labels = None

        # Underlying graph structure
        infr.graph = None
        infr.review_graphs = {
            POSTV: None,
            NEGTV: None,
            INCMP: None,
            UNKWN: None,
            UNREV: None,
        }

        # Criterion
        infr.queue = util.PriorityQueue()
        infr.refresh = None
        infr.review_counter = it.count(0)
        infr.nid_counter = None

        # Dynamic Properties (requires bookkeeping)
        infr.nid_to_errors = {}
        # Recover graph holds positive edges of inconsistent PCCs
        infr.recover_graph = util.DynConnGraph()
        # Set of PCCs that are positive redundant
        infr.pos_redun_nids = set([])
        # Represents the metagraph of negative edges between PCCs
        infr.neg_redun_metagraph = infr._graph_cls()

        # NEW VERSION: metagraph of PCCs with ANY number of negative edges
        # between them. The weight on the edge should represent the strength.
        infr.neg_metagraph = infr._graph_cls()

        # This should represent the feedback read from a database. We do not
        # need to do any updates to an external database based on this data.
        infr.external_feedback = ub.ddict(list)

        # Feedback that has not been synced with the external database.
        # Once we sync, this is merged into external feedback.
        infr.internal_feedback = ub.ddict(list)

        # Bookkeeping
        infr.edge_truth = {}
        infr.task_probs = ub.ddict(dict)

        # A generator that maintains the state of the algorithm
        infr._gen = None

        # Computer vision algorithms
        infr.ranker = None     # the ranking algorithm (e.g. LNBNN)
        infr.verifier = None   # the match_state classifier
        infr.verifiers = None  # dictionary of tasks -> classifier

        # TODO: move to params
        infr.task_thresh = {
            'match_state': {
                POSTV: np.inf,
                NEGTV: np.inf,
                INCMP: np.inf,
            },
            'photobomb_state': {
                'pb': np.inf,
                'nopb': np.inf,
            }
        }

        # Parameters / Configurations / Callbacks
        infr.callbacks = {
            'request_review': None,
            'review_ready': None,
            'review_finished': None,
        }
        infr.params = {
            # If False, adding edges with non-existent nodes will error.
            # Otherwise it will silently add them.
            'allow_unseen_nodes': True,

            'manual.n_peek': 1,

            'ranking.enabled': True,
            'ranking.ntop': 5,

            'algo.max_outer_loops': None,
            'algo.quickstart': False,
            'algo.hardcase': False,

            # Dynamic Inference
            'inference.enabled': True,
            'inference.update_attrs': True,

            # Termination / Refresh
            'refresh.window': 20,
            'refresh.patience': 72,
            'refresh.thresh': 0.052,
            'refresh.method': 'binomial',

            # Redundancy
            # If redun.enabled is True, then redundant edges will be ignored
            # by the priority queue and extra edges needed to achieve minimum
            # redundancy will be searched for if the queue is empty.
            'redun.enabled': True,
            # positive/negative k
            'redun.pos': 2,
            'redun.neg': 2,
            # does positive/negative augmentation
            'redun.enforce_pos': True,
            'redun.enforce_neg': True,
            # prevents user interaction in final phase
            'redun.neg.only_auto': True,

            # Only review CCs connected by confidence less than this value;
            # a good value is 'pretty_sure'
            'queue.conf.thresh': None,

            # Autoreviewer params
            'autoreview.enabled': True,
            'autoreview.prioritize_nonpos': True,
        }

        infr._viz_image_config = {
            'in_image': False,
            'thumbsize': 221,
        }

        infr.verifier_params = {}  # TODO
        infr.ranker_params = {}

        # Developer modes (consolidate this)
        infr.test_mode = False
        infr.simulation_mode = False

        # set to the current phase of the main loop
        # (mostly for testing)
        infr.loop_phase = None

        # Testing state
        infr.metrics_list = None
        infr.test_state = None
        infr.test_gt_pos_graph = None
        infr.nid_to_gt_cc = None
        infr.node_truth = None
        infr.real_n_pcc_mst_edges = None

        # External: Can we remove these?
        infr.cm_list = None
        infr.vsone_matches = {}
        infr.qreq_ = None
        infr.manual_wgt = None

        infr.print('__init__', level=1)
        infr.add_aids(aids, nids)
        if autoinit:
            infr.initialize_graph()
            if isinstance(autoinit, str):
                raise Exception('Cannot autoinit this way anymore')

    def __nice__(infr):
        """ for ub.NiceRepr """
        if infr.graph is None:
            return 'nAids=%r, G=None' % (len(infr.aids))
        else:
            fmt = 'nNodes={}, nEdges={}, nCCs={}'
            msg = fmt.format(
                len(infr.aids),
                infr.graph.number_of_edges(),
                infr.pos_graph.number_of_components(),
                # infr.incomp_graph.number_of_edges(),
                # infr.unreviewed_graph.number_of_edges(),
            )
            return msg

    def subparams(infr, prefix):
        """
        Returns a dict of params prefixed with <prefix>.
        The returned dict does not contain the prefix.

        Example:
            >>> infr = AnnotInference()
            >>> result = ub.urepr(infr.subparams('refresh'), nl=0, precision=1, sort=1)
            >>> print(result)
            {'method': 'binomial', 'patience': 72, 'thresh': 0.1, 'window': 20}
        """
        prefix_ = prefix + '.'
        subparams = {k[len(prefix_):]: v for k, v in infr.params.items()
                     if k.startswith(prefix_)}
        return subparams

    def copy(infr):
        infr2 = AnnotInference(
            copy.deepcopy(infr.aids),
            copy.deepcopy(infr.orig_name_labels),
            autoinit=False,
            verbose=infr.verbose)
        # shallow copies of the algorithm classes
        infr2.verifiers = infr.verifiers
        infr2.ranker = infr.ranker

        infr2.graph = infr.graph.copy()
        infr2.external_feedback = copy.deepcopy(infr.external_feedback)
        infr2.internal_feedback = copy.deepcopy(infr.internal_feedback)
        infr2.cm_list = copy.deepcopy(infr.cm_list)
        infr2.qreq_ = copy.deepcopy(infr.qreq_)
        infr2.nid_counter = infr.nid_counter

        infr2.recover_graph = copy.deepcopy(infr.recover_graph)
        infr2.pos_redun_nids = copy.deepcopy(infr.pos_redun_nids)
        infr2.neg_redun_metagraph = copy.deepcopy(infr.neg_redun_metagraph)
        infr2.neg_metagraph = copy.deepcopy(infr.neg_metagraph)
        infr2.review_graphs = copy.deepcopy(infr.review_graphs)
        infr2.nid_to_errors = copy.deepcopy(infr.nid_to_errors)

        infr2.readonly = infr.readonly
        infr2.dirty = infr.dirty
        infr2.test_mode = infr.test_mode
        infr2.simulation_mode = infr.simulation_mode

        infr2.queue = copy.deepcopy(infr.queue)
        infr2.params = copy.deepcopy(infr.params)
        infr2._viz_image_config = infr._viz_image_config.copy()

        if infr.test_mode:
            infr2.test_state = copy.deepcopy(infr.test_state)
            infr2.metrics_list = copy.deepcopy(infr.metrics_list)
        return infr2

    def subgraph(infr, aids):
        """
        Makes a new inference object that is a subset of the original.

        Note, this is not robust, be careful. The subgraph should be treated
        as read only. Do not commit any reviews made from here.
        """
        orig_name_labels = list(infr.gen_node_values('orig_name_label', aids))
        infr2 = AnnotInference(aids, orig_name_labels, autoinit=False,
                               verbose=infr.verbose)
        # deep copy the graph structure
        infr2.graph = infr.graph.subgraph(aids).copy()
        infr2.readonly = True

        infr2.verifiers = infr.verifiers
        infr2.ranker = infr.ranker

        infr2.params = copy.deepcopy(infr.params)
        infr2._viz_image_config = infr._viz_image_config.copy()
        # infr2._viz_init_nodes = infr._viz_image_config
        # infr2._viz_image_config_dirty = infr._viz_image_config_dirty

        infr2.edge_truth = {
            e: infr.edge_truth[e] for e in infr2.graph.edges()
            if e in infr.edge_truth
        }
        # TODO: internal/external feedback

        infr2.nid_counter = infr.nid_counter
        infr2.dirty = True
        infr2.cm_list = None
        infr2.qreq_ = None

        # TODO:
        # infr2.nid_to_errors = {}  # = copy.deepcopy(infr.nid_to_errors)
        # infr2.recover_graph = copy.deepcopy(infr.recover_graph)
        # infr2.pos_redun_nids = copy.deepcopy(infr.pos_redun_nids)
        # infr2.neg_redun_metagraph = copy.deepcopy(infr.neg_redun_metagraph)

        infr2.review_graphs = {}
        for k, g in infr.review_graphs.items():
            if g is None:
                infr2.review_graphs[k] = None
            elif k == POSTV:
                infr2.review_graphs[k] = g.subgraph(aids, dynamic=True)
            else:
                infr2.review_graphs[k] = g.subgraph(aids)
        return infr2

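    # Sketch (added, hedged): copy() gives a standalone clone that is safe to
    # keep reviewing, whereas subgraph() returns a read-only view over a
    # subset of the annotations (assuming the source graph itself is not
    # read-only).
    #
    #   clone = infr.copy()
    #   part = infr.subgraph(infr.aids[:10])
    #   assert part.readonly and not clone.readonly
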
    def set_config(infr, config, **kw):
        pass

if __name__ == '__main__':
    """
    CommandLine:
        python -m graphid.core.annot_inference all
    """
    import xdoctest
    xdoctest.doctest_module(__file__)