Source code for graphid.demo.dummy_algos

import itertools as it
import numpy as np
import pandas as pd
import ubelt as ub
import networkx as nx  # NOQA
from graphid.core.state import POSTV, NEGTV, INCMP, UNREV  # NOQA
from graphid.core.state import SAME, DIFF, NULL  # NOQA
from graphid import util
# from numpy.core.umath_tests import matrix_multiply  # NOQA


class DummyRanker(object):
    """
    Generates dummy rankings

    Candidates for a query node are sampled from the ground-truth groups
    held by a verifier and then ordered by that verifier's dummy scores.
    """

    def __init__(ranker, verif):
        # The verifier provides everything read below: ``infr``, ``rng``,
        # ``orig_labels``, ``orig_groups``, ``orig_nodes`` and
        # ``predict_edges``.
        ranker.verif = verif

    def predict_single_ranking(ranker, u, K=10):
        """
        simulates the ranking algorithm. Order is defined using the dummy
        vsone scores, but tests are only applied to randomly selected gt and
        gf pairs. So, you usually will get a gt result, but you might not if
        all the scores are bad.

        Args:
            u: query node; must be a key of ``verif.orig_labels``.
            K (int): ranking length. At most ``K`` nodes are returned.

        Returns:
            list: the nodes opposite ``u`` on the best scoring sampled
                edges, highest score first. Empty when there is no other
                node to compare against.
        """
        verif = ranker.verif
        infr = verif.infr

        # Split every other node into ground-true (same original label as
        # ``u``) and ground-false (all remaining nodes). Sorting gives the
        # rng a reproducible population to sample from.
        nid = verif.orig_labels[u]
        same_group = verif.orig_groups[nid]
        others_gt = sorted(same_group - {u})
        others_gf = sorted(verif.orig_nodes - same_group)

        rng = verif.rng

        # Sample at most max(1, K // 2) true and max(1, K * 4) false
        # candidates, bounded by how many of each actually exist.
        k_gt = min(len(others_gt), max(1, K // 2))
        k_gf = min(len(others_gf), max(1, K * 4))

        vs_list = []
        if k_gt > 0:
            vs_list.append(rng.choice(others_gt, k_gt, replace=False))
        if k_gf > 0:
            vs_list.append(rng.choice(others_gf, k_gf, replace=False))

        u_edges = [infr.e_(u, v) for v in it.chain.from_iterable(vs_list)]
        if not u_edges:
            # Nothing was sampled (``u`` is the only node), so there is
            # nothing to score and the ranking is empty. Returning here
            # avoids asking the verifier to score an empty edge list.
            return []

        u_probs = np.array(verif.predict_edges(u_edges))

        # TODO: need to deterministically sort here
        sortx = np.argsort(u_probs)[::-1][0:K]
        ranked_edges = list(ub.take(u_edges, sortx))

        # Report the endpoint of each edge that is not ``u`` itself.
        ranked_nodes = [
            edge[0] if u != edge[0] else edge[1]
            for edge in ranked_edges
        ]
        return ranked_nodes

    def predict_rankings(ranker, nodes, K=10):
        """
        Yields a list of ranked nodes for each query node.

        Args:
            nodes: iterable of query nodes.
            K (int): ranking length forwarded to `predict_single_ranking`.

        Yields:
            list: the ranking of one node, in the order of ``nodes``.
        """
        for u in nodes:
            yield ranker.predict_single_ranking(u, K=K)

    def predict_candidate_edges(ranker, nodes, K=10):
        """
        Collects the edges between each query node and its ranked nodes.

        Args:
            nodes: iterable of query nodes. One-shot iterators are accepted.
            K (int): ranking length forwarded to `predict_rankings`.

        Returns:
            set: the unique edges ``util.e_(u, v)`` for every node ``v``
                ranked for a query node ``u``.

        CommandLine:
            python -m graphid.demo.dummy_algos DummyRanker.predict_candidate_edges

        Example:
            >>> from graphid import demo
            >>> kwargs = dict(num_pccs=40, size=2)
            >>> infr = demo.demodata_infr(**kwargs)
            >>> edges = list(infr.ranker.predict_candidate_edges(infr.aids, K=100))
            >>> scores = np.array(infr.verifier.predict_edges(edges))
            >>> assert len(edges) > 0
        """
        # ``nodes`` is walked twice in lockstep: once by ``zip`` and once
        # inside ``predict_rankings``. A one-shot iterator would be drained
        # by both consumers at the same time, pairing each node with the
        # ranking of a different node. Materialize it first.
        nodes = list(nodes)
        new_edges = set()
        for u, ranks in zip(nodes, ranker.predict_rankings(nodes, K=K)):
            new_edges.update(util.e_(u, v) for v in ranks)
        return new_edges
class DummyVerif(object):
    """
    Generates dummy scores between pairs of annotations.
    (not necessarily existing edges in the graph)

    Scores are random draws whose distribution depends on the true state of
    the pair, and are cached in ``infr.task_probs['match_state']``.

    CommandLine:
        python -m graphid.demo DummyVerif:1

    Example:
        >>> from graphid.demo import *  # NOQA
        >>> from graphid import demo
        >>> kwargs = dict(num_pccs=6, p_incon=.5, size_std=2)
        >>> infr = demo.demodata_infr(**kwargs)
        >>> infr.dummy_verif.predict_edges([(1, 2)])
        >>> infr.dummy_verif.predict_edges([(1, 21)])
        >>> assert len(infr.dummy_verif.infr.task_probs['match_state']) == 2
    """

    def __init__(verif, infr):
        # Fixed seed so that the generated scores are reproducible.
        verif.rng = np.random.RandomState(4033913)
        # Keyed by the TRUE state of an edge: parameters passed to
        # ``util.randn`` when drawing the first (POSTV) probability.
        verif.dummy_params = {
            NEGTV: {'mean': .2, 'std': .25},
            POSTV: {'mean': .85, 'std': .2},
            INCMP: {'mean': .15, 'std': .1},
        }
        verif.infr = infr
        verif.orig_nodes = set(infr.aids)
        verif.orig_labels = infr.get_node_attrs('orig_name_label')
        # Invert node -> label, then store each label's nodes as a set.
        label_to_nodes = ub.invert_dict(verif.orig_labels, False)
        verif.orig_groups = ub.map_vals(set, label_to_nodes)

    def predict_proba_df(verif, edges):
        """
        Looks up (generating on a cache miss) the state probabilities of
        each edge.

        Args:
            edges: iterable of node pairs. Each pair is passed through
                ``infr.e_`` before it is used as a cache key.

        Returns:
            pd.DataFrame: one row per requested edge, indexed by
                ('aid1', 'aid2').

        CommandLine:
            python -m graphid.demo DummyVerif.predict_edges

        Example:
            >>> from graphid import demo
            >>> kwargs = dict(num_pccs=40, size=2)
            >>> infr = demo.demodata_infr(**kwargs)
            >>> verif = infr.dummy_verif
            >>> edges = list(infr.graph.edges())
            >>> probs = verif.predict_proba_df(edges)
        """
        infr = verif.infr
        edges = list(it.starmap(verif.infr.e_, edges))
        prob_cache = infr.task_probs['match_state']

        uncached = [edge for edge in edges if edge not in prob_cache]
        if uncached:
            truths = [verif._get_truth(edge) for edge in uncached]
            by_truth = ub.group_items(uncached, truths)
            states = [POSTV, NEGTV, INCMP]
            # Visit the groups in sorted key order so the rng is consumed
            # in a deterministic order.
            for truth in sorted(by_truth):
                members = by_truth[truth]
                count = len(members)
                params = verif.dummy_params[truth]
                first = util.randn(shape=[count], rng=verif.rng,
                                   a_max=1, a_min=0, **params)
                # Just randomly assign the other probs: split what is left
                # of the unit mass between the two remaining states.
                second = verif.rng.rand(count) * (1 - first)
                third = 1 - (first + second)
                for edge, p0, p1, p2 in zip(members, first, second, third):
                    prob_cache[edge] = ub.dzip(states, (p0, p1, p2))

        rows = list(ub.take(prob_cache, edges))
        index = util.ensure_multi_index(edges, ('aid1', 'aid2'))
        probs = pd.DataFrame(rows, index=index)
        return probs

    def predict_edges(verif, edges):
        """
        Returns the POSTV column of `predict_proba_df` for ``edges``.
        """
        return verif.predict_proba_df(edges)[POSTV]

    def show_score_probs(verif):
        """
        Plots a histogram of sampled scores for each entry of
        ``dummy_params``.

        CommandLine:
            python -m graphid.demo.dummy_algos DummyVerif.show_score_probs --show

        Example:
            >>> from graphid import core
            >>> from graphid import demo
            >>> infr = core.AnnotInference()
            >>> verif = demo.DummyVerif(infr)
            >>> verif.show_score_probs()
            >>> util.show_if_requested()
        """
        import matplotlib.pyplot as plt
        num_samples = 100000
        for truth, params in verif.dummy_params.items():
            samples = util.randn(shape=[num_samples], rng=verif.rng,
                                 a_max=1, a_min=0, **params)
            color = verif.infr._get_truth_colors()[truth]
            plt.hist(samples, bins=100, label=truth, alpha=.8, color=color)
        plt.legend()

    def _get_truth(verif, edge):
        """
        Returns the true state of ``edge``.

        A value recorded in ``infr.edge_truth`` takes precedence. Otherwise
        the 'orig_name_label' of both endpoints is compared: equal labels
        give POSTV, different labels give NEGTV.
        """
        infr = verif.infr
        recorded = infr.edge_truth
        if edge in recorded:
            return recorded[edge]
        node_dict = infr.graph.nodes
        nid1 = node_dict[edge[0]]['orig_name_label']
        nid2 = node_dict[edge[1]]['orig_name_label']
        if nid1 == nid2:
            return POSTV
        return NEGTV
if __name__ == '__main__':
    # Script entry point: run every doctest in this module with xdoctest.
    #
    # CommandLine:
    #     python -m graphid.demo.dummy_algos all
    import xdoctest
    xdoctest.doctest_module(__file__)