# Source code for fafbseg.move.merge

#    A collection of tools to interface with manually traced and autosegmented
#    data in FAFB.
#
#    Copyright (C) 2019 Philipp Schlegel
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.

import numpy as np
import pymaid
import navis
import random

from tqdm import tqdm

from .. import utils
from ..google import find_fragments

from .merge_utils import collapse_nodes
from .interfaces import confirm_overlap

import inquirer
from inquirer.themes import GreenPassion

# This is to prevent FutureWarning from numpy (via vispy)
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Module-level alias for the progress-bar switch defined in ``utils``; it is
# passed (negated) as ``disable`` to the tqdm loops in this module.
# NOTE(review): this binds the value at import time - presumably later changes
# to ``utils.use_pbars`` are not picked up here; confirm.
use_pbars = utils.use_pbars

# Names exported by ``from ... import *``.
__all__ = ['merge_into_catmaid']


@utils.never_cache
def merge_into_catmaid(x, target_instance, tag, min_node_overlap=4,
                       min_overlap_size=1, merge_limit=1, min_upload_size=0,
                       min_upload_nodes=1, update_radii=True,
                       import_tags=False, label_joins=True,
                       sid_from_nodes=True, mesh=None):
    """Merge neuron into target CATMAID instance.

    This function will attempt to:

        1. Find fragments in ``target_instance`` that overlap with ``x``
           using whatever segmentation data source you have set using
           ``fafbseg.use_...``.
        2. Generate a union of these fragments and ``x``.
        3. Make a differential upload of the union leaving existing nodes
           untouched.
        4. Join uploaded and existing tracings into a single continuous
           neuron. This will also upload connectors. Node tags are only
           uploaded if ``import_tags=True``.

    All steps that need user interaction (confirming the overlapping
    fragments) are run first for every neuron; the actual uploads and joins
    happen afterwards.

    Parameters
    ----------
    x :                 pymaid.CatmaidNeuron/List | navis.TreeNeuron/List
                        Neuron(s)/fragment(s) to commit to
                        ``target_instance``. A copy is made, the input is
                        not modified.
    target_instance :   pymaid.CatmaidInstance
                        Target Catmaid instance to commit the neuron to.
    tag :               str | None
                        A tag to be added as part of a
                        ``{URL} upload {tag}`` annotation. This should be
                        something identifying your group - e.g.
                        ``tag='WTCam'`` for the Cambridge Wellcome Trust
                        group. If None, no tag is appended.
    min_node_overlap :  int, optional
                        Minimal overlap between `x` and a potentially
                        overlapping neuron in ``target_instance``. If the
                        fragment has less total nodes than
                        `min_node_overlap`, the threshold will be lowered to:
                        ``min_node_overlap = min(min_node_overlap, fragment.n_nodes)``
    min_overlap_size :  int, optional
                        Minimum node count for potentially overlapping
                        neurons in ``target_instance``. Use this to e.g.
                        exclude single-node synapse orphans.
    merge_limit :       int, optional
                        Distance threshold [um] for collapsing nodes of
                        ``x`` into overlapping fragments in target instance.
                        Decreasing this will help if your neuron has
                        complicated branching patterns (e.g. uPN dendrites)
                        at the cost of potentially creating duplicate
                        parallel tracings in the neuron's backbone.
    min_upload_size :   float, optional
                        Minimum size in microns for upload of new branches:
                        branches found in ``x`` but not in the overlapping
                        neuron(s) in ``target_instance`` are uploaded in
                        fragments. Use this parameter to exclude small
                        branches that might not be worth the additional
                        review time.
    min_upload_nodes :  int, optional
                        As ``min_upload_size`` but for number of nodes
                        instead of cable length.
    update_radii :      bool, optional
                        If True, will use radii in ``x`` to update radii of
                        overlapping fragments if (and only if) the nodes do
                        not currently have a radius (i.e. radius<=0).
    import_tags :       bool, optional
                        If True, will import node tags. Please note that
                        this will NOT import tags of nodes that have been
                        collapsed into manual tracings.
    label_joins :       bool, optional
                        If True, will label nodes at which old and new
                        tracings have been joined with tags
                        ("Joined from ..." and "Joined with ...") and with a
                        lower confidence of 1.
    sid_from_nodes :    bool, optional
                        If True and the to-be-merged neuron has a
                        "skeleton_id" column it will be used to set the
                        ``source_id`` upon uploading new branches. This is
                        relevant if your neuron is a virtual chimera of
                        several neurons: in order to preserve provenance
                        (i.e. correctly associating each node with a
                        ``source_id`` origin).
    mesh :              Volume | MeshNeuron | mesh-like object | list thereof
                        Mesh representation of ``x``. If provided, will use
                        to improve merging. If ``x`` is a list of neurons,
                        must provide a mesh for each of them.

    Returns
    -------
    None
                        If all went well.
    dict
                        If an upload failed, returns the server response
                        containing the error. Processing stops at that
                        point; remaining neurons are not handled.

    Raises
    ------
    TypeError
                        If ``x`` is not a TreeNeuron/NeuronList or ``tag``
                        is neither a string nor None.
    ValueError
                        If the number of meshes does not match the number
                        of neurons.
    PermissionError
                        If the user lacks any of the required permissions
                        on the target project.

    Examples
    --------
    Setup

    >>> import fafbseg
    >>> import pymaid
    >>> from fafbseg.move.merge import merge_into_catmaid
    >>> # Set up connections to manual and autoseg CATMAID
    >>> manual = pymaid.CatmaidInstance('URL', 'HTTP_USER', 'HTTP_PW', 'API_TOKEN')
    >>> auto = pymaid.CatmaidInstance('URL', 'HTTP_USER', 'HTTP_PW', 'API_TOKEN')
    >>> # Set a segmentation data source
    >>> fafbseg.use_google_storage("https://storage.googleapis.com/fafb-ffn1-20190805/segmentation")

    Merge a neuron from autoseg into v14

    >>> # Fetch the autoseg neuron to transfer to v14
    >>> x = pymaid.get_neuron(267355161, remote_instance=auto)
    >>> # Get the neuron's annotations so that they can be merged too
    >>> x.get_annotations(remote_instance=auto)
    >>> # Start the commit
    >>> # See online documentation for video of merge process
    >>> resp = merge_into_catmaid(x, target_instance=manual, tag='WTCam')

    """
    if not isinstance(x, navis.NeuronList):
        if not isinstance(x, navis.TreeNeuron):
            raise TypeError('Expected TreeNeuron/List, got "{}"'.format(type(x)))
        x = navis.NeuronList(x)

    # Normalise ``mesh`` to a list with one entry per neuron
    if not isinstance(mesh, (np.ndarray, list)):
        if isinstance(mesh, type(None)):
            mesh = [mesh] * len(x)
        else:
            mesh = [mesh]

    if len(mesh) != len(x):
        raise ValueError(f'Got {len(mesh)} meshes for {len(x)} neurons.')

    # Make a copy - in case we make any changes to the neurons
    # (like changing duplicate skeleton IDs)
    x = x.copy()

    if not isinstance(tag, (str, type(None))):
        raise TypeError('Tag must be string, got "{}"'.format(type(tag)))

    # Check user permissions
    perm = target_instance.fetch(target_instance.make_url('permissions'))
    requ_perm = ['can_annotate', 'can_annotate_with_token', 'can_import']
    miss_perm = [p for p in requ_perm
                 if target_instance.project_id not in perm[0].get(p, [])]

    if miss_perm:
        msg = 'You lack permissions: {}. Please contact an administrator.'
        raise PermissionError(msg.format(', '.join(miss_perm)))

    pymaid.set_loggers('WARNING')

    # Throttle requests just to play it safe
    # On a bad connection one might have to decrease max_threads further
    target_instance.max_threads = min(target_instance.max_threads, 50)

    # For user convenience, we will do all the stuff that needs user
    # interaction first and then run the automatic merge:

    # Start by finding all overlapping fragments
    overlapping = []
    for n, m in tqdm(zip(x, mesh),
                     desc='Pre-processing neuron(s)',
                     leave=False,
                     disable=not use_pbars,
                     total=len(x)):
        ol = find_fragments(n,
                            min_node_overlap=min_node_overlap,
                            min_nodes=min_overlap_size,
                            mesh=m,
                            remote_instance=target_instance)

        if ol:
            # Add number of samplers to each neuron
            n_samplers = pymaid.get_sampler_counts(ol,
                                                   remote_instance=target_instance)

            for nn in ol:
                nn.sampler_count = n_samplers[str(nn.id)]

        # Always append (even if empty) so that ``overlapping`` stays aligned
        # with ``x`` for the zip below
        overlapping.append(ol)

    # Now have the user confirm merges before we actually make them
    viewer = navis.Viewer(title='Confirm merges')
    viewer.clear()
    overlap_cnf = []
    base_neurons = []
    try:
        for n, ol in zip(x, overlapping):
            # This asks user a bunch of questions prior to merge and upload
            ol, bn = confirm_overlap(n, ol, viewer=viewer)
            overlap_cnf.append(ol)
            base_neurons.append(bn)
    finally:
        # Close the viewer no matter what (including KeyboardInterrupt);
        # any exception still propagates to the caller
        viewer.close()

    for i, (n, ol, bn, m) in enumerate(zip(x, overlap_cnf, base_neurons, mesh),
                                       start=1):
        print(f'Processing neuron "{n.name}" ({n.id}) [{i}/{len(x)}]',
              flush=True)

        # If no overlapping neurons proceed with just uploading.
        if not ol:
            print('No overlapping fragments found. Uploading without merging...',
                  end='', flush=True)
            resp = pymaid.upload_neuron(n,
                                        import_tags=import_tags,
                                        import_annotations=True,
                                        import_connectors=True,
                                        remote_instance=target_instance)
            if 'error' in resp:
                return resp

            # Add annotations
            __merge_annotations(n, resp['skeleton_id'], tag, target_instance)

            msg = '\nNeuron "{}" successfully uploaded to target instance as "{}" #{}'
            print(msg.format(n.name, n.name, resp['skeleton_id']), flush=True)
            continue

        # Check if there is a duplicate skeleton ID between the to-be-merged
        # neuron and the to-merge-into neurons
        original_skid = None
        if n.id in ol.id:
            print('Fixing duplicate skeleton IDs.', flush=True)
            # Keep track of old skid
            original_skid = n.id
            # Skeleton ID must stay convertable to integer
            n.id = str(random.randint(1, 1000000))
            n._clear_temp_attr()

        # Check if there are any duplicate node IDs between neuron ``x`` and the
        # overlapping fragments and create new IDs for ``x`` if necessary
        duplicated = n.nodes[n.nodes.node_id.isin(ol.nodes.node_id.values)]

        if not duplicated.empty:
            print('Duplicate node IDs found. Regenerating node tables... ',
                  end='', flush=True)
            # New IDs start above the highest ID in either neuron
            max_ix = max(ol.nodes.node_id.max(), n.nodes.node_id.max()) + 1
            new_ids = range(max_ix, max_ix + duplicated.shape[0])
            id_map = dict(zip(duplicated.node_id, new_ids))
            # IDs without an entry in ``id_map`` are kept as they are
            n.nodes['node_id'] = n.nodes.node_id.map(
                lambda nid: id_map.get(nid, nid))
            n.nodes['parent_id'] = n.nodes.parent_id.map(
                lambda nid: id_map.get(nid, nid))
            if n.has_connectors:
                n.connectors['node_id'] = n.connectors.node_id.map(
                    lambda nid: id_map.get(nid, nid))
            n._clear_temp_attr()
            print('Done.', flush=True)

        # Combining the fragments into a single neuron is actually non-trivial:
        # 1. Collapse nodes of our input neuron `x` into within-distance nodes
        #    in the overlapping fragments (never the other way around!)
        # 2. At the same time keep connectivity (i.e. edges) of the input-neuron
        # 3. Keep track of the nodes' provenance (i.e. the contractions)
        #
        # In addition there are a lot of edge-cases to consider. For example:
        # - multiple nodes collapsing onto the same node
        # - nodes of overlapping fragments that are close enough to be collapsed
        #   (e.g. orphan synapse nodes)

        # Keep track of original skeleton IDs
        for a in ol + n:
            # Original skeleton of each node
            a.nodes['origin_skeletons'] = a.id
            if a.has_connectors:
                # Original skeleton of each connector
                a.connectors['origin_skeletons'] = a.id

        print('Generating union of all fragments... ', end='', flush=True)
        union, new_edges, _collapsed_into = collapse_nodes(n, ol,
                                                           limit=merge_limit,
                                                           base_neuron=bn,
                                                           mesh=m)
        print('Done.', flush=True)

        print('Extracting new nodes to upload... ', end='', flush=True)
        # Now we have to break the neuron into "new" fragments that we can upload
        # First get the new and old nodes
        is_new = union.nodes.origin_skeletons == n.id
        new_nodes = union.nodes[is_new].node_id.values
        old_nodes = union.nodes[union.nodes.origin_skeletons != n.id].node_id.values

        # Now remove the already existing nodes from the union
        only_new = navis.subset_neuron(union, new_nodes)

        # And then break into continuous fragments for upload
        frags = navis.break_fragments(only_new)
        print('Done.', flush=True)

        # Also get the new edges we need to generate
        to_stitch = new_edges[~new_edges.parent_id.isnull()]

        # We need this later -> no need to compute this for every uploaded fragment
        cond1b = to_stitch.node_id.isin(old_nodes)
        cond2b = to_stitch.parent_id.isin(old_nodes)

        # Now upload each fragment and keep track of new node IDs
        tn_map = {}
        for f in tqdm(frags,
                      desc='Merging new arbors',
                      leave=False,
                      disable=not use_pbars):
            # In cases of complete merging into existing neurons, the fragment
            # will have no nodes
            if f.n_nodes < 1:
                continue

            # Check if fragment is a "linker" and as such can not be skipped
            lcond1 = np.isin(f.nodes.node_id.values, new_edges.node_id.values)
            lcond2 = np.isin(f.nodes.node_id.values, new_edges.parent_id.values)

            # If not linker, check skip conditions
            if sum(lcond1) + sum(lcond2) <= 1:
                if f.cable_length < min_upload_size:
                    continue
                if f.n_nodes < min_upload_nodes:
                    continue

            # Collect origin info for this neuron if it's a CatmaidNeuron
            if isinstance(n, pymaid.CatmaidNeuron):
                source_info = {'source_type': 'segmentation'}

                if not sid_from_nodes or 'origin_skeletons' not in f.nodes.columns:
                    # If we had to change the skeleton ID due to duplication, make
                    # sure to pass the original skid as source ID
                    if original_skid:
                        source_info['source_id'] = int(original_skid)
                    else:
                        source_info['source_id'] = int(n.id)
                else:
                    if f.nodes.origin_skeletons.unique().shape[0] == 1:
                        skid = f.nodes.origin_skeletons.unique()[0]
                    else:
                        print('Warning: uploading chimera fragment with multiple '
                              'skeleton IDs! Using largest contributor ID.')
                        # Use the skeleton ID that has the most nodes
                        by_skid = f.nodes.groupby('origin_skeletons').x.count()
                        skid = by_skid.sort_values(ascending=False).index.values[0]

                    source_info['source_id'] = int(skid)

                if not isinstance(getattr(n, '_remote_instance', None), type(None)):
                    source_info['source_project_id'] = n._remote_instance.project_id
                    source_info['source_url'] = n._remote_instance.server
            else:
                # Unknown source
                source_info = {}

            resp = pymaid.upload_neuron(f,
                                        import_tags=import_tags,
                                        import_annotations=False,
                                        import_connectors=True,
                                        remote_instance=target_instance,
                                        **source_info)

            # Stop if there was any error while uploading
            if 'error' in resp:
                return resp

            # Collect old -> new node IDs
            tn_map.update(resp['node_id_map'])

            # Now check if we can create any of the new edges by joining nodes
            # Both treenode and parent ID have to be either existing nodes or
            # newly uploaded
            cond1a = to_stitch.node_id.isin(tn_map)
            cond2a = to_stitch.parent_id.isin(tn_map)

            # NOTE(review): joined edges are dropped from ``new_edges``,
            # ``cond1b`` and ``cond2b`` below but not from ``to_stitch`` -
            # confirm an edge between two uploaded nodes cannot be picked
            # up a second time here.
            to_gen = to_stitch.loc[(cond1a | cond1b) & (cond2a | cond2b)]

            # Join nodes
            for node in to_gen.itertuples():
                # Make sure our base_neuron always come out as winner on top
                if node.node_id in bn.nodes.node_id.values:
                    winner, looser = node.node_id, node.parent_id
                else:
                    winner, looser = node.parent_id, node.node_id

                # We need to map winner and looser to the new node IDs
                winner = tn_map.get(winner, winner)
                looser = tn_map.get(looser, looser)

                # And now do the join
                resp = pymaid.join_nodes(winner,
                                         looser,
                                         no_prompt=True,
                                         tag_nodes=label_joins,
                                         remote_instance=target_instance)

                # See if there was any error while uploading
                if 'error' in resp:
                    print('Skipping joining nodes '
                          '{} and {}: {} - '.format(node.node_id,
                                                    node.parent_id,
                                                    resp['error']))
                    # Skip changing confidences
                    continue

                # Pop this edge from new_edges and from condition
                new_edges.drop(node.Index, inplace=True)
                cond1b.drop(node.Index, inplace=True)
                cond2b.drop(node.Index, inplace=True)

                # Change node confidences at new join
                if label_joins:
                    pymaid.update_node_confidence({looser: 1},
                                                  remote_instance=target_instance)

        # Add annotations
        if n.has_annotations:
            __merge_annotations(n, bn, tag, target_instance)

        # Update node radii
        if update_radii and 'radius' in n.nodes.columns and np.all(n.nodes.radius):
            print('Updating radii of existing nodes... ', end='', flush=True)
            update_node_radii(source=n,
                              target=ol,
                              remote_instance=target_instance,
                              limit=merge_limit,
                              skip_existing=True)
            print('Done.', flush=True)

        print('Neuron "{}" successfully merged into target instance as "{}" #{}'.format(n.name, bn.name, bn.id),
              flush=True)

    return
def __merge_annotations(n, bn, tag, target_instance):
    """Make sure proper annotations are added.

    Collects a ``{URL} upload {tag}`` annotation (only if ``n`` carries a
    remote instance) plus any annotations already stored on ``n`` and adds
    them to ``bn`` in ``target_instance``. Does nothing if there is nothing
    to add.

    Parameters
    ----------
    n :                 neuron
                        Source neuron. Its ``_remote_instance`` (if any) and
                        ``annotations`` (if already present on the object)
                        are read.
    bn :                neuron | skeleton ID
                        What to annotate; passed straight through to
                        ``pymaid.add_annotations``.
    tag :               str | None
                        Appended to the upload annotation if it is a string.
    target_instance :   pymaid.CatmaidInstance
                        Instance in which ``bn`` lives.

    Returns
    -------
    None

    """
    to_add = []

    # Add "{URL} upload {tag} annotation"
    remote = getattr(n, '_remote_instance', None)
    if remote is not None:
        # Strip trailing slashes first: otherwise a server URL ending in "/"
        # would produce an empty last path component
        u = remote.server.rstrip('/').split('/')[-1] + ' upload'
        if isinstance(tag, str):
            u += " {}".format(tag)
        to_add.append(u)

    # Existing annotation (the individual fragments would not have inherited
    # them). NOTE(review): looked up via ``__dict__`` rather than plain
    # attribute access - presumably to avoid triggering a lazy fetch; confirm.
    if n.__dict__.get('annotations', []):
        to_add += n.annotations

    # If anything to add
    if to_add:
        pymaid.add_annotations(bn, to_add, remote_instance=target_instance)


def update_node_radii(source, target, remote_instance, limit=2, skip_existing=True):
    """Update node radii in target neuron from their nearest neighbor in source neuron.

    Parameters
    ----------
    source :            navis.TreeNeuron | pymaid.CatmaidNeuron | NeuronList
                        Neuron which node radii to use to update target
                        neuron.
    target :            navis.TreeNeuron | pymaid.CatmaidNeuron | NeuronList
                        Neuron which node radii to update.
    remote_instance :   CatmaidInstance
                        Catmaid instance in which ``target`` lives.
    limit :             int, optional
                        Max distance [um] between source and target neurons
                        for nearest neighbor search.
    skip_existing :     bool, optional
                        If True, will skip nodes in ``target`` that already
                        have a radius >0, i.e. only nodes with radius <=0
                        are updated.

    Returns
    -------
    dict
                        Server response.

    Raises
    ------
    TypeError
                        If ``source`` or ``target`` is not a
                        TreeNeuron/NeuronList.

    """
    if not isinstance(source, (navis.TreeNeuron, navis.NeuronList)):
        raise TypeError('Expected navis.TreeNeuron, pymaid.CatmaidNeuron '
                        'or NeuronList, got "{}"'.format(type(source)))

    if not isinstance(target, (navis.TreeNeuron, navis.NeuronList)):
        raise TypeError('Expected navis.TreeNeuron, pymaid.CatmaidNeuron '
                        'or NeuronList, got "{}"'.format(type(target)))

    # Turn limit from microns to nanometres
    limit *= 1000

    # Spatial index over the source nodes: this is what we query against
    tree = navis.neuron2KDTree(source, tree_type='c', data='nodes')

    nodes = target.nodes
    if skip_existing:
        # Only consider target nodes without a radius
        nodes = nodes[nodes.radius <= 0]

    # For each remaining target node get its nearest neighbor in source
    coords = nodes[['x', 'y', 'z']].values
    nn_dist, nn_ix = tree.query(coords, k=1, distance_upper_bound=limit)

    # Keep only target nodes that found a source node within the limit
    within = nn_dist <= limit
    tn_ids = nodes.loc[within].node_id.values
    new_radii = source.nodes.iloc[nn_ix[within]].radius.values

    return pymaid.update_radii(dict(zip(tn_ids, new_radii)),
                               remote_instance=remote_instance)