
#    A collection of tools to interface with manually traced and autosegmented
#    data in FAFB.
#
#    Copyright (C) 2019 Philipp Schlegel
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.

import navis

import datetime as dt
import numpy as np
import pandas as pd

from functools import partial
from tqdm.auto import trange

from .segmentation import get_lineage_graph, roots_to_supervoxels
from .utils import (
    parse_root_ids,
    get_cave_client,
    retry,
    get_synapse_areas,
    find_mat_version,
    inject_dataset,
)
from .annotations import is_proofread, parse_neuroncriteria

from ..utils import make_iterable
from ..synapses.utils import catmaid_table
from ..synapses.transmitters import collapse_nt_predictions

__all__ = [
    "get_synapses",
    "get_connectivity",
    "get_transmitter_predictions",
    "get_adjacency",
    "get_synapse_counts",
]


@parse_neuroncriteria()
@inject_dataset(disallowed=["flat_630", "flat_571"])
def get_synapse_counts(
    x,
    by_neuropil=False,
    materialization="auto",
    filtered=True,
    min_score=None,
    batch_size=10,
    *,
    dataset=None,
    **kwargs,
):
    """Fetch synapse counts for given root IDs.

    Parameters
    ----------
    x :             int | list of int | Neuron/List | NeuronCriteria
                    Either a FlyWire segment ID (i.e. root ID), a list thereof
                    or a Neuron/List. For neurons, the ``.id`` is assumed to be
                    the root ID. If you have a neuron (in FlyWire space) but
                    don't know its ID, use
                    :func:`fafbseg.flywire.neuron_to_segments` first.
    by_neuropil :   bool
                    If True, the returned DataFrame will contain a breakdown by
                    neuropil.
    materialization : int | str, optional
                    Which materialization to query:
                      - 'auto' (default) tries to find the most recent
                        materialization version at which all the query IDs
                        existed
                      - 'latest' uses the latest materialized table
                      - 'live' queries against the live data - this will be
                        much slower!
                      - pass an integer (e.g. `447`) to use a specific
                        materialization version
    filtered :      bool
                    Whether to use the filtered synapse table. Briefly, this
                    filter removes redundant and low-confidence (<= 50 cleft
                    score) synapses. See also https://tinyurl.com/4j9v7t86
                    (links to CAVE website).
    min_score :     int
                    Minimum "cleft score". Buhmann et al. used a threshold of
                    30 in their paper. However, for FlyWire analyses that
                    threshold was raised to 50 (see also `filtered`).
    batch_size :    int
                    Number of IDs to query per batch. Batches that are too
                    large can lead to truncated tables: currently individual
                    queries cannot return more than 200_000 rows and you will
                    see a warning if that limit is exceeded.
    dataset :       "public" | "production" | "sandbox", optional
                    Against which FlyWire dataset to query. If ``None`` will
                    fall back to the default dataset (see
                    :func:`~fafbseg.flywire.set_default_dataset`).
    **kwargs
                    Keyword arguments are passed through to
                    :func:`fafbseg.flywire.get_synapses`.

    Returns
    -------
    pandas.DataFrame
                    If ``by_neuropil=False``, returns counts indexed by root
                    ID. If ``by_neuropil=True``, returns counts indexed by root
                    ID and neuropil.

    See Also
    --------
    :func:`~fafbseg.flywire.get_synapses`
                    Use this function to fetch the actual synapses.

    Examples
    --------
    Get synapse counts for a given root ID:

    >>> from fafbseg import flywire
    >>> n_syn = flywire.get_synapse_counts(720575940603231916)
    Using materialization version 783.
    >>> n_syn
                         pre  post
    id
    720575940603231916  4571   887

    """
    # Parse root IDs
    ids = parse_root_ids(x).astype(np.int64)

    # First get the synapses
    syn = get_synapses(
        ids,
        pre=True,
        post=True,
        attach=False,
        min_score=min_score,
        transmitters=True,
        materialization=materialization,
        neuropils=by_neuropil,
        filtered=filtered,
        batch_size=batch_size,
        dataset=dataset,
        **kwargs,
    )

    pre = syn[syn.pre.isin(ids)]
    post = syn[syn.post.isin(ids)]

    if not by_neuropil:
        counts = pd.DataFrame()
        counts["id"] = ids
        counts["pre"] = pre.value_counts("pre").reindex(ids).fillna(0).values
        counts["post"] = post.value_counts("post").reindex(ids).fillna(0).values
        counts.set_index("id", inplace=True)
    else:
        pre_grp = pre.groupby(["pre", "neuropil"]).size()
        pre_grp = pre_grp[pre_grp > 0]
        pre_grp.index.set_names(["id", "neuropil"], inplace=True)

        post_grp = post.groupby(["post", "neuropil"]).size()
        post_grp = post_grp[post_grp > 0]
        post_grp.index.set_names(["id", "neuropil"], inplace=True)

        neuropils = np.unique(
            np.append(
                pre_grp.index.get_level_values(1), post_grp.index.get_level_values(1)
            )
        )

        index = pd.MultiIndex.from_product([ids, neuropils], names=["id", "neuropil"])

        counts = pd.concat(
            [pre_grp.reindex(index).fillna(0), post_grp.reindex(index).fillna(0)],
            axis=1,
        )
        counts.columns = ["pre", "post"]
        counts = counts[counts.max(axis=1) > 0]

    return counts
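

# ---------------------------------------------------------------------------
# A minimal sketch (not part of the public API) of the counting pattern used
# above: `value_counts` + `reindex` guarantees one row per query ID, with IDs
# that have no synapses showing up as zero. `_demo_count_synapses` and its toy
# data are hypothetical.
def _demo_count_synapses():
    ids = np.array([10, 20, 30], dtype=np.int64)  # query root IDs
    syn = pd.DataFrame({"pre": [10, 10, 20], "post": [20, 30, 10]})  # toy table

    counts = pd.DataFrame(index=pd.Index(ids, name="id"))
    # Count appearances as pre-/postsynaptic partner; missing IDs become 0
    counts["pre"] = syn.value_counts("pre").reindex(ids).fillna(0).values
    counts["post"] = syn.value_counts("post").reindex(ids).fillna(0).values
    return counts  # ID 30 has 0 presynapses but 1 postsynapse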


@parse_neuroncriteria()
@inject_dataset(disallowed=["flat_630", "flat_571"])
def get_transmitter_predictions(
    x,
    single_pred=False,
    weighted=True,
    materialization="auto",
    filtered=True,
    neuropils=None,
    batch_size=10,
    *,
    dataset=None,
    **kwargs,
):
    """Fetch neurotransmitter predictions for neurons.

    Based on Eckstein et al. (2020). The per-synapse predictions are collapsed
    into a per-neuron prediction by calculating the average confidence for
    each neurotransmitter across all synapses, weighted by the "cleft score".
    Bottom line: high-confidence synapses carry more weight than low-confidence
    synapses.

    Parameters
    ----------
    x :             int | list of int | Neuron/List | NeuronCriteria
                    Either a FlyWire segment ID (i.e. root ID), a list thereof
                    or a Neuron/List. For neurons, the ``.id`` is assumed to be
                    the root ID. If you have a neuron (in FlyWire space) but
                    don't know its ID, use
                    :func:`fafbseg.flywire.neuron_to_segments` first.
    single_pred :   bool
                    Whether to only return the highest-probability transmitter
                    for each neuron.
    weighted :      bool
                    If True, will weight predictions based on confidence:
                    higher cleft score = more weight.
    materialization : int | str, optional
                    Which materialization to query:
                      - 'auto' (default) tries to find the most recent
                        materialization version at which all the query IDs
                        existed
                      - 'latest' uses the latest materialized table
                      - 'live' queries against the live data - this will be
                        much slower!
                      - pass an integer (e.g. `447`) to use a specific
                        materialization version
    filtered :      bool
                    Whether to use the filtered synapse table. Briefly, this
                    filter removes redundant and low-confidence (<= 50 cleft
                    score) synapses. See also https://tinyurl.com/4j9v7t86
                    (links to CAVE website).
    neuropils :     str | list of str, optional
                    Provide a neuropil (e.g. ``'AL_R'``) or list thereof (e.g.
                    ``['AL_R', 'AL_L']``) to filter predictions to these ROIs.
                    Prefix a neuropil with a tilde (e.g. ``~AL_R``) to exclude
                    it.
    batch_size :    int
                    Number of IDs to query per batch. Batches that are too
                    large can lead to truncated tables: currently individual
                    queries cannot return more than 200_000 rows and you will
                    see a warning if that limit is exceeded.
    dataset :       "public" | "production" | "sandbox", optional
                    Against which FlyWire dataset to query. If ``None`` will
                    fall back to the default dataset (see
                    :func:`~fafbseg.flywire.set_default_dataset`).
    **kwargs
                    Keyword arguments are passed through to
                    :func:`fafbseg.flywire.get_synapses`.

    Returns
    -------
    pandas.DataFrame
                    If `single_pred=False`: returns a dataframe with all
                    per-transmitter confidences for each query neuron.
    dict
                    If `single_pred=True`: returns a dictionary with a
                    `(top_transmitter, confidence)` named tuple for each query
                    neuron.

    Examples
    --------
    >>> from fafbseg import flywire

    Get per-transmitter predictions for a single neuron:

    >>> flywire.get_transmitter_predictions(720575940603231916)
    Using materialization version 783.
    root_id        720575940603231916
    gaba                     0.011677
    acetylcholine            0.938961
    glutamate                0.017902
    octopamine               0.012861
    serotonin                0.012467
    dopamine                 0.006132

    Return only the most likely transmitter:

    >>> flywire.get_transmitter_predictions(720575940603231916, single_pred=True)
    Using materialization version 783.
    {720575940603231916: prediction(transmitter='acetylcholine', probability=0.9389612897479809)}

    """
    # First get the synapses
    syn = get_synapses(
        x,
        pre=True,
        post=False,
        attach=False,
        min_score=None,
        transmitters=True,
        materialization=materialization,
        neuropils=neuropils is not None,
        filtered=filtered,
        batch_size=batch_size,
        dataset=dataset,
        **kwargs,
    )

    if not isinstance(neuropils, type(None)):
        neuropils = make_iterable(neuropils)
        filter_in = [n for n in neuropils if not n.startswith("~")]
        filter_out = [n[1:] for n in neuropils if n.startswith("~")]

        if filter_in:
            syn = syn[syn.neuropil.isin(filter_in)]
        if filter_out:
            syn = syn[~syn.neuropil.isin(filter_out)]

        # Avoid setting-on-copy warning
        if syn._is_view:
            syn = syn.copy()

    # Process the predictions
    pred = collapse_nt_predictions(
        syn, single_pred=single_pred, weighted=weighted, id_col="pre"
    )

    if not single_pred:
        pred.columns.name = "root_id"
        return pred.reindex(make_iterable(x).astype(np.int64), axis=1)
    else:
        return pred
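

# ---------------------------------------------------------------------------
# A minimal sketch of the weighting scheme described in the docstring above:
# per-synapse transmitter confidences are averaged using the cleft score as
# weight, so high-confidence synapses dominate. The real logic lives in
# fafbseg.synapses.transmitters.collapse_nt_predictions; `_demo_weighted_nt`
# and its toy data are hypothetical.
def _demo_weighted_nt():
    syn = pd.DataFrame(
        {
            "cleft_score": [140, 60],  # first synapse gets ~2.3x the weight
            "acetylcholine": [0.9, 0.2],
            "gaba": [0.1, 0.8],
        }
    )
    w = syn["cleft_score"]
    # Weighted mean per transmitter across all synapses of one neuron
    return {nt: np.average(syn[nt], weights=w) for nt in ("acetylcholine", "gaba")}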


@parse_neuroncriteria()
@inject_dataset(disallowed=["flat_630", "flat_571"])
def get_synapses(
    x,
    pre=True,
    post=True,
    attach=True,
    filtered=True,
    min_score=None,
    transmitters=False,
    neuropils=False,
    clean=True,
    materialization="auto",
    batch_size=10,
    *,
    dataset=None,
    progress=True,
):
    """Fetch Buhmann et al. (2019) synapses for given neuron(s).

    Parameters
    ----------
    x :             int | list of int | Neuron/List | NeuronCriteria
                    Either a FlyWire segment ID (i.e. root ID), a list thereof
                    or a Neuron/List. For neurons, the ``.id`` is assumed to be
                    the root ID. If you have a neuron (in FlyWire space) but
                    don't know its ID, use
                    :func:`fafbseg.flywire.neuron_to_segments` first.
    pre :           bool
                    Whether to fetch presynapses for the given neurons.
    post :          bool
                    Whether to fetch postsynapses for the given neurons.
    transmitters :  bool
                    Whether to also load per-synapse neurotransmitter
                    predictions from Eckstein et al. (2020).
    neuropils :     bool
                    Whether to add a column indicating which neuropil a synapse
                    is in.
    attach :        bool
                    If True and ``x`` is a Neuron/List, the synapses will be
                    added as a ``.connectors`` table. For TreeNeurons
                    (skeletons), the synapses will be mapped to the closest
                    node. Note that the function will still return the full
                    synapse table.
    filtered :      bool
                    Whether to use the filtered synapse table. Briefly, this
                    filter removes redundant and low-confidence (<= 50 cleft
                    score) synapses. See also https://tinyurl.com/4j9v7t86
                    (links to CAVE website).
    min_score :     int
                    Minimum "cleft score". Buhmann et al. used a threshold of
                    30 in their paper. However, for FlyWire analyses that
                    threshold was raised to 50 (see also `filtered`).
    clean :         bool
                    If True, we will perform some clean-up of the connectivity
                    compared with the raw synapse information. Currently, we:

                      - drop autapses
                      - drop synapses from/to background (id 0)
                      - drop synapses that are >10um from the skeleton (only
                        if ``attach=True``)
    batch_size :    int
                    Number of IDs to query per batch. Batches that are too
                    large can lead to truncated tables: currently individual
                    queries cannot return more than 500_000 rows and you will
                    see a warning if that limit is exceeded.
    materialization : int | str, optional
                    Which materialization to query:
                      - 'auto' (default) tries to find the most recent
                        materialization version at which all the query IDs
                        existed
                      - 'latest' uses the latest materialized table
                      - 'live' queries against the live data - this will be
                        much slower!
                      - pass an integer (e.g. `447`) to use a specific
                        materialization version
    dataset :       "public" | "production" | "sandbox", optional
                    Against which FlyWire dataset to query. If ``None`` will
                    fall back to the default dataset (see
                    :func:`~fafbseg.flywire.set_default_dataset`).

    Returns
    -------
    pandas.DataFrame
                    Note that each synapse (or rather synaptic connection)
                    will show up only once. Depending on the query neurons
                    (`x`), a given row might represent a presynapse for one
                    and a postsynapse for another neuron.

    See Also
    --------
    :func:`~fafbseg.flywire.get_connectivity`
                    Use this function to fetch the edges between neurons
                    instead of individual synapses.

    Examples
    --------
    Fetch synapses for a given root ID:

    >>> from fafbseg import flywire
    >>> syn = flywire.get_synapses(720575940603231916)
    Using materialization version 783.
    >>> syn.head()                                          #doctest: +SKIP
                      pre                post  cleft_score   pre_x   pre_y  pre_z  post_x  post_y  post_z         id
    0  720575940631406673  720575940603231916           60  434336  218108  28240  434340  218204   28240    3535576
    1  720575940608044501  720575940603231916          136  429180  212316  51520  429244  212136   51520   15712693
    2  720575940627777265  720575940603231916          142  440272  215372  35240  440152  215392   35200   29684635
    3  720575940606227890  720575940603231916          147  429932  224436  41120  429968  224584   41120  111586446
    4  720575940627777265  720575940603231916          146  423856  216648  51280  423844  216528   51240   15689207

    Skeletonize a neuron and attach its synapses:

    >>> from fafbseg import flywire
    >>> sk = flywire.get_skeletons(720575940603231916)
    >>> _ = flywire.get_synapses(sk, attach=True)
    Using materialization version 783.
    >>> sk.connectors.head()                                #doctest: +SKIP
       connector_id       x       y       z  cleft_score          partner_id type  node_id
    0             0  356304  146840  145120          145  720575940627592977  pre      217
    1             1  344456  164324  162440          153  720575940537249676  pre        5
    2             2  373200  149464  162440           52  720575940599849357  pre      390
    3             3  355220  156784  151000          144  720575940537605841  pre      171
    4             4  346320  154520  151720          142  720575940635161060  pre       30

    """
    if not pre and not post:
        raise ValueError("`pre` and `post` must not both be False")

    if isinstance(materialization, str):
        if materialization not in ("latest", "live", "auto"):
            raise ValueError(
                '`materialization` must be "auto", "latest", "live" or '
                f'integer, got "{materialization}"'
            )
    elif not isinstance(materialization, int):
        raise ValueError(
            '`materialization` must be "auto", "latest", "live" or integer, '
            f'got "{type(materialization)}"'
        )

    if (min_score is not None) and (min_score < 50) and filtered:
        msg = (
            "Querying synapse table with `filtered=True` already removes "
            "synaptic connections with cleft_score <= 50. If you want less "
            "confident connections set `filtered=False`. Note that this will "
            "also drop the de-duplication (see docstring)."
        )
        navis.config.logger.warning(msg)

    # Parse root IDs
    ids = parse_root_ids(x).astype(np.int64)

    # Get the CAVE client
    client = get_cave_client(dataset=dataset)

    # Check if IDs existed at this materialization
    if materialization == "latest":
        materialization = client.materialize.most_recent_version()

    if materialization == "auto":
        materialization = find_mat_version(ids, dataset=dataset, verbose=progress)
    else:
        _check_ids(ids, materialization=materialization, dataset=dataset)

    columns = [
        "pre_pt_root_id",
        "post_pt_root_id",
        "cleft_score",
        "pre_pt_position",
        "post_pt_position",
        "id",
    ]
    sv_cols = ["pre_pt_supervoxel_id", "post_pt_supervoxel_id"]

    if transmitters:
        columns += ["gaba", "ach", "glut", "oct", "ser", "da"]

    if materialization == "live" and filtered:
        raise ValueError(
            "It is currently not possible to fetch filtered "
            "synapses in live queries. You can set `filtered=False` "
            "but please be aware that this will query the "
            "unfiltered synapse table. See docs for details."
        )
    elif materialization == "live":
        func = partial(
            retry(client.materialize.live_query),
            table=client.materialize.synapse_table,
            timestamp=dt.datetime.utcnow(),
            split_positions=True,
            # NB: there is a bug in CAVE which causes empty results if we
            # don't ask for supervoxels
            select_columns=columns + sv_cols,
        )
    elif filtered:
        func = partial(
            retry(client.materialize.query_view),
            view_name="valid_synapses_nt_np_v6",
            materialization_version=materialization,
            split_positions=True,
            select_columns=columns,
        )
    else:
        func = partial(
            retry(client.materialize.query_table),
            table=client.materialize.synapse_table,
            split_positions=True,
            materialization_version=materialization,
            select_columns=columns,
        )

    syn = []
    for i in trange(
        0,
        len(ids),
        batch_size,
        desc="Fetching synapses",
        disable=not progress or len(ids) <= batch_size,
    ):
        batch = ids[i : i + batch_size]
        if post:
            syn.append(
                func(filter_in_dict=dict(post_pt_root_id=batch)).drop(
                    sv_cols, axis=1, errors="ignore"
                )
            )
        if pre:
            syn.append(
                func(filter_in_dict=dict(pre_pt_root_id=batch)).drop(
                    sv_cols, axis=1, errors="ignore"
                )
            )

    # Drop attrs to avoid issues when concatenating
    for df in syn:
        df.attrs = {}

    # Drop empty tables but make sure to keep at least one if all are empty
    if not all([t.empty for t in syn]):
        syn = [t for t in syn if not t.empty]

    # Combine results from batches
    syn = pd.concat(syn, axis=0, ignore_index=True)

    # Rename some of those columns
    syn.rename(
        {
            "post_pt_root_id": "post",
            "pre_pt_root_id": "pre",
            "post_pt_position_x": "post_x",
            "post_pt_position_y": "post_y",
            "post_pt_position_z": "post_z",
            "pre_pt_position_x": "pre_x",
            "pre_pt_position_y": "pre_y",
            "pre_pt_position_z": "pre_z",
            "idx": "id",  # this may exist if we made a join query
            "id_x": "id",  # this may exist if we made a join query
        },
        axis=1,
        inplace=True,
    )

    # Depending on how queries were batched, we need to drop duplicate synapses
    syn.drop_duplicates("id", inplace=True)

    if transmitters:
        syn.rename(
            {
                "ach": "acetylcholine",
                "glut": "glutamate",
                "oct": "octopamine",
                "ser": "serotonin",
                "da": "dopamine",
            },
            axis=1,
            inplace=True,
        )

    # Next we need to run some clean-up:
    # Drop below-threshold connections
    if min_score:
        syn = syn[syn.cleft_score >= min_score]

    if clean:
        # Drop autapses
        syn = syn[syn.pre != syn.post]
        # Drop connections involving 0 (background, glia)
        syn = syn[(syn.pre != 0) & (syn.post != 0)]

    # Avoid copy warning
    if syn._is_view:
        syn = syn.copy()

    if neuropils:
        syn["neuropil"] = get_synapse_areas(syn["id"].values)
        syn["neuropil"] = syn.neuropil.astype("category")

    # Drop ID column
    # syn.drop('id', axis=1, inplace=True)

    if isinstance(x, navis.core.BaseNeuron):
        x = navis.NeuronList([x])

    if attach and isinstance(x, navis.NeuronList):
        for n in x:
            presyn = postsyn = pd.DataFrame([])
            add_cols = ["neuropil"] if neuropils else []
            if pre:
                cols = ["pre_x", "pre_y", "pre_z", "cleft_score", "post"] + add_cols
                presyn = syn.loc[syn.pre == np.int64(n.id), cols].rename(
                    {"pre_x": "x", "pre_y": "y", "pre_z": "z", "post": "partner_id"},
                    axis=1,
                )
                presyn["type"] = "pre"
            if post:
                cols = ["post_x", "post_y", "post_z", "cleft_score", "pre"] + add_cols
                postsyn = syn.loc[syn.post == np.int64(n.id), cols].rename(
                    {"post_x": "x", "post_y": "y", "post_z": "z", "pre": "partner_id"},
                    axis=1,
                )
                postsyn["type"] = "post"

            connectors = pd.concat((presyn, postsyn), axis=0, ignore_index=True)

            # Turn type column into categorical to save memory
            connectors["type"] = connectors["type"].astype("category")

            # If TreeNeuron, map each synapse to a node
            if isinstance(n, navis.TreeNeuron):
                tree = navis.neuron2KDTree(n, data="nodes")
                dist, ix = tree.query(connectors[["x", "y", "z"]].values)

                too_far = dist > 10_000
                if any(too_far) and clean:
                    connectors = connectors[~too_far].copy()
                    ix = ix[~too_far]

                connectors["node_id"] = n.nodes.node_id.values[ix]

            # Add an ID column for navis' sake
            connectors.insert(0, "connector_id", np.arange(connectors.shape[0]))

            n.connectors = connectors

    syn.attrs["materialization"] = materialization

    return syn
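

# ---------------------------------------------------------------------------
# A minimal sketch of the `attach=True` logic above: synapse positions are
# snapped to the closest skeleton node via a KD-tree, and anything farther
# than 10 um (10_000 nm) is dropped when `clean=True`. `_demo_snap_synapses`
# and its toy coordinates are hypothetical; scipy is available here via
# navis' dependencies.
def _demo_snap_synapses():
    from scipy.spatial import cKDTree

    node_xyz = np.array([[0, 0, 0], [1000, 0, 0], [2000, 0, 0]])  # nodes (nm)
    syn_xyz = np.array([[900, 50, 0], [50_000, 0, 0]])  # 2nd synapse is far off

    tree = cKDTree(node_xyz)
    dist, ix = tree.query(syn_xyz)  # nearest node (index) per synapse

    too_far = dist > 10_000  # same 10 um cut-off as in `get_synapses`
    return ix[~too_far]  # -> array([1]); the stray synapse is dropped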


@parse_neuroncriteria()
@inject_dataset(disallowed=["flat_630", "flat_571"])
def get_adjacency(
    sources,
    targets=None,
    materialization="auto",
    neuropils=None,
    filtered=True,
    min_score=None,
    batch_size=1000,
    *,
    dataset=None,
    progress=True,
):
    """Fetch adjacency matrix.

    Notes
    -----
    As of May 2023, CAVE provides "views" of materialized tables. This
    includes a view with neuron edges (as opposed to individual synaptic
    connections) which can be much faster to query. We will automatically use
    this view _if_:

     1. `filtered=True` (default is `True`)
     2. `min_score=None` or `min_score=50` (default is `None`)
     3. `neuropils=None` (default is `None`)
     4. `mat != 'live'` (default is "auto", which can end up as "live")

    Parameters
    ----------
    sources :       int | list of int | Neuron/List | NeuronCriteria
                    Either a FlyWire segment ID (i.e. root ID), a list thereof
                    or a Neuron/List. For neurons, the ``.id`` is assumed to be
                    the root ID. If you have a neuron (in FlyWire space) but
                    don't know its ID, use
                    :func:`fafbseg.flywire.neuron_to_segments` first.
    targets :       int | list of int | Neuron/List | NeuronCriteria, optional
                    Either a FlyWire segment ID (i.e. root ID), a list thereof
                    or a Neuron/List. For neurons, the ``.id`` is assumed to be
                    the root ID. If you have a neuron (in FlyWire space) but
                    don't know its ID, use
                    :func:`fafbseg.flywire.neuron_to_segments` first. If
                    ``None``, will assume ``targets = sources``.
    neuropils :     str | list of str, optional
                    Provide a neuropil (e.g. ``'AL_R'``) or list thereof (e.g.
                    ``['AL_R', 'AL_L']``) to filter connectivity to these ROIs.
                    Prefix a neuropil with a tilde (e.g. ``~AL_R``) to exclude
                    it.
    filtered :      bool
                    Whether to use the filtered synapse table. Briefly, this
                    filter removes redundant and low-confidence (<= 50 cleft
                    score) synapses. See also https://tinyurl.com/4j9v7t86
                    (links to CAVE website).
    min_score :     int
                    Minimum "cleft score". Buhmann et al. used a threshold of
                    30 in their paper. However, for FlyWire analyses that
                    threshold was raised to 50 (see also `filtered`).
    batch_size :    int
                    Number of IDs to query per batch. Batches that are too
                    large can lead to truncated tables: currently individual
                    queries do not return more than 200_000 connections. If you
                    see a warning that this limit has been exceeded, decrease
                    the batch size!
    materialization : int | str, optional
                    Which materialization to query:
                      - 'auto' (default) tries to find the most recent
                        materialization version at which all the query IDs
                        existed
                      - 'latest' uses the latest materialized table
                      - 'live' queries against the live data - this will be
                        much slower!
                      - pass an integer (e.g. `447`) to use a specific
                        materialization version
    dataset :       "public" | "production" | "sandbox", optional
                    Against which FlyWire dataset to query. If ``None`` will
                    fall back to the default dataset (see
                    :func:`~fafbseg.flywire.set_default_dataset`).

    Returns
    -------
    adjacency :     pd.DataFrame
                    Adjacency matrix. Rows (sources) and columns (targets) are
                    in the same order as the input.

    See Also
    --------
    :func:`~fafbseg.flywire.get_connectivity`
                    Use this function to fetch all up- and/or downstream
                    partners for a set of neurons.

    Examples
    --------
    Get connections between given root IDs:

    >>> from fafbseg import flywire
    >>> adj = flywire.get_adjacency(sources=720575940631406673,
    ...                             targets=720575940603231916)
    Using materialization version 783.
    >>> adj
    target              720575940603231916
    source
    720575940631406673                   5

    """
    if isinstance(targets, type(None)):
        targets = sources

    if isinstance(materialization, str):
        if materialization not in ("latest", "live", "auto"):
            raise ValueError(
                '`materialization` must be "auto", "latest", "live" or '
                f'integer, got "{materialization}"'
            )
    elif not isinstance(materialization, int):
        raise ValueError(
            '`materialization` must be "auto", "latest", "live" or integer, '
            f'got "{type(materialization)}"'
        )

    # Parse root IDs
    sources = parse_root_ids(sources).astype(np.int64)
    targets = parse_root_ids(targets).astype(np.int64)
    both = np.unique(np.append(sources, targets))

    client = get_cave_client(dataset=dataset)

    # Check if IDs existed at this materialization
    if materialization == "latest":
        materialization = client.materialize.most_recent_version()

    if materialization == "auto":
        materialization = find_mat_version(both, dataset=dataset, verbose=progress)
    else:
        _check_ids(both, materialization=materialization, dataset=dataset)

    columns = ["pre_pt_root_id", "post_pt_root_id", "cleft_score", "id"]
    sv_cols = ["pre_pt_supervoxel_id", "post_pt_supervoxel_id"]

    if materialization == "live" and filtered:
        raise ValueError(
            "It is currently not possible to fetch filtered "
            "synapses in live queries. You can set `filtered=False` "
            "but please be aware that this will query the "
            "unfiltered synapse table. See docs for details."
        )
    elif materialization == "live":
        func = partial(
            retry(client.materialize.live_query),
            table=client.materialize.synapse_table,
            timestamp=dt.datetime.utcnow(),
            # NB: there is a bug in CAVE which causes empty results if we
            # don't ask for supervoxels
            select_columns=columns + sv_cols,
        )
    elif filtered:
        has_view = "valid_connection_v2" in client.materialize.get_views(
            materialization
        )
        no_np = isinstance(neuropils, type(None))
        no_score_thresh = not min_score or min_score == 50
        if has_view & no_np & no_score_thresh:
            columns = ["pre_pt_root_id", "post_pt_root_id", "n_syn"]
            func = partial(
                retry(client.materialize.query_view),
                view_name="valid_connection_v2",
                select_columns=columns,
                materialization_version=materialization,
            )
        else:
            func = partial(
                retry(client.materialize.query_view),
                view_name="valid_synapses_nt_np_v6",
                materialization_version=materialization,
                split_positions=True,
                select_columns=columns,
            )
    else:
        func = partial(
            retry(client.materialize.query_table),
            table=client.materialize.synapse_table,
            materialization_version=materialization,
            select_columns=columns,
        )

    syn = []
    for i in trange(
        0,
        len(sources),
        batch_size,
        desc="Fetching adjacency",
        disable=not progress or len(sources) <= batch_size,
    ):
        source_batch = sources[i : i + batch_size]
        for k in range(0, len(targets), batch_size):
            target_batch = targets[k : k + batch_size]

            filter_in_dict = dict(
                post_pt_root_id=target_batch, pre_pt_root_id=source_batch
            )
            this = func(filter_in_dict=filter_in_dict)

            # We need to drop the .attrs (which contain meta data from
            # queries) - otherwise we run into issues when concatenating
            this.attrs = {}

            if not this.empty:
                syn.append(this.drop(sv_cols, axis=1, errors="ignore"))

    # Drop empty tables but make sure to keep at least one if all are empty
    if not all([t.empty for t in syn]):
        syn = [t for t in syn if not t.empty]

    # Combine results from batches
    if len(syn):
        syn = pd.concat(syn, axis=0, ignore_index=True)
    else:
        adj = pd.DataFrame(
            np.zeros((len(sources), len(targets))), index=sources, columns=targets
        )
        adj.index.name = "source"
        adj.columns.name = "target"
        return adj

    # Depending on how queries were batched, we need to drop duplicate synapses
    if "id" in syn.columns:
        syn.drop_duplicates("id", inplace=True)
    else:
        syn.drop_duplicates(
            ["pre_pt_root_id", "post_pt_root_id", "n_syn"], inplace=True
        )

    # Subset to the desired neuropils
    if not isinstance(neuropils, type(None)):
        neuropils = make_iterable(neuropils)

        if len(neuropils):
            filter_in = [n for n in neuropils if not n.startswith("~")]
            filter_out = [n[1:] for n in neuropils if n.startswith("~")]

            syn["neuropil"] = get_synapse_areas(syn["id"].values)
            syn["neuropil"] = syn.neuropil.astype("category")

            if filter_in:
                syn = syn[syn.neuropil.isin(filter_in)]
            if filter_out:
                syn = syn[~syn.neuropil.isin(filter_out)]

            if syn._is_view:
                syn = syn.copy()

    # Rename some of those columns
    syn.rename(
        {"post_pt_root_id": "post", "pre_pt_root_id": "pre", "n_syn": "weight"},
        axis=1,
        inplace=True,
    )

    # Next we need to run some clean-up:
    # Drop below-threshold connections
    if min_score and "cleft_score" in syn.columns:
        syn = syn[syn.cleft_score >= min_score]

    # Aggregate
    if "weight" not in syn.columns:
        cn = syn.groupby(["pre", "post"], as_index=False).size()
    else:
        cn = syn
    cn.columns = ["source", "target", "weight"]

    # Pivot
    adj = cn.pivot(index="source", columns="target", values="weight").fillna(0)

    # Index to match order and add any missing neurons
    adj = adj.reindex(index=sources, columns=targets).fillna(0)

    return adj
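

# ---------------------------------------------------------------------------
# A minimal sketch of the final aggregation/pivot in `get_adjacency` above:
# an edge list is grouped into weights and pivoted into a source x target
# matrix; `reindex` restores the requested order and re-adds unconnected
# neurons as zero rows/columns. `_demo_edge_list_to_adjacency` and its toy
# data are hypothetical.
def _demo_edge_list_to_adjacency():
    sources = np.array([1, 2], dtype=np.int64)
    targets = np.array([2, 3], dtype=np.int64)
    syn = pd.DataFrame({"pre": [1, 1, 2], "post": [2, 2, 3]})  # toy synapses

    cn = syn.groupby(["pre", "post"], as_index=False).size()
    cn.columns = ["source", "target", "weight"]

    adj = cn.pivot(index="source", columns="target", values="weight").fillna(0)
    # -> adj.loc[1, 2] == 2, adj.loc[2, 3] == 1, everything else 0
    return adj.reindex(index=sources, columns=targets).fillna(0)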


@parse_neuroncriteria()
@inject_dataset(disallowed=["flat_630", "flat_571"])
def get_connectivity(
    x,
    clean=True,
    style="simple",
    upstream=True,
    downstream=True,
    proofread_only=False,
    transmitters=False,
    neuropils=None,
    filtered=True,
    min_score=None,
    batch_size=30,
    materialization="auto",
    *,
    progress=True,
    dataset=None,
):
    """Fetch Buhmann et al. (2019) connectivity for given neuron(s).

    Notes
    -----
    As of May 2023, CAVE provides "views" of materialized tables. This
    includes a view with neuron edges (as opposed to individual synaptic
    connections) which can be much faster to query. We will automatically use
    this view _if_:

     1. `filtered=True` (default is `True`)
     2. `min_score=None` or `min_score=50`
     3. `neuropils=None` (default is `None`)
     4. `mat != 'live'` (default is "auto", which can end up as "live")

    Parameters
    ----------
    x :             int | list of int | Neuron/List | NeuronCriteria
                    Either a FlyWire root ID, a list thereof or a Neuron/List.
                    For neurons, the ``.id`` is assumed to be the root ID. If
                    you have a neuron (in FlyWire space) but don't know its ID,
                    use :func:`fafbseg.flywire.neuron_to_segments` first.
    clean :         bool
                    If True, we will perform some clean-up of the connectivity
                    compared with the raw synapse information. Currently, we:

                      - drop autapses
                      - drop synapses from/to background (id 0)
    style :         "simple" | "catmaid"
                    Style of the returned table.
    upstream :      bool
                    Whether to fetch upstream connectivity of ``x``.
    downstream :    bool
                    Whether to fetch downstream connectivity of ``x``.
    proofread_only : bool
                    Whether to filter out root IDs that have not been
                    proofread. Query IDs (i.e. `x`) will never be excluded
                    regardless of proofreading status!
    transmitters :  bool
                    If True, will attach the best guess for the transmitter
                    for a given connection based on the predictions in
                    Eckstein et al. (2020). IMPORTANT: the predictions are
                    based solely on the connections retrieved as part of this
                    query, which likely represent only a fraction of each
                    neuron's total synapses. As such the predictions need to
                    be taken with a grain of salt - in particular for weak
                    connections! To get the "full" predictions see
                    :func:`fafbseg.flywire.predict_transmitter`.
    neuropils :     bool | str | list of str, optional
                    If True, will return edges broken down by neuropils. You
                    can also provide a neuropil (e.g. ``'AL_R'``) or list
                    thereof (e.g. ``['AL_R', 'AL_L']``) to filter connectivity
                    to these ROIs. Prefix a neuropil with a tilde (e.g.
                    ``~AL_R``) to exclude it.
    filtered :      bool
                    Whether to use the filtered synapse table. Briefly, this
                    filter removes redundant and low-confidence (<= 50 cleft
                    score) synapses. See also https://tinyurl.com/4j9v7t86
                    (links to CAVE website).
    min_score :     int
                    Minimum "cleft score". Buhmann et al. used a threshold of
                    30 in their paper. However, for FlyWire analyses that
                    threshold was raised to 50 (see also `filtered`).
    batch_size :    int
                    Number of IDs to query per batch. Batches that are too
                    large can lead to truncated tables: currently individual
                    queries cannot return more than 200_000 rows and you will
                    see a warning if that limit is exceeded.
    materialization : int | str, optional
                    Which materialization to query:
                      - 'auto' (default) tries to find the most recent
                        materialization version at which the query IDs
                        co-exist
                      - 'latest' uses the latest materialized table
                      - 'live' queries against the live data - this will be
                        much slower!
                      - pass an integer (e.g. `447`) to use a specific
                        materialization version
    dataset :       "public" | "production" | "sandbox", optional
                    Against which FlyWire dataset to query. If ``None`` will
                    fall back to the default dataset (see
                    :func:`~fafbseg.flywire.set_default_dataset`).

    Returns
    -------
    pd.DataFrame
                    Connectivity table.

    See Also
    --------
    :func:`~fafbseg.flywire.get_adjacency`
                    Use this function to fetch connections between two sets of
                    neurons.

    Examples
    --------
    Get connections from/to given root ID(s):

    >>> from fafbseg import flywire
    >>> edges = flywire.get_connectivity(720575940603231916)
    Using materialization version 783.
    >>> edges.head()
                      pre                post  weight
    0  720575940635945919  720575940603231916      83
    1  720575940620541349  720575940603231916      58
    2  720575940603231916  720575940629163931      50
    3  720575940603231916  720575940635945919      46
    4  720575940603231916  720575940646122804      42

    """
    if not upstream and not downstream:
        raise ValueError("`upstream` and `downstream` must not both be False")

    if style not in ("simple", "catmaid"):
        raise ValueError(f'`style` must be "simple" or "catmaid", got "{style}"')

    if style == "catmaid":
        if transmitters:
            raise ValueError('`style` must be "simple" when asking for transmitters')
        if neuropils is True:
            raise ValueError('`style` must be "simple" when asking for neuropils')

    if isinstance(materialization, str):
        if materialization not in ("latest", "live", "auto"):
            raise ValueError(
                '`materialization` must be "auto", "latest", "live" or '
                f'integer, got "{materialization}"'
            )
    elif not isinstance(materialization, int):
        raise ValueError(
            '`materialization` must be "auto", "latest", "live" or integer, '
            f'got "{type(materialization)}"'
        )

    # Parse root IDs
    ids = parse_root_ids(x)

    client = get_cave_client(dataset=dataset)

    # Check if IDs existed at this materialization
    if materialization == "latest":
        materialization = client.materialize.most_recent_version()

    if materialization == "auto":
        materialization = find_mat_version(ids, dataset=dataset, verbose=progress)
    else:
        _check_ids(ids, materialization=materialization, dataset=dataset)

    columns = ["pre_pt_root_id", "post_pt_root_id", "cleft_score", "id"]
    sv_cols = ["pre_pt_supervoxel_id", "post_pt_supervoxel_id"]

    if transmitters:
        columns += ["gaba", "ach", "glut", "oct", "ser", "da"]

    if materialization == "live" and filtered:
        raise ValueError(
            "It is currently not possible to fetch filtered "
            "synapses in live queries. You can set `filtered=False` "
            "but please be aware that this will query the "
            "unfiltered synapse table. See docs for details."
        )
    elif materialization == "live":
        func = partial(
            retry(client.materialize.live_query),
            table=client.materialize.synapse_table,
            timestamp=dt.datetime.utcnow(),
            select_columns=columns + sv_cols,
        )
    elif filtered:
        # Check if there is a view for valid connections
        has_view = "valid_connection_v2" in client.materialize.get_views(
            materialization
        )
        no_np = isinstance(neuropils, type(None))
        no_score_thresh = (not min_score) or (min_score == 50)
        # If all conditions are met, we can use the view
        if has_view & no_np & no_score_thresh:
            columns = ["pre_pt_root_id", "post_pt_root_id", "n_syn"]
            if transmitters:
                columns += ["gaba", "ach", "glut", "oct", "ser", "da"]
            func = partial(
                retry(client.materialize.query_view),
                view_name="valid_connection_v2",
                select_columns=columns,
                materialization_version=materialization,
            )
        # Otherwise we need to query the valid-synapses view
        else:
            func = partial(
                retry(client.materialize.query_view),
                view_name="valid_synapses_nt_np_v6",
                materialization_version=materialization,
                split_positions=True,
                select_columns=columns,
            )
    else:
        func = partial(
            retry(client.materialize.query_table),
            table=client.materialize.synapse_table,
            materialization_version=materialization,
            select_columns=columns,
        )

    syn = []
    for i in trange(
        0,
        len(ids),
        batch_size,
        desc="Fetching connectivity",
        disable=not progress or len(ids) <= batch_size,
    ):
        batch = ids[i : i + batch_size]
        if upstream:
            syn.append(func(filter_in_dict=dict(post_pt_root_id=batch)))
        if downstream:
            syn.append(func(filter_in_dict=dict(pre_pt_root_id=batch)))

    # Some clean-up
    for df in syn:
        # Drop supervoxel columns (if they exist)
        df.drop(sv_cols, axis=1, errors="ignore", inplace=True)
        # Drop `attrs` to avoid issues when concatenating
        df.attrs = {}

    # Drop empty tables but make sure to keep at least one if all are empty
    if not all([t.empty for t in syn]):
        syn = [t for t in syn if not t.empty]

    # Combine results from batches
    syn = pd.concat(syn, axis=0, ignore_index=True)

    # Depending on how queries were batched, we need to drop duplicate synapses
    if "id" in syn.columns:
        syn.drop_duplicates("id", inplace=True)
    else:
        syn.drop_duplicates(
            ["pre_pt_root_id", "post_pt_root_id", "n_syn"], inplace=True
        )

    # Subset to the desired neuropils
    if not isinstance(neuropils, type(None)):
        syn["neuropil"] = get_synapse_areas(syn["id"].values)
        syn["neuropil"] = syn.neuropil.astype("category")

        if not isinstance(neuropils, bool):
            neuropils = make_iterable(neuropils)

            if len(neuropils):
                filter_in = [n for n in neuropils if not n.startswith("~")]
                filter_out = [n[1:] for n in neuropils if n.startswith("~")]

                if filter_in:
                    syn = syn[syn.neuropil.isin(filter_in)]
                if filter_out:
                    syn = syn[~syn.neuropil.isin(filter_out)]

    # Rename some of those columns
    syn.rename(
        {
            "post_pt_root_id": "post",
            "pre_pt_root_id": "pre",
            "ach": "acetylcholine",
            "glut": "glutamate",
            "oct": "octopamine",
            "ser": "serotonin",
            "da": "dopamine",
            "n_syn": "weight",
        },
        axis=1,
        inplace=True,
    )

    # Next we need to run some clean-up:
    # Drop below-threshold connections
    if min_score and "cleft_score" in syn.columns:
        syn = syn[syn.cleft_score >= min_score]

    if clean:
        # Drop autapses
        syn = syn[syn.pre != syn.post]
        # Drop connections involving 0 (background, glia)
        syn = syn[(syn.pre != 0) & (syn.post != 0)]

    # Turn into a connectivity table
    if "weight" not in syn.columns:
        if neuropils is True and "neuropil" in syn.columns:
            cn_table = (
                syn.groupby(["pre", "post", "neuropil"], as_index=False)
                .size()
                .rename({"size": "weight"}, axis=1)
            )
        else:
            cn_table = (
                syn.groupby(["pre", "post"], as_index=False)
                .size()
                .rename({"size": "weight"}, axis=1)
            )
    else:
        cn_table = syn

    # Filter to proofread neurons only
    if proofread_only:
        all_ids = np.unique(cn_table[["pre", "post"]].values.flatten())
        is_pr = all_ids[is_proofread(all_ids, materialization=materialization)]

        # Make sure we don't drop our query neurons
        keep = np.append(is_pr, ids)

        cn_table = cn_table[cn_table.pre.isin(keep) & cn_table.post.isin(keep)]

    # Style
    if style == "catmaid":
        cn_table = catmaid_table(cn_table, query_ids=ids)
    else:
        cn_table = cn_table.copy()  # avoid setting-on-copy warning
        cn_table.sort_values("weight", ascending=False, inplace=True)
        cn_table.reset_index(drop=True, inplace=True)

    if transmitters:
        # Generate per-neuron predictions
        pred = collapse_nt_predictions(syn, single_pred=True, id_col="pre")

        cn_table["pred_nt"] = cn_table.pre.map(lambda x: pred.get(x, [None])[0])
        cn_table["pred_conf"] = cn_table.pre.map(
            lambda x: pred.get(x, [None, None])[1]
        )

    cn_table.attrs["materialization"] = materialization

    return cn_table
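

# ---------------------------------------------------------------------------
# A minimal sketch of the `clean=True` + aggregation steps in
# `get_connectivity` above: autapses and background (ID 0) are removed before
# synapses are collapsed into weighted edges. `_demo_clean_edges` and its toy
# data are hypothetical.
def _demo_clean_edges():
    syn = pd.DataFrame(
        {
            "pre": [1, 1, 1, 0],
            "post": [2, 2, 1, 2],  # 3rd row is an autapse, 4th is background
        }
    )
    syn = syn[syn.pre != syn.post]  # drop autapses
    syn = syn[(syn.pre != 0) & (syn.post != 0)]  # drop background

    return (
        syn.groupby(["pre", "post"], as_index=False)
        .size()
        .rename({"size": "weight"}, axis=1)
    )  # -> single edge 1 -> 2 with weight 2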


@inject_dataset()
def get_supervoxel_synapses(
    x, pre=True, post=True, batch_size=300, progress=True, *, dataset=None
):
    """Fetch Buhmann et al. (2019) synapses for given supervoxels.

    Parameters
    ----------
    x :             int | list of int
                    Supervoxel IDs.
    pre :           bool
                    Whether to fetch presynapses for the given supervoxels.
    post :          bool
                    Whether to fetch postsynapses for the given supervoxels.
    batch_size :    int
                    Number of IDs to query per batch. Batches that are too
                    large can lead to truncated tables: currently individual
                    queries cannot return more than 200_000 rows and you will
                    see a warning if that limit is exceeded.
    dataset :       "public" | "production" | "sandbox" | "flat_630", optional
                    Against which FlyWire dataset to query. If ``None`` will
                    fall back to the default dataset (see
                    :func:`~fafbseg.flywire.set_default_dataset`).

    Returns
    -------
    pandas.DataFrame

    """
    if not pre and not post:
        raise ValueError("`pre` and `post` must not both be False")

    # Parse supervoxel IDs
    ids = parse_root_ids(x)

    client = get_cave_client(dataset=dataset)

    columns = [
        "pre_pt_supervoxel_id",
        "post_pt_supervoxel_id",
        "cleft_score",
        "pre_pt_position",
        "post_pt_position",
        "id",
    ]

    func = partial(
        retry(client.materialize.query_table),
        table=client.materialize.synapse_table,
        split_positions=True,
        select_columns=columns,
    )

    syn = []
    for i in trange(
        0,
        len(ids),
        batch_size,
        desc="Fetching synapses",
        disable=not progress or len(ids) <= batch_size,
    ):
        batch = ids[i : i + batch_size]
        if post:
            syn.append(func(filter_in_dict=dict(post_pt_supervoxel_id=batch)))
        if pre:
            syn.append(func(filter_in_dict=dict(pre_pt_supervoxel_id=batch)))

    # Drop attrs to avoid issues when concatenating
    for df in syn:
        df.attrs = {}

    # Drop empty tables but make sure to keep at least one if all are empty
    if not all([t.empty for t in syn]):
        syn = [t for t in syn if not t.empty]

    # Combine results from batches
    syn = pd.concat(syn, axis=0, ignore_index=True)

    # Depending on how queries were batched, we need to drop duplicate synapses
    syn.drop_duplicates("id", inplace=True)

    # Rename some of those columns
    syn.rename(
        {
            "post_pt_position_x": "post_x",
            "post_pt_position_y": "post_y",
            "post_pt_position_z": "post_z",
            "pre_pt_position_x": "pre_x",
            "pre_pt_position_y": "pre_y",
            "pre_pt_position_z": "pre_z",
        },
        axis=1,
        inplace=True,
    )

    # Next we need to run some clean-up:
    # Drop below-threshold connections
    # if min_score:
    #     syn = syn[syn.cleft_score >= min_score]

    # Avoid setting-on-copy warnings
    if syn._is_view:
        syn = syn.copy()

    return syn


@inject_dataset()
def synapse_contributions(x, *, dataset=None):
    """Return synapse contributions to given neuron."""
    # Grab synapses for this neuron
    syn = get_synapses(x)
    pre = syn[syn.pre == x]
    post = syn[syn.post == x]

    print(f"Neuron has {len(pre)} pre- and {len(post)} postsynapses")

    G = get_lineage_graph(x, user=True, size=True)

    data = []
    for n in navis.config.tqdm(G.nodes):
        pred = list(G.predecessors(n))

        # If this is from the base segmentation
        if not pred:
            continue

        # If only one predecessor, this came out of a split
        if len(pred) == 1:
            sv_added = 0
            sv_removed = G.nodes[n]["size"] - G.nodes[pred[0]]["size"]
            pre_added = post_added = -1
        # Two predecessors means this was the result of a merge
        elif len(pred) == 2:
            sizes = (G.nodes[pred[0]]["size"], G.nodes[pred[1]]["size"])
            smaller = np.argmin(sizes)
            sv_removed = 0
            sv_added = sizes[smaller]

            sv = roots_to_supervoxels(pred[smaller], dataset=dataset)[pred[smaller]]
            pre_added = pre.pre_pt_supervoxel_id.isin(sv).sum()
            post_added = post.post_pt_supervoxel_id.isin(sv).sum()
        else:
            raise ValueError(f"Unexpected number of predecessors for {n}: {len(pred)}")

        data.append(
            [
                n,
                G.nodes[n].get("user", "NA"),
                sv_added,
                sv_removed,
                pre_added,
                post_added,
            ]
        )

    # TODO:
    # - for every merge operation, count only the number of supervoxels added
    #   that actually made it into the current root ID
    # - for every split operation, count only the supervoxels that weren't
    #   added back later on
    # - count every added/removed supervoxel only once
    # IDEAS:
    # - use the largest original root ID as the point of reference

    return pd.DataFrame(
        data,
        columns=[
            "root_id",
            "user",
            "sv_added",
            "sv_removed",
            "pre_added",
            "post_added",
        ],
    )


def _check_ids(ids, materialization, dataset="production"):
    """Check whether IDs existed at the given materialization.

    Parameters
    ----------
    ids :               iterable
    materialization :   "live" | "latest" | int

    Returns
    -------
    None

    """
    client = get_cave_client(dataset=dataset)

    ids = np.asarray(ids)

    _is_latest_roots = retry(client.chunkedgraph.is_latest_roots)
    _get_timestamp = retry(client.materialize.get_timestamp)
    _get_root_timestamps = retry(client.chunkedgraph.get_root_timestamps)

    # Check if any of these root IDs are outdated
    if materialization == "live":
        not_latest = ids[~_is_latest_roots(ids)]
        if any(not_latest):
            print(
                f'Root ID(s) {", ".join(not_latest.astype(str))} are outdated '
                "and live connectivity might be inaccurate."
            )
    else:
        if materialization == "latest":
            materialization = client.materialize.most_recent_version()

        # Is the root ID more recent than the materialization?
        ts_m = _get_timestamp(materialization)
        ts_r = _get_root_timestamps(ids)
        too_recent = ids[ts_r > ts_m]
        if any(too_recent):
            print(
                "Some root IDs are more recent than materialization "
                f"{materialization} and synapse/connectivity data will be "
                f'inaccurate:\n\n {", ".join(too_recent.astype(str))}\n\n'
                "You can either try mapping these IDs back in time or use "
                '`materialization="auto"`.'
            )

        # Those that aren't too young might be too old
        ids = ids[ts_r <= ts_m]
        if len(ids):
            # This only checks if these were the up-to-date roots at the given
            # time. Hence it doesn't tell us whether a root is too young or
            # too old - but since we already checked for too-young roots above,
            # we can assume the roots flagged here are too old.
            not_latest = ids[~_is_latest_roots(ids, timestamp=ts_m)]
            if any(not_latest):
                print(
                    "Some root IDs were already outdated at materialization "
                    f"{materialization} and synapse/connectivity data will be "
                    f'inaccurate:\n\n {", ".join(not_latest.astype(str))}\n\n'
                    "Try updating the root IDs using `flywire.update_ids` "
                    "or `flywire.supervoxels_to_roots` if you have supervoxel "
                    "IDs, or pick a different materialization version."
                )
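

# ---------------------------------------------------------------------------
# A minimal sketch of the batching pattern shared by the query functions in
# this module: IDs are sliced into `batch_size` chunks to stay under the
# server-side row limit, and results are concatenated and de-duplicated by
# synapse ID. `_demo_batched_query` and its `fetch` argument are hypothetical
# stand-ins for the CAVE query callables built with `functools.partial` above.
def _demo_batched_query(ids, fetch, batch_size=10):
    ids = np.asarray(ids)
    parts = []
    for i in range(0, len(ids), batch_size):
        batch = ids[i : i + batch_size]
        parts.append(fetch(batch))  # one server round-trip per chunk
    # Batches can overlap (e.g. pre- and post-queries) -> drop duplicates
    return pd.concat(parts, ignore_index=True).drop_duplicates("id")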