# A collection of tools to interface with manually traced and autosegmented
# data in FAFB.
#
# Copyright (C) 2019 Philipp Schlegel
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
import navis
import datetime as dt
import numpy as np
import pandas as pd
from functools import partial
from tqdm.auto import trange
from .segmentation import get_lineage_graph, roots_to_supervoxels
from .utils import (
parse_root_ids,
get_cave_client,
retry,
get_synapse_areas,
find_mat_version,
inject_dataset,
)
from .annotations import is_proofread, parse_neuroncriteria
from ..utils import make_iterable
from ..synapses.utils import catmaid_table
from ..synapses.transmitters import collapse_nt_predictions
# Public API of this module.
__all__ = [
    "get_synapses",
    "get_connectivity",
    "get_transmitter_predictions",
    "get_adjacency",
    "get_synapse_counts",
]
@parse_neuroncriteria()
@inject_dataset(disallowed=["flat_630", "flat_571"])
def get_synapse_counts(
    x,
    by_neuropil=False,
    materialization="auto",
    filtered=True,
    min_score=None,
    batch_size=10,
    *,
    dataset=None,
    **kwargs,
):
    """Return pre- and postsynapse counts for the given root IDs.

    Parameters
    ----------
    x :             int | list of int | Neuron/List | NeuronCriteria
                    A single FlyWire segment ID (i.e. root ID), a list
                    thereof or a Neuron/List. For neurons, the ``.id`` is
                    taken to be the root ID. If you have a neuron (in
                    FlyWire space) but do not know its ID, run
                    :func:`fafbseg.flywire.neuron_to_segments` first.
    by_neuropil :   bool
                    If True, the returned DataFrame is additionally broken
                    down by neuropil.
    materialization : int | str, optional
                    Which materialization to query:
                     - 'auto' (default) tries to find the most recent
                       materialization version at which all the query IDs
                       existed
                     - 'latest' uses the latest materialized table
                     - 'live' queries against the live data - this will be
                       much slower!
                     - pass an integer (e.g. `447`) to use a specific
                       materialization version
    filtered :      bool
                    Whether to query the filtered synapse table which drops
                    redundant and low-confidence (<= 50 cleft score)
                    synapses. See also https://tinyurl.com/4j9v7t86 (links
                    to CAVE website).
    min_score :     int
                    Minimum "cleft score". Buhmann et al. used a threshold
                    of 30 in their paper; for FlyWire analyses that
                    threshold was raised to 50 (see also `filtered`).
    batch_size :    int
                    Number of IDs queried per batch. Batches that are too
                    large can lead to truncated tables: individual queries
                    currently cannot return more than 200_000 rows and a
                    warning is shown if that limit is exceeded.
    dataset :       "public" | "production" | "sandbox", optional
                    Against which FlyWire dataset to query. If ``None``
                    falls back to the default dataset (see
                    :func:`~fafbseg.flywire.set_default_dataset`).
    **kwargs
                    Keyword arguments are passed through to
                    :func:`fafbseg.flywire.get_synapses`.

    Returns
    -------
    pandas.DataFrame
                    If ``by_neuropil=False`` counts are indexed by root ID;
                    if ``by_neuropil=True`` they are indexed by root ID and
                    neuropil.

    See Also
    --------
    :func:`~fafbseg.flywire.get_synapses`
                    Use this function to fetch the individual synapses.

    Examples
    --------
    Get synapse counts for a given root ID:

    >>> from fafbseg import flywire
    >>> n_syn = flywire.get_synapse_counts(720575940603231916)
    Using materialization version 783.
    >>> n_syn
                         pre  post
    id
    720575940603231916  4571   887

    """
    # Normalize input (neurons, lists, criteria) to an array of root IDs
    root_ids = parse_root_ids(x).astype(np.int64)

    # Fetch the actual synapses; neuropil labels are only requested when we
    # break the counts down by neuropil
    syn = get_synapses(
        root_ids,
        pre=True,
        post=True,
        attach=False,
        min_score=min_score,
        transmitters=True,
        materialization=materialization,
        neuropils=by_neuropil,
        filtered=filtered,
        batch_size=batch_size,
        dataset=dataset,
        **kwargs,
    )

    # Split into outgoing and incoming synapses of the query neurons
    pre_syn = syn[syn.pre.isin(root_ids)]
    post_syn = syn[syn.post.isin(root_ids)]

    if by_neuropil:
        pre_cnt = pre_syn.groupby(["pre", "neuropil"]).size()
        pre_cnt = pre_cnt[pre_cnt > 0]
        pre_cnt.index.set_names(["id", "neuropil"], inplace=True)

        post_cnt = post_syn.groupby(["post", "neuropil"]).size()
        post_cnt = post_cnt[post_cnt > 0]
        post_cnt.index.set_names(["id", "neuropil"], inplace=True)

        # Union of all neuropils seen on either side
        nps = np.unique(
            np.append(
                pre_cnt.index.get_level_values(1),
                post_cnt.index.get_level_values(1),
            )
        )
        # Full (id, neuropil) grid so every query ID shows up even with
        # zero synapses in a given neuropil
        full_ix = pd.MultiIndex.from_product(
            [root_ids, nps], names=["id", "neuropil"]
        )
        counts = pd.concat(
            [
                pre_cnt.reindex(full_ix).fillna(0),
                post_cnt.reindex(full_ix).fillna(0),
            ],
            axis=1,
        )
        counts.columns = ["pre", "post"]
        # Drop (id, neuropil) combinations without any synapses at all
        counts = counts[counts.max(axis=1) > 0]
    else:
        counts = pd.DataFrame(
            {
                "pre": pre_syn.value_counts("pre").reindex(root_ids).fillna(0).values,
                "post": post_syn.value_counts("post").reindex(root_ids).fillna(0).values,
            },
            index=pd.Index(root_ids, name="id"),
        )

    return counts
@parse_neuroncriteria()
@inject_dataset(disallowed=["flat_630", "flat_571"])
def get_transmitter_predictions(
    x,
    single_pred=False,
    weighted=True,
    materialization="auto",
    filtered=True,
    neuropils=None,
    batch_size=10,
    *,
    dataset=None,
    **kwargs,
):
    """Fetch neurotransmitter predictions for neurons.

    Based on Eckstein et al. (2020). The per-synapse predictions are collapsed
    into per-neuron prediction by calculating the average confidence for
    each neurotransmitter across all synapses weighted by the "cleft score".
    Bottom line: higher confidence synapses have more weight than low confidence
    synapses.

    Parameters
    ----------
    x :             int | list of int | Neuron/List | NeuronCriteria
                    Either a FlyWire segment ID (i.e. root ID), a list thereof or
                    a Neuron/List. For neurons, the ``.id`` is assumed to be the
                    root ID. If you have a neuron (in FlyWire space) but don't
                    know its ID, use :func:`fafbseg.flywire.neuron_to_segments`
                    first.
    single_pred :   bool
                    Whether to only return the highest probability transmitter
                    for each neuron.
    weighted :      bool
                    If True, will weight predictions based on confidence: higher
                    cleft score = more weight.
    materialization : int | str, optional
                    Which materialization to query:
                     - 'auto' (default) tries to find the most recent
                       materialization version at which all the query IDs existed
                     - 'latest' uses the latest materialized table
                     - 'live' queries against the live data - this will be much slower!
                     - pass an integer (e.g. `447`) to use a specific materialization version
    filtered :      bool
                    Whether to use the filtered synapse table. Briefly, this
                    filter removes redundant and low confidence (<= 50 cleft score)
                    synapses. See also https://tinyurl.com/4j9v7t86 (links to
                    CAVE website).
    neuropils :     str | list of str, optional
                    Provide neuropil (e.g. ``'AL_R'``) or list thereof (e.g.
                    ``['AL_R', 'AL_L']``) to filter predictions to these ROIs.
                    Prefix neuropil with a tilde (e.g. ``~AL_R``) to exclude it.
    batch_size :    int
                    Number of IDs to query per batch. Too large batches might
                    lead to truncated tables: currently individual queries can
                    not return more than 200_000 rows and you will see a warning
                    if that limit is exceeded.
    dataset :       "public" | "production" | "sandbox", optional
                    Against which FlyWire dataset to query. If ``None`` will fall
                    back to the default dataset (see
                    :func:`~fafbseg.flywire.set_default_dataset`).
    **kwargs
                    Keyword arguments are passed through to
                    :func:`fafbseg.flywire.get_synapses`.

    Returns
    -------
    pandas.DataFrame
                    If `single_pred=False`: returns a dataframe with all
                    per-transmitter confidences for each query neuron.
    dict
                    If `single_pred=True`: returns dictionary with
                    `(top_transmitter, confidence)` named tuple for each query
                    neuron.

    Examples
    --------
    >>> from fafbseg import flywire

    Get per-transmitter predictions for a single neuron:

    >>> flywire.get_transmitter_predictions(720575940603231916)
    Using materialization version 783.
    root_id        720575940603231916
    gaba                     0.011677
    acetylcholine            0.938961
    glutamate                0.017902
    octopamine               0.012861
    serotonin                0.012467
    dopamine                 0.006132

    Return only the most likely transmitter:

    >>> flywire.get_transmitter_predictions(720575940603231916, single_pred=True)
    Using materialization version 783.
    {720575940603231916: prediction(transmitter='acetylcholine', probability=0.9389612897479809)}

    """
    # First get the synapses: only presynapses matter here since transmitter
    # identity is a property of the presynaptic neuron. Neuropil labels are
    # only requested when we need them to filter.
    syn = get_synapses(
        x,
        pre=True,
        post=False,
        attach=False,
        min_score=None,
        transmitters=True,
        materialization=materialization,
        neuropils=neuropils is not None,
        filtered=filtered,
        batch_size=batch_size,
        dataset=dataset,
        **kwargs,
    )

    if not isinstance(neuropils, type(None)):
        neuropils = make_iterable(neuropils)

        # A tilde prefix marks a neuropil for exclusion, everything else
        # is an inclusion filter
        filter_in = [n for n in neuropils if not n.startswith("~")]
        filter_out = [n[1:] for n in neuropils if n.startswith("~")]

        if filter_in:
            syn = syn[syn.neuropil.isin(filter_in)]
        if filter_out:
            syn = syn[~syn.neuropil.isin(filter_out)]

        # Avoid setting-on-copy warning
        if syn._is_view:
            syn = syn.copy()

    # Process the predictions: collapse per-synapse confidences into a
    # per-neuron prediction (optionally cleft-score weighted)
    pred = collapse_nt_predictions(
        syn, single_pred=single_pred, weighted=weighted, id_col="pre"
    )

    if not single_pred:
        pred.columns.name = "root_id"
        # Return columns in the same order as the query IDs
        return pred.reindex(make_iterable(x).astype(np.int64), axis=1)
    else:
        return pred
@parse_neuroncriteria()
@inject_dataset(disallowed=["flat_630", "flat_571"])
def get_synapses(
    x,
    pre=True,
    post=True,
    attach=True,
    filtered=True,
    min_score=None,
    transmitters=False,
    neuropils=False,
    clean=True,
    materialization="auto",
    batch_size=10,
    *,
    dataset=None,
    progress=True,
):
    """Fetch Buhmann et al. (2019) synapses for given neuron(s).

    Parameters
    ----------
    x :             int | list of int | Neuron/List | NeuronCriteria
                    Either a FlyWire segment ID (i.e. root ID), a list thereof
                    or a Neuron/List. For neurons, the ``.id`` is assumed to be
                    the root ID. If you have a neuron (in FlyWire space) but
                    don't know its ID, use
                    :func:`fafbseg.flywire.neuron_to_segments` first.
    pre :           bool
                    Whether to fetch presynapses for the given neurons.
    post :          bool
                    Whether to fetch postsynapses for the given neurons.
    transmitters :  bool
                    Whether to also load per-synapse neurotransmitter predictions
                    from Eckstein et al. (2020).
    neuropils :     bool
                    Whether to add a column indicating which neuropil a synapse
                    is in.
    attach :        bool
                    If True and ``x`` is Neuron/List, the synapses will be added
                    as ``.connectors`` table. For TreeNeurons (skeletons), the
                    synapses will be mapped to the closest node. Note that the
                    function will still return the full synapse table.
    filtered :      bool
                    Whether to use the filtered synapse table. Briefly, this
                    filter removes redundant and low confidence (<= 50 cleft score)
                    synapses. See also https://tinyurl.com/4j9v7t86 (links to
                    CAVE website).
    min_score :     int
                    Minimum "cleft score". Buhmann et al. used a threshold of 30
                    in their paper. However, for FlyWire analyses that threshold
                    was raised to 50 (see also `filtered`).
    clean :         bool
                    If True, we will perform some clean up of the connectivity
                    compared with the raw synapse information. Currently, we::
                     - drop autapses
                     - drop synapses from/to background (id 0)
                     - drop synapses that are >10um from the skeleton (only
                       if ``attach=True``)
    batch_size :    int
                    Number of IDs to query per batch. Too large batches might
                    lead to truncated tables: currently individual queries can
                    not return more than 500_000 rows and you will see a warning
                    if that limit is exceeded.
    materialization : int | str, optional
                    Which materialization to query:
                     - 'auto' (default) tries to find the most recent
                       materialization version at which all the query IDs existed
                     - 'latest' uses the latest materialized table
                     - 'live' queries against the live data - this will be much slower!
                     - pass an integer (e.g. `447`) to use a specific materialization version
    dataset :       "public" | "production" | "sandbox", optional
                    Against which FlyWire dataset to query. If ``None`` will fall
                    back to the default dataset (see
                    :func:`~fafbseg.flywire.set_default_dataset`).

    Returns
    -------
    pandas.DataFrame
                    Note that each synapse (or rather synaptic connection)
                    will show up only once. Depending on the query neurons
                    (`x`), a given row might represent a presynapse for one and
                    a postsynapse for another neuron.

    See Also
    --------
    :func:`~fafbseg.flywire.get_connectivity`
                    Use this function to fetch the edges between neurons instead
                    of individual synapses.

    Examples
    --------
    Fetch synapses for a given root ID:

    >>> from fafbseg import flywire
    >>> syn = flywire.get_synapses(720575940603231916)
    Using materialization version 783.
    >>> syn.head() #doctest: +SKIP
                      pre                post  cleft_score   pre_x   pre_y  pre_z  post_x  post_y  post_z         id
    0  720575940631406673  720575940603231916           60  434336  218108  28240  434340  218204   28240    3535576
    1  720575940608044501  720575940603231916          136  429180  212316  51520  429244  212136   51520   15712693
    2  720575940627777265  720575940603231916          142  440272  215372  35240  440152  215392   35200   29684635
    3  720575940606227890  720575940603231916          147  429932  224436  41120  429968  224584   41120  111586446
    4  720575940627777265  720575940603231916          146  423856  216648  51280  423844  216528   51240   15689207

    Skeletonize a neuron and attach its synapses:

    >>> from fafbseg import flywire
    >>> sk = flywire.get_skeletons(720575940603231916)
    >>> _ = flywire.get_synapses(sk, attach=True)
    Using materialization version 783.
    >>> sk.connectors.head() #doctest: +SKIP
       connector_id       x       y       z  cleft_score          partner_id type  node_id
    0             0  356304  146840  145120          145  720575940627592977  pre      217
    1             1  344456  164324  162440          153  720575940537249676  pre        5
    2             2  373200  149464  162440           52  720575940599849357  pre      390
    3             3  355220  156784  151000          144  720575940537605841  pre      171
    4             4  346320  154520  151720          142  720575940635161060  pre       30

    """
    # --- Input validation -------------------------------------------------
    if not pre and not post:
        raise ValueError("`pre` and `post` must not both be False")

    if isinstance(materialization, str):
        if materialization not in ("latest", "live", "auto"):
            raise ValueError(
                '`materialization` must be "auto", "latest", "live" or '
                f'integer, got "{materialization}"'
            )
    elif not isinstance(materialization, int):
        raise ValueError(
            '`materialization` must be "auto", "latest", "live" or integer, '
            f'got "{type(materialization)}"'
        )

    # The filtered table already enforces cleft_score > 50, so a lower
    # user-supplied threshold would have no effect - warn about that
    if (min_score is not None) and (min_score < 50) and filtered:
        msg = (
            "Querying synapse table with `filtered=True` already removes "
            "synaptic connections with cleft_score <= 50. If you want less "
            "confident connections set `filtered=False`. Note that this will "
            "also drop the de-duplication (see docstring)."
        )
        navis.config.logger.warning(msg)

    # Parse root IDs
    ids = parse_root_ids(x).astype(np.int64)

    # Get the cave client
    client = get_cave_client(dataset=dataset)

    # Check if IDs existed at this materialization
    if materialization == "latest":
        materialization = client.materialize.most_recent_version()

    if materialization == "auto":
        materialization = find_mat_version(ids, dataset=dataset, verbose=progress)
    else:
        _check_ids(ids, materialization=materialization, dataset=dataset)

    # Columns we request from CAVE; positions come back split into
    # x/y/z columns because we query with `split_positions=True`
    columns = [
        "pre_pt_root_id",
        "post_pt_root_id",
        "cleft_score",
        "pre_pt_position",
        "post_pt_position",
        "id",
    ]
    # Supervoxel columns are only requested in live queries (see below)
    # and dropped again after the query
    sv_cols = ["pre_pt_supervoxel_id", "post_pt_supervoxel_id"]

    if transmitters:
        columns += ["gaba", "ach", "glut", "oct", "ser", "da"]

    # --- Pick the query function based on materialization + filtering ----
    if materialization == "live" and filtered:
        raise ValueError(
            "It is currently not possible to fetch filtered "
            "synapses in live queries. You can set `filtered=False` "
            "but please be aware that this will query the "
            "unfiltered synapse table. See docs for details."
        )
    elif materialization == "live":
        # NOTE(review): `dt.datetime.utcnow()` is deprecated in Python 3.12;
        # presumably CAVE expects a naive UTC timestamp here - confirm before
        # switching to timezone-aware datetimes.
        func = partial(
            retry(client.materialize.live_query),
            table=client.materialize.synapse_table,
            timestamp=dt.datetime.utcnow(),
            split_positions=True,
            # nb there is a bug in CAVE which causes empty results if we don't
            # ask for supervoxels
            select_columns=columns + sv_cols,
        )
    elif filtered:
        # The filtered synapses live in a CAVE "view"
        func = partial(
            retry(client.materialize.query_view),
            view_name="valid_synapses_nt_np_v6",
            materialization_version=materialization,
            split_positions=True,
            select_columns=columns,
        )
    else:
        func = partial(
            retry(client.materialize.query_table),
            table=client.materialize.synapse_table,
            split_positions=True,
            materialization_version=materialization,
            select_columns=columns,
        )

    # --- Fetch synapses in batches of `batch_size` root IDs --------------
    syn = []
    for i in trange(
        0,
        len(ids),
        batch_size,
        desc="Fetching synapses",
        disable=not progress or len(ids) <= batch_size,
    ):
        batch = ids[i : i + batch_size]
        if post:
            syn.append(
                func(filter_in_dict=dict(post_pt_root_id=batch)).drop(
                    sv_cols, axis=1, errors="ignore"
                )
            )
        if pre:
            syn.append(
                func(filter_in_dict=dict(pre_pt_root_id=batch)).drop(
                    sv_cols, axis=1, errors="ignore"
                )
            )

    # Drop attrs to avoid issues when concatenating
    for df in syn:
        df.attrs = {}

    # Drop empty tables but make sure to keep at least one if all are empty
    if not all([t.empty for t in syn]):
        syn = [t for t in syn if not t.empty]

    # Combine results from batches
    syn = pd.concat(syn, axis=0, ignore_index=True)

    # Rename some of those columns
    syn.rename(
        {
            "post_pt_root_id": "post",
            "pre_pt_root_id": "pre",
            "post_pt_position_x": "post_x",
            "post_pt_position_y": "post_y",
            "post_pt_position_z": "post_z",
            "pre_pt_position_x": "pre_x",
            "pre_pt_position_y": "pre_y",
            "pre_pt_position_z": "pre_z",
            "idx": "id",  # this may exists if we made a join query
            "id_x": "id",  # this may exists if we made a join query
        },
        axis=1,
        inplace=True,
    )

    # Depending on how queries were batched, we need to drop duplicate synapses
    # (a synapse shows up once per batch that matched it)
    syn.drop_duplicates("id", inplace=True)

    if transmitters:
        # Expand transmitter abbreviations to full names
        syn.rename(
            {
                "ach": "acetylcholine",
                "glut": "glutamate",
                "oct": "octopamine",
                "ser": "serotonin",
                "da": "dopamine",
            },
            axis=1,
            inplace=True,
        )

    # Next we need to run some clean-up:
    # Drop below threshold connections
    if min_score:
        syn = syn[syn.cleft_score >= min_score]

    if clean:
        # Drop autapses
        syn = syn[syn.pre != syn.post]
        # Drop connections involving 0 (background, glia)
        syn = syn[(syn.pre != 0) & (syn.post != 0)]

        # Avoid copy warning
        if syn._is_view:
            syn = syn.copy()

    if neuropils:
        # Map each synapse ID to a neuropil; categorical saves memory
        syn["neuropil"] = get_synapse_areas(syn["id"].values)
        syn["neuropil"] = syn.neuropil.astype("category")

    # Drop ID column
    # syn.drop('id', axis=1, inplace=True)

    # --- Optionally attach synapses as .connectors to the input neurons ---
    if isinstance(x, navis.core.BaseNeuron):
        x = navis.NeuronList([x])

    if attach and isinstance(x, navis.NeuronList):
        for n in x:
            presyn = postsyn = pd.DataFrame([])
            add_cols = ["neuropil"] if neuropils else []
            if pre:
                cols = ["pre_x", "pre_y", "pre_z", "cleft_score", "post"] + add_cols
                presyn = syn.loc[syn.pre == np.int64(n.id), cols].rename(
                    {"pre_x": "x", "pre_y": "y", "pre_z": "z", "post": "partner_id"},
                    axis=1,
                )
                presyn["type"] = "pre"
            if post:
                cols = ["post_x", "post_y", "post_z", "cleft_score", "pre"] + add_cols
                postsyn = syn.loc[syn.post == np.int64(n.id), cols].rename(
                    {"post_x": "x", "post_y": "y", "post_z": "z", "pre": "partner_id"},
                    axis=1,
                )
                postsyn["type"] = "post"

            connectors = pd.concat((presyn, postsyn), axis=0, ignore_index=True)

            # Turn type column into categorical to save memory
            connectors["type"] = connectors["type"].astype("category")

            # If TreeNeuron, map each synapse to a node
            if isinstance(n, navis.TreeNeuron):
                tree = navis.neuron2KDTree(n, data="nodes")
                dist, ix = tree.query(connectors[["x", "y", "z"]].values)

                # Synapses >10um from the skeleton are likely segmentation
                # artifacts - drop them (only when cleaning)
                too_far = dist > 10_000
                if any(too_far) and clean:
                    connectors = connectors[~too_far].copy()
                    ix = ix[~too_far]

                connectors["node_id"] = n.nodes.node_id.values[ix]

            # Add an ID column for navis' sake
            connectors.insert(0, "connector_id", np.arange(connectors.shape[0]))

            n.connectors = connectors

    # Record which materialization version was used for this query
    syn.attrs["materialization"] = materialization

    return syn
@parse_neuroncriteria()
@inject_dataset(disallowed=["flat_630", "flat_571"])
def get_adjacency(
    sources,
    targets=None,
    materialization="auto",
    neuropils=None,
    filtered=True,
    min_score=None,
    batch_size=1000,
    *,
    dataset=None,
    progress=True,
):
    """Fetch adjacency matrix.

    Notes
    -----
    As of May 2023, CAVE provides "views" of materialized tables. This includes
    a view with neuron edges (as opposed to individual synaptic connections)
    which can be much faster to query. We will automatically use this view _if_:

     1. `filtered=True` (default is `True`)
     2. `min_score=None` or `min_score=50` (default is None)
     3. `neuropils=None` (default is `None`)
     4. `mat!='live'` (default is "auto" which can end up as "live")

    Parameters
    ----------
    sources :       int | list of int | Neuron/List | NeuronCriteria
                    Either FlyWire segment ID (i.e. root ID), a list thereof
                    or a Neuron/List. For neurons, the ``.id`` is assumed to be
                    the root ID. If you have a neuron (in FlyWire space) but
                    don't know its ID, use :func:`fafbseg.flywire.neuron_to_segments`
                    first.
    targets :       int | list of int | Neuron/List | NeuronCriteria, optional
                    Either FlyWire segment ID (i.e. root ID), a list thereof
                    or a Neuron/List. For neurons, the ``.id`` is assumed to be
                    the root ID. If you have a neuron (in FlyWire space) but
                    don't know its ID, use :func:`fafbseg.flywire.neuron_to_segments`
                    first. If ``None``, will assume ``targets = sources``.
    neuropils :     str | list of str, optional
                    Provide neuropil (e.g. ``'AL_R'``) or list thereof (e.g.
                    ``['AL_R', 'AL_L']``) to filter connectivity to these ROIs.
                    Prefix neuropil with a tilde (e.g. ``~AL_R``) to exclude it.
    filtered :      bool
                    Whether to use the filtered synapse table. Briefly, this
                    filter removes redundant and low confidence (<= 50 cleft score)
                    synapses. See also https://tinyurl.com/4j9v7t86 (links to
                    CAVE website).
    min_score :     int
                    Minimum "cleft score". Buhmann et al. used a threshold of 30
                    in their paper. However, for FlyWire analyses that threshold
                    was raised to 50 (see also `filtered`).
    batch_size :    int
                    Number of IDs to query per batch. Too large batches can
                    lead to truncated tables: currently individual queries do
                    not return more than 200_000 connections. If you see a
                    warning that this limit has been exceeded, decrease the
                    batch size!
    materialization : int | str, optional
                    Which materialization to query:
                     - 'auto' (default) tries to find the most recent
                       materialization version at which all the query IDs existed
                     - 'latest' uses the latest materialized table
                     - 'live' queries against the live data - this will be much slower!
                     - pass an integer (e.g. `447`) to use a specific materialization version
    dataset :       "public" | "production" | "sandbox", optional
                    Against which FlyWire dataset to query. If ``None`` will fall
                    back to the default dataset (see
                    :func:`~fafbseg.flywire.set_default_dataset`).

    Returns
    -------
    adjacency :     pd.DataFrame
                    Adjacency matrix. Rows (sources) and columns (targets) are
                    in the same order as input.

    See Also
    --------
    :func:`~fafbseg.flywire.get_connectivity`
                    Use this function to fetch all up- and/or downstream partners
                    for a set of neurons.

    Examples
    --------
    Get connections between given root IDs:

    >>> from fafbseg import flywire
    >>> adj = flywire.get_adjacency(sources=720575940631406673,
    ...                             targets=720575940603231916)
    Using materialization version 783.
    >>> adj
    target              720575940603231916
    source
    720575940631406673                   5

    """
    # Default to a square all-by-all matrix
    if isinstance(targets, type(None)):
        targets = sources

    if isinstance(materialization, str):
        if materialization not in ("latest", "live", "auto"):
            raise ValueError(
                '`materialization` must be "auto", "latest", "live" or '
                f'integer, got "{materialization}"'
            )
    elif not isinstance(materialization, int):
        raise ValueError(
            '`materialization` must be "auto", "latest", "live" or integer, '
            f'got "{type(materialization)}"'
        )

    # Parse root IDs
    sources = parse_root_ids(sources).astype(np.int64)
    targets = parse_root_ids(targets).astype(np.int64)
    # All IDs involved (for the materialization check)
    both = np.unique(np.append(sources, targets))

    client = get_cave_client(dataset=dataset)

    # Check if IDs existed at this materialization
    if materialization == "latest":
        materialization = client.materialize.most_recent_version()

    if materialization == "auto":
        materialization = find_mat_version(both, dataset=dataset, verbose=progress)
    else:
        _check_ids(both, materialization=materialization, dataset=dataset)

    columns = ["pre_pt_root_id", "post_pt_root_id", "cleft_score", "id"]
    # Supervoxel columns are only requested in live queries and dropped again
    sv_cols = ["pre_pt_supervoxel_id", "post_pt_supervoxel_id"]

    # --- Pick the query function (see Notes in docstring) -----------------
    if materialization == "live" and filtered:
        raise ValueError(
            "It is currently not possible to fetch filtered "
            "synapses in live queries. You can set `filtered=False` "
            "but please be aware that this will query the "
            "unfiltered synapse table. See docs for details."
        )
    elif materialization == "live":
        func = partial(
            retry(client.materialize.live_query),
            table=client.materialize.synapse_table,
            timestamp=dt.datetime.utcnow(),
            # nb there is a bug in CAVE which causes empty results if we don't
            # ask for supervoxels
            select_columns=columns + sv_cols,
        )
    elif filtered:
        # Use the (fast) pre-aggregated edge view if it exists for this
        # materialization and no per-synapse filtering is needed
        has_view = "valid_connection_v2" in client.materialize.get_views(
            materialization
        )
        no_np = isinstance(neuropils, type(None))
        no_score_thresh = not min_score or min_score == 50
        if has_view & no_np & no_score_thresh:
            # The view returns edges with a ready-made `n_syn` weight
            columns = ["pre_pt_root_id", "post_pt_root_id", "n_syn"]
            func = partial(
                retry(client.materialize.query_view),
                view_name="valid_connection_v2",
                select_columns=columns,
                materialization_version=materialization,
            )
        else:
            func = partial(
                retry(client.materialize.query_view),
                view_name="valid_synapses_nt_np_v6",
                materialization_version=materialization,
                split_positions=True,
                select_columns=columns,
            )
    else:
        func = partial(
            retry(client.materialize.query_table),
            table=client.materialize.synapse_table,
            materialization_version=materialization,
            select_columns=columns,
        )

    # --- Fetch source x target batches -------------------------------------
    syn = []
    for i in trange(
        0,
        len(sources),
        batch_size,
        desc="Fetching adjacency",
        disable=not progress or len(sources) <= batch_size,
    ):
        source_batch = sources[i : i + batch_size]
        for k in range(0, len(targets), batch_size):
            target_batch = targets[k : k + batch_size]

            filter_in_dict = dict(
                post_pt_root_id=target_batch, pre_pt_root_id=source_batch
            )
            this = func(filter_in_dict=filter_in_dict)

            # We need to drop the .attrs (which contain meta data from queries)
            # Otherwise we run into issues when concatenating
            this.attrs = {}

            if not this.empty:
                syn.append(this.drop(
                    sv_cols, axis=1, errors="ignore"
                ))

    # Drop empty tables but make sure to keep at least one if all are empty
    # (NOTE: empty batches were already skipped above, so this is a no-op
    # kept for symmetry with the other query functions)
    if not all([t.empty for t in syn]):
        syn = [t for t in syn if not t.empty]

    # Combine results from batches; if nothing came back, short-circuit
    # with an all-zero adjacency matrix
    if len(syn):
        syn = pd.concat(syn, axis=0, ignore_index=True)
    else:
        adj = pd.DataFrame(
            np.zeros((len(sources), len(targets))), index=sources, columns=targets
        )
        adj.index.name = "source"
        adj.columns.name = "target"
        return adj

    # Depending on how queries were batched, we need to drop duplicate synapses;
    # the edge view has no `id` column, so dedup on the full edge instead
    if "id" in syn.columns:
        syn.drop_duplicates("id", inplace=True)
    else:
        syn.drop_duplicates(
            ["pre_pt_root_id", "post_pt_root_id", "n_syn"], inplace=True
        )

    # Subset to the desired neuropils
    if not isinstance(neuropils, type(None)):
        neuropils = make_iterable(neuropils)

        if len(neuropils):
            # Tilde-prefixed neuropils are exclusions, the rest inclusions
            filter_in = [n for n in neuropils if not n.startswith("~")]
            filter_out = [n[1:] for n in neuropils if n.startswith("~")]

            syn["neuropil"] = get_synapse_areas(syn["id"].values)
            syn["neuropil"] = syn.neuropil.astype("category")

            if filter_in:
                syn = syn[syn.neuropil.isin(filter_in)]
            if filter_out:
                syn = syn[~syn.neuropil.isin(filter_out)]

            # Avoid setting-on-copy warning
            if syn._is_view:
                syn = syn.copy()

    # Rename some of those columns
    syn.rename(
        {"post_pt_root_id": "post", "pre_pt_root_id": "pre", "n_syn": "weight"},
        axis=1,
        inplace=True,
    )

    # Next we need to run some clean-up:
    # Drop below threshold connections (only per-synapse queries have scores)
    if min_score and "cleft_score" in syn.columns:
        syn = syn[syn.cleft_score >= min_score]

    # Aggregate per-synapse rows into edge weights; the edge view already
    # comes pre-aggregated (has a "weight" column)
    if "weight" not in syn.columns:
        cn = syn.groupby(["pre", "post"], as_index=False).size()
    else:
        cn = syn
    cn.columns = ["source", "target", "weight"]

    # Pivot
    adj = cn.pivot(index="source", columns="target", values="weight").fillna(0)

    # Index to match order and add any missing neurons
    adj = adj.reindex(index=sources, columns=targets).fillna(0)

    return adj
@parse_neuroncriteria()
@inject_dataset(disallowed=["flat_630", "flat_571"])
def get_connectivity(
x,
clean=True,
style="simple",
upstream=True,
downstream=True,
proofread_only=False,
transmitters=False,
neuropils=None,
filtered=True,
min_score=None,
batch_size=30,
materialization="auto",
*,
progress=True,
dataset=None,
):
"""Fetch Buhmann et al. (2019) connectivity for given neuron(s).
Notes
-----
As of May 2023, CAVE provides "views" of materialized tables. This includes
a view with neuron edges (as opposed to individual synaptic connections)
which can be much faster to query. We will automatically use use the
view _if_:
1. `filtered=True` (default is `True`)
2. `min_score=None` or `min_score=50`
3. `neuropils=None` (default is `None`)
4. `mat!='live'` (default is "auto" which can end up as "live")
Parameters
----------
x : int | list of int | Neuron/List | NeuronCriteria
Either a FlyWire root ID, a list thereof or a Neuron/List.
For neurons, the ``.id`` is assumed to be the root ID. If
you have a neuron (in FlyWire space) but don't know its ID,
use :func:`fafbseg.flywire.neuron_to_segments` first.
clean : bool
If True, we will perform some clean up of the connectivity
compared with the raw synapse information. Currently, we::
- drop autapses
- drop synapses from/to background (id 0)
style : "simple" | "catmaid"
Style of the returned table.
upstream : bool
Whether to fetch upstream connectivity of ```x``.
downstream : bool
Whether to fetch downstream connectivity of ```x``.
proofread_only: bool
Whether to filter out root IDs that have not been proofread.
Query IDs (i.e. `x`) will never be excluded regardless of
proofreading status!
transmitters : bool
If True, will attach the best guess for the transmitter
for a given connection based on the predictions in Eckstein
et al (2020). IMPORTANT: the predictions are based solely on
the connections retrieved as part of this query which likely
represent only a fraction of each neuron's total synapses.
As such the predictions need to be taken with a grain
of salt - in particular for weak connections!
To get the "full" predictions see
:func:`fafbseg.flywire.predict_transmitter`.
neuropils : bool | str | list of str, optional
If True, will return edges broken down by neuropils. You can
also provide neuropil (e.g. ``'AL_R'``) or list thereof (e.g.
``['AL_R', 'AL_L']``) to filter connectivity to these ROIs.
Prefix neuropil with a tilde (e.g. ``~AL_R``) to exclude it.
filtered : bool
Whether to use the filtered synapse table. Briefly, this
filter removes redundant and low confidence (<= 50 cleft score)
synapses. See also https://tinyurl.com/4j9v7t86 (links to
CAVE website).
min_score : int
Minimum "cleft score". Buhmann et al. used a threshold of 30
in their paper. However, for FlyWire analyses that threshold
was raised to 50 (see also `filtered`).
batch_size : int
Number of IDs to query per batch. Too large batches might
lead to truncated tables: currently individual queries can
not return more than 200_000 rows and you will see a warning
if that limit is exceeded.
materialization : int | str, optional
Which materialization to query:
- 'auto' (default) tries to find the most recent
materialization version at which the query IDs co-exist
- 'latest' uses the latest materialized table
- 'live' queries against the live data - this will be much slower!
- pass an integer (e.g. `447`) to use a specific materialization version
dataset : "public" | "production" | "sandbox", optional
Against which FlyWire dataset to query. If ``None`` will fall
back to the default dataset (see
:func:`~fafbseg.flywire.set_default_dataset`).
Returns
-------
pd.DataFrame
Connectivity table.
See Also
--------
:func:`~fafbseg.flywire.get_adjacency`
Use this function to fetch connections between two sets
of neurons.
Examples
--------
Get connections from/to given root ID(s):
>>> from fafbseg import flywire
>>> edges = flywire.get_connectivity(720575940603231916)
Using materialization version 783.
>>> edges.head()
pre post weight
0 720575940635945919 720575940603231916 83
1 720575940620541349 720575940603231916 58
2 720575940603231916 720575940629163931 50
3 720575940603231916 720575940635945919 46
4 720575940603231916 720575940646122804 42
"""
if not upstream and not downstream:
raise ValueError("`upstream` and `downstream` must not both be False")
if style not in ("simple", "catmaid"):
raise ValueError(f'`style` must be "simple" or "catmaid", got "{style}"')
if style == "catmaid":
if transmitters:
raise ValueError('`style` must be "simple" when asking for transmitters')
if neuropils is True:
raise ValueError('`style` must be "simple" when asking for neuropils')
if isinstance(materialization, str):
if materialization not in ("latest", "live", "auto"):
raise ValueError(
'`materialization` must be "auto", "latest", "live" or '
f'integer, got "{materialization}"'
)
elif not isinstance(materialization, int):
raise ValueError(
'`materialization` must be "auto", "latest", "live" or integer, '
f'got "{type(materialization)}"'
)
# Parse root IDs
ids = parse_root_ids(x)
client = get_cave_client(dataset=dataset)
# Check if IDs existed at this materialization
if materialization == "latest":
materialization = client.materialize.most_recent_version()
if materialization == "auto":
materialization = find_mat_version(ids, dataset=dataset, verbose=progress)
else:
_check_ids(ids, materialization=materialization, dataset=dataset)
columns = ["pre_pt_root_id", "post_pt_root_id", "cleft_score", "id"]
sv_cols = ["pre_pt_supervoxel_id", "post_pt_supervoxel_id"]
if transmitters:
columns += ["gaba", "ach", "glut", "oct", "ser", "da"]
if materialization == "live" and filtered:
raise ValueError(
"It is currently not possible to fetch filtered "
"synapses in live queries. You can set `filtered=False` "
"but please be aware that this will query the "
"unfiltered synapse table. See docs for details."
)
elif materialization == "live":
func = partial(
retry(client.materialize.live_query),
table=client.materialize.synapse_table,
timestamp=dt.datetime.utcnow(),
select_columns=columns + sv_cols,
)
elif filtered:
# Check if there is a view for valid connections
has_view = "valid_connection_v2" in client.materialize.get_views(
materialization
)
no_np = isinstance(neuropils, type(None))
no_score_thresh = (not min_score) or (min_score == 50)
# If all conditions are met, we can use the view
if has_view & no_np & no_score_thresh:
columns = ["pre_pt_root_id", "post_pt_root_id", "n_syn"]
if transmitters:
columns += ["gaba", "ach", "glut", "oct", "ser", "da"]
func = partial(
retry(client.materialize.query_view),
view_name="valid_connection_v2",
select_columns=columns,
materialization_version=materialization,
)
# Otherwise we need to query the valid synapse view
else:
func = partial(
retry(client.materialize.query_view),
view_name="valid_synapses_nt_np_v6",
materialization_version=materialization,
split_positions=True,
select_columns=columns,
)
else:
func = partial(
retry(client.materialize.query_table),
table=client.materialize.synapse_table,
materialization_version=materialization,
select_columns=columns,
)
syn = []
for i in trange(
0,
len(ids),
batch_size,
desc="Fetching connectivity",
disable=not progress or len(ids) <= batch_size,
):
batch = ids[i : i + batch_size]
if upstream:
syn.append(func(filter_in_dict=dict(post_pt_root_id=batch)))
if downstream:
syn.append(func(filter_in_dict=dict(pre_pt_root_id=batch)))
# Some clean-up
for df in syn:
# Drop supervoxel columns (if they exist)
df.drop(sv_cols, axis=1, errors="ignore", inplace=True)
# Drop `attrs`` to avoid issues when concatenating
df.attrs = {}
# Drop empty tables but make sure to keep at least one if all are empty
if not all([t.empty for t in syn]):
syn = [t for t in syn if not t.empty]
# Combine results from batches
syn = pd.concat(syn, axis=0, ignore_index=True)
# Depending on how queries were batched, we need to drop duplicate synapses
if "id" in syn.columns:
syn.drop_duplicates("id", inplace=True)
else:
syn.drop_duplicates(
["pre_pt_root_id", "post_pt_root_id", "n_syn"], inplace=True
)
# Subset to the desired neuropils
if not isinstance(neuropils, type(None)):
syn["neuropil"] = get_synapse_areas(syn["id"].values)
syn["neuropil"] = syn.neuropil.astype("category")
if not isinstance(neuropils, bool):
neuropils = make_iterable(neuropils)
if len(neuropils):
filter_in = [n for n in neuropils if not n.startswith("~")]
filter_out = [n[1:] for n in neuropils if n.startswith("~")]
if filter_in:
syn = syn[syn.neuropil.isin(filter_in)]
if filter_out:
syn = syn[~syn.neuropil.isin(filter_out)]
# Rename some of those columns
syn.rename(
{
"post_pt_root_id": "post",
"pre_pt_root_id": "pre",
"ach": "acetylcholine",
"glut": "glutamate",
"oct": "octopamine",
"ser": "serotonin",
"da": "dopamine",
"n_syn": "weight",
},
axis=1,
inplace=True,
)
# Next we need to run some clean-up:
# Drop below threshold connections
if min_score and "cleft_score" in syn.columns:
syn = syn[syn.cleft_score >= min_score]
if clean:
# Drop autapses
syn = syn[syn.pre != syn.post]
# Drop connections involving 0 (background, glia)
syn = syn[(syn.pre != 0) & (syn.post != 0)]
# Turn into connectivity table
if "weight" not in syn.columns:
if neuropils is True and "neuropil" in syn.columns:
cn_table = (
syn.groupby(["pre", "post", "neuropil"], as_index=False)
.size()
.rename({"size": "weight"}, axis=1)
)
else:
cn_table = (
syn.groupby(["pre", "post"], as_index=False)
.size()
.rename({"size": "weight"}, axis=1)
)
else:
cn_table = syn
# Filter to proofread neurons only
if proofread_only:
all_ids = np.unique(cn_table[["pre", "post"]].values.flatten())
is_pr = all_ids[is_proofread(all_ids, materialization=materialization)]
# Make sure we don't drop our query neurons
keep = np.append(is_pr, ids)
cn_table = cn_table[cn_table.pre.isin(keep) & cn_table.post.isin(keep)]
# Style
if style == "catmaid":
cn_table = catmaid_table(cn_table, query_ids=ids)
else:
cn_table = cn_table.copy() # avoid setting on copy warning
cn_table.sort_values("weight", ascending=False, inplace=True)
cn_table.reset_index(drop=True, inplace=True)
if transmitters:
# Generate per-neuron predictions
pred = collapse_nt_predictions(syn, single_pred=True, id_col="pre")
cn_table["pred_nt"] = cn_table.pre.map(lambda x: pred.get(x, [None])[0])
cn_table["pred_conf"] = cn_table.pre.map(lambda x: pred.get(x, [None, None])[1])
cn_table.attrs["materialization"] = materialization
return cn_table
@inject_dataset()
def get_supervoxel_synapses(
    x, pre=True, post=True, batch_size=300, progress=True, *, dataset=None
):
    """Fetch Buhmann et al. (2019) synapses for given supervoxels.

    Parameters
    ----------
    x :             int | list of int
                    Supervoxel IDs.
    pre :           bool
                    Whether to fetch presynapses for the given neurons.
    post :          bool
                    Whether to fetch postsynapses for the given neurons.
    batch_size :    int
                    Number of IDs to query per batch. Too large batches might
                    lead to truncated tables: currently individual queries can
                    not return more than 200_000 rows and you will see a warning
                    if that limit is exceeded.
    progress :      bool
                    Whether to show a progress bar. Automatically hidden if
                    everything fits into a single batch.
    dataset :       "public" | "production" | "sandbox" | "flat_630", optional
                    Against which FlyWire dataset to query. If ``None`` will fall
                    back to the default dataset (see
                    :func:`~fafbseg.flywire.set_default_dataset`).

    Returns
    -------
    pandas.DataFrame
                    One row per synapse with pre/post supervoxel IDs, cleft
                    score and split ``pre_x/y/z`` / ``post_x/y/z`` positions.

    """
    if not pre and not post:
        raise ValueError("`pre` and `post` must not both be False")

    # Parse supervoxel IDs
    ids = parse_root_ids(x)

    client = get_cave_client(dataset=dataset)

    columns = [
        "pre_pt_supervoxel_id",
        "post_pt_supervoxel_id",
        "cleft_score",
        "pre_pt_position",
        "post_pt_position",
        "id",
    ]

    # Wrap the query in `retry` so transient server errors are retried
    func = partial(
        retry(client.materialize.query_table),
        table=client.materialize.synapse_table,
        split_positions=True,
        select_columns=columns,
    )

    syn = []
    for i in trange(
        0,
        len(ids),
        batch_size,
        desc="Fetching synapses",
        disable=not progress or len(ids) <= batch_size,
    ):
        batch = ids[i : i + batch_size]
        if post:
            syn.append(func(filter_in_dict=dict(post_pt_supervoxel_id=batch)))
        if pre:
            syn.append(func(filter_in_dict=dict(pre_pt_supervoxel_id=batch)))

    # Drop attrs to avoid issues when concatenating
    for df in syn:
        df.attrs = {}

    # Drop empty tables but make sure to keep at least one if all are empty
    if not all([t.empty for t in syn]):
        syn = [t for t in syn if not t.empty]

    # Combine results from batches
    syn = pd.concat(syn, axis=0, ignore_index=True)

    # Depending on how queries were batched, we need to drop duplicate synapses
    # (the same synapse can show up in both the pre- and the post query)
    syn.drop_duplicates("id", inplace=True)

    # Rename the position columns produced by `split_positions=True`
    syn.rename(
        {
            "post_pt_position_x": "post_x",
            "post_pt_position_y": "post_y",
            "post_pt_position_z": "post_z",
            "pre_pt_position_x": "pre_x",
            "pre_pt_position_y": "pre_y",
            "pre_pt_position_z": "pre_z",
        },
        axis=1,
        inplace=True,
    )

    # Return a defensive copy to avoid setting-on-copy warnings downstream.
    # NOTE: previously this checked the private `DataFrame._is_view`
    # attribute which is pandas-internal API and may break between pandas
    # versions; an unconditional copy returns identical data and the cost
    # is negligible next to the network queries above.
    return syn.copy()
@inject_dataset()
def synapse_contributions(x, *, dataset=None):
    """Return synapse contributions to given neuron."""
    # Fetch this neuron's synapses and split into pre- and postsynaptic
    syn = get_synapses(x)
    pre = syn[syn.pre == x]
    post = syn[syn.post == x]
    print(f"Neuron has {len(pre)} pre- and {len(post)} postsynapses")

    # Lineage graph with per-node user and size annotations
    G = get_lineage_graph(x, user=True, size=True)

    out_columns = [
        "root_id",
        "user",
        "sv_added",
        "sv_removed",
        "pre_added",
        "post_added",
    ]
    rows = []
    for n in navis.config.tqdm(G.nodes):
        pred = list(G.predecessors(n))

        # No predecessors -> this node comes straight from the base
        # segmentation and represents no edit
        if not pred:
            continue

        if len(pred) == 1:
            # Exactly one predecessor -> this node is the result of a split
            added_sv, removed_sv = 0, G.nodes[n]["size"] - G.nodes[pred[0]]["size"]
            added_pre = added_post = -1  # sentinel: not tracked for splits
        elif len(pred) == 2:
            # Two predecessors -> this node is the result of a merge; the
            # smaller fragment is considered the "added" part
            sizes = (G.nodes[pred[0]]["size"], G.nodes[pred[1]]["size"])
            smaller = np.argmin(sizes)
            added_sv, removed_sv = sizes[smaller], 0
            sv = roots_to_supervoxels(pred[smaller], dataset=dataset)[pred[smaller]]
            added_pre = pre.pre_pt_supervoxel_id.isin(sv).sum()
            added_post = post.post_pt_supervoxel_id.isin(sv).sum()
        else:
            raise ValueError(f"Unexpected number of predecessors for {n}: {len(pred)}")

        rows.append(
            {
                "root_id": n,
                "user": G.nodes[n].get("user", "NA"),
                "sv_added": added_sv,
                "sv_removed": removed_sv,
                "pre_added": added_pre,
                "post_added": added_post,
            }
        )

    # TODO
    # - for every merge operation, count only the number of supervoxels added that
    #   actually made it into the current root ID
    # - for every split operation, count only the supervoxels that weren't added
    #   back later on
    # - count every added/removed supervoxel only once
    # IDEAS
    # - use the largest original root ID as the point of reference

    return pd.DataFrame(rows, columns=out_columns)
def _check_ids(ids, materialization, dataset="production"):
    """Check whether root IDs existed at given materialization.

    Prints warnings for root IDs that are either more recent than, or were
    already outdated at, the given materialization. Never raises.

    Parameters
    ----------
    ids :           iterable
                    Root IDs to check.
    materialization : "live" | "latest" | int
                    Materialization version to check against.
    dataset :       str
                    Which CAVE dataset/client to use.

    Returns
    -------
    None

    """
    client = get_cave_client(dataset=dataset)

    ids = np.asarray(ids)

    # Wrap client calls in `retry` so transient server errors are retried
    _is_latest_roots = retry(client.chunkedgraph.is_latest_roots)
    _get_timestamp = retry(client.materialize.get_timestamp)
    _get_root_timestamps = retry(client.chunkedgraph.get_root_timestamps)

    # Check if any of these root IDs are outdated
    if materialization == "live":
        not_latest = ids[~_is_latest_roots(ids)]
        if any(not_latest):
            print(
                f'Root ID(s) {", ".join(not_latest.astype(str))} are outdated '
                "and live connectivity might be inaccurate."
            )
    else:
        if materialization == "latest":
            materialization = client.materialize.most_recent_version()

        # Is the root ID more recent than the materialization?
        ts_m = _get_timestamp(materialization)
        ts_r = _get_root_timestamps(ids)
        too_recent = ids[ts_r > ts_m]
        if any(too_recent):
            print(
                "Some root IDs are more recent than materialization "
                f"{materialization} and synapse/connectivity data will be "
                f'inaccurate:\n\n {", ".join(too_recent.astype(str))}\n\n'
                "You can either try mapping these IDs back in time or use "
                '`materialization="auto"`.'
            )

        # Those that aren't too young might be too old
        ids = ids[ts_r <= ts_m]
        if len(ids):
            # This only checks if these were the up to date roots at the given time
            # hence it doesn't tell us whether the root is too young or too old
            # but since we checked the too young roots before we can assume
            # the roots flagged here are too old
            not_latest = ids[~_is_latest_roots(ids, timestamp=ts_m)]
            if any(not_latest):
                print(
                    "Some root IDs were already outdated at materialization "
                    f"{materialization} and synapse/connectivity data will be "
                    f'inaccurate:\n\n {", ".join(not_latest.astype(str))}\n\n'
                    "Try updating the root IDs using `flywire.update_ids` "
                    "or `flywire.supervoxels_to_roots` if you have supervoxel IDs,"
                    " or pick a different materialization version."
                )