Source code for fafbseg.synapses.synapses
# A collection of tools to interface with manually traced and autosegmented
# data in FAFB.
#
# Copyright (C) 2019 Philipp Schlegel
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
import navis
import os
import sqlite3
import networkx as nx
import numpy as np
import pandas as pd
from scipy.spatial import cKDTree
from tqdm.auto import tqdm
from .. import google
conn = None
__all__ = ['query_synapses', 'query_connections', 'get_neuron_synapses',
'get_neuron_synapses', 'assign_connectors']
def get_connection(filepath=None, force_reconnect=False):
"""Connect to SQL DB containg the synapses."""
global conn
# This prevents us from intializing many connections
if conn and not force_reconnect:
if filepath:
print('A connection to a database already exists. Call '
'get_connection() with `force_reconnect=True` to '
'force re-initialization.')
return conn
if not filepath:
filepath = os.environ.get('BUHMANN_SYNAPSE_DB', None)
if not filepath:
raise ValueError('Must provided filepath to SQL synapse database '
'either as `filepath` parameter or as '
'`BUHMANN_SYNAPSE_DB` environment variable.')
conn = sqlite3.connect(filepath)
return conn
[docs]def query_synapses(seg_ids, pre=True, post=True, score_thresh=30, ret='brief',
downcast=True, db=None):
"""Fetch synapses for given segment IDs.
Parameters
----------
seg_ids : int | list of int
Segment IDs to fetch synapses for.
pre/post : bool
Whether to fetch pre- and/or postsynapses.
score_thresh : int, optional
If provided will only return synapses with a cleft score of
this or higher. The default of 30 is used in the original
Buhmann et al paper.
ret : "brief" | "full"
If "full" will return all fields. If "brief" will omit some
of the less relevant fields.
downcast: bool
If True, will downcast some columns to a more reasonable
(and compact) datatype (usually int/float64 -> int/float32).
db : str, optional
Must point to SQL database containing the synapse data. If
not provided will look for a `BUHMANN_SYNAPSE_DB`
environment variable.
Return
------
pd.DataFrame
"""
assert ret in ('brief', 'full')
assert isinstance(score_thresh, (type(None), int, float))
conn = get_connection(db)
seg_ids = navis.utils.make_iterable(seg_ids)
# Turn into query string
seg_ids_str = f'{",".join(seg_ids.astype(str))}'
# Create query
if ret == 'brief':
cols = ['pre_x', 'pre_y', 'pre_z', 'post_x', 'post_y', 'post_z', 'scores',
'cleft_id', 'cleft_scores', 'segmentid_post', 'segmentid_pre']
else:
cols = ['*']
sel = f'SELECT {", ".join(cols)} from synlinks'
if pre and post:
where = f'WHERE (segmentid_pre IN ({seg_ids_str}) OR segmentid_post in ({seg_ids_str}))'
elif pre:
where = f'WHERE segmentid_pre IN ({seg_ids_str})'
elif post:
where = f'WHERE segmentid_post IN ({seg_ids_str})'
else:
raise ValueError('`pre` and `post` must not both be False')
if score_thresh:
where += f' AND cleft_scores >= {score_thresh}'
return _query_database(f'{sel} {where};', conn, downcast=downcast)
def _query_database(query, conn, downcast=True):
"""Query the synapse database."""
resp = pd.read_sql(query, conn)
# Fix some data types
# A lot of these come out at 64 bit but with the exception of
# segmentation IDs 32 bit is more than enough
DTYPES = {'pre_x': np.int32,
'pre_y': np.int32,
'pre_z': np.int32,
'post_x': np.int32,
'post_y': np.int32,
'post_z': np.int32,
'cleft_scores': np.int32,
'dist': np.float32,
'segmentid_post': np.int64,
'segmentid_pre': np.int64,
'scores': np.float32,
'clust_con_offset': np.int32,
'cleft_id': np.int32,
'prob_count': np.int32,
'prob_max': np.int32,
'prob_mean': np.int32,
'prob_min': np.int32,
'prob_sum': np.int32,
'offset': np.int32
}
if downcast:
to_conv = {k: v for k, v in DTYPES.items() if k in resp.columns}
resp = resp.astype(to_conv, errors='ignore')
return resp
[docs]def query_connections(pre_ids, post_ids, score_thresh=30, ret='brief',
downcast=True, db=None):
"""Fetch synaptic connections between given segment IDs.
Parameters
----------
pre_/post_ids : int | list of int
Pre- and postsynaptic segment IDs to search for.
score_thresh : int, optional
If provided will only return synapses with a cleft score of
this or higher. The default of 30 is used in the original
Buhmann et al paper.
ret : "brief" | "full"
If "full" will return all fields. If "brief" will omit some
of the less relevant fields.
downcast: bool
If True, will downcast some columns to a more reasonable
(and compact) datatype (usually int/float64 -> int/float32).
db : str, optional
Must point to SQL database containing the synapse data. If
not provided will look for a `BUHMANN_SYNAPSE_DB`
environment variable.
Return
------
pd.DataFrame
"""
assert ret in ('brief', 'full')
assert isinstance(score_thresh, (type(None), int, float))
conn = get_connection(db)
pre_ids = navis.utils.make_iterable(pre_ids)
post_ids = navis.utils.make_iterable(pre_ids)
# Turn into query string
pre_ids_str = f'{",".join(pre_ids.astype(str))}'
post_ids_str = f'{",".join(post_ids.astype(str))}'
# Create query
if ret == 'brief':
cols = ['pre_x', 'pre_y', 'pre_z', 'post_x', 'post_y', 'post_z', 'scores',
'cleft_id', 'cleft_scores', 'segmentid_post', 'segmentid_pre']
else:
cols = ['*']
sel = f'SELECT {", ".join(cols)} from synlinks'
where = f'WHERE (segmentid_pre IN ({pre_ids_str}) AND segmentid_post in ({post_ids_str}))'
if score_thresh:
where += f' AND cleft_scores >= {score_thresh}'
return _query_database(f'{sel} {where};', conn=conn, downcast=downcast)
[docs]def get_neuron_synapses(x, pre=True, post=True, collapse_connectors=False,
score_thresh=30, ol_thresh=2, dist_thresh=1000,
attach=True, drop_autapses=True, drop_duplicates=True,
db=None, verbose=True, ret='catmaid', progress=True):
"""Fetch synapses for a given neuron.
Works by:
1. Fetch segment IDs corresponding to a neuron
2. Fetch Buhmann et al. synapses associated with these segment IDs
3. Do some clean-up. See parameters for details.
Notes
-----
1. If ``collapse_connectors=False`` the ``connector_id`` column is a
simple enumeration and is effectively meaningless.
2. The x/y/z coordinates always correspond to the presynaptic site (like
with CATMAID connectors). These columns are renamed from "pre_{x|y|z}"
in the database.
3. Some of the clean-up assumes that the query neurons are unique neurons
and not fragments of the same neuron. If that is the case, you might be
better of running this function for each fragment individually.
Parameters
----------
x : navis.Neuron | pymaid.CatmaidNeuron | NeuronList
Neurons to fetch synapses for.
pre/post : bool
Whether to fetch pre- and/or postsynapses.
collapse_connectors : bool
If True, we will pool presynaptic connections into
CATMAID-like connectors. If False each row represents a
connections.
ol_thresh : int, optional
If provided, will required a minimum node overlap between
a neuron and a segment for that segment to be included
(see step 1 above).
score_thresh : int, optional
If provided will only return synapses with a cleft score of
this or higher.
dist_thresh : int
Synapses further away than the given distance [nm] from any
node in the neuron than given distance will be discarded.
This is always with respect to the cleft site
irrespective of whether our neuron is pre- or postsynaptic
to it.
attach : bool
If True, will attach synapses as `.connectors` to neurons.
If False, will return DataFrame with synapses.
drop_autapses : bool
If True, will drop synapses where pre- and postsynapse point
to the same neuron. Autapses are wrong in 99.9% of all cases
we've seen.
drop_duplicates : bool
If True, will merge synapses which connect the same
pair of pre-post segmentation IDs and are within
less than 2500nm.
db : str, optional
Must point to SQL database containing the synapse data. If
not provided will look for a `BUHMANN_SYNAPSE_DB`
environment variable.
ret : "catmaid" | "brief" | "full"
If "full" will return all synapse properties. If "brief"
will return more relevant subset. If "catmaid" will return
only CATMAID-like columns.
progress : bool
Whether to show progress bars or not.
Return
------
pd.DataFrame
Only if ``attach=False``.
"""
# This is just so we throw a DB exception early and not wait for fetching
# segment Ids first
_ = get_connection(db)
assert isinstance(x, (navis.BaseNeuron, navis.NeuronList))
assert ret in ("catmaid", "brief", "full")
if not isinstance(x, navis.NeuronList):
x = navis.NeuronList([x])
# Get segments for this neuron(s)
seg_ids = google.neuron_to_segments(x)
# Drop segments with overlap below threshold
if ol_thresh:
seg_ids = seg_ids.loc[seg_ids.max(axis=1) >= ol_thresh]
# We will make sure that every segment ID is only attributed to a single
# neuron
not_top = seg_ids.values != seg_ids.max(axis=1).values.reshape((seg_ids.shape[0], 1))
where_not_top = np.where(not_top)
if np.any(where_not_top[0]):
seg_ids.values[where_not_top] = None # do not change this to 0
# Fetch pre- and postsynapses associated with these segments
# It's cheaper to get them all in one go
syn = query_synapses(seg_ids.index.values,
score_thresh=score_thresh,
ret=ret if ret != 'catmaid' else 'brief',
db=None)
# Drop autapses - they are most likely wrong
if drop_autapses:
syn = syn[syn.segmentid_pre != syn.segmentid_post]
if drop_duplicates:
dupl_thresh = 250
# Deal with pre- and postsynapses separately
while True:
# Generate pairs of pre- and postsynaptic coordinates that are
# suspiciously close
pre_tree = cKDTree(syn[['pre_x', 'pre_y', 'pre_z']].values)
post_tree = cKDTree(syn[['post_x', 'post_y', 'post_z']].values)
pre_pairs = pre_tree.query_pairs(r=dupl_thresh)
post_pairs = post_tree.query_pairs(r=dupl_thresh)
# We will consider pairs for removal where both pre- OR postsynapse
# are close - this is easy to change via below operator
pairs = pre_pairs | post_pairs # union of both
pairs = np.array(list(pairs))
# For each pair check if they connect the same IDs
same_pre = syn.iloc[pairs[:, 0]].segmentid_pre.values == syn.iloc[pairs[:, 1]].segmentid_pre.values
same_post = syn.iloc[pairs[:, 0]].segmentid_post.values == syn.iloc[pairs[:, 1]].segmentid_post.values
same_cn = same_pre & same_post
# If no more pairs to collapse break
if same_cn.sum() == 0:
break
# Generate a graph from pairs
G = nx.Graph()
G.add_edges_from(pairs[same_cn])
# Find the minimum number of nodes we need to remove
# to separate the connectors
to_rm = []
for cn in nx.connected_components(G):
to_rm += list(nx.minimum_node_cut(nx.subgraph(G, cn)))
# Drop those nodes
syn = syn.drop(index=syn.index.values[to_rm])
# Reset index
syn.reset_index(drop=True, inplace=True)
if collapse_connectors:
assign_connectors(syn)
else:
# Make fake IDs
syn['connector_id'] = np.arange(syn.shape[0]).astype(np.int32)
# Now associate synapses with neurons
tables = []
for c in tqdm(seg_ids.columns,
desc='Proc. neurons',
disable=not progress or seg_ids.shape[1] == 1,
leave=False):
this_segs = seg_ids.loc[seg_ids[c].notnull(), c]
is_pre = syn.segmentid_pre.isin(this_segs.index.values)
is_post = syn.segmentid_post.isin(this_segs.index.values)
# At this point we might see the exact same connection showing up in
# `is_pre` and in `is_post`. This happens when we mapped both the
# pre- and the postsynaptic segment to this neuron - likely an error.
# In these cases we have to decide whether our neuron is truely pre-
# or postsynaptic. For this we will use the overlap counts:
# First find connections that would show up twice
is_dupl = is_pre & is_post
if any(is_dupl):
dupl = syn[is_dupl]
# Next get the overlap counts for the pre- and postsynaptic seg IDs
dupl_pre_ol = seg_ids.loc[dupl.segmentid_pre, c].values
dupl_post_ol = seg_ids.loc[dupl.segmentid_post, c].values
# We go for the one with more overlap
true_pre = dupl_pre_ol > dupl_post_ol
# Propagate that decision
is_pre[is_dupl] = true_pre
is_post[is_dupl] = ~true_pre
# Now get our synapses
this_pre = syn[is_pre]
this_post = syn[is_post]
# Keep only one connector per presynapse
# -> just like in CATMAID connector tables
# Postsynaptic connectors will still show up multiple times
if collapse_connectors:
this_pre = this_pre.drop_duplicates('connector_id')
# Combine pre- and postsynapses and keep track of the type
connectors = pd.concat([this_pre, this_post], axis=0).reset_index(drop=True)
connectors['type'] = 'post'
connectors.iloc[:this_pre.shape[0],
connectors.columns.get_loc('type')] = 'pre'
connectors['type'] = connectors['type'].astype('category')
# Rename columns such that x/y/z corresponds to presynaptic sites
connectors.rename({'pre_x': 'x', 'pre_y': 'y', 'pre_z': 'z'},
axis=1, inplace=True)
# For CATMAID-like connector tables subset to relevant columns
if ret == 'catmaid':
connectors = connectors[['connector_id', 'x', 'y', 'z',
'cleft_scores', 'type']].copy()
# Map connectors to nodes
# Note that this is where we enforce `dist_thresh`
neuron = x.idx[c]
tree = navis.neuron2KDTree(neuron)
dist, ix = tree.query(connectors[['x', 'y', 'z']].values,
distance_upper_bound=dist_thresh)
# Drop far away connectors
connectors = connectors.loc[dist < np.inf]
# Assign node IDs
connectors['node_id'] = neuron.nodes.iloc[ix[dist < np.inf]].node_id.values
# Somas can end up having synapses, which we know is wrong and is
# relatively easy to fix
if np.any(neuron.soma):
somata = navis.utils.make_iterable(neuron.soma)
s_locs = neuron.nodes.loc[neuron.nodes.node_id.isin(somata),
['x', 'y', 'z']].values
# Find all nodes within 2 micron around the somas
soma_node_ix = tree.query_ball_point(s_locs, r=2000)
soma_node_ix = [n for l in soma_node_ix for n in l]
soma_node_id = neuron.nodes.iloc[soma_node_ix].node_id.values
# Drop connectors attached to these soma nodes
connectors = connectors[~connectors.node_id.isin(soma_node_id)]
if attach:
neuron.connectors = connectors.reset_index(drop=True)
else:
connectors['neuron'] = neuron.id # do NOT change the type of this
tables.append(connectors)
if not attach:
connectors = pd.concat(tables, axis=0, sort=True).reset_index(drop=True)
return connectors
def get_neuron_connections(sources, targets=None, agglomerate=True,
score_thresh=30, ol_thresh=5, dist_thresh=2000,
drop_duplicates=True, drop_autapses=True, db=None,
verbose=True):
"""Fetch connections between sets of neurons.
Works by:
1. Fetch segment IDs corresponding to given neurons
2. Fetch Buhmann et al. synaptic connections between them
3. Do some clean-up. See parameters for details - defaults are those used
by Buhmann et al
Parameters
----------
sources : navis.Neuron | pymaid.CatmaidNeuron | NeuronList
Presynaptic neurons to fetch connections for.
targets : navis.Neuron | pymaid.CatmaidNeuron | NeuronList | None
Postsynaptic neurons to fetch connections for. If ``None``,
``targets = sources``.
agglomerate : bool
If True, will agglomerate connectivity by ID and return a
weighted edge list. If False, will return a list of
individual synapses.
ol_thresh : int, optional
If provided, will required a minimum node overlap between
a neuron and a segment for that segment to be included
(see step 1 above).
score_thresh : int, optional
If provided will only return synapses with a cleft score of
this or higher.
dist_thresh : int
Synapses further away than the given distance [nm] from any
node in the neuron than given distance will be discarded.
drop_duplicates : bool
If True, will merge synapses which connect the same
pair of pre-post segmentation IDs and are within
less than 250nm.
drop_autapses : bool
If True, will automatically drop autapses.
db : str, optional
Must point to SQL database containing the synapse data. If
not provided will look for a ```BUHMANN_SYNAPSE_DB``
environment variable.
Return
------
pd.DataFrame
Either edge list or list of synapses - see ``agglomerate``
parameter.
"""
# This is just so we throw a DB exception early and not wait for fetching
# segment Ids first
_ = get_connection(db)
assert isinstance(sources, (navis.BaseNeuron, navis.NeuronList))
if isinstance(targets, type(None)):
targets = sources
assert isinstance(targets, (navis.BaseNeuron, navis.NeuronList))
if not isinstance(sources, navis.NeuronList):
sources = navis.NeuronList([sources])
if not isinstance(targets, navis.NeuronList):
targets = navis.NeuronList([targets])
# Get segments for this neuron(s)
unique_neurons = (sources + targets).remove_duplicates(key='id')
seg_ids = google.neuron_to_segments(unique_neurons)
# Drop segments with overlap below threshold
if ol_thresh:
seg_ids = seg_ids.loc[seg_ids.max(axis=1) >= ol_thresh]
# We need to make sure that every segment ID is only attributed to a single
# neuron
is_top = seg_ids.values != seg_ids.max(axis=1).values.reshape((seg_ids.shape[0], 1))
where_top = np.where(is_top)
seg_ids.values[where_top] = 0 # do not change this to None
pre_ids = seg_ids.loc[seg_ids[sources.id].max(axis=1) > 0].index.values
post_ids = seg_ids.loc[seg_ids[targets.id].max(axis=1) > 0].index.values
# Fetch pre- and postsynapses associated with these segments
# It's cheaper to get them all in one go
syn = query_connections(pre_ids, post_ids,
score_thresh=score_thresh,
db=None)
# Now associate synapses with neurons
seg2neuron = dict(zip(seg_ids.index.values,
seg_ids.columns[np.argmax(seg_ids.values, axis=1)]))
syn['id_pre'] = syn.segmentid_pre.map(seg2neuron)
syn['id_post'] = syn.segmentid_post.map(seg2neuron)
# Let the clean-up BEGIN!
# First drop nasty autapses
if drop_autapses:
syn = syn[syn.id_pre != syn.id_post]
# Next drop synapses far away from our neurons
if dist_thresh:
syn['pre_close'] = False
syn['post_close'] = False
for id in np.unique(syn[['id_pre', 'id_post']].values.flatten()):
neuron = unique_neurons.idx[id]
tree = navis.neuron2KDTree(neuron)
is_pre = syn.id_pre == id
if np.any(is_pre):
dist, ix = tree.query(syn.loc[is_pre, ['pre_x', 'pre_y', 'pre_z']].values,
distance_upper_bound=dist_thresh)
syn.loc[is_pre, 'pre_close'] = dist < float('inf')
is_post = syn.id_post == id
if np.any(is_post):
dist, ix = tree.query(syn.loc[is_post, ['post_x', 'post_y', 'post_z']].values,
distance_upper_bound=dist_thresh)
syn.loc[is_post, 'post_close'] = dist < float('inf')
# Drop connections where either pre- or postsynaptic site are too far
# away from the neuron
syn = syn[syn.pre_close & syn.post_close]
# Drop duplicate connections, i.e. connections that connect the same pre-
# and postsynaptic segmentation ID and are within a distance of 250nm
dupl_thresh = 250
if drop_duplicates:
# We are dealing with this from a presynaptic perspective
while True:
pre_tree = cKDTree(syn[['pre_x', 'pre_y', 'pre_z']].values)
pairs = pre_tree.query_pairs(r=dupl_thresh, output_type='ndarray')
same_pre = syn.iloc[pairs[:, 0]].segmentid_pre.values == syn.iloc[pairs[:, 1]].segmentid_pre.values
same_post = syn.iloc[pairs[:, 0]].segmentid_post.values == syn.iloc[pairs[:, 1]].segmentid_post.values
same_cn = same_pre & same_post
# If no more pairs to collapse break
if same_cn.sum() == 0:
break
# Generate a graph from pairs
G = nx.Graph()
G.add_edges_from(pairs[same_cn])
# Find the minimum number of nodes we need to remove
# to separate the connectors
to_rm = []
for cn in nx.connected_components(G):
to_rm += list(nx.minimum_node_cut(nx.subgraph(G, cn)))
syn = syn.drop(index=syn.index.values[to_rm])
if agglomerate:
edges = syn.groupby(['id_pre', 'id_post'],
as_index=False).cleft_id.count()
edges.rename({'cleft_id': 'weight'}, axis=1, inplace=True)
edges.sort_values('weight', ascending=False, inplace=True)
return edges.reset_index(drop=True)
return syn
[docs]def assign_connectors(synapses, max_dist=300):
"""Collapse synapses by presynaptic connectors.
1. Iterate over each unique presynaptic segment ID and ...
2. Form pairs of presynapses that are within ``max_dist``
3. Use these pairs to create a network graph
4. Break graph into connected components
3. Give a unique connector ID to all synapses in a connected component
Parameters
----------
synapse : pandas.DataFrame
Must contains ['pre_x', 'pre_y', 'pre_z'] or
column.
max_dist : int
Max distance at which two presynaptic locations may be
collapsed.
Returns
-------
None
Adds a `connector_id` column to DataFrame.
"""
assert isinstance(synapses, pd.DataFrame)
if all(np.isin(['pre_x', 'pre_y', 'pre_z'], synapses.columns)):
loc_cols = ['pre_x', 'pre_y', 'pre_z']
elif all(np.isin(['x', 'y', 'z'], synapses.columns)):
loc_cols = ['x', 'y', 'z']
else:
raise ValueError('Need either pre_x/pre_y/pre_z or x/y/z columns.')
# Iterate over presynaptic IDs
cn_id_counter = 1
synapses['connector_id'] = None
for sid in tqdm(synapses.segmentid_pre.unique(),
leave=False,
desc='Collapsing synapses'):
this_sid = synapses.segmentid_pre == sid
this = synapses[this_sid]
if this.shape[0] == 1:
synapses.loc[this_sid, 'connector_id'] = cn_id_counter
cn_id_counter += 1
continue
# Build KDTree and generate pairs
this_locs = this[loc_cols].values
tree = cKDTree(this_locs)
pairs = tree.query_pairs(r=max_dist)
# Generate graph from pairs
G = nx.Graph()
G.add_nodes_from(np.arange(this.shape[0])) # this is for lone synapses
G.add_edges_from(pairs)
this_cn_ids = np.zeros(this.shape[0])
for cc in nx.connected_components(G):
this_cn_ids[list(cc)] = cn_id_counter
cn_id_counter += 1
synapses.loc[this_sid, 'connector_id'] = this_cn_ids
synapses['connector_id'] = synapses.connector_id.astype(np.int32)