From 69f6f3fd7a6f6e14ad38d6fe83e56aa8190ad64f Mon Sep 17 00:00:00 2001 From: Eric Hunsberger Date: Wed, 28 Oct 2020 10:01:18 -0400 Subject: [PATCH 01/11] Speed up block splitting Also minor speedup to hardware build --- nengo_loihi/block.py | 3 +- nengo_loihi/builder/split_blocks.py | 52 +++++++++++++++++++---------- nengo_loihi/hardware/builder.py | 6 ++-- 3 files changed, 40 insertions(+), 21 deletions(-) diff --git a/nengo_loihi/block.py b/nengo_loihi/block.py index c4b98ed05..430a91892 100644 --- a/nengo_loihi/block.py +++ b/nengo_loihi/block.py @@ -517,13 +517,14 @@ def bits_per_axon(self, n_weights): synapse_idx_bits = 4 n_synapses_bits = 6 + bits_per_memunit = 64 bits = 0 synapses_per_block = self.n_synapses + 1 for i in range(0, n_weights, synapses_per_block): n = min(n_weights - i, synapses_per_block) bits_i = n * bits_per_weight + synapse_idx_bits + n_synapses_bits # round up to nearest memory unit - bits_i = -64 * (-bits_i // 64) + bits_i = -bits_per_memunit * (-bits_i // bits_per_memunit) bits += bits_i return bits diff --git a/nengo_loihi/builder/split_blocks.py b/nengo_loihi/builder/split_blocks.py index 3e1d5a0cc..d0ba31435 100644 --- a/nengo_loihi/builder/split_blocks.py +++ b/nengo_loihi/builder/split_blocks.py @@ -70,7 +70,7 @@ def ceil_div(a, b): return -((-a) // b) -def split_model(model): # noqa: C901 +def split_model(model, validate=1): # noqa: C901 """Split blocks in the given model that exceed the hardware constraints. Will split any block that has more than the allowable number of compartments, @@ -85,6 +85,8 @@ def split_model(model): # noqa: C901 ---------- model : `nengo_loihi.builder.Model` The model whose blocks should be split. + validate : int + Level of validation to perform: 0 = none, 1 = minimal, 2 = maximal Returns ------- @@ -106,7 +108,7 @@ def split_model(model): # noqa: C901 synapse_map = {} for old_block in model.blocks: - new_blocks = split_block(old_block, model.block_shapes) + new_blocks = split_block(old_block, model.block_shapes, validate=validate) block_map[old_block] = new_blocks if len(new_blocks) == 1: @@ -117,7 +119,9 @@ def split_model(model): # noqa: C901 else: # break apart synapses for old_synapse in old_block.synapses: - new_synapse_axons = split_synapse(old_block, old_synapse, new_blocks) + new_synapse_axons = split_synapse( + old_block, old_synapse, new_blocks, validate=validate + ) synapse_map[old_synapse] = new_synapse_axons for old_block in model.blocks: @@ -127,7 +131,7 @@ def split_model(model): # noqa: C901 split_input_axons(input, block_map, synapse_map) for probe in model.probes: - split_probe(probe, block_map, synapse_map) + split_probe(probe, block_map, synapse_map, validate=validate) new_blocks = [block for group in block_map.values() for block in group] @@ -145,7 +149,7 @@ def split_model(model): # noqa: C901 return block_map -def split_probe(probe, block_map, synapse_map): +def split_probe(probe, block_map, synapse_map, validate=1): """Modify probe in place to target new blocks""" assert len(probe.target) == len(probe.slice) == len(probe.weights) == 1 old_block = probe.target[0] @@ -205,7 +209,8 @@ def split_probe(probe, block_map, synapse_map): ids = np.array([i for ii in ids for i in ii]) assert ids.shape == old_comp_ids.shape - assert np.array_equal(np.unique(ids), old_comp_ids) + if validate >= 1: + assert np.array_equal(np.unique(ids), old_comp_ids) if is_transformed or np.array_equal(ids, old_comp_ids): # weighted probes don't need reindexing because summed outputs are ordered @@ -319,7 +324,7 @@ def 
split_axon(old_axon, old_axon_idxs, old_atoms, new_synapses): return new_axons -def split_block(old_block, block_shapes): +def split_block(old_block, block_shapes, validate=1): """Break a block apart into smaller blocks, each able to fit on one core""" n_compartments = old_block.compartment.n_compartments n_in_axons = sum(synapse.n_axons for synapse in old_block.synapses) @@ -361,7 +366,8 @@ def split_block(old_block, block_shapes): assert len(new_block_inds) > 0 if len(new_block_inds) == 1: # if block can fit on one core, just return the current block - assert new_block_inds[0].set == set(range(n_compartments)) + if validate >= 1: + assert new_block_inds[0].set == set(range(n_compartments)) new_blocks = [old_block] return OrderedDict(zip(new_blocks, new_block_inds)) @@ -421,7 +427,7 @@ def split_block(old_block, block_shapes): return OrderedDict(zip(new_blocks, new_block_inds)) -def split_synapse(old_block, old_synapse, new_blocks): +def split_synapse(old_block, old_synapse, new_blocks, validate=1): """Break a synapse apart to work with new blocks Parameters @@ -462,9 +468,10 @@ def split_synapse(old_block, old_synapse, new_blocks): for axon_idx in range(old_synapse.n_axons): weight_idx = old_synapse.axon_weight_idx(axon_idx) indices = old_synapse.indices[weight_idx] - assert all( - np.array_equal(i, indices[0]) for i in indices[1:] - ), "All atoms must target same indices" + if validate >= 1: + assert all( + np.array_equal(i, indices[0]) for i in indices[1:] + ), "All atoms must target same indices" indices = indices[0] base = old_synapse.axon_compartment_base(axon_idx) @@ -514,6 +521,7 @@ def split_synapse(old_block, old_synapse, new_blocks): block_comp_ids, axon_overlaps, axon_ids, + validate=validate, ) logger.info( @@ -530,7 +538,13 @@ def split_synapse(old_block, old_synapse, new_blocks): def set_new_synapse_weights( - old_synapse, old_input_axons, new_synapse, block_comp_ids, axon_overlaps, axon_ids + old_synapse, + old_input_axons, + new_synapse, + block_comp_ids, + axon_overlaps, + axon_ids, + validate=1, ): has_shared_weights = old_synapse.axon_to_weight_map is not None @@ -543,7 +557,8 @@ def set_new_synapse_weights( new_axon_compartment_bases = [] compartment_map = dict(zip(block_comp_ids, range(len(block_comp_ids)))) - new_block_comp_idxs = IndicesList(range(len(block_comp_ids))) + if validate >= 2: + new_block_comp_idxs = IndicesList(range(len(block_comp_ids))) # iterate over all old axon ids that will also input to this new synapse for old_axon_id in axon_ids: @@ -557,7 +572,7 @@ def set_new_synapse_weights( valid_comp_ids = old_axon_comp_ids else: i_valid = np.array( - [i in block_comp_ids for i in old_axon_comp_ids], dtype=bool + [i in block_comp_ids.set for i in old_axon_comp_ids.flat], dtype=bool ) ww = old_weights[:, i_valid] ii = old_indices[:, i_valid] @@ -599,8 +614,11 @@ def set_new_synapse_weights( weight_idx_map[key] = len(weights) weights.append(ww) indices.append(new_ii) - assert all(new_base + i in new_block_comp_idxs for i in new_ii.flat) - else: + + if validate >= 2: + check_inds = new_base + new_ii + assert set(check_inds.flat).issubset(new_block_comp_idxs.set) + elif validate >= 2: # we have these weights/indices in memory, double check they're the same weight_idx = weight_idx_map[key] assert np.array_equal(ww, weights[weight_idx]) diff --git a/nengo_loihi/hardware/builder.py b/nengo_loihi/hardware/builder.py index d09dbba06..21799b93d 100644 --- a/nengo_loihi/hardware/builder.py +++ b/nengo_loihi/hardware/builder.py @@ -296,14 +296,14 @@ def 
build_block(nxsdk_core, core, block, compartment_idxs, ax_range):
     logger.debug("Building %s on core.id=%d", block, nxsdk_core.id)
 
-    for i, bias in enumerate(block.compartment.bias):
-        bman, bexp = bias_to_manexp(bias)
+    bman, bexp = bias_to_manexp(block.compartment.bias)
+    for i, _ in enumerate(bman):
         icomp = core.compartment_cfg_idxs[block][i]
         ivth = core.vth_cfg_idxs[block][i]
 
         ii = compartment_idxs[i]
         nxsdk_core.cxCfg[ii].configure(
-            bias=bman, biasExp=bexp, vthProfile=ivth, cxProfile=icomp
+            bias=bman[i], biasExp=bexp[i], vthProfile=ivth, cxProfile=icomp
         )
 
         phasex = "phase%d" % (ii % 4,)

From 183c872354d90b76739595c64850499c8c4a89f6 Mon Sep 17 00:00:00 2001
From: Eric Hunsberger
Date: Wed, 28 Oct 2020 10:01:53 -0400
Subject: [PATCH 02/11] Explicitly test q0 and current overflow

These were being tested before; it just wasn't evident or assured.

---
 nengo_loihi/emulator/tests/test_interface.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nengo_loihi/emulator/tests/test_interface.py b/nengo_loihi/emulator/tests/test_interface.py
index e1d5659b9..fd11d5750 100644
--- a/nengo_loihi/emulator/tests/test_interface.py
+++ b/nengo_loihi/emulator/tests/test_interface.py
@@ -81,8 +81,9 @@ def test_uv_overflow(n_axons, plt, allclose, monkeypatch):
     assert EmulatorInterface.strict  # Tests should be run in strict mode
     monkeypatch.setattr(EmulatorInterface, "strict", False)
 
+    overflow_var = "q0" if n_axons == 1000 else "current"
     with EmulatorInterface(model) as emu:
-        with pytest.warns(UserWarning):
+        with pytest.warns(UserWarning, match=f"Overflow in {overflow_var}"):
             emu.run_steps(nt)
         emu_u = emu.collect_probe_output(probe_u)
         emu_v = emu.collect_probe_output(probe_v)

From 805208055ec43e60c62715ea6198ae264e31ee25 Mon Sep 17 00:00:00 2001
From: Eric Hunsberger
Date: Tue, 2 Feb 2021 16:18:50 -0500
Subject: [PATCH 03/11] Fix r in decay_magnitude docstring

---
 nengo_loihi/builder/discretize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nengo_loihi/builder/discretize.py b/nengo_loihi/builder/discretize.py
index 9ca043e09..7e1158560 100644
--- a/nengo_loihi/builder/discretize.py
+++ b/nengo_loihi/builder/discretize.py
@@ -146,7 +146,7 @@ def decay_magnitude(decay, x0=2 ** 21, bits=12, offset=0):
 
         x_i = floor(r x_{i-1})
 
-    where ``r = (2**bits - offset - decay)``.
+    where ``r = (2**bits - offset - decay) / 2**bits``.
 
     To simulate the effects of rounding in decay, we subtract an expected loss
     due to rounding (``q``) each iteration. Our estimated series is therefore::

From d131b7e05b9276f1f0cf67513b57d8da04463462 Mon Sep 17 00:00:00 2001
From: Eric Hunsberger
Date: Thu, 29 Oct 2020 11:15:33 -0400
Subject: [PATCH 04/11] Properly pass dt for preset DecodeNeurons

---
 CHANGES.rst                              |  2 ++
 nengo_loihi/decode_neurons.py            | 24 ++++++++++++++----------
 nengo_loihi/tests/test_decode_neurons.py | 17 ++++++++++++++---
 3 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index 1b98df241..a26564804 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -44,10 +44,12 @@ Release history
   is deleted. (`#312`_)
 - Fixed probe filters such that multiple ``Simulator.run`` calls now results in
   the same probe data as a single call of equivalent length. (`#271`_, `#303`_)
+- Fixed handling of ``dt`` within ``DecodeNeurons`` for ``dt != 0.001``. (`#309`_)
 
 .. _#271: https://github.com/nengo/nengo-loihi/issues/271
 .. _#289: https://github.com/nengo/nengo-loihi/pull/289
 .. _#303: https://github.com/nengo/nengo-loihi/pull/303
+.. 
_#309: https://github.com/nengo/nengo-loihi/pull/309 .. _#312: https://github.com/nengo/nengo-loihi/pull/312 .. _#317: https://github.com/nengo/nengo-loihi/pull/317 .. _#320: https://github.com/nengo/nengo-loihi/pull/320 diff --git a/nengo_loihi/decode_neurons.py b/nengo_loihi/decode_neurons.py index a1aeb9271..4459b4dbf 100644 --- a/nengo_loihi/decode_neurons.py +++ b/nengo_loihi/decode_neurons.py @@ -261,6 +261,10 @@ class Preset5DecodeNeurons(OnOffDecodeNeurons): nengo-loihi-sandbox/utils/interneuron_unidecoder_design.py """ + # TODO: why does this scale factor help? Found it empirically in + # test_decode_neurons.test_add_inputs (see there for a description) + scale_factor = 1.05 + def __init__(self, dt=0.001, rate=None): super().__init__(pairs_per_dim=5, dt=dt, rate=rate) @@ -270,14 +274,12 @@ def __init__(self, dt=0.001, rate=None): gain, bias = self.neuron_type.gain_bias(max_rates, intercepts) target_point = 0.85 - target_rate = np.sum(self.neuron_type.rates(target_point, gain, bias)) - self.scale = 1.08 * target_point / (self.dt * target_rate) - # ^ TODO: why does this 1.08 factor help? found it empirically in - # test_decode_neurons.test_add_inputs + target_rate = np.sum(self.neuron_type.rates(target_point, gain, bias, dt=dt)) + self.scale = self.scale_factor * target_point / (self.dt * target_rate) + # repeat gains/biases for on/off neurons self.gain = gain.repeat(2) self.bias = bias.repeat(2) - # ^ repeat for on/off neurons def __str__(self): return "%s(dt=%0.3g, rate=%0.3g)" % (type(self).__name__, self.dt, self.rate) @@ -290,6 +292,10 @@ class Preset10DecodeNeurons(OnOffDecodeNeurons): nengo-loihi-sandbox/utils/interneuron_unidecoder_design.py """ + # TODO: why does this scale factor help? Found it empirically in + # test_decode_neurons.test_add_inputs (see there for a description) + scale_factor = 1.05 + def __init__(self, dt=0.001, rate=None): super().__init__(pairs_per_dim=10, dt=dt, rate=rate) @@ -300,14 +306,12 @@ def __init__(self, dt=0.001, rate=None): gain, bias = self.neuron_type.gain_bias(max_rates, intercepts) target_point = 1.0 - target_rate = np.sum(self.neuron_type.rates(target_point, gain, bias)) - self.scale = 1.05 * target_point / (self.dt * target_rate) - # ^ TODO: why does this 1.05 factor help? found it empirically in - # test_decode_neurons.test_add_inputs + target_rate = np.sum(self.neuron_type.rates(target_point, gain, bias, dt=dt)) + self.scale = self.scale_factor * target_point / (self.dt * target_rate) + # repeat gains/biases for on/off neurons self.gain = gain.repeat(2) self.bias = bias.repeat(2) - # ^ repeat for on/off neurons def __str__(self): return "%s(dt=%0.3g, rate=%0.3g)" % (type(self).__name__, self.dt, self.rate) diff --git a/nengo_loihi/tests/test_decode_neurons.py b/nengo_loihi/tests/test_decode_neurons.py index 5ecf9dbae..b58a83468 100644 --- a/nengo_loihi/tests/test_decode_neurons.py +++ b/nengo_loihi/tests/test_decode_neurons.py @@ -23,6 +23,15 @@ ], ) def test_add_inputs(decode_neurons, tolerance, Simulator, seed, plt): + """Test the addition of two inputs with DecodeNeurons. + + Note: This test forms the basis for the scale factors for Preset5DecodeNeurons + and Preset10DecodeNeurons. It is unclear exactly why these scale factors help. + The best values depend on the exact inputs below, as well as the seed used for + this test. More testing is needed to find optimal scale factors, or (ideally) + get rid of them completely if we can better understand the underlying mechanics. 
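+    The factors are exposed as the ``scale_factor`` class attribute on
+    ``Preset5DecodeNeurons`` and ``Preset10DecodeNeurons``, so they are easy to
+    adjust when experimenting.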
+ """ + sim_time = 2.0 pres_time = sim_time / 4 eval_time = sim_time / 8 @@ -36,6 +45,8 @@ def test_add_inputs(decode_neurons, tolerance, Simulator, seed, plt): {t: stim_values[i][1] for i, t in enumerate(stim_times)} ) + probe_solver = nengo.solvers.LstsqL2nz(reg=0.01) + with nengo.Network(seed=seed) as model: stim_a = nengo.Node(stim_fn_a) stim_b = nengo.Node(stim_fn_b) @@ -54,9 +65,9 @@ def test_add_inputs(decode_neurons, tolerance, Simulator, seed, plt): stim_synapse = out_synapse.combine(nengo.Alpha(0.005)).combine( nengo.Alpha(0.005) ) - p_stim_a = nengo.Probe(stim_a, synapse=stim_synapse) - p_stim_b = nengo.Probe(stim_b, synapse=stim_synapse) - p_c = nengo.Probe(c, synapse=out_synapse) + p_stim_a = nengo.Probe(stim_a, synapse=stim_synapse, solver=probe_solver) + p_stim_b = nengo.Probe(stim_b, synapse=stim_synapse, solver=probe_solver) + p_c = nengo.Probe(c, synapse=out_synapse, solver=probe_solver) build_model = Model() build_model.decode_neurons = decode_neurons From fe7c03ebafaadb9703744b5595d5784027daa768 Mon Sep 17 00:00:00 2001 From: Eric Hunsberger Date: Wed, 4 Nov 2020 12:25:07 -0500 Subject: [PATCH 05/11] Use nengo.rc.rc.float_dtype, reduce memory - Respect nengo.rc.rc.float_dtype where we can - Use int32 in a number of places to reduce memory --- CHANGES.rst | 3 +++ nengo_loihi/block.py | 14 +++++++++----- nengo_loihi/builder/connection.py | 22 +++++++++++++++------- nengo_loihi/builder/ensemble.py | 18 ++++++++++++++---- nengo_loihi/conv.py | 14 +++++++------- 5 files changed, 48 insertions(+), 23 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index a26564804..923443bdb 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -37,6 +37,8 @@ Release history - Build errors specify the associated objects, making them easier to debug. (`#289`_) - Deobfuscated NxSDK API calls. (`#320`_) +- The builder now respects the `precision.bits`_ attribute in ``nengorc`` files, + allowing for reduced-precision builds to save memory. (`#309`_) **Fixed** @@ -53,6 +55,7 @@ Release history .. _#312: https://github.com/nengo/nengo-loihi/pull/312 .. _#317: https://github.com/nengo/nengo-loihi/pull/317 .. _#320: https://github.com/nengo/nengo-loihi/pull/320 +.. 
_precision.bits: https://www.nengo.ai/nengo/nengorc.html#configuration-options 1.0.0 (January 20, 2021) ======================== diff --git a/nengo_loihi/block.py b/nengo_loihi/block.py index 430a91892..ddbee993b 100644 --- a/nengo_loihi/block.py +++ b/nengo_loihi/block.py @@ -154,19 +154,21 @@ class Compartment: def __init__(self, n_compartments, label=None): self.n_compartments = n_compartments self.label = label + # dtype must be float32, because of how we discretize in place to int32 + self.dtype = np.float32 # parameters specific to compartments/block - self.decay_u = np.ones(n_compartments, dtype=np.float32) + self.decay_u = np.ones(n_compartments, dtype=self.dtype) # ^ default to no filter - self.decay_v = np.zeros(n_compartments, dtype=np.float32) + self.decay_v = np.zeros(n_compartments, dtype=self.dtype) # ^ default to integration self.tau_s = None self.scale_u = True self.scale_v = False self.refract_delay = np.zeros(n_compartments, dtype=np.int32) - self.vth = np.zeros(n_compartments, dtype=np.float32) - self.bias = np.zeros(n_compartments, dtype=np.float32) + self.vth = np.zeros(n_compartments, dtype=self.dtype) + self.bias = np.zeros(n_compartments, dtype=self.dtype) self.enable_noise = np.zeros(n_compartments, dtype=bool) # parameters common to core @@ -683,9 +685,11 @@ def _set_weights_indices( self, weights, indices=None, - weight_dtype=np.float32, + weight_dtype=None, compression=0, ): + # must be float32, because of how we discretize in place to int32 + weight_dtype = np.float32 if weight_dtype is None else weight_dtype weights = [ np.array(w, copy=False, dtype=weight_dtype, ndmin=2) for w in weights ] diff --git a/nengo_loihi/builder/connection.py b/nengo_loihi/builder/connection.py index 29a81be68..d185e18c5 100644 --- a/nengo_loihi/builder/connection.py +++ b/nengo_loihi/builder/connection.py @@ -13,6 +13,7 @@ from nengo.connection import LearningRule from nengo.ensemble import Neurons from nengo.exceptions import BuildError, ValidationError +from nengo.rc import rc from nengo.solvers import Solver from nengo_loihi.block import Axon, LoihiBlock, Synapse @@ -521,6 +522,10 @@ def build_full_chip_connection(model, conn): # noqa: C901 if neuron_type is not None and hasattr(neuron_type, "amplitude"): weights = scale_matrix(weights, neuron_type.amplitude) + # to proper dtype + transform = transform.astype(rc.float_dtype) + weights = weights.astype(rc.float_dtype) + # loihi_weights has shape (in, out), to match the shape by block.Synapses loihi_weights = weights.T @@ -540,7 +545,7 @@ def build_full_chip_connection(model, conn): # noqa: C901 # use the same scaling as the ensemble does, to get good # decodes. Note that this assumes that the decoded value # is in the range -radius to radius, which is usually true. 
- gain = 1.0 / conn.pre_obj.radius + gain = np.array(1.0 / conn.pre_obj.radius, dtype=rc.float_dtype) decoder_block = LoihiBlock(2 * d, label="%s" % conn) decoder_block.compartment.configure_nonspiking( @@ -563,7 +568,8 @@ def build_full_chip_connection(model, conn): # noqa: C901 # use spiking decode neurons for on-chip connection if isinstance(conn.post_obj, Ensemble): # loihi encoders don't include radius, so handle scaling here - loihi_weights = scale_matrix(loihi_weights, 1.0 / conn.post_obj.radius) + gain = np.array(1.0 / conn.post_obj.radius, dtype=rc.float_dtype) + loihi_weights = scale_matrix(loihi_weights, gain) post_d = conn.post_obj.size_in post_inds = np.arange(post_d, dtype=np.int32)[post_slice] @@ -583,7 +589,7 @@ def build_full_chip_connection(model, conn): # noqa: C901 decoder_block.compartment.configure_filter(tau_s, dt=model.dt) post_tau = model.decode_tau - target_axons = -np.ones(pre_obj.n_neurons, dtype=int) + target_axons = -np.ones(pre_obj.n_neurons, dtype=np.int32) target_axons[pre_slice] = np.arange(target_axons[pre_slice].size) pre_slice = slice(None) @@ -662,7 +668,7 @@ def build_full_chip_connection(model, conn): # noqa: C901 post_obj.add_synapse(syn) model.objs[conn]["weights"] = syn - target_axons = -np.ones(mid_obj.n_neurons, dtype=int) + target_axons = -np.ones(mid_obj.n_neurons, dtype=np.int32) target_axons[pre_slice] = np.arange(target_axons[pre_slice].size) assert target_axons[pre_slice].size == n1 @@ -684,7 +690,8 @@ def build_full_chip_connection(model, conn): # noqa: C901 assert post_obj.n_neurons == n2 # loihi encoders don't include radius, so handle scaling here - loihi_weights = scale_matrix(loihi_weights, 1.0 / conn.post_obj.radius) + scale = np.array(1.0 / conn.post_obj.radius, dtype=rc.float_dtype) + loihi_weights = scale_matrix(loihi_weights, scale) syn = Synapse(n1, label="%s::decoder_weights" % conn) syn.set_weights(loihi_weights) @@ -783,6 +790,7 @@ def build_conv2d_connection(model, transform, conn): obj=conn.post_obj.ensemble, ) kernel = kernel * gain[0] + kernel = kernel.astype(rc.float_dtype) pop_type = model.config[conn].pop_type new_transform = copy.copy(transform) @@ -802,9 +810,9 @@ def build_conv2d_connection(model, transform, conn): "is therefore emulator-only." 
) - target_axons = -np.ones(pre_obj.n_neurons, dtype=int) + target_axons = -np.ones(pre_obj.n_neurons, dtype=np.int32) target_axons[conn.pre_slice] = pixel_idxs(input_shape) - atoms = np.zeros(pre_obj.n_neurons, dtype=int) + atoms = np.zeros(pre_obj.n_neurons, dtype=np.int32) atoms[conn.pre_slice] = channel_idxs(input_shape) ax = Axon(np.prod(input_shape.spatial_shape), label="conv2d_weights") diff --git a/nengo_loihi/builder/ensemble.py b/nengo_loihi/builder/ensemble.py index fc1cfd2c5..0393a452f 100644 --- a/nengo_loihi/builder/ensemble.py +++ b/nengo_loihi/builder/ensemble.py @@ -7,14 +7,16 @@ from nengo.builder.ensemble import BuiltEnsemble, gen_eval_points from nengo.dists import Distribution, get_samples from nengo.exceptions import BuildError +from nengo.rc import rc from nengo_loihi.block import LoihiBlock from nengo_loihi.builder.builder import Builder -def get_gain_bias(ens, rng=np.random, intercept_limit=1.0): +def get_gain_bias(ens, rng=np.random, intercept_limit=1.0, dtype=None): # Modified from the Nengo version to handle `intercept_limit` + dtype = rc.float_dtype if dtype is None else dtype if ens.gain is not None and ens.bias is not None: gain = get_samples(ens.gain, ens.n_neurons, rng=rng) bias = get_samples(ens.bias, ens.n_neurons, rng=rng) @@ -60,6 +62,11 @@ def get_gain_bias(ens, rng=np.random, intercept_limit=1.0): "by reducing the maximum intercept value to below 1." ) + dtype = rc.float_dtype + gain = gain.astype(dtype) if gain is not None else gain + bias = bias.astype(dtype) if bias is not None else bias + max_rates = max_rates.astype(dtype) if max_rates is not None else max_rates + intercepts = intercepts.astype(dtype) if intercepts is not None else intercepts return gain, bias, max_rates, intercepts @@ -71,13 +78,14 @@ def build_ensemble(model, ens): # Create random number generator rng = np.random.RandomState(model.seeds[ens]) - eval_points = gen_eval_points(ens, ens.eval_points, rng=rng) + eval_points = gen_eval_points(ens, ens.eval_points, rng=rng, dtype=rc.float_dtype) # Set up encoders if isinstance(ens.encoders, Distribution): encoders = get_samples(ens.encoders, ens.n_neurons, ens.dimensions, rng=rng) + encoders = np.asarray(encoders, dtype=rc.float_dtype) else: - encoders = npext.array(ens.encoders, min_dims=2, dtype=np.float64) + encoders = npext.array(ens.encoders, min_dims=2, dtype=rc.float_dtype) if ens.normalize_encoders: encoders /= npext.norm(encoders, axis=1, keepdims=True) @@ -90,7 +98,9 @@ def build_ensemble(model, ens): ) # Build the neurons - gain, bias, max_rates, intercepts = get_gain_bias(ens, rng, model.intercept_limit) + gain, bias, max_rates, intercepts = get_gain_bias( + ens, rng, intercept_limit=model.intercept_limit, dtype=rc.float_dtype + ) block = LoihiBlock(ens.n_neurons, label="%s" % ens) block.compartment.bias[:] = bias diff --git a/nengo_loihi/conv.py b/nengo_loihi/conv.py index 377019ada..f7e6688f6 100644 --- a/nengo_loihi/conv.py +++ b/nengo_loihi/conv.py @@ -155,8 +155,8 @@ def conv2d_loihi_weights(transform): weights = [] indices = [] # compartment offset (aka. 
compartment base) for each axon - offsets = np.zeros(input_rows * input_cols, dtype=int) - axon_to_weight_map = np.zeros(input_rows * input_cols, dtype=int) + offsets = np.zeros(input_rows * input_cols, dtype=np.int32) + axon_to_weight_map = np.zeros(input_rows * input_cols, dtype=np.int32) weights_map = {} for i, j in itertools.product(range(input_rows), range(input_cols)): ij = i * input_cols + j @@ -203,10 +203,10 @@ def conv2d_loihi_weights(transform): # --- determine indices # channel inds are zero, since we use same indices for each channel - channel_inds = np.zeros(n_channels, dtype=int) - row_inds = np.arange(wmask_i.sum()) - col_inds = np.arange(wmask_j.sum()) - filter_inds = np.arange(n_filters) + channel_inds = np.zeros(n_channels, dtype=np.int32) + row_inds = np.arange(wmask_i.sum(), dtype=np.int32) + col_inds = np.arange(wmask_j.sum(), dtype=np.int32) + filter_inds = np.arange(n_filters, dtype=np.int32) order = [channel_inds, row_inds, col_inds, filter_inds] shape = [n_channels, output_rows, output_cols, n_filters] @@ -217,7 +217,7 @@ def conv2d_loihi_weights(transform): shape = [shape[i] for i in (0, 3, 1, 2)] n = len(shape) - strides = [np.prod(shape[i + 1 :]) for i in range(n)] + strides = [np.prod(shape[i + 1 :], dtype=np.int32) for i in range(n)] # inds[i_0,...,i_{n-1}] = sum_{k=0}^{n-1} strides[k] * order[k][i_k] strided_inds = [ From c1d7e1066006e72edf224a55d1e97e8503154826 Mon Sep 17 00:00:00 2001 From: Eric Hunsberger Date: Mon, 2 Nov 2020 16:00:33 -0500 Subject: [PATCH 06/11] Save discretize info on compartment for reference --- CHANGES.rst | 2 ++ nengo_loihi/block.py | 2 ++ nengo_loihi/builder/discretize.py | 14 ++++++++++---- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 923443bdb..7624a4bda 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -32,6 +32,8 @@ Release history - Added ``Simulator.clear_probes`` to clear probe histories. This can help reduce memory usage during long runs, by running for a segment of the full run time, recording the relevant outputs, calling ``clear_probes``, and resuming the run. (`#303`_) +- ``Block`` now has a ``.discretize_info`` attribute that stores parameters used + for discretizing that block. 
(`#309`_) **Changed** diff --git a/nengo_loihi/block.py b/nengo_loihi/block.py index ddbee993b..18caf72ce 100644 --- a/nengo_loihi/block.py +++ b/nengo_loihi/block.py @@ -178,6 +178,8 @@ def __init__(self, n_compartments, label=None): self.noise_exp = 0 self.noise_at_membrane = 0 + self.discretize_info = None + def __str__(self): return "%s(%s)" % (type(self).__name__, self.label if self.label else "") diff --git a/nengo_loihi/builder/discretize.py b/nengo_loihi/builder/discretize.py index 7e1158560..462e0a617 100644 --- a/nengo_loihi/builder/discretize.py +++ b/nengo_loihi/builder/discretize.py @@ -242,10 +242,11 @@ def discretize_block(block): w_maxs = [s.max_abs_weight() for s in block.synapses] w_max = max(w_maxs) if len(w_maxs) > 0 else 0 - p = discretize_compartment(block.compartment, w_max) + info = discretize_compartment(block.compartment, w_max) for synapse in block.synapses: - discretize_synapse(synapse, w_max, p["w_scale"], p["w_exp"]) - return p["v_scale"] + discretize_synapse(synapse, w_max, info["w_scale"], info["w_exp"]) + + return info["v_scale"] def discretize_compartment(comp, w_max): @@ -363,7 +364,12 @@ def discretize_compartment(comp, w_max): vmaxe = np.clip(np.round((np.log2(vmax + 1) - 9) * 0.5), 0, 2 ** 3 - 1) comp.vmax = 2 ** (9 + 2 * vmaxe) - 1 - return dict(w_max=w_max, w_scale=w_scale, w_exp=w_exp, v_scale=v_scale) + info = dict( + w_max=w_max, w_exp=w_exp, v_scale=v_scale, b_scale=b_scale, w_scale=w_scale + ) + comp.discretize_info = info + + return info def discretize_synapse(synapse, w_max, w_scale, w_exp): From 9abc52d13c237b0d997943f0581f76bc906e671f Mon Sep 17 00:00:00 2001 From: Eric Hunsberger Date: Tue, 24 Nov 2020 16:34:50 -0500 Subject: [PATCH 07/11] Add connection_decode_neurons To map connections to DecodeNeurons so that users can see which connections have DecodeNeurons and get a handle to the relevant Ensemble or LoihiBlock if necessary. --- CHANGES.rst | 3 +++ nengo_loihi/builder/builder.py | 7 +++++-- nengo_loihi/builder/connection.py | 11 +++++----- nengo_loihi/builder/tests/test_builder.py | 25 +++++++++++++++++++++++ 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 7624a4bda..a15565c33 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -34,6 +34,9 @@ Release history relevant outputs, calling ``clear_probes``, and resuming the run. (`#303`_) - ``Block`` now has a ``.discretize_info`` attribute that stores parameters used for discretizing that block. (`#309`_) +- ``Model`` now has a ``connection_decode_neurons`` attribute that maps ``Connection`` + objects that require decode neurons to the corresponding ``Ensemble`` objects + implementing them. (`#309`_) **Changed** diff --git a/nengo_loihi/builder/builder.py b/nengo_loihi/builder/builder.py index 334c97715..956c55541 100644 --- a/nengo_loihi/builder/builder.py +++ b/nengo_loihi/builder/builder.py @@ -44,6 +44,9 @@ class Model: Attributes ---------- + connection_decode_neurons : dict of {Connection: Ensemble} + Map of each `nengo.Connection` that requires DecodeNeurons, to the + `nengo.Ensemble` that implements said DecodeNeurons. 
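+        For a connection implemented on chip, the value is instead the
+        `.LoihiBlock` realizing the decode neurons (see
+        ``test_connection_decode_neurons``).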
Build parameters @@ -127,12 +130,12 @@ def __init__(self, dt=0.001, label=None, builder=None): self.block_shapes = {} self.probes = [] - # Will be filled in by the simulator __init__ - self.split = None + self.connection_decode_neurons = {} # Will be filled in by the network builder self.toplevel = None self.config = None + self.split = None # Resources used by the build process self.objs = defaultdict(dict) # maps Nengo objects to Loihi objects diff --git a/nengo_loihi/builder/connection.py b/nengo_loihi/builder/connection.py index d185e18c5..dcc439441 100644 --- a/nengo_loihi/builder/connection.py +++ b/nengo_loihi/builder/connection.py @@ -208,6 +208,7 @@ def build_host_to_chip(model, conn): ens.label = None if conn.label is None else "%s_ens" % conn.label _inherit_seed(host, ens, model, conn) host.build(ens) + model.connection_decode_neurons[conn] = ens pre2ens = Connection( conn.pre, @@ -552,8 +553,6 @@ def build_full_chip_connection(model, conn): # noqa: C901 dt=model.dt, vth=model.vth_nonspiking ) decoder_block.compartment.bias[:] = 0 - model.add_block(decoder_block) - model.objs[conn]["decoded"] = decoder_block dec_syn = Synapse(n, label="probe_decoders") weights2 = stack_matrices( @@ -563,7 +562,6 @@ def build_full_chip_connection(model, conn): # noqa: C901 dec_syn.set_weights(weights2) decoder_block.add_synapse(dec_syn) - model.objs[conn]["decoders"] = dec_syn else: # use spiking decode neurons for on-chip connection if isinstance(conn.post_obj, Ensemble): @@ -581,9 +579,10 @@ def build_full_chip_connection(model, conn): # noqa: C901 loihi_weights, block_label="%s" % conn, syn_label="decoders" ) - model.add_block(decoder_block) - model.objs[conn]["decoded"] = decoder_block - model.objs[conn]["decoders"] = dec_syn + model.add_block(decoder_block) + model.objs[conn]["decoded"] = decoder_block + model.objs[conn]["decoders"] = dec_syn + model.connection_decode_neurons[conn] = decoder_block # use tau_s for filter into decode neurons, decode_tau for filter out decoder_block.compartment.configure_filter(tau_s, dt=model.dt) diff --git a/nengo_loihi/builder/tests/test_builder.py b/nengo_loihi/builder/tests/test_builder.py index 14cd2d9d2..84dbfcdd3 100644 --- a/nengo_loihi/builder/tests/test_builder.py +++ b/nengo_loihi/builder/tests/test_builder.py @@ -57,3 +57,28 @@ def test_probemap_bad_type_error(Simulator, monkeypatch): def test_builder_strings(): model = Model(label="myModel") assert str(model) == "Model(myModel)" + + +@pytest.mark.parametrize("a_on_chip", [True, False]) +def test_connection_decode_neurons(a_on_chip, Simulator): + with nengo.Network() as net: + nengo_loihi.add_params(net) + + u = nengo.Node([1], label="u") + a = nengo.Ensemble(100, 1, label="a") + net.config[a].on_chip = a_on_chip + b = nengo.Ensemble(100, 1, label="b") + probe1 = nengo.Probe(b) + nengo.Probe(b.neurons) + conn1 = nengo.Connection(u, a) + conn2 = nengo.Connection(a, b) + + with Simulator(net) as sim: + dic = sim.model.connection_decode_neurons + assert isinstance(dic.pop(conn1 if a_on_chip else conn2), nengo.Ensemble) + if a_on_chip: + assert isinstance(dic.pop(conn2), nengo_loihi.block.LoihiBlock) + + conn3, dec3 = dic.popitem() + assert conn3.pre == b and conn3.post == probe1 + assert len(dic) == 0 From 8757458d3c50985739c2949aa38b5e745b18fae6 Mon Sep 17 00:00:00 2001 From: Eric Hunsberger Date: Mon, 9 Nov 2020 15:22:55 -0500 Subject: [PATCH 08/11] Add GreedyComms allocator To reduce inter-chip communication. 
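A rough usage sketch (``net`` is any Nengo network here, and the chip count
and run time are illustrative; the allocator is passed through the simulator's
``hardware_options``, and ``ensemble_rates`` is optional):

    import nengo_loihi
    from nengo_loihi.hardware.allocators import GreedyComms

    with nengo_loihi.Simulator(
        net,
        target="loihi",
        hardware_options={"n_chips": 2, "allocator": GreedyComms()},
    ) as sim:
        sim.run(1.0)

If estimated firing rates are available for each ensemble, passing them as
``GreedyComms(ensemble_rates=rates)`` weights the inter-chip traffic by
expected spike counts rather than by axon counts alone.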
--- CHANGES.rst | 2 + nengo_loihi/builder/builder.py | 1 + nengo_loihi/builder/network.py | 7 +- nengo_loihi/hardware/allocators.py | 201 ++++++++++++++++++ nengo_loihi/hardware/tests/test_allocators.py | 55 ++++- 5 files changed, 264 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index a15565c33..4b07dceb6 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -37,6 +37,8 @@ Release history - ``Model`` now has a ``connection_decode_neurons`` attribute that maps ``Connection`` objects that require decode neurons to the corresponding ``Ensemble`` objects implementing them. (`#309`_) +- Added the ``GreedyComms`` allocator, which reduces inter-chip communication, speeding + up networks with high traffic between chips. (`#309`_) **Changed** diff --git a/nengo_loihi/builder/builder.py b/nengo_loihi/builder/builder.py index 956c55541..09a2b8234 100644 --- a/nengo_loihi/builder/builder.py +++ b/nengo_loihi/builder/builder.py @@ -129,6 +129,7 @@ def __init__(self, dt=0.001, label=None, builder=None): self.blocks = OrderedDict() self.block_shapes = {} self.probes = [] + self.block_comp_map = {} self.connection_decode_neurons = {} diff --git a/nengo_loihi/builder/network.py b/nengo_loihi/builder/network.py index 145ce872c..5ed946ee8 100644 --- a/nengo_loihi/builder/network.py +++ b/nengo_loihi/builder/network.py @@ -40,7 +40,12 @@ def build_network( model.build(conn) # Split blocks into blocks that will fit on cores - split_model(model) + block_map = split_model(model) + model.block_comp_map = { + new_block: comp_idxs + for old_block, new_blocks in block_map.items() + for new_block, comp_idxs in new_blocks.items() + } if discretize: discretize_model(model) diff --git a/nengo_loihi/hardware/allocators.py b/nengo_loihi/hardware/allocators.py index 99697a05d..a2bc2448e 100644 --- a/nengo_loihi/hardware/allocators.py +++ b/nengo_loihi/hardware/allocators.py @@ -214,3 +214,204 @@ def get_chip(i): logger.info("Round-robin allocation across %d chips", board.n_chips) return board + + +def ens_to_block_rates(model, ens_rates): + block_rates = {} + for ens, rates in ens_rates.items(): + if ens not in model.objs: + if ens in model.host_pre.sig or ens in model.host.sig: + continue # this ensemble is not on chip, so skip it + + raise ValueError(f"Ensemble {ens} does not appear in the model") + + assert len(rates) == ens.n_neurons + blocks = model.objs[ens]["out"] + blocks = blocks if isinstance(blocks, (list, tuple)) else [blocks] + + for block in blocks: + comp_idxs = model.block_comp_map.get(block, None) + if comp_idxs is None: + assert len(blocks) == 1 + assert block.compartment.n_compartments == ens.n_neurons + block_rates[block] = rates + else: + block_rates[block] = rates[comp_idxs] + + return block_rates + + +def compute_block_conns(block_map, block_rates=None, conns_in=False): # noqa: C901 + # --- store number of axons from block i to block j + block_conns = {k: {} for k in block_map} + if conns_in: + block_conns_in = {k: {} for k in block_map} + + synapse_block_map = {} + for i, block_i in block_map.items(): + for synapse in block_i.synapses: + assert id(synapse) not in synapse_block_map + synapse_block_map[id(synapse)] = i + + for i, block_i in block_map.items(): + for axon in block_i.axons: + j = synapse_block_map[id(axon.target)] + + if i == j: + # don't care about self connections + continue + + # use non-zero value as default, so that even if all rates are zero, this + # still gets recognized as a connection from i to j + block_conns[i].setdefault(j, 1e-16) + if conns_in: + 
block_conns_in[j].setdefault(i, 1e-16) + + if block_rates is None: + val = axon.n_axons + elif block_i not in block_rates: + raise KeyError(f"block {block_i} not in block_rates") + else: + rates = block_rates[block_i] + comp_idxs = np.arange(block_i.compartment.n_compartments) + axon_ids = axon.map_axon(comp_idxs) + assert axon_ids.size == rates.size + val = rates[axon_ids >= 0].sum() + + block_conns[i][j] += val + if conns_in: + block_conns_in[j][i] += val + + return (block_conns, block_conns_in) if conns_in else block_conns + + +def measure_interchip_conns(board, block_rates=None): + i = 0 + block_map = {} + block_chip = {} + for chip in board.chips: + chip_idx = board.chip_idxs[chip] + for core in chip.cores: + # core_idx = chip.core_idxs[core] + for block in core.blocks: + block_map[i] = block + block_chip[i] = chip_idx + i += 1 + + block_conns = compute_block_conns(block_map, block_rates=block_rates) + + stats = {"interchip": 0, "intrachip": 0} + stats["interchip_pairs"] = [] + stats["intrachip_pairs"] = [] + for i, block in block_map.items(): + chip_idx_i = block_chip[i] + for j, weight in block_conns[i].items(): + if i == j: + continue + + chip_idx_j = block_chip[j] + key = "intrachip" if chip_idx_i == chip_idx_j else "interchip" + stats[key] += weight + stats[f"{key}_pairs"].append((block_map[i], block_map[j])) + + return stats + + +class GreedyComms(Greedy): + """Assigns each block to a core, using as few chips as possible, minimizing comms. + + A variant of the `.Greedy` allocator that also minimizes inter-chip communication. + + Starts by arbitrarily assigning a block to a chip. Then adds the block that has the + most communication with the first block to that same chip. Continue adding blocks + with the most communication to already placed blocks, until the chip is full. Then + start a new chip using the block with the least communication. 
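+
+    Parameters
+    ----------
+    cores_per_chip : int
+        Maximum number of cores to use on each chip (at most 128 on current
+        hardware).
+    ensemble_rates : dict of {nengo.Ensemble: ndarray}, optional
+        Estimated firing rates of the neurons in each ensemble. If provided,
+        inter-chip traffic is weighted by expected spike counts rather than
+        by axon counts alone.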
+ """ + + def __init__(self, cores_per_chip=128, ensemble_rates=None): + super().__init__(cores_per_chip=cores_per_chip) + self.ensemble_rates = ensemble_rates + + def __call__(self, model, n_chips): # noqa: C901 + block_map = dict(enumerate(model.blocks)) + block_rates = ( + ens_to_block_rates(model, self.ensemble_rates) + if self.ensemble_rates is not None + else None + ) + block_conns_out, block_conns_in = compute_block_conns( + block_map, block_rates=block_rates, conns_in=True + ) + + # find blocks with no pre block + no_pre_blocks = [] + for i in block_map: + if sum(v for v in block_conns_in[i].values()) == 0: + no_pre_blocks.append(i) + + # --- create board + board = Board() + + # add inputs to board + for input in model.inputs: + self.input_to_board(input, board) + + # --- add blocks to chips + chip = None + unallocated_blocks = set(block_map) + + while len(unallocated_blocks) > 0: + if chip is None or len(chip.cores) == self.cores_per_chip: + assert ( + len(board.chips) < n_chips + ), f"The network needs more chips than requested ({n_chips})" + + # start a new chip + chip = board.new_chip() + + # choose a no-pre block, if possible + for block_idx in no_pre_blocks: + if block_idx in unallocated_blocks: + break + else: + block_idx = next(iter(unallocated_blocks)) + + chip_blocks = set() + else: + # choose the block with the largest connection to blocks on this chip + block_idx = -1 + max_conn = 0 + for i in chip_blocks: + for j in unallocated_blocks.intersection(block_conns_out[i]): + ij = block_conns_out[i][j] + if ij > max_conn: + max_conn = ij + block_idx = j + + for j in unallocated_blocks.intersection(block_conns_in[i]): + ij = block_conns_in[i][j] + if ij > max_conn: + max_conn = ij + block_idx = j + + if block_idx < 0: + # none of the remaining blocks connect to blocks on this chip, + # so pick a no-pre block if possible, otherwise any block will do. 
+ for block_idx in no_pre_blocks: + if block_idx in unallocated_blocks: + break + else: + block_idx = next(iter(unallocated_blocks)) + + block = block_map[block_idx] + self.block_to_new_core(block, chip) + + chip_blocks.add(block_idx) + unallocated_blocks.remove(block_idx) + + # add probes + board.probes.extend(model.probes) + + logger.info("GreedyComms allocation across %d chips", board.n_chips) + + return board diff --git a/nengo_loihi/hardware/tests/test_allocators.py b/nengo_loihi/hardware/tests/test_allocators.py index 27e4f9f57..94655aa81 100644 --- a/nengo_loihi/hardware/tests/test_allocators.py +++ b/nengo_loihi/hardware/tests/test_allocators.py @@ -7,7 +7,14 @@ from nengo_loihi.block import Axon, LoihiBlock, Synapse from nengo_loihi.builder import Model from nengo_loihi.builder.discretize import discretize_model -from nengo_loihi.hardware.allocators import Greedy, RoundRobin, core_stdp_pre_cfgs +from nengo_loihi.hardware.allocators import ( + Greedy, + GreedyComms, + RoundRobin, + core_stdp_pre_cfgs, + ens_to_block_rates, + measure_interchip_conns, +) from nengo_loihi.hardware.nxsdk_objects import Board from nengo_loihi.inputs import LoihiInput @@ -163,6 +170,52 @@ def test_greedy_chip_allocator_cfg_check(): Greedy(cores_per_chip=130)(model, n_chips=4) +@pytest.mark.parametrize("Allocator", [GreedyComms]) +def test_comms_allocators(Allocator, Simulator): + rng = np.random.RandomState(1) # same seed for all allocators, to compare + with nengo.Network(seed=0) as net: + n_ensembles = 256 + n_neurons = rng.randint(64, 256, size=n_ensembles) + ensembles = [nengo.Ensemble(n, dimensions=1) for n in n_neurons] + + conn_pairs = rng.randint(0, n_ensembles, size=(2 * n_ensembles, 2)) + for i, j in conn_pairs: + ei, ej = ensembles[i].neurons, ensembles[j].neurons + nengo.Connection( + ei, + ej, + transform=rng.uniform(-0.1, 0.1, size=(ej.size_in, ei.size_out)), + ) + + ens_rates = { + ensemble: rng.uniform(1, 100, size=1) + * rng.uniform(0.9, 1, size=ensemble.n_neurons) + for ensemble in ensembles + } + + with Simulator(net, target="sim") as sim: + model = sim.model + n_chips = 3 + block_rates = ens_to_block_rates(model, ens_rates) + board_norates = Allocator()(model, n_chips=n_chips) + board_rates = Allocator(ensemble_rates=ens_rates)(model, n_chips=n_chips) + + norates_axons = measure_interchip_conns(board_norates) + norates_spikes = measure_interchip_conns(board_norates, block_rates=block_rates) + rates_axons = measure_interchip_conns(board_rates) + rates_spikes = measure_interchip_conns(board_rates, block_rates=block_rates) + + print( + f"No rates: {norates_axons['interchip']} axons, " + f"{norates_spikes['interchip']} spikes" + ) + print( + f"Rates: {rates_axons['interchip']} axons, {rates_spikes['interchip']} spikes" + ) + assert norates_axons["interchip"] < rates_axons["interchip"] + assert rates_spikes["interchip"] < norates_spikes["interchip"] + + @pytest.mark.slow @pytest.mark.target_loihi def test_deterministic_network_allocation(Simulator, seed): From 7cf1281ebc3918d252da00865632932d656bd607 Mon Sep 17 00:00:00 2001 From: Eric Hunsberger Date: Tue, 17 Nov 2020 14:01:37 -0500 Subject: [PATCH 09/11] Add PartitionComms allocator --- CHANGES.rst | 6 +- nengo_loihi/hardware/allocators.py | 85 +++++++++++++++++++ nengo_loihi/hardware/tests/test_allocators.py | 6 +- 3 files changed, 94 insertions(+), 3 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 4b07dceb6..aeea53076 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -37,8 +37,10 @@ Release history - ``Model`` now has a 
``connection_decode_neurons`` attribute that maps ``Connection`` objects that require decode neurons to the corresponding ``Ensemble`` objects implementing them. (`#309`_) -- Added the ``GreedyComms`` allocator, which reduces inter-chip communication, speeding - up networks with high traffic between chips. (`#309`_) +- Added the ``GreedyComms`` and ``PartitionComms`` allocators, which reduce inter-chip + communication, speeding up networks with high traffic between chips. + ``PartitionComms`` typically finds a more optimal partitioning than ``GreedyComms``, + but does require the ``nxmetis`` package. (`#309`_) **Changed** diff --git a/nengo_loihi/hardware/allocators.py b/nengo_loihi/hardware/allocators.py index a2bc2448e..0c8c2665f 100644 --- a/nengo_loihi/hardware/allocators.py +++ b/nengo_loihi/hardware/allocators.py @@ -415,3 +415,88 @@ def __call__(self, model, n_chips): # noqa: C901 logger.info("GreedyComms allocation across %d chips", board.n_chips) return board + + +class PartitionComms(Allocator): + """Uses METIS partitioner to spread blocks across all chips, minimizing comms. + + Spreads blocks equally across cores and minimizes inter-chip communication. + + Requires `nxmetis `. + """ + + # TODO: + # - Potentially allow more blocks on one chip (i.e. unbalanced partitioning), + # if it will improve communication. Unclear if nxmetis supports this. + # - Check that partitioning is always balanced, and that no chips + # will have too many cores. Initial tests show that it is always balanced. + + def __init__(self, ensemble_rates=None, rate_scale=1): + import networkx # pylint: disable=import-outside-toplevel + import nxmetis # pylint: disable=import-outside-toplevel + + super().__init__() + self.ensemble_rates = ensemble_rates + self.rate_scale = rate_scale + + self.networkx = networkx + self.nxmetis = nxmetis + + def __call__(self, model, n_chips): + block_map = dict(enumerate(model.blocks)) + + block_rates = None + if self.ensemble_rates is not None: + block_rates = ens_to_block_rates(model, self.ensemble_rates) + block_rates = { + block: np.round(rate * self.rate_scale) + for block, rate in block_rates.items() + } + + block_conns = compute_block_conns(block_map, block_rates=block_rates) + + # partition graph + G = self.networkx.Graph() + G.add_nodes_from(block_map.keys()) + + edge_map = set() + for i in block_map: + for j, val in block_conns[i].items(): + if (i, j) in edge_map or (j, i) in edge_map: + continue + + val = val + block_conns[j].get(i, 0) + # G.add_edge(i, j, weight=float(val)) + G.add_edge(i, j, weight=int(round(val))) # weights must be integers + edge_map.add((i, j)) + edge_map.add((j, i)) + + objval, parts = self.nxmetis.partition(G, nparts=int(n_chips)) + + for i, part in enumerate(parts): + if len(part) > 128: + raise ValueError( + f"Partition {i} has {len(part)} cores, " + "which exceeds the available 128 cores" + ) + + # --- create board + board = Board() + + # add inputs to board + for input in model.inputs: + self.input_to_board(input, board) + + # blocks to chips + for part in parts: + chip = board.new_chip() + for block_idx in part: + block = block_map[block_idx] + self.block_to_new_core(block, chip) + + # add probes + board.probes.extend(model.probes) + + logger.info("METIS allocation across %d chips", board.n_chips) + + return board diff --git a/nengo_loihi/hardware/tests/test_allocators.py b/nengo_loihi/hardware/tests/test_allocators.py index 94655aa81..1304320f3 100644 --- a/nengo_loihi/hardware/tests/test_allocators.py +++ 
b/nengo_loihi/hardware/tests/test_allocators.py @@ -10,6 +10,7 @@ from nengo_loihi.hardware.allocators import ( Greedy, GreedyComms, + PartitionComms, RoundRobin, core_stdp_pre_cfgs, ens_to_block_rates, @@ -170,8 +171,11 @@ def test_greedy_chip_allocator_cfg_check(): Greedy(cores_per_chip=130)(model, n_chips=4) -@pytest.mark.parametrize("Allocator", [GreedyComms]) +@pytest.mark.parametrize("Allocator", [GreedyComms, PartitionComms]) def test_comms_allocators(Allocator, Simulator): + if Allocator is PartitionComms: + pytest.importorskip("nxmetis") + rng = np.random.RandomState(1) # same seed for all allocators, to compare with nengo.Network(seed=0) as net: n_ensembles = 256 From 7732a14cd5d3f1891dfe33361d5d799f8d2ef729 Mon Sep 17 00:00:00 2001 From: Eric Hunsberger Date: Wed, 1 Dec 2021 12:21:13 -0500 Subject: [PATCH 10/11] Fix NxSDK import To ensure that HAS_NXSDK is never false if nxsdk is installed. --- nengo_loihi/hardware/nxsdk_shim.py | 50 ++++++++++++++---------------- 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/nengo_loihi/hardware/nxsdk_shim.py b/nengo_loihi/hardware/nxsdk_shim.py index 193f9c550..02ccf39e1 100644 --- a/nengo_loihi/hardware/nxsdk_shim.py +++ b/nengo_loihi/hardware/nxsdk_shim.py @@ -29,8 +29,6 @@ def parse_nxsdk_version(nxsdk): nxsdk_dir = os.path.realpath(os.path.join(os.path.dirname(nxsdk.__file__), "..")) nxsdk_version = parse_nxsdk_version(nxsdk) - import nxsdk.graph.graph as snip_maker - def assert_nxsdk(): pass @@ -40,7 +38,6 @@ def assert_nxsdk(): nxsdk = None nxsdk_dir = None nxsdk_version = None - snip_maker = None exception = sys.exc_info()[1] @@ -49,6 +46,29 @@ def assert_nxsdk(exception=exception): if HAS_NXSDK: # noqa: C901 + import nxsdk.compiler.microcodegen.interface as micro_gen + import nxsdk.graph.graph as snip_maker + from nxsdk.graph.nxinputgen.nxinputgen import BasicSpikeGenerator as SpikeGen + from nxsdk.graph.nxprobes import N2SpikeProbe as SpikeProbe + from nxsdk.graph.processes.phase_enums import Phase as SnipPhase + + try: + # try new location (nxsdk > 0.9.0) + from nxsdk.arch.n2a.compiler.tracecfggen.tracecfggen import ( + TraceCfgGen as TraceConfigGenerator, + ) + except ImportError: # pragma: no cover + # try old location (nxsdk <= 0.9.0) + from nxsdk.compiler.tracecfggen.tracecfggen import ( + TraceCfgGen as TraceConfigGenerator, + ) + + try: + # try new location (nxsdk >= 1.0.0) + from nxsdk.arch.n2a.n2board import N2Board as NxsdkBoard + except ImportError: # pragma: no cover + # try old location (nxsdk < 1.0.0) + from nxsdk.graph.nxboard import N2Board as NxsdkBoard class SnipMaker(snip_maker.Graph): """Patch of the snip process manager that is multiprocess safe.""" @@ -115,30 +135,6 @@ def createSnip(self, phase, *args, **kwargs): return super().createSnip(phase, *args, **kwargs) snip_maker.Graph = SnipMaker - - import nxsdk.compiler.microcodegen.interface as micro_gen - - try: - # try new location (nxsdk > 0.9.0) - from nxsdk.arch.n2a.compiler.tracecfggen.tracecfggen import ( - TraceCfgGen as TraceConfigGenerator, - ) - except ImportError: # pragma: no cover - # try old location (nxsdk <= 0.9.0) - from nxsdk.compiler.tracecfggen.tracecfggen import ( - TraceCfgGen as TraceConfigGenerator, - ) - - try: - # try new location (nxsdk >= 1.0.0) - from nxsdk.arch.n2a.n2board import N2Board as NxsdkBoard - except ImportError: # pragma: no cover - # try old location (nxsdk < 1.0.0) - from nxsdk.graph.nxboard import N2Board as NxsdkBoard - - from nxsdk.graph.nxinputgen.nxinputgen import BasicSpikeGenerator as SpikeGen - 
from nxsdk.graph.nxprobes import N2SpikeProbe as SpikeProbe
-    from nxsdk.graph.processes.phase_enums import Phase as SnipPhase
 else:
     SnipMaker = None
     micro_gen = None

From 4c42c65d81d5a0abfc10c5a1f3520bda22bd0bc8 Mon Sep 17 00:00:00 2001
From: Eric Hunsberger
Date: Tue, 10 Nov 2020 13:49:06 -0500
Subject: [PATCH 11/11] Add LoihiRectifiedLinear

---
 CHANGES.rst                       |   3 +
 nengo_loihi/builder/nengo_dl.py   |  34 ++++++++--
 nengo_loihi/neurons.py            |  13 ++++
 nengo_loihi/tests/test_neurons.py | 101 ++++++++++++++----------------
 4 files changed, 91 insertions(+), 60 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index aeea53076..a51c1a1f5 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -41,6 +41,9 @@ Release history
   communication, speeding up networks with high traffic between chips.
   ``PartitionComms`` typically finds a more optimal partitioning than ``GreedyComms``,
   but does require the ``nxmetis`` package. (`#309`_)
+- Added the ``LoihiRectifiedLinear`` neuron type to train deep networks for Loihi using
+  Nengo or NengoDL. It is a rate neuron type and thus must ultimately be swapped for
+  ``LoihiSpikingRectifiedLinear`` to run on Loihi. (`#309`_)
 
 **Changed**
 
diff --git a/nengo_loihi/builder/nengo_dl.py b/nengo_loihi/builder/nengo_dl.py
index 978fff2b8..3b1051d78 100644
--- a/nengo_loihi/builder/nengo_dl.py
+++ b/nengo_loihi/builder/nengo_dl.py
@@ -7,6 +7,7 @@
 from nengo_loihi.neurons import (
     AlphaRCNoise,
     LoihiLIF,
+    LoihiRectifiedLinear,
     LoihiSpikingRectifiedLinear,
     LowpassRCNoise,
     discretize_tau_rc,
@@ -16,7 +17,7 @@
 if HAS_DL:
     import nengo_dl
     import tensorflow as tf
-    from nengo_dl.neuron_builders import LIFBuilder, SpikingRectifiedLinearBuilder
+    from nengo_dl.neuron_builders import LIFBuilder, TFNeuronBuilder
 else:  # pragma: no cover
     # Empty classes so that we can define the subclasses even though
     # we will never use them, as they are only used in the `install`
@@ -24,7 +25,7 @@
     class LIFBuilder:
         pass
 
-    class SpikingRectifiedLinearBuilder:
+    class TFNeuronBuilder:
         pass
@@ -260,8 +261,8 @@ def step(self, J, dt, voltage, refractory_time):
         )
 
 
-class LoihiSpikingRectifiedLinearBuilder(SpikingRectifiedLinearBuilder):
-    """nengo_dl builder for the LoihiSpikingRectifiedLinear neuron type."""
+class LoihiRectifiedLinearBuilder(TFNeuronBuilder):
+    """nengo_dl builder for the LoihiRectifiedLinear neuron type."""
 
     def build_pre(self, signals, config):
         super().build_pre(signals, config)
@@ -276,7 +277,7 @@
         self.zero = signals.zero
         self.one = signals.one
 
-    def training_step(self, J, dt, **state):
+    def step(self, J, dt):
         # Since LoihiLIF takes `ceil(period/dt)` the firing rate is
         # always below the LIF rate. Using `tau_ref1` in LIF curve makes
         # it the average of the LoihiLIF curve (rather than upper bound). 
@@ -297,6 +298,23 @@ def training_step(self, J, dt, **state): # loihi_rates on forward pass, rates on backwards return rates + tf.stop_gradient(loihi_rates - rates) + +class LoihiSpikingRectifiedLinearBuilder(LoihiRectifiedLinearBuilder): + """nengo_dl builder for the LoihiSpikingRectifiedLinear neuron type.""" + + def build_pre(self, signals, config): + super().build_pre(signals, config) + + self.zeros = tf.zeros( + (signals.minibatch_size,) + self.J_data.shape, signals.dtype + ) + + self.epsilon = tf.constant(1e-15, dtype=signals.dtype) + + # copy these so that they're easily accessible in _step functions + self.zero = signals.zero + self.one = signals.one + def step(self, J, dt, voltage): voltage += J * dt spiked = voltage > self.one @@ -308,6 +326,9 @@ def step(self, J, dt, voltage): # being used at all) return tf.stop_gradient(spikes), tf.stop_gradient(voltage) + def training_step(self, J, dt, **state): + return super().step(J, dt) + class Installer: def __init__(self): @@ -323,6 +344,9 @@ def __call__(self): nengo_dl.neuron_builders.SimNeuronsBuilder.TF_NEURON_IMPL[ LoihiLIF ] = LoihiLIFBuilder + nengo_dl.neuron_builders.SimNeuronsBuilder.TF_NEURON_IMPL[ + LoihiRectifiedLinear + ] = LoihiRectifiedLinearBuilder nengo_dl.neuron_builders.SimNeuronsBuilder.TF_NEURON_IMPL[ LoihiSpikingRectifiedLinear ] = LoihiSpikingRectifiedLinearBuilder diff --git a/nengo_loihi/neurons.py b/nengo_loihi/neurons.py index c3fa94d5c..c49ae6355 100644 --- a/nengo_loihi/neurons.py +++ b/nengo_loihi/neurons.py @@ -184,6 +184,19 @@ def step(self, dt, J, output, voltage, refractory_time): refractory_time[spikes_mask] = tau_ref + dt +class LoihiRectifiedLinear(RectifiedLinear): + def __init__(self, amplitude=1, **kwargs): + super().__init__(amplitude=amplitude, **kwargs) + _install_dl_builders() + + def rates(self, x, gain, bias, dt=0.001): + return loihi_spikingrectifiedlinear_rates(self, x, gain, bias, dt) + + def step(self, dt, J, output): + output[:] = 0 + output[J > 0] = (self.amplitude / dt) / np.ceil(np.reciprocal(dt * J[J > 0])) + + class LoihiSpikingRectifiedLinear(SpikingRectifiedLinear): """Simulate spiking rectified linear neurons as done by Loihi. 
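+
+    For training in Nengo or NengoDL, `.LoihiRectifiedLinear` provides a
+    rate-based approximation of this neuron type.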
diff --git a/nengo_loihi/tests/test_neurons.py b/nengo_loihi/tests/test_neurons.py index c9d02b0d2..a7ce60c3b 100644 --- a/nengo_loihi/tests/test_neurons.py +++ b/nengo_loihi/tests/test_neurons.py @@ -9,6 +9,7 @@ from nengo_loihi.neurons import ( AlphaRCNoise, LoihiLIF, + LoihiRectifiedLinear, LoihiSpikingRectifiedLinear, LowpassRCNoise, discretize_tau_rc, @@ -107,19 +108,35 @@ def test_loihi_rates_other_type(neuron_type, allclose): assert allclose(rates, ref_rates) -@pytest.mark.parametrize("neuron_type", [LoihiLIF(), LoihiSpikingRectifiedLinear()]) -def test_loihi_neurons(neuron_type, Simulator, plt, allclose): +@pytest.mark.parametrize( # noqa: C901 + "NeuronType", [LoihiLIF, LoihiRectifiedLinear, LoihiSpikingRectifiedLinear] +) +@pytest.mark.parametrize("inference_only", [True, False] if HAS_DL else [None]) +def test_loihi_neurons( + NeuronType, inference_only, Simulator, plt, allclose, monkeypatch +): + if HAS_DL: + # "uninstall" NengoDL builders to make sure each neuron type reinstalls them + monkeypatch.setattr(install_dl_builders, "installed", False) + + neuron_type = NeuronType() + if HAS_DL: + assert install_dl_builders.installed + dt = 0.0007 n = 256 encoders = np.ones((n, 1)) gain = np.zeros(n) - if isinstance(neuron_type, nengo.SpikingRectifiedLinear): + if isinstance(neuron_type, nengo.RectifiedLinear): bias = np.linspace(0, 1001, n) else: bias = np.linspace(0, 30, n) with nengo.Network() as model: + if HAS_DL: + nengo_dl.configure_settings(inference_only=inference_only) + ens = nengo.Ensemble( n, 1, neuron_type=neuron_type, encoders=encoders, gain=gain, bias=bias ) @@ -129,69 +146,43 @@ def test_loihi_neurons(neuron_type, Simulator, plt, allclose): with nengo.Simulator(model, dt=dt) as nengo_sim: nengo_sim.run(t_final) - with Simulator(model, dt=dt) as loihi_sim: - loihi_sim.run(t_final) - - rates_nengosim = np.sum(nengo_sim.data[probe] > 0, axis=0) / t_final - rates_loihisim = np.sum(loihi_sim.data[probe] > 0, axis=0) / t_final - - rates_ref = neuron_type.rates(0.0, gain, bias, dt=dt).squeeze() - plt.plot(bias, rates_loihisim, "r", label="loihi sim") - plt.plot(bias, rates_nengosim, "b-.", label="nengo sim") - plt.plot(bias, rates_ref, "k--", label="ref") - plt.legend(loc="best") + rates_nengosim = nengo_sim.data[probe].mean(axis=0) - assert rates_ref.shape == rates_nengosim.shape == rates_loihisim.shape - atol = 1.0 / t_final # the fundamental unit for our rates - assert allclose(rates_nengosim, rates_ref, atol=atol, rtol=0, xtol=1) - assert allclose(rates_loihisim, rates_ref, atol=atol, rtol=0, xtol=1) + rates_dlsim = None + if HAS_DL: + with nengo_dl.Simulator(model, dt=dt) as dl_sim: + dl_sim.run(t_final) + rates_dlsim = dl_sim.data[probe].mean(axis=0) -@pytest.mark.skipif(not HAS_DL, reason="requires nengo-dl") -@pytest.mark.parametrize("neuron_type", [LoihiLIF(), LoihiSpikingRectifiedLinear()]) -@pytest.mark.parametrize("inference_only", (True, False)) -def test_nengo_dl_neurons(neuron_type, inference_only, Simulator, plt, allclose): - install_dl_builders() + rates_loihisim = None + if type(neuron_type) in (LoihiLIF, LoihiSpikingRectifiedLinear): + with Simulator(model, dt=dt) as loihi_sim: + loihi_sim.run(t_final) - dt = 0.0007 - - n = 256 - encoders = np.ones((n, 1)) - gain = np.zeros(n) - if isinstance(neuron_type, nengo.SpikingRectifiedLinear): - bias = np.linspace(0, 1001, n) - else: - bias = np.linspace(0, 30, n) - - with nengo.Network() as model: - nengo_dl.configure_settings(inference_only=inference_only) - - a = nengo.Ensemble( - n, 1, 
neuron_type=neuron_type, encoders=encoders, gain=gain, bias=bias - ) - ap = nengo.Probe(a.neurons) - - t_final = 1.0 - with nengo_dl.Simulator(model, dt=dt) as dl_sim: - dl_sim.run(t_final) - - with Simulator(model, dt=dt) as loihi_sim: - loihi_sim.run(t_final) - - rates_dlsim = (dl_sim.data[ap] > 0).sum(axis=0) / t_final - rates_loihisim = (loihi_sim.data[ap] > 0).sum(axis=0) / t_final + rates_loihisim = loihi_sim.data[probe].mean(axis=0) zeros = np.zeros((1, gain.size)) rates_ref = neuron_type.rates(zeros, gain, bias, dt=dt).squeeze(axis=0) - plt.plot(bias, rates_loihisim, "r", label="loihi sim") - plt.plot(bias, rates_dlsim, "b-.", label="dl sim") + + # plot + if rates_loihisim is not None: + plt.plot(bias, rates_loihisim, "r", label="loihi sim") + if rates_dlsim is not None: + plt.plot(bias, rates_dlsim, "g-.", label="dl sim") + plt.plot(bias, rates_nengosim, "b:", label="nengo sim") plt.plot(bias, rates_ref, "k--", label="rates_ref") plt.legend(loc="best") atol = 1.0 / t_final # the fundamental unit for our rates - assert rates_ref.shape == rates_dlsim.shape == rates_loihisim.shape - assert allclose(rates_dlsim, rates_ref, atol=atol, rtol=0, xtol=1) - assert allclose(rates_loihisim, rates_ref, atol=atol, rtol=0, xtol=1) + assert rates_ref.shape == rates_nengosim.shape + assert allclose(rates_nengosim, rates_ref, atol=atol, rtol=0, xtol=1) + if rates_dlsim is not None: + assert rates_ref.shape == rates_dlsim.shape + assert allclose(rates_dlsim, rates_ref, atol=atol, rtol=0, xtol=1) + if rates_loihisim is not None: + assert rates_ref.shape == rates_loihisim.shape + assert allclose(rates_loihisim, rates_ref, atol=atol, rtol=0, xtol=1) def test_lif_min_voltage(Simulator, plt, allclose):
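The train-then-swap workflow this last patch enables looks roughly as follows
(a sketch only: the network structure, dimensions, and run time below are
illustrative, and the training step with ``nengo_dl.Simulator`` is elided):

    import nengo
    import nengo_loihi
    from nengo_loihi.neurons import (
        LoihiRectifiedLinear,
        LoihiSpikingRectifiedLinear,
    )

    with nengo.Network() as net:
        stim = nengo.Node(0.5)
        # rate neurons: differentiable, so NengoDL can train through them
        ens = nengo.Ensemble(100, 1, neuron_type=LoihiRectifiedLinear())
        nengo.Connection(stim, ens)
        probe = nengo.Probe(ens, synapse=0.01)

    # ... train/tune with nengo_dl.Simulator(net) here ...

    # swap in the spiking equivalent before running on Loihi
    ens.neuron_type = LoihiSpikingRectifiedLinear()
    with nengo_loihi.Simulator(net) as sim:
        sim.run(1.0)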