From 0e662bba3d98ce5b827b0b250ed76a80cec84821 Mon Sep 17 00:00:00 2001 From: Vesa Karvonen Date: Mon, 16 Dec 2024 12:45:09 +0200 Subject: [PATCH] Upgrade to ocamlformat 0.27.0 --- .ocamlformat | 2 +- src/Multicore_magic.mli | 58 ++++++++++++++++++++--------------------- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/.ocamlformat b/.ocamlformat index f18ddc6..82ab089 100644 --- a/.ocamlformat +++ b/.ocamlformat @@ -1,4 +1,4 @@ profile = default -version = 0.26.2 +version = 0.27.0 exp-grouping = preserve diff --git a/src/Multicore_magic.mli b/src/Multicore_magic.mli index c107515..f05a537 100644 --- a/src/Multicore_magic.mli +++ b/src/Multicore_magic.mli @@ -8,12 +8,12 @@ val copy_as_padded : 'a -> 'a (** Depending on the object, either creates a shallow clone of it or returns it - as is. When cloned, the clone will have extra padding words added after the + as is. When cloned, the clone will have extra padding words added after the last used word. This is designed to help avoid - {{:https://en.wikipedia.org/wiki/False_sharing} false sharing}. False - sharing has a negative impact on multicore performance. Accesses of both + {{:https://en.wikipedia.org/wiki/False_sharing} false sharing}. False + sharing has a negative impact on multicore performance. Accesses of both atomic and non-atomic locations, whether read-only or read-write, may suffer from false sharing. @@ -24,26 +24,23 @@ val copy_as_padded : 'a -> 'a {[ let padded_atomic = Multicore_magic.copy_as_padded (Atomic.make 101) - let padded_ref = Multicore_magic.copy_as_padded (ref 42) - let padded_record = Multicore_magic.copy_as_padded { - number = 76; - pointer = 1 :: 2 :: 3 :: []; - } + let padded_record = + Multicore_magic.copy_as_padded { number = 76; pointer = [ 1; 2; 3 ] } let padded_variant = Multicore_magic.copy_as_padded (Some 1) ]} - Padding changes the length of an array. If you need to pad an array, use + Padding changes the length of an array. If you need to pad an array, use {!make_padded_array}. *) val copy_as : ?padded:bool -> 'a -> 'a -(** [copy_as x] by default simply returns [x]. When [~padded:true] is - explicitly specified, returns {{!copy_as_padded} [copy_as_padded x]}. *) +(** [copy_as x] by default simply returns [x]. When [~padded:true] is explicitly + specified, returns {{!copy_as_padded} [copy_as_padded x]}. *) val make_padded_array : int -> 'a -> 'a array -(** Creates a padded array. The length of the returned array includes padding. +(** Creates a padded array. The length of the returned array includes padding. Use {!length_of_padded_array} to get the unpadded length. *) val length_of_padded_array : 'a array -> int @@ -76,8 +73,8 @@ val fenceless_get : 'a Atomic.t -> 'a ]} A potential performance problem with the above example is that it performs - two acquire fences. Both the [Atomic.get] and the [Atomic.compare_and_set] - perform an acquire fence. This may have a negative impact on performance. + two acquire fences. Both the [Atomic.get] and the [Atomic.compare_and_set] + perform an acquire fence. This may have a negative impact on performance. Assuming the first fence is not necessary, we can rewrite the example using {!fenceless_get} as follows: @@ -109,11 +106,11 @@ val fenceless_set : 'a Atomic.t -> 'a -> unit ]} A potential performance problem with the above example is that it performs - two full fences. Both the [Atomic.set] used to initialize the data - structure and the [Atomic.exchange] used to publish the data structure - perform a full fence. The same would also apply in cases where - [Atomic.compare_and_set] or [Atomic.set] would be used to publish the data - structure. This may have a negative impact on performance. + two full fences. Both the [Atomic.set] used to initialize the data structure + and the [Atomic.exchange] used to publish the data structure perform a full + fence. The same would also apply in cases where [Atomic.compare_and_set] or + [Atomic.set] would be used to publish the data structure. This may have a + negative impact on performance. Using {!fenceless_set} we can rewrite the example as follows: @@ -139,11 +136,11 @@ module Transparent_atomic : sig (** A replacement for [Stdlib.Atomic] with fixes and performance improvements [Stdlib.Atomic.get] is incorrectly subject to CSE optimization in OCaml - 5.0.0 and 5.1.0. This can result in code being generated that can produce - results that cannot be explained with the OCaml memory model. It can also + 5.0.0 and 5.1.0. This can result in code being generated that can produce + results that cannot be explained with the OCaml memory model. It can also sometimes result in code being generated where a manual optimization to avoid writing to memory is defeated by the compiler as the compiler - eliminates a (repeated) read access. This module implements {!get} such + eliminates a (repeated) read access. This module implements {!get} such that argument to [Stdlib.Atomic.get] is passed through [Sys.opaque_identity], which prevents the compiler from applying the CSE optimization. @@ -152,9 +149,9 @@ module Transparent_atomic : sig assuming that the array might be an array of [float]ing point numbers. That is because the [Stdlib.Atomic.t] type constructor is opaque, which means that the compiler cannot assume that [_ Stdlib.Atomic.t] is not the - same as [float]. This module defines {{!t} the type} as [private 'a ref], + same as [float]. This module defines {{!t} the type} as [private 'a ref], which allows the compiler to know that it cannot be the same as [float], - which allows the compiler to generate more efficient array accesses. This + which allows the compiler to generate more efficient array accesses. This can both improve performance and reduce size of generated code when using arrays of atomics. *) @@ -181,7 +178,7 @@ module Atomic_array : sig Where available, this uses an undocumented operation exported by the OCaml 5 runtime, {{:https://github.com/ocaml/ocaml/blob/7a5d882d22cdd32b6319e9be680bd1a3d67377a9/runtime/memory.c#L313-L338} - [caml_atomic_cas_field]}, which makes it possible to perform sequentially + [caml_atomic_cas_field]}, which makes it possible to perform sequentially consistent atomic updates of record fields and array elements. Hopefully a future version of OCaml provides more comprehensive and even @@ -200,7 +197,8 @@ module Atomic_array : sig copy of the given [non_atomic_array]. *) val init : int -> (int -> 'a) -> 'a t - (** [init n fn] is equivalent to {{!of_array} [of_array (Array.init n fn)]}. *) + (** [init n fn] is equivalent to {{!of_array} [of_array (Array.init n fn)]}. + *) val length : 'a t -> int (** [length atomic_array] returns the length of the [atomic_array]. *) @@ -227,7 +225,7 @@ module Atomic_array : sig (** [unsafe_compare_and_set atomic_array index before after] atomically updates the specified [index] of the [atomic_array] to the [after] value in case it had the [before] value and returns a boolean indicating whether - that was the case. This operation is {i sequentially consistent} and may + that was the case. This operation is {i sequentially consistent} and may not be reordered with respect to other reads and writes in program order. ⚠️ No bounds checking is performed. *) @@ -237,12 +235,12 @@ end val instantaneous_domain_index : unit -> int (** [instantaneous_domain_index ()] potentially (re)allocates and returns a - non-negative integer "index" for the current domain. The indices are + non-negative integer "index" for the current domain. The indices are guaranteed to be unique among the domains that exist at a point in time. Each call of [instantaneous_domain_index ()] may return a different index. The intention is that the returned value can be used as an index into a - contention avoiding parallelism safe data structure. For example, a naïve + contention avoiding parallelism safe data structure. For example, a naïve scalable increment of one counter from an array of counters could be done as follows: @@ -256,7 +254,7 @@ val instantaneous_domain_index : unit -> int ]} The implementation ensures that the indices are allocated as densely as - possible at any given moment. This should allow allocating as many counters + possible at any given moment. This should allow allocating as many counters as needed and essentially eliminate contention. On OCaml 4 [instantaneous_domain_index ()] will always return [0]. *)