diff --git a/blogware/symtab.go b/blogware/symtab.go index b44bb6d..2bdf828 100644 --- a/blogware/symtab.go +++ b/blogware/symtab.go @@ -49,7 +49,7 @@ var ( SymBold = BuiltinCmd("b", ArgTypeSeq) SymUnderline = BuiltinCmd("u", ArgTypeSeq) SymNormal = BuiltinCmd("normal", ArgTypeSeq) - SymEmphasis = BuiltinCmd("em", ArgTypeSeq) + SymEmphasis = BuiltinCmd("emph", ArgTypeSeq) SymSmallCaps = BuiltinCmd("sc", ArgTypeSeq) SymCircled = BuiltinCmd("circled", ArgTypeNum) SymCode = BuiltinCmd("code", ArgTypeSeq) diff --git a/posts/01-effective-rust-canisters.tex b/posts/01-effective-rust-canisters.tex index cd63850..6f7d817 100644 --- a/posts/01-effective-rust-canisters.tex +++ b/posts/01-effective-rust-canisters.tex @@ -105,8 +105,8 @@ \subsection{canister-state}{Canister state} The issue can stay undetected until your canister is in active use, storing (and corrupting) user data. If we used a \code{RefCell}, the code would panic before we shipped it. -It should now be clear \em{how} to declare global variables. -Let us discuss \em{where} to put them. +It should now be clear \emph{how} to declare global variables. +Let us discuss \emph{where} to put them. \advice{clear-state}{Put all your globals in one basket.} @@ -132,10 +132,10 @@ \subsection{canister-state}{Canister state} I borrowed \href{https://sdk.dfinity.org/docs/language-guide/upgrades.html#_declaring_stable_variables}{Motoko terminology} here: \begin{enumerate} \item - The system preserves \em{stable} variables across upgrades. + The system preserves \emph{stable} variables across upgrades. For example, a user database should probably be stable. \item - The system discards \em{flexible} variables on code upgrades. + The system discards \emph{flexible} variables on code upgrades. For example, you can make a cache flexible if it is not crucial for your canister. 
\end{enumerate} @@ -405,7 +405,7 @@ \subsection{cycle-consumption}{Reducing cycle consumption} \advice{instruction-counter}{Measure the number of instructions your endpoints consume.} -The \href{https://docs.rs/ic-cdk/0.5.3/ic_cdk/api/fn.instruction_counter.html}{\code{instruction_counter}} API will tell you the number of \em{instructions} your code consumed since the last \href{https://internetcomputer.org/docs/current/references/ic-interface-spec/#entry-points}{entry point}. +The \href{https://docs.rs/ic-cdk/0.5.3/ic_cdk/api/fn.instruction_counter.html}{\code{instruction_counter}} API will tell you the number of \emph{instructions} your code consumed since the last \href{https://internetcomputer.org/docs/current/references/ic-interface-spec/#entry-points}{entry point}. Instructions are the internal currency of the IC runtime. One IC instruction is the \href{https://en.wikipedia.org/wiki/Quantum}{quantum} of work that the system can do, such as loading a 32-bit integer from a memory address. The system assigns an instruction cost equivalent to each \href{https://sourcegraph.com/github.com/dfinity/ic@cfdbbf5fb5fdbc8f483dfd3a5f7f627b752d3156/-/blob/rs/embedders/src/wasm_utils/instrumentation.rs?L155-177}{WebAssembly instruction} and \href{https://sourcegraph.com/github.com/dfinity/ic@cfdbbf5/-/blob/rs/embedders/src/wasmtime_embedder/system_api_complexity.rs?L40-107}{system call}. 
@@ -432,7 +432,7 @@ \subsection{cycle-consumption}{Reducing cycle consumption} let tx = apply_transfer(from, to, amount)?; let tx_id = archive_transaction(tx).\b{await}?; - \em{// \b{BAD}: the await point above resets the instruction counter.} + \emph{// \b{BAD}: the await point above resets the instruction counter.} let end = ic_cdk::api::instruction_counter(); record_measurement(end - start); @@ -454,7 +454,7 @@ \subsection{cycle-consumption}{Reducing cycle consumption} struct HttpResponse { status_code: u16, headers: Vec<(String, String)>, - \em{// \b{BAD}: inefficient} + \emph{// \b{BAD}: inefficient} body: Vec, } \end{code} @@ -466,7 +466,7 @@ \subsection{cycle-consumption}{Reducing cycle consumption} struct HttpResponse { status_code: u16, headers: Vec<(String, String)>, - \em{// \b{OK}: encoded efficiently} + \emph{// \b{OK}: encoded efficiently} #[serde(with = "serde_bytes")] body: Vec, } @@ -479,7 +479,7 @@ \subsection{cycle-consumption}{Reducing cycle consumption} struct HttpResponse { status_code: u16, headers: Vec<(String, String)>, - \em{// \b{OK}: also efficient} + \emph{// \b{OK}: also efficient} body: serde_bytes::ByteBuf, } \end{code} @@ -537,7 +537,7 @@ \subsection{cycle-consumption}{Reducing cycle consumption} #[query] fn http_request(_request: HttpRequest) -> HttpResponse { - \em{// \b{NOTE}: we are making a full copy of the asset.} + \emph{// \b{NOTE}: we are making a full copy of the asset.} let body = ASSET.with(|cell| cell.borrow().clone()); HttpResponse { @@ -619,7 +619,7 @@ \subsection{cycle-consumption}{Reducing cycle consumption} I experimented with a one-megabyte asset and measured that the original code relying on a deep copy consumed 16 million instructions. At the same time, versions with reference counting and explicit lifetimes needed only 12 million instructions\sidenote{sn-candid-copy}{ The 25\% improvement shows that our code does little but copy bytes. 
- The code did at least \em{three} copies: \circled{1} from a \code{thread_local} to an \code{HttpResponse}, \circled{2} from the \code{HttpResponse} to candid's \href{https://sourcegraph.com/github.com/dfinity/candid@8b742c9701640ca220c356c23c5f834d13150cc4/-/blob/rust/candid/src/ser.rs?L28}{value buffer}, and \circled{3} from candid's \href{https://sourcegraph.com/github.com/dfinity/candid@8b742c9701640ca220c356c23c5f834d13150cc4/-/blob/rust/candid/src/ser.rs?L61}{value buffer} to the call's \href{https://sourcegraph.com/github.com/dfinity/cdk-rs@39cd49a3b2ca6736d7c3d3bf3605e567302825b7/-/blob/src/ic-cdk/src/api/call.rs?L481-500}{argument buffer}. + The code did at least \emph{three} copies: \circled{1} from a \code{thread_local} to an \code{HttpResponse}, \circled{2} from the \code{HttpResponse} to candid's \href{https://sourcegraph.com/github.com/dfinity/candid@8b742c9701640ca220c356c23c5f834d13150cc4/-/blob/rust/candid/src/ser.rs?L28}{value buffer}, and \circled{3} from candid's \href{https://sourcegraph.com/github.com/dfinity/candid@8b742c9701640ca220c356c23c5f834d13150cc4/-/blob/rust/candid/src/ser.rs?L61}{value buffer} to the call's \href{https://sourcegraph.com/github.com/dfinity/cdk-rs@39cd49a3b2ca6736d7c3d3bf3605e567302825b7/-/blob/src/ic-cdk/src/api/call.rs?L481-500}{argument buffer}. We removed ⅓ of copies and got ¼ improvement in instruction consumption. So only ¼ of our instructions contributed to work unrelated to copying the asset's byte array. }. @@ -673,7 +673,7 @@ \subsection{module-size}{Reducing module size} As with any optimization process, you need a profiler to guide your experiments. The \href{https://rustwasm.github.io/twiggy/}{\code{twiggy}}\sidenote{sn-twiggy-order}{ \code{twiggy} needs debug info to display function names. - Run it \em{before} you shrink your module with \code{ic-wasm}. + Run it \emph{before} you shrink your module with \code{ic-wasm}. } tool is excellent for finding the largest functions in your WebAssembly modules. 
\begin{figure} @@ -764,7 +764,7 @@ \subsection{upgrades}{Upgrades} You can view stable memory as a communication channel between your canister's old and new versions. All proper communication protocols have a version. One day, you might want to change the stable data layout or serialization format radically. -The code becomes messy and brittle if the stable memory decoding procedure needs to \em{guess} the data format. +The code becomes messy and brittle if the stable memory decoding procedure needs to \emph{guess} the data format. Save your nerve cells and think about versioning in advance. It is as easy as declaring, ``the first byte of my stable memory is the version number.'' diff --git a/posts/02-ic-state-machine-replication.tex b/posts/02-ic-state-machine-replication.tex index bafddd1..14db61b 100644 --- a/posts/02-ic-state-machine-replication.tex +++ b/posts/02-ic-state-machine-replication.tex @@ -17,7 +17,7 @@ \section{state-machine}{The state machine} Before we dive into the internals of the protocol, let's first define the \href{https://en.wikipedia.org/wiki/Finite-state_machine}{state machine} that we'll be dealing with. -Nodes participating in the Internet Computer are grouped into units called \em{subnet blockchains}, or simply \em{subnets}. +Nodes participating in the Internet Computer are grouped into units called \emph{subnet blockchains}, or simply \emph{subnets}. Nodes in the same subnet run their own instance of the core Internet Computer Protocol (i.e., they form an independent peer-to-peer network and reach consensus independently from other subnets). We'll model as a state machine the computation that nodes in the same subnet perform. @@ -28,7 +28,7 @@ \section{state-machine}{The state machine} These blocks are the inputs of our state machine. } \term{Outputs}{ - In our model, the main artifact of the execution is a data structure called \em{state tree}. 
+ In our model, the main artifact of the execution is a data structure called \emph{state tree}. We'll learn more about state trees in a moment.} \term{States}{ The single most important thing that the Internet Computer does is hosting canisters. @@ -48,7 +48,7 @@ \section{state-machine}{The state machine} \item Executes the messages on the selected canisters and records the execution results. \end{itemize} All of the above modifies the data structure that we call ``state'' and acts as a transition function. - Note that we can call this procedure a \em{function} only if it's deterministic: given the same block and the same original state, the replica will modify the state in exactly the same way. + Note that we can call this procedure a \emph{function} only if it's deterministic: given the same block and the same original state, the replica will modify the state in exactly the same way. Thanks to the careful design of execution algorithms and guarantees that WebAssembly provides, the procedure is indeed deterministic. } \term{Output function}{ @@ -62,7 +62,7 @@ \section{state-machine}{The state machine} } \end{description} -I call these state machines (one for each subnet) \em{replicated} because each honest node on a subnet has an exact copy of the machine. +I call these state machines (one for each subnet) \emph{replicated} because each honest node on a subnet has an exact copy of the machine. \subsection{checkpoints}{Checkpoints} @@ -70,14 +70,14 @@ \subsection{checkpoints}{Checkpoints} This new node cannot start processing and proposing new blocks until it has the right state, the state that results from execution of all the blocks produced by this subnet so far. One way to bring the node up to date is to download all those blocks and ``replay'' them. -This sounds simple, but if the rate of change is high and message execution is costly, the new node might need a \em{lot} of time to catch up. 
+This sounds simple, but if the rate of change is high and message execution is costly, the new node might need a \emph{lot} of time to catch up. As the Red Queen put it: ``My dear, here we must run as fast as we can, just to stay in place. And if you wish to go anywhere you must run twice as fast as that.'' Another solution is to create persistent snapshots of the state from time to time. The peers can fetch and load those snapshots when they need help. This method works really well for our state machine: it reduces the catch up time from days to minutes. -Let's call those persistent snapshots \em{checkpoints}. +Let's call those persistent snapshots \emph{checkpoints}. \begin{figure}[grayscale-diagram] \marginnote{sm-components}{Components of the state machine: blocks as inputs, states, state trees as outputs, and checkpoints.} @@ -149,13 +149,13 @@ \subsection{state-artifact}{State as an artifact} Load the checkpoint, replay a few blocks, and you're ready to rock. There is a more interesting case, however: a healthy replica can help other replicas catch up by sending them a recent checkpoint. -Replicas in a subnet communicate by exchanging \em{artifacts} using a peer-to-peer protocol. +Replicas in a subnet communicate by exchanging \emph{artifacts} using a peer-to-peer protocol. Most of these artifacts (e.g., user ingress messages, random beacons, state certifications) are relatively small, up to a few megabytes in size. But the machinery for artifact transfer is quite general: the protocol supports fetching arbitrary large artifacts by slicing them into chunks, provided that there is a way to authenticate each chunk independently. Furthermore, multiple chunks can be fetched in parallel from multiple peers. Sounds a lot like \href{https://en.wikipedia.org/wiki/BitTorrent}{BitTorrent}, isn't it? -Before advertising a checkpoint, replica computes a \em{manifest} for that checkpoint. 
+Before advertising a checkpoint, a replica computes a \emph{manifest} for that checkpoint. Manifest is an inventory of files constituting a checkpoint. Files are sliced into chunks, and the manifest enumerates paths, sizes and cryptographic hashes of every file and every chunk of each file. In our BitTorrent analogy, manifest plays a role of a \href{https://en.wikipedia.org/wiki/Torrent_file}{.torrent file}. @@ -177,7 +177,7 @@ \subsection{trigger-transfer}{Triggering state transfer} As you might have guessed, the consensus subsystem armed with \href{https://en.wikipedia.org/wiki/Threshold_cryptosystem}{threshold signatures} comes to the rescue again. Replicas gather a threshold signature on a full state hash and use that signature as a proof of checkpoint authenticity. The result is an artifact containing a state height, a full state hash, and a threshold signature. -We'll call this artifact a \em{catch-up package}. +We'll call this artifact a \emph{catch-up package}. The interaction between the replica consensus module and the state machine is something like the following \begin{enumerate} diff --git a/posts/03-rust-packages-crates-modules.tex b/posts/03-rust-packages-crates-modules.tex index a27e4d2..73b8bde 100644 --- a/posts/03-rust-packages-crates-modules.tex +++ b/posts/03-rust-packages-crates-modules.tex @@ -20,7 +20,7 @@ \section{personae}{Dramatis Personae} -Rust terminology proved to be confusing because the term \em{crate} is overloaded. +Rust terminology proved to be confusing because the term \emph{crate} is overloaded. 
For example, the first edition of the venerable \href{https://doc.rust-lang.org/1.25.0/book/}{The Rust Programming Language} book contained the following misleading passage \blockquote{ @@ -74,7 +74,7 @@ \section{modules-vs-crates}{Modules vs Crates} The bitter reality is that Rust takes quite some time to compile, and modules don't help you shorten the compilation time: \begin{itemize} \item - The basic unit of compilation is a \em{crate}, not a \em{module}. + The basic unit of compilation is a \emph{crate}, not a \emph{module}. You must recompile all the modules in a crate even if you change only one. The more code you put in a crate, the longer it takes to compile. \item @@ -98,7 +98,7 @@ \section{code-organization-advice}{Advice on code organization} (proptest strategies, mock and fake component implementations, helper functions, etc.), and the \code{replica} package instantiating all the components. \item - Packages with lots of \em{reverse dependencies}. + Packages with lots of \emph{reverse dependencies}. Examples from the IC codebase are the \code{types} package containing common type definitions and the \code{interfaces} package specifying component interfaces. \end{itemize} @@ -112,7 +112,7 @@ \section{code-organization-advice}{Advice on code organization} Sometimes it is possible to eliminate a dependency hub. For example, the \code{test-utils} package is a union of independent utilities. -We can group these utilities by the component they help to test and factor the code into multiple \code{\em{}-test-utils} packages. +We can group these utilities by the component they help to test and factor the code into multiple \code{\emph{}-test-utils} packages. More often, however, dependency hubs will have to stay. Some types from \code{types} are pervasive. @@ -147,7 +147,7 @@ \section{code-organization-advice}{Advice on code organization} The \code{replicated_state} package is heavy; we didn't want to merge its contents with \code{interfaces}. 
So we took the first option and moved the types shared between \code{interfaces} and \code{replicated_state} into the \code{types} package. -One property of trait definitions in the \code{interfaces} package is that the traits depend only on the \code{ReplicatedState} type \em{name}. +One property of trait definitions in the \code{interfaces} package is that the traits depend only on the \code{ReplicatedState} type \emph{name}. The traits do not need to know \code{ReplicatedState}'s definition. \begin{figure} @@ -214,7 +214,7 @@ \section{code-organization-advice}{Advice on code organization} What a great question. I would ask the same thing in 2019! -Global variables are \href{http://wiki.c2.com/?GlobalVariablesAreBad}{\em{bad}}, but my previous experience suggested that loggers and metric sinks are special. +Global variables are \href{http://wiki.c2.com/?GlobalVariablesAreBad}{\emph{bad}}, but my previous experience suggested that loggers and metric sinks are special. Oh well, they aren't, after all. The usual problems with implicit state dependencies are especially prominent in Rust. @@ -230,7 +230,7 @@ \section{code-organization-advice}{Advice on code organization} Explicitly passing loggers eliminates that problem. \item Testing code relying on an implicit state often becomes hard or impossible in a multi-threaded environment. - The code recording your metrics is, well, \em{code}. + The code recording your metrics is, well, \emph{code}. It also deserves to be tested. \item If you use a library relying on implicit state, you can introduce subtle bugs if you depend on incompatible library versions in different packages. @@ -246,7 +246,7 @@ \section{code-organization-advice}{Advice on code organization} Our code seemed correct, yet the metrics were missing. One of the packages depended on prometheus version \code{0.9}, while all other packages used \code{0.10}. 
-According to \href{https://semver.org/}{semver}, these versions are incompatible, so cargo linked both versions into the binary, introducing \em{two} implicit registries. +According to \href{https://semver.org/}{semver}, these versions are incompatible, so cargo linked both versions into the binary, introducing \emph{two} implicit registries. We exposed only the \code{0.10} version registry over the HTTP interface. As you correctly guessed, the missing components recorded metrics to the \code{0.9} registry. @@ -379,12 +379,12 @@ \subsection{confusing-crates-and-packages}{Confusing crates and packages} Oh, how is that? -Aren't \code{lib.rs} and \code{transmogrify.rs} in the same \em{crate}? +Aren't \code{lib.rs} and \code{transmogrify.rs} in the same \emph{crate}? No, they are not. -The \code{image-magic} \em{package} defines two \em{crates}: a \em{library crate} named \code{image_magic} (note that cargo replaced the dash in the package name with an underscore) and a \em{binary crate} named \code{transmogrify}. +The \code{image-magic} \emph{package} defines two \emph{crates}: a \emph{library crate} named \code{image_magic} (note that cargo replaced the dash in the package name with an underscore) and a \emph{binary crate} named \code{transmogrify}. So when you write \code{use crate::Image} in \code{transmogrify.rs}, you tell the compiler to look for the type defined in the same binary. -The \code{image_magic} \em{crate} is just as external to \code{transmogrify} as any other library would be, so we have to specify the library name in the use declaration: +The \code{image_magic} \emph{crate} is just as external to \code{transmogrify} as any other library would be, so we have to specify the library name in the use declaration: \begin{figure} \begin{code}[good] @@ -410,10 +410,10 @@ \subsection{quasi-circular}{Quasi-circular dependencies} Cargo uses this profile when you run \code{cargo build}. } \term{test}{ - Mostly the same as the \em{dev} profile. 
+ Mostly the same as the \emph{dev} profile. When you test a library crate, cargo builds the library with the \code{test} profile and injects the main function executing the test harness. This profile is enabled when you run \code{cargo test}. - Cargo builds dependencies of the crate under test using the \em{dev} profile. + Cargo builds dependencies of the crate under test using the \emph{dev} profile. } \end{description} @@ -457,7 +457,7 @@ \subsection{quasi-circular}{Quasi-circular dependencies} Wait, didn't we create a dependency cycle? \code{foo} depends on \code{foo-test-utils} that depends on \code{foo}, right? -There is no circular dependency because cargo compiles \code{foo} twice: once with \em{dev} profile to link with \code{foo-test-utils} and once with \em{test} profile to add the test harness. +There is no circular dependency because cargo compiles \code{foo} twice: once with \emph{dev} profile to link with \code{foo-test-utils} and once with \emph{test} profile to add the test harness. \begin{figure}[grayscale-diagram] \marginnote{mn-dep-foo}{Dependency diagram for \code{foo} library test.} @@ -521,7 +521,7 @@ \subsection{quasi-circular}{Quasi-circular dependencies} \end{figure} What could that mean? -The compiler tells us that type definitions in the \em{test} and the \em{dev} versions of \code{foo} are incompatible. +The compiler tells us that type definitions in the \emph{test} and the \emph{dev} versions of \code{foo} are incompatible. Technically, these are different, incompatible crates even though these crates share the name. The way out of trouble is to define a separate integration test crate in the \code{foo} package and move the tests there. @@ -538,7 +538,7 @@ \subsection{quasi-circular}{Quasi-circular dependencies} \end{code} \end{figure} -The test above compiles fine because cargo links the test and \code{foo_test_utils} with the \em{dev} version of \code{foo}. 
+The test above compiles fine because cargo links the test and \code{foo_test_utils} with the \emph{dev} version of \code{foo}. \begin{figure}[grayscale-diagram] \marginnote{mn-foo-test-dep-diag}{Dependency diagram for \code{foo_test} integration test.} diff --git a/posts/04-square-joy-trapped-rain-water.tex b/posts/04-square-joy-trapped-rain-water.tex index 0699a43..52e7413 100644 --- a/posts/04-square-joy-trapped-rain-water.tex +++ b/posts/04-square-joy-trapped-rain-water.tex @@ -41,12 +41,12 @@ \section{the-problem}{The problem: heavy rains in Flatland} As city architects, we know the heights in units of all the buildings. We need to compute how much water (in square units) accumulates between the buildings after heavy rain. -More dryly: given an array of non-negative integers \em{H}, representing heights of unit-width bars placed next to one another, compute the total area of water trapped by the configuration after a rain. +More dryly: given an array of non-negative integers \emph{H}, representing heights of unit-width bars placed next to one another, compute the total area of water trapped by the configuration after a rain. \subsection{example-2d}{Example} \begin{tabular*}{r l} - \em{Input} & \code{0 1 0 2 1 0 1 3 2 1 2 1} \\ - \em{Output} & \code{6} \\ + \emph{Input} & \code{0 1 0 2 1 0 1 3 2 1 2 1} \\ + \emph{Output} & \code{6} \\ \end{tabular*} \newline @@ -64,7 +64,7 @@ \section{a-solution}{A solution} So, let's focus on the bar at an arbitrary index \math{i}. What would stop the water from flowing out? Another bar that is higher than \math{H\[i\]}. -Furthermore, we need bars higher than \math{H\[i\]} on \em{both} sides of \math{i} for the water to stay. +Furthermore, we need bars higher than \math{H\[i\]} on \emph{both} sides of \math{i} for the water to stay. So, the water level at index \math{i} is determined by the minimum of the highest bars on the left and right. 
Computing the highest bar to the left and to the right for each index is not efficient: we would need to make \math{O(N\sup{2})} steps. @@ -185,7 +185,7 @@ \section{translating-to-j}{Translating our idea to J} \section{drawing-solutions}{Drawing solutions} -Knowing the answer is excellent, but being able to \em{see} it at a glance would be even better. +Knowing the answer is excellent, but being able to \emph{see} it at a glance would be even better. In this section, we'll write code to represent solutions visually. What would we like to see in that picture? @@ -346,7 +346,7 @@ \section{3d}{Breaking out of Flatland} One of the ways to better understand a problem is to generalize it. Let's break out into the third dimension. -Given a \em{two-dimensional} array of non-negative integers \math{H}, representing heights of square-unit bars placed next to one another, compute the total \em{volume} of water trapped by the configuration after it rains. +Given a \emph{two-dimensional} array of non-negative integers \math{H}, representing heights of square-unit bars placed next to one another, compute the total \emph{volume} of water trapped by the configuration after it rains. To make the problem more concrete, let's inspect a few instances. We'll use \href{https://code.jsoftware.com/wiki/Vocabulary/tilde}{\code{~} (reflex)} adverb to save us some typing. @@ -607,7 +607,7 @@ \section{back-to-2d}{Looking back at Flatland} \item Always move the pointer that looks at the lowest height. The left pointer moves to the right; the right pointer moves to the left. - \item If one of the two pointers looks at height \em{M} greater than the lowest boundary, update the lowest boundary to be \em{M}. + \item If one of the two pointers looks at height \emph{M} greater than the lowest boundary, update the lowest boundary to be \emph{M}. \end{itemize} That is precisely how a Dijkstra graph search would propagate, always picking the shortest edge to proceed. 
diff --git a/posts/05-debug-like-feynman.tex b/posts/05-debug-like-feynman.tex index ba38505..9d0eac0 100644 --- a/posts/05-debug-like-feynman.tex +++ b/posts/05-debug-like-feynman.tex @@ -30,7 +30,7 @@ \end{enumerate} One non-obvious detail of the scientific method hides in the second step. -The goal of the experiment is to \em{disprove} the prediction. +The goal of the experiment is to \emph{disprove} the prediction. The default assumption is that there is no Higgs boson and that the new medication under test does not have any effect. There are infinitely many ideas, most of them have nothing to do with reality. If you look hard enough, you can find arguments for those ideas. @@ -60,7 +60,7 @@ \section{record-your-observations}{Record your observations} Great, there is an issue with a promising title! Your former teammate closed it a year ago. Full of hope, you open the issue. -All you see is \em{status: done, resolution: fixed}. +All you see is \emph{status: done, resolution: fixed}. It seems that you are going to have a long day. Does this story sound familiar? @@ -99,14 +99,14 @@ \section{test-causality}{Test causality} The next day, you learn that the bug is still reproducible. How could that happen? -Many programmers write tests \em{after} the code is complete. +Many programmers write tests \emph{after} the code is complete. I am sure you did this. I certainly did—many times. Sadly, if we apply basic logic, this approach makes no sense. -How can you be sure that it is your implementation \em{causing} the tests to pass? -The implication \em{implementation ⇒ test pass} proves merely a \href{https://en.wikipedia.org/wiki/Correlation}{correlation}. -We also need to show \em{not(implementation) ⇒ not(test pass)} to prove \href{https://en.wikipedia.org/wiki/Causality}{causation}. +How can you be sure that it is your implementation \emph{causing} the tests to pass? 
+The implication \emph{implementation ⇒ test pass} proves merely a \href{https://en.wikipedia.org/wiki/Correlation}{correlation}. +We also need to show \emph{not(implementation) ⇒ not(test pass)} to prove \href{https://en.wikipedia.org/wiki/Causality}{causation}. ``What else could be causing tests to pass?'' you might say. I do not know, neither do you. @@ -155,7 +155,7 @@ \section{know-your-data}{Know your data} We fight about the way to place braces and the perfect number of spaces to use for indentation. We can argue with a religious zeal about the name of a function or a variable. -Yet the most precious substance, the \em{data} flowing through our code, often gets little attention. +Yet the most precious substance, the \emph{data} flowing through our code, often gets little attention. It hides from us behind variables with descriptive names. Why is data important anyway? @@ -218,7 +218,7 @@ \section{debug-mental-models}{Debug mental models} As \href{https://en.wikipedia.org/wiki/Linus_Torvalds}{Linus Torvalds} put it in \href{https://lkml.org/lkml/2000/9/6/65}{one of his famous emails about debuggers}: \blockquote{ - It's that you have to look at the level \em{above} sources. + It's that you have to look at the level \emph{above} sources. At the meaning of things. Without a debugger, you basically have to go the next step: understand what the program does. Not just that particular line. @@ -329,7 +329,7 @@ \section{question-method}{Question your method} This impression is false. No one has a clue. -Many project management techniques focus on being \em{efficient} and producing more features in less time. +Many project management techniques focus on being \emph{efficient} and producing more features in less time. This goal is fundamentally flawed. We should not be after efficiency. In my experience, the main problem with software is that people waste time on non-essential work. 
@@ -371,8 +371,8 @@ \section{question-method}{Question your method} Be critical of your method. Blindly following the procedures should not feel right to you. Challenge the status quo, stop, and think. -\em{Why am I doing this?} -\em{Is it worth my time?} +\emph{Why am I doing this?} +\emph{Is it worth my time?} \section{conclusion}{Conclusion} diff --git a/posts/06-ic-orthogonal-persistence.tex b/posts/06-ic-orthogonal-persistence.tex index 297ce21..8835cc4 100644 --- a/posts/06-ic-orthogonal-persistence.tex +++ b/posts/06-ic-orthogonal-persistence.tex @@ -107,19 +107,19 @@ \section{actors}{Actors} \section{snapshots-deltas}{Snapshots and deltas} -The implementation divides the contents of each memory into 4096-byte chunks called \em{pages}. -When the actor executes a message, the system automatically detects the memory pages that the actor modifies or \em{dirties}. +The implementation divides the contents of each memory into 4096-byte chunks called \emph{pages}. +When the actor executes a message, the system automatically detects the memory pages that the actor modifies or \emph{dirties}. The system uses low-level \sc{unix api}s to detect page modifications. Since most operating systems operate on 4096-byte memory pages, using the same page size in the replica is the most natural choice. The memory snapshot of an actor is a map from a page index to the page contents. -We call this data structure a \em{page map}. +We call this data structure a \emph{page map}. There are many ways to implement this data structure. The primary assumption that guided the current implementation is that each message execution modifies only a small number of pages. This assumption holds in practice: as of April 2022, 95\% of message executions change at most seven memory pages. If the expected number of dirtied pages is small, it is natural to apply some delta-encoding scheme. 
-Thus we chose to represent page maps as a combination of on-disk \em{checkpoint files} and in-memory \em{page deltas}. +Thus we chose to represent page maps as a combination of on-disk \emph{checkpoint files} and in-memory \emph{page deltas}. A checkpoint file is a flat binary file with a full copy of the actor memory that the system creates at the end of some execution rounds. Once created, checkpoint files are immutable. A page delta is a \href{https://en.wikipedia.org/wiki/Persistent_data_structure}{persistent} map that contains pages dirtied by the actor since the last checkpoint. diff --git a/posts/07-square-joy-pre-order.tex b/posts/07-square-joy-pre-order.tex index a9a7107..c577741 100644 --- a/posts/07-square-joy-pre-order.tex +++ b/posts/07-square-joy-pre-order.tex @@ -20,7 +20,7 @@ \section{the-problem}{The problem: recover a binary tree} Given two sequences of distinct values, \code{inorder} and \code{preorder}, where \code{inorder} is an \href{https://en.wikipedia.org/wiki/Tree_traversal#In-order,_LNR}{in-order traversal} of a binary tree, -and \code{preorder} is a \href{https://en.wikipedia.org/wiki/Tree_traversal#Pre-order,_NLR}{pre-order traversal} of the \em{same} tree, recover the tree structure. +and \code{preorder} is a \href{https://en.wikipedia.org/wiki/Tree_traversal#Pre-order,_NLR}{pre-order traversal} of the \emph{same} tree, recover the tree structure. For example, if our inputs are the following sequences @@ -47,7 +47,7 @@ \section{vectorizing-the-problem}{Vectorizing the problem} The way we represent the inputs and outputs of our problem shapes our approach to a solution. Our inputs, \code{inorder} and \code{preorder}, are already arrays. -However, there is something peculiar about these arrays: they contain the same values, or, in math terms, \code{preorder} is a \em{permutation} of \code{inorder}. 
+However, there is something peculiar about these arrays: they contain the same values, or, in math terms, \code{preorder} is a \emph{permutation} of \code{inorder}. We can eliminate this redundancy by compressing the inputs to a single numeric array: the permutation that turns \code{inorder} into \code{preorder}. A primitive J operation, dyadic \href{https://code.jsoftware.com/wiki/Vocabulary/idot#dyadic}{\code{i.} (index of)}, transforms the original inputs to the more convenient and compact permutation form. @@ -287,11 +287,11 @@ \subsection{right-children}{Finding right children} \subsection{computing-parent-vector}{Computing the parent vector} -We have two ways to compute the parents: one works for left children (let us call it the \em{L-algorithm}), and another works for right children (the \em{R-algorithm}). +We have two ways to compute the parents: one works for left children (let us call it the \emph{L-algorithm}), and another works for right children (the \emph{R-algorithm}). How do we combine these results? Note that array \code{L} had zeros for all right children. -Also, note that the L-algorithm produces larger values than the R-algorithm: the L-algorithm looks for \em{larger} nodes to the left of each position, while the R-algorithm looks for \em{smaller} nodes. +Also, note that the L-algorithm produces larger values than the R-algorithm: the L-algorithm looks for \emph{larger} nodes to the left of each position, while the R-algorithm looks for \emph{smaller} nodes. So taking the maximum of arrays \math{L} and \math{R} gives us the correct answer. \begin{verbatim}[j] diff --git a/posts/08-ic-xnet.tex b/posts/08-ic-xnet.tex index a6dbff4..46840e8 100644 --- a/posts/08-ic-xnet.tex +++ b/posts/08-ic-xnet.tex @@ -11,11 +11,11 @@ \section{introduction}{Introduction} This article continues the previous post on state machine replication in the Internet Computer (IC), \href{/posts/02-ic-state-machine-replication.html}{A swarm of replicated state machines}. 
-This time, we shall examine the protocol over which independent state machines (also known as \em{subnets}) communicate, the \em{XNet protocol}. +This time, we shall examine the protocol over which independent state machines (also known as \emph{subnets}) communicate, the \emph{XNet protocol}. \section{subnets}{Subnets} -A \em{subnet} is a collection of nodes participating in a single instance of the \href{https://dfinity.org/howitworks/consensus}{consensus protocol}. +A \emph{subnet} is a collection of nodes participating in a single instance of the \href{https://dfinity.org/howitworks/consensus}{consensus protocol}. All the nodes in a subnet have the same state and apply the same blocks. It might be tempting to think that nodes of a subnet are physically collocated. @@ -23,7 +23,7 @@ \section{subnets}{Subnets} There are still good reasons to assign multiple nodes in a data center to the same subnet, such as increasing query call capacity and speeding up recovery in case of a replica restart. }. The goal is to improve availability: a disaster in a single data center cannot take down the entire subnet. -The \em{registry} canister maintains the assignment of nodes to physical machines, and the \href{https://dfinity.org/howitworks/network-nervous-system-nns}{Network Nervous System} governs all the changes to the registry. +The \emph{registry} canister maintains the assignment of nodes to physical machines, and the \href{https://dfinity.org/howitworks/network-nervous-system-nns}{Network Nervous System} governs all the changes to the registry. Nodes from different subnets can live in the same data center. This setup might be counter-intuitive: nodes from different subnets might sometimes have better network connectivity than nodes in the same subnet. 
@@ -57,19 +57,19 @@ \section{message-streams}{Message streams} \includegraphics{/images/08-message-streams.svg} \end{figure} -The component merging the queues (aptly called \em{stream builder}) should satisfy a few constraints: +The component merging the queues (aptly called \emph{stream builder}) should satisfy a few constraints: \begin{itemize} \item - \em{Determinism}. + \emph{Determinism}. All nodes in the subnet must agree on the exact contents of the stream. - To do its job, the stream builder needs the mapping from canister identifiers to subnet identifiers, the \em{routing table}. + To do its job, the stream builder needs the mapping from canister identifiers to subnet identifiers, the \emph{routing table}. The routing table comes from the registry and changes over time. To ensure determinism, each block pins the registry version that the state machine is using for message processing. \item - \em{Ordering}. + \emph{Ordering}. If canister \math{A} sends two requests to canister \math{B}, \math{R\sub{1}}, and \math{R\sub{2}}, then \math{R\sub{1}} should appear before \math{R\sub{2}} in the stream. \item - \em{Fairness.} + \emph{Fairness.} We do not want messages from a single chatty canister to dominate the stream. The stream builder tries to interleave messages so that each canister has the same bandwidth. \end{itemize} @@ -80,8 +80,8 @@ \section{block-payloads}{Block payloads} The job of the consensus protocol is to aggregate messages from the outside world and pack them into a neat block. Consensus includes several types of messages into blocks, such as user ingress messages, Bitcoin transactions (for subnets with enabled Bitcoin integration), and inter-canister messages from other subnets. -We call messages paired with the data required for their validation a \em{payload}. -We call components that pull payloads from the network \em{payload builders}. +We call messages paired with the data required for their validation a \emph{payload}. 
+We call components that pull payloads from the network \emph{payload builders}. \begin{figure}[grayscale-diagram] \marginnote{mn-payloads}{The consensus algorithm aggregates messages from the outside world into blocks.} @@ -89,7 +89,7 @@ \section{block-payloads}{Block payloads} \end{figure} XNet payload builder pulls messages from nodes assigned to other subnets using a simple HTTP protocol. -\em{XNet endpoint} is a component that serves messages destined for other subnets over secure TLS connections, accepting connections only from other nodes\sidenote{sn-xnet-tls}{This measure does not imply privacy because malicious nodes can access the data; but ensures that network providers cannot read the messages.}. +\emph{XNet endpoint} is a component that serves messages destined for other subnets over secure TLS connections, accepting connections only from other nodes\sidenote{sn-xnet-tls}{This measure does not imply privacy because malicious nodes can access the data; but ensures that network providers cannot read the messages.}. XNet endpoint fetches the complete list of nodes, their subnet assignment, IP addresses, and public keys (required to establish a TLS connection) from the registry. \begin{figure}[grayscale-diagram] @@ -106,17 +106,17 @@ \section{garbage-collection}{Garbage collection} We now know how one subnet accumulates messages destined for another subnet. This knowledge begs another question: how do replicas remove messages that the destination subnet has already consumed? We need a feedback mechanism allowing the consumer subnet to tell the producer subnet that it does not need some stream prefix anymore. -We call this mechanism \em{signals}. +We call this mechanism \emph{signals}. -Signals are a part of the XNet payload specifying the prefix of the \em{reverse} stream that the sending subnet can drop. 
-When a node from subnet \math{X} fetches XNet payload from a node from subnet \math{Y}, in addition to actual messages, the \math{Y} node includes the \em{stream header}. +Signals are a part of the XNet payload specifying the prefix of the \emph{reverse} stream that the sending subnet can drop. +When a node from subnet \math{X} fetches XNet payload from a node from subnet \math{Y}, in addition to actual messages, the \math{Y} node includes the \emph{stream header}. The stream header describes the state of the \math{X ↔ Y} communication: \begin{itemize} \item The full range of message indices in the forward stream \math{X → Y}. \item - The signals for the \em{reverse} stream (\math{Y → X}): for each message index in the reverse stream, \math{Y} tells whether \math{X} can garbage collect the message (an \sc{ack} signal) or should reroute the message (a \sc{reject} signal). + The signals for the \emph{reverse} stream (\math{Y → X}): for each message index in the reverse stream, \math{Y} tells whether \math{X} can garbage collect the message (an \sc{ack} signal) or should reroute the message (a \sc{reject} signal). A \sc{reject} signal indicates that the destination canister moved, so \math{X} should route the message into another stream. \end{itemize} @@ -158,8 +158,8 @@ \section{stream-certification}{Stream certification} Conceptually, a node requesting messages from a subnet is not different from a client requesting a response. This similarity allows us to use the same\sidenote{sn-same-auth}{ Currently, there is a minor difference in how we represent certificates in the XNet protocol and the \href{https://internetcomputer.org/docs/current/references/ic-interface-spec/#_certificate}{IC HTTP} interface. - The HTTP interface certificates combine the data and the hashes required to check the authenticity in a single data structure, the \em{hash tree}. 
- The XNet protocol separates the raw message data and the hashes (called the \em{witness}) into separate data structures. + The HTTP interface certificates combine the data and the hashes required to check the authenticity in a single data structure, the \emph{hash tree}. + The XNet protocol separates the raw message data and the hashes (called the \emph{witness}) into separate data structures. The reason for that distinction is purely historical. We implemented the XNet protocol significantly earlier than the response authentication, so we could not benefit from \href{https://www.joachim-breitner.de/blog}{Joachim Breitner's} brilliance. Joachim took the XNet authentication scheme as the starting point and simplified it for the IC Interface Specification. diff --git a/posts/09-fungible-tokens-101.tex b/posts/09-fungible-tokens-101.tex index 5e61c30..89aa1ac 100644 --- a/posts/09-fungible-tokens-101.tex +++ b/posts/09-fungible-tokens-101.tex @@ -72,8 +72,8 @@ \subsection{asset-ledgers}{Asset ledgers} Now Allen has \math{\$3.34} on his account, Geneviève has \math{\$8.32}, and Meriam has \math{-\$11.66}\sidenote{sn-compute-balance}{ To compute a person's balance, go over all the records in the log from top to bottom. - If the person appears in the \em{from} column, subtract the \em{amount} from their balance; - if the person appears in the \em{to} column, add the \em{amount} to their balance. + If the person appears in the \emph{from} column, subtract the \emph{amount} from their balance; + if the person appears in the \emph{to} column, add the \emph{amount} to their balance. }. This ledger has an interesting property: the sum of all balances is always zero because we started with no funds, and each record only moves funds. 
@@ -87,19 +87,19 @@ \subsection{asset-ledgers}{Asset ledgers} \subsection{minting-burning}{Minting and burning} -All ledgers have a way to produce, or \em{mint}\sidenote{sn-newton-ming}{Did you know that Sir Isaac Newton worked at the \href{https://newtonandthemint.history.ox.ac.uk/}{Royal Mint} for three decades?}, tokens out of thin air. +All ledgers have a way to produce, or \emph{mint}\sidenote{sn-newton-ming}{Did you know that Sir Isaac Newton worked at the \href{https://newtonandthemint.history.ox.ac.uk/}{Royal Mint} for three decades?}, tokens out of thin air. Bitcoin network mints tokens as a reward for participants that help the ledger grow. The IC mints ICP utility tokens to reward participants in the network governance and node providers. -Another popular scheme is \em{wrapped tokens}, where the ledger mints tokens as proxies for other assets. +Another popular scheme is \emph{wrapped tokens}, where the ledger mints tokens as proxies for other assets. Let us extend the tip ledger example to make it operate on wrapped dollars. Imagine now that Geneviève does not trust the folks she hangs out with, but she wants to continue enjoying the convenience of virtual money. Whenever someone transfers her virtual tokens on a piece of paper, she wants to be sure she can claim her buck back. One way to approach the issue is to set up a piggy bank at the office. -Anyone who puts \math{\$1} into the bank gets a \em{wrapped} \math{\$1} on the ledger. -The transaction converting a physical bill into a virtual token is a \em{mint} transaction. +Anyone who puts \math{\$1} into the bank gets a \emph{wrapped} \math{\$1} on the ledger. +The transaction converting a physical bill into a virtual token is a \emph{mint} transaction. One day Geneviève, Allen, and Meriam put \math{\$10} each into the piggy and mint their wrapped money on the ledger. 
@@ -125,7 +125,7 @@ \subsection{minting-burning}{Minting and burning} \end{tabular} The main difference with the original scheme is that now Geneviève can exit the group and get her money back at any point. -All she needs is to open the piggy under a supervision of a trusted party, get her \math{\$14}, and record a \em{burn} transaction on the ledger by sending her tokens to the void (sometimes called the \em{minting account}). +All she needs is to open the piggy under the supervision of a trusted party, get her \math{\$14}, and record a \emph{burn} transaction on the ledger by sending her tokens to the void (sometimes called the \emph{minting account}). The sum of all balances on the ledger is always equal to the amount of money in the piggy bank. \begin{tabular}{l l r r} @@ -183,7 +183,7 @@ \subsection{approvals}{Approvals} Geneviève could transfer some budget to Alex before he goes out, and he could transfer her the leftover when he is back. That solves the problem, but Geneviève cannot use the locked funds herself during that time because, technically, they belong to Alex. -Another approach popularized by the Ethereum community is to introduce the notion of \em{approvals}. +Another approach popularized by the Ethereum community is to introduce the notion of \emph{approvals}. The ledger could have another table with spending allowances between two people. \begin{tabular}{l l r} @@ -227,9 +227,9 @@ \subsection{fees}{Fees} ``No more than you have on your account,'' replied Meriam. -``I don't have anything yet. Can I send \em{nothing}?'' +``I don't have anything yet. Can I send \emph{nothing}?'' -``Ahm\ldots Well, there are no rules prohibiting \em{that}, I suppose.'' +``Ahm\ldots Well, there are no rules prohibiting \emph{that}, I suppose.'' Peppy grabbed a pen and started filling the lines with blocky letters. @@ -242,7 +242,7 @@ \subsection{fees}{Fees} \fun{PEPPY} & \fun{MOM} & \fun{\$0} \\ \end{tabular} -``OK, Peppy, stop. 
There is a new rule: you cannot transfer \em{nothing}. Only a positive amount.'' +``OK, Peppy, stop. There is a new rule: you cannot transfer \emph{nothing}. Only a positive amount.'' Peppy stopped. A few wrinkles appeared on her forehead, and then her lips curled into a smile. @@ -286,6 +286,6 @@ \section{summary}{Summary} We have seen that fungible tokens are an essential part of our daily life. We learned that \href{#asset-ledgers}{ledger} is a robust accounting mechanism that we can adapt to the task at hand by adopting various features: \href{#minting-burning}{mints}, \href{#subaccounts}{subaccounts}, \href{#approvals}{approvals}, and \href{#fees}{transfer fees}. -In \href{/posts/10-payment-flows.html}{the following article}, we will discuss protocols allowing clients to exchange tokens for service, known as \em{payment flows}. +In \href{/posts/10-payment-flows.html}{the following article}, we will discuss protocols allowing clients to exchange tokens for service, known as \emph{payment flows}. -\end{document} \ No newline at end of file +\end{document} diff --git a/posts/10-payment-flows.tex b/posts/10-payment-flows.tex index 0f4ae9f..328879b 100644 --- a/posts/10-payment-flows.tex +++ b/posts/10-payment-flows.tex @@ -13,7 +13,7 @@ \section{introduction}{Introduction} In the previous article, \href{/posts/09-fungible-tokens-101.html}{Fungible tokens 101}, I introduced the concept of a ledger and various extensions that can help us solve practical problems. -In this article, we shall analyze a few \em{payment flows}---protocols built on top of a ledger allowing clients to exchange tokens for a service---in the context of the \href{https://internetcomputer.org}{Internet Computer}. +In this article, we shall analyze a few \emph{payment flows}---protocols built on top of a ledger allowing clients to exchange tokens for a service---in the context of the \href{https://internetcomputer.org}{Internet Computer}. 
\section{prerequisites}{Prerequisites} \subsection{the-payment-scenario}{The payment scenario} @@ -30,11 +30,11 @@ \subsection{participants}{Participants} Each flow will involve the following participants: \begin{tabular*}{l l} -\includegraphics[grayscale]{/images/10-me.png} & \em{Me}: a merry human sitting in front of a computer and ordering a new laptop. \\ -\includegraphics[grayscale]{/images/10-shop.png} & \em{Shop}: an Internet Computer smart contract accepting orders. \\ -\includegraphics[grayscale]{/images/10-webpage.png} & \em{Web page}: a spaghetti of markup, styling, and scripts serving the \em{shop} UI. \\ -\includegraphics[grayscale]{/images/10-wallet.png} & \em{Wallet}: a trusty hardware wallet device, such as \href{https://www.ledger.com/}{Ledger} or \href{https://trezor.io/}{Trezor}, with a corresponding UI for interacting with the ledger, such as \href{https://www.ledger.com/ledger-live}{Ledger Live}. A more sophisticated wallet can smoothen the UX, but the ideas remain the same. \\ -\includegraphics[grayscale]{/images/10-ledger.png} & \em{Ledger}: an Internet Computer smart contract processing payments. \\ +\includegraphics[grayscale]{/images/10-me.png} & \emph{Me}: a merry human sitting in front of a computer and ordering a new laptop. \\ +\includegraphics[grayscale]{/images/10-shop.png} & \emph{Shop}: an Internet Computer smart contract accepting orders. \\ +\includegraphics[grayscale]{/images/10-webpage.png} & \emph{Web page}: a spaghetti of markup, styling, and scripts serving the \emph{shop} UI. \\ +\includegraphics[grayscale]{/images/10-wallet.png} & \emph{Wallet}: a trusty hardware wallet device, such as \href{https://www.ledger.com/}{Ledger} or \href{https://trezor.io/}{Trezor}, with a corresponding UI for interacting with the ledger, such as \href{https://www.ledger.com/ledger-live}{Ledger Live}. A more sophisticated wallet can smoothen the UX, but the ideas remain the same. 
\\ +\includegraphics[grayscale]{/images/10-ledger.png} & \emph{Ledger}: an Internet Computer smart contract processing payments. \\ \end{tabular*} \subsection{payment-phases}{Payment phases} @@ -42,36 +42,36 @@ \subsection{payment-phases}{Payment phases} All the payment flows we will analyze have three phases: \begin{enumerate} \item - \em{The negotiation phase}. - After I place my order and fill in the shipment details, the shop creates a unique order identifier, \em{Invoice ID}. - The \em{web page} displays the payment details (e.g., as a \sc{qr} code of the request I need to sign) and instructions on how to proceed with the order. + \emph{The negotiation phase}. + After I place my order and fill in the shipment details, the shop creates a unique order identifier, \emph{Invoice ID}. + The \emph{web page} displays the payment details (e.g., as a \sc{qr} code of the request I need to sign) and instructions on how to proceed with the order. \item - \em{The payment phase}. - I use my \em{wallet} to execute the transaction as instructed on the \em{web page}. + \emph{The payment phase}. + I use my \emph{wallet} to execute the transaction as instructed on the \emph{web page}. This phase is essentially the same in all flows; only the transaction type varies. \item - \em{The notification phase}. + \emph{The notification phase}. The shop receives a payment notification for the Invoice ID, validates the payment, and updates the order status. - The \em{web page} displays an upbeat message, completing the flow. + The \emph{web page} displays an upbeat message, completing the flow. \end{enumerate} \section{invoice-account}{Invoice account} The first payment flow we will analyze relies on the \href{/posts/09-fungible-tokens-101.html#subaccounts}{subaccounts} ledger feature. -The idea behind the flow is quite clever: the shop can use its subaccount identified by the \em{Invoice ID} as a temporary ``cell'' for the payment. 
+The idea behind the flow is quite clever: the shop can use its subaccount identified by the \emph{Invoice ID} as a temporary ``cell'' for the payment. I can transfer my tokens to this cell, and the shop can move tokens out because the cell belongs to the shop. The happy case of the flow needs only one primitive from the ledger, the \code{transfer} method specified below. \begin{code}[candid] service : { - \em{// Transfers token \b{amount} from the account of the (implicit) \b{caller}} - \em{// to the account specified by the principal and the subaccount.} - \em{// Arguments:} - \em{// \b{amount} - the token amount to transfer.} - \em{// \b{from_subaccount} - the subaccount of the caller to transfer tokens from.} - \em{// \b{to} - the receiver of the tokens.} - \em{// \b{to_subaccount} - which subaccount of the receiver the tokens will land on.} + \emph{// Transfers token \b{amount} from the account of the (implicit) \b{caller}} + \emph{// to the account specified by the principal and the subaccount.} + \emph{// Arguments:} + \emph{// \b{amount} - the token amount to transfer.} + \emph{// \b{from_subaccount} - the subaccount of the caller to transfer tokens from.} + \emph{// \b{to} - the receiver of the tokens.} + \emph{// \b{to_subaccount} - which subaccount of the receiver the tokens will land on.} \b{transfer}(record { amount : nat; from_subaccount : opt blob; @@ -84,12 +84,12 @@ \section{invoice-account}{Invoice account} The flow proceeds as follows: \begin{enumerate} \item - In the negotiation phase, the webpage instructs me to transfer tokens to the shop's \em{Invoice ID} subaccount and displays a big green ``Done'' button that I need to press after the payment succeeds. + In the negotiation phase, the webpage instructs me to transfer tokens to the shop's \emph{Invoice ID} subaccount and displays a big green ``Done'' button that I need to press after the payment succeeds. 
\item In the payment phase, I use my wallet to execute the \code{transfer(\{ amount = Price, to = Shop, to_subaccount = InvoiceId\})} call on the ledger. \item - In the notification phase, I click on the ``Done'' button dispatching a notification to the \em{shop} indicating that I paid the invoice (the webpage can remember the \em{Invoice ID} on the client side, so I do not have to type it in). - Upon receiving the notification, the shop attempts to transfer the amount from its \em{Invoice ID} subaccount to its default account, calling \code{transfer(\{ amount = Price - Fee, from_subaccount = InvoiceID, to = Shop \})} on the ledger. + In the notification phase, I click on the ``Done'' button dispatching a notification to the \emph{shop} indicating that I paid the invoice (the webpage can remember the \emph{Invoice ID} on the client side, so I do not have to type it in). + Upon receiving the notification, the shop attempts to transfer the amount from its \emph{Invoice ID} subaccount to its default account, calling \code{transfer(\{ amount = Price - Fee, from_subaccount = InvoiceID, to = Shop \})} on the ledger. If that final transfer succeeds, the order is complete. \end{enumerate} @@ -103,7 +103,7 @@ \section{invoice-account}{Invoice account} The invoice account flow has a few interesting properties: \begin{itemize} \item The ledger must process at least two messages: one transfer from me and another from the shop. - \item Two transfers mean that the ledger charges \em{two} fees for each flow: one from me and another from the shop. + \item Two transfers mean that the ledger charges \emph{two} fees for each flow: one from me and another from the shop. \item The ledger needs to remember one additional \code{(principal, subaccount, amount)} tuple for the duration of the flow. The tuple occupies at least 70 bytes. 
@@ -122,35 +122,35 @@ \section{approve-transfer-from}{Approve-transfer-from} The approve-transfer-from pattern relies on the \href{/posts/09-fungible-tokens-101.html#approvals}{approvals} ledger feature, first appearing in the \href{https://ethereum.org/en/developers/docs/standards/tokens/erc-20/}{ERC-20} token standard. The flow uses two new ledger primitives, \code{approve} and \code{transfer_from}, and involves three parties: \begin{enumerate} - \item The \em{owner} holds tokens on the ledger. The owner can \em{approve} transfers from its account to a \em{delegate}. - \item The \em{delegate} can \em{transfer} tokens \em{from} the owner's account within the approved cap. - \item The \em{beneficiary} receives tokens from the delegate as if the owner sent them. + \item The \emph{owner} holds tokens on the ledger. The owner can \emph{approve} transfers from its account to a \emph{delegate}. + \item The \emph{delegate} can \emph{transfer} tokens \emph{from} the owner's account within the approved cap. + \item The \emph{beneficiary} receives tokens from the delegate as if the owner sent them. \end{enumerate} -In our \href{#payment-scenario}{scenario}, the delegate and the beneficiary are the same entity---the \em{shop}. +In our \href{#payment-scenario}{scenario}, the delegate and the beneficiary are the same entity---the \emph{shop}. 
We can capture the required ledger primitives in the following Candid interface: \begin{code}[candid] service : { - \em{// Entitles the \b{delegate} to spend at most the specified token \b{amount} on behalf} - \em{// of the (implicit) \b{caller}.} - \em{// Arguments:} - \em{// \b{amount} - the cap on the amount the delegate can transfer from the caller's account.} - \em{// \b{delegate} - the actor entitled to make payments on behalf of the caller.} + \emph{// Entitles the \b{delegate} to spend at most the specified token \b{amount} on behalf} + \emph{// of the (implicit) \b{caller}.} + \emph{// Arguments:} + \emph{// \b{amount} - the cap on the amount the delegate can transfer from the caller's account.} + \emph{// \b{delegate} - the actor entitled to make payments on behalf of the caller.} \b{approve}(record { amount : nat; delegate : principal; }) -> (); - \em{// Transfers the specified token \b{amount} from the \b{owner} account to the} - \em{// specified account.} - \em{// Arguments:} - \em{// \b{amount} - the token amount to transfer.} - \em{// \b{owner} - the account to transfer tokens from.} - \em{// \b{to} - the receiver of the tokens (the beneficiary).} - \em{//} - \em{// PRECONDITION: the \b{owner} has approved at least the \b{amount} to the (implicit) \b{caller}.} - \em{// POSTCONDITION: the caller's allowance decreases by the \b{amount}.} + \emph{// Transfers the specified token \b{amount} from the \b{owner} account to the} + \emph{// specified account.} + \emph{// Arguments:} + \emph{// \b{amount} - the token amount to transfer.} + \emph{// \b{owner} - the account to transfer tokens from.} + \emph{// \b{to} - the receiver of the tokens (the beneficiary).} + \emph{//} + \emph{// PRECONDITION: the \b{owner} has approved at least the \b{amount} to the (implicit) \b{caller}.} + \emph{// POSTCONDITION: the caller's allowance decreases by the \b{amount}.} \b{transfer_from}(record { amount : nat; owner : principal; @@ -168,7 +168,7 @@ 
\section{approve-transfer-from}{Approve-transfer-from} \item In the payment phase, I use my wallet to execute the \code{approve(\{to = Shop, amount = Price\})} call on the ledger. \item In the notification phase, I paste my ledger address into the text field and press the button. - Once the shop receives the notification with my address and the \em{Invoice ID}, it executes \code{transfer_from(\{ amount = Price; owner = Wallet; to = Shop \})} call on the ledger. + Once the shop receives the notification with my address and the \emph{Invoice ID}, it executes \code{transfer_from(\{ amount = Price; owner = Wallet; to = Shop \})} call on the ledger. If that transfer is successful, the order is complete. \end{enumerate} @@ -182,7 +182,7 @@ \section{approve-transfer-from}{Approve-transfer-from} Let us see how this flow compares to the \href{#invoice-account}{invoice account} flow: \begin{itemize} \item The ledger must process at least two messages: approval from the owner and a transfer from the shop. - \item The ledger charges \em{two} fees for each payment: one for my approval and another for the shop's transfer. + \item The ledger charges \emph{two} fees for each payment: one for my approval and another for the shop's transfer. \item The ledger needs to remember one additional \code{(principal, principal, amount)} tuple for the duration of the flow. The tuple occupies at least 68 bytes. @@ -205,19 +205,19 @@ \section{transfer-notify}{Transfer-notify} That is the idea behind the transfer-notify flow. There is one issue we need to sort out, however. -When we relied on the webpage to send the notification, we could include the \em{Invoice ID} into the payload, making it possible for the shop to identify the relevant order. -If we ask the ledger to send the payment notification, we must pass the \em{Invoice ID} in that message. 
+When we relied on the webpage to send the notification, we could include the \emph{Invoice ID} into the payload, making it possible for the shop to identify the relevant order. +If we ask the ledger to send the payment notification, we must pass the \emph{Invoice ID} in that message. The common way to address this issue is to add the \code{memo} argument to the transfer arguments, allowing the caller to attach an arbitrary payload to the transaction details. \begin{code}[candid] service : { - \em{// Transfers token \b{amount} from the account of the (implicit) \b{caller}} - \em{// to the account specified by the principal.} - \em{// If the transfer is successful, sends a notification to the receiver.} - \em{// Arguments:} - \em{// \b{amount} - the token amount to transfer.} - \em{// \b{to} - the receiver of the tokens.} - \em{// \b{memo} - an opaque identifier attached to the notification.} + \emph{// Transfers token \b{amount} from the account of the (implicit) \b{caller}} + \emph{// to the account specified by the principal.} + \emph{// If the transfer is successful, sends a notification to the receiver.} + \emph{// Arguments:} + \emph{// \b{amount} - the token amount to transfer.} + \emph{// \b{to} - the receiver of the tokens.} + \emph{// \b{memo} - an opaque identifier attached to the notification.} \b{transfer_notify}(record { amount : nat; to : principal; @@ -231,7 +231,7 @@ \section{transfer-notify}{Transfer-notify} \item In the negotiation phase, the webpage displays the payment details and starts polling the shop for payment confirmation. \item In the payment phase, I use my wallet to execute the \code{transfer_notify(\{to = Shop, amount = Price, memo = InvoiceID\})} call on the ledger. \item - Once the transfer succeeds, the ledger notifies the shop about the payment, providing the amount and the \code{memo} containing the \em{Invoice ID}. 
+ Once the transfer succeeds, the ledger notifies the shop about the payment, providing the amount and the \code{memo} containing the \emph{Invoice ID}. The shop consumes the notification and changes the order status. The next time the webpage polls the shop, the shop replies with a confirmation, and I see a positive message. \end{enumerate} @@ -265,29 +265,29 @@ \section{transfer-notify}{Transfer-notify} \section{transfer-notify}{Transfer-fetch} The transfer-fetch flow relies on the ability to request details of past transactions from the ledger. -After I transfer tokens to the shop, specifying the \em{Invoice ID} as the transaction memo, the ledger issues a unique transaction identifier. +After I transfer tokens to the shop, specifying the \emph{Invoice ID} as the transaction memo, the ledger issues a unique transaction identifier. I can then pass this identifier to the shop as proof of my payment. The shop can fetch transaction details directly from the ledger to validate the payment. Below is the interface we expect from the ledger. 
\begin{code}[candid] service : { - \em{// Transfers token \b{amount} from the account of the (implicit) \b{caller}} - \em{// to the account specified by the principal.} - \em{// Returns a unique transaction identifier.} - \em{// Arguments:} - \em{// \b{amount} - the token amount to transfer.} - \em{// \b{to} - the receiver of the tokens.} - \em{// \b{memo} - an opaque identifier attached to the transaction.} + \emph{// Transfers token \b{amount} from the account of the (implicit) \b{caller}} + \emph{// to the account specified by the principal.} + \emph{// Returns a unique transaction identifier.} + \emph{// Arguments:} + \emph{// \b{amount} - the token amount to transfer.} + \emph{// \b{to} - the receiver of the tokens.} + \emph{// \b{memo} - an opaque identifier attached to the transaction.} \b{transfer}(record { amount : nat; to : principal; memo : opt blob; }) -> (nat); - \em{// Retrieves details of the transaction with the specified identifier.} - \em{// Arguments:} - \em{// \b{txid} - a unique transaction identifier.} + \emph{// Retrieves details of the transaction with the specified identifier.} + \emph{// Arguments:} + \emph{// \b{txid} - a unique transaction identifier.} \b{fetch}(txid : nat) -> (opt record { from : principal; to : principal; diff --git a/posts/11-ii-stable-memory.tex b/posts/11-ii-stable-memory.tex index 0d55e5d..1f9b627 100644 --- a/posts/11-ii-stable-memory.tex +++ b/posts/11-ii-stable-memory.tex @@ -28,13 +28,13 @@ \section{introduction}{Introduction} \section{ii-data-model}{Internet Identity data model} The Internet Identity service acts as a proxy between the browser's \href{https://webauthn.io/}{authentication mechanism} and the Internet Computer authentication system. -A user registers in the service by creating an \em{anchor} (a short number) and associating authentication devices, such as \href{https://en.wikipedia.org/wiki/YubiKey}{Yubikey} or \href{https://en.wikipedia.org/wiki/Touch_ID}{Apple Touch ID}, with that anchor. 
+A user registers in the service by creating an \emph{anchor} (a short number) and associating authentication devices, such as \href{https://en.wikipedia.org/wiki/YubiKey}{Yubikey} or \href{https://en.wikipedia.org/wiki/Touch_ID}{Apple Touch ID}, with that anchor. The Internet Identity canister stores these associations and presents a consistent identity to each DApp integrated with the authentication protocol. \begin{figure}[grayscale-diagram] \marginnote{mn-ii-model}{ The data model of the Internet Identity system. - The system allocates a unique \em{anchor} (a short number) for each user. + The system allocates a unique \emph{anchor} (a short number) for each user. The user can attach one or more authentication devices to the anchor. The Internet Identity system allows users to log into DApps that support the authentication protocol. The DApps will see the same user identity (also known as the \href{https://internetcomputer.org/docs/current/references/ic-interface-spec/#principal}{principal}) consistently, regardless of which device the user used for authentication. @@ -105,8 +105,8 @@ \section{ii-stable-memory}{Stable memory as primary storage} \subsection{ii-memory-layout}{The memory layout} The Internet Identity canister divides the stable memory space into non-overlapping sections. -The first section is the \em{header} holding the canister configuration, such as the random salt for hashing and the assigned anchor range. -The rest of the memory is an array of \em{entries}; each entry corresponds to the data of a single anchor. +The first section is the \emph{header} holding the canister configuration, such as the random salt for hashing and the assigned anchor range. +The rest of the memory is an array of \emph{entries}; each entry corresponds to the data of a single anchor. 
\begin{figure}[grayscale-diagram] \marginnote{mn-ii-memory-layout}{ @@ -121,20 +121,20 @@ \subsection{ii-memory-layout}{The memory layout} The following is the list of all header fields as of October 2022: \begin{enumerate} \item - \em{Magic} (3 bytes): a fixed string \code{"IIC"} \href{https://en.wikipedia.org/wiki/Magic_number_(programming)#Format_indicators}{indicating} the Internet Identity stable memory layout. + \emph{Magic} (3 bytes): a fixed string \code{"IIC"} \href{https://en.wikipedia.org/wiki/Magic_number_(programming)#Format_indicators}{indicating} the Internet Identity stable memory layout. \item - \em{Version} (1 byte): the version of the memory layout. + \emph{Version} (1 byte): the version of the memory layout. If we need to change the layout significantly, the version will tell the canister how to interpret the data after the code upgrade. \item - \em{Entry count} (4 bytes): the total number of anchors allocated so far. + \emph{Entry count} (4 bytes): the total number of anchors allocated so far. \item - \em{Min anchor} (8 bytes): the value of the first anchor assigned to the canister. + \emph{Min anchor} (8 bytes): the value of the first anchor assigned to the canister. The canister allocates anchors sequentially, starting from this number. \item - \em{Max anchor} (8 bytes): the value of the largest anchor assigned to the canister. + \emph{Max anchor} (8 bytes): the value of the largest anchor assigned to the canister. The canister becomes full and stops allocating anchors when \code{MinAnchor + EntryCount = MaxAnchor}. \item - \em{Salt} (32 bytes): salt for hashing. + \emph{Salt} (32 bytes): salt for hashing. The canister initializes the salt upon the first request by issuing a \href{https://internetcomputer.org/docs/current/references/ic-interface-spec/#ic-raw_rand}{\code{raw_rand}} call. 
\end{enumerate} @@ -167,4 +167,4 @@ \section{code-pointers}{Code pointers} \href{https://github.com/dfinity/internet-identity/blob/62afdcbd74b1de1d9ab41c9f856e2319661a32cf/src/internet_identity/src/storage.rs}{The stable data storage} implementation. I am sure you will find this code easy to read now that you know its story. \end{itemize} -\end{document} \ No newline at end of file +\end{document} diff --git a/posts/12-rust-error-handling.tex b/posts/12-rust-error-handling.tex index ca0873e..41048e6 100644 --- a/posts/12-rust-error-handling.tex +++ b/posts/12-rust-error-handling.tex @@ -12,7 +12,7 @@ \section{introduction}{Introduction} If I had to pick my favorite \href{https://www.rust-lang.org/}{Rust language} feature, that would be its systematic approach to error handling. -Sum types, generics (such as \href{https://doc.rust-lang.org/std/result/enum.Result.html}{\code{Result}}), and a holistic standard library design perfectly\sidenote{sn-polimorphic-variants}{\em{Almost} perfectly: I miss \href{https://dev.realworldocaml.org/variants.html#scrollNav-4}{polymorphic variants} badly.} match my obsession with edge cases. +Sum types, generics (such as \href{https://doc.rust-lang.org/std/result/enum.Result.html}{\code{Result}}), and a holistic standard library design perfectly\sidenote{sn-polimorphic-variants}{\emph{Almost} perfectly: I miss \href{https://dev.realworldocaml.org/variants.html#scrollNav-4}{polymorphic variants} badly.} match my obsession with edge cases. Rust error handling is so good that even \href{https://haskell.org/}{Haskell} looks bleak and woefully unsafe\sidenote{sn-haskell-exceptions}{ Haskell can replicate Rust's approach to error handling, but the standard library chose the route of runtime exceptions, and \href{https://www.fpcomplete.com/haskell/tutorial/exceptions/}{practitioners} followed the lead. }. 
@@ -75,7 +75,7 @@ \subsection{prefer-specific-enums}{Prefer specific enums} DatabaseConnectionError, Unauthorized, FileNotFound, - \em{// \ldots } + \emph{// \ldots } } pub fn frobnicate(n: u64) -> Result { /* \ldots */ } @@ -84,7 +84,7 @@ \subsection{prefer-specific-enums}{Prefer specific enums} These approaches might work fine for you, but I found them unsatisfactory for library design\sidenote{sn-anyhow}{ However, I often use the \code{anyhow} approach to simplify structuring errors in command-line tools and daemons. -} in the long run: they facilitate \em{propagating} errors (often with little context about the operation that caused the error), not \em{handling} errors. +} in the long run: they facilitate \emph{propagating} errors (often with little context about the operation that caused the error), not \emph{handling} errors. When it comes to interface clarity and simplicity, nothing beats \href{https://en.wikipedia.org/wiki/Algebraic_data_type}{algebraic data types} (\sc{adt}s). Let us use the power of \sc{adt}s to fix the \code{frobnicate} function interface. @@ -95,9 +95,9 @@ \subsection{prefer-specific-enums}{Prefer specific enums} } \begin{code}[good] pub enum FrobnicateError { - \em{/// Frobnicate does not accept inputs above this number.} + \emph{/// Frobnicate does not accept inputs above this number.} InputExceeds(u64), - \em{/// Frobnicate cannot work on mondays. Court order.} + \emph{/// Frobnicate cannot work on mondays. Court order.} CannotFrobnicateOnMondays, } @@ -106,7 +106,7 @@ \subsection{prefer-specific-enums}{Prefer specific enums} \end{figure} -Now the type system tells the readers what exactly can go wrong, making \em{handling} the errors a breeze. +Now the type system tells the readers what exactly can go wrong, making \emph{handling} the errors a breeze. 
You might think, ``I will never finish my project if I define a new enum for each function that can fail.'' In my experience, expressing failures using the type system takes less work than documenting all the quirks of the interface. @@ -149,7 +149,7 @@ \subsection{avoid-panics}{Reserve panics for bugs in your code} A library function relying on documentation to specify correct inputs. } \begin{code}[bad] -\em{/// Frobnicates an integer. +\emph{/// Frobnicates an integer. /// /// \b{# Panics} /// @@ -160,7 +160,7 @@ \subsection{avoid-panics}{Reserve panics for bugs in your code} \end{code} \end{figure} -Feel free to use panics and assertions to check invariants that must hold in \em{your} code. +Feel free to use panics and assertions to check invariants that must hold in \emph{your} code. \begin{figure} \marginnote{mn-panic-doc}{ @@ -191,9 +191,9 @@ \subsection{lift-input-validation}{Lift input validation} } \begin{code}[bad] pub enum SendMailError { - \em{/// One of the addresses passed to \code{send_mail} is invalid.} + \emph{/// One of the addresses passed to \code{send_mail} is invalid.} \b{MalformedAddress} { address: String, reason: String }, - \em{/// Failed to connect to the mail server.} + \emph{/// Failed to connect to the mail server.} FailedToConnect { source: std::io::Error, reason: String }, /* \ldots */ } @@ -211,7 +211,7 @@ \subsection{lift-input-validation}{Lift input validation} Introducing a new type to make \code{send_mail} inputs valid by construction. } \begin{code}[good] -\em{/// Represents valid email addresses}. +\emph{/// Represents valid email addresses}. 
pub struct EmailAddress(String); impl std::str::FromStr for EmailAddress { @@ -220,7 +220,7 @@ \subsection{lift-input-validation}{Lift input validation} } pub enum SendMailError { - \em{// no more InvalidAddress!} + \emph{// no more InvalidAddress!} FailedToConnect { source: std::io::Error, reason: String }, /* \ldots */ } @@ -289,7 +289,7 @@ \subsection{errors-problem-vs-solution}{Define errors in terms of the problem, n \end{code} \end{figure} -This error type does not tell the caller \em{what} problem you are solving but \em{how} you solve it. +This error type does not tell the caller \emph{what} problem you are solving but \emph{how} you solve it. Implementation details leak into the caller's code, causing much pain: \begin{itemize} \item @@ -315,30 +315,30 @@ \subsection{errors-problem-vs-solution}{Define errors in terms of the problem, n } \begin{code}[good] pub enum FetchTxError { - \em{/// Could not connect to the server.} + \emph{/// Could not connect to the server.} ConnectionFailed { url: String, reason: String, cause: Option, // \circled{1} }, - \em{/// Cannot find transaction with the specified txid.} + \emph{/// Cannot find transaction with the specified txid.} TxNotFound(Txid), // \circled{2} - \em{/// The object data is not valid CBOR.} + \emph{/// The object data is not valid CBOR.} InvalidEncoding { // \circled{3} data: Bytes, error_offset: Option, error_message: String, }, - \em{/// The public key is malformed.} + \emph{/// The public key is malformed.} MalformedPublicKey { // \circled{4} key_bytes: Vec, reason: String, }, - \em{/// The transaction signature does not match the public key.} + \emph{/// The transaction signature does not match the public key.} SignatureVerificationFailed { // \circled{4} txid: Txid, pk: Pubkey, @@ -489,4 +489,4 @@ \section{resources}{Resources} \end{enumerate} You can discuss this article on \href{https://www.reddit.com/r/rust/comments/yvdz6l/blog_post_designing_error_types_in_rust}{Reddit}. 
-\end{document} \ No newline at end of file +\end{document} diff --git a/posts/13-icp-ledger.tex b/posts/13-icp-ledger.tex index 145f794..8ba952a 100644 --- a/posts/13-icp-ledger.tex +++ b/posts/13-icp-ledger.tex @@ -17,8 +17,8 @@ \section{introduction}{Introduction} \section{background}{Background} -Unlike BTC for Bitcoin and ETH for Ethereum, the ICP token is not the Internet Computer's native currency\sidenote{sn-cycles}{\em{Cycles} is the native currency of the ICP protocol. You pay cycles for installing and running smart contracts. The network allows you to exchange ICP for cycles with the help of the Cycles Minting Canister (CMC).}. -ICP is a \em{utility token}; its primary purpose is participation in network governance. +Unlike BTC for Bitcoin and ETH for Ethereum, the ICP token is not the Internet Computer's native currency\sidenote{sn-cycles}{\emph{Cycles} is the native currency of the ICP protocol. You pay cycles for installing and running smart contracts. The network allows you to exchange ICP for cycles with the help of the Cycles Minting Canister (CMC).}. +ICP is a \emph{utility token}; its primary purpose is participation in network governance. In the early prototypes of the IC, canisters could hold ICP directly and send them around freely without consulting any third party. This design was unsatisfactory for two reasons: @@ -44,7 +44,7 @@ \section{account-id}{Account identifiers} } \begin{code}[pseudocode] account_identifier(principal, subaccount) := CRC32(h) || h - \em{where} h = SHA224("\\x0Aaccount-id" || principal || subaccount) + \emph{where} h = SHA224("\\x0Aaccount-id" || principal || subaccount) \end{code} \end{figure} @@ -84,7 +84,7 @@ \section{account-id}{Account identifiers} \section{transactions-and-blocks}{Transactions and blocks} -The \href{https://rosetta-api.org}{Rosetta API} expects a blockchain to have \em{blocks} containing \em{transactions}. 
+The \href{https://rosetta-api.org}{Rosetta API} expects a blockchain to have \emph{blocks} containing \emph{transactions}. Smart contracts on the IC do not have access to raw blocks and messages within them, so the ICP ledger models its own ``blockchain'' to satisfy the Rosetta data model. Each ledger operation, such as minting or transferring tokens, becomes a transaction that the ledger wraps into a unique block and adds to the chain. @@ -165,7 +165,7 @@ \section{storage-and-archives}{Storage and archives} There needed to be more than the capacity of a single canister to store an entire transaction history. The team solved the storage issue beautifully. -When the transaction history grows above a pre-configured threshold, the ledger creates a new canister, an \em{archive node}, and moves old transactions to the archive memory. +When the transaction history grows above a pre-configured threshold, the ledger creates a new canister, an \emph{archive node}, and moves old transactions to the archive memory. The ledger spawns a new archive node when the previous archive node becomes full. \begin{figure}[grayscale-diagram] @@ -212,7 +212,7 @@ \section{tx-sigs}{Transaction signatures} Even though a few significant changes to the base protocol could resolve the technical issues, retaining transaction witnesses at the ICP ledger layer would have a downside: the space required to store these witnesses would exceed the transaction data by an order of magnitude (most transactions are tiny, about one hundred bytes). -This design reminds me of Bitcoin's \href{https://en.wikipedia.org/wiki/SegWit}{Segregated Witnesses} (\em{SegWit}) proposal separating the transaction data from unlock scripts. +This design reminds me of Bitcoin's \href{https://en.wikipedia.org/wiki/SegWit}{Segregated Witnesses} (\emph{SegWit}) proposal separating the transaction data from unlock scripts. 
SegWit transactions require signatures for validation, but \href{https://en.bitcoinwiki.org/wiki/Simplified_Payment_Verification}{Simple Payment Verification} nodes get blocks without signatures to save storage space and bandwidth. \section{references}{References} diff --git a/posts/14-stable-structures.tex b/posts/14-stable-structures.tex index a1ab086..6c4b9dc 100644 --- a/posts/14-stable-structures.tex +++ b/posts/14-stable-structures.tex @@ -18,7 +18,7 @@ \section{introduction}{Introduction} Canisters hosted on the Internet Computer (IC) are mutable: canister controllers can upgrade the code to add new features or fix bugs without changing the canister's identity. -Since the \href{/posts/06-ic-orthogonal-persistence.html#upgrades}{orthogonal persistence} feature cannot handle upgrades, the IC allows canisters to use additional storage, called \em{stable memory}, to facilitate the data transfer between code versions. +Since the \href{/posts/06-ic-orthogonal-persistence.html#upgrades}{orthogonal persistence} feature cannot handle upgrades, the IC allows canisters to use additional storage, called \emph{stable memory}, to facilitate the data transfer between code versions. The \href{/posts/11-ii-stable-memory.html#conventional-memory-management}{conventional approach} to canister state persistence is to serialize the entire state to stable memory in the \code{pre_upgrade} hook and decode it back in the \code{post_upgrade} hook. This approach is easy to implement and works well for relatively small datasets. Unfortunately, it does not scale well and can render a canister non-upgradable, so I \href{/posts/01-effective-rust-canisters.html#stable-memory-main}{recommend} using stable memory as the primary storage when possible. 
@@ -39,30 +39,30 @@ \section{design-principles}{Design principles} \begin{itemize} \item \label{radical-simplicity} - \em{Radical simplicity.} + \emph{Radical simplicity.} Programming stable memory is significantly easier than working with conventional file systems. The IC solves many issues with which any good storage must deal: data integrity, partial writes, power outages, and atomicity of multiple writes. Even with all these issues sorted out, complicated designs would be hard to implement, debug, and maintain. Each data structure follows the most straightforward design that solves the problem at hand. \item \label{backward-compatibility} - \em{Backward compatibility.} + \emph{Backward compatibility.} Upgrading the library version must preserve the data. All data structures have a metadata section with the layout version. Newer implementations will respect old layouts and should not require data migration. \item - \em{No \code{pre_upgrade} hooks.} + \emph{No \code{pre_upgrade} hooks.} A bug in the \code{pre_upgrade} hook can make your canister \href{/posts/01-effective-rust-canisters.html#upgrade-hook-panics}{non-upgradable}. The best way to avoid this issue is not to have a \code{pre_upgrade} hook. \item - \em{Limited blast radius.} + \emph{Limited blast radius.} If a single data structure has a bug, it should not corrupt the contents of other data structures. \item - \em{No reallocation.} + \emph{No reallocation.} Moving large amounts of data is expensive and can lead to prohibitively high cycle consumption. All data structures must manage their memory without costly moves. 
\item - \em{Compatibility with \href{https://github.com/WebAssembly/multi-memory/blob/master/proposals/multi-memory/Overview.md}{multi-memory} WebAssembly.} + \emph{Compatibility with \href{https://github.com/WebAssembly/multi-memory/blob/master/proposals/multi-memory/Overview.md}{multi-memory} WebAssembly.} The design should work when canisters have multiple stable memories since this feature is on the \href{https://forum.dfinity.org/t/proposal-wasm-native-stable-memory/15966#proposal-7}{IC roadmap}. \end{itemize} @@ -86,16 +86,16 @@ \subsection{memory}{Memory} \begin{code}[rust] pub trait \b{Memory} { - \em{/// Equivalent to WebAssembly memory.size.} + \emph{/// Equivalent to WebAssembly memory.size.} fn \b{size}(&self) -> u64; - \em{/// Equivalent to WebAssembly memory.grow.} + \emph{/// Equivalent to WebAssembly memory.grow.} fn \b{grow}(&self, pages: u64) -> i64; - \em{/// Copies bytes from this memory to the heap (in Wasm, memory 0).} + \emph{/// Copies bytes from this memory to the heap (in Wasm, memory 0).} fn \b{read}(&self, offset: u64, dst: &mut [u8]); - \em{/// Writes bytes from the heap (in Wasm, memory 0) to this memory.} + \emph{/// Writes bytes from the heap (in Wasm, memory 0) to this memory.} fn \b{write}(&self, offset: u64, src: &[u8]); } \end{code} @@ -161,12 +161,12 @@ \subsection{storable-types}{Storable types} } \begin{code}[rust] pub trait \b{Storable} {\label{storable-trait} - \em{/// Serializes a value of a storable type into bytes.} + \emph{/// Serializes a value of a storable type into bytes.} fn \b{to_bytes}(&self) -> Cow<'_, [u8]>; - \em{/// Deserializes a value of a storable type from a byte array.} - \em{///} - \em{/// \b{REQUIREMENT}: Self::from_bytes(self.to_bytes().to_vec()) == self} + \emph{/// Deserializes a value of a storable type from a byte array.} + \emph{///} + \emph{/// \b{REQUIREMENT}: Self::from_bytes(self.to_bytes().to_vec()) == self} fn \b{from_bytes}(bytes: Cow<[u8]>) -> Self; } \end{code} @@ -182,15 +182,15 @@ 
\subsection{storable-types}{Storable types} } \begin{code}[rust] pub trait \label{bounded-storable-trait}\b{BoundedStorable}: \href{#storable-trait}{\code{\b{Storable}}} { - \em{/// The maximum slice length that \b{to_bytes} can return.} - \em{///} - \em{/// \b{REQUIREMENT}: self.to_bytes().len() ≤ Self::MAX_SIZE as usize} + \emph{/// The maximum slice length that \b{to_bytes} can return.} + \emph{///} + \emph{/// \b{REQUIREMENT}: self.to_bytes().len() ≤ Self::MAX_SIZE as usize} const \b{MAX_SIZE}: u32;\label{max-size-attribute} - \em{/// Whether all values of this type have the same length (equal to Self::MAX_SIZE)} - \em{/// when serialized. If you are unsure about this flag, set it to \b{false}.} - \em{///} - \em{/// \b{REQUIREMENT}: Self::IS_FIXED_SIZE ⇒ self.to_bytes().len() == Self::MAX_SIZE as usize} + \emph{/// Whether all values of this type have the same length (equal to Self::MAX_SIZE)} + \emph{/// when serialized. If you are unsure about this flag, set it to \b{false}.} + \emph{///} + \emph{/// \b{REQUIREMENT}: Self::IS_FIXED_SIZE ⇒ self.to_bytes().len() == Self::MAX_SIZE as usize} const \b{IS_FIXED_SIZE}: bool;\label{is-fixed-size-attribute} } \end{code} @@ -218,21 +218,21 @@ \section{data-structures}{Data structures} \begin{figure} \begin{code}[bad] -\em{// \b{BAD}: stable structures do not support nesting.} +\emph{// \b{BAD}: stable structures do not support nesting.} type BalanceMap = StableBTreeMap>; \end{code} \begin{code}[good] -\em{// \b{GOOD}: use a composite key (a tuple) to avoid nesting.} -\em{// Use a \href{#range-scan-example}{\b{range scan}} to find all subaccounts of a principal.} +\emph{// \b{GOOD}: use a composite key (a tuple) to avoid nesting.} +\emph{// Use a \href{#range-scan-example}{\b{range scan}} to find all subaccounts of a principal.} type BalanceMap = StableBTreeMap<(Principal, Subaccount), Tokens>; \end{code} \begin{code}[bad] -\em{// \b{BAD}: stable structures do not support nesting.} +\emph{// \b{BAD}: stable structures 
do not support nesting.} type TxIndex = StableBTreeMap>; \end{code} \begin{code}[good] -\em{// \b{GOOD}: use a composite key to avoid nesting.} -\em{// Use a \href{#range-scan-example}{\b{range scan}} to find all transactions of a principal.} +\emph{// \b{GOOD}: use a composite key to avoid nesting.} +\emph{// Use a \href{#range-scan-example}{\b{range scan}} to find all transactions of a principal.} type TxIndex = StableBTreeMap<(Principal, TxId), Transaction>; \end{code} \end{figure} @@ -250,12 +250,12 @@ \subsection{stable-cell}{Stable cell} } \begin{code}[rust] impl struct \b{Cell} { - \em{/// Returns the current cell value.} - \em{/// Complexity: O(1).} + \emph{/// Returns the current cell value.} + \emph{/// Complexity: O(1).} pub fn \b{get}(&self) -> Option<&T>; - \em{/// Updates the cell value.} - \em{/// Complexity: O(value size).} + \emph{/// Updates the cell value.} + \emph{/// Complexity: O(value size).} pub fn \b{set}(&mut self, value: T) -> Result; } \end{code} @@ -290,24 +290,24 @@ \subsection{stable-vec}{Stable vector} } \begin{code}[rust] impl struct \b{Vec} { - \em{/// Adds a new item at the vector's back.} - \em{/// Complexity: O(T::MAX_SIZE).} + \emph{/// Adds a new item at the vector's back.} + \emph{/// Complexity: O(T::MAX_SIZE).} pub fn \b{push}(&self, item: &T) -> Result; - \em{/// Removes an item from the vector's back.} - \em{/// Complexity: O(T::MAX_SIZE).} + \emph{/// Removes an item from the vector's back.} + \emph{/// Complexity: O(T::MAX_SIZE).} pub fn \b{pop}(&self) -> Option; - \em{/// Returns the item at the specified index.} - \em{/// Complexity: O(T::MAX_SIZE).} + \emph{/// Returns the item at the specified index.} + \emph{/// Complexity: O(T::MAX_SIZE).} pub fn \b{get}(&self, index: usize) -> Option; - \em{/// Updates the item at the specified index.} - \em{/// Complexity: O(T::MAX_SIZE).} + \emph{/// Updates the item at the specified index.} + \emph{/// Complexity: O(T::MAX_SIZE).} pub fn \b{set}(&self, index: usize, item: &T); 
- \em{/// Returns the number of items in the vector.} - \em{/// Complexity: O(1).} + \emph{/// Returns the number of items in the vector.} + \emph{/// Complexity: O(1).} pub fn \b{len}() -> usize; } \end{code} @@ -329,7 +329,7 @@ \subsection{stable-log}{Stable log} }{Jay Kreps, \href{https://engineering.linkedin.com/distributed-systems/log-what-every-software-engineer-should-know-about-real-time-datas-unifying}{The Log}} A \href{https://docs.rs/ic-stable-structures/latest/ic_stable_structures/log/struct.Log.html}{Log} is an append-only list of arbitrary-sized values, similar to \href{https://redis.io/docs/data-types/streams/}{streams} in Redis. -The log requires two memories: the \em{index} storing entry offsets and the \em{data} storing raw entry bytes. +The log requires two memories: the \emph{index} storing entry offsets and the \emph{data} storing raw entry bytes. The number of instructions needed to access old and append new entries does not depend on the number of items in the log, only on the entry size. \begin{figure} @@ -343,16 +343,16 @@ \subsection{stable-log}{Stable log} Index: \href{#memory}{\code{Memory}}, Data: \href{#memory}{\code{Memory}}, { - \em{/// Adds a new entry to the log.} - \em{/// Complexity: O(entry size).} + \emph{/// Adds a new entry to the log.} + \emph{/// Complexity: O(entry size).} pub fn \b{append}(&self, bytes: &T) -> Result; - \em{/// Returns the entry at the specified index.} - \em{/// Complexity: O(entry size).} + \emph{/// Returns the entry at the specified index.} + \emph{/// Complexity: O(entry size).} pub fn \b{get}(&self, idx: u64) -> Option; - \em{/// Returns the number of entries in the log.} - \em{/// Complexity: O(1).} + \emph{/// Returns the number of entries in the log.} + \emph{/// Complexity: O(1).} pub fn \b{len}() -> u64; } \end{code} @@ -360,7 +360,7 @@ \subsection{stable-log}{Stable log} \begin{figure}[grayscale-diagram] \marginnote{mn-log-figure}{ A \code{Log} is an append-only list of values. 
- Logs need two memories: the \em{index} memory storing value offsets and the \em{data} memory storing raw entries. + Logs need two memories: the \emph{index} memory storing value offsets and the \emph{data} memory storing raw entries. The image depicts a log with two values: the first entry is 100 bytes long, and the second entry is 200 bytes long. } \includegraphics{/images/14-log.svg} @@ -379,7 +379,7 @@ \subsection{stable-log}{Stable log} \subsection{stable-btree}{Stable B-tree} \epigraph{ - \em{Deletion} of items from a B-tree is fairly straightforward in principle, but it is complicated in the details. + \emph{Deletion} of items from a B-tree is fairly straightforward in principle, but it is complicated in the details. }{Niklaus Wirth, ``Algorithms + Data Structures = Programs'', \sc{dynamic information structures}, p. 250} The \href{https://docs.rs/ic-stable-structures/0.4.0/ic_stable_structures/btreemap/struct.BTreeMap.html}{\code{BTreeMap}} stable structure is an associative container that can hold any \href{#storable-types}{bounded storable type}. 
@@ -400,23 +400,23 @@ \subsection{stable-btree}{Stable B-tree} V: \href{#bounded-storable-trait}{\code{BoundedStorable}}, M: \href{#memory}{\code{Memory}}, { - \em{/// Adds a new entry to the map.} - \em{/// Complexity: O(log(N) * K::MAX_SIZE + V::MAX_SIZE).} + \emph{/// Adds a new entry to the map.} + \emph{/// Complexity: O(log(N) * K::MAX_SIZE + V::MAX_SIZE).} pub fn \b{insert}(&self, key: K, value: V) -> Option; - \em{/// Returns the value associated with the specified key.} - \em{/// Complexity: O(log(N) * K::MAX_SIZE + V::MAX_SIZE).} + \emph{/// Returns the value associated with the specified key.} + \emph{/// Complexity: O(log(N) * K::MAX_SIZE + V::MAX_SIZE).} pub fn \b{get}(&self, key: &K) -> Option; - \em{/// Removes an entry from the map.} - \em{/// Complexity: O(log(N) * K::MAX_SIZE + V::MAX_SIZE).} + \emph{/// Removes an entry from the map.} + \emph{/// Complexity: O(log(N) * K::MAX_SIZE + V::MAX_SIZE).} pub fn \b{remove}(&self, key: &K) -> Option; - \em{/// Returns an iterator over the entries in the specified key range.} + \emph{/// Returns an iterator over the entries in the specified key range.} pub fn \b{range}(&self, range: impl RangeBounds) -> impl Iterator; - \em{/// Returns the number of entries in the map.} - \em{/// Complexity: O(1).} + \emph{/// Returns the number of entries in the map.} + \emph{/// Complexity: O(1).} pub fn \b{len}() -> usize; } \end{code} @@ -424,7 +424,7 @@ \subsection{stable-btree}{Stable B-tree} \begin{figure}[grayscale-diagram] \marginnote{mn-btree}{ - A \code{BTreeMap} is an associative container storing data in fixed-size dynamically-allocated \em{nodes}. + A \code{BTreeMap} is an associative container storing data in fixed-size dynamically-allocated \emph{nodes}. Each node stores an array of key-value mappings ordered by key. The tree uses the \href{https://en.wikipedia.org/wiki/Free_list}{free-list} technique for allocating and freeing nodes. Dotted boxes represent the logical tree structure. 
@@ -442,7 +442,7 @@ \subsection{stable-btree}{Stable B-tree} Two examples of using the \code{StableBTreeMap::range} method. } \begin{code}[rust] -\em{/// Selects all subaccounts of the specified principal.} +\emph{/// Selects all subaccounts of the specified principal.} fn \b{principal_subaccounts}( balance_map: &StableBTreeMap<(Principal, Subaccount), Tokens>, principal: Principal, @@ -453,7 +453,7 @@ \subsection{stable-btree}{Stable B-tree} .map(|((_, s), t)| (s, t)) } -\em{/// Selects a transaction range for the specified principal.} +\emph{/// Selects a transaction range for the specified principal.} fn \b{principal_tx_range}( tx_index: &StableBTreeMap<(Principal, TxId), Transaction>, principal: Principal, @@ -505,7 +505,7 @@ \section{constructing-ss}{Constructing stable structures} \end{figure} The main benefit of this approach is that the runtime will automatically initialize the stable structure the first time you access it. -\em{Ensure that you access all such variables in the \code{post_upgrade} hook.} +\emph{Ensure that you access all such variables in the \code{post_upgrade} hook.} Otherwise, you might only be able to catch a configuration error after the upgrade is complete. However, you do not have to declare each stable structure in a separate global variable. @@ -574,8 +574,8 @@ \section{tying-together}{Tying it all together} I also implement \href{https://doc.rust-lang.org/std/ops/trait.Deref.html}{\code{std::ops::Deref}} to improve the ergonomics of the wrapper type. 
\begin{code}[rust] -\em{/// A helper type implementing Storable for all} -\em{/// serde-serializable types using the CBOR encoding.} +\emph{/// A helper type implementing Storable for all} +\emph{/// serde-serializable types using the CBOR encoding.} #[derive(Default)] struct \b{Cbor}(pub T) where T: serde::Serialize + serde::de::DeserializeOwned; @@ -607,14 +607,14 @@ \section{tying-together}{Tying it all together} Note the use of \href{#restricted-memory}{\code{RestrictedMemory}} to split the canister memory into two non-intersecting regions and \href{#memory-manager}{\code{MemoryManager}} (abbreviated as \code{MM}) to interleave multiple data structures in the second region. \begin{code}[rust] -\em{// NOTE: ensure that all memory ids are unique and} -\em{// do not change across upgrades!} +\emph{// NOTE: ensure that all memory ids are unique and} +\emph{// do not change across upgrades!} const \b{BALANCES_MEM_ID}: MemoryId = MemoryId::new(0); const \b{LOG_INDX_MEM_ID}: MemoryId = MemoryId::new(1); const \b{LOG_DATA_MEM_ID}: MemoryId = MemoryId::new(2); -\em{// NOTE: we allocate the first 16 pages (about 2 MiB) of the} -\em{// canister memory for the metadata.} +\emph{// NOTE: we allocate the first 16 pages (about 2 MiB) of the} +\emph{// canister memory for the metadata.} const \b{METADATA_PAGES}: u64 = 16; type RM = RestrictedMemory; @@ -658,4 +658,4 @@ \section{next}{Where to go next} }, such as \href{https://github.com/dfinity/internet-identity/blob/97e8d968aba653c8857537ecd541b35de5085608/src/archive/src/main.rs}{II archive} and \href{https://github.com/dfinity/ic/blob/df57b720fd0ceed70f021f4812c797fb40d97503/rs/bitcoin/ckbtc/minter/src/storage.rs}{ckBTC minter}, and \href{https://github.com/dfinity/bitcoin-canister/blob/9242d5f9a784ac115c2042fd09705dd9321ff7b7/canister/src/block_header_store.rs#L13}{Bitcoin}. \item Read the \href{https://docs.rs/ic-stable-structures/latest/ic_stable_structures/}{official documentation}. 
\end{itemize} -\end{document} \ No newline at end of file +\end{document} diff --git a/posts/15-when-rust-hurts.tex b/posts/15-when-rust-hurts.tex index a647f41..1bb7649 100644 --- a/posts/15-when-rust-hurts.tex +++ b/posts/15-when-rust-hurts.tex @@ -34,16 +34,16 @@ \section{objects-values-references}{Objects, values, and references} Understanding the difference between objects, values, and references is helpful before diving deeper into Rust. -In the context of this article, \em{values} are entities with distinct identities, such as numbers and strings. -An \em{object} is a representation of a value in the computer memory. -A \em{reference} is the address of an object that we can use to access the object or its parts. +In the context of this article, \emph{values} are entities with distinct identities, such as numbers and strings. +An \emph{object} is a representation of a value in the computer memory. +A \emph{reference} is the address of an object that we can use to access the object or its parts. \begin{figure}[grayscale-diagram] \marginnote{mn-objects-values-refs}{ A visualization of values, objects, and references on an example of an integer in a 16-bit computer. The value is number five, which has no inherent type. The object is a 16-bit integer stored at address \code{0x0300} (\href{https://en.wikipedia.org/wiki/Endianness}{little-endian}). - The memory contains a \em{reference} to the number, represented as a pointer to address \code{0x0300}. + The memory contains a \emph{reference} to the number, represented as a pointer to address \code{0x0300}. 
} \includegraphics{/images/15-objects-values-references.svg} \end{figure} @@ -219,7 +219,7 @@ \subsection{functional-abstraction}{Functional abstraction} impl S { fn ensure_has_entry(&mut self, key: i64) { use std::collections::hash_map::Entry::*; - \em{// This version is more verbose, but it works with Rust 2018.} + \emph{// This version is more verbose, but it works with Rust 2018.} match self.map.entry(key) { Occupied(mut e) => e.get_mut(), Vacant(mut e) => e.insert(self.def.clone()), @@ -250,7 +250,7 @@ \subsection{newtype-abstrction}{Newtype abstraction} } println!("{}", Hex((0..32).collect())); -\em{// => 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f} +\emph{// => 000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f} \end{code} The new type idiom is efficient: the representation of the \code{Hex} type in the machine's memory is identical to that of \code{Vec}. @@ -286,15 +286,15 @@ \subsection{views-and-bundles}{Views and bundles} Some functions return references to one of their arguments. }. This pattern is so common that it might be helpful to define new terms. -I call input types with lifetime parameters \em{views} because they are optimal for inspecting data. -I call regular output types \em{bundles} because they are self-contained. +I call input types with lifetime parameters \emph{views} because they are optimal for inspecting data. +I call regular output types \emph{bundles} because they are self-contained. The following snippet comes from the (sunset) \href{https://github.com/bytecodealliance/lucet}{Lucet} WebAssembly runtime. 
\begin{code}[rust] -\em{/// A WebAssembly global along with its export specification.} -\em{/// The lifetime parameter exists to support zero-copy deserialization} -\em{/// for the `\&str` fields at the leaves of the structure.} -\em{/// For a variant with owned types at the leaves, see `OwnedGlobalSpec`.} +\emph{/// A WebAssembly global along with its export specification.} +\emph{/// The lifetime parameter exists to support zero-copy deserialization} +\emph{/// for the `\&str` fields at the leaves of the structure.} +\emph{/// For a variant with owned types at the leaves, see `OwnedGlobalSpec`.} pub struct \href{https://github.com/bytecodealliance/lucet/blob/51fb1ed414fe44f842db437d94abb6eb439d7c92/lucet-module/src/globals.rs#L8}{\code{GlobalSpec}}<'a> { global: Global<'a>, export_names: Vec<&'a str>, @@ -302,8 +302,8 @@ \subsection{views-and-bundles}{Views and bundles} \ldots -\em{/// A variant of `GlobalSpec` with owned strings throughout.} -\em{/// This type is useful when directly building up a value to be serialized.} +\emph{/// A variant of `GlobalSpec` with owned strings throughout.} +\emph{/// This type is useful when directly building up a value to be serialized.} pub struct \href{https://github.com/bytecodealliance/lucet/blob/51fb1ed414fe44f842db437d94abb6eb439d7c92/lucet-module/src/globals.rs#L112}{\code{OwnedGlobalSpec}} { global: OwnedGlobal, export_names: Vec, @@ -313,11 +313,11 @@ \subsection{views-and-bundles}{Views and bundles} The authors duplicated the \code{GlobalSpec} data structure to support two use cases: \begin{itemize} \item - \code{GlobalSpec<'a>} is a \em{view} object that the code authors parse from a byte buffer. + \code{GlobalSpec<'a>} is a \emph{view} object that the code authors parse from a byte buffer. Individual fields of this view point back to the relevant regions of the buffer. This representation is helpful for functions that need to inspect values of type \code{GlobalSpec} without modifying them. 
\item - \code{OwnedGlobalSpec} is a \em{bundle}: it does not contain references to other data structures. + \code{OwnedGlobalSpec} is a \emph{bundle}: it does not contain references to other data structures. This representation is helpful for functions that construct values of type \code{GlobalSpec} and pass them around or put them into a container. \end{itemize} @@ -348,10 +348,10 @@ \subsection{object-composition}{Object composition} } the database with its snapshot, but Rust will not let us. \begin{code}[bad] -\em{// There is no way to define the following struct without} -\em{// contaminating it with lifetimes.} +\emph{// There is no way to define the following struct without} +\emph{// contaminating it with lifetimes.} struct DbSnapshot { - snapshot: Snapshot<'a>, \em{// what should 'a be?} + snapshot: Snapshot<'a>, \emph{// what should 'a be?} db: Arc, } \end{code} @@ -362,7 +362,7 @@ \subsection{object-composition}{Object composition} As discussed in the \href{#objects-values-references}{Objects, values, and references} section, modifying a referenced object is usually a bug. In our case, the \code{snapshot} object might depend on the physical location of the \code{db} object. If we move the \code{DbSnapshot} as a whole, the physical location of the \code{db} field will change, corrupting references in the \code{snapshot} object. -We \em{know} that moving \code{Arc} will not change the location of the \code{Db} object, but there is no way to communicate this information to \code{rustc}. +We \emph{know} that moving \code{Arc} will not change the location of the \code{Db} object, but there is no way to communicate this information to \code{rustc}. Another issue with \code{DbSnapshot} is that the order of its field \href{https://doc.rust-lang.org/stable/reference/destructors.html}{destruction} matters. 
If Rust allowed sibling pointers, changing the field order could introduce undefined behavior: the \code{snapshot}'s destructor could try to access fields of a destroyed \code{db} object. @@ -427,7 +427,7 @@ \subsection{orphan-rules}{Orphan rules} \includegraphics{/images/15-orphan-rules.svg} \end{figure} -These rules make it easy for the compiler to guarantee \em{coherence}, which is a smart way to say that all parts of your program see the same trait implementation for a particular type. +These rules make it easy for the compiler to guarantee \emph{coherence}, which is a smart way to say that all parts of your program see the same trait implementation for a particular type. In exchange, this rule significantly complicates integrating traits and types from unrelated libraries. One example is traits we want to use only in tests, such as \href{https://altsysrq.github.io/rustdoc/proptest/1.0.0/proptest/arbitrary/trait.Arbitrary.html}{\code{Arbitrary}} from the \href{https://crates.io/crates/proptest}{proptest} package. @@ -460,7 +460,7 @@ \subsection{deadlocks}{Deadlocks} Obviously such a program isn't very good, but Rust can only hold your hand so far. }{The Rustonomicon, \href{https://doc.rust-lang.org/nomicon/races.html}{Data Races and Race Conditions}} -Safe Rust prevents a specific type of concurrency bug called \em{data race}. +Safe Rust prevents a specific type of concurrency bug called \emph{data race}. Concurrent Rust programs have plenty of other ways to behave incorrectly. One class of concurrency bugs that I experienced firsthand is \href{https://en.wikipedia.org/wiki/Deadlock}{deadlock}. 
@@ -474,19 +474,19 @@ \subsection{deadlocks}{Deadlocks} impl Service { pub fn do_something(&self) { let guard = self.lock.read(); - \em{// \ldots } - self.helper_function(); \em{// \b{BUG}: will panic or deadlock} - \em{// \ldots } + \emph{// \ldots } + self.helper_function(); \emph{// \b{BUG}: will panic or deadlock} + \emph{// \ldots } } fn helper_function(&self) { let guard = self.lock.read(); - \em{// \ldots } + \emph{// \ldots } } } \end{code} -The documentation for \href{https://doc.rust-lang.org/std/sync/struct.RwLock.html#method.read}{\code{RwLock::read}} mentions that the function \em{might} panic if the current thread already holds the lock. +The documentation for \href{https://doc.rust-lang.org/std/sync/struct.RwLock.html#method.read}{\code{RwLock::read}} mentions that the function \emph{might} panic if the current thread already holds the lock. All I got was a hanging program. Some languages tried to provide a solution to this problem in their concurrency toolkits. @@ -496,7 +496,7 @@ \subsection{deadlocks}{Deadlocks} \subsection{filesystem-shared-resource}{Filesystem is a shared resource} \epigraph{ - We can view a path as an \em{address}. + We can view a path as an \emph{address}. Then a string representing a path is a pointer, and accessing a file through a path is a pointer dereference. Thus, component interference due to file overwriting can be viewed as an address collision problem: two components occupy overlapping parts of the address space. }{Eelco Dolstra, \href{https://edolstra.github.io/pubs/phd-thesis.pdf}{The Purely Functional Software Deployment Model}, p. 53} @@ -535,7 +535,7 @@ \subsection{implicit-async-runtimes}{Implicit async runtimes} \href{https://docs.rs/tokio/1.25.0/tokio/fn.spawn.html}{\code{tokio::spawn}}(some_async_func()); // ^ // | - // \em{This code will panic if we remove this line. Spukhafte Fernwirkung!} + // \emph{This code will panic if we remove this line. 
Spukhafte Fernwirkung!} } // | // | fn main() { // v @@ -566,7 +566,7 @@ \subsection{functions-have-colors}{Functions have colors} }{Bob Nystrom, \href{https://journal.stuffwithstuff.com/2015/02/01/what-color-is-your-function/}{What Color is Your Function?}} Rust's \code{async/.await} syntax simplifies the composition of asynchronous algorithms. -In return, it brings a fair amount of complexity, \href{https://journal.stuffwithstuff.com/2015/02/01/what-color-is-your-function/}{painting every function} in \em{blue} (sync) or \em{red} (async) color. +In return, it brings a fair amount of complexity, \href{https://journal.stuffwithstuff.com/2015/02/01/what-color-is-your-function/}{painting every function} in \emph{blue} (sync) or \emph{red} (async) color. There are new rules to follow: \begin{itemize} \item @@ -581,7 +581,7 @@ \subsection{functions-have-colors}{Functions have colors} Not all sync functions are equally blue. \end{itemize} -That is it; some sync functions are secretly \em{purple}: they can read files, join threads, or \href{https://doc.rust-lang.org/std/thread/fn.sleep.html}{\code{thread::sleep}} on a couch. +That is it; some sync functions are secretly \emph{purple}: they can read files, join threads, or \href{https://doc.rust-lang.org/std/thread/fn.sleep.html}{\code{thread::sleep}} on a couch. We do not want to call these purple (blocking) functions from red (async) functions because they will block the runtime and kill the performance benefits that motivated us to step into this asynchrony mess. Unfortunately, purple functions are secretive: you cannot tell whether a function is purple without inspecting its body and the bodies of all other functions in its \href{https://en.wikipedia.org/wiki/Call_graph}{call graph}. @@ -607,4 +607,4 @@ \section{conclusion}{Conclusion} Oh well, no language is perfect for every problem. You can discuss this article on \href{https://www.reddit.com/r/rust/comments/112hmga/blog_post_when_rust_hurts/}{Reddit}. 
-\end{document} \ No newline at end of file +\end{document} diff --git a/posts/16-building-a-second-brain.tex b/posts/16-building-a-second-brain.tex index efefcfe..eedee4b 100644 --- a/posts/16-building-a-second-brain.tex +++ b/posts/16-building-a-second-brain.tex @@ -44,15 +44,15 @@ \subsection{organize}{Organize} The crux of the method is classifying your notes into four top-level categories: \begin{enumerate} \item - \em{Projects} are short-term efforts with a goal and a deadline. + \emph{Projects} are short-term efforts with a goal and a deadline. \item - \em{Areas} are long-term responsibilities that do not have a final goal, but they have a standard you want to meet. + \emph{Areas} are long-term responsibilities that do not have a final goal, but they have a standard you want to meet. Finances, travel, health, and family are areas. \item - \em{Resources} are topics of interest that might be useful in the future. + \emph{Resources} are topics of interest that might be useful in the future. Hobbies (music, calligraphy, etc.) and research subjects (geometry, type systems, cryptography, etc.) are good examples of resources. \item - \em{Archives} contain inactive items from other categories. + \emph{Archives} contain inactive items from other categories. \end{enumerate} One helpful analogy for the \sc{para} system the book mentions is cooking in a kitchen: @@ -103,7 +103,7 @@ \subsection{express}{Express} Use your \sc{pkm} to track these knowledge pieces so you can find them quickly when you work on a project where they could be helpful. Strive to split your project into chunks and deliver them separately, receiving feedback as soon as possible. -According to Tiago, the creative process usually goes through two stages: \href{https://fortelabs.com/blog/divergence-and-convergence-the-two-fundamental-stages-of-the-creative-process/}{\em{divergence} and \em{convergence}}. 
+According to Tiago, the creative process usually goes through two stages: \href{https://fortelabs.com/blog/divergence-and-convergence-the-two-fundamental-stages-of-the-creative-process/}{\emph{divergence} and \emph{convergence}}. During the divergence phase, we generate ideas and wander. During the convergence stage, we eliminate options and decide what is essential. \href{#capture}{Capture} and \href{#organize}{Organize} in \href{#code}{CODE} correspond to the divergence stage; \href{#distill}{Distill} and \href{#express}{Express}---to the convergence stage. @@ -111,15 +111,15 @@ \subsection{express}{Express} Tiago also suggests three techniques to boost your creative output: \begin{itemize} \item - \href{https://fortelabs.com/blog/just-in-time-pm-21-workflow-strategies/}{\em{Archipelago of ideas}}: start creative work not from a blank slate but from an outline filled with notes and quotes. + \href{https://fortelabs.com/blog/just-in-time-pm-21-workflow-strategies/}{\emph{Archipelago of ideas}}: start creative work not from a blank slate but from an outline filled with notes and quotes. This way, you separate the activity of choosing ideas (divergence) from the act of arranging them (convergence). These activities benefit from different states of mind. \item - \href{https://medium.com/@mstine/day-6-how-you-can-use-hemingways-bridge-to-ship-today-s-momentum-to-tomorrow-a1af14e300ef}{\em{Hemingway bridge}}: stop working when you have clear next steps. + \href{https://medium.com/@mstine/day-6-how-you-can-use-hemingways-bridge-to-ship-today-s-momentum-to-tomorrow-a1af14e300ef}{\emph{Hemingway bridge}}: stop working when you have clear next steps. This technique will make picking up the project the next day easier. If you are putting a project on hold, write a note with the path forward and the context that will help you resurrect the project. \item - \em{Dial down the scope}: reduce the project size to fit into the deadline instead of moving the deadline. 
+ \emph{Dial down the scope}: reduce the project size to fit into the deadline instead of moving the deadline. Cut unfinished ideas and use them for future projects. Ship something small and concrete. \end{itemize} @@ -146,4 +146,4 @@ \section{resources}{Resources} \item \href{https://www.buildingasecondbrain.com/resources}{How to Choose Your Notetaking App}. \end{itemize} -\end{document} \ No newline at end of file +\end{document} diff --git a/posts/17-scaling-rust-builds-with-bazel.tex b/posts/17-scaling-rust-builds-with-bazel.tex index 856f64f..fb97721 100644 --- a/posts/17-scaling-rust-builds-with-bazel.tex +++ b/posts/17-scaling-rust-builds-with-bazel.tex @@ -25,7 +25,7 @@ \section{cargo-limitations}{Cargo's limitations} \subsection{not-a-build-system}{Cargo is not a build system} \epigraph{ - Cargo is the Rust \em{package manager}. + Cargo is the Rust \emph{package manager}. Cargo downloads your Rust package's dependencies, compiles your packages, makes distributable packages, and uploads them to crates.io. }{\href{https://doc.rust-lang.org/cargo/}{The Cargo Book}} @@ -249,4 +249,4 @@ \section{acknowledgements}{Acknowledgments} You can discuss this article on \href{https://www.reddit.com/r/rust/comments/11xxffc/blog_post_scaling_rust_builds_with_bazel/}{Reddit}. -\end{document} \ No newline at end of file +\end{document} diff --git a/posts/18-if-composers-were-hackers.tex b/posts/18-if-composers-were-hackers.tex index cabf2eb..bced242 100644 --- a/posts/18-if-composers-were-hackers.tex +++ b/posts/18-if-composers-were-hackers.tex @@ -29,7 +29,7 @@ \section{bach}{Johan Sebastian Bach} \begin{itemize} \item - Like a well-written APL program, Bach's music is \em{dense}. + Like a well-written APL program, Bach's music is \emph{dense}. He achieves immense expressive power with a few well-chosen constructs. \item In his lifetime, Bach was famous not for his compositions, but for his improvisation skills. 
@@ -51,7 +51,7 @@ \section{bach}{Johan Sebastian Bach} \begin{figure}[grayscale-diagram] \marginnote{mn-repr-bach}{ - Representative Bach's music: \em{Wachet auf, ruft uns die Stimme}, a deeply moving and masterfully harmonized choral. + Representative Bach's music: \emph{Wachet auf, ruft uns die Stimme}, a deeply moving and masterfully harmonized choral. } \includegraphics{/images/18-bach-snippet.png} \end{figure} diff --git a/posts/19-eventlog.tex b/posts/19-eventlog.tex index 5f34bec..6d13ee1 100644 --- a/posts/19-eventlog.tex +++ b/posts/19-eventlog.tex @@ -12,7 +12,7 @@ \section{intro}{Introduction} The \href{https://medium.com/dfinity/chain-key-bitcoin-a-decentralized-bitcoin-twin-ceb8f4ddf95e}{chain-key Bitcoin} (ckBTC) project became \href{https://twitter.com/dfinity/status/1642887821731004418}{publicly available} on April 3, 2023. -The ckBTC \em{minter} smart contract is the most novel part of the product responsible for converting Bitcoin to ckBTC tokens and back. +The ckBTC \emph{minter} smart contract is the most novel part of the product responsible for converting Bitcoin to ckBTC tokens and back. This contract features several design choices that some developers might find insightful. This article describes how the ckBTC minter, which I will further refer to as ``the minter'', organizes its storage. @@ -38,7 +38,7 @@ \section{motivation}{Motivation} \begin{figure}[grayscale-diagram] \marginnote{mn-traditional-upgrade}{ The traditional canister state management scheme. - The canister applies \em{state transitions} \math{T\sub{i}} to its states \math{S\sub{i}} on the Wasm heap (designated with a circle) and marshals the state through stable memory on upgrades. + The canister applies \emph{state transitions} \math{T\sub{i}} to its states \math{S\sub{i}} on the Wasm heap (designated with a circle) and marshals the state through stable memory on upgrades. 
This approach requires state representations to be backward compatible only within the scope of a single upgrade. The number of instructions needed for an upgrade is proportional to the state size. } @@ -51,7 +51,7 @@ \section{motivation}{Motivation} \begin{figure}[grayscale-diagram] \marginnote{mn-stable-memory}{ Managing the canister state in directly stable memory. - The canister applies \em{state transitions} \math{T\sub{i}} to its states \math{S\sub{i}} persisted in stable memory. + The canister applies \emph{state transitions} \math{T\sub{i}} to its states \math{S\sub{i}} persisted in stable memory. This approach trades the flexibility of state representation for the predictability and safety of upgrades. The number of instructions needed for an upgrade is constant. } @@ -115,9 +115,9 @@ \subsection{what-is-an-event}{What is an event?} The brute-force approach is to record as events the arguments of all incoming update calls, all outgoing inter-canister calls, and the corresponding replies. This approach might work, but it takes a lot of work to implement and requires a complicated log replay procedure. -Differentiating between \em{requests} (sometimes called \em{commands}) and \em{events} is a better option. -\em{Requests} come from the outside world (ingress messages, replies, timers) and \em{might} trigger canister state changes. -\em{Events} record effects of productive requests on the canister state. +Differentiating between \emph{requests} (sometimes called \emph{commands}) and \emph{events} is a better option. +\emph{Requests} come from the outside world (ingress messages, replies, timers) and \emph{might} trigger canister state changes. +\emph{Events} record effects of productive requests on the canister state. 
\begin{figure}[grayscale-diagram] \marginnote{mn-request-event}{ diff --git a/posts/20-candid-for-engineers.tex b/posts/20-candid-for-engineers.tex index 95df656..31b8909 100644 --- a/posts/20-candid-for-engineers.tex +++ b/posts/20-candid-for-engineers.tex @@ -43,7 +43,7 @@ \section{candid-overview}{Candid overview} \begin{figure} \begin{code}[candid] \b{service} counter : { - \em{// A method taking a reference to a function.} + \emph{// A method taking a reference to a function.} subscribe : (func (int) -> ()) -> (); } \end{code} @@ -73,7 +73,7 @@ \subsection{service-definitions}{Service definitions} }; \b{service} TokenRegistry : { - \em{// Returns a reference to a token ledger service given the token symbol.} + \emph{// Returns a reference to a token ledger service given the token symbol.} lookup : (symbol : text) -> (opt Token) query; } \end{code} @@ -81,7 +81,7 @@ \subsection{service-definitions}{Service definitions} Two syntactic forms can introduce a service definition: with and without init arguments. The technical term for a service definition with init arguments is \href{https://github.com/dfinity/candid/blob/master/spec/Candid.md#services}{service constructor}\sidenote{sn-class}{ - Some implementations use the term \href{https://docs.rs/candid/0.8.4/candid/types/internal/enum.Type.html#variant.Class}{\em{class}}. + Some implementations use the term \href{https://docs.rs/candid/0.8.4/candid/types/internal/enum.Type.html#variant.Class}{\emph{class}}. 
} \begin{figure} @@ -91,13 +91,13 @@ \subsection{service-definitions}{Service definitions} \begin{code}[candid] \b{service} Token : { balance : (record { of : principal }) -> (nat) query; - \em{// ...} + \emph{// ...} } \end{code} \begin{code}[candid] \b{service} Token : \b{(init_balances : vec record \{ principal; nat \})} -> { balance : (record { of : principal }) -> (nat) query; - \em{// ...} + \emph{// ...} } \end{code} \end{figure} @@ -105,9 +105,9 @@ \subsection{service-definitions}{Service definitions} Conceptually, a service constructor represents an uninitialized canister, whereas a service represents a deployed canister. Init arguments describe the value the canister maintainers must specify when instantiating the canister. -Ideally, canister \em{build} tools should produce a \b{service constructor}. +Ideally, canister \emph{build} tools should produce a \b{service constructor}. If the module contains no init args, the tools should use the form \code{service : () -> \{\ldots\}}. -Canister \em{deploy} tools, such as \href{https://internetcomputer.org/docs/current/references/cli-reference/dfx-deploy}{\code{dfx deploy}}, should use the init args to install the canister, and use the \b{service} as the public metadata, stripping out the init args. +Canister \emph{deploy} tools, such as \href{https://internetcomputer.org/docs/current/references/cli-reference/dfx-deploy}{\code{dfx deploy}}, should use the init args to install the canister, and use the \b{service} as the public metadata, stripping out the init args. As of July 2023, Motoko compiler and Rust CDK don't follow these conventions, so people often conflate the two concepts. \subsection{types}{Types} @@ -125,10 +125,10 @@ \subsection{types}{Types} The \href{https://internetcomputer.org/docs/current/references/candid-ref#type-vec-t}{\code{vec}} type constructor for declaring collections. 
\item \href{https://internetcomputer.org/docs/current/references/candid-ref#type-record--n--t--}{Records} as product types (also known as \code{structs}) with named fields, such as \newline - \code{\b{record} \{ \em{first_line} : text; \em{second_line} : \b{opt} text; \em{zip} : text; /* \ldots */ \}}. + \code{\b{record} \{ \emph{first_line} : text; \emph{second_line} : \b{opt} text; \emph{zip} : text; /* \ldots */ \}}. \item \href{https://internetcomputer.org/docs/current/references/candid-ref#type-variant--n--t--}{Variants} as sum types (also known as \code{enums}) with named alternatives, such as \newline - \code{\b{variant} \{ \em{cash}; \em{credit_card} : \b{record} \{ /* \ldots */ \} \}}. + \code{\b{variant} \{ \emph{cash}; \emph{credit_card} : \b{record} \{ /* \ldots */ \} \}}. \item The \href{https://internetcomputer.org/docs/current/references/candid-ref#type-reserved}{\code{reserved}} type for retiring unused fields. \item @@ -160,7 +160,7 @@ \subsection{records-and-variants}{Records and variants} Records and variants have similar syntax; the primary difference is the keyword introducing the type. The meanings of the constructs are complementary, however. -A record type indicates that \em{all} of its fields must be set, and a variant type indicates that precisely \em{one} field must be set. +A record type indicates that \emph{all} of its fields must be set, and a variant type indicates that precisely \emph{one} field must be set. \begin{figure} \marginnote{mn-record-vs-variant}{ @@ -214,13 +214,13 @@ \subsection{tuples}{Tuples} Tuple-like constructions in Candid: a record with tuple fields (top) and argument sequences (bottom). 
} \begin{code}[candid] -\em{// A record with tuple fields.} +\emph{// A record with tuple fields.} \b{type} Entry = \b{record} { text; nat }; -\em{// The Entry and ExplicitEntry types are equivalent.} +\emph{// The Entry and ExplicitEntry types are equivalent.} \b{type} ExplicitEntry = \b{record} { \b{0} : text; \b{1} : nat }; service ArithmeticService : { - \em{// Argument and result sequences.} + \emph{// Argument and result sequences.} div : (dividend : nat, divisor : nat) -> (quotient : nat, remainder : nat) query; } \end{code} @@ -250,7 +250,7 @@ \subsection{structural-typing}{Structural typing} Type names serve as monikers for the type structure, not as the type's identity. Variable bindings in Rust are a good analogy for type names in Candid. -The \code{let x = 5;} statement \em{binds} name \em{x} to value \code{5}, but \em{x} does not become the identity of that value. +The \code{let x = 5;} statement \emph{binds} name \emph{x} to value \code{5}, but \emph{x} does not become the identity of that value. Expressions such as \code{x == 5} and \code{\{ let y = 5; y == x \}} evaluate to \code{true}. \begin{figure} @@ -259,7 +259,7 @@ \subsection{structural-typing}{Structural typing} } \begin{code}[candid] -\em{// These types are identical from Candid's point of view.} +\emph{// These types are identical from Candid's point of view.} type Point2d = record { x : int; y : int }; type ECPoint = record { x : int; y : int }; \end{code} @@ -308,7 +308,7 @@ \subsection{subtyping}{Subtyping} Adding a new field to a record creates a subtype.\newline \code{record \{ name : text; status : variant \{ user; admin \} \} <: record \{ name : text \} } \item - Less intuitively, removing an \em{optional} field also creates a subtype. 
\newline \code{record \{ name : text \} <: record \{ name : text; status : \b{opt} variant \{ user; admin \} \} } \item @@ -338,22 +338,22 @@ \subsection{subtyping}{Subtyping} \begin{itemize} \item - Remove an unused record field (or, better, change its type to \code{reserved}) from the method \em{input} argument. + Remove an unused record field (or, better, change its type to \code{reserved}) from the method \emph{input} argument. \item - Add a new case to a variant in the method \em{input} argument. + Add a new case to a variant in the method \emph{input} argument. \item - Add a new field to a record in the method \em{result} type. + Add a new field to a record in the method \emph{result} type. \item - Remove an optional field from a record in the method \em{result} type. + Remove an optional field from a record in the method \emph{result} type. \item - Remove an alternative from a variant type in the method \em{result} type. + Remove an alternative from a variant type in the method \emph{result} type. \end{itemize} Before we close the subtyping discussion, let's consider a sequence of type changes where an optional field gets removed and re-introduced later with a different type. \begin{figure} \marginnote{mn-subtype-opt}{ - An example of the \em{special opt subtyping rule}. + An example of the \emph{special opt subtyping rule}. Step \circled{1} removes the optional \code{status} field; step \circled{2} adds an optional field with the same name but an incompatible type. The horizontal bar applies the transitive property of subtyping, eliminating the intermediate type without the \code{status} field. } @@ -367,7 +367,7 @@ \subsection{subtyping}{Subtyping} \end{figure} Indeed, in Candid, \math{opt T <: opt V} holds for any types \math{T} and \math{V}. -This counter-intuitive property bears the name of the \em{special opt rule}, and it causes a lot of grief in practice. 
+This counter-intuitive property bears the name of the \emph{special opt rule}, and it causes a lot of grief in practice. Multiple developers reported changing an optional field in an incompatible way, causing the corresponding values to decode as \code{null} after the upgrade. Joachim Breitner's \href{https://www.joachim-breitner.de/blog/784-A_Candid_explainer__Opt_is_special}{opt is special} article explores the topic in more detail and provides historical background. @@ -377,12 +377,12 @@ \subsection{binary-message-anatomy}{Binary message anatomy} In Candid, a binary message defines a tuple of \math{n} values and logically consists of three parts: \begin{enumerate} \item - The \em{type table} part defines composite types (\href{https://internetcomputer.org/docs/current/references/candid-ref#type-record--n--t--}{records}, \href{https://internetcomputer.org/docs/current/references/candid-ref#type-variant--n--t--}{variants}, \href{https://internetcomputer.org/docs/current/references/candid-ref#type-opt-t}{options}, \href{https://internetcomputer.org/docs/current/references/candid-ref#type-vec-t}{vectors}, etc.) required to decode the message. + The \emph{type table} part defines composite types (\href{https://internetcomputer.org/docs/current/references/candid-ref#type-record--n--t--}{records}, \href{https://internetcomputer.org/docs/current/references/candid-ref#type-variant--n--t--}{variants}, \href{https://internetcomputer.org/docs/current/references/candid-ref#type-opt-t}{options}, \href{https://internetcomputer.org/docs/current/references/candid-ref#type-vec-t}{vectors}, etc.) required to decode the message. \item - The \em{types} part is an \math{n}-tuple of integers specifying the types \math{(T\sub{1},\ldots,T\sub{n})} of values in the next section. + The \emph{types} part is an \math{n}-tuple of integers specifying the types \math{(T\sub{1},\ldots,T\sub{n})} of values in the next section. 
The types are either primitives (negative integers) or pointers into the type table (non-negative integers). \item - The \em{values} part is an \math{n}-tuple of serialized values \math{(V\sub{1},\ldots,V\sub{n})}. + The \emph{values} part is an \math{n}-tuple of serialized values \math{(V\sub{1},\ldots,V\sub{n})}. \end{enumerate} The tuple values usually correspond to service method arguments or results. @@ -440,11 +440,11 @@ \subsection{encoding-a-tree}{Example: encoding a tree} A definition of a \href{https://en.wikipedia.org/wiki/Rose_tree}{rose tree} data type containing 32-bit integers (top) and the Candid representation of the same type (bottom). } \begin{code}[rust] -\em{// Rust} +\emph{// Rust} pub enum Tree { Leaf(i32), Forest(Vec<Tree>) } \end{code} \begin{code}[candid] -\em{// Candid} +\emph{// Candid} type Tree = variant { leaf : int32; forest : vec Tree }; \end{code} \end{figure} @@ -548,7 +548,7 @@ \subsection{faq-remove-field}{Can I remove a record field?} Reserved fields make it unlikely that future service developers will use the field name in an unexpected way. \begin{code}[good] -\em{// OK: the age field is optional.} +\emph{// OK: the age field is optional.} type User = record { name : text; - age : opt nat; @@ -561,7 +561,7 @@ \subsection{faq-remove-field}{Can I remove a record field?} \end{code} \begin{code}[good] -\em{// GOOD: marking an opt field as reserved.} +\emph{// GOOD: marking an opt field as reserved.} type User = record { name : text; - age : opt nat; @@ -595,7 +595,7 @@ \subsection{faq-remove-field}{Can I remove a record field?} You should preserve the field if the type appears in a method return type. 
\begin{code}[bad] -\em{// BAD: the User type appears as an argument \b{and} a result.} +\emph{// BAD: the User type appears as an argument \b{and} a result.} type User = record { name : text; - age : nat; @@ -636,7 +636,7 @@ \subsection{faq-remove-field}{Can I add a record field?} Adding a non-optional field breaks backward compatibility if the record appears in a method argument. \begin{code}[bad] - \em{// BAD: breaks the client code} + \emph{// BAD: breaks the client code} service UserService : { - add_user : (record { name : text }) -> (nat); + add_user : (record { name : text; \b{age : nat} }) -> (nat); @@ -650,7 +650,7 @@ \subsection{faq-remove-alternative}{Can I remove a variant alternative?} } safe. \begin{code}[good] - \em{// OK: changing an optional field} + \emph{// OK: changing an optional field} type OrderDetails = record { - size : \b{opt} variant { tiny; small; medium; large } + size : \b{opt} variant { small; medium; large } @@ -673,7 +673,7 @@ \subsection{faq-remove-alternative}{Can I remove a variant alternative?} \end{code} \begin{code}[bad] - \em{// BAD: this change might break clients.} + \emph{// BAD: this change might break clients.} service CoffeeShop : { - order_coffee : (record { size : variant { tiny; small; medium; large } }) -> (nat); + order_coffee : (record { size : variant { small; medium; large } }) -> (nat); @@ -687,7 +687,7 @@ \subsection{faq-remove-alternative}{Can I add a variant alternative?} } safe. 
\begin{code}[good] - \em{// OK: changing an optional field} + \emph{// OK: changing an optional field} type User = record { name : text; - age : \b{opt} variant { child; adult } @@ -712,7 +712,7 @@ \subsection{faq-remove-alternative}{Can I add a variant alternative?} \end{code} \begin{code}[bad] -\em{// BAD: the User type appears as an argument \b{and} a result.} +\emph{// BAD: the User type appears as an argument \b{and} a result.} type User = record { name : text; - age : variant { child; adult } @@ -769,12 +769,12 @@ \subsection{faq-post-upgrade-arg}{How do I specify the post_upgrade arg?} \begin{code}[candid] \b{type} ServiceArg = variant { Init : record { minter : principal }; - \em{// We might want to override the minter on upgrade.} + \emph{// We might want to override the minter on upgrade.} Upgrade : record { minter : \b{opt} principal } }; \b{service} TokenService : (ServiceArg) -> { - \em{// \ldots} + \emph{// \ldots} } \end{code} \end{figure} @@ -800,4 +800,4 @@ \section{resources}{Resources} \href{https://github.com/dfinity/candid/blob/master/spec/Candid.md}{The Candid Specification} is the authoritative source of truth for all facets of the language. \end{itemize} -\end{document} \ No newline at end of file +\end{document} diff --git a/posts/21-machs-principle.tex b/posts/21-machs-principle.tex index b5d470b..7382fd5 100644 --- a/posts/21-machs-principle.tex +++ b/posts/21-machs-principle.tex @@ -17,18 +17,18 @@ \section{inertia-mystery}{The mystery of inertia} } If you have ever tried to move a closet, you know how much sweat you must shed before the damn thing ends up where you want it to be. -Physicists call this mysterious property of objects \em{inertia}\sidenote{sn-inertia}{ +Physicists call this mysterious property of objects \emph{inertia}\sidenote{sn-inertia}{ More formally, inertia is the tendency of a body to stay at rest or continue moving in a straight line unless some force acts on the body. }. 
Believe it or not, physicists have no idea where inertia comes from. -Sure, there is the \href{https://en.wikipedia.org/wiki/Newton's_laws_of_motion}{first Newton's law} \em{postulating} inertia, but its source remains obscure. +Sure, there is the \href{https://en.wikipedia.org/wiki/Newton's_laws_of_motion}{first Newton's law} \emph{postulating} inertia, but its source remains obscure. Newton believed in absolute time and space. In his worldview, inertia is the resistance of bodies to forces accelerating them relative to the eternal cosmic frame of reference. \href{https://en.wikipedia.org/wiki/Ernst_Mach}{Ernst Mach}, an Austrian physicist and philosopher, disagreed with Newton's position. -In his mind, all the interactions in nature, including the laws of motion and inertia, should be \em{relative}. +In his mind, all the interactions in nature, including the laws of motion and inertia, should be \emph{relative}. The difference between Newton's and Mach's views is subtle. @@ -76,7 +76,7 @@ \section{relations-as-inertia}{Relations as inertia} Or you may find yourself caught up in a dysfunctional relationship. You feel lonely and depressed, yet you can't find the strength to quit. -You still get \em{some} scraps of affection from your partner, a shared household is somewhat convenient, and you don't want to decide what to do with Charlie, your \href{https://en.wikipedia.org/wiki/Labrador_Retriever}{Labrador}. +You still get \emph{some} scraps of affection from your partner, a shared household is somewhat convenient, and you don't want to decide what to do with Charlie, your \href{https://en.wikipedia.org/wiki/Labrador_Retriever}{Labrador}. Relationships are the source of inertia in our lives. These include your loved ones, friends, community, church, and places you enjoy visiting. 
@@ -85,7 +85,7 @@ \section{relations-as-inertia}{Relations as inertia} \section{inertia-in-software}{Inertia in software} \epigraph{ - Complexity is caused by two things: \em{dependencies} and \em{obscurity}. + Complexity is caused by two things: \emph{dependencies} and \emph{obscurity}. }{John Ousterhout, \href{https://www.amazon.com/Philosophy-Software-Design-2nd/dp/173210221X}{A Philosophy of Software Design, 2nd Edition}.} Changing software is hard. @@ -112,8 +112,8 @@ \section{inertia-in-software}{Inertia in software} However, the more callers a function has, the harder it is to change \href{https://www.hyrumslaw.com/}{anything} about it. Interestingly, we can see \href{https://en.wikipedia.org/wiki/Regression_testing}{regression testing} as a way to increase software's inertia. -The ultimate goal of such tests is to make \em{destructive} changes harder to make. -Unfortunately, tests often turn into \href{https://testing.googleblog.com/2015/01/testing-on-toilet-change-detector-tests.html}{change detectors}, making \em{any} change unnecessarily complicated. +The ultimate goal of such tests is to make \emph{destructive} changes harder to make. +Unfortunately, tests often turn into \href{https://testing.googleblog.com/2015/01/testing-on-toilet-change-detector-tests.html}{change detectors}, making \emph{any} change unnecessarily complicated. The \href{https://github.com/github/renaming}{default Git branch renaming} is one of my favorite examples of inertia in software. The idea behind the change is trivial; it would take Linus about a minute to change the default branch name from \code{master} to \code{main} back in 2005. @@ -129,18 +129,18 @@ \section{conclusion}{Conclusion} We can draw a few practical implications from this principle. -Firstly, we must \em{account for hidden relations when we estimate project costs}. +Firstly, we must \emph{account for hidden relations when we estimate project costs}. 
The approach most people employ is to base estimates solely on the change to be done. It seems reasonable and rational, but \href{https://en.wikipedia.org/wiki/Planning_fallacy}{it doesn't work}. Any change worth doing is trapped in a web of dependencies hidden from an unsuspecting observer; even minor changes might require herculean efforts. The best predictor of project completion time is the time it took to complete similar projects in the past. -Secondly, we can \em{achieve goals faster by reducing dependencies}. +Secondly, we can \emph{achieve goals faster by reducing dependencies}. Minimizing the number of teams involved in a project can drastically reduce the \href{https://www.investopedia.com/terms/l/leadtime.asp}{lead time}. Fred Brooks \href{https://en.wikipedia.org/wiki/The_Mythical_Man-Month}{observed} similar dynamics while managing a large team at IBM: Adding more people to a project running late tends to increase its inertia, further delaying the delivery. -Lastly, we must \em{address bad decisions before they get tangled in dependencies}. +Lastly, we must \emph{address bad decisions before they get tangled in dependencies}. We want to fix our mistakes quickly before others rely on them. The tighter the feedback loop, the cheaper our mistakes become. Tinkering, experimentation, and \href{https://agilemanifesto.org/}{incremental delivery} almost always trump theorizing and planning. diff --git a/posts/22-flat-in-order-trees.tex b/posts/22-flat-in-order-trees.tex index 9b14898..398b576 100644 --- a/posts/22-flat-in-order-trees.tex +++ b/posts/22-flat-in-order-trees.tex @@ -67,11 +67,11 @@ \section{sec-background}{Background} \section{sec-nomenclature}{Nomenclature} We will consider two types of trees. -First, we'll analyze the case of \em{perfect binary trees} (\code{PBT}), i.e., binary trees in which all interior nodes have two children and all leaves have the same depth. 
+First, we'll analyze the case of \emph{perfect binary trees} (\code{PBT}), i.e., binary trees in which all interior nodes have two children and all leaves have the same depth. These trees correspond to sequences of \math{2\sup{K}} data blocks and contain \math{2\sup{K+1}-1} nodes. Second, we'll generalize perfect trees to handle sequences of any \math{N} data blocks. -There is no established name for such trees, so we'll call them \em{left-perfect binary trees} (\code{LPBT}). +There is no established name for such trees, so we'll call them \emph{left-perfect binary trees} (\code{LPBT}). A left-perfect tree with \math{N} leaves is either a perfect tree or a fork where the left child is a perfect tree containing at least \math{N/2} leaves, and the right child is a left-perfect tree containing the rest of the leaves. \begin{figure}[grayscale-diagram,medium-size] @@ -141,17 +141,17 @@ \section{sec-fbt}{Traversing perfect binary trees} #include <stdint.h> #include <assert.h> -\em{// Isolates the last set bit of number N.} +\emph{// Isolates the last set bit of number N.} uint64_t LastSetBit(const uint64_t n) { return n - ((n - 1) & n); } -\em{// Isolates the last unset bit of number N.} +\emph{// Isolates the last unset bit of number N.} uint64_t LastZeroBit(const uint64_t n) { return LastSetBit(n + 1); } -\em{// Rounds the argument up to the next highest power of two.} +\emph{// Rounds the argument up to the next highest power of two.} uint64_t RoundUpPowerOf2(uint64_t n) { // See \href{https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2}{Bit Twiddling Hacks}. n--; @@ -170,7 +170,7 @@ \section{sec-fbt}{Traversing perfect binary trees} The addressing scheme proof suggests the function for locating the tree root: we divide the tree size by two. 
\begin{code}[c] -\em{// Computes the root of the perfect binary tree of the given SIZE.} +\emph{// Computes the root of the perfect binary tree of the given SIZE.} uint64_t PBT_Root(const uint64_t size) { assert(size == 1 || RoundUpPowerOf2(size) == size + 1); return size >> 1; @@ -189,7 +189,7 @@ \section{sec-fbt}{Traversing perfect binary trees} \end{figure} \begin{code}[c] -\em{// Computes the parent of node I in a perfect binary tree.} +\emph{// Computes the parent of node I in a perfect binary tree.} uint64_t PBT_Parent(const uint64_t i) { return (LastZeroBit(i) | i) & ~(LastZeroBit(i) << 1); } @@ -201,15 +201,15 @@ \section{sec-fbt}{Traversing perfect binary trees} For the right child, we also need to set bit \math{k} to one. \begin{code}[c] -\em{// Computes the left child of node P in a perfect binary tree.} -\em{// Requires: P is not a leaf.} +\emph{// Computes the left child of node P in a perfect binary tree.} +\emph{// Requires: P is not a leaf.} uint64_t PBT_LeftChild(const uint64_t p) { assert(p & 1); return p & ~(LastZeroBit(p) >> 1); } -\em{// Computes the right child of node P in a perfect binary tree.} -\em{// Requires: P is not a leaf.} +\emph{// Computes the right child of node P in a perfect binary tree.} +\emph{// Requires: P is not a leaf.} uint64_t PBT_RightChild(const uint64_t p) { assert(p & 1); return (p | LastZeroBit(p)) & ~(LastZeroBit(p) >> 1); @@ -224,7 +224,7 @@ \section{sec-lpbt}{Traversing left-perfect binary trees} Hence the formula for the left child we developed for perfect binary trees works as-is. 
\begin{code}[c] -\em{// Computes the left child in a flat in-order left-perfect binary tree.} +\emph{// Computes the left child in a flat in-order left-perfect binary tree.} uint64_t LPBT_LeftChild(const uint64_t i) { return PBT_LeftChild(i); } \end{code} @@ -243,7 +243,7 @@ \section{sec-lpbt}{Traversing left-perfect binary trees} Thus, given the number of tree nodes, we compute the root of the smallest perfect tree containing the input tree. \begin{code}[c] -\em{// Computes the root of a left-perfect binary tree of the given SIZE.} +\emph{// Computes the root of a left-perfect binary tree of the given SIZE.} uint64_t LPBT_Root(const uint64_t size) { return PBT_Root(RoundUpPowerOf2(size + 1) - 1); } @@ -264,7 +264,7 @@ \section{sec-lpbt}{Traversing left-perfect binary trees} \end{figure} \begin{code}[c] -\em{// Computes the parent of node I in a left-perfect binary tree of the given SIZE.} +\emph{// Computes the parent of node I in a left-perfect binary tree of the given SIZE.} uint64_t LPBT_Parent_Iterative(uint64_t i, const uint64_t size) { do { i = PBT_Parent(i); } while (i >= size); return i; } @@ -279,12 +279,12 @@ \section{sec-lpbt}{Traversing left-perfect binary trees} Thus, we can attempt to compute the parent using \code{PBT_Parent} first and adjust the result if it falls out of bounds. 
\begin{code}[c] -\em{// Computes the leftmost leaf of node I in a perfect binary tree.} +\emph{// Computes the leftmost leaf of node I in a perfect binary tree.} uint64_t PBT_LeftmostLeaf(const uint64_t i) { return i & (i + 1); } -\em{// Computes the parent of node I in a left-perfect binary tree of the given SIZE.} +\emph{// Computes the parent of node I in a left-perfect binary tree of the given SIZE.} uint64_t LPBT_Parent(const uint64_t i, const uint64_t size) { assert(i != LPBT_Root(size)); const uint64_t p = PBT_Parent(i); @@ -297,7 +297,7 @@ \section{sec-lpbt}{Traversing left-perfect binary trees} If the outcome falls outside the tree bounds, the recursive definition of left-perfect trees implies that the right child must be the root of the right subtree. \begin{code}[c] -\em{// Computes the right child of node I in a left-perfect binary tree of the given SIZE.} +\emph{// Computes the right child of node I in a left-perfect binary tree of the given SIZE.} uint64_t LPBT_RightChild(const uint64_t i, const uint64_t size) { assert(i & 1); const uint64_t r = PBT_RightChild(i); @@ -308,7 +308,7 @@ \section{sec-lpbt}{Traversing left-perfect binary trees} Alternatively, if \code{PBT_RightChild}'s result falls outside of the tree, we can follow the left links in the ``virtual'' perfect tree until we hit the tree bounds again. \begin{code}[c] -\em{// Computes the right child of node I in a left-perfect binary tree of the given SIZE.} +\emph{// Computes the right child of node I in a left-perfect binary tree of the given SIZE.} uint64_t LPBT_RightChild_Iterative(const uint64_t i, const uint64_t size) { assert(i & 1); uint64_t r; @@ -333,7 +333,7 @@ \section{sec-extensible-segment-trees}{Extensible segment trees} We must find the most significant bit where the leaves differ, set this bit to zero, and set all lower bits. 
\begin{code}[c] -\em{// Extracts the most significant bit from number N.} +\emph{// Extracts the most significant bit from number N.} uint64_t MostSignificantBit(uint64_t n) { uint64_t x = n; x |= (x >> 1); @@ -345,8 +345,8 @@ \section{sec-extensible-segment-trees}{Extensible segment trees} return x - (x >> 1); } -\em{// Computes the lowest common ancestor of leaves X and Y in a left-perfect} -\em{// binary tree.} +\emph{// Computes the lowest common ancestor of leaves X and Y in a left-perfect} +\emph{// binary tree.} uint64_t LPBT_LeavesLCA(const uint64_t x, const uint64_t y) { assert(!(x & 1)); assert(!(y & 1)); @@ -361,19 +361,19 @@ \section{sec-extensible-segment-trees}{Extensible segment trees} Let's define the data structures representing the tree. \begin{code}[c] -\em{// The type of values stored in the tree nodes.} +\emph{// The type of values stored in the tree nodes.} typedef int64_t value_t; -\em{// The maximum number of nodes in the tree.} +\emph{// The maximum number of nodes in the tree.} const size_t MAX_NODES = 10001; -\em{// The current number of nodes in the tree.} -\em{// Invariant: G_NumNodes <= MAX_NODES} -\em{// Invariant: G_NumNodes == 0 || G_NumNodes \% 2 == 1} +\emph{// The current number of nodes in the tree.} +\emph{// Invariant: G_NumNodes <= MAX_NODES} +\emph{// Invariant: G_NumNodes == 0 || G_NumNodes \% 2 == 1} size_t G_NumNodes; -\em{// The flat in-order representation of the tree.} -\em{// Even elements correspond to the sequence items.} +\emph{// The flat in-order representation of the tree.} +\emph{// Even elements correspond to the sequence items.} value_t G_Nodes[MAX_NODES]; \end{code} @@ -389,7 +389,7 @@ \section{sec-extensible-segment-trees}{Extensible segment trees} It sets the corresponding leaf value and traverses the tree upwards, recomputing the parent node values. 
\begin{code}[c] -\em{// Updates the sequence to contain the given ITEM at the specified POSITION.} +\emph{// Updates the sequence to contain the given ITEM at the specified POSITION.} void ST_Set(const size_t position, const value_t item) { assert(G_NumNodes > 0); assert(position <= G_NumNodes / 2); @@ -415,7 +415,7 @@ \section{sec-extensible-segment-trees}{Extensible segment trees} In the case of non-empty sequences, the procedure adds a new interior node and a leaf and delegates the rest to the previously defined \code{ST_Set} procedure. \begin{code}[c] -\em{// Appends the given ITEM to the sequence.} +\emph{// Appends the given ITEM to the sequence.} void ST_Append(const value_t item) { assert(G_NumNodes + 2 <= MAX_NODES); @@ -430,8 +430,8 @@ \section{sec-extensible-segment-trees}{Extensible segment trees} The \code{ST_Sum} function computing the sum of sequence items in the specified range is the most involved part of the implementation. It relies on the \code{LPBT_LeavesLCA} function to compute the LCA of the two leaves corresponding to the requested bounds. -It then traverses the tree from the left bound to the LCA node, summing all unaccounted \em{right} subtrees on the way up. -It does the same for the right bound, summing all unaccounted \em{left} subtrees on the path. +It then traverses the tree from the left bound to the LCA node, summing all unaccounted \emph{right} subtrees on the way up. +It does the same for the right bound, summing all unaccounted \emph{left} subtrees on the path. 
\begin{figure}[grayscale-diagram] \marginnote{mn-lca-sum}{ @@ -442,7 +442,7 @@ \section{sec-extensible-segment-trees}{Extensible segment trees} \end{figure} \begin{code}[c] -\em{// Computes the sum of the sequence items in the index interval \[l, r\].} +\emph{// Computes the sum of the sequence items in the index interval \[l, r\].} value_t ST_Sum(const size_t l, const size_t r) { assert(r * 2 < G_NumNodes); assert(l <= r); @@ -455,8 +455,8 @@ \section{sec-extensible-segment-trees}{Extensible segment trees} const uint64_t lca = LPBT_LeavesLCA(i, j); value_t acc = Combine(G_Nodes[i], G_Nodes[j]); - \em{// Traverse the tree upwards from the left bound and sum up all} - \em{// the right subtrees on the way.} + \emph{// Traverse the tree upwards from the left bound and sum up all} + \emph{// the right subtrees on the way.} while (1) { const uint64_t p = LPBT_Parent(i, G_NumNodes); if (p == lca) break; @@ -465,8 +465,8 @@ \section{sec-extensible-segment-trees}{Extensible segment trees} if (rc != i) acc = Combine(acc, G_Nodes[rc]); i = p; } - \em{// Traverse the tree upwards from the right bound and sum up all} - \em{// the left subtrees on the way.} + \emph{// Traverse the tree upwards from the right bound and sum up all} + \emph{// the left subtrees on the way.} while (1) { const uint64_t p = LPBT_Parent(j, G_NumNodes); if (p == lca) break; diff --git a/posts/23-numeric-tower-fiasco.tex b/posts/23-numeric-tower-fiasco.tex index b458d07..1c1d372 100644 --- a/posts/23-numeric-tower-fiasco.tex +++ b/posts/23-numeric-tower-fiasco.tex @@ -251,7 +251,7 @@ \section{the-functional-design}{The functional design} We don't need to modify existing types when we add new functions operating on numbers. \end{itemize} -To address the boilerplate issue, we'll introduce the numeric type \em{promotion} operation. +To address the boilerplate issue, we'll introduce the numeric type \emph{promotion} operation. 
When we add two numbers of different types, we convert the simpler type to the more complex one using the previously discussed type conversion functions. We then apply the binary operator dealing with numbers of the same promoted type. Finally, we demote the result to the simplest type that can hold the value. @@ -315,7 +315,7 @@ \section{conclusion}{Conclusion} At first, I felt the same about the numeric tower case, but now I'm sure the problem is not me; it's the class hierarchies. The approach of piling classes on top of one another and trying to make this stack coherent is fundamentally flawed. -It fails spectacularly even on tiny examples where the problem domain is \em{mathematically} specified. +It fails spectacularly even on tiny examples where the problem domain is \emph{mathematically} specified. The numeric tower is one example, but there are more. The \href{https://en.wikipedia.org/wiki/Circle%E2%80%93ellipse_problem}{circle-ellipse problem} is a good one, but my favorite is how inheriting \code{ColorPoint} from \code{Point} breaks the transitivity property of the \code{equals} method: diff --git a/posts/24-ocr.tex b/posts/24-ocr.tex index cd218b7..60671c4 100644 --- a/posts/24-ocr.tex +++ b/posts/24-ocr.tex @@ -11,11 +11,11 @@ \section{abstract}{Abstract} This article is a high-level overview of the Off-Chain Reporting protocol (\sc{ocr}) powering most of \href{https://chain.link/}{Chainlink} products. -The protocol allows a group of \math{n} nodes called \em{oracles}, up to \math{f} of which could be \href{https://en.wikipedia.org/wiki/Byzantine_fault}{byzantine} (\math{f < n⁄3}), to agree on a data point and record it on a blockchain supporting smart contracts (e.g., Ethereum). 
+The protocol allows a group of \math{n} nodes called \emph{oracles}, up to \math{f} of which could be \href{https://en.wikipedia.org/wiki/Byzantine_fault}{byzantine} (\math{f < n⁄3}), to agree on a data point and record it on a blockchain supporting smart contracts (e.g., Ethereum). \section{components}{Protocol components} -All \sc{ocr} deployments have two parts with different execution models: the on-chain part, implemented as a smart contract called the \em{aggregator}, and the off-chain part, implemented as a peer-to-peer network of oracles. +All \sc{ocr} deployments have two parts with different execution models: the on-chain part, implemented as a smart contract called the \emph{aggregator}, and the off-chain part, implemented as a peer-to-peer network of oracles. The off-chain communication protocol, in turn, consists of three sub-protocols layered on top of one another: the \href{#pacemaker}{pacemaker}, the \href{#report-generation}{report generation}, and the \href{#transmission}{transmission} protocols. \subsection{aggregator-contract}{The aggregator contract} @@ -31,9 +31,9 @@ \subsection{aggregator-contract}{The aggregator contract} \subsection{pacemaker}{Pacemaker} -The \em{pacemaker} algorithm of the \sc{ocr} protocol periodically assigns a node to be a \em{leader} coordinating the rest of the protocol functions. -The period between two consecutive leader assignments is called an \em{epoch}. -Within each epoch, the leader initiates \em{rounds} of the \href{#report-generation}{report generation} algorithm. +The \emph{pacemaker} algorithm of the \sc{ocr} protocol periodically assigns a node to be a \emph{leader} coordinating the rest of the protocol functions. +The period between two consecutive leader assignments is called an \emph{epoch}. +Within each epoch, the leader initiates \emph{rounds} of the \href{#report-generation}{report generation} algorithm. 
The tuple \math{(e, r)}, where \math{e} is the epoch number and \math{r} is the round number, serves as a logical clock for the protocol. \begin{figure}[grayscale-diagram] @@ -50,7 +50,7 @@ \subsection{pacemaker}{Pacemaker} \item The protocol didn't progress for the configured time amount. \end{enumerate} -The function mapping epoch numbers to leader nodes is a \href{https://crypto.stanford.edu/pbc/notes/crypto/prf.html}{cryptographic pseudo-random function} parameterized by a secret key (called the \em{seed} key) known only to the oracles. +The function mapping epoch numbers to leader nodes is a \href{https://crypto.stanford.edu/pbc/notes/crypto/prf.html}{cryptographic pseudo-random function} parameterized by a secret key (called the \emph{seed} key) known only to the oracles. Thus, oracles know the exact sequence of all leader assignments in advance, but to all outside observers, the assignments are indistinguishable from random. \subsection{report-generation}{Report generation} @@ -58,23 +58,23 @@ \subsection{report-generation}{Report generation} The report generation algorithm produces a data point for the \href{#aggregator-contract}{aggregator contract}. For example, if the aggregator contract records an asset price in \sc{usd}, the algorithm produces a price that faulty oracles can't manipulate. -First, the leader initiates a new round by picking a \em{query} describing the task the followers need to execute and sending it to all the followers. +First, the leader initiates a new round by picking a \emph{query} describing the task the followers need to execute and sending it to all the followers. \begin{figure}[grayscale-diagram,medium-size] \includegraphics{/images/24-report-1.svg} \end{figure} -The followers execute the query (usually by observing an external data source) and send a signed \em{observation} back to the leader. 
+The followers execute the query (usually by observing an external data source) and send a signed \emph{observation} back to the leader. In the asset price example, the observation will be their guess of the asset price. \begin{figure}[grayscale-diagram,medium-size] \includegraphics{/images/24-report-2.svg} \end{figure} -If the leader receives at least \math{2f+1} observations, it distributes these signed (or \em{attributed}) observations among the followers. +If the leader receives at least \math{2f+1} observations, it distributes these signed (or \emph{attributed}) observations among the followers. \begin{figure}[grayscale-diagram,medium-size] \includegraphics{/images/24-report-3.svg} \end{figure} -Next, the followers distill attributed observations into a \em{report}, sign it, and send it to the leader. +Next, the followers distill attributed observations into a \emph{report}, sign it, and send it to the leader. In the asset price example, the report is the median price. The observation aggregation function must consider any \math{f} out of at least \math{2f+1} observations untrustworthy for the protocol to be byzantine-fault-tolerant. For example, the asset price application must not pick one of the top or bottom \math{f} prices. @@ -82,7 +82,7 @@ \subsection{report-generation}{Report generation} \includegraphics{/images/24-report-4.svg} \end{figure} -If at least \math{f+1} followers signed the same report, the leader aggregates the report and the signatures into an \em{attested} report and transmits it to all the followers. +If at least \math{f+1} followers signed the same report, the leader aggregates the report and the signatures into an \emph{attested} report and transmits it to all the followers. 
\begin{figure}[grayscale-diagram,medium-size] \includegraphics{/images/24-report-5.svg} \end{figure} @@ -125,4 +125,4 @@ \section{resources}{Resources} \item In the \href{https://youtu.be/XKiLkmwVaYA}{Looking under the hood of \sc{ocr} 2.0} video, Lorenz Breidenbach explains the protocol evolution and the plugin architecture. \item In the \href{https://youtu.be/VPVH3QCwc0U}{\sc{ocr3} protocol overview} video, Chrysa Stathakopoulou outlines the protocol structure and mentions features added in its third iteration. \end{itemize} -\end{document} \ No newline at end of file +\end{document} diff --git a/posts/25-domain-types.tex b/posts/25-domain-types.tex index 064f019..98c1c55 100644 --- a/posts/25-domain-types.tex +++ b/posts/25-domain-types.tex @@ -26,8 +26,8 @@ \section{intro}{Introduction} }{John Ousterhout, ``A Philosophy of Software Design'', Chapter 14, ``Choosing names''} The author attributes the bug to poor variable naming, but this blame is misplaced. -If the programmer had defined \em{distinct types} for logical and physical block numbers, the compiler would have caught this mistake immediately. -In this article, I call such definitions \em{domain types}. +If the programmer had defined \emph{distinct types} for logical and physical block numbers, the compiler would have caught this mistake immediately. +In this article, I call such definitions \emph{domain types}. They serve as documentation, help catch bugs at compile time, and make the code more secure\sidenote{sn-secure-by-design}{ The book \href{https://www.manning.com/books/secure-by-design}{Secure by Design} by Dan Bergh Johnsson et al. provides many examples of using domain types for building a secure system from the ground up. }. @@ -38,7 +38,7 @@ \section{language-features}{Language features} Many languages provide syntax for simplifying domain type definitions. Such definitions create a new distinct type sharing representation with a chosen underlying type (e.g., a 64-bit integer). 
-The semantics of such definitions vary across languages, but they usually fall into one of two categories: \em{newtypes} and \em{typedefs}. +The semantics of such definitions vary across languages, but they usually fall into one of two categories: \emph{newtypes} and \emph{typedefs}. \subsection{newtypes}{Newtypes} @@ -52,7 +52,7 @@ \subsection{newtypes}{Newtypes} Some third-party packages, such as \href{https://crates.io/crates/derive_more}{\code{derive\_more}}, make this task easier. } \begin{code} -\em{/// The number of standard SI apples.} +\emph{/// The number of standard SI apples.} #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] struct MetricApples(i64); @@ -77,7 +77,7 @@ \subsection{typedefs}{Typedefs} Typedefs inherit all operations from the underlying type, even those meaningless for the new type. } \begin{code} -\em{/// MetricApples hold the number of standard SI apples.} +\emph{/// MetricApples hold the number of standard SI apples.} type MetricApples int64 func main() { @@ -91,7 +91,7 @@ \subsection{typedefs}{Typedefs} \end{code} \end{figure} -\href{https://go.dev/ref/spec#Type_definitions}{Go}, \href{https://dlang.org/library/std/typecons/typedef.html}{D} and \href{https://en.wikibooks.org/wiki/Ada_Programming/Type_System#Derived_types}{Ada} provide typedefs (Ada calls typedefs \em{derived types}). +\href{https://go.dev/ref/spec#Type_definitions}{Go}, \href{https://dlang.org/library/std/typecons/typedef.html}{D} and \href{https://en.wikibooks.org/wiki/Ada_Programming/Type_System#Derived_types}{Ada} provide typedefs (Ada calls typedefs \emph{derived types}). The \href{https://www.boost.org/}{Boost} project for C++ implements \href{https://www.boost.org/doc/libs/1_61_0/libs/serialization/doc/strong_typedef.html}{typedefs} as a library (C's \href{https://en.cppreference.com/w/c/language/typedef}{typedef declarations} are ``weak typedefs'': they introduce an alias for an existing type, not a new type). 
@@ -115,30 +115,30 @@ \section{domain-type-classes}{Domain type classes} } \begin{code} trait DomainType { - \em{/// The primitive type representing the domain value.} + \emph{/// The primitive type representing the domain value.} type Representation; \label{representation-type} - \em{/// Creates a domain value from its representation value.} + \emph{/// Creates a domain value from its representation value.} fn from_repr(repr: Representation) -> Self; - \em{/// Extracts the representation value from the domain value.} + \emph{/// Extracts the representation value from the domain value.} fn to_repr(self) -> Representation; } \end{code} \end{figure} -The code snippets present \em{minimal} interfaces for each type class. +The code snippets present \emph{minimal} interfaces for each type class. Practical concerns often require adding more operations. For example, using identifiers as keys in a dictionary requires exposing a hash function (for hash maps) or imposing an ordering (for search trees), and serializing values requires accessing their internal representation. \subsection{identifiers}{Identifiers} One of the most common uses of domain types is a transparent handle for an entity or an asset in the real world, such as a customer identifier in an online store or an employee number in a payroll application. -I call these types \em{identifiers}. +I call these types \emph{identifiers}. Identifiers have no structure, i.e., we don't care about their internal representation. The only fundamental requirement is the ability to compare values of those types for equality. -This lack of structure suggests an appropriate mathematical model for such types: a \em{set}, a collection of distinct objects. +This lack of structure suggests an appropriate mathematical model for such types: a \emph{set}, a collection of distinct objects. 
\begin{figure} \marginnote{mn-identifiers-interface}{ @@ -146,7 +146,7 @@ \subsection{identifiers}{Identifiers} } \begin{code} trait Eq { - \em{/// Returns true if two values are equal.} + \emph{/// Returns true if two values are equal.} fn eq(&self, other: &Self) -> bool; } @@ -181,27 +181,27 @@ \subsection{amounts}{Amounts} } \begin{code} trait Ord: Eq { - \em{/// Compares two values.} + \emph{/// Compares two values.} fn cmp(&self, other: &Self) -> Ordering; } trait VectorSpace { - \em{/// The scalar type is usually the same as the \href{#representation-type}{Representation} type.} + \emph{/// The scalar type is usually the same as the \href{#representation-type}{Representation} type.} type Scalar; - \em{/// Returns the additive inverse of the value.} + \emph{/// Returns the additive inverse of the value.} fn neg(self) -> Self; - \em{/// Adds two vectors.} + \emph{/// Adds two vectors.} fn add(self, other: Self) -> Self; - \em{/// Subtracts the other vector from self.} + \emph{/// Subtracts the other vector from self.} fn sub(self, other: Self) -> Self; - \em{/// Multiplies the vector by a scalar.} + \emph{/// Multiplies the vector by a scalar.} fn mul(self, factor: Scalar) -> Self; - \em{/// Divides the vector by a scalar.} + \emph{/// Divides the vector by a scalar.} fn div(self, factor: Scalar) -> Self; } @@ -226,10 +226,10 @@ \subsection{Loci}{Loci} We can compare, order, and subtract them to compute the distance between two points. For example, subtracting 5 am on Friday from 3 am on Saturday gives us twenty-two hours. Adding or multiplying these dates makes no sense, however. -This semantic demands a new class of types, \em{loci} (plural of \em{locus}). +This semantic demands a new class of types, \emph{loci} (plural of \emph{locus}). One example of the locus/distance dichotomy coming from system programming is the memory address arithmetic. 
-Low-level programming languages differentiate \em{pointers} (memory addresses) and \em{offsets} (distances between addresses). +Low-level programming languages differentiate \emph{pointers} (memory addresses) and \emph{offsets} (distances between addresses). In the C programming language, the \code{void*} type represents a memory address, and the \code{ptrdiff\_t} type represents an offset. Subtracting two pointers gives an offset, but adding or multiplying pointers is meaningless. @@ -242,25 +242,25 @@ \subsection{Loci}{Loci} } \begin{code} trait LocusLike: IdentifierLike + Ord { - \em{/// The type representing the distance between two positions.} + \emph{/// The type representing the distance between two positions.} type Distance: AmountLike; - \em{/// The origin for the absolute coordinate system.} + \emph{/// The origin for the absolute coordinate system.} const ORIGIN: Self; - \em{/// Moves the point away from the origin by the specified distance.} + \emph{/// Moves the point away from the origin by the specified distance.} fn add(self, other: Distance) -> Self; - \em{/// Returns the distance between two points.} + \emph{/// Returns the distance between two points.} fn sub(self, other: Self) -> Distance; } \end{code} \end{figure} Timestamps offer an excellent demonstration of the ``distance type + the origin'' concept. -Go and Rust represent timestamps as a number of \em{nanoseconds} passed from the \sc{unix} epoch (midnight of January 1st, 1970), -The C programming language defines the \href{https://en.cppreference.com/w/c/chrono/time_t}{\code{time\_t}} type, which is almost always the number of \em{seconds} from the \sc{unix} epoch. -The \href{https://en.wikipedia.org/wiki/Q_(programming_language_from_Kx_Systems)}{q programming language} also uses nanoseconds, but \href{https://code.kx.com/q4m3/2_Basic_Data_Types_Atoms/#253-date-time-types}{chose the \em{millennium}} (midnight of January 1st, 2000) as its origin point. 
+Go and Rust represent timestamps as a number of \emph{nanoseconds} passed from the \sc{unix} epoch (midnight of January 1st, 1970). +The C programming language defines the \href{https://en.cppreference.com/w/c/chrono/time_t}{\code{time\_t}} type, which is almost always the number of \emph{seconds} from the \sc{unix} epoch. +The \href{https://en.wikipedia.org/wiki/Q_(programming_language_from_Kx_Systems)}{q programming language} also uses nanoseconds, but \href{https://code.kx.com/q4m3/2_Basic_Data_Types_Atoms/#253-date-time-types}{chose the \emph{millennium}} (midnight of January 1st, 2000) as its origin point. Changing the distance type (e.g., seconds to nanoseconds) or the origin (e.g., \sc{unix} epoch to the millennium) calls for a different timestamp type. The Go standard library employs the locus type design for its \href{https://pkg.go.dev/time}{\code{time}} package, differentiating the time instant (\href{https://pkg.go.dev/time#Time}{\code{time.Time}}) and time duration (\href{https://pkg.go.dev/time#Duration}{\code{time.Duration}}). @@ -278,15 +278,15 @@ \subsection{quantities}{Quantities} We can model complex type interactions using methods of \href{https://en.wikipedia.org/wiki/Dimensional_analysis}{dimensional analysis}. If we view \href{#amounts}{amounts} as values with an attached label identifying their unit, then our new types are a natural extension demanding a more structured label equivalent to a vector of base units raised to rational powers. For example, acceleration would have label \math{(distance \times time\sup{-2})}, and the \sc{usd}/\sc{eur} \href{https://en.wikipedia.org/wiki/Currency_pair}{pair} exchange rate would have label \math{(eur \times usd\sup{-1})}. -I call types with such rich label structure \em{quantities}. +I call types with such rich label structure \emph{quantities}. Quantities are a proper extension of amounts: addition, subtraction, and scalar multiplication work the same way, leaving the label structure untouched. 
The additional label structure gives meaning to multiplication and division. -The result of multiplication will have a base unit vector with the component-wise \em{sum} of the power vectors of the unit factors. +The result of multiplication will have a base unit vector with the component-wise \emph{sum} of the power vectors of the unit factors. For example, car fuel consumption computation could use an expression like \math{2 (km) \times 0.05 (liter \times km\sup{-1}) = 0.1 (liter)}. -Dividing values produces a label that's a component-wise \em{difference} between the dividend and divisor power vectors. +Dividing values produces a label that's a component-wise \emph{difference} between the dividend and divisor power vectors. For example, running pace computation could use an expression like \math{10 (min) / 2 (km) = 5 (min \times km\sup{-1})}. \begin{figure} @@ -295,11 +295,11 @@ \subsection{quantities}{Quantities} } \begin{code} trait QuantityLike: AmountLike { - \em{/// Multiplies two quantities.} + \emph{/// Multiplies two quantities.} fn mul>(self, other: O) -> impl QuantityLike>; - \em{/// Divides self by the specified quantity.} + \emph{/// Divides self by the specified quantity.} fn div>(self, other: O) -> impl QuantityLike>; } @@ -343,4 +343,4 @@ \section{exercises}{Exercises} I'll gladly add your gem to this article with a proper attribution. \end{enumerate} -\end{document} \ No newline at end of file +\end{document} diff --git a/posts/26-good-names-form-galois-connections.tex b/posts/26-good-names-form-galois-connections.tex index 59b9a56..12ec189 100644 --- a/posts/26-good-names-form-galois-connections.tex +++ b/posts/26-good-names-form-galois-connections.tex @@ -31,7 +31,7 @@ There are many books with naming heuristics ranging from classics such as ``\href{https://www.goodreads.com/book/show/4845.Code_Complete}{Code Complete}'' by Steve McConnell and ``\href{https://www.goodreads.com/book/show/3735293-clean-code}{Clean Code}'' by Robert C. 
Martin to more recent ``\href{https://www.goodreads.com/book/show/123009242-naming-things}{Naming things.}'' by Tom Benner. }, but it left me unsatisfied. -I felt that I already knew \em{how} to choose good names, but I wanted to have a good mathematical model for what good names \em{are}. +I felt that I already knew \emph{how} to choose good names, but I wanted to have a good mathematical model for what good names \emph{are}. This article is the result of my exploration. \section{objects-concepts-expressions}{Objects, concepts, and expressions} @@ -56,9 +56,9 @@ \section{objects-concepts-expressions}{Objects, concepts, and expressions} Words are arbitrary symbols representing concepts. I'll borrow terminology from \href{https://link.springer.com/book/10.1007/978-94-007-1736-7}{Tretise on Intuitionistic Type Theory} and use -\em{object} to refer to things existing in the world (we won't need them much, though), -\em{concept} to refer to mental representations of these objects, -and \em{expression} to refer to words or sentences used to communicate concepts. +\emph{object} to refer to things existing in the world (we won't need them much, though), +\emph{concept} to refer to mental representations of these objects, +and \emph{expression} to refer to words or sentences used to communicate concepts. \begin{figure}[grayscale-diagram] \marginnote{mn-object-concept-expression}{ @@ -83,7 +83,7 @@ \section{maps-of-meaning}{Maps of meaning} \includegraphics{/images/26-concepts-words.svg} \end{figure} -We must add another ingredient to the picture to tackle the problem of names: the \em{context}. +We must add another ingredient to the picture to tackle the problem of names: the \emph{context}. Context refers to all the other things that influence the interpretation of an expression: surrounding words in the sentence, the task the reader is trying to accomplish, and the reader's culture and background. 
The amount of information in the context is vast as it includes the strengths of all the neuron connections in the reader's brain. @@ -112,7 +112,7 @@ \section{channel}{Names as communication channels} In information theory, a channel is a theoretical construction that allows sending messages through time and space. A channel takes messages from a set \math{X} as inputs and produces messages from a set \math{Y} as outputs. -Channels are \em{noisy}: they distort the messages passing them; that's why the channel's input and output sets might differ. +Channels are \emph{noisy}: they distort the messages passing them; that's why the channel's input and output sets might differ. We represent the noise mathematically by using \href{https://en.wikipedia.org/wiki/Conditional_probability}{conditional probabilities}: \math{p(y | x)} is the probability of receiving message \math{y} if the sender sent message \math{x}. When you think of a channel, you probably imagine a cable transmitting electrical signals or how the space around you buzzes with electromagnetic waves. @@ -170,7 +170,7 @@ \section{names-as-galois-connections}{Names as Galois connections} We start with the sets in question and their orderings. Our first set is the set of expressions \math{E}. -For expressions \math{e\sub{1}, e\sub{2} \in E}, we say that \math{e\sub{1} \leq e\sub{2}} if \math{e\sub{1}} at least as expressive as \math{e\sub{2}} \em{given the reader context}. +For expressions \math{e\sub{1}, e\sub{2} \in E}, we say that \math{e\sub{1} \leq e\sub{2}} if \math{e\sub{1}} is at least as expressive as \math{e\sub{2}} \emph{given the reader context}. If the expressions are equally expressive, we define the shorter element as smaller. The reader context is a crucial component of the \math{\leq} operator. For example, ``cat'' is more expressive than ``felis catus'' in a regular conversation, but it might be the other way around in a scientific paper. 
@@ -231,4 +231,4 @@ \section{related-work}{Related work} ``\href{https://teamscale.com/hubfs/26978363/Publications/2005-concise-and-consistent-naming.pdf}{Concise and Consistent Naming}'' by Florian Deißenböck and Markus Pizka provides a similar, though slightly simpler, formal model of variable names in section 3.2. -\end{document} \ No newline at end of file +\end{document} diff --git a/posts/27-extending-https-outcalls.tex b/posts/27-extending-https-outcalls.tex index a46359c..f42e1a0 100644 --- a/posts/27-extending-https-outcalls.tex +++ b/posts/27-extending-https-outcalls.tex @@ -33,7 +33,7 @@ \section{https-outcalls-overview}{HTTPS outcalls in a nutshell} First, the canister sends a message to the management canister that includes the \sc{https} request payload and the \href{https://internetcomputer.org/docs/current/references/https-outcalls-how-it-works#transformation-function}{transform callback function}. The management canister includes this request in a dedicated queue in the node's replicated state. -A background process independent from the replicated state machine called \em{adapter} periodically inspects the request queue and executes requests from the queue. +A background process independent from the replicated state machine called \emph{adapter} periodically inspects the request queue and executes requests from the queue. Each replica has an independent instance of the adapter process. \begin{figure}[grayscale-diagram,p75] @@ -68,14 +68,14 @@ \section{extending-https-outcalls}{Extending HTTPS outcalls} The \sc{ocr} protocol defines three stages: \begin{enumerate} \item - In the \em{query} stage, the participants receive a task to observe an external data source. + In the \emph{query} stage, the participants receive a task to observe an external data source. This stage is implicit in \sc{https} outcalls: instead of the protocol leader initiating the query, a canister triggers a query using the system interface. 
\item - In the \em{observation} stage, each node observes the data source, signs its observation, and sends it over the network. + In the \emph{observation} stage, each node observes the data source, signs its observation, and sends it over the network. The \sc{ic} implements this step through the adapter process discussed in the previous section and the consensus algorithm. The adapter executes an \sc{https} request and filters it through the calling canister's transformation function. The transformation result is the observation. - \item In the \em{report} stage, the network aggregates participant observations into the final report. + \item In the \emph{report} stage, the network aggregates participant observations into the final report. This stage is hard-coded in the \sc{ic} consensus protocol. If \math{2f + 1} nodes observed the same \sc{http} response, its value becomes the report. \end{enumerate} @@ -104,7 +104,7 @@ \subsection{multi-https-outcalls}{Multi-HTTP outcalls} } \begin{code}[candid] service ic : { - \em{// \ldots} + \emph{// \ldots} multi_http_request : (http_request_args) -> (vec http_request_result); }; \end{code} @@ -153,7 +153,7 @@ \subsection{aggregation-callbacks}{Faulty design: aggregation callbacks} Unfortunately, this approach doesn't work. The problem is that we cannot guarantee that different nodes will see the same subset of responses. -Each healthy node in the network of \math{3f + 1} nodes will see responses from \em{some} other nodes (at least \math{2f + 1}), but the exact subset might differ for each node. +Each healthy node in the network of \math{3f + 1} nodes will see responses from \emph{some} other nodes (at least \math{2f + 1}), but the exact subset might differ for each node. Different observation subsets will lead to unequal aggregated reports, and the system might fail to reach consensus. 
The \sc{ocr} protocol solves this issue by electing a leader node that picks the subset of observations and distributes it to the followers. @@ -188,7 +188,7 @@ \section{price-feeds}{Use-case: price feeds} Since the block space is precious, the \code{ExamplePriceResponse} structure restricts the response contents to the fields we need to construct the report. \begin{code}[rust] -\em{/// The format of response we get from the example price feed JSON API.} +\emph{/// The format of response we get from the example price feed JSON API.} #[derive(serde::Serialize, serde::Deserialize, Debug)] struct ExamplePriceResponse { price: f64, @@ -272,7 +272,7 @@ \section{price-feeds}{Use-case: price feeds} price: median_price, timestamp_seconds: median_ts, } -} \em{// end of observe_icp_price} +} \emph{// end of observe_icp_price} \end{code} \section{conclusion}{Conclusion} @@ -281,4 +281,4 @@ \section{conclusion}{Conclusion} Unfortunately, the current implementation is limited to use cases of deterministic \sc{http} responses. This article explored how to lift this limitation by taking inspiration from the \sc{ocr} protocol and including all the \sc{http} request versions to the requesting canister. -\end{document} \ No newline at end of file +\end{document} diff --git a/posts/28-enlightenmentware.tex b/posts/28-enlightenmentware.tex index a55059d..a34dde5 100644 --- a/posts/28-enlightenmentware.tex +++ b/posts/28-enlightenmentware.tex @@ -16,7 +16,7 @@ Most of them can barely get the job done. But occasionally, we discover a piece of software that transcends mere utility. These tools capture our imagination, open new possibilities, and affect how we design our own systems. -I call such software \em{enlightenmentware}. +I call such software \emph{enlightenmentware}. The most common source of enlightenment for programmers is the programming language they use at work or learn as a hobby. 
I experienced many jolts of enlightenment from fiddling with programming languages, from \href{https://en.wikipedia.org/wiki/Microsoft_Macro_Assembler}{\sc{masm}} and \href{https://en.wikipedia.org/wiki/C_(programming_language)}{C} to \href{https://en.wikipedia.org/wiki/Prolog}{Prolog} and \href{https://www.idris-lang.org/}{Idris}. @@ -35,10 +35,10 @@ \section{unix}{UNIX} } I started looking for my first real programming job around 2008, while studying at university in my hometown of Nizhny Novgorod. -Almost all the open positions required knowledge of mysterious things called \sc{unix} and \em{sockets}. +Almost all the open positions required knowledge of mysterious things called \sc{unix} and \emph{sockets}. My curriculum didn't offer a course on \sc{unix} or operating systems in general, so I decided to get a textbook and master the topic myself. -``\href{https://www.goodreads.com/book/show/22066650-unix}{The \sc{unix} Operating System}'' by Andrey Robachevsky et al., also known as the \em{turtle book} in Russia because of its cover, introduced me to the magical world of \sc{unix}-like operating systems. +``\href{https://www.goodreads.com/book/show/22066650-unix}{The \sc{unix} Operating System}'' by Andrey Robachevsky et al., also known as the \emph{turtle book} in Russia because of its cover, introduced me to the magical world of \sc{unix}-like operating systems. \sc{unix} became something I could understand, explore, and programmatically interact with. All pieces of the puzzle---the filesystem interface, the process model with environments and permissions, forking, sockets, and signals---fell into place and revealed a coherent, beautiful picture. @@ -69,7 +69,7 @@ \section{git}{Git} } I encountered version control systems in early 2009; the company I worked for used \href{https://en.wikipedia.org/wiki/IBM_Rational_ClearCase}{Rational ClearCase} to manage their code. 
-The system versioned each file separately and relied on large configuration files---\em{config specs}---to construct a consistent snapshot of the source tree. +The system versioned each file separately and relied on large configuration files---\emph{config specs}---to construct a consistent snapshot of the source tree. The tool was utterly confusing and intimidating, so I avoided dealing with it beyond the minimal requirements of my job. About a year later, I joined a shop that used \href{https://subversion.apache.org/}{Subversion}. @@ -92,7 +92,7 @@ \section{git}{Git} Git removed the friction from using version control; there was no excuse not to version anything of value anymore. Merging branches with Git didn't cause anxiety disorders. -The staging area---confusingly named \em{index}---became essential to my workflows. +The staging area---confusingly named \emph{index}---became essential to my workflows. But my favorite feature was the breathtaking beauty of Git's design, the elegant mix of distributed systems, acyclic graphs, and content-addressed storage. Learning about Git's internals was so much fun that I became interested in the bits and bolts of other version control systems. @@ -126,7 +126,7 @@ \section{emacs}{Emacs} Almost everyone in our group used \href{https://en.wikipedia.org/wiki/NEdit}{NEdit} to edit the code. One day, I noticed a person whose editor looked markedly different from everyone else's; the background was dark, and the code glowed with bright colors. To me, that was a sign of their superior technical knowledge. -I \em{needed} to learn how to tweak my editor. +I \emph{needed} to learn how to tweak my editor. The quest for customization led me to \href{https://www.vim.org/}{Vim} (the workstation had Vim 6 installed out of the box). After all, if the goal is to stand out from the crowd, why stop at the color scheme? 
@@ -220,7 +220,7 @@ \section{bazel}{Bazel} I heard about Google's internal build tool, \code{blaze}, and couldn't wait to lay my hands on it. Surprisingly, I didn't need to fiddle with \code{blaze}, nor did I have to understand how it worked. I could copy some build targets and edit the dependency list, and the build worked as expected. -\code{blaze} made correct and fast builds not just easy, but \em{boring} in the good sense. +\code{blaze} made correct and fast builds not just easy, but \emph{boring} in the good sense. Only a few years later, when I attempted to use \href{https://bazel.build/}{Bazel}---the open-source version of \code{blaze}---for a toy personal project, did I have to understand the underlying model. Bazel was the final piece of the puzzle, together with Haskell's typeclasses, \href{https://research.google/pubs/flumejava-easy-efficient-data-parallel-pipelines/}{Flume pipelines} interface, and the \href{https://www.tensorflow.org/}{TensorFlow} 1.0 execution model, that made me understand the ubiquitous plan-execute pattern\sidenote{sn-build-systems-a-la-carte}{ @@ -228,7 +228,7 @@ \section{bazel}{Bazel} Thomas Leonard's \href{https://roscidus.com/blog/blog/2019/11/14/cicd-pipelines}{CI/CD pipelines: Monad, Arrow or Dart?} blog post is also a great read on this topic. }. Bazel build file is a program that constructs a slice of the build artifact graph. -Bazel rules don't \em{run} the build commands; they \em{declare} how to transform inputs into outputs, and the Bazel engine figures out the rest. +Bazel rules don't \emph{run} the build commands; they \emph{declare} how to transform inputs into outputs, and the Bazel engine figures out the rest. My relationship with the tool reached true intimacy when I helped \href{/posts/17-scaling-rust-builds-with-bazel.html}{transition \sc{dfinity}'s build system to Bazel}. Despite all the challenges I faced on the way, Bazel is still my favorite build system. 
@@ -249,7 +249,7 @@ \section{conclusion}{Conclusion} \item They are ``round'': they pack the most volume in the smallest surface area. \sc{unix} surface area is tiny, but it unlocks much power. - Emacs and Git are all over the place, but their \em{core} is small, sweet, and easy to appreciate. + Emacs and Git are all over the place, but their \emph{core} is small, sweet, and easy to appreciate. \item They invite and encourage you to explore their internals. It's not only about being free and open-source; mastering them is also well worth the investment. @@ -257,4 +257,4 @@ \section{conclusion}{Conclusion} What's your enlightenmentware? Tell me on \href{https://news.ycombinator.com/item?id=40419856}{Hacker News} or \href{https://www.reddit.com/r/programming/comments/1cwa1m8/blog_post_enlightenmentware/}{Reddit}! -\end{document} \ No newline at end of file +\end{document}