From 230f0ff0080e5bf60ba77873ec2e973ab246756b Mon Sep 17 00:00:00 2001
From: dyma solovei <dmytro.y.solovei@gmail.com>
Date: Sat, 24 Feb 2024 17:25:35 +0100
Subject: [PATCH] feat: decode v3.0 notebooks

Prior to v4.0:
- top-level 'worksheets' contained multiple worksheets with the actual 'cells'
- execute_result output was called pyout
- error output was called pyerr
- code cell 'source' was called 'input'; execution_count was called prompt_number
- mime-bundle explicitly defined keys for all mime-types which it supported and
had to be decoded differently

BREAKING: decode.Decoder interface now includes an ExtractCells method to handle the
deprecation of top-level 'worksheets'
---
 decode/decode.go          |  18 +-
 decode/decode_test.go     | 356 +++++++++++++++++++++++++++++++++++-
 schema/common/notebook.go |   1 -
 schema/v3/schema.go       | 372 ++++++++++++++++++++++++++++++++++++++
 schema/v4/schema.go       |  14 +-
 version.go                |   1 +
 6 files changed, 753 insertions(+), 9 deletions(-)
 create mode 100644 schema/v3/schema.go

diff --git a/decode/decode.go b/decode/decode.go
index 2db3aa9..ce59ecd 100644
--- a/decode/decode.go
+++ b/decode/decode.go
@@ -44,8 +44,13 @@ func (n *notebook) UnmarshalJSON(data []byte) error {
 		return fmt.Errorf("%s: notebook metadata: %w", ver, err)
 	}
 
-	n.cells = make([]schema.Cell, len(n.Notebook.Cells))
-	for i, raw := range n.Notebook.Cells {
+	cells, err := d.ExtractCells(data)
+	if err != nil {
+		return fmt.Errorf("%s: extract cells: %w", ver, err)
+	}
+
+	n.cells = make([]schema.Cell, len(cells))
+	for i, raw := range cells {
 		c := cell{meta: meta, decoder: d}
 		if err := json.Unmarshal(raw, &c); err != nil {
 			return fmt.Errorf("%s: %w", ver, err)
@@ -78,7 +83,16 @@ func (c *cell) UnmarshalJSON(data []byte) error {
 // Decoder implementations are version-aware and decode cell contents and metadata
 // based on the respective JSON schema definition.
 type Decoder interface {
+	// ExtractCells accesses the array of notebook cells.
+	//
+	// Prior to v4.0 cells were not a part of the top level structure,
+	// and were contained in "worksheets" instead.
+	ExtractCells(data []byte) ([]json.RawMessage, error)
+
+	// DecodeMeta decodes version-specific metadata.
 	DecodeMeta(data []byte) (schema.NotebookMetadata, error)
+
+	// DecodeCell decodes raw cell data to a version-specific implementation.
 	DecodeCell(v map[string]interface{}, data []byte, meta schema.NotebookMetadata) (schema.Cell, error)
 }
 
diff --git a/decode/decode_test.go b/decode/decode_test.go
index 2d55660..7279145 100644
--- a/decode/decode_test.go
+++ b/decode/decode_test.go
@@ -6,6 +6,7 @@ import (
 
 	"github.com/bevzzz/nb/schema"
 	"github.com/bevzzz/nb/schema/common"
+	_ "github.com/bevzzz/nb/schema/v3"
 	_ "github.com/bevzzz/nb/schema/v4"
 
 	"github.com/bevzzz/nb/decode"
@@ -94,9 +95,24 @@ func TestDecodeBytes(t *testing.T) {
 						{"cell_type": "markdown", "metadata": {}, "source": []},
 						{"cell_type": "markdown", "metadata": {}, "source": []}
 					]
-				}`,
+					}`,
 				nCells: 2,
 			},
+			{
+				name: "v3.0",
+				json: `{
+					"nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [
+						{"cells": [
+							{"cell_type": "markdown", "metadata": {}, "source": []},
+							{"cell_type": "markdown", "metadata": {}, "source": []}
+						]},
+						{"cells": [
+							{"cell_type": "markdown", "metadata": {}, "source": []}
+						]}
+					]
+				}`,
+				nCells: 3,
+			},
 		} {
 			t.Run(tt.name, func(t *testing.T) {
 				nb, err := decode.Bytes([]byte(tt.json))
@@ -224,6 +240,21 @@ func TestDecodeBytes(t *testing.T) {
 					Data:     []byte("base64-encoded-image-data"),
 				},
 			},
+			{
+				name: "v3.0: no explicit mime-type",
+				json: `{
+					"nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [
+						{"cells": [
+							{"cell_type": "raw", "source": ["sometimes you just want to rawdog sqweel"]}
+						]}
+					]
+				}`,
+				want: WithAttachments{Cell: Cell{
+					Type:     schema.Raw,
+					MimeType: common.PlainText,
+					Text:     []byte("sometimes you just want to rawdog sqweel"),
+				}},
+			},
 		} {
 			t.Run(tt.name, func(t *testing.T) {
 				nb, err := decode.Bytes([]byte(tt.json))
@@ -259,7 +290,8 @@ func TestDecodeBytes(t *testing.T) {
 						{
 							"cell_type": "code", "execution_count": 5,
 							"source": ["print('Hi, mom!')"],  "outputs": [
-								{"output_type": "stream"}, {"output_type": "stream"}
+								{"output_type": "stream", "name": "stdout"},
+								{"output_type": "stream", "name": "stderr"}
 							]
 						}
 					]
@@ -275,6 +307,32 @@ func TestDecodeBytes(t *testing.T) {
 					OutputLen:      2,
 				},
 			},
+			{
+				name: "v3.0",
+				json: `{
+					"nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [
+						{"cells": [
+							{
+								"cell_type": "code", "language": "javascript", "prompt_number": 5,
+								"input": ["print('Hi, mom!')"],  "outputs": [
+									{"output_type": "stream", "stream": "stdout"}, 
+									{"output_type": "stream", "stream": "stderr"}
+								]
+							}
+						]}
+					]
+				}`,
+				want: outcome{
+					Cell: Cell{
+						Type:     schema.Code,
+						MimeType: "application/x-python", // FIXME: expect language-specific mime-type
+						Text:     []byte("print('Hi, mom!')"),
+					},
+					Language:       "javascript",
+					ExecutionCount: 5,
+					OutputLen:      2,
+				},
+			},
 		} {
 			t.Run(tt.name, func(t *testing.T) {
 				nb, err := decode.Bytes([]byte(tt.json))
@@ -321,6 +379,28 @@ func TestDecodeBytes(t *testing.T) {
 					}},
 				},
 			},
+			{
+				name: "v3.0: stream output to stdout",
+				json: `{
+					"nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [
+						{"cells": [
+							{"cell_type": "code", "outputs": [
+								{
+									"output_type": "stream", "stream": "stdout",
+									"text": ["$> ls\n", ".\n", "..\n", "nb/"]
+								}
+							]}
+						]}		
+					]
+				}`,
+				want: []output{
+					{Cell: Cell{
+						Type:     schema.Stream,
+						MimeType: common.Stdout,
+						Text:     []byte("$> ls\n.\n..\nnb/"),
+					}},
+				},
+			},
 			{
 				name: "v4.4: stream output to stderr",
 				json: `{
@@ -342,6 +422,28 @@ func TestDecodeBytes(t *testing.T) {
 					}},
 				},
 			},
+			{
+				name: "v3.0: stream output to stderr",
+				json: `{
+					"nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [
+						{"cells": [
+							{"cell_type": "code", "outputs": [
+								{
+									"output_type": "stream", "stream": "stderr",
+									"text": ["KeyError: ", "dict['unknown key']"]
+								}
+							]}
+						]}
+					]
+				}`,
+				want: []output{
+					{Cell: Cell{
+						Type:     schema.Stream,
+						MimeType: common.Stderr,
+						Text:     []byte("KeyError: dict['unknown key']"),
+					}},
+				},
+			},
 			{
 				name: "v4.4: stream output to unrecognized target",
 				json: `{
@@ -371,13 +473,13 @@ func TestDecodeBytes(t *testing.T) {
 						{"cell_type": "code", "outputs": [
 							{"output_type": "display_data", "metadata": {},
 								"data": {
-									"image/png":  "base64-encoded-png-image",
+									"image/png": "base64-encoded-png-image",
 									"text/plain": "<Figure size 640x480 with 1 Axes>"
 								}
 							},
 							{"output_type": "display_data", "metadata": {},
 								"data": {
-									"image/jpeg":  "base64-encoded-jpeg-image",
+									"image/jpeg": "base64-encoded-jpeg-image",
 									"text/plain": "<Figure size 100x500 with 2 Axes>"
 								}
 							},
@@ -407,6 +509,93 @@ func TestDecodeBytes(t *testing.T) {
 					}},
 				},
 			},
+			{
+				name: "v3.0: display_data output different recognized formats",
+				json: `{
+					"nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [
+						{"cells": [
+							{"cell_type": "code", "outputs": [
+								{"output_type": "display_data", "metadata": {},
+									"png": ["base64-encoded-png-image"],
+									"text": ["<Figure size 640x480 with 1 Axes>"]
+								},
+								{"output_type": "display_data", "metadata": {},
+									"jpeg": ["base64-encoded-jpeg-image"],
+									"text": ["<Figure size 100x500 with 2 Axes>"]
+								},
+								{"output_type": "display_data", "metadata": {},
+									"html": ["<img />"]
+								},
+								{"output_type": "display_data", "metadata": {},
+									"svg": ["<svg />"]
+								},
+								{"output_type": "display_data", "metadata": {},
+									"javascript": ["[,,,].length"]
+								},
+								{"output_type": "display_data", "metadata": {},
+									"json": ["{\"foo\": \"bar\"}"]
+								},
+								{"output_type": "display_data", "metadata": {},
+									"pdf": ["some-raw-pdf-data"]
+								},
+								{"output_type": "display_data", "metadata": {},
+									"latex": ["c = \\sqrt{a^2 + b^2}"]
+								},
+								{"output_type": "display_data", "metadata": {},
+									"text": ["<Image url='https://image.com/?id=123' height=500>"]
+								}
+							]}
+						]}
+					]
+				}`,
+				want: []output{
+					{Cell: Cell{
+						Type:     schema.DisplayData,
+						MimeType: "image/png",
+						Text:     []byte("base64-encoded-png-image"),
+					}},
+					{Cell: Cell{
+						Type:     schema.DisplayData,
+						MimeType: "image/jpeg",
+						Text:     []byte("base64-encoded-jpeg-image"),
+					}},
+					{Cell: Cell{
+						Type:     schema.DisplayData,
+						MimeType: "text/html",
+						Text:     []byte(`<img />`),
+					}},
+					{Cell: Cell{
+						Type:     schema.DisplayData,
+						MimeType: "image/svg+xml",
+						Text:     []byte(`<svg />`),
+					}},
+					{Cell: Cell{
+						Type:     schema.DisplayData,
+						MimeType: "text/javascript",
+						Text:     []byte("[,,,].length"),
+					}},
+					{Cell: Cell{
+						Type:     schema.DisplayData,
+						MimeType: "application/json",
+						Text:     []byte("{\"foo\": \"bar\"}"), // ????
+					}},
+					{Cell: Cell{
+						Type:     schema.DisplayData,
+						MimeType: "application/pdf",
+						Text:     []byte("some-raw-pdf-data"), // ????
+					}},
+					{Cell: Cell{
+						Type:     schema.DisplayData,
+						MimeType: "application/x-latex",
+						Text:     []byte("c = \\sqrt{a^2 + b^2}"), // ????
+					}},
+					{Cell: Cell{
+						Type:     schema.DisplayData,
+						MimeType: common.PlainText,
+						Text:     []byte("<Image url='https://image.com/?id=123' height=500>"),
+					}},
+				},
+			},
 			{
 				name: "v4.4: execute_result output with several images and a plain text",
 				json: `{
@@ -441,6 +630,101 @@ func TestDecodeBytes(t *testing.T) {
 					}},
 				},
 			},
+			{
+				name: "v3.0: pyout (execute_result) output different recognized formats",
+				json: `{
+					"nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [
+						{"cells": [
+							{"cell_type": "code", "outputs": [
+								{"output_type": "pyout", "metadata": {},
+									"prompt_number": 42,
+									"png": ["base64-encoded-png-image"],
+									"text": ["<Figure size 640x480 with 1 Axes>"]
+								},
+								{"output_type": "pyout", "metadata": {},
+									"prompt_number": 42,
+									"jpeg": ["base64-encoded-jpeg-image"],
+									"text": ["<Figure size 100x500 with 2 Axes>"]
+								},
+								{"output_type": "pyout", "metadata": {},
+									"prompt_number": 42,
+									"html": ["<img />"]
+								},
+								{"output_type": "pyout", "metadata": {},
+									"prompt_number": 42,
+									"svg": ["<svg />"]
+								},
+								{"output_type": "pyout", "metadata": {},
+									"prompt_number": 42,
+									"javascript": ["[,,,].length"]
+								},
+								{"output_type": "pyout", "metadata": {},
+									"prompt_number": 42,
+									"json": ["{\"foo\": \"bar\"}"]
+								},
+								{"output_type": "pyout", "metadata": {},
+									"prompt_number": 42, "pdf": ["some-raw-pdf-data"]
+								},
+								{"output_type": "pyout", "metadata": {},
+									"prompt_number": 42,
+									"latex": ["c = \\sqrt{a^2 + b^2}"]
+								},
+								{"output_type": "pyout", "metadata": {},
+									"prompt_number": 42,
+									"text": ["<Image url='https://image.com/?id=123' height=500>"]
+								}
+							]}
+						]}
+					]
+				}`,
+				want: []output{
+					{ExecutionCount: 42, Cell: Cell{
+						Type:     schema.ExecuteResult,
+						MimeType: "image/png",
+						Text:     []byte("base64-encoded-png-image"),
+					}},
+					{ExecutionCount: 42, Cell: Cell{
+						Type:     schema.ExecuteResult,
+						MimeType: "image/jpeg",
+						Text:     []byte("base64-encoded-jpeg-image"),
+					}},
+					{ExecutionCount: 42, Cell: Cell{
+						Type:     schema.ExecuteResult,
+						MimeType: "text/html",
+						Text:     []byte(`<img />`),
+					}},
+					{ExecutionCount: 42, Cell: Cell{
+						Type:     schema.ExecuteResult,
+						MimeType: "image/svg+xml",
+						Text:     []byte(`<svg />`),
+					}},
+					{ExecutionCount: 42, Cell: Cell{
+						Type:     schema.ExecuteResult,
+						MimeType: "text/javascript",
+						Text:     []byte("[,,,].length"),
+					}},
+					{ExecutionCount: 42, Cell: Cell{
+						Type:     schema.ExecuteResult,
+						MimeType: "application/json",
+						Text:     []byte("{\"foo\": \"bar\"}"), // ????
+					}},
+					{ExecutionCount: 42, Cell: Cell{
+						Type:     schema.ExecuteResult,
+						MimeType: "application/pdf",
+						Text:     []byte("some-raw-pdf-data"), // ????
+					}},
+					{ExecutionCount: 42, Cell: Cell{
+						Type:     schema.ExecuteResult,
+						MimeType: "application/x-latex",
+						Text:     []byte("c = \\sqrt{a^2 + b^2}"), // ????
+					}},
+					{ExecutionCount: 42, Cell: Cell{
+						Type:     schema.ExecuteResult,
+						MimeType: common.PlainText,
+						Text:     []byte("<Image url='https://image.com/?id=123' height=500>"),
+					}},
+				},
+			},
 			{
 				name: "v4.4: error output",
 				json: `{
@@ -467,6 +751,33 @@ func TestDecodeBytes(t *testing.T) {
 					}},
 				},
 			},
+			{
+				name: "v3.0: error output",
+				json: `{
+					"nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [
+						{"cells": [
+							{"cell_type": "code", "outputs": [
+								{
+									"output_type": "pyerr", "ename": "ZeroDivisionError", "evalue": "division by zero",
+									"traceback": [
+										"Traceback (most recent call last):",
+										"\tFile \"main.py\", line 3, in <module>",
+										"\t\tprint(n/0)",
+										"\tZeroDivisionError: division by zero"
+									]
+								}
+							]}
+						]}
+					]
+				}`,
+				want: []output{
+					{Cell: Cell{
+						Type:     schema.Error,
+						MimeType: common.Stderr,
+						Text:     []byte("Traceback (most recent call last):\n\tFile \"main.py\", line 3, in <module>\n\t\tprint(n/0)\n\tZeroDivisionError: division by zero"),
+					}},
+				},
+			},
 		} {
 			t.Run(tt.name, func(t *testing.T) {
 				nb, err := decode.Bytes([]byte(tt.json))
@@ -483,6 +794,43 @@ func TestDecodeBytes(t *testing.T) {
 			})
 		}
 	})
+
+	t.Run("heading cells", func(t *testing.T) {
+		for _, tt := range []struct {
+			name string
+			json string
+			want Cell
+		}{
+			{
+				name: "v3.0 used to have dedicated heading cells",
+				json: `{
+					"nbformat": 3, "nbformat_minor": 0, "metadata": {}, "worksheets": [
+						{"cells": [
+							{
+								"cell_type": "heading", "level": 2, 
+								"source": ["Fun facts about Ronald McDonald"], "metadata": {}
+							}
+						]}
+					]
+				}`,
+				want: Cell{
+					Type:     schema.Markdown,
+					MimeType: common.MarkdownText,
+					Text:     []byte("## Fun facts about Ronald McDonald"),
+				},
+			},
+		} {
+			t.Run(tt.name, func(t *testing.T) {
+				nb, err := decode.Bytes([]byte(tt.json))
+				require.NoError(t, err)
+
+				got := nb.Cells()
+				require.Len(t, got, 1, "expected 1 cell")
+
+				checkCell(t, got[0], tt.want)
+			})
+		}
+	})
 }
 
 // checkCell compares the cell's type and content to expected.
diff --git a/schema/common/notebook.go b/schema/common/notebook.go
index 8246fe7..482e50d 100644
--- a/schema/common/notebook.go
+++ b/schema/common/notebook.go
@@ -10,7 +10,6 @@ type Notebook struct {
 	VersionMajor int               `json:"nbformat"`
 	VersionMinor int               `json:"nbformat_minor"`
 	Metadata     json.RawMessage   `json:"metadata"` // TODO: omitempty
-	Cells        []json.RawMessage `json:"cells"`
 }
 
 func (n *Notebook) Version() schema.Version {
diff --git a/schema/v3/schema.go b/schema/v3/schema.go
new file mode 100644
index 0000000..a6cbbff
--- /dev/null
+++ b/schema/v3/schema.go
@@ -0,0 +1,372 @@
+package v3
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"strings"
+
+	"github.com/bevzzz/nb/decode"
+	"github.com/bevzzz/nb/schema"
+	"github.com/bevzzz/nb/schema/common"
+)
+
+func init() {
+	decode.RegisterDecoder(schema.Version{Major: 3, Minor: 0}, new(decoder))
+}
+
+// decoder decodes cell contents and metadata for nbformat v3.0.
+type decoder struct{}
+
+var _ decode.Decoder = (*decoder)(nil)
+
+// ExtractCells collects the cells of every worksheet, in document order, into one slice.
+func (d *decoder) ExtractCells(data []byte) ([]json.RawMessage, error) {
+	var nb struct {
+		Worksheets []struct {
+			Cells []json.RawMessage `json:"cells"`
+		} `json:"worksheets"`
+	}
+	if err := json.Unmarshal(data, &nb); err != nil {
+		return nil, err
+	}
+	var all []json.RawMessage
+	for _, ws := range nb.Worksheets {
+		all = append(all, ws.Cells...)
+	}
+	return all, nil
+}
+
+func (d *decoder) DecodeMeta(data []byte) (schema.NotebookMetadata, error) {
+	return nil, nil // data is ignored: no notebook-level metadata is decoded for v3
+}
+
+func (d *decoder) DecodeCell(m map[string]interface{}, data []byte, meta schema.NotebookMetadata) (schema.Cell, error) {
+	ct, _ := m["cell_type"].(string) // missing or non-string type becomes "" and is rejected below
+	var c schema.Cell
+	switch ct {
+	case "markdown":
+		c = &Markdown{}
+	case "heading":
+		c = &Heading{}
+	case "raw":
+		c = &Raw{}
+	case "code":
+		c = &Code{}
+	default:
+		return nil, fmt.Errorf("unknown cell type %q", ct)
+	}
+	if err := json.Unmarshal(data, &c); err != nil {
+		return nil, fmt.Errorf("%s: %w", ct, err)
+	}
+	return c, nil
+}
+
+// Markdown defines the schema for a "markdown" cell.
+type Markdown struct {
+	Source common.MultilineString `json:"source"`
+}
+
+var _ schema.Cell = (*Markdown)(nil)
+
+func (md *Markdown) Type() schema.CellType {
+	return schema.Markdown
+}
+
+func (md *Markdown) MimeType() string {
+	return common.MarkdownText
+}
+
+func (md *Markdown) Text() []byte {
+	return md.Source.Text()
+}
+
+// Heading is a dedicated heading cell of nbformat v3; later versions store headings as markdown.
+// It behaves like a markdown cell whose source is prefixed with Level heading signs (#).
+type Heading struct {
+	Source common.MultilineString `json:"source"`
+	Level  int                    `json:"level"`
+}
+
+var _ schema.Cell = (*Heading)(nil)
+
+func (h *Heading) Type() schema.CellType {
+	return schema.Markdown
+}
+
+func (h *Heading) MimeType() string {
+	return common.MarkdownText
+}
+
+func (h *Heading) Text() []byte {
+	level := h.Level
+	if level < 0 { // malformed input: bytes.Repeat panics on a negative count
+		level = 0
+	}
+	return append(append(bytes.Repeat([]byte("#"), level), " "...), h.Source.Text()...)
+}
+
+// Raw defines the schema for a "raw" cell.
+type Raw struct {
+	Source   common.MultilineString `json:"source"`
+	Metadata RawCellMetadata        `json:"metadata"`
+}
+
+var _ schema.Cell = (*Raw)(nil)
+
+func (raw *Raw) Type() schema.CellType {
+	return schema.Raw
+}
+
+func (raw *Raw) MimeType() string {
+	return raw.Metadata.MimeType()
+}
+
+func (raw *Raw) Text() []byte {
+	return raw.Source.Text()
+}
+
+// RawCellMetadata may specify a target conversion format.
+type RawCellMetadata struct {
+	Format      *string `json:"format"`
+	RawMimeType *string `json:"raw_mimetype"`
+}
+
+// MimeType returns a more specific mime-type if one is provided and "text/plain" otherwise.
+func (raw *RawCellMetadata) MimeType() string {
+	switch {
+	case raw.Format != nil:
+		return *raw.Format
+	case raw.RawMimeType != nil:
+		return *raw.RawMimeType
+	default:
+		return common.PlainText
+	}
+}
+
+// Code defines the schema for a "code" cell.
+type Code struct {
+	Source        common.MultilineString `json:"input"`
+	TimesExecuted int                    `json:"prompt_number"`
+	Out           []Output               `json:"outputs"`
+	Lang          string                 `json:"language"`
+}
+
+var _ schema.CodeCell = (*Code)(nil)
+var _ schema.Outputter = (*Code)(nil)
+
+func (code *Code) Type() schema.CellType {
+	return schema.Code
+}
+
+// FIXME: return correct mime type (add a function to common)
+func (code *Code) MimeType() string {
+	return "application/x-python"
+}
+
+func (code *Code) Text() []byte {
+	return code.Source.Text()
+}
+
+func (code *Code) Language() string {
+	return code.Lang
+}
+
+func (code *Code) ExecutionCount() int {
+	return code.TimesExecuted
+}
+
+func (code *Code) Outputs() (cells []schema.Cell) {
+	for i := range code.Out {
+		cells = append(cells, code.Out[i].cell)
+	}
+	return
+}
+
+// Output unmarshals a single cell output into a schema.Cell based on its "output_type".
+type Output struct {
+	cell schema.Cell
+}
+
+func (out *Output) UnmarshalJSON(data []byte) error {
+	var v map[string]interface{}
+	if err := json.Unmarshal(data, &v); err != nil {
+		return fmt.Errorf("code outputs: %w", err)
+	}
+
+	// A missing or non-string "output_type" becomes "" and is reported as unknown below.
+	t, _ := v["output_type"].(string)
+	var c schema.Cell
+	switch t {
+	case "stream":
+		c = &StreamOutput{}
+	case "display_data":
+		c = &DisplayDataOutput{}
+	case "pyout":
+		c = &ExecuteResultOutput{}
+	case "pyerr":
+		c = &ErrorOutput{}
+	default:
+		return fmt.Errorf("unknown output type %q", t)
+	}
+	if err := json.Unmarshal(data, &c); err != nil {
+		return fmt.Errorf("%q output: %w", t, err)
+	}
+	out.cell = c
+	return nil
+}
+
+// StreamOutput is a plain, text-based output of the executed code.
+// Type() always reports schema.Stream; MimeType() reports common.Stdout or common.Stderr for those targets and common.PlainText otherwise.
+// The output is often decorated with ANSI-color sequences, which should be handled separately.
+type StreamOutput struct {
+	// Target can be stdout or stderr.
+	Target string                 `json:"stream"`
+	Source common.MultilineString `json:"text"`
+}
+
+var _ schema.Cell = (*StreamOutput)(nil)
+
+func (stream *StreamOutput) Type() schema.CellType {
+	return schema.Stream
+}
+
+func (stream *StreamOutput) MimeType() string {
+	switch stream.Target {
+	case "stdout":
+		return common.Stdout
+	case "stderr":
+		return common.Stderr
+	}
+	return common.PlainText
+}
+
+func (stream *StreamOutput) Text() []byte {
+	return stream.Source.Text()
+}
+
+// DisplayDataOutput are rich-format outputs generated by running the code in the parent cell.
+type DisplayDataOutput struct {
+	MimeBundle
+	Metadata map[string]interface{} `json:"metadata"`
+}
+
+var _ schema.Cell = (*DisplayDataOutput)(nil)
+
+func (dd *DisplayDataOutput) Type() schema.CellType {
+	return schema.DisplayData
+}
+
+// MimeBundle contains rich output data keyed by mime-type.
+type MimeBundle struct {
+	PNG        common.MultilineString `json:"png,omitempty"`
+	JPEG       common.MultilineString `json:"jpeg,omitempty"`
+	HTML       common.MultilineString `json:"html,omitempty"`
+	SVG        common.MultilineString `json:"svg,omitempty"`
+	Javascript common.MultilineString `json:"javascript,omitempty"`
+	JSON       common.MultilineString `json:"json,omitempty"`
+	PDF        common.MultilineString `json:"pdf,omitempty"`
+	LaTeX      common.MultilineString `json:"latex,omitempty"`
+	Txt        common.MultilineString `json:"text,omitempty"`
+}
+
+var _ schema.MimeBundle = (*MimeBundle)(nil)
+
+// MimeType returns the richer of the mime-types present in the bundle,
+// and falls back to "text/plain" otherwise.
+func (mb MimeBundle) MimeType() string {
+	switch {
+	case mb.PNG != nil:
+		return "image/png"
+	case mb.JPEG != nil:
+		return "image/jpeg"
+	case mb.HTML != nil:
+		return "text/html"
+	case mb.SVG != nil:
+		return "image/svg+xml"
+	case mb.Javascript != nil:
+		return "text/javascript"
+	case mb.JSON != nil:
+		return "application/json"
+	case mb.PDF != nil:
+		return "application/pdf"
+	case mb.LaTeX != nil:
+		return "application/x-latex"
+	}
+	return common.PlainText
+}
+
+// Text returns data with the richer mime-type.
+func (mb MimeBundle) Text() []byte {
+	return mb.Data(mb.MimeType())
+}
+
+// Data returns mime-type-specific content if present and a nil slice otherwise.
+func (mb MimeBundle) Data(mime string) []byte {
+	switch mime {
+	case "image/png":
+		return mb.PNG.Text()
+	case "image/jpeg":
+		return mb.JPEG.Text()
+	case "text/html":
+		return mb.HTML.Text()
+	case "image/svg+xml":
+		return mb.SVG.Text()
+	case "text/javascript":
+		return mb.Javascript.Text()
+	case "application/json":
+		return mb.JSON.Text()
+	case "application/pdf":
+		return mb.PDF.Text()
+	case "application/x-latex":
+		return mb.LaTeX.Text()
+	case common.PlainText:
+		return mb.Txt.Text()
+	}
+	return nil
+}
+
+// PlainText returns data for "text/plain" mime-type and a nil slice otherwise.
+func (mb MimeBundle) PlainText() []byte {
+	return mb.Data(common.PlainText)
+}
+
+// ExecuteResultOutput is the result of executing the code in the cell.
+// Its contents are identical to those of DisplayDataOutput with the addition of the execution count.
+type ExecuteResultOutput struct {
+	DisplayDataOutput
+	TimesExecuted int `json:"prompt_number"`
+}
+
+var _ schema.Cell = (*ExecuteResultOutput)(nil)
+var _ schema.ExecutionCounter = (*ExecuteResultOutput)(nil)
+
+func (ex *ExecuteResultOutput) Type() schema.CellType {
+	return schema.ExecuteResult
+}
+
+func (ex *ExecuteResultOutput) ExecutionCount() int {
+	return ex.TimesExecuted
+}
+
+// ErrorOutput stores the output of a failed code execution.
+type ErrorOutput struct {
+	ExceptionName  string   `json:"ename"`
+	ExceptionValue string   `json:"evalue"`
+	Traceback      []string `json:"traceback"`
+}
+
+var _ schema.Cell = (*ErrorOutput)(nil)
+
+func (eo *ErrorOutput) Type() schema.CellType {
+	return schema.Error
+}
+
+func (eo *ErrorOutput) MimeType() string {
+	return common.Stderr
+}
+
+// Text returns the traceback lines joined with newlines.
+func (eo *ErrorOutput) Text() []byte {
+	return []byte(strings.Join(eo.Traceback, "\n"))
+}
diff --git a/schema/v4/schema.go b/schema/v4/schema.go
index 630613a..c4b9369 100644
--- a/schema/v4/schema.go
+++ b/schema/v4/schema.go
@@ -26,6 +26,16 @@ type decoder struct{}
 
 var _ decode.Decoder = (*decoder)(nil)
 
+// ExtractCells returns the raw elements of the top-level "cells" array,
+// which holds all notebook cells since nbformat v4.0.
+func (d *decoder) ExtractCells(data []byte) ([]json.RawMessage, error) {
+	var top struct{ Cells []json.RawMessage `json:"cells"` }
+	if err := json.Unmarshal(data, &top); err != nil {
+		return nil, err
+	}
+	return top.Cells, nil
+}
+
 func (d *decoder) DecodeMeta(data []byte) (schema.NotebookMetadata, error) {
 	var nm NotebookMetadata
 	if err := json.Unmarshal(data, &nm); err != nil {
@@ -163,7 +173,7 @@ func (code *Code) Type() schema.CellType {
 	return schema.Code
 }
 
-// TODO: return correct mime type (add a function to common)
+// FIXME: return correct mime type (add a function to common)
 func (code *Code) MimeType() string {
 	return "application/x-python"
 }
@@ -303,7 +313,7 @@ func (mb MimeBundle) Data(mime string) []byte {
 	return nil
 }
 
-// RawText returns data for "text/plain" mime-type and a nil slice otherwise.
+// PlainText returns data for "text/plain" mime-type and a nil slice otherwise.
 func (mb MimeBundle) PlainText() []byte {
 	return mb.Data(common.PlainText)
 }
diff --git a/version.go b/version.go
index 3ac4da1..8e26083 100644
--- a/version.go
+++ b/version.go
@@ -2,6 +2,7 @@ package nb
 
 import (
 	// Currently supported nbformat versions:
+	_ "github.com/bevzzz/nb/schema/v3"
 	_ "github.com/bevzzz/nb/schema/v4"
 )