diff --git a/extension/extra/goldmark-jupyter/LICENCE b/extension/extra/goldmark-jupyter/LICENCE new file mode 100644 index 0000000..ad04dc7 --- /dev/null +++ b/extension/extra/goldmark-jupyter/LICENCE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Dmytro Solovei + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/extension/extra/goldmark-jupyter/README.md b/extension/extra/goldmark-jupyter/README.md new file mode 100644 index 0000000..67b9e05 --- /dev/null +++ b/extension/extra/goldmark-jupyter/README.md @@ -0,0 +1,85 @@ +# goldmark-jupyter + +From `nbformat` documentation: + +```txt +Markdown (and raw) cells can have a number of attachments, typically inline images, that can be referenced in the markdown content of a cell. 
🖇 + +(punctuation mine) +``` + +`goldmark-jupyter` helps [`goldmark`](https://github.com/yuin/goldmark) recognise [cell attachments](https://nbformat.readthedocs.io/en/latest/format_description.html#cell-attachments) and include them in the rendered markdown correctly. + + +| `goldmark` | `goldmark-jupyter` | +| ----------- | ----------- | +| ![img](./assets/goldmark.png) | ![img](./assets/goldmark-jupyter.png) | + +## Installation + +```sh +go get github.com/bevzzz/nb/extension/extra/goldmark-jupyter +``` + +## Usage + +Package `goldmark-jupyter` exports 2 dedicated extensions for `goldmark` and `nb`, which should be used together like so: + +```go +import ( + "github.com/bevzzz/nb" + "github.com/bevzzz/nb/extension/extra/goldmark-jupyter" + "github.com/yuin/goldmark" +) + +md := goldmark.New( + goldmark.WithExtensions( + jupyter.Attachments(), + ), +) + +c := nb.New( + nb.WithExtensions( + jupyter.Goldmark(md), + ), +) + +if err := c.Convert(os.Stdout, b); err != nil { + panic(err) +} +``` + +`Attachments` will extend the default `goldmark.Markdown` with a custom link parser and an image renderer. Quite naturally, this renderer accepts `html.Options` which can be passed to the constructor: + +```go +import ( + "github.com/bevzzz/nb/extension/extra/goldmark-jupyter" + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/renderer/html" +) + +md := goldmark.New( + goldmark.WithExtensions( + jupyter.Attachments( + html.WithXHTML(), + html.WithUnsafe(), + ), + ), +) +``` + +Note, however, that options not applicable to image rendering will have no effect. As of the day of writing, `goldmark v1.6.0` references these options when rendering images: + +- `WithXHTML()` +- `WithUnsafe()` +- `WithWriter(w)` + +## Contributing + +Thank you for giving `goldmark-jupyter` a run! + +If you find a bug that needs fixing or a feature that needs adding, please consider describing it in an issue or opening a PR. 
+ +## License + +This software is released under [the MIT License](https://opensource.org/license/mit/). diff --git a/extension/extra/goldmark-jupyter/assets/goldmark-jupyter.png b/extension/extra/goldmark-jupyter/assets/goldmark-jupyter.png new file mode 100644 index 0000000..dab1e2d Binary files /dev/null and b/extension/extra/goldmark-jupyter/assets/goldmark-jupyter.png differ diff --git a/extension/extra/goldmark-jupyter/assets/goldmark.png b/extension/extra/goldmark-jupyter/assets/goldmark.png new file mode 100644 index 0000000..525bda8 Binary files /dev/null and b/extension/extra/goldmark-jupyter/assets/goldmark.png differ diff --git a/extension/extra/goldmark-jupyter/attachment.go b/extension/extra/goldmark-jupyter/attachment.go new file mode 100644 index 0000000..8722df0 --- /dev/null +++ b/extension/extra/goldmark-jupyter/attachment.go @@ -0,0 +1,206 @@ +// Package jupyter provides extensions for goldmark and nb. Together they add support +// for inline images, which have their data stored as cell attachments, in markdown cells. +// +// How it is achieved: +// +// 1. Goldmark extends nb with a custom "markdown" cell renderer which +// stores cell attachments to the parser.Context on every render. +// +// 2. Attachments extends goldmark with a custom link parser (ast.KindLink) +// and an image NodeRenderFunc. +// +// The parser is context-aware and will get the related mime-bundle from the context +// and store it to node attributes for every link whose destination looks like "attachment:image.png". +// +// Custom image renderer writes base64-encoded data from the mime-bundle if one's present, +// falling back to the destination URL. 
+package jupyter + +import ( + "io" + "regexp" + + "github.com/bevzzz/nb" + "github.com/bevzzz/nb/extension" + "github.com/bevzzz/nb/schema" + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +// Attachments adds support for Jupyter [cell attachments] to goldmark parser and renderer. +// +// The given options are applied to a fresh html.Config, which is stored +// on the returned extender. +// +// [cell attachments]: https://nbformat.readthedocs.io/en/latest/format_description.html#cell-attachments +func Attachments(opts ...html.Option) goldmark.Extender { + c := html.NewConfig() + for _, opt := range opts { + opt.SetHTMLOption(&c) + } + return &attachments{ + config: c, + } +} + +// Goldmark overrides the default rendering function for markdown cells +// and stores cell attachments to the parser.Context on every render. +// +// A new parser.Context is built for each cell and passed to md.Convert. +func Goldmark(md goldmark.Markdown) nb.Extension { + return extension.NewMarkdown( + func(w io.Writer, c schema.Cell) error { + ctx := newContext(c) + return md.Convert(c.Text(), w, parser.WithContext(ctx)) + }, + ) +} + +var ( + // key is a context key for storing cell attachments. + key = parser.NewContextKey() + + // name is the name of a node attribute that holds the mime-bundle. + // This package uses node attributes as a proxy for rendering context, + // so this attribute will never be added to the HTML output. The name is + // intentionally [invalid] to avoid name clashes with other potential attributes. + // + // [invalid]: https://www.w3.org/TR/2011/WD-html5-20110525/syntax.html#attributes-0 + name = []byte("") +) + +// newContext adds mime-bundles from cell attachments to a new parser.Context. +// Cells that do not implement schema.HasAttachments yield an empty context. +func newContext(cell schema.Cell) parser.Context { + ctx := parser.NewContext() + if c, ok := cell.(schema.HasAttachments); ok { + ctx.Set(key, c.Attachments()) + } + return ctx +} + +// linkParser adds base64-encoded image data from parser.Context to node's attributes. 
+type linkParser struct { + link parser.InlineParser // link is goldmark's default link parser. +} + +// newLinkParser wraps goldmark's default link parser. +func newLinkParser() *linkParser { + return &linkParser{ + link: parser.NewLinkParser(), + } +} + +var _ parser.InlineParser = (*linkParser)(nil) + +// Trigger delegates to the wrapped link parser, so linkParser is triggered by the same characters. +func (p *linkParser) Trigger() []byte { + return p.link.Trigger() +} + +// attachedFile retrieves the name of the attached file from the link's destination. +// +// NOTE(review): \w matches only [0-9A-Za-z_], so a destination such as +// "attachment:my-image.png" is not recognised -- confirm this is intended. +var attachedFile = regexp.MustCompile(`attachment:(\w+\.\w+)$`) + +// Parse stores mime-bundle in node attributes for links whose destination is an attachment. +// +// The node produced by the wrapped link parser is returned in every case; it is only +// annotated when it is an *ast.Image, its destination matches attachedFile, and the +// parser.Context holds schema.Attachments under key. +func (p *linkParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) (n ast.Node) { + n = p.link.Parse(parent, block, pc) + + img, ok := n.(*ast.Image) + if !ok { + // goldmark's default link parser will return a "state node" whenever it's triggered + // by the opening bracket of the link's alt-text "[" or any intermediate characters. + // We only want to intercept when the link is done parsing and we get a valid *ast.Image. + return n + } + + submatch := attachedFile.FindSubmatch(img.Destination) + if len(submatch) < 2 { + return + } + filename := submatch[1] + + att, ok := pc.Get(key).(schema.Attachments) + if att == nil || !ok { + return + } + + // Store the file's mime-bundle as a node attribute under name. + data := att.MimeBundle(string(filename)) + n.SetAttribute(name, data) + return +} + +// image renders inline images from cell attachments. +type image struct { + html.Config +} + +var _ renderer.NodeRenderer = (*image)(nil) + +// RegisterFuncs registers render as the render function for ast.KindImage nodes. +func (img *image) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(ast.KindImage, img.render) +} + +// render borrows heavily from goldmark's [renderImage]. 
+// +// [renderImage]: https://github.com/yuin/goldmark/blob/90c46e0829c11ca8d1010856b2a6f6f88bfc68a3/renderer/html/html.go#L673 +func (img *image) render(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) { + if !entering { + return ast.WalkContinue, nil + } + + n := node.(*ast.Image) + _, _ = w.WriteString("`)
+	_, _ = w.Write(nodeToHTMLText(n, source))
+	_ = w.WriteByte('") + } else { + _, _ = w.WriteString(">") + } + + return ast.WalkSkipChildren, nil +} + +// attachments implements goldmark.Extender. +type attachments struct { + config html.Config +} + +var _ goldmark.Extender = (*attachments)(nil) + +// Extend adds a custom link parser and an image renderer. +// +// Priorities are selected based on the ones used in goldmark; presumably the slightly +// lower values place these ahead of goldmark's defaults (TODO confirm against util.Prioritized). +func (a *attachments) Extend(md goldmark.Markdown) { + md.Parser().AddOptions( + parser.WithInlineParsers(util.Prioritized(newLinkParser(), 199)), // default: 200 + ) + md.Renderer().AddOptions( + renderer.WithNodeRenderers(util.Prioritized(&image{Config: a.config}, 999)), // default: 1000 + ) +} diff --git a/extension/extra/goldmark-jupyter/attachment_test.go b/extension/extra/goldmark-jupyter/attachment_test.go new file mode 100644 index 0000000..34852df --- /dev/null +++ b/extension/extra/goldmark-jupyter/attachment_test.go @@ -0,0 +1,86 @@ +package jupyter_test + +import ( + "strings" + "testing" + + "github.com/bevzzz/nb" + "github.com/bevzzz/nb/pkg/test" + "github.com/bevzzz/nb/schema" + jupyter "github.com/nb/extension/extra/goldmark-jupyter" + "github.com/stretchr/testify/require" + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/renderer/html" +) + +func Test(t *testing.T) { + for _, tt := range []struct { + name string + cell schema.Cell + opts []html.Option + want string + }{ + { + name: "with attachment", + cell: test.WithAttachment( + test.Markdown("![alt](attachment:photo.jpeg)"), + "photo.jpeg", + map[string]interface{}{ + "image/jpeg": "base64-image-data", + }, + ), + want: `

alt

`, + }, + { + name: "with html.Options", + cell: test.WithAttachment( + test.Markdown("![alt](attachment:photo.jpeg)"), + "photo.jpeg", + map[string]interface{}{ + "image/jpeg": "base64-image-data", + }, + ), + opts: []html.Option{ + html.WithXHTML(), // closes image tag with "/>" + }, + want: `

alt

`, + }, + { + name: "regular image", + cell: test.Markdown("![alt](https://example.com/photo)"), + want: `

alt

`, + }, + { + name: "regular image with title", + cell: test.Markdown("![alt](https://example.com/photo \"Title\")"), + want: `

alt

`, + }, + } { + t.Run(tt.name, func(t *testing.T) { + // Arrange + var sb strings.Builder + md := goldmark.New( + goldmark.WithExtensions( + jupyter.Attachments(tt.opts...), + ), + ) + + c := nb.New( + nb.WithExtensions( + jupyter.Goldmark(md), + ), + nb.WithRenderOptions(test.NoWrapper), + ) + + r := c.Renderer() + + // Act + err := r.Render(&sb, test.Notebook(tt.cell)) + require.NoError(t, err) + + // Assert + got := strings.Trim(sb.String(), "\n") + require.Equal(t, tt.want, got, "rendered markdown") + }) + } +} diff --git a/extension/extra/goldmark-jupyter/go.mod b/extension/extra/goldmark-jupyter/go.mod new file mode 100644 index 0000000..a08059f --- /dev/null +++ b/extension/extra/goldmark-jupyter/go.mod @@ -0,0 +1,16 @@ +module github.com/nb/extension/extra/goldmark-jupyter + +go 1.18 + +require ( + github.com/bevzzz/nb v0.2.0 + github.com/stretchr/testify v1.8.4 + github.com/yuin/goldmark v1.6.0 +) + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + golang.org/x/net v0.20.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/extension/extra/goldmark-jupyter/go.sum b/extension/extra/goldmark-jupyter/go.sum new file mode 100644 index 0000000..0737a86 --- /dev/null +++ b/extension/extra/goldmark-jupyter/go.sum @@ -0,0 +1,17 @@ +github.com/bevzzz/nb v0.2.0 h1:KcM1+12N/vCBl43M8kzsRbMxolHpYJlLWOEQe/PFHPA= +github.com/bevzzz/nb v0.2.0/go.mod h1:i8J311U4tUD6ZjBDE3HY8qPswTuUORiUfAFcWPqUTdA= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.8.4 
h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/yuin/goldmark v1.6.0 h1:boZcn2GTjpsynOsC0iJHnBWa4Bi0qzfJjthwauItG68= +github.com/yuin/goldmark v1.6.0/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo= +golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/extension/extra/goldmark-jupyter/goldmark.go b/extension/extra/goldmark-jupyter/goldmark.go new file mode 100644 index 0000000..eb1194b --- /dev/null +++ b/extension/extra/goldmark-jupyter/goldmark.go @@ -0,0 +1,53 @@ +package jupyter + +import ( + "bytes" + + "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/util" +) + +// nodeToHTMLText is an unexported utility function from [goldmark/render/html] package, +// that writes node's content in HTML format. 
+// +// Text of the original license and copyright notice below: +// +// MIT License +// +// Copyright (c) 2019 Yusuke Inuzuka +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// +// [goldmark/render/html]: https://github.com/yuin/goldmark/blob/master/renderer/html/html.go +func nodeToHTMLText(n ast.Node, source []byte) []byte { + var buf bytes.Buffer + for c := n.FirstChild(); c != nil; c = c.NextSibling() { + if s, ok := c.(*ast.String); ok && s.IsCode() { + // Code strings are copied verbatim, without HTML escaping. + buf.Write(s.Text(source)) + } else if !c.HasChildren() { + // Childless nodes contribute their HTML-escaped text. + buf.Write(util.EscapeHTML(c.Text(source))) + if t, ok := c.(*ast.Text); ok && t.SoftLineBreak() { + // Keep a soft line break as a newline in the output. + buf.WriteByte('\n') + } + } else { + // Any other node is flattened by recursing into its children. + buf.Write(nodeToHTMLText(c, source)) + } + } + return buf.Bytes() +}