diff --git a/doc-trace.rkt b/doc-trace.rkt index 8591866..bf1ebe1 100644 --- a/doc-trace.rkt +++ b/doc-trace.rkt @@ -7,7 +7,8 @@ data/interval-map net/url "interfaces.rkt" - "responses.rkt") + "responses.rkt" + "docs-helpers.rkt") (struct Decl (require? id left right) #:transparent) @@ -86,18 +87,12 @@ (when url (when (= start finish) (set! finish (add1 finish))) - (define url (path->url path)) - (define url2 (if url-tag - (make-url (url-scheme url) - (url-user url) - (url-host url) - (url-port url) - (url-path-absolute? url) - (url-path url) - (url-query url) - url-tag) - url)) - (interval-map-set! docs start finish (list (url->string url2) def-tag)))) + (define path-url (path->url path)) + (define link+tag (cond + [url-tag (struct-copy url path-url [fragment url-tag])] + [def-tag (struct-copy url path-url [fragment (def-tag->html-anchor-tag def-tag)])] + [else path-url])) + (interval-map-set! docs start finish (list (url->string link+tag) def-tag)))) (define/override (syncheck:add-jump-to-definition source-obj start end id filename submods) (define decl (Decl filename id 0 0)) (interval-map-set! sym-bindings start (add1 end) decl)) diff --git a/docs-helpers.rkt b/docs-helpers.rkt index 23f1179..928e49f 100644 --- a/docs-helpers.rkt +++ b/docs-helpers.rkt @@ -7,7 +7,9 @@ racket/dict setup/collects racket/string - scribble/xref) + scribble/xref + net/url-string + racket/format) (define the-bluebox-cache (make-blueboxes-cache #t)) (define pkg-cache (make-hash)) @@ -54,7 +56,54 @@ [else (list strs #f)])] [else (list #f #f)])) +;; Examples: +;; Input: "file:///C:/Program Files/Racket/doc/reference/module.html#(form._((quote._~23~25kernel)._module))" #f +;; Output: https://docs.racket-lang.org/reference/module.html#%28form._%28%28quote._%7E23%7E25kernel%29._module%29%29 +;; (i.e. https://docs.racket-lang.org/ + left trimmed `url`) +;; Input: "pairs.html#(def._((lib._racket/list..rkt)._add-between))" "C:/Program Files/Racket/doc/reference/strings.html" +;; Output: https://docs.racket-lang.org/reference/pairs.html#%28def._%28%28lib._racket%2Flist..rkt%29._add-between%29%29 +;; (i.e. https://docs.racket-lang.org/ + /reference/ from `docs-path` + `url`) +(define (make-proper-url-for-online-documentation url [docs-path #f]) + (define online-docs-url "https://docs.racket-lang.org/") + (define (absolute-web-url? url) (and (string-contains? url "://") (not (string-prefix? url "file")))) + (define (get-relative-docs-url url) ;; e.g. "reference/module.html#(form._((quote._~23~25kernel)._module))" + (last (string-split url #rx"/doc/(racket/)?"))) ; "(racket/)?" in case docs are installed in 'usr/share/doc/racket' on linux + (define (strip-off-last-path-segment url) (string-join (drop-right (string-split url "/") 1) "/" #:after-last "/")) + (define (encode-url url-string) + (define url-struct (string->url url-string)) + ;; particularly encode chars '(', ')' and '~' from Markdown. Both VSCode's and Atom's Md parsers don't like them in links. + (current-url-encode-mode 'unreserved) + (define encoded-url (string-replace (url->string url-struct) "~" "%7E")) + ;; Rarely, there are `redirecting` links that require putting `&` back in query to work properly + (string-replace encoded-url "&" "&")) + + (define encoded-url (encode-url url)) + (cond + [(absolute-web-url? encoded-url) encoded-url] + [docs-path + (define ending (get-relative-docs-url docs-path)) + (~a online-docs-url + (if (or (string-prefix? encoded-url "#") (zero? (string-length encoded-url))) + ending + (strip-off-last-path-segment ending)) + encoded-url)] + [else (~a online-docs-url (get-relative-docs-url encoded-url))])) + +;; Example: '(def ((quote #%kernel) hasheq)) => "(def._((quote._~23~25kernel)._hasheq))" +;; mostly a copy of a closed function `anchor-name` in `scribble-lib/scribble/html-render.rkt` +(define (def-tag->html-anchor-tag v) + (define (encode-byte b) (string-append (if (< b 16) "~0" "~") (number->string b 16))) + (define (encode-bytes str) (string->bytes/utf-8 (encode-byte (bytes-ref str 0)))) + (let* ([v (string->bytes/utf-8 (format "~a" v))] + [v (regexp-replace* #rx#"[A-Z.]" v #".&")] + [v (regexp-replace* #rx#" " v #"._")] + [v (regexp-replace* #rx#"\"" v #".'")] + [v (regexp-replace* #rx#"[^-a-zA-Z0-9_!+*'()/.,]" v encode-bytes)]) + (bytes->string/utf-8 v))) + (provide find-containing-paren get-docs-for-tag - id-to-tag) + id-to-tag + make-proper-url-for-online-documentation + def-tag->html-anchor-tag) diff --git a/documentation-parser.rkt b/documentation-parser.rkt new file mode 100644 index 0000000..a58932f --- /dev/null +++ b/documentation-parser.rkt @@ -0,0 +1,290 @@ +#lang racket/base +(require + racket/match + racket/string + racket/format + net/url-string + html-parsing + racket/function + "docs-helpers.rkt") + + +;; ------------------------------------------ +;; Cursor (aka html node tree navigator) is implemented with the Zipper data structure +;; The idea of its use is borrowed from a similar project: https://github.com/dyoo/wescheme-docs/blob/master/tree-cursor.rkt +;; Zipper is described in detail in a paper "Functional Pearl. The Zipper" by Gerard Huet + +(struct cursor (selected-node lefts parent rights) #:transparent) +(define (make-cursor tree-node) (cursor tree-node '() #f '())) + +(define (cursor-can-go-down? current-cursor) + (match current-cursor [(cursor (list fst-child _ ...) _ _ _) #t] [_ #f])) +(define (cursor-can-go-up? current-cursor) + (match current-cursor [(cursor _ _ parent _) #:when parent #t] [_ #f])) +(define (cursor-can-go-left? current-cursor) + (match current-cursor [(cursor _ (list left-sibling _ ...) _ _) #t] [_ #f])) +(define (cursor-can-go-right? current-cursor) + (match current-cursor [(cursor _ _ _ (list right-sibling _ ...)) #t] [_ #f])) + +(define (cursor-go-down current-cursor) + (match current-cursor + [(cursor (list fst-child others ...) _ _ _) + (cursor fst-child '() current-cursor others)] + [_ (error "Cursor can't move down!")])) +(define (cursor-go-up current-cursor) + (match current-cursor + [(cursor selected-node lefts parent rights) + (cursor (append (reverse lefts) (cons selected-node rights)) + (cursor-lefts parent) (cursor-parent parent) (cursor-rights parent))] + [_ (error "Cursor can't move up!")])) +(define (cursor-go-up-until-true cursor predicate?) + (cond + [(predicate? (cursor-selected-node cursor)) cursor] + [(cursor-can-go-up? cursor) (cursor-go-up-until-true (cursor-go-up cursor) predicate?)] + [else #f])) +(define (cursor-go-left current-cursor) + (match current-cursor + [(cursor selected-node (list fst-left-sibling others ...) parent rights) + (cursor fst-left-sibling others parent (cons selected-node rights))] + [_ (error "Cursor can't move left!")])) +(define (cursor-go-right current-cursor) + (match current-cursor + [(cursor selected-node lefts parent (list fst-right-sibling others ...)) + (cursor fst-right-sibling (cons selected-node lefts) parent others)] + [_ (error "Cursor can't move right!")])) +(define (cursor-go-to-next-sibling-or-uncle-node cursor doc-is-nested?) + (define (is-outside-of-blockquote? cursor) (is-blockquote-leftindent? (cursor-selected-node cursor))) + (cond + [(cursor-can-go-right? cursor) (cursor-go-right cursor)] + [(cursor-can-go-up? cursor) + (define parent-cursor (cursor-go-up cursor)) + (cond + ;; If currently parsed doc was `nested`, don't go outside of it - stop parsing here + [(and doc-is-nested? (is-outside-of-blockquote? parent-cursor)) #f] + [else (cursor-go-to-next-sibling-or-uncle-node parent-cursor doc-is-nested?)])] + [else #f])) + + +;; ------------------------------------------ +;; This functions are responsible for finding html nodes that constitute the selected element's documentation + +(define (find-doc-beginning-and-take-cursor doc-xexp anchor-name) + (define (find-node predicate? cursor) + (cond + [(predicate? (cursor-selected-node cursor)) cursor] + [(cursor-can-go-down? cursor) (find-node predicate? (cursor-go-down cursor))] + [(cursor-can-go-right? cursor) (find-node predicate? (cursor-go-right cursor))] + [(cursor-can-go-up? cursor) + (let loop ([cursor cursor]) + (cond + [(cursor-can-go-right? cursor) (find-node predicate? (cursor-go-right cursor))] + [(cursor-can-go-up? cursor) (loop (cursor-go-up cursor))] + [else #f] + ))] + [else #f])) + (define (find-doc-beginning cursor) + (or + (cursor-go-up-until-true cursor + (match-lambda [`(div (@ (class "SIntrapara")) ,_ ...) #t] [_ #f])) + ;; Just in case someone for some peculiar reason forgot to include
in the documentation + (cursor-go-up-until-true cursor + (match-lambda [`(blockquote (@ (class "SVInsetFlow")) ,_ ...) #t] [_ #f])))) + (define maybe-cursor (find-node + (λ (x) (equal? x (list 'name anchor-name))) + (make-cursor doc-xexp))) + (and maybe-cursor (find-doc-beginning maybe-cursor))) + +(define (selected-node-contains-documentation-boundary? cursor doc-is-nested?) + (define (find-boundary-node predicate? tree) + (cond + ;; If the selected docs weren't nested themselves, then we collect nested docs without checking for their boundaries + [(and (not doc-is-nested?) (is-blockquote-leftindent? tree)) #f] + ;; if there is a list, then we want (probably?) take it as a whole, no matter what boundaries it may have + [(tag-name? tree 'ul) #f] + [(predicate? tree) tree] + [(list? tree) (ormap (λ (x) (find-boundary-node predicate? x)) tree)] + [else #f])) + (find-boundary-node + (match-lambda + ;; beginning of docs for the next function, method, struct, or whatever + [`(div (@ (class "RBackgroundLabelInner")) + (p ,(or "class" "constructor" "interface" "method" "mixin" "parameter" "procedure" "signature" "struct" "syntax" "value"))) #t] + ;; end of a doc list ;; e.g.