From 13da7b166a583bf7555009c6801dda98e2055258 Mon Sep 17 00:00:00 2001 From: Fuwn Date: Tue, 23 Jul 2024 21:09:50 -0700 Subject: [PATCH] fix(html): use markdown safety --- Cargo.lock | 424 +++++++++++++++++++++++++++++++++++++++++++++++++--- Cargo.toml | 4 +- src/html.rs | 9 +- 3 files changed, 414 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aa5f1f8..d8496f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -227,6 +227,55 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "anstream" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" + +[[package]] +name = "anstyle-parse" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + [[package]] name = "anyhow" version = "1.0.86" @@ -251,6 +300,30 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -335,6 +408,16 @@ dependencies = [ "bytes", ] +[[package]] +name = "caseless" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808dab3318747be122cb31d36de18d4d1c81277a76f8332a02b81a3d73463d7f" +dependencies = [ + "regex", + "unicode-normalization", +] + [[package]] name = "cc" version = "1.0.79" @@ -350,12 +433,80 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "clap" +version = "4.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f6b81fb3c84f5563d509c59b5a48d935f689e993afa90fe39047f05adef9142" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca6706fd5224857d9ac5eb9355f6683563cc0541c7cd9d014043b57cbec78ac" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", + "terminal_size", +] + +[[package]] +name = "clap_derive" +version = "4.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "clap_lex" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70" + [[package]] name = "clru" version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b8191fa7302e03607ff0e237d4246cc043ff5b3cb9409d995172ba3bea16b807" +[[package]] +name = "colorchoice" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" + +[[package]] +name = "comrak" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "395ab67843c57df5a4ee29d610740828dbc928cc64ecf0f2a1d5cd0e98e107a9" +dependencies = [ + "caseless", + "clap", + "derive_builder", + "entities", + "memchr", + "once_cell", + "regex", + "shell-words", + "slug", + "syntect", + "typed-arena", + "unicode_categories", + "xdg", +] + [[package]] name = "convert_case" version = "0.4.0" @@ -401,6 +552,41 @@ dependencies = [ "typenum", ] +[[package]] +name = "darling" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn 2.0.66", +] + +[[package]] +name = "darling_macro" +version = "0.20.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" +dependencies = [ + "darling_core", + "quote", + "syn 2.0.66", +] + [[package]] name = "deranged" version = "0.3.11" @@ -410,6 +596,37 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "derive_builder" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7" +dependencies = [ + "derive_builder_macro", +] + +[[package]] +name = "derive_builder_core" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.66", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" +dependencies = [ + "derive_builder_core", + "syn 2.0.66", +] + [[package]] name = "derive_more" version = "0.99.17" @@ -423,6 +640,12 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "deunicode" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "339544cc9e2c4dc3fc7149fd630c5f22263a4fdf18a98afd0075784968b5cf00" + [[package]] name = "digest" version = "0.10.6" @@ -465,6 +688,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "entities" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5320ae4c3782150d900b79807611a59a99fc9a1d61d686faafc24b93fc8d7ca" + [[package]] name = "env_logger" version = "0.10.0" @@ -494,6 +723,16 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "fancy-regex" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2" +dependencies = [ + "bit-set", + "regex", +] + [[package]] name = "faster-hex" version = "0.9.0" @@ -1141,6 +1380,12 @@ version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.2.6" @@ -1165,15 +1410,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "html-escape" -version = "0.2.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476" -dependencies = [ - "utf8-width", -] - [[package]] name = "http" version = "0.2.9" @@ -1321,6 +1557,12 @@ dependencies = [ "syn 2.0.66", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "1.0.0" @@ -1375,6 +1617,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "is_terminal_polyfill" +version = "1.70.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" + [[package]] name = "itoa" version = "1.0.6" @@ -1411,6 +1659,12 @@ version = "0.2.155" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.3.8" @@ -1541,9 +1795,31 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.17.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7e5500299e16ebb147ae15a00a942af264cf3688f47923b8fc2cd5858f23ad3" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "onig" +version = "6.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f" +dependencies = [ + "bitflags 1.3.2", + "libc", + "once_cell", + "onig_sys", +] + +[[package]] +name = "onig_sys" +version = "69.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7" +dependencies = [ + "cc", + "pkg-config", +] [[package]] name = "parking_lot" @@ -1598,6 +1874,19 @@ version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +[[package]] +name = "plist" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42cf17e9a1800f5f396bc67d193dc9411b59012a5876445ef450d449881e1016" +dependencies = [ + "base64", + "indexmap", + "quick-xml", + "serde", + "time", +] + [[package]] name = "powerfmt" version = "0.2.0" @@ -1635,6 +1924,15 @@ version = "28.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "744a264d26b88a6a7e37cbad97953fa233b94d585236310bcbc88474b4092d79" +[[package]] +name = "quick-xml" +version = "0.32.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d3a6e5838b60e0e8fa7a43f22ade549a37d61f8bdbe636d0d7816191de969c2" +dependencies = [ + "memchr", +] + [[package]] name = "quote" version = "1.0.36" @@ -1700,7 +1998,7 @@ checksum = "af83e617f331cc6ae2da5443c602dfa5af81e517212d9d611a5b3ba1777b5370" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.7.1", ] [[package]] @@ -1721,6 +2019,12 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" +[[package]] +name = "regex-syntax" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" + [[package]] name = "ring" version = "0.16.20" @@ -1843,9 +2147,9 @@ version = "0.2.27" dependencies = [ "actix-web", "anyhow", + "comrak", "dotenv", "germ", - "html-escape", "log", "pretty_env_logger", "url", @@ -1912,6 +2216,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae1a47186c03a32177042e55dbc5fd5aee900b8e0069a8d70fba96a9375cd012" +[[package]] +name = "shell-words" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" + [[package]] name = "signal-hook" version = "0.3.15" @@ -1940,6 +2250,16 @@ dependencies = [ "autocfg", ] +[[package]] +name = "slug" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bd94acec9c8da640005f8e135a39fc0372e74535e6b368b7a04b875f784c8c4" +dependencies = [ + "deunicode", + "wasm-bindgen", +] + [[package]] name = "smallvec" version = "1.13.2" @@ -1978,6 +2298,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + [[package]] name = "syn" version = "1.0.109" @@ -2011,6 +2337,29 @@ dependencies = [ "syn 2.0.66", ] +[[package]] +name = "syntect" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "874dcfa363995604333cf947ae9f751ca3af4522c60886774c4963943b4746b1" +dependencies = [ + "bincode", + "bitflags 1.3.2", + "fancy-regex", + "flate2", + "fnv", + "once_cell", + "onig", + "plist", + "regex-syntax 0.8.4", + "serde", + "serde_derive", + "serde_json", + "thiserror", + "walkdir", + "yaml-rust", +] + [[package]] name = "tempfile" version = "3.5.0" @@ -2033,6 +2382,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "terminal_size" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21bebf2b7c9e0a515f6e0f8c51dc0f8e4696391e6f1ff30379559f8365fb0df7" +dependencies = [ + "rustix 0.38.34", + "windows-sys 0.48.0", +] + [[package]] name = "thiserror" version = "1.0.40" @@ -2186,6 +2545,12 @@ dependencies = [ "once_cell", ] +[[package]] +name = "typed-arena" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6af6ae20167a9ece4bcb41af5b80f8a1f1df981f6391189ce00fd257af04126a" + [[package]] name = "typenum" version = "1.16.0" @@ -2213,6 +2578,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "untrusted" version = "0.7.1" @@ -2236,18 +2607,18 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" -[[package]] -name = "utf8-width" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" - [[package]] name = "utf8_iter" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + [[package]] name = "vergen" version = "8.3.1" @@ -2604,6 +2975,21 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" +[[package]] +name = "xdg" +version = "2.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "213b7324336b53d2414b2db8537e56544d981803139155afa84f76eeebb7a546" + +[[package]] +name = "yaml-rust" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56c1936c4cc7a1c9ab21a1ebb602eb942ba868cbd44a99cb7cdc5892335e1c85" +dependencies = [ + "linked-hash-map", +] + [[package]] name = "yoke" version = "0.7.4" diff --git a/Cargo.toml b/Cargo.toml index 5cd07ff..e9e88b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -36,8 +36,8 @@ dotenv = "0.15.0" # URL Standard url = "2.3.1" -# HTML Encoding -html-escape = "0.2.13" +# Markdown Encoding +comrak = "0.26.0" [build-dependencies] # Compile-time Environment Variables diff --git a/src/html.rs b/src/html.rs index 81012ed..e9d0b96 100644 --- a/src/html.rs +++ b/src/html.rs @@ -9,6 +9,12 @@ fn link_from_host_href(url: &Url, href: &str) -> Option { )) } +fn safe(text: &str) -> String { + comrak::markdown_to_html(text, &comrak::ComrakOptions::default()) + .replace("

", "") + .replace("

", "") +} + #[allow(clippy::too_many_lines, clippy::cognitive_complexity)] pub fn from_gemini( response: &germ::request::Response, @@ -21,7 +27,6 @@ pub fn from_gemini( let ast = ast_tree.inner(); let mut html = String::new(); let mut title = String::new(); - let safe = html_escape::encode_text; let mut previous_link = false; let condense_links = { let links = var("CONDENSE_LINKS").map_or_else( @@ -208,7 +213,7 @@ pub fn from_gemini( "", items .iter() - .map(|i| format!("
  • {i}
  • ")) + .map(|i| format!("
  • {}
  • ", safe(i))) .collect::>() .join("\n") )),