diff --git a/CHANGELOG.md b/CHANGELOG.md index c835d3739..aa6d4df10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ project adheres to [Semantic Versioning](http://semver.org/). (Unreleased) ================== ### Changed +* `ctx.font` has a new C++ parser and is 2x-400x faster. Please file an issue if you experience different results, as caching has been removed. + ### Added ### Fixed diff --git a/binding.gyp b/binding.gyp index 166842641..bf647f7d1 100644 --- a/binding.gyp +++ b/binding.gyp @@ -75,7 +75,8 @@ 'src/Image.cc', 'src/ImageData.cc', 'src/init.cc', - 'src/register_font.cc' + 'src/register_font.cc', + 'src/FontParser.cc' ], 'conditions': [ ['OS=="win"', { diff --git a/index.js b/index.js index 89f2daabc..adde4da12 100644 --- a/index.js +++ b/index.js @@ -2,7 +2,6 @@ const Canvas = require('./lib/canvas') const Image = require('./lib/image') const CanvasRenderingContext2D = require('./lib/context2d') const CanvasPattern = require('./lib/pattern') -const parseFont = require('./lib/parse-font') const packageJson = require('./package.json') const bindings = require('./lib/bindings') const fs = require('fs') @@ -12,7 +11,6 @@ const JPEGStream = require('./lib/jpegstream') const { DOMPoint, DOMMatrix } = require('./lib/DOMMatrix') bindings.setDOMMatrix(DOMMatrix) -bindings.setParseFont(parseFont) function createCanvas (width, height, type) { return new Canvas(width, height, type) @@ -73,7 +71,6 @@ exports.DOMPoint = DOMPoint exports.registerFont = registerFont exports.deregisterAllFonts = deregisterAllFonts -exports.parseFont = parseFont exports.createCanvas = createCanvas exports.createImageData = createImageData diff --git a/lib/parse-font.js b/lib/parse-font.js deleted file mode 100644 index a18f05e51..000000000 --- a/lib/parse-font.js +++ /dev/null @@ -1,110 +0,0 @@ -'use strict' - -/** - * Font RegExp helpers. 
- */ - -const weights = 'bold|bolder|lighter|[1-9]00' -const styles = 'italic|oblique' -const variants = 'small-caps' -const stretches = 'ultra-condensed|extra-condensed|condensed|semi-condensed|semi-expanded|expanded|extra-expanded|ultra-expanded' -const units = 'px|pt|pc|in|cm|mm|%|em|ex|ch|rem|q' -const string = /'((\\'|[^'])+)'|"((\\"|[^"])+)"|[\w\s-]+/.source - -// [ [ <‘font-style’> || || <‘font-weight’> || <‘font-stretch’> ]? -// <‘font-size’> [ / <‘line-height’> ]? <‘font-family’> ] -// https://drafts.csswg.org/css-fonts-3/#font-prop -const weightRe = new RegExp(`(${weights}) +`, 'i') -const styleRe = new RegExp(`(${styles}) +`, 'i') -const variantRe = new RegExp(`(${variants}) +`, 'i') -const stretchRe = new RegExp(`(${stretches}) +`, 'i') -const familyRe = new RegExp(string, 'g') -const unquoteRe = /^['"](.*)['"]$/ -const unescapeRe = /\\(['"])/g -const sizeFamilyRe = new RegExp( - `([\\d\\.]+)(${units}) *((?:${string})( *, *(?:${string}))*)`) - -/** - * Cache font parsing. - */ - -const cache = {} - -const defaultHeight = 16 // pt, common browser default - -/** - * Parse font `str`. - * - * @param {String} str - * @return {Object} Parsed font. `size` is in device units. `unit` is the unit - * appearing in the input string. - * @api private - */ - -module.exports = str => { - // Cached - if (cache[str]) return cache[str] - - // Try for required properties first. - const sizeFamily = sizeFamilyRe.exec(str) - if (!sizeFamily) return // invalid - - const names = sizeFamily[3] - .match(familyRe) - // remove actual bounding quotes, if any, unescape any remaining quotes inside - .map(s => s.trim().replace(unquoteRe, '$1').replace(unescapeRe, '$1')) - .filter(s => !!s) - - // Default values and required properties - const font = { - weight: 'normal', - style: 'normal', - stretch: 'normal', - variant: 'normal', - size: parseFloat(sizeFamily[1]), - unit: sizeFamily[2], - family: names.join(',') - } - - // Optional, unordered properties. 
- let weight, style, variant, stretch - // Stop search at `sizeFamily.index` - const substr = str.substring(0, sizeFamily.index) - if ((weight = weightRe.exec(substr))) font.weight = weight[1] - if ((style = styleRe.exec(substr))) font.style = style[1] - if ((variant = variantRe.exec(substr))) font.variant = variant[1] - if ((stretch = stretchRe.exec(substr))) font.stretch = stretch[1] - - // Convert to device units. (`font.unit` is the original unit) - // TODO: ch, ex - switch (font.unit) { - case 'pt': - font.size /= 0.75 - break - case 'pc': - font.size *= 16 - break - case 'in': - font.size *= 96 - break - case 'cm': - font.size *= 96.0 / 2.54 - break - case 'mm': - font.size *= 96.0 / 25.4 - break - case '%': - // TODO disabled because existing unit tests assume 100 - // font.size *= defaultHeight / 100 / 0.75 - break - case 'em': - case 'rem': - font.size *= defaultHeight / 0.75 - break - case 'q': - font.size *= 96 / 25.4 / 4 - break - } - - return (cache[str] = font) -} diff --git a/src/Canvas.cc b/src/Canvas.cc index 6ba312008..7b208bec2 100644 --- a/src/Canvas.cc +++ b/src/Canvas.cc @@ -21,6 +21,7 @@ #include "Util.h" #include #include "node_buffer.h" +#include "FontParser.h" #ifdef HAVE_JPEG #include "JPEGStream.h" @@ -68,7 +69,8 @@ Canvas::Initialize(Napi::Env& env, Napi::Object& exports) { StaticValue("PNG_FILTER_PAETH", Napi::Number::New(env, PNG_FILTER_PAETH), napi_default_jsproperty), StaticValue("PNG_ALL_FILTERS", Napi::Number::New(env, PNG_ALL_FILTERS), napi_default_jsproperty), StaticMethod<&Canvas::RegisterFont>("_registerFont", napi_default_method), - StaticMethod<&Canvas::DeregisterAllFonts>("_deregisterAllFonts", napi_default_method) + StaticMethod<&Canvas::DeregisterAllFonts>("_deregisterAllFonts", napi_default_method), + StaticMethod<&Canvas::ParseFont>("parseFont", napi_default_method) }); data->CanvasCtor = Napi::Persistent(ctor); @@ -694,6 +696,7 @@ Canvas::RegisterFont(const Napi::CallbackInfo& info) { // now check the attrs, there are 
many ways to be wrong Napi::Object js_user_desc = info[1].As(); + // TODO: use FontParser on these values just like the FontFace API works char *family = str_value(js_user_desc.Get("family"), NULL, false); char *weight = str_value(js_user_desc.Get("weight"), "normal", true); char *style = str_value(js_user_desc.Get("style"), "normal", false); @@ -749,6 +752,40 @@ Canvas::DeregisterAllFonts(const Napi::CallbackInfo& info) { if (!success) Napi::Error::New(env, "Could not deregister one or more fonts").ThrowAsJavaScriptException(); } +/* + * Do not use! This is only exported for testing + */ +Napi::Value +Canvas::ParseFont(const Napi::CallbackInfo& info) { + Napi::Env env = info.Env(); + + if (info.Length() != 1) return env.Undefined(); + + Napi::String str; + if (!info[0].ToString().UnwrapTo(&str)) return env.Undefined(); + + bool ok; + auto props = FontParser::parse(str, &ok); + if (!ok) return env.Undefined(); + + Napi::Object obj = Napi::Object::New(env); + obj.Set("size", Napi::Number::New(env, props.fontSize)); + Napi::Array families = Napi::Array::New(env); + obj.Set("families", families); + + unsigned int index = 0; + + for (auto& family : props.fontFamily) { + families[index++] = Napi::String::New(env, family); + } + + obj.Set("weight", Napi::Number::New(env, props.fontWeight)); + obj.Set("variant", Napi::Number::New(env, static_cast(props.fontVariant))); + obj.Set("style", Napi::Number::New(env, static_cast(props.fontStyle))); + + return obj; +} + /* * Get a PangoStyle from a CSS string (like "italic") */ diff --git a/src/Canvas.h b/src/Canvas.h index 5f35b356b..5b039539a 100644 --- a/src/Canvas.h +++ b/src/Canvas.h @@ -68,6 +68,7 @@ class Canvas : public Napi::ObjectWrap { void StreamJPEGSync(const Napi::CallbackInfo& info); static void RegisterFont(const Napi::CallbackInfo& info); static void DeregisterAllFonts(const Napi::CallbackInfo& info); + static Napi::Value ParseFont(const Napi::CallbackInfo& info); Napi::Error CairoError(cairo_status_t status); 
static void ToPngBufferAsync(Closure* closure); static void ToJpegBufferAsync(Closure* closure); diff --git a/src/CanvasRenderingContext2d.cc b/src/CanvasRenderingContext2d.cc index d0966e299..1597d089a 100644 --- a/src/CanvasRenderingContext2d.cc +++ b/src/CanvasRenderingContext2d.cc @@ -9,6 +9,7 @@ #include "CanvasGradient.h" #include "CanvasPattern.h" #include "InstanceData.h" +#include "FontParser.h" #include #include #include "Image.h" @@ -2575,34 +2576,29 @@ Context2d::GetFont(const Napi::CallbackInfo& info) { void Context2d::SetFont(const Napi::CallbackInfo& info, const Napi::Value& value) { - InstanceData* data = env.GetInstanceData(); - if (!value.IsString()) return; - if (!value.As().Utf8Value().length()) return; - - Napi::Value mparsed; + std::string str = value.As().Utf8Value(); + if (!str.length()) return; - // parseFont returns undefined for invalid CSS font strings - if (!data->parseFont.Call({ value }).UnwrapTo(&mparsed) || mparsed.IsUndefined()) return; - - Napi::Object font = mparsed.As(); - - Napi::String empty = Napi::String::New(env, ""); - Napi::Number zero = Napi::Number::New(env, 0); - - std::string weight = font.Get("weight").UnwrapOr(empty).ToString().UnwrapOr(empty).Utf8Value(); - std::string style = font.Get("style").UnwrapOr(empty).ToString().UnwrapOr(empty).Utf8Value(); - double size = font.Get("size").UnwrapOr(zero).ToNumber().UnwrapOr(zero).DoubleValue(); - std::string unit = font.Get("unit").UnwrapOr(empty).ToString().UnwrapOr(empty).Utf8Value(); - std::string family = font.Get("family").UnwrapOr(empty).ToString().UnwrapOr(empty).Utf8Value(); + bool success; + auto props = FontParser::parse(str, &success); + if (!success) return; PangoFontDescription *desc = pango_font_description_copy(state->fontDescription); pango_font_description_free(state->fontDescription); - pango_font_description_set_style(desc, Canvas::GetStyleFromCSSString(style.c_str())); - pango_font_description_set_weight(desc, 
Canvas::GetWeightFromCSSString(weight.c_str())); + PangoStyle style = props.fontStyle == FontStyle::Italic ? PANGO_STYLE_ITALIC + : props.fontStyle == FontStyle::Oblique ? PANGO_STYLE_OBLIQUE + : PANGO_STYLE_NORMAL; + pango_font_description_set_style(desc, style); + pango_font_description_set_weight(desc, static_cast(props.fontWeight)); + + std::string family = props.fontFamily.empty() ? "" : props.fontFamily[0]; + for (size_t i = 1; i < props.fontFamily.size(); i++) { + family += "," + props.fontFamily[i]; + } if (family.length() > 0) { // See #1643 - Pango understands "sans" whereas CSS uses "sans-serif" std::string s1(family); @@ -2617,12 +2613,12 @@ Context2d::SetFont(const Napi::CallbackInfo& info, const Napi::Value& value) { PangoFontDescription *sys_desc = Canvas::ResolveFontDescription(desc); pango_font_description_free(desc); - if (size > 0) pango_font_description_set_absolute_size(sys_desc, size * PANGO_SCALE); + if (props.fontSize > 0) pango_font_description_set_absolute_size(sys_desc, props.fontSize * PANGO_SCALE); state->fontDescription = sys_desc; pango_layout_set_font_description(_layout, sys_desc); - state->font = value.As().Utf8Value().c_str(); + state->font = str; } /* diff --git a/src/CharData.h b/src/CharData.h new file mode 100644 index 000000000..ebc2dd5e1 --- /dev/null +++ b/src/CharData.h @@ -0,0 +1,231 @@ +// This is used for classifying characters according to the definition of tokens +// in the CSS standards, but could be extended for any other future uses + +#pragma once + +namespace CharData { + static constexpr uint8_t Whitespace = 0x1; + static constexpr uint8_t Newline = 0x2; + static constexpr uint8_t Hex = 0x4; + static constexpr uint8_t Nmstart = 0x8; + static constexpr uint8_t Nmchar = 0x10; + static constexpr uint8_t Sign = 0x20; + static constexpr uint8_t Digit = 0x40; + static constexpr uint8_t NumStart = 0x80; +}; + +using namespace CharData; + +constexpr const uint8_t charData[256] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0-8 + 
Whitespace, // 9 (HT) + Whitespace | Newline, // 10 (LF) + 0, // 11 (VT) + Whitespace | Newline, // 12 (FF) + Whitespace | Newline, // 13 (CR) + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 14-31 + Whitespace, // 32 (Space) + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 33-42 + Sign | NumStart, // 43 (+) + 0, // 44 + Nmchar | Sign | NumStart, // 45 (-) + 0, 0, // 46-47 + Nmchar | Digit | NumStart | Hex, // 48 (0) + Nmchar | Digit | NumStart | Hex, // 49 (1) + Nmchar | Digit | NumStart | Hex, // 50 (2) + Nmchar | Digit | NumStart | Hex, // 51 (3) + Nmchar | Digit | NumStart | Hex, // 52 (4) + Nmchar | Digit | NumStart | Hex, // 53 (5) + Nmchar | Digit | NumStart | Hex, // 54 (6) + Nmchar | Digit | NumStart | Hex, // 55 (7) + Nmchar | Digit | NumStart | Hex, // 56 (8) + Nmchar | Digit | NumStart | Hex, // 57 (9) + 0, 0, 0, 0, 0, 0, 0, // 58-64 + Nmstart | Nmchar | Hex, // 65 (A) + Nmstart | Nmchar | Hex, // 66 (B) + Nmstart | Nmchar | Hex, // 67 (C) + Nmstart | Nmchar | Hex, // 68 (D) + Nmstart | Nmchar | Hex, // 69 (E) + Nmstart | Nmchar | Hex, // 70 (F) + Nmstart | Nmchar, // 71 (G) + Nmstart | Nmchar, // 72 (H) + Nmstart | Nmchar, // 73 (I) + Nmstart | Nmchar, // 74 (J) + Nmstart | Nmchar, // 75 (K) + Nmstart | Nmchar, // 76 (L) + Nmstart | Nmchar, // 77 (M) + Nmstart | Nmchar, // 78 (N) + Nmstart | Nmchar, // 79 (O) + Nmstart | Nmchar, // 80 (P) + Nmstart | Nmchar, // 81 (Q) + Nmstart | Nmchar, // 82 (R) + Nmstart | Nmchar, // 83 (S) + Nmstart | Nmchar, // 84 (T) + Nmstart | Nmchar, // 85 (U) + Nmstart | Nmchar, // 86 (V) + Nmstart | Nmchar, // 87 (W) + Nmstart | Nmchar, // 88 (X) + Nmstart | Nmchar, // 89 (Y) + Nmstart | Nmchar, // 90 (Z) + 0, // 91 + Nmstart, // 92 (\) + 0, 0, // 93-94 + Nmstart | Nmchar, // 95 (_) + 0, // 96 + Nmstart | Nmchar | Hex, // 97 (a) + Nmstart | Nmchar | Hex, // 98 (b) + Nmstart | Nmchar | Hex, // 99 (c) + Nmstart | Nmchar | Hex, // 100 (d) + Nmstart | Nmchar | Hex, // 101 (e) + Nmstart | Nmchar | Hex, // 102 (f) + Nmstart | Nmchar, 
// 103 (g) + Nmstart | Nmchar, // 104 (h) + Nmstart | Nmchar, // 105 (i) + Nmstart | Nmchar, // 106 (j) + Nmstart | Nmchar, // 107 (k) + Nmstart | Nmchar, // 108 (l) + Nmstart | Nmchar, // 109 (m) + Nmstart | Nmchar, // 110 (n) + Nmstart | Nmchar, // 111 (o) + Nmstart | Nmchar, // 112 (p) + Nmstart | Nmchar, // 113 (q) + Nmstart | Nmchar, // 114 (r) + Nmstart | Nmchar, // 115 (s) + Nmstart | Nmchar, // 116 (t) + Nmstart | Nmchar, // 117 (u) + Nmstart | Nmchar, // 118 (v) + Nmstart | Nmchar, // 119 (w) + Nmstart | Nmchar, // 120 (x) + Nmstart | Nmchar, // 121 (y) + Nmstart | Nmchar, // 122 (z) + 0, 0, 0, 0, 0, // 123-127 + // Non-ASCII + Nmstart | Nmchar, // 128 + Nmstart | Nmchar, // 129 + Nmstart | Nmchar, // 130 + Nmstart | Nmchar, // 131 + Nmstart | Nmchar, // 132 + Nmstart | Nmchar, // 133 + Nmstart | Nmchar, // 134 + Nmstart | Nmchar, // 135 + Nmstart | Nmchar, // 136 + Nmstart | Nmchar, // 137 + Nmstart | Nmchar, // 138 + Nmstart | Nmchar, // 139 + Nmstart | Nmchar, // 140 + Nmstart | Nmchar, // 141 + Nmstart | Nmchar, // 142 + Nmstart | Nmchar, // 143 + Nmstart | Nmchar, // 144 + Nmstart | Nmchar, // 145 + Nmstart | Nmchar, // 146 + Nmstart | Nmchar, // 147 + Nmstart | Nmchar, // 148 + Nmstart | Nmchar, // 149 + Nmstart | Nmchar, // 150 + Nmstart | Nmchar, // 151 + Nmstart | Nmchar, // 152 + Nmstart | Nmchar, // 153 + Nmstart | Nmchar, // 154 + Nmstart | Nmchar, // 155 + Nmstart | Nmchar, // 156 + Nmstart | Nmchar, // 157 + Nmstart | Nmchar, // 158 + Nmstart | Nmchar, // 159 + Nmstart | Nmchar, // 160 + Nmstart | Nmchar, // 161 + Nmstart | Nmchar, // 162 + Nmstart | Nmchar, // 163 + Nmstart | Nmchar, // 164 + Nmstart | Nmchar, // 165 + Nmstart | Nmchar, // 166 + Nmstart | Nmchar, // 167 + Nmstart | Nmchar, // 168 + Nmstart | Nmchar, // 169 + Nmstart | Nmchar, // 170 + Nmstart | Nmchar, // 171 + Nmstart | Nmchar, // 172 + Nmstart | Nmchar, // 173 + Nmstart | Nmchar, // 174 + Nmstart | Nmchar, // 175 + Nmstart | Nmchar, // 176 + Nmstart | Nmchar, // 177 + 
Nmstart | Nmchar, // 178 + Nmstart | Nmchar, // 179 + Nmstart | Nmchar, // 180 + Nmstart | Nmchar, // 181 + Nmstart | Nmchar, // 182 + Nmstart | Nmchar, // 183 + Nmstart | Nmchar, // 184 + Nmstart | Nmchar, // 185 + Nmstart | Nmchar, // 186 + Nmstart | Nmchar, // 187 + Nmstart | Nmchar, // 188 + Nmstart | Nmchar, // 189 + Nmstart | Nmchar, // 190 + Nmstart | Nmchar, // 191 + Nmstart | Nmchar, // 192 + Nmstart | Nmchar, // 193 + Nmstart | Nmchar, // 194 + Nmstart | Nmchar, // 195 + Nmstart | Nmchar, // 196 + Nmstart | Nmchar, // 197 + Nmstart | Nmchar, // 198 + Nmstart | Nmchar, // 199 + Nmstart | Nmchar, // 200 + Nmstart | Nmchar, // 201 + Nmstart | Nmchar, // 202 + Nmstart | Nmchar, // 203 + Nmstart | Nmchar, // 204 + Nmstart | Nmchar, // 205 + Nmstart | Nmchar, // 206 + Nmstart | Nmchar, // 207 + Nmstart | Nmchar, // 208 + Nmstart | Nmchar, // 209 + Nmstart | Nmchar, // 210 + Nmstart | Nmchar, // 211 + Nmstart | Nmchar, // 212 + Nmstart | Nmchar, // 213 + Nmstart | Nmchar, // 214 + Nmstart | Nmchar, // 215 + Nmstart | Nmchar, // 216 + Nmstart | Nmchar, // 217 + Nmstart | Nmchar, // 218 + Nmstart | Nmchar, // 219 + Nmstart | Nmchar, // 220 + Nmstart | Nmchar, // 221 + Nmstart | Nmchar, // 222 + Nmstart | Nmchar, // 223 + Nmstart | Nmchar, // 224 + Nmstart | Nmchar, // 225 + Nmstart | Nmchar, // 226 + Nmstart | Nmchar, // 227 + Nmstart | Nmchar, // 228 + Nmstart | Nmchar, // 229 + Nmstart | Nmchar, // 230 + Nmstart | Nmchar, // 231 + Nmstart | Nmchar, // 232 + Nmstart | Nmchar, // 233 + Nmstart | Nmchar, // 234 + Nmstart | Nmchar, // 235 + Nmstart | Nmchar, // 236 + Nmstart | Nmchar, // 237 + Nmstart | Nmchar, // 238 + Nmstart | Nmchar, // 239 + Nmstart | Nmchar, // 240 + Nmstart | Nmchar, // 241 + Nmstart | Nmchar, // 242 + Nmstart | Nmchar, // 243 + Nmstart | Nmchar, // 244 + Nmstart | Nmchar, // 245 + Nmstart | Nmchar, // 246 + Nmstart | Nmchar, // 247 + Nmstart | Nmchar, // 248 + Nmstart | Nmchar, // 249 + Nmstart | Nmchar, // 250 + Nmstart | Nmchar, // 251 + 
Nmstart | Nmchar, // 252 + Nmstart | Nmchar, // 253 + Nmstart | Nmchar, // 254 + Nmstart | Nmchar // 255 +}; diff --git a/src/FontParser.cc b/src/FontParser.cc new file mode 100644 index 000000000..773502cb3 --- /dev/null +++ b/src/FontParser.cc @@ -0,0 +1,605 @@ +// This is written to exactly parse the `font` shorthand in CSS2: +// https://www.w3.org/TR/CSS22/fonts.html#font-shorthand +// https://www.w3.org/TR/CSS22/syndata.html#tokenization +// +// We may want to update it for CSS 3 (e.g. font-stretch, or updated +// tokenization) but I've only ever seen one or two issues filed in node-canvas +// due to parsing in my 8 years on the project + +#include "FontParser.h" +#include "CharData.h" +#include +#include + +Token::Token(Type type, std::string value) : type_(type), value_(std::move(value)) {} + +Token::Token(Type type, double value) : type_(type), value_(value) {} + +Token::Token(Type type) : type_(type), value_(std::string{}) {} + +const std::string& +Token::getString() const { + static const std::string empty; + auto* str = std::get_if(&value_); + return str ? *str : empty; +} + +double +Token::getNumber() const { + auto* num = std::get_if(&value_); + return num ? 
*num : 0.0f; +} + +Tokenizer::Tokenizer(std::string_view input) : input_(input) {} + +std::string +Tokenizer::utf8Encode(uint32_t codepoint) { + std::string result; + + if (codepoint < 0x80) { + result += static_cast(codepoint); + } else if (codepoint < 0x800) { + result += static_cast((codepoint >> 6) | 0xc0); + result += static_cast((codepoint & 0x3f) | 0x80); + } else if (codepoint < 0x10000) { + result += static_cast((codepoint >> 12) | 0xe0); + result += static_cast(((codepoint >> 6) & 0x3f) | 0x80); + result += static_cast((codepoint & 0x3f) | 0x80); + } else { + result += static_cast((codepoint >> 18) | 0xf0); + result += static_cast(((codepoint >> 12) & 0x3f) | 0x80); + result += static_cast(((codepoint >> 6) & 0x3f) | 0x80); + result += static_cast((codepoint & 0x3f) | 0x80); + } + + return result; +} + +char +Tokenizer::peek() const { + return position_ < input_.length() ? input_[position_] : '\0'; +} + +char +Tokenizer::advance() { + return position_ < input_.length() ? input_[position_++] : '\0'; +} + +Token +Tokenizer::parseNumber() { + enum class State { + Start, + AfterSign, + Digits, + AfterDecimal, + AfterE, + AfterESign, + ExponentDigits + }; + + size_t start = position_; + size_t ePosition = 0; + State state = State::Start; + bool valid = false; + + while (position_ < input_.length()) { + char c = peek(); + uint8_t flags = charData[static_cast(c)]; + + switch (state) { + case State::Start: + if (flags & CharData::Sign) { + position_++; + state = State::AfterSign; + } else if (flags & CharData::Digit) { + position_++; + state = State::Digits; + valid = true; + } else if (c == '.') { + position_++; + state = State::AfterDecimal; + } else { + goto done; + } + break; + + case State::AfterSign: + if (flags & CharData::Digit) { + position_++; + state = State::Digits; + valid = true; + } else if (c == '.') { + position_++; + state = State::AfterDecimal; + } else { + goto done; + } + break; + + case State::Digits: + if (flags & CharData::Digit) { + 
position_++; + } else if (c == '.') { + position_++; + state = State::AfterDecimal; + } else if (c == 'e' || c == 'E') { + ePosition = position_; + position_++; + state = State::AfterE; + valid = false; + } else { + goto done; + } + break; + + case State::AfterDecimal: + if (flags & CharData::Digit) { + position_++; + valid = true; + state = State::Digits; + } else { + goto done; + } + break; + + case State::AfterE: + if (flags & CharData::Sign) { + position_++; + state = State::AfterESign; + } else if (flags & CharData::Digit) { + position_++; + valid = true; + state = State::ExponentDigits; + } else { + position_ = ePosition; + valid = true; + goto done; + } + break; + + case State::AfterESign: + if (flags & CharData::Digit) { + position_++; + valid = true; + state = State::ExponentDigits; + } else { + position_ = ePosition; + valid = true; + goto done; + } + break; + + case State::ExponentDigits: + if (flags & CharData::Digit) { + position_++; + } else { + goto done; + } + break; + } + } + +done: + if (!valid) { + position_ = start; + return Token(Token::Type::Invalid); + } + + std::string number_str(input_.substr(start, position_ - start)); + double value = std::stod(number_str); + return Token(Token::Type::Number, value); +} + +// Note that identifiers are always lower-case. This helps us make easier/more +// efficient comparisons, but means that font-families specified as identifiers +// will be lower-cased. Since font selection isn't case sensitive, this +// shouldn't ever be a problem. +Token +Tokenizer::parseIdentifier() { + std::string identifier; + auto flags = CharData::Nmstart; + auto start = position_; + + while (position_ < input_.length()) { + char c = peek(); + + if (c == '\\') { + advance(); + if (!parseEscape(identifier)) { + position_ = start; + return Token(Token::Type::Invalid); + } + flags = CharData::Nmchar; + } else if (charData[static_cast(c)] & flags) { + identifier += advance() + (c >= 'A' && c <= 'Z' ? 
32 : 0); + flags = CharData::Nmchar; + } else { + break; + } + } + + return Token(Token::Type::Identifier, identifier); +} + +uint32_t +Tokenizer::parseUnicode() { + uint32_t value = 0; + size_t count = 0; + + while (position_ < input_.length() && count < 6) { + char c = peek(); + uint32_t digit; + + if (c >= '0' && c <= '9') { + digit = c - '0'; + } else if (c >= 'a' && c <= 'f') { + digit = c - 'a' + 10; + } else if (c >= 'A' && c <= 'F') { + digit = c - 'A' + 10; + } else { + break; + } + + value = value * 16 + digit; + advance(); + count++; + } + + // Optional whitespace after hex escape + char c = peek(); + if (c == '\r') { + advance(); + if (peek() == '\n') advance(); + } else if (isWhitespace(c)) { + advance(); + } + + return value; +} + +bool +Tokenizer::parseEscape(std::string& str) { + char c = peek(); + auto flags = charData[static_cast(c)]; + + if (flags & CharData::Hex) { + str += utf8Encode(parseUnicode()); + return true; + } else if (!(flags & CharData::Newline) && !(flags & CharData::Hex)) { + str += advance(); + return true; + } + + return false; +} + +Token +Tokenizer::parseString(char quote) { + advance(); + std::string value; + auto start = position_; + + while (position_ < input_.length()) { + char c = peek(); + + if (c == quote) { + advance(); + return Token(Token::Type::QuotedString, value); + } else if (c == '\\') { + advance(); + c = peek(); + if (c == '\r') { + advance(); + if (peek() == '\n') advance(); + } else if (isNewline(c)) { + advance(); + } else { + if (!parseEscape(value)) { + position_ = start; + return Token(Token::Type::Invalid); + } + } + } else { + value += advance(); + } + } + + position_ = start; + return Token(Token::Type::Invalid); +} + +Token +Tokenizer::nextToken() { + if (position_ >= input_.length()) { + return Token(Token::Type::EndOfInput); + } + + char c = peek(); + auto flags = charData[static_cast(c)]; + + if (isWhitespace(c)) { + std::string whitespace; + while (position_ < input_.length() && 
isWhitespace(peek())) { + whitespace += advance(); + } + return Token(Token::Type::Whitespace, whitespace); + } + + if (flags & CharData::NumStart) { + Token token = parseNumber(); + if (token.type() != Token::Type::Invalid) return token; + } + + if (flags & CharData::Nmstart) { + Token token = parseIdentifier(); + if (token.type() != Token::Type::Invalid) return token; + } + + if (c == '"') { + Token token = parseString('"'); + if (token.type() != Token::Type::Invalid) return token; + } + + if (c == '\'') { + Token token = parseString('\''); + if (token.type() != Token::Type::Invalid) return token; + } + + switch (advance()) { + case '/': return Token(Token::Type::Slash); + case ',': return Token(Token::Type::Comma); + case '%': return Token(Token::Type::Percent); + default: return Token(Token::Type::Invalid); + } +} + +FontParser::FontParser(std::string_view input) + : tokenizer_(input) + , currentToken_(tokenizer_.nextToken()) + , nextToken_(tokenizer_.nextToken()) {} + +const std::unordered_map FontParser::weightMap = { + {"normal", 400}, + {"bold", 700}, + {"lighter", 100}, + {"bolder", 700} +}; + +const std::unordered_map FontParser::unitMap = { + {"cm", 37.8f}, + {"mm", 3.78f}, + {"in", 96.0f}, + {"pt", 96.0f / 72.0f}, + {"pc", 96.0f / 6.0f}, + {"em", 16.0f}, + {"px", 1.0f} +}; + +void +FontParser::advance() { + currentToken_ = nextToken_; + nextToken_ = tokenizer_.nextToken(); +} + +void +FontParser::skipWs() { + while (currentToken_.type() == Token::Type::Whitespace) advance(); +} + +bool +FontParser::check(Token::Type type) const { + return currentToken_.type() == type; +} + +bool +FontParser::checkWs() const { + return nextToken_.type() == Token::Type::Whitespace + || nextToken_.type() == Token::Type::EndOfInput; +} + +bool +FontParser::parseFontStyle(FontProperties& props) { + if (check(Token::Type::Identifier)) { + const auto& value = currentToken_.getString(); + if (value == "italic") { + props.fontStyle = FontStyle::Italic; + advance(); + return 
true; + } else if (value == "oblique") { + props.fontStyle = FontStyle::Oblique; + advance(); + return true; + } else if (value == "normal") { + props.fontStyle = FontStyle::Normal; + advance(); + return true; + } + } + + return false; +} + +bool +FontParser::parseFontVariant(FontProperties& props) { + if (check(Token::Type::Identifier)) { + const auto& value = currentToken_.getString(); + if (value == "small-caps") { + props.fontVariant = FontVariant::SmallCaps; + advance(); + return true; + } else if (value == "normal") { + props.fontVariant = FontVariant::Normal; + advance(); + return true; + } + } + + return false; +} + +bool +FontParser::parseFontWeight(FontProperties& props) { + if (check(Token::Type::Number)) { + double weightFloat = currentToken_.getNumber(); + int weight = static_cast(weightFloat); + if (weight < 1 || weight > 1000) return false; + props.fontWeight = static_cast(weight); + advance(); + return true; + } else if (check(Token::Type::Identifier)) { + const auto& value = currentToken_.getString(); + + if (auto it = weightMap.find(value); it != weightMap.end()) { + props.fontWeight = it->second; + advance(); + return true; + } + } + + return false; +} + +bool +FontParser::parseFontSize(FontProperties& props) { + if (!check(Token::Type::Number)) return false; + + props.fontSize = currentToken_.getNumber(); + advance(); + + double multiplier = 1.0f; + if (check(Token::Type::Identifier)) { + const auto& unit = currentToken_.getString(); + + if (auto it = unitMap.find(unit); it != unitMap.end()) { + multiplier = it->second; + advance(); + } else { + return false; + } + } else if (check(Token::Type::Percent)) { + multiplier = 16.0f / 100.0f; + advance(); + } else { + return false; + } + + // Technically if we consumed some tokens but couldn't parse the font-size, + // we should rewind the tokenizer, but I don't think the grammar allows for + // any valid alternates in this specific case + + props.fontSize *= multiplier; + return true; +} + +// 
line-height is not used by canvas ever, but should still parse
+//
+// Consumes an optional "/ <line-height>" component of the font shorthand.
+// Returns true when there is no slash at all, or when the slash is followed by
+// `normal`, a bare number, a percentage, or a number whose unit is listed in
+// unitMap. Returns false for anything else. `props` is never modified.
+bool
+FontParser::parseLineHeight(FontProperties& props) {
+  // The line-height component is optional
+  if (!check(Token::Type::Slash)) return true;
+
+  advance();
+  skipWs();
+
+  if (check(Token::Type::Identifier)) {
+    // `normal` is the only keyword allowed here
+    if (currentToken_.getString() != "normal") return false;
+    advance();
+    return true;
+  }
+
+  if (!check(Token::Type::Number)) return false;
+  advance();
+
+  if (check(Token::Type::Percent)) {
+    advance(); // <percentage>
+  } else if (check(Token::Type::Identifier)) {
+    // <length>: an identifier glued to the number must be a known unit
+    if (unitMap.find(currentToken_.getString()) == unitMap.end()) return false;
+    advance();
+  }
+  // Otherwise this is a unitless <number> such as "12px/1.5 serif", which CSS
+  // allows. Whatever follows is validated by parseFontFamily.
+
+  return true;
+}
+
+// Parses a comma-separated list of font families up to the end of the input,
+// appending each one to props.fontFamily. A family is a run of identifiers
+// and/or quoted strings; whitespace between the parts is kept, leading and
+// trailing whitespace is dropped. Returns false if an unexpected token is
+// found where a family should start, or if no family was given at all
+// (<font-family> is a required part of the shorthand).
+bool
+FontParser::parseFontFamily(FontProperties& props) {
+  while (!check(Token::Type::EndOfInput)) {
+    std::string family;
+    std::string pendingWs; // whitespace seen since the last name part
+    bool found = false;
+
+    while (
+      check(Token::Type::QuotedString) ||
+      check(Token::Type::Identifier) ||
+      check(Token::Type::Whitespace)
+    ) {
+      if (check(Token::Type::Whitespace)) {
+        // Held back until another name part shows up, so that trailing
+        // whitespace never ends up in the family name
+        if (found) pendingWs += currentToken_.getString();
+      } else { // Identifier, QuotedString
+        if (found) {
+          family += pendingWs;
+          pendingWs.clear();
+        }
+
+        family += currentToken_.getString();
+        found = true;
+      }
+
+      advance();
+    }
+
+    if (!found) return false; // only whitespace or non-id/string found
+
+    props.fontFamily.push_back(family);
+
+    if (check(Token::Type::Comma)) advance();
+  }
+
+  // Reaching the end of the input without any family (e.g. "20px") is invalid
+  return !props.fontFamily.empty();
+}
+
+// Entry point: parses `fontString` as a CSS font shorthand. When `success` is
+// non-null it receives whether the whole string parsed; on failure the
+// returned properties may be only partially filled in and should be ignored.
+FontProperties
+FontParser::parse(const std::string& fontString, bool* success) {
+  FontParser parser(fontString);
+  auto result = parser.parseFont();
+  if (success) *success = !parser.hasError_;
+  return result;
+}
+
+FontProperties
+FontParser::parseFont() {
+  FontProperties props;
+  uint8_t state = 0b111;
+
+  skipWs();
+
+  for (size_t i = 0; i < 3 && checkWs(); i++) {
+    if ((state & 0b001) && parseFontStyle(props)) {
+      state &= 0b110;
+      goto match;
+    }
+
+    if ((state & 0b010) && parseFontVariant(props)) {
+      state &= 0b101;
+      goto match;
+    }
+
+    
if ((state & 0b100) && parseFontWeight(props)) { + state &= 0b011; + goto match; + } + + break; // all attempts exhausted + match: skipWs(); // success: move to the next non-ws token + } + + if (parseFontSize(props)) { + skipWs(); + if (parseLineHeight(props) && parseFontFamily(props)) { + return props; + } + } + + hasError_ = true; + return props; +} diff --git a/src/FontParser.h b/src/FontParser.h new file mode 100644 index 000000000..c88802109 --- /dev/null +++ b/src/FontParser.h @@ -0,0 +1,115 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include "CharData.h" + +enum class FontStyle { + Normal, + Italic, + Oblique +}; + +enum class FontVariant { + Normal, + SmallCaps +}; + +struct FontProperties { + double fontSize{16.0f}; + std::vector fontFamily; + uint16_t fontWeight{400}; + FontVariant fontVariant{FontVariant::Normal}; + FontStyle fontStyle{FontStyle::Normal}; +}; + +class Token { + public: + enum class Type { + Invalid, + Number, + Percent, + Identifier, + Slash, + Comma, + QuotedString, + Whitespace, + EndOfInput + }; + + Token(Type type, std::string value); + Token(Type type, double value); + Token(Type type); + + Type type() const { return type_; } + + const std::string& getString() const; + double getNumber() const; + + private: + Type type_; + std::variant value_; +}; + +class Tokenizer { + public: + Tokenizer(std::string_view input); + Token nextToken(); + + private: + std::string_view input_; + size_t position_{0}; + + // Util + std::string utf8Encode(uint32_t codepoint); + inline bool isWhitespace(char c) const { + return charData[static_cast(c)] & CharData::Whitespace; + } + inline bool isNewline(char c) const { + return charData[static_cast(c)] & CharData::Newline; + } + + // Moving through the string + char peek() const; + char advance(); + + // Tokenize + Token parseNumber(); + Token parseIdentifier(); + uint32_t parseUnicode(); + bool parseEscape(std::string& str); + Token parseString(char quote); +}; + 
+class FontParser {
+  public:
+    static FontProperties parse(const std::string& fontString, bool* success = nullptr); // *success is set to false when the string is rejected
+
+  private:
+    static const std::unordered_map<std::string, uint16_t> weightMap; // NOTE(review): template arguments were missing from this listing; inferred from FontProperties::fontWeight - confirm against FontParser.cc
+    static const std::unordered_map<std::string, float> unitMap; // NOTE(review): inferred from unitMap.find(identifier) and the float-precision sizes expected in test/fontParser.test.js - confirm
+
+    FontParser(std::string_view input);
+
+    void advance();
+    void skipWs();
+    bool check(Token::Type type) const;
+    bool checkWs() const;
+
+    bool parseFontStyle(FontProperties& props);
+    bool parseFontVariant(FontProperties& props);
+    bool parseFontWeight(FontProperties& props);
+    bool parseFontSize(FontProperties& props);
+    bool parseLineHeight(FontProperties& props);
+    bool parseFontFamily(FontProperties& props);
+    FontProperties parseFont();
+
+    Tokenizer tokenizer_;
+    Token currentToken_;
+    Token nextToken_;
+    bool hasError_{false}; // set by parseFont() on failure, reported through parse()
+};
diff --git a/test/canvas.test.js b/test/canvas.test.js
index 1a75ac031..75f15ed5a 100644
--- a/test/canvas.test.js
+++ b/test/canvas.test.js
@@ -14,7 +14,6 @@ const {
   createCanvas,
   createImageData,
   loadImage,
-  parseFont,
   registerFont,
   Canvas,
   deregisterAllFonts
@@ -37,78 +36,6 @@ describe('Canvas', function () {
     assert('width' in Canvas.prototype)
   })
 
-  it('.parseFont()', function () {
-    const tests = [
-      '20px Arial',
-      { size: 20, unit: 'px', family: 'Arial' },
-      '20pt Arial',
-      { size: 26.666666666666668, unit: 'pt', family: 'Arial' },
-      '20.5pt Arial',
-      { size: 27.333333333333332, unit: 'pt', family: 'Arial' },
-      '20% Arial',
-      { size: 20, unit: '%', family: 'Arial' }, // TODO I think this is a bad assertion - ZB 23-Jul-2017
-      '20mm Arial',
-      { size: 75.59055118110237, unit: 'mm', family: 'Arial' },
-      '20px serif',
-      { size: 20, unit: 'px', family: 'serif' },
-      '20px sans-serif',
-      { size: 20, unit: 'px', family: 'sans-serif' },
-      '20px monospace',
-      { size: 20, unit: 'px', family: 'monospace' },
-      '50px Arial, sans-serif',
-      { size: 50, unit: 'px', family: 'Arial,sans-serif' },
-      'bold italic 50px Arial, sans-serif',
-      { style: 'italic', weight: 'bold', size: 50, unit: 'px', family: 'Arial,sans-serif' },
-      '50px Helvetica , Arial, sans-serif',
-      { size: 50, unit: 'px', family: 'Helvetica,Arial,sans-serif' },
-      '50px "Helvetica Neue", sans-serif',
-      { size: 50, unit: 'px', family: 'Helvetica Neue,sans-serif' },
-      '50px "Helvetica Neue", "foo bar baz" , sans-serif',
-      { size: 50, unit: 'px', family: 'Helvetica Neue,foo bar baz,sans-serif' },
-      "50px 'Helvetica Neue'",
-      { size: 50, unit: 'px', family: 'Helvetica Neue' },
-      'italic 20px Arial',
-      { size: 20, unit: 'px', style: 'italic', family: 'Arial' },
-      'oblique 20px Arial',
-      { size: 20, unit: 'px', style: 'oblique', family: 'Arial' },
-      'normal 20px Arial',
-      { size: 20, unit: 'px', style: 'normal', family: 'Arial' },
-      '300 20px Arial',
-      { size: 20, unit: 'px', weight: '300', family: 'Arial' },
-      '800 20px Arial',
-      { size: 20, unit: 'px', weight: '800', family: 'Arial' },
-      'bolder 20px Arial',
-      { size: 20, unit: 'px', weight: 'bolder', family: 'Arial' },
-      'lighter 20px Arial',
-      { size: 20, unit: 'px', weight: 'lighter', family: 'Arial' },
-      'normal normal normal 16px Impact',
-      { size: 16, unit: 'px', weight: 'normal', family: 'Impact', style: 'normal', variant: 'normal' },
-      'italic small-caps bolder 16px cursive',
-      { size: 16, unit: 'px', style: 'italic', variant: 'small-caps', weight: 'bolder', family: 'cursive' },
-      '20px "new century schoolbook", serif',
-      { size: 20, unit: 'px', family: 'new century schoolbook,serif' },
-      '20px "Arial bold 300"', // synthetic case with weight keyword inside family
-      { size: 20, unit: 'px', family: 'Arial bold 300', variant: 'normal' },
-      `50px "Helvetica 'Neue'", "foo \\"bar\\" baz" , "Someone's weird \\'edge\\' case", sans-serif`,
-      { size: 50, unit: 'px', family: `Helvetica 'Neue',foo "bar" baz,Someone's weird 'edge' case,sans-serif` }
-    ]
-
-    for (let i = 0, len = tests.length; i < len; ++i) {
-      const str = tests[i++]
-      const expected = tests[i]
-      const actual = parseFont(str)
-
-      if (!expected.style) expected.style = 'normal'
-      if (!expected.weight) expected.weight = 'normal'
-      if (!expected.stretch) expected.stretch = 'normal'
-      if (!expected.variant) expected.variant = 'normal'
-
-      assert.deepEqual(actual, expected, 'Failed to parse: ' + str)
-    }
-
-    assert.strictEqual(parseFont('Helvetica, sans'), undefined)
-  })
-
   it('registerFont', function () {
     // Minimal test to make sure nothing is thrown
     registerFont('./examples/pfennigFont/Pfennig.ttf', { family: 'Pfennig' })
diff --git a/test/fontParser.test.js b/test/fontParser.test.js
new file mode 100644
index 000000000..0302466b9
--- /dev/null
+++ b/test/fontParser.test.js
@@ -0,0 +1,123 @@
+/* eslint-env mocha */
+
+'use strict'
+
+/**
+ * Module dependencies.
+ */
+const assert = require('assert')
+const { Canvas } = require('..')
+
+/**
+ * Flat list of pairs: a font string followed by the object that
+ * `Canvas.parseFont()` is expected to return for it, or `undefined` when the
+ * string must be rejected. `style`, `weight` and `variant` are left out of an
+ * expected object when they have the values listed in `defaults` below.
+ */
+const tests = [
+  '20px Arial',
+  { size: 20, families: ['arial'] },
+  '20pt Arial',
+  { size: 26.666667461395264, families: ['arial'] },
+  '20.5pt Arial',
+  { size: 27.333334147930145, families: ['arial'] },
+  '20% Arial',
+  { size: 3.1999999284744263, families: ['arial'] },
+  '20mm Arial',
+  { size: 75.59999942779541, families: ['arial'] },
+  '20px serif',
+  { size: 20, families: ['serif'] },
+  '20px sans-serif',
+  { size: 20, families: ['sans-serif'] },
+  '20px monospace',
+  { size: 20, families: ['monospace'] },
+  '50px Arial, sans-serif',
+  { size: 50, families: ['arial', 'sans-serif'] },
+  'bold italic 50px Arial, sans-serif',
+  { style: 1, weight: 700, size: 50, families: ['arial', 'sans-serif'] },
+  '50px Helvetica , Arial, sans-serif',
+  { size: 50, families: ['helvetica', 'arial', 'sans-serif'] },
+  '50px "Helvetica Neue", sans-serif',
+  { size: 50, families: ['Helvetica Neue', 'sans-serif'] },
+  '50px "Helvetica Neue", "foo bar baz" , sans-serif',
+  { size: 50, families: ['Helvetica Neue', 'foo bar baz', 'sans-serif'] },
+  "50px 'Helvetica Neue'",
+  { size: 50, families: ['Helvetica Neue'] },
+  'italic 20px Arial',
+  { size: 20, style: 1, families: ['arial'] },
+  'oblique 20px Arial',
+  { size: 20, style: 2, families: ['arial'] },
+  'normal 20px Arial',
+  { size: 20, families: ['arial'] },
+  '300 20px Arial',
+  { size: 20, weight: 300, families: ['arial'] },
+  '800 20px Arial',
+  { size: 20, weight: 800, families: ['arial'] },
+  'bolder 20px Arial',
+  { size: 20, weight: 700, families: ['arial'] },
+  'lighter 20px Arial',
+  { size: 20, weight: 100, families: ['arial'] },
+  'normal normal normal 16px Impact',
+  { size: 16, families: ['impact'] },
+  'italic small-caps bolder 16px cursive',
+  { size: 16, style: 1, variant: 1, weight: 700, families: ['cursive'] },
+  '20px "new century schoolbook", serif',
+  { size: 20, families: ['new century schoolbook', 'serif'] },
+  '20px "Arial bold 300"', // synthetic case with weight keyword inside family
+  { size: 20, families: ['Arial bold 300'] },
+  `50px "Helvetica 'Neue'", "foo \\"bar\\" baz" , "Someone's weird \\'edge\\' case", sans-serif`,
+  { size: 50, families: [`Helvetica 'Neue'`, 'foo "bar" baz', `Someone's weird 'edge' case`, 'sans-serif'] },
+  'Helvetica, sans',
+  undefined,
+  '123px thefont/123abc',
+  undefined,
+  '123px /\tnormal thefont',
+  {size: 123, families: ['thefont']},
+  '12px/1.2whoops arial',
+  undefined,
+  'bold bold 12px thefont',
+  undefined,
+  'italic italic 12px Arial',
+  undefined,
+  'small-caps bold italic small-caps 12px Arial',
+  undefined,
+  'small-caps bold oblique 12px \'A\'ri\\61l',
+  {size: 12, style: 2, weight: 700, variant: 1, families: ['Arial']},
+  '12px/34% "The\\\n Word"',
+  {size: 12, families: ['The Word']},
+  '',
+  undefined,
+  'normal normal normal 1%/normal a , \'b\'',
+  {size: 0.1599999964237213, families: ['a', 'b']},
+  'normalnormalnormal 1px/normal a',
+  undefined,
+  '12px _the_font',
+  {size: 12, families: ['_the_font']},
+  '9px 7 birds',
+  undefined,
+  '2em "Courier',
+  undefined,
+  `2em \\'Courier\\"`,
+  {size: 32, families: ['\'courier"']},
+  '1px \\10abcde',
+  {size: 1, families: [String.fromCodePoint(parseInt('10abcd', 16)) + 'e']},
+  '3E+2 1e-1px yay',
+  {weight: 300, size: 0.1, families: ['yay']}
+]
+
+// Values assumed for the properties an expected object does not mention
+const defaults = { style: 0, weight: 400, variant: 0 }
+
+describe('Font parser', function () {
+  for (let i = 0; i < tests.length; i += 2) {
+    const str = tests[i]
+    // Resolved here, not inside the callback: the deferred test then does not
+    // depend on the loop counter, and the fixture objects are never mutated.
+    const expected = tests[i + 1] && Object.assign({}, defaults, tests[i + 1])
+
+    // JSON.stringify keeps '' and control characters visible in the report
+    it(JSON.stringify(str), function () {
+      assert.deepEqual(Canvas.parseFont(str), expected)
+    })
+  }
+})