Skip to content

Commit

Permalink
Chore: Move useful clipper logic to the lib package to be used in oth…
Browse files Browse the repository at this point in the history
…er places (#9053)
  • Loading branch information
pedr authored Oct 13, 2023
1 parent b1e1db7 commit 5733017
Show file tree
Hide file tree
Showing 8 changed files with 275 additions and 126 deletions.
1 change: 1 addition & 0 deletions .eslintignore
Original file line number Diff line number Diff line change
Expand Up @@ -575,6 +575,7 @@ packages/lib/WelcomeUtils.js
packages/lib/array.js
packages/lib/callbackUrlUtils.test.js
packages/lib/callbackUrlUtils.js
packages/lib/clipperUtils.js
packages/lib/commands/historyBackward.js
packages/lib/commands/historyForward.js
packages/lib/commands/index.js
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,7 @@ packages/lib/WelcomeUtils.js
packages/lib/array.js
packages/lib/callbackUrlUtils.test.js
packages/lib/callbackUrlUtils.js
packages/lib/clipperUtils.js
packages/lib/commands/historyBackward.js
packages/lib/commands/historyForward.js
packages/lib/commands/index.js
Expand Down
114 changes: 114 additions & 0 deletions packages/app-clipper/content_scripts/clipperUtils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
'use strict';
Object.defineProperty(exports, '__esModule', { value: true });
exports.getStyleSheets = exports.getImageSizes = void 0;
/**
 * Converts a URL found in the page into an absolute URL, using the current
 * page location as the reference.
 *
 * - Falsy values (empty string, null, undefined) are returned unchanged.
 * - URLs whose scheme is http, https, file or data are returned unchanged.
 * - Protocol-relative URLs ("//host/path") get the page protocol prepended.
 * - Root-relative URLs ("/path") get the page protocol and host prepended.
 * - Anything else is appended to the value returned by baseUrl().
 *
 * @param {string} url The URL to convert.
 * @returns {string} The absolute URL, or the input itself when it is falsy.
 */
function absoluteUrl(url) {
	if (!url) { return url; }

	const scheme = url.toLowerCase().split(':')[0];
	if (['http', 'https', 'file', 'data'].includes(scheme)) { return url; }

	if (url.startsWith('//')) { return location.protocol + url; }
	if (url.startsWith('/')) { return `${location.protocol}//${location.host}${url}`; }

	// baseUrl() keeps its trailing slash when the page path ends with "/"
	// (for example the site root), so only add a separator when it is
	// missing. Otherwise we would build URLs such as
	// "https://example.com//image.png".
	const base = baseUrl();
	const separator = base.endsWith('/') ? '' : '/';
	return `${base}${separator}${url}`;
}
/**
 * Returns the origin of the current page.
 *
 * location.origin normally holds protocol + domain + port (for example
 * https://example.com:8080). For the file:// protocol the value is browser
 * dependent (Firefox in particular returns "null"), so a fixed "file://"
 * prefix is returned in that case instead.
 *
 * @returns {string} "file://" for local files, otherwise location.origin.
 */
function pageLocationOrigin() {
	const isLocalFile = location.protocol === 'file:';
	return isLocalFile ? 'file://' : location.origin;
}
/**
 * Returns the "directory" part of the current page URL: the page origin
 * followed by location.pathname with its last path segment removed.
 *
 * When the path already ends with "/", the value is returned as is,
 * trailing slash included. Otherwise everything from the last "/" onwards
 * is dropped, so the result has no trailing slash.
 *
 * @returns {string} The base URL of the current page.
 */
function baseUrl() {
	const fullPath = pageLocationOrigin() + location.pathname;
	if (fullPath.endsWith('/')) { return fullPath; }

	// Without any "/" there is no parent segment to keep, which yields an
	// empty string.
	const lastSlash = fullPath.lastIndexOf('/');
	return lastSlash < 0 ? '' : fullPath.slice(0, lastSlash);
}
/**
 * Finds the class that identifies an SVG element for the clipper, i.e. the
 * first entry of its class list starting with "joplin-clipper-svg-".
 *
 * @param {{classList: Iterable<string>}} svg The SVG element to inspect.
 * @returns {string} The matching class name, or '' when there is none.
 */
function getJoplinClipperSvgClassName(svg) {
	const clipperClass = Array.from(svg.classList).find((name) => name.startsWith('joplin-clipper-svg-'));
	return clipperClass || '';
}
/**
 * Collects the dimensions of every <img> and <svg> found under an element.
 *
 * Images are keyed by their source as returned by imageSrc(), converted
 * with absoluteUrl() when forceAbsoluteUrls is true. SVGs are keyed by
 * their "joplin-clipper-svg-*" class name. Each key maps to an array with
 * one entry per matching element, in document order. Elements carrying the
 * "joplin-clipper-hidden" class are ignored, and SVGs without a Joplin
 * class are skipped with a console warning.
 *
 * @param {Element} element Root element to search.
 * @param {boolean} [forceAbsoluteUrls=false] Whether image keys are made absolute.
 * @returns {Object<string, Array<Object>>} Sizes grouped by image source or SVG class name.
 */
function getImageSizes(element, forceAbsoluteUrls = false) {
	const sizesByKey = {};

	const isHidden = (node) => Boolean(node.classList && node.classList.contains('joplin-clipper-hidden'));

	const record = (key, size) => {
		if (!sizesByKey[key]) { sizesByKey[key] = []; }
		sizesByKey[key].push(size);
	};

	for (const image of Array.from(element.getElementsByTagName('img'))) {
		if (isHidden(image)) { continue; }

		const rawSrc = imageSrc(image);
		const key = forceAbsoluteUrls ? absoluteUrl(rawSrc) : rawSrc;
		const { width, height, naturalWidth, naturalHeight } = image;
		record(key, { width, height, naturalWidth, naturalHeight });
	}

	for (const svgNode of Array.from(element.getElementsByTagName('svg'))) {
		if (isHidden(svgNode)) { continue; }

		const svgClass = getJoplinClipperSvgClassName(svgNode);
		if (!svgClass) {
			console.warn('SVG without a Joplin class:', svgNode);
			continue;
		}

		// Make sure the identifying class is present on the element.
		if (!svgNode.classList.contains(svgClass)) {
			svgNode.classList.add(svgClass);
		}

		const { width, height } = svgNode.getBoundingClientRect();
		record(svgClass, { width, height });
	}

	return sizesByKey;
}
exports.getImageSizes = getImageSizes;
/**
 * Returns the source URL to use for an image.
 *
 * currentSrc is preferred because it is the image actually being displayed,
 * which matters inside <picture> tags or with srcset where several sources
 * may exist. When currentSrc is empty or missing, src is used instead.
 *
 * @param {HTMLImageElement} image The image element.
 * @returns {string} image.currentSrc when set, otherwise image.src.
 */
function imageSrc(image) {
	return image.currentSrc || image.src;
}
/**
 * Lists the styles attached to a document.
 *
 * Every rule of every stylesheet in doc.styleSheets is returned as a
 * `{ type: 'text', value: <rule CSS text> }` entry. When the rules of a
 * stylesheet cannot be read, a `{ type: 'url', value: <stylesheet href> }`
 * entry is added for that stylesheet instead.
 *
 * @param {Document} doc The document whose stylesheets are read.
 * @returns {Array<{type: string, value: string}>} CSS text and stylesheet URL entries.
 */
// eslint-disable-next-line
function getStyleSheets(doc) {
	const entries = [];
	for (const styleSheet of Array.from(doc.styleSheets)) {
		try {
			for (const rule of styleSheet.cssRules) {
				entries.push({ type: 'text', value: rule.cssText });
			}
		} catch (error) {
			// Reading cssRules throws a CORS error on Chrome when the
			// stylesheet is hosted on a different domain. Such a stylesheet
			// is recorded by URL so that the desktop application, which has
			// no CORS restrictions, can download it.
			// eslint-disable-next-line
			console.info('Could not retrieve stylesheet now:', styleSheet.href);
			// eslint-disable-next-line
			console.info('It will downloaded by the main application.');
			// eslint-disable-next-line
			console.info(error);
			entries.push({ type: 'url', value: styleSheet.href });
		}
	}
	return entries;
}
exports.getStyleSheets = getStyleSheets;
// # sourceMappingURL=clipperUtils.js.map
141 changes: 16 additions & 125 deletions packages/app-clipper/content_scripts/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,6 @@
browserSupportsPromises_ = false;
}

function absoluteUrl(url) {
if (!url) return url;
const protocol = url.toLowerCase().split(':')[0];
if (['http', 'https', 'file', 'data'].indexOf(protocol) >= 0) return url;

if (url.indexOf('//') === 0) {
return location.protocol + url;
} else if (url[0] === '/') {
return `${location.protocol}//${location.host}${url}`;
} else {
return `${baseUrl()}/${url}`;
}
}

function escapeHtml(s) {
return s
.replace(/&/g, '&amp;')
Expand All @@ -49,85 +35,6 @@
return document.title.trim();
}

function pageLocationOrigin() {
// location.origin normally returns the protocol + domain + port (eg. https://example.com:8080)
// but for file:// protocol this is browser dependant and in particular Firefox returns "null"
// in this case.

if (location.protocol === 'file:') {
return 'file://';
} else {
return location.origin;
}
}

function baseUrl() {
let output = pageLocationOrigin() + location.pathname;
if (output[output.length - 1] !== '/') {
output = output.split('/');
output.pop();
output = output.join('/');
}
return output;
}

function getJoplinClipperSvgClassName(svg) {
for (const className of svg.classList) {
if (className.indexOf('joplin-clipper-svg-') === 0) return className;
}
return '';
}

function getImageSizes(element, forceAbsoluteUrls = false) {
const output = {};

const images = element.getElementsByTagName('img');
for (let i = 0; i < images.length; i++) {
const img = images[i];
if (img.classList && img.classList.contains('joplin-clipper-hidden')) continue;

let src = imageSrc(img);
src = forceAbsoluteUrls ? absoluteUrl(src) : src;

if (!output[src]) output[src] = [];

output[src].push({
width: img.width,
height: img.height,
naturalWidth: img.naturalWidth,
naturalHeight: img.naturalHeight,
});
}

const svgs = element.getElementsByTagName('svg');
for (let i = 0; i < svgs.length; i++) {
const svg = svgs[i];
if (svg.classList && svg.classList.contains('joplin-clipper-hidden')) continue;

const className = getJoplinClipperSvgClassName(svg);// 'joplin-clipper-svg-' + i;

if (!className) {
console.warn('SVG without a Joplin class:', svg);
continue;
}

if (!svg.classList.contains(className)) {
svg.classList.add(className);
}

const rect = svg.getBoundingClientRect();

if (!output[className]) output[className] = [];

output[className].push({
width: rect.width,
height: rect.height,
});
}

return output;
}

function getAnchorNames(element) {
const output = [];
// Anchor names are normally in A tags but can be in SPAN too
Expand All @@ -146,14 +53,6 @@
return output;
}

// In general we should use currentSrc because that's the image that's currently displayed,
// especially within <picture> tags or with srcset. In these cases there can be multiple
// sources and the best one is probably the one being displayed, thus currentSrc.
function imageSrc(image) {
if (image.currentSrc) return image.currentSrc;
return image.src;
}

// Cleans up element by removing all its invisible children (which we don't want to render as Markdown)
// And hard-code the image dimensions so that the information can be used by the clipper server to
// display them at the right sizes in the notes.
Expand Down Expand Up @@ -181,6 +80,7 @@
}

if (nodeName === 'img') {
// eslint-disable-next-line no-undef
const src = absoluteUrl(imageSrc(node));
node.setAttribute('src', src);
if (!(src in imageIndexes)) imageIndexes[src] = 0;
Expand All @@ -199,6 +99,7 @@
}

if (nodeName === 'svg') {
// eslint-disable-next-line no-undef
const className = getJoplinClipperSvgClassName(node);
if (!(className in imageIndexes)) imageIndexes[className] = 0;

Expand All @@ -216,11 +117,13 @@
}

if (nodeName === 'embed') {
// eslint-disable-next-line no-undef
const src = absoluteUrl(node.src);
node.setAttribute('src', src);
}

if (nodeName === 'object') {
// eslint-disable-next-line no-undef
const data = absoluteUrl(node.data);
node.setAttribute('data', data);
}
Expand Down Expand Up @@ -300,37 +203,14 @@
let svgId = 0;

for (const svg of svgs) {
// eslint-disable-next-line no-undef
if (!getJoplinClipperSvgClassName(svg)) {
svg.classList.add(`joplin-clipper-svg-${svgId}`);
svgId++;
}
}
}

// Given a document, return a <style> tag that contains all the styles
// required to render the page. Not currently used but could be as an
// option to clip pages as HTML.
function getStyleSheets(doc) {
const output = [];
for (let i = 0; i < doc.styleSheets.length; i++) {
const sheet = doc.styleSheets[i];
try {
for (const cssRule of sheet.cssRules) {
output.push({ type: 'text', value: cssRule.cssText });
}
} catch (error) {
// Calling sheet.cssRules will throw a CORS error on Chrome if the stylesheet is on a different domain.
// In that case, we skip it and add it to the list of stylesheet URLs. These URls will be downloaded
// by the desktop application, since it doesn't have CORS restrictions.
console.info('Could not retrieve stylesheet now:', sheet.href);
console.info('It will downloaded by the main application.');
console.info(error);
output.push({ type: 'url', value: sheet.href });
}
}
return output;
}

function documentForReadability() {
// Readability directly change the passed document so clone it so as
// to preserve the original web page.
Expand Down Expand Up @@ -372,7 +252,9 @@
name: shouldSendToJoplin ? 'sendContentToJoplin' : 'clippedContent',
title: title,
html: html,
// eslint-disable-next-line no-undef
base_url: baseUrl(),
// eslint-disable-next-line no-undef
url: pageLocationOrigin() + location.pathname + location.search,
parent_id: command.parent_id,
tags: command.tags || '',
Expand All @@ -397,6 +279,7 @@
response.warning = 'Could not retrieve simplified version of page - full page has been saved instead.';
return response;
}
// eslint-disable-next-line no-undef
return clippedContentResponse(article.title, article.body, getImageSizes(document), getAnchorNames(document));

} else if (command.name === 'isProbablyReaderable') {
Expand All @@ -408,6 +291,7 @@
} else if (command.name === 'completePageHtml') {

if (isPagePdf()) {
// eslint-disable-next-line no-undef
return clippedContentResponse(pageTitle(), embedPageUrl(), getImageSizes(document), getAnchorNames(document));
}

Expand All @@ -417,10 +301,12 @@
// Because cleanUpElement is going to modify the DOM and remove elements we don't want to work
// directly on the document, so we make a copy of it first.
const cleanDocument = document.body.cloneNode(true);
// eslint-disable-next-line no-undef
const imageSizes = getImageSizes(document, true);
const imageIndexes = {};
cleanUpElement(convertToMarkup, cleanDocument, imageSizes, imageIndexes);

// eslint-disable-next-line no-undef
const stylesheets = convertToMarkup === 'html' ? getStyleSheets(document) : null;

// The <BODY> tag may have a style in the CSS stylesheets. This
Expand Down Expand Up @@ -462,9 +348,11 @@
container.appendChild(range.cloneContents());
}

// eslint-disable-next-line no-undef
const imageSizes = getImageSizes(document, true);
const imageIndexes = {};
cleanUpElement(convertToMarkup, container, imageSizes, imageIndexes);
// eslint-disable-next-line no-undef
return clippedContentResponse(pageTitle(), container.innerHTML, getImageSizes(document), getAnchorNames(document));

} else if (command.name === 'screenshot') {
Expand Down Expand Up @@ -567,6 +455,7 @@
const content = {
title: pageTitle(),
crop_rect: selectionArea,
// eslint-disable-next-line no-undef
url: pageLocationOrigin() + location.pathname + location.search,
parent_id: command.parent_id,
tags: command.tags,
Expand All @@ -591,7 +480,9 @@

} else if (command.name === 'pageUrl') {

// eslint-disable-next-line no-undef
const url = pageLocationOrigin() + location.pathname + location.search;
// eslint-disable-next-line no-undef
return clippedContentResponse(pageTitle(), url, getImageSizes(document), getAnchorNames(document));

} else {
Expand Down
2 changes: 1 addition & 1 deletion packages/app-clipper/manifest.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"manifest_version": 2,
"name": "Joplin Web Clipper [DEV]",
"version": "2.13.0",
"version": "2.13.1",
"description": "Capture and save web pages and screenshots from your browser to Joplin.",
"homepage_url": "https://joplinapp.org",
"content_security_policy": "script-src 'self'; object-src 'self'",
Expand Down
Loading

0 comments on commit 5733017

Please sign in to comment.