Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for formatted test #122

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ C_SRCS = slack.c \
slack-object.c \
slack-json.c \
purple-websocket.c \
json.c
json.c \
markdown.c

# Object file names using 'Substitution Reference'
C_OBJS = $(C_SRCS:.c=.o)
Expand Down
312 changes: 312 additions & 0 deletions markdown.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,312 @@
/*
* Markdown library for libpurple
* Copyright (C) 2018 Alyssa Rosenzweig
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

#include <string.h>
#include "markdown.h"

/* Markdown test string:
*
* "<--- \o/ **¯\_(ツ)_/¯** _italics_ __underline__ *correction right* *italics2* ~~strikethrough~~ ~notstriked~ ~me https://pidgin.im <style>body{background-color:red}</script> &lt;style&gt;body{background-color: red}&lt;/style&gt; <b>notbold</b> &lt;notatag&gt;"
*
* Checks for:
* - Correct escaping of lt/gt signs
* - Aesthetically correct (but non-conformant) handling of escape sequences as found in backslash-containing emoticons
* - Exhaustive test of syntaxes for italics, underline, strikethrough
* - Correct escaping for XSS risk
*
* Does not check for (open issues):
* - Italicised shruggie
*
*/

/* Append the opening tag `a` or the closing tag `b` to the GString named
 * `out` in the calling scope, depending on whether the state flag `f` is
 * currently set, then flip the flag.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement
 * and stays correct under an unbraced if/else. `f` must be a plain lvalue
 * without side effects because it is evaluated more than once. */
#define HTML_TOGGLE_OUT(f, a, b) \
	do { \
		out = g_string_append(out, (f) ? (b) : (a)); \
		(f) = !(f); \
	} while (0)

/* Workaround for errata in Discord's (users') markdown implementation.
 *
 * Scan the current word, i.e. from html[i] up to the next space or the end
 * of the string. At the first underscore found, report whether that
 * underscore terminates the word (is directly followed by a space or by the
 * end of the string). A word without any underscore yields FALSE. */

static gboolean
markdown_underscore_match(const gchar *html, int i)
{
	const gchar *cursor;

	for (cursor = html + i; *cursor != '\0' && *cursor != ' '; ++cursor) {
		if (*cursor == '_') {
			const gchar following = cursor[1];

			return following == '\0' || following == ' ';
		}
	}

	return FALSE;
}

/* Tell whether `c` is one of the characters that carry a special meaning in
 * this Markdown dialect (backslash, asterisk, tilde, underscore, backtick)
 * and can therefore be escaped with a backslash. */

static gboolean
markdown_is_escapable(char c)
{
	return c == '\\'
		|| c == '*'
		|| c == '~'
		|| c == '_'
		|| c == '`';
}

/* Decide whether the underscore at html[i] should be read as an italics
 * marker: always when italics are already open (`s_italics`), otherwise only
 * when the word starting at html[i + 1] contains a word-terminating
 * underscore (see markdown_underscore_match). */

static gboolean
markdown_should_underscore_italics(const gchar *html, unsigned i, gboolean s_italics)
{
	if (s_italics) {
		return TRUE;
	}

	return markdown_underscore_match(html, i + 1) ? TRUE : FALSE;
}

/* Decide whether the underscore at html[i] is special at all: either it is
 * directly followed by another underscore, or it qualifies as an italics
 * marker according to markdown_should_underscore_italics. */

static gboolean
markdown_should_underscore(const gchar *html, unsigned i, gboolean s_italics)
{
	if (html[i + 1] == '_') {
		return TRUE;
	}

	return markdown_should_underscore_italics(html, i, s_italics) ? TRUE : FALSE;
}

/*
 * Replace every occurrence of the literal HTML tag `tag` in `html` with the
 * Markdown marker `replacement`, moving whitespace that touches the tag to
 * the outside of the marker:
 *   - opening tag:  "<b>  text"  ->  "  *text"
 *   - closing tag:  "text  </b>" ->  "text*  "
 *
 * `tag` is treated as literal text (it is regex-escaped before use).
 * `replacement` is used as a GRegex replacement string, so a backslash in it
 * keeps its replacement-syntax meaning.
 *
 * Ownership: `html` must be a g_malloc'ed string. On success it is freed and
 * the newly allocated result is returned; if the regex cannot be compiled or
 * the replacement fails, the original `html` pointer is returned untouched.
 */
static gchar *
markdown_helper_replace(gchar *html, const gchar *tag, const gchar *replacement)
{
	gboolean is_closing_tag = (tag[0] == '<' && tag[1] == '/');
	gchar *escaped_tag = g_regex_escape_string(tag, -1);
	gchar *pattern;
	gchar *substitution;

	if (is_closing_tag) {
		/* whitespace before the closing tag ends up after the marker */
		pattern = g_strconcat("(\\s*)", escaped_tag, NULL);
		substitution = g_strconcat(replacement, "\\1", NULL);
	} else {
		/* whitespace after the opening tag ends up before the marker */
		pattern = g_strconcat(escaped_tag, "(\\s*)", NULL);
		substitution = g_strconcat("\\1", replacement, NULL);
	}
	g_free(escaped_tag);

	gchar *result = NULL;
	GRegex *regex = g_regex_new(pattern, 0, 0, NULL);

	/* g_regex_new returns NULL on a compile error; in that case neither
	 * g_regex_replace nor g_regex_unref may be called on it. */
	if (regex != NULL) {
		result = g_regex_replace(regex, html, -1, 0, substitution, 0, NULL);
		g_regex_unref(regex);
	}

	g_free(pattern);
	g_free(substitution);

	if (result != NULL) {
		g_free(html);
		html = result;
	}

	return html;
}

gchar *
markdown_convert_markdown(const gchar *html, gboolean escape_html, gboolean discord_hacks)
{
g_return_val_if_fail(html != NULL, NULL);

guint html_len = strlen(html);
GString *out = g_string_sized_new(html_len * 2);

gboolean s_bold = FALSE;
gboolean s_italics = FALSE;
gboolean s_strikethrough = FALSE;
gboolean s_codeblock = FALSE;
gboolean s_codebit = FALSE;
gboolean s_smiley = FALSE;

for (guint i = 0; i < html_len; ++i) {
char c = html[i];

if ((s_codeblock || s_codebit) && c != '`') {
out = g_string_append_c(out, html[i]);
continue;
}
if (s_smiley && c != ':') {
out = g_string_append_c(out, html[i]);
continue;
}

if (c == '\\') {
char next_char = html[++i];

/* If this is an escape-able character, don't print the
* backslash. Otherwise, do because the \ wasn't an
* escape anyway */

gboolean escapable = markdown_is_escapable(next_char);

/* Also, if this is an escapable character that would
* not actually -matter-, print it too. Fixes shruggie
* */

if (next_char == '_' && !markdown_should_underscore(html, i + 1, s_italics) && (escape_html || discord_hacks))
escapable = FALSE;

if (!escapable) {
out = g_string_append_c(out, '\\');
}

/* Append the next char regardless */
out = g_string_append_c(out, next_char);
} else if ((c == '<' || c == '>' || c == '&') && escape_html) {
/* These characters lack any particular meaning in
* Markdown, but need to be escaped to prevent getting
* mixed up with HTML. Failing to do so may result in
* valid parts of the message being stripped by
* overzealous sanitizers */

if (c == '<')
out = g_string_append(out, "&lt;");
else if (c == '>')
out = g_string_append(out, "&gt;");
else /*if (c == '&')*/
out = g_string_append(out, "&amp;");
} else if (c == '*') {
HTML_TOGGLE_OUT(s_bold, "<b>", "</b>");
} else if (c == '~') {
HTML_TOGGLE_OUT(s_strikethrough, "<s>", "</s>");
} else if (c == '_') {
HTML_TOGGLE_OUT(s_italics, "<i>", "</i>");
} else if (c == '`') {
if (html[i + 1] == '`' && html[i + 2] == '`') {
if (!s_codeblock) {
#ifdef MARKDOWN_PIDGIN
out = g_string_append(out, "<br/><span style='font-family: monospace; white-space: pre'>");
#else
out = g_string_append(out, "<br/><pre>");
#endif
} else {
#ifdef MARKDOWN_PIDGIN
out = g_string_append(out, "</span>");
#else
out = g_string_append(out, "</pre>");
#endif
i += 2;
}

s_codeblock = !s_codeblock;
} else {
#ifdef MARKDOWN_PIDGIN
HTML_TOGGLE_OUT(s_codebit, "<span style='font-family: monospace; white-space: pre'>", "</span>");
#else
HTML_TOGGLE_OUT(s_codebit, "<code>", "</code>");
#endif
}
} else if (c == ':' && (s_smiley || i == 0 || html[i - 1] == ' ')) {
s_smiley = !s_smiley;
out = g_string_append_c(out, c);
} else {
out = g_string_append_c(out, c);
}
}

gchar *new_out = purple_strreplace(out->str, "\n", "<br>");
g_string_free(out, TRUE);
return new_out;
}

/* Replace both the opening tag <name> and the closing tag </name> in the
 * local variable `html` with the Markdown marker `repl`. `name` must be a
 * string literal because it is concatenated at compile time.
 *
 * Wrapped in do { } while (0) so the two assignments always act as a single
 * statement, even under an unbraced if/else. */
#define REPLACE_TAG(name, repl) \
	do { \
		html = markdown_helper_replace(html, "<" name ">", (repl)); \
		html = markdown_helper_replace(html, "</" name ">", (repl)); \
	} while (0)

/*
 * Convert the HTML formatting tags in `html` to Markdown markers.
 *
 * Ownership: `html` must be a g_malloc'ed string. Each replacement step may
 * free it and allocate a new one, so the caller must stop using the pointer
 * it passed in and use (and eventually g_free) the returned pointer instead.
 *
 * Returns NULL if `html` is NULL.
 *
 * NOTE(review): <s> maps to "~~" and the bold <span> to "**", while <b> maps
 * to a single "*" -- confirm the doubled markers are what the receiving
 * service expects.
 */
gchar *
markdown_html_to_markdown(gchar *html)
{
	g_return_val_if_fail(html != NULL, NULL);

	REPLACE_TAG("b", "*");
	REPLACE_TAG("strong", "*");
	REPLACE_TAG("i", "_");
	REPLACE_TAG("em", "_");
	/* underline has no Markdown counterpart here: the tag is dropped */
	REPLACE_TAG("u", "");
	REPLACE_TAG("s", "~~");
	REPLACE_TAG("pre", "```");
	REPLACE_TAG("code", "`");

	/* Let newlines get passed through as HTML */

	/* Workaround XHTML-IM stuff. TODO: XXX
	 * Every closing </span> is turned into "**", not only the ones that
	 * close a bold span. */
	html = markdown_helper_replace(html, "<span style='font-weight: bold;'>", "**");
	html = markdown_helper_replace(html, "</span>", "**");

	return html;
}

/*
 * Backslash-escape Markdown control characters in plain text so that they
 * are shown literally instead of being interpreted as formatting.
 *
 * Text inside backtick-delimited code and inside http:// or https:// links
 * (up to the next space) is copied verbatim. Elsewhere a backslash is
 * inserted in front of:
 *   - '_' at the start or end of a word,
 *   - every '*',
 *   - '\' (only when discord_hacks is FALSE, and not for a word-initial
 *     "\_"),
 *   - '~' that is followed by another '~'.
 *
 * markdown:      NUL-terminated input text; must not be NULL.
 * discord_hacks: when TRUE, backslashes of the input are left unescaped.
 *
 * Returns a newly allocated string to be freed with g_free(), or NULL if
 * `markdown` is NULL.
 */
gchar *
markdown_escape_md(const gchar *markdown, gboolean discord_hacks)
{
	g_return_val_if_fail(markdown != NULL, NULL);

	size_t markdown_len = strlen(markdown);
	/* Worst case allocation */
	GString *s = g_string_sized_new(markdown_len * 2);

	gboolean verbatim = FALSE;
	gboolean code_block = FALSE;
	gboolean link = FALSE;

	for (size_t i = 0; i < markdown_len; ++i) {
		char c = markdown[i];

		if (c == '`') {
			if (code_block) {
				code_block = verbatim = FALSE;
			} else if (!verbatim) {
				code_block = verbatim = TRUE;
			}

			g_string_append_c(s, c);

			/* A ``` fence is copied and consumed as one unit. */
			if (markdown[i + 1] == '`' && markdown[i + 2] == '`') {
				g_string_append(s, "``");
				i += 2;
			}

			/* The backtick has been emitted already; falling
			 * through to the generic append at the end of the
			 * loop would write a single backtick twice. */
			continue;
		}

		if (!verbatim) {
			if (strncmp(markdown + i, "http://", sizeof("http://") - 1) == 0 ||
			    strncmp(markdown + i, "https://", sizeof("https://") - 1) == 0) {
				link = verbatim = TRUE;
			}
		}

		/* a link ends at the first space */
		if (link && c == ' ') {
			link = verbatim = FALSE;
		}

		if (!verbatim) {
			gboolean word_edge_underscore =
				c == '_' && (markdown[i + 1] == ' ' ||
				             markdown[i + 1] == '\0' ||
				             i == 0 ||
				             markdown[i - 1] == ' ');
			gboolean word_initial_escaped_underscore =
				markdown[i + 1] == '_' && (i == 0 || markdown[i - 1] == ' ');
			gboolean literal_backslash =
				c == '\\' && !word_initial_escaped_underscore && !discord_hacks;
			gboolean double_tilde = c == '~' && markdown[i + 1] == '~';

			if (word_edge_underscore || c == '*' || literal_backslash || double_tilde) {
				g_string_append_c(s, '\\');
			}
		}

		g_string_append_c(s, c);
	}

	return g_string_free(s, FALSE);
}
30 changes: 30 additions & 0 deletions markdown.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Markdown library for libpurple
* Copyright (C) 2018 Alyssa Rosenzweig
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/


/* Identifiers containing a double underscore are reserved for the
 * implementation, hence the plain guard name. */
#ifndef MARKDOWN_H
#define MARKDOWN_H

#include <purple.h>
#include <glib.h>

/* Convert Markdown-style message text to HTML. With `escape_html` set, the
 * characters '<', '>' and '&' of the input are emitted as HTML entities.
 * Returns a newly allocated string, or NULL if `html` is NULL. */
gchar *markdown_convert_markdown(const gchar *html, gboolean escape_html, gboolean discord_hacks);

/* Backslash-escape Markdown control characters in plain text; code spans and
 * http(s) links are left untouched. Returns a newly allocated string. */
gchar *markdown_escape_md(const gchar *markdown, gboolean discord_hacks);

/* Replace HTML formatting tags with Markdown markers. Takes ownership of
 * `html` (it may be freed and reallocated); use the returned pointer. */
gchar *markdown_html_to_markdown(gchar *html);

#endif /* MARKDOWN_H */
6 changes: 4 additions & 2 deletions slack-channel.c
Original file line number Diff line number Diff line change
Expand Up @@ -329,10 +329,12 @@ int slack_chat_send(PurpleConnection *gc, int cid, const char *msg, PurpleMessag

gchar *m = slack_html_to_message(sa, msg, flags);
glong mlen = g_utf8_strlen(m, 16384);
if (mlen > 4000)
if (mlen > 4000) {
g_free(m);
return -E2BIG;
}

struct send_chat *send = g_new(struct send_chat, 1);
struct send_chat *send = g_new0(struct send_chat, 1);
send->chan = g_object_ref(chan);
send->cid = cid;
send->flags = flags;
Expand Down
2 changes: 1 addition & 1 deletion slack-conversation.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ void slack_conversation_retrieve(SlackAccount *sa, const char *sid, SlackConvers
SlackObject *obj = slack_conversation_lookup_sid(sa, sid);
if (obj)
return cb(sa, data, obj);
struct conversation_retrieve *lookup = g_new(struct conversation_retrieve, 1);
struct conversation_retrieve *lookup = g_new0(struct conversation_retrieve, 1);
lookup->cb = cb;
lookup->data = data;
slack_api_get(sa, conversation_retrieve_cb, lookup, "conversations.info", "channel", sid, NULL);
Expand Down
6 changes: 4 additions & 2 deletions slack-im.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,12 @@ int slack_send_im(PurpleConnection *gc, const char *who, const char *msg, Purple

gchar *m = slack_html_to_message(sa, msg, flags);
glong mlen = g_utf8_strlen(m, 16384);
if (mlen > 4000)
if (mlen > 4000) {
g_free(m);
return -E2BIG;
}

struct send_im *send = g_new(struct send_im, 1);
struct send_im *send = g_new0(struct send_im, 1);
send->user = g_object_ref(user);
send->msg = m;
send->flags = flags;
Expand Down
Loading