From 45d829e4393179885dba093f683faff42b57e6de Mon Sep 17 00:00:00 2001 From: Evan Hahn <69474926+EvanHahn-Signal@users.noreply.github.com> Date: Wed, 26 Aug 2020 14:47:50 -0500 Subject: [PATCH] Improved link verification logic. --- js/modules/link_previews.js | 17 ++++++++++++++++- test/modules/link_previews_test.js | 18 ++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/js/modules/link_previews.js b/js/modules/link_previews.js index 0a5b52df2..e5e6a64b1 100644 --- a/js/modules/link_previews.js +++ b/js/modules/link_previews.js @@ -1,6 +1,6 @@ /* global URL */ -const { isNumber, compact } = require('lodash'); +const { isNumber, compact, isEmpty } = require('lodash'); const he = require('he'); const nodeUrl = require('url'); const LinkifyIt = require('linkify-it'); @@ -235,11 +235,26 @@ function isLinkSneaky(link) { return true; } + // To quote [RFC 1034][0]: "the total number of octets that represent a + // domain name [...] is limited to 255." To be extra careful, we set a + // maximum of 2048. (This also uses the string's `.length` property, + // which isn't exactly the same thing as the number of octets.) + // [0]: https://tools.ietf.org/html/rfc1034 + if (domain.length > 2048) { + return true; + } + // Domains cannot contain encoded characters if (domain.includes('%')) { return true; } + // There must be at least 2 domain labels, and none of them can be empty. + const labels = domain.split('.'); + if (labels.length < 2 || labels.some(isEmpty)) { + return true; + } + // This is necesary because getDomain returns domains in punycode form. const unicodeDomain = nodeUrl.domainToUnicode ? nodeUrl.domainToUnicode(domain) diff --git a/test/modules/link_previews_test.js b/test/modules/link_previews_test.js index d0361dec8..5cba9a21a 100644 --- a/test/modules/link_previews_test.js +++ b/test/modules/link_previews_test.js @@ -397,6 +397,24 @@ describe('Link previews', () => { assert.strictEqual(isLinkSneaky(link), true); }); + it("returns true if the domain doesn't contain a .", () => { + assert.isTrue(isLinkSneaky('https://example')); + }); + + it('returns true if the domain has any empty labels', () => { + assert.isTrue(isLinkSneaky('https://example.')); + assert.isTrue(isLinkSneaky('https://example.com.')); + assert.isTrue(isLinkSneaky('https://.example.com')); + assert.isTrue(isLinkSneaky('https://..example.com')); + }); + + it('returns true if the domain is longer than 2048 UTF-16 code points', () => { + const domain = `${'a'.repeat(2041)}.example`; + assert.lengthOf(domain, 2049, 'Test domain is the incorrect length'); + const link = `https://${domain}/foo/bar`; + assert.isTrue(isLinkSneaky(link)); + }); + it('returns false for regular @ in url', () => { const link = 'https://lbry.tv/@ScammerRevolts:b0/DELETING-EVERY-FILE-OFF-A-SCAMMERS-LAPTOP-Destroyed:1';