Browse Source

Fix links with newline characters

* lisp/org-element.el (org-element-link-parser): Correctly parse links
  with newline characters.

* lisp/org.el (org-make-link-regexps): Allow newline characters within
  angle links.
(org-activate-angle-links): Support multiline angle links.

* testing/lisp/test-org-element.el (test-org-element/link-parser):
  Update tests.

As a rule of thumb, any newline character and all surrounding
whitespace are treated as a single space in a bracket link.  They are
removed in angle links.
Nicolas Goaziou 5 years ago
parent
commit
f63d76872c
3 changed files with 58 additions and 35 deletions
  1. 45 25
      lisp/org-element.el
  2. 3 5
      lisp/org.el
  3. 10 5
      testing/lisp/test-org-element.el

+ 45 - 25
lisp/org-element.el

@@ -3045,53 +3045,73 @@ Assume point is at the beginning of the link."
 	      contents-end (match-end 1)))
        ;; Type 2: Standard link, i.e. [[http://orgmode.org][homepage]]
        ((looking-at org-bracket-link-regexp)
-	(setq contents-begin (match-beginning 3)
-	      contents-end (match-end 3)
-	      link-end (match-end 0)
-	      ;; RAW-LINK is the original link.  Expand any
-	      ;; abbreviation in it.
-	      raw-link (org-translate-link
+	(setq contents-begin (match-beginning 3))
+	(setq contents-end (match-end 3))
+	(setq link-end (match-end 0))
+	;; RAW-LINK is the original link.  Expand any
+	;; abbreviation in it.
+	;;
+	;; Also treat any newline character and associated
+	;; indentation as a single space character.  This is not
+	;; compatible with RFC 3986, which requires ignoring
+	;; them altogether.  However, doing so would require
+	;; users to encode spaces on the fly when writing links
+	;; (e.g., insert [[shell:ls%20*.org]] instead of
+	;; [[shell:ls *.org]]), which defeats Org's focus on
+	;; simplicity.
+	(setq raw-link (org-translate-link
 			(org-link-expand-abbrev
-			 (org-match-string-no-properties 1))))
-	;; Determine TYPE of link and set PATH accordingly.
+			 (replace-regexp-in-string
+			  "[ \t]*\n[ \t]*" " "
+			  (org-match-string-no-properties 1)))))
+	;; Determine TYPE of link and set PATH accordingly.  Newline
+	;; characters and surrounding indentation in RAW-LINK were
+	;; already collapsed into a single space above, for every type.
 	(cond
 	 ;; File type.
 	 ((or (file-name-absolute-p raw-link)
 	      (string-match "\\`\\.\\.?/" raw-link))
-	  (setq type "file" path raw-link))
+	  (setq type "file")
+	  (setq path raw-link))
 	 ;; Explicit type (http, irc, bbdb...).  See `org-link-types'.
 	 ((string-match org-link-types-re raw-link)
-	  (setq type (match-string 1 raw-link)
-		;; According to RFC 3986, extra whitespace should be
-		;; ignored when a URI is extracted.
-		path (replace-regexp-in-string
-		      "[ \t]*\n[ \t]*" "" (substring raw-link (match-end 0)))))
+	  (setq type (match-string 1 raw-link))
+	  (setq path (substring raw-link (match-end 0))))
 	 ;; Id type: PATH is the id.
-	 ((string-match "\\`id:\\([-a-f0-9]+\\)" raw-link)
+	 ((string-match "\\`id:\\([-a-f0-9]+\\)\\'" raw-link)
 	  (setq type "id" path (match-string 1 raw-link)))
 	 ;; Code-ref type: PATH is the name of the reference.
-	 ((string-match "\\`(\\(.*\\))\\'" raw-link)
-	  (setq type "coderef" path (match-string 1 raw-link)))
+	 ((and (org-string-match-p "\\`(" raw-link)
+	       (org-string-match-p ")\\'" raw-link))
+	  (setq type "coderef")
+	  (setq path (substring raw-link 1 -1)))
 	 ;; Custom-id type: PATH is the name of the custom id.
 	 ((= (string-to-char raw-link) ?#)
-	  (setq type "custom-id" path (substring raw-link 1)))
+	  (setq type "custom-id")
+	  (setq path (substring raw-link 1)))
 	 ;; Fuzzy type: Internal link either matches a target, an
 	 ;; headline name or nothing.  PATH is the target or
 	 ;; headline's name.
-	 (t (setq type "fuzzy" path raw-link))))
+	 (t
+	  (setq type "fuzzy")
+	  (setq path raw-link))))
        ;; Type 3: Plain link, e.g., http://orgmode.org
        ((looking-at org-plain-link-re)
 	(setq raw-link (org-match-string-no-properties 0)
 	      type (org-match-string-no-properties 1)
 	      link-end (match-end 0)
 	      path (org-match-string-no-properties 2)))
-       ;; Type 4: Angular link, e.g., <http://orgmode.org>
+       ;; Type 4: Angular link, e.g., <http://orgmode.org>.  Unlike
+       ;; bracket links, follow RFC 3986 and remove any extra
+       ;; whitespace in URI.
        ((looking-at org-angle-link-re)
-	(setq raw-link (buffer-substring-no-properties
-			(match-beginning 1) (match-end 2))
-	      type (org-match-string-no-properties 1)
-	      link-end (match-end 0)
-	      path (org-match-string-no-properties 2)))
+	(setq type (org-match-string-no-properties 1))
+	(setq link-end (match-end 0))
+	(setq raw-link
+	      (buffer-substring-no-properties
+	       (match-beginning 1) (match-end 2)))
+	(setq path (replace-regexp-in-string
+		    "[ \t]*\n[ \t]*" "" (org-match-string-no-properties 2))))
        (t (throw 'no-object nil)))
       ;; In any case, deduce end point after trailing white space from
       ;; LINK-END variable.

+ 3 - 5
lisp/org.el

@@ -5712,10 +5712,7 @@ This should be called after the variable `org-link-types' has changed."
 		  "\\([^" org-non-link-chars " ]"
 		  "[^\t\n\r]*\\)")
 	  org-angle-link-re
-	  (concat "<" types-re ":"
-		  "\\([^" org-non-link-chars " ]"
-		  "[^" org-non-link-chars "]*"
-		  "\\)>")
+	  (format "<%s:\\(\n?\\(?:[^>\n]+\n?\\)*\\)>" types-re)
 	  org-plain-link-re
 	  (concat
 	   "\\<" types-re ":"
@@ -5998,7 +5995,8 @@ by a #."
 	(org-remove-flyspell-overlays-in (match-beginning 0) (match-end 0))
 	(add-text-properties (match-beginning 0) (match-end 0)
 			     (list 'mouse-face 'highlight
-				   'keymap org-mouse-map))
+				   'keymap org-mouse-map
+				   'font-lock-multiline t))
 	(org-rear-nonsticky-at (match-end 0))
 	t)))
 

+ 10 - 5
testing/lisp/test-org-element.el

@@ -1628,17 +1628,22 @@ e^{i\\pi}+1=0
        (equal (org-element-property :path (org-element-context)) file))))
   ;; ... multi-line link.
   (should
-   (equal "//orgmode.org"
-	  (org-test-with-temp-text "[[http://orgmode.\norg]]"
+   (equal "ls *.org"
+	  (org-test-with-temp-text "[[shell:ls\n*.org]]"
 	    (org-element-property :path (org-element-context)))))
   ;; Plain link.
   (should
    (org-test-with-temp-text "A link: http://orgmode.org"
      (org-element-map (org-element-parse-buffer) 'link 'identity)))
-  ;; Angular link.
+  ;; Angular link.  Follow RFC 3986.
   (should
-   (org-test-with-temp-text "A link: <http://orgmode.org>"
-     (org-element-map (org-element-parse-buffer) 'link 'identity nil t)))
+   (eq 'link
+       (org-test-with-temp-text "A link: <point><http://orgmode.org>"
+	 (org-element-type (org-element-context)))))
+  (should
+   (equal "//orgmode.org"
+       (org-test-with-temp-text "A link: <point><http://orgmode\n.org>"
+	 (org-element-property :path (org-element-context)))))
   ;; Link abbreviation.
   (should
    (equal "http"