diff options
author | Nicolas Goaziou <mail@nicolasgoaziou.fr> | 2015-08-04 16:40:25 +0200 |
---|---|---|
committer | Nicolas Goaziou <mail@nicolasgoaziou.fr> | 2015-08-04 16:40:25 +0200 |
commit | f63d76872c77b410f3c28afce5a2919dee2f9242 (patch) | |
tree | 4ee8000bcc4a0aecb3204fa6d85826762b500682 | |
parent | 02532389f498da7bdf09020dc0b3a9dca1920d8e (diff) | |
download | org-mode-f63d76872c77b410f3c28afce5a2919dee2f9242.tar.gz |
Fix links with newline characters
* lisp/org-element.el (org-element-link-parser): Correctly parse links
with newline characters.
* lisp/org.el (org-make-link-regexps): Allow newline characters within
angle links.
(org-activate-angle-links): Support multiline angle links.
* testing/lisp/test-org-element.el (test-org-element/link-parser):
Update tests.
As a rule of thumb, any newline character and all surrounding
whitespace are treated as a single space in a bracket link. In angle
links, the newline and its surrounding whitespace are removed altogether.
-rw-r--r-- | lisp/org-element.el | 70 | ||||
-rwxr-xr-x | lisp/org.el | 8 | ||||
-rw-r--r-- | testing/lisp/test-org-element.el | 15 |
3 files changed, 58 insertions, 35 deletions
diff --git a/lisp/org-element.el b/lisp/org-element.el index aa6e8d0..c7e76e8 100644 --- a/lisp/org-element.el +++ b/lisp/org-element.el @@ -3045,53 +3045,73 @@ Assume point is at the beginning of the link." contents-end (match-end 1))) ;; Type 2: Standard link, i.e. [[http://orgmode.org][homepage]] ((looking-at org-bracket-link-regexp) - (setq contents-begin (match-beginning 3) - contents-end (match-end 3) - link-end (match-end 0) - ;; RAW-LINK is the original link. Expand any - ;; abbreviation in it. - raw-link (org-translate-link + (setq contents-begin (match-beginning 3)) + (setq contents-end (match-end 3)) + (setq link-end (match-end 0)) + ;; RAW-LINK is the original link. Expand any + ;; abbreviation in it. + ;; + ;; Also treat any newline character and associated + ;; indentation as a single space character. This is not + ;; compatible with RFC 3986, which requires to ignore + ;; them altogether. However, doing so would require + ;; users to encode spaces on the fly when writing links + ;; (e.g., insert [[shell:ls%20*.org]] instead of + ;; [[shell:ls *.org]], which defeats Org's focus on + ;; simplicity. + (setq raw-link (org-translate-link (org-link-expand-abbrev - (org-match-string-no-properties 1)))) - ;; Determine TYPE of link and set PATH accordingly. + (replace-regexp-in-string + "[ \t]*\n[ \t]*" " " + (org-match-string-no-properties 1))))) + ;; Determine TYPE of link and set PATH accordingly. According + ;; to RFC 3986, remove whitespaces from URI in external links. + ;; In internal ones, treat indentation as a single space. (cond ;; File type. ((or (file-name-absolute-p raw-link) (string-match "\\`\\.\\.?/" raw-link)) - (setq type "file" path raw-link)) + (setq type "file") + (setq path raw-link)) ;; Explicit type (http, irc, bbdb...). See `org-link-types'. ((string-match org-link-types-re raw-link) - (setq type (match-string 1 raw-link) - ;; According to RFC 3986, extra whitespace should be - ;; ignored when a URI is extracted. 
- path (replace-regexp-in-string - "[ \t]*\n[ \t]*" "" (substring raw-link (match-end 0))))) + (setq type (match-string 1 raw-link)) + (setq path (substring raw-link (match-end 0)))) ;; Id type: PATH is the id. - ((string-match "\\`id:\\([-a-f0-9]+\\)" raw-link) + ((string-match "\\`id:\\([-a-f0-9]+\\)\\'" raw-link) (setq type "id" path (match-string 1 raw-link))) ;; Code-ref type: PATH is the name of the reference. - ((string-match "\\`(\\(.*\\))\\'" raw-link) - (setq type "coderef" path (match-string 1 raw-link))) + ((and (org-string-match-p "\\`(" raw-link) + (org-string-match-p ")\\'" raw-link)) + (setq type "coderef") + (setq path (substring raw-link 1 -1))) ;; Custom-id type: PATH is the name of the custom id. ((= (string-to-char raw-link) ?#) - (setq type "custom-id" path (substring raw-link 1))) + (setq type "custom-id") + (setq path (substring raw-link 1))) ;; Fuzzy type: Internal link either matches a target, an ;; headline name or nothing. PATH is the target or ;; headline's name. - (t (setq type "fuzzy" path raw-link)))) + (t + (setq type "fuzzy") + (setq path raw-link)))) ;; Type 3: Plain link, e.g., http://orgmode.org ((looking-at org-plain-link-re) (setq raw-link (org-match-string-no-properties 0) type (org-match-string-no-properties 1) link-end (match-end 0) path (org-match-string-no-properties 2))) - ;; Type 4: Angular link, e.g., <http://orgmode.org> + ;; Type 4: Angular link, e.g., <http://orgmode.org>. Unlike to + ;; bracket links, follow RFC 3986 and remove any extra + ;; whitespace in URI. 
((looking-at org-angle-link-re) - (setq raw-link (buffer-substring-no-properties - (match-beginning 1) (match-end 2)) - type (org-match-string-no-properties 1) - link-end (match-end 0) - path (org-match-string-no-properties 2))) + (setq type (org-match-string-no-properties 1)) + (setq link-end (match-end 0)) + (setq raw-link + (buffer-substring-no-properties + (match-beginning 1) (match-end 2))) + (setq path (replace-regexp-in-string + "[ \t]*\n[ \t]*" "" (org-match-string-no-properties 2)))) (t (throw 'no-object nil))) ;; In any case, deduce end point after trailing white space from ;; LINK-END variable. diff --git a/lisp/org.el b/lisp/org.el index 886608b..484f3ff 100755 --- a/lisp/org.el +++ b/lisp/org.el @@ -5712,10 +5712,7 @@ This should be called after the variable `org-link-types' has changed." "\\([^" org-non-link-chars " ]" "[^\t\n\r]*\\)") org-angle-link-re - (concat "<" types-re ":" - "\\([^" org-non-link-chars " ]" - "[^" org-non-link-chars "]*" - "\\)>") + (format "<%s:\\(\n?\\(?:[^>\n]+\n?\\)*\\)>" types-re) org-plain-link-re (concat "\\<" types-re ":" @@ -5998,7 +5995,8 @@ by a #." (org-remove-flyspell-overlays-in (match-beginning 0) (match-end 0)) (add-text-properties (match-beginning 0) (match-end 0) (list 'mouse-face 'highlight - 'keymap org-mouse-map)) + 'keymap org-mouse-map + 'font-lock-multiline t)) (org-rear-nonsticky-at (match-end 0)) t))) diff --git a/testing/lisp/test-org-element.el b/testing/lisp/test-org-element.el index 9d9ac86..f55c3ee 100644 --- a/testing/lisp/test-org-element.el +++ b/testing/lisp/test-org-element.el @@ -1628,17 +1628,22 @@ e^{i\\pi}+1=0 (equal (org-element-property :path (org-element-context)) file)))) ;; ... multi-line link. (should - (equal "//orgmode.org" - (org-test-with-temp-text "[[http://orgmode.\norg]]" + (equal "ls *.org" + (org-test-with-temp-text "[[shell:ls\n*.org]]" (org-element-property :path (org-element-context))))) ;; Plain link. 
(should (org-test-with-temp-text "A link: http://orgmode.org" (org-element-map (org-element-parse-buffer) 'link 'identity))) - ;; Angular link. + ;; Angular link. Follow RFC 3986. (should - (org-test-with-temp-text "A link: <http://orgmode.org>" - (org-element-map (org-element-parse-buffer) 'link 'identity nil t))) + (eq 'link + (org-test-with-temp-text "A link: <point><http://orgmode.org>" + (org-element-type (org-element-context))))) + (should + (equal "//orgmode.org" + (org-test-with-temp-text "A link: <point><http://orgmode\n.org>" + (org-element-property :path (org-element-context))))) ;; Link abbreviation. (should (equal "http" |