From 002084b1bea4311fdac184f080206bf890937d80 Mon Sep 17 00:00:00 2001 From: Matthias Deiml Date: Sun, 26 Jun 2022 18:02:29 +0200 Subject: feat(markdown)!: switch to split parser (#3048) * switch to split markdown parser with separate block and inline parsers to improve performance * add exclude_children! directive (useful for something like "Injected markdown incorrectly highlights indented docstrings", #2212) * split markdown queries into block and inline ones and add the injection for inline into block grammar * add include_dir option to parser configs (needed because the two grammars don't live in the repo's root directory) BREAKING CHANGE: downstream queries need to be adapted to new parser --- lockfile.json | 5 ++- lua/nvim-treesitter/parsers.lua | 15 ++++++- lua/nvim-treesitter/query_predicates.lua | 25 ++++++++++++ queries/markdown/highlights.scm | 63 ++++-------------------------- queries/markdown/injections.scm | 6 +-- queries/markdown_inline/highlights.scm | 67 ++++++++++++++++++++++++++++++++ queries/markdown_inline/injections.scm | 1 + 7 files changed, 121 insertions(+), 61 deletions(-) create mode 100644 queries/markdown_inline/highlights.scm create mode 100644 queries/markdown_inline/injections.scm diff --git a/lockfile.json b/lockfile.json index c92ed250a..33e468610 100644 --- a/lockfile.json +++ b/lockfile.json @@ -192,7 +192,10 @@ "revision": "a4b9187417d6be349ee5fd4b6e77b4172c6827dd" }, "markdown": { - "revision": "be3e08acfd85bd87d85f41fde74fdcec25f76dbe" + "revision": "acb097808683eea7c6d6d469644a275b9f6a64f5" + }, + "markdown_inline": { + "revision": "acb097808683eea7c6d6d469644a275b9f6a64f5" }, "ninja": { "revision": "0a95cfdc0745b6ae82f60d3a339b37f19b7b9267" diff --git a/lua/nvim-treesitter/parsers.lua b/lua/nvim-treesitter/parsers.lua index dc02c9322..b24b6ce0c 100644 --- a/lua/nvim-treesitter/parsers.lua +++ b/lua/nvim-treesitter/parsers.lua @@ -528,9 +528,22 @@ list.hcl = { list.markdown = { install_info = { url = 
"https://github.com/MDeiml/tree-sitter-markdown", + location = "tree-sitter-markdown", files = { "src/parser.c", "src/scanner.cc" }, - branch = "main", + branch = "split_parser", + }, + maintainers = { "@MDeiml" }, + experimental = true, +} + +list.markdown_inline = { + install_info = { + url = "https://github.com/MDeiml/tree-sitter-markdown", + location = "tree-sitter-markdown-inline", + files = { "src/parser.c", "src/scanner.cc" }, + branch = "split_parser", }, + maintainers = { "@MDeiml" }, experimental = true, } diff --git a/lua/nvim-treesitter/query_predicates.lua b/lua/nvim-treesitter/query_predicates.lua index be713304c..75f0efc04 100644 --- a/lua/nvim-treesitter/query_predicates.lua +++ b/lua/nvim-treesitter/query_predicates.lua @@ -128,3 +128,28 @@ query.add_directive("downcase!", function(match, _, bufnr, pred, metadata) metadata[key] = string.lower(text) end end) + +query.add_directive("exclude_children!", function(match, _pattern, _bufnr, pred, metadata) + local capture_id = pred[2] + local node = match[capture_id] + local start_row, start_col, end_row, end_col = node:range() + local ranges = {} + for i = 0, node:named_child_count() - 1 do + local child = node:named_child(i) + local child_start_row, child_start_col, child_end_row, child_end_col = child:range() + if child_start_row > start_row or child_start_col > start_col then + table.insert(ranges, { + start_row, + start_col, + child_start_row, + child_start_col, + }) + end + start_row = child_end_row + start_col = child_end_col + end + if end_row > start_row or end_col > start_col then + table.insert(ranges, { start_row, start_col, end_row, end_col }) + end + metadata.content = ranges +end) diff --git a/queries/markdown/highlights.scm b/queries/markdown/highlights.scm index 9d6959b22..4cc81f9e7 100644 --- a/queries/markdown/highlights.scm +++ b/queries/markdown/highlights.scm @@ -1,6 +1,6 @@ -;; From MDeiml/tree-sitter-markdown -(atx_heading (heading_content) @text.title) -(setext_heading 
(heading_content) @text.title) +;From MDeiml/tree-sitter-markdown +(atx_heading (inline) @text.title) +(setext_heading (paragraph) @text.title) [ (atx_h1_marker) @@ -14,33 +14,23 @@ ] @punctuation.special [ - (code_span) (link_title) (indented_code_block) (fenced_code_block) ] @text.literal [ - (emphasis_delimiter) - (code_span_delimiter) (fenced_code_block_delimiter) ] @punctuation.delimiter (code_fence_content) @none -(emphasis) @text.emphasis - -(strong_emphasis) @text.strong - [ (link_destination) - (uri_autolink) ] @text.uri [ (link_label) - (link_text) - (image_description) ] @text.reference [ @@ -52,56 +42,17 @@ (thematic_break) ] @punctuation.special -(block_quote_marker) @punctuation.special +[ + (block_continuation) + (block_quote_marker) +] @punctuation.special [ (backslash_escape) - (hard_line_break) ] @string.escape -(image "!" @punctuation.delimiter) -(image "[" @punctuation.delimiter) -(image "]" @punctuation.delimiter) -(image "(" @punctuation.delimiter) -; (image ")" @punctuation.delimiter) - -(inline_link "[" @punctuation.delimiter) -(inline_link "]" @punctuation.delimiter) -(inline_link "(" @punctuation.delimiter) -; (inline_link ")" @punctuation.delimiter) - -(shortcut_link "[" @punctuation.delimiter) -(shortcut_link "]" @punctuation.delimiter) - ([ (info_string) (fenced_code_block_delimiter) - (code_span_delimiter) - (emphasis_delimiter) ] @conceal (#set! conceal "")) - - -; Conceal inline links -(inline_link - [ - "[" - "]" - "(" - (link_destination) - ")" -] @conceal -(#set! conceal "")) - - -; Conceal image links -(image - [ - "!" - "[" - "]" - "(" - (link_destination) - ")" -] @conceal - (#set! 
conceal "")) diff --git a/queries/markdown/injections.scm b/queries/markdown/injections.scm index 6fa047ac9..c5aca6c54 100644 --- a/queries/markdown/injections.scm +++ b/queries/markdown/injections.scm @@ -1,9 +1,9 @@ (fenced_code_block (info_string (language) @language) - (code_fence_content) @content) + (code_fence_content) @content (#exclude_children! @content)) ((html_block) @html) -((html_tag) @html) -(document . (thematic_break) (_) @yaml @combined (thematic_break)) +(document . (section . (thematic_break) (_) @yaml @combined (thematic_break))) +((inline) @markdown_inline (#exclude_children! @markdown_inline)) diff --git a/queries/markdown_inline/highlights.scm b/queries/markdown_inline/highlights.scm new file mode 100644 index 000000000..495e6f1ab --- /dev/null +++ b/queries/markdown_inline/highlights.scm @@ -0,0 +1,67 @@ +;; From MDeiml/tree-sitter-markdown +[ + (code_span) + (link_title) +] @text.literal + +[ + (emphasis_delimiter) + (code_span_delimiter) +] @punctuation.delimiter + +(emphasis) @text.emphasis + +(strong_emphasis) @text.strong + +[ + (link_destination) + (uri_autolink) +] @text.uri + +[ + (link_label) + (link_text) + (image_description) +] @text.reference + +[ + (backslash_escape) + (hard_line_break) +] @string.escape + +; ")" not part of query because of +; https://github.com/nvim-treesitter/nvim-treesitter/issues/2206 +; TODO: Find better fix for this +(image ["!" "[" "]" "("] @punctuation.delimiter) +(inline_link ["[" "]" "("] @punctuation.delimiter) +(shortcut_link ["[" "]"] @punctuation.delimiter) + +([ + (code_span_delimiter) + (emphasis_delimiter) +] @conceal +(#set! conceal "")) + +; Conceal inline links +(inline_link + [ + "[" + "]" + "(" + (link_destination) + ")" +] @conceal +(#set! conceal "")) + + +; Conceal image links +(image + [ + "!" + "[" + "]" + "(" + (link_destination) + ")" +] @conceal + (#set! 
conceal "")) diff --git a/queries/markdown_inline/injections.scm b/queries/markdown_inline/injections.scm new file mode 100644 index 000000000..5cf45dac2 --- /dev/null +++ b/queries/markdown_inline/injections.scm @@ -0,0 +1 @@ +((html_tag) @html) -- cgit v1.2.3-70-g09d2