diff --git a/modules/markup/markdown/markdown_test.go b/modules/markup/markdown/markdown_test.go index 1f75799900..3aee7a372d 100644 --- a/modules/markup/markdown/markdown_test.go +++ b/modules/markup/markdown/markdown_test.go @@ -1489,14 +1489,14 @@ func TestCallout(t *testing.T) { `) } -func TestCodeblockLanguageStripping(t *testing.T) { +func TestCodeblockLanguageTransformation(t *testing.T) { test := func(input, expected string) { buffer, err := markdown.RenderString(&markup.RenderContext{Ctx: git.DefaultContext}, input) require.NoError(t, err) assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) } - // Unstripped + // No transformation test( "```rust\n"+ "fn main() {}\n"+ @@ -1504,7 +1504,7 @@ func TestCodeblockLanguageStripping(t *testing.T) { `
fn main() {}
`)
- // Stripped
+ // Comma stripped
test(
"```rust,ignore\n"+
"fn main() {}\n"+
@@ -1512,6 +1512,32 @@ func TestCodeblockLanguageStripping(t *testing.T) {
`fn main() {}
`)
+ // Pandoc attribute block after the language is stripped
+ // https://pandoc.org/MANUAL.html#extension-fenced_code_attributes
+ test(
+ "```haskell {.numberLines}\n"+
+ "qsort [] = []\n"+
+ "qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++\n"+
+ " qsort (filter (>= x) xs)\n"+
+ "```",
+ `qsort [] = []
+qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++
+ qsort (filter (>= x) xs)
+`)
+
+ // Pandoc language extracted from a brace-only attribute block
+ // https://pandoc.org/MANUAL.html#extension-fenced_code_attributes
+ test(
+ "``` { #mycode .numberLines .haskell startFrom=\"100\" } \n"+
+ "qsort [] = []\n"+
+ "qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++\n"+
+ " qsort (filter (>= x) xs)\n"+
+ "```",
+ `qsort [] = []
+qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++
+ qsort (filter (>= x) xs)
+`)
+
// No language identifier
test(
"```\n"+
diff --git a/modules/markup/markdown/transform_codeblock_lang.go b/modules/markup/markdown/transform_codeblock_lang.go
index 5263ec4ffc..f730265b15 100644
--- a/modules/markup/markdown/transform_codeblock_lang.go
+++ b/modules/markup/markdown/transform_codeblock_lang.go
@@ -6,6 +6,7 @@ package markdown
import (
"bytes"
+ "github.com/alecthomas/chroma/v2/lexers"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text"
)
@@ -16,6 +17,32 @@ func (g *ASTTransformer) transformCodeblockLanguage(v *ast.FencedCodeBlock, read
}
src := reader.Source()
info := v.Info.Segment.Value(src)
+
+ // Parse Pandoc-style fenced code attributes
+ // https://pandoc.org/MANUAL.html#extension-fenced_code_attributes
+ //
+ // For example,
+ // ```{.haskell .numberLines}
+ // ...
+ // ```
+ // Should have a language of "haskell", not "{.haskell .numberLines}"
+ if trimmed := bytes.TrimSpace(info); bytes.HasPrefix(trimmed, []byte{'{'}) && bytes.HasSuffix(trimmed, []byte{'}'}) {
+ attributes := trimmed[1 : len(trimmed)-1]
+ for attribute := range bytes.SplitSeq(attributes, []byte{' '}) {
+ if class, found := bytes.CutPrefix(attribute, []byte{'.'}); found {
+ if lexer := lexers.Get(string(class)); lexer != nil {
+ lang := class
+ langInx := bytes.Index(info, lang)
+ start := v.Info.Segment.Start + langInx
+ end := start + len(lang)
+ v.Info = ast.NewTextSegment(text.NewSegment(start, end))
+ return
+ }
+ }
+ }
+ return
+ }
+
// Strip language after commas
//
// For example,