diff --git a/modules/markup/markdown/markdown_test.go b/modules/markup/markdown/markdown_test.go index 1f75799900..3aee7a372d 100644 --- a/modules/markup/markdown/markdown_test.go +++ b/modules/markup/markdown/markdown_test.go @@ -1489,14 +1489,14 @@ func TestCallout(t *testing.T) { `) } -func TestCodeblockLanguageStripping(t *testing.T) { +func TestCodeblockLanguageTransformation(t *testing.T) { test := func(input, expected string) { buffer, err := markdown.RenderString(&markup.RenderContext{Ctx: git.DefaultContext}, input) require.NoError(t, err) assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) } - // Unstripped + // No transformation test( "```rust\n"+ "fn main() {}\n"+ @@ -1504,7 +1504,7 @@ func TestCodeblockLanguageStripping(t *testing.T) { `
fn main() {}
 
`) - // Stripped + // Comma stripped test( "```rust,ignore\n"+ "fn main() {}\n"+ @@ -1512,6 +1512,32 @@ func TestCodeblockLanguageStripping(t *testing.T) { `
fn main() {}
 
`) + // Pandoc stripping + // https://pandoc.org/MANUAL.html#extension-fenced_code_attributes + test( + "```haskell {.numberLines}\n"+ + "qsort [] = []\n"+ + "qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++\n"+ + " qsort (filter (>= x) xs)\n"+ + "```", + `
qsort []     = []
+qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++
+               qsort (filter (>= x) xs)
+
`) + + // Pandoc language extracting + // https://pandoc.org/MANUAL.html#extension-fenced_code_attributes + test( + "``` { #mycode .numberLines .haskell startFrom=\"100\" } \n"+ + "qsort [] = []\n"+ + "qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++\n"+ + " qsort (filter (>= x) xs)\n"+ + "```", + `
qsort []     = []
+qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++
+               qsort (filter (>= x) xs)
+
`) + // No language identifier test( "```\n"+ diff --git a/modules/markup/markdown/transform_codeblock_lang.go b/modules/markup/markdown/transform_codeblock_lang.go index 5263ec4ffc..f730265b15 100644 --- a/modules/markup/markdown/transform_codeblock_lang.go +++ b/modules/markup/markdown/transform_codeblock_lang.go @@ -6,6 +6,7 @@ package markdown import ( "bytes" + "github.com/alecthomas/chroma/v2/lexers" "github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/text" ) @@ -16,6 +17,32 @@ func (g *ASTTransformer) transformCodeblockLanguage(v *ast.FencedCodeBlock, read } src := reader.Source() info := v.Info.Segment.Value(src) + + // Parse Pandoc style attributes + // https://pandoc.org/MANUAL.html#extension-fenced_code_attributes + // + // For example, + // ```{.haskell .numberLines} + // ... + // ``` + // Should have a language of "haskell", not "{.haskell .numberLines}" + if trimmed := bytes.TrimSpace(info); bytes.HasPrefix(trimmed, []byte{'{'}) && bytes.HasSuffix(trimmed, []byte{'}'}) { + attributes := trimmed[1 : len(trimmed)-1] + for attribute := range bytes.SplitSeq(attributes, []byte{' '}) { + if class, found := bytes.CutPrefix(attribute, []byte{'.'}); found { + if lexer := lexers.Get(string(class)); lexer != nil { + lang := class + langInx := bytes.Index(info, lang) + start := v.Info.Segment.Start + langInx + end := start + len(lang) + v.Info = ast.NewTextSegment(text.NewSegment(start, end)) + return + } + } + } + return + } + // Strip language after commas // // For example,