feat(ui): support Pandoc style code blocks (#12099)

This resolves https://codeberg.org/forgejo/forgejo/issues/11107.

Codeberg doesn't support [Pandoc style code blocks](https://pandoc.org/MANUAL.html#extension-fenced_code_attributes), so only the first two of these three examples will have syntax highlighting.

\`\`\`haskell
qsort []     = []
qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++
               qsort (filter (>= x) xs)
\`\`\`

\`\`\`haskell {.numberLines}
qsort []     = []
qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++
               qsort (filter (>= x) xs)
\`\`\`

\`\`\`{.numberLines .haskell}
qsort []     = []
qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++
               qsort (filter (>= x) xs)
\`\`\`

```haskell
qsort []     = []
qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++
               qsort (filter (>= x) xs)
```

```haskell {.numberLines}
qsort []     = []
qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++
               qsort (filter (>= x) xs)
```

```{.numberLines .haskell}
qsort []     = []
qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++
               qsort (filter (>= x) xs)
```

This PR adds syntax highlighting to the examples with Pandoc style code blocks. It also adds redundant code to explicitly handle the second case with the trailing attribute syntax, which might be unnecessary since it already works, but I think it should be fine to leave in.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/12099
Reviewed-by: Ellen Εμίλια Άννα Zscheile <fogti@noreply.codeberg.org>
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
This commit is contained in:
TurtleArmy 2026-05-12 00:53:09 +02:00 committed by Gusted
parent 88fd372d9a
commit 5b6c702f41
2 changed files with 56 additions and 3 deletions

View file

@ -1489,14 +1489,14 @@ func TestCallout(t *testing.T) {
</blockquote>`) </blockquote>`)
} }
func TestCodeblockLanguageStripping(t *testing.T) { func TestCodeblockLanguageTransformation(t *testing.T) {
test := func(input, expected string) { test := func(input, expected string) {
buffer, err := markdown.RenderString(&markup.RenderContext{Ctx: git.DefaultContext}, input) buffer, err := markdown.RenderString(&markup.RenderContext{Ctx: git.DefaultContext}, input)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer))) assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer)))
} }
// Unstripped // No transformation
test( test(
"```rust\n"+ "```rust\n"+
"fn main() {}\n"+ "fn main() {}\n"+
@ -1504,7 +1504,7 @@ func TestCodeblockLanguageStripping(t *testing.T) {
`<pre class="code-block"><code class="chroma language-rust display"><span class="k">fn</span> <span class="nf">main</span><span class="p">()</span><span class="w"> </span><span class="p">{}</span><span class="w"> `<pre class="code-block"><code class="chroma language-rust display"><span class="k">fn</span> <span class="nf">main</span><span class="p">()</span><span class="w"> </span><span class="p">{}</span><span class="w">
</span></code></pre>`) </span></code></pre>`)
// Stripped // Comma stripped
test( test(
"```rust,ignore\n"+ "```rust,ignore\n"+
"fn main() {}\n"+ "fn main() {}\n"+
@ -1512,6 +1512,32 @@ func TestCodeblockLanguageStripping(t *testing.T) {
`<pre class="code-block"><code class="chroma language-rust display"><span class="k">fn</span> <span class="nf">main</span><span class="p">()</span><span class="w"> </span><span class="p">{}</span><span class="w"> `<pre class="code-block"><code class="chroma language-rust display"><span class="k">fn</span> <span class="nf">main</span><span class="p">()</span><span class="w"> </span><span class="p">{}</span><span class="w">
</span></code></pre>`) </span></code></pre>`)
// Pandoc stripping
// https://pandoc.org/MANUAL.html#extension-fenced_code_attributes
test(
"```haskell {.numberLines}\n"+
"qsort [] = []\n"+
"qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++\n"+
" qsort (filter (>= x) xs)\n"+
"```",
`<pre class="code-block"><code class="chroma language-haskell display"><span class="nf">qsort</span> <span class="kt">[]</span> <span class="ow">=</span> <span class="kt">[]</span>
<span class="nf">qsort</span> <span class="p">(</span><span class="n">x</span><span class="kt">:</span><span class="n">xs</span><span class="p">)</span> <span class="ow">=</span> <span class="n">qsort</span> <span class="p">(</span><span class="n">filter</span> <span class="p">(</span><span class="o">&lt;</span> <span class="n">x</span><span class="p">)</span> <span class="n">xs</span><span class="p">)</span> <span class="o">++</span> <span class="p">[</span><span class="n">x</span><span class="p">]</span> <span class="o">++</span>
<span class="n">qsort</span> <span class="p">(</span><span class="n">filter</span> <span class="p">(</span><span class="o">&gt;=</span> <span class="n">x</span><span class="p">)</span> <span class="n">xs</span><span class="p">)</span>
</code></pre>`)
// Pandoc language extracting
// https://pandoc.org/MANUAL.html#extension-fenced_code_attributes
test(
"``` { #mycode .numberLines .haskell startFrom=\"100\" } \n"+
"qsort [] = []\n"+
"qsort (x:xs) = qsort (filter (< x) xs) ++ [x] ++\n"+
" qsort (filter (>= x) xs)\n"+
"```",
`<pre class="code-block"><code class="chroma language-haskell display"><span class="nf">qsort</span> <span class="kt">[]</span> <span class="ow">=</span> <span class="kt">[]</span>
<span class="nf">qsort</span> <span class="p">(</span><span class="n">x</span><span class="kt">:</span><span class="n">xs</span><span class="p">)</span> <span class="ow">=</span> <span class="n">qsort</span> <span class="p">(</span><span class="n">filter</span> <span class="p">(</span><span class="o">&lt;</span> <span class="n">x</span><span class="p">)</span> <span class="n">xs</span><span class="p">)</span> <span class="o">++</span> <span class="p">[</span><span class="n">x</span><span class="p">]</span> <span class="o">++</span>
<span class="n">qsort</span> <span class="p">(</span><span class="n">filter</span> <span class="p">(</span><span class="o">&gt;=</span> <span class="n">x</span><span class="p">)</span> <span class="n">xs</span><span class="p">)</span>
</code></pre>`)
// No language identifier // No language identifier
test( test(
"```\n"+ "```\n"+

View file

@ -6,6 +6,7 @@ package markdown
import ( import (
"bytes" "bytes"
"github.com/alecthomas/chroma/v2/lexers"
"github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text" "github.com/yuin/goldmark/text"
) )
@ -16,6 +17,32 @@ func (g *ASTTransformer) transformCodeblockLanguage(v *ast.FencedCodeBlock, read
} }
src := reader.Source() src := reader.Source()
info := v.Info.Segment.Value(src) info := v.Info.Segment.Value(src)
// Parse Pandoc style attributes
// https://pandoc.org/MANUAL.html#extension-fenced_code_attributes
//
// For example,
// ```{.haskell .numberLines}
// ...
// ```
// Should have a language of "haskell", not "{.haskell .numberLines}"
if trimmed := bytes.TrimSpace(info); bytes.HasPrefix(trimmed, []byte{'{'}) && bytes.HasSuffix(trimmed, []byte{'}'}) {
attributes := trimmed[1 : len(trimmed)-1]
for attribute := range bytes.SplitSeq(attributes, []byte{' '}) {
if class, found := bytes.CutPrefix(attribute, []byte{'.'}); found {
if lexer := lexers.Get(string(class)); lexer != nil {
lang := class
langInx := bytes.Index(info, lang)
start := v.Info.Segment.Start + langInx
end := start + len(lang)
v.Info = ast.NewTextSegment(text.NewSegment(start, end))
return
}
}
}
return
}
// Strip language after commas // Strip language after commas
// //
// For example, // For example,