jojo/models/db/iterate.go
Gusted 77dbc35138 chore: add modernizer linter (#11936)
- Go has a suite of small linters that helps with modernizing Go code by using newer functions and catching small mistakes, https://pkg.go.dev/golang.org/x/tools/go/analysis/passes/modernize.
- Enable this linter in golangci-lint.
- There's also [`go fix`](https://go.dev/blog/gofix), which is not yet released as a linter in golangci-lint: https://github.com/golangci/golangci-lint/pull/6385

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/11936
Reviewed-by: Mathieu Fenniak <mfenniak@noreply.codeberg.org>
Co-authored-by: Gusted <postmaster@gusted.xyz>
Co-committed-by: Gusted <postmaster@gusted.xyz>
2026-04-02 03:29:37 +02:00

208 lines
7 KiB
Go

// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package db
import (
"context"
"errors"
"fmt"
"reflect"
"strings"
"forgejo.org/modules/setting"
"xorm.io/builder"
)
// Iterate invokes f once for every record of Bean's table that matches cond, reading the records in pages ordered by
// the table's primary key. Each page holds at most setting.Database.IterateBufferSize records, and every page after
// the first is restricted to primary keys greater than the last one already handled.
//
// The table must have exactly one primary key column; any other count is reported as an error. Iteration ends with nil
// once a page comes back empty, with ctx.Err() if ctx is done when a new page is about to be fetched, or with the
// first error returned by the query or by f.
func Iterate[Bean any](ctx context.Context, cond builder.Cond, f func(ctx context.Context, bean *Bean) error) error {
	var zero Bean
	pageSize := setting.Database.IterateBufferSize

	info, err := TableInfo(&zero)
	if err != nil {
		return fmt.Errorf("unable to fetch table info for bean %v: %w", zero, err)
	}
	if pkCount := len(info.PrimaryKeys); pkCount != 1 {
		return fmt.Errorf("iterate only supported on a table with 1 primary key field, but table %s had %d", info.Name, pkCount)
	}
	pkColumn := info.PrimaryKeys[0]

	// The query needs the column name, but reading the key back out of a loaded bean needs the Go struct field name.
	pkField := ""
	for _, col := range info.Columns() {
		if col.Name == pkColumn {
			pkField = col.FieldName
			break
		}
	}
	if pkField == "" {
		return fmt.Errorf("iterate unable to identify struct field for primary key %s", pkColumn)
	}

	// cursor is the primary key of the last record handled; nil until the first page has been processed.
	var cursor any
	for {
		// Non-blocking cancellation check before each page.
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		page := make([]*Bean, 0, pageSize)
		query := GetEngine(ctx).OrderBy(pkColumn)
		if cond != nil {
			query = query.Where(cond)
		}
		if cursor != nil {
			query = query.Where(builder.Gt{pkColumn: cursor})
		}
		if err := query.Limit(pageSize).Find(&page); err != nil {
			return err
		}
		if len(page) == 0 {
			return nil
		}

		for _, record := range page {
			if err := f(ctx, record); err != nil {
				return err
			}
		}
		cursor = extractFieldValue(page[len(page)-1], pkField)
	}
}
// extractFieldValue returns the value stored in the struct field called fieldName of bean. bean may be a struct or a
// pointer to a struct; a pointer is dereferenced once before the field lookup.
//
// No validation is performed: the reflect calls panic if the (dereferenced) value is not a struct, or if the field
// does not exist or is unexported.
func extractFieldValue(bean any, fieldName string) any {
	// reflect.Indirect dereferences pointers and passes every other kind through unchanged.
	target := reflect.Indirect(reflect.ValueOf(bean))
	return target.FieldByName(fieldName).Interface()
}
// IterateByKeyset iterates all the records on a database (matching the provided condition) in the order of specified
// order fields, and invokes the provided handler function for each record. It is safe to UPDATE or DELETE the record in
// the handler function, as long as the order fields are not mutated on the record (which could cause records to be
// missed or iterated multiple times).
//
// Assuming order fields a, b, and c, then database queries will be performed as "SELECT * FROM table WHERE (a, b, c) >
// (last_a, last_b, last_c) ORDER BY a, b, c LIMIT buffer_size" repeatedly until the query returns no records (except
// the first query will have no WHERE clause).
//
// Critical requirements for proper usage:
//
//   - the order fields encompass at least one UNIQUE or PRIMARY KEY constraint of the table to ensure that records are
//     not duplicated -- for example, if the table has a unique index covering `(repo_id, index)`, then it would be safe to
//     use this function as long as both fields (in either order) are provided as order fields.
//
//   - none of the order fields may have NULL values in them, as the `=` and `>` comparisons being performed by the
//     iterative queries will not operate on these records consistently as they do with other values.
//
// This implementation could be a much simpler streaming scan of the query results, except that doesn't permit making
// any additional database queries or data modifications in the target function -- SQLite cannot write while holding a
// read lock. Buffering pages of data in-memory avoids that issue.
//
// Errors:
//
//   - an error is returned before any query is made if orderFields is empty, if batchSize is negative, if the table
//     info for Bean cannot be fetched, or if an order field does not name a column of the table.
//
//   - during iteration, the first error from a query or from the handler function is returned as-is, and ctx.Err() is
//     returned if ctx is done when the next page is about to be fetched.
//
// Performance:
//
//   - High performance will result from an alignment of an index on the table with the order fields, in the same field
//     order, even if additional ordering fields could be provided after the index fields. In the absence of this index
//     alignment, it is reasonable to expect that every extra page of data accessed will require a query that will perform
//     an index scan (if available) or sequential scan of the target table. In testing on the `commit_status` table with
//     455k records, a fully index-supported ordering allowed each query page to execute in 0.18ms, as opposed to 80ms
//     per-query without matching supporting index.
//
//   - In the absence of a matching index, slower per-query performance can be compensated with a larger `batchSize`
//     parameter, which controls how many records to fetch at once and therefore reduces the number of queries required.
//     This requires more memory. Similar `commit_status` table testing showed these stats for iteration time and memory
//     usage for different buffer sizes; specifics will vary depending on the target table:
//   - buffer size = 1,000,000 - iterates in 2.8 seconds, consumes 363 MB of RAM
//   - buffer size = 100,000 - iterates in 3.5 seconds, consumes 130 MB of RAM
//   - buffer size = 10,000 - iterates in 7.1 seconds, consumes 59 MB of RAM
//   - buffer size = 1,000 - iterates in 33.9 seconds, consumes 42 MB of RAM
func IterateByKeyset[Bean any](ctx context.Context, cond builder.Cond, orderFields []string, batchSize int, f func(ctx context.Context, bean *Bean) error) error {
	var dummy Bean

	if len(orderFields) == 0 {
		return errors.New("orderFields must be provided")
	}
	if batchSize < 0 {
		// A negative capacity would panic in the make() call that allocates each page buffer.
		// NOTE(review): batchSize == 0 is still passed through to Limit() unchanged -- confirm whether it should be
		// rejected as well.
		return fmt.Errorf("batchSize must not be negative, but was %d", batchSize)
	}

	table, err := TableInfo(&dummy)
	if err != nil {
		return fmt.Errorf("unable to fetch table info for bean %v: %w", dummy, err)
	}

	// For every order field, resolve the Go struct field name (used to read the key out of the last bean of a page)
	// and the quoted SQL name (used in ORDER BY and in the keyset WHERE clause).
	goFieldNames := make([]string, len(orderFields))
	sqlFieldNames := make([]string, len(orderFields))
	placeholders := make([]string, len(orderFields))
	for i, name := range orderFields {
		column := table.GetColumn(name)
		if column == nil {
			// Without this check an unknown order field would be a nil pointer dereference.
			return fmt.Errorf("order field %q is not a column of table %s", name, table.Name)
		}
		goFieldNames[i] = column.FieldName
		// Support field names like "index" which need quoting in builder.Cond & OrderBy
		sqlFieldNames[i] = x.Dialect().Quoter().Quote(name)
		placeholders[i] = "?"
	}

	// For the order fields, generate the row-value comparison "(a, b, c) > (?, ?, ?)" which will be used in the WHERE
	// clause when reading additional pages of data. It does not change between pages, so it is built once.
	keysetExpr := fmt.Sprintf("(%s) > (%s)", strings.Join(sqlFieldNames, ", "), strings.Join(placeholders, ", "))

	// lastKey holds the order-field values of the last record handled; nil until the first page has been processed.
	var lastKey []any

	for {
		// Non-blocking cancellation check before each page.
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		beans := make([]*Bean, 0, batchSize)

		sess := GetEngine(ctx)
		for _, sqlName := range sqlFieldNames {
			sess = sess.OrderBy(sqlName)
		}
		if cond != nil {
			sess = sess.Where(cond)
		}
		if lastKey != nil {
			sess = sess.Where(builder.Expr(keysetExpr, lastKey...))
		}

		if err := sess.Limit(batchSize).Find(&beans); err != nil {
			return err
		}
		if len(beans) == 0 {
			return nil
		}

		for _, bean := range beans {
			if err := f(ctx, bean); err != nil {
				return err
			}
		}

		// Remember where this page ended so that the next query resumes strictly after it.
		lastBean := beans[len(beans)-1]
		lastKey = make([]any, len(goFieldNames))
		for i, goName := range goFieldNames {
			lastKey[i] = extractFieldValue(lastBean, goName)
		}
	}
}