mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2026-05-12 22:10:25 +00:00
fix: retry ActionRun updates when optimistic-concurrency-control indicates record changed (#10893)
When concurrent updates occur to the `action_run` table, fetching a task via `FetchTask` can result in an error: ``` time="2026-01-16T16:02:30Z" level=error msg="failed to fetch task" error="internal: pick task: CreateTaskForRunner: update run 2358339: run has changed" ``` This error causes no known harm. However, by the time the conflict is detected, 90% of the work to fetch a task has already been done, so it is more efficient to retry the update and complete that work than to error out and roll back. Discovered while investigating https://code.forgejo.org/forgejo/runner/issues/1302#issuecomment-73859, although the investigation did not indicate that this error is a cause of that bug. ## Checklist The [contributor guide](https://forgejo.org/docs/next/contributor/) contains information that will be helpful to first time contributors. There also are a few [conditions for merging Pull Requests in Forgejo repositories](https://codeberg.org/forgejo/governance/src/branch/main/PullRequestsAgreement.md). You are also welcome to join the [Forgejo development chatroom](https://matrix.to/#/#forgejo-development:matrix.org). ### Tests - I added test coverage for Go changes... - [ ] in their respective `*_test.go` for unit tests. - [ ] in the `tests/integration` directory if it involves interactions with a live Forgejo server. - I added test coverage for JavaScript changes... - [ ] in `web_src/js/*.test.js` if it can be unit tested. - [ ] in `tests/e2e/*.test.e2e.js` if it requires interactions with a live Forgejo server (see also the [developer guide for JavaScript testing](https://codeberg.org/forgejo/forgejo/src/branch/forgejo/tests/e2e/README.md#end-to-end-tests)). ### Documentation - [ ] I created a pull request [to the documentation](https://codeberg.org/forgejo/docs) to explain to Forgejo users how to use this change. - [ ] I did not document these changes and I do not expect someone else to do it. ### Release notes - [ ] This change will be noticed by a Forgejo user or admin (feature, bug fix, performance, etc.). 
I suggest to include a release note for this change. - [ ] This change is not visible to a Forgejo user or admin (refactor, dependency upgrade, etc.). I think there is no need to add a release note for this change. *The decision if the pull request will be shown in the release notes is up to the mergers / release team.* The content of the `release-notes/<pull request number>.md` file will serve as the basis for the release notes. If the file does not exist, the title of the pull request will be used instead. Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/10893 Reviewed-by: Michael Kriese <michael.kriese@gmx.de> Reviewed-by: Andreas Ahlenstorf <aahlenst@noreply.codeberg.org> Co-authored-by: Mathieu Fenniak <mathieu@fenniak.net> Co-committed-by: Mathieu Fenniak <mathieu@fenniak.net>
This commit is contained in:
parent
157a9516f7
commit
9f8dda6325
3 changed files with 89 additions and 8 deletions
|
|
@ -5,12 +5,14 @@ package actions
|
|||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"slices"
|
||||
"time"
|
||||
|
||||
"forgejo.org/models/db"
|
||||
"forgejo.org/modules/container"
|
||||
"forgejo.org/modules/log"
|
||||
"forgejo.org/modules/timeutil"
|
||||
"forgejo.org/modules/util"
|
||||
|
||||
|
|
@ -174,7 +176,7 @@ func UpdateRunJobWithoutNotification(ctx context.Context, job *ActionRunJob, con
|
|||
}
|
||||
}
|
||||
|
||||
{
|
||||
for {
|
||||
// Other goroutines may aggregate the status of the run and update it too.
|
||||
// So we need to load the run and its jobs before updating the run.
|
||||
run, err := GetRunByID(ctx, job.RunID)
|
||||
|
|
@ -185,23 +187,39 @@ func UpdateRunJobWithoutNotification(ctx context.Context, job *ActionRunJob, con
|
|||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
run.Status = AggregateJobStatus(jobs)
|
||||
|
||||
updateRequired := false
|
||||
newStatus := AggregateJobStatus(jobs)
|
||||
if run.Status != newStatus {
|
||||
run.Status = newStatus
|
||||
updateRequired = true
|
||||
}
|
||||
if run.Started.IsZero() && run.Status.IsRunning() {
|
||||
run.Started = timeutil.TimeStampNow()
|
||||
updateRequired = true
|
||||
}
|
||||
if run.Stopped.IsZero() && run.Status.IsDone() {
|
||||
run.Stopped = timeutil.TimeStampNow()
|
||||
updateRequired = true
|
||||
}
|
||||
// As the caller has to ensure the ActionRunNowDone notification is sent we can ignore doing so here.
|
||||
if err := UpdateRunWithoutNotification(ctx, run, "status", "started", "stopped"); err != nil {
|
||||
return 0, fmt.Errorf("update run %d: %w", run.ID, err)
|
||||
if updateRequired {
|
||||
// As the caller has to ensure the ActionRunNowDone notification is sent we can ignore doing so here.
|
||||
if err := UpdateRunWithoutNotification(ctx, run, "status", "started", "stopped"); err != nil && errors.Is(err, ErrActionRunOutOfDate) {
|
||||
// Retry update; another session affected `run` simultaneously. It wasn't necessarily another update
|
||||
// from this same loop -- there are other codepaths that update `ActionRun`.
|
||||
log.Debug("UpdateRunWithoutNotification failed with %v; looping for retry", err)
|
||||
continue
|
||||
} else if err != nil {
|
||||
return 0, fmt.Errorf("update run %d: %w", run.ID, err)
|
||||
}
|
||||
}
|
||||
break // exit retry loop
|
||||
}
|
||||
|
||||
return affected, nil
|
||||
}
|
||||
|
||||
func AggregateJobStatus(jobs []*ActionRunJob) Status {
|
||||
var AggregateJobStatus = func(jobs []*ActionRunJob) Status {
|
||||
allSuccessOrSkipped := len(jobs) != 0
|
||||
allSkipped := len(jobs) != 0
|
||||
var hasFailure, hasCancelled, hasWaiting, hasRunning, hasBlocked bool
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue