mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2026-05-12 22:10:25 +00:00
This reverts commit d4951968f0, #10008.
When Forgejo cancels a job server-side, for example due to an additional push to an open PR, it immediately archives the logs from DBFS to disk due to the changes in #10008. Then, the runner recognizes that the job status is cancelled and it attempts to flush its pending logs to Forgejo, resulting in warnings being logged:
```
forgejo-runner.log:time="2026-02-23T01:32:11+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
forgejo-runner.log:time="2026-02-23T01:32:11+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
forgejo-runner.log:time="2026-02-23T01:32:11+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
forgejo-runner.log:time="2026-02-23T01:32:12+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
forgejo-runner.log:time="2026-02-23T01:32:13+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
forgejo-runner.log:time="2026-02-23T01:32:14+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
forgejo-runner.log:time="2026-02-23T01:32:16+01:00" level=info msg="runner: received shutdown signal"
forgejo-runner.log:time="2026-02-23T01:32:16+01:00" level=info msg="runner: shutdown initiated, waiting [runner].shutdown_timeout=0s for running jobs to complete before shutting down"
forgejo-runner.log:time="2026-02-23T01:32:16+01:00" level=info msg="[poller] shutdown begin, 1 tasks currently running"
forgejo-runner.log:time="2026-02-23T01:32:16+01:00" level=info msg="forcing the jobs to shutdown"
forgejo-runner.log:time="2026-02-23T01:32:18+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
forgejo-runner.log:time="2026-02-23T01:32:24+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
```
This appears to be the reason the `push-cancel` end-to-end test has been failing since #10008 was merged (see https://code.forgejo.org/forgejo/end-to-end/actions/runs/4985/jobs/8/attempt/1). The `push-cancel` test case itself seems to succeed, but the test process then aborts with `return 1`. The failure does not reproduce locally.
Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/11462
Reviewed-by: Michael Kriese <michael.kriese@gmx.de>
Co-authored-by: Mathieu Fenniak <mathieu@fenniak.net>
Co-committed-by: Mathieu Fenniak <mathieu@fenniak.net>
103 lines
3.1 KiB
Go
103 lines
3.1 KiB
Go
// Copyright 2022 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
package actions
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
actions_model "forgejo.org/models/actions"
|
|
"forgejo.org/models/db"
|
|
"forgejo.org/modules/actions"
|
|
"forgejo.org/modules/log"
|
|
"forgejo.org/modules/optional"
|
|
"forgejo.org/modules/setting"
|
|
"forgejo.org/modules/timeutil"
|
|
)
|
|
|
|
// StopZombieTasks stops the task which have running status, but haven't been updated for a long time
|
|
func StopZombieTasks(ctx context.Context) error {
|
|
return stopTasks(ctx, actions_model.FindTaskOptions{
|
|
Status: []actions_model.Status{actions_model.StatusRunning},
|
|
UpdatedBefore: timeutil.TimeStamp(time.Now().Add(-setting.Actions.ZombieTaskTimeout).Unix()),
|
|
})
|
|
}
|
|
|
|
// StopEndlessTasks stops the tasks which have running status and continuous updates, but don't end for a long time
|
|
func StopEndlessTasks(ctx context.Context) error {
|
|
return stopTasks(ctx, actions_model.FindTaskOptions{
|
|
Status: []actions_model.Status{actions_model.StatusRunning},
|
|
StartedBefore: timeutil.TimeStamp(time.Now().Add(-setting.Actions.EndlessTaskTimeout).Unix()),
|
|
})
|
|
}
|
|
|
|
func stopTasks(ctx context.Context, opts actions_model.FindTaskOptions) error {
|
|
tasks, err := db.Find[actions_model.ActionTask](ctx, opts)
|
|
if err != nil {
|
|
return fmt.Errorf("find tasks: %w", err)
|
|
}
|
|
|
|
jobs := make([]*actions_model.ActionRunJob, 0, len(tasks))
|
|
for _, task := range tasks {
|
|
if err := db.WithTx(ctx, func(ctx context.Context) error {
|
|
if err := StopTask(ctx, task.ID, actions_model.StatusFailure); err != nil {
|
|
return err
|
|
}
|
|
if err := task.LoadJob(ctx); err != nil {
|
|
return err
|
|
}
|
|
jobs = append(jobs, task.Job)
|
|
return nil
|
|
}); err != nil {
|
|
log.Warn("Cannot stop task %v: %v", task.ID, err)
|
|
continue
|
|
}
|
|
|
|
remove, err := actions.TransferLogs(ctx, task.LogFilename)
|
|
if err != nil {
|
|
log.Warn("Cannot transfer logs of task %v: %v", task.ID, err)
|
|
continue
|
|
}
|
|
task.LogInStorage = true
|
|
if err := actions_model.UpdateTask(ctx, task, "log_in_storage"); err != nil {
|
|
log.Warn("Cannot update task %v: %v", task.ID, err)
|
|
continue
|
|
}
|
|
remove()
|
|
}
|
|
|
|
CreateCommitStatus(ctx, jobs...)
|
|
|
|
return nil
|
|
}
|
|
|
|
// CancelAbandonedJobs cancels the jobs which have waiting status, but haven't been picked by a runner for a long time
|
|
func CancelAbandonedJobs(ctx context.Context) error {
|
|
jobs, err := db.Find[actions_model.ActionRunJob](ctx, actions_model.FindRunJobOptions{
|
|
Statuses: []actions_model.Status{actions_model.StatusWaiting, actions_model.StatusBlocked},
|
|
UpdatedBefore: timeutil.TimeStamp(time.Now().Add(-setting.Actions.AbandonedJobTimeout).Unix()),
|
|
RunNeedsApproval: optional.Some(false),
|
|
})
|
|
if err != nil {
|
|
log.Warn("find abandoned tasks: %v", err)
|
|
return err
|
|
}
|
|
|
|
now := timeutil.TimeStampNow()
|
|
for _, job := range jobs {
|
|
job.Status = actions_model.StatusCancelled
|
|
job.Stopped = now
|
|
if err := db.WithTx(ctx, func(ctx context.Context) error {
|
|
_, err := UpdateRunJob(ctx, job, nil, "status", "stopped")
|
|
return err
|
|
}); err != nil {
|
|
log.Warn("cancel abandoned job %v: %v", job.ID, err)
|
|
// go on
|
|
}
|
|
CreateCommitStatus(ctx, job)
|
|
}
|
|
|
|
return nil
|
|
}
|