jojo/services/actions/clear_tasks.go
Mathieu Fenniak 6bac9e29e7 Revert "fix: ensure actions logs are transferred when a task is done (#10008)" (#11462)
This reverts commit d4951968f0, #10008.

When Forgejo cancels a job server-side, for example due to an additional push to an open PR, it immediately archives the logs from DBFS to disk due to the changes in #10008.  Then, the runner recognizes that the job status is cancelled and it attempts to flush its pending logs to Forgejo, resulting in warnings being logged:

```
forgejo-runner.log:time="2026-02-23T01:32:11+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
forgejo-runner.log:time="2026-02-23T01:32:11+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
forgejo-runner.log:time="2026-02-23T01:32:11+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
forgejo-runner.log:time="2026-02-23T01:32:12+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
forgejo-runner.log:time="2026-02-23T01:32:13+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
forgejo-runner.log:time="2026-02-23T01:32:14+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
forgejo-runner.log:time="2026-02-23T01:32:16+01:00" level=info msg="runner: received shutdown signal"
forgejo-runner.log:time="2026-02-23T01:32:16+01:00" level=info msg="runner: shutdown initiated, waiting [runner].shutdown_timeout=0s for running jobs to complete before shutting down"
forgejo-runner.log:time="2026-02-23T01:32:16+01:00" level=info msg="[poller] shutdown begin, 1 tasks currently running"
forgejo-runner.log:time="2026-02-23T01:32:16+01:00" level=info msg="forcing the jobs to shutdown"
forgejo-runner.log:time="2026-02-23T01:32:18+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
forgejo-runner.log:time="2026-02-23T01:32:24+01:00" level=warning msg="uploading final logs failed, but will be retried: already_exists: log file has been archived" task_id=51
```

This appears to be the cause of the `push-cancel` end-to-end test failing since #10008 was merged.  https://code.forgejo.org/forgejo/end-to-end/actions/runs/4985/jobs/8/attempt/1   The `push-cancel` test case itself seems to succeed, but then the test process aborts with `return 1`.  Doesn't reproduce locally.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/11462
Reviewed-by: Michael Kriese <michael.kriese@gmx.de>
Co-authored-by: Mathieu Fenniak <mathieu@fenniak.net>
Co-committed-by: Mathieu Fenniak <mathieu@fenniak.net>
2026-03-02 15:34:09 +01:00

103 lines
3.1 KiB
Go

// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package actions
import (
"context"
"fmt"
"time"
actions_model "forgejo.org/models/actions"
"forgejo.org/models/db"
"forgejo.org/modules/actions"
"forgejo.org/modules/log"
"forgejo.org/modules/optional"
"forgejo.org/modules/setting"
"forgejo.org/modules/timeutil"
)
// StopZombieTasks stops the task which have running status, but haven't been updated for a long time
func StopZombieTasks(ctx context.Context) error {
return stopTasks(ctx, actions_model.FindTaskOptions{
Status: []actions_model.Status{actions_model.StatusRunning},
UpdatedBefore: timeutil.TimeStamp(time.Now().Add(-setting.Actions.ZombieTaskTimeout).Unix()),
})
}
// StopEndlessTasks stops the tasks which have running status and continuous updates, but don't end for a long time
func StopEndlessTasks(ctx context.Context) error {
return stopTasks(ctx, actions_model.FindTaskOptions{
Status: []actions_model.Status{actions_model.StatusRunning},
StartedBefore: timeutil.TimeStamp(time.Now().Add(-setting.Actions.EndlessTaskTimeout).Unix()),
})
}
func stopTasks(ctx context.Context, opts actions_model.FindTaskOptions) error {
tasks, err := db.Find[actions_model.ActionTask](ctx, opts)
if err != nil {
return fmt.Errorf("find tasks: %w", err)
}
jobs := make([]*actions_model.ActionRunJob, 0, len(tasks))
for _, task := range tasks {
if err := db.WithTx(ctx, func(ctx context.Context) error {
if err := StopTask(ctx, task.ID, actions_model.StatusFailure); err != nil {
return err
}
if err := task.LoadJob(ctx); err != nil {
return err
}
jobs = append(jobs, task.Job)
return nil
}); err != nil {
log.Warn("Cannot stop task %v: %v", task.ID, err)
continue
}
remove, err := actions.TransferLogs(ctx, task.LogFilename)
if err != nil {
log.Warn("Cannot transfer logs of task %v: %v", task.ID, err)
continue
}
task.LogInStorage = true
if err := actions_model.UpdateTask(ctx, task, "log_in_storage"); err != nil {
log.Warn("Cannot update task %v: %v", task.ID, err)
continue
}
remove()
}
CreateCommitStatus(ctx, jobs...)
return nil
}
// CancelAbandonedJobs cancels the jobs which have waiting status, but haven't been picked by a runner for a long time
func CancelAbandonedJobs(ctx context.Context) error {
jobs, err := db.Find[actions_model.ActionRunJob](ctx, actions_model.FindRunJobOptions{
Statuses: []actions_model.Status{actions_model.StatusWaiting, actions_model.StatusBlocked},
UpdatedBefore: timeutil.TimeStamp(time.Now().Add(-setting.Actions.AbandonedJobTimeout).Unix()),
RunNeedsApproval: optional.Some(false),
})
if err != nil {
log.Warn("find abandoned tasks: %v", err)
return err
}
now := timeutil.TimeStampNow()
for _, job := range jobs {
job.Status = actions_model.StatusCancelled
job.Stopped = now
if err := db.WithTx(ctx, func(ctx context.Context) error {
_, err := UpdateRunJob(ctx, job, nil, "status", "stopped")
return err
}); err != nil {
log.Warn("cancel abandoned job %v: %v", job.ID, err)
// go on
}
CreateCommitStatus(ctx, job)
}
return nil
}