statuses update

This commit is contained in:
sanek5g
2026-06-24 18:58:35 +03:00
parent fdddacf534
commit 0a9bfd0799
39 changed files with 2099 additions and 12 deletions

View File

@@ -18,6 +18,8 @@ import (
"github.com/joho/godotenv"
_ "github.com/jackc/pgx/v5/stdlib"
amqp "github.com/rabbitmq/amqp091-go"
"github.com/postmet/analyse/internal/pipelinestatus"
)
func init() {
@@ -326,7 +328,7 @@ func saveAnalysis(ctx context.Context, db *sql.DB, task WorkerMessage, analysis
transcription = COALESCE(NULLIF($4, ''), transcription),
metadata = COALESCE($5::jsonb, metadata),
updated_at = now(),
status = CASE WHEN tagging IS NOT NULL THEN 'done' ELSE status END
status = CASE WHEN tagging IS NOT NULL THEN 'done' ELSE 'in_progress' END
WHERE task_id = $1
RETURNING (analysis IS NOT NULL AND tagging IS NOT NULL)
`, task.TaskID, string(analysis), task.Filename, task.Transcription, string(metadata)).Scan(&complete)
@@ -336,6 +338,41 @@ func saveAnalysis(ctx context.Context, db *sql.DB, task WorkerMessage, analysis
return complete, nil
}
// setTaskInProgress upserts the results row for taskID and marks it
// 'in_progress'.
//
// When no row exists for the task one is inserted; otherwise the existing
// row's status and updated_at are refreshed. An empty filename is stored as
// NULL on insert and never replaces a filename already present on the row.
// The error from the database call is returned unchanged.
func setTaskInProgress(ctx context.Context, db *sql.DB, taskID, filename string) error {
	const upsert = `
INSERT INTO results (task_id, filename, status)
VALUES ($1, NULLIF($2, ''), 'in_progress')
ON CONFLICT (task_id) DO UPDATE SET
status = 'in_progress',
filename = COALESCE(NULLIF(EXCLUDED.filename, ''), results.filename),
updated_at = now()
`
	if _, err := db.ExecContext(ctx, upsert, taskID, filename); err != nil {
		return err
	}
	return nil
}
// setTaskError upserts the results row for taskID and marks it 'error'.
//
// A missing row is created with only task_id and status set; an existing row
// gets its status overwritten and updated_at refreshed. The error from the
// database call is returned unchanged.
func setTaskError(ctx context.Context, db *sql.DB, taskID string) error {
	const upsert = `
INSERT INTO results (task_id, status) VALUES ($1, 'error')
ON CONFLICT (task_id) DO UPDATE SET status = 'error', updated_at = now()
`
	if _, err := db.ExecContext(ctx, upsert, taskID); err != nil {
		return err
	}
	return nil
}
// publishStatus emits a pipeline status event for a task on the given queue.
//
// The event carries taskID, filename, status and stage; errMsg becomes the
// event's Error (an empty errMsg leaves it empty). Publishing is best-effort:
// a failure is logged as a warning and is not reported to the caller.
func publishStatus(ctx context.Context, ch *amqp.Channel, queue, taskID, filename, status, stage, errMsg string) {
	var ev pipelinestatus.Event
	ev.TaskID, ev.Filename = taskID, filename
	ev.Status, ev.Stage = status, stage
	ev.Error = errMsg // an empty errMsg is the same as leaving the field unset

	err := pipelinestatus.Publish(ctx, ch, queue, ev)
	if err == nil {
		return
	}
	slog.Warn("status publish failed", "worker", "analyse", "task_id", taskID, "stage", stage, "error", err)
}
// ===================== MAIN =====================
func loadDotenv() {
@@ -360,6 +397,7 @@ func main() {
apiURL := getEnv("YANDEX_API_URL", "https://ai.api.cloud.yandex.net/v1/chat/completions")
inputQueue := getEnv("ANALYSE_QUEUE", "analyse")
finalQueue := getEnv("FINAL_QUEUE", "final")
statusQueue := getEnv("STATUS_QUEUE", "pipeline.status")
if token == "" {
slog.Error("YANDEX_API_KEY is required")
@@ -397,6 +435,10 @@ func main() {
slog.Error("declare queue failed", "queue", finalQueue, "error", err)
os.Exit(1)
}
if err := pipelinestatus.DeclareQueue(ch, statusQueue); err != nil {
slog.Error("declare status queue failed", "queue", statusQueue, "error", err)
os.Exit(1)
}
ch.Qos(1, 0, false)
msgs, err := ch.Consume(inputQueue, "", false, false, false, false, nil)
@@ -404,7 +446,7 @@ func main() {
slog.Error("consume failed", "error", err)
os.Exit(1)
}
slog.Info("worker started", "worker", "analyse", "queue", inputQueue, "model", model)
slog.Info("worker started", "worker", "analyse", "queue", inputQueue, "status_queue", statusQueue, "model", model)
for d := range msgs {
taskStart := time.Now()
@@ -449,9 +491,18 @@ func main() {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
if err := setTaskInProgress(ctx, db, task.TaskID, task.Filename); err != nil {
slog.Warn("set in_progress failed", "worker", "analyse", "task_id", task.TaskID, "error", err)
}
publishStatus(ctx, ch, statusQueue, task.TaskID, task.Filename,
pipelinestatus.StatusInProgress, pipelinestatus.StageAnalysing, "")
result, stats, err := runAnalysis(ctx, apiURL, model, task.TaskID, task.Transcription, task.Prompts)
if err != nil {
cancel()
_ = setTaskError(context.Background(), db, task.TaskID)
publishStatus(context.Background(), ch, statusQueue, task.TaskID, task.Filename,
pipelinestatus.StatusError, pipelinestatus.StageAnalysing, err.Error())
slog.Warn("task failed, discarded",
"worker", "analyse", "task_id", task.TaskID,
"llm_calls_done", stats.LLMCalls,
@@ -482,7 +533,7 @@ func main() {
}
if complete {
notifyFinal(ctx, ch, db, finalQueue, task.TaskID, "analyse")
notifyFinal(ctx, ch, db, finalQueue, statusQueue, task.TaskID, task.Filename, "analyse")
slog.Info("task complete", append(taskAttrs, "was_last", "analyse")...)
} else {
slog.Info("task partial", append(taskAttrs, "waiting_for", "tagging")...)
@@ -549,12 +600,14 @@ func loadFinalPayload(ctx context.Context, db *sql.DB, taskID string) ([]byte, e
return json.Marshal(msg)
}
func notifyFinal(ctx context.Context, ch *amqp.Channel, db *sql.DB, queue, taskID, worker string) {
func notifyFinal(ctx context.Context, ch *amqp.Channel, db *sql.DB, queue, statusQueue, taskID, filename, worker string) {
body, err := loadFinalPayload(ctx, db, taskID)
if err != nil {
slog.Warn("load final payload failed", "worker", worker, "task_id", taskID, "error", err)
return
}
publishStatus(ctx, ch, statusQueue, taskID, filename,
pipelinestatus.StatusDone, pipelinestatus.StageCompleted, "")
if err := ch.PublishWithContext(ctx, "", queue, false, false,
amqp.Publishing{
ContentType: "application/json",

View File

@@ -0,0 +1,164 @@
package main
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
)
// TestTruncate checks that truncate leaves a short input untouched and that a
// 20-byte input cut at 10 comes back as 13 bytes ending in "...".
func TestTruncate(t *testing.T) {
	// Input within the limit must be returned unchanged.
	unchanged := truncate("short", 10)
	if unchanged != "short" {
		t.Fatalf("got %q", unchanged)
	}

	// Input over the limit: expect the limit plus a three-byte ellipsis.
	cut := truncate(strings.Repeat("a", 20), 10)
	sizeOK := len(cut) == 13
	suffixOK := strings.HasSuffix(cut, "...")
	if !(sizeOK && suffixOK) {
		t.Fatalf("got %q", cut)
	}
}
// TestTokenFingerprint checks that a short token is fully masked as "***" and
// that a 16-character token keeps its first eight and last four characters.
func TestTokenFingerprint(t *testing.T) {
	if masked := tokenFingerprint("short"); masked != "***" {
		t.Fatal("short token should be masked")
	}

	const token = "abcdefghijklmnop"
	got := tokenFingerprint(token)
	if strings.HasPrefix(got, "abcdefgh") && strings.HasSuffix(got, "mnop") {
		return
	}
	t.Fatalf("got %q", got)
}
func TestBuildPromptQuery(t *testing.T) {
p := Prompt{Name: "behavioral", Prompt: "Оцени звонок"}
q := buildPromptQuery("текст транскрипции", p)
if !strings.Contains(q, "Оцени звонок") {
t.Fatal("missing prompt text")
}
if !strings.Contains(q, "текст транскрипции") {
t.Fatal("missing transcription")
}
if !strings.Contains(q, "=== ТРАНСКРИПЦИЯ ===") {
t.Fatal("missing section header")
}
}
// TestRunAnalysis runs runAnalysis against a stub chat-completions server and
// checks that two prompts produce two requests, two result entries, and
// token usage summed across both responses.
//
// The stub also expects every request to carry "Bearer test-token", the value
// placed in YANDEX_API_KEY for the duration of the test.
func TestRunAnalysis(t *testing.T) {
	t.Setenv("YANDEX_API_KEY", "test-token")

	// NOTE(review): plain int assumes runAnalysis issues its requests
	// sequentially; switch to an atomic counter if it ever fans out.
	var calls int
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		calls++
		if auth := r.Header.Get("Authorization"); auth != "Bearer test-token" {
			// The handler runs on a server goroutine. FailNow (and therefore
			// Fatalf) must only be called from the goroutine running the
			// test, so record the failure with Errorf instead.
			t.Errorf("auth: got %q", auth)
		}
		resp := map[string]any{
			"choices": []map[string]any{
				{"message": map[string]any{"content": `{"ok":true}`}},
			},
			"usage": map[string]any{
				"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15,
			},
		}
		if err := json.NewEncoder(w).Encode(resp); err != nil {
			t.Errorf("encode response: %v", err)
		}
	}))
	defer srv.Close()

	prompts := []Prompt{
		{Name: "behavioral", Prompt: "p1"},
		{Name: "client_data", Prompt: "p2"},
	}
	result, stats, err := runAnalysis(context.Background(), srv.URL, "model", "task-1", "транскрипт", prompts)
	if err != nil {
		t.Fatal(err)
	}
	if calls != 2 {
		t.Fatalf("llm calls: got %d, want 2", calls)
	}
	if len(result) != 2 {
		t.Fatalf("result keys: %d", len(result))
	}
	// Each stub response reports 15 total tokens, so two calls give 30.
	if stats.LLMCalls != 2 || stats.TotalTokens != 30 {
		t.Fatalf("stats: %+v", stats)
	}
}
func TestRunAnalysisSkipsEmptyNames(t *testing.T) {
t.Setenv("YANDEX_API_KEY", "k")
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
_ = json.NewEncoder(w).Encode(map[string]any{
"choices": []map[string]any{{"message": map[string]any{"content": `{"x":1}`}}},
})
}))
defer srv.Close()
result, stats, err := runAnalysis(context.Background(), srv.URL, "m", "t", "text", []Prompt{
{Name: "", Prompt: "skip"},
{Name: "valid", Prompt: "go"},
})
if err != nil {
t.Fatal(err)
}
if len(result) != 1 {
t.Fatalf("want 1 result, got %d", len(result))
}
if stats.LLMCalls != 1 {
t.Fatalf("llm calls: %d", stats.LLMCalls)
}
}
// TestExtractFilePath checks that extractFilePath returns the "file_path"
// value from a JSON message and an empty string for a non-JSON body.
func TestExtractFilePath(t *testing.T) {
	const want = "/data/storage/processing/01HX.wav"

	payload := map[string]string{
		"task_id":   "01HX",
		"file_path": want,
	}
	body, _ := json.Marshal(payload)

	got := extractFilePath(body)
	if got != want {
		t.Fatalf("got %q", got)
	}

	if fromGarbage := extractFilePath([]byte("not json")); fromGarbage != "" {
		t.Fatal("invalid json should return empty")
	}
}
// TestDeleteProcessingFile checks that deleteProcessingFile removes a file
// that lives inside a "processing" directory.
func TestDeleteProcessingFile(t *testing.T) {
	processingDir := filepath.Join(t.TempDir(), "processing")
	err := os.MkdirAll(processingDir, 0o755)
	if err != nil {
		t.Fatal(err)
	}

	target := filepath.Join(processingDir, "01TASK.wav")
	if err = os.WriteFile(target, []byte("audio"), 0o644); err != nil {
		t.Fatal(err)
	}

	deleteProcessingFile(target, "01TASK", "analyse")

	_, err = os.Stat(target)
	if os.IsNotExist(err) {
		return
	}
	t.Fatal("file should be deleted")
}
// TestDeleteProcessingFileRejectsOutsideProcessing checks that
// deleteProcessingFile leaves a file alone when it is located in an
// "incoming" directory rather than a "processing" one.
func TestDeleteProcessingFileRejectsOutsideProcessing(t *testing.T) {
	incomingDir := filepath.Join(t.TempDir(), "incoming")
	err := os.MkdirAll(incomingDir, 0o755)
	if err != nil {
		t.Fatal(err)
	}

	outside := filepath.Join(incomingDir, "file.wav")
	if err = os.WriteFile(outside, []byte("x"), 0o644); err != nil {
		t.Fatal(err)
	}

	deleteProcessingFile(outside, "t", "analyse")

	// The file must still be there after the call.
	if _, statErr := os.Stat(outside); statErr != nil {
		t.Fatal("file outside processing must not be deleted")
	}
}
func TestAccumulateUsage(t *testing.T) {
stats := &analysisStats{}
accumulateUsage(stats, &llmCallResult{Usage: &tokenUsage{TotalTokens: 10, PromptTokens: 6, CompletionTokens: 4}})
accumulateUsage(stats, &llmCallResult{Usage: &tokenUsage{TotalTokens: 5, PromptTokens: 3, CompletionTokens: 2}})
if stats.LLMCalls != 2 || stats.TotalTokens != 15 {
t.Fatalf("stats: %+v", stats)
}
}

View File

@@ -0,0 +1,57 @@
package pipelinestatus
import (
"context"
"encoding/json"
"fmt"
"time"
amqp "github.com/rabbitmq/amqp091-go"
)
// Status values for the Status field of Event. Each constant is the literal
// string that ends up in the event's "status" JSON field.
const (
// StatusPending is the "pending" status.
StatusPending = "pending"
// StatusInProgress is the "in_progress" status.
StatusInProgress = "in_progress"
// StatusDone is the "done" status.
StatusDone = "done"
// StatusError is the "error" status.
StatusError = "error"
)
// Stage values for the Stage field of Event. Each constant is the literal
// string that ends up in the event's "stage" JSON field.
// NOTE(review): the names suggest one stage per pipeline step (queue,
// transcription, analysis, tagging, completion) — confirm against the workers.
const (
// StageQueued is the "queued" stage.
StageQueued = "queued"
// StageTranscribing is the "transcribing" stage.
StageTranscribing = "transcribing"
// StageAnalysing is the "analysing" stage.
StageAnalysing = "analysing"
// StageTagging is the "tagging" stage.
StageTagging = "tagging"
// StageCompleted is the "completed" stage.
StageCompleted = "completed"
)
// Event is the status message that Publish serializes to JSON and sends to a
// queue.
type Event struct {
// TaskID is encoded as "task_id".
TaskID string `json:"task_id"`
// Filename is encoded as "filename" and omitted from the JSON when empty.
Filename string `json:"filename,omitempty"`
// Status is encoded as "status"; see the Status* constants.
Status string `json:"status"`
// Stage is encoded as "stage"; see the Stage* constants.
Stage string `json:"stage"`
// Error is encoded as "error" and omitted from the JSON when empty.
Error string `json:"error,omitempty"`
// Timestamp is encoded as "timestamp". Publish replaces a zero value with
// the current Unix time in seconds.
Timestamp int64 `json:"timestamp"`
}
// DeclareQueue declares queue on ch with durable=true and autoDelete,
// exclusive and noWait all false, passing no extra arguments. It returns the
// error from the declaration unchanged.
func DeclareQueue(ch *amqp.Channel, queue string) error {
	const (
		durable    = true
		autoDelete = false
		exclusive  = false
		noWait     = false
	)
	if _, err := ch.QueueDeclare(queue, durable, autoDelete, exclusive, noWait, nil); err != nil {
		return err
	}
	return nil
}
// Publish serializes ev as JSON and publishes it on ch with an empty exchange
// name and queue as the routing key, as a persistent "application/json"
// message.
//
// A zero ev.Timestamp is replaced with the current Unix time in seconds; ev is
// received by value, so the caller's copy is not modified. A marshalling error
// is returned as-is; a publish error is wrapped with the "publish status"
// prefix.
func Publish(ctx context.Context, ch *amqp.Channel, queue string, ev Event) error {
	if ev.Timestamp == 0 {
		ev.Timestamp = time.Now().Unix()
	}

	payload, err := json.Marshal(ev)
	if err != nil {
		return err
	}

	msg := amqp.Publishing{
		ContentType:  "application/json",
		Body:         payload,
		DeliveryMode: amqp.Persistent,
	}
	err = ch.PublishWithContext(ctx, "", queue, false, false, msg)
	if err == nil {
		return nil
	}
	return fmt.Errorf("publish status: %w", err)
}