statuses update

This commit is contained in:
sanek5g
2026-06-24 18:58:35 +03:00
parent fdddacf534
commit 0a9bfd0799
39 changed files with 2099 additions and 12 deletions

View File

@@ -15,6 +15,7 @@ type Config struct {
InputExchange string
InputRoutingKey string
Prefetch int
StatusQueue string
NexaraBaseURL string
NexaraAPIKey string
@@ -38,6 +39,7 @@ func Load() Config {
InputExchange: getEnv("RABBITMQ_EXCHANGE", "audio_pipeline"),
InputRoutingKey: getEnv("RABBITMQ_ROUTING_KEY", "audio.new"),
Prefetch: getInt("PREFETCH", 1),
StatusQueue: getEnv("STATUS_QUEUE", "pipeline.status"),
NexaraBaseURL: getEnv("NEXARA_BASE_URL", "https://api.nexara.ru"),
NexaraAPIKey: os.Getenv("NEXARA_API_KEY"),

View File

@@ -0,0 +1,51 @@
package config
import (
"testing"
"time"
)
// TestLoadDefaults checks that Load falls back to its built-in defaults when
// the corresponding environment variables are empty.
func TestLoadDefaults(t *testing.T) {
	// Blank every variable whose default is asserted below. Without this, a
	// value inherited from the developer's shell or the CI environment
	// (for example INPUT_QUEUE or PROMPTS_SECTION) would leak into Load and
	// fail the test for reasons unrelated to the code under test.
	// This relies on Load treating an empty value as "unset", which the
	// STATUS_QUEUE and NEXARA_TIMEOUT assertions already depend on.
	for _, key := range []string{
		"RABBITMQ_URL",
		"INPUT_QUEUE",
		"STATUS_QUEUE",
		"NEXARA_TIMEOUT",
		"PROMPTS_SECTION",
	} {
		t.Setenv(key, "")
	}

	cfg := Load()

	if cfg.InputQueue != "transcribe.tasks" {
		t.Fatalf("InputQueue: got %q", cfg.InputQueue)
	}
	if cfg.StatusQueue != "pipeline.status" {
		t.Fatalf("StatusQueue: got %q", cfg.StatusQueue)
	}
	if cfg.NexaraTimeout != 10*time.Minute {
		t.Fatalf("NexaraTimeout: got %v", cfg.NexaraTimeout)
	}
	if cfg.PromptsSection != 1 {
		t.Fatalf("PromptsSection: got %d", cfg.PromptsSection)
	}
}
// TestLoadFromEnv checks that values supplied through the environment
// override the defaults returned by Load.
func TestLoadFromEnv(t *testing.T) {
	overrides := [][2]string{
		{"INPUT_QUEUE", "custom.tasks"},
		{"STATUS_QUEUE", "status.events"},
		{"PREFETCH", "4"},
		{"NEXARA_TIMEOUT", "2m"},
		{"PROMPTS_SECTION", "2"},
	}
	for _, kv := range overrides {
		t.Setenv(kv[0], kv[1])
	}

	loaded := Load()

	// Cases are evaluated top to bottom, so the first mismatch aborts the
	// test, exactly like a chain of independent Fatalf checks would.
	switch {
	case loaded.InputQueue != "custom.tasks":
		t.Fatalf("InputQueue: got %q", loaded.InputQueue)
	case loaded.StatusQueue != "status.events":
		t.Fatalf("StatusQueue: got %q", loaded.StatusQueue)
	case loaded.Prefetch != 4:
		t.Fatalf("Prefetch: got %d", loaded.Prefetch)
	case loaded.NexaraTimeout != 2*time.Minute:
		t.Fatalf("NexaraTimeout: got %v", loaded.NexaraTimeout)
	case loaded.PromptsSection != 2:
		t.Fatalf("PromptsSection: got %d", loaded.PromptsSection)
	}
}

View File

@@ -12,6 +12,7 @@ import (
"github.com/postmet/transcribe/internal/config"
"github.com/postmet/transcribe/internal/models"
"github.com/postmet/transcribe/internal/nexara"
"github.com/postmet/transcribe/internal/pipelinestatus"
"github.com/postmet/transcribe/internal/prompts"
)
@@ -70,6 +71,9 @@ func setupTopology(ch *amqp.Channel, cfg config.Config) error {
return fmt.Errorf("bind queue %s: %w", q, err)
}
}
if err := pipelinestatus.DeclareQueue(ch, cfg.StatusQueue); err != nil {
return fmt.Errorf("declare status queue: %w", err)
}
return ch.Qos(cfg.Prefetch, 0, false)
}
@@ -84,7 +88,11 @@ func (c *Consumer) Run(ctx context.Context) error {
return err
}
slog.Info("transcribe worker started", "queue", c.cfg.InputQueue, "output_exchange", c.cfg.OutputExchange)
slog.Info("transcribe worker started",
"queue", c.cfg.InputQueue,
"output_exchange", c.cfg.OutputExchange,
"status_queue", c.cfg.StatusQueue,
)
for {
select {
@@ -112,9 +120,12 @@ func (c *Consumer) handle(ctx context.Context, d amqp.Delivery) {
txCtx, cancel := context.WithTimeout(ctx, c.cfg.NexaraTimeout+30*time.Second)
defer cancel()
c.publishStatus(txCtx, task.TaskID, task.Filename, pipelinestatus.StatusInProgress, pipelinestatus.StageTranscribing, "")
text, lang, segments, err := c.nexara.TranscribeFile(txCtx, task.FilePath)
if err != nil {
slog.Warn("transcription failed", "task_id", task.TaskID, "error", err)
c.publishStatus(txCtx, task.TaskID, task.Filename, pipelinestatus.StatusError, pipelinestatus.StageTranscribing, err.Error())
_ = d.Nack(false, false)
return
}
@@ -122,6 +133,7 @@ func (c *Consumer) handle(ctx context.Context, d amqp.Delivery) {
promptList, err := c.prompts.Load(txCtx)
if err != nil {
slog.Warn("prompts load failed", "task_id", task.TaskID, "error", err)
c.publishStatus(txCtx, task.TaskID, task.Filename, pipelinestatus.StatusError, pipelinestatus.StageTranscribing, err.Error())
_ = d.Nack(false, false)
return
}
@@ -170,3 +182,18 @@ func (c *Consumer) handle(ctx context.Context, d amqp.Delivery) {
slog.Info("transcribed", "task_id", task.TaskID, "language", lang, "chars", len(text), "segments", len(segments), "prompts", len(promptList))
_ = d.Ack(false)
}
// publishStatus sends a pipeline status event for the given task to the
// configured status queue. It is best-effort: a publish failure is logged
// as a warning and is not returned to the caller.
func (c *Consumer) publishStatus(ctx context.Context, taskID, filename, status, stage, errMsg string) {
	event := pipelinestatus.Event{
		TaskID:   taskID,
		Filename: filename,
		Status:   status,
		Stage:    stage,
		// An empty errMsg is the field's zero value, so assigning it
		// unconditionally is the same as leaving Error unset.
		Error: errMsg,
	}

	err := pipelinestatus.Publish(ctx, c.ch, c.cfg.StatusQueue, event)
	if err == nil {
		return
	}
	slog.Warn("status publish failed", "task_id", taskID, "stage", stage, "error", err)
}

View File

@@ -0,0 +1,58 @@
package models
import (
"encoding/json"
"testing"
)
// TestAudioTaskRoundTrip checks that an AudioTask survives a JSON
// marshal/unmarshal cycle without losing or altering any field.
func TestAudioTaskRoundTrip(t *testing.T) {
	want := AudioTask{
		TaskID:    "01HX",
		FilePath:  "/data/processing/01HX.wav",
		Filename:  "call.wav",
		Size:      2048,
		CreatedAt: 1717843200,
	}

	encoded, err := json.Marshal(want)
	if err != nil {
		t.Fatal(err)
	}

	var decoded AudioTask
	err = json.Unmarshal(encoded, &decoded)
	if err != nil {
		t.Fatal(err)
	}

	if decoded == want {
		return
	}
	t.Fatalf("round-trip mismatch: %+v vs %+v", decoded, want)
}
// TestTranscriptionResultRoundTrip checks that a TranscriptionResult keeps
// its identifying fields and its nested slices across a JSON round trip.
func TestTranscriptionResultRoundTrip(t *testing.T) {
	want := TranscriptionResult{
		TaskID:        "01HX",
		Filename:      "call.wav",
		FilePath:      "/data/processing/01HX.wav",
		Transcription: "текст звонка",
		Language:      "ru",
		Segments: []Segment{
			{Start: 0, End: 1.2, Text: "текст"},
		},
		Prompts: []Prompt{
			{ID: 1, IDSection: 1, Name: "behavioral", Prompt: "analyze", DtCreate: "2026-01-01"},
		},
		TranscribedAt: 1717843200,
	}

	encoded, err := json.Marshal(want)
	if err != nil {
		t.Fatal(err)
	}

	var decoded TranscriptionResult
	err = json.Unmarshal(encoded, &decoded)
	if err != nil {
		t.Fatal(err)
	}

	// Scalar fields are checked first, then the nested collections; the
	// first failing case aborts the test.
	switch {
	case decoded.TaskID != want.TaskID, decoded.Transcription != want.Transcription:
		t.Fatalf("mismatch: %+v", decoded)
	case len(decoded.Segments) != 1, len(decoded.Prompts) != 1:
		t.Fatalf("nested fields lost: %+v", decoded)
	}
}

View File

@@ -0,0 +1,118 @@
package nexara
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"time"
)
// TestTranscribeFileSuccess runs TranscribeFile against a stub HTTP server
// and verifies both the outgoing request (method, headers, multipart form
// fields, uploaded bytes) and the decoding of the JSON response.
func TestTranscribeFileSuccess(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs on a goroutine owned by the HTTP server, where
		// t.Fatal/t.FailNow must not be called. Failures are recorded with
		// t.Errorf and answered with 400 instead of a valid transcription.
		reject := func(format string, args ...any) {
			t.Errorf(format, args...)
			http.Error(w, "request rejected by test handler", http.StatusBadRequest)
		}

		if r.Method != http.MethodPost {
			reject("method: got %s", r.Method)
			return
		}
		if ct := r.Header.Get("Content-Type"); !strings.HasPrefix(ct, "multipart/form-data") {
			reject("content-type: got %s", ct)
			return
		}
		if auth := r.Header.Get("Authorization"); auth != "Bearer nexara-key" {
			reject("auth: got %q", auth)
			return
		}
		if err := r.ParseMultipartForm(1 << 20); err != nil {
			reject("parse multipart form: %v", err)
			return
		}
		if model := r.FormValue("model"); model != "whisper-1" {
			reject("model: got %q", model)
			return
		}
		if format := r.FormValue("response_format"); format != "json" {
			reject("response_format: got %q", format)
			return
		}

		file, _, err := r.FormFile("file")
		if err != nil {
			reject("form file: %v", err)
			return
		}
		defer file.Close()

		buf := make([]byte, 64)
		n, _ := file.Read(buf)
		if string(buf[:n]) != "audio-bytes" {
			reject("file content: got %q", string(buf[:n]))
			return
		}

		err = json.NewEncoder(w).Encode(map[string]any{
			"text":     "привет мир",
			"language": "ru",
			"segments": []map[string]any{
				{"start": 0.0, "end": 1.5, "text": "привет"},
				{"start": 1.5, "end": 3.0, "text": "мир"},
			},
		})
		if err != nil {
			t.Errorf("encode response: %v", err)
		}
	}))
	defer srv.Close()

	dir := t.TempDir()
	audioPath := filepath.Join(dir, "test.wav")
	if err := os.WriteFile(audioPath, []byte("audio-bytes"), 0o644); err != nil {
		t.Fatal(err)
	}

	client := New(srv.URL, "nexara-key", "whisper-1", 5*time.Second)
	text, lang, segments, err := client.TranscribeFile(context.Background(), audioPath)
	if err != nil {
		t.Fatal(err)
	}
	if text != "привет мир" {
		t.Fatalf("text: got %q", text)
	}
	if lang != "ru" {
		t.Fatalf("language: got %q", lang)
	}
	if len(segments) != 2 {
		t.Fatalf("segments: got %d", len(segments))
	}
	if segments[0].Text != "привет" || segments[1].End != 3.0 {
		t.Fatalf("unexpected segments: %+v", segments)
	}
}
// TestTranscribeFileAPIError checks that a 401 response from the API is
// reported as an error whose message includes the status code.
func TestTranscribeFileAPIError(t *testing.T) {
	unauthorized := func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusUnauthorized)
		_, _ = w.Write([]byte("invalid key"))
	}
	srv := httptest.NewServer(http.HandlerFunc(unauthorized))
	defer srv.Close()

	audioPath := filepath.Join(t.TempDir(), "test.wav")
	if writeErr := os.WriteFile(audioPath, []byte("x"), 0o644); writeErr != nil {
		t.Fatal(writeErr)
	}

	client := New(srv.URL, "bad", "", 5*time.Second)
	_, _, _, err := client.TranscribeFile(context.Background(), audioPath)

	switch {
	case err == nil:
		t.Fatal("expected error on 401")
	case !strings.Contains(err.Error(), "401"):
		t.Fatalf("error should mention status: %v", err)
	}
}
// TestTranscribeFileMissingFile checks that TranscribeFile returns an error
// when the audio path does not exist.
func TestTranscribeFileMissingFile(t *testing.T) {
	missing := filepath.Join(t.TempDir(), "missing.wav")
	client := New("http://localhost", "key", "", time.Second)

	if _, _, _, err := client.TranscribeFile(context.Background(), missing); err == nil {
		t.Fatal("expected error for missing file")
	}
}
// TestNewTrimsBaseURL checks that a trailing slash on the base URL does not
// end up as a double slash in the transcription endpoint.
func TestNewTrimsBaseURL(t *testing.T) {
	// A suffix-only check would also accept
	// "https://api.example.com//api/v1/audio/transcriptions", i.e. the
	// untrimmed case, so the whole URL is compared instead.
	const want = "https://api.example.com/api/v1/audio/transcriptions"

	c := New("https://api.example.com/", "k", "m", time.Second)
	if c.apiURL != want {
		t.Fatalf("apiURL: got %s, want %s", c.apiURL, want)
	}
}

View File

@@ -0,0 +1,57 @@
package pipelinestatus
import (
"context"
"encoding/json"
"fmt"
"time"
amqp "github.com/rabbitmq/amqp091-go"
)
// Status values carried in Event.Status.
const (
	StatusPending    = "pending"
	StatusInProgress = "in_progress"
	StatusDone       = "done"
	StatusError      = "error"
)
// Stage values carried in Event.Stage.
const (
	StageQueued       = "queued"
	StageTranscribing = "transcribing"
	StageAnalysing    = "analysing"
	StageTagging      = "tagging"
	StageCompleted    = "completed"
)
// Event is the JSON payload published to the status queue to report the
// progress of a single task.
type Event struct {
	// TaskID identifies the task the event refers to.
	TaskID string `json:"task_id"`
	// Filename is omitted from the JSON when empty.
	Filename string `json:"filename,omitempty"`
	// Status and Stage are always serialized, even when empty.
	Status string `json:"status"`
	Stage  string `json:"stage"`
	// Error is omitted from the JSON when empty.
	Error string `json:"error,omitempty"`
	// Timestamp is in Unix seconds; Publish fills it in when it is zero.
	Timestamp int64 `json:"timestamp"`
}
// DeclareQueue declares the named status queue on ch as a durable queue
// with no auto-delete, no exclusivity, no-wait disabled and no extra
// arguments. It returns the error from the declaration unchanged.
func DeclareQueue(ch *amqp.Channel, queue string) error {
	const (
		durable    = true
		autoDelete = false
		exclusive  = false
		noWait     = false
	)
	if _, err := ch.QueueDeclare(queue, durable, autoDelete, exclusive, noWait, nil); err != nil {
		return err
	}
	return nil
}
// Publish serializes ev as JSON and publishes it as a persistent message
// using an empty exchange name and queue as the routing key.
//
// When ev.Timestamp is zero it is set to the current Unix time before
// encoding. ev is passed by value, so the caller's copy is not modified.
// A marshalling error is returned as is; a publish error is wrapped with
// the "publish status" prefix.
func Publish(ctx context.Context, ch *amqp.Channel, queue string, ev Event) error {
	if ev.Timestamp == 0 {
		ev.Timestamp = time.Now().Unix()
	}

	payload, err := json.Marshal(ev)
	if err != nil {
		return err
	}

	const (
		exchange  = ""
		mandatory = false
		immediate = false
	)
	msg := amqp.Publishing{
		ContentType:  "application/json",
		Body:         payload,
		DeliveryMode: amqp.Persistent,
	}

	err = ch.PublishWithContext(ctx, exchange, queue, mandatory, immediate, msg)
	if err != nil {
		return fmt.Errorf("publish status: %w", err)
	}
	return nil
}

View File

@@ -0,0 +1,116 @@
package prompts
import (
"context"
"encoding/json"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"testing"
)
// samplePrompts returns a JSON array with three prompt fixtures: two in
// section 1 ("behavioral", "client_data") and one in section 2 ("other").
// It panics if the fixture cannot be encoded.
func samplePrompts() []byte {
	type fixture struct {
		id, section  int
		name, prompt string
	}
	rows := []fixture{
		{id: 1, section: 1, name: "behavioral", prompt: "p1"},
		{id: 2, section: 1, name: "client_data", prompt: "p2"},
		{id: 3, section: 2, name: "other", prompt: "p3"},
	}

	// Maps are kept as the encoded shape so the JSON keys come out in the
	// same (sorted) order as before.
	records := make([]map[string]any, 0, len(rows))
	for _, r := range rows {
		records = append(records, map[string]any{
			"id":         r.id,
			"id_section": r.section,
			"name":       r.name,
			"prompt":     r.prompt,
			"dt_create":  "2026-01-01",
		})
	}

	encoded, err := json.Marshal(records)
	if err != nil {
		panic(err)
	}
	return encoded
}
// TestLoadStatic checks that the static loader reads prompts from a JSON
// file and returns only those belonging to the requested section.
func TestLoadStatic(t *testing.T) {
	fixture := filepath.Join(t.TempDir(), "prompts.json")
	if writeErr := os.WriteFile(fixture, samplePrompts(), 0o644); writeErr != nil {
		t.Fatal(writeErr)
	}

	loader := New("static", fixture, "", "", 1)
	loaded, err := loader.Load(context.Background())
	switch {
	case err != nil:
		t.Fatal(err)
	case len(loaded) != 2:
		t.Fatalf("want 2 prompts for section 1, got %d", len(loaded))
	}

	first, second := loaded[0].Name, loaded[1].Name
	if first != "behavioral" || second != "client_data" {
		t.Fatalf("unexpected names: %v, %v", first, second)
	}
}
// TestLoadStaticAllSections checks that a section value of 0 disables the
// section filter, so every prompt in the file is returned.
func TestLoadStaticAllSections(t *testing.T) {
	fixture := filepath.Join(t.TempDir(), "prompts.json")
	if writeErr := os.WriteFile(fixture, samplePrompts(), 0o644); writeErr != nil {
		t.Fatal(writeErr)
	}

	loader := New("static", fixture, "", "", 0)
	loaded, err := loader.Load(context.Background())
	switch {
	case err != nil:
		t.Fatal(err)
	case len(loaded) != 3:
		t.Fatalf("want 3 prompts without filter, got %d", len(loaded))
	}
}
// TestLoadStaticMissingFile checks that the static loader returns an error
// when the prompts file does not exist.
func TestLoadStaticMissingFile(t *testing.T) {
	missing := filepath.Join(t.TempDir(), "missing.json")
	loader := New("static", missing, "", "", 1)

	if _, err := loader.Load(context.Background()); err == nil {
		t.Fatal("expected error for missing file")
	}
}
// TestLoadHTTP runs the HTTP loader against a stub server and verifies the
// request path, the id_section query parameter, the bearer token, and that
// the prompts for section 1 are returned.
func TestLoadHTTP(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs on a goroutine owned by the HTTP server, where
		// t.Fatal/t.FailNow must not be called. Failures are recorded with
		// t.Errorf and answered with 400 instead of the fixture payload.
		reject := func(format string, args ...any) {
			t.Errorf(format, args...)
			http.Error(w, "request rejected by test handler", http.StatusBadRequest)
		}

		if r.URL.Path != "/metrics/" {
			reject("path: got %s", r.URL.Path)
			return
		}
		if section := r.URL.Query().Get("id_section"); section != "1" {
			reject("id_section: got %s", section)
			return
		}
		if auth := r.Header.Get("Authorization"); auth != "Bearer test-key" {
			reject("auth: got %q", auth)
			return
		}

		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write(samplePrompts())
	}))
	defer srv.Close()

	l := New("http", "", srv.URL, "test-key", 1)
	got, err := l.Load(context.Background())
	if err != nil {
		t.Fatal(err)
	}
	if len(got) != 2 {
		t.Fatalf("want 2 prompts, got %d", len(got))
	}
}
// TestLoadHTTPErrorStatus checks that a 500 response from the prompts
// service is reported as an error by the HTTP loader.
func TestLoadHTTPErrorStatus(t *testing.T) {
	alwaysFail := func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusInternalServerError)
		_, _ = w.Write([]byte("boom"))
	}
	srv := httptest.NewServer(http.HandlerFunc(alwaysFail))
	defer srv.Close()

	loader := New("http", "", srv.URL, "", 1)
	if _, err := loader.Load(context.Background()); err == nil {
		t.Fatal("expected error on 500")
	}
}
// TestLoadHTTPMissingBaseURL checks that the HTTP loader returns an error
// when it is constructed with an empty base URL.
func TestLoadHTTPMissingBaseURL(t *testing.T) {
	loader := New("http", "", "", "", 1)

	if _, err := loader.Load(context.Background()); err == nil {
		t.Fatal("expected error without base URL")
	}
}