audio_pipeline

This commit is contained in:
sanek5g
2026-06-10 17:12:58 +03:00
commit 00ddac5af7
29 changed files with 3297 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
# Build stage: compiles the transcribe worker using the Go 1.22 Alpine image.
FROM golang:1.22-alpine AS build
WORKDIR /src
# Module files are copied and downloaded before the rest of the sources;
# the go.sum* glob lets the COPY succeed even when go.sum is absent.
COPY go.mod go.sum* ./
RUN go mod download
COPY . .
# cgo is disabled for the build of ./cmd/transcribe; the binary is written to /transcribe.
RUN CGO_ENABLED=0 go build -o /transcribe ./cmd/transcribe
# Runtime stage: Alpine 3.19 plus CA certificates and the binary from the build stage.
# NOTE(review): no prompts file is copied into this image — confirm that the
# file referenced by PROMPTS_FILE is mounted or otherwise provided at runtime.
FROM alpine:3.19
RUN apk add --no-cache ca-certificates
COPY --from=build /transcribe /transcribe
ENTRYPOINT ["/transcribe"]

View File

@@ -0,0 +1,64 @@
package main
import (
"context"
"log/slog"
"os"
"os/signal"
"syscall"
"time"
amqp "github.com/rabbitmq/amqp091-go"
"github.com/yourorg/transcribe/internal/config"
"github.com/yourorg/transcribe/internal/consumer"
)
// main wires the worker together: it installs a JSON slog logger, loads the
// configuration, connects to RabbitMQ, builds the consumer and runs it until
// SIGINT/SIGTERM arrives or the consumer fails on its own.
func main() {
	jsonHandler := slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelInfo})
	slog.SetDefault(slog.New(jsonHandler))

	cfg := config.Load()
	if cfg.NexaraAPIKey == "" {
		slog.Error("NEXARA_API_KEY is required")
		os.Exit(1)
	}

	rabbitCh := mustRabbit(cfg.RabbitURL)
	worker, initErr := consumer.New(cfg, rabbitCh)
	if initErr != nil {
		slog.Error("consumer init failed", "error", initErr)
		os.Exit(1)
	}

	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
	defer stop()

	runErr := worker.Run(ctx)
	// An error returned after the context was cancelled is treated as part
	// of a normal shutdown, not as a failure.
	if runErr == nil || ctx.Err() != nil {
		slog.Info("transcribe worker stopping")
		return
	}
	slog.Error("consumer stopped", "error", runErr)
	os.Exit(1)
}
// mustRabbit dials RabbitMQ at url, retrying up to 30 times with a 2 second
// pause after every failed attempt, and returns a channel opened on the
// resulting connection. If the broker stays unreachable or the channel
// cannot be opened, it logs the error and exits the process with status 1.
func mustRabbit(url string) *amqp.Channel {
	const (
		maxAttempts = 30
		retryDelay  = 2 * time.Second
	)

	var (
		conn    *amqp.Connection
		dialErr error
	)
	for attempt := 1; attempt <= maxAttempts; attempt++ {
		if conn, dialErr = amqp.Dial(url); dialErr == nil {
			break
		}
		slog.Info("waiting for rabbit", "attempt", attempt, "error", dialErr)
		time.Sleep(retryDelay)
	}
	if dialErr != nil {
		slog.Error("rabbit unreachable", "error", dialErr)
		os.Exit(1)
	}

	channel, chErr := conn.Channel()
	if chErr != nil {
		slog.Error("rabbit channel failed", "error", chErr)
		os.Exit(1)
	}
	return channel
}

View File

@@ -0,0 +1,23 @@
[
{
"id": 1,
"id_section": 1,
"name": "behavioral",
"prompt": "Ты — строгий классификатор звонков.\n\nЗадача:\nПроанализируй диалог и оцени поведенческие критерии.\n\nКритерии:\n1. Приветствие\n2. Инициативность (выявление цели, попытка развить разговор)\n3. Уточнил, остались ли вопросы\n4. Прощание\n\nИнструкция:\nДля каждого критерия:\n- определи наличие\n- найди ДОСЛОВНУЮ цитату\n- оцени confidence (0.0–1.0)\n\nФормат ответа (строго JSON):\n\n{\n \"greeting\": {\n \"value\": true/false,\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"initiative\": {\n \"value\": true/false,\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"questions_check\": {\n \"value\": true/false,\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"closing\": {\n \"value\": true/false,\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n }\n}\n\nЖЁСТКИЕ ПРАВИЛА:\n- каждый критерий оценивается независимо\n- не додумывать\n- если нет → value=false, evidence=null, confidence=0.0\n- evidence должен подтверждать вывод",
"dt_create": "2026-06-09T09:00:00.000000"
},
{
"id": 2,
"id_section": 1,
"name": "client_data",
"prompt": "Ты — строгий классификатор звонков.\n\nЗадача:\nОпредели, какие данные о клиенте были получены.\n\nКритерии:\n1. Первый ли раз обращается\n2. Указан ли город клиента\n3. Тип клиента (физ/юр)\n4. Получены ли контакты\n5. Источник (откуда узнали)\n\nФормат ответа (строго JSON):\n\n{\n \"first_time\": {\n \"value\": true/false,\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"client_city\": {\n \"value\": true/false,\n \"city\": \"строка или null\",\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"client_type\": {\n \"value\": true/false,\n \"type\": \"physical|legal|null\",\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"contacts\": {\n \"value\": true/false,\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"source\": {\n \"value\": true/false,\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n }\n}\n\nЖЁСТКИЕ ПРАВИЛА:\n- city/type только если явно сказано\n- не додумывать\n- если нет → value=false, evidence=null, confidence=0.0",
"dt_create": "2026-06-09T09:00:00.000000"
},
{
"id": 3,
"id_section": 1,
"name": "cargo_data",
"prompt": "Ты — строгий классификатор логистических данных.\n\nКритерии:\n1. Характер груза\n2. Параметры груза (вес, объем, размеры)\n3. Стоимость груза\n\nФормат ответа (строго JSON):\n\n{\n \"cargo_type\": {\n \"value\": true/false,\n \"type\": \"строка или null\",\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"cargo_params\": {\n \"value\": true/false,\n \"params\": \"строка или null\",\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"cargo_value\": {\n \"value\": true/false,\n \"amount\": \"строка или null\",\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n }\n}\n\nЖЁСТКИЕ ПРАВИЛА:\n- только явные данные\n- числа/параметры должны быть в evidence\n- если нет → value=false, evidence=null, confidence=0.0",
"dt_create": "2026-06-09T09:00:00.000000"
}
]

View File

@@ -0,0 +1,5 @@
module github.com/yourorg/transcribe
go 1.22
require github.com/rabbitmq/amqp091-go v1.9.0

18
workers/transcribe/go.sum Normal file
View File

@@ -0,0 +1,18 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rabbitmq/amqp091-go v1.9.0 h1:qrQtyzB4H8BQgEuJwhmVQqVHB9O4+MNDJCCAcpc3Aoo=
github.com/rabbitmq/amqp091-go v1.9.0/go.mod h1:+jPrT9iY2eLjRaMSRHUhc3z14E/l85kv/f+6luSD3pc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A=
go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -0,0 +1,78 @@
package config
import (
"os"
"strconv"
"time"
)
// Config holds every runtime setting of the transcribe worker. Values are
// populated from environment variables by Load.
type Config struct {
	// RabbitMQ connection and topology.
	RabbitURL       string // AMQP connection URL
	InputQueue      string // queue the worker consumes tasks from
	OutputExchange  string // exchange results are published to
	AnalyseQueue    string // queue bound to OutputExchange
	TaggingQueue    string // queue bound to OutputExchange
	InputExchange   string // exchange InputQueue is bound to
	InputRoutingKey string // routing key of the InputQueue binding
	Prefetch        int    // prefetch count passed to the channel QoS

	// Nexara transcription API.
	NexaraBaseURL string
	NexaraAPIKey  string
	NexaraModel   string
	NexaraTimeout time.Duration

	// Prompt loading.
	PromptsSource  string // source selector passed to the prompts loader
	PromptsFile    string // path of the prompts JSON file
	PromptsBaseURL string // base URL of the prompts HTTP API
	PromptsAPIKey  string // bearer token for the prompts HTTP API
	PromptsSection int    // section id used to filter prompts
}
// Load builds a Config from the process environment. Settings with a default
// fall back to it when the variable is unset or empty; NEXARA_API_KEY,
// PROMPTS_BASE_URL and PROMPTS_API_KEY have no default and stay empty.
func Load() Config {
	var cfg Config

	// RabbitMQ connection and topology.
	cfg.RabbitURL = getEnv("RABBITMQ_URL", "amqp://guest:guest@localhost:5672/")
	cfg.InputQueue = getEnv("INPUT_QUEUE", "transcribe.tasks")
	cfg.OutputExchange = getEnv("OUTPUT_EXCHANGE", "transcription_done")
	cfg.AnalyseQueue = getEnv("ANALYSE_QUEUE", "analyse")
	cfg.TaggingQueue = getEnv("TAGGING_QUEUE", "tagging")
	cfg.InputExchange = getEnv("RABBITMQ_EXCHANGE", "audio_pipeline")
	cfg.InputRoutingKey = getEnv("RABBITMQ_ROUTING_KEY", "audio.new")
	cfg.Prefetch = getInt("PREFETCH", 1)

	// Nexara transcription API.
	cfg.NexaraBaseURL = getEnv("NEXARA_BASE_URL", "https://api.nexara.ru")
	cfg.NexaraAPIKey = os.Getenv("NEXARA_API_KEY")
	cfg.NexaraModel = getEnv("NEXARA_MODEL", "whisper-1")
	cfg.NexaraTimeout = getDuration("NEXARA_TIMEOUT", 10*time.Minute)

	// Prompt loading.
	cfg.PromptsSource = getEnv("PROMPTS_SOURCE", "static")
	cfg.PromptsFile = getEnv("PROMPTS_FILE", "/app/configs/prompts.json")
	cfg.PromptsBaseURL = os.Getenv("PROMPTS_BASE_URL")
	cfg.PromptsAPIKey = os.Getenv("PROMPTS_API_KEY")
	cfg.PromptsSection = getInt("PROMPTS_SECTION", 1)

	return cfg
}
// getEnv returns the value of the environment variable key, or def when the
// variable is unset or set to the empty string.
func getEnv(key, def string) string {
	value := os.Getenv(key)
	if value == "" {
		return def
	}
	return value
}
// getInt reads the environment variable key as a base-10 integer. It returns
// def when the variable is unset, empty, or not a valid integer.
func getInt(key string, def int) int {
	raw := os.Getenv(key)
	if raw == "" {
		return def
	}
	parsed, err := strconv.Atoi(raw)
	if err != nil {
		return def
	}
	return parsed
}
// getDuration reads the environment variable key using time.ParseDuration
// syntax (for example "90s" or "10m"). It returns def when the variable is
// unset, empty, or cannot be parsed.
func getDuration(key string, def time.Duration) time.Duration {
	raw := os.Getenv(key)
	if raw == "" {
		return def
	}
	parsed, err := time.ParseDuration(raw)
	if err != nil {
		return def
	}
	return parsed
}

View File

@@ -0,0 +1,172 @@
package consumer
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"time"
amqp "github.com/rabbitmq/amqp091-go"
"github.com/yourorg/transcribe/internal/config"
"github.com/yourorg/transcribe/internal/models"
"github.com/yourorg/transcribe/internal/nexara"
"github.com/yourorg/transcribe/internal/prompts"
)
// Consumer reads audio tasks from RabbitMQ, transcribes them through the
// Nexara client and publishes the results together with the loaded prompts.
type Consumer struct {
	// cfg is the configuration the consumer was created with.
	cfg config.Config
	// ch is the AMQP channel used for consuming, publishing and acknowledging.
	ch *amqp.Channel
	// nexara is the transcription API client.
	nexara *nexara.Client
	// prompts loads the prompt list attached to every result.
	prompts *prompts.Loader
}
// New declares the RabbitMQ topology on ch and returns a Consumer configured
// from cfg. It returns an error if any part of the topology setup fails.
func New(cfg config.Config, ch *amqp.Channel) (*Consumer, error) {
	err := setupTopology(ch, cfg)
	if err != nil {
		return nil, err
	}

	cons := &Consumer{cfg: cfg, ch: ch}
	cons.nexara = nexara.New(cfg.NexaraBaseURL, cfg.NexaraAPIKey, cfg.NexaraModel, cfg.NexaraTimeout)
	cons.prompts = prompts.New(cfg.PromptsSource, cfg.PromptsFile, cfg.PromptsBaseURL, cfg.PromptsAPIKey, cfg.PromptsSection)
	return cons, nil
}
// setupTopology declares everything the worker needs on the broker:
//   - the "dlx" direct exchange, the direct input exchange and the fanout
//     output exchange;
//   - the input queue, dead-lettering to "dlx" with the routing key
//     "<input queue>.failed", plus the matching ".failed" queue and binding;
//   - the binding of the input queue to the input exchange;
//   - the analyse and tagging queues, both bound to the output exchange.
//
// All exchanges and queues are declared durable. Finally the channel prefetch
// is set to cfg.Prefetch. The first failing step aborts the setup.
func setupTopology(ch *amqp.Channel, cfg config.Config) error {
	const deadLetterExchange = "dlx"
	failedQueue := cfg.InputQueue + ".failed"

	exchanges := []struct{ name, kind, label string }{
		{deadLetterExchange, "direct", "dlx"},
		{cfg.InputExchange, "direct", "input exchange"},
		{cfg.OutputExchange, "fanout", "output exchange"},
	}
	for _, ex := range exchanges {
		if err := ch.ExchangeDeclare(ex.name, ex.kind, true, false, false, false, nil); err != nil {
			return fmt.Errorf("declare %s: %w", ex.label, err)
		}
	}

	inputArgs := amqp.Table{
		"x-dead-letter-exchange":    deadLetterExchange,
		"x-dead-letter-routing-key": failedQueue,
	}
	if _, err := ch.QueueDeclare(cfg.InputQueue, true, false, false, false, inputArgs); err != nil {
		return fmt.Errorf("declare input queue: %w", err)
	}
	if _, err := ch.QueueDeclare(failedQueue, true, false, false, false, nil); err != nil {
		return fmt.Errorf("declare dlq: %w", err)
	}
	if err := ch.QueueBind(failedQueue, failedQueue, deadLetterExchange, false, nil); err != nil {
		return fmt.Errorf("bind dlq: %w", err)
	}
	if err := ch.QueueBind(cfg.InputQueue, cfg.InputRoutingKey, cfg.InputExchange, false, nil); err != nil {
		return fmt.Errorf("bind input queue: %w", err)
	}

	outputQueues := [...]string{cfg.AnalyseQueue, cfg.TaggingQueue}
	for i := range outputQueues {
		name := outputQueues[i]
		if _, err := ch.QueueDeclare(name, true, false, false, false, nil); err != nil {
			return fmt.Errorf("declare queue %s: %w", name, err)
		}
		if err := ch.QueueBind(name, "", cfg.OutputExchange, false, nil); err != nil {
			return fmt.Errorf("bind queue %s: %w", name, err)
		}
	}

	return ch.Qos(cfg.Prefetch, 0, false)
}
// Run puts the channel into publisher-confirm mode, starts consuming the
// input queue with manual acknowledgements and handles deliveries one at a
// time. It returns nil once ctx is cancelled, and an error if confirm mode
// or the consumer cannot be set up or the delivery channel is closed.
func (c *Consumer) Run(ctx context.Context) error {
	if confirmErr := c.ch.Confirm(false); confirmErr != nil {
		return fmt.Errorf("confirm mode: %w", confirmErr)
	}

	deliveries, consumeErr := c.ch.Consume(c.cfg.InputQueue, "", false, false, false, false, nil)
	if consumeErr != nil {
		return consumeErr
	}

	slog.Info("transcribe worker started", "queue", c.cfg.InputQueue, "output_exchange", c.cfg.OutputExchange)

	for {
		select {
		case delivery, open := <-deliveries:
			if open {
				c.handle(ctx, delivery)
				continue
			}
			return fmt.Errorf("delivery channel closed")
		case <-ctx.Done():
			return nil
		}
	}
}
// handle processes a single delivery end to end: it decodes the AudioTask,
// transcribes the referenced file, loads the prompts, publishes the combined
// result to the output exchange and waits for the broker to confirm it.
//
// Acknowledgement policy:
//   - malformed messages and marshal failures are nacked without requeue;
//   - transcription and prompt-loading failures are nacked without requeue,
//     unless the worker itself is shutting down (ctx cancelled), in which
//     case the task is requeued so that it is not dropped from the input
//     queue because of the shutdown;
//   - publish errors, negative confirms and confirm timeouts are nacked with
//     requeue;
//   - the delivery is acked only after the broker confirmed the publish.
//
// The channel must already be in confirm mode (Run enables it before
// consuming), otherwise no confirmation is available to wait for.
func (c *Consumer) handle(ctx context.Context, d amqp.Delivery) {
	// reject nacks the delivery and reports, rather than silently discards,
	// a failure to do so.
	reject := func(requeue bool) {
		if err := d.Nack(false, requeue); err != nil {
			slog.Warn("nack failed", "delivery_tag", d.DeliveryTag, "requeue", requeue, "error", err)
		}
	}

	var task models.AudioTask
	if err := json.Unmarshal(d.Body, &task); err != nil {
		slog.Warn("bad message", "delivery_tag", d.DeliveryTag, "error", err)
		reject(false)
		return
	}
	slog.Info("message received", "task_id", task.TaskID, "file_path", task.FilePath, "filename", task.Filename)

	// One deadline covers transcription, prompt loading and publishing; the
	// extra 30 seconds leave room for the steps after the Nexara call.
	txCtx, cancel := context.WithTimeout(ctx, c.cfg.NexaraTimeout+30*time.Second)
	defer cancel()

	text, lang, segments, err := c.nexara.TranscribeFile(txCtx, task.FilePath)
	if err != nil {
		slog.Warn("transcription failed", "task_id", task.TaskID, "error", err)
		// Requeue only when the parent context was cancelled, i.e. the
		// failure was caused by the worker stopping.
		reject(ctx.Err() != nil)
		return
	}

	promptList, err := c.prompts.Load(txCtx)
	if err != nil {
		slog.Warn("prompts load failed", "task_id", task.TaskID, "error", err)
		reject(ctx.Err() != nil)
		return
	}

	result := models.TranscriptionResult{
		TaskID:        task.TaskID,
		Filename:      task.Filename,
		FilePath:      task.FilePath,
		Transcription: text,
		Language:      lang,
		Segments:      segments,
		Prompts:       promptList,
		TranscribedAt: time.Now().Unix(),
	}
	body, err := json.Marshal(result)
	if err != nil {
		slog.Warn("marshal failed", "task_id", task.TaskID, "error", err)
		reject(false)
		return
	}

	// A deferred confirmation is tied to this one publish. Registering a
	// fresh NotifyPublish listener per message would leave the earlier
	// listeners registered but never read, and the library delivers every
	// confirmation to every listener, so confirmation dispatch would block
	// once one of those abandoned channels fills up.
	confirmation, err := c.ch.PublishWithDeferredConfirmWithContext(txCtx, c.cfg.OutputExchange, "", false, false, amqp.Publishing{
		ContentType:  "application/json",
		Body:         body,
		DeliveryMode: amqp.Persistent,
	})
	if err != nil {
		slog.Warn("publish failed, requeue", "task_id", task.TaskID, "error", err)
		reject(true)
		return
	}

	acked, err := confirmation.WaitContext(txCtx)
	switch {
	case err != nil:
		slog.Warn("publish timeout, requeue", "task_id", task.TaskID, "error", err)
		reject(true)
		return
	case !acked:
		slog.Warn("publish not confirmed, requeue", "task_id", task.TaskID)
		reject(true)
		return
	}

	slog.Info("transcribed", "task_id", task.TaskID, "language", lang, "chars", len(text), "segments", len(segments), "prompts", len(promptList))
	if err := d.Ack(false); err != nil {
		slog.Warn("ack failed", "task_id", task.TaskID, "error", err)
	}
}

View File

@@ -0,0 +1,34 @@
package models
// AudioTask is the JSON message consumed from the input queue; it describes
// one audio file to transcribe.
type AudioTask struct {
	// TaskID identifies the task and is echoed in the result.
	TaskID string `json:"task_id"`
	// FilePath is the path the audio file is opened from.
	FilePath string `json:"file_path"`
	// Filename is the file's name, echoed in the result.
	Filename string `json:"filename"`
	// Size is the file size — presumably in bytes; confirm with the producer.
	Size int64 `json:"size"`
	// CreatedAt is the creation timestamp — presumably Unix seconds; confirm
	// with the producer.
	CreatedAt int64 `json:"created_at"`
}
// Segment is one timed piece of a transcription.
type Segment struct {
	// Start and End delimit the segment — presumably offsets in seconds;
	// confirm against the transcription API.
	Start float64 `json:"start"`
	End   float64 `json:"end"`
	// Text is the transcribed text of the segment.
	Text string `json:"text"`
}
// Prompt is a single prompt definition as stored in the prompts file or
// returned by the prompts API.
type Prompt struct {
	// ID is the prompt's identifier.
	ID int `json:"id"`
	// IDSection is the section the prompt belongs to; prompts are filtered
	// by this value.
	IDSection int `json:"id_section"`
	// Name is the prompt's short name.
	Name string `json:"name"`
	// Prompt is the prompt text itself.
	Prompt string `json:"prompt"`
	// DtCreate is the creation timestamp, kept as the raw string.
	DtCreate string `json:"dt_create"`
}
// TranscriptionResult is the JSON message published after a task has been
// transcribed. It echoes the task identity and carries the transcription
// together with the prompts loaded for it.
type TranscriptionResult struct {
	// Identity copied from the originating AudioTask.
	TaskID   string `json:"task_id"`
	Filename string `json:"filename"`
	FilePath string `json:"file_path"`

	// Transcription output.
	Transcription string `json:"transcription"`
	Language      string `json:"language"`
	// Segments is left out of the JSON when empty.
	Segments []Segment `json:"segments,omitempty"`

	// Prompts is the prompt list attached to the result.
	Prompts []Prompt `json:"prompts"`
	// TranscribedAt is the completion time in Unix seconds.
	TranscribedAt int64 `json:"transcribed_at"`
}

View File

@@ -0,0 +1,117 @@
package nexara
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"mime/multipart"
"net/http"
"os"
"path/filepath"
"strings"
"time"
"github.com/yourorg/transcribe/internal/models"
)
// Client calls the Nexara audio transcription HTTP API.
type Client struct {
	// apiURL is the full URL of the transcription endpoint.
	apiURL string
	// apiKey is sent as a bearer token in the Authorization header.
	apiKey string
	// model is sent as the "model" form field when non-empty.
	model string
	// httpClient performs the requests.
	httpClient *http.Client
}
// New returns a Client for the Nexara API rooted at baseURL. Trailing slashes
// of baseURL are removed before the transcription endpoint path is appended.
// timeout is the overall per-request timeout of the underlying HTTP client.
func New(baseURL, apiKey, model string, timeout time.Duration) *Client {
	endpoint := strings.TrimRight(baseURL, "/") + "/api/v1/audio/transcriptions"

	client := new(Client)
	client.apiURL = endpoint
	client.apiKey = apiKey
	client.model = model
	client.httpClient = &http.Client{Timeout: timeout}
	return client
}
// TranscribeFile uploads the file at path to the transcription endpoint as a
// multipart form and returns the recognised text, the detected language and
// the timed segments from the JSON response.
//
// The form carries the file under "file", the model under "model" (only when
// one is configured) and "response_format" set to "json". Any status other
// than 200 is returned as an error that includes the response body. Response
// fields that are missing or of an unexpected type are left at their zero
// value; segment entries that are not JSON objects are skipped.
func (c *Client) TranscribeFile(ctx context.Context, path string) (text, language string, segments []models.Segment, err error) {
	audio, err := os.Open(path)
	if err != nil {
		return "", "", nil, fmt.Errorf("open file: %w", err)
	}
	defer audio.Close()

	// Assemble the whole multipart payload in memory.
	var payload bytes.Buffer
	form := multipart.NewWriter(&payload)
	filePart, err := form.CreateFormFile("file", filepath.Base(path))
	if err != nil {
		return "", "", nil, err
	}
	if _, err = io.Copy(filePart, audio); err != nil {
		return "", "", nil, err
	}
	var fields [][2]string
	if c.model != "" {
		fields = append(fields, [2]string{"model", c.model})
	}
	fields = append(fields, [2]string{"response_format", "json"})
	for _, field := range fields {
		if err = form.WriteField(field[0], field[1]); err != nil {
			return "", "", nil, err
		}
	}
	if err = form.Close(); err != nil {
		return "", "", nil, err
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.apiURL, &payload)
	if err != nil {
		return "", "", nil, err
	}
	req.Header.Set("Content-Type", form.FormDataContentType())
	req.Header.Set("Authorization", "Bearer "+c.apiKey)

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return "", "", nil, fmt.Errorf("request: %w", err)
	}
	defer resp.Body.Close()

	respBody, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", "", nil, err
	}
	if resp.StatusCode != http.StatusOK {
		return "", "", nil, fmt.Errorf("status %d: %s", resp.StatusCode, string(respBody))
	}

	var decoded map[string]any
	if err = json.Unmarshal(respBody, &decoded); err != nil {
		return "", "", nil, fmt.Errorf("parse: %w", err)
	}

	// A failed type assertion yields the zero value, so absent or mistyped
	// fields simply stay empty.
	text, _ = decoded["text"].(string)
	language, _ = decoded["language"].(string)

	items, _ := decoded["segments"].([]any)
	for _, item := range items {
		entry, isObject := item.(map[string]any)
		if !isObject {
			continue
		}
		var seg models.Segment
		seg.Start, _ = entry["start"].(float64)
		seg.End, _ = entry["end"].(float64)
		seg.Text, _ = entry["text"].(string)
		segments = append(segments, seg)
	}
	return text, language, segments, nil
}

View File

@@ -0,0 +1,100 @@
package prompts
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"strconv"
"strings"
"time"
"github.com/yourorg/transcribe/internal/models"
)
// Loader fetches the prompt list either from a local JSON file or from an
// HTTP API, depending on source.
type Loader struct {
	// source selects the backend: "http" (case-insensitive) uses the API,
	// anything else reads the file.
	source string
	// filePath is the JSON file read by the static backend.
	filePath string
	// baseURL is the API base URL, stored without trailing slashes.
	baseURL string
	// apiKey, when non-empty, is sent as a bearer token to the API.
	apiKey string
	// sectionID selects which section's prompts are returned.
	sectionID int
	// client performs the API requests.
	client *http.Client
}
// New returns a Loader for the given source. Trailing slashes are removed
// from baseURL, and the HTTP client it creates uses a 30 second timeout.
func New(source, filePath, baseURL, apiKey string, sectionID int) *Loader {
	loader := new(Loader)
	loader.source = source
	loader.filePath = filePath
	loader.baseURL = strings.TrimRight(baseURL, "/")
	loader.apiKey = apiKey
	loader.sectionID = sectionID
	loader.client = &http.Client{Timeout: 30 * time.Second}
	return loader
}
// Load returns the prompts for the configured section. It uses the HTTP API
// when the source is "http" (compared case-insensitively) and the local file
// for every other source value.
func (l *Loader) Load(ctx context.Context) ([]models.Prompt, error) {
	if strings.ToLower(l.source) == "http" {
		return l.loadHTTP(ctx)
	}
	return l.loadStatic()
}
// loadStatic reads the prompts JSON file, decodes it as a list of prompts
// and returns the entries selected by the section filter.
func (l *Loader) loadStatic() ([]models.Prompt, error) {
	raw, readErr := os.ReadFile(l.filePath)
	if readErr != nil {
		return nil, fmt.Errorf("read prompts file: %w", readErr)
	}

	var all []models.Prompt
	if parseErr := json.Unmarshal(raw, &all); parseErr != nil {
		return nil, fmt.Errorf("parse prompts file: %w", parseErr)
	}
	return filterSection(all, l.sectionID), nil
}
// loadHTTP requests <baseURL>/metrics/?id_section=<sectionID> from the
// prompts API, decodes the JSON list in the response and returns the entries
// selected by the section filter. It fails when no base URL is configured or
// when the API answers with a status other than 200; in the latter case the
// error includes the response body.
func (l *Loader) loadHTTP(ctx context.Context) ([]models.Prompt, error) {
	if l.baseURL == "" {
		return nil, fmt.Errorf("PROMPTS_BASE_URL is required for http source")
	}

	endpoint := l.baseURL + "/metrics/?id_section=" + strconv.Itoa(l.sectionID)
	req, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if reqErr != nil {
		return nil, reqErr
	}
	// Authentication is optional: the header is only sent when a key is set.
	if l.apiKey != "" {
		req.Header.Set("Authorization", "Bearer "+l.apiKey)
	}

	resp, doErr := l.client.Do(req)
	if doErr != nil {
		return nil, doErr
	}
	defer resp.Body.Close()

	payload, readErr := io.ReadAll(resp.Body)
	if readErr != nil {
		return nil, readErr
	}
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("prompts api status %d: %s", resp.StatusCode, string(payload))
	}

	var all []models.Prompt
	if parseErr := json.Unmarshal(payload, &all); parseErr != nil {
		return nil, fmt.Errorf("parse prompts response: %w", parseErr)
	}
	return filterSection(all, l.sectionID), nil
}
// filterSection returns the prompts whose IDSection equals sectionID, in
// their original order. A sectionID of zero or less disables filtering and
// returns the input slice unchanged. When filtering is active the result is
// always a non-nil slice, even if nothing matches.
func filterSection(prompts []models.Prompt, sectionID int) []models.Prompt {
	if sectionID > 0 {
		matched := make([]models.Prompt, 0, len(prompts))
		for i := range prompts {
			if prompts[i].IDSection != sectionID {
				continue
			}
			matched = append(matched, prompts[i])
		}
		return matched
	}
	return prompts
}