audio_pipeline

This commit is contained in:
sanek5g
2026-06-10 17:12:58 +03:00
commit 00ddac5af7
29 changed files with 3297 additions and 0 deletions

View File

@@ -0,0 +1,12 @@
FROM golang:1.22-alpine AS build
WORKDIR /src
COPY go.mod go.sum* ./
RUN go mod download
COPY . .
RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /analyse ./cmd/analyse
FROM alpine:3.20
RUN apk add --no-cache ca-certificates
WORKDIR /app
COPY --from=build /analyse /app/analyse
ENTRYPOINT ["/app/analyse"]

View File

@@ -0,0 +1,657 @@
package main
import (
"bytes"
"context"
"database/sql"
"encoding/json"
"fmt"
"io"
"log/slog"
"net"
"net/http"
"os"
"strings"
"time"
"unicode/utf8"
"github.com/joho/godotenv"
_ "github.com/jackc/pgx/v5/stdlib"
amqp "github.com/rabbitmq/amqp091-go"
)
func init() {
slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelInfo})))
}
// ── входящее сообщение из очереди analyse (TranscriptionResult от transcribe) ──
type WorkerMessage struct {
TaskID string `json:"task_id"`
Filename string `json:"filename"`
FilePath string `json:"file_path"`
Transcription string `json:"transcription"`
Language string `json:"language"`
Segments []Segment `json:"segments,omitempty"`
Prompts []Prompt `json:"prompts"`
TranscribedAt int64 `json:"transcribed_at"`
}
type Segment struct {
Start float64 `json:"start"`
End float64 `json:"end"`
Text string `json:"text"`
}
type Prompt struct {
ID int `json:"id"`
IDSection int `json:"id_section"`
Name string `json:"name"`
Prompt string `json:"prompt"`
DtCreate string `json:"dt_create"`
}
// AnalysisResult — ключ = name промпта, значение = полный JSON-ответ LLM.
type AnalysisResult map[string]any
// ── LLM request/response ──
type chatMessage struct {
Role string `json:"role"`
Content string `json:"content"`
}
type chatRequest struct {
Model string `json:"model"`
Temperature float64 `json:"temperature"`
ResponseFormat struct {
Type string `json:"type"`
} `json:"response_format"`
Messages []chatMessage `json:"messages"`
}
type tokenUsage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
}
type chatResponse struct {
Choices []struct {
Message struct {
Content string `json:"content"`
} `json:"message"`
} `json:"choices"`
Usage *tokenUsage `json:"usage"`
}
type llmCallResult struct {
Content string
RequestBytes int
ResponseBytes int
Usage *tokenUsage
Duration time.Duration
}
type analysisStats struct {
LLMCalls int
TotalTokens int
PromptTokens int
OutputTokens int
}
// ===================== LLM =====================
var llmHTTPClient = newLLMHTTPClient(150 * time.Second)
func newLLMHTTPClient(totalTimeout time.Duration) *http.Client {
return &http.Client{
Timeout: totalTimeout,
Transport: &http.Transport{
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
}).DialContext,
TLSHandshakeTimeout: 60 * time.Second,
ResponseHeaderTimeout: 90 * time.Second,
ExpectContinueTimeout: 5 * time.Second,
IdleConnTimeout: 90 * time.Second,
},
}
}
func callLLM(ctx context.Context, apiURL, model, prompt string) (*llmCallResult, error) {
const systemPrompt = "Ты — строгий классификатор звонков. Отвечай только JSON, без пояснений."
reqBody := chatRequest{
Model: model,
Temperature: 0.1,
Messages: []chatMessage{
{Role: "system", Content: systemPrompt},
{Role: "user", Content: prompt},
},
}
reqBody.ResponseFormat.Type = "json_object"
jsonData, err := json.Marshal(reqBody)
if err != nil {
return nil, err
}
req, err := http.NewRequestWithContext(ctx, "POST", apiURL, bytes.NewBuffer(jsonData))
if err != nil {
return nil, err
}
req.Header.Set("Authorization", "Bearer "+os.Getenv("YANDEX_API_KEY"))
req.Header.Set("Content-Type", "application/json")
start := time.Now()
resp, err := llmHTTPClient.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
body, _ := io.ReadAll(resp.Body)
duration := time.Since(start)
if resp.StatusCode != http.StatusOK {
return &llmCallResult{
RequestBytes: len(jsonData),
ResponseBytes: len(body),
Duration: duration,
}, fmt.Errorf("status %d: %s", resp.StatusCode, truncate(string(body), 500))
}
var result chatResponse
if err := json.Unmarshal(body, &result); err != nil {
return &llmCallResult{
RequestBytes: len(jsonData),
ResponseBytes: len(body),
Duration: duration,
}, err
}
if len(result.Choices) == 0 {
return &llmCallResult{
RequestBytes: len(jsonData),
ResponseBytes: len(body),
Duration: duration,
}, fmt.Errorf("empty response")
}
return &llmCallResult{
Content: result.Choices[0].Message.Content,
RequestBytes: len(jsonData),
ResponseBytes: len(body),
Usage: result.Usage,
Duration: duration,
}, nil
}
func checkYandexAPI(ctx context.Context, apiURL, model string) error {
slog.Info("yandex api check started", "worker", "analyse", "url", apiURL, "model", model)
res, err := callLLM(ctx, apiURL, model, `Ответь только JSON: {"ok":true}`)
if err != nil {
return err
}
attrs := []any{
"worker", "analyse",
"duration_ms", res.Duration.Milliseconds(),
"response_chars", utf8.RuneCountInString(res.Content),
}
if res.Usage != nil {
attrs = append(attrs,
"prompt_tokens", res.Usage.PromptTokens,
"completion_tokens", res.Usage.CompletionTokens,
"total_tokens", res.Usage.TotalTokens,
)
}
slog.Info("yandex api check ok", attrs...)
return nil
}
func logLLMCall(taskID, model, promptName string, promptIndex, promptTotal, attempt, inputChars int, res *llmCallResult, err error) {
attrs := []any{
"worker", "analyse",
"task_id", taskID,
"model", model,
"call_type", "analyse_prompt",
"prompt_name", promptName,
"prompt_index", promptIndex,
"prompt_total", promptTotal,
"attempt", attempt,
"input_chars", inputChars,
}
if res != nil {
attrs = append(attrs,
"duration_ms", res.Duration.Milliseconds(),
"request_bytes", res.RequestBytes,
"response_bytes", res.ResponseBytes,
"response_chars", utf8.RuneCountInString(res.Content),
)
if res.Usage != nil {
attrs = append(attrs,
"prompt_tokens", res.Usage.PromptTokens,
"completion_tokens", res.Usage.CompletionTokens,
"total_tokens", res.Usage.TotalTokens,
)
}
}
if err != nil {
slog.Warn("llm call failed", append(attrs, "error", err)...)
return
}
slog.Info("llm call ok", attrs...)
}
func accumulateUsage(stats *analysisStats, res *llmCallResult) {
stats.LLMCalls++
if res != nil && res.Usage != nil {
stats.TotalTokens += res.Usage.TotalTokens
stats.PromptTokens += res.Usage.PromptTokens
stats.OutputTokens += res.Usage.CompletionTokens
}
}
func buildPromptQuery(transcription string, p Prompt) string {
var b strings.Builder
b.WriteString(p.Prompt)
b.WriteString("\n\n=== ТРАНСКРИПЦИЯ ===\n\"\"\"\n")
b.WriteString(transcription)
b.WriteString("\n\"\"\"")
return b.String()
}
func analysePrompt(ctx context.Context, apiURL, model, transcription string, p Prompt, index, total int, taskID string, stats *analysisStats) (any, error) {
query := buildPromptQuery(transcription, p)
inputChars := utf8.RuneCountInString(query)
res, err := callLLM(ctx, apiURL, model, query)
logLLMCall(taskID, model, p.Name, index, total, 1, inputChars, res, err)
accumulateUsage(stats, res)
if err != nil {
return nil, err
}
var parsed any
if err := json.Unmarshal([]byte(res.Content), &parsed); err != nil {
return nil, fmt.Errorf("parse: %w, resp: %s", err, truncate(res.Content, 300))
}
return parsed, nil
}
func runAnalysis(ctx context.Context, apiURL, model, taskID, transcription string, prompts []Prompt) (AnalysisResult, analysisStats, error) {
stats := analysisStats{}
result := make(AnalysisResult, len(prompts))
valid := make([]Prompt, 0, len(prompts))
for _, p := range prompts {
if p.Name != "" {
valid = append(valid, p)
}
}
total := len(valid)
for i, p := range valid {
value, err := analysePrompt(ctx, apiURL, model, transcription, p, i+1, total, taskID, &stats)
if err != nil {
return nil, stats, fmt.Errorf("%s: %w", p.Name, err)
}
result[p.Name] = value
}
return result, stats, nil
}
// ===================== DB =====================
func saveAnalysis(ctx context.Context, db *sql.DB, task WorkerMessage, analysis []byte) (complete bool, err error) {
metadata, _ := json.Marshal(map[string]any{
"file_path": task.FilePath,
"language": task.Language,
"segments": task.Segments,
"prompts": task.Prompts,
"transcribed_at": task.TranscribedAt,
})
_, err = db.ExecContext(ctx,
`INSERT INTO results (task_id) VALUES ($1) ON CONFLICT (task_id) DO NOTHING`, task.TaskID)
if err != nil {
return false, fmt.Errorf("ensure row: %w", err)
}
err = db.QueryRowContext(ctx, `
UPDATE results
SET analysis = $2::jsonb,
filename = COALESCE(NULLIF($3, ''), filename),
transcription = COALESCE(NULLIF($4, ''), transcription),
metadata = COALESCE($5::jsonb, metadata),
updated_at = now(),
status = CASE WHEN tagging IS NOT NULL THEN 'done' ELSE status END
WHERE task_id = $1
RETURNING (analysis IS NOT NULL AND tagging IS NOT NULL)
`, task.TaskID, string(analysis), task.Filename, task.Transcription, string(metadata)).Scan(&complete)
if err != nil {
return false, fmt.Errorf("update analysis: %w", err)
}
return complete, nil
}
// ===================== MAIN =====================
func loadDotenv() {
path := os.Getenv("DOTENV_PATH")
if path == "" {
return
}
if err := godotenv.Overload(path); err != nil {
slog.Warn("dotenv load failed", "path", path, "error", err)
return
}
slog.Info("dotenv loaded", "path", path)
}
func main() {
loadDotenv()
amqpURL := getEnv("RABBITMQ_URL", "amqp://guest:guest@localhost:5672/")
dbURL := getEnv("DATABASE_URL", "")
token := os.Getenv("YANDEX_API_KEY")
model := os.Getenv("YANDEX_MODEL")
apiURL := getEnv("YANDEX_API_URL", "https://ai.api.cloud.yandex.net/v1/chat/completions")
inputQueue := getEnv("ANALYSE_QUEUE", "analyse")
finalQueue := getEnv("FINAL_QUEUE", "final")
if token == "" {
slog.Error("YANDEX_API_KEY is required")
os.Exit(1)
}
if model == "" {
slog.Error("YANDEX_MODEL is required")
os.Exit(1)
}
if dbURL == "" {
slog.Error("DATABASE_URL is required")
os.Exit(1)
}
slog.Info("config loaded", "worker", "analyse",
"yandex_token", tokenFingerprint(token), "model", model, "api_url", apiURL)
db := mustDB(dbURL)
defer db.Close()
checkCtx, checkCancel := context.WithTimeout(context.Background(), 90*time.Second)
if err := checkYandexAPI(checkCtx, apiURL, model); err != nil {
checkCancel()
slog.Error("yandex api check failed — worker will not start", "worker", "analyse", "error", err)
os.Exit(1)
}
checkCancel()
ch := mustRabbit(amqpURL)
if _, err := ch.QueueDeclare(inputQueue, true, false, false, false, nil); err != nil {
slog.Error("declare queue failed", "queue", inputQueue, "error", err)
os.Exit(1)
}
if _, err := ch.QueueDeclare(finalQueue, true, false, false, false, nil); err != nil {
slog.Error("declare queue failed", "queue", finalQueue, "error", err)
os.Exit(1)
}
ch.Qos(1, 0, false)
msgs, err := ch.Consume(inputQueue, "", false, false, false, false, nil)
if err != nil {
slog.Error("consume failed", "error", err)
os.Exit(1)
}
slog.Info("worker started", "worker", "analyse", "queue", inputQueue, "model", model)
for d := range msgs {
taskStart := time.Now()
var task WorkerMessage
if err := json.Unmarshal(d.Body, &task); err != nil {
slog.Warn("bad message", "worker", "analyse", "delivery_tag", d.DeliveryTag,
"body_bytes", len(d.Body), "error", err)
d.Nack(false, false)
continue
}
promptNames := make([]string, 0, len(task.Prompts))
promptTextChars := 0
for _, p := range task.Prompts {
if p.Name != "" {
promptNames = append(promptNames, p.Name)
promptTextChars += utf8.RuneCountInString(p.Prompt)
}
}
transcriptionChars := utf8.RuneCountInString(task.Transcription)
slog.Info("message received", "worker", "analyse",
"task_id", task.TaskID,
"filename", task.Filename,
"delivery_tag", d.DeliveryTag,
"redelivered", d.Redelivered,
"body_bytes", len(d.Body),
"transcription_chars", transcriptionChars,
"segments", len(task.Segments),
"prompts", len(promptNames),
"prompt_names", promptNames,
"prompt_text_chars", promptTextChars,
"llm_calls_expected", len(promptNames),
)
if d.Redelivered {
slog.Warn("redelivered message skipped — no llm call",
"worker", "analyse", "task_id", task.TaskID,
"delivery_tag", d.DeliveryTag, "prompts", len(promptNames))
d.Nack(false, false)
continue
}
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
result, stats, err := runAnalysis(ctx, apiURL, model, task.TaskID, task.Transcription, task.Prompts)
if err != nil {
cancel()
slog.Warn("task failed, discarded",
"worker", "analyse", "task_id", task.TaskID,
"llm_calls_done", stats.LLMCalls,
"total_tokens_so_far", stats.TotalTokens,
"error", err)
d.Nack(false, false)
continue
}
analysisJSON, _ := json.Marshal(result)
complete, err := saveAnalysis(ctx, db, task, analysisJSON)
if err != nil {
cancel()
slog.Warn("db save failed, discarded",
"worker", "analyse", "task_id", task.TaskID, "error", err)
d.Nack(false, false)
continue
}
taskAttrs := []any{
"worker", "analyse",
"task_id", task.TaskID,
"llm_calls", stats.LLMCalls,
"total_tokens", stats.TotalTokens,
"prompt_tokens", stats.PromptTokens,
"completion_tokens", stats.OutputTokens,
"duration_ms", time.Since(taskStart).Milliseconds(),
}
if complete {
notifyFinal(ctx, ch, db, finalQueue, task.TaskID, "analyse")
slog.Info("task complete", append(taskAttrs, "was_last", "analyse")...)
} else {
slog.Info("task partial", append(taskAttrs, "waiting_for", "tagging")...)
}
cancel()
d.Ack(false)
}
}
func truncate(s string, max int) string {
if len(s) <= max {
return s
}
return s[:max] + "..."
}
func loadFinalPayload(ctx context.Context, db *sql.DB, taskID string) ([]byte, error) {
var (
filename, transcription, status sql.NullString
analysis, tagging, metadata []byte
createdAt, updatedAt time.Time
)
err := db.QueryRowContext(ctx, `
SELECT filename, transcription, analysis, tagging, metadata, status, created_at, updated_at
FROM results WHERE task_id = $1
`, taskID).Scan(&filename, &transcription, &analysis, &tagging, &metadata, &status, &createdAt, &updatedAt)
if err != nil {
return nil, fmt.Errorf("load result: %w", err)
}
msg := map[string]any{
"task_id": taskID,
"status": status.String,
"created_at": createdAt,
"updated_at": updatedAt,
}
if filename.Valid {
msg["filename"] = filename.String
}
if transcription.Valid {
msg["transcription"] = transcription.String
}
if len(analysis) > 0 {
var v any
if err := json.Unmarshal(analysis, &v); err == nil {
msg["analysis"] = v
}
}
if len(tagging) > 0 {
var v any
if err := json.Unmarshal(tagging, &v); err == nil {
msg["tagging"] = v
}
}
if len(metadata) > 0 {
var meta map[string]any
if err := json.Unmarshal(metadata, &meta); err == nil {
for k, v := range meta {
msg[k] = v
}
}
}
return json.Marshal(msg)
}
func notifyFinal(ctx context.Context, ch *amqp.Channel, db *sql.DB, queue, taskID, worker string) {
body, err := loadFinalPayload(ctx, db, taskID)
if err != nil {
slog.Warn("load final payload failed", "worker", worker, "task_id", taskID, "error", err)
return
}
if err := ch.PublishWithContext(ctx, "", queue, false, false,
amqp.Publishing{
ContentType: "application/json",
Body: body,
DeliveryMode: amqp.Persistent,
}); err != nil {
slog.Warn("publish final failed", "worker", worker, "task_id", taskID, "error", err)
return
}
slog.Info("published final", "worker", worker, "task_id", taskID, "queue", queue, "body_bytes", len(body))
deleteProcessingFile(extractFilePath(body), taskID, worker)
}
func extractFilePath(body []byte) string {
var msg map[string]any
if err := json.Unmarshal(body, &msg); err != nil {
return ""
}
fp, _ := msg["file_path"].(string)
return fp
}
func deleteProcessingFile(filePath, taskID, worker string) {
if filePath == "" {
slog.Warn("processing file not deleted: no file_path", "worker", worker, "task_id", taskID)
return
}
if !strings.Contains(filePath, "/processing/") {
slog.Warn("processing file not deleted: path outside processing", "worker", worker, "task_id", taskID, "path", filePath)
return
}
if err := os.Remove(filePath); err != nil {
if os.IsNotExist(err) {
slog.Info("processing file already removed", "worker", worker, "task_id", taskID, "path", filePath)
return
}
slog.Warn("processing file delete failed", "worker", worker, "task_id", taskID, "path", filePath, "error", err)
return
}
slog.Info("processing file deleted", "worker", worker, "task_id", taskID, "path", filePath)
}
func getEnv(k, d string) string {
if v := os.Getenv(k); v != "" {
return v
}
return d
}
func tokenFingerprint(token string) string {
if len(token) <= 12 {
return "***"
}
return token[:8] + "..." + token[len(token)-4:]
}
func mustDB(url string) *sql.DB {
db, err := sql.Open("pgx", url)
if err != nil {
slog.Error("db open failed", "error", err)
os.Exit(1)
}
db.SetMaxOpenConns(5)
time.Sleep(2 * time.Second) // дать Docker DNS зарегистрировать postgres
for i := 0; i < 60; i++ {
if err = db.Ping(); err == nil {
return db
}
if i < 5 || (i+1)%10 == 0 {
slog.Info("waiting for db", "attempt", i+1, "error", err)
}
time.Sleep(3 * time.Second)
}
slog.Error("db unreachable", "error", err)
os.Exit(1)
return nil
}
func mustRabbit(url string) *amqp.Channel {
var conn *amqp.Connection
var err error
for i := 0; i < 30; i++ {
conn, err = amqp.Dial(url)
if err == nil {
break
}
slog.Info("waiting for rabbit", "attempt", i+1, "error", err)
time.Sleep(2 * time.Second)
}
if err != nil {
slog.Error("rabbit unreachable", "error", err)
os.Exit(1)
}
ch, err := conn.Channel()
if err != nil {
slog.Error("rabbit channel failed", "error", err)
os.Exit(1)
}
return ch
}

18
workers/analyse/go.mod Normal file
View File

@@ -0,0 +1,18 @@
module github.com/yourorg/analyse
go 1.22
require (
github.com/jackc/pgx/v5 v5.5.5
github.com/joho/godotenv v1.5.1
github.com/rabbitmq/amqp091-go v1.9.0
)
require (
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect
github.com/jackc/puddle/v2 v2.2.1 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/sync v0.1.0 // indirect
golang.org/x/text v0.14.0 // indirect
)

41
workers/analyse/go.sum Normal file
View File

@@ -0,0 +1,41 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk=
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
github.com/jackc/pgx/v5 v5.5.5 h1:amBjrZVmksIdNjxGW/IiIMzxMKZFelXbUoPNb+8sjQw=
github.com/jackc/pgx/v5 v5.5.5/go.mod h1:ez9gk+OAat140fv9ErkZDYFWmXLfV+++K0uAOiwgm1A=
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rabbitmq/amqp091-go v1.9.0 h1:qrQtyzB4H8BQgEuJwhmVQqVHB9O4+MNDJCCAcpc3Aoo=
github.com/rabbitmq/amqp091-go v1.9.0/go.mod h1:+jPrT9iY2eLjRaMSRHUhc3z14E/l85kv/f+6luSD3pc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A=
go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4=
golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k=
golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -0,0 +1,11 @@
FROM golang:1.22-alpine AS build
WORKDIR /src
COPY go.mod go.sum* ./
RUN go mod download
COPY . .
RUN CGO_ENABLED=0 go build -o /tagging ./cmd/tagging
FROM alpine:3.19
RUN apk add --no-cache ca-certificates
COPY --from=build /tagging /tagging
ENTRYPOINT ["/tagging"]

View File

@@ -0,0 +1,685 @@
package main
import (
"bytes"
"context"
"database/sql"
"encoding/json"
"fmt"
"io"
"log/slog"
"net"
"net/http"
"os"
"strings"
"time"
"unicode/utf8"
"github.com/joho/godotenv"
_ "github.com/jackc/pgx/v5/stdlib"
amqp "github.com/rabbitmq/amqp091-go"
)
func init() {
slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelInfo})))
}
func apiURL() string {
if u := os.Getenv("YANDEX_API_URL"); u != "" {
return u
}
return "https://ai.api.cloud.yandex.net/v1/chat/completions"
}
// ── входящее сообщение из очереди tagging ──
type WorkerMessage struct {
TaskID string `json:"task_id"`
Filename string `json:"filename"`
Transcription string `json:"transcription"`
}
// ── результат классификации ──
type ClassificationResult struct {
L1 string `json:"L1"`
L2 string `json:"L2"`
L3 string `json:"L3"`
RiskLevel string `json:"risk_level"`
HasActionItems bool `json:"has_action_items"`
HasDeadline bool `json:"has_deadline"`
}
// ── LLM request/response ──
type chatMessage struct {
Role string `json:"role"`
Content string `json:"content"`
}
type chatRequest struct {
Model string `json:"model"`
Temperature float64 `json:"temperature"`
ResponseFormat struct {
Type string `json:"type"`
} `json:"response_format"`
Messages []chatMessage `json:"messages"`
}
type tokenUsage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
}
type chatResponse struct {
Choices []struct {
Message struct {
Content string `json:"content"`
} `json:"message"`
} `json:"choices"`
Usage *tokenUsage `json:"usage"`
}
type llmCallResult struct {
Content string
RequestBytes int
ResponseBytes int
Usage *tokenUsage
Duration time.Duration
}
// ===================== LLM =====================
var llmHTTPClient = newLLMHTTPClient(90 * time.Second)
func newLLMHTTPClient(totalTimeout time.Duration) *http.Client {
return &http.Client{
Timeout: totalTimeout,
Transport: &http.Transport{
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
}).DialContext,
TLSHandshakeTimeout: 60 * time.Second,
ResponseHeaderTimeout: 60 * time.Second,
ExpectContinueTimeout: 5 * time.Second,
IdleConnTimeout: 90 * time.Second,
},
}
}
func callLLM(ctx context.Context, model, prompt string) (*llmCallResult, error) {
const systemPrompt = "Ты — классификатор диалогов в логистике. Отвечай только JSON, без пояснений."
reqBody := chatRequest{
Model: model,
Temperature: 0.1,
Messages: []chatMessage{
{Role: "system", Content: systemPrompt},
{Role: "user", Content: prompt},
},
}
reqBody.ResponseFormat.Type = "json_object"
jsonData, err := json.Marshal(reqBody)
if err != nil {
return nil, err
}
req, err := http.NewRequestWithContext(ctx, "POST", apiURL(), bytes.NewBuffer(jsonData))
if err != nil {
return nil, err
}
req.Header.Set("Authorization", "Bearer "+os.Getenv("YANDEX_API_KEY"))
req.Header.Set("Content-Type", "application/json")
start := time.Now()
resp, err := llmHTTPClient.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
body, _ := io.ReadAll(resp.Body)
duration := time.Since(start)
if resp.StatusCode != http.StatusOK {
return &llmCallResult{
RequestBytes: len(jsonData),
ResponseBytes: len(body),
Duration: duration,
}, fmt.Errorf("status %d: %s", resp.StatusCode, truncate(string(body), 500))
}
var result chatResponse
if err := json.Unmarshal(body, &result); err != nil {
return &llmCallResult{
RequestBytes: len(jsonData),
ResponseBytes: len(body),
Duration: duration,
}, err
}
if len(result.Choices) == 0 {
return &llmCallResult{
RequestBytes: len(jsonData),
ResponseBytes: len(body),
Duration: duration,
}, fmt.Errorf("empty response")
}
return &llmCallResult{
Content: result.Choices[0].Message.Content,
RequestBytes: len(jsonData),
ResponseBytes: len(body),
Usage: result.Usage,
Duration: duration,
}, nil
}
func checkYandexAPI(ctx context.Context, model string) error {
slog.Info("yandex api check started", "worker", "tagging", "url", apiURL(), "model", model)
res, err := callLLM(ctx, model, `Ответь только JSON: {"ok":true}`)
if err != nil {
return err
}
attrs := []any{
"worker", "tagging",
"duration_ms", res.Duration.Milliseconds(),
"response_chars", utf8.RuneCountInString(res.Content),
}
if res.Usage != nil {
attrs = append(attrs,
"prompt_tokens", res.Usage.PromptTokens,
"completion_tokens", res.Usage.CompletionTokens,
"total_tokens", res.Usage.TotalTokens,
)
}
slog.Info("yandex api check ok", attrs...)
return nil
}
func logLLMCall(worker, taskID, model, callType string, attempt int, inputChars int, res *llmCallResult, err error) {
attrs := []any{
"worker", worker,
"task_id", taskID,
"model", model,
"call_type", callType,
"attempt", attempt,
"input_chars", inputChars,
}
if res != nil {
attrs = append(attrs,
"duration_ms", res.Duration.Milliseconds(),
"request_bytes", res.RequestBytes,
"response_bytes", res.ResponseBytes,
"response_chars", utf8.RuneCountInString(res.Content),
)
if res.Usage != nil {
attrs = append(attrs,
"prompt_tokens", res.Usage.PromptTokens,
"completion_tokens", res.Usage.CompletionTokens,
"total_tokens", res.Usage.TotalTokens,
)
}
}
if err != nil {
slog.Warn("llm call failed", append(attrs, "error", err)...)
return
}
slog.Info("llm call ok", attrs...)
}
func buildPrompt(text string) string {
return fmt.Sprintf(`Ты — классификатор диалогов в логистике.
Тебе даётся НЕструктурированный текст диалога (разговор, звонок, переписка).
Текст может быть неаккуратным, с ошибками, без структуры.
Твоя задача:
1. Понять смысл диалога
2. Выделить ключевую цель разговора
3. Определить наличие проблемы
4. Классифицировать диалог по правилам ниже
=== ИЕРАРХИЯ КЛАССОВ ===
L1:
- new_order
- order_change
- tracking
- delivery_coordination
- problem
- claim
- information_request
- internal_communication
- other
L2:
Для problem:
- delivery_issue
- cargo_issue
- data_issue
- communication_issue
Для delivery_coordination:
- delivery_time
- unloading_conditions
- warehouse_rules
- access
- scheduling
Для tracking:
- location_request
- status_update
- eta
L3 (опционально):
- wrong_contact
- wrong_address
- missing_info
- delay
- lost
- damage
- cannot_reach
- no_response
=== ДОПОЛНИТЕЛЬНЫЕ ПОЛЯ ===
risk_level:
- none
- low
- medium
- high
has_action_items:
- true / false
has_deadline:
- true / false
=== ПРАВИЛА ===
1. Определи основную цель разговора:
- заказ → new_order
- изменение → order_change
- узнать статус → tracking
- согласование → delivery_coordination
- ошибка / проблема → problem
2. Если есть любая ошибка или сбой → ВСЕГДА L1 = problem
3. Ошибки в email / телефоне / адресе → L2 = data_issue
4. Если обсуждают условия (время, склад, разгрузка) без проблемы → delivery_coordination
5. Если спрашивают "где груз?" → tracking
6. Определи risk_level:
- low → проблема не влияет на доставку
- medium → возможна задержка
- high → срыв сроков / потеря
7. has_action_items = true если:
- есть договорённости ("перезвоню", "свяжется", "отправлю")
8. has_deadline = true если:
- есть конкретное время ("в 18:00", "через 10 минут", "завтра")
---
=== ФОРМАТ ОТВЕТА ===
Ответ только JSON, без пояснений:
{
"L1": "...",
"L2": "...",
"L3": "...",
"risk_level": "...",
"has_action_items": true/false,
"has_deadline": true/false
}
---
=== ДИАЛОГ ===
Текст:
"""
%s
"""`, text)
}
func classify(ctx context.Context, taskID, model, text string) (ClassificationResult, error) {
prompt := buildPrompt(text)
inputChars := utf8.RuneCountInString(prompt)
res, err := callLLM(ctx, model, prompt)
logLLMCall("tagging", taskID, model, "classify", 1, inputChars, res, err)
if err != nil {
return ClassificationResult{}, err
}
var result ClassificationResult
if err := json.Unmarshal([]byte(res.Content), &result); err != nil {
return ClassificationResult{}, fmt.Errorf("parse: %w, resp: %s", err, truncate(res.Content, 300))
}
return result, nil
}
// ===================== DB =====================
func saveTagging(ctx context.Context, db *sql.DB, taskID, filename, transcription string, tagging []byte) (complete bool, err error) {
_, err = db.ExecContext(ctx,
`INSERT INTO results (task_id) VALUES ($1) ON CONFLICT (task_id) DO NOTHING`, taskID)
if err != nil {
return false, fmt.Errorf("ensure row: %w", err)
}
err = db.QueryRowContext(ctx, `
UPDATE results
SET tagging = $2::jsonb,
filename = COALESCE(NULLIF($3, ''), filename),
transcription = COALESCE(NULLIF($4, ''), transcription),
updated_at = now(),
status = CASE WHEN analysis IS NOT NULL THEN 'done' ELSE status END
WHERE task_id = $1
RETURNING (analysis IS NOT NULL AND tagging IS NOT NULL)
`, taskID, string(tagging), filename, transcription).Scan(&complete)
if err != nil {
return false, fmt.Errorf("update tagging: %w", err)
}
return complete, nil
}
// ===================== MAIN =====================
func loadDotenv() {
path := os.Getenv("DOTENV_PATH")
if path == "" {
return
}
if err := godotenv.Overload(path); err != nil {
slog.Warn("dotenv load failed", "path", path, "error", err)
return
}
slog.Info("dotenv loaded", "path", path)
}
func main() {
loadDotenv()
amqpURL := getenv("RABBITMQ_URL", "amqp://guest:guest@localhost:5672/")
dbURL := os.Getenv("DATABASE_URL")
token := os.Getenv("YANDEX_API_KEY")
model := os.Getenv("YANDEX_MODEL")
inputQueue := getenv("TAGGING_QUEUE", "tagging")
finalQueue := getenv("FINAL_QUEUE", "final")
if token == "" {
slog.Error("YANDEX_API_KEY is required")
os.Exit(1)
}
if model == "" {
slog.Error("YANDEX_MODEL is required")
os.Exit(1)
}
slog.Info("config loaded", "worker", "tagging",
"yandex_token", tokenFingerprint(token), "model", model, "api_url", apiURL())
db := mustDB(dbURL)
defer db.Close()
checkCtx, checkCancel := context.WithTimeout(context.Background(), 90*time.Second)
if err := checkYandexAPI(checkCtx, model); err != nil {
checkCancel()
slog.Error("yandex api check failed — worker will not start", "worker", "tagging", "error", err)
os.Exit(1)
}
checkCancel()
ch := mustRabbit(amqpURL)
if _, err := ch.QueueDeclare(inputQueue, true, false, false, false, nil); err != nil {
slog.Error("declare queue failed", "queue", inputQueue, "error", err)
os.Exit(1)
}
if _, err := ch.QueueDeclare(finalQueue, true, false, false, false, nil); err != nil {
slog.Error("declare queue failed", "queue", finalQueue, "error", err)
os.Exit(1)
}
ch.Qos(1, 0, false)
msgs, err := ch.Consume(inputQueue, "", false, false, false, false, nil)
if err != nil {
slog.Error("consume failed", "error", err)
os.Exit(1)
}
slog.Info("worker started", "worker", "tagging", "queue", inputQueue, "model", model)
for d := range msgs {
taskStart := time.Now()
var task WorkerMessage
if err := json.Unmarshal(d.Body, &task); err != nil {
slog.Warn("bad message", "worker", "tagging", "delivery_tag", d.DeliveryTag,
"body_bytes", len(d.Body), "error", err)
d.Nack(false, false)
continue
}
transcriptionChars := utf8.RuneCountInString(task.Transcription)
slog.Info("message received", "worker", "tagging",
"task_id", task.TaskID,
"filename", task.Filename,
"delivery_tag", d.DeliveryTag,
"redelivered", d.Redelivered,
"body_bytes", len(d.Body),
"transcription_chars", transcriptionChars,
"llm_calls_expected", 1,
)
if d.Redelivered {
slog.Warn("redelivered message skipped — no llm call",
"worker", "tagging", "task_id", task.TaskID, "delivery_tag", d.DeliveryTag)
d.Nack(false, false)
continue
}
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
result, err := classify(ctx, task.TaskID, model, task.Transcription)
if err != nil {
cancel()
slog.Warn("task failed, discarded",
"worker", "tagging", "task_id", task.TaskID,
"llm_calls", 1, "error", err)
d.Nack(false, false)
continue
}
tagJSON, _ := json.Marshal(result)
complete, err := saveTagging(ctx, db, task.TaskID, task.Filename, task.Transcription, tagJSON)
if err != nil {
cancel()
slog.Warn("db save failed, discarded",
"worker", "tagging", "task_id", task.TaskID, "error", err)
d.Nack(false, false)
continue
}
if complete {
notifyFinal(ctx, ch, db, finalQueue, task.TaskID, "tagging")
slog.Info("task complete", "worker", "tagging", "task_id", task.TaskID,
"was_last", "tagging", "L1", result.L1,
"llm_calls", 1, "duration_ms", time.Since(taskStart).Milliseconds())
} else {
slog.Info("task partial", "worker", "tagging", "task_id", task.TaskID,
"waiting_for", "analyse", "L1", result.L1,
"llm_calls", 1, "duration_ms", time.Since(taskStart).Milliseconds())
}
cancel()
d.Ack(false)
}
}
func getenv(k, d string) string {
if v := os.Getenv(k); v != "" {
return v
}
return d
}
func tokenFingerprint(token string) string {
if len(token) <= 12 {
return "***"
}
return token[:8] + "..." + token[len(token)-4:]
}
func truncate(s string, max int) string {
if len(s) <= max {
return s
}
return s[:max] + "..."
}
func loadFinalPayload(ctx context.Context, db *sql.DB, taskID string) ([]byte, error) {
var (
filename, transcription, status sql.NullString
analysis, tagging, metadata []byte
createdAt, updatedAt time.Time
)
err := db.QueryRowContext(ctx, `
SELECT filename, transcription, analysis, tagging, metadata, status, created_at, updated_at
FROM results WHERE task_id = $1
`, taskID).Scan(&filename, &transcription, &analysis, &tagging, &metadata, &status, &createdAt, &updatedAt)
if err != nil {
return nil, fmt.Errorf("load result: %w", err)
}
msg := map[string]any{
"task_id": taskID,
"status": status.String,
"created_at": createdAt,
"updated_at": updatedAt,
}
if filename.Valid {
msg["filename"] = filename.String
}
if transcription.Valid {
msg["transcription"] = transcription.String
}
if len(analysis) > 0 {
var v any
if err := json.Unmarshal(analysis, &v); err == nil {
msg["analysis"] = v
}
}
if len(tagging) > 0 {
var v any
if err := json.Unmarshal(tagging, &v); err == nil {
msg["tagging"] = v
}
}
if len(metadata) > 0 {
var meta map[string]any
if err := json.Unmarshal(metadata, &meta); err == nil {
for k, v := range meta {
msg[k] = v
}
}
}
return json.Marshal(msg)
}
func notifyFinal(ctx context.Context, ch *amqp.Channel, db *sql.DB, queue, taskID, worker string) {
body, err := loadFinalPayload(ctx, db, taskID)
if err != nil {
slog.Warn("load final payload failed", "worker", worker, "task_id", taskID, "error", err)
return
}
if err := ch.PublishWithContext(ctx, "", queue, false, false,
amqp.Publishing{
ContentType: "application/json",
Body: body,
DeliveryMode: amqp.Persistent,
}); err != nil {
slog.Warn("publish final failed", "worker", worker, "task_id", taskID, "error", err)
return
}
slog.Info("published final", "worker", worker, "task_id", taskID, "queue", queue, "body_bytes", len(body))
deleteProcessingFile(extractFilePath(body), taskID, worker)
}
func extractFilePath(body []byte) string {
var msg map[string]any
if err := json.Unmarshal(body, &msg); err != nil {
return ""
}
fp, _ := msg["file_path"].(string)
return fp
}
func deleteProcessingFile(filePath, taskID, worker string) {
if filePath == "" {
slog.Warn("processing file not deleted: no file_path", "worker", worker, "task_id", taskID)
return
}
if !strings.Contains(filePath, "/processing/") {
slog.Warn("processing file not deleted: path outside processing", "worker", worker, "task_id", taskID, "path", filePath)
return
}
if err := os.Remove(filePath); err != nil {
if os.IsNotExist(err) {
slog.Info("processing file already removed", "worker", worker, "task_id", taskID, "path", filePath)
return
}
slog.Warn("processing file delete failed", "worker", worker, "task_id", taskID, "path", filePath, "error", err)
return
}
slog.Info("processing file deleted", "worker", worker, "task_id", taskID, "path", filePath)
}
func mustDB(url string) *sql.DB {
db, err := sql.Open("pgx", url)
if err != nil {
slog.Error("db open failed", "error", err)
os.Exit(1)
}
db.SetMaxOpenConns(5)
time.Sleep(2 * time.Second) // дать Docker DNS зарегистрировать postgres
for i := 0; i < 60; i++ {
if err = db.Ping(); err == nil {
return db
}
if i < 5 || (i+1)%10 == 0 {
slog.Info("waiting for db", "attempt", i+1, "error", err)
}
time.Sleep(3 * time.Second)
}
slog.Error("db unreachable", "error", err)
os.Exit(1)
return nil
}
func mustRabbit(url string) *amqp.Channel {
var conn *amqp.Connection
var err error
for i := 0; i < 30; i++ {
conn, err = amqp.Dial(url)
if err == nil {
break
}
slog.Info("waiting for rabbit", "attempt", i+1, "error", err)
time.Sleep(2 * time.Second)
}
if err != nil {
slog.Error("rabbit unreachable", "error", err)
os.Exit(1)
}
ch, err := conn.Channel()
if err != nil {
slog.Error("rabbit channel failed", "error", err)
os.Exit(1)
}
return ch
}

18
workers/tagging/go.mod Normal file
View File

@@ -0,0 +1,18 @@
module github.com/yourorg/tagging
go 1.22
require (
github.com/jackc/pgx/v5 v5.5.5
github.com/joho/godotenv v1.5.1
github.com/rabbitmq/amqp091-go v1.9.0
)
require (
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a // indirect
github.com/jackc/puddle/v2 v2.2.1 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/sync v0.1.0 // indirect
golang.org/x/text v0.14.0 // indirect
)

41
workers/tagging/go.sum Normal file
View File

@@ -0,0 +1,41 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM=
github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg=
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a h1:bbPeKD0xmW/Y25WS6cokEszi5g+S0QxI/d45PkRi7Nk=
github.com/jackc/pgservicefile v0.0.0-20221227161230-091c0ba34f0a/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM=
github.com/jackc/pgx/v5 v5.5.5 h1:amBjrZVmksIdNjxGW/IiIMzxMKZFelXbUoPNb+8sjQw=
github.com/jackc/pgx/v5 v5.5.5/go.mod h1:ez9gk+OAat140fv9ErkZDYFWmXLfV+++K0uAOiwgm1A=
github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk=
github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4=
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rabbitmq/amqp091-go v1.9.0 h1:qrQtyzB4H8BQgEuJwhmVQqVHB9O4+MNDJCCAcpc3Aoo=
github.com/rabbitmq/amqp091-go v1.9.0/go.mod h1:+jPrT9iY2eLjRaMSRHUhc3z14E/l85kv/f+6luSD3pc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A=
go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4=
golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k=
golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4=
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -0,0 +1,11 @@
FROM golang:1.22-alpine AS build
WORKDIR /src
COPY go.mod go.sum* ./
RUN go mod download
COPY . .
RUN CGO_ENABLED=0 go build -o /transcribe ./cmd/transcribe
FROM alpine:3.19
RUN apk add --no-cache ca-certificates
COPY --from=build /transcribe /transcribe
ENTRYPOINT ["/transcribe"]

View File

@@ -0,0 +1,64 @@
package main
import (
"context"
"log/slog"
"os"
"os/signal"
"syscall"
"time"
amqp "github.com/rabbitmq/amqp091-go"
"github.com/yourorg/transcribe/internal/config"
"github.com/yourorg/transcribe/internal/consumer"
)
func main() {
slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelInfo})))
cfg := config.Load()
if cfg.NexaraAPIKey == "" {
slog.Error("NEXARA_API_KEY is required")
os.Exit(1)
}
ch := mustRabbit(cfg.RabbitURL)
cons, err := consumer.New(cfg, ch)
if err != nil {
slog.Error("consumer init failed", "error", err)
os.Exit(1)
}
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
defer stop()
if err := cons.Run(ctx); err != nil && ctx.Err() == nil {
slog.Error("consumer stopped", "error", err)
os.Exit(1)
}
slog.Info("transcribe worker stopping")
}
func mustRabbit(url string) *amqp.Channel {
var conn *amqp.Connection
var err error
for i := 0; i < 30; i++ {
conn, err = amqp.Dial(url)
if err == nil {
break
}
slog.Info("waiting for rabbit", "attempt", i+1, "error", err)
time.Sleep(2 * time.Second)
}
if err != nil {
slog.Error("rabbit unreachable", "error", err)
os.Exit(1)
}
ch, err := conn.Channel()
if err != nil {
slog.Error("rabbit channel failed", "error", err)
os.Exit(1)
}
return ch
}

View File

@@ -0,0 +1,23 @@
[
{
"id": 1,
"id_section": 1,
"name": "behavioral",
"prompt": "Ты — строгий классификатор звонков.\n\nЗадача:\nПроанализируй диалог и оцени поведенческие критерии.\n\nКритерии:\n1. Приветствие\n2. Инициативность (выявление цели, попытка развить разговор)\n3. Уточнил, остались ли вопросы\n4. Прощание\n\nИнструкция:\nДля каждого критерия:\n- определи наличие\n- найди ДОСЛОВНУЮ цитату\n- оцени confidence (0.01.0)\n\nФормат ответа (строго JSON):\n\n{\n \"greeting\": {\n \"value\": true/false,\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"initiative\": {\n \"value\": true/false,\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"questions_check\": {\n \"value\": true/false,\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"closing\": {\n \"value\": true/false,\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n }\n}\n\nЖЁСТКИЕ ПРАВИЛА:\n- каждый критерий оценивается независимо\n- не додумывать\n- если нет → value=false, evidence=null, confidence=0.0\n- evidence должен подтверждать вывод",
"dt_create": "2026-06-09T09:00:00.000000"
},
{
"id": 2,
"id_section": 1,
"name": "client_data",
"prompt": "Ты — строгий классификатор звонков.\n\nЗадача:\nОпредели, какие данные о клиенте были получены.\n\nКритерии:\n1. Первый ли раз обращается\n2. Указан ли город клиента\n3. Тип клиента (физ/юр)\n4. Получены ли контакты\n5. Источник (откуда узнали)\n\nФормат ответа (строго JSON):\n\n{\n \"first_time\": {\n \"value\": true/false,\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"client_city\": {\n \"value\": true/false,\n \"city\": \"строка или null\",\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"client_type\": {\n \"value\": true/false,\n \"type\": \"physical|legal|null\",\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"contacts\": {\n \"value\": true/false,\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"source\": {\n \"value\": true/false,\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n }\n}\n\nЖЁСТКИЕ ПРАВИЛА:\n- city/type только если явно сказано\n- не додумывать\n- если нет → value=false, evidence=null, confidence=0.0",
"dt_create": "2026-06-09T09:00:00.000000"
},
{
"id": 3,
"id_section": 1,
"name": "cargo_data",
"prompt": "Ты — строгий классификатор логистических данных.\n\nКритерии:\n1. Характер груза\n2. Параметры груза (вес, объем, размеры)\n3. Стоимость груза\n\nФормат ответа (строго JSON):\n\n{\n \"cargo_type\": {\n \"value\": true/false,\n \"type\": \"строка или null\",\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"cargo_params\": {\n \"value\": true/false,\n \"params\": \"строка или null\",\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n },\n \"cargo_value\": {\n \"value\": true/false,\n \"amount\": \"строка или null\",\n \"evidence\": \"цитата или null\",\n \"confidence\": number\n }\n}\n\nЖЁСТКИЕ ПРАВИЛА:\n- только явные данные\n- числа/параметры должны быть в evidence\n- если нет → value=false, evidence=null, confidence=0.0",
"dt_create": "2026-06-09T09:00:00.000000"
}
]

View File

@@ -0,0 +1,5 @@
module github.com/yourorg/transcribe
go 1.22
require github.com/rabbitmq/amqp091-go v1.9.0

18
workers/transcribe/go.sum Normal file
View File

@@ -0,0 +1,18 @@
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/rabbitmq/amqp091-go v1.9.0 h1:qrQtyzB4H8BQgEuJwhmVQqVHB9O4+MNDJCCAcpc3Aoo=
github.com/rabbitmq/amqp091-go v1.9.0/go.mod h1:+jPrT9iY2eLjRaMSRHUhc3z14E/l85kv/f+6luSD3pc=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A=
go.uber.org/goleak v1.2.1/go.mod h1:qlT2yGI9QafXHhZZLxlSuNsMw3FFLxBr+tBRlmO1xH4=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@@ -0,0 +1,78 @@
package config
import (
"os"
"strconv"
"time"
)
type Config struct {
RabbitURL string
InputQueue string
OutputExchange string
AnalyseQueue string
TaggingQueue string
InputExchange string
InputRoutingKey string
Prefetch int
NexaraBaseURL string
NexaraAPIKey string
NexaraModel string
NexaraTimeout time.Duration
PromptsSource string
PromptsFile string
PromptsBaseURL string
PromptsAPIKey string
PromptsSection int
}
func Load() Config {
return Config{
RabbitURL: getEnv("RABBITMQ_URL", "amqp://guest:guest@localhost:5672/"),
InputQueue: getEnv("INPUT_QUEUE", "transcribe.tasks"),
OutputExchange: getEnv("OUTPUT_EXCHANGE", "transcription_done"),
AnalyseQueue: getEnv("ANALYSE_QUEUE", "analyse"),
TaggingQueue: getEnv("TAGGING_QUEUE", "tagging"),
InputExchange: getEnv("RABBITMQ_EXCHANGE", "audio_pipeline"),
InputRoutingKey: getEnv("RABBITMQ_ROUTING_KEY", "audio.new"),
Prefetch: getInt("PREFETCH", 1),
NexaraBaseURL: getEnv("NEXARA_BASE_URL", "https://api.nexara.ru"),
NexaraAPIKey: os.Getenv("NEXARA_API_KEY"),
NexaraModel: getEnv("NEXARA_MODEL", "whisper-1"),
NexaraTimeout: getDuration("NEXARA_TIMEOUT", 10*time.Minute),
PromptsSource: getEnv("PROMPTS_SOURCE", "static"),
PromptsFile: getEnv("PROMPTS_FILE", "/app/configs/prompts.json"),
PromptsBaseURL: os.Getenv("PROMPTS_BASE_URL"),
PromptsAPIKey: os.Getenv("PROMPTS_API_KEY"),
PromptsSection: getInt("PROMPTS_SECTION", 1),
}
}
func getEnv(key, def string) string {
if v := os.Getenv(key); v != "" {
return v
}
return def
}
func getInt(key string, def int) int {
if v := os.Getenv(key); v != "" {
if i, err := strconv.Atoi(v); err == nil {
return i
}
}
return def
}
func getDuration(key string, def time.Duration) time.Duration {
if v := os.Getenv(key); v != "" {
if d, err := time.ParseDuration(v); err == nil {
return d
}
}
return def
}

View File

@@ -0,0 +1,172 @@
package consumer
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"time"
amqp "github.com/rabbitmq/amqp091-go"
"github.com/yourorg/transcribe/internal/config"
"github.com/yourorg/transcribe/internal/models"
"github.com/yourorg/transcribe/internal/nexara"
"github.com/yourorg/transcribe/internal/prompts"
)
type Consumer struct {
cfg config.Config
ch *amqp.Channel
nexara *nexara.Client
prompts *prompts.Loader
}
func New(cfg config.Config, ch *amqp.Channel) (*Consumer, error) {
if err := setupTopology(ch, cfg); err != nil {
return nil, err
}
return &Consumer{
cfg: cfg,
ch: ch,
nexara: nexara.New(cfg.NexaraBaseURL, cfg.NexaraAPIKey, cfg.NexaraModel, cfg.NexaraTimeout),
prompts: prompts.New(cfg.PromptsSource, cfg.PromptsFile, cfg.PromptsBaseURL, cfg.PromptsAPIKey, cfg.PromptsSection),
}, nil
}
func setupTopology(ch *amqp.Channel, cfg config.Config) error {
if err := ch.ExchangeDeclare("dlx", "direct", true, false, false, false, nil); err != nil {
return fmt.Errorf("declare dlx: %w", err)
}
if err := ch.ExchangeDeclare(cfg.InputExchange, "direct", true, false, false, false, nil); err != nil {
return fmt.Errorf("declare input exchange: %w", err)
}
if err := ch.ExchangeDeclare(cfg.OutputExchange, "fanout", true, false, false, false, nil); err != nil {
return fmt.Errorf("declare output exchange: %w", err)
}
dlqArgs := amqp.Table{
"x-dead-letter-exchange": "dlx",
"x-dead-letter-routing-key": cfg.InputQueue + ".failed",
}
if _, err := ch.QueueDeclare(cfg.InputQueue, true, false, false, false, dlqArgs); err != nil {
return fmt.Errorf("declare input queue: %w", err)
}
if _, err := ch.QueueDeclare(cfg.InputQueue+".failed", true, false, false, false, nil); err != nil {
return fmt.Errorf("declare dlq: %w", err)
}
if err := ch.QueueBind(cfg.InputQueue+".failed", cfg.InputQueue+".failed", "dlx", false, nil); err != nil {
return fmt.Errorf("bind dlq: %w", err)
}
if err := ch.QueueBind(cfg.InputQueue, cfg.InputRoutingKey, cfg.InputExchange, false, nil); err != nil {
return fmt.Errorf("bind input queue: %w", err)
}
for _, q := range []string{cfg.AnalyseQueue, cfg.TaggingQueue} {
if _, err := ch.QueueDeclare(q, true, false, false, false, nil); err != nil {
return fmt.Errorf("declare queue %s: %w", q, err)
}
if err := ch.QueueBind(q, "", cfg.OutputExchange, false, nil); err != nil {
return fmt.Errorf("bind queue %s: %w", q, err)
}
}
return ch.Qos(cfg.Prefetch, 0, false)
}
func (c *Consumer) Run(ctx context.Context) error {
if err := c.ch.Confirm(false); err != nil {
return fmt.Errorf("confirm mode: %w", err)
}
msgs, err := c.ch.Consume(c.cfg.InputQueue, "", false, false, false, false, nil)
if err != nil {
return err
}
slog.Info("transcribe worker started", "queue", c.cfg.InputQueue, "output_exchange", c.cfg.OutputExchange)
for {
select {
case <-ctx.Done():
return nil
case d, ok := <-msgs:
if !ok {
return fmt.Errorf("delivery channel closed")
}
c.handle(ctx, d)
}
}
}
func (c *Consumer) handle(ctx context.Context, d amqp.Delivery) {
var task models.AudioTask
if err := json.Unmarshal(d.Body, &task); err != nil {
slog.Warn("bad message", "delivery_tag", d.DeliveryTag, "error", err)
_ = d.Nack(false, false)
return
}
slog.Info("message received", "task_id", task.TaskID, "file_path", task.FilePath, "filename", task.Filename)
txCtx, cancel := context.WithTimeout(ctx, c.cfg.NexaraTimeout+30*time.Second)
defer cancel()
text, lang, segments, err := c.nexara.TranscribeFile(txCtx, task.FilePath)
if err != nil {
slog.Warn("transcription failed", "task_id", task.TaskID, "error", err)
_ = d.Nack(false, false)
return
}
promptList, err := c.prompts.Load(txCtx)
if err != nil {
slog.Warn("prompts load failed", "task_id", task.TaskID, "error", err)
_ = d.Nack(false, false)
return
}
result := models.TranscriptionResult{
TaskID: task.TaskID,
Filename: task.Filename,
FilePath: task.FilePath,
Transcription: text,
Language: lang,
Segments: segments,
Prompts: promptList,
TranscribedAt: time.Now().Unix(),
}
body, err := json.Marshal(result)
if err != nil {
slog.Warn("marshal failed", "task_id", task.TaskID, "error", err)
_ = d.Nack(false, false)
return
}
confirms := c.ch.NotifyPublish(make(chan amqp.Confirmation, 1))
if err := c.ch.PublishWithContext(txCtx, c.cfg.OutputExchange, "", false, false, amqp.Publishing{
ContentType: "application/json",
Body: body,
DeliveryMode: amqp.Persistent,
}); err != nil {
slog.Warn("publish failed, requeue", "task_id", task.TaskID, "error", err)
_ = d.Nack(false, true)
return
}
select {
case confirm := <-confirms:
if !confirm.Ack {
slog.Warn("publish not confirmed, requeue", "task_id", task.TaskID)
_ = d.Nack(false, true)
return
}
case <-txCtx.Done():
slog.Warn("publish timeout, requeue", "task_id", task.TaskID)
_ = d.Nack(false, true)
return
}
slog.Info("transcribed", "task_id", task.TaskID, "language", lang, "chars", len(text), "segments", len(segments), "prompts", len(promptList))
_ = d.Ack(false)
}

View File

@@ -0,0 +1,34 @@
package models
type AudioTask struct {
TaskID string `json:"task_id"`
FilePath string `json:"file_path"`
Filename string `json:"filename"`
Size int64 `json:"size"`
CreatedAt int64 `json:"created_at"`
}
type Segment struct {
Start float64 `json:"start"`
End float64 `json:"end"`
Text string `json:"text"`
}
type Prompt struct {
ID int `json:"id"`
IDSection int `json:"id_section"`
Name string `json:"name"`
Prompt string `json:"prompt"`
DtCreate string `json:"dt_create"`
}
type TranscriptionResult struct {
TaskID string `json:"task_id"`
Filename string `json:"filename"`
FilePath string `json:"file_path"`
Transcription string `json:"transcription"`
Language string `json:"language"`
Segments []Segment `json:"segments,omitempty"`
Prompts []Prompt `json:"prompts"`
TranscribedAt int64 `json:"transcribed_at"`
}

View File

@@ -0,0 +1,117 @@
package nexara
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"mime/multipart"
"net/http"
"os"
"path/filepath"
"strings"
"time"
"github.com/yourorg/transcribe/internal/models"
)
type Client struct {
apiURL string
apiKey string
model string
httpClient *http.Client
}
func New(baseURL, apiKey, model string, timeout time.Duration) *Client {
baseURL = strings.TrimRight(baseURL, "/")
return &Client{
apiURL: baseURL + "/api/v1/audio/transcriptions",
apiKey: apiKey,
model: model,
httpClient: &http.Client{
Timeout: timeout,
},
}
}
func (c *Client) TranscribeFile(ctx context.Context, path string) (text, language string, segments []models.Segment, err error) {
f, err := os.Open(path)
if err != nil {
return "", "", nil, fmt.Errorf("open file: %w", err)
}
defer f.Close()
body := &bytes.Buffer{}
writer := multipart.NewWriter(body)
part, err := writer.CreateFormFile("file", filepath.Base(path))
if err != nil {
return "", "", nil, err
}
if _, err := io.Copy(part, f); err != nil {
return "", "", nil, err
}
if c.model != "" {
if err := writer.WriteField("model", c.model); err != nil {
return "", "", nil, err
}
}
if err := writer.WriteField("response_format", "json"); err != nil {
return "", "", nil, err
}
if err := writer.Close(); err != nil {
return "", "", nil, err
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.apiURL, body)
if err != nil {
return "", "", nil, err
}
req.Header.Set("Content-Type", writer.FormDataContentType())
req.Header.Set("Authorization", "Bearer "+c.apiKey)
resp, err := c.httpClient.Do(req)
if err != nil {
return "", "", nil, fmt.Errorf("request: %w", err)
}
defer resp.Body.Close()
respBody, err := io.ReadAll(resp.Body)
if err != nil {
return "", "", nil, err
}
if resp.StatusCode != http.StatusOK {
return "", "", nil, fmt.Errorf("status %d: %s", resp.StatusCode, string(respBody))
}
var raw map[string]any
if err := json.Unmarshal(respBody, &raw); err != nil {
return "", "", nil, fmt.Errorf("parse: %w", err)
}
if t, ok := raw["text"].(string); ok {
text = t
}
if lang, ok := raw["language"].(string); ok {
language = lang
}
if segs, ok := raw["segments"].([]any); ok {
for _, s := range segs {
m, ok := s.(map[string]any)
if !ok {
continue
}
var seg models.Segment
if v, ok := m["start"].(float64); ok {
seg.Start = v
}
if v, ok := m["end"].(float64); ok {
seg.End = v
}
if v, ok := m["text"].(string); ok {
seg.Text = v
}
segments = append(segments, seg)
}
}
return text, language, segments, nil
}

View File

@@ -0,0 +1,100 @@
package prompts
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"strconv"
"strings"
"time"
"github.com/yourorg/transcribe/internal/models"
)
type Loader struct {
source string
filePath string
baseURL string
apiKey string
sectionID int
client *http.Client
}
func New(source, filePath, baseURL, apiKey string, sectionID int) *Loader {
return &Loader{
source: source,
filePath: filePath,
baseURL: strings.TrimRight(baseURL, "/"),
apiKey: apiKey,
sectionID: sectionID,
client: &http.Client{Timeout: 30 * time.Second},
}
}
func (l *Loader) Load(ctx context.Context) ([]models.Prompt, error) {
switch strings.ToLower(l.source) {
case "http":
return l.loadHTTP(ctx)
default:
return l.loadStatic()
}
}
func (l *Loader) loadStatic() ([]models.Prompt, error) {
data, err := os.ReadFile(l.filePath)
if err != nil {
return nil, fmt.Errorf("read prompts file: %w", err)
}
var prompts []models.Prompt
if err := json.Unmarshal(data, &prompts); err != nil {
return nil, fmt.Errorf("parse prompts file: %w", err)
}
return filterSection(prompts, l.sectionID), nil
}
func (l *Loader) loadHTTP(ctx context.Context) ([]models.Prompt, error) {
if l.baseURL == "" {
return nil, fmt.Errorf("PROMPTS_BASE_URL is required for http source")
}
url := fmt.Sprintf("%s/metrics/?id_section=%s", l.baseURL, strconv.Itoa(l.sectionID))
req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
if err != nil {
return nil, err
}
if l.apiKey != "" {
req.Header.Set("Authorization", "Bearer "+l.apiKey)
}
resp, err := l.client.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, err
}
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("prompts api status %d: %s", resp.StatusCode, string(body))
}
var prompts []models.Prompt
if err := json.Unmarshal(body, &prompts); err != nil {
return nil, fmt.Errorf("parse prompts response: %w", err)
}
return filterSection(prompts, l.sectionID), nil
}
func filterSection(prompts []models.Prompt, sectionID int) []models.Prompt {
if sectionID <= 0 {
return prompts
}
out := make([]models.Prompt, 0, len(prompts))
for _, p := range prompts {
if p.IDSection == sectionID {
out = append(out, p)
}
}
return out
}