Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion internal/api/chat/create_conversation_message_stream_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ func (s *ChatServerV2) CreateConversationMessageStream(
APIKey: settings.OpenAIAPIKey,
}

openaiChatHistory, inappChatHistory, err := s.aiClientV2.ChatCompletionStreamV2(ctx, stream, conversation.ID.Hex(), modelSlug, conversation.OpenaiChatHistoryCompletion, llmProvider)
openaiChatHistory, inappChatHistory, _, err := s.aiClientV2.ChatCompletionStreamV2(ctx, stream, conversation.UserID, conversation.ProjectID, conversation.ID.Hex(), modelSlug, conversation.OpenaiChatHistoryCompletion, llmProvider)
if err != nil {
return s.sendStreamError(stream, err)
}
Expand Down
28 changes: 28 additions & 0 deletions internal/models/usage.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package models

import (
"time"

"go.mongodb.org/mongo-driver/v2/bson"
)

// Usage tracks cost per user, per project, per model, per hour.
// Each document represents one hour bucket of usage; buckets are
// upserted and incremented by UsageService.TrackUsage, keyed on
// (user_id, project_id, model_slug, hour_bucket).
type Usage struct {
	ID         bson.ObjectID `bson:"_id"`         // Assigned on first insert of the bucket
	UserID     bson.ObjectID `bson:"user_id"`     // User the cost is attributed to
	ProjectID  string        `bson:"project_id"`  // Project the cost is attributed to
	ModelSlug  string        `bson:"model_slug"`  // LLM model identifier (e.g. "gpt-5-nano")
	HourBucket bson.DateTime `bson:"hour_bucket"` // Timestamp truncated to the hour
	Cost       float64       `bson:"cost"`        // Cost in USD
	UpdatedAt  bson.DateTime `bson:"updated_at"`  // Last time this bucket was incremented
}

// CollectionName returns the MongoDB collection name used to store
// Usage documents.
func (u Usage) CollectionName() string {
	return "usages"
}

// TruncateToHour truncates a time to the start of its hour.
func TruncateToHour(t time.Time) time.Time {
return t.Truncate(time.Hour)
}
3 changes: 3 additions & 0 deletions internal/services/toolkit/client/client_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ type AIClientV2 struct {

reverseCommentService *services.ReverseCommentService
projectService *services.ProjectService
usageService *services.UsageService
cfg *cfg.Cfg
logger *logger.Logger
}
Expand Down Expand Up @@ -60,6 +61,7 @@ func NewAIClientV2(

reverseCommentService *services.ReverseCommentService,
projectService *services.ProjectService,
usageService *services.UsageService,
cfg *cfg.Cfg,
logger *logger.Logger,
) *AIClientV2 {
Expand Down Expand Up @@ -107,6 +109,7 @@ func NewAIClientV2(

reverseCommentService: reverseCommentService,
projectService: projectService,
usageService: usageService,
cfg: cfg,
logger: logger,
}
Expand Down
60 changes: 41 additions & 19 deletions internal/services/toolkit/client/completion_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,18 @@ import (
"paperdebugger/internal/models"
"paperdebugger/internal/services/toolkit/handler"
chatv2 "paperdebugger/pkg/gen/api/chat/v2"
"strconv"
"strings"

"github.com/openai/openai-go/v3"
"go.mongodb.org/mongo-driver/v2/bson"
)

// UsageCost holds cost information from a completion.
type UsageCost struct {
Cost float64
}

// define []openai.ChatCompletionMessageParamUnion as OpenAIChatHistory

// ChatCompletion orchestrates a chat completion process with a language model (e.g., GPT), handling tool calls and message history management.
Expand All @@ -24,13 +31,14 @@ import (
// Returns:
// 1. The full chat history sent to the language model (including any tool call results).
// 2. The incremental chat history visible to the user (including tool call results and assistant responses).
// 3. An error, if any occurred during the process.
func (a *AIClientV2) ChatCompletionV2(ctx context.Context, modelSlug string, messages OpenAIChatHistory, llmProvider *models.LLMProviderConfig) (OpenAIChatHistory, AppChatHistory, error) {
openaiChatHistory, inappChatHistory, err := a.ChatCompletionStreamV2(ctx, nil, "", modelSlug, messages, llmProvider)
// 3. Cost information (in USD).
// 4. An error, if any occurred during the process.
func (a *AIClientV2) ChatCompletionV2(ctx context.Context, modelSlug string, messages OpenAIChatHistory, llmProvider *models.LLMProviderConfig) (OpenAIChatHistory, AppChatHistory, UsageCost, error) {
openaiChatHistory, inappChatHistory, usage, err := a.ChatCompletionStreamV2(ctx, nil, bson.ObjectID{}, "", "", modelSlug, messages, llmProvider)
if err != nil {
return nil, nil, err
return nil, nil, UsageCost{}, err
}
return openaiChatHistory, inappChatHistory, nil
return openaiChatHistory, inappChatHistory, usage, nil
}

// ChatCompletionStream orchestrates a streaming chat completion process with a language model (e.g., GPT), handling tool calls, message history management, and real-time streaming of responses to the client.
Expand All @@ -46,17 +54,19 @@ func (a *AIClientV2) ChatCompletionV2(ctx context.Context, modelSlug string, mes
// Returns: (same as ChatCompletion)
// 1. The full chat history sent to the language model (including any tool call results).
// 2. The incremental chat history visible to the user (including tool call results and assistant responses).
// 3. An error, if any occurred during the process. (However, in the streaming mode, the error is not returned, but sending by callbackStream)
// 3. Cost information (in USD, accumulated across all calls).
// 4. An error, if any occurred during the process. (However, in the streaming mode, the error is not returned, but sending by callbackStream)
//
// This function works as follows: (same as ChatCompletion)
// - It initializes the chat history for the language model and the user, and sets up a stream handler for real-time updates.
// - It repeatedly sends the current chat history to the language model, receives streaming responses, and forwards them to the client as they arrive.
// - If tool calls are required, it handles them and appends the results to the chat history, then continues the loop.
// - If no tool calls are needed, it appends the assistant's response and exits the loop.
// - Finally, it returns the updated chat histories and any error encountered.
func (a *AIClientV2) ChatCompletionStreamV2(ctx context.Context, callbackStream chatv2.ChatService_CreateConversationMessageStreamServer, conversationId string, modelSlug string, messages OpenAIChatHistory, llmProvider *models.LLMProviderConfig) (OpenAIChatHistory, AppChatHistory, error) {
// - Finally, it returns the updated chat histories, accumulated cost, and any error encountered.
func (a *AIClientV2) ChatCompletionStreamV2(ctx context.Context, callbackStream chatv2.ChatService_CreateConversationMessageStreamServer, userID bson.ObjectID, projectID string, conversationId string, modelSlug string, messages OpenAIChatHistory, llmProvider *models.LLMProviderConfig) (OpenAIChatHistory, AppChatHistory, UsageCost, error) {
openaiChatHistory := messages
inappChatHistory := AppChatHistory{}
usage := UsageCost{}

streamHandler := handler.NewStreamHandlerV2(callbackStream, conversationId, modelSlug)

Expand All @@ -77,6 +87,7 @@ func (a *AIClientV2) ChatCompletionStreamV2(ctx context.Context, callbackStream
answer_content := ""
answer_content_id := ""
has_sent_part_begin := false
has_finished := false
tool_info := map[int]map[string]string{}
toolCalls := []openai.FinishedChatCompletionToolCall{}
handleReasoning := func(raw string) (string, bool) {
Expand All @@ -92,12 +103,18 @@ func (a *AIClientV2) ChatCompletionStreamV2(ctx context.Context, callbackStream
}

for stream.Next() {
// time.Sleep(5000 * time.Millisecond) // DEBUG POINT: change this to test in a slow mode
chunk := stream.Current()

// Capture cost from any chunk that has usage data (OpenRouter sends usage in a separate chunk after FinishReason)
if chunk.Usage.PromptTokens > 0 || chunk.Usage.CompletionTokens > 0 {
if costField, ok := chunk.Usage.JSON.ExtraFields["cost"]; ok {
if cost, err := strconv.ParseFloat(costField.Raw(), 64); err == nil {
usage.Cost += cost
}
}
}

if len(chunk.Choices) == 0 {
// Handle usage information
// fmt.Printf("Usage: %+v\n", chunk.Usage)
continue
}

Expand Down Expand Up @@ -180,17 +197,15 @@ func (a *AIClientV2) ChatCompletionStreamV2(ctx context.Context, callbackStream
}
}

if chunk.Choices[0].FinishReason != "" {
// fmt.Printf("FinishReason: %s\n", chunk.Choices[0].FinishReason)
// answer_content += chunk.Choices[0].Delta.Content
// fmt.Printf("answer_content: %s\n", answer_content)
if chunk.Choices[0].FinishReason != "" && !has_finished {
streamHandler.HandleTextDoneItem(chunk, answer_content, reasoning_content)
break
has_finished = true
// Don't break - continue reading to capture the usage chunk that comes after
}
}

if err := stream.Err(); err != nil {
return nil, nil, err
return nil, nil, UsageCost{}, err
}

if answer_content != "" {
Expand All @@ -200,7 +215,7 @@ func (a *AIClientV2) ChatCompletionStreamV2(ctx context.Context, callbackStream
// Execute the calls (if any), return incremental data
openaiToolHistory, inappToolHistory, err := a.toolCallHandler.HandleToolCallsV2(ctx, toolCalls, streamHandler)
if err != nil {
return nil, nil, err
return nil, nil, UsageCost{}, err
}

// // Record the tool call results
Expand All @@ -213,5 +228,12 @@ func (a *AIClientV2) ChatCompletionStreamV2(ctx context.Context, callbackStream
}
}

return openaiChatHistory, inappChatHistory, nil
// Track cost if userID is provided and user is not using their own API key (BYOK)
if !userID.IsZero() && !llmProvider.IsCustom() {
if err := a.usageService.TrackUsage(ctx, userID, projectID, modelSlug, usage.Cost); err != nil {
a.logger.Error("Failed to track usage", "error", err)
}
}

return openaiChatHistory, inappChatHistory, usage, nil
}
2 changes: 1 addition & 1 deletion internal/services/toolkit/client/get_citation_keys.go
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ func (a *AIClientV2) GetCitationKeys(ctx context.Context, sentence string, userI
// Bibliography is placed at the start of the prompt to leverage prompt caching
message := fmt.Sprintf("Bibliography: %s\nSentence: %s\nBased on the sentence and bibliography, suggest only the most relevant citation keys separated by commas with no spaces (e.g. key1,key2). Be selective and only include citations that are directly relevant. Avoid suggesting more than 3 citations. If no relevant citations are found, return '%s'.", bibliography, sentence, emptyCitation)

_, resp, err := a.ChatCompletionV2(ctx, "gpt-5.2", OpenAIChatHistory{
_, resp, _, err := a.ChatCompletionV2(ctx, "gpt-5.2", OpenAIChatHistory{
openai.SystemMessage("You are a helpful assistant that suggests relevant citation keys."),
openai.UserMessage(message),
}, llmProvider)
Expand Down
2 changes: 2 additions & 0 deletions internal/services/toolkit/client/get_citation_keys_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,12 @@ func setupTestClient(t *testing.T) (*client.AIClientV2, *services.ProjectService
}

projectService := services.NewProjectService(dbInstance, cfg.GetCfg(), logger.GetLogger())
usageService := services.NewUsageService(dbInstance, cfg.GetCfg(), logger.GetLogger())
aiClient := client.NewAIClientV2(
dbInstance,
&services.ReverseCommentService{},
projectService,
usageService,
cfg.GetCfg(),
logger.GetLogger(),
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func (a *AIClientV2) GetConversationTitleV2(ctx context.Context, inappChatHistor
message := strings.Join(messages, "\n")
message = fmt.Sprintf("%s\nBased on above conversation, generate a short, clear, and descriptive title that summarizes the main topic or purpose of the discussion. The title should be concise, specific, and use natural language. Avoid vague or generic titles. Use abbreviation and short words if possible. Use 3-5 words if possible. Give me the title only, no other text including any other words.", message)

_, resp, err := a.ChatCompletionV2(ctx, "gpt-5-nano", OpenAIChatHistory{
_, resp, _, err := a.ChatCompletionV2(ctx, "gpt-5-nano", OpenAIChatHistory{
openai.SystemMessage("You are a helpful assistant that generates a title for a conversation."),
openai.UserMessage(message),
}, llmProvider)
Expand Down
6 changes: 6 additions & 0 deletions internal/services/toolkit/client/utils_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ func getDefaultParamsV2(modelSlug string, toolRegistry *registry.ToolRegistryV2)
Tools: toolRegistry.GetTools(),
ParallelToolCalls: openaiv3.Bool(true),
Store: openaiv3.Bool(false),
StreamOptions: openaiv3.ChatCompletionStreamOptionsParam{
IncludeUsage: openaiv3.Bool(true),
},
}
}
}
Expand All @@ -85,6 +88,9 @@ func getDefaultParamsV2(modelSlug string, toolRegistry *registry.ToolRegistryV2)
Tools: toolRegistry.GetTools(), // Tool registration is managed centrally by the registry
ParallelToolCalls: openaiv3.Bool(true),
Store: openaiv3.Bool(false), // Must set to false, because we are construct our own chat history.
StreamOptions: openaiv3.ChatCompletionStreamOptionsParam{
IncludeUsage: openaiv3.Bool(true),
},
}
}

Expand Down
62 changes: 62 additions & 0 deletions internal/services/usage.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package services

import (
"context"
"time"

"paperdebugger/internal/libs/cfg"
"paperdebugger/internal/libs/db"
"paperdebugger/internal/libs/logger"
"paperdebugger/internal/models"

"go.mongodb.org/mongo-driver/v2/bson"
"go.mongodb.org/mongo-driver/v2/mongo"
"go.mongodb.org/mongo-driver/v2/mongo/options"
)

// UsageService persists per-hour usage cost records to MongoDB.
type UsageService struct {
	BaseService
	usageCollection *mongo.Collection // backed by the models.Usage collection ("usages")
}

// NewUsageService constructs a UsageService bound to the Usage
// collection of the given database.
func NewUsageService(db *db.DB, cfg *cfg.Cfg, logger *logger.Logger) *UsageService {
	svc := &UsageService{BaseService: NewBaseService(db, cfg, logger)}
	svc.usageCollection = svc.db.Collection((models.Usage{}).CollectionName())
	return svc
}

// TrackUsage increments cost for a user/project/model/hour bucket.
// It upserts atomically: the filter identifies the hour bucket, $inc
// accumulates the cost, $set refreshes updated_at, and $setOnInsert
// assigns an _id only when the bucket is first created.
//
// Non-positive costs are ignored: a zero cost would only create empty
// buckets, and a negative cost would silently shrink an accumulated
// bucket (the previous `cost == 0` guard let negative values through).
func (s *UsageService) TrackUsage(ctx context.Context, userID bson.ObjectID, projectID string, modelSlug string, cost float64) error {
	if cost <= 0 {
		return nil
	}

	now := time.Now()
	hourBucket := models.TruncateToHour(now)

	// These four fields together form the logical primary key of a bucket.
	filter := bson.M{
		"user_id":     userID,
		"project_id":  projectID,
		"model_slug":  modelSlug,
		"hour_bucket": bson.NewDateTimeFromTime(hourBucket),
	}

	update := bson.M{
		"$inc": bson.M{
			"cost": cost,
		},
		"$set": bson.M{
			"updated_at": bson.NewDateTimeFromTime(now),
		},
		"$setOnInsert": bson.M{
			"_id": bson.NewObjectID(),
		},
	}

	opts := options.UpdateOne().SetUpsert(true)
	_, err := s.usageCollection.UpdateOne(ctx, filter, update, opts)
	return err
}
1 change: 1 addition & 0 deletions internal/wire.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ var Set = wire.NewSet(
services.NewProjectService,
services.NewPromptService,
services.NewOAuthService,
services.NewUsageService,

cfg.GetCfg,
logger.GetLogger,
Expand Down
5 changes: 3 additions & 2 deletions internal/wire_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 3 additions & 4 deletions pkg/gen/api/chat/v2/chat.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading