1+ import { getClient } from '../../currentScopes' ;
2+
13/**
24 * Default maximum size in bytes for GenAI messages.
35 * Messages exceeding this limit will be truncated.
@@ -12,20 +14,64 @@ type ContentMessage = {
1214 content : string ;
1315} ;
1416
/**
 * Message format used by OpenAI and Anthropic APIs for media:
 * `content` is an array of parts, each tagged with a string `type`.
 * Any other keys on the message or on a part are allowed and left untyped.
 */
type ContentArrayMessage = {
  [key: string]: unknown;
  content: Array<{
    [key: string]: unknown;
    type: string;
  }>;
};
27+
/**
 * Inline media content source, with a potentially very large base64
 * blob or data: uri.
 *
 * Every variant accepts arbitrary extra keys; the union lists the field
 * combinations that can hold the inline payload.
 */
type ContentMedia = Record<string, unknown> &
  (
    | {
        media_type: string;
        data: string;
      }
    | {
        image_url: `data:${string}`;
      }
    | {
        type: 'blob' | 'base64';
        content: string;
      }
    | {
        b64_json: string;
      }
    | {
        uri: `data:${string}`;
      }
  );
52+
/**
 * Message format used by Google GenAI API.
 * Parts can be strings, objects with a text property, or media parts.
 */
type PartsMessage = {
  [key: string]: unknown;
  parts: Array<TextPart | MediaPart>;
};
2361
/**
 * A part in a Google GenAI message that contains text, either as a bare
 * string or as an object with a `text` property.
 */
type TextPart = string | { text: string };
2866
/**
 * A part in a Google GenAI message that contains media: a string `type` tag
 * plus the media payload as a string in `content`.
 */
type MediaPart = {
  type: string;
  content: string;
};
74+
2975/**
3076 * Calculate the UTF-8 byte length of a string.
3177 */
@@ -46,9 +92,10 @@ const jsonBytes = (value: unknown): number => {
4692 *
4793 * @param text - The string to truncate
4894 * @param maxBytes - Maximum byte length (UTF-8 encoded)
95+ * @param onTruncate - Function called to record when truncation occurs
4996 * @returns Truncated string that fits within maxBytes
5097 */
51- function truncateTextByBytes ( text : string , maxBytes : number ) : string {
98+ function truncateTextByBytes ( text : string , maxBytes : number , onTruncate : ( count : number ) => void ) : string {
5299 if ( utf8Bytes ( text ) <= maxBytes ) {
53100 return text ;
54101 }
@@ -70,6 +117,7 @@ function truncateTextByBytes(text: string, maxBytes: number): string {
70117 }
71118 }
72119
120+ onTruncate ( 1 ) ;
73121 return bestFit ;
74122}
75123
@@ -79,11 +127,12 @@ function truncateTextByBytes(text: string, maxBytes: number): string {
79127 *
80128 * @returns The text content
81129 */
function getPartText(part: TextPart | MediaPart): string {
  if (typeof part === 'string') {
    return part;
  }
  // Media parts have no `text` property, so they contribute no text.
  return 'text' in part ? part.text : '';
}
88137
89138/**
@@ -93,7 +142,7 @@ function getPartText(part: TextPart): string {
93142 * @param text - New text content
94143 * @returns New part with updated text
95144 */
96- function withPartText ( part : TextPart , text : string ) : TextPart {
145+ function withPartText ( part : TextPart | MediaPart , text : string ) : TextPart {
97146 if ( typeof part === 'string' ) {
98147 return text ;
99148 }
@@ -112,6 +161,33 @@ function isContentMessage(message: unknown): message is ContentMessage {
112161 ) ;
113162}
114163
/**
 * Check if a message has the OpenAI/Anthropic content array format.
 *
 * Only verifies that `message` is a non-null object whose `content` is an
 * array; the shape of the individual array elements is not inspected.
 */
function isContentArrayMessage(message: unknown): message is ContentArrayMessage {
  return message !== null && typeof message === 'object' && 'content' in message && Array.isArray(message.content);
}
170+
/**
 * Check if a content part is an OpenAI/Anthropic media source.
 *
 * A non-null object matches when any of the following holds:
 * - it has a string `type` and a `source` that is itself a media source
 * - it has a string `media_type` and a `data` key
 * - it has a string `image_url` or `uri` starting with `data:`
 * - its `type` is `'blob'` or `'base64'`
 * - it has a `b64_json` key
 * - its `type` is `'image_generation'` and it has a `result` key
 */
function isContentMedia(part: unknown): part is ContentMedia {
  if (!part || typeof part !== 'object') return false;

  return (
    isContentMediaSource(part) ||
    ('media_type' in part && typeof part.media_type === 'string' && 'data' in part) ||
    ('image_url' in part && typeof part.image_url === 'string' && part.image_url.startsWith('data:')) ||
    ('type' in part && (part.type === 'blob' || part.type === 'base64')) ||
    'b64_json' in part ||
    ('type' in part && 'result' in part && part.type === 'image_generation') ||
    ('uri' in part && typeof part.uri === 'string' && part.uri.startsWith('data:'))
  );
}

/**
 * Check if a part is a typed wrapper around media: it has a string `type`
 * and a `source` property that passes `isContentMedia`.
 */
function isContentMediaSource(part: NonNullable<unknown>): boolean {
  return 'type' in part && typeof part.type === 'string' && 'source' in part && isContentMedia(part.source);
}
190+
115191/**
116192 * Check if a message has the Google GenAI parts format.
117193 */
@@ -132,17 +208,22 @@ function isPartsMessage(message: unknown): message is PartsMessage {
132208 * @param maxBytes - Maximum byte limit
133209 * @returns Array with truncated message, or empty array if it doesn't fit
134210 */
function truncateContentMessage(
  message: ContentMessage,
  maxBytes: number,
  onTruncate: (count: number) => void,
): unknown[] {
  // Calculate overhead (message structure without content)
  const overhead = jsonBytes({ ...message, content: '' });
  const availableForContent = maxBytes - overhead;

  if (availableForContent <= 0) {
    // Not even the empty message fits, so the whole message is dropped.
    onTruncate(1);
    return [];
  }

  // truncateTextByBytes receives onTruncate so it can record a shortened content itself.
  const truncatedContent = truncateTextByBytes(message.content, availableForContent, onTruncate);
  return [{ ...message, content: truncatedContent }];
}
148229
@@ -154,7 +235,7 @@ function truncateContentMessage(message: ContentMessage, maxBytes: number): unkn
154235 * @param maxBytes - Maximum byte limit
155236 * @returns Array with truncated message, or empty array if it doesn't fit
156237 */
157- function truncatePartsMessage ( message : PartsMessage , maxBytes : number ) : unknown [ ] {
238+ function truncatePartsMessage ( message : PartsMessage , maxBytes : number , onTruncate : ( count : number ) => void ) : unknown [ ] {
158239 const { parts } = message ;
159240
160241 // Calculate overhead by creating empty text parts
@@ -163,11 +244,12 @@ function truncatePartsMessage(message: PartsMessage, maxBytes: number): unknown[
163244 let remainingBytes = maxBytes - overhead ;
164245
165246 if ( remainingBytes <= 0 ) {
247+ onTruncate ( 1 ) ;
166248 return [ ] ;
167249 }
168250
169251 // Include parts until we run out of space
170- const includedParts : TextPart [ ] = [ ] ;
252+ const includedParts : ( TextPart | MediaPart ) [ ] = [ ] ;
171253
172254 for ( const part of parts ) {
173255 const text = getPartText ( part ) ;
@@ -179,7 +261,7 @@ function truncatePartsMessage(message: PartsMessage, maxBytes: number): unknown[
179261 remainingBytes -= textSize ;
180262 } else if ( includedParts . length === 0 ) {
181263 // First part doesn't fit: truncate it
182- const truncated = truncateTextByBytes ( text , remainingBytes ) ;
264+ const truncated = truncateTextByBytes ( text , remainingBytes , onTruncate ) ;
183265 if ( truncated ) {
184266 includedParts . push ( withPartText ( part , truncated ) ) ;
185267 }
@@ -190,7 +272,18 @@ function truncatePartsMessage(message: PartsMessage, maxBytes: number): unknown[
190272 }
191273 }
192274
193- return includedParts . length > 0 ? [ { ...message , parts : includedParts } ] : [ ] ;
275+ /* c8 ignore start
276+ * for type safety only, algorithm guarantees SOME text included */
277+ if ( includedParts . length <= 0 ) {
278+ onTruncate ( 1 ) ;
279+ return [ ] ;
280+ } else {
281+ /* c8 ignore stop */
282+ if ( includedParts . length < parts . length ) {
283+ onTruncate ( 1 ) ;
284+ }
285+ return [ { ...message , parts : includedParts } ] ;
286+ }
194287}
195288
196289/**
@@ -204,23 +297,84 @@ function truncatePartsMessage(message: PartsMessage, maxBytes: number): unknown[
204297 * @param maxBytes - Maximum byte limit for the message
205298 * @returns Array containing the truncated message, or empty array if truncation fails
206299 */
function truncateSingleMessage(message: unknown, maxBytes: number, onTruncate: (count: number) => void): unknown[] {
  /* c8 ignore start - unreachable */
  if (!message || typeof message !== 'object') {
    onTruncate(1);
    return [];
  }
  /* c8 ignore stop */

  if (isContentMessage(message)) {
    return truncateContentMessage(message, maxBytes, onTruncate);
  }

  if (isPartsMessage(message)) {
    return truncatePartsMessage(message, maxBytes, onTruncate);
  }

  // Unknown message format: cannot truncate safely, so the message is
  // dropped and recorded as one truncation.
  onTruncate(1);
  return [];
}
223320
/** Placeholder written in place of stripped inline media payloads. */
const REMOVED_STRING = '<removed>';

/** Property names whose (truthy) values are replaced when stripping a media part. */
const MEDIA_FIELDS = ['image_url', 'data', 'content', 'b64_json', 'result', 'uri'] as const;

/**
 * Return a copy of a media part with its inline payload fields replaced by
 * `REMOVED_STRING`. The input object is not modified.
 *
 * A nested `source` that is itself a media part is stripped recursively and
 * reports its own truncation.
 *
 * @param part - The media part to strip
 * @param onTruncate - Called with `1` when at least one field of this part was replaced
 * @returns A shallow copy of `part` with media fields replaced
 */
function stripInlineMediaFromSingleMessage(part: ContentMedia, onTruncate: (count: number) => void): ContentMedia {
  // Loosely typed copy: the placeholder is not a valid `data:` URI, so it
  // cannot be written through the narrower ContentMedia field types.
  const strip: Record<string, unknown> = { ...part };

  if (isContentMedia(strip.source)) {
    strip.source = stripInlineMediaFromSingleMessage(strip.source, onTruncate);
  }

  let replacedAny = false;
  for (const field of MEDIA_FIELDS) {
    // Skip values that already are the placeholder so nothing is recorded
    // when no data was actually removed.
    if (strip[field] && strip[field] !== REMOVED_STRING) {
      strip[field] = REMOVED_STRING;
      replacedAny = true;
    }
  }

  if (replacedAny) {
    onTruncate(1);
  }

  // The copy keeps every key of `part`; only the payload values changed.
  return strip as ContentMedia;
}
335+
/**
 * Strip the inline media from message arrays.
 *
 * This returns a stripped copy. We do NOT want to mutate the data in place,
 * because of course we still want the actual API/client to handle the media.
 *
 * Each object entry is checked, in order, for: a `content` array (stripped
 * recursively) or a single media `content`; a Google GenAI `parts` array
 * (stripped recursively); and finally whether the entry itself is a media
 * part. Non-object entries are returned unchanged.
 *
 * @param messages - Messages (or content/parts entries) to process
 * @param onTruncate - Called once with the total count reported while stripping, only when that count is non-zero
 * @returns A new array with inline media replaced
 */
function stripInlineMediaFromMessages(messages: unknown[], onTruncate: (count: number) => void): unknown[] {
  let stripCount = 0;
  // Nested calls report into this local counter so onTruncate fires at most once.
  const countStrip = (count: number): void => {
    stripCount += count;
  };

  const stripped = messages.map(message => {
    if (!message || typeof message !== 'object') {
      return message;
    }

    // Build replacements in a local so the caller's objects stay untouched.
    let current: unknown = message;

    if (isContentArrayMessage(message)) {
      current = {
        ...message,
        content: stripInlineMediaFromMessages(message.content, countStrip),
      };
    } else if ('content' in message && isContentMedia(message.content)) {
      current = {
        ...message,
        content: stripInlineMediaFromSingleMessage(message.content, countStrip),
      };
    }

    if (isPartsMessage(current)) {
      current = {
        ...current,
        parts: stripInlineMediaFromMessages(current.parts, countStrip),
      };
    }

    if (isContentMedia(current)) {
      current = stripInlineMediaFromSingleMessage(current, countStrip);
    }

    return current;
  });

  if (stripCount) onTruncate(stripCount);
  return stripped;
}
377+
224378/**
225379 * Truncate an array of messages to fit within a byte limit.
226380 *
@@ -240,12 +394,21 @@ function truncateSingleMessage(message: unknown, maxBytes: number): unknown[] {
240394 * // Returns [msg3, msg4] if they fit, or [msg4] if only it fits, etc.
241395 * ```
242396 */
243- export function truncateMessagesByBytes ( messages : unknown [ ] , maxBytes : number ) : unknown [ ] {
397+ function truncateMessagesByBytes (
398+ messages : unknown [ ] ,
399+ maxBytes : number ,
400+ onTruncate : ( count : number ) => void ,
401+ ) : unknown [ ] {
244402 // Early return for empty or invalid input
245403 if ( ! Array . isArray ( messages ) || messages . length === 0 ) {
246404 return messages ;
247405 }
248406
407+ // strip inline media first. This will often get us below the threshold,
408+ // while preserving human-readable information about messages sent.
409+ // eslint-disable-next-line no-param-reassign
410+ messages = stripInlineMediaFromMessages ( messages , onTruncate ) ;
411+
249412 // Fast path: if all messages fit, return as-is
250413 const totalBytes = jsonBytes ( messages ) ;
251414 if ( totalBytes <= maxBytes ) {
@@ -275,8 +438,14 @@ export function truncateMessagesByBytes(messages: unknown[], maxBytes: number):
275438
276439 // If no complete messages fit, try truncating just the newest message
277440 if ( startIndex === messages . length ) {
441+ // we're truncating down to one message, so all others dropped.
442+ if ( messages . length !== 1 ) onTruncate ( messages . length - 1 ) ;
278443 const newestMessage = messages [ messages . length - 1 ] ;
279- return truncateSingleMessage ( newestMessage , maxBytes ) ;
444+ return truncateSingleMessage ( newestMessage , maxBytes , onTruncate ) ;
445+ }
446+
447+ if ( startIndex !== 0 ) {
448+ onTruncate ( startIndex ) ;
280449 }
281450
282451 // Return the suffix that fits
@@ -292,7 +461,24 @@ export function truncateMessagesByBytes(messages: unknown[], maxBytes: number):
292461 * @returns Truncated array of messages
293462 */
export function truncateGenAiMessages(messages: unknown[]): unknown[] {
  // Collect every truncation recorded during this call, then report them together.
  const { record, send } = getOnTruncate();
  const truncated = truncateMessagesByBytes(messages, DEFAULT_GEN_AI_MESSAGES_BYTE_LIMIT, record);
  send();
  return truncated;
}
469+
/**
 * Create a truncation counter for one truncation pass.
 *
 * @returns `record`, which adds to the running count, and `send`, which
 * passes the accumulated count to the current client's `recordDroppedEvent`
 * (reason `'before_send'`, category `'attachment'`). `send` does nothing when
 * the count is zero, and resets the count so repeated calls do not report the
 * same truncations twice.
 */
function getOnTruncate(): { record: (count: number) => void; send: () => void } {
  let truncations = 0;
  return {
    record: (count: number) => {
      truncations += count;
    },
    send: () => {
      if (!truncations) return;
      const pending = truncations;
      // Reset before reporting so a second send() cannot double-count.
      truncations = 0;
      const client = getClient();
      client?.recordDroppedEvent('before_send', 'attachment', pending);
    },
  };
}
297483
298484/**
@@ -302,5 +488,8 @@ export function truncateGenAiMessages(messages: unknown[]): unknown[] {
302488 * @returns Truncated string
303489 */
export function truncateGenAiStringInput(input: string): string {
  // Collect any truncation recorded during this call, then report it.
  const { record, send } = getOnTruncate();
  const truncated = truncateTextByBytes(input, DEFAULT_GEN_AI_MESSAGES_BYTE_LIMIT, record);
  send();
  return truncated;
}
0 commit comments