memodb-io · gusye1234 · Dec 11, 2025 · Dec 9, 2025 · Dec 9, 2025 · Dec 9, 2025
diff --git a/docs/api-reference/openapi.json b/docs/api-reference/openapi.json
@@ -788,7 +788,7 @@
             "type" : "string"
           }
         }, {
-          "description" : "Limit of messages to return. Max 200. If limit is 0 or not provided, all messages will be returned.",
+          "description" : "Limit of messages to return. Max 200. If limit is 0 or not provided, all messages will be returned. \n\nWARNING!\n Use `limit` only for read-only/display purposes (pagination, viewing). Do NOT use `limit` to truncate messages before sending to LLM as it may cause tool-call and tool-result unpairing issues. Instead, use the `token_limit` edit strategy in `edit_strategies` parameter to safely manage message context size.",
           "in" : "query",
           "name" : "limit",
           "schema" : {

diff --git a/docs/store/editing.mdx b/docs/store/editing.mdx
@@ -55,7 +55,7 @@ Please do not use the token count to calculate the cost of LLM, as the actual to
 ## Context Editing On-the-fly
 Acontext supports to edit the session context when you obtain the current messages.
 
-The basic usage is to pass the `edit_strategy` and `edit_params` to the `get_messages` method to get the edited session messages without modifying the original session storage:
+The basic usage is to pass `edit_strategies` to the `get_messages` method to get the edited session messages without modifying the original session storage:
 
 <CodeGroup>
 ```python Python
@@ -99,6 +99,46 @@ const originalSession = await client.sessions.getMessages('session-uuid');
 ```
 </CodeGroup>
 
+### Token Limit
+This strategy truncates messages based on token count, removing the oldest messages until the total token count is within the specified limit. 
+
+It's useful for managing context window limits and ensuring your session stays within model constraints.
+
+It will:
+- Remove messages from oldest to newest
+- Maintain tool-call/tool-result pairing (when a tool-call is removed, its corresponding tool-result is removed too)
+
+<CodeGroup>
+```python Python
+# Limit session to 20,000 tokens
+edited_session = client.sessions.get_messages(
+  session_id="session-uuid",
+  edit_strategies=[
+    {
+      "type": "token_limit",
+      "params": {
+        "limit_tokens": 20000
+      }
+    }
+  ],
+)
+```
+```typescript TypeScript
+// Limit session to 20,000 tokens
+const editedSession = await client.sessions.getMessages('session-uuid', {
+  editStrategies: [
+    {
+      type: 'token_limit' as const,
+      params: {
+        limit_tokens: 20000
+      }
+    }
+  ],
+});
+```
+</CodeGroup>
+
+
 ### Remove Tool Result
 This strategy will replace the oldest tool results' content with a placeholder text to reduce the session context, while keeping the most recent N tool results intact.
 
@@ -153,6 +193,8 @@ const editedSession = await client.sessions.getMessages('session-uuid', {
 ```
 </CodeGroup>
 
+
+
 ## Context Engineering and Editing
 
 Context Engineering is an emerging discipline focused on designing, managing, and optimizing the information provided to large language models (LLMs) and AI agents to enhance their performance, reliability, and consistency. 

diff --git a/docs/store/messages/multi-modal.mdx b/docs/store/messages/multi-modal.mdx
@@ -558,7 +558,6 @@ client = AcontextClient(
 result = client.sessions.get_messages(
     session_id="session_uuid",
     format="anthropic",  # or "openai"
-    limit=50
 )
 
 print(f"Retrieved {len(result.items)} messages")

diff --git a/docs/store/messages/multi-provider.mdx b/docs/store/messages/multi-provider.mdx
@@ -223,7 +223,7 @@ Each message receives a unique ID upon creation. You can use these IDs to refere
 
 ### Get all messages from a session
 
-Retrieve messages from a session with pagination support:
+Retrieve messages from a session:
 
 <CodeGroup>
 ```python Python
@@ -237,22 +237,12 @@ client = AcontextClient(
 # Get messages from a session
 result = client.sessions.get_messages(
     session_id="session_uuid",
-    limit=50,
-    format="openai",
-    time_desc=True  # Most recent first
+    format="openai"
 )
 
 print(f"Retrieved {len(result.items)} messages")
 for msg in result.items:
     print(f"- {msg.role}: {msg.content[:50]}...")
-
-# Handle pagination if there are more messages
-if result.next_cursor:
-    next_page = client.sessions.get_messages(
-        session_id="session_uuid",
-        cursor=result.next_cursor,
-        limit=50
-    )
 ```
 
 ```typescript TypeScript
@@ -265,23 +255,13 @@ const client = new AcontextClient({
 
 // Get messages from a session
 const result = await client.sessions.getMessages('session_uuid', {
-  limit: 50,
-  format: 'openai',
-  timeDesc: true  // Most recent first
+  format: 'openai'
 });
 
 console.log(`Retrieved ${result.items.length} messages`);
 result.items.forEach(msg => {
   console.log(`- ${msg.role}: ${msg.content.substring(0, 50)}...`);
 });
-
-// Handle pagination if there are more messages
-if (result.nextCursor) {
-  const nextPage = await client.sessions.getMessages('session_uuid', {
-    cursor: result.nextCursor,
-    limit: 50
-  });
-}
 ```
 </CodeGroup>
 
@@ -341,8 +321,7 @@ try:
     # 3. Retrieve messages later
     result = client.sessions.get_messages(
         session_id=session.id,
-        format="openai",
-        time_desc=False  # Chronological order
+        format="openai"
     )
 
     print(f"\nRetrieved conversation ({len(result.items)} messages):")
@@ -386,8 +365,7 @@ async function storeAndRetrieveConversation() {
 
     // 3. Retrieve messages later
     const result = await client.sessions.getMessages(session.id, {
-      format: 'openai',
-      timeDesc: false  // Chronological order
+      format: 'openai'
     });
 
     console.log(`\nRetrieved conversation (${result.items.length} messages):`);
@@ -404,45 +382,6 @@ storeAndRetrieveConversation();
 ```
 </CodeGroup>
 
-## Pagination and limits
-
-When retrieving large message histories, use pagination to efficiently process results:
-
-<Steps>
-<Step title="Set an appropriate limit">
-Start with a reasonable page size (e.g., 50-100 messages) based on your use case.
-
-```python
-result = client.sessions.get_messages(session_id="session_uuid", limit=50)
-```
-</Step>
-
-<Step title="Check for more pages">
-Look for the `next_cursor` field in the response to determine if more messages exist.
-
-```python
-if result.next_cursor:
-    print("More messages available")
-```
-</Step>
-
-<Step title="Fetch subsequent pages">
-Use the cursor to retrieve the next page of results.
-
-```python
-next_page = client.sessions.get_messages(
-    session_id="session_uuid",
-    cursor=result.next_cursor,
-    limit=50
-)
-```
-</Step>
-</Steps>
-
-<Note>
-The maximum limit per request is typically 100 messages. Check your plan's specific limits in the dashboard.
-</Note>
-
 ## Managing sessions
 
 ### Delete a session
@@ -543,13 +482,6 @@ for (const session of sessions.items.slice(10)) {
 - Use **Anthropic format** if you're primarily using Claude models
 - You can convert between formats when retrieving messages
 </Accordion>
-
-<Accordion title="Handle pagination efficiently">
-- Set appropriate page sizes (50-100 messages typically works well)
-- Cache results when possible to reduce API calls
-- Use `time_desc=True` to get most recent messages first
-- Process pages asynchronously for better performance with large histories
-</Accordion>
 </AccordionGroup>
 
 ## Next steps

diff --git a/src/client/acontext-py/src/acontext/resources/async_sessions.py b/src/client/acontext-py/src/acontext/resources/async_sessions.py
@@ -278,7 +278,9 @@ async def get_messages(
             time_desc: Order by created_at descending if True, ascending if False. Defaults to None.
             edit_strategies: Optional list of edit strategies to apply before format conversion.
                 Each strategy is a dict with 'type' and 'params' keys.
-                Example: [{"type": "remove_tool_result", "params": {"keep_recent_n_tool_results": 3}}]
+                Examples:
+                    - Remove tool results: [{"type": "remove_tool_result", "params": {"keep_recent_n_tool_results": 3}}]
+                    - Token limit: [{"type": "token_limit", "params": {"limit_tokens": 20000}}]
                 Defaults to None.
 
         Returns:

diff --git a/src/client/acontext-py/src/acontext/resources/sessions.py b/src/client/acontext-py/src/acontext/resources/sessions.py
@@ -278,7 +278,9 @@ def get_messages(
             time_desc: Order by created_at descending if True, ascending if False. Defaults to None.
             edit_strategies: Optional list of edit strategies to apply before format conversion.
                 Each strategy is a dict with 'type' and 'params' keys.
-                Example: [{"type": "remove_tool_result", "params": {"keep_recent_n_tool_results": 3}}]
+                Examples:
+                    - Remove tool results: [{"type": "remove_tool_result", "params": {"keep_recent_n_tool_results": 3}}]
+                    - Token limit: [{"type": "token_limit", "params": {"limit_tokens": 20000}}]
                 Defaults to None.
 
         Returns:

diff --git a/src/client/acontext-py/src/acontext/types/session.py b/src/client/acontext-py/src/acontext/types/session.py
@@ -30,9 +30,36 @@ class RemoveToolResultStrategy(TypedDict):
     params: RemoveToolResultParams
 
 
+class TokenLimitParams(TypedDict):
+    """Parameters for the token_limit edit strategy.
+
+    Attributes:
+        limit_tokens: Maximum number of tokens to keep. Required parameter.
+            Messages will be removed from oldest to newest until total tokens <= limit_tokens.
+            Tool-call and tool-result pairs are always removed together.
+    """
+
+    limit_tokens: int
+
+
+class TokenLimitStrategy(TypedDict):
+    """Edit strategy to truncate messages based on token count.
+
+    Removes oldest messages until the total token count is within the specified limit.
+    Maintains tool-call/tool-result pairing - when removing a message with tool-calls,
+    the corresponding tool-result messages are also removed.
+
+    Example:
+        {"type": "token_limit", "params": {"limit_tokens": 20000}}
+    """
+
+    type: Literal["token_limit"]
+    params: TokenLimitParams
+
+
 # Union type for all edit strategies
 # When adding new strategies, add them to this Union: EditStrategy = Union[RemoveToolResultStrategy, OtherStrategy, ...]
-EditStrategy = Union[RemoveToolResultStrategy]
+EditStrategy = Union[RemoveToolResultStrategy, TokenLimitStrategy]
 
 
 class Asset(BaseModel):

diff --git a/src/client/acontext-ts/src/resources/sessions.ts b/src/client/acontext-ts/src/resources/sessions.ts
@@ -179,6 +179,22 @@ export class SessionsAPI {
     }
   }
 
+  /**
+   * Get messages for a session.
+   *
+   * @param sessionId - The UUID of the session.
+   * @param options - Options for retrieving messages.
+   * @param options.limit - Maximum number of messages to return.
+   * @param options.cursor - Cursor for pagination.
+   * @param options.withAssetPublicUrl - Whether to include presigned URLs for assets.
+   * @param options.format - The format of the messages ('acontext', 'openai', or 'anthropic').
+   * @param options.timeDesc - Order by created_at descending if true, ascending if false.
+   * @param options.editStrategies - Optional list of edit strategies to apply before format conversion.
+   *   Examples:
+   *   - Remove tool results: [{ type: 'remove_tool_result', params: { keep_recent_n_tool_results: 3 } }]
+   *   - Token limit: [{ type: 'token_limit', params: { limit_tokens: 20000 } }]
+   * @returns GetMessagesOutput containing the list of messages and pagination information.
+   */
   async getMessages(
     sessionId: string,
     options?: {

diff --git a/src/client/acontext-ts/src/types/session.ts b/src/client/acontext-ts/src/types/session.ts
@@ -142,11 +142,44 @@ export const RemoveToolResultStrategySchema = z.object({
 
 export type RemoveToolResultStrategy = z.infer<typeof RemoveToolResultStrategySchema>;
 
+/**
+ * Parameters for the token_limit edit strategy.
+ */
+export const TokenLimitParamsSchema = z.object({
+  /**
+   * Maximum number of tokens to keep. Required parameter.
+   * Messages will be removed from oldest to newest until total tokens <= limit_tokens.
+   * Tool-call and tool-result pairs are always removed together.
+   */
+  limit_tokens: z.number(),
+});
+
+export type TokenLimitParams = z.infer<typeof TokenLimitParamsSchema>;
+
+/**
+ * Edit strategy to truncate messages based on token count.
+ * 
+ * Removes oldest messages until the total token count is within the specified limit.
+ * Maintains tool-call/tool-result pairing - when removing a message with tool-calls,
+ * the corresponding tool-result messages are also removed.
+ * 
+ * Example: { type: 'token_limit', params: { limit_tokens: 20000 } }
+ */
+export const TokenLimitStrategySchema = z.object({
+  type: z.literal('token_limit'),
+  params: TokenLimitParamsSchema,
+});
+
+export type TokenLimitStrategy = z.infer<typeof TokenLimitStrategySchema>;
+
 /**
  * Union schema for all edit strategies.
  * When adding new strategies, extend this union: z.union([RemoveToolResultStrategySchema, OtherStrategySchema, ...])
  */
-export const EditStrategySchema = z.union([RemoveToolResultStrategySchema]);
+export const EditStrategySchema = z.union([
+  RemoveToolResultStrategySchema,
+  TokenLimitStrategySchema,
+]);
 
 export type EditStrategy = z.infer<typeof EditStrategySchema>;
 
diff --git a/src/server/api/go/docs/docs.go b/src/server/api/go/docs/docs.go
@@ -1086,7 +1086,7 @@ const docTemplate = `{
                     },
                     {
                         "type": "integer",
-                        "description": "Limit of messages to return. Max 200. If limit is 0 or not provided, all messages will be returned.",
+                        "description": "Limit of messages to return. Max 200. If limit is 0 or not provided, all messages will be returned. \n\nWARNING!\n Use ` + "`" + `limit` + "`" + ` only for read-only/display purposes (pagination, viewing). Do NOT use ` + "`" + `limit` + "`" + ` to truncate messages before sending to LLM as it may cause tool-call and tool-result unpairing issues. Instead, use the ` + "`" + `token_limit` + "`" + ` edit strategy in ` + "`" + `edit_strategies` + "`" + ` parameter to safely manage message context size.",
                         "name": "limit",
                         "in": "query"
                     },

diff --git a/src/server/api/go/docs/swagger.json b/src/server/api/go/docs/swagger.json
@@ -1083,7 +1083,7 @@
                     },
                     {
                         "type": "integer",
-                        "description": "Limit of messages to return. Max 200. If limit is 0 or not provided, all messages will be returned.",
+                        "description": "Limit of messages to return. Max 200. If limit is 0 or not provided, all messages will be returned. \n\nWARNING!\n Use `limit` only for read-only/display purposes (pagination, viewing). Do NOT use `limit` to truncate messages before sending to LLM as it may cause tool-call and tool-result unpairing issues. Instead, use the `token_limit` edit strategy in `edit_strategies` parameter to safely manage message context size.",
                         "name": "limit",
                         "in": "query"
                     },

diff --git a/src/server/api/go/docs/swagger.yaml b/src/server/api/go/docs/swagger.yaml
@@ -1399,8 +1399,12 @@ paths:
         name: session_id
         required: true
         type: string
-      - description: Limit of messages to return. Max 200. If limit is 0 or not provided,
-          all messages will be returned.
+      - description: "Limit of messages to return. Max 200. If limit is 0 or not provided,
+          all messages will be returned. \n\nWARNING!\n Use `limit` only for read-only/display
+          purposes (pagination, viewing). Do NOT use `limit` to truncate messages
+          before sending to LLM as it may cause tool-call and tool-result unpairing
+          issues. Instead, use the `token_limit` edit strategy in `edit_strategies`
+          parameter to safely manage message context size."
         in: query
         name: limit
         type: integer

diff --git a/src/server/api/go/internal/modules/handler/session.go b/src/server/api/go/internal/modules/handler/session.go
@@ -480,7 +480,7 @@ type GetMessagesReq struct {
 //	@Accept			json
 //	@Produce		json
 //	@Param			session_id				path	string	true	"Session ID"	format(uuid)
-//	@Param			limit					query	integer	false	"Limit of messages to return. Max 200. If limit is 0 or not provided, all messages will be returned."
+//	@Param			limit					query	integer	false	"Limit of messages to return. Max 200. If limit is 0 or not provided, all messages will be returned. \n\nWARNING!\n Use `limit` only for read-only/display purposes (pagination, viewing). Do NOT use `limit` to truncate messages before sending to LLM as it may cause tool-call and tool-result unpairing issues. Instead, use the `token_limit` edit strategy in `edit_strategies` parameter to safely manage message context size."
 //	@Param			cursor					query	string	false	"Cursor for pagination. Use the cursor from the previous response to get the next page."
 //	@Param			with_asset_public_url	query	string	false	"Whether to return asset public url, default is true"								example(true)
 //	@Param			format					query	string	false	"Format to convert messages to: acontext (original), openai (default), anthropic."	enums(acontext,openai,anthropic)