diff options
| author | Dhravya Shah <[email protected]> | 2025-09-28 16:42:06 -0700 |
|---|---|---|
| committer | Dhravya Shah <[email protected]> | 2025-09-28 16:42:06 -0700 |
| commit | 2093b316d9ecb9cfa9c550f436caee08e12f5d11 (patch) | |
| tree | 07b87fbd48b0b38ef26b9d5f839ad8cd61d82331 /apps/docs/memory-api/track-progress.mdx | |
| parent | Merge branch 'main' of https://github.com/supermemoryai/supermemory (diff) | |
| download | supermemory-2093b316d9ecb9cfa9c550f436caee08e12f5d11.tar.xz supermemory-2093b316d9ecb9cfa9c550f436caee08e12f5d11.zip | |
migrate docs to public
Diffstat (limited to 'apps/docs/memory-api/track-progress.mdx')
| -rw-r--r-- | apps/docs/memory-api/track-progress.mdx | 274 |
1 file changed, 274 insertions, 0 deletions
diff --git a/apps/docs/memory-api/track-progress.mdx b/apps/docs/memory-api/track-progress.mdx new file mode 100644 index 00000000..60126dab --- /dev/null +++ b/apps/docs/memory-api/track-progress.mdx @@ -0,0 +1,274 @@ +--- +title: "Track Processing Status" +description: "Monitor document processing status in real-time" +icon: "activity" +--- + +Track your documents through the processing pipeline to provide better user experiences and handle edge cases. + +## Processing Pipeline + + + +Each stage serves a specific purpose: + +- **Queued**: Document is waiting in the processing queue +- **Extracting**: Content is being extracted (OCR for images, transcription for videos) +- **Chunking**: Content is broken into optimal, searchable pieces +- **Embedding**: Each chunk is converted to vector representations +- **Indexing**: Vectors are added to the search index +- **Done**: Document is fully processed and searchable + +<Note> +Processing time varies by content type. Plain text processes in seconds, while a 10-minute video might take 2-3 minutes. +</Note> + +## Processing Documents + +Monitor all documents currently being processed across your account. 
+ +`GET /v3/documents/processing` + +<CodeGroup> + +```typescript +// Direct API call (not in SDK) +const response = await fetch('https://api.supermemory.ai/v3/documents/processing', { + headers: { + 'Authorization': `Bearer ${SUPERMEMORY_API_KEY}` + } +}); + +const processing = await response.json(); +console.log(`${processing.documents.length} documents processing`); +``` + +```python +# Direct API call (not in SDK) +import requests + +response = requests.get( + 'https://api.supermemory.ai/v3/documents/processing', + headers={'Authorization': f'Bearer {SUPERMEMORY_API_KEY}'} +) + +processing = response.json() +print(f"{len(processing['documents'])} documents processing") +``` + +```bash +curl -X GET "https://api.supermemory.ai/v3/documents/processing" \ + -H "Authorization: Bearer $SUPERMEMORY_API_KEY" +``` + +</CodeGroup> + +### Response Format + +```json +{ + "documents": [ + { + "id": "doc_abc123", + "status": "extracting", + "created_at": "2024-01-15T10:30:00Z", + "updated_at": "2024-01-15T10:30:15Z", + "container_tags": ["research"], + "metadata": { + "source": "upload", + "filename": "report.pdf" + } + }, + { + "id": "doc_def456", + "status": "chunking", + "created_at": "2024-01-15T10:29:00Z", + "updated_at": "2024-01-15T10:30:00Z", + "container_tags": ["articles"], + "metadata": { + "source": "url", + "url": "https://example.com/article" + } + } + ], + "total": 2 +} +``` + +## Individual Documents + +Track specific document processing status. 
+ +`GET /v3/documents/{id}` + +<CodeGroup> + +```typescript +let memory = await client.memories.get("doc_abc123"); + +console.log(`Status: ${memory.status}`); + +// Poll for completion +while (memory.status !== 'done') { + await new Promise(r => setTimeout(r, 2000)); + memory = await client.memories.get("doc_abc123"); + console.log(`Status: ${memory.status}`); +} +``` + +```python +memory = client.memories.get("doc_abc123") + +print(f"Status: {memory['status']}") + +# Poll for completion +import time +while memory['status'] != 'done': + time.sleep(2) + memory = client.memories.get("doc_abc123") + print(f"Status: {memory['status']}") +``` + +```bash +curl -X GET "https://api.supermemory.ai/v3/documents/doc_abc123" \ + -H "Authorization: Bearer $SUPERMEMORY_API_KEY" +``` + +</CodeGroup> + +### Response Format + +```json +{ + "id": "doc_abc123", + "status": "done", + "content": "The original content...", + "container_tags": ["research"], + "metadata": { + "source": "upload", + "filename": "report.pdf" + }, + "created_at": "2024-01-15T10:30:00Z", + "updated_at": "2024-01-15T10:31:00Z" +} +``` + +## Status Values + +| Status | Description | Typical Duration | +|--------|-------------|------------------| +| `queued` | Waiting to be processed | < 5 seconds | +| `extracting` | Extracting content from source | 5-30 seconds | +| `chunking` | Breaking into searchable pieces | 5-15 seconds | +| `embedding` | Creating vector representations | 10-30 seconds | +| `indexing` | Adding to search index | 5-10 seconds | +| `done` | Fully processed and searchable | - | +| `failed` | Processing failed | - | + +## Polling Best Practices + +When polling for status updates: + +```typescript +async function waitForProcessing(documentId: string, maxWaitMs = 300000) { + const startTime = Date.now(); + const pollInterval = 2000; // 2 seconds + + while (Date.now() - startTime < maxWaitMs) { + const doc = await client.memories.get(documentId); + + if (doc.status === 'done') { + return doc; + } 
+ + if (doc.status === 'failed') { + throw new Error(`Processing failed for ${documentId}`); + } + + await new Promise(r => setTimeout(r, pollInterval)); + } + + throw new Error(`Timeout waiting for ${documentId}`); +} +``` + +## Batch Processing + +For multiple documents, track them efficiently: + +```typescript +async function trackBatch(documentIds: string[]) { + const statuses = new Map(); + + // Initial check + for (const id of documentIds) { + const doc = await client.memories.get(id); + statuses.set(id, doc.status); + } + + // Poll until all done + while ([...statuses.values()].some(s => s !== 'done' && s !== 'failed')) { + await new Promise(r => setTimeout(r, 5000)); // 5 second interval for batch + + for (const id of documentIds) { + if (statuses.get(id) !== 'done' && statuses.get(id) !== 'failed') { + const doc = await client.memories.get(id); + statuses.set(id, doc.status); + } + } + + // Log progress + const done = [...statuses.values()].filter(s => s === 'done').length; + console.log(`Progress: ${done}/${documentIds.length} complete`); + } + + return statuses; +} +``` + +## Error Handling + +Handle processing failures gracefully: + +```typescript +async function addWithRetry(content: string, maxRetries = 3) { + for (let attempt = 1; attempt <= maxRetries; attempt++) { + const { id } = await client.memories.add({ content }); + + try { + const result = await waitForProcessing(id); + return result; + } catch (error) { + console.error(`Attempt ${attempt} failed:`, error); + + if (attempt === maxRetries) { + throw error; + } + + // Exponential backoff + await new Promise(r => setTimeout(r, 1000 * Math.pow(2, attempt))); + } + } +} +``` + +## Processing Times by Content Type + +Documents: Created near instantly (200-500ms) + +Memories: Supermemory creates a memory graph understanding based on semantic analysis and contextual understanding. 
+ +| Content Type | Memory Processing Time | Notes | +|--------------|------------------------|-------| +| Plain Text | 5-10 seconds | Fastest processing | +| Markdown | 5-10 seconds | Similar to plain text | +| PDF (< 10 pages) | 15-30 seconds | OCR if needed | +| PDF (> 100 pages) | 1-3 minutes | Depends on complexity | +| Images | 10-20 seconds | OCR processing | +| YouTube Videos | 1-2 min per 10 min video | Transcription required | +| Web Pages | 10-20 seconds | Content extraction | +| Google Docs | 10-15 seconds | API extraction | + +<Note> +**Pro Tip**: Use the processing status endpoint to provide real-time feedback to users, especially for larger documents or batch uploads. +</Note> |