diff options
| author | Dhravya Shah <[email protected]> | 2025-01-23 19:05:33 -0700 |
|---|---|---|
| committer | Dhravya Shah <[email protected]> | 2025-01-23 19:05:33 -0700 |
| commit | 12b26382094c0faf27bfb94ec50564e966a22993 (patch) | |
| tree | 1841c3a6d59dd662e3c065422fe01ba4937008be /packages/db/schema.ts | |
| parent | import tools: CSV and markdown (obsidian) (diff) | |
| download | supermemory-chunking-and-retrieval.tar.xz supermemory-chunking-and-retrieval.zip | |
De duplication and updating chunks (branch: chunking-and-retrieval)
Diffstat (limited to 'packages/db/schema.ts')
| -rw-r--r-- | packages/db/schema.ts | 39 |
1 files changed, 36 insertions, 3 deletions
diff --git a/packages/db/schema.ts b/packages/db/schema.ts index a2f24b06..8ffa8992 100644 --- a/packages/db/schema.ts +++ b/packages/db/schema.ts @@ -146,6 +146,30 @@ export const documentType = pgTable("document_type", { type: text("type").primaryKey(), }); +export const processedContent = pgTable( + "processed_content", + { + id: serial("id").primaryKey(), + contentHash: text("content_hash").notNull().unique(), + content: text("content").notNull(), + isSuccessfullyProcessed: boolean("is_successfully_processed").default( + false + ), + errorMessage: text("error_message"), + createdAt: timestamp("created_at", { withTimezone: true }) + .notNull() + .defaultNow(), + updatedAt: timestamp("updated_at", { withTimezone: true }) + .notNull() + .defaultNow(), + }, + (table) => ({ + contentHashIdx: uniqueIndex("processed_content_hash_idx").on( + table.contentHash + ), + }) +); + export const documents = pgTable( "documents", { @@ -167,11 +191,11 @@ export const documents = pgTable( .notNull() .references(() => users.id, { onDelete: "cascade" }), content: text("content"), + contentHash: text("content_hash"), isSuccessfullyProcessed: boolean("is_successfully_processed").default( false ), errorMessage: text("error_message"), - contentHash: text("content_hash"), }, (document) => ({ documentsIdIdx: uniqueIndex("document_id_idx").on(document.id), @@ -181,6 +205,13 @@ export const documents = pgTable( document.raw, document.userId ), + documentContentHashIdx: index("document_content_hash_idx").on( + document.contentHash + ), + documentUrlUserIdx: uniqueIndex("document_url_user_idx").on( + document.url, + document.userId + ), }) ); @@ -213,7 +244,8 @@ export const chunk = pgTable( documentId: integer("document_id") .references(() => documents.id, { onDelete: "cascade" }) .notNull(), - textContent: text("text_content"), + textContent: text("text_content").notNull(), + contentHash: text("content_hash"), orderInDocument: integer("order_in_document").notNull(), embeddings: 
vector("embeddings", { dimensions: 1536 }), metadata: jsonb("metadata").$type<Metadata>(), @@ -222,11 +254,12 @@ export const chunk = pgTable( .defaultNow(), updated_at: timestamp("updated_at", { withTimezone: true }) .notNull() - .defaultNow(), // handle deletion on application layer + .defaultNow(), }, (chunk) => ({ chunkIdIdx: uniqueIndex("chunk_id_idx").on(chunk.id), chunkDocumentIdIdx: index("chunk_document_id_idx").on(chunk.documentId), + chunkContentHashIdx: index("chunk_content_hash_idx").on(chunk.contentHash), embeddingIndex: index("embeddingIndex").using( "hnsw", chunk.embeddings.op("vector_cosine_ops") |