Skip to main content
This guide covers the essential operations for working with Kodexa Documents in TypeScript: creating, loading, manipulating, and saving documents.

Initialization

Always initialize the SDK before use:
import { Kodexa } from '@kodexa-ai/document-wasm-ts';

/**
 * Application entry point.
 *
 * Initializes the Kodexa SDK once, before any other SDK call is made.
 */
async function main() {
  // Initialize once at application start
  await Kodexa.init();

  // Your code here...
}

// Attach a rejection handler so a failed initialization is reported
// instead of surfacing as an unhandled promise rejection.
main().catch(console.error);

Creating Documents

Empty Document

Create a new document and build its structure:
import { Kodexa } from '@kodexa-ai/document-wasm-ts';

/**
 * Builds a document from scratch: a root node holding one section, which in
 * turn holds two paragraphs, then logs how many children the root has.
 *
 * The document is disposed in `finally` so it is released even when a step fails.
 */
async function createDocument() {
  await Kodexa.init();

  const kddb = await Kodexa.createDocument();

  try {
    // Root of the tree
    const rootNode = await kddb.createNode('document');
    await rootNode.setContent('My Document');

    // Single section attached directly under the root
    const intro = await kddb.createNode('section');
    await intro.setContent('Introduction');
    await rootNode.addChild(intro);

    // Paragraphs are created, filled and attached one at a time, in order
    const paragraphTexts = [
      'This is the first paragraph.',
      'This is the second paragraph.',
    ];
    for (const text of paragraphTexts) {
      const paragraph = await kddb.createNode('paragraph');
      await paragraph.setContent(text);
      await intro.addChild(paragraph);
    }

    const { length: sectionCount } = await rootNode.getChildren();
    console.log(`Created document with ${sectionCount} sections`);
  } finally {
    kddb.dispose();
  }
}

From Text

Parse text into a document:
/**
 * Parses a newline-separated string into a document and logs how many
 * `paragraph` nodes the `//paragraph` selector finds in it.
 */
async function fromText() {
  await Kodexa.init();

  const lines = ['First paragraph.', 'Second paragraph.', 'Third paragraph.'];
  const parsed = await Kodexa.fromText(lines.join('\n'));

  try {
    const { length: paragraphCount } = await parsed.select('//paragraph');
    console.log(`Created ${paragraphCount} paragraphs from text`);
  } finally {
    // Always release the document, even if the query failed
    parsed.dispose();
  }
}

From JSON

Load from JSON representation:
/**
 * Loads a document from its JSON string representation.
 *
 * The payload here carries only a uuid and a metadata object; it is
 * serialized with JSON.stringify before being handed to the SDK.
 */
async function fromJson() {
  await Kodexa.init();

  const payload = {
    uuid: 'example-uuid',
    metadata: { title: 'Test Document' },
  };
  const loaded = await Kodexa.fromJson(JSON.stringify(payload));

  try {
    console.log('Loaded from JSON');
  } finally {
    // Always release the document once we are done with it
    loaded.dispose();
  }
}

Loading Documents

From Blob (Browser)

Load from a file upload or fetch response:
/**
 * Loads a document from the file currently selected in the `#file` input
 * and logs the type of its root node.
 *
 * Does nothing when the input element is missing or no file is selected.
 */
async function loadFromBlob() {
  await Kodexa.init();

  // From file input. getElementById returns null when the element is absent,
  // so keep null in the type instead of asserting it away.
  const fileInput = document.getElementById('file') as HTMLInputElement | null;
  const file = fileInput?.files?.[0];

  if (!file) {
    return;
  }

  const doc = await Kodexa.fromBlob(file);

  try {
    const root = await doc.getRoot();
    console.log(`Loaded document with root type: ${root?.type}`);
  } finally {
    // Always release the document, even if reading the root failed
    doc.dispose();
  }
}

From Fetch Response

/**
 * Downloads a document over HTTP, loads it from the response blob and logs
 * how many nodes the `//*` selector matches.
 *
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function loadFromApi() {
  await Kodexa.init();

  const response = await fetch('https://api.example.com/documents/123');

  // fetch() only rejects on network failure; HTTP errors must be checked
  // explicitly so an error page is never parsed as a document.
  if (!response.ok) {
    throw new Error(`Failed to fetch document: ${response.status} ${response.statusText}`);
  }

  const blob = await response.blob();
  const doc = await Kodexa.fromBlob(blob);

  try {
    const nodes = await doc.select('//*');
    console.log(`Document has ${nodes.length} nodes`);
  } finally {
    // Always release the document, even if the query failed
    doc.dispose();
  }
}

Working with Content Nodes

Traverse the document tree:
/**
 * Walks the direct children of a document's root node, logging each child's
 * basic properties and exercising the navigation helpers available on a node.
 */
async function navigateTree() {
  await Kodexa.init();
  const tree = await Kodexa.fromText(['Para 1', 'Para 2', 'Para 3'].join('\n'));

  try {
    const rootNode = await tree.getRoot();

    // Nothing to traverse without a root; `finally` still disposes the document
    if (!rootNode) {
      return;
    }

    for (const node of await rootNode.getChildren()) {
      // Plain property reads
      console.log(`Type: ${node.type}`);
      console.log(`Content: ${node.content}`);
      console.log(`Index: ${node.index}`);

      // Relationship lookups (results are unused; shown for illustration)
      const parentNode = await node.getParent();
      const following = await node.nextNode();
      const preceding = await node.previousNode();
      const level = await node.getDepth();

      // Position among siblings
      const first = await node.isFirstChild();
      const last = await node.isLastChild();
    }
  } finally {
    tree.dispose();
  }
}

Content Access

Read and modify node content:
/**
 * Sets the content of a freshly created paragraph node and reads it back,
 * first through the async getter and then through the `content` property.
 */
async function workWithContent() {
  await Kodexa.init();
  const scratch = await Kodexa.createDocument();

  try {
    const paragraph = await scratch.createNode('paragraph');

    // Write
    await paragraph.setContent('Initial content');

    // Read back through the async accessor
    const text = await paragraph.getContent();
    console.log(`Content: ${text}`);

    // Synchronous access (after first async call)
    const direct = paragraph.content;
    console.log(`Direct access: ${direct}`);
  } finally {
    // Always release the document
    scratch.dispose();
  }
}

Querying with Selectors

Use XPath-like selectors to find nodes:
/**
 * Runs several selector queries against a three-line text document:
 * all paragraphs, the first paragraph, paragraphs whose content contains
 * 'note', and nodes carrying the 'important' tag.
 */
async function queryDocument() {
  await Kodexa.init();
  const source = ['Important note', 'Regular text', 'Another note'].join('\n');
  const queried = await Kodexa.fromText(source);

  try {
    // Every node of one type
    const { length: paragraphCount } = await queried.select('//paragraph');
    console.log(`Found ${paragraphCount} paragraphs`);

    // First match only; may be absent
    const leading = await queried.selectFirst('//paragraph');
    if (leading) {
      console.log(`First paragraph: ${leading.content}`);
    }

    // Content predicate
    const withNote = await queried.select("//paragraph[contains(@content, 'note')]");
    console.log(`Found ${withNote.length} paragraphs with 'note'`);

    // Tag predicate (result is unused; shown for illustration)
    const importantNodes = await queried.select("//*[@tag='important']");
  } finally {
    queried.dispose();
  }
}

Common Selector Patterns

| Selector | Description |
| --- | --- |
| `//*` | All nodes |
| `//paragraph` | All paragraphs |
| `//section/paragraph` | Direct child paragraphs of sections |
| `//paragraph[1]` | First paragraph |
| `//*[@tag='important']` | Nodes with the 'important' tag |
| `//paragraph[contains(@content, 'text')]` | Paragraphs containing 'text' |

Adding Tags

Annotate nodes with tags:
/**
 * Demonstrates the tagging API on the first paragraph of a document:
 * add a plain tag, add a tag with options, check for a tag, list all tags
 * and finally remove one.
 */
async function tagNodes() {
  await Kodexa.init();
  const invoice = await Kodexa.fromText('Invoice total: $500.00');

  try {
    const target = await invoice.selectFirst('//paragraph');

    // No paragraph means nothing to tag; `finally` still disposes the document
    if (!target) {
      return;
    }

    // Plain tag, no extra data
    await target.tag('important');

    // Tag carrying a confidence score and an extracted value
    const totalOptions = { confidence: 0.95, value: '500.00' };
    await target.tagWithOptions('invoice-total', totalOptions);

    // Presence check
    const isImportant = await target.hasTag('important');
    console.log(`Has 'important' tag: ${isImportant}`);

    // Full tag list
    const tagNames = await target.getTags();
    console.log(`Tags: ${tagNames.join(', ')}`);

    // Removal
    await target.removeTag('important');
  } finally {
    invoice.dispose();
  }
}

Adding Features

Attach metadata to nodes:
/**
 * Attaches features to a paragraph node and reads them back.
 *
 * Each feature is addressed by a (type, name) pair; the values set here are
 * strings, a number and a plain object.
 */
async function addFeatures() {
  await Kodexa.init();
  const scratch = await Kodexa.createDocument();

  try {
    const styled = await scratch.createNode('paragraph');
    await styled.setContent('Styled text');

    // Write features: (type, name, value)
    await styled.setFeature('style', 'font-family', 'Arial');
    await styled.setFeature('style', 'font-size', '12pt');
    await styled.setFeature('analysis', 'word-count', 2);
    const box = { x: 100, y: 200, w: 300, h: 50 };
    await styled.setFeature('position', 'bbox', box);

    // Look up one feature, then fetch its value only if it exists
    const fontFamily = await styled.getFeature('style', 'font-family');
    if (fontFamily) {
      const fontName = await styled.getFeatureValue('style', 'font-family');
      console.log(`Font: ${fontName}`);
    }

    // Presence check
    const hasFontSize = await styled.hasFeature('style', 'font-size');
    console.log(`Has font-size: ${hasFontSize}`);

    // Every feature on the node
    const { length: featureCount } = await styled.getFeatures();
    console.log(`Total features: ${featureCount}`);

    // Only the features of one type
    const { length: styleCount } = await styled.getFeaturesOfType('style');
    console.log(`Style features: ${styleCount}`);
  } finally {
    scratch.dispose();
  }
}

Spatial Data

Work with bounding boxes and rotation:
/**
 * Assigns a bounding box and a rotation to a paragraph node, logging the
 * bounding box after reading it back.
 */
async function spatialData() {
  await Kodexa.init();
  const scratch = await Kodexa.createDocument();

  try {
    const placed = await scratch.createNode('paragraph');

    // Arguments are x, y, width, height
    await placed.setBBox(100, 200, 300, 50);

    // The box may be absent, so only log when one comes back
    const box = await placed.getBBox();
    if (box) {
      const { x, y, width, height } = box;
      console.log(`Position: (${x}, ${y})`);
      console.log(`Size: ${width} x ${height}`);
    }

    // Rotation value of 90 (units are not shown here; presumably degrees)
    await placed.setRotate(90);
  } finally {
    scratch.dispose();
  }
}

Document Metadata

Work with document-level metadata:
/**
 * Writes three document-level metadata entries (two strings and an array),
 * then reads back a single entry and the complete metadata object.
 */
async function documentMetadata() {
  await Kodexa.init();
  const scratch = await Kodexa.createDocument();

  try {
    // Entries are written one by one, in declaration order
    const entries = {
      title: 'My Document',
      author: 'Jane Doe',
      tags: ['invoice', '2024'],
    };
    for (const [key, value] of Object.entries(entries)) {
      await scratch.setMetadataValue(key, value);
    }

    // Single lookup by key
    const storedTitle = await scratch.getMetadataValue('title');
    console.log(`Title: ${storedTitle}`);

    // Everything at once
    const everything = await scratch.getMetadata();
    console.log('All metadata:', everything);
  } finally {
    scratch.dispose();
  }
}

Saving Documents

To JSON

Export for debugging or API responses:
/**
 * Serializes a document to JSON, logs it and POSTs it to `/api/documents`.
 *
 * @throws {Error} When the server rejects the upload with a non-2xx status.
 */
async function saveToJson() {
  await Kodexa.init();
  const doc = await Kodexa.fromText('Content to save');

  try {
    const json = await doc.toJson();
    console.log(json);

    // Send to server
    const response = await fetch('/api/documents', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: json
    });

    // fetch() resolves even for 4xx/5xx responses, so a failed upload must
    // be detected explicitly rather than passing silently.
    if (!response.ok) {
      throw new Error(`Failed to save document: ${response.status} ${response.statusText}`);
    }
  } finally {
    // Always release the document, even if the upload failed
    doc.dispose();
  }
}

To Blob

Export as KDDB binary format:
/**
 * Exports a document as a blob and triggers a browser download of it under
 * the file name `document.kddb`.
 */
async function saveToBlob() {
  await Kodexa.init();
  const exported = await Kodexa.fromText('Content for download');

  try {
    const binary = await exported.toBlob();

    // Point a temporary anchor at an object URL and click it to start the download
    const objectUrl = URL.createObjectURL(binary);
    const link = document.createElement('a');
    link.href = objectUrl;
    link.download = 'document.kddb';
    link.click();

    // The object URL is no longer needed once the click has been dispatched
    URL.revokeObjectURL(objectUrl);
  } finally {
    exported.dispose();
  }
}

Memory Management Best Practices

Proper memory management is critical in WebAssembly applications. Failure to dispose documents will cause memory leaks.

Try-Finally Pattern

/**
 * Reference shape for working with a document: create it, do all work
 * inside `try`, and dispose it in `finally`.
 */
async function safeDocumentHandling() {
  await Kodexa.init();
  const managed = await Kodexa.createDocument();

  try {
    // Every operation on the document belongs inside this block
    const rootNode = await managed.createNode('document');
    await rootNode.setContent('Safe content');
    // ...
  } finally {
    // Runs on success and on failure alike, so the document is never leaked
    managed.dispose();
  }
}

Multiple Documents

/**
 * Opens ten documents, logs the root content of each, and disposes every
 * document that was successfully opened — even if opening or processing
 * fails part-way through.
 */
async function handleMultipleDocuments() {
  await Kodexa.init();

  // Tracks every document opened so far so that `finally` can release them all
  const openDocs: KddbDocument[] = [];

  try {
    let index = 0;
    while (index < 10) {
      openDocs.push(await Kodexa.fromText(`Document ${index}`));
      index += 1;
    }

    // Process documents...
    for (const current of openDocs) {
      const rootNode = await current.getRoot();
      console.log(rootNode?.content);
    }
  } finally {
    // Release whatever was opened, in the order it was opened
    for (const opened of openDocs) {
      opened.dispose();
    }
  }
}

Application Cleanup

// Guard so cleanup runs at most once even when several hooks fire:
// calling process.exit() from the SIGTERM handler also emits 'exit'.
let kodexaCleanedUp = false;

/** Calls Kodexa.cleanup() the first time it is invoked and is a no-op afterwards. */
function cleanupKodexaOnce() {
  if (kodexaCleanedUp) {
    return;
  }
  kodexaCleanedUp = true;
  Kodexa.cleanup();
}

// Browser: cleanup on page unload.
// `window` does not exist in Node.js, so only register when it is defined.
if (typeof window !== 'undefined') {
  window.addEventListener('beforeunload', () => {
    cleanupKodexaOnce();
  });
}

// `process` does not exist in browsers, so only register when it is usable.
if (typeof process !== 'undefined' && typeof process.on === 'function') {
  // Node.js: cleanup on process exit
  process.on('exit', () => {
    cleanupKodexaOnce();
  });

  // Express/Koa: cleanup on server shutdown
  process.on('SIGTERM', () => {
    cleanupKodexaOnce();
    process.exit(0);
  });
}

Error Handling

Handle errors gracefully:
/**
 * Shows error handling around SDK initialization and document loading.
 *
 * An initialization failure is logged and ends the function early. A
 * failure while loading or reading the document is logged, and the document
 * (if one was created) is disposed either way.
 */
async function handleErrors() {
  try {
    await Kodexa.init();
  } catch (error) {
    console.error('Failed to initialize WASM:', error);
    return;
  }

  // Declared outside `try` so `finally` can dispose it; stays undefined if loading fails
  let doc;
  try {
    // NOTE(review): `someBlob` is not defined in this snippet — it is assumed
    // to be supplied by the surrounding code.
    doc = await Kodexa.fromBlob(someBlob);
    const root = await doc.getRoot();
    // ...
  } catch (error) {
    // A thrown value is not guaranteed to be an Error; report it either way
    // rather than dropping non-Error values silently.
    const detail = error instanceof Error ? error.message : String(error);
    console.error('Document error:', detail);
  } finally {
    doc?.dispose();
  }
}

Complete Example

Here’s a full workflow combining the concepts:
import { Kodexa, KddbDocument } from '@kodexa-ai/document-wasm-ts';

/**
 * End-to-end example: builds an invoice document (header section, line-item
 * section and a total), annotates it with tags and features, queries it by
 * tag and exports it as JSON.
 *
 * The total is derived from the line items so the two cannot drift apart.
 * The document is disposed in `finally` whether or not processing succeeds.
 */
async function processInvoice() {
  await Kodexa.init();

  const doc = await Kodexa.createDocument();

  try {
    // Set document metadata
    await doc.setMetadataValue('title', 'Invoice Processing Result');
    await doc.setMetadataValue('processor', 'kodexa-document-example');

    // Build document structure
    const root = await doc.createNode('document');
    await root.setContent('Invoice #12345');

    // Add header section
    const header = await doc.createNode('section');
    await header.setContent('Header');
    await root.addChild(header);

    const vendor = await doc.createNode('paragraph');
    await vendor.setContent('Vendor: Acme Corp');
    await header.addChild(vendor);

    const date = await doc.createNode('paragraph');
    await date.setContent('Date: 2024-01-15');
    await header.addChild(date);

    // Add line items
    const items = await doc.createNode('section');
    await items.setContent('Line Items');
    await root.addChild(items);

    const lineItems = [
      { desc: 'Widget A', amount: 100.00 },
      { desc: 'Widget B', amount: 250.00 },
      { desc: 'Service Fee', amount: 50.00 }
    ];

    for (let i = 0; i < lineItems.length; i++) {
      const { desc, amount } = lineItems[i];

      const item = await doc.createNode('paragraph');
      await item.setContent(`${desc}: $${amount.toFixed(2)}`);
      await items.addChild(item);

      // Keep the numeric amount as a feature and the string form on the tag
      await item.setFeature('line-item', 'amount', amount);
      await item.setFeature('line-item', 'index', i);
      await item.tagWithOptions('line-item', { value: String(amount) });
    }

    // Add total, computed from the line items (100 + 250 + 50 = 400.00)
    const totalAmount = lineItems.reduce((sum, { amount }) => sum + amount, 0);
    const totalText = totalAmount.toFixed(2);

    const total = await doc.createNode('paragraph');
    await total.setContent(`Total: $${totalText}`);
    await root.addChild(total);

    await total.tagWithOptions('invoice-total', {
      confidence: 1.0,
      value: totalText
    });
    await total.setFeature('summary', 'calculated', true);

    // Query the document
    const taggedItems = await doc.select("//*[@tag='line-item']");
    console.log(`Found ${taggedItems.length} line items`);

    const totalNode = await doc.selectFirst("//*[@tag='invoice-total']");
    if (totalNode) {
      console.log(`Invoice total: ${totalNode.content}`);
    }

    // Export
    const json = await doc.toJson();
    console.log('Document JSON:', json);

    console.log('Document processed successfully');

  } finally {
    doc.dispose();
  }
}

// Report any failure instead of leaving the promise rejection unhandled
processInvoice().catch(console.error);

Next Steps

  • Explore advanced selectors for complex queries
  • Learn about Web Workers for background processing
  • Integrate with frontend frameworks (React, Vue, Angular)