from kodexa_document import Document

# Build a small document tree from scratch: a root node, one section under it,
# and two paragraphs under the section.
# Always use context managers for automatic cleanup
with Document() as doc:
    # Create the root node
    root = doc.create_node("document", "My Document")
    doc.content_node = root

    # Add child nodes
    section = doc.create_node("section", "Introduction", parent=root)
    para1 = doc.create_node(
        "paragraph", "This is the first paragraph.", parent=section
    )
    para2 = doc.create_node(
        "paragraph", "This is the second paragraph.", parent=section
    )

    # Only the root's direct children are counted here (the section),
    # not the paragraphs nested below it.
    print(f"Created document with {len(root.get_children())} sections")
# Create a document directly from plain text.
# The line breaks are written as explicit "\n" escapes so the text really
# contains the separator passed to from_text() below, even if this snippet is
# re-wrapped or its whitespace is collapsed.
text = (
    "First paragraph of content.\n"
    "Second paragraph with more details.\n"
    "Third paragraph to conclude."
)

# NOTE(review): presumably from_text() produces one paragraph node per
# separator-delimited chunk (three here) - confirm against the library docs.
with Document.from_text(text, separator="\n") as doc:
    paragraphs = doc.select("//paragraph")
    print(f"Created {len(paragraphs)} paragraphs from text")
# Two ways of opening the same KDDB file, selected with the `detached` flag.

# Load into memory for fast processing (creates a copy)
with Document.from_kddb("document.kddb", detached=True) as doc:
    print(f"Loaded document: {doc.uuid}")

    # "//*" selects every node in the document
    nodes = doc.select("//*")
    print(f"Total nodes: {len(nodes)}")

# Load for in-place editing (modifies original file)
with Document.from_kddb("document.kddb", detached=False) as doc:
    # Changes are saved to the original file
    doc.set_metadata("last_accessed", "2024-01-15")
import requests

# Example: Load from an API response
# A timeout keeps the request from blocking indefinitely on an unresponsive
# server, and raise_for_status() aborts on an HTTP error so that an error
# page body is never handed to from_kddb() as if it were document bytes.
response = requests.get("https://api.example.com/documents/123", timeout=30)
response.raise_for_status()
kddb_bytes = response.content

# from_kddb() is given the raw bytes here rather than a file path
with Document.from_kddb(kddb_bytes) as doc:
    print(f"Loaded document from API: {doc.uuid}")
# Walk the direct children of the content node and show the navigation
# helpers called on each one.
with Document.from_kddb("document.kddb") as doc:
    root = doc.content_node

    # Get all children
    children = root.get_children()

    # Navigate relationships
    for child in children:
        parent = child.get_parent()      # Back to root
        siblings = child.get_siblings()  # Other children
        next_node = child.next_node()    # Next sibling
        depth = child.get_depth()        # Depth in tree

        print(f"Node type: {child.type}, depth: {depth}")
# Attach features to a node and read them back.
with Document() as doc:
    root = doc.create_node("document")
    doc.content_node = root

    para = doc.create_node("paragraph", "Styled text", parent=root)

    # Add features (type, name, value)
    # The values used below are strings, an int and a dict.
    para.add_feature("style", "font-family", "Arial")
    para.add_feature("style", "font-size", "12pt")
    para.add_feature("analysis", "word-count", 2)
    para.add_feature(
        "position", "bbox", {"x": 100, "y": 200, "w": 300, "h": 50}
    )

    # Retrieve features
    font = para.get_feature("style", "font-family")
    if font:
        print(f"Font: {font.get_value()}")

    # Get all features of a type
    style_features = para.get_features_of_type("style")

    # Get all features
    all_features = para.get_features()
# Tag a node, then query the tags that were applied.
with Document() as doc:
    root = doc.create_node("document")
    doc.content_node = root

    para = doc.create_node(
        "paragraph", "Important invoice total: $1,234.56", parent=root
    )

    # Simple tag
    para.tag("important")

    # Tag with confidence and value
    para.tag("invoice-total", confidence=0.95, value="$1,234.56")

    # Check for tags
    if para.has_tag("important"):
        print("This paragraph is marked as important")

    # Get tag details
    tag = para.get_tag("invoice-total")
    if tag:
        # Tag details are looked up with capitalised keys
        confidence = tag.get("Confidence")
        value = tag.get("Value")
        print(f"Invoice total: {value} (confidence: {confidence})")

    # List all tags
    all_tags = para.get_tags()
    print(f"Tags: {all_tags}")
# Serialise a document to KDDB bytes, e.g. to return it from a web endpoint.
with Document() as doc:
    root = doc.create_node("document", "API response")
    doc.content_node = root

    # Get as bytes
    kddb_bytes = doc.to_kddb()

    # Send in API response
    # return Response(content=kddb_bytes, media_type="application/octet-stream")
# Attach document-level labels and read them back.
with Document() as doc:
    # Add labels
    doc.add_label("invoice")
    doc.add_label("financial")
    doc.add_label("q1-2024")

    # Get all labels
    labels = doc.labels
    print(f"Document labels: {labels}")
from kodexa_document import Document, DataObjectAccessor, DataObjectInput

# Query and create data objects through a DataObjectAccessor bound to the
# opened document. The UUIDs below are placeholders.
with Document.from_kddb("processed.kddb") as doc:
    accessor = DataObjectAccessor(doc)

    # List all data objects
    all_objects = accessor.get_all()

    # Get root-level data objects (no parent)
    roots = accessor.get_roots()

    # Get children of a specific group
    children = accessor.get_children(parent_group_uuid="some-uuid")

    # Create a new data object
    new_obj = accessor.create(
        DataObjectInput(
            taxonomy_ref="taxonomy://my-org/invoice",
            path="/invoice",
        )
    )

    # Look up by UUID
    obj = accessor.get_by_uuid("abc-123")
from kodexa_document import Document, AuditAccessor

# Inspect revision history through an AuditAccessor bound to the opened
# document. The ids below are placeholders.
with Document.from_kddb("processed.kddb") as doc:
    audit = AuditAccessor(doc)

    # List all revisions
    revisions = audit.list_revisions()

    # Get details for a specific revision
    details = audit.get_revision_details(revision_id=1)

    # View history for a specific data object
    history = audit.get_data_object_history(data_object_id=1)