Skip to content

Commit

Permalink
Added types for untyped function parameters. Typed stack variable as…
Browse files Browse the repository at this point in the history
… it was not explicitly typed. Avoided header_meta redefinition
  • Loading branch information
AhmedTammaa committed Dec 20, 2024
1 parent cdd62b7 commit b4d4e57
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions libs/text-splitters/langchain_text_splitters/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def __init__(
self.elements_tree: Dict[int, Tuple[str, str, int, int]] = {}
self.return_each_element = return_each_element

def _header_level(self, element) -> int:
def _header_level(self, element: Any) -> int:
"""Determine the heading level of an element.
Args:
Expand All @@ -168,7 +168,7 @@ def _header_level(self, element) -> int:
return int(tag_name[1])
return 9999

def _dom_depth(self, element) -> int:
def _dom_depth(self, element: Any) -> int:
"""Compute the DOM depth of an element.
Args:
Expand All @@ -182,7 +182,7 @@ def _dom_depth(self, element) -> int:
depth += 1
return depth

def _build_tree(self, elements) -> None:
def _build_tree(self, elements: Any) -> None:
"""Build a tree structure from a list of HTML elements.
Args:
Expand Down Expand Up @@ -375,7 +375,7 @@ def split_text_from_file(self, file: Any) -> List[Document]:
for idx, (tag, text, level, dom_depth) in self.elements_tree.items()
}

stack = []
stack: List[Node] = []
for idx in sorted(nodes):
node = nodes[idx]
while stack and (
Expand Down Expand Up @@ -428,7 +428,7 @@ def process_node(node: Node) -> None:
node_content = node.content # type: ignore[attr-defined]
node_level = node.level # type: ignore[attr-defined]
node_dom_depth = node.dom_depth # type: ignore[attr-defined]

header_meta: Dict[str, str]
if node_type in self.header_tags:
# Remove headers of the same or lower level
headers_to_remove = [
Expand All @@ -444,7 +444,7 @@ def process_node(node: Node) -> None:
active_headers[header_key] = (node_content, node_level, node_dom_depth)

# Create metadata based on active headers
header_meta: Dict[str, str] = {
header_meta = {
key: content
for key, (content, lvl, dd) in active_headers.items()
if node_dom_depth >= dd
Expand All @@ -457,7 +457,7 @@ def process_node(node: Node) -> None:
else:
# For non-header elements, associate with current headers
if node_content.strip():
header_meta: Dict[str, str] = {
header_meta = {
key: content
for key, (content, lvl, dd) in active_headers.items()
if node_dom_depth >= dd
Expand Down

0 comments on commit b4d4e57

Please sign in to comment.