Skip to content

Commit

Permalink
Merge pull request #435 from davidleon/fix/unicode_escape
Browse files Browse the repository at this point in the history
Fix unicode escape handling for the case of "/utils" in the response.
  • Loading branch information
LarFii authored Dec 9, 2024
2 parents 725284e + 3210c8f commit 6282abc
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 2 deletions.
5 changes: 3 additions & 2 deletions lightrag/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
from .utils import (
wrap_embedding_func_with_attrs,
locate_json_string_body_from_string,
safe_unicode_decode,
)

import sys
Expand Down Expand Up @@ -85,14 +86,14 @@ async def inner():
if content is None:
continue
if r"\u" in content:
content = content.encode("utf-8").decode("unicode_escape")
content = safe_unicode_decode(content.encode("utf-8"))
yield content

return inner()
else:
content = response.choices[0].message.content
if r"\u" in content:
content = content.encode("utf-8").decode("unicode_escape")
content = safe_unicode_decode(content.encode("utf-8"))
return content


Expand Down
17 changes: 17 additions & 0 deletions lightrag/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,3 +507,20 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
}

await hashing_kv.upsert({cache_data.mode: mode_cache})


# Matches either a UTF-16 surrogate pair written as two consecutive \uXXXX
# escapes (groups 1 and 2), or any single \uXXXX escape (group 3). The pair
# alternative is listed first so a high surrogate immediately followed by a
# low surrogate is consumed as one unit. Compiled once at import time.
_UNICODE_ESCAPE_PATTERN = re.compile(
    r"\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})"
    r"|\\u([0-9a-fA-F]{4})"
)


def safe_unicode_decode(content):
    """Replace literal ``\\uXXXX`` escape sequences with the characters they name.

    Only well-formed ``\\u`` + four hex digit sequences are touched; any other
    backslash sequence (for example ``\\utils`` or ``\\n``) is left exactly as
    it is, so text that merely contains a backslash followed by ``u`` does not
    raise or get corrupted.

    Two consecutive escapes that form a UTF-16 surrogate pair (such as
    ``\\ud83d\\ude00``) are combined into the single code point they encode,
    so the result stays encodable as UTF-8. An unpaired surrogate escape is
    converted to the corresponding lone surrogate code point.

    Args:
        content: UTF-8 encoded ``bytes``/``bytearray``, or an already-decoded
            ``str``.

    Returns:
        The decoded ``str`` with the escape sequences substituted.

    Raises:
        UnicodeDecodeError: If ``content`` is bytes that are not valid UTF-8.
    """
    # Callers historically pass bytes; accept str too so they need not
    # round-trip through encode() just to call this helper.
    if isinstance(content, (bytes, bytearray)):
        text = content.decode("utf-8")
    else:
        text = content

    def replace_unicode_escape(match):
        high, low, single = match.groups()
        if single is not None:
            # Plain \uXXXX escape: map the hex value straight to its character.
            return chr(int(single, 16))
        # Surrogate pair: rebuild the supplementary-plane code point.
        code_point = (
            0x10000
            + ((int(high, 16) - 0xD800) << 10)
            + (int(low, 16) - 0xDC00)
        )
        return chr(code_point)

    return _UNICODE_ESCAPE_PATTERN.sub(replace_unicode_escape, text)

0 comments on commit 6282abc

Please sign in to comment.