Skip to content

Commit

Permalink
Merge branch 'master' of github.com:xavier/xlsx_reader
Browse files Browse the repository at this point in the history
  • Loading branch information
xavier committed Oct 13, 2023
2 parents 1c6a6e2 + 00888d2 commit eb7cc62
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 14 deletions.
2 changes: 1 addition & 1 deletion lib/xlsx_reader.ex
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ defmodule XlsxReader do
```
"""
@spec sheet(XlsxReader.Package.t(), sheet_name(), Keyword.t()) :: {:ok, rows()}
@spec sheet(XlsxReader.Package.t(), sheet_name(), Keyword.t()) :: {:ok, rows()} | error()
def sheet(package, sheet_name, options \\ []) do
PackageLoader.load_sheet_by_name(package, sheet_name, options)
end
Expand Down
17 changes: 12 additions & 5 deletions lib/xlsx_reader/package_loader.ex
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ defmodule XlsxReader.PackageLoader do
RelationshipsParser,
SharedStringsParser,
StylesParser,
Utils,
WorkbookParser,
WorksheetParser
}
Expand Down Expand Up @@ -105,28 +106,28 @@ defmodule XlsxReader.PackageLoader do
end

defp load_workbook_xml(zip_handle) do
with {:ok, xml} <- ZipArchive.extract(zip_handle, @workbook_xml) do
with {:ok, xml} <- extract_xml(zip_handle, @workbook_xml) do
WorkbookParser.parse(xml)
end
end

defp load_workbook_xml_rels(zip_handle) do
with {:ok, xml} <- ZipArchive.extract(zip_handle, @workbook_xml_rels) do
with {:ok, xml} <- extract_xml(zip_handle, @workbook_xml_rels) do
RelationshipsParser.parse(xml)
end
end

defp load_shared_strings(package) do
with {:ok, file} <- single_rel_target(package.workbook.rels.shared_strings),
{:ok, xml} <- ZipArchive.extract(package.zip_handle, file),
{:ok, xml} <- extract_xml(package.zip_handle, file),
{:ok, shared_strings} <- SharedStringsParser.parse(xml) do
%{package | workbook: %{package.workbook | shared_strings: shared_strings}}
end
end

defp load_styles(package, supported_custom_formats) do
with {:ok, file} <- single_rel_target(package.workbook.rels.styles),
{:ok, xml} <- ZipArchive.extract(package.zip_handle, file),
{:ok, xml} <- extract_xml(package.zip_handle, file),
{:ok, style_types, custom_formats} <- StylesParser.parse(xml, supported_custom_formats) do
%{
package
Expand All @@ -146,11 +147,17 @@ defmodule XlsxReader.PackageLoader do
end

defp load_worksheet_xml(package, file, options) do
with {:ok, xml} <- ZipArchive.extract(package.zip_handle, file) do
with {:ok, xml} <- extract_xml(package.zip_handle, file) do
WorksheetParser.parse(xml, package.workbook, options)
end
end

# Extracts `file` from the archive and passes its contents through
# `Utils.ensure_utf8/1` before handing them back to the caller.
#
# Any result of `ZipArchive.extract/2` that is not `{:ok, contents}` is
# returned to the caller untouched.
defp extract_xml(zip_handle, file) do
  case ZipArchive.extract(zip_handle, file) do
    {:ok, raw} -> Utils.ensure_utf8(raw)
    failure -> failure
  end
end

defp xl_path(relative_path), do: Path.join("xl", relative_path)

defp find_sheet_by_name(package, name) do
Expand Down
43 changes: 43 additions & 0 deletions lib/xlsx_reader/parsers/utils.ex
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,47 @@ defmodule XlsxReader.Parsers.Utils do
end
end)
end

@doc """
Returns a UTF-8 binary, which is the only character encoding supported by the XML parser.

The encoding is detected from a leading byte order mark (BOM):

  * `FF FE` - the remainder is converted from UTF-16LE to UTF-8
  * `FE FF` - the remainder is converted from UTF-16BE to UTF-8
  * `EF BB BF` - the remainder is already UTF-8 and is returned as is

In all three cases the BOM itself is dropped from the result. Input without
one of these BOMs is returned unchanged and is not validated.

Returns `{:ok, utf8}` on success or `{:error, message}` when the UTF-16
payload is invalid or truncated.

## Examples

    iex> XlsxReader.Parsers.Utils.ensure_utf8("UTF-8")
    {:ok, "UTF-8"}

    iex> XlsxReader.Parsers.Utils.ensure_utf8(<<0xef, 0xbb, 0xbf, "UTF-8">>)
    {:ok, "UTF-8"}

    iex> XlsxReader.Parsers.Utils.ensure_utf8(<<0xff, 0xfe, 0x55, 0x00, 0x54, 0x00, 0x46, 0x00, 0x2d, 0x00, 0x31, 0x00, 0x36, 0x00, 0x4c, 0x00, 0x45, 0x00>>)
    {:ok, "UTF-16LE"}

    iex> XlsxReader.Parsers.Utils.ensure_utf8(<<0xfe, 0xff, 0x00, 0x55, 0x00, 0x54, 0x00, 0x46, 0x00, 0x2d, 0x00, 0x31, 0x00, 0x36, 0x00, 0x42, 0x00, 0x45>>)
    {:ok, "UTF-16BE"}

    iex> XlsxReader.Parsers.Utils.ensure_utf8(<<0xff, 0xfe, 0x00>>)
    {:error, "incomplete UTF-16 binary"}

"""
@spec ensure_utf8(binary()) :: {:ok, String.t()} | {:error, String.t()}
def ensure_utf8(<<0xFF, 0xFE, rest::binary>>),
  do: convert_utf16_to_utf8(rest, :little)

def ensure_utf8(<<0xFE, 0xFF, rest::binary>>),
  do: convert_utf16_to_utf8(rest, :big)

# A UTF-8 BOM needs no conversion, but it is dropped so that the result is
# BOM-free on every path, exactly like the UTF-16 clauses above (which discard
# their BOM by converting only `rest`).
def ensure_utf8(<<0xEF, 0xBB, 0xBF, rest::binary>>), do: {:ok, rest}

# No recognized BOM: the input is taken to be UTF-8 already.
def ensure_utf8(utf8), do: {:ok, utf8}

# Converts a BOM-less UTF-16 binary of the given endianness (`:little` or
# `:big`) to UTF-8, mapping the three possible outcomes of
# `:unicode.characters_to_binary/2` onto `{:ok, _}` / `{:error, message}`.
defp convert_utf16_to_utf8(utf16, endianness) do
  case :unicode.characters_to_binary(utf16, {:utf16, endianness}) do
    utf8 when is_binary(utf8) ->
      {:ok, utf8}

    {:error, _, _} ->
      {:error, "error converting UTF-16 binary to UTF-8"}

    {:incomplete, _, _} ->
      {:error, "incomplete UTF-16 binary"}
  end
end
end
8 changes: 2 additions & 6 deletions lib/xlsx_reader/zip_archive.ex
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ defmodule XlsxReader.ZipArchive do
def extract(zip_handle, file) do
with {:ok, zip} <- source(zip_handle),
{:ok, [{_, contents}]} <- :zip.extract(zip, extract_options(file)) do
{:ok, remove_bom(contents)}
{:ok, contents}
else
{:ok, []} ->
{:error, "file #{inspect(file)} not found in archive"}
Expand All @@ -57,10 +57,6 @@ defmodule XlsxReader.ZipArchive do

##

def remove_bom(str) when is_binary(str) do
str |> String.trim_leading("\uFEFF")
end

defp source({:path, path}) do
{:ok, String.to_charlist(path)}
end
Expand Down Expand Up @@ -92,7 +88,7 @@ defmodule XlsxReader.ZipArchive do
end

defp translate_zip_error({:error, code})
when code in [:einval, :bad_eocd, :bad_central_directory] do
when code in [:einval, :bad_eocd, :bad_central_directory, :eisdir] do
{:error, "invalid zip file"}
end
end
2 changes: 1 addition & 1 deletion mix.exs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ defmodule XlsxReader.MixProject do
# Run "mix help deps" to learn about dependencies.
defp deps do
[
{:saxy, "~> 1.4.0"},
{:saxy, "~> 1.5.0"},
{:credo, "~> 1.4.0", only: [:dev, :test], runtime: false},
{:decimal, "~> 1.0 or ~> 2.0", optional: true},
{:dialyxir, "~> 1.0.0", only: :dev, runtime: false},
Expand Down
2 changes: 1 addition & 1 deletion mix.lock
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@
"makeup_elixir": {:hex, :makeup_elixir, "0.16.0", "f8c570a0d33f8039513fbccaf7108c5d750f47d8defd44088371191b76492b0b", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "28b2cbdc13960a46ae9a8858c4bebdec3c9a6d7b4b9e7f4ed1502f8159f338e7"},
"makeup_erlang": {:hex, :makeup_erlang, "0.1.1", "3fcb7f09eb9d98dc4d208f49cc955a34218fc41ff6b84df7c75b3e6e533cc65f", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "174d0809e98a4ef0b3309256cbf97101c6ec01c4ab0b23e926a9e17df2077cbb"},
"nimble_parsec": {:hex, :nimble_parsec, "1.2.3", "244836e6e3f1200c7f30cb56733fd808744eca61fd182f731eac4af635cc6d0b", [:mix], [], "hexpm", "c8d789e39b9131acf7b99291e93dae60ab48ef14a7ee9d58c6964f59efb570b0"},
"saxy": {:hex, :saxy, "1.4.0", "c7203ad20001f72eaaad07d08f82be063fa94a40924e6bb39d93d55f979abcba", [:mix], [], "hexpm", "3fe790354d3f2234ad0b5be2d99822a23fa2d4e8ccd6657c672901dac172e9a9"},
"saxy": {:hex, :saxy, "1.5.0", "0141127f2d042856f135fb2d94e0beecda7a2306f47546dbc6411fc5b07e28bf", [:mix], [], "hexpm", "ea7bb6328fbd1f2aceffa3ec6090bfb18c85aadf0f8e5030905e84235861cf89"},
}
8 changes: 8 additions & 0 deletions test/xlsx_reader_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ defmodule XlsxReaderTest do
assert {:error, "invalid zip file"} = XlsxReader.open(xlsx)
end

test "rejects relative and absolute path to directory" do
  # A directory is not a zip archive, however it is addressed.
  directory = "test"

  for path <- [directory, Path.absname(directory)] do
    assert {:error, "invalid zip file"} = XlsxReader.open(path)
  end
end

test "supported custom formats" do
xlsx = TestFixtures.path("test.xlsx")

Expand Down

0 comments on commit eb7cc62

Please sign in to comment.