Skip to content

Commit

Permalink
Extend xlsx_reader to include formulas (#22)
Browse files Browse the repository at this point in the history
Add cell_data_format option to return data as Cell structs instead of values

This provides access to cell formulas.
  • Loading branch information
dharness authored Dec 11, 2023
1 parent 8d025e3 commit 508dab9
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 6 deletions.
1 change: 1 addition & 0 deletions lib/xlsx_reader.ex
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ defmodule XlsxReader do
* `skip_row?`: function callback that determines if a row should be skipped.
Takes precedence over `blank_value` and `empty_rows`.
Defaults to `nil` (keeping the behaviour of `blank_value` and `empty_rows`).
* `cell_data_format`: Controls the format of the cell data. Can be `:value` (default, returns the cell value only) or `:cell` (returns instances of `XlsxReader.Cell`).
The `Decimal` type requires the [decimal](https://github.com/ericmj/decimal) library.
Expand Down
24 changes: 24 additions & 0 deletions lib/xlsx_reader/cell.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
defmodule XlsxReader.Cell do
  @moduledoc """
  Representation of a single cell in a sheet.

  Cells are returned in this form, instead of as bare values, when the
  `cell_data_format` option is set to `:cell`.

  Fields:

    * `value` - the value of the cell
    * `formula` - the formula used in the cell, if any
    * `ref` - the cell reference, such as `"A1"` or `"B2"`
  """

  @typedoc """
  XLSX cell data
  """
  @type t :: %__MODULE__{
          value: term(),
          formula: String.t() | nil,
          ref: String.t()
        }

  # Every field defaults to `nil` when it is not given explicitly.
  defstruct value: nil, formula: nil, ref: nil
end
42 changes: 37 additions & 5 deletions lib/xlsx_reader/parsers/worksheet_parser.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ defmodule XlsxReader.Parsers.WorksheetParser do

@behaviour Saxy.Handler

alias XlsxReader.{CellReference, Conversion, Number}
alias XlsxReader.{Cell, CellReference, Conversion, Number}
alias XlsxReader.Parsers.Utils

defmodule State do
Expand All @@ -21,11 +21,13 @@ defmodule XlsxReader.Parsers.WorksheetParser do
cell_type: nil,
cell_style: nil,
value: nil,
formula: nil,
type_conversion: nil,
blank_value: nil,
empty_rows: nil,
number_type: nil,
skip_row?: nil
skip_row?: nil,
cell_data_format: :value
end

@doc """
Expand All @@ -40,6 +42,7 @@ defmodule XlsxReader.Parsers.WorksheetParser do
* `skip_row?`: function callback that determines if a row should be skipped or not.
Overwrites `blank_value` and `empty_rows` on the matter of skipping rows.
Defaults to `nil` (keeping the behaviour of `blank_value` and `empty_rows`).
* `cell_data_format`: Controls the format of the cell data. Can be `:value` (default, returns the cell value only) or `:cell` (returns instances of `XlsxReader.Cell`).
"""
def parse(xml, workbook, options \\ []) do
Expand All @@ -49,7 +52,8 @@ defmodule XlsxReader.Parsers.WorksheetParser do
blank_value: Keyword.get(options, :blank_value, ""),
empty_rows: Keyword.get(options, :empty_rows, true),
number_type: Keyword.get(options, :number_type, Float),
skip_row?: Keyword.get(options, :skip_row?)
skip_row?: Keyword.get(options, :skip_row?),
cell_data_format: Keyword.get(options, :cell_data_format, :value)
})
end

Expand Down Expand Up @@ -99,6 +103,10 @@ defmodule XlsxReader.Parsers.WorksheetParser do
{:ok, expect_value(state)}
end

# Start of an `f` element: flag the state so that the next `:characters`
# event is captured as the formula of the current cell instead of being
# ignored.
@impl Saxy.Handler
def handle_event(:start_element, {"f", _attributes}, state) do
  {:ok, expect_formula(state)}
end

@impl Saxy.Handler
def handle_event(:start_element, _element, state) do
{:ok, state}
Expand Down Expand Up @@ -133,6 +141,11 @@ defmodule XlsxReader.Parsers.WorksheetParser do
{:ok, store_value(state, chars)}
end

# Character data received while the state is flagged `:expect_formula` is
# recorded as the formula of the current cell.
@impl Saxy.Handler
def handle_event(:characters, formula_text, %{value: :expect_formula} = state),
  do: {:ok, store_formula(state, formula_text)}

@impl Saxy.Handler
def handle_event(:characters, _chars, state) do
{:ok, state}
Expand All @@ -152,17 +165,26 @@ defmodule XlsxReader.Parsers.WorksheetParser do
%{state | value: :expect_chars}
end

# Flags the state as waiting for formula text by placing the
# `:expect_formula` marker in the `value` field.
defp expect_formula(state), do: Map.replace!(state, :value, :expect_formula)

# Replaces whatever is currently held in the `value` field (typically an
# expectation marker) with the raw characters read for the cell.
defp store_value(state, chars), do: Map.replace!(state, :value, chars)

# Records the formula text for the current cell.
#
# The `:expect_formula` marker held in `value` is cleared at the same time.
# Otherwise a formula that is not followed by a value element would leave the
# marker atom in `value`, and it would be treated as the cell's value when
# the cell is added to the row.
defp store_formula(state, formula) do
  %{state | formula: formula, value: nil}
end

defp add_cell_to_row(state) do
%{
state
| row: [convert_current_cell_value(state) | state.row],
| row: [format_cell_data(state) | state.row],
cell_ref: nil,
cell_type: nil,
value: nil
value: nil,
formula: nil
}
end

Expand Down Expand Up @@ -250,6 +272,16 @@ defmodule XlsxReader.Parsers.WorksheetParser do
Utils.map_attributes(attributes, @cell_attributes_mapping)
end

# Builds the entry that is added to the row for the current cell: a `Cell`
# struct when `cell_data_format` is `:cell`, the converted value otherwise.
defp format_cell_data(state) do
  converted = convert_current_cell_value(state)

  if state.cell_data_format == :cell do
    %Cell{value: converted, formula: state.formula, ref: state.cell_ref}
  else
    # `:value` and any unrecognised setting both yield the bare value.
    converted
  end
end

defp convert_current_cell_value(%State{type_conversion: false} = state) do
case {state.cell_type, state.value} do
{_, nil} ->
Expand Down
Binary file added test/fixtures/has_formulas.xlsx
Binary file not shown.
28 changes: 27 additions & 1 deletion test/xlsx_reader/parsers/worksheet_parser_test.exs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
defmodule XlsxReader.Parsers.WorksheetParserTest do
use ExUnit.Case

alias XlsxReader.{Conversion, Workbook}
alias XlsxReader.{Cell, Conversion, Workbook}
alias XlsxReader.Parsers.{SharedStringsParser, StylesParser, WorksheetParser}

setup do
Expand Down Expand Up @@ -110,4 +110,30 @@ defmodule XlsxReader.Parsers.WorksheetParserTest do

assert [] == rows
end

test "should return cell structs instead of values when cell_data_format is :cell" do
  fixture = TestFixtures.path("has_formulas.xlsx")
  {:ok, package} = XlsxReader.open(fixture)
  {:ok, sheets} = XlsxReader.sheets(package, cell_data_format: :cell)

  # First sheet: no cell is expected to carry a formula.
  first_sheet_rows = [
    [
      %Cell{value: "abc", formula: nil, ref: "A1"},
      %Cell{value: 123.0, formula: nil, ref: "B1"}
    ]
  ]

  # Second sheet: D1 is expected to carry its formula text; the third entry is
  # expected as a bare "" rather than a `Cell` struct.
  second_sheet_rows = [
    [
      %Cell{value: "def", formula: nil, ref: "A1"},
      %Cell{value: 456.0, formula: nil, ref: "B1"},
      "",
      %Cell{value: 466.0, formula: "SUM(B1, 10)", ref: "D1"}
    ]
  ]

  expected = [{"sheet_1", first_sheet_rows}, {"sheet_2", second_sheet_rows}]

  assert expected == sheets
end
end

0 comments on commit 508dab9

Please sign in to comment.