From 0fce871a73ae5a61cedb453c1fff914e4488c229 Mon Sep 17 00:00:00 2001 From: lfenzo Date: Thu, 31 Oct 2024 19:43:24 -0300 Subject: [PATCH] docs: fixed missing docs for utility functions and added locale statistics in each provider --- docs/Project.toml | 3 ++ docs/src/index.md | 23 ++++++++------- docs/src/providers/finance.md | 10 +++++++ docs/src/providers/identity.md | 10 +++++++ docs/src/providers/localization.md | 10 +++++++ docs/src/utilities/utility_functions.md | 6 ++-- snippets/collect-statistics.jl | 39 +++++++++++++++++++++++++ snippets/correct-data.jl | 2 -- 8 files changed, 87 insertions(+), 16 deletions(-) create mode 100644 snippets/collect-statistics.jl diff --git a/docs/Project.toml b/docs/Project.toml index 45ff452..476e096 100755 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,6 +1,9 @@ [deps] +CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" DocumenterMermaid = "a078cd44-4d9c-4618-b545-3ab9d77f9177" Impostor = "c85fbf42-ee25-425e-8745-f7176d4f19ec" LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589" +PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" diff --git a/docs/src/index.md b/docs/src/index.md index 0549cc6..27a0c3f 100755 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,3 +1,10 @@ +```@setup main +using Impostor +using DataFrames +using CSV +``` + + ## What is Impostor.jl? Impostor is a synthetic tabular-data generator based on random samplings over pre-defined @@ -28,8 +35,7 @@ using Pkg; Pkg.add("Impostor") To get started with Impostor, select your generator function of choice, the simplest example is to generate single and multiple values specifying the number of expected values in the output. 
-```@repl -using Impostor # hide +```@repl main firstname(5) firstname() # equivalent to firstname(1) ``` @@ -48,16 +54,14 @@ Generator functions may be found in each of the Providers individual pages or vi All generator functions accept a `locale` keyword argument, in case no value is provided in the `locale` kwarg the **Session Locale** is used (see section *Concepts* below). -```@repl -using Impostor # hide +```@repl main firstname(2; locale = ["pt_BR"]) firstname(2; locale = ["en_US", "pt_BR"]) ``` In order to change the default `locale` used by the session use the [`setlocale!`](@ref) function: -```@repl -using Impostor # hide +```@repl main setlocale!("pt_BR"); firstname(2) resetlocale!(); # hide @@ -69,9 +73,7 @@ Besides providing several *generator functions* which may be used as standalone generators, Impostor also exports the [`ImpostorTemplate`](@ref) which is a utility struct to encapsulate formats and generate a fully fledgned table. -```@repl -using Impostor # hide -using DataFrames # hide +```@repl main template = ImpostorTemplate([:firstname, :surname, :country_code, :state, :city]); template(3) @@ -104,8 +106,7 @@ In order to facilitate naming and referencing later on the major concepts implem | *Option-based* | `func(v::Vector, n::Int)` | Generates an output with `n` entries produced by `func` but **restricting the generated entries to specified options in `v`**, which specific contents will depend on `func`. Generator functions taking on options in different levels accept the `level` kwarg, when that is the case, docstrings will explain each specific behavior. | | *Mask-based* | `func(v::Vector)` | Generates an output with `length(v)` entries produced by `func`. **The contents of `v` specify element-wise options to restrict the output of `func`.** Equivalent *in terms of output* with calling `[func(opt, 1) for opt in v]` (*i.e.* the option-based generation), but sub-optimal in terms of performance. 
Generator functions taking on masks in different levels accept the `level` kwarg, when it is the case, docstrings will explain each specific behavior.| - ```@repl - using Impostor # hide + ```@repl main firstname(3) # value-based generation firstname(["F"], 3) # option-based generation firstname(["F", "M", "F"]) # mask-based generation diff --git a/docs/src/providers/finance.md b/docs/src/providers/finance.md index ada3c90..427695c 100755 --- a/docs/src/providers/finance.md +++ b/docs/src/providers/finance.md @@ -2,6 +2,16 @@ The following generator functions are available in the *Finance* provider: +```@example +using Impostor # hide +using CSV # hide +using DataFrames # hide +include(joinpath(pkgdir(Impostor), "snippets", "collect-statistics.jl")) # hide +collect_provider_availability_statistics("finance") # hide +``` + +----------- + ```@docs bank_name bank_official_name diff --git a/docs/src/providers/identity.md b/docs/src/providers/identity.md index 881a2f1..25d33a0 100755 --- a/docs/src/providers/identity.md +++ b/docs/src/providers/identity.md @@ -2,6 +2,16 @@ The following generator functions are available in the *Identity* provider: +```@example +using Impostor # hide +using CSV # hide +using DataFrames # hide +include(joinpath(pkgdir(Impostor), "snippets", "collect-statistics.jl")) # hide +collect_provider_availability_statistics("identity") # hide +``` + +----------- + ```@docs prefix birthdate diff --git a/docs/src/providers/localization.md b/docs/src/providers/localization.md index 1944db5..ca5f8d1 100755 --- a/docs/src/providers/localization.md +++ b/docs/src/providers/localization.md @@ -2,6 +2,16 @@ The following generator functions are available in the *Localization* provider: +```@example +using Impostor # hide +using CSV # hide +using DataFrames # hide +include(joinpath(pkgdir(Impostor), "snippets", "collect-statistics.jl")) # hide +collect_provider_availability_statistics("localization") # hide +``` + +----------- + ```@docs address 
address_complement
diff --git a/docs/src/utilities/utility_functions.md b/docs/src/utilities/utility_functions.md
index c7ab09b..6d2d714 100644
--- a/docs/src/utilities/utility_functions.md
+++ b/docs/src/utilities/utility_functions.md
@@ -7,7 +7,7 @@ Impostor.coerse_string_type
 setlocale!
 session_locale
 resetlocale!
-provider_exists
-content_exists
-locale_exists
+is_provider_available
+is_content_available
+is_locale_available
 ```
diff --git a/snippets/collect-statistics.jl b/snippets/collect-statistics.jl
new file mode 100644
index 0000000..64eb978
--- /dev/null
+++ b/snippets/collect-statistics.jl
@@ -0,0 +1,55 @@
+using CSV
+using DataFrames
+using Impostor
+using PrettyTables
+using DataStructures
+
+
+"""
+    collect_provider_availability_statistics(provider::String)
+
+Print a table summarizing the data files shipped with Impostor for `provider`.
+
+Each sub-directory of `src/data/<provider>` in the Impostor package becomes one row with:
+
+- "Generator Function": the sub-directory name;
+- "Available Locales": the number of `.csv` files found in it;
+- "Unique Entries": the number of distinct values in the last column of the table that
+  `Impostor._load!` returns for those locales.
+
+The table is rendered by `pretty_table` and the value of that call is returned.
+
+# Arguments
+- `provider::String`: name of a directory under `src/data`, e.g. `"finance"`.
+"""
+function collect_provider_availability_statistics(provider::String)
+    provider_dir = joinpath(pkgdir(Impostor), "src", "data", provider)
+
+    contents = String[]
+    n_locales = Int[]
+    n_entries = Int[]
+
+    for content_dir in readdir(provider_dir; join=true)
+        # Skip stray files: `readdir` on a non-directory would throw.
+        isdir(content_dir) || continue
+
+        csv_files = filter(endswith(".csv"), readdir(content_dir))
+        content_locales = String[first(splitext(file)) for file in csv_files]
+
+        # `basename` rather than splitting on "/" so Windows path separators work too.
+        content = basename(content_dir)
+        df = Impostor._load!(provider, content, content_locales)
+
+        push!(contents, content)
+        push!(n_locales, length(content_locales))
+        push!(n_entries, nrow(unique(df, [ncol(df)])))
+    end
+
+    # Column vectors keep the element types concrete and the column order explicit.
+    stats = DataFrame(
+        "Generator Function" => contents,
+        "Available Locales" => n_locales,
+        "Unique Entries" => n_entries,
+    )
+    return pretty_table(stats)
+end
diff --git a/snippets/correct-data.jl b/snippets/correct-data.jl
index 473d4e2..c7dab22 100644
--- a/snippets/correct-data.jl
+++ b/snippets/correct-data.jl
@@ -12,5 +12,3 @@ function to_semicolon(provider::String, content::String)
         @info file
     end
 end
-
-to_semicolon("localization", "country")