Skip to content

Commit

Permalink
deploy: 9fa14cb
Browse files Browse the repository at this point in the history
  • Loading branch information
HYLcool committed Oct 16, 2023
1 parent 048007d commit cd3be0d
Show file tree
Hide file tree
Showing 89 changed files with 1,584 additions and 145 deletions.
Binary file modified .doctrees/data_juicer.analysis.doctree
Binary file not shown.
Binary file modified .doctrees/data_juicer.core.doctree
Binary file not shown.
Binary file modified .doctrees/data_juicer.format.doctree
Binary file not shown.
Binary file modified .doctrees/data_juicer.ops.common.doctree
Binary file not shown.
Binary file modified .doctrees/data_juicer.ops.deduplicator.doctree
Binary file not shown.
Binary file modified .doctrees/data_juicer.ops.doctree
Binary file not shown.
Binary file modified .doctrees/data_juicer.ops.filter.doctree
Binary file not shown.
Binary file modified .doctrees/data_juicer.ops.mapper.doctree
Binary file not shown.
Binary file modified .doctrees/data_juicer.ops.selector.doctree
Binary file not shown.
Binary file modified .doctrees/data_juicer.utils.doctree
Binary file not shown.
Binary file modified .doctrees/environment.pickle
Binary file not shown.
4 changes: 2 additions & 2 deletions _modules/data_juicer/analysis/column_wise_analysis.html
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ <h1>Source code for data_juicer.analysis.column_wise_analysis</h1><div class="hi
<div class="viewcode-block" id="ColumnWiseAnalysis"><a class="viewcode-back" href="../../../data_juicer.analysis.html#data_juicer.analysis.column_wise_analysis.ColumnWiseAnalysis">[docs]</a><span class="k">class</span> <span class="nc">ColumnWiseAnalysis</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Apply analysis on each column of stats respectively.&quot;&quot;&quot;</span>

<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<div class="viewcode-block" id="ColumnWiseAnalysis.__init__"><a class="viewcode-back" href="../../../data_juicer.analysis.html#data_juicer.analysis.column_wise_analysis.ColumnWiseAnalysis.__init__">[docs]</a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span>
<span class="n">dataset</span><span class="p">,</span>
<span class="n">output_path</span><span class="p">,</span>
<span class="n">overall_result</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span>
Expand All @@ -148,7 +148,7 @@ <h1>Source code for data_juicer.analysis.column_wise_analysis</h1><div class="hi
<span class="n">overall_result</span> <span class="o">=</span> <span class="n">oa</span><span class="o">.</span><span class="n">analyse</span><span class="p">()</span>
<span class="bp">self</span><span class="o">.</span><span class="n">overall_result</span> <span class="o">=</span> <span class="n">overall_result</span>

<span class="bp">self</span><span class="o">.</span><span class="n">save_stats_in_one_file</span> <span class="o">=</span> <span class="n">save_stats_in_one_file</span>
<span class="bp">self</span><span class="o">.</span><span class="n">save_stats_in_one_file</span> <span class="o">=</span> <span class="n">save_stats_in_one_file</span></div>

<div class="viewcode-block" id="ColumnWiseAnalysis.analyse"><a class="viewcode-back" href="../../../data_juicer.analysis.html#data_juicer.analysis.column_wise_analysis.ColumnWiseAnalysis.analyse">[docs]</a> <span class="k">def</span> <span class="nf">analyse</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">show_percentiles</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">show</span><span class="o">=</span><span class="kc">False</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
Expand Down
4 changes: 2 additions & 2 deletions _modules/data_juicer/analysis/diversity_analysis.html
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ <h1>Source code for data_juicer.analysis.diversity_analysis</h1><div class="high
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Apply diversity analysis for each sample and get an overall analysis</span>
<span class="sd"> result.&quot;&quot;&quot;</span>

<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">,</span> <span class="n">output_path</span><span class="p">,</span> <span class="n">lang_or_model</span><span class="o">=</span><span class="s1">&#39;en&#39;</span><span class="p">):</span>
<div class="viewcode-block" id="DiversityAnalysis.__init__"><a class="viewcode-back" href="../../../data_juicer.analysis.html#data_juicer.analysis.diversity_analysis.DiversityAnalysis.__init__">[docs]</a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">,</span> <span class="n">output_path</span><span class="p">,</span> <span class="n">lang_or_model</span><span class="o">=</span><span class="s1">&#39;en&#39;</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Initialization method :param dataset: the dataset to be analysed</span>
<span class="sd"> :param output_path: path to store the analysis results :param</span>
<span class="sd"> lang_or_model: the diversity model or a specific language used to load</span>
Expand All @@ -167,7 +167,7 @@ <h1>Source code for data_juicer.analysis.diversity_analysis</h1><div class="high
<span class="bp">self</span><span class="o">.</span><span class="n">output_path</span> <span class="o">=</span> <span class="n">output_path</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">exists</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">output_path</span><span class="p">):</span>
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">output_path</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">lang_or_model</span> <span class="o">=</span> <span class="n">lang_or_model</span>
<span class="bp">self</span><span class="o">.</span><span class="n">lang_or_model</span> <span class="o">=</span> <span class="n">lang_or_model</span></div>

<div class="viewcode-block" id="DiversityAnalysis.compute"><a class="viewcode-back" href="../../../data_juicer.analysis.html#data_juicer.analysis.diversity_analysis.DiversityAnalysis.compute">[docs]</a> <span class="k">def</span> <span class="nf">compute</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">lang_or_model</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">column_name</span><span class="o">=</span><span class="s1">&#39;text&#39;</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
Expand Down
4 changes: 2 additions & 2 deletions _modules/data_juicer/analysis/overall_analysis.html
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ <h1>Source code for data_juicer.analysis.overall_analysis</h1><div class="highli
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Apply analysis on the overall stats, including mean, std, quantiles,</span>
<span class="sd"> etc.&quot;&quot;&quot;</span>

<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">,</span> <span class="n">output_path</span><span class="p">):</span>
<div class="viewcode-block" id="OverallAnalysis.__init__"><a class="viewcode-back" href="../../../data_juicer.analysis.html#data_juicer.analysis.overall_analysis.OverallAnalysis.__init__">[docs]</a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">dataset</span><span class="p">,</span> <span class="n">output_path</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Initialization method.</span>

Expand All @@ -91,7 +91,7 @@ <h1>Source code for data_juicer.analysis.overall_analysis</h1><div class="highli
<span class="n">os</span><span class="o">.</span><span class="n">makedirs</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">output_path</span><span class="p">)</span>

<span class="c1"># default percentiles to analyse</span>
<span class="bp">self</span><span class="o">.</span><span class="n">default_percentiles</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.25</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.75</span><span class="p">]</span>
<span class="bp">self</span><span class="o">.</span><span class="n">default_percentiles</span> <span class="o">=</span> <span class="p">[</span><span class="mf">0.25</span><span class="p">,</span> <span class="mf">0.5</span><span class="p">,</span> <span class="mf">0.75</span><span class="p">]</span></div>

<div class="viewcode-block" id="OverallAnalysis.analyse"><a class="viewcode-back" href="../../../data_juicer.analysis.html#data_juicer.analysis.overall_analysis.OverallAnalysis.analyse">[docs]</a> <span class="k">def</span> <span class="nf">analyse</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">percentiles</span><span class="o">=</span><span class="p">[]):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
Expand Down
4 changes: 2 additions & 2 deletions _modules/data_juicer/core/analyser.html
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ <h1>Source code for data_juicer.core.analyser</h1><div class="highlight"><pre>
<span class="sd"> dataset better.</span>
<span class="sd"> &quot;&quot;&quot;</span>

<span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cfg</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<div class="viewcode-block" id="Analyser.__init__"><a class="viewcode-back" href="../../../data_juicer.core.html#data_juicer.core.analyser.Analyser.__init__">[docs]</a> <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">cfg</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Initialization method.</span>

Expand Down Expand Up @@ -131,7 +131,7 @@ <h1>Source code for data_juicer.core.analyser</h1><div class="highlight"><pre>
<span class="c1"># parsed_res</span>
<span class="bp">self</span><span class="o">.</span><span class="n">overall_result</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">overall_single_plot_path</span> <span class="o">=</span> <span class="kc">None</span>
<span class="bp">self</span><span class="o">.</span><span class="n">analysis_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">cfg</span><span class="o">.</span><span class="n">work_dir</span><span class="p">,</span> <span class="s1">&#39;analysis&#39;</span><span class="p">)</span>
<span class="bp">self</span><span class="o">.</span><span class="n">analysis_path</span> <span class="o">=</span> <span class="n">os</span><span class="o">.</span><span class="n">path</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">cfg</span><span class="o">.</span><span class="n">work_dir</span><span class="p">,</span> <span class="s1">&#39;analysis&#39;</span><span class="p">)</span></div>

<div class="viewcode-block" id="Analyser.run"><a class="viewcode-back" href="../../../data_juicer.core.html#data_juicer.core.analyser.Analyser.run">[docs]</a> <span class="k">def</span> <span class="nf">run</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">load_data_np</span><span class="o">=</span><span class="kc">None</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
Expand Down
Loading

0 comments on commit cd3be0d

Please sign in to comment.