# Assignment 1 - Matrix-matrix multiplication
This assignment makes up 20% of the overall marks for the course. The deadline for submitting this assignment is 5pm on Thursday 19 October 2022.
Coursework is to be submitted using the link on Moodle. You should submit a single pdf file containing your code, the output when you run your code, and your answers to any text questions included in the assessment. The easiest ways to create this file are:

- Write your code and answers in a Jupyter notebook, then select File -> Download as -> PDF via LaTeX (.pdf).
- Write your code and answers on Google Colab, then select File -> Print, and print it as a pdf.
Tasks you are required to carry out and questions you are required to answer are shown in bold below.
## The assignment
In this assignment, we will look at computing the product \(AB\) of two matrices \(A,B\in\mathbb{R}^{n\times n}\). The following snippet of code defines a function that computes the product of two matrices. As an example, the product of two 10 by 10 matrices is printed. The final line prints `matrix1 @ matrix2` - the `@` symbol denotes matrix multiplication, and Python will get Numpy to compute the product of the two matrices. By looking at the output, it's possible to check that the two results are the same.
```python
import numpy as np


def slow_matrix_product(mat1, mat2):
    """Multiply two matrices."""
    assert mat1.shape[1] == mat2.shape[0]
    result = []
    for c in range(mat2.shape[1]):
        column = []
        for r in range(mat1.shape[0]):
            value = 0
            for i in range(mat1.shape[1]):
                value += mat1[r, i] * mat2[i, c]
            column.append(value)
        result.append(column)
    return np.array(result).transpose()


matrix1 = np.random.rand(10, 10)
matrix2 = np.random.rand(10, 10)

print(slow_matrix_product(matrix1, matrix2))
print(matrix1 @ matrix2)
```
The function in this snippet isn’t very good.
## Part 1: a better function
**Write your own function called `faster_matrix_product` that computes the product of two matrices more efficiently than `slow_matrix_product`.** Your function may use functions from Numpy (eg `np.dot`) to complete part of its calculation, but your function should not use `np.dot` or `@` to compute the full matrix-matrix product.
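As a hedged illustration of one approach that fits these constraints (not the only acceptable one), the sketch below builds the result one row at a time, using `np.dot` only for a vector-matrix product rather than the full matrix-matrix product; the name and structure are just an example.

```python
import numpy as np


def faster_matrix_product(mat1, mat2):
    """Multiply two matrices one row at a time.

    Illustrative sketch: each row of the result is a vector-matrix
    product computed with np.dot, so np.dot is only used for part of
    the calculation, never for the full matrix-matrix product.
    """
    assert mat1.shape[1] == mat2.shape[0]
    result = np.zeros((mat1.shape[0], mat2.shape[1]))
    for r in range(mat1.shape[0]):
        result[r, :] = np.dot(mat1[r, :], mat2)
    return result
```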
Before you look at the performance of your function, you should check that it is computing the correct results. **Write a Python script using an `assert` statement that checks that your function gives the same result as using `@` for random 2 by 2, 3 by 3, 4 by 4, and 5 by 5 matrices.**
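A minimal sketch of such a check is shown below; it assumes the `faster_matrix_product` from the previous part, and uses `np.allclose` inside the `assert` because floating-point round-off means the two results will only agree to within a small tolerance.

```python
import numpy as np

for n in [2, 3, 4, 5]:
    a = np.random.rand(n, n)
    b = np.random.rand(n, n)
    # The two products should agree up to floating-point round-off.
    assert np.allclose(faster_matrix_product(a, b), a @ b)
```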
**In a text box, give two brief reasons (1-2 sentences for each) why your function is better than `slow_matrix_product`.** At least one of your reasons should be related to the time you expect the two functions to take.
Next, we want to compare the speed of `slow_matrix_product` and `faster_matrix_product`. **Write a Python script that runs the two functions for matrices of a range of sizes, and use `matplotlib` to create a plot showing the time taken for different sized matrices for both functions.** You should be able to run the functions for matrices of size up to around 1000 by 1000 (but if you're using an older/slower computer, you may decide to decrease the maximums slightly). You do not need to run your functions for every size between your minimum and maximum, but should choose a set of 10-15 values that will give you an informative plot.
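One possible way to structure such an experiment is sketched below; it assumes the two functions defined earlier in this page, and the particular sizes and the use of `time.perf_counter` are example choices, not requirements.

```python
import time

import matplotlib.pyplot as plt
import numpy as np

sizes = [10, 50, 100, 200, 300, 400, 500, 600, 800, 1000]  # example choice of sizes
slow_times = []
fast_times = []

for n in sizes:
    a = np.random.rand(n, n)
    b = np.random.rand(n, n)

    # Time the slow implementation.
    start = time.perf_counter()
    slow_matrix_product(a, b)
    slow_times.append(time.perf_counter() - start)

    # Time the faster implementation.
    start = time.perf_counter()
    faster_matrix_product(a, b)
    fast_times.append(time.perf_counter() - start)

plt.plot(sizes, slow_times, "o-", label="slow_matrix_product")
plt.plot(sizes, fast_times, "o-", label="faster_matrix_product")
plt.xlabel("matrix size n")
plt.ylabel("time (s)")
plt.legend()
plt.show()
```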
## Part 2: speeding it up with Numba
In the second part of this assignment, you're going to use Numba to speed up your function.

**Create a copy of your function `faster_matrix_product` that is just-in-time (JIT) compiled using Numba.** To demonstrate the speed improvement achieved by using Numba, **make a plot (similar to that you made in the first part) that shows the times taken to multiply matrices using `faster_matrix_product`, `faster_matrix_product` with Numba JIT compilation, and Numpy (`@`)**. Numpy's matrix-matrix multiplication is highly optimised, so you should not expect to be as fast as it.
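A hedged sketch of what a JIT-compiled copy might look like is below. The `numba.njit` decorator requests compilation, and because Numba compiles explicit loops very effectively, this sketch uses a plain-loop variant; your own copy should mirror whatever your `faster_matrix_product` does.

```python
import numpy as np
from numba import njit


@njit
def jit_matrix_product(mat1, mat2):
    """Explicit-loop matrix product, compiled by Numba (illustrative sketch)."""
    result = np.zeros((mat1.shape[0], mat2.shape[1]))
    for r in range(mat1.shape[0]):
        for c in range(mat2.shape[1]):
            value = 0.0
            for i in range(mat1.shape[1]):
                value += mat1[r, i] * mat2[i, c]
            result[r, c] = value
    return result


# The first call triggers compilation, so time later calls when benchmarking.
a = np.random.rand(100, 100)
b = np.random.rand(100, 100)
print(jit_matrix_product(a, b))
```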
You may be able to achieve further speed up of your function by adjusting the memory layout used. The function `np.asfortranarray` will make a copy of an array that uses Fortran-style ordering, for example:
```python
import numpy as np

a = np.random.rand(10, 10)
fortran_a = np.asfortranarray(a)
```
**Make a plot that compares the times taken by your JIT compiled function when the inputs have different combinations of C-style and Fortran-style ordering (ie the plot should have lines for when both inputs are C-style, when the first is C-style and the second is Fortran-style, and so on). Focusing on the fact that it is more efficient to access memory that is close to previous accesses, comment (in 1-2 sentences) on why one of these orderings appears to be faster than the others.** (Numba can do a lot of different things when compiling code, so depending on your function there may or may not be a large difference: if there is little change in speeds for your function, you can comment on which ordering you might expect to be faster and why, but conclude that Numba is doing something more advanced.)
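A minimal sketch of how the four ordering combinations could be prepared and timed is shown below; it assumes the `jit_matrix_product` sketch from earlier, and the size and timing details are example choices only.

```python
import time

import numpy as np

n = 500  # example size
a = np.random.rand(n, n)  # Numpy arrays are C-style (row-major) by default
b = np.random.rand(n, n)

combinations = {
    "C, C": (a, b),
    "C, Fortran": (a, np.asfortranarray(b)),
    "Fortran, C": (np.asfortranarray(a), b),
    "Fortran, Fortran": (np.asfortranarray(a), np.asfortranarray(b)),
}

for label, (mat1, mat2) in combinations.items():
    jit_matrix_product(mat1, mat2)  # warm-up call so compilation is not timed
    start = time.perf_counter()
    jit_matrix_product(mat1, mat2)
    print(label, time.perf_counter() - start)
```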