% matrixderivatives.tex
\documentclass[12pt,a4paper]{article}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage{amsmath,amscd}
\usepackage[symbol]{footmisc}
\usepackage{fancyhdr}
\usepackage{graphicx}
\usepackage{bm}
\usepackage[english]{babel}
\linespread{1.25}
\DeclareMathOperator{\Tr}{Tr}
\DeclareMathOperator{\D}{D}
\DeclareMathOperator{\Real}{Re}
\DeclareMathOperator{\T}{\text{\scriptsize T}}
\renewcommand{\thefootnote}{\fnsymbol{footnote}}
\begin{document}
\section{Hamiltonian Monte Carlo for fuzzy spaces}
\subsection{Statement of the problem}
The fuzzy space action considered here is:
\begin{equation}
S[D] = g_2 \Tr D^2 + \Tr D^4
\end{equation}
where $g_2 \in \mathbb{R}$ and $D$ is of the form:
\begin{equation}
D = \sum_i \omega_i \otimes (M_i \otimes I + \epsilon_i I \otimes M_i^T)
\end{equation}
for Hermitian $\omega_i$ and $M_i$, and $\epsilon_i = \pm 1$. \newline
The dynamical variables in the Monte Carlo are the $n \times n$ matrices $M_i$. Hamiltonian Monte Carlo requires taking derivatives such as:
\begin{equation}
\frac{\partial S[M_i]}{\partial M_k}
\end{equation}
which amounts to finding formulas for terms like:
\begin{equation}
\frac{\partial \Tr D^p}{\partial M_k}.
\end{equation}
In the following, formulas for $p=2$ and $p=4$ are developed.
\subsection{Matrix calculus}
Let $A \in M_n(\mathbb{C})$ and $f(A)$ be a complex-valued function of $A$. The derivative of $f$ with respect to $A$ is defined in components as the $n \times n$ matrix:
\begin{equation}
\left(\frac{\partial f}{\partial A}\right)_{lm} \equiv \frac{\partial f}{\partial A_{lm}}.
\end{equation}
The two special cases of interest here are:
\begin{equation}
\frac{\partial \Tr A}{\partial A} = I
\end{equation}
\begin{equation}
\frac{\partial \Tr AB}{\partial A} = B^T.
\end{equation}
\subsection{The case $p=2$}
When $p = 2$ the $M_i$ matrices are decoupled:
\begin{equation}
\Tr D^2 = \sum_i \Tr \omega_i^2 (2 n \Tr M_i^2 + 2 \epsilon_i (\Tr M_i)^2).
\end{equation}
Taking a derivative with respect to $M_k$ yields:
\begin{align}\label{eq:As}
\frac{\partial}{\partial M_k} \left( \sum_i \Tr \omega_i^2 (2 n \Tr M_i^2 + 2 \epsilon_i (\Tr M_i)^2) \right) = \notag \\
\sum_i \delta_{ik} \Tr \omega_i^2 \left( 4 n M_i^T + 4 \epsilon_i (\Tr M_i) I \right) = \notag \\
4 C \left( n M_k^T + \epsilon_k (\Tr M_k) I \right)
\end{align}
where $C \equiv \Tr \omega_i^2$ is the dimension of the Clifford module.
\subsection{The case $p=4$}
First expand $\Tr D^4$:
\begin{align}\label{eq:trd4}
\Tr D^4 &= \sum_{i_1, i_2, i_3, i_4} \Tr (\omega_{i_1} \omega_{i_2} \omega_{i_3} \omega_{i_4}) \cdot \notag \\
\Big( \ &n [1+\epsilon \ *] \Tr (M_{i_1} M_{i_2} M_{i_3} M_{i_4}) \ \ + \notag \\
&\epsilon_{i_1} \Tr M_{i_1} [1 + \epsilon \ *] \Tr ( M_{i_2} M_{i_3} M_{i_4}) \ \ + \notag \\
&\epsilon_{i_2} \Tr M_{i_2} [1 + \epsilon \ *] \Tr (M_{i_1} M_{i_3} M_{i_4}) \ \ + \notag \\
&\epsilon_{i_3} \Tr M_{i_3} [1 + \epsilon \ *] \Tr (M_{i_1} M_{i_2} M_{i_4}) \ \ + \notag \\
&\epsilon_{i_4} \Tr M_{i_4} [1 + \epsilon \ *] \Tr (M_{i_1} M_{i_2} M_{i_3}) \ \ + \notag \\
&\epsilon_{i_1} \epsilon_{i_2} [1 + \epsilon] \Tr (M_{i_1} M_{i_2} ) \Tr (M_{i_3} M_{i_4}) \ \ + \notag \\
&\epsilon_{i_1} \epsilon_{i_3} [1 + \epsilon] \Tr (M_{i_1} M_{i_3}) \Tr (M_{i_2} M_{i_4}) \ \ + \notag \\
&\epsilon_{i_1} \epsilon_{i_4} [1 + \epsilon] \Tr (M_{i_1} M_{i_4}) \Tr (M_{i_2} M_{i_3}) \ \Big)
\end{align}
where $*$ denotes complex conjugation of everything that appears on the right, $\epsilon$ is defined as the product $\epsilon \equiv \epsilon_{i_1}\epsilon_{i_2}\epsilon_{i_3}\epsilon_{i_4}$, and the relation $M^T = M^*$ has been used. Since $D$ is Hermitian, the expression must be real. This is not immediately obvious because of the $\epsilon = \pm 1$ factor inside the square brackets. Reality nonetheless holds, and becomes manifest by observing that a simultaneous index exchange $i_1 \leftrightarrow i_4$ and $i_2 \leftrightarrow i_3$ is equivalent to taking the complex conjugate (in fact, this is not the only index exchange that amounts to complex conjugation). \newline
Taking a matrix derivative with respect to $M_k$ results in non-vanishing contributions when $k=i_1, k=i_2, k=i_3$ or $k=i_4$:
\begin{align}
\frac{\partial}{\partial M_k} \Tr D^4 &= \sum_{i_1, i_2, i_3, i_4} \Tr (\omega_{i_1} \omega_{i_2} \omega_{i_3} \omega_{i_4}) \cdot \notag \\
\Big( &\delta_{ki_1} A(i_1, i_2, i_3, i_4)^T + \delta_{ki_2} A(i_2, i_3, i_4, i_1)^T + \notag \\
&\delta_{ki_3} A(i_3, i_4, i_1, i_2)^T + \delta_{ki_4} A(i_4, i_1, i_2, i_3)^T \Big)
\end{align}
where $A(a, b, c, d )$ is the following $n \times n$ matrix:
\begin{align}
A(a,b,c,d) \equiv \ &n [1+\epsilon \ \dagger] M_b M_c M_d + \notag \\
&\epsilon_a I [1 + \epsilon \ *] \Tr M_b M_c M_d + \notag \\
&\epsilon_b \Tr M_b [1 + \epsilon \ \dagger] M_c M_d + \notag \\
&\epsilon_c \Tr M_c [1 + \epsilon \ \dagger] M_b M_d + \notag \\
&\epsilon_d \Tr M_d [1 + \epsilon \ \dagger] M_b M_c + \notag \\
&\epsilon_a \epsilon_b M_b [1 + \epsilon] \Tr M_c M_d + \notag \\
&\epsilon_a \epsilon_c M_c [1 + \epsilon] \Tr M_b M_d + \notag \\
&\epsilon_a \epsilon_d M_d [1 + \epsilon] \Tr M_b M_c
\end{align}
and $\dagger$ denotes Hermitian conjugation of everything that appears on the right. Upon relabeling the indices and cycling the $\omega$ matrices in the trace, the equation becomes:
\begin{align}\label{eq:derd4}
\frac{\partial}{\partial M_k} \Tr D^4 &= 4 \sum_{i_1, i_2, i_3, i_4} \delta_{ki_1} \Tr (\omega_{i_1} \omega_{i_2} \omega_{i_3} \omega_{i_4}) A(i_1, i_2, i_3, i_4)^T \notag \\
&= 4 \sum_{i_1, i_2, i_3} \Tr (\omega_{k} \omega_{i_1} \omega_{i_2} \omega_{i_3}) A(k, i_1, i_2, i_3)^T \equiv 4 \sum_{i_1, i_2, i_3} \mathcal{B}_k(i_1, i_2, i_3)
\end{align}
with $\mathcal{B}_k(a, b, c)$ denoting the generic term in the sum.\newline
To see that Eq.(\ref{eq:derd4}) defines a Hermitian matrix, notice that an exchange of indices $i_1 \leftrightarrow i_3$ is equivalent to taking the Hermitian conjugate:
\begin{equation}\label{eq:BBd}
\mathcal{B}_k(i_1, i_2, i_3)^{\dagger} = \mathcal{B}_k(i_3, i_2, i_1)
\end{equation}
therefore the sum in Eq.(\ref{eq:derd4}) reduces to:
\begin{equation}\label{eq:splitsum}
\sum_{\substack{i_1 > i_3 \\ i_2}} [1+\dagger] \mathcal{B}_k(i_1, i_2, i_3) + \sum_{i_1, i_2} \mathcal{B}_k(i_1, i_2, i_1).
\end{equation}
In fact, by looking at the form of $\mathcal{B}$, it is clear that terms in the sum are qualitatively different based on the number of indices that coincide. Therefore it would be computationally convenient to write Eq.(\ref{eq:splitsum}) in a way that emphasizes this difference.\newline
The only terms that contribute when all indices are different are the following:
\begin{equation} \label{eq:alldifferent}
\sum_{i_1 > i_2 > i_3} [1+\dagger] \Big( \mathcal{B}_k(i_1, i_2, i_3) + \mathcal{B}_k(i_1, i_3, i_2) + \mathcal{B}_k(i_2, i_1, i_3) \Big).
\end{equation}
The three inequivalent permutations of indices that appear in this formula are based on a group-theoretical argument that will generalize easily to powers of $D$ higher than 4. First consider the symmetric group of degree three $S_3$ acting on the set of indices $\{i_1, i_2, i_3\}$, and the subgroup of permutations that induce a simple change in $\mathcal{B}$, which in this case is $H = \{(), (13)\} \cong S_2$ (the first element being the identity permutation, and the second the exchange $i_1 \leftrightarrow i_3$ which induces $\mathcal{B} \rightarrow \mathcal{B}^\dagger$). The idea is then to restrict the sum to $i_1 > i_2 > i_3$ and quotient out the action of $H$ by introducing a suitable pre-factor that accounts for it (in this case $[1+\dagger]$). Practically, the inequivalent permutations of indices that appear in Eq.(\ref{eq:alldifferent}) are found by computing the (left or right) cosets of $H \subset S_3$ and acting on $\{i_1, i_2, i_3\}$ with a representative from each coset. In this case the representatives were chosen to be $(), (23), (12)$.\newline
What is left are terms in which at least two indices are equal. These are:
\begin{align} \label{eq:someequal}
\sum_{i_1 > i_2} [1+\dagger] \Big( \mathcal{B}_k(i_1, i_1, i_2) &+ \mathcal{B}_k(i_1, i_2, i_2) \Big) + \notag \\
&\sum_{i_1 \neq i_2} \mathcal{B}_k(i_1, i_2, i_1) + \sum_{i} \mathcal{B}_k(i,i,i).
\end{align}
At this point, a useful property of the $\omega$ matrices can be exploited to simplify both Eq.(\ref{eq:alldifferent}) and Eq.(\ref{eq:someequal}):
\begin{equation}\label{eq:gamma0}
\Tr(\omega_{\sigma(i_1)} \omega_{\sigma(i_2)} \omega_{\sigma(j)} \omega_{\sigma(l)}) \propto \Tr(\omega_j \omega_l) = 0 \qquad \text{if} \ \ i_1 = i_2 \ \text{and} \ j \neq l
\end{equation}
for any permutation $\sigma$ acting on $\{i_1, i_2, j, l\}$ (here $j$ and $l$ are generic indices, unrelated to the index $k$ of the derivative). In other words, if two indices are the same and the other two are different, the trace on the $\omega$ matrices vanishes.\newline
Putting together Eq.(\ref{eq:alldifferent}), Eq.(\ref{eq:someequal}) and Eq.(\ref{eq:gamma0}), the final formula for $\partial_k \Tr D^4$ reads:
\begin{align}
\frac{\partial}{\partial M_k} \Tr D^4 = 4 \Bigg[ \sum_{\substack{i_1 > i_2 > i_3 \\ i_1,i_2,i_3 \neq k}} &[1+\dagger] \Big( \mathcal{B}_k(i_1, i_2, i_3) + \mathcal{B}_k(i_1, i_3, i_2) + \mathcal{B}_k(i_2, i_1, i_3) \Big) \notag \\
&+ \sum_{\substack{i \\ i \neq k}} \Big( [1+\dagger] \mathcal{B}_k(i, i, k) + \mathcal{B}_k(i, k, i) \Big) + \mathcal{B}_k(k,k,k) \Bigg].
\end{align}
The explicit form of $\mathcal{B}_k(i,i,k), \ \mathcal{B}_k(i,k,i)$ and $\mathcal{B}_k(k,k,k)$ is given in Appendix A.
\subsection{A general formula for every $p$}
The first problem is to write $\Tr D^p$ in a useful form, along the lines of Eq.(\ref{eq:trd4}).\newline
$\Tr D^p$ expands to:
\begin{align}
\Tr D^p &= \sum_{i_1 \ldots i_p} \Tr \omega_{i_1} \ldots \omega_{i_p} \cdot \notag \\
&\Tr \big( (M_{i_1} \otimes I + \epsilon_{i_1} I \otimes M_{i_1}^T) \ldots (M_{i_p} \otimes I + \epsilon_{i_p} I \otimes M_{i_p}^T) \big)
\end{align}
Ignoring (for now) the trace over the $\omega$ matrices, a typical term in the sum is:
\begin{equation}\label{eq:typical}
\Tr \big( \epsilon_B A \otimes B^* + \epsilon_A B \otimes A^* \big)
\end{equation}
where $A$ and $B$ are related to the product $M_{i_1} \ldots M_{i_p}$ in the following way:
\begin{enumerate}
\item pick $r \geq 0$ numbers $k_1 < \ldots < k_r$ from $\{1, \ldots , p\}$ and call the remaining $p-r$ numbers $j_1 < \ldots < j_{p-r}$;
\item define $A = M_{i_{k_1}} \ldots M_{i_{k_r}}$ and $B = M_{i_{j_1}} \ldots M_{i_{j_{p-r}}}$ (if $r=0$, $A=I$);
\item define $\epsilon_A = \epsilon_{i_{k_1}} \ldots \epsilon_{i_{k_r}}$ and $\epsilon_B = \epsilon_{i_{j_1}} \ldots \epsilon_{i_{j_{p-r}}} = \epsilon_A \epsilon_{i_1} \ldots \epsilon_{i_p}$.
\end{enumerate}
In particular, a choice of $A$ completely characterizes $B$. \newline
By varying $r$ from $0$ to $\left[\frac{p}{2}\right]$ and summing over all possible choices of $k_1 \ldots k_r$, every term in $\Tr D^p$ is generated.\newline
One can verify that every term in Eq.(\ref{eq:trd4}) ($p=4$) is of that type. For example:
\begin{align}
\Tr M_{i_1} [\epsilon_{i_1} &+ \epsilon_{i_2} \epsilon_{i_3} \epsilon_{i_4} *] \Tr ( M_{i_2} M_{i_3} M_{i_4}) = \notag \\
&\epsilon_{i_2} \epsilon_{i_3} \epsilon_{i_4} \Tr M_{i_1} \Tr (M_{i_2} M_{i_3} M_{i_4})^* + \epsilon_{i_1} \Tr M_{i_1} \Tr ( M_{i_2} M_{i_3} M_{i_4}) = \notag \\
&\epsilon_{i_2} \epsilon_{i_3} \epsilon_{i_4} \Tr M_{i_1} \Tr (M_{i_2} M_{i_3} M_{i_4})^* + \epsilon_{i_1} \Tr (M_{i_1})^* \Tr ( M_{i_2} M_{i_3} M_{i_4}) = \notag \\
&\Tr \big( \epsilon_{i_2} \epsilon_{i_3} \epsilon_{i_4} M_{i_1} \otimes (M_{i_2} M_{i_3} M_{i_4})^* + \epsilon_{i_1} M_{i_2} M_{i_3} M_{i_4} \otimes M_{i_1}^* \big)
\end{align}
which is of the form of Eq.(\ref{eq:typical}) upon identifying $M_{i_1}$ with $A$ and $M_{i_2} M_{i_3} M_{i_4}$ with $B$ (in the second equality the reality of $\Tr M_{i_1}$ has been used). \newline
A way of expressing $\Tr B$ given $A$ is using a modified derivative operator $\D_i$ defined as:
\begin{equation}
\D_i \equiv \Tr \ \circ \ \frac{\partial}{\partial M_i}
\end{equation}
which allows one to write:
\begin{equation}
A = M_{i_{k_1}} \ldots M_{i_{k_r}} \implies \Tr B = \D_{i_{k_r}} \ldots \D_{i_{k_1}} \Tr (M_{i_1} \ldots M_{i_p}).
\end{equation}
Therefore Eq.(\ref{eq:typical}) becomes:
\begin{align}
\epsilon_A&[1+ \epsilon_{i_1} \ldots \epsilon_{i_p} *] (\Tr A)^* \Tr B = \notag \\
&\epsilon_{i_{k_1}} \ldots \epsilon_{i_{k_r}}[1 + \epsilon_{i_1} \ldots \epsilon_{i_p} * ] (\Tr M_{i_{k_1}} \ldots M_{i_{k_r}})^* \big( \D_{i_{k_r}} \ldots \D_{i_{k_1}} \Tr (M_{i_1} \ldots M_{i_p}) \big).
\end{align}
There are some special cases that make the expression simpler, namely:
\begin{enumerate}
\item $r=0$ gives a factor $\Tr I = n$;
\item $r=1, 2$ make $\Tr A$ real;
\item $p-r=1, 2$ (which can only occur for $p=2, 4$) make $\Tr B$ real.
\end{enumerate}
Putting everything together, $\Tr D^p$ can be written as:
\begin{align}
\Tr D^p = \sum_{i_1 \ldots i_p} \Tr \ &\omega_{i_1} \ldots \omega_{i_p} \Bigg[ \sum_{r=0}^{\left[\frac{p}{2}\right]} \ \sum_{k_1 < \ldots < k_r = 1}^p \epsilon_{i_{k_1}} \ldots \epsilon_{i_{k_r}} [1 + \epsilon_{i_1} \ldots \epsilon_{i_p} * ] \notag \\
&(\Tr M_{i_{k_1}} \ldots M_{i_{k_r}})^* \big( \D_{i_{k_r}} \ldots \D_{i_{k_1}} \Tr (M_{i_1} \ldots M_{i_p}) \big) \Bigg],
\end{align}
where:
\begin{align}
r=0 \quad &\longrightarrow \quad n[1+\epsilon_{i_1} \ldots \epsilon_{i_p}*]\Tr (M_{i_1} \ldots M_{i_p}) \\
r=1 \quad &\longrightarrow \quad \sum_{k_1 = 1}^p \epsilon_{i_{k_1}} \Tr (M_{i_{k_1}}) [1+\epsilon_{i_1} \ldots \epsilon_{i_p}*] \D_{i_{k_1}} \Tr (M_{i_1} \ldots M_{i_p}) \\
r=2 \quad &\longrightarrow \quad \sum_{k_1<k_2 = 1}^p \epsilon_{i_{k_1}} \epsilon_{i_{k_2}} \Tr (M_{i_{k_1}} M_{i_{k_2}}) [1+\epsilon_{i_1} \ldots \epsilon_{i_p}*] \D_{i_{k_2}} \D_{i_{k_1}} \Tr (M_{i_1} \ldots M_{i_p}).
\end{align}
\subsection{Appendix A}
The explicit form of $\mathcal{B}_k(i,i,k), \ \mathcal{B}_k(i,k,i)$ and $\mathcal{B}_k(k,k,k)$ is given below.
\begin{align}
\mathcal{B}_k(i,i,k) &= \Tr(\omega_k \omega_i \omega_i \omega_k)A(k, i, i, k)^T = CA(k,i,i,k)^T \notag \\
\mathcal{B}_k(i,k,i) &= \Tr(\omega_k \omega_i \omega_k \omega_i)A(k, i, k, i)^T \notag \\
\mathcal{B}_k(k,k,k) &= \Tr(\omega_k \omega_k \omega_k \omega_k)A(k, k, k, k)^T = CA(k,k,k,k)^T
\end{align}
where $C$ is the dimension of the Clifford module and the $A$ matrices are:
\begin{align}
A(k,i,i,k) = \ &n [1+\epsilon \ \dagger] M_i^2 M_k + \notag \\
&\epsilon_k I [1 + \epsilon] \Tr M_i^2 M_k + \notag \\
&2\epsilon_i \Tr M_i [1 + \epsilon \ \dagger] M_i M_k + \notag \\
&2\epsilon_k \epsilon_i M_i [1 + \epsilon] \Tr M_i M_k + \notag \\
&\epsilon_k \Tr M_k [1 + \epsilon] M_i^2 + \notag \\
&M_k [1 + \epsilon] \Tr M_i^2
\end{align}
\begin{align}
A(k,i,k,i) = \ &n [1+\epsilon] M_i M_k M_i + \notag \\
&\epsilon_k I [1 + \epsilon] \Tr M_i^2 M_k + \notag \\
&\epsilon_i \Tr M_i [1 + \epsilon][1+\dagger] M_i M_k + \notag \\
&2\epsilon_k \epsilon_i M_i [1 + \epsilon] \Tr M_i M_k + \notag \\
&\epsilon_k \Tr M_k [1 + \epsilon] M_i^2 + \notag \\
&M_k [1 + \epsilon] \Tr M_i^2
\end{align}
\begin{align}
A(k,k,k,k) = \ &2n M_k^3 + 2\epsilon_k I \Tr M_k^3 + \notag \\
&6 M_k \Tr M_k^2 + 6\epsilon_k M_k^2 \Tr M_k.
\end{align}
\end{document}