\documentclass [11pt]{article} \usepackage[a4paper]{geometry} \usepackage[utf8]{inputenc} \usepackage[T1]{fontenc} \usepackage{ae} \usepackage{amsmath,amssymb} \usepackage{hyperref} \usepackage{comment} \usepackage[notref,notcite]{showkeys} \newcommand {\corr}[1]{{#1}} \newcommand {\appro}[1]{\widetilde {#1}} \newcommand {\mpc}{{\tt mpc}} \newcommand {\mpfr}{{\tt mpfr}} \newcommand {\ulp}[1]{#1~ulp} \newcommand {\Ulp}{{\operatorname {ulp}}} \DeclareMathOperator{\Exp}{Exp} \newcommand {\atantwo}{\operatorname {atan2}} \newcommand {\asin}{\operatorname {asin}} \newcommand{\error}{\operatorname {error}} \newcommand{\relerror}{\operatorname {relerror}} \newcommand{\Norm}{\operatorname {N}} \newcommand {\round}{\operatorname {\circ}} \DeclareMathOperator{\pinf}{\bigtriangleup} \DeclareMathOperator{\minf}{\bigtriangledown} \DeclareMathOperator{\A}{\mathcal A} \newcommand {\N}{\mathbb N} \newcommand {\Z}{\mathbb Z} \newcommand {\Q}{\mathbb Q} \newcommand {\R}{\mathbb R} \newcommand {\C}{\mathbb C} \renewcommand {\epsilon}{\varepsilon} \renewcommand {\theta}{\vartheta} \renewcommand {\leq}{\leqslant} \renewcommand {\geq}{\geqslant} \newcommand {\AGM}{\operatorname{AGM}} \newcommand {\sign}{\operatorname{sign}} \newtheorem{theorem}{Theorem} \newtheorem{lemma}[theorem]{Lemma} \newtheorem{definition}[theorem]{Definition} \newtheorem{prop}[theorem]{Proposition} \newtheorem{conj}[theorem]{Conjecture} \newenvironment{proof}{\noindent{\bf Proof:}}{{\hspace* {\fill}$\blacksquare$}} \newcommand {\enumi}[1]{(\alph {#1})} \renewcommand {\labelenumi}{\enumi {enumi}} \newcommand {\enumii}[1]{(\roman {#1})} \renewcommand {\labelenumii}{\enumii {enumii}} \title {MPC: Algorithms and Error Analysis} \author {Andreas Enge \and Philippe Th\'eveny \and Paul Zimmermann} \date {Draft; January 23, 2014} \begin {document} \maketitle \tableofcontents \section {Error propagation} \subsection {Introduction and notation} This section is devoted to the analysis of error propagation: Given a function whose input arguments already have a certain error, what is the error bound on the function output? The output error usually consists of two components: the error propagated from the input, which may be arbitrarily amplified (or, if one is lucky, shrunk); and an additional small error accounting for the rounding of the output. The results are needed to give a cumulated error analysis for algorithms that combine several elementary arithmetic operations. \subsubsection {Ulp calculus} \begin {definition} \label {def:exp} Let $x$ be a non-zero real number, which can be written uniquely as $x = m \cdot 2^e$ with $\frac{1}{2} \leq |m| < 1$. The {\em exponent} of $x$ is $\Exp(x) = e = \lfloor \log_2 |x| \rfloor + 1$. The number is {\em representable at precision~$p$} if $2^p m$ is an integer. We denote the rounding of $x$ to one of the at most two representable numbers in the open interval $(x - 2^{\Exp (x) - p}, x + 2^{\Exp (x) - p})$ by $\round (x) = \round_p (x)$, with rounding being to nearest, up, down, towards zero or away from zero if there is a choice. \end {definition} \begin {prop} \label {prop:expmuldiv} If $x_1$ and $x_2$ are two non-zero real numbers, then \begin {gather*} \Exp (x_1) + \Exp (x_2) - 1 \leq \Exp (x_1 x_2) \leq \Exp (x_1) + \Exp (x_2), \\ \Exp (x_1) - \Exp (x_2) \leq \Exp \left( \frac {x_1}{x_2} \right) \leq \Exp (x_1) - \Exp (x_2) + 1. \end {gather*} \end {prop} \begin {proof} Write $x_n = m_n 2^{\Exp (x_n)}$ and $x = x_1 x_2 = m 2^{\Exp (x)} = m_1 m_2 2^{\Exp (x_1) + \Exp (x_2)}$ with $\frac {1}{2} \leq m_n, m < 1$.
Then $m = m_1 m_2$ if the product is at least $\frac {1}{2}$ and $m = 2 m_1 m_2$ if the product is less than $\frac {1}{2}$, which yields the first line of inequalities. The other inequalities are derived in the same way from $\frac {1}{2} < \frac {m_1}{m_2} < 2$. \end {proof} \begin {prop} \label {prop:expround} For any real number $x$, \[ \Exp (x) \leq \Exp (\round (x)) \leq \Exp (x) + 1, \] with equality occurring on the right if and only if $|x|$ has been rounded up to $|\round (x)| = 2^{\Exp (x)}$. \end {prop} \begin {proof} Letting $x = m 2^{\Exp (x)}$, we have $\frac {1}{2} \cdot 2^{\Exp (x)} \leq |\round (x)| \leq 2^{\Exp (x)}$, since these two numbers are representable (independently of the precision). \end {proof} \begin {definition} \label {def:ulp} Let $x$ be a real number which is representable at precision~$p$. Its associated {\em unit in the last place} is $\Ulp(x) = \Ulp_p (x) = 2^{\Exp(x) - p}$, so that adding $\Ulp(x)$ to $x$ corresponds to adding~$1$ to the integer $2^p m$. \end {definition} \subsubsection {Absolute error} In the remainder of this chapter, all complex numbers are denoted by the letter $z$ with subscripts and mathematical accents, decomposed in Cartesian coordinates as $z = x + i y$ with the same diacritics applied to $x$ and $y$ as to $z$. All representable real numbers are supposed to have the same precision~$p$. We apply the following error definition for real numbers separately to the two coordinates of a complex number. \begin {definition} \label {def:error} Given a real number $\corr x$ and its approximation $\appro x$, we define the {\em absolute error} of $\appro x$ as $\error (\appro x) = | \corr x - \appro x |$. \end {definition} Notice that in the following, the absolute error is usually expressed in terms of $\Ulp$, which is itself a relative measure with respect to the exponent of the number. Let $\corr z = f (\corr {z_1}, \ldots) = \corr x + i \corr y$ be the correct result of a complex function applied to the correct arguments $\corr {z_n}$. We assume that the $\corr {z_n}$ themselves are not known, but that only approximate input values $\appro {z_n} = \appro {x_n} + i \appro {y_n}$ are known; for instance, the $\corr {z_n}$ may be the exact results of some formul\ae, whereas the $\appro {z_n}$ are the outcome of the corresponding computation and affected by rounding errors. We suppose that error bounds $\error (\appro {x_n}) \leq k_{R, n} 2^{\Exp (\appro {x_n}) - p}$ and $\error (\appro {y_n}) \leq k_{I, n} 2^{\Exp (\appro {y_n}) - p}$ for some $k_{R, n}$ and $k_{I, n}$ are known. (This particular notation becomes more comprehensible when $\appro {x_n}$ and $\appro {y_n}$ are representable at precision~$p$, since then the units of the error measure become $\Ulp (\appro {x_n})$ and $\Ulp (\appro {y_n})$, respectively; however, there is no need to restrict the results of this chapter to representable numbers.) Our aim is to determine the propagated error in the output value $\appro z = \appro x + i \appro y = f (\appro {z_1}, \ldots)$, which is given by \begin {equation} \label {eq:properror} \error (\appro x) \leq | \Re (f (\corr {z_1}, \ldots)) - \Re (f (\appro {z_1}, \ldots)) | \end {equation} and an analogous formula for $\error (\appro y)$. In general, we are looking for $k_R$ and $k_I$ such that \[ \error (\appro x) \leq k_R 2^{\Exp (\appro x) - p} \text { and } \error (\appro y) \leq k_I 2^{\Exp (\appro y) - p}.
\] Moreover, we are interested in the cumulated error if additionally $\appro z$ is rounded coordinatewise at the target precision~$p$ to $\round (\appro z)$. This operation adds an error of $c_R \Ulp (\round (\appro x))$ to the real and of $c_I \Ulp (\round (\appro y))$ to the imaginary part, where $c_X \leq 1$ when $\round$ stands for rounding up, down, to zero or to infinity, and $c_X \leq \frac {1}{2}$ when $\round$ stands for rounding to nearest. Then, via Proposition~\ref {prop:expround}, \[ \error (\round (\appro x)) \leq (k_R + c_R) \Ulp (\appro x) \text { and } \error (\round (\appro y)) \leq (k_I + c_I) \Ulp (\appro y). \] \subsubsection {Real relative error} It can sometimes be useful to determine errors not absolutely as differences (close to~$0$), but relatively as multiplicative factors (close to~$1$). \begin {definition} \label {def:relerror} Given a real number $\corr x$ and its approximation $\appro x$, we define the {\em upper relative error} of $\appro x$ by \[ \relerror^+ (\appro x) = \epsilon^+ = \frac {\max (0, \corr x - \appro x)}{|\appro x|} \] and its {\em lower relative error} by \[ \relerror^- (\appro x) = \epsilon^- = \frac {\max (0, \appro x - \corr x)}{|\appro x|}. \] The {\em relative error} of $\appro x$ is \[ \relerror (\appro x) = \epsilon = \max (\epsilon^-, \epsilon^+) = \frac {\error (\appro x)}{|\appro x|}. \] \end {definition} Notice that $\epsilon^- = 0$ whenever $\appro x \leq \corr x$ and $\epsilon^+ = 0$ whenever $\appro x \geq \corr x$, so that at least one of $\epsilon^+$ and $\epsilon^-$ is zero. We express relative errors with respect to the approximate value; thus, they define an interval around the approximate value containing the correct one. Precisely, $\corr x = \appro x (1 + \theta)$ for some number $\theta$ with $|\theta| \leq \epsilon$. The definition of relative error carries over to complex numbers, see \S\ref {sssec:comrelerror}. However, in the following we usually argue separately for the two coordinates, so we use corresponding $\epsilon$-values with subscripts $R$ and $I$ for the real and imaginary part, respectively. When an absolute error is expressed in the relative unit $\Ulp$, it is easy to switch back and forth between absolute and relative errors. \begin {prop} \label {prop:relerror} Let $\appro x$ be a real number representable at precision $p$. \begin {enumerate} \item If $\error (\appro x) \leq k \Ulp (\appro x)$, then $\relerror (\appro x) \leq k 2^{1 - p}$. \item If $\relerror (\appro x) \leq k 2^{-p}$, then $\error (\appro x) \leq k \Ulp (\appro x)$. \end {enumerate} These assertions remain valid if $\appro x$ is not representable at precision~$p$ and $\Ulp (\appro x)$ is replaced by $2^{\Exp (\appro x) - p}$. \end {prop} \begin {proof} Concerning the first assertion, we have $ \relerror (\appro x) = \frac {\error (\appro x)}{|\appro x|} \leq \frac {k \Ulp (\appro x)}{|\appro x|}. $ Plugging in from Definition~\ref {def:ulp} that $\Ulp (\appro x) = 2^{\Exp (\appro x) - p}$ and $|\appro x| \geq 2^{\Exp (\appro x) - 1}$ finishes the proof. The second assertion is proved in the same manner, using $|\appro x| \leq 2^{\Exp (\appro x)}$. \end {proof} \subsubsection {Complex relative error} \label {sssec:comrelerror} The sign of errors is not meaningful any more in the complex case, and only the notion of total relative error carries over.
\begin {definition} \label {def:comrelerror} Given a complex number $\corr z$ and its non-zero approximation $\appro z$, let \[ \theta = \frac {\corr z - \appro z}{\appro z}, \text { or } \corr z = (1 + \theta) \appro z. \] Then the {\em relative error} of $\appro z$ is \[ \relerror (\appro z) = \epsilon = | \theta | = \left| \frac {\corr z - \appro z}{\appro z} \right|. \] \end {definition} The following result gives a coarse estimate of the relative errors of the real and imaginary parts in terms of the complex relative error, and vice versa. \begin {prop} \label {prop:comrelerror} Let $\corr z = \corr x + i \corr y$, $\appro z = \appro x + i \appro y$, $\epsilon = \relerror (\appro z)$, $\epsilon_R = \relerror (\appro x)$ and $\epsilon_I = \relerror (\appro y)$. Then \begin {align*} \epsilon_R &\leq \left( 1 + 2^{\Exp (\appro y) - \Exp (\appro x) + 1} \right) \epsilon, \\ \epsilon_I &\leq \left( 1 + 2^{\Exp (\appro x) - \Exp (\appro y) + 1} \right) \epsilon, \\ \epsilon &\leq \max \left( \epsilon_R, \epsilon_I \right). \end {align*} \end {prop} \begin {proof} Write $\theta = \frac {\corr z - \appro z}{\appro z} = \theta_R + i \theta_I$. Then $\corr x - \appro x = \Re (\appro z \theta) = \appro x \theta_R - \appro y \theta_I$, and \begin {align*} \epsilon_R &= \left| \frac {\corr x - \appro x}{\appro x} \right| \leq |\theta_R| + \left| \frac {\appro y}{\appro x} \right| |\theta_I| \leq \left( 1 + \left| \frac {\appro y}{\appro x} \right| \right) \max (|\theta_R|, |\theta_I|) \\ &\leq \left( 1 + 2^{\Exp (\appro y) - \Exp (\appro x) + 1} \right) |\theta| \end {align*} by Proposition~\ref {prop:expmuldiv}. The second inequality is proved in the same way. For the converse direction, write \[ |\theta|^2 = \frac {(\corr x - \appro x)^2 + (\corr y - \appro y)^2}{\appro x^2 + \appro y^2} = \frac {\epsilon_R^2 \appro x^2 + \epsilon_I^2 \appro y^2}{\appro x^2 + \appro y^2} \leq \max \left( \epsilon_R^2, \epsilon_I^2 \right). \] \end {proof} As a corollary to Propositions~\ref {prop:relerror} and~\ref {prop:comrelerror}, we obtain the following result that shows how to transform absolute into complex relative errors. \begin {prop} \label {prop:comabstorelerror} Let $\appro z = \appro x + i \appro y$ be representable at precision~$p$. If $\error (\appro x) \leq k \, \Ulp (\appro x)$ and $\error (\appro y) \leq k \, \Ulp (\appro y)$, then \[ \relerror (\appro z) \leq k \, 2^{1-p}. \] As in Proposition~\ref {prop:relerror}, there is an immediate generalisation if $\appro z$ is not representable at precision~$p$. \end {prop} This result can be used to analyse how rounding affects complex relative errors. \begin {prop} \label {prop:comrelround} Let $\relerror (\appro z) = \epsilon$. Then \[ \relerror (\round (\appro z)) \leq \epsilon + c (1 + \epsilon) 2^{1-p}, \] where $c = \frac {1}{2}$ if both the real and the imaginary part are rounded to nearest, and $c = 1$ otherwise. \end {prop} \begin {proof} Write $\corr z = (1 + \theta) \appro z$ with $|\theta| = \epsilon$. Applying Proposition~\ref {prop:comabstorelerror} with $\appro z$ in the place of $\corr z$, $\round (\appro z)$ in the place of $\appro z$ and $c$ in the place of~$k$, there is a $\theta'$ with $\appro z = (1 + \theta') \round (\appro z)$ and $|\theta'| \leq c \, 2^{1-p}$. So $\corr z = (1 + \theta'') \round (\appro z)$ with $\theta'' = (1 + \theta)(1 + \theta') - 1 = \theta + \theta' (1 + \theta)$, so that $|\theta''| \leq |\theta| + |\theta'| (1 + |\theta|) \leq \epsilon + c (1 + \epsilon) 2^{1-p}$.
\end {proof} \subsection {Real functions} In this section, we derive for later use results on error propagation for functions with real arguments and values. Those already contained in \cite{MPFRAlgorithms} are simply quoted for the sake of self-containedness. \subsubsection {Division} \label {sssec:proprealdiv} Let \[ \appro x = \frac {\appro {x_1}}{\appro {x_2}}. \] Then \[ \error (\appro x) = \left| \frac {\corr {x_1}}{\corr {x_2}} - \frac {\appro {x_1}}{\appro {x_2}} \right| = \left| \frac {\appro {x_1}}{\appro {x_2}} \right| \cdot \left| 1 - \left| \frac {\corr {x_1}}{\appro {x_1}} \right| \cdot \left| \frac {\appro {x_2}}{\corr {x_2}} \right| \right| = | \appro x | \cdot \left| 1 - \left| \frac {\corr {x_1}}{\appro {x_1}} \right| \cdot \left| \frac {\appro {x_2}}{\corr {x_2}} \right| \right|. \] Using the notation introduced in Definition~\ref {def:relerror} together with the obvious subscripts to the $\epsilon$, we obtain the bounds \[ - \frac {\epsilon_1^+ + \epsilon_2^-}{1 - \epsilon_2^-} = 1 - \frac {1 + \epsilon_1^+}{1 - \epsilon_2^-} \leq 1 - \left| \frac {\corr {x_1}}{\appro {x_1}} \right| \cdot \left| \frac {\appro {x_2}}{\corr {x_2}} \right| \leq 1 - \frac {1 - \epsilon_1^-}{1 + \epsilon_2^+} = \frac {\epsilon_1^- + \epsilon_2^+}{1 + \epsilon_2^+}. \] We need to make the assumption that $\epsilon_2^- < 1$, which is reasonable since otherwise the absolute error on $\appro {x_2}$ would exceed the number itself. Then \[ \error (\appro x) \leq \max \left( \frac {\epsilon_1^+ + \epsilon_2^-}{1 - \epsilon_2^-}, \frac {\epsilon_1^- + \epsilon_2^+}{1 + \epsilon_2^+} \right) |\appro x| \leq \frac {\epsilon_1 + \epsilon_2}{1 - \epsilon_2^-} 2^{\Exp (\appro x)}. \] Using the estimate of the relative error in terms of the absolute error from Proposition~\ref {prop:relerror}, this bound can be translated into \begin {equation} \label {eq:proprealdiv} \error (\appro x) \leq \frac {2 (k_1 + k_2)}{1 - \epsilon_2^-} 2^{\Exp (\appro x) - p} \leq \frac {2 (k_1 + k_2)}{1 - k_2 2^{1 - p}} 2^{\Exp (\appro x) - p}. \end {equation} \subsubsection {Square root} \label {sssec:proprealsqrt} Let \[ \appro x = \sqrt {\appro {x_1}}. \] Then by \cite[\S1.7]{MPFRAlgorithms}, \begin {equation} \label {eq:proprealsqrt} \error (\appro x) \leq \frac {2 k_1}{1 + \sqrt {1 + \epsilon_1^-}} 2^{\Exp (\appro x) - p}. \end {equation} \subsubsection {Cosine and sine} \label {sssec:proprealcossin} Let \[ \appro x = \cos {\appro {x_1}}. \] By the mean value theorem, there is a $\xi$ between $\corr {x_1}$ and $\appro {x_1}$ such that \[ \cos (\corr {x_1}) - \cos (\appro {x_1}) = -\sin (\xi) (\corr {x_1} - \appro {x_1}), \] so that \[ \error (\appro x) \leq \error (\appro {x_1}). \] Taking the exponents into account, one obtains \begin {equation} \label {eq:proprealcos} \error (\appro x) \leq k_1 \, 2^{\Exp (\appro {x_1}) - \Exp (\appro x)} \, 2^{\Exp (\appro x) - p}. \end {equation} For the sine function, a completely analogous argument shows that \eqref {eq:proprealcos} also holds. \subsubsection {Logarithm} \label {sssec:propreallog} Let \[ \appro x = \log (1 + \appro {x_1}) \] for $\appro {x_1} > -1$. By the mean value theorem, there is a $\xi$ between $\corr {x_1}$ and $\appro {x_1}$ such that \[ \error (\appro x) = \frac {1}{1 + \xi} \error (\appro {x_1}) \leq \frac {1}{1 + \min (\corr {x_1}, \appro {x_1})} \error (\appro {x_1}).
\] For $\corr {x_1}, \appro {x_1} > 0$, this implies \begin {eqnarray*} \error (\appro x) & \leq & \error (\appro {x_1}) \leq k_1 \, 2^{\Exp (\appro {x_1}) - \Exp (\appro x)} \, 2^{\Exp (\appro x) - p} \\ & \leq & 2 \, k_1 \, \frac {\appro {x_1}}{\appro x} \, 2^{\Exp (\appro x) - p} \\ & \leq & 2 \, k_1 \, \frac {\appro {x_1}}{\appro {x_1} - \appro {x_1}^2/2} \, 2^{\Exp (\appro x) - p} \end {eqnarray*} using $\log (1 + z) \geq z - z^2/2$ for $z > 0$. For $0 < \appro {x_1} \leq 1$, we have $\appro {x_1}^2/2 \leq \appro {x_1}/2$ and \[ \error (\appro x) \leq 4 \, k_1 \, 2^{\Exp (\appro x) - p}. \] \subsection {Complex functions} \subsubsection {Addition/subtraction} \label {sssec:propadd} Using the notation introduced above, we consider \[ \appro z = \appro {z_1} + \appro {z_2}. \] By \eqref {eq:properror}, we obtain \begin{align*} \error (\appro x) & \leq | (\corr {x_1} + \corr {x_2}) - (\appro {x_1} + \appro {x_2})| \\ & \leq | \corr {x_1} - \appro {x_1} | + | \corr {x_2} - \appro {x_2}| \\ & \leq k_{R,1} 2^{\Exp (\appro {x_1}) - p} + k_{R,2} 2^{\Exp (\appro {x_2}) - p} \\ & \leq \left( k_{R,1} 2^{d_{R,1}} + k_{R,2} 2^{d_{R,2}} \right) 2^{\Exp (\appro x) - p}, \end{align*} where $d_{R,n}=\Exp(\appro {x_n})-\Exp(\appro x)$. In other words, the absolute errors add up, but their relative expression in terms of $\Ulp$ of the result grows if the result has a smaller exponent than the operands, that is, if cancellation occurs. If $\appro {x_1}$ and $\appro {x_2}$ have the same sign, then there is no cancellation, $d_{R, n} \leq 0$ and \[ \error (\appro x) \leq (k_{R,1} + k_{R,2}) 2^{\Exp (\appro x) - p}. \] An analogous error bound holds for the imaginary part. For subtraction, the same bounds are obtained, except that the simpler bound now holds whenever $\appro {x_1}$ and $\appro {x_2}$ resp. $\appro {y_1}$ and $\appro {y_2}$ have different signs. We obtain a useful result for complex relative errors when $z_1$ and $z_2$ lie in the same quadrant, so that cancellation occurs neither for the real nor for the imaginary part. \begin {lemma} \label {lm:arithgeom} Let $c_1 = a_1 + i b_1$ and $c_2 = a_2 + i b_2$ lie in the same quadrant, that is, $a_1 a_2$, $b_1 b_2 \geq 0$. Then \[ |c_1| + |c_2| \leq \sqrt 2 \cdot |c_1 + c_2|. \] \end {lemma} \begin {proof} One readily verifies that \[ 2 |c_1 + c_2|^2 - (|c_1| + |c_2|)^2 = (|c_1| - |c_2|)^2 + 4 (a_1 a_2 + b_1 b_2) \geq 0. \] \end {proof} Assume now that $\appro {z_1}$ and $\appro {z_2}$ lie in the same quadrant, and write $\corr {z_n} = (1 + \theta_n) \appro {z_n}$ with $\epsilon_n = |\theta_n|$ for $n = 1, 2$. Then $\corr z = (1 + \theta) \appro z$ with \[ \theta = \frac {\theta_1 \appro {z_1} + \theta_2 \appro {z_2}} {\appro {z_1} + \appro {z_2}} \] and \begin {equation} \label {eq:propaddrel} \relerror (\appro z) \leq \max (\epsilon_1, \epsilon_2) \frac {|\appro {z_1}| + |\appro {z_2}|}{|\appro {z_1} + \appro {z_2}|} \leq \sqrt 2 \, \max (\epsilon_1, \epsilon_2) \end {equation} by Lemma~\ref {lm:arithgeom}. \subsubsection {Multiplication} \label {sssec:propmul} Let \[ \appro z = \appro {z_1} \times \appro {z_2}, \] so that \begin {align*} \appro x & = \appro {x_1} \appro {x_2} - \appro {y_1} \appro {y_2}, \\ \appro y & = \appro {x_1} \appro {y_2} + \appro {x_2} \appro {y_1}. \end {align*} Then \[ \error (\appro x) \leq | \Re (\corr {z_1} \times \corr {z_2}) - \Re (\appro {z_1} \times \appro {z_2})| \leq | \corr {x_1} \corr {x_2} - \appro {x_1} \appro {x_2}| + | \corr {y_1} \corr {y_2} - \appro {y_1} \appro {y_2}|.
\] The first term on the right hand side can be bounded as follows, where we use the short-hand notation $\epsilon_{R, 1}^+$ for $\relerror^+ (\appro {x_1})$, and analogously for other relative errors: \begin{align*} | \corr {x_1} \corr {x_2} - \appro {x_1} \appro {x_2}| & \leq \frac{1}{2} \left( |\appro {x_1} - \corr {x_1}| (|\appro {x_2}| + |\corr {x_2}|) + |\appro {x_2} - \corr {x_2}| (|\appro {x_1}| + |\corr {x_1}|) \right) \\ & \leq \frac {1}{2} \left( \epsilon_{R, 1} |\appro {x_1}| |\appro {x_2}| \left( 1 + \frac {|\corr {x_2}|}{|\appro {x_2}|} \right) + \epsilon_{R, 2} |\appro {x_2}| |\appro {x_1}| \left( 1 + \frac {|\corr {x_1}|}{|\appro {x_1}|} \right) \right) \\ & \leq \left( k_{R, 1} \left( 1 + \frac {|\corr {x_2}|}{|\appro {x_2}|} \right) + k_{R, 2} \left( 1 + \frac {|\corr {x_1}|}{|\appro {x_1}|} \right) \right) |\appro {x_1} \appro {x_2}| \, 2^{-p} \text { by Proposition~\ref {prop:relerror}} \\ & \leq \left( k_{R, 1} (2 + \epsilon_{R, 2}^+) + k_{R, 2} (2 + \epsilon_{R, 1}^+) \right) 2^{\Exp (\appro {x_1} \appro {x_2}) - p}. \end{align*} In the same way, we obtain \[ | \corr {y_1} \corr {y_2} - \appro {y_1} \appro {y_2}| \leq \left( k_{I, 1} (2 + \epsilon_{I, 2}^+) + k_{I, 2} (2 + \epsilon_{I, 1}^+) \right) 2^{\Exp (\appro {y_1} \appro {y_2}) - p}. \] It remains to estimate $\Exp (\appro {x_1} \appro {x_2})$ and $\Exp (\appro {y_1} \appro {y_2})$ with respect to $\Exp (\appro x)$ to obtain a bound in terms of $\Ulp (\appro x)$. This becomes problematic when, due to the subtraction, cancellation occurs. In full generality, let $d = \Exp (\appro {x_1} \appro {x_2}) - \Exp (\appro x) \leq \Exp (\appro {x_1}) + \Exp (\appro {x_2}) - \Exp (\appro x)$ by Proposition~\ref {prop:expmuldiv} and $d' = \Exp( \appro {y_1} \appro {y_2}) - \Exp (\appro x) \leq \Exp (\appro {y_1}) + \Exp (\appro {y_2}) - \Exp (\appro x)$. Then \begin {equation} \label {eq:propmulre} \error( \appro x) \leq \left( \left( k_{R, 1} (2 + \epsilon_{R, 2}^+) + k_{R, 2} (2 + \epsilon_{R, 1}^+) \right) 2^d + \left( k_{I, 1} (2 + \epsilon_{I, 2}^+) + k_{I, 2} (2 + \epsilon_{I, 1}^+) \right) 2^{d'} \right) 2^{\Exp (\appro x) - p}. \end {equation} If $\appro {x_1} \appro {x_2}$ and $\appro {y_1} \appro {y_2}$ have different signs, then there is no cancellation, and, using the monotonicity of the exponent with respect to the absolute value, we obtain \[ \Exp (\appro x) = \Exp (\appro {x_1} \appro {x_2} - \appro {y_1} \appro {y_2}) = \Exp (|\appro {x_1} \appro {x_2}| + |\appro {y_1} \appro {y_2}|) \geq \Exp (|\appro {x_1} \appro {x_2}|), \Exp (|\appro {y_1} \appro {y_2}|), \] so that $d$, $d' \leq 0$ and the error bound simplifies to \[ \error( \appro x) \leq \left( k_{R, 1} (2 + \epsilon_{R, 2}^+) + k_{R, 2} (2 + \epsilon_{R, 1}^+) + k_{I, 1} (2 + \epsilon_{I, 2}^+) + k_{I, 2} (2 + \epsilon_{I, 1}^+) \right) 2^{\Exp (\appro x) - p}. \] The same approach yields the error of the imaginary part. Letting $\delta = \Exp (\appro {x_1} \appro {y_2}) - \Exp (\appro y) \leq \Exp( \appro {x_1}) + \Exp (\appro {y_2}) - \Exp (\appro y)$ and $\delta' = \Exp (\appro {x_2} \appro {y_1}) - \Exp (\appro {y}) \leq \Exp (\appro {x_2}) + \Exp (\appro {y_1}) - \Exp (\appro y)$, it becomes \begin {equation} \label {eq:propmulim} \error( \appro y) \leq \left( \left( k_{R, 1} (2 + \epsilon_{I, 2}^+) + k_{I, 2} (2 + \epsilon_{R, 1}^+) \right) 2^{\delta} + \left( k_{I, 1} (2 + \epsilon_{R, 2}^+) + k_{R, 2} (2 + \epsilon_{I, 1}^+) \right) 2^{\delta'} \right) 2^{\Exp (\appro y) - p}.
\end {equation} If $\appro {x_1} \appro {y_2}$ and $\appro {x_2} \appro {y_1}$ have the same sign, then $\delta$, $\delta' \leq 0$ and \[ \error( \appro y) \leq \left( k_{R, 1} (2 + \epsilon_{I, 2}^+) + k_{I, 2} (2 + \epsilon_{R, 1}^+) + k_{I, 1} (2 + \epsilon_{R, 2}^+) + k_{R, 2} (2 + \epsilon_{I, 1}^+) \right) 2^{\Exp (\appro y) - p}. \] Notice that $\appro {x_1} \appro {x_2}$ and $\appro {y_1} \appro {y_2}$ have the same sign if and only if $\appro {y_1} \appro {x_2}$ and $\appro {x_1} \appro {y_2}$ do. So cancellation can occur in precisely one of the real and the imaginary part. The different values $\epsilon_{X, n}^+$ for $X \in \{ R, I \}$ and $n \in \{ 1, 2 \}$ in the formul{\ae} above may be bounded by $k_{X, n} 2^{1 - p}$ according to Proposition~\ref {prop:relerror}. If some $|\appro {x_n}| \geq |\corr {x_n}|$ resp. $|\appro {y_n}| \geq |\corr {y_n}|$ (for instance, because they have been computed by rounding away from zero), then the corresponding $\epsilon_{X, n}^+$ are zero. A coarser bound may be obtained more easily by considering complex relative errors. Write $\corr {z_n} = (1 + \theta_n) \appro {z_n}$ with $\epsilon_n = | \theta_n |$. Then $\corr z = (1 + \theta) \appro z$ with $\theta = \theta_1 + \theta_2 + \theta_1 \theta_2$ and \begin {equation} \label {eq:propmulrel} \epsilon = \relerror (\appro z) \leq \epsilon_1 + \epsilon_2 + \epsilon_1 \epsilon_2. \end {equation} By Proposition~\ref {prop:relerror}, we have $\epsilon_{X, n} \leq k_{X, n} 2^{1-p}$ for $X \in \{ R, I \}$, and by Proposition~\ref {prop:comrelerror}, $\epsilon_n \leq \max (k_{R, n}, k_{I, n}) 2^{1 - p}$. Under normal circumstances, $\epsilon_1 \epsilon_2$ should be negligible; for instance, if we assume that $\max (k_{R, 1}, k_{I, 1}) \max (k_{R, 2}, k_{I, 2}) \leq 2^{p - 1}$, then $\epsilon_1 \epsilon_2 \leq 2^{1-p}$ and $\epsilon \leq \big( \max (k_{R, 1}, k_{I, 1}) + \max (k_{R, 2}, k_{I, 2}) + 1 \big) 2^{1 - p}$. Applying Propositions~\ref {prop:comrelerror} and~\ref {prop:relerror} in the converse direction yields \begin {equation} \label {eq:propmulcomrel} \begin {array}{rl} \error (\appro x) &\leq \big( \max (k_{R, 1}, k_{I, 1}) + \max (k_{R, 2}, k_{I, 2}) + 1 \big) \left( 2 + 2^{\Exp (\appro y) - \Exp (\appro x) + 2} \right) 2^{\Exp (\appro x) - p}, \\ \error (\appro y) &\leq \big( \max (k_{R, 1}, k_{I, 1}) + \max (k_{R, 2}, k_{I, 2}) + 1 \big) \left( 2 + 2^{\Exp (\appro x) - \Exp (\appro y) + 2} \right) 2^{\Exp (\appro y) - p}. \\ \end {array} \end {equation} \subsubsection {Norm} \label {sssec:propnorm} Let \[ \appro x = \Norm (\appro {z_1}) = |\appro {z_1}|^2 = \appro {x_1}^2 + \appro {y_1}^2. \] Then \[ \error (\appro x) \leq | \Norm (\corr {z_1}) - \Norm (\appro {z_1}) | \leq | \corr {x_1}^2 - \appro {x_1}^2 | + | \corr {y_1}^2 - \appro {y_1}^2 |.
\] The first term can be bounded by \begin {align*} | \corr {x_1}^2 - \appro {x_1}^2 | & \leq |\appro {x_1}| \left( 1 + \frac {|\corr {x_1}|}{|\appro {x_1}|} \right) |\corr {x_1} - \appro {x_1}| \\ & \leq 2^{\Exp (\appro {x_1})} (2 + \epsilon_{R, 1}^+) k_{R, 1} 2^{\Exp (\appro {x_1}) - p} \\ & \leq k_{R, 1} (2 + \epsilon_{R, 1}^+) 2^{\Exp (\appro {x_1}^2) + 1 - p} \text { by Proposition~\ref {prop:expmuldiv}} \\ & \leq 2 k_{R, 1} (2 + \epsilon_{R, 1}^+) 2^{\Exp (\appro x) - p} \text { by the monotonicity of the exponent.} \end {align*} The analogous bound for the second error term yields \begin {equation} \label {eq:propnorm} \error (\appro x) \leq 2 \left( k_{R, 1} (2 + \epsilon_{R, 1}^+) + k_{I, 1} (2 + \epsilon_{I, 1}^+) \right) 2^{\Exp (\appro x) - p}. \end {equation} The values $\epsilon_{X, 1}^+$ may be estimated as explained at the end of \S\ref {sssec:propmul}. We also need the relative lower error in the following. This can be obtained by writing \[ \appro {x_1}^2 - \corr {x_1}^2 = \left( 1 - \left| \frac {\corr {x_1}}{\appro {x_1}} \right|^2 \right) \cdot \appro {x_1}^2 \leq \big( 1 - (1 - \epsilon_{R, 1}^-)^2 \big) \appro {x_1}^2 = \big( \epsilon_{R, 1}^- (2 - \epsilon_{R, 1}^-) \big) \appro {x_1}^2. \] Adding the corresponding expression for the second term $\appro {y_1}^2 - \corr {y_1}^2$ yields \begin {equation} \label {eq:propnormepsminus} \frac {\appro x - \corr x}{\appro x} \leq \max \big( \epsilon_{R, 1}^- (2 - \epsilon_{R, 1}^-), \epsilon_{I, 1}^- (2 - \epsilon_{I, 1}^-) \big) =: \epsilon^-, \end {equation} and under the assumption that $\epsilon^- \geq 0$, inspection of Definition~\ref {def:relerror} shows that $\relerror^- (\appro x) \leq \epsilon^-$ since $\appro x$ and $\corr x$ are positive. The converse estimation yields \begin {equation} \label {eq:propnormepsplus} \relerror^+ (\appro x) \leq \max \big( \epsilon_{R, 1}^+ (2 + \epsilon_{R, 1}^+), \epsilon_{I, 1}^+ (2 + \epsilon_{I, 1}^+) \big) =: \epsilon^+ \end {equation} and $\relerror (\appro x) \leq \epsilon := \max (\epsilon^-, \epsilon^+)$. Letting $\epsilon_1 = \max ( \epsilon_{R, 1}^-, \epsilon_{R, 1}^+, \epsilon_{I, 1}^-, \epsilon_{I, 1}^+ ) = \max ( \epsilon_{R, 1}, \epsilon_{I, 1} )$ and $k_1 = \max ( k_{R, 1}, k_{I, 1})$, we have $\epsilon \leq \epsilon_1 (2 + \epsilon_1) \leq 2 k_1 (2 + \epsilon_1) 2^{-p}$ by Proposition~\ref {prop:relerror}. We obtain an alternative expression for the absolute error as \begin {equation} \label {eq:propnormalt} \error (\appro x) \leq \epsilon \appro x \leq 2 k_1 (2 + \epsilon_1) 2^{\Exp (\appro x) - p}. \end {equation} \subsubsection {Division} \label{sssec:propdiv} Let \[ \appro z = \frac {\appro {z_1}}{\appro {z_2}} = \frac {\appro {z_1} \overline {\appro {z_2}}}{\Norm (\appro {z_2})}. \] Then the propagated error may be derived by cumulating the errors obtained for multiplication in \S\ref {sssec:propmul}, the norm in \S\ref {sssec:propnorm} and the division by a real in \S\ref {sssec:proprealdiv}. Let $\appro {z_3} = \appro {z_1} \overline {\appro {z_2}} = \appro {x_3} + i \appro {y_3}$, $d = \Exp (\appro {x_1} \appro {x_2}) - \Exp (\appro {x_3})$ and $d' = \Exp (\appro {y_1} \appro {y_2}) - \Exp (\appro {x_3})$. Then \eqref {eq:propmulre} applies and yields $\error (\appro {x_3}) \leq k_{R, 3} 2^{\Exp (\appro {x_3}) - p}$ with \[ k_{R, 3} = \left( k_{R, 1} (2 + \epsilon_{R, 2}^+) + k_{R, 2} (2 + \epsilon_{R, 1}^+) \right) 2^d + \left( k_{I, 1} (2 + \epsilon_{I, 2}^+) + k_{I, 2} (2 + \epsilon_{I, 1}^+) \right) 2^{d'}.
\] We then apply \eqref {eq:propnorm} and \eqref {eq:propnormepsminus} to $\appro {x_4} = \Norm (\appro {z_2})$ to obtain $\error (\appro {x_4}) \leq k_4 2^{\Exp (\appro {x_4}) - p}$ with \[ k_4 = 2 \left( k_{R, 2} (2 + \epsilon_{R, 2}^+) + k_{I, 2} (2 + \epsilon_{I, 2}^+) \right) \] and \[ \relerror (\appro {x_4}) = \epsilon_4^- \leq \max \big( \epsilon_{R, 2}^- (2 - \epsilon_{R, 2}^-), \epsilon_{I, 2}^- (2 - \epsilon_{I, 2}^-) \big). \] Now \eqref {eq:proprealdiv} shows that \begin {equation} \label {eq:propdivre} \error (\appro x) \leq \frac {2 (k_{R, 3} + k_4)}{1 - \epsilon_4^-} 2^{\Exp (\appro x) - p}. \end {equation} As written above, $d$ and $d'$ depend not only on the input and output, but also on the intermediate value $\appro {x_3}$. But noting that $ | \appro {x_3} | = | \appro x | \Norm (\appro {z_2}) \geq | \appro x | \, 2^{2 \max (\Exp (\appro {x_2}), \Exp (\appro {y_2})) - 2}, $ so that \begin {align*} d &\leq \Exp (\appro {x_1} \appro {x_2}) - \Exp (\appro x) - 2 \max (\Exp (\appro {x_2}), \Exp (\appro {y_2})) + 2, \\ d' &\leq \Exp (\appro {y_1} \appro {y_2}) - \Exp (\appro x) - 2 \max (\Exp (\appro {x_2}), \Exp (\appro {y_2})) + 2, \end {align*} yields a bound \eqref {eq:propdivre} that is independent of intermediate values of the computation. The error in the imaginary part is computed in the same way as \begin {equation} \label {eq:propdivim} \error (\appro y) \leq \frac {2 (k_{I, 3} + k_4)}{1 - \epsilon_4^-} 2^{\Exp (\appro y) - p} \end {equation} with \begin {align*} k_{I, 3} &= \left( k_{R, 1} (2 + \epsilon_{I, 2}^+) + k_{I, 2} (2 + \epsilon_{R, 1}^+) \right) 2^\delta + \left( k_{I, 1} (2 + \epsilon_{R, 2}^+) + k_{R, 2} (2 + \epsilon_{I, 1}^+) \right) 2^{\delta'} \\ \delta &\leq \Exp (\appro {x_1} \appro {y_2}) - \Exp (\appro y) - 2 \max (\Exp (\appro {x_2}), \Exp (\appro {y_2})) + 2 \\ \delta' &\leq \Exp (\appro {x_2} \appro {y_1}) - \Exp (\appro y) - 2 \max (\Exp (\appro {x_2}), \Exp (\appro {y_2})) + 2 \end {align*} As for the multiplication, a coarser bound may be obtained more easily using complex relative errors. Let $\corr {z_n} = (1 + \theta_n) \appro {z_n}$ with $\epsilon_n = | \theta_n |$. Then $\corr z = (1 + \theta) \appro z$ with \[ \theta = \frac {1 + \theta_1}{1 + \theta_2} - 1 = (\theta_1 - \theta_2) \sum_{k = 0}^\infty (- \theta_2)^k \text { and } \epsilon \leq (|\theta_1| + |\theta_2|) \sum_{k = 0}^\infty |\theta_2|^k. \] Using the same notation and assumptions as at the end of \S\ref {sssec:propmul}, in particular that all higher order error terms (involving $\epsilon_1^2$, $\epsilon_2^2$, $\epsilon_1 \epsilon_2$ or higher powers) are absorbed by $2^{\Exp (\appro x) - p}$, we find exactly the same error estimate \eqref {eq:propmulcomrel} also for the case of division. \subsubsection {Square root} Let \[ \appro z = \sqrt {\appro {z_1}}. \] Write $\corr {z_1} = (1 + \theta_1) \appro {z_1}$ with $\epsilon_1 = |\theta_1|$, and assume $\epsilon_1 < 1$. Then $\corr z = (1 + \theta) \appro z$ with \[ \theta = \sqrt {1 + \theta_1} - 1 = \frac {1}{2} \theta_1 + \sum_{n=2}^\infty \frac {(-1)^{n+1} 1 \cdot 3 \cdots (2 n - 3)}{2^n \, n!} \theta_1^n \] as a Taylor series, and \[ \epsilon = |\theta| \leq \frac {1}{2} \epsilon_1 + \sum_{n=2}^\infty \frac {1 \cdot 3 \cdots (2 n - 3)}{2^n \, n!} \epsilon_1^n = 1 - \sqrt {1 - \epsilon_1}.
\] By the mean value theorem, applied to the function $f (x) = \sqrt {1 - x}$, there is $0 < \xi < \epsilon_1$ with \begin {equation} \label {eq:propsqrt} \epsilon = \frac {1}{2 \sqrt {1 - \xi}} \, \epsilon_1 \leq \frac {1}{2 \sqrt {1 - \epsilon_1}} \, \epsilon_1. \end {equation} For instance $\epsilon \leq \epsilon_1$ for $\epsilon_1 \leq \frac {3}{4}$. We even have $\epsilon \leq \epsilon_1$ for $\epsilon_1 \leq 1$, since $1 - \sqrt{1-x}$ is bounded by $x$ for $0 < x \leq 1$, which comes from $1 - x \leq \sqrt{1-x}$. \subsubsection {Logarithm} \section {Algorithms} This section describes in detail the algorithms used in \mpc, together with the error analysis that makes it possible to prove that the results are correct in the {\mpc} semantics: The input numbers are assumed to be exact, and the output corresponds to the exact result rounded in the desired direction. \subsection {\texttt {mpc\_sqrt}} The following algorithm is due to Friedland \cite{Friedland67,Smith98}. Let $z = x + i y$. Let $w = \sqrt { \frac {|x| + \sqrt {x^2 + y^2}}{2}}$ and $t = \frac {y}{2w}$. Then $(w + it)^2 = |x| + iy$, and with the branch cut on the negative real axis we obtain \[ \sqrt z = \left\{ \begin {array}{cl} w + i t & \text {if } x > 0 \\ t + i w & \text {if } x < 0, y > 0 \\ -t - i w & \text {if } x < 0, y < 0 \end {array} \right. \] We compute $w$ rounded down and thus round down all intermediate results. $\sqrt {x^2 + y^2}$ is computed with an error of \ulp{1} by a call to \texttt {mpc\_abs}; $|x|$ is added with an error of \ulp{1}, since both terms are positive; division by~$2$ is free of error. So $w^2$ is computed with a cumulated error of \ulp{2}. This error of \ulp{2} propagates as is through the real square root: Since we rounded down the argument, we have $\epsilon_1^- = 0$ in \eqref {eq:proprealsqrt}; an error of \ulp{1} needs to be added for the rounding of $w$, so that the total error is \ulp{3}. $t$ is rounded away from zero. Plugging the error of \ulp{3} for $w$ and \ulp{0} for $y$ into \eqref {eq:proprealdiv} shows that the propagated error of real division is \ulp{6}, to which an additional rounding error of \ulp{1} has to be added for a total error of \ulp{7}. \subsection {\texttt {mpc\_log}} Let $z = x + i y$. Then $\log (z) = \frac {1}{2} \log (x^2 + y^2) + i \atantwo (y, x)$. The imaginary part is computed by a call to the corresponding {\mpfr} function. Let $w = \log (x^2 + y^2)$, rounded down. The error of the complex norm is \ulp{1}. The generic error of the real logarithm is then given by \ulp{$2^{2 - e_w} + 1$}, where $e_w$ is the exponent of $w$. For $e_w \geq 2$, this is bounded by \ulp{2} or 2~digits; otherwise, it is bounded by \ulp{$2^{3 - e_w}$} or $3 - e_w$ digits. \subsection {\texttt {mpc\_tan}} Let $z = x + i y$ with $x \neq 0$ and $y \neq 0$. We compute $\tan z$ as follows: \begin{align*} u &\leftarrow \A(\sin z) &\error(\Re(u)) &\leq 1 \Ulp(\Re(u)) &\error(\Im(u)) &\leq 1 \Ulp(\Im(u)) \\ v &\leftarrow \A(\cos z) &\error(\Re(v)) &\leq 1 \Ulp(\Re(v)) &\error(\Im(v)) &\leq 1 \Ulp(\Im(v)) \\ t &\leftarrow \A(u/v) &\error(\Re(t)) &\leq k_R \Ulp(\Re(t)) &\error(\Im(t)) &\leq k_I \Ulp(\Im(t)) \end{align*} where $w_2 \leftarrow \A(w_1)$ means that the real and imaginary parts of $w_2$ are respectively the real and imaginary part of $w_1$ rounded away from zero to the working precision.
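In terms of the public {\mpc} interface, the scheme may be sketched as follows. This is an illustration only, not the actual implementation of \texttt {mpc\_tan}: it works at a single fixed precision, and it rounds to nearest, whereas the scheme above rounds away from zero, which only changes the constants in the error analysis below.

\begin{verbatim}
#include <mpc.h>

/* Sketch of the scheme above: approximate tan z by sin z / cos z
   at working precision p.  The scheme in the text rounds away
   from zero; for simplicity this sketch uses rounding to nearest
   (MPC_RNDNN), which only affects the constants in the bounds. */
static void
tan_sketch (mpc_t t, const mpc_t z, mpfr_prec_t p)
{
  mpc_t u, v;
  mpc_init2 (u, p);
  mpc_init2 (v, p);
  mpc_sin (u, z, MPC_RNDNN);    /* u <- A(sin z) */
  mpc_cos (v, z, MPC_RNDNN);    /* v <- A(cos z) */
  mpc_div (t, u, v, MPC_RNDNN); /* t <- A(u/v)   */
  mpc_clear (u);
  mpc_clear (v);
}
\end{verbatim}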
We know that $\Re(\frac{a+i b}{c+i d})=\frac{a c +b d}{c^2 + d^2}$ and $\Im(\frac{a+i b}{c+i d})=\frac{b c - a d}{c^2 + d^2}$, so in the special case of $\tan z=\frac{\sin x\cosh y+i\cos x\sinh y}{\cos x\cosh y-i\sin x\sinh y}$, we have $abcd < 0$, which means that there might be a cancellation in the computation of the real part, while none can occur in that of the imaginary part. Then, using the generic error of the division (see \S\ref{sssec:propdiv}), we have \begin{align*} \error(\Re(t)) &\leq [1+2^{3+e_1}+2^{3+e_2}+2^6] \Ulp(\Re(t)), \\ \error(\Im(t)) &\leq [1+2^3+2^3+2^6] \Ulp(\Im(t)), \end{align*} where $e_1=\Exp(a c) -\Exp(a c+b d)$ and $e_2=\Exp(b d) -\Exp(a c+b d)$. The second inequality shows that $2^7$ is a suitable choice for $k_I$. As $|\sinh y|<\cosh y$ for every nonzero $y$, we have $|b d| < |a c|$ and thus $e_2 \leq e_1$.

\subsection {\texttt {mpc\_pow}}

\begin{lemma} Let $x$ be a dyadic complex number, and let $m \in \Z_{>0}$ be odd and $e \in \Z$. Then $x^{m 2^e}$ is a dyadic complex if and only if $x^{2^e}$ is. \end{lemma} \begin{proof} Notice that by Proposition~\ref {prop:dyadic} the set of dyadics forms a ring, whence any positive integral power of a dyadic is again dyadic. Thus if $x^{2^e}$ is dyadic, then so is $x^{m 2^e}$. Conversely, assume that $x^{m 2^e}$ is dyadic. If $e \geq 0$, then $x^{2^e}$ is dyadic independently of the assumption, and it remains to consider the case $e < 0$. Write $x = i^u \prod_{k \geq 0} p_k^{\alpha_k}$ and $z = x^{m 2^e} = i^v \prod_k p_k^{\beta_k}$, so that $x^m = z^{2^{|e|}}$. The uniqueness of the prime decomposition implies that $m \alpha_k = 2^{|e|} \beta_k$, and since $m$ is odd, $2^{|e|}$ must divide $\alpha_k$. Then $x^{2^e} = i^w \prod_k p_k^{\gamma_k}$ with $w \equiv m^{-1} v \pmod 4$ and $\gamma_k = \frac {\alpha_k}{2^{|e|}}$. Now $\alpha_k \geq 0$ for $k \geq 1$ implies $\gamma_k \geq 0$ for $k \geq 1$, and $x^{2^e}$ is dyadic by Proposition~\ref {prop:dyadic}. \end{proof} It remains to decide when $x^{2^e}$ is dyadic for $x$ dyadic. If $e \geq 0$, this is trivially the case. For $e < 0$, the question boils down to whether it is possible to take $|e|$ successive square roots of $x$; as soon as the process fails, it is clear that $x^{2^e}$ cannot be dyadic. \begin{lemma} \label {lm:sqrtrat} Let $x \in \Q (i)$, and write $x = (a + b i)^2$ with $a$, $b \in \R$. Then either both of $a$ and $b$ are rational, or none of them is. \end{lemma} \begin{proof} Assume that one of $a$ and $b$ is rational. Then $\Im x = 2 a b \in \Q$ implies that also the other one is rational. \end{proof} \begin{lemma} Let $x$ be dyadic, and write $x = (a + b i)^2$ with $a$, $b \in \R$. Then either both of $a$ and $b$ are dyadic reals, or none of them is. \end{lemma} \begin{proof} Assume that one of $a$ and $b$ is a dyadic real, that is, a rational with a power of~$2$ as denominator. Then $a$, $b \in \Q$ by Lemma~\ref {lm:sqrtrat}. Now, $\Re x = a^2 - b^2$ implies that also the square of the \textit {a priori} not dyadic coefficient $a$ or $b$, and thus the coefficient itself, has as denominator a power of~$2$. \end{proof} \begin {theorem} Let $x = m 2^e$ and $y = n 2^f$ be dyadic complex numbers with $m$ and $n$ odd, and let $z = x^y$. Call the pair $(x, y)$ {\em exceptional} if at least one of $\Re z$ or $\Im z$ is a dyadic real. Exceptional pairs occur only in the following cases: \begin {enumerate} \item $y = 0$; then $z = 1$. \item $x \geq 0$ and $y \neq 0$ are real; then $\Im z = 0$, and the question whether $\Re z = x^y$ is dyadic involves only real numbers and can thus be delegated to \mpfr. \item $x < 0$ and $y \neq 0$ are real.
\begin {enumerate} \item $y \in \Z$; then $\Im z = 0$, and $\Re z = x^y$ is dyadic if and only if $y > 0$, or $y < 0$ and $-m = 1$. \item $y \in \frac {1}{2} \Z \backslash \Z$, that is, $f = -1$; then $\Re z = 0$, and $\Im z = (-x)^y$ is dyadic if and only if $e$ is even, $-m$ is a square, and, in case $y < 0$, $-m = 1$. \item $y \in \frac {1}{4} \Z \backslash \frac {1}{2} \Z$, that is, $f = -2$; then $z = \frac {1 + i}{\sqrt 2} (-x)^y$ has both real and imaginary dyadic parts if and only if $e \equiv 2 \pmod 4$, $-m$ is a fourth power, and, in case $y < 0$, $-m = 1$. \end {enumerate} \item $y$ not real; see Conjecture~\ref {conj}. \item $y > 0$ real, $x$ not real; see above. \item $y < 0$ real, $x$ not real; still to do. \end {enumerate} \end {theorem} \begin {proof} \begin {enumerate} \item Clear by definition. \item Clear. \item The first two subcases $f \geq -1$ follow from the observation that $x^y = (-1)^y (-x)^y$, where $(-1)^y \in \langle i \rangle$. For $y > 0$, the number $(-x)^y$ is dyadic if and only if $(-x)^{2^f}$ is, which leads to the result; for $y < 0$, one furthermore needs that $(-m)^{-1}$ is dyadic, which for $m$ odd is only possible if $-m = 1$. The third subcase $f = -2$ is similar, but one needs that $(-x)^y$ is dyadic up to a factor of $\sqrt 2$. We proceed to show that for $f \leq -3$, there is no exceptional pair. Suppose that $(x, y)$ is an exceptional pair; by switching to $\left( x^{|n|}, \frac {y}{|n|} \right)$, we may assume without loss of generality that $|n| = 1$. Then $x^y$ is obtained by taking $|f|$ successive square roots of either $x$ or $\frac {1}{x}$, both of which are elements of $\Q (i)$. Lemma~\ref {lm:sqrtrat} implies that both $\Re (x^y)$ and $\Im (x^y)$ are rational. Write $x^y = \alpha \zeta = \alpha \zeta_r + i \alpha \zeta_i$, where $\alpha = (-x)^y \in \R$ and $\zeta = \zeta_r + i \zeta_i$ is a primitive root of unity of order~$2^{|f| + 1}$. Then $\alpha \zeta_r$, $\alpha \zeta_i \in \Q$ implies $\zeta \in \Q (i, \alpha)$. Moreover, $\alpha^2 = \alpha^2 (\zeta_r^2 + \zeta_i^2) = (\alpha \zeta_r)^2 + (\alpha \zeta_i)^2 \in \Q (i)$, so that $\Q (i, \alpha)$ is an extension of degree at most~$4$ of $\Q$ containing $\Q (\zeta)$ and thus a primitive $16$-th root of unity, which is impossible. \item \item \item \end {enumerate} \end {proof} \paragraph{Sign of zeroes.} When the output value has a zero real or imaginary part, its sign should be decided, which is not always possible if we want it to be consistent with the formula $x^y = \exp(y\log x)$ (in the following, we exclude $0^y$). Let $x_1$, $x_2$, $y_1$, and $y_2$ be real numbers such that $x = x_1 + x_2 i$ and $y = y_1 + y_2 i$. Let $\phi \in [-\pi, +\pi]$ be the argument of $x = |x| e^{i\phi}$, with the convention that when $x_1 < 0$ the argument of $x$ is $+\pi$ if $x_2 = +0$ and $-\pi$ if $x_2 = -0$. Then \[ x^y=\exp\left(A(x,y)\right) \left(\cos B(x,y)+\sin B(x,y) i\right) \] where \begin {align*} A(x,y) & = y_1\log|x|-y_2\phi,\\ B(x,y) & = y_2\log|x|+y_1\phi. \end {align*} As $|x^y| = \exp\left(A(x,y)\right)$ is positive, the value of $B(x,y)$ determines the sign of each part of $x^y$. Note that $A(\overline{x},y) = A(x,\overline{y})$ and $B(\overline{x}, y)=-B(x,\overline{y})$, so $\overline{x}^y = \overline{x^{\overline{y}}}$ and we can restrict the study below to $x$ with nonnegative imaginary value (i.e., $x_2 \geq 0$ and $\pi \geq \phi \geq 0$).
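As a simple illustration of these formul\ae, take $x = i$, so that $|x| = 1$ and $\phi = \frac {\pi}{2}$, and $y = 2$. Then \[ A (x, y) = y_1 \log |x| - y_2 \phi = 0 \quad \text {and} \quad B (x, y) = y_2 \log |x| + y_1 \phi = \pi, \] so that $x^y = \cos \pi + \sin \pi \, i = -1 + 0 i$, in accordance with $i^2 = -1$: here $B (x, y) = k \frac {\pi}{2}$ with even $k = 2$, the imaginary part vanishes, and the sign of this zero part is exactly the kind of question studied below.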
To determine the sign of the zero part of $x^y$ when it is purely real or purely imaginary, a special study is needed around points $(x, y)$ where $B(x, y)$ is a multiple of $\pi/2$. Let \begin {equation} \label {eqn:Bk} B_k(x, y) = y_2 \log|x| +y_1\phi -k\frac{\pi}{2} \end {equation} where $k$ is an integer, and let $S_k$ be the set of points $(x, y)$ where $B_k(x, y) = 0$. We assume that, for any integer $k$, the surface $S_k$ is orientable and not reduced to a single point; then each neighborhood of a point $(x_0, y_0)$ of $S_k$ intersects the region where $B(x, y) > k\pi/2$ and the region where $B(x, y) < k\pi/2$. Thus for an even $k$, we can make $(x, y)$ tend continuously to $(x_0, y_0)$ so that $\Im(x^y) > 0$ or we can make it tend to $(x_0, y_0)$ so that $\Im(x^y) < 0$ (the same applies with $k$ odd and $\Re(x^y)$). In such cases, the sign of the zero part of $x_0^{y_0}$ is not determined. However, when $S_k$ intersects an axis, for example when $\Re(x_0) = 0$, we have to distinguish two cases: $\Re(x_0) = +0$ and $\Re(x_0) = -0$. Then, if $Q_{x_0}$ (resp. $Q_{y_0}$) denotes the quadrant where $x_0$ (resp. $y_0$) lies, it is possible that the sign of $B_k(x,y)$ remains constant for $(x,y)$ in the intersection $I$ of a neighborhood of $(x_0, y_0)$ with $Q_{x_0}\times Q_{y_0}$, determining the sign of the zero part of $x^y$. Let $dB_k(x,y)$ be the derivative of $B_k(x, y)$; we have \begin {equation} \label {eqn:BkDerivative} dB_k(x, y)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) = \frac{x_1y_2-x_2y_1}{x_1^2+x_2^2}\delta_1 + \frac{x_1y_1+x_2y_2}{x_1^2+x_2^2}\delta_2 + \phi\epsilon_1 + \log|x| \epsilon_2. \end {equation} If $dB_k(x_0, y_0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2)$ is not zero and if its sign remains constant for all real numbers $\delta_1$, $\delta_2$, $\epsilon_1$, and $\epsilon_2$ such that $x = x_0 + \delta_1 + \delta_2i$, $y = y_0 +\epsilon_1 + \epsilon_2i$, and $(x,y)$ is in the neighborhood $I$ of $(x_0, y_0)$ defined above, then the sign of $dB_k(x_0, y_0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2)$ determines the sign of the zero part of $x^y$. In the following discussion, we write $B_k(x_1, x_2, y_1, y_2) := B_k(x_1+x_2i, y_1+y_2i)$, as a function of four real arguments. Let $\sigma_1 = \pm 1$ (resp. $\sigma_2$, $\rho_1$, $\rho_2$) denote the sign of $x_1$ (resp. $x_2$, $y_1$, $y_2$). \begin {enumerate} \item Case $B_k(\sigma_1 0, x_2, y_1, y_2)=0$ for $x_2 > 0$. Here $\phi = +\frac{\pi}{2}$. \begin {enumerate} \item if $y_2=\rho_2 0$, then replacing $\phi$ and $y_2$ by their values in (\ref{eqn:Bk}), we have $y_1= k$ and (\ref{eqn:BkDerivative}) gives \[ dB_k(\sigma_1 0, x_2, k, \rho_2 0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) = - \frac{k}{x_2} \delta_1 + 0 \delta_2 + \frac{\pi}{2} \epsilon_1 + \log(x_2) \epsilon_2, \] where $\sigma_1 \delta_1 > 0$ and $\rho_2 \epsilon_2 > 0$, so that $\delta_1 + (x_2 + \delta_2)i$ (resp. $k+\epsilon_1 + \epsilon_2 i$) is in the same quadrant $Q_{x_0}$ (resp. $Q_{y_0}$) as $x_0=\sigma_1 0 +x_2 i$ (resp. $y_0=k +\rho_2 0i$). When $k \neq 0$, $\epsilon_1$ takes positive as well as negative values in the last expression, so the sign of $\frac{\pi}{2} \epsilon_1$, and therefore the sign of $dB_k(x, y)\cdot (\delta_1, \delta_2, \epsilon_1, \epsilon_2)$, is not constant. If $k=0$, then $y_1=\rho_1 0$ with $\rho_1 \epsilon_1 > 0$.
In this case, \begin {align*} dB_0(\pm 0, x_2, +0, +0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &> 0 \;\text{if}\; x_2 \geq 1 \\ dB_0(\pm 0, x_2, +0, -0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &> 0 \;\text{if}\; 1 \geq x_2 > 0 \\ dB_0(\pm 0, x_2, -0, -0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &< 0 \;\text{if}\; x_2 \geq 1 \\ dB_0(\pm 0, x_2, -0, +0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &< 0 \;\text{if}\; 1 \geq x_2 > 0 \end {align*} and the sign of $dB_k(\sigma_1 0, x_2, \rho_1 0, \rho_2 0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2)$ is not constant for all other combinations of $k$, $\sigma_1$, $x_2>0$, $\rho_1$, and $\rho_2$. \item if $y_2\neq 0$, then from (\ref{eqn:Bk}) \[ x_2 = \exp\left(\frac{k-y_1}{2y_2}\pi\right). \] But the number on the right-hand side of the last equation is known to be transcendental unless $k-y_1=0$. As $x_2$ is dyadic, we have $y_1= k$ and $x_2=+1$. Using (\ref{eqn:BkDerivative}), we write \[ dB_k(\sigma_1 0, +1, k, y_2)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) = - k \delta_1 + y_2 \delta_2 + \frac{\pi}{2} \epsilon_1 + 0 \epsilon_2 \] where $\sigma_1 \delta_1 > 0$. Here, $\delta_2$ can take negative and positive values, preventing the sign of $dB_k(\sigma_1 0, +1, k, y_2)\cdot (\delta_1, \delta_2, \epsilon_1, \epsilon_2)$ from being constant. \end {enumerate} \item Case $B_k(x_1, +0, y_1, y_2)=0$ with $x_1 \neq 0$. (Remember we assumed $x_2 \geq 0$.) \begin {enumerate} \item if $x_1 >0$, then $\phi = +0$. From (\ref{eqn:Bk}), we have \[ y_2 \log x_1 = k \frac{\pi}{2}. \] \begin {enumerate} \item if $y_2 = \rho_2 0$, the last equation implies $k = 0$, and from (\ref{eqn:BkDerivative}) we have \[ dB_0(x_1, +0, y_1, \rho_2 0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) = 0 \delta_1 + \frac{y_1}{x_1} \delta_2 + 0 \epsilon_1 + \log(x_1) \epsilon_2 \] with $\delta_2 > 0$ and $\rho_2 \epsilon_2 > 0$. The sign of the last expression is constant only in the following cases: \begin {align*} dB_0(x_1, +0, y_1, +0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &> 0 \;\text{if}\; y_1 \geq 0 \;\text{and}\; x_1 > 1\\ dB_0(+1, +0, y_1, \pm 0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &> 0 \;\text{if}\; y_1 > 0\\ dB_0(x_1, +0, y_1, -0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &> 0 \;\text{if}\; y_1 \geq 0 \;\text{and}\; 1 > x_1 > 0 \\ dB_0(x_1, +0, y_1, -0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &< 0 \;\text{if}\; y_1 \leq 0 \;\text{and}\; x_1 > 1\\ dB_0(+1, +0, y_1, \pm 0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &< 0 \;\text{if}\; y_1 < 0\\ dB_0(x_1, +0, y_1, +0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &< 0 \;\text{if}\; y_1 \leq 0 \;\text{and}\; 1 > x_1 > 0. \end {align*} Notice that we cannot conclude when $dB_0(x,y)$ is identically zero, so the cases $x=+1+0i$, $y=y_1 \pm0i$ cannot be determined by this means. \item If $y_2 \neq 0$, then $x_1 = \exp\left(\frac{k}{2y_2}\pi\right)$ is a dyadic number only if $k=0$, and then $x_1=1$. We have, using (\ref{eqn:BkDerivative}), \[ dB_0(+1, +0, y_1, y_2)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) = y_2 \delta_1 + y_1 \delta_2 + 0 \epsilon_1 + 0 \epsilon_2 \] with $\delta_2 > 0$. Here $y_2 \delta_1$ can take negative as well as positive values, preventing the sign of $dB_0(+1, +0, y_1, y_2)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2)$ from being constant. \end {enumerate} \item if $x_1 < 0$, then $\phi = \pi$.
Using (\ref{eqn:BkDerivative}), we have \[ dB_k(x_1, +0, y_1, y_2)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) = \frac{y_2}{x_1} \delta_1 + \frac{y_1}{x_1} \delta_2 + \pi \epsilon_1 + \log(-x_1) \epsilon_2 \] with $\delta_2 > 0$. If $y_1 \neq 0$, then $\epsilon_1$ can take negative as well as positive values, preventing $dB_k$ from having a constant sign. Assume thus $y_1 = 0$. As $x_1 \neq 0$, $\delta_1$ can also take negative and positive values, and $\frac{y_2}{x_1} \delta_1$ does not have a constant sign unless $y_2 = 0$; moreover, for $y_2 \neq 0$, equation (\ref{eqn:Bk}) would give $x_1 = - \exp(\frac{k \pi}{2 y_2})$, which is dyadic only for $k = 0$. When $y = 0$, equation (\ref{eqn:Bk}) implies $k = 0$, and the derivative of $B_k$ is \[ dB_0(x_1, +0, \rho_1 0, \rho_2 0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) = 0 \delta_1 + 0 \delta_2 + \pi \epsilon_1 + \log(-x_1) \epsilon_2 \] with $\delta_2 >0$, $\rho_1 \epsilon_1 >0$ and $\rho_2 \epsilon_2 >0$. Then, \begin {align*} dB_0(x_1, +0, +0, +0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &> 0 \;\text{if}\; x_1 \leq -1 \\ dB_0(x_1, +0, +0, -0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &> 0 \;\text{if}\; -1 \leq x_1 < 0 \\ dB_0(x_1, +0, -0, -0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &< 0 \;\text{if}\; x_1 \leq -1\\ dB_0(x_1, +0, -0, +0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &< 0 \;\text{if}\; -1 \leq x_1 < 0 \end {align*} and the sign of $dB_k(x_1, +0, \rho_1 0, \rho_2 0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2)$ is not constant for all other combinations of $k$, $x_1<0$, $\rho_1$, and $\rho_2$. \end {enumerate} \item Case $B_k(x_1, x_2, \rho_1 0, y_2)=0$ for $x_2 \geq 0$. Here, we have \[ y_2\log|x|-k\frac{\pi}{2} = 0. \] \begin{enumerate} \item If $y_2=0$, then from the last equation $k=0$. Using (\ref{eqn:BkDerivative}) \[ dB_0(x_1,x_2,\rho_10,\rho_20)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) = 0\delta_1 + 0\delta_2 + \phi \epsilon_1 +\log|x| \epsilon_2 \] where $\rho_1 \epsilon_1 > 0$ and $\rho_2 \epsilon_2 > 0$. The case $\phi = 0$, that is $x_1 > 0$ and $x_2 = 0$, has already been processed above in Case (b)(i). Let $\phi > 0$; then $x_2 \neq 0$ or $x_1 < 0$. Using the expression of the derivative given above, we have \begin{align*} dB_0(x_1, x_2, +0, +0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &> 0 \;\text{if}\; |x| \geq 1 \;\text{and}\; \pi \geq \phi > 0\\ dB_0(x_1, x_2, -0, -0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &< 0 \;\text{if}\; |x| \geq 1 \;\text{and}\; \pi \geq \phi > 0\\ dB_0(x_1, x_2, +0, -0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &> 0 \;\text{if}\; 1 \geq |x| > 0 \;\text{and}\; \pi \geq \phi > 0\\ dB_0(x_1, x_2, -0, +0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) &< 0 \;\text{if}\; 1 \geq |x| > 0 \;\text{and}\; \pi \geq \phi > 0 \end{align*} \item If $y_2 \neq 0$, from (\ref{eqn:Bk}), we have \[ |x|=\exp\left(\frac{k\pi}{2y_2}\right). \] The right hand side of the equation is known to be transcendental unless $k=0$. As $|x|^2$ is dyadic, so that $|x|$ is algebraic, we have $k=0$, and then $|x|=1$. Here, (\ref{eqn:BkDerivative}) gives \[ dB_0(x_1,x_2,\rho_10,y_2)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) = x_1y_2\delta_1 + x_2y_2\delta_2 + \phi \epsilon_1 + 0\epsilon_2 \] with $\rho_1 \epsilon_1 > 0$. If $x_2 \neq 0$, then $x_2y_2\delta_2$ can take negative as well as positive values and the sign of the derivative is not constant. Assume thus $x_2 = 0$. Then, $x_1=\pm 1$ because $|x|=1$, and $x_1y_2\delta_1$ can take negative and positive values.
So, the sign of the zero part of $x^y$ (if any) cannot be determined for all integers $k$ and for all dyadic real numbers $x_1$, $x_2$, and $y_2 \neq 0$ with $B_k(x_1, x_2, \pm 0, y_2) = 0$. \end{enumerate} \item Case $B_k(x_1, x_2, y_1, \rho_2 0)=0$ for $x_2 \geq 0$. The case $y_1=0$ has already been processed above in Case (c). Let $y_1 \neq 0$; then from (\ref{eqn:Bk}) we have \[ \phi = \frac{k}{2y_1}\pi \] which implies that the argument $\phi$ of $x$ can be written as $r \pi$ for some rational number $r$ and, at the same time, $\cos^2 \phi = x_1^2/|x|^2$ and $\sin^2 \phi = x_2^2/|x|^2$ are rational. The only five possibilities (with $x_2 \geq 0$) are:\footnote{This is wrong: consider $\phi = \pi/3$, with $\cos \phi = 1/2$ and $\sin \phi = \sqrt{3}/2$.} \begin{enumerate} \item $\phi = 0$, but then $x_2=0$. This case has been processed above. \item $\phi = \frac{\pi}{4}$, then $x_1 = x_2 > 0$. From (\ref{eqn:Bk}), we have $y_1 = 2k \neq 0$, and from (\ref{eqn:BkDerivative}) \[ dB_k(x_1, x_1, 2k, \rho_2 0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) = -\frac{k}{x_1}\delta_1 + \frac{k}{x_1}\delta_2 + \frac{\pi}{4}\epsilon_1 + \log(\sqrt{2}x_1)\epsilon_2 \] with $\rho_2\epsilon_2>0$. As $x_1 \neq 0$ and $k \neq 0$, the term $\frac{k}{x_1}\delta_1$, and $dB_k(x_1, x_1, 2k, \rho_2 0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2)$ as well, has no constant sign. \item $\phi = \frac{\pi}{2}$, then $x_1 = 0$. This case has been processed above in Case (a). \item $\phi = \frac{3\pi}{4}$, then $x_1 = -x_2$, $x_1 < 0$ and (\ref{eqn:Bk}) gives $k \neq 0$ and $y_1 = \frac{2k}{3}$. As $y_1$ is a dyadic number, the only compatible values for $k$ are multiples of~3. Let $n$ be a nonzero integer so that $k = 3n$ and $y_1 = 2n$. From (\ref{eqn:BkDerivative}), we have \[ dB_{3n}(-x_2, x_2, 2n, \rho_2 0)\cdot(\delta_1, \delta_2, \epsilon_1, \epsilon_2) = -\frac{n}{x_2}\delta_1 - \frac{n}{x_2}\delta_2 + \frac{3\pi}{4}\epsilon_1 + \log(\sqrt{2}x_2)\epsilon_2 \] with $\rho_2\epsilon_2 >0$. As $n \neq 0$, $\epsilon_1$ can take negative and positive values, and the term $\frac{3\pi}{4}\epsilon_1$ does not have a constant sign. \item $\phi = \pi$, then $x_1<0$ and $x_2 = 0$. This case has been processed above in Case (b). \end{enumerate} \end {enumerate} To sum up, using the inequalities above and deriving those with negative $x_2$ from them and from the relation $\overline{x}^y = \overline{x^{\overline{y}}}$, we can give the almost complete list of powers of dyadic complex numbers that have a zero part with determined sign; the only exception is $x = +1 \pm 0i$ raised to a zero power, which cannot be treated by these means.
\begin{tabular}{r@{ $=$ }lr@{ $=$ }ll} $x^{+0 +0i}$ & $1 +0i$,& $x^{-0 -0i}$ & $1 -0i$ & if $|x|>1$ and $x_2>0$\\ $x^{-0 +0i}$ & $1 +0i$,& $x^{+0 -0i}$ & $1 -0i$ & if $|x|>1$ and $x_2<0$\\ $(x_1 \pm 0i)^{\pm0 +0i}$ & $1 +0i$,& $(x_1 \pm 0i)^{\pm0 -0i}$ & $1 -0i$ & if $x_1>1$\\ $(x_1 +0i)^{+0 +0i}$ & $1 +0i$, & $(x_1 +0i)^{-0 -0i}$ & $1 -0i$ & if $|x_1|>1$\\ $(x_1 -0i)^{-0 +0i}$ & $1 +0i$, & $(x_1 -0i)^{+0 -0i}$ & $1 -0i$ & if $|x_1|>1$\\ $(x_1 +0i)^{y_1 +0i}$ & $x_1^{y_1} +0i$, & $(x_1 -0i)^{y_1 -0i}$ & $x_1^{y_1} -0i$ & if $x_1>1$ and $y_1>0$\\ $(x_1 -0i)^{y_1 +0i}$ & $x_1^{y_1} +0i$, & $(x_1 +0i)^{y_1 -0i}$ & $x_1^{y_1} -0i$ & if $x_1>1$ and $y_1<0$\\ $(+1 +0i)^{y_1 \pm0i}$ & $1 +0i$, & $(+1 -0i)^{y_1 \pm0i}$ & $1 -0i$ & if $y_1>0$\\ $(+1 -0i)^{y_1 \pm0i}$ & $1 +0i$, & $(+1 +0i)^{y_1 \pm0i}$ & $1 -0i$ & if $y_1<0$\\ $x^{+0 -0i}$ & $1 +0i$, & $x^{-0 +0i}$ & $1 -0i$ & if $1>|x|>0$ and $x_2>0$\\ $x^{-0 -0i}$ & $1 +0i$, & $x^{+0 +0i}$ & $1 -0i$ & if $1>|x|>0$ and $x_2<0$\\ $(x_1 \pm0i)^{\pm0 -0i}$ & $1 +0i$, & $(x_1 \pm0i)^{\pm0 +0i}$ & $1 -0i$ & if $1 > x_1 > 0$ \\ $(x_1 +0i)^{+0 -0i}$ & $1 +0i$, & $(x_1 -0i)^{+0 +0i}$ & $1 -0i$ & if $1 > |x_1| > 0$ \\ $(x_1 -0i)^{-0 -0i}$ & $1 +0i$, & $(x_1 +0i)^{-0 +0i}$ & $1 -0i$ & if $1 > |x_1| > 0$ \\ $(x_1 +0i)^{y_1 -0i}$ & $x_1^{y_1} +0i$, & $(x_1 -0i)^{y_1 +0i}$ & $x_1^{y_1} -0i$ & if $1 > x_1 > 0$ and $y_1 > 0$ \\ $(x_1 -0i)^{y_1 -0i}$ & $x_1^{y_1} +0i$, & $(x_1 +0i)^{y_1 +0i}$ & $x_1^{y_1} -0i$ & if $1 > x_1 > 0$ and $y_1 < 0$ \\ $(\pm 0 +x_2i)^{+0 +0i}$ & $1 +0i$, & $(\pm 0 +x_2i)^{-0 -0i}$ & $1 -0i$ & if $x_2 \geq 1$ \\ $(\pm 0 +x_2i)^{+0 -0i}$ & $1 +0i$, & $(\pm 0 +x_2i)^{-0 +0i}$ & $1 -0i$ & if $1 \geq x_2 > 0$ \\ $(\pm 0 +x_2i)^{-0 -0i}$ & $1 +0i$, & $(\pm 0 +x_2i)^{+0 +0i}$ & $1 -0i$ & if $ 0 > x_2 \geq -1$ \\ $(\pm 0 +x_2i)^{-0 +0i}$ & $1 +0i$, & $(\pm 0 +x_2i)^{+0 -0i}$ & $1 -0i$ & if $-1 \geq x_2$ \\ $(-1 +0i)^{+0 \pm0i}$ & $1+0i$, & $(-1 +0i)^{-0 \pm0i}$ & $1-0i$ \\ $(-1 -0i)^{-0 \pm0i}$ & $1+0i$, & $(-1 -0i)^{+0 \pm0i}$ & $1-0i$ \\ \end{tabular} So when $x^y$ is a purely real number, the following pattern is compatible with the determined cases: \begin{tabular}{rl} $x^y = x_1^{y_1} + \rho_2 0i$ & if $|x| > 1$\\ $x^y = 1 + \sigma_2 \rho_1 0i$ & if $|x| = 1$\\ $x^y = x_1^{y_1} - \rho_2 0i$ & if $|x| < 1$\\ $x^y = x_1^{y_1} + \sigma_2 \rho_1 0i$ & if $y_1 \neq 0$ \end{tabular} where $\sigma_2$ (resp.\ $\rho_1$, $\rho_2$) is the sign of $x_2$ (resp.\ $y_1$, $y_2$) and with the convention $0^0=+1$. \subsection {\texttt {mpc\_pow\_ui}} \label {ssec:mpcpowui} In the case of a positive integer exponent $n$, it may be faster to use binary exponentiation to compute $x^n$. More generally, let $n_1, \ldots, n_k$ be an addition chain for $n$, that is, $n_1 = 1$, $n_k = n$, and for any index $2 \leq r \leq k$, there are indices $1 \leq s, t \leq r-1$ such that $n_r = n_s + n_t$. Define the corresponding powers of $x$ as $\corr x_r = x^{n_r}$, so that $\corr x_1 = x$, $\corr x_k = x^n$ and $\corr x_r = \corr x_s \corr x_t$. The special case of left-to-right binary exponentiation satisfies that $n_{r+1} = 2 n_r$ (which occurs $\lfloor \log_2 n \rfloor$ times) or $n_{r+1} = n_r + 1$ (which occurs one time fewer than the number of ones in the binary expansion of $n$, that is, one less than the Hamming weight of $n$); so $k \leq 2 \lfloor \log_2 n \rfloor + 1$. Instead of the correct sequence $\corr x_r$, we compute during the algorithm approximations $\appro x_1 = x = \corr x_1$ and $\appro x_r = \round (\appro x_s \appro x_t)$ at some precision~$p$.
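For illustration, left-to-right binary exponentiation with a rounding after each multiplication may be sketched in terms of the public {\mpc} interface as follows; this is only a sketch, not the actual implementation of \texttt {mpc\_pow\_ui}, and it assumes rounding to nearest at a single working precision.

\begin{verbatim}
#include <mpc.h>

/* Sketch of left-to-right binary exponentiation: r <- x^n,
   rounding after each squaring and multiplication.  r must be
   initialised by the caller at the working precision p, must be
   distinct from x, and n >= 1 is required. */
static void
pow_ui_sketch (mpc_t r, const mpc_t x, unsigned long n)
{
  unsigned long mask = 1UL << (8 * sizeof (unsigned long) - 1);
  while ((n & mask) == 0)       /* find the leading bit of n */
    mask >>= 1;
  mpc_set (r, x, MPC_RNDNN);    /* n_1 = 1 */
  for (mask >>= 1; mask != 0; mask >>= 1)
    {
      mpc_sqr (r, r, MPC_RNDNN);        /* n_{r+1} = 2 n_r */
      if (n & mask)
        mpc_mul (r, r, x, MPC_RNDNN);   /* n_{r+1} = n_r + 1 */
    }
}
\end{verbatim}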
Let $\theta_r$ be such that $\corr x_r = \appro x_r (1 + \theta_r)$, so that the relative error of $\appro x_r$ is given by $\epsilon_r = |\theta_r|$. Write $z_r = \appro x_s \appro x_t$. Then $\appro x_r = \round (z_r)$ and $\corr x_r = z_r (1 + \theta_s)(1 + \theta_t)$. Assuming rounding to nearest, the absolute real error attributable to rounding $\Re z_r$ to $\Re \appro x_r$ is bounded by $\frac {1}{2} \cdot 2^{\Exp (\Re z_r) - p} \leq \frac {1}{2} \cdot 2^{\Exp (\Re \appro x_r) - p}$ by Proposition~\ref {prop:expround}, and the corresponding relative real error is bounded by $2^{-p}$ according to Proposition~\ref {prop:relerror}. The same holds for the imaginary part, and Proposition~\ref {prop:comrelerror} implies that the complex relative error is also bounded by $2^{-p}$. In other words, $z_r = \appro x_r (1 + \eta_r)$ for some complex number $\eta_r$ with $|\eta_r| \leq 2^{-p}$. Putting the equations together, we obtain $1 + \theta_r = (1 + \theta_s)(1 + \theta_t)(1 + \eta_r)$. We can thus rewrite $1 + \theta_r$ as a product of factors of the form $1 + \eta$ with $|\eta| \leq 2^{-p}$. Denoting by $u_r$ the number of such factors, we thus have $u_r = u_s + u_t + 1$, from which we deduce by induction, using $u_1 = 0 = n_1 - 1$, that $u_r = n_r - 1$. So the relative error of $\appro x_k$, compared to $x^n$, satisfies $|\theta_k| \leq (1 + 2^{-p})^{n-1} - 1$. Using Propositions~\ref {prop:comrelerror} and~\ref {prop:relerror}, this translates into an absolute error of \[ \left( 1 + 2^{\Exp (\Im \appro x_k) - \Exp (\Re \appro x_k) + 1} \right) \left( (1 + 2^{-p})^{n-1} - 1 \right) 2^p \Ulp (\Re \appro x_k) \] on the real part and of \[ \left( 1 + 2^{\Exp (\Re \appro x_k) - \Exp (\Im \appro x_k) + 1} \right) \left( (1 + 2^{-p})^{n-1} - 1 \right) 2^p \Ulp (\Im \appro x_k) \] on the imaginary part of the result. If we further assume that $(n-1) 2^{-p} \leq 1$, then $(1 + 2^{-p})^{n-1} - 1 \leq 2 (n - 1) 2^{-p}$, because $(1+\varepsilon)^m-1 = \exp(m \log(1+\varepsilon)) - 1 \leq \exp(\varepsilon m) - 1 \leq 2 \varepsilon m$ as long as $\varepsilon m \leq 1$. This gives the simplified bounds \begin{equation} \label{eq:powui_re} \left( 2 + 2^{\Exp (\Im \appro x_k) - \Exp (\Re \appro x_k) + 2} \right) (n-1) \Ulp (\Re \appro x_k) \end{equation} on the real part and \begin{equation} \label{eq:powui_im} \left( 2 + 2^{\Exp (\Re \appro x_k) - \Exp (\Im \appro x_k) + 2} \right) (n-1) \Ulp (\Im \appro x_k) \end{equation} on the imaginary part.
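For instance, for $n = 10$ and a result whose real and imaginary parts have the same exponent, both \eqref {eq:powui_re} and \eqref {eq:powui_im} evaluate to $(2 + 2^2)(n - 1) = 54 < 2^6$ ulp, so at most $6$ bits are lost on each part, independently of the precision as long as $(n-1) 2^{-p} \leq 1$.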
\subsection {\texttt {mpc\_pow\_si}} For the computation of $x^{-n}$ with $n > 0$, the analysis of \S\ref {ssec:mpcpowui} essentially carries over. We keep the same notation. Using bounds on the absolute errors of $\Re z_r$ and $\Im z_r$, we have shown above, using Propositions~\ref {prop:relerror} and~\ref {prop:comrelerror}, that $z_r = \appro {x_r} (1 + \eta_r)$ with $|\eta_r| \leq 2^{-p}$ and concluded that $x^n = \corr {x_k} = \appro {x_k} (1 + \theta_k)$, where $1 + \theta_k$ is the product of $n-1$ factors of the form $1 + \eta$ with $|\eta| \leq 2^{-p}$. By exchanging the roles of $z_r$ and $\appro {x_r}$ and applying Propositions~\ref {prop:relerror} and~\ref {prop:comrelerror} analogously, we obtain that $z_r = \frac {\appro {x_r}}{1 + \zeta_r}$ with $|\zeta_r| \leq 2^{-p}$ and $x^n = \corr {x_k} = \frac {\appro {x_k}}{1 + \xi_k}$, where $1 + \xi_k$ is the product of $n-1$ factors of the form $1 + \zeta$ with $|\zeta| \leq 2^{-p}$. Let $\corr {x_{k+1}} = \corr {x_k}^{-1} = x^{-n}$ be the desired result, $z_{k+1} = \appro {x_k}^{-1}$ and $\appro {x_{k+1}} = \round (z_{k+1})$. As shown in \S\ref {ssec:mpcpowui}, rounding $z_{k+1}$ to nearest implies that $z_{k+1} = \appro {x_{k+1}} (1 + \eta_{k+1})$ with $|\eta_{k+1}| \leq 2^{-p}$. Then $\corr {x_{k+1}} = \frac {1}{\corr {x_k}} = \frac {1 + \xi_k}{\appro {x_k}} = z_{k+1} (1 + \xi_k) = \appro {x_{k+1}} (1 + \xi_k)(1 + \eta_{k+1})$, which contains $n$ factors of the form $1 + \eta$ with $|\eta| \leq 2^{-p}$. Thus, assuming that $n 2^{-p} \leq 1$, the error estimates \eqref {eq:powui_re} and \eqref {eq:powui_im} are valid with $n$ in the place of $n - 1$. \subsection{\texttt {mpc\_agm}, \texttt {mpc\_agm1}} \paragraph {Definition.} Let $a$, $b$ be non-zero complex numbers. Define sequences of arithmetic means $(a_n)$ and geometric means $(b_n)$ by $\corr {a_0} = a$, $\corr {b_0} = b$, $\corr {a_{n+1}} = \frac {\corr {a_n} + \corr {b_n}}{2}$ and $\corr {b_{n+1}} = \sqrt {\corr {a_n} \corr {b_n}}$. At each step, there is a choice of sign for the square root. If $\corr {a_n}$ and $\corr {b_n}$ are at an (unoriented) angle different from $0$ and $\pi$, then they define a two-dimensional pointed cone in the complex plane. Notice that $\corr {a_{n+1}}$ lies in this cone. Following \cite {Cox84}, we call \emph {right} the choice of sign that makes $\corr {b_{n+1}}$ also lie in the cone, and following \cite {CrTh10}, we call the resulting sequences \emph {optimal}. The two sequences then converge to a common limit, the \emph {arithmetic-geometric mean} $\AGM (a, b)$. It is immediate that $\AGM$ is symmetric, that is, $\AGM (a, b) = \AGM (b, a)$, and homogeneous, that is, $\AGM (\lambda a, \lambda b) = \lambda \AGM (a, b)$ for any non-zero complex number $\lambda$. So we may assume that $|a| \geq |b|$, and $\AGM (a, b) = a \AGM (\corr {a_0}, \corr {b_0})$ with $\corr {a_0} = 1$, $\corr {b_0} = b/a$, $|\corr {b_0}| \leq 1$. We need to examine the corner cases. If one or both of $a$ and $b$ are zero, all geometric means are zero, and $\AGM (a, b) = 0$. If the angle between $a$ and $b$ is~$0$, then $\corr {b_0}$ is a positive real number, and $\AGM (1, \corr {b_0})$ may be computed with \mpfr. If the angle between $a$ and $b$ is $\pi$, then $\corr {b_0}$ is a negative real number greater than or equal to $-1$. The arithmetic mean of $1$ and $-1$ is zero, so $\AGM (1, -1) = 0$. If $\corr {b_0} > -1$, we take the first geometric mean with a positive imaginary part, so that $\Im (\AGM (1, \corr {b_0}))$ is also positive. Notice that $\Im (\AGM (1, \corr {b_0}))$ has the same sign as $\Im (\corr {b_0})$ unless $\corr {b_0}$ is real, so this choice determines our branch cut for $\AGM$. So in the following, we analyse the computation of $\AGM (1, \appro {b_0})$ with $\appro {b_0} = \round (\corr {b_0}) \in \C \backslash \R^{\geq 0}$, where the real and imaginary parts of $\appro {b_0}$ are rounded towards~$0$, which ensures $| \appro {b_0} | \leq 1$ with absolute errors of the real and imaginary parts of at most \ulp {1}. In the following, relative errors will be most convenient to work with; by Proposition~\ref {prop:comabstorelerror} we have $\relerror (\appro {b_0}) \leq 2^{1-p}$, where $p$ is the working precision. \paragraph {The first iteration -- entering a quadrant.} If $\Re (\appro {b_0}) < 0$, then significant cancellation can occur in the arithmetic mean during the first iteration, which thus needs to be analysed separately.
From now on, we use arbitrary rounding modes and apply Proposition~\ref {prop:comrelround} with $c = 1$ (except for $\Re (\appro {a_1})$, which we need to round down or, equivalently, towards~$0$ for a later step in the analysis). We let $\corr {b_1} = \sqrt {\corr {b_0}}$ and $\appro {b_1} = \round \left( \sqrt {\appro {b_0}} \right)$ with \[ \relerror (\appro {b_1}) \leq 2^{1-p} + \left( 1 + 2^{1-p} \right) 2^{1-p} \leq 2^2 \cdot 2^{1-p} \] by \eqref {eq:propsqrt} and Proposition~\ref {prop:comrelround}. We let $\corr {a_1} = \frac {1 + \corr {b_0}}{2}$ and $\appro {a_1} = \round \left( \frac {1 + \appro {b_0}}{2} \right)$. The imaginary part of $\appro {a_1}$ has an error of at most \ulp{1}, but following the discussion of \S\ref {sssec:propadd}, its real part may have an absolute error, measured in $\Ulp$, of \[ 2^{\Exp (\Re (\appro {b_0})) - \Exp (\Re (\appro {a_1}))} + 1 \leq 2^{- \Exp (\Re (\appro {a_1}))} + 1 \leq 2^{- \Exp (\Re (\appro {a_1})) + 1}, \] where we used that both $\Re (\appro {b_0})$ and $\Re (\appro {a_1})$ have non-positive exponents. By Proposition~\ref {prop:comabstorelerror}, \[ \relerror (\appro {a_1}) \leq 2^{- \Exp (\Re (\appro {a_1})) + 1} \cdot 2^{1-p}. \] If $\Re (\appro {b_0}) \geq 0$, then $\Exp (\Re (\appro {a_1})) = 0$, and we obtain a bound that is similar to the one above for $\relerror (\appro {b_1})$. If $\appro {b_0}$ approaches $-1$, however, the relative error of $\appro {a_1}$ becomes arbitrarily large, as $\appro {a_1}$ becomes arbitrarily small. Notice that, independently of the rounding mode, $\appro {a_1}$ and $\appro {b_1}$ lie in the same complex quadrant of numbers having non-negative real part and an imaginary part of the same sign as that of $\appro {b_0}$ (or of positive imaginary part if $\appro {b_0}$ is real). During the remainder of the algorithm, we will not leave this quadrant and thus not see any more cancellation in the arithmetic mean. \paragraph {The idea of the analysis.} Let us first give the basic idea of the following, rather technical analysis. Assume a target precision of $N$ bits, that is, a target relative error of $2^{-N}$. If $a$ and $b$ are of roughly equal size, the AGM iteration converges quadratically, that is, the number of correct digits doubles in each step, and we need about $\log_2 N$ iterations. Unfortunately, when $a$ and $b$ are of very different sizes, convergence is slower at first. To illustrate this, consider $a = 1$ and $b = 2^{-e}$; during the first iterations, $a_n \approx 2^{-n}$ and $b_n \approx 2^{-e/2^n}$. So we need to increase the number of iterations by roughly $\log_2$ of the absolute value of the exponent of $b/a$; the precise bound $B (N)$ on the number of iterations is given in~\eqref {eq:agmbound}. Moreover, when $\Re (b/a) < 0$, the situation of very different exponents in $a$ and $b$ may occur after one iteration through cancellation, as explained above. Unlike Newton iterations, AGM iterations are not self-correcting; rounding errors propagate. We lose a constant number of bits per iteration; so to reach the desired precision of $N$ bits, we need to carry out all computations at a working precision of about $p = N + c \, B (N)$ for some small constant $c$. The following discussion provides explicit bounds for all these quantities. \paragraph {Rounding error propagation.} Let $\corr {a_n} = \frac {\corr {a_{n-1}} + \corr {b_{n-1}}}{2}$, $\corr {c_n} = \corr {a_{n-1}} \corr {b_{n-1}}$, $\corr {b_n} = \sqrt {\corr {c_n}}$. The computation of $\appro {a_1}$ and $\appro {b_1}$ and their error analysis have been given above. For $n \geq 2$, we compute the sequences \begin {eqnarray*} \appro {a_n} & = & \round \left( \frac {\appro {a_{n-1}} + \appro {b_{n-1}}}{2} \right), \\ \appro {c_n} & = & \round \left( \appro {a_{n-1}} \appro {b_{n-1}} \right), \\ \appro {b_n} & = & \round \left( \sqrt {\appro {c_n}} \right). \end {eqnarray*}
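In code, these recurrences might be transcribed as follows (a sketch under the assumptions of this section, with a function name of ours; the special cases and the first iteration, including the directed rounding of $\Re (\appro {a_1})$, are supposed to have been handled before the call). Note that $\appro {c_n}$ must be formed before the arithmetic mean overwrites $\appro {a_{n-1}}$, and that the principal square root computed by \texttt{mpc\_sqrt}, whose real part is non-negative, keeps the iterates in the quadrant described above. Rounding to nearest is used for concreteness; the analysis allows arbitrary rounding modes.
\begin{verbatim}
#include <mpc.h>

/* Sketch (ours, not the actual library code) of niter AGM steps
   at working precision p: a and b, initialised to a_1 and b_1,
   are overwritten by a_n and b_n. */
static void
agm_iterate (mpc_ptr a, mpc_ptr b, unsigned long niter, mpfr_prec_t p)
{
   mpc_t c;
   unsigned long n;

   mpc_init2 (c, p);
   for (n = 0; n < niter; n++) {
      mpc_mul (c, a, b, MPC_RNDNN);     /* c_n = o(a_{n-1} b_{n-1}) */
      mpc_add (a, a, b, MPC_RNDNN);
      mpc_div_2ui (a, a, 1, MPC_RNDNN); /* a_n = o((a_{n-1}+b_{n-1})/2),
                                           the halving being exact */
      mpc_sqrt (b, c, MPC_RNDNN);       /* b_n = o(sqrt(c_n)) */
   }
   mpc_clear (c);
}
\end{verbatim}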
Then one sees by induction that $\appro {a_n}$ and $\appro {b_n}$ lie in the same quadrant as $\appro {a_1}$ and $\appro {b_1}$ (or, for that matter, $\corr {a_1}$ and $\corr {b_1}$). Let $\alpha_n = \relerror (\appro {a_n}) / 2^{1-p}$, $\gamma_n = \relerror (\appro {c_n}) / 2^{1-p}$ and $\beta_n = \relerror (\appro {b_n}) / 2^{1-p}$. By \eqref {eq:propaddrel} and Proposition~\ref {prop:comrelround}, \begin {equation} \label {eq:agmalpha} \alpha_n \leq \sqrt 2 \, \max (\alpha_{n-1}, \beta_{n-1}) \left( 1 + 2^{1-p} \right) + 1. \end {equation} By \eqref {eq:propmulrel} and Proposition~\ref {prop:comrelround}, \begin {equation} \label {eq:agmgamma} \gamma_n \leq \left( \alpha_{n-1} + \beta_{n-1} + \alpha_{n-1} \beta_{n-1} 2^{1-p} \right) \left( 1 + 2^{1-p} \right) + 1. \end {equation} If we assume for the time being that $\gamma_n \leq 2^{p-1}$ (which will be shown under additional constraints later), then \eqref {eq:propsqrt} and Proposition~\ref {prop:comrelround} imply \begin {equation} \label {eq:agmbeta} \beta_n \leq \gamma_n (1 + 2^{1-p}) + 1. \end {equation} Let $r_n = (2^n - 1) r_1$ for some integer $r_1$ such that $\alpha_1$, $\beta_1 \leq r_1$. We may use $r_1 = 2^k$ with $k = \max (2, - \Exp (\Re (\appro {a_1})) + 1)$ by the discussion of the first iteration above. We now show by induction that $\alpha_n, \beta_n \leq r_n \leq 2^{n + k}$ for $p \geq 2 (n + k) - 1$, that is, if the number of iterations $n$ is not too large compared to the working precision $p$. By \eqref {eq:agmgamma} we obtain \[ \gamma_n \leq 2 r_{n-1} + 1 + \left( r_{n-1}^2 + 2 r_{n-1} + r_{n-1}^2 2^{1-p} \right) 2^{1-p}. \] From $r_{n-1} < 2^{n - 1 + k}$ we deduce \[ r_{n-1}^2 2^{1-p} < 2^{2 (n + k) - 1 - p} \leq 1 \] as long as $p \geq 2 (n + k) - 1$, and then, under the same condition, \[ \left( r_{n-1}^2 + 2 r_{n-1} + r_{n-1}^2 2^{1-p} \right) 2^{1-p} \leq (r_{n-1} + 1)^2 \, 2^{1-p} \leq (2^{n+k-1})^2 \, 2^{1-p} \leq 1. \] Then \[ \gamma_n \leq 2 r_{n-1} + 2 \leq r_n - 2. \] By \eqref {eq:agmbeta}, \[ \beta_n \leq r_n - 1 + r_n 2^{1-p} \leq r_n - 1 + 2^{n+k+1-p} \leq r_n \] under a condition milder than the previous one. Finally, by \eqref {eq:agmalpha}, for $p \geq 3$, \[ \alpha_n \leq \sqrt 2 \, r_{n-1} + 1 + \sqrt {2} \, r_{n-1} 2^{1-p} \leq \frac{5}{4} \sqrt{2} r_{n-1} + 1 \leq 2 r_{n-1} + r_1 = r_n. \] This finishes the induction argument. To summarise, we have for any fixed $n$ that \begin {equation} \label {eq:propagm} \corr {a_n} = (1 + \theta_1) \appro {a_n} \text { with } |\theta_1| \leq 2^{n + k + 1 - p} \text { for } 2 (n + k) - 1 \leq p. \end {equation}
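For instance, with $k = 2$, $n = 10$ iterations and a working precision of $p = 64$, the condition $2 (n + k) - 1 = 23 \leq p$ is satisfied, and \eqref {eq:propagm} yields $|\theta_1| \leq 2^{13 - 64} = 2^{-51}$: roughly $n + k + 1 = 13$ of the $64$ bits are lost to rounding errors.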
\paragraph {Mathematical error.} We also need to estimate the error made by carrying out only a finite number of iterations. Let $z$ satisfy $\Re (z) \geq 0$ and $z \neq 0, 1$, and consider the optimal AGM sequences $(a_n')$ and $(b_n')$ computed (with infinite precision) from $a_0' = 1$ and $b_0' = z$. Let $N' \in \N$ and \[ n \geq B' (N', z) := \max \left( 1, \left\lceil \log_2 |\log_2 |z|| \right\rceil \right) + \lceil \log_2 (N'+2) \rceil + 2 \] (where $\log_2 0$ is to be understood as $- \infty$). Then by \cite[Prop.~3.3, p.~88]{Dupont06}, \[ a_n' = (1 + \theta_2) \AGM (1, z) \text { with } |\theta_2| \leq 2^{-N'}. \] (Notice that here, the relative error as defined in \cite[Def.~1.2, p.~20]{Dupont06} is taken with the roles of the correct and the approximated values reversed compared to our definition.) In our context, we may have $\Re (\corr {b_0}) < 0$, but after one iteration, $\Re (\corr {b_1} / \corr {a_1}) \geq 0$. So we consider $z = b_0' = \corr {b_1} / \corr {a_1}$, so that by homogeneity, $\corr {a_n} = \corr {a_1} a_{n-1}'$, $\corr {b_n} = \corr {a_1} b_{n-1}'$ and $\AGM (1, \corr {b_0}) = \AGM (\corr {a_1}, \corr {b_1}) = \corr {a_1} \AGM (1, z)$. Here \[ |z| = \left| \frac {\corr {b_1}}{\corr {a_1}} \right| = \frac {\sqrt {|\corr {b_0}|}}{|\corr {a_1}|} \] is bounded above, using $\sqrt {|\corr {b_0}|} \leq 1$ and $|\corr {a_1}| \geq \Re (\corr {a_1})$, by \[ \frac {1}{\Re (\corr {a_1})} \leq 2^{- \Exp (\Re (\corr {a_1})) + 1} \leq 2^{- \Exp (\Re (\appro {a_1})) + 1} \] (since $\Re (\appro {a_1})$ was rounded down) and below, using $|\corr {a_1}| \leq 1$, by \[ \sqrt {|\corr {b_0}|} \geq \frac {|\corr {b_0}|}{2} \geq \frac {1}{2} \, \max \left( |\Re (\corr {b_0})|, |\Im (\corr {b_0})| \right) \geq \frac {1}{4} \, 2^{\max \left( \Exp (\Re (\corr {b_0})), \Exp (\Im (\corr {b_0})) \right)} \geq 2^{\max \left( \Exp (\Re (\appro {b_0})), \Exp (\Im (\appro {b_0})) \right) - 2} \] (since both parts of $\appro {b_0}$ were rounded towards~$0$). So for $n \geq B (N)$ with \begin{equation} \label{eq:agmbound} \begin {aligned} B (N) = & \left\lceil \log_2 \max (- \Exp (\Re (\appro {a_1})) + 1, - \Exp (\Re (\appro {b_0})) + 2, - \Exp (\Im (\appro {b_0})) + 2 ) \right\rceil \\ & + \lceil \log_2 (N+4) \rceil + 3 \end {aligned} \end{equation} (in which all exponents are non-positive) we reach \[ \corr {a_n} = (1 + \theta_2) \AGM (1, \corr {b_0}) \text { with } |\theta_2| \leq 2^{-(N+2)}. \] \paragraph {Total error and working precision.} Combining with \eqref {eq:propagm} we obtain $\AGM (1, \corr {b_0}) = \frac {1 + \theta_1}{1 + \theta_2} \appro {a_n} = (1 + \theta) \appro {a_n}$ with \[ |\theta| \leq \frac {|\theta_1| + |\theta_2|}{1 - |\theta_2|} \leq \frac {4}{3} \, \left( 2^{n+k+1-p} + 2^{-(N+2)} \right) \] for $N \geq 2$. So after $n = B (N)$ steps of the AGM iteration at a working precision of $p \geq N + n + k + 3$, we obtain $\appro {a_n}$ with a relative error bounded by $\epsilon = \frac {2}{3} \cdot 2^{-N}$. Finally, we let $\corr {z} = \AGM (a, b) = a \AGM (1, \corr {b_0})$ and $\appro {z} = \round (a \appro {a_n})$, where $a$ is known exactly. By~\eqref {eq:propmulrel} and Proposition~\ref {prop:comabstorelerror}, using that $N \geq 2$ implies $B (N) \geq 7$ and $p \geq 12$, this leads to a relative error bounded by $2^{-N}$. \paragraph {Parameter choice.} Recall that in our analysis, $k = \max (2, - \Exp (\Re (\appro {a_1})) + 1)$ is given by the input data. As we need to know it before choosing the final precision, it should be computed from an approximation of $\appro {a_1}$ at a tiny precision, of $2$ bits, say. We may then choose a target accuracy~$N$, which determines the number of iterations~$n = B (N)$ by~\eqref {eq:agmbound}. Then the working precision may be chosen as \[ p = \max (2 (n + k) - 1, N + n + k + 3). \]
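In C, these choices could be assembled as follows (a hypothetical helper of ours, not part of the library); the exponents are passed as machine integers, as obtained for instance with \texttt{mpfr\_get\_exp} on the parts of the low-precision approximations, and the helper implements~\eqref{eq:agmbound} together with the formula for $p$ above.
\begin{verbatim}
#include <mpfr.h>

/* ceil (log2 (x)) for an integer x >= 1 */
static long
clog2 (long x)
{
   long r = 0;
   while ((1L << r) < x)
      r++;
   return r;
}

/* Hypothetical helper (ours): given the target accuracy N >= 2 and
   the non-positive exponents Ea1 = Exp(Re(a_1~)), Er = Exp(Re(b_0~)),
   Ei = Exp(Im(b_0~)), return the working precision p and store the
   number of iterations n = B(N) in *niter. */
static mpfr_prec_t
agm_params (long N, long Ea1, long Er, long Ei, long *niter)
{
   long e, k, n, p1, p2;

   e = -Ea1 + 1;                      /* arguments of the max in B(N) */
   if (-Er + 2 > e) e = -Er + 2;
   if (-Ei + 2 > e) e = -Ei + 2;
   n = clog2 (e) + clog2 (N + 4) + 3; /* n = B(N) */
   k = (-Ea1 + 1 > 2) ? -Ea1 + 1 : 2;
   p1 = 2 * (n + k) - 1;
   p2 = N + n + k + 3;
   *niter = n;
   return (mpfr_prec_t) (p1 > p2 ? p1 : p2);
}
\end{verbatim}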
Using Propositions~\ref {prop:comrelerror} and~\ref {prop:relerror}, the complex relative error may be translated into an error expressed in $\Ulp$: with $\appro {z} = \appro x + i \appro y$ the computed approximation of $\corr {z} = \AGM (a, b)$, let $k_R = \max (\Exp (\appro y) - \Exp (\appro x) + 1, 0) + 1$ and $k_I = \max (\Exp (\appro x) - \Exp (\appro y) + 1, 0) + 1$. Then we have $\error (\appro x) \leq 2^{k_R + n + k + 3} \Ulp (\appro x)$ and $\error (\appro y) \leq 2^{k_I + n + k + 3} \Ulp (\appro y)$. In practice, one should take this additional loss into account: if rounding fails after the first computation, the values of $k_R$ and $k_I$ will most likely not change at a larger precision, so one should let $k' = \max (k_R, k_I)$, replace $N$ by $N + k'$ and adapt the precision and the number of iterations accordingly. \bibliographystyle{acm} \bibliography{algorithms} \end {document}