\documentclass[a4paper]{article}
\usepackage{a4wide}
\usepackage[english]{babel}
\usepackage[mathletters]{ucs}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{booktabs}
                
\usepackage{tikz}
\usepackage{pgfplots}        
\usetikzlibrary{plotmarks}
                
\usepackage[colorlinks=true,urlcolor=blue,citecolor=black,linkcolor=black]{hyperref}

\PrerenderUnicode{íéáÉő}
        
\hypersetup{%
    pdftitle={Sieving Twin Primes},
    pdfauthor={Dr. Gergő Érdi},
    pdfkeywords={prime, twin prime, Miller-Rabin, functional programming, Haskell}
}%        
                
%include polycode.fmt        

%format `divides` = "|"
%format `notdivides` = "\!\! \not|"
%format `div` = "\div"
%format `mod` = "\mod"
%%format mod a m = a "\mod" m
%format Integer = "\mathbb{Z}"
%format Bool = "\mathbb{L}"
%format odd n = 2 "\!\! \not|" n
%format even n = 2 "|" n
%format split2 = "split_2"
%format round (x) = "\left\lfloor" x "\right\rfloor"
%format ln (x) = "\ln" x
%format fromInteger (x) = x
%format fromIntegral (x) = x
%format error = "\textbf{error}"
%format _ = "\bot"
%format (pow x (y)) = x "^{" y "}"
%format (powMod (n) x (y)) = "\llbracket" x "^{" y "}" "\rrbracket_{" n "}"
                                        
%subst conid a = "\textsc{" a "}"        
                                                                
\newcommand{\comment}[1]{}         
                               
\title{Sieving Twin Primes \\ \vspace{1mm} \large{An Implementation in Haskell}}
\date{}
\author{Dr. Gergő Érdi\\ \url{http://gergo.erdi.hu/}}        
                                              
\begin{document}
\maketitle

\comment{
\begin{code}
module TwinSieve where
\end{code}
}
                
\section{Sieve of Eratosthenes}

               
To sieve the targeted interval of generators, a set of small prime
numbers is needed. These can be easily calculated using the algorithm
known as the sieve of Eratosthenes. Since we
only need to sieve small primes, there is no need to implement the
intricate details described in \cite{eratosthenes}, and instead, we
can use the folklore na\"ive implementation\cite{eratosthenes-pearl}:
                    
\comment{
\begin{code}
divides n k = k `mod` n == 0
-- lhs2Tex
notdivides n k = not $ divides n k
              
-- lhs2TeX kludge: Otherwise the (^) operator can't be skinned properly
pow x y = x ^ y

decimalLen 0 = 0
decimalLen n = 1 + decimalLen (n `div` 10)       

-- Since we encounter numbers larger than what a float can hold, we
-- need our own integer logarithm.
ln x = (fromIntegral (log2 x)) / log2e
    where  log2 0  = 0
           log2 x  = 1 + log2 (x `div` 2)
           log2e = logBase 2.0 (exp 1)
\end{code}
}

\begin{code}
primes :: [Integer]        
primes = sieve [2..]
       	where sieve (p:ns) = p : sieve [n | n <- ns, p `notdivides` n]
\end{code}

\section{Sieving intervals}

                 
We generate twin prime candidates using prime numbers from a given
interval. The function |sieveInterval| calculates numbers between
$2^k$ and $2^{k+n}$ that have no small prime divisors. 
   
\begin{code}
sieveInterval :: Integer -> Integer -> [Integer]
sieveInterval k n = filter (not . hasSmallPrimeDiv) [a..b]
    where  a  = pow 2 k
           b  = pow 2 (k + n)
           hasSmallPrimeDiv x = any (\ p -> p /= x && p `divides` x) smallprimes
                      where smallprimes = take 100 primes
\end{code}

        
\section{The Miller-Rabin primality test}


To further filter the sieved generators, and to check the actual twin
prime candidates, the Miller-Rabin primality test\cite{millrab} is
employed.

For this, first we need to check pseudo-primality:        

\begin{code}
isPseudoPrime n a = isPseudoPrime' n (s, d) a
    where (s, d) = split2 (n-1)
{-"\vspace{1em}"-}
split2 n  | odd n       =  (0, n)
          | otherwise   =  let  (s, d) = split2 (n `div` 2)
                           in   (s + 1, d)
\end{code}
\begin{minipage}{\textwidth}
\begin{code}
isPseudoPrime' n (s, d) a  | a >= n               = error "a >= n"
                           | otherwise            = repeatSquare (s - 1) x
    where  x = powMod n a d
           repeatSquare r x  | x == 1 || x == n-1  =  True
                             | r > 0               =  let  x' = powMod n x 2
                                                      in   repeatSquare (r-1) x'
                             | otherwise           =  False
\end{code}
\end{minipage}
        
The Miller-Rabin algorithm tests if a given $|n| \in |Integer|$ is equal to one
or is even (in which case it is obviously not a prime), or finds a
witness, $|a| \in |\Integer|$, for which |n| is not a pseudo-prime (in
which case |n| can also not be a prime), or returns with the result ''probable prime''.
        
\begin{code}
data Primality = One | Even | Witness Integer | Prime
               deriving (Show, Eq)
\end{code}

In the actual implementation of the Miller-Rabin primality test, we
assume the generalized Riemann conjecture, which allows
us to check for witnesses only in $2, \ldots,
\left\lfloor 2 \ln^2 n \right\rfloor$. Using this result\cite{millrab-det}, we can
write a deterministic version of the primality test.

\begin{code}
millrab :: Integer -> Integer -> Primality
millrab  1  k               = One
millrab  2  k               = Prime
millrab  3  k               = Prime
millrab  n  k  | even n     = Even
               | otherwise  = combine [mr a | a <- [2..k']]

    where  k' = minimum [k, n-1, round (2 * (pow (ln (fromIntegral n)) 2))]
           (s, d) = split2 (n-1)
           {-"\vspace{1em}"-}
           mr a = if isPseudoPrime' n (s, d) a then Prime else Witness a
           {-"\vspace{1em}"-}          
           combine (Prime:rs)  = combine rs
           combine []          = Prime
           combine (r:rs)      = r

{-"\vspace{1em}"-}           
millrabDet n = millrab n n           

isMillRabPrime p = case  millrabDet p of
                         Prime  -> True
                         _      -> False

\end{code}

\section{Generating candidate primes}


After using the Miller-Rabin test on the sieved generators, the actual
twin prime candidates are generated using the following polynomial
(The parameter |m| is used to set the order of magnitude of generated candidates).

%format Gen = "\mathcal{G}"                        
%format h0 = "h_0"        
%format f1 = "f_1"
%format f2 = "f_2"
        
\begin{minipage}{\textwidth}
\begin{code}
type Gen = Integer -> Integer
    
f1 :: Integer -> Gen 
f1 m x = (h0 + c * x) * (pow 2 m) - 1
    where  h0  = 5775
           c   = product (take 6 primes)
           e   = 3299
\end{code}
\end{minipage}

So finally everything is ready to search for twin prime pairs between $f_1(2^k)$ and $f_2(2^{k+n})$:

\begin{code}
findGenerators :: Integer -> Integer -> [Integer]
findGenerators k n = filter isMillRabPrime (sieveInterval k n)
{-"\vspace{1em}"-}           
findCandidates :: (Gen, Gen) -> Integer -> Integer -> [(Integer, Integer)]
findCandidates (f1, f2) k n =  let  gs = findGenerators k n
                               in   map (\g -> (f1 g, f2 g)) gs
\end{code}

\section{Sieving twin primes}

By this point, we have developed the necessary toolkit to create our
twin prime sieve. Since the deterministic Miller-Rabin test has a time
complexity in $O(\log^4 n)$, first a probabilistic Miller-Rabin test
is done on both numbers of the twin prime pair candidate.

%format p1 = "p_1"
%format p2 = "p_2"          
          
\begin{code}
both :: (a -> Bool) -> (a, a) -> Bool
both p (x, y) = p x && p y
{-"\vspace{1em}"-}
findPrimePairsProb :: (Gen, Gen) -> Integer -> Integer -> [(Integer, Integer)]
findPrimePairsProb (f1, f2) k n =  let  ns   = findCandidates (f1, f2) k n
                                   in   filter (both isMillRabPrimeProb) ns
          where  isMillRabPrimeProb p = millrab p 2 == Prime
{-"\vspace{1em}"-}
findPrimePairs :: (Gen, Gen) -> Integer -> Integer -> [(Integer, Integer)]
findPrimePairs (f1, f2) k n =  filter (both isMillRabPrime) (findPrimePairsProb (f1, f2) k n)
\end{code}

With this, twin prime sieving is a simple special case of searching
for prime pairs in the form $(p, p + 2)$.

\begin{code}
findTwinPrimes f1 = findPrimePairs (f1, f2)
    where f2 x = f1 x + 2
\end{code}

\appendix
\section{Results}

First runs of the above program actually resulted in disappointment:
as soon as the numbers involved hit ten to tweleve decimal digits,
memory usage started to grow so fast that it filled all available
space, resulting in the OS killing the program. It soon became
apparent that this problem was caused by the na\"ive implementation of
the |powMod n x y| function by first doing the exponentiation, and
then taking the remainder. So instead, the final version used the
following implementation:

        
\begin{code}
powMod :: Integer -> Integer -> Integer -> Integer
powMod n x 1            = x `mod` n
powMod n x y  | even y  = ((powMod n x (y `div` 2)) ^ 2) `mod` n
              | odd y   = ((powMod n x (y-1)) * x) `mod` n
\end{code}

The actual search for twin primes was done by setting |h0| and |c| (as
in the definition of |f1|) and targeting given orders of magnitude by
tuning |m|. Time needed to find the first twin prime pair for a given
|m|, and time needed to check primality of the first number of this
pair was measured. Results are shown in figures
\ref{fig:firsttwin-time} and \ref{fig:millrab-time} compared to the
predicted run time in $\Theta(\log^4 p)$.

\newcommand{\twinprime}[2]{\ensuremath{(5775 + 30030 \cdot #2) \cdot 2^{#1} \pm 1}}

The largest twin prime pair discovered during testing was                                                            
$\twinprime{809}{261847} \sim 10^{254}$.


\section{Possibilities for improvements}

The approach presented here lends itself to parallelisation. For
starters, the Miller-Rabin testings of candidates are independent from
each other, thus, this could be trivially paralellised by a suitably
smart Haskell compiler\cite{gum}.

On a more refined scale, checking for witnesses for a given candidate
can also be done in a parallel fashion. This, however, needs some kind
of synchronization since once a witness is found, checking for others
would be redundant.

A simple change to the code that requires no paralellism but can speed
things up is to merge the checking of $p$ and $p+2$, by first checking
if both are pseudo-prime for 2, then if both are for 3, and so
on. This way, if the first member of the pair is prime, but the second
one is not, this fact is discovered before the full Miller-Rabin check
of the first number would be finished.

\tikzstyle{every pin}=[fill=white, draw=black, font=\footnotesize]       

\begin{figure}[h!]
  \centering
  \begin{tikzpicture}
        [
         every plot legend/.style= {at={(0,0.5)}, anchor=west},
         every mark/.append style={scale=0.5}
         ]
  \begin{axis}[ 
         % height=9cm,
         % width=9cm,
         xlabel={$m$: magnitude},
         ylabel={$t$: CPU time [$s$]}
         ]
       \addplot[color=red, mark=o, only marks] table[x index=0, y index=3] {twin-sieve.table};
       %\addlegendentry{Mérési eredmények}

       \addplot[color=black, style=dotted] expression[domain=0:809] {8.57143*10^(-9) * x^4 + 1};
       %\addlegendentry{$C\cdot m^4$}

       \node[coordinate,pin=left:{$(5775 + 30030 \cdot 261847)\cdot 2^{809} \pm 1$}]
           at (axis cs:809,3454.14) {};
       
       %\addplot[color=blue] expression[domain=0:250] {x^4};
  \end{axis}
   
  \end{tikzpicture}
  \caption{Time needed to find the first twin prime pair of a given magnitude}
  \label{fig:firsttwin-time}
\end{figure}
        
\begin{figure}[h!]
  \centering
  \begin{tikzpicture}
        [
         every plot legend/.style= {at={(0,0.5)}, anchor=west},
         every mark/.append style={scale=0.5}
         ]
  \begin{axis}[ 
         % height=9cm,
         % width=9cm,
         xlabel={$m$: magnitude},
         ylabel={$t$: CPU time [$s$]}
         ]
       \addplot[color=red, mark=o, only marks] table[x index=0, y index=4] {twin-sieve.table};
       %\addlegendentry{Mérési eredmények}

       \addplot[color=black, style=dotted] expression[domain=0:809] {5.18832*10^(-9) * x^4 + 1};
  \end{axis}
   
  \end{tikzpicture}
  \caption{Running time of the deterministic Miller-Rabin primality test}
  \label{fig:millrab-time}
\end{figure}

\comment{                
\begin{table}[h!]
  \centering
  \begin{tabular}{cr}
        $p$ & $\log_{10} p$ \\ \midrule
        \input{twin-sieve-tab}
  \end{tabular}
  \caption{A talált ikerprímek}
\end{table}
}      

\bibliographystyle{unsrt}
\bibliography{citations}
                                       
\comment{                
\begin{code}        
slowPrimeTest p = all (\k -> not $ k `divides` p) [2..floor (sqrt $ fromIntegral p)]
                                 
finv m y = (((y + 1) `div` (2^m)) - 5775) `div` 30030
                     
\end{code}}
        
\end{document}