Browse Source

Squashed 'src/secp256k1/' changes from b0210a9..bccaf86

bccaf86 Merge pull request #150
2a53a47 Merge pull request #151
5f5a31f Merge pull request #149
3907277 Merge pull request #142
a3e0611 Enable tests in x86 travis builds
45da235 x86 builder
8bb0e93 Merge pull request #155
971fe81 build: fix openssl detection for cross builds
f22d73e Explicitly access %0..%2 as 64-bit so we use the right registers for x32 ABI
e66d4d6 Avoid the stack in assembly and use explicit registers
cf7b2b4 Fix ECDSA message hashes to 32 bytes
056ad31 Really compile with -O3 by default
74ad63a Merge pull request #146
9000458 Merge pull request #145
1f46b00 build: fix __builtin_expect detection for clang
aaba2e0 Merge pull request #136
8a0775c Merge pull request #144
ee1eaa7 Merge pull request #141
c88e2b8 Compile with -O3 by default
6558a26 Make the benchmarks print out stats
000bdf6 Rename bench_verify to bench_recovery
7c6fed2 Add a few more additional tests.
992e03b travis: add clang to the test matrix
b43b79a Merge pull request #143
e06a924 Include time.h header for time().
8d11164 Add some additional tests.
3545627 Merge pull request #118
6a9901e Merge pull request #137
376b28b Merge pull request #128
1728806 Merge pull request #138
a5759c5 Check return value of malloc
39bd94d Variable time normalize
ad86bdf Merge pull request #140
54b768c Another redundant secp256k1_fe_normalize
69dcaab Merge pull request #139
1c29f2e Remove redundant secp256k1_fe_normalize from secp256k1_gej_add_ge_var.
2b9388b Remove unused secp256k1_fe_inv_all
f461b76 Allocate precomputation arrays on the heap
b2c9681 Make {mul,sqr}_inner use the same argument order as {mul,sqr}
6793505 Convert YASM code into inline assembly
f048615 Rewrite field assembly to match the C version
3ce74b1 Tweak precomputed table size for G

git-subtree-dir: src/secp256k1
git-subtree-split: bccaf86caa9c44166e5a66600b742c516e03c3f0
pull/1/head
Pieter Wuille 8 years ago
parent
commit
ecae2acb06
  1. 19
      .travis.yml
  2. 30
      Makefile.am
  3. 51
      build-aux/m4/bitcoin_secp.m4
  4. 11
      configure.ac
  5. 32
      include/secp256k1.h
  6. 57
      nasm_lt.sh
  7. 37
      src/bench.h
  8. 45
      src/bench_inv.c
  9. 46
      src/bench_recover.c
  10. 49
      src/bench_sign.c
  11. 59
      src/bench_verify.c
  12. 8
      src/ecdsa_impl.h
  13. 8
      src/eckey_impl.h
  14. 6
      src/ecmult_gen_impl.h
  15. 17
      src/ecmult_impl.h
  16. 8
      src/field.h
  17. 64
      src/field_10x26_impl.h
  18. 469
      src/field_5x52_asm.asm
  19. 495
      src/field_5x52_asm_impl.h
  20. 48
      src/field_5x52_impl.h
  21. 4
      src/field_5x52_int128_impl.h
  22. 4
      src/field_gmp_impl.h
  23. 32
      src/field_impl.h
  24. 7
      src/group.h
  25. 59
      src/group_impl.h
  26. 2
      src/scalar_impl.h
  27. 38
      src/secp256k1.c
  28. 332
      src/tests.c
  29. 6
      src/util.h

19
.travis.yml

@ -1,12 +1,14 @@ @@ -1,12 +1,14 @@
language: cpp
compiler: gcc
language: c
compiler:
- clang
- gcc
install:
- sudo apt-get install -qq libssl-dev
- if [ "$BIGNUM" = "gmp" -o "$BIGNUM" = "auto" -o "$FIELD" = "gmp" ]; then sudo apt-get install -qq libgmp-dev; fi
- if [ "$FIELD" = "64bit_asm" ]; then sudo apt-get install -qq yasm; fi
- if [ "$BIGNUM" = "gmp" -o "$BIGNUM" = "auto" -o "$FIELD" = "gmp" ]; then sudo apt-get install --no-install-recommends --no-upgrade -qq libgmp-dev; fi
- if [ -n "$EXTRAPACKAGES" ]; then sudo apt-get update && sudo apt-get install --no-install-recommends --no-upgrade $EXTRAPACKAGES; fi
env:
global:
- FIELD=auto BIGNUM=auto SCALAR=auto ENDOMORPHISM=no BUILD=check EXTRAFLAGS=
- FIELD=auto BIGNUM=auto SCALAR=auto ENDOMORPHISM=no BUILD=check EXTRAFLAGS= HOST= EXTRAPACKAGES=
matrix:
- SCALAR=32bit
- SCALAR=64bit
@ -22,6 +24,11 @@ env: @@ -22,6 +24,11 @@ env:
- BIGNUM=none ENDOMORPHISM=yes
- BUILD=distcheck
- EXTRAFLAGS=CFLAGS=-DDETERMINISTIC
- HOST=i686-linux-gnu EXTRAPACKAGES="gcc-multilib"
- HOST=i686-linux-gnu EXTRAPACKAGES="gcc-multilib" ENDOMORPHISM=yes
before_script: ./autogen.sh
script: ./configure --enable-endomorphism=$ENDOMORPHISM --with-field=$FIELD --with-bignum=$BIGNUM --with-scalar=$SCALAR $EXTRAFLAGS && make -j2 $BUILD
script:
- if [ -n "$HOST" ]; then export USE_HOST="--host=$HOST"; fi
- if [ "x$HOST" = "xi686-linux-gnu" ]; then export CC="$CC -m32"; fi
- ./configure --enable-endomorphism=$ENDOMORPHISM --with-field=$FIELD --with-bignum=$BIGNUM --with-scalar=$SCALAR $EXTRAFLAGS $USE_HOST && make -j2 $BUILD
os: linux

30
Makefile.am

@ -1,12 +1,6 @@ @@ -1,12 +1,6 @@
ACLOCAL_AMFLAGS = -I build-aux/m4
lib_LTLIBRARIES = libsecp256k1.la
if USE_ASM
COMMON_LIB = libsecp256k1_common.la
else
COMMON_LIB =
endif
noinst_LTLIBRARIES = $(COMMON_LIB)
include_HEADERS = include/secp256k1.h
noinst_HEADERS =
noinst_HEADERS += src/scalar.h
@ -43,30 +37,30 @@ noinst_HEADERS += src/field_gmp.h @@ -43,30 +37,30 @@ noinst_HEADERS += src/field_gmp.h
noinst_HEADERS += src/field_gmp_impl.h
noinst_HEADERS += src/field.h
noinst_HEADERS += src/field_impl.h
noinst_HEADERS += src/bench.h
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = libsecp256k1.pc
if USE_ASM
libsecp256k1_common_la_SOURCES = src/field_5x52_asm.asm
endif
libsecp256k1_la_SOURCES = src/secp256k1.c
libsecp256k1_la_CPPFLAGS = -I$(top_srcdir)/include $(SECP_INCLUDES)
libsecp256k1_la_LIBADD = $(COMMON_LIB) $(SECP_LIBS)
libsecp256k1_la_LIBADD = $(SECP_LIBS)
noinst_PROGRAMS =
if USE_BENCHMARK
noinst_PROGRAMS += bench_verify bench_sign bench_inv
noinst_PROGRAMS += bench_verify bench_recover bench_sign bench_inv
bench_verify_SOURCES = src/bench_verify.c
bench_verify_LDADD = libsecp256k1.la $(SECP_LIBS)
bench_verify_LDFLAGS = -static
bench_recover_SOURCES = src/bench_recover.c
bench_recover_LDADD = libsecp256k1.la $(SECP_LIBS)
bench_recover_LDFLAGS = -static
bench_sign_SOURCES = src/bench_sign.c
bench_sign_LDADD = libsecp256k1.la $(SECP_LIBS)
bench_sign_LDFLAGS = -static
bench_inv_SOURCES = src/bench_inv.c
bench_inv_LDADD = $(COMMON_LIB) $(SECP_LIBS)
bench_inv_LDADD = $(SECP_LIBS)
bench_inv_LDFLAGS = -static
bench_inv_CPPFLAGS = $(SECP_INCLUDES)
endif
@ -75,15 +69,9 @@ if USE_TESTS @@ -75,15 +69,9 @@ if USE_TESTS
noinst_PROGRAMS += tests
tests_SOURCES = src/tests.c
tests_CPPFLAGS = -DVERIFY $(SECP_INCLUDES) $(SECP_TEST_INCLUDES)
tests_LDADD = $(COMMON_LIB) $(SECP_LIBS) $(SECP_TEST_LIBS)
tests_LDADD = $(SECP_LIBS) $(SECP_TEST_LIBS)
tests_LDFLAGS = -static
TESTS = tests
endif
EXTRA_DIST = autogen.sh nasm_lt.sh
#x86_64 only
if USE_ASM
.asm.lo:
$(LIBTOOL) --mode=compile --tag YASM $(srcdir)/nasm_lt.sh $(YASM) -f $(YASM_BINFMT) $(YAFLAGS) -I$(srcdir) -I. $< -o $@
endif
EXTRA_DIST = autogen.sh

51
build-aux/m4/bitcoin_secp.m4

@ -11,38 +11,16 @@ fi @@ -11,38 +11,16 @@ fi
dnl
AC_DEFUN([SECP_64BIT_ASM_CHECK],[
if test x"$host_cpu" == x"x86_64"; then
AC_CHECK_PROG(YASM, yasm, yasm)
else
if test x"$set_field" = x"64bit_asm"; then
AC_MSG_ERROR([$set_field field support explicitly requested but is not compatible with this host])
fi
fi
if test x$YASM = x; then
if test x"$set_field" = x"64bit_asm"; then
AC_MSG_ERROR([$set_field field support explicitly requested but yasm was not found])
fi
has_64bit_asm=no
else
case x"$host_os" in
xdarwin*)
YASM_BINFMT=macho64
;;
x*-gnux32)
YASM_BINFMT=elfx32
;;
*)
YASM_BINFMT=elf64
;;
esac
if $YASM -f help | grep -q $YASM_BINFMT; then
has_64bit_asm=yes
else
if test x"$set_field" = x"64bit_asm"; then
AC_MSG_ERROR([$set_field field support explicitly requested but yasm doesn't support $YASM_BINFMT format])
fi
AC_MSG_WARN([yasm too old for $YASM_BINFMT format])
has_64bit_asm=no
AC_MSG_CHECKING(for x86_64 assembly availability)
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
#include <stdint.h>]],[[
uint64_t a = 11, tmp;
__asm__ __volatile__("movq $0x100000000,%1; mulq %%rsi" : "+a"(a) : "S"(tmp) : "cc", "%rdx");
]])],[has_64bit_asm=yes],[has_64bit_asm=no])
AC_MSG_RESULT([$has_64bit_asm])
if test x"$set_field" == x"64bit_asm"; then
if test x"$has_64bit_asm" == x"no"; then
AC_MSG_ERROR([$set_field field support explicitly requested but no x86_64 assembly available])
fi
fi
])
@ -52,8 +30,13 @@ AC_DEFUN([SECP_OPENSSL_CHECK],[ @@ -52,8 +30,13 @@ AC_DEFUN([SECP_OPENSSL_CHECK],[
if test x"$use_pkgconfig" = x"yes"; then
: #NOP
m4_ifdef([PKG_CHECK_MODULES],[
PKG_CHECK_MODULES([CRYPTO], [libcrypto], [has_libcrypto=yes; AC_DEFINE(HAVE_LIBCRYPTO,1,[Define this symbol if libcrypto is installed])],[has_libcrypto=no])
: #NOP
PKG_CHECK_MODULES([CRYPTO], [libcrypto], [has_libcrypto=yes],[has_libcrypto=no])
if test x"$has_libcrypto" = x"yes"; then
TEMP_LIBS="$LIBS"
LIBS="$LIBS $CRYPTO_LIBS"
AC_CHECK_LIB(crypto, main,[AC_DEFINE(HAVE_LIBCRYPTO,1,[Define this symbol if libcrypto is installed])],[has_libcrypto=no])
LIBS="$TEMP_LIBS"
fi
])
else
AC_CHECK_HEADER(openssl/crypto.h,[AC_CHECK_LIB(crypto, main,[has_libcrypto=yes; CRYPTO_LIBS=-lcrypto; AC_DEFINE(HAVE_LIBCRYPTO,1,[Define this symbol if libcrypto is installed])]

11
configure.ac

@ -18,6 +18,10 @@ AC_PATH_TOOL(AR, ar) @@ -18,6 +18,10 @@ AC_PATH_TOOL(AR, ar)
AC_PATH_TOOL(RANLIB, ranlib)
AC_PATH_TOOL(STRIP, strip)
if test "x$CFLAGS" = "x"; then
CFLAGS="-O3 -g"
fi
AC_PROG_CC_C99
if test x"$ac_cv_prog_cc_c99" == x"no"; then
AC_MSG_ERROR([c99 compiler support required])
@ -103,7 +107,11 @@ AC_ARG_WITH([scalar], [AS_HELP_STRING([--with-scalar=64bit|32bit|auto], @@ -103,7 +107,11 @@ AC_ARG_WITH([scalar], [AS_HELP_STRING([--with-scalar=64bit|32bit|auto],
AC_CHECK_TYPES([__int128])
AC_CHECK_DECL(__builtin_expect,AC_DEFINE(HAVE_BUILTIN_EXPECT,1,[Define this symbol if __builtin_expect is available]),,)
AC_MSG_CHECKING([for __builtin_expect])
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[void myfunc() {__builtin_expect(0,0);}]])],
[ AC_MSG_RESULT([yes]);AC_DEFINE(HAVE_BUILTIN_EXPECT,1,[Define this symbol if __builtin_expect is available]) ],
[ AC_MSG_RESULT([no])
])
if test x"$req_field" = x"auto"; then
SECP_64BIT_ASM_CHECK
@ -283,7 +291,6 @@ AC_SUBST(SECP_INCLUDES) @@ -283,7 +291,6 @@ AC_SUBST(SECP_INCLUDES)
AC_SUBST(SECP_LIBS)
AC_SUBST(SECP_TEST_LIBS)
AC_SUBST(SECP_TEST_INCLUDES)
AC_SUBST(YASM_BINFMT)
AM_CONDITIONAL([USE_ASM], [test x"$set_field" == x"64bit_asm"])
AM_CONDITIONAL([USE_TESTS], [test x"$use_tests" != x"no"])
AM_CONDITIONAL([USE_BENCHMARK], [test x"$use_benchmark" != x"no"])

32
include/secp256k1.h

@ -62,8 +62,7 @@ void secp256k1_stop(void); @@ -62,8 +62,7 @@ void secp256k1_stop(void);
* 0: incorrect signature
* -1: invalid public key
* -2: invalid signature
* In: msg: the message being verified (cannot be NULL)
* msglen: the length of the message (at most 32)
* In: msg32: the 32-byte message hash being verified (cannot be NULL)
* sig: the signature being verified (cannot be NULL)
* siglen: the length of the signature
* pubkey: the public key to verify with (cannot be NULL)
@ -71,19 +70,17 @@ void secp256k1_stop(void); @@ -71,19 +70,17 @@ void secp256k1_stop(void);
* Requires starting using SECP256K1_START_VERIFY.
*/
SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_verify(
const unsigned char *msg,
int msglen,
const unsigned char *msg32,
const unsigned char *sig,
int siglen,
const unsigned char *pubkey,
int pubkeylen
) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(5);
) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(4);
/** Create an ECDSA signature.
* Returns: 1: signature created
* 0: nonce invalid, try another one
* In: msg: the message being signed (cannot be NULL)
* msglen: the length of the message being signed (at most 32)
* In: msg32: the 32-byte message hash being signed (cannot be NULL)
* seckey: pointer to a 32-byte secret key (cannot be NULL, assumed to be valid)
* nonce: pointer to a 32-byte nonce (cannot be NULL, generated with a cryptographic PRNG)
* Out: sig: pointer to an array where the signature will be placed (cannot be NULL)
@ -92,19 +89,17 @@ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_verify( @@ -92,19 +89,17 @@ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_verify(
* Requires starting using SECP256K1_START_SIGN.
*/
SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_sign(
const unsigned char *msg,
int msglen,
const unsigned char *msg32,
unsigned char *sig,
int *siglen,
const unsigned char *seckey,
const unsigned char *nonce
) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5) SECP256K1_ARG_NONNULL(6);
) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5);
/** Create a compact ECDSA signature (64 byte + recovery id).
* Returns: 1: signature created
* 0: nonce invalid, try another one
* In: msg: the message being signed (cannot be NULL)
* msglen: the length of the message being signed (at most 32)
* In: msg32: the 32-byte message hash being signed (cannot be NULL)
* seckey: pointer to a 32-byte secret key (cannot be NULL, assumed to be valid)
* nonce: pointer to a 32-byte nonce (cannot be NULL, generated with a cryptographic PRNG)
* Out: sig: pointer to a 64-byte array where the signature will be placed (cannot be NULL)
@ -112,19 +107,17 @@ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_sign( @@ -112,19 +107,17 @@ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_sign(
* Requires starting using SECP256K1_START_SIGN.
*/
SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_sign_compact(
const unsigned char *msg,
int msglen,
const unsigned char *msg32,
unsigned char *sig64,
const unsigned char *seckey,
const unsigned char *nonce,
int *recid
) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5);
) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
/** Recover an ECDSA public key from a compact signature.
* Returns: 1: public key successfully recovered (which guarantees a correct signature).
* 0: otherwise.
* In: msg: the message assumed to be signed (cannot be NULL)
* msglen: the length of the message (at most 32)
* In: msg32: the 32-byte message hash assumed to be signed (cannot be NULL)
* sig64: signature as 64 byte array (cannot be NULL)
* compressed: whether to recover a compressed or uncompressed pubkey
* recid: the recovery id (0-3, as returned by ecdsa_sign_compact)
@ -133,14 +126,13 @@ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_sign_compact( @@ -133,14 +126,13 @@ SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_sign_compact(
* Requires starting using SECP256K1_START_VERIFY.
*/
SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdsa_recover_compact(
const unsigned char *msg,
int msglen,
const unsigned char *msg32,
const unsigned char *sig64,
unsigned char *pubkey,
int *pubkeylen,
int compressed,
int recid
) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4) SECP256K1_ARG_NONNULL(5);
) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
/** Verify an ECDSA secret key.
* Returns: 1: secret key is valid

57
nasm_lt.sh

@ -1,57 +0,0 @@ @@ -1,57 +0,0 @@
#! /bin/sh
command=""
infile=""
o_opt=no
pic=no
while [ $# -gt 0 ]; do
case "$1" in
-DPIC|-fPIC|-fpic|-Kpic|-KPIC)
if [ "$pic" != "yes" ] ; then
command="$command -DPIC"
pic=yes
fi
;;
-f|-fbin|-faout|-faoutb|-fcoff|-felf|-felf64|-fas86| \
-fobj|-fwin32|-fwin64|-frdf|-fieee|-fmacho|-fmacho64)
# it's a file format specifier for nasm.
command="$command $1"
;;
-f*)
# maybe a code-generation flag for gcc.
;;
-[Ii]*)
incdir=`echo "$1" | sed 's/^-[Ii]//'`
if [ "x$incdir" = x -a "x$2" != x ] ; then
case "$2" in
-*) ;;
*) incdir="$2"; shift;;
esac
fi
if [ "x$incdir" != x ] ; then
# In the case of NASM, the trailing slash is necessary.
incdir=`echo "$incdir" | sed 's%/*$%/%'`
command="$command -I$incdir"
fi
;;
-o*)
o_opt=yes
command="$command $1"
;;
*.asm)
infile=$1
command="$command $1"
;;
*)
command="$command $1"
;;
esac
shift
done
if [ "$o_opt" != yes ] ; then
# By default, NASM creates an output file
# in the same directory as the input file.
outfile="-o `echo $infile | sed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`.o"
command="$command $outfile"
fi
echo $command
exec $command

37
src/bench.h

@ -0,0 +1,37 @@ @@ -0,0 +1,37 @@
/**********************************************************************
* Copyright (c) 2014 Pieter Wuille *
* Distributed under the MIT software license, see the accompanying *
* file COPYING or http://www.opensource.org/licenses/mit-license.php.*
**********************************************************************/
#ifndef _SECP256K1_BENCH_H_
#define _SECP256K1_BENCH_H_
#include <stdio.h>
#include <math.h>
#include "sys/time.h"
static double gettimedouble(void) {
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_usec * 0.000001 + tv.tv_sec;
}
void run_benchmark(void (*benchmark)(void*), void (*setup)(void*), void (*teardown)(void*), void* data, int count, int iter) {
double min = HUGE_VAL;
double sum = 0.0;
double max = 0.0;
for (int i = 0; i < count; i++) {
if (setup) setup(data);
double begin = gettimedouble();
benchmark(data);
double total = gettimedouble() - begin;
if (teardown) teardown(data);
if (total < min) min = total;
if (total > max) max = total;
sum += total;
}
printf("min %.3fus / avg %.3fus / max %.3fus\n", min * 1000000.0 / iter, (sum / count) * 1000000.0 / iter, max * 1000000.0 / iter);
}
#endif

45
src/bench_inv.c

@ -12,30 +12,41 @@ @@ -12,30 +12,41 @@
#include "field_impl.h"
#include "group_impl.h"
#include "scalar_impl.h"
#include "bench.h"
typedef struct {
secp256k1_scalar_t base, x;
} bench_inv_t;
void bench_inv_setup(void* arg) {
bench_inv_t *data = (bench_inv_t*)arg;
int main(void) {
static const unsigned char init[32] = {
0x02, 0x03, 0x05, 0x07, 0x0b, 0x0d, 0x11, 0x13,
0x17, 0x1d, 0x1f, 0x25, 0x29, 0x2b, 0x2f, 0x35,
0x3b, 0x3d, 0x43, 0x47, 0x49, 0x4f, 0x53, 0x59,
0x61, 0x65, 0x67, 0x6b, 0x6d, 0x71, 0x7f, 0x83
};
static const unsigned char fini[32] = {
0xba, 0x28, 0x58, 0xd8, 0xaa, 0x11, 0xd6, 0xf2,
0xfa, 0xce, 0x50, 0xb1, 0x67, 0x19, 0xb1, 0xa6,
0xe0, 0xaa, 0x84, 0x53, 0xf6, 0x80, 0xfc, 0x23,
0x88, 0x3c, 0xd6, 0x74, 0x9f, 0x27, 0x09, 0x03
};
secp256k1_ge_start();
secp256k1_scalar_t base, x;
secp256k1_scalar_set_b32(&base, init, NULL);
secp256k1_scalar_set_b32(&x, init, NULL);
for (int i=0; i<1000000; i++) {
secp256k1_scalar_inverse(&x, &x);
secp256k1_scalar_add(&x, &x, &base);
secp256k1_scalar_set_b32(&data->base, init, NULL);
secp256k1_scalar_set_b32(&data->x, init, NULL);
}
void bench_inv(void* arg) {
bench_inv_t *data = (bench_inv_t*)arg;
for (int i=0; i<20000; i++) {
secp256k1_scalar_inverse(&data->x, &data->x);
secp256k1_scalar_add(&data->x, &data->x, &data->base);
}
unsigned char res[32];
secp256k1_scalar_get_b32(res, &x);
CHECK(memcmp(res, fini, 32) == 0);
}
int main(void) {
secp256k1_ge_start();
bench_inv_t data;
run_benchmark(bench_inv, bench_inv_setup, NULL, &data, 10, 20000);
secp256k1_ge_stop();
return 0;
}

46
src/bench_recover.c

@ -0,0 +1,46 @@ @@ -0,0 +1,46 @@
/**********************************************************************
* Copyright (c) 2014 Pieter Wuille *
* Distributed under the MIT software license, see the accompanying *
* file COPYING or http://www.opensource.org/licenses/mit-license.php.*
**********************************************************************/
#include "include/secp256k1.h"
#include "util.h"
#include "bench.h"
typedef struct {
unsigned char msg[32];
unsigned char sig[64];
} bench_recover_t;
void bench_recover(void* arg) {
bench_recover_t *data = (bench_recover_t*)arg;
unsigned char pubkey[33];
for (int i=0; i<20000; i++) {
int pubkeylen = 33;
CHECK(secp256k1_ecdsa_recover_compact(data->msg, data->sig, pubkey, &pubkeylen, 1, i % 2));
for (int j = 0; j < 32; j++) {
data->sig[j + 32] = data->msg[j]; /* Move former message to S. */
data->msg[j] = data->sig[j]; /* Move former R to message. */
data->sig[j] = pubkey[j + 1]; /* Move recovered pubkey X coordinate to R (which must be a valid X coordinate). */
}
}
}
void bench_recover_setup(void* arg) {
bench_recover_t *data = (bench_recover_t*)arg;
for (int i = 0; i < 32; i++) data->msg[i] = 1 + i;
for (int i = 0; i < 64; i++) data->sig[i] = 65 + i;
}
int main(void) {
secp256k1_start(SECP256K1_START_VERIFY);
bench_recover_t data;
run_benchmark(bench_recover, bench_recover_setup, NULL, &data, 10, 20000);
secp256k1_stop();
return 0;
}

49
src/bench_sign.c

@ -3,46 +3,45 @@ @@ -3,46 +3,45 @@
* Distributed under the MIT software license, see the accompanying *
* file COPYING or http://www.opensource.org/licenses/mit-license.php.*
**********************************************************************/
#include <stdio.h>
#include <string.h>
#include "include/secp256k1.h"
#include "util.h"
#include "bench.h"
int main(void) {
secp256k1_start(SECP256K1_START_SIGN);
typedef struct {
unsigned char msg[32];
unsigned char nonce[32];
unsigned char key[32];
} bench_sign_t;
for (int i = 0; i < 32; i++) msg[i] = i + 1;
for (int i = 0; i < 32; i++) nonce[i] = i + 33;
for (int i = 0; i < 32; i++) key[i] = i + 65;
static void bench_sign_setup(void* arg) {
bench_sign_t *data = (bench_sign_t*)arg;
unsigned char sig[64];
for (int i = 0; i < 32; i++) data->msg[i] = i + 1;
for (int i = 0; i < 32; i++) data->nonce[i] = i + 33;
for (int i = 0; i < 32; i++) data->key[i] = i + 65;
}
static void bench_sign(void* arg) {
bench_sign_t *data = (bench_sign_t*)arg;
for (int i=0; i<1000000; i++) {
unsigned char sig[64];
for (int i=0; i<20000; i++) {
int recid = 0;
CHECK(secp256k1_ecdsa_sign_compact(msg, 32, sig, key, nonce, &recid));
CHECK(secp256k1_ecdsa_sign_compact(data->msg, sig, data->key, data->nonce, &recid));
for (int j = 0; j < 32; j++) {
nonce[j] = key[j]; /* Move former key to nonce */
msg[j] = sig[j]; /* Move former R to message. */
key[j] = sig[j + 32]; /* Move former S to key. */
data->nonce[j] = data->key[j]; /* Move former key to nonce */
data->msg[j] = sig[j]; /* Move former R to message. */
data->key[j] = sig[j + 32]; /* Move former S to key. */
}
}
}
int main(void) {
secp256k1_start(SECP256K1_START_SIGN);
static const unsigned char fini[64] = {
0x92, 0x03, 0xef, 0xf1, 0x58, 0x0b, 0x49, 0x8d,
0x22, 0x3d, 0x49, 0x0e, 0xbf, 0x26, 0x50, 0x0e,
0x2d, 0x62, 0x90, 0xd7, 0x82, 0xbd, 0x3d, 0x5c,
0xa9, 0x10, 0xa5, 0x49, 0xb1, 0xd8, 0x8c, 0xc0,
0x5b, 0x5e, 0x9e, 0x68, 0x51, 0x3d, 0xe8, 0xec,
0x82, 0x30, 0x82, 0x88, 0x8c, 0xfd, 0xe7, 0x71,
0x15, 0x92, 0xfc, 0x14, 0x59, 0x78, 0x31, 0xb3,
0xf6, 0x07, 0x91, 0x18, 0x00, 0x8d, 0x4c, 0xb2
};
CHECK(memcmp(sig, fini, 64) == 0);
bench_sign_t data;
run_benchmark(bench_sign, bench_sign_setup, NULL, &data, 10, 20000);
secp256k1_stop();
return 0;

59
src/bench_verify.c

@ -9,35 +9,46 @@ @@ -9,35 +9,46 @@
#include "include/secp256k1.h"
#include "util.h"
#include "bench.h"
int main(void) {
secp256k1_start(SECP256K1_START_VERIFY);
typedef struct {
unsigned char msg[32];
unsigned char sig[64];
for (int i = 0; i < 32; i++) msg[i] = 1 + i;
for (int i = 0; i < 64; i++) sig[i] = 65 + i;
unsigned char key[32];
unsigned char nonce[32];
unsigned char sig[72];
int siglen;
unsigned char pubkey[33];
for (int i=0; i<1000000; i++) {
int pubkeylen = 33;
CHECK(secp256k1_ecdsa_recover_compact(msg, 32, sig, pubkey, &pubkeylen, 1, i % 2));
for (int j = 0; j < 32; j++) {
sig[j + 32] = msg[j]; /* Move former message to S. */
msg[j] = sig[j]; /* Move former R to message. */
sig[j] = pubkey[j + 1]; /* Move recovered pubkey X coordinate to R (which must be a valid X coordinate). */
}
int pubkeylen;
} benchmark_verify_t;
static void benchmark_verify(void* arg) {
benchmark_verify_t* data = (benchmark_verify_t*)arg;
for (int i=0; i<20000; i++) {
data->sig[data->siglen - 1] ^= (i & 0xFF);
data->sig[data->siglen - 2] ^= ((i >> 8) & 0xFF);
data->sig[data->siglen - 3] ^= ((i >> 16) & 0xFF);
CHECK(secp256k1_ecdsa_verify(data->msg, data->sig, data->siglen, data->pubkey, data->pubkeylen) == (i == 0));
data->sig[data->siglen - 1] ^= (i & 0xFF);
data->sig[data->siglen - 2] ^= ((i >> 8) & 0xFF);
data->sig[data->siglen - 3] ^= ((i >> 16) & 0xFF);
}
}
int main(void) {
secp256k1_start(SECP256K1_START_VERIFY | SECP256K1_START_SIGN);
benchmark_verify_t data;
for (int i = 0; i < 32; i++) data.msg[i] = 1 + i;
for (int i = 0; i < 32; i++) data.key[i] = 33 + i;
for (int i = 0; i < 32; i++) data.nonce[i] = 65 + i;
data.siglen = 72;
CHECK(secp256k1_ecdsa_sign(data.msg, data.sig, &data.siglen, data.key, data.nonce));
data.pubkeylen = 33;
CHECK(secp256k1_ec_pubkey_create(data.pubkey, &data.pubkeylen, data.key, 1));
static const unsigned char fini[33] = {
0x02,
0x52, 0x63, 0xae, 0x9a, 0x9d, 0x47, 0x1f, 0x1a,
0xb2, 0x36, 0x65, 0x89, 0x11, 0xe7, 0xcc, 0x86,
0xa3, 0xab, 0x97, 0xb6, 0xf1, 0xaf, 0xfd, 0x8f,
0x9b, 0x38, 0xb6, 0x18, 0x55, 0xe5, 0xc2, 0x43
};
CHECK(memcmp(fini, pubkey, 33) == 0);
run_benchmark(benchmark_verify, NULL, NULL, &data, 10, 20000);
secp256k1_stop();
return 0;

8
src/ecdsa_impl.h

@ -27,7 +27,7 @@ static void secp256k1_ecdsa_start(void) { @@ -27,7 +27,7 @@ static void secp256k1_ecdsa_start(void) {
return;
/* Allocate. */
secp256k1_ecdsa_consts_t *ret = (secp256k1_ecdsa_consts_t*)malloc(sizeof(secp256k1_ecdsa_consts_t));
secp256k1_ecdsa_consts_t *ret = (secp256k1_ecdsa_consts_t*)checked_malloc(sizeof(secp256k1_ecdsa_consts_t));
static const unsigned char order[] = {
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
@ -38,7 +38,7 @@ static void secp256k1_ecdsa_start(void) { @@ -38,7 +38,7 @@ static void secp256k1_ecdsa_start(void) {
secp256k1_fe_set_b32(&ret->order_as_fe, order);
secp256k1_fe_negate(&ret->p_minus_order, &ret->order_as_fe, 1);
secp256k1_fe_normalize(&ret->p_minus_order);
secp256k1_fe_normalize_var(&ret->p_minus_order);
/* Set the global pointer. */
secp256k1_ecdsa_consts = ret;
@ -122,7 +122,7 @@ static int secp256k1_ecdsa_sig_recompute(secp256k1_scalar_t *r2, const secp256k1 @@ -122,7 +122,7 @@ static int secp256k1_ecdsa_sig_recompute(secp256k1_scalar_t *r2, const secp256k1
secp256k1_gej_t pr; secp256k1_ecmult(&pr, &pubkeyj, &u2, &u1);
if (!secp256k1_gej_is_infinity(&pr)) {
secp256k1_fe_t xr; secp256k1_gej_get_x_var(&xr, &pr);
secp256k1_fe_normalize(&xr);
secp256k1_fe_normalize_var(&xr);
unsigned char xrb[32]; secp256k1_fe_get_b32(xrb, &xr);
secp256k1_scalar_set_b32(r2, xrb, NULL);
ret = 1;
@ -144,7 +144,7 @@ static int secp256k1_ecdsa_sig_recover(const secp256k1_ecdsa_sig_t *sig, secp256 @@ -144,7 +144,7 @@ static int secp256k1_ecdsa_sig_recover(const secp256k1_ecdsa_sig_t *sig, secp256
secp256k1_fe_add(&fx, &secp256k1_ecdsa_consts->order_as_fe);
}
secp256k1_ge_t x;
if (!secp256k1_ge_set_xo(&x, &fx, recid & 1))
if (!secp256k1_ge_set_xo_var(&x, &fx, recid & 1))
return 0;
secp256k1_gej_t xj;
secp256k1_gej_set_ge(&xj, &x);

8
src/eckey_impl.h

@ -17,7 +17,7 @@ @@ -17,7 +17,7 @@
static int secp256k1_eckey_pubkey_parse(secp256k1_ge_t *elem, const unsigned char *pub, int size) {
if (size == 33 && (pub[0] == 0x02 || pub[0] == 0x03)) {
secp256k1_fe_t x;
return secp256k1_fe_set_b32(&x, pub+1) && secp256k1_ge_set_xo(elem, &x, pub[0] == 0x03);
return secp256k1_fe_set_b32(&x, pub+1) && secp256k1_ge_set_xo_var(elem, &x, pub[0] == 0x03);
} else if (size == 65 && (pub[0] == 0x04 || pub[0] == 0x06 || pub[0] == 0x07)) {
secp256k1_fe_t x, y;
if (!secp256k1_fe_set_b32(&x, pub+1) || !secp256k1_fe_set_b32(&y, pub+33)) {
@ -26,7 +26,7 @@ static int secp256k1_eckey_pubkey_parse(secp256k1_ge_t *elem, const unsigned cha @@ -26,7 +26,7 @@ static int secp256k1_eckey_pubkey_parse(secp256k1_ge_t *elem, const unsigned cha
secp256k1_ge_set_xy(elem, &x, &y);
if ((pub[0] == 0x06 || pub[0] == 0x07) && secp256k1_fe_is_odd(&y) != (pub[0] == 0x07))
return 0;
return secp256k1_ge_is_valid(elem);
return secp256k1_ge_is_valid_var(elem);
} else {
return 0;
}
@ -36,8 +36,8 @@ static int secp256k1_eckey_pubkey_serialize(secp256k1_ge_t *elem, unsigned char @@ -36,8 +36,8 @@ static int secp256k1_eckey_pubkey_serialize(secp256k1_ge_t *elem, unsigned char
if (secp256k1_ge_is_infinity(elem)) {
return 0;
}
secp256k1_fe_normalize(&elem->x);
secp256k1_fe_normalize(&elem->y);
secp256k1_fe_normalize_var(&elem->x);
secp256k1_fe_normalize_var(&elem->y);
secp256k1_fe_get_b32(&pub[1], &elem->x);
if (compressed) {
*size = 33;

6
src/ecmult_gen_impl.h

@ -34,7 +34,7 @@ static void secp256k1_ecmult_gen_start(void) { @@ -34,7 +34,7 @@ static void secp256k1_ecmult_gen_start(void) {
return;
/* Allocate the precomputation table. */
secp256k1_ecmult_gen_consts_t *ret = (secp256k1_ecmult_gen_consts_t*)malloc(sizeof(secp256k1_ecmult_gen_consts_t));
secp256k1_ecmult_gen_consts_t *ret = (secp256k1_ecmult_gen_consts_t*)checked_malloc(sizeof(secp256k1_ecmult_gen_consts_t));
/* get the generator */
const secp256k1_ge_t *g = &secp256k1_ge_consts->g;
@ -47,7 +47,7 @@ static void secp256k1_ecmult_gen_start(void) { @@ -47,7 +47,7 @@ static void secp256k1_ecmult_gen_start(void) {
secp256k1_fe_t nums_x;
VERIFY_CHECK(secp256k1_fe_set_b32(&nums_x, nums_b32));
secp256k1_ge_t nums_ge;
VERIFY_CHECK(secp256k1_ge_set_xo(&nums_ge, &nums_x, 0));
VERIFY_CHECK(secp256k1_ge_set_xo_var(&nums_ge, &nums_x, 0));
secp256k1_gej_set_ge(&nums_gej, &nums_ge);
/* Add G to make the bits in x uniformly distributed. */
secp256k1_gej_add_ge_var(&nums_gej, &nums_gej, g);
@ -73,7 +73,7 @@ static void secp256k1_ecmult_gen_start(void) { @@ -73,7 +73,7 @@ static void secp256k1_ecmult_gen_start(void) {
secp256k1_gej_double_var(&numsbase, &numsbase);
if (j == 62) {
/* In the last iteration, numsbase is (1 - 2^j) * nums instead. */
secp256k1_gej_neg(&numsbase, &numsbase);
secp256k1_gej_neg_var(&numsbase, &numsbase);
secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej);
}
}

17
src/ecmult_impl.h

@ -15,11 +15,13 @@ @@ -15,11 +15,13 @@
#define WINDOW_A 5
/** larger numbers may result in slightly better performance, at the cost of
exponentially larger precomputed tables. WINDOW_G == 14 results in 640 KiB. */
exponentially larger precomputed tables. */
#ifdef USE_ENDOMORPHISM
#define WINDOW_G 14
#else
/** Two tables for window size 15: 1.375 MiB. */
#define WINDOW_G 15
#else
/** One table for window size 16: 1.375 MiB. */
#define WINDOW_G 16
#endif
/** Fill a table 'pre' with precomputed odd multiples of a. W determines the size of the table.
@ -43,13 +45,14 @@ static void secp256k1_ecmult_table_precomp_gej_var(secp256k1_gej_t *pre, const s @@ -43,13 +45,14 @@ static void secp256k1_ecmult_table_precomp_gej_var(secp256k1_gej_t *pre, const s
static void secp256k1_ecmult_table_precomp_ge_var(secp256k1_ge_t *pre, const secp256k1_gej_t *a, int w) {
const int table_size = 1 << (w-2);
secp256k1_gej_t prej[table_size];
secp256k1_gej_t *prej = checked_malloc(sizeof(secp256k1_gej_t) * table_size);
prej[0] = *a;
secp256k1_gej_t d; secp256k1_gej_double_var(&d, a);
for (int i=1; i<table_size; i++) {
secp256k1_gej_add_var(&prej[i], &d, &prej[i-1]);
}
secp256k1_ge_set_all_gej_var(table_size, pre, prej);
free(prej);
}
/** The number of entries a table with precomputed multiples needs to have. */
@ -67,8 +70,8 @@ static void secp256k1_ecmult_table_precomp_ge_var(secp256k1_ge_t *pre, const sec @@ -67,8 +70,8 @@ static void secp256k1_ecmult_table_precomp_ge_var(secp256k1_ge_t *pre, const sec
(neg)((r), &(pre)[(-(n)-1)/2]); \
} while(0)
#define ECMULT_TABLE_GET_GEJ(r,pre,n,w) ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_gej_neg)
#define ECMULT_TABLE_GET_GE(r,pre,n,w) ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_ge_neg)
#define ECMULT_TABLE_GET_GEJ(r,pre,n,w) ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_gej_neg_var)
#define ECMULT_TABLE_GET_GE(r,pre,n,w) ECMULT_TABLE_GET((r),(pre),(n),(w),secp256k1_ge_neg_var)
typedef struct {
/* For accelerating the computation of a*P + b*G: */
@ -85,7 +88,7 @@ static void secp256k1_ecmult_start(void) { @@ -85,7 +88,7 @@ static void secp256k1_ecmult_start(void) {
return;
/* Allocate the precomputation table. */
secp256k1_ecmult_consts_t *ret = (secp256k1_ecmult_consts_t*)malloc(sizeof(secp256k1_ecmult_consts_t));
secp256k1_ecmult_consts_t *ret = (secp256k1_ecmult_consts_t*)checked_malloc(sizeof(secp256k1_ecmult_consts_t));
/* get the generator */
const secp256k1_ge_t *g = &secp256k1_ge_consts->g;

8
src/field.h

@ -50,6 +50,9 @@ static void secp256k1_fe_stop(void); @@ -50,6 +50,9 @@ static void secp256k1_fe_stop(void);
/** Normalize a field element. */
static void secp256k1_fe_normalize(secp256k1_fe_t *r);
/** Normalize a field element, without constant-time guarantee. */
static void secp256k1_fe_normalize_var(secp256k1_fe_t *r);
/** Set a field element equal to a small integer. Resulting field element is normalized. */
static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a);
@ -93,7 +96,7 @@ static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a); @@ -93,7 +96,7 @@ static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a);
/** Sets a field element to be the (modular) square root (if any exist) of another. Requires the
* input's magnitude to be at most 8. The output magnitude is 1 (but not guaranteed to be
* normalized). Return value indicates whether a square root was found. */
static int secp256k1_fe_sqrt(secp256k1_fe_t *r, const secp256k1_fe_t *a);
static int secp256k1_fe_sqrt_var(secp256k1_fe_t *r, const secp256k1_fe_t *a);
/** Sets a field element to be the (modular) inverse of another. Requires the input's magnitude to be
* at most 8. The output magnitude is 1 (but not guaranteed to be normalized). */
@ -105,9 +108,6 @@ static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a); @@ -105,9 +108,6 @@ static void secp256k1_fe_inv_var(secp256k1_fe_t *r, const secp256k1_fe_t *a);
/** Calculate the (modular) inverses of a batch of field elements. Requires the inputs' magnitudes to be
* at most 8. The output magnitudes are 1 (but not guaranteed to be normalized). The inputs and
* outputs must not overlap in memory. */
static void secp256k1_fe_inv_all(size_t len, secp256k1_fe_t r[len], const secp256k1_fe_t a[len]);
/** Potentially faster version of secp256k1_fe_inv_all, without constant-time guarantee. */
static void secp256k1_fe_inv_all_var(size_t len, secp256k1_fe_t r[len], const secp256k1_fe_t a[len]);
/** Convert a field element to a hexadecimal string. */

64
src/field_10x26_impl.h

@ -103,6 +103,62 @@ static void secp256k1_fe_normalize(secp256k1_fe_t *r) { @@ -103,6 +103,62 @@ static void secp256k1_fe_normalize(secp256k1_fe_t *r) {
#endif
}
static void secp256k1_fe_normalize_var(secp256k1_fe_t *r) {
uint32_t t0 = r->n[0], t1 = r->n[1], t2 = r->n[2], t3 = r->n[3], t4 = r->n[4],
t5 = r->n[5], t6 = r->n[6], t7 = r->n[7], t8 = r->n[8], t9 = r->n[9];
/* Reduce t9 at the start so there will be at most a single carry from the first pass */
uint32_t x = t9 >> 22; t9 &= 0x03FFFFFUL;
uint32_t m;
/* The first pass ensures the magnitude is 1, ... */
t0 += x * 0x3D1UL; t1 += (x << 6);
t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL; m = t2;
t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL; m &= t3;
t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL; m &= t4;
t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL; m &= t5;
t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL; m &= t6;
t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL; m &= t7;
t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL; m &= t8;
/* ... except for a possible carry at bit 22 of t9 (i.e. bit 256 of the field element) */
VERIFY_CHECK(t9 >> 23 == 0);
/* At most a single final reduction is needed; check if the value is >= the field characteristic */
x = (t9 >> 22) | ((t9 == 0x03FFFFFUL) & (m == 0x3FFFFFFUL)
& ((t1 + 0x40UL + ((t0 + 0x3D1UL) >> 26)) > 0x3FFFFFFUL));
if (x) {
t0 += 0x3D1UL; t1 += (x << 6);
t1 += (t0 >> 26); t0 &= 0x3FFFFFFUL;
t2 += (t1 >> 26); t1 &= 0x3FFFFFFUL;
t3 += (t2 >> 26); t2 &= 0x3FFFFFFUL;
t4 += (t3 >> 26); t3 &= 0x3FFFFFFUL;
t5 += (t4 >> 26); t4 &= 0x3FFFFFFUL;
t6 += (t5 >> 26); t5 &= 0x3FFFFFFUL;
t7 += (t6 >> 26); t6 &= 0x3FFFFFFUL;
t8 += (t7 >> 26); t7 &= 0x3FFFFFFUL;
t9 += (t8 >> 26); t8 &= 0x3FFFFFFUL;
/* If t9 didn't carry to bit 22 already, then it should have after any final reduction */
VERIFY_CHECK(t9 >> 22 == x);
/* Mask off the possible multiple of 2^256 from the final reduction */
t9 &= 0x03FFFFFUL;
}
r->n[0] = t0; r->n[1] = t1; r->n[2] = t2; r->n[3] = t3; r->n[4] = t4;
r->n[5] = t5; r->n[6] = t6; r->n[7] = t7; r->n[8] = t8; r->n[9] = t9;
#ifdef VERIFY
r->magnitude = 1;
r->normalized = 1;
secp256k1_fe_verify(r);
#endif
}
SECP256K1_INLINE static void secp256k1_fe_set_int(secp256k1_fe_t *r, int a) {
r->n[0] = a;
r->n[1] = r->n[2] = r->n[3] = r->n[4] = r->n[5] = r->n[6] = r->n[7] = r->n[8] = r->n[9] = 0;
@ -271,7 +327,7 @@ SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1 @@ -271,7 +327,7 @@ SECP256K1_INLINE static void secp256k1_fe_add(secp256k1_fe_t *r, const secp256k1
#define VERIFY_BITS(x, n) do { } while(0)
#endif
SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b, uint32_t *r) {
SECP256K1_INLINE static void secp256k1_fe_mul_inner(uint32_t *r, const uint32_t *a, const uint32_t * SECP256K1_RESTRICT b) {
VERIFY_BITS(a[0], 30);
VERIFY_BITS(a[1], 30);
VERIFY_BITS(a[2], 30);
@ -598,7 +654,7 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint32_t *a, const uin @@ -598,7 +654,7 @@ SECP256K1_INLINE static void secp256k1_fe_mul_inner(const uint32_t *a, const uin
/* [r9 r8 r7 r6 r5 r4 r3 r2 r1 r0] = [p18 p17 p16 p15 p14 p13 p12 p11 p10 p9 p8 p7 p6 p5 p4 p3 p2 p1 p0] */
}
SECP256K1_INLINE static void secp256k1_fe_sqr_inner(const uint32_t *a, uint32_t *r) {
SECP256K1_INLINE static void secp256k1_fe_sqr_inner(uint32_t *r, const uint32_t *a) {
VERIFY_BITS(a[0], 30);
VERIFY_BITS(a[1], 30);
VERIFY_BITS(a[2], 30);
@ -879,7 +935,7 @@ static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const s @@ -879,7 +935,7 @@ static void secp256k1_fe_mul(secp256k1_fe_t *r, const secp256k1_fe_t *a, const s
secp256k1_fe_verify(b);
VERIFY_CHECK(r != b);
#endif
secp256k1_fe_mul_inner(a->n, b->n, r->n);
secp256k1_fe_mul_inner(r->n, a->n, b->n);
#ifdef VERIFY
r->magnitude = 1;
r->normalized = 0;
@ -892,7 +948,7 @@ static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) { @@ -892,7 +948,7 @@ static void secp256k1_fe_sqr(secp256k1_fe_t *r, const secp256k1_fe_t *a) {
VERIFY_CHECK(a->magnitude <= 8);
secp256k1_fe_verify(a);
#endif
secp256k1_fe_sqr_inner(a->n, r->n);
secp256k1_fe_sqr_inner(r->n, a->n);
#ifdef VERIFY
r->magnitude = 1;
r->normalized = 0;

469
src/field_5x52_asm.asm

@ -1,469 +0,0 @@ @@ -1,469 +0,0 @@
;; Added by Diederik Huys, March 2013
;;
;; Provided public procedures:
;; secp256k1_fe_mul_inner
;; secp256k1_fe_sqr_inner
;;
;; Needed tools: YASM (http://yasm.tortall.net)
;;
;;
BITS 64
%ifidn __OUTPUT_FORMAT__,macho64
%define SYM(x) _ %+ x
%else
%define SYM(x) x
%endif
;; Procedure ExSetMult
;; Register Layout:
;; INPUT: rdi = a->n
;; rsi = b->n
;; rdx = r->a
;;
;; INTERNAL: rdx:rax = multiplication accumulator
;; r9:r8 = c
;; r10-r13 = t0-t3
;; r14 = b.n[0] / t4
;; r15 = b.n[1] / t5
;; rbx = b.n[2] / t6
;; rcx = b.n[3] / t7
;; rbp = Constant 0FFFFFFFFFFFFFh / t8
;; rsi = b.n / b.n[4] / t9
GLOBAL SYM(secp256k1_fe_mul_inner)
ALIGN 32
SYM(secp256k1_fe_mul_inner):
push rbp
push rbx
push r12
push r13
push r14
push r15
push rdx
mov r14,[rsi+8*0] ; preload b.n[0]. This will be the case until
; b.n[0] is no longer needed, then we reassign
; r14 to t4
;; c=a.n[0] * b.n[0]
mov rax,[rdi+0*8] ; load a.n[0]
mov rbp,0FFFFFFFFFFFFFh
mul r14 ; rdx:rax=a.n[0]*b.n[0]
mov r15,[rsi+1*8]
mov r10,rbp ; load modulus into target register for t0
mov r8,rax
and r10,rax ; only need lower qword of c
shrd r8,rdx,52
xor r9,r9 ; c < 2^64, so we ditch the HO part
;; c+=a.n[0] * b.n[1] + a.n[1] * b.n[0]
mov rax,[rdi+0*8]
mul r15
add r8,rax
adc r9,rdx
mov rax,[rdi+1*8]
mul r14
mov r11,rbp
mov rbx,[rsi+2*8]
add r8,rax
adc r9,rdx
and r11,r8
shrd r8,r9,52
xor r9,r9
;; c+=a.n[0 1 2] * b.n[2 1 0]
mov rax,[rdi+0*8]
mul rbx
add r8,rax
adc r9,rdx
mov rax,[rdi+1*8]
mul r15
add r8,rax
adc r9,rdx
mov rax,[rdi+2*8]
mul r14
mov r12,rbp
mov rcx,[rsi+3*8]
add r8,rax
adc r9,rdx
and r12,r8
shrd r8,r9,52
xor r9,r9
;; c+=a.n[0 1 2 3] * b.n[3 2 1 0]
mov rax,[rdi+0*8]
mul rcx
add r8,rax
adc r9,rdx
mov rax,[rdi+1*8]
mul rbx
add r8,rax
adc r9,rdx
mov rax,[rdi+2*8]
mul r15
add r8,rax
adc r9,rdx
mov rax,[rdi+3*8]
mul r14
mov r13,rbp
mov rsi,[rsi+4*8] ; load b.n[4] and destroy pointer
add r8,rax
adc r9,rdx
and r13,r8
shrd r8,r9,52
xor r9,r9
;; c+=a.n[0 1 2 3 4] * b.n[4 3 2 1 0]
mov rax,[rdi+0*8]
mul rsi
add r8,rax
adc r9,rdx
mov rax,[rdi+1*8]
mul rcx
add r8,rax
adc r9,rdx
mov rax,[rdi+2*8]
mul rbx
add r8,rax
adc r9,rdx
mov rax,[rdi+3*8]
mul r15
add r8,rax
adc r9,rdx
mov rax,[rdi+4*8]
mul r14
mov r14,rbp ; load modulus into t4 and destroy a.n[0]
add r8,rax
adc r9,rdx
and r14,r8
shrd r8,r9,52
xor r9,r9
;; c+=a.n[1 2 3 4] * b.n[4 3 2 1]
mov rax,[rdi+1*8]
mul rsi
add r8,rax
adc r9,rdx
mov rax,[rdi+2*8]
mul rcx
add r8,rax
adc r9,rdx
mov rax,[rdi+3*8]
mul rbx
add r8,rax
adc r9,rdx
mov rax,[rdi+4*8]
mul r15
mov r15,rbp
add r8,rax
adc r9,rdx
and r15,r8
shrd r8,r9,52
xor r9,r9
;; c+=a.n[2 3 4] * b.n[4 3 2]
mov rax,[rdi+2*8]
mul rsi
add r8,rax
adc r9,rdx
mov rax,[rdi+3*8]
mul rcx
add r8,rax
adc r9,rdx
mov rax,[rdi+4*8]
mul rbx
mov rbx,rbp
add r8,rax
adc r9,rdx
and rbx,r8
shrd r8,r9,52
xor r9,r9
;; c+=a.n[3 4] * b.n[4 3]
mov rax,[rdi+3*8]
mul rsi
add r8,rax
adc r9,rdx
mov rax,[rdi+4*8]
mul rcx
mov rcx,rbp
add r8,rax
adc r9,rdx
and rcx,r8
shrd r8,r9,52
xor r9,r9
;; c+=a.n[4] * b.n[4]
mov rax,[rdi+4*8]
mul rsi
;; mov rbp,rbp ; modulus already there!
add r8,rax
adc r9,rdx
and rbp,r8
shrd r8,r9,52
xor r9,r9
mov rsi,r8 ; load c into t9 and destroy b.n[4]
;; *******************************************************
common_exit_norm:
mov rdi,01000003D10h ; load constant
mov rax,r15 ; get t5
mul rdi
add rax,r10 ; +t0
adc rdx,0
mov r10,0FFFFFFFFFFFFFh ; modulus. Sadly, we ran out of registers!
mov r8,rax ; +c
and r10,rax
shrd r8,rdx,52
xor r9,r9
mov rax,rbx ; get t6
mul rdi
add rax,r11 ; +t1
adc rdx,0
mov r11,0FFFFFFFFFFFFFh ; modulus
add r8,rax ; +c
adc r9,rdx
and r11,r8
shrd r8,r9,52
xor r9,r9
mov rax,rcx ; get t7
mul rdi
add rax,r12 ; +t2
adc rdx,0
pop rbx ; retrieve pointer to this.n
mov r12,0FFFFFFFFFFFFFh ; modulus
add r8,rax ; +c
adc r9,rdx
and r12,r8
mov [rbx+2*8],r12 ; mov into this.n[2]
shrd r8,r9,52
xor r9,r9
mov rax,rbp ; get t8
mul rdi
add rax,r13 ; +t3
adc rdx,0
mov r13,0FFFFFFFFFFFFFh ; modulus
add r8,rax ; +c
adc r9,rdx
and r13,r8
mov [rbx+3*8],r13 ; -> this.n[3]
shrd r8,r9,52
xor r9,r9
mov rax,rsi ; get t9
mul rdi
add rax,r14 ; +t4
adc rdx,0
mov r14,0FFFFFFFFFFFFh ; !!!
add r8,rax ; +c
adc r9,rdx
and r14,r8
mov [rbx+4*8],r14 ; -> this.n[4]
shrd r8,r9,48 ; !!!
xor r9,r9
mov rax,01000003D1h
mul r8
add rax,r10
adc rdx,0
mov r10,0FFFFFFFFFFFFFh ; modulus
mov r8,rax
and rax,r10
shrd r8,rdx,52
mov [rbx+0*8],rax ; -> this.n[0]
add r8,r11
mov [rbx+1*8],r8 ; -> this.n[1]
pop r15
pop r14
pop r13
pop r12
pop rbx
pop rbp
ret
;; PROC ExSetSquare
;; Register Layout:
;; INPUT: rdi = a.n
;; rsi = this.a
;; INTERNAL: rdx:rax = multiplication accumulator
;; r9:r8 = c
;; r10-r13 = t0-t3
;; r14 = a.n[0] / t4
;; r15 = a.n[1] / t5
;; rbx = a.n[2] / t6
;; rcx = a.n[3] / t7
;; rbp = 0FFFFFFFFFFFFFh / t8
;; rsi = a.n[4] / t9
GLOBAL SYM(secp256k1_fe_sqr_inner)
ALIGN 32
SYM(secp256k1_fe_sqr_inner):
push rbp
push rbx
push r12
push r13
push r14
push r15
push rsi
mov rbp,0FFFFFFFFFFFFFh
;; c=a.n[0] * a.n[0]
mov r14,[rdi+0*8] ; r14=a.n[0]
mov r10,rbp ; modulus
mov rax,r14
mul rax
mov r15,[rdi+1*8] ; a.n[1]
add r14,r14 ; r14=2*a.n[0]
mov r8,rax
and r10,rax ; only need lower qword
shrd r8,rdx,52
xor r9,r9
;; c+=2*a.n[0] * a.n[1]
mov rax,r14 ; r14=2*a.n[0]
mul r15
mov rbx,[rdi+2*8] ; rbx=a.n[2]
mov r11,rbp ; modulus
add r8,rax
adc r9,rdx
and r11,r8
shrd r8,r9,52
xor r9,r9
;; c+=2*a.n[0]*a.n[2]+a.n[1]*a.n[1]
mov rax,r14
mul rbx
add r8,rax
adc r9,rdx
mov rax,r15
mov r12,rbp ; modulus
mul rax
mov rcx,[rdi+3*8] ; rcx=a.n[3]
add r15,r15 ; r15=a.n[1]*2
add r8,rax
adc r9,rdx
and r12,r8 ; only need lower dword
shrd r8,r9,52
xor r9,r9
;; c+=2*a.n[0]*a.n[3]+2*a.n[1]*a.n[2]
mov rax,r14
mul rcx
add r8,rax
adc r9,rdx
mov rax,r15 ; rax=2*a.n[1]
mov r13,rbp ; modulus
mul rbx
mov rsi,[rdi+4*8] ; rsi=a.n[4]
add r8,rax
adc r9,rdx
and r13,r8
shrd r8,r9,52
xor r9,r9
;; c+=2*a.n[0]*a.n[4]+2*a.n[1]*a.n[3]+a.n[2]*a.n[2]
mov rax,r14 ; last time we need 2*a.n[0]
mul rsi
add r8,rax
adc r9,rdx
mov rax,r15
mul rcx
mov r14,rbp ; modulus
add r8,rax