trustbridge/nss-cmake-static: nss/lib/freebl/mpi/mpi_x86

comparison nss/lib/freebl/mpi/mpi_x86_asm.c @ 0:1e5118fa0cb1

This is NSS with a CMake build system. To compile a static NSS library for Windows we've used the Chromium-NSS fork and added a CMake build system to compile it statically for Windows. See README.chromium for the Chromium changes and README.trustbridge for our modifications.

author	Andre Heinecke <andre.heinecke@intevation.de>
date	Mon, 28 Jul 2014 10:47:06 +0200
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:1e5118fa0cb1
+/*
+*  mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions.
+*
+* This Source Code Form is subject to the terms of the Mozilla Public
+* License, v. 2.0. If a copy of the MPL was not distributed with this
+* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include "mpi-priv.h"
+static int is_sse = -1;	/* dispatch cache read by the s_mpv_ routines below: -1 = not yet queried, otherwise the value returned by s_mpi_is_sse2() */
+extern unsigned long s_mpi_is_sse2();	/* defined elsewhere; a result > 0 selects the SSE2 paths, 0 the plain x86 paths */
+/*
+*  s_mpv_mul_d(a, a_len, b, c)
+*
+*  Multiplies the a_len digits at a by the single digit b.  For each digit
+*  the low word of (a[i] x b + carry) is stored to c[i] and the high word
+*  becomes the next carry.  The final carry is stored to c[a_len], so
+*  a_len + 1 words of c are written (c[0] = 0 when a_len == 0).
+*
+*  Dispatch: is_sse starts at -1.  While it is negative the routine calls
+*  s_mpi_is_sse2() and caches the result in is_sse; a positive value
+*  selects the SSE2 path, zero selects the plain x86 path.
+*
+*  The function is naked, so each path builds and removes its own frame.
+*
+*  x86 path frame (after sub esp,28 and the three pushes):
+*   ebp - 40:	caller's ebx
+*   ebp - 36:	caller's esi
+*   ebp - 32:	caller's edi
+*   ebp - 28 .. ebp - 4:	reserved, never referenced
+*   ebp + 0:	caller's ebp
+*   ebp + 4:	return address
+*   ebp + 8:	a	argument
+*   ebp + 12:	a_len	argument
+*   ebp + 16:	b	argument
+*   ebp + 20:	c	argument
+*  SSE2 path frame: ebp - 4 = caller's edi, ebp - 8 = caller's esi; the
+*  arguments sit at the same ebp offsets and ebx is not touched.
+*
+*   registers (x86 path):
+*  	eax:	a[i], then low word of the product
+* 	ebx:	carry
+* 	ecx:	a_len (counts down to zero)
+* 	edx:	b, then high word of the product
+* 	esi:	a ptr
+* 	edi:	c ptr
+*   registers (SSE2 path):
+*  	mm0:	a[i], then a[i] x b
+*  	mm1:	b
+*  	mm2:	64-bit accumulator; after the shift it holds the carry
+*  	ecx, esi, edi:	as on the x86 path
+*
+*  The stosd comments below say "ax"; stosd stores the full 32-bit eax.
+*/
+__declspec(naked) void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+__asm {
+mov    eax, is_sse		/* cached check: < 0 not yet queried, 0 x86, > 0 SSE2 */
+cmp    eax, 0
+je     s_mpv_mul_d_x86
+jg     s_mpv_mul_d_sse2
+call   s_mpi_is_sse2		/* not yet queried: ask and cache the answer */
+mov    is_sse, eax
+cmp    eax, 0
+jg     s_mpv_mul_d_sse2		/* otherwise fall through to the x86 path */
+s_mpv_mul_d_x86:
+push   ebp
+mov    ebp,esp
+sub    esp,28		/* reserved space; never referenced below */
+push   edi
+push   esi
+push   ebx
+mov    ebx,0		; carry = 0
+mov    ecx,[ebp+12]		; ecx = a_len
+mov    edi,[ebp+20]		/* edi = c */
+cmp    ecx,0
+je     L_2			; jmp if a_len == 0
+mov    esi,[ebp+8]		; esi = a
+cld			/* make lodsd/stosd step upward */
+L_1:
+lodsd			; eax = [ds:esi]; esi += 4
+mov    edx,[ebp+16]		; edx = b
+mul    edx			; edx:eax = Phi:Plo = a_i * b
+add    eax,ebx		; add carry (ebx) to edx:eax
+adc    edx,0		/* carry-out of the low word goes into the high word */
+mov    ebx,edx		; high half of product becomes next carry
+stosd			; [es:edi] = ax; edi += 4;
+dec    ecx			; --a_len
+jnz    L_1			; jmp if a_len != 0
+L_2:
+mov    [edi],ebx		; *c = carry
+pop    ebx
+pop    esi
+pop    edi
+leave			/* esp = ebp, pop ebp: also releases the 28 reserved bytes */
+ret
+nop			/* not reachable: sits between ret and the next label */
+s_mpv_mul_d_sse2:
+push   ebp
+mov    ebp, esp
+push   edi
+push   esi
+psubq  mm2, mm2		; carry = 0
+mov    ecx, [ebp+12]	; ecx = a_len
+movd   mm1, [ebp+16]	; mm1 = b
+mov    edi, [ebp+20]	/* edi = c */
+cmp    ecx, 0
+je     L_6			; jmp if a_len == 0
+mov    esi, [ebp+8]		; esi = a
+cld			/* NOTE(review): no string instruction is used on this path */
+L_5:
+movd   mm0, [esi]		; mm0 = *a++
+add    esi, 4
+pmuludq mm0, mm1		; mm0 = b * *a++
+paddq  mm2, mm0		; add the carry
+movd   [edi], mm2		; store the 32bit result
+add    edi, 4
+psrlq  mm2, 32		; save the carry
+dec    ecx			; --a_len
+jnz    L_5			; jmp if a_len != 0
+L_6:
+movd   [edi], mm2		; *c = carry
+emms			/* empty the MMX state before returning to the caller */
+pop    esi
+pop    edi
+leave
+ret
+nop			/* not reachable: follows ret */
+}
+}
+/*
+*  s_mpv_mul_d_add(a, a_len, b, c)
+*
+*  Multiply-accumulate: for each of the a_len digits, the low word of
+*  (a[i] x b + carry + c[i]) replaces c[i] and the high word becomes the
+*  next carry.  The final carry is stored to c[a_len] with a plain store,
+*  overwriting whatever was there (it is not added in).
+*
+*  Dispatch: is_sse starts at -1.  While it is negative the routine calls
+*  s_mpi_is_sse2() and caches the result in is_sse; a positive value
+*  selects the SSE2 path, zero selects the plain x86 path.
+*
+*  The function is naked, so each path builds and removes its own frame.
+*
+*  x86 path frame (after sub esp,28 and the three pushes):
+*   ebp - 40:	caller's ebx
+*   ebp - 36:	caller's esi
+*   ebp - 32:	caller's edi
+*   ebp - 28 .. ebp - 4:	reserved, never referenced
+*   ebp + 0:	caller's ebp
+*   ebp + 4:	return address
+*   ebp + 8:	a	argument
+*   ebp + 12:	a_len	argument
+*   ebp + 16:	b	argument
+*   ebp + 20:	c	argument
+*  SSE2 path frame: ebp - 4 = caller's edi, ebp - 8 = caller's esi; the
+*  arguments sit at the same ebp offsets and ebx is not touched.
+*
+*   registers (x86 path):
+*  	eax:	a[i], then low word of the running sum
+* 	ebx:	carry; briefly reused to hold c[i]
+* 	ecx:	a_len (counts down to zero)
+* 	edx:	b, then high word of the running sum
+* 	esi:	a ptr
+* 	edi:	c ptr
+*   registers (SSE2 path):
+*  	mm0:	a[i] x b, then c[i]
+*  	mm1:	b
+*  	mm2:	64-bit accumulator; after the shift it holds the carry
+*  	ecx, esi, edi:	as on the x86 path
+*
+*  The stosd comments below say "ax"; stosd stores the full 32-bit eax.
+*/
+__declspec(naked) void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+__asm {
+mov    eax, is_sse		/* cached check: < 0 not yet queried, 0 x86, > 0 SSE2 */
+cmp    eax, 0
+je     s_mpv_mul_d_add_x86
+jg     s_mpv_mul_d_add_sse2
+call   s_mpi_is_sse2		/* not yet queried: ask and cache the answer */
+mov    is_sse, eax
+cmp    eax, 0
+jg     s_mpv_mul_d_add_sse2		/* otherwise fall through to the x86 path */
+s_mpv_mul_d_add_x86:
+push   ebp
+mov    ebp,esp
+sub    esp,28		/* reserved space; never referenced below */
+push   edi
+push   esi
+push   ebx
+mov    ebx,0		; carry = 0
+mov    ecx,[ebp+12]		; ecx = a_len
+mov    edi,[ebp+20]		/* edi = c */
+cmp    ecx,0
+je     L_11			; jmp if a_len == 0
+mov    esi,[ebp+8]		; esi = a
+cld			/* make lodsd/stosd step upward */
+L_10:
+lodsd			; eax = [ds:esi]; esi += 4
+mov    edx,[ebp+16]		; edx = b
+mul    edx			; edx:eax = Phi:Plo = a_i * b
+add    eax,ebx		; add carry (ebx) to edx:eax
+adc    edx,0		/* carry-out of the low word goes into the high word */
+mov    ebx,[edi]		; add in current word from *c
+add    eax,ebx
+adc    edx,0		/* second carry-out, from adding c[i] */
+mov    ebx,edx		; high half of product becomes next carry
+stosd			; [es:edi] = ax; edi += 4;
+dec    ecx			; --a_len
+jnz    L_10			; jmp if a_len != 0
+L_11:
+mov    [edi],ebx		; *c = carry
+pop    ebx
+pop    esi
+pop    edi
+leave			/* esp = ebp, pop ebp: also releases the 28 reserved bytes */
+ret
+nop			/* not reachable: sits between ret and the next label */
+s_mpv_mul_d_add_sse2:
+push   ebp
+mov    ebp, esp
+push   edi
+push   esi
+psubq  mm2, mm2		; carry = 0
+mov    ecx, [ebp+12]	; ecx = a_len
+movd   mm1, [ebp+16]	; mm1 = b
+mov    edi, [ebp+20]	/* edi = c */
+cmp    ecx, 0
+je     L_16			; jmp if a_len == 0
+mov    esi, [ebp+8]		; esi = a
+cld			/* NOTE(review): no string instruction is used on this path */
+L_15:
+movd   mm0, [esi]		; mm0 = *a++
+add    esi, 4
+pmuludq mm0, mm1		; mm0 = b * *a++
+paddq  mm2, mm0		; add the carry
+movd   mm0, [edi]		/* mm0 = current word of c */
+paddq  mm2, mm0		; add the carry /* this second paddq adds the word of c loaded just above */
+movd   [edi], mm2		; store the 32bit result
+add    edi, 4
+psrlq  mm2, 32		; save the carry
+dec    ecx			; --a_len
+jnz    L_15			; jmp if a_len != 0
+L_16:
+movd   [edi], mm2		; *c = carry
+emms			/* empty the MMX state before returning to the caller */
+pop    esi
+pop    edi
+leave
+ret
+nop			/* not reachable: follows ret */
+}
+}
+/*
+*  s_mpv_mul_d_add_prop(a, a_len, b, c)
+*
+*  Multiply-accumulate with carry propagation: for each of the a_len
+*  digits, the low word of (a[i] x b + carry + c[i]) replaces c[i] and the
+*  high word becomes the next carry.  A non-zero final carry is then ADDED
+*  to c[a_len], and any carry out of that addition keeps rippling into the
+*  following words of c until an addition produces no carry.
+*  NOTE(review): the ripple loop has no length bound; presumably the caller
+*  guarantees c is long enough to absorb the carry - confirm at call sites.
+*
+*  Dispatch: is_sse starts at -1.  While it is negative the routine calls
+*  s_mpi_is_sse2() and caches the result in is_sse; a positive value
+*  selects the SSE2 path, zero selects the plain x86 path.
+*
+*  The function is naked, so each path builds and removes its own frame.
+*
+*  x86 path frame (after sub esp,28 and the three pushes):
+*   ebp - 40:	caller's ebx
+*   ebp - 36:	caller's esi
+*   ebp - 32:	caller's edi
+*   ebp - 28 .. ebp - 4:	reserved, never referenced
+*   ebp + 0:	caller's ebp
+*   ebp + 4:	return address
+*   ebp + 8:	a	argument
+*   ebp + 12:	a_len	argument
+*   ebp + 16:	b	argument
+*   ebp + 20:	c	argument
+*  SSE2 path frame: ebp - 4 = caller's edi, ebp - 8 = caller's esi,
+*  ebp - 12 = caller's ebx; the arguments sit at the same ebp offsets.
+*
+*   registers (x86 path):
+*  	eax:	a[i], then low word of the running sum
+* 	ebx:	carry; briefly reused to hold c[i]
+* 	ecx:	a_len (counts down to zero)
+* 	edx:	b, then high word of the running sum
+* 	esi:	a ptr
+* 	edi:	c ptr
+*   registers (SSE2 path):
+*  	mm0:	a[i], then a[i] x b
+*  	mm1:	b
+*  	mm2:	64-bit accumulator; after the shift it holds the carry
+*  	mm3:	c[i]
+*  	ebx:	final carry, copied out of mm2 for the ripple loop
+*  	ecx, esi, edi:	as on the x86 path
+*
+*  Flag dependence: mov and stosd do not modify the flags, so the jnc/jc
+*  after each stosd tests the carry of the preceding add/adc, and the adc
+*  at the top of the ripple loop consumes the carry that made jc branch.
+*  The stosd comments below say "ax"; stosd stores the full 32-bit eax.
+*/
+__declspec(naked) void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+__asm {
+mov    eax, is_sse		/* cached check: < 0 not yet queried, 0 x86, > 0 SSE2 */
+cmp    eax, 0
+je     s_mpv_mul_d_add_prop_x86
+jg     s_mpv_mul_d_add_prop_sse2
+call   s_mpi_is_sse2		/* not yet queried: ask and cache the answer */
+mov    is_sse, eax
+cmp    eax, 0
+jg     s_mpv_mul_d_add_prop_sse2		/* otherwise fall through to the x86 path */
+s_mpv_mul_d_add_prop_x86:
+push   ebp
+mov    ebp,esp
+sub    esp,28		/* reserved space; never referenced below */
+push   edi
+push   esi
+push   ebx
+mov    ebx,0		; carry = 0
+mov    ecx,[ebp+12]		; ecx = a_len
+mov    edi,[ebp+20]		/* edi = c */
+cmp    ecx,0
+je     L_21			; jmp if a_len == 0
+cld			/* make lodsd/stosd step upward */
+mov    esi,[ebp+8]		; esi = a
+L_20:
+lodsd			; eax = [ds:esi]; esi += 4
+mov    edx,[ebp+16]		; edx = b
+mul    edx			; edx:eax = Phi:Plo = a_i * b
+add    eax,ebx		; add carry (ebx) to edx:eax
+adc    edx,0		/* carry-out of the low word goes into the high word */
+mov    ebx,[edi]		; add in current word from *c
+add    eax,ebx
+adc    edx,0		/* second carry-out, from adding c[i] */
+mov    ebx,edx		; high half of product becomes next carry
+stosd			; [es:edi] = ax; edi += 4;
+dec    ecx			; --a_len
+jnz    L_20			; jmp if a_len != 0
+L_21:
+cmp    ebx,0		; is carry zero?
+jz     L_23
+mov    eax,[edi]		; add in current word from *c
+add    eax,ebx
+stosd			; [es:edi] = ax; edi += 4;
+jnc    L_23		/* CF is still that of the add: stosd leaves flags alone */
+L_22:
+mov    eax,[edi]		; add in current word from *c
+adc    eax,0		/* adds the pending carry (CF = 1 on every entry) */
+stosd			; [es:edi] = ax; edi += 4;
+jc     L_22		/* keep rippling while the adc carried out */
+L_23:
+pop    ebx
+pop    esi
+pop    edi
+leave			/* esp = ebp, pop ebp: also releases the 28 reserved bytes */
+ret
+nop			/* not reachable: sits between ret and the next label */
+s_mpv_mul_d_add_prop_sse2:
+push   ebp
+mov    ebp, esp
+push   edi
+push   esi
+push   ebx
+psubq  mm2, mm2		; carry = 0
+mov    ecx, [ebp+12]	; ecx = a_len
+movd   mm1, [ebp+16]	; mm1 = b
+mov    edi, [ebp+20]	/* edi = c */
+cmp    ecx, 0
+je     L_26			; jmp if a_len == 0
+mov    esi, [ebp+8]		; esi = a
+cld			/* needed by the stosd in the ripple code below */
+L_25:
+movd   mm0, [esi]		; mm0 = *a++
+movd   mm3, [edi]		; fetch the sum
+add    esi, 4
+pmuludq mm0, mm1		; mm0 = b * *a++
+paddq  mm2, mm0		; add the carry
+paddq  mm2, mm3		; add *c++
+movd   [edi], mm2		; store the 32bit result
+add    edi, 4
+psrlq  mm2, 32		; save the carry
+dec    ecx			; --a_len
+jnz    L_25			; jmp if a_len != 0
+L_26:
+movd   ebx, mm2		/* ebx = final carry (low 32 bits of mm2) */
+cmp    ebx, 0		; is carry zero?
+jz     L_28
+mov    eax, [edi]
+add    eax, ebx
+stosd
+jnc    L_28		/* CF is still that of the add: stosd leaves flags alone */
+L_27:
+mov    eax, [edi]		; add in current word from *c
+adc	   eax, 0
+stosd			; [es:edi] = ax; edi += 4;
+jc     L_27		/* keep rippling while the adc carried out */
+L_28:
+emms			/* empty the MMX state before returning to the caller */
+pop    ebx
+pop    esi
+pop    edi
+leave
+ret
+nop			/* not reachable: follows ret */
+}
+}
+/*
+*  s_mpv_sqr_add_prop(a, a_len, sqrs)
+*
+*  For each of the a_len digits, the 64-bit square of a[i] plus the carry
+*  from the previous digit is added to the two-word value held in
+*  sqrs[2i] (low) and sqrs[2i+1] (high); the output pointer advances two
+*  words per input digit.  A non-zero final carry is then added to the
+*  next word of sqrs, and any carry out keeps rippling into the following
+*  words until an addition produces no carry.
+*  NOTE(review): the ripple loop has no length bound; presumably the caller
+*  guarantees sqrs is long enough to absorb the carry - confirm at call
+*  sites.
+*
+*  Dispatch: is_sse starts at -1.  While it is negative the routine calls
+*  s_mpi_is_sse2() and caches the result in is_sse; a positive value
+*  selects the SSE2 path, zero selects the plain x86 path.
+*
+*  The function is naked, so each path builds and removes its own frame.
+*
+*  x86 path frame (after sub esp,12 and the three pushes):
+*   ebp - 24:	caller's ebx
+*   ebp - 20:	caller's esi
+*   ebp - 16:	caller's edi
+*   ebp - 12 .. ebp - 4:	reserved, never referenced
+*   ebp + 0:	caller's ebp
+*   ebp + 4:	return address
+*   ebp + 8:	pa	argument (a)
+*   ebp + 12:	a_len	argument
+*   ebp + 16:	ps	argument (sqrs)
+*  SSE2 path frame: ebp - 4 = caller's edi, ebp - 8 = caller's esi,
+*  ebp - 12 = caller's ebx; the arguments sit at the same ebp offsets.
+*
+*   registers (x86 path):
+*  	eax:	a[i], then low word of the sum, then high word for the store
+* 	ebx:	carry; reused inside the loop to hold the words read from ps
+* 	ecx:	a_len (counts down to zero)
+* 	edx:	high word of the square / running sum
+* 	esi:	pa ptr
+* 	edi:	ps ptr
+*   registers (SSE2 path):
+*  	mm0:	a[i], then its square
+*  	mm2:	64-bit accumulator; after each shift it holds the carry
+*  	mm3:	word read from ps (low, then high)
+*  	ebx:	final carry, copied out of mm2 for the ripple loop
+*  	ecx, esi, edi:	as on the x86 path
+*
+*  Flag dependence (x86 path): mov and stosd do not modify the flags.  The
+*  loop relies on that between "add eax,ebx" and "adc edx,ebx", and again
+*  between "adc edx,ebx" and "adc ebx,0"; the ripple code relies on it
+*  between each add/adc and the jnc/jc that follows the stosd.
+*  The lodsd/stosd comments below say "si", "di" and "ax"; the
+*  instructions use the full 32-bit esi, edi and eax.
+*/
+__declspec(naked) void
+s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
+{
+__asm {
+mov    eax, is_sse		/* cached check: < 0 not yet queried, 0 x86, > 0 SSE2 */
+cmp    eax, 0
+je     s_mpv_sqr_add_prop_x86
+jg     s_mpv_sqr_add_prop_sse2
+call   s_mpi_is_sse2		/* not yet queried: ask and cache the answer */
+mov    is_sse, eax
+cmp    eax, 0
+jg     s_mpv_sqr_add_prop_sse2		/* otherwise fall through to the x86 path */
+s_mpv_sqr_add_prop_x86:
+push   ebp
+mov    ebp,esp
+sub    esp,12		/* reserved space; never referenced below */
+push   edi
+push   esi
+push   ebx
+mov    ebx,0		; carry = 0
+mov    ecx,[ebp+12]	; a_len
+mov    edi,[ebp+16]	; edi = ps
+cmp    ecx,0
+je     L_31		; jump if a_len == 0
+cld			/* make lodsd/stosd step upward */
+mov    esi,[ebp+8]		; esi = pa
+L_30:
+lodsd			; eax = [ds:si]; si += 4;
+mul    eax		/* edx:eax = a_i squared */
+add    eax,ebx		; add "carry"
+adc    edx,0		/* carry-out of the low word goes into the high word */
+mov    ebx,[edi]		/* ebx = low word currently in ps */
+add    eax,ebx		; add low word from result
+mov    ebx,[edi+4]		/* ebx = high word currently in ps; CF from the add is untouched */
+stosd			; [es:di] = eax; di += 4;
+adc    edx,ebx		; add high word from result
+mov    ebx,0		/* must stay a mov: xor would clear CF before the adc below */
+mov    eax,edx
+adc    ebx,0		/* ebx = carry-out of the high-word add (0 or 1) */
+stosd			; [es:di] = eax; di += 4;
+dec    ecx			; --a_len
+jnz    L_30		; jmp if a_len != 0
+L_31:
+cmp    ebx,0		; is carry zero?
+jz     L_34
+mov    eax,[edi]		; add in current word from *c
+add    eax,ebx
+stosd			; [es:edi] = ax; edi += 4;
+jnc    L_34		/* CF is still that of the add: stosd leaves flags alone */
+L_32:
+mov    eax,[edi]		; add in current word from *c
+adc    eax,0		/* adds the pending carry (CF = 1 on every entry) */
+stosd			; [es:edi] = ax; edi += 4;
+jc     L_32		/* keep rippling while the adc carried out */
+L_34:
+pop    ebx
+pop    esi
+pop    edi
+leave			/* esp = ebp, pop ebp: also releases the 12 reserved bytes */
+ret
+nop			/* not reachable: sits between ret and the next label */
+s_mpv_sqr_add_prop_sse2:
+push   ebp
+mov    ebp, esp
+push   edi
+push   esi
+push   ebx
+psubq  mm2, mm2		; carry = 0
+mov    ecx, [ebp+12]	; ecx = a_len
+mov    edi, [ebp+16]	/* edi = ps */
+cmp    ecx, 0
+je     L_36		; jmp if a_len == 0
+mov    esi, [ebp+8]		; esi = a
+cld			/* needed by the stosd in the ripple code below */
+L_35:
+movd   mm0, [esi]		; mm0 = *a
+movd   mm3, [edi]		; fetch the sum
+add	   esi, 4
+pmuludq mm0, mm0		; mm0 = sqr(a)
+paddq  mm2, mm0		; add the carry
+paddq  mm2, mm3		; add the low word
+movd   mm3, [edi+4]		/* mm3 = high word currently in ps */
+movd   [edi], mm2		; store the 32bit result
+psrlq  mm2, 32		/* keep the upper half as the high-word partial sum */
+paddq  mm2, mm3		; add the high word
+movd   [edi+4], mm2		; store the 32bit result
+psrlq  mm2, 32		; save the carry.
+add    edi, 8		/* two output words per input digit */
+dec    ecx			; --a_len
+jnz    L_35			; jmp if a_len != 0
+L_36:
+movd   ebx, mm2		/* ebx = final carry (low 32 bits of mm2) */
+cmp    ebx, 0		; is carry zero?
+jz     L_38
+mov    eax, [edi]
+add    eax, ebx
+stosd
+jnc    L_38		/* CF is still that of the add: stosd leaves flags alone */
+L_37:
+mov    eax, [edi]		; add in current word from *c
+adc	   eax, 0
+stosd			; [es:edi] = ax; edi += 4;
+jc     L_37		/* keep rippling while the adc carried out */
+L_38:
+emms			/* empty the MMX state before returning to the caller */
+pop    ebx
+pop    esi
+pop    edi
+leave
+ret
+nop			/* not reachable: follows ret */
+}
+}
+/*
+*  s_mpv_div_2dx1d - divide the 64-bit value Nhi:Nlo by a 32-bit divisor.
+*
+*  The divisor must be normalized so its high bit is 1.  The quotient is
+*  stored through qp, the remainder through rp, and the function always
+*  returns zero.  This code is from NSPR.
+*
+*  Naked function; frame after push ebp / mov ebp,esp / push ebx:
+*
+*   ebp -  4:	caller's ebx
+*   ebp +  0:	caller's ebp
+*   ebp +  4:	return address
+*   ebp +  8:	Nhi	argument
+*   ebp + 12:	Nlo	argument
+*   ebp + 16:	divisor	argument
+*   ebp + 20:	qp	argument
+*   ebp + 24:	rp	argument
+*   registers:
+*  	eax:	Nlo, then quotient, then the zero return value
+* 	ebx:	output pointer (qp, then rp); restored before return
+* 	edx:	Nhi, then remainder
+*/
+__declspec(naked) mp_err
+s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+		mp_digit *qp, mp_digit *rp)
+{
+__asm {
+push   ebp
+mov    ebp, esp
+push   ebx			; ebx is used as the output pointer
+mov    eax, [ebp+12]		; eax = Nlo
+mov    edx, [ebp+8]		; edx = Nhi
+div    dword ptr [ebp+16]	; eax = edx:eax / divisor, edx = remainder
+mov    ebx, [ebp+20]		; ebx = qp
+mov    [ebx], eax		; *qp = quotient
+mov    ebx, [ebp+24]		; ebx = rp
+mov    [ebx], edx		; *rp = remainder
+xor    eax, eax		; return zero
+pop    ebx
+pop    ebp
+ret
+}
+}

Mercurial > trustbridge > nss-cmake-static

comparison nss/lib/freebl/mpi/mpi_x86_asm.c @ 0:1e5118fa0cb1