From c31169a01baf73d84de1f48d868389d46142bab7 Mon Sep 17 00:00:00 2001
From: magnum <magnum>
Date: Fri, 16 Dec 2011 12:32:24 +0100
Subject: [PATCH 4/5] SSE_PARA fixes. Tweak SSE_PARA for various versions of
 compilers. Fix hmacmd5 so it handles para > 5. Also
 change icc target for a boost in both build time and
 performance. Make memory.h (MEM_ALIGN_SIMD) more robust
 against include order.

---
 src/Makefile           |    4 +-
 src/hmacMD5_fmt_plug.c |   60 +++++++++-------------------------------------
 src/memory.h           |    2 +
 src/x86-64.h           |   61 +++++++++++++++++++++++++++++++++++++---------
 src/x86-ssei.h         |   62 +++++++++++++++++++++++++++++++++++++-----------
 5 files changed, 113 insertions(+), 76 deletions(-)

diff --git a/src/Makefile b/src/Makefile
index d19065b..4bd8386 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -270,8 +270,8 @@ linux-x86-64-icc:
 	@echo "#define JOHN_BLD" '"'$@'"' > john_build_rule.h
 	$(MAKE) $(PROJ) \
 		JOHN_OBJS="$(JOHN_OBJS) c3_fmt.o x86-64.o sse-intrinsics.o" \
-		CFLAGS="-c -msse2 -axCORE-AVX2 -no-prec-div -ipo -I/usr/include -static-intel -DHAVE_CRYPT -DHAVE_DL $(ICCOMPFLAGS)" \
-		LDFLAGS="-lm -lssl -lcrypto -ipo -static-intel -lcrypt -ldl $(ICCOMPFLAGS) -s " \
+		CFLAGS="-c -fast -O2 -I/usr/include -static-intel -DHAVE_CRYPT -DHAVE_DL $(ICCOMPFLAGS)" \
+		LDFLAGS="-lm -lssl -lcrypto -ipo -static-intel -lcrypt -ldl $(ICCOMPFLAGS) -s" \
 		CPP="icc" CC="icc" AS="icc" LD="icc"
 
 linux-x86-64-32-sse2:
diff --git a/src/hmacMD5_fmt_plug.c b/src/hmacMD5_fmt_plug.c
index f961245..320f143 100644
--- a/src/hmacMD5_fmt_plug.c
+++ b/src/hmacMD5_fmt_plug.c
@@ -171,58 +171,22 @@ static char *hmacmd5_get_key(int index) {
 	return (char *) out;
 }
 
-static int hmacmd5_cmp_all(void *binary, int index) {
-	int i=0;
+static int hmacmd5_cmp_all(void *binary, int count) {
 #ifdef MMX_COEF
-	while(i< (BINARY_SIZE/4) )
-	{
-		if (
-			( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF])
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+1])
-#if (MMX_COEF > 3)
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+2])
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+3])
-#ifdef MD5_SSE_PARA
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+0+16*1*MMX_COEF])
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+1+16*1*MMX_COEF])
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+2+16*1*MMX_COEF])
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+3+16*1*MMX_COEF])
-#endif
-#if (MD5_SSE_PARA>2)
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+0+16*2*MMX_COEF])
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+1+16*2*MMX_COEF])
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+2+16*2*MMX_COEF])
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+3+16*2*MMX_COEF])
-#endif
-#if (MD5_SSE_PARA>3)
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+0+16*3*MMX_COEF])
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+1+16*3*MMX_COEF])
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+2+16*3*MMX_COEF])
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+3+16*3*MMX_COEF])
-#endif
-#if (MD5_SSE_PARA>4)
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+0+16*4*MMX_COEF])
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+1+16*4*MMX_COEF])
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+2+16*4*MMX_COEF])
-			&& ( ((ARCH_WORD_32 *)binary)[i] != ((ARCH_WORD_32 *)crypt_key)[i*MMX_COEF+3+16*4*MMX_COEF])
-#endif
-#if (MD5_SSE_PARA>5)
-#error hmac_md5 format only handles MD5_SSE_PARA up to 5, not over.
-#endif
+	unsigned int x,y=0;
+	/* Compare word 0 per lane; group stride 16*MMX_COEF words matches the indexing of the code this replaces. */
+#if MD5_SSE_PARA
+	for(;y<MD5_SSE_PARA;y++)
 #endif
-		)
-			return 0;
-		i++;
-	}
+		for(x=0;x<MMX_COEF;x++)
+		{
+			if( ((ARCH_WORD_32*)binary)[0] == ((ARCH_WORD_32*)crypt_key)[x+y*MMX_COEF*16] )
+				return 1;
+		}
+	return 0;
 #else
-	while(i<BINARY_SIZE)
-	{
-		if(((char *)binary)[i]!=((char *)crypt_key)[i])
-			return 0;
-		i++;
-	}
+	return !memcmp(binary, crypt_key, BINARY_SIZE);
 #endif
-	return 1;
 }
 
 static int hmacmd5_cmp_exact(char *source, int count){
diff --git a/src/memory.h b/src/memory.h
index 718e436..540c7b5 100644
--- a/src/memory.h
+++ b/src/memory.h
@@ -33,6 +33,8 @@
  */
 #if MMX_COEF
 #define MEM_ALIGN_SIMD			(MMX_COEF * 4)
+#else
+#define MEM_ALIGN_SIMD			(16)
 #endif
 
 /*
diff --git a/src/x86-64.h b/src/x86-64.h
index 27b5d96..2e9bd09 100644
--- a/src/x86-64.h
+++ b/src/x86-64.h
@@ -176,37 +176,74 @@
 #define MD5_X2				1
 #define MD5_IMM				1
 
+#ifndef MD5_SSE_PARA
 #if defined(__INTEL_COMPILER) || defined(USING_ICC_S_FILE)
-#define MD5_SSE_PARA		3
+#define MD5_SSE_PARA			3
 #define MD5_N_STR			"12x"
-#elif defined(__GNUC__)
-#define MD5_SSE_PARA		2
+#elif defined(__clang__)
+#define MD5_SSE_PARA			5
+#define MD5_N_STR			"20x"
+#elif defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 5)
+#define MD5_SSE_PARA			2
 #define MD5_N_STR			"8x"
+#elif defined(__GNUC__)
+#define MD5_SSE_PARA			3
+#define MD5_N_STR			"12x"
 #else
-#define MD5_SSE_PARA		3
+#define MD5_SSE_PARA			3
 #define MD5_N_STR			"12x"
 #endif
+#endif
 
+#ifndef MD4_SSE_PARA
 #if defined(__INTEL_COMPILER) || defined(USING_ICC_S_FILE)
-#define MD4_SSE_PARA		3
+#define MD4_SSE_PARA			3
 #define MD4_N_STR			"12x"
-#elif defined(__GNUC__)
-#define MD4_SSE_PARA		2
+#elif defined(__clang__)
+#define MD4_SSE_PARA			4
+#define MD4_N_STR			"16x"
+#elif defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 5)
+#define MD4_SSE_PARA			2
 #define MD4_N_STR			"8x"
+#elif defined(__GNUC__)
+#define MD4_SSE_PARA			3
+#define MD4_N_STR			"12x"
 #else
-#define MD4_SSE_PARA		3
+#define MD4_SSE_PARA			3
 #define MD4_N_STR			"12x"
 #endif
+#endif
 
+#ifndef SHA1_SSE_PARA
 #if defined(__INTEL_COMPILER) || defined(USING_ICC_S_FILE)
-#define SHA1_SSE_PARA		2
+#define SHA1_SSE_PARA			2
 #define SHA1_N_STR			"8x"
+#elif defined(__clang__)
+#define SHA1_SSE_PARA			2
+#define SHA1_N_STR			"8x"
+#elif defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 5))
+#define SHA1_SSE_PARA			1
+#define SHA1_N_STR			"4x"
 #elif defined(__GNUC__)
-#define SHA1_SSE_PARA		2
+#define SHA1_SSE_PARA			2
 #define SHA1_N_STR			"8x"
 #else
-#define SHA1_SSE_PARA		2
-#define SHA1_N_STR			"8x"
+#define SHA1_SSE_PARA			1
+#define SHA1_N_STR			"4x"
+#endif
+#endif
+
+#define STR_VALUE(arg)			#arg
+#define PARA_TO_N(n)			"4x" STR_VALUE(n)
+
+#ifndef MD4_N_STR
+#define MD4_N_STR			PARA_TO_N(MD4_SSE_PARA)
+#endif
+#ifndef MD5_N_STR
+#define MD5_N_STR			PARA_TO_N(MD5_SSE_PARA)
+#endif
+#ifndef SHA1_N_STR
+#define SHA1_N_STR			PARA_TO_N(SHA1_SSE_PARA)
 #endif
 
 #define BF_ASM				0
diff --git a/src/x86-ssei.h b/src/x86-ssei.h
index f56897a..4afd775 100644
--- a/src/x86-ssei.h
+++ b/src/x86-ssei.h
@@ -138,37 +138,71 @@
 #endif
 #define BF_SCALE			1
 
+#ifndef MD5_SSE_PARA
 #if defined(__INTEL_COMPILER) || defined(USING_ICC_S_FILE)
-#define MD5_SSE_PARA		3
+#define MD5_SSE_PARA			3
 #define MD5_N_STR			"12x"
+#elif defined(__clang__)
+#define MD5_SSE_PARA			4
+#define MD5_N_STR			"16x"
 #elif defined(__GNUC__) || defined (_MSC_VER)
-#define MD5_SSE_PARA		2
-#define MD5_N_STR			"8x"
-#else
-#define MD5_SSE_PARA		3
+#define MD5_SSE_PARA			3
 #define MD5_N_STR			"12x"
+#else
+#define MD5_SSE_PARA			2
+#define MD5_N_STR			"8x"
+#endif
 #endif
 
+#ifndef MD4_SSE_PARA
 #if defined(__INTEL_COMPILER) || defined(USING_ICC_S_FILE)
-#define MD4_SSE_PARA		3
+#define MD4_SSE_PARA			3
 #define MD4_N_STR			"12x"
-#elif defined(__GNUC__) || defined (_MSC_VER)
-#define MD4_SSE_PARA		2
+#elif defined(__clang__)
+#define MD4_SSE_PARA			3
+#define MD4_N_STR			"12x"
+#elif defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 5))
+#define MD4_SSE_PARA			2
 #define MD4_N_STR			"8x"
-#else
-#define MD4_SSE_PARA		3
+#elif defined(__GNUC__) || defined (_MSC_VER)
+#define MD4_SSE_PARA			3
 #define MD4_N_STR			"12x"
+#else
+#define MD4_SSE_PARA			2
+#define MD4_N_STR			"8x"
+#endif
 #endif
 
+#ifndef SHA1_SSE_PARA
 #if defined(__INTEL_COMPILER) || defined(USING_ICC_S_FILE)
-#define SHA1_SSE_PARA		2
+#define SHA1_SSE_PARA			2
+#define SHA1_N_STR			"8x"
+#elif defined(__clang__)
+#define SHA1_SSE_PARA			2
 #define SHA1_N_STR			"8x"
+#elif defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 6))
+#define SHA1_SSE_PARA			1
+#define SHA1_N_STR			"4x"
 #elif defined(__GNUC__) || defined (_MSC_VER)
-#define SHA1_SSE_PARA		2
+#define SHA1_SSE_PARA			2
 #define SHA1_N_STR			"8x"
 #else
-#define SHA1_SSE_PARA		2
-#define SHA1_N_STR			"8x"
+#define SHA1_SSE_PARA			1
+#define SHA1_N_STR			"4x"
+#endif
+#endif
+
+#define STR_VALUE(arg)			#arg
+#define PARA_TO_N(n)			"4x" STR_VALUE(n)
+
+#ifndef MD4_N_STR
+#define MD4_N_STR			PARA_TO_N(MD4_SSE_PARA)
+#endif
+#ifndef MD5_N_STR
+#define MD5_N_STR			PARA_TO_N(MD5_SSE_PARA)
+#endif
+#ifndef SHA1_N_STR
+#define SHA1_N_STR			PARA_TO_N(SHA1_SSE_PARA)
 #endif
 
 #define NT_SSE2
-- 
1.7.5.4

