From 5518aaf8022e59eed4ef31b13ff7a743fc683c02 Mon Sep 17 00:00:00 2001
From: magnum <magnum>
Date: Thu, 27 Oct 2011 18:29:35 +0200
Subject: [PATCH 32/34] j7: Crypt-MD5 OMP+SSE Made md5cryptsse() thread-safe.
 Use 12x MD5-SSE for icc, 8x for gcc.

---
 src/MD5_fmt.c        |   57 ++++++++++++++++++++++++++++++++++++++++---------
 src/sse-intrinsics.c |   17 ++++++--------
 src/x86-64.h         |    2 +-
 3 files changed, 54 insertions(+), 22 deletions(-)

diff --git a/src/MD5_fmt.c b/src/MD5_fmt.c
index 756ed24..1c00323 100644
--- a/src/MD5_fmt.c
+++ b/src/MD5_fmt.c
@@ -17,6 +17,13 @@
 #include "sse-intrinsics.h"
 #endif
 
+#ifdef _OPENMP
+#include <omp.h>
+#define OMP_MAX_PARA              384
+#else
+#define OMP_MAX_PARA              1
+#endif
+
 #define FORMAT_LABEL			"md5"
 #define FORMAT_NAME			"FreeBSD MD5"
 
@@ -33,8 +40,8 @@
 #endif
 #define SALT_SIZE			9
 
-#define MIN_KEYS_PER_CRYPT		MD5_N
-#define MAX_KEYS_PER_CRYPT		MD5_N
+#define MIN_KEYS_PER_CRYPT		(MD5_N * OMP_MAX_PARA)
+#define MAX_KEYS_PER_CRYPT		(MD5_N * OMP_MAX_PARA)
 
 static struct fmt_tests tests[] = {
 	{"$1$12345678$aIccj83HRDBo6ux1bVx7D1", "0123456789ABCDE"},
@@ -60,12 +67,29 @@ static struct fmt_tests tests[] = {
 	{NULL}
 };
 
-static char saved_key[MD5_N][PLAINTEXT_LENGTH + 1];
+static char saved_key[MAX_KEYS_PER_CRYPT][PLAINTEXT_LENGTH + 1];
 #ifdef MD5_SSE_PARA
 static unsigned char cursalt[SALT_SIZE];
 static int CryptType;
-static MD5_word sout[MD5_N*BINARY_SIZE/sizeof(MD5_word)];
+static MD5_word sout[MAX_KEYS_PER_CRYPT*BINARY_SIZE/sizeof(MD5_word)];
+static int omp_para = 1;
+#endif
+
+struct fmt_main fmt_MD5;
+
+static void init(struct fmt_main *pFmt)
+{
+#if defined(_OPENMP) && defined(MD5_SSE_PARA)
+	omp_para = 4 * omp_get_max_threads();
+	if (omp_para < 1)
+		omp_para = 1;
+	if (omp_para > OMP_MAX_PARA)
+		omp_para = OMP_MAX_PARA;
+	fmt_MD5.params.min_keys_per_crypt =
+		fmt_MD5.params.max_keys_per_crypt = MD5_N * omp_para;
 #endif
+	MD5_std_init(pFmt);
+}
 
 static int valid(char *ciphertext, struct fmt_main *pFmt)
 {
@@ -216,7 +240,7 @@ static int cmp_all(void *binary, int count)
 #ifdef MD5_SSE_PARA
 	unsigned int x,y;
 
-	for(y=0;y<MD5_SSE_PARA;y++) for(x=0;x<MMX_COEF;x++)
+	for(y=0;y<MD5_SSE_PARA*omp_para;y++) for(x=0;x<MMX_COEF;x++)
 	{
 		if( ((MD5_word *)binary)[0] == ((MD5_word *)sout)[x+y*MMX_COEF*4] )
 			return 1;
@@ -265,13 +289,20 @@ static int cmp_exact(char *source, int index)
 
 static void crypt_all(int count) {
 #ifdef MD5_SSE_PARA
+#ifdef _OPENMP
+	int t;
+#pragma omp parallel for
+	for (t = 0; t < omp_para; t++)
+		md5cryptsse((unsigned char *)(&saved_key[t*MD5_N]), cursalt, (char *)(&sout[t*MD5_N*BINARY_SIZE/sizeof(MD5_word)]), CryptType);
+#else
 	md5cryptsse((unsigned char *)saved_key, cursalt, (char *)sout, CryptType);
+#endif
 #else
 	MD5_std_crypt();
 #endif
 }
 
-void myMD5_std_set_salt(char *salt)
+static void set_salt(void *salt)
 {
 #ifdef MD5_SSE_PARA
 	memcpy(cursalt, salt, SALT_SIZE);
@@ -305,15 +336,19 @@ struct fmt_main fmt_MD5 = {
 		SALT_SIZE,
 		MIN_KEYS_PER_CRYPT,
 		MAX_KEYS_PER_CRYPT,
+#if defined(_OPENMP) && defined(MD5_SSE_PARA)
+		FMT_CASE | FMT_8_BIT | FMT_OMP,
+#else
 		FMT_CASE | FMT_8_BIT,
+#endif
 		tests
 	}, {
-		MD5_std_init,
+		init,
 		fmt_default_prepare,
 		valid,
 		fmt_default_split,
-		get_binary,		//(void *(*)(char *))MD5_std_get_binary,
-		get_salt,		//(void *(*)(char *))MD5_std_get_salt,
+		get_binary,
+		get_salt,
 		{
 			binary_hash_0,
 			binary_hash_1,
@@ -322,11 +357,11 @@ struct fmt_main fmt_MD5 = {
 			binary_hash_4
 		},
 		salt_hash,
-		(void (*)(void *))myMD5_std_set_salt,
+		set_salt,
 		set_key,
 		get_key,
 		fmt_default_clear_keys,
-		crypt_all,		// (void (*)(int))MD5_std_crypt,
+		crypt_all,
 		{
 			get_hash_0,
 			get_hash_1,
diff --git a/src/sse-intrinsics.c b/src/sse-intrinsics.c
index b6de01d..25a9418 100644
--- a/src/sse-intrinsics.c
+++ b/src/sse-intrinsics.c
@@ -301,16 +301,6 @@ void dispatch(unsigned char buffers[8][64*MD5_SSE_NUM_KEYS], unsigned int f[4*MD
 }
 
 
-#define buffers MD5_INTR_buffers
-#define F MD5_INTR_F
-#ifdef _MSC_VER
-	__declspec(align(16)) unsigned char buffers[8][64*MD5_SSE_NUM_KEYS];
-	__declspec(align(16)) unsigned int F[4*MD5_SSE_NUM_KEYS];
-#else
-	unsigned char buffers[8][64*MD5_SSE_NUM_KEYS] __attribute__ ((aligned(16)));
-	unsigned int F[4*MD5_SSE_NUM_KEYS] __attribute__ ((aligned(16)));
-#endif
-
 void md5cryptsse(unsigned char pwd[MD5_SSE_NUM_KEYS][16], unsigned char * salt, char * out, int md5_type)
 {
 	unsigned int length[MD5_SSE_NUM_KEYS];
@@ -320,6 +310,13 @@ void md5cryptsse(unsigned char pwd[MD5_SSE_NUM_KEYS][16], unsigned char * salt,
 	unsigned int i,j;
 	MD5_CTX ctx;
 	MD5_CTX tctx;
+#ifdef _MSC_VER
+	__declspec(align(16)) unsigned char buffers[8][64*MD5_SSE_NUM_KEYS];
+	__declspec(align(16)) unsigned int F[4*MD5_SSE_NUM_KEYS];
+#else
+	unsigned char buffers[8][64*MD5_SSE_NUM_KEYS] __attribute__ ((aligned(16)));
+	unsigned int F[4*MD5_SSE_NUM_KEYS] __attribute__ ((aligned(16)));
+#endif
 
 	memset(F,0,sizeof(F));
 	memset(buffers, 0, sizeof(buffers));
diff --git a/src/x86-64.h b/src/x86-64.h
index b56d869..98d4f0c 100644
--- a/src/x86-64.h
+++ b/src/x86-64.h
@@ -141,7 +141,7 @@
 #define MD5_X2				1
 #define MD5_IMM				1
 
-#ifdef __GNUC__
+#if !defined(USING_ICC_S_FILE) && defined(__GNUC__) && !defined(__INTEL_COMPILER)
 #define MD5_SSE_PARA		2
 #define MD5_N_STR			"8x"
 #else
-- 
1.7.5.4

