diff -urpN john-1.7.8.orig//src/Makefile john-1.7.8-allcuda_1/src/Makefile
--- john-1.7.8.orig//src/Makefile	2011-05-04 18:52:31.000000000 +0000
+++ john-1.7.8-allcuda_1/src/Makefile	2011-08-22 22:16:40.000000000 +0000
@@ -3,6 +3,7 @@
 # Copyright (c) 1996-2011 by Solar Designer
 #
 
+NVCC = nvcc
 CC = gcc
 AS = $(CC)
 LD = $(CC)
@@ -19,6 +20,17 @@ OMPFLAGS =
 #OMPFLAGS = -fopenmp
 # Sun Studio with OpenMP (set the OMP_NUM_THREADS env var at runtime)
 #OMPFLAGS = -xopenmp
+CUDAPATH = /usr/local/cuda/lib
+CUDA64PATH = /usr/local/cuda/lib64
+
+NVCC_FLAGS = -c -Xptxas -v -arch sm_10
+PHPASS_FLAGS = $(NVCC_FLAGS)
+CRYPTMD5_FLAGS = $(NVCC_FLAGS)
+CRYPTSHA256_FLAGS = $(NVCC_FLAGS)
+CRYPTSHA512_FLAGS = $(NVCC_FLAGS)
+
+
+
 CFLAGS = -c -Wall -O2 -fomit-frame-pointer $(OMPFLAGS)
 ASFLAGS = -c $(OMPFLAGS)
 LDFLAGS = -s $(OMPFLAGS)
@@ -41,6 +53,12 @@ JOHN_OBJS = \
 	unafs.o \
 	unique.o
 
+CUDA_OBJS = \
+	phpasscuda_fmt.o phpass.o \
+	cryptmd5cuda_fmt.o cryptmd5.o \
+	cryptsha256cuda_fmt.o cryptsha256.o \
+	cryptsha512cuda_fmt.o cryptsha512.o
+
 BENCH_DES_OBJS_ORIG = \
 	DES_fmt.o DES_std.o
 
@@ -74,11 +92,13 @@ default:
 	@echo "To build John the Ripper, type:"
 	@echo "	make clean SYSTEM"
 	@echo "where SYSTEM can be one of the following:"
+	@echo "linux-x86-64-cuda        Linux, x86-64 with SSE2 and CUDA (experimental)"
 	@echo "linux-x86-64             Linux, x86-64 with SSE2 (best tested)"
 	@echo "linux-x86-64-avx         Linux, x86-64 with AVX (experimental)"
 	@echo "linux-x86-64-xop         Linux, x86-64 with AVX and XOP (experimental)"
 #	@echo "linux-x86-64-32-sse2     Linux, x86-64, 32-bit with SSE2"
 #	@echo "linux-x86-64-32-mmx      Linux, x86-64, 32-bit with MMX"
+	@echo "linux-x86-cuda           Linux, x86 32-bit with SSE2 and CUDA (experimental)"
 	@echo "linux-x86-sse2           Linux, x86 32-bit with SSE2 (best tested if 32-bit)"
 	@echo "linux-x86-mmx            Linux, x86 32-bit with MMX (for old computers)"
 	@echo "linux-x86-any            Linux, x86 32-bit (for truly ancient computers)"
@@ -148,6 +168,13 @@ default:
 	@echo "beos-x86-any             BeOS, x86"
 	@echo "generic                  Any other Unix-like system with gcc"
 
+linux-x86-64-cuda:
+	$(LN) x86-64.h arch.h
+	$(MAKE) $(PROJ) \
+		JOHN_OBJS="$(JOHN_OBJS) $(CUDA_OBJS) c3_fmt.o x86-64.o" \
+		CFLAGS="$(CFLAGS) -DHAVE_CRYPT" \
+		LDFLAGS="$(LDFLAGS) -L$(CUDA64PATH) -lcudart  -lcrypt"
+
 linux-x86-64:
 	$(LN) x86-64.h arch.h
 	$(MAKE) $(PROJ) \
@@ -185,6 +212,13 @@ linux-x86-64-32-mmx:
 		ASFLAGS="$(ASFLAGS) -m32" \
 		LDFLAGS="$(LDFLAGS) -m32 -lcrypt"
 
+linux-x86-cuda:
+	$(LN) x86-sse.h arch.h
+	$(MAKE) $(PROJ) \
+		JOHN_OBJS="$(JOHN_OBJS) $(CUDA_OBJS) c3_fmt.o x86.o x86-sse.o" \
+		CFLAGS="$(CFLAGS) -DHAVE_CRYPT" \
+		LDFLAGS="$(LDFLAGS) -L$(CUDAPATH) -lcudart -lcrypt"
+
 linux-x86-sse2:
 	$(LN) x86-sse.h arch.h
 	$(MAKE) $(PROJ) \
@@ -814,6 +848,34 @@ generic.h:
 bench: $(BENCH_OBJS)
 	$(LD) $(BENCH_OBJS) $(LDFLAGS) -o bench
 
+phpass.o:  phpass.h cuda/phpass.cu
+	cd cuda; nvcc $(PHPASS_FLAGS) phpass.cu
+
+phpasscuda_fmt.o: phpass.o phpasscuda_fmt.c phpass.h
+	cp cuda/phpass.o phpass.o
+	$(CC)  $(CFLAGS) phpasscuda_fmt.c
+
+cryptmd5.o:  cryptmd5.h cuda/cryptmd5.cu
+	cd cuda; nvcc $(CRYPTMD5_FLAGS) cryptmd5.cu
+	cp cuda/cryptmd5.o cryptmd5.o
+
+cryptmd5_fmt.o: cryptmd5.o cryptmd5cuda_fmt.c cryptmd5.h
+	$(CC)  $(CFLAGS) cryptmd5cuda_fmt.c
+
+cryptsha256.o:  cryptsha256.h cuda/cryptsha256.cu
+	cd cuda; nvcc $(CRYPTSHA256_FLAGS) cryptsha256.cu
+	cp cuda/cryptsha256.o cryptsha256.o
+
+cryptsha256_fmt.o: cryptsha256.o cryptsha256cuda_fmt.c cryptsha256.h
+	$(CC)  $(CFLAGS) cryptsha256cuda_fmt.c
+
+cryptsha512.o:  cryptsha512.h cuda/cryptsha512.cu
+	cd cuda; nvcc $(CRYPTSHA512_FLAGS) cryptsha512.cu
+	cp cuda/cryptsha512.o cryptsha512.o
+
+cryptsha512_fmt.o: cryptsha512.o cryptsha512cuda_fmt.c cryptsha512.h
+	$(CC)  $(CFLAGS) cryptsha512cuda_fmt.c
+
 ../run/john: $(JOHN_OBJS)
 	$(LD) $(JOHN_OBJS) $(LDFLAGS) -o ../run/john
 
@@ -890,6 +952,7 @@ depend:
 
 clean:
 	$(RM) $(PROJ) $(PROJ_DOS) $(PROJ_WIN32)
+	$(RM) cuda/*.o cuda/*~ *~
 	$(RM) ../run/john.exe john-macosx-* *.o *.bak core
 	$(RM) detect bench generic.h arch.h tmp.s
 	$(CP) $(NULL) Makefile.dep
diff -urpN john-1.7.8.orig//src/cryptmd5.h john-1.7.8-allcuda_1/src/cryptmd5.h
--- john-1.7.8.orig//src/cryptmd5.h	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.8-allcuda_1/src/cryptmd5.h	2011-08-22 21:55:03.000000000 +0000
@@ -0,0 +1,108 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba
+* <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#ifndef _CRYPTMD5_H
+#define _CRYPTMD5_H
+#include <assert.h>
+#include "common.h"
+#include <stdint.h>
+#include <stdbool.h>
+
+#define uint32_t unsigned int
+#define uint8_t unsigned char
+
+#define MIN(a,b) ((a)<(b)?(a):(b))
+#define MAX(a,b) ((a)>(b)?(a):(b))
+
+#define BLOCKS 28
+#define THREADS 256 //set 384 for fermi
+#define KEYS_PER_CRYPT BLOCKS*THREADS
+#define PLAINTEXT_LENGTH	15
+
+typedef struct {
+	unsigned char saltlen;
+	char salt[8];
+	char prefix;		// 'a' when $apr1$ or '1' when $1$
+} crypt_md5_salt;
+
+typedef struct {
+	unsigned char length;
+	unsigned char v[PLAINTEXT_LENGTH];
+} crypt_md5_password;
+
+typedef struct {
+	uint32_t v[4];		//128 bits
+} crypt_md5_hash;
+
+typedef struct {
+#define ctx_buffsize 64
+	uint8_t buffer[64];
+	uint32_t ALIGN;
+} md5_ctx;
+
+static const char md5_salt_prefix[] = "$1$";
+static const char apr1_salt_prefix[] = "$apr1$";
+
+#define ROTATE_LEFT(x, s) ((x << s) | (x >> (32 - s)))
+
+#define F(x, y, z) (x&y | ~x&z)
+#define G(x, y, z) (x&z | y&~z)
+
+#define H(x, y, z) (x^y^z)
+#define I(x, y, z) (y^(x|~z))
+
+#define FF(v, w, x, y, z, s, ac) { \
+ v += F(w, x, y) + z + ac; \
+ v = ROTATE_LEFT(v, s) + w; \
+ }
+#define GG(v, w, x, y, z, s, ac) { \
+ v += G(w, x, y) + z + ac; \
+ v = ROTATE_LEFT(v, s) + w; \
+ }
+#define HH(v, w, x, y, z, s, ac) { \
+ v += H(w, x, y) + z + ac; \
+ v = ROTATE_LEFT(v, s) + w; \
+ }
+#define II(v, w, x, y, z, s, ac) { \
+ v += I(w, x, y) + z + ac; \
+ v = ROTATE_LEFT(v, s) + w; \
+ }
+
+#define FF2(v, w, x, y, s, ac) { \
+ v += F(w, x, y) + ac; \
+ v = ROTATE_LEFT(v, s) + w; \
+ }
+#define GG2(v, w, x, y, s, ac) { \
+ v += G(w, x, y) +  ac; \
+ v = ROTATE_LEFT(v, s) + w; \
+ }
+#define HH2(v, w, x, y, s, ac) { \
+ v += H(w, x, y) + ac; \
+ v = ROTATE_LEFT(v, s) + w; \
+ }
+#define II2(v, w, x, y, s, ac) { \
+ v += I(w, x, y) + ac; \
+ v = ROTATE_LEFT(v, s) + w; \
+ }
+
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+
+#endif
diff -urpN john-1.7.8.orig//src/cryptmd5cuda_fmt.c john-1.7.8-allcuda_1/src/cryptmd5cuda_fmt.c
--- john-1.7.8.orig//src/cryptmd5cuda_fmt.c	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.8-allcuda_1/src/cryptmd5cuda_fmt.c	2011-08-22 22:15:49.000000000 +0000
@@ -0,0 +1,330 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba
+* <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <string.h>
+#include <unistd.h>
+#include "arch.h"
+#include "formats.h"
+#include "common.h"
+#include "misc.h"
+#include "cryptmd5.h"
+
+#define FORMAT_LABEL		"cryptmd5cuda"
+#define FORMAT_NAME		"CRYPTMD5CUDA"
+
+#define CRYPT_TYPE		"MD5-based CRYPT"
+
+#define BENCHMARK_COMMENT	" saltlen=8,passlen=8"
+#define BENCHMARK_LENGTH	-1
+
+#define BINARY_SIZE		16
+#define SALT_SIZE		(8+1)
+#define MIN_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+#define MAX_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+
+void md5_crypt_gpu(crypt_md5_password *, crypt_md5_hash *, crypt_md5_salt *);
+
+static crypt_md5_password inbuffer[MAX_KEYS_PER_CRYPT];			/** plaintext ciphertexts **/
+static crypt_md5_hash outbuffer[MAX_KEYS_PER_CRYPT];			/** calculated hashes **/
+static crypt_md5_salt host_salt;					/** salt **/
+
+
+//tests are unified for 8+8 length
+static struct fmt_tests tests[] = {
+/*	   {"$1$Btiy90iG$bGn4vzF3g1rIVGZ5odGIp/","qwerty"},
+	   {"$1$salt$c813W/s478KCzR0NnHx7j0","qwerty"},
+	   {"$1$salt$8LO.EVfsTf.HATV1Bd0ZP/","john"},
+	   {"$1$salt$TelRRxWBCxlpXmgAeB82R/","openwall"},
+	   {"$1$salt$l9PzDiECW83MOIMFTRL4Y1","summerofcode"},
+	   {"$1$salt$wZ2yVsplRoPoD7IfTvRsa0","IamMD5"},
+	   {"$1$saltstri$9S4.PyBpUZBRZw6ZsmFQE/","john"},
+	   {"$1$saltstring$YmP55hH3qcHg2cCffyxrq/","ala"},
+*/
+//	   {"$1$salt1234$mdji1uBBCWZ5m2mIWKvLW.","a"},
+//	   {"$1$salt1234$/JUvhIWHD.csWSCPvr7po0","ab"},
+//	   {"$1$salt1234$GrxHg1bgkN2HB5CRCdrmF.","abc"},
+//	   {"$1$salt1234$iZuyvTkrucWx8kVn5BN4M/","abcd"},
+//	   {"$1$salt1234$wn0RbuDtbJlD1Q.X7.9wG/","abcde"},
+
+//	   {"$1$salt1234$lzB83HS4FjzbcD4yMcjl01","abcdef"},
+//	    {"$1$salt1234$bklJHN73KS04Kh6j6qPnr.","abcdefg"}, 
+	{"$1$salt1234$u4RMKGXG2b/Ud2rFmhqi70", "abcdefgh"},	//saltlen=8,passlen=8
+//	   {"$1$salt1234$QjP48HUerU7aUYc/aJnre1","abcdefghi"},
+//	   {"$1$salt1234$9jmu9ldi9vNw.XDO3TahR.","abcdefghij"},
+
+//	   {"$1$salt1234$d3.LnlDWfkTIej5Ef1sCU/","abcdefghijk"},
+//	   {"$1$salt1234$pDV0xEgZR14EpQMmhZ6Hg0","abcdefghijkl"},
+//	   {"$1$salt1234$WumpbolX2y45Dlv0.A1Mj1","abcdefghijklm"},
+//	   {"$1$salt1234$FXBreA27b7N7diemBGn5I1","abcdefghijklmn"},
+//	   {"$1$salt1234$8d5IPIbTd7J/WNEG4b4cl.","abcdefghijklmno"},
+	   
+	   //tests from korelogic2010 contest
+/*	   {"$1$bn6UVs3/$S6CQRLhmenR8OmVp3Jm5p0","sparky"},
+	   {"$1$qRiPuG5Z$pLLczmBnwEOD75Vb7YZLg1","walter"},
+	   {"$1$E.qsK.Hy$.eX0H6arTHaGOIFkf6o.a.","heaven"},
+	   {"$1$Hul2mrWs$.NGCgz3fBGDyG7RMGJAdM0","bananas"},
+	   {"$1$1l88Y.UV$swt2d0SPMrBPkdAD8RwSj0","horses"},
+	   {"$1$DiHrL6V7$fCVDD1GEAKB.BjAgJL1ZX0","maddie"},
+	   {"$1$7fpfV7kr$7LgF64DGPtHPktVKdLM490","bitch1"},
+	   {"$1$VKjk2PJc$5wbrtc9oa8kdEO/ocyi06/","crystal"},
+	   {"$1$S66DxkFm$kG.QfeHNLifEDTDmf4pzJ/","claudia"},
+	   {"$1$T2JMeEYj$Y.wDzFvyb9nlH1EiSCI3M/","august"}, 
+	 
+	   //tests from MD5_fmt.c
+	   {"$1$12345678$aIccj83HRDBo6ux1bVx7D1", "0123456789ABCDE"},
+	   {"$apr1$Q6ZYh...$RV6ft2bZ8j.NGrxLYaJt9.", "test"},
+	   {"$1$12345678$f8QoJuo0DpBRfQSD0vglc1", "12345678"},
+	   {"$1$$qRPK7m23GJusamGpoGLby/", ""},
+	   {"$apr1$a2Jqm...$grFrwEgiQleDr0zR4Jx1b.", "15 chars is max"},
+	   {"$1$$AuJCr07mI7DSew03TmBIv/", "no salt"},
+	   {"$1$`!@#%^&*$E6hD76/pKTS8qToBCkux30", "invalid salt"},
+	   {"$1$12345678$xek.CpjQUVgdf/P2N9KQf/", ""},
+	   {"$1$1234$BdIMOAWFOV2AQlLsrN/Sw.", "1234"},
+	   {"$apr1$rBXqc...$NlXxN9myBOk95T0AyLAsJ0", "john"},
+	   {"$apr1$Grpld/..$qp5GyjwM2dnA5Cdej9b411", "the"},
+	   {"$apr1$GBx.D/..$yfVeeYFCIiEXInfRhBRpy/", "ripper"},
+*/
+	{NULL}
+};
+
+static void init(void)
+{
+  cryptmd5_init(0);
+}
+
+static int valid(char *ciphertext)
+{
+	uint8_t i, len = strlen(ciphertext), prefix = 0;
+
+	if (strncmp(ciphertext, md5_salt_prefix, strlen(md5_salt_prefix)) == 0)
+		prefix |= 1;
+	if (strncmp(ciphertext, apr1_salt_prefix,
+		strlen(apr1_salt_prefix)) == 0)
+		prefix |= 2;
+	if (prefix == 0)
+		return 0;
+
+	char *p = strrchr(ciphertext, '$');
+	for (i = p - ciphertext + 1; i < len; i++) {
+		uint8_t z = ARCH_INDEX(ciphertext[i]);
+		if (ARCH_INDEX(atoi64[z]) == 0x7f)
+			return 0;
+	}
+	if (len - (p - ciphertext + 1) != 22)
+		return 0;
+	return 1;
+};
+
+static int findb64(char c)
+{
+	int ret = ARCH_INDEX(atoi64[(uint8_t) c]);
+	return ret != 0x7f ? ret : 0;
+}
+
+static void to_binary(char *crypt, char *alt)
+{
+
+#define _24bit_from_b64(I,B2,B1,B0) \
+  do{\
+      unsigned char c1=findb64(crypt[I+0]);\
+      unsigned char c2=findb64(crypt[I+1]);\
+      unsigned char c3=findb64(crypt[I+2]);\
+      unsigned char c4=findb64(crypt[I+3]);\
+      unsigned int w=c4<<18|c3<<12|c2<<6|c1;\
+      unsigned char b2=w&0xff;w>>=8;\
+      unsigned char b1=w&0xff;w>>=8;\
+      unsigned char b0=w&0xff;w>>=8;\
+      alt[B2]=b0;\
+      alt[B1]=b1;\
+      alt[B0]=b2;\
+  }while(0)
+
+	_24bit_from_b64(0, 0, 6, 12);
+	_24bit_from_b64(4, 1, 7, 13);
+	_24bit_from_b64(8, 2, 8, 14);
+	_24bit_from_b64(12, 3, 9, 15);
+	_24bit_from_b64(16, 4, 10, 5);
+	uint32_t w = findb64(crypt[21]) << 6 | findb64(crypt[20]) << 0;
+	alt[11] = (w & 0xff);
+}
+
+static void *binary(char *ciphertext)
+{
+	static char b[BINARY_SIZE];
+	memset(b, 0, BINARY_SIZE);
+	char *p = strrchr(ciphertext, '$') + 1;
+	to_binary(p, b);
+	return (void *) b;
+}
+
+
+static void *salt(char *ciphertext)
+{
+	static uint8_t ret[SALT_SIZE];
+	memset(ret, 0, SALT_SIZE);
+	uint8_t i, *pos = (uint8_t *) ciphertext, *dest = ret, *end;
+
+	if (strncmp(ciphertext, md5_salt_prefix, strlen(md5_salt_prefix)) == 0) {
+		pos += strlen(md5_salt_prefix);
+		ret[8] = '1';
+	}
+	if (strncmp(ciphertext, apr1_salt_prefix,
+		strlen(apr1_salt_prefix)) == 0) {
+		pos += strlen(apr1_salt_prefix);
+		ret[8] = 'a';
+	}
+	end = pos;
+	for (i = 0; i < 8 && *end != '$'; i++, end++);
+	while (pos != end)
+		*dest++ = *pos++;
+	return (void *) ret;
+}
+
+static int binary_hash_0(void *binary)
+{
+	return (((ARCH_WORD_32 *) binary)[0] & 0xf);
+}
+
+static int binary_hash_1(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xff;
+}
+
+static int binary_hash_2(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfff;
+}
+
+static int binary_hash_3(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xffff;
+}
+
+static int binary_hash_4(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfffff;
+}
+
+static void set_salt(void *salt)
+{
+	uint8_t *s = salt;
+	uint8_t len;
+	for (len = 0; len < 8 && s[len]; len++);
+	host_salt.saltlen = len;
+	memcpy(host_salt.salt, s, host_salt.saltlen);
+	host_salt.prefix = s[8];
+}
+
+static void set_key(char *key, int index)
+{
+	uint32_t len = strlen(key);
+	inbuffer[index].length = len;
+	memcpy((char *) inbuffer[index].v, key, len);
+}
+
+static char *get_key(int index)
+{
+	static char ret[PLAINTEXT_LENGTH+1];
+	memcpy(ret,inbuffer[index].v,PLAINTEXT_LENGTH);
+	ret[inbuffer[index].length]='\0';
+	return ret;
+}
+
+static void crypt_all(int count)
+{
+	md5_crypt_gpu(inbuffer, outbuffer, &host_salt);
+}
+
+static int get_hash_0(int index)
+{
+	return ((ARCH_WORD_32 *) outbuffer[index].v)[0] & 0xf;
+}
+
+static int get_hash_1(int index)
+{
+	return ((ARCH_WORD_32 *) outbuffer[index].v)[0] & 0xff;
+}
+
+static int get_hash_2(int index)
+{
+	return ((ARCH_WORD_32 *) outbuffer[index].v)[0] & 0xfff;
+}
+
+static int get_hash_3(int index)
+{
+	return ((ARCH_WORD_32 *) outbuffer[index].v)[0] & 0xffff;
+}
+
+static int get_hash_4(int index)
+{
+	return ((ARCH_WORD_32 *) outbuffer[index].v)[0] & 0xfffff;
+}
+
+static int cmp_all(void *binary, int count)
+{
+	uint32_t i, b = ((uint32_t *) binary)[0];
+	for (i = 0; i < count; i++)
+		if (b == outbuffer[i].v[0])
+			return 1;
+	return 0;
+}
+
+static int cmp_one(void *binary, int index)
+{
+	uint32_t i, *t = (uint32_t *) binary;
+	for (i = 0; i < 4; i++)
+		if (t[i] != outbuffer[index].v[i])
+			return 0;
+	return 1;
+}
+
+static int cmp_exact(char *source, int count)
+{
+	return 1;
+}
+
+struct fmt_main fmt_CRYPTMD5CUDA = {
+	{
+		    FORMAT_LABEL,
+		    FORMAT_NAME,
+		    CRYPT_TYPE,
+		    BENCHMARK_COMMENT,
+		    BENCHMARK_LENGTH,
+		    PLAINTEXT_LENGTH,
+		    BINARY_SIZE,
+		    SALT_SIZE,
+		    MIN_KEYS_PER_CRYPT,
+		    MAX_KEYS_PER_CRYPT,
+		    FMT_CASE | FMT_8_BIT,
+	    tests},
+	{
+		    init,
+		    valid,
+		    fmt_default_split,
+		    binary,
+		    salt,
+		    {
+				binary_hash_0,
+				binary_hash_1,
+				binary_hash_2,
+				binary_hash_3,
+			binary_hash_4},
+		    fmt_default_salt_hash,
+		    set_salt,
+		    set_key,
+		    get_key,
+		    fmt_default_clear_keys,
+		    crypt_all,
+		    {
+				get_hash_0,
+				get_hash_1,
+				get_hash_2,
+				get_hash_3,
+			get_hash_4},
+		    cmp_all,
+		    cmp_one,
+	    cmp_exact}
+};
diff -urpN john-1.7.8.orig//src/cryptsha256.h john-1.7.8-allcuda_1/src/cryptsha256.h
--- john-1.7.8.orig//src/cryptsha256.h	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.8-allcuda_1/src/cryptsha256.h	2011-08-22 22:15:04.000000000 +0000
@@ -0,0 +1,67 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba
+* <lukas dot odzioba at gmail dot com>
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#ifndef _CRYPTSHA256_H
+#define _CRYPTSHA256_H
+
+#include <assert.h>
+#include "common.h"
+#include <stdint.h>
+#include <stdbool.h>
+
+#define BLOCKS 14
+#define THREADS 192//set 320 for fermi
+
+#define KEYS_PER_CRYPT BLOCKS*THREADS
+
+#define uint32_t unsigned int
+#define uint8_t unsigned char
+
+#define rol(x,n) ((x << n) | (x >> (32-n)))
+#define ror(x,n) ((x >> n) | (x << (32-n)))
+#define Ch(x,y,z) ((x & y) ^ ( (~x) & z))
+#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
+#define Sigma0(x) ((ror(x,2))  ^ (ror(x,13)) ^ (ror(x,22)))
+#define Sigma1(x) ((ror(x,6))  ^ (ror(x,11)) ^ (ror(x,25)))
+#define sigma0(x) ((ror(x,7))  ^ (ror(x,18)) ^(x>>3))
+#define sigma1(x) ((ror(x,17)) ^ (ror(x,19)) ^(x>>10))
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define SHOW(x) printf("%s = %08x\n",#x,(x))
+# define SWAP(n) \
+    (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
+
+#define SALT_LEN_MAX 16
+#define ROUNDS_DEFAULT 5000
+#define ROUNDS_MIN 1000
+#define ROUNDS_MAX 999999999
+
+typedef struct {
+	uint32_t H[8];
+	uint32_t total;
+	uint32_t buflen;
+	uint8_t buffer[64];
+} sha256_ctx;
+
+typedef struct {
+	unsigned char saltlen;
+	uint32_t rounds;
+	char salt[16];
+} crypt_sha256_salt;
+
+typedef struct {
+	unsigned char length;
+	unsigned char v[15];
+} crypt_sha256_password;
+
+
+#define hash_addr(j,idx) (((j)*(KEYS_PER_CRYPT))+(idx))
+typedef struct {
+	uint32_t v[8];		//256 bits
+} crypt_sha256_hash;
+
+#endif
diff -urpN john-1.7.8.orig//src/cryptsha256cuda_fmt.c john-1.7.8-allcuda_1/src/cryptsha256cuda_fmt.c
--- john-1.7.8.orig//src/cryptsha256cuda_fmt.c	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.8-allcuda_1/src/cryptsha256cuda_fmt.c	2011-08-22 22:15:53.000000000 +0000
@@ -0,0 +1,359 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba
+* <lukas dot odzioba at gmail dot com>
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <string.h>
+#include "arch.h"
+#include "formats.h"
+#include "common.h"
+#include "misc.h"
+#include "cryptsha256.h"
+#include <unistd.h>
+
+#define FORMAT_LABEL		"cryptsha256cuda"
+#define FORMAT_NAME		"CRYPTSHA256CUDA"
+
+#define CRYPT_TYPE		"SHA256-based CRYPT"
+
+#define BENCHMARK_COMMENT	""
+#define BENCHMARK_LENGTH	-1
+
+#define PLAINTEXT_LENGTH	15
+#define BINARY_SIZE		32
+#define MD5_DIGEST_LENGTH 	16
+
+#define SALT_SIZE		(3+7+9+16)
+
+#define MIN_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+#define MAX_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+
+extern void sha256_crypt_gpu(crypt_sha256_password * inbuffer,
+    uint32_t * outbuffer, crypt_sha256_salt * host_salt);
+
+static crypt_sha256_password inbuffer[MAX_KEYS_PER_CRYPT];			/** plaintext ciphertexts **/
+static uint32_t outbuffer[MAX_KEYS_PER_CRYPT * 8];				/** calculated hashes **/
+
+static char currentsalt[64];
+static crypt_sha256_salt host_salt;
+
+void sha256_crypt_cpu(crypt_sha256_password * passwords,
+    crypt_sha256_hash * output, crypt_sha256_salt * salt);
+
+
+static struct fmt_tests tests[] = {
+	{"$5$saltstring$5B8vYYiY.CVt1RlTTf8KbXBH3hsxY/GNooZaBBGWEc5",
+	    "Hello world!"},
+	/*{"$5$UOUBPEMKQRHHRFML$zicoLpMLhBsNGtEplY/ehM0NtiAqxijiBCrolt7WBW0","jjti"},
+
+	{"$5$XSLWLBSQUCNOWXOB$i7Ho5wUAIjsH2e2zA.WarqYLWir5nmZbUEcjK//Or7.","hgnirgayjnhvi"},
+	{"$5$VDCTRFOIDQXRQVHR$uolqT0wEwU.pvI9jq5xU457JQpiwTTKX3PB/9RS4/h4","o"},
+	{"$5$WTYWNCYHNPMXPG$UwZyrq0irhWs4OcLKcqSbFdktZaNAD2by1CiNNw7oID","tcepf"},
+	{"$5$DQUHKJNMVOEBGBG$91u2d/jMN5QuW3/kBEPG0xC2G8y1TuDU7SGAUYTX.y0","wbfhoc"},
+
+
+	{"$5$saltstring$0Az3qME7zTXm78kfHrR2OtT8WOu2gd8bcVn/9Y.3l/7", "john"},
+
+	 {"$5$saltstring$7cz4bTeQ7MnNssphNhFVrITtuJYY/1tdvLL2uzLvOk8","a"},
+	 {"$5$saltstring$4Wjlxdm/Hbpo8ZQzKFazuvfUZPVVUQn6v1oPTX3nwX/","ab"},
+	 {"$5$saltstring$tDHA0KPsYQ8V.LDB1/fgW7cvROod5ZajSrx1tZU2JG9","abc"},
+	 {"$5$saltstring$LfhGTHVGfbAkxy/xKLgvSfXyeE7hZheoMRKhjfvNF6.","abcd"},
+	 {"$5$saltstring$Qg0Xm9f2VY.ePLAwNXnOPU/s8btLptK/tEU/gFnn8BD","abcde"},
+	 {"$5$saltstring$2Snf.yaHnLnLI3Qhsk2S119X4vKbwQyiTMOHp3Oy7F5","abcdef"},
+	 {"$5$saltstring$4Y5UR.6zwplRx6y93NJVyNkxqdlyT64EV68F2mCrZ16","abcdefg"},
+	 {"$5$saltstring$bEM3iuUR.CTgy8Wygh4zu.CAgmlwx3uxm3dGA34.Ij4","abcdefgh"},
+	 {"$5$saltstring$1/OrKXZSFlaEE2DKMhKKE8qCld5X0Ez0vtz5TvO3U3D","abcdefghi"},
+	 {"$5$saltstring$1IbZU70/Wo9m1b40ha6Ao8d.v6Ja0.bAFg5/QFVzoX/","abcdefghij"},
+	 
+	 {"$5$saltstring$S4gCgloAzqAXE5sRz9DShPvaXrwt4vjDJ4fYgIMbLo1","abcdefghijk"},
+	 {"$5$saltstring$AFNSzsWaoMDvt7lk2bx0rPapzCz2zGahXDdFeoXrNE9","abcdefghijkl"},
+	 {"$5$saltstring$QfHc8JBd2DfyloVL0YLDa23Dc67N9mbdYqyRJQlFqZ5","abcdefghijklm"},
+	 {"$5$saltstring$XKHiS.SSJ545PvJJr2t.HyUpmPZDAIT8fVvzr/HGhd0","abcdefghijklmn"},
+	{"$5$saltstring$VxW44bFDcvixlQoTE4E.k5c8v1w0fGMyZ4tn8nGcWn0","abcdefghijklmno"},
+	
+	 {"$5$QSTVVEKDIDYRNK$4j8TST.29P07GHASD.BUHd0UTaFz7h.Mz//zcHokoZ5","cgyihfkqk"},
+	 
+	 */
+	 
+	 //{"$5$rounds=5000$abcdefghijklmnop$BAYQep7SsuSczAeXlks3F54SpxMUUludHi1C4JVOqpD","abcdefghijklmno"},
+	{NULL}
+};
+
+static void init(void)
+{
+  cryptsha256_init(0);
+}
+
+static int valid(char *ciphertext)
+{
+	uint32_t i, j;
+	int len = strlen(ciphertext);
+
+	if (strncmp(ciphertext, "$5$", 3) != 0)
+		return 0;
+	char *p = strrchr(ciphertext, '$');
+	for (i = p - ciphertext + 1; i < len; i++) {
+		int found = 0;
+		for (j = 0; j < 64; j++)
+			if (itoa64[j] == ARCH_INDEX(ciphertext[i])) {
+				found = 1;
+				break;
+			}
+		if (found == 0)
+			return 0;
+	}
+	if (len - (p - ciphertext + 1) != 43)
+		return 0;
+	return 1;
+};
+
+static int findb64(char c)
+{
+	int i = 0;
+	for (i = 0; i < 64; i++)
+		if (c == itoa64[i])
+			return i;
+	return 0;
+}
+
+static void magic(char *crypt, char *alt)
+{
+
+#define _24bit_from_b64(I,B2,B1,B0) \
+  do{\
+      unsigned char c1=findb64(crypt[I+0]);\
+      unsigned char c2=findb64(crypt[I+1]);\
+      unsigned char c3=findb64(crypt[I+2]);\
+      unsigned char c4=findb64(crypt[I+3]);\
+      unsigned int w=c4<<18|c3<<12|c2<<6|c1;\
+      unsigned char b2=w&0xff;w>>=8;\
+      unsigned char b1=w&0xff;w>>=8;\
+      unsigned char b0=w&0xff;w>>=8;\
+      alt[B2]=b0;\
+      alt[B1]=b1;\
+      alt[B0]=b2;\
+  }while(0)
+
+	_24bit_from_b64(0, 0, 10, 20);
+	_24bit_from_b64(4, 21, 1, 11);
+	_24bit_from_b64(8, 12, 22, 2);
+	_24bit_from_b64(12, 3, 13, 23);
+	_24bit_from_b64(16, 24, 4, 14);
+	_24bit_from_b64(20, 15, 25, 5);
+	_24bit_from_b64(24, 6, 16, 26);
+	_24bit_from_b64(28, 27, 7, 17);
+	_24bit_from_b64(32, 18, 28, 8);
+	_24bit_from_b64(36, 9, 19, 29);
+	unsigned int w =
+	    findb64(crypt[42]) << 12 | findb64(crypt[41]) << 6 |
+	    findb64(crypt[40]);
+	alt[30] = w & 0xff;
+	w >>= 8;
+	alt[31] = w & 0xff;
+	w >>= 8;
+}
+
+
+static void *binary(char *ciphertext)
+{
+	static char b[BINARY_SIZE];
+	memset(b, 0, BINARY_SIZE);
+	char *p = strrchr(ciphertext, '$') + 1;
+	magic(p, b);
+	return (void *) b;
+}
+
+
+static void *salt(char *ciphertext)
+{
+	int end = 0, i, len = strlen(ciphertext);
+	for (i = len - 1; i >= 0; i--)
+		if (ciphertext[i] == '$') {
+			end = i;
+			break;
+
+		}
+  
+	static unsigned char ret[64];
+	for (i = 0; i < end; i++)
+		ret[i] = ciphertext[i];
+	ret[end] = 0;
+	return (void *) ret;
+}
+
+static int binary_hash_0(void *binary)
+{
+	return (((ARCH_WORD_32 *) binary)[0] & 0xf);
+}
+
+static int binary_hash_1(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xff;
+}
+
+static int binary_hash_2(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfff;
+}
+
+static int binary_hash_3(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xffff;
+}
+
+static int binary_hash_4(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfffff;
+}
+
+static void set_salt(void *salt)
+{
+	unsigned char *s = salt;
+	int i, len = strlen(salt);
+	for (i = 0; i < len; i++)
+		currentsalt[i] = s[i];
+	currentsalt[len] = 0;
+	unsigned char offset = 0;
+	host_salt.rounds = ROUNDS_DEFAULT;
+
+	if (strncmp((char *) "$5$", (char *) currentsalt, 3) == 0)
+		offset += 3;
+
+	if (strncmp((char *) currentsalt + offset, (char *) "rounds=", 7) == 0) {
+		const char *num = currentsalt + offset + 7;
+		char *endp;
+		unsigned long int srounds = strtoul(num, &endp, 10);
+
+		if (*endp == '$') {
+			endp += 1;
+			host_salt.rounds =
+			    MAX(ROUNDS_MIN, MIN(srounds, ROUNDS_MAX));
+		}
+		offset = endp - currentsalt;
+	}
+	memcpy(host_salt.salt, currentsalt + offset, 16);
+	host_salt.saltlen = strlen(host_salt.salt);
+}
+
+static void set_key(char *key, int index)
+{
+	int len = strlen(key);
+	inbuffer[index].length = len;
+	memcpy(inbuffer[index].v,key,len);
+}
+
+static char *get_key(int index)
+{
+	static char ret[PLAINTEXT_LENGTH+1];
+	memcpy(ret,inbuffer[index].v,PLAINTEXT_LENGTH);
+	ret[inbuffer[index].length]='\0';
+	return ret;
+}
+
+static void crypt_all(int count)
+{
+	sha256_crypt_gpu(inbuffer, outbuffer, &host_salt);
+}
+
+static int get_hash_0(int index)
+{
+
+	uint32_t *out = outbuffer;
+	return out[hash_addr(0, index)] & 0xf;
+}
+
+static int get_hash_1(int index)
+{
+	uint32_t *out = outbuffer;
+	return out[hash_addr(0, index)] & 0xff;
+}
+
+static int get_hash_2(int index)
+{
+	uint32_t *out = outbuffer;
+	return out[hash_addr(0, index)] & 0xfff;
+}
+
+static int get_hash_3(int index)
+{
+	uint32_t *out = outbuffer;
+	return out[hash_addr(0, index)] & 0xffff;
+}
+
+static int get_hash_4(int index)
+{
+	uint32_t *out = outbuffer;
+	return out[hash_addr(0, index)] & 0xfffff;
+}
+
+static int cmp_all(void *binary, int count)
+{
+	uint32_t i;
+	uint32_t b = ((uint32_t *) binary)[0];
+	uint32_t *out = outbuffer;
+	for (i = 0; i < count; i++)
+		if (b == out[hash_addr(0, i)])
+			return 1;
+	return 0;
+}
+
+static int cmp_one(void *binary, int index)
+{
+	int i;
+	uint32_t *t = (uint32_t *) binary;
+	uint32_t *out = outbuffer;
+
+	for (i = 0; i < 8; i++)
+		if (t[i] != out[hash_addr(i, index)])
+			return 0;
+	return 1;
+}
+
+static int cmp_exact(char *source, int count)
+{
+	return 1;
+}
+
+struct fmt_main fmt_CRYPTSHA256CUDA = {
+	{
+		    FORMAT_LABEL,
+		    FORMAT_NAME,
+		    CRYPT_TYPE,
+		    BENCHMARK_COMMENT,
+		    BENCHMARK_LENGTH,
+		    PLAINTEXT_LENGTH,
+		    BINARY_SIZE,
+		    SALT_SIZE,
+		    MIN_KEYS_PER_CRYPT,
+		    MAX_KEYS_PER_CRYPT,
+		    FMT_CASE | FMT_8_BIT,
+	    tests},
+	{
+		    init,
+		    valid,
+		    fmt_default_split,
+		    binary,
+		    salt,
+		    {
+				binary_hash_0,
+				binary_hash_1,
+				binary_hash_2,
+				binary_hash_3,
+			binary_hash_4},
+		    fmt_default_salt_hash,
+		    set_salt,
+		    set_key,
+		    get_key,
+		    fmt_default_clear_keys,
+		    crypt_all,
+		    {
+				get_hash_0,
+				get_hash_1,
+				get_hash_2,
+				get_hash_3,
+			get_hash_4},
+		    cmp_all,
+		    cmp_one,
+	    cmp_exact}
+};
diff -urpN john-1.7.8.orig//src/cryptsha512.h john-1.7.8-allcuda_1/src/cryptsha512.h
--- john-1.7.8.orig//src/cryptsha512.h	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.8-allcuda_1/src/cryptsha512.h	2011-08-22 22:13:03.000000000 +0000
@@ -0,0 +1,81 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba
+* <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#ifndef _CRYPTSHA512_H
+#define _CRYPTSHA512_H
+#include <assert.h>
+#include "common.h"
+#include <stdint.h>
+#include <stdbool.h>
+
+#define uint8_t  unsigned char
+#define uint32_t unsigned int
+#define uint64_t unsigned long long int
+
+#define BLOCKS 14
+#define THREADS 128//set 256 on fermi
+
+#define KEYS_PER_CRYPT BLOCKS*THREADS
+
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define SALT_LEN_MAX 16
+#define ROUNDS_DEFAULT 5000
+#define ROUNDS_MIN 1000
+#define ROUNDS_MAX 999999999
+
+static const char sha512_salt_prefix[] = "$6$";
+static const char sha256_rounds_prefix[] = "rounds=";
+
+
+#define rol(x,n) ((x << n) | (x >> (64-n)))
+#define ror(x,n) ((x >> n) | (x << (64-n)))
+#define Ch(x,y,z) ((x & y) ^ ( (~x) & z))
+#define Maj(x,y,z) ((x & y) ^ (x & z) ^ (y & z))
+#define Sigma0(x) ((ror(x,28))  ^ (ror(x,34)) ^ (ror(x,39)))
+#define Sigma1(x) ((ror(x,14))  ^ (ror(x,18)) ^ (ror(x,41)))
+#define sigma0(x) ((ror(x,1))  ^ (ror(x,8)) ^(x>>7))
+#define sigma1(x) ((ror(x,19)) ^ (ror(x,61)) ^(x>>6))
+
+# define SWAP32(n) \
+    (((n) << 24) | (((n) & 0xff00) << 8) | (((n) >> 8) & 0xff00) | ((n) >> 24))
+
+# define SWAP64(n) \
+  (((n) << 56)					\
+   | (((n) & 0xff00) << 40)			\
+   | (((n) & 0xff0000) << 24)			\
+   | (((n) & 0xff000000) << 8)			\
+   | (((n) >> 8) & 0xff000000)			\
+   | (((n) >> 24) & 0xff0000)			\
+   | (((n) >> 40) & 0xff00)			\
+   | ((n) >> 56))
+
+#define hash_addr(j,idx) (((j)*(KEYS_PER_CRYPT))+(idx))
+
+
+typedef struct {
+	uint64_t H[8];
+	uint32_t total;
+	uint32_t buflen;
+	uint8_t buffer[128];	//1024bits
+} sha512_ctx;
+
+typedef struct {
+	uint32_t rounds;
+	uint8_t saltlen;
+	char salt[63];
+} crypt_sha512_salt;
+
+typedef struct {
+	uint8_t length;
+	uint8_t v[16];
+} crypt_sha512_password;
+
+typedef struct {
+	uint64_t v[8];		//512 bits
+} crypt_sha512_hash;
+
+#endif
diff -urpN john-1.7.8.orig//src/cryptsha512cuda_fmt.c john-1.7.8-allcuda_1/src/cryptsha512cuda_fmt.c
--- john-1.7.8.orig//src/cryptsha512cuda_fmt.c	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.8-allcuda_1/src/cryptsha512cuda_fmt.c	2011-08-22 22:15:58.000000000 +0000
@@ -0,0 +1,347 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba
+* <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <string.h>
+#include "arch.h"
+#include "formats.h"
+#include "common.h"
+#include "misc.h"
+#include "cryptsha512.h"
+
+#define FORMAT_LABEL		"cryptsha512cuda"
+#define FORMAT_NAME		"CRYPTSHA512CUDA"
+
+#define PHPASS_TYPE		"SHA512-based CRYPT"
+
+#define BENCHMARK_COMMENT	""
+#define BENCHMARK_LENGTH	-1
+
+#define PLAINTEXT_LENGTH	15
+#define CIPHERTEXT_LENGTH	34
+#define BINARY_SIZE		(3+16+86)
+#define MD5_DIGEST_LENGTH 	16
+
+
+#define SALT_SIZE		(3+7+9+16)
+
+#define MIN_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+#define MAX_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+
+static crypt_sha512_password inbuffer[MAX_KEYS_PER_CRYPT];			/** plaintext ciphertexts **/
+static crypt_sha512_hash outbuffer[MAX_KEYS_PER_CRYPT];			/** calculated hashes **/
+
+void sha512_crypt_gpu(crypt_sha512_password * inbuffer,
+    crypt_sha512_hash * outbuffer, crypt_sha512_salt * host_salt);
+
+static char currentsalt[64];
+static crypt_sha512_salt _salt;
+
+static struct fmt_tests tests[] = {
+
+	{"$6$saltstring$svn8UoSVapNtMuq1ukKS4tPQd8iKwSMHWjl/O817G3uBnIFNjnQJuesI68u4OTLiBFdcbYEdFCoEOfaS35inz1","Hello world!"},
+	//{"$6$saltstring$fgNTR89zXnDUV97U5dkWayBBRaB0WIBnu6s4T7T8Tz1SbUyewwiHjho25yWVkph2p18CmUkqXh4aIyjPnxdgl0","john"},
+	//{"$6$saltstring$MO53nAXQUKXVLlsbiXyPgMsR6q10N7eF7sPvanwdXnEeCj5kE3eYaRvFv0wVW1UZ4SnNTzc1v4OCOq1ASDQZY0","a"},
+	//{"$6$saltstring$q.eQ9PCFPe/tOHJPT7lQwnVQ9znjTT89hsg1NWHCRCAMsbtpBLbg1FLq7xo1BaCM0y/z46pXv4CGESVWQlOk30","ab"},
+	//{"$6$saltstring$pClZZISU0lxEwKr1z81EuJdiMLwWncjShXap25hiDGVMnCvlF5zS3ysvBdVRZqPDCdSTj06rwjrLX3bOS1Cak/","abc"},
+	//{"$6$saltstring$FJJAXr3hydAPJXM311wrzFhzheQ6LJHrufrYl2kBMnRD2pUi6jdS.fSBJ2J1Qfhcz9tPnlJOzeL7aIYi/dytg.","abcd"},
+	//{"$6$saltstring$XDecvJ/rq8tgbE1Pfuu1cTiZlhnbF5OA/vyP6HRPpDengVqhB38vbZTK/BDfPP6XBgvMzE.q9rj6Ck5blj/FK.","abcde"},
+	//{"$6$saltstring$hYPEYaHik6xSMGV1lDWhF0EerSUyCsC150POu9ksaftUWKWwV8TuqSeSLZUkUhjGy7cn.max5qd5IPSICeklL1","abcdef"},
+	//{"$6$saltstring$YBQ5J5EMRuC6k7B2GTsNaXx8u/957XMB.slQmY/lOjKd1zTIQF.ulLmy8O0VnJJ3cV.1pjP.KCgEjjMpz4pnS1","abcdefg"},
+	//{"$6$saltstring$AQapizZGhnIjtXF8OCvbSxQJBuOKvpzf1solf9b76wXFX0VRkqids5AC4YSibbhMSX0z4463sq1uAd9LvKNuO/","abcdefgh"},
+	//{"$6$saltstring$xc66FVXO.Zvv5pS02B4bCmJh5FCBAZpqTK3NoFxTU9U5b6BokbHwmeqQfMqrrkB3j9CXhCzgvC/pvoGPM1xgM1","abcdefghi"},
+	//{"$6$saltstring$Xet3A8EEzzSuL9hZZ31SfDVPT87qz3a.xxcH7eU50aqARlmXywdlfJ.6Cp/TFG1RcguqwrfUbZBbFn1BQ93Kv.","abcdefghij"},
+
+	//{"$6$saltstring$MeML1shJ8psyh5R9YJUZNYNqKzYeBvIsITqc/VqJfUDs8xO5YoUhCn4Db7CXuarMDVkBzIUfYq1d8Tj/T1WBU0","abcdefghijk"},
+	//{"$6$saltstring$i/3NHph8ZV2klLuOc5yX5kOnJWj9zuWbKiaa/NNEkYpNyamdQS1c7n2XQS3.B2Cs/eVyKwHf62PnOayqLLTOZ.","abcdefghijkl"},
+	//{"$6$saltstring$l2IxCS4o2S/vud70F1S5Z7H1WE67QFIXCYqskySdLFjjorEJdAnAp1ZqdgfNuZj2orjmeVDTsTXHpZ1IoxSKd.","abcdefghijklm"},
+	//{"$6$saltstring$PFzjspQs/CDXWALauDTav3u5bHB3n21xWrfwjnjpFO5eM5vuP0qKwDCXmlyZ5svEgsIH1oiZiGlRqkcBP5PiB.","abcdefghijklmn"},
+	//{"$6$saltstring$rdREv5Pd9C9YGtg.zXEQMb6m0sPeq4b6zFW9oWY9w4ZltmjH3yzMLgl9iBuez9DFFUvF5nJH3Y2xidiq1dH9M.", "abcdefghijklmno"},
+
+	{NULL}
+};
+
+static void init(void)
+{
+  cryptsha512_init(0);
+}
+
+static int valid(char *ciphertext)	///v
+{
+	uint32_t i, j;
+	int len = strlen(ciphertext);
+
+	if (strncmp(ciphertext, "$6$", 3) != 0)
+		return 0;
+	char *p = strrchr(ciphertext, '$');
+	for (i = p - ciphertext + 1; i < len; i++) {
+		int found = 0;
+		for (j = 0; j < 64; j++)
+			if (itoa64[j] == ARCH_INDEX(ciphertext[i]))
+				found = 1;
+		if (found == 0) {
+			puts("not found");
+			return 0;
+		}
+	}
+	if (len - (p - ciphertext + 1) != 86)
+		return 0;
+	return 1;
+};
+
+static int findb64(char c)
+{
+	int i = 0;
+	for (i = 0; i < 64; i++)
+		if (c == itoa64[i])
+			return i;
+	return 0;
+}
+
+static void magic(char *crypt, unsigned char *alt)
+{
+
+#define _24bit_from_b64(I,B2,B1,B0) \
+  do{\
+      unsigned char c1=findb64(crypt[I+0]);\
+      unsigned char c2=findb64(crypt[I+1]);\
+      unsigned char c3=findb64(crypt[I+2]);\
+      unsigned char c4=findb64(crypt[I+3]);\
+      unsigned int w=c4<<18|c3<<12|c2<<6|c1;\
+      unsigned char b2=w&0xff;w>>=8;\
+      unsigned char b1=w&0xff;w>>=8;\
+      unsigned char b0=w&0xff;w>>=8;\
+      alt[B2]=b0;\
+      alt[B1]=b1;\
+      alt[B0]=b2;\
+  }while(0)
+	_24bit_from_b64(0, 0, 21, 42);
+	_24bit_from_b64(4, 22, 43, 1);
+	_24bit_from_b64(8, 44, 2, 23);
+	_24bit_from_b64(12, 3, 24, 45);
+	_24bit_from_b64(16, 25, 46, 4);
+	_24bit_from_b64(20, 47, 5, 26);
+	_24bit_from_b64(24, 6, 27, 48);
+	_24bit_from_b64(28, 28, 49, 7);
+	_24bit_from_b64(32, 50, 8, 29);
+	_24bit_from_b64(36, 9, 30, 51);
+	_24bit_from_b64(40, 31, 52, 10);
+	_24bit_from_b64(44, 53, 11, 32);
+	_24bit_from_b64(48, 12, 33, 54);
+	_24bit_from_b64(52, 34, 55, 13);
+	_24bit_from_b64(56, 56, 14, 35);
+	_24bit_from_b64(60, 15, 36, 57);
+	_24bit_from_b64(64, 37, 58, 16);
+	_24bit_from_b64(68, 59, 17, 38);
+	_24bit_from_b64(72, 18, 39, 60);
+	_24bit_from_b64(76, 40, 61, 19);
+	_24bit_from_b64(80, 62, 20, 41);
+
+
+	uint32_t w = findb64(crypt[85]) << 6 | findb64(crypt[84]) << 0;
+	alt[63] = (w & 0xff);
+}
+
+
+static void *binary(char *ciphertext)
+{
+	static unsigned char b[BINARY_SIZE];
+	memset(b, 0, BINARY_SIZE);
+	char *p = strrchr(ciphertext, '$') + 1;
+	magic(p, b);
+	return (void *) b;
+}
+
+
+static void *salt(char *ciphertext)
+{
+	int end = 0, i, len = strlen(ciphertext);
+	for (i = len - 1; i >= 0; i--)
+		if (ciphertext[i] == '$') {
+			end = i;
+			break;
+
+		}
+
+	static unsigned char ret[50];
+	for (i = 0; i < end; i++)
+		ret[i] = ciphertext[i];
+	ret[end] = 0;
+	return (void *) ret;
+}
+
+static int binary_hash_0(void *binary)
+{
+	return (((uint64_t *) binary)[0] & 0xf);
+}
+
+static int binary_hash_1(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xff;
+}
+
+static int binary_hash_2(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfff;
+}
+
+static int binary_hash_3(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xffff;
+}
+
+static int binary_hash_4(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfffff;
+}
+
+static void set_salt(void *salt)
+{
+	unsigned char *s = salt;
+	int i, len = strlen(salt);
+	for (i = 0; i < len; i++)
+		currentsalt[i] = s[i];
+	currentsalt[len] = 0;
+	unsigned char offset = 0;
+	_salt.rounds = ROUNDS_DEFAULT;
+
+	if (strncmp((char *) "$6$", (char *) currentsalt, 3) == 0)
+		offset += 3;
+
+	if (strncmp((char *) currentsalt + offset, (char *) "rounds=", 7) == 0) {
+		const char *num = currentsalt + offset + 7;
+		char *endp;
+		unsigned long int srounds = strtoul(num, &endp, 10);
+
+		if (*endp == '$') {
+			endp += 1;
+			_salt.rounds =
+			    MAX(ROUNDS_MIN, MIN(srounds, ROUNDS_MAX));
+		}
+		offset = endp - currentsalt;
+	}
+	memcpy(_salt.salt, currentsalt + offset, 16);
+	_salt.saltlen = strlen(_salt.salt);
+}
+
+static void set_key(char *key, int index)
+{
+	int len = strlen(key);
+	inbuffer[index].length = len;
+	memcpy(inbuffer[index].v,key,len);
+}
+
+static char *get_key(int index)
+{
+	static char ret[PLAINTEXT_LENGTH+1];
+	memcpy(ret,inbuffer[index].v,PLAINTEXT_LENGTH);
+	ret[inbuffer[index].length]='\0';
+	return ret;
+}
+
+
+static void gpu_crypt_all(int count)
+{
+	sha512_crypt_gpu(inbuffer, outbuffer, &_salt);
+}
+
+static void crypt_all(int count)
+{
+	gpu_crypt_all(count);
+}
+
+static int get_hash_0(int index)
+{
+	return outbuffer[index].v[0] & 0xf;
+}
+
+static int get_hash_1(int index)
+{
+	return outbuffer[index].v[0] & 0xff;
+}
+
+static int get_hash_2(int index)
+{
+	return outbuffer[index].v[0] & 0xfff;
+}
+
+static int get_hash_3(int index)
+{
+	return outbuffer[index].v[0] & 0xffff;
+}
+
+static int get_hash_4(int index)
+{
+	return outbuffer[index].v[0] & 0xfffff;
+}
+
+static int cmp_all(void *binary, int count)
+{
+	uint32_t i;
+	uint64_t b = ((uint64_t *) binary)[0];
+	for (i = 0; i < count; i++)
+		if (b == outbuffer[i].v[0])
+			return 1;
+	return 0;
+}
+
+static int cmp_one(void *binary, int index)
+{
+	int i;
+	uint64_t *t = (uint64_t *) binary;
+	for (i = 0; i < 8; i++) {
+		if (t[i] != outbuffer[index].v[i])
+			return 0;
+	}
+	return 1;
+}
+
+static int cmp_exact(char *source, int count)
+{
+	return 1;
+}
+
+struct fmt_main fmt_CRYPTSHA512CUDA = {
+	{
+		    FORMAT_LABEL,
+		    FORMAT_NAME,
+		    PHPASS_TYPE,
+		    BENCHMARK_COMMENT,
+		    BENCHMARK_LENGTH,
+		    PLAINTEXT_LENGTH,
+		    BINARY_SIZE,
+		    SALT_SIZE,
+		    MIN_KEYS_PER_CRYPT,
+		    MAX_KEYS_PER_CRYPT,
+		    FMT_CASE | FMT_8_BIT,
+	    tests},
+	{
+		    init,
+		    valid,
+		    fmt_default_split,
+		    binary,
+		    salt,
+		    {
+				binary_hash_0,
+				binary_hash_1,
+				binary_hash_2,
+				binary_hash_3,
+			binary_hash_4},
+		    fmt_default_salt_hash,
+		    set_salt,
+		    set_key,
+		    get_key,
+		    fmt_default_clear_keys,
+		    crypt_all,
+		    {
+				get_hash_0,
+				get_hash_1,
+				get_hash_2,
+				get_hash_3,
+			get_hash_4},
+		    cmp_all,
+		    cmp_one,
+	    cmp_exact}
+};
diff -urpN john-1.7.8.orig//src/cuda/cryptmd5.cu john-1.7.8-allcuda_1/src/cuda/cryptmd5.cu
--- john-1.7.8.orig//src/cuda/cryptmd5.cu	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.8-allcuda_1/src/cuda/cryptmd5.cu	2011-08-22 22:19:34.000000000 +0000
@@ -0,0 +1,282 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include "../cryptmd5.h"
+
+static void HandleError(cudaError_t err, const char *file, int line)
+{
+	if (err != cudaSuccess) {
+		printf("%s in %s at line %d\n", cudaGetErrorString(err), file,
+		    line);
+		exit(EXIT_FAILURE);
+	}
+}
+
+#define HANDLE_ERROR(err) (HandleError(err,__FILE__,__LINE__))
+
+extern "C" void md5_crypt_gpu(crypt_md5_password *, crypt_md5_hash *,
+    crypt_md5_salt *);
+extern "C" void cryptmd5_init(int);
+
+__host__ void cryptmd5_init(int gpuid)
+{
+	int count;
+	HANDLE_ERROR(cudaGetDeviceCount(&count));
+	if (gpuid < count)
+		cudaSetDevice(gpuid);
+	else {
+		printf("Invalid CUDA device id = %d\n", gpuid);
+		exit(1);
+	}
+}
+
+__device__ __constant__ char md5_salt_prefix_cu[] = "$1$";
+__device__ __constant__ char apr1_salt_prefix_cu[] = "$apr1$";
+__device__ __constant__ crypt_md5_salt cuda_salt[1];
+
+__device__ void md5_process_block_cu(const void *, size_t, md5_ctx *);
+__device__ void md5_process_bytes_cu(const void *, size_t, md5_ctx *);
+
+__device__ void init_ctx(md5_ctx * ctx, uint8_t * ctx_buflen)
+{
+	int i = (ctx_buffsize / sizeof(uint32_t)) - 2;
+	uint32_t *buf = (uint32_t *) ctx->buffer;
+	while (i--)
+		*buf++ = 0;
+	ctx_buflen[threadIdx.x] = 0;
+}
+
+__device__ void ctx_update(md5_ctx * ctx, const char *string, uint8_t len,
+    uint8_t * ctx_buflen)
+{
+	uint8_t *dest = &ctx->buffer[ctx_buflen[threadIdx.x]];
+	uint8_t *src = (uint8_t *) string;
+	ctx_buflen[threadIdx.x] += len;
+	memcpy(dest, src, len);
+}
+
+__device__ void md5_digest(md5_ctx * ctx, uint32_t * result,
+    uint8_t * ctx_buflen)
+{
+	uint32_t len = ctx_buflen[threadIdx.x];
+	uint32_t *x = (uint32_t *) ctx->buffer;
+	x[len / 4] |= (((uint32_t) 0x80) << ((len & 0x3) << 3));
+	len <<= 3;
+
+	uint32_t a = 0x67452301;
+	uint32_t b = 0xefcdab89;
+	uint32_t c = 0x98badcfe;
+	uint32_t d = 0x10325476;
+
+	{
+		FF(a, b, c, d, x[0], S11, 0xd76aa478);	/* 1 */
+		FF(d, a, b, c, x[1], S12, 0xe8c7b756);	/* 2 */
+		FF(c, d, a, b, x[2], S13, 0x242070db);	/* 3 */
+		FF(b, c, d, a, x[3], S14, 0xc1bdceee);	/* 4 */
+		FF(a, b, c, d, x[4], S11, 0xf57c0faf);	/* 5 */
+		FF(d, a, b, c, x[5], S12, 0x4787c62a);	/* 6 */
+		FF(c, d, a, b, x[6], S13, 0xa8304613);	/* 7 */
+		FF(b, c, d, a, x[7], S14, 0xfd469501);	/* 8 */
+		FF(a, b, c, d, x[8], S11, 0x698098d8);	/* 9 */
+		FF(d, a, b, c, x[9], S12, 0x8b44f7af);	/* 10 */
+		FF(c, d, a, b, x[10], S13, 0xffff5bb1);	/* 11 */
+		FF(b, c, d, a, x[11], S14, 0x895cd7be);	/* 12 */
+		FF(a, b, c, d, x[12], S11, 0x6b901122);	/* 13 */
+		FF(d, a, b, c, x[13], S12, 0xfd987193);	/* 14 */
+		FF(c, d, a, b, len, S13, 0xa679438e);	/* 15 */
+		FF2(b, c, d, a, S14, 0x49b40821);	/* 16 */
+	}
+	{
+		GG(a, b, c, d, x[1], S21, 0xf61e2562);	/* 17 */
+		GG(d, a, b, c, x[6], S22, 0xc040b340);	/* 18 */
+		GG(c, d, a, b, x[11], S23, 0x265e5a51);	/* 19 */
+		GG(b, c, d, a, x[0], S24, 0xe9b6c7aa);	/* 20 */
+		GG(a, b, c, d, x[5], S21, 0xd62f105d);	/* 21 */
+		GG(d, a, b, c, x[10], S22, 0x2441453);	/* 22 */
+		GG2(c, d, a, b, S23, 0xd8a1e681);	/* 23 */
+		GG(b, c, d, a, x[4], S24, 0xe7d3fbc8);	/* 24 */
+		GG(a, b, c, d, x[9], S21, 0x21e1cde6);	/* 25 */
+		GG(d, a, b, c, len, S22, 0xc33707d6);	/* 26 */
+		GG(c, d, a, b, x[3], S23, 0xf4d50d87);	/* 27 */
+		GG(b, c, d, a, x[8], S24, 0x455a14ed);	/* 28 */
+		GG(a, b, c, d, x[13], S21, 0xa9e3e905);	/* 29 */
+		GG(d, a, b, c, x[2], S22, 0xfcefa3f8);	/* 30 */
+		GG(c, d, a, b, x[7], S23, 0x676f02d9);	/* 31 */
+		GG(b, c, d, a, x[12], S24, 0x8d2a4c8a);	/* 32 */
+	}
+	{
+		HH(a, b, c, d, x[5], S31, 0xfffa3942);	/* 33 */
+		HH(d, a, b, c, x[8], S32, 0x8771f681);	/* 34 */
+		HH(c, d, a, b, x[11], S33, 0x6d9d6122);	/* 35 */
+		HH(b, c, d, a, len, S34, 0xfde5380c);	/* 36 */
+		HH(a, b, c, d, x[1], S31, 0xa4beea44);	/* 37 */
+		HH(d, a, b, c, x[4], S32, 0x4bdecfa9);	/* 38 */
+		HH(c, d, a, b, x[7], S33, 0xf6bb4b60);	/* 39 */
+		HH(b, c, d, a, x[10], S34, 0xbebfbc70);	/* 40 */
+		HH(a, b, c, d, x[13], S31, 0x289b7ec6);	/* 41 */
+		HH(d, a, b, c, x[0], S32, 0xeaa127fa);	/* 42 */
+		HH(c, d, a, b, x[3], S33, 0xd4ef3085);	/* 43 */
+		HH(b, c, d, a, x[6], S34, 0x4881d05);	/* 44 */
+		HH(a, b, c, d, x[9], S31, 0xd9d4d039);	/* 45 */
+		HH(d, a, b, c, x[12], S32, 0xe6db99e5);	/* 46 */
+		HH2(c, d, a, b, S33, 0x1fa27cf8);	/* 47 */
+		HH(b, c, d, a, x[2], S34, 0xc4ac5665);	/* 48 */
+	}
+	{
+		II(a, b, c, d, x[0], S41, 0xf4292244);	/* 49 */
+		II(d, a, b, c, x[7], S42, 0x432aff97);	/* 50 */
+		II(c, d, a, b, len, S43, 0xab9423a7);	/* 51 */
+		II(b, c, d, a, x[5], S44, 0xfc93a039);	/* 52 */
+		II(a, b, c, d, x[12], S41, 0x655b59c3);	/* 53 */
+		II(d, a, b, c, x[3], S42, 0x8f0ccc92);	/* 54 */
+		II(c, d, a, b, x[10], S43, 0xffeff47d);	/* 55 */
+		II(b, c, d, a, x[1], S44, 0x85845dd1);	/* 56 */
+		II(a, b, c, d, x[8], S41, 0x6fa87e4f);	/* 57 */
+		II2(d, a, b, c, S42, 0xfe2ce6e0);	/* 58 */
+		II(c, d, a, b, x[6], S43, 0xa3014314);	/* 59 */
+		II(b, c, d, a, x[13], S44, 0x4e0811a1);	/* 60 */
+		II(a, b, c, d, x[4], S41, 0xf7537e82);	/* 61 */
+		II(d, a, b, c, x[11], S42, 0xbd3af235);	/* 62 */
+		II(c, d, a, b, x[2], S43, 0x2ad7d2bb);	/* 63 */
+		II(b, c, d, a, x[9], S44, 0xeb86d391);	/* 64 */
+	}
+	a += 0x67452301;
+	b += 0xefcdab89;
+	c += 0x98badcfe;
+	d += 0x10325476;
+
+	result[0] = a;
+	result[1] = b;
+	result[2] = c;
+	result[3] = d;
+}
+
+
+__device__ void md5crypt(const char *gpass, size_t keysize, uint32_t * tresult,
+    uint32_t idx)
+{
+
+	uint32_t i;
+	__shared__ uint32_t alt_result[THREADS][4 + 1];
+	__shared__ char spass[THREADS][16 + 4];
+	__shared__ uint8_t ctx_buflen[THREADS];
+	__shared__ uint8_t altctx_buflen[THREADS];
+
+	char *pass = spass[threadIdx.x];
+	memcpy(pass, gpass, 16);
+	uint8_t pass_len = keysize;
+	uint8_t salt_len = cuda_salt[0].saltlen;
+	char *salt = cuda_salt[0].salt;
+	md5_ctx ctx, alt_ctx;
+	init_ctx(&ctx, ctx_buflen);
+	init_ctx(&alt_ctx, altctx_buflen);
+
+	ctx_update(&ctx, pass, pass_len, ctx_buflen);
+	if (cuda_salt[0].prefix == '1') {
+		ctx_update(&ctx, md5_salt_prefix_cu, 3, ctx_buflen);
+	} else
+		ctx_update(&ctx, apr1_salt_prefix_cu, 6, ctx_buflen);
+
+
+	ctx_update(&ctx, salt, salt_len, ctx_buflen);
+
+
+	ctx_update(&alt_ctx, pass, pass_len, altctx_buflen);
+	ctx_update(&alt_ctx, salt, salt_len, altctx_buflen);
+	ctx_update(&alt_ctx, pass, pass_len, altctx_buflen);
+	md5_digest(&alt_ctx, alt_result[threadIdx.x], altctx_buflen);
+
+	for (i = pass_len; i > 16; i -= 16)
+		ctx_update(&ctx, (const char *) alt_result[threadIdx.x], 16,
+		    ctx_buflen);
+	ctx_update(&ctx, (const char *) alt_result[threadIdx.x], i,
+	    ctx_buflen);
+
+	*alt_result[threadIdx.x] = 0;
+
+	for (i = pass_len; i > 0; i >>= 1)
+		if ((i & 1) != 0)
+			ctx.buffer[ctx_buflen[threadIdx.x]++] =
+			    ((const char *) alt_result[threadIdx.x])[0];
+		else
+			ctx.buffer[ctx_buflen[threadIdx.x]++] = pass[0];
+
+	md5_digest(&ctx, alt_result[threadIdx.x], ctx_buflen);
+
+	for (i = 0; i < 1000; i++) {
+		init_ctx(&ctx, ctx_buflen);
+
+		if ((i & 1) != 0)
+			ctx_update(&ctx, pass, pass_len, ctx_buflen);
+		else
+			ctx_update(&ctx,
+			    (const char *) alt_result[threadIdx.x], 16,
+			    ctx_buflen);
+
+		if (i % 3 != 0)
+			ctx_update(&ctx, salt, salt_len, ctx_buflen);
+
+		if (i % 7 != 0)
+			ctx_update(&ctx, pass, pass_len, ctx_buflen);
+
+		if ((i & 1) != 0)
+			ctx_update(&ctx,
+			    (const char *) alt_result[threadIdx.x], 16,
+			    ctx_buflen);
+		else
+			ctx_update(&ctx, pass, pass_len, ctx_buflen);
+		md5_digest(&ctx, alt_result[threadIdx.x], ctx_buflen);
+
+	}
+	tresult[0] = alt_result[threadIdx.x][0];
+	tresult[1] = alt_result[threadIdx.x][1];
+	tresult[2] = alt_result[threadIdx.x][2];
+	tresult[3] = alt_result[threadIdx.x][3];
+}
+
+
+__global__ void kernel_crypt_r(crypt_md5_password * inbuffer,
+    crypt_md5_hash * outbuffer)
+{
+	uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
+	md5crypt((char *) inbuffer[idx].v, inbuffer[idx].length,
+	    outbuffer[idx].v, idx);
+}
+
+__host__ void md5_crypt_gpu(crypt_md5_password * inbuffer,
+    crypt_md5_hash * outbuffer, crypt_md5_salt * host_salt)
+{
+
+
+	HANDLE_ERROR(cudaMemcpyToSymbol(cuda_salt, host_salt,
+		sizeof(crypt_md5_salt)));
+	crypt_md5_password *cuda_inbuffer;
+	crypt_md5_hash *cuda_outbuffer;
+
+	size_t insize = sizeof(crypt_md5_password) * KEYS_PER_CRYPT;
+	size_t outsize = sizeof(crypt_md5_hash) * KEYS_PER_CRYPT;
+
+	HANDLE_ERROR(cudaMalloc(&cuda_inbuffer, insize));
+	HANDLE_ERROR(cudaMalloc(&cuda_outbuffer, outsize));
+	HANDLE_ERROR(cudaMemcpy(cuda_inbuffer, inbuffer, insize,
+		cudaMemcpyHostToDevice));
+	dim3 dimGrid(BLOCKS);
+	dim3 dimBlock(THREADS);
+
+	kernel_crypt_r <<< dimGrid, dimBlock >>> (cuda_inbuffer,
+	    cuda_outbuffer);
+
+	cudaThreadSynchronize();
+
+	HANDLE_ERROR(cudaMemcpy(outbuffer, cuda_outbuffer, outsize,
+		cudaMemcpyDeviceToHost));
+
+	HANDLE_ERROR(cudaFree(cuda_inbuffer));
+	HANDLE_ERROR(cudaFree(cuda_outbuffer));
+}
diff -urpN john-1.7.8.orig//src/cuda/cryptsha256.cu john-1.7.8-allcuda_1/src/cuda/cryptsha256.cu
--- john-1.7.8.orig//src/cuda/cryptsha256.cu	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.8-allcuda_1/src/cuda/cryptsha256.cu	2011-08-22 22:19:19.000000000 +0000
@@ -0,0 +1,345 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com>
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include "../cryptsha256.h"
+#define HANDLE_ERROR(err) (HandleError(err,__FILE__,__LINE__))
+
+static void HandleError(cudaError_t err, const char *file, int line)
+{
+	if (err != cudaSuccess) {
+		printf("%s in %s at line %d\n", cudaGetErrorString(err), file,
+		    line);
+		exit(EXIT_FAILURE);
+	}
+}
+
+extern "C" void sha256_crypt_gpu(crypt_sha256_password * inbuffer,
+    crypt_sha256_hash * outbuffer, crypt_sha256_salt * host_salt);
+
+extern "C" __host__ void cryptsha256_init(int gpuid)
+{
+	int count;
+	HANDLE_ERROR(cudaGetDeviceCount(&count));
+	if (gpuid < count)
+		cudaSetDevice(gpuid);
+	else {
+		printf("Invalid CUDA device id = %d\n", gpuid);
+		exit(1);
+	}
+}
+__constant__ crypt_sha256_salt cuda_salt[1];
+__constant__ uint32_t k[] = {
+	0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1,
+	0x923f82a4, 0xab1c5ed5,
+	0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe,
+	0x9bdc06a7, 0xc19bf174,
+	0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa,
+	0x5cb0a9dc, 0x76f988da,
+	0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
+	0x06ca6351, 0x14292967,
+	0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb,
+	0x81c2c92e, 0x92722c85,
+	0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624,
+	0xf40e3585, 0x106aa070,
+	0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a,
+	0x5b9cca4f, 0x682e6ff3,
+	0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb,
+	0xbef9a3f7, 0xc67178f2
+};
+
+
+
+__device__ void init_ctx(sha256_ctx * ctx)
+{
+	ctx->H[0] = 0x6a09e667;
+	ctx->H[1] = 0xbb67ae85;
+	ctx->H[2] = 0x3c6ef372;
+	ctx->H[3] = 0xa54ff53a;
+	ctx->H[4] = 0x510e527f;
+	ctx->H[5] = 0x9b05688c;
+	ctx->H[6] = 0x1f83d9ab;
+	ctx->H[7] = 0x5be0cd19;
+	ctx->total = 0;
+	ctx->buflen = 0;
+}
+
+__device__ void insert_to_buffer(sha256_ctx * ctx, const uint8_t * string,
+    uint8_t len)
+{
+	int i = len;
+	uint8_t *d = &ctx->buffer[ctx->buflen];
+	while (i--)
+		*d++ = *string++;
+	ctx->buflen += len;
+}
+
+__device__ void sha256_block(sha256_ctx * ctx)
+{
+	int i;
+	uint32_t a = ctx->H[0];
+	uint32_t b = ctx->H[1];
+	uint32_t c = ctx->H[2];
+	uint32_t d = ctx->H[3];
+	uint32_t e = ctx->H[4];
+	uint32_t f = ctx->H[5];
+	uint32_t g = ctx->H[6];
+	uint32_t h = ctx->H[7];
+	uint32_t w[16];
+	uint32_t *data = (uint32_t *) ctx->buffer;
+
+    #pragma unroll 16
+	  for (i = 0; i < 16; i++)
+		w[i] = SWAP(data[i]);
+	
+	uint32_t t1, t2;
+	for (i = 0; i < 16; i++) {
+		t1 = k[i] + w[i] + h + Sigma1(e) + Ch(e, f, g);
+		t2 = Maj(a, b, c) + Sigma0(a);
+
+		h = g;
+		g = f;
+		f = e;
+		e = d + t1;
+		d = c;
+		c = b;
+		b = a;
+		a = t1 + t2;
+	}
+
+	for (i = 16; i < 64; i++) {
+
+		w[i & 15] =
+		    sigma1(w[(i - 2) & 15]) + sigma0(w[(i - 15) & 15]) + w[(i -
+			16) & 15] + w[(i - 7) & 15];
+		t1 = k[i] + w[i & 15] + h + Sigma1(e) + Ch(e, f, g);
+		t2 = Maj(a, b, c) + Sigma0(a);
+
+		h = g;
+		g = f;
+		f = e;
+		e = d + t1;
+		d = c;
+		c = b;
+		b = a;
+		a = t1 + t2;
+	}
+
+	ctx->H[0] += a;
+	ctx->H[1] += b;
+	ctx->H[2] += c;
+	ctx->H[3] += d;
+	ctx->H[4] += e;
+	ctx->H[5] += f;
+	ctx->H[6] += g;
+	ctx->H[7] += h;
+
+}
+
+__device__ void ctx_update(sha256_ctx * ctx, const char *string, uint8_t len)
+{
+	ctx->total += len;
+	uint8_t startpos = ctx->buflen;
+	uint8_t partsize;
+	if (startpos + len <= 64) {
+		partsize = len;
+	} else
+		partsize = 64 - startpos;
+
+	insert_to_buffer(ctx, (const uint8_t *) string, partsize);
+	if (ctx->buflen == 64) {
+		uint8_t offset = 64 - startpos;
+		sha256_block(ctx);
+		ctx->buflen = 0;
+		insert_to_buffer(ctx, (const uint8_t *) (string + offset),
+		    len - offset);
+	}
+}
+
+/**
+  Add 0x80 byte to ctx->buffer and clean the rest of it
+**/
+__device__ void ctx_append_1(sha256_ctx * ctx)
+{
+	int i = 63 - ctx->buflen;
+	uint8_t *d = &ctx->buffer[ctx->buflen];
+	*d++ = 0x80;	
+	while (i--)
+	{
+	  *d++ = 0;
+	}
+
+}
+
+/**
+  Add ctx->bufflen at the end of ctx->buffer
+**/
+__device__ void ctx_add_length(sha256_ctx * ctx)
+{
+	uint32_t *blocks = (uint32_t *) ctx->buffer;
+	blocks[15] = SWAP(ctx->total * 8);
+}
+
+__device__ void finish_ctx(sha256_ctx * ctx)
+{
+	ctx_append_1(ctx);
+	ctx_add_length(ctx);
+	ctx->buflen = 0;
+}
+
+__device__ void clear_ctx_buffer(sha256_ctx * ctx)
+{
+	uint32_t *w = (uint32_t *) ctx->buffer;
+#pragma unroll 16
+	for (int i = 0; i < 16; i++)
+		w[i] = 0;
+	ctx->buflen = 0;
+
+}
+
+__device__ void sha256_digest(sha256_ctx * ctx, uint32_t * result)
+{
+	uint8_t i;
+	if (ctx->buflen <= 55) {	//data+0x80+datasize fits in one 512bit block
+		finish_ctx(ctx);
+		sha256_block(ctx);
+	} else {
+		uint8_t moved = 1;
+		if (ctx->buflen < 64) {	//data and 0x80 fits in one block
+			ctx_append_1(ctx);
+			moved = 0;
+		}
+		sha256_block(ctx);
+		clear_ctx_buffer(ctx);
+		if (moved)
+			ctx->buffer[0] = 0x80;	//append 1,the rest is already clean
+		ctx_add_length(ctx);
+		sha256_block(ctx);
+	}
+#pragma unroll 8
+	for (i = 0; i < 8; i++)
+		result[i] = SWAP(ctx->H[i]);
+}
+
+__device__ void sha256crypt(const char *pass, uint8_t passlength,
+    uint32_t * tresult, uint32_t idx, uint32_t rounds)
+{
+
+	uint32_t i, alt_result[8], temp_result[8];
+
+	sha256_ctx ctx, alt_ctx;
+	init_ctx(&ctx);
+	init_ctx(&alt_ctx);
+
+	ctx_update(&ctx, pass, passlength);
+	ctx_update(&ctx, cuda_salt[0].salt, cuda_salt[0].saltlen);
+
+	ctx_update(&alt_ctx, pass, passlength);
+	ctx_update(&alt_ctx, cuda_salt[0].salt, cuda_salt[0].saltlen);
+	ctx_update(&alt_ctx, pass, passlength);
+
+	sha256_digest(&alt_ctx, alt_result);
+
+	ctx_update(&ctx, (const char *) alt_result, passlength);
+
+
+	for (i = passlength; i > 0; i >>= 1) {
+		if ((i & 1) != 0)
+			ctx_update(&ctx, (const char *) alt_result, 32);
+		else
+			ctx_update(&ctx, pass, passlength);
+	}
+	sha256_digest(&ctx, alt_result);
+
+	init_ctx(&alt_ctx);
+	for (i = 0; i < passlength; i++)
+		ctx_update(&alt_ctx, pass, passlength);
+
+	sha256_digest(&alt_ctx, temp_result);
+
+	__shared__ char sp_sequence[THREADS][16+4];
+	char *p_sequence=sp_sequence[threadIdx.x];
+	memcpy(p_sequence, temp_result, passlength);
+
+	init_ctx(&alt_ctx);
+	for (i = 0; i < 16 + ((unsigned char *) alt_result)[0]; i++)
+		ctx_update(&alt_ctx, cuda_salt[0].salt, cuda_salt[0].saltlen);
+
+	sha256_digest(&alt_ctx, temp_result);
+
+	uint8_t saltlength = cuda_salt[0].saltlen;
+
+	__shared__ char ss_sequence[THREADS][16+4];
+	char *s_sequence=ss_sequence[threadIdx.x];
+	memcpy(s_sequence, temp_result, saltlength);
+
+	for (i = 0; i < rounds; i++) {
+		init_ctx(&ctx);
+
+		if ((i & 1) != 0)
+			ctx_update(&ctx, p_sequence, passlength);
+		else
+			ctx_update(&ctx, (const char *) alt_result, 32);
+
+		if ((i % 3) != 0)
+			ctx_update(&ctx, s_sequence, saltlength);
+
+		if ((i % 7) != 0)
+			ctx_update(&ctx, p_sequence, passlength);
+
+		if ((i & 1) != 0)
+			ctx_update(&ctx, (const char *) alt_result, 32);
+		else
+			ctx_update(&ctx, p_sequence, passlength);
+
+
+		sha256_digest(&ctx, alt_result);
+	}
+	__syncthreads();
+#pragma unroll 8
+	for (i = 0; i < 8; i++)
+		tresult[hash_addr(i, idx)] = alt_result[i];
+}
+
+__global__ void kernel_crypt_r(crypt_sha256_password * inbuffer,
+    uint32_t * outbuffer)
+{
+	uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
+	sha256crypt((const char *) inbuffer[idx].v, inbuffer[idx].length,
+	    outbuffer, idx, cuda_salt[0].rounds);
+
+}
+
+
+void sha256_crypt_gpu(crypt_sha256_password * inbuffer,
+    crypt_sha256_hash * outbuffer, crypt_sha256_salt * host_salt)
+{
+
+	HANDLE_ERROR(cudaMemcpyToSymbol(cuda_salt, host_salt,
+		sizeof(crypt_sha256_salt)));
+
+	crypt_sha256_password *cuda_inbuffer;
+	uint32_t *cuda_outbuffer;
+	size_t insize = sizeof(crypt_sha256_password) * KEYS_PER_CRYPT;
+	size_t outsize = sizeof(crypt_sha256_hash) * KEYS_PER_CRYPT;
+	HANDLE_ERROR(cudaMalloc(&cuda_inbuffer, insize));
+	HANDLE_ERROR(cudaMalloc(&cuda_outbuffer, outsize));
+
+	HANDLE_ERROR(cudaMemcpy(cuda_inbuffer, inbuffer, insize,
+		cudaMemcpyHostToDevice));
+	dim3 dimGrid(BLOCKS);
+	dim3 dimBlock(THREADS);
+	kernel_crypt_r <<< dimGrid, dimBlock >>> (cuda_inbuffer,
+	    cuda_outbuffer);
+	cudaThreadSynchronize();
+	HANDLE_ERROR(cudaMemcpy(outbuffer, cuda_outbuffer, outsize,
+		cudaMemcpyDeviceToHost));
+
+	HANDLE_ERROR(cudaFree(cuda_inbuffer));
+	HANDLE_ERROR(cudaFree(cuda_outbuffer));
+}
diff -urpN john-1.7.8.orig//src/cuda/cryptsha512.cu john-1.7.8-allcuda_1/src/cuda/cryptsha512.cu
--- john-1.7.8.orig//src/cuda/cryptsha512.cu	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.8-allcuda_1/src/cuda/cryptsha512.cu	2011-08-22 22:19:25.000000000 +0000
@@ -0,0 +1,376 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <stdio.h>
+#include "../cryptsha512.h"
+
+static void HandleError(cudaError_t err, const char *file, int line)
+{
+	if (err != cudaSuccess) {
+		printf("%s in %s at line %d\n", cudaGetErrorString(err), file,
+		    line);
+		exit(EXIT_FAILURE);
+	}
+}
+
+#define HANDLE_ERROR(err) (HandleError(err,__FILE__,__LINE__))
+
+extern "C" __host__ void cryptsha512_init(int gpuid)
+{
+	int count;
+	HANDLE_ERROR(cudaGetDeviceCount(&count));
+	if (gpuid < count)
+		cudaSetDevice(gpuid);
+	else {
+		printf("Invalid CUDA device id = %d\n", gpuid);
+		exit(1);
+	}
+}
+__constant__ uint64_t k[] = {
+	0x428a2f98d728ae22LL, 0x7137449123ef65cdLL, 0xb5c0fbcfec4d3b2fLL,
+	    0xe9b5dba58189dbbcLL,
+	0x3956c25bf348b538LL, 0x59f111f1b605d019LL, 0x923f82a4af194f9bLL,
+	    0xab1c5ed5da6d8118LL,
+	0xd807aa98a3030242LL, 0x12835b0145706fbeLL, 0x243185be4ee4b28cLL,
+	    0x550c7dc3d5ffb4e2LL,
+	0x72be5d74f27b896fLL, 0x80deb1fe3b1696b1LL, 0x9bdc06a725c71235LL,
+	    0xc19bf174cf692694LL,
+	0xe49b69c19ef14ad2LL, 0xefbe4786384f25e3LL, 0x0fc19dc68b8cd5b5LL,
+	    0x240ca1cc77ac9c65LL,
+	0x2de92c6f592b0275LL, 0x4a7484aa6ea6e483LL, 0x5cb0a9dcbd41fbd4LL,
+	    0x76f988da831153b5LL,
+	0x983e5152ee66dfabLL, 0xa831c66d2db43210LL, 0xb00327c898fb213fLL,
+	    0xbf597fc7beef0ee4LL,
+	0xc6e00bf33da88fc2LL, 0xd5a79147930aa725LL, 0x06ca6351e003826fLL,
+	    0x142929670a0e6e70LL,
+	0x27b70a8546d22ffcLL, 0x2e1b21385c26c926LL, 0x4d2c6dfc5ac42aedLL,
+	    0x53380d139d95b3dfLL,
+	0x650a73548baf63deLL, 0x766a0abb3c77b2a8LL, 0x81c2c92e47edaee6LL,
+	    0x92722c851482353bLL,
+	0xa2bfe8a14cf10364LL, 0xa81a664bbc423001LL, 0xc24b8b70d0f89791LL,
+	    0xc76c51a30654be30LL,
+	0xd192e819d6ef5218LL, 0xd69906245565a910LL, 0xf40e35855771202aLL,
+	    0x106aa07032bbd1b8LL,
+	0x19a4c116b8d2d0c8LL, 0x1e376c085141ab53LL, 0x2748774cdf8eeb99LL,
+	    0x34b0bcb5e19b48a8LL,
+	0x391c0cb3c5c95a63LL, 0x4ed8aa4ae3418acbLL, 0x5b9cca4f7763e373LL,
+	    0x682e6ff3d6b2b8a3LL,
+	0x748f82ee5defb2fcLL, 0x78a5636f43172f60LL, 0x84c87814a1f0ab72LL,
+	    0x8cc702081a6439ecLL,
+	0x90befffa23631e28LL, 0xa4506cebde82bde9LL, 0xbef9a3f7b2c67915LL,
+	    0xc67178f2e372532bLL,
+	0xca273eceea26619cLL, 0xd186b8c721c0c207LL, 0xeada7dd6cde0eb1eLL,
+	    0xf57d4f7fee6ed178LL,
+	0x06f067aa72176fbaLL, 0x0a637dc5a2c898a6LL, 0x113f9804bef90daeLL,
+	    0x1b710b35131c471bLL,
+	0x28db77f523047d84LL, 0x32caab7b40c72493LL, 0x3c9ebe0a15c9bebcLL,
+	    0x431d67c49c100d4cLL,
+	0x4cc5d4becb3e42b6LL, 0x597f299cfc657e2aLL, 0x5fcb6fab3ad6faecLL,
+	    0x6c44198c4a475817LL,
+};
+
+__constant__ crypt_sha512_salt cuda_salt[1];
+
+extern "C" void sha512_crypt_gpu(crypt_sha512_password * inbuffer,
+    crypt_sha512_hash * outbuffer, crypt_sha512_salt * salt);
+
+
+__device__ void init_ctx(sha512_ctx * ctx)
+{
+	ctx->H[0] = 0x6a09e667f3bcc908LL;
+	ctx->H[1] = 0xbb67ae8584caa73bLL;
+	ctx->H[2] = 0x3c6ef372fe94f82bLL;
+	ctx->H[3] = 0xa54ff53a5f1d36f1LL;
+	ctx->H[4] = 0x510e527fade682d1LL;
+	ctx->H[5] = 0x9b05688c2b3e6c1fLL;
+	ctx->H[6] = 0x1f83d9abfb41bd6bLL;
+	ctx->H[7] = 0x5be0cd19137e2179LL;
+	ctx->total = 0;
+	ctx->buflen = 0;
+}
+
+
+__device__ void insert_to_buffer(sha512_ctx * ctx, const uint8_t * string,
+    uint8_t len)
+{
+	uint8_t *d = &ctx->buffer[ctx->buflen];
+	memcpy(d,string,len);
+	ctx->buflen += len;
+}
+
+
+__device__ void sha512_block(sha512_ctx * ctx)
+{
+	int i;
+	uint64_t a = ctx->H[0];
+	uint64_t b = ctx->H[1];
+	uint64_t c = ctx->H[2];
+	uint64_t d = ctx->H[3];
+	uint64_t e = ctx->H[4];
+	uint64_t f = ctx->H[5];
+	uint64_t g = ctx->H[6];
+	uint64_t h = ctx->H[7];
+
+
+	uint64_t w[16];
+
+	uint64_t *data = (uint64_t *) ctx->buffer;
+#pragma unroll 16
+	for (i = 0; i < 16; i++)
+		w[i] = SWAP64(data[i]);
+
+	uint64_t t1, t2;
+#pragma unroll 16
+	for (i = 0; i < 16; i++) {
+
+
+		t1 = k[i] + w[i] + h + Sigma1(e) + Ch(e, f, g);
+		t2 = Maj(a, b, c) + Sigma0(a);
+
+		h = g;
+		g = f;
+		f = e;
+		e = d + t1;
+		d = c;
+		c = b;
+		b = a;
+		a = t1 + t2;
+
+	}
+
+
+	for (i = 16; i < 80; i++) {
+
+
+		w[i & 15] =sigma1(w[(i - 2) & 15]) + sigma0(w[(i - 15) & 15]) + w[(i -16) & 15] + w[(i - 7) & 15];
+		t1 = k[i] + w[i & 15] + h + Sigma1(e) + Ch(e, f, g);
+		t2 = Maj(a, b, c) + Sigma0(a);
+
+		h = g;
+		g = f;
+		f = e;
+		e = d + t1;
+		d = c;
+		c = b;
+		b = a;
+		a = t1 + t2;
+
+	}
+
+	ctx->H[0] += a;
+	ctx->H[1] += b;
+	ctx->H[2] += c;
+	ctx->H[3] += d;
+	ctx->H[4] += e;
+	ctx->H[5] += f;
+	ctx->H[6] += g;
+	ctx->H[7] += h;
+}
+
+
+__device__ void ctx_append_1(sha512_ctx * ctx)
+{
+	uint32_t length=ctx->buflen;
+	int i = 127 - length;
+	uint32_t *x = (uint32_t *) ctx->buffer;
+	uint8_t *d = &ctx->buffer[length];
+	*d++ = 0x80;
+	while(++length%4!=0)
+	{  *d++=0;
+	i--;
+	}
+	x=(uint32_t*)d;
+	while(i>0)
+	{  i-=4;
+	    *x++=0;
+	}
+}
+
+__device__ void ctx_add_length(sha512_ctx * ctx)
+{
+	uint64_t *blocks = (uint64_t *) ctx->buffer;
+	blocks[15] = SWAP64((uint64_t) ctx->total * 8);
+}
+
+__device__ void finish_ctx(sha512_ctx * ctx)
+{
+	ctx_append_1(ctx);
+	ctx_add_length(ctx);
+	ctx->buflen = 0;
+}
+
+
+__device__ void ctx_update(sha512_ctx * ctx, const char *string, uint8_t len)
+{
+	ctx->total += len;
+	uint8_t startpos = ctx->buflen;
+	uint8_t partsize;
+	if (startpos + len <= 128) {
+		partsize = len;
+	} else
+		partsize = 128 - startpos;
+
+	insert_to_buffer(ctx, (const uint8_t *) string, partsize);
+	if (ctx->buflen == 128) {
+		uint8_t offset = 128 - startpos;
+		sha512_block(ctx);
+		ctx->buflen = 0;
+		insert_to_buffer(ctx, (const uint8_t *) (string + offset),
+		    len - offset);
+	}
+}
+
+__device__ void clear_ctx_buffer(sha512_ctx * ctx)
+{
+	uint32_t *w = (uint32_t *) ctx->buffer;
+#pragma unroll 30
+	for (int i = 0; i < 30; i++)
+		w[i] = 0;
+	  
+	  ctx->buflen = 0;
+}
+
+__device__ void sha512_digest(sha512_ctx * ctx, uint64_t * result)
+{
+	uint8_t i;
+	if (ctx->buflen <= 111) {	//data+0x80+datasize fits in one 1024bit block
+		finish_ctx(ctx);
+		sha512_block(ctx);
+	} else {
+		uint8_t moved = 1;
+		if (ctx->buflen < 128) {	//data and 0x80 fits in one block
+			ctx_append_1(ctx);
+			moved = 0;
+		}
+		sha512_block(ctx);
+		clear_ctx_buffer(ctx);
+		if (moved)
+			ctx->buffer[0] = 0x80;	//append 1,the rest is already clean
+		ctx_add_length(ctx);
+		sha512_block(ctx);
+	}
+#pragma unroll 8
+	for (i = 0; i < 8; i++)
+		result[i] = SWAP64(ctx->H[i]);
+}
+
+
+__device__ void sha512crypt(const char *pass, uint8_t passlength,
+    uint64_t * tresult, uint32_t idx, uint32_t rounds)
+{
+
+	uint64_t  alt_result[8], temp_result[8];
+	int i;
+	sha512_ctx ctx;
+	init_ctx(&ctx);
+
+	ctx_update(&ctx, pass, passlength);
+	ctx_update(&ctx, cuda_salt[0].salt, cuda_salt[0].saltlen);
+	ctx_update(&ctx, pass, passlength);
+
+	sha512_digest(&ctx, alt_result);
+	init_ctx(&ctx);
+	
+	ctx_update(&ctx, pass, passlength);
+	ctx_update(&ctx, cuda_salt[0].salt, cuda_salt[0].saltlen);
+	ctx_update(&ctx, (const char *) alt_result, passlength);
+
+
+	for (i = passlength; i > 0; i >>= 1) {
+		if ((i & 1) != 0)
+			ctx_update(&ctx, (const char *) alt_result, 64);
+		else
+			ctx_update(&ctx, pass, passlength);
+	}
+	sha512_digest(&ctx, alt_result);
+
+
+	init_ctx(&ctx);
+	for (i = 0; i < passlength; i++)
+		ctx_update(&ctx, pass, passlength);
+
+	sha512_digest(&ctx, temp_result);
+
+	__shared__ char sp_sequence[THREADS][16+4];
+	char *p_sequence=sp_sequence[threadIdx.x];
+	memcpy(p_sequence, temp_result, passlength);
+
+	init_ctx(&ctx);
+	for (i = 0; i < 16 + ((unsigned char *) alt_result)[0]; i++)
+		ctx_update(&ctx, cuda_salt[0].salt, cuda_salt[0].saltlen);
+
+	sha512_digest(&ctx, temp_result);
+
+	uint8_t saltlength = cuda_salt[0].saltlen;
+
+	__shared__ char ss_sequence[THREADS][16+4];
+	char *s_sequence=ss_sequence[threadIdx.x];
+	memcpy(s_sequence, temp_result, saltlength);
+
+	for (i = 0; i < rounds; i++) {
+		init_ctx(&ctx);
+
+		if ((i & 1) != 0)
+			ctx_update(&ctx, p_sequence, passlength);
+		else
+			ctx_update(&ctx, (const char *) alt_result, 64);
+
+		if ((i % 3) != 0)
+			ctx_update(&ctx, s_sequence, saltlength);
+
+		if ((i % 7) != 0)
+			ctx_update(&ctx, p_sequence, passlength);
+
+		if ((i & 1) != 0)
+			ctx_update(&ctx, (const char *) alt_result, 64);
+		else
+			ctx_update(&ctx, p_sequence, passlength);
+
+
+		sha512_digest(&ctx, alt_result);
+	}
+#pragma unroll 8
+	for (i = 0; i < 8; i++)
+		tresult[i] = alt_result[i];
+}
+
+
+
+__global__ void kernel_crypt_r(crypt_sha512_password * inbuffer,
+    crypt_sha512_hash * outbuffer)
+{
+	uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
+	sha512crypt((const char *) inbuffer[idx].v, inbuffer[idx].length,
+	    outbuffer[idx].v, idx, cuda_salt[0].rounds);
+}
+
+void sha512_crypt_gpu(crypt_sha512_password * inbuffer,
+    crypt_sha512_hash * outbuffer, crypt_sha512_salt * host_salt)
+{
+
+	crypt_sha512_password *cuda_inbuffer;
+	crypt_sha512_hash *cuda_outbuffer;
+	size_t insize = sizeof(crypt_sha512_password) * KEYS_PER_CRYPT;
+	size_t outsize = sizeof(crypt_sha512_hash) * KEYS_PER_CRYPT;
+
+	HANDLE_ERROR(cudaMalloc(&cuda_inbuffer, insize));
+	HANDLE_ERROR(cudaMalloc(&cuda_outbuffer, outsize));
+
+	HANDLE_ERROR(cudaMemcpy(cuda_inbuffer, inbuffer, insize,
+		cudaMemcpyHostToDevice));
+	HANDLE_ERROR(cudaMemcpyToSymbol(cuda_salt, host_salt,
+		sizeof(crypt_sha512_salt)));
+
+	dim3 dimGrid(BLOCKS);
+	dim3 dimBlock(THREADS);
+	kernel_crypt_r <<< dimGrid, dimBlock >>> (cuda_inbuffer,
+	    cuda_outbuffer);
+	cudaThreadSynchronize();
+	HANDLE_ERROR(cudaMemcpy(outbuffer, cuda_outbuffer, outsize,
+		cudaMemcpyDeviceToHost));
+
+	HANDLE_ERROR(cudaFree(cuda_inbuffer));
+	HANDLE_ERROR(cudaFree(cuda_outbuffer));
+}
diff -urpN john-1.7.8.orig//src/cuda/phpass.cu john-1.7.8-allcuda_1/src/cuda/phpass.cu
--- john-1.7.8.orig//src/cuda/phpass.cu	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.8-allcuda_1/src/cuda/phpass.cu	2011-08-22 22:18:08.000000000 +0000
@@ -0,0 +1,322 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include "../phpass.h"
+
+const uint SETTING_SIZE = 12;
+const uint DATA_IN_SIZE = KEYS_PER_CRYPT * sizeof(phpass_password);
+const uint DATA_OUT_SIZE = KEYS_PER_CRYPT * sizeof(phpass_hash);
+
+static void HandleError(cudaError_t err, const char *file, int line)
+{
+	if (err != cudaSuccess) {
+		printf("%s in %s at line %d\n", cudaGetErrorString(err), file,
+		    line);
+		exit(EXIT_FAILURE);
+	}
+}
+
+#define HANDLE_ERROR(err) (HandleError(err,__FILE__,__LINE__))
+
+__device__ unsigned char *cuda_data = NULL;
+__device__ uint32_t *cuda_data_out = NULL;
+__device__ char *cuda_setting = NULL;
+
+unsigned char *host_data = NULL;
+uint32_t *host_data_out = NULL;
+
+extern "C" void mem_init(unsigned char *, uint32_t *, char *, char *, int);
+extern "C" void mem_clear(void);
+extern "C" void gpu_phpass(void);
+extern "C" void phpass_init(int gpuid);
+
+__global__ void kernel_phpass(unsigned char *data, uint32_t * data_out,
+    char *, int);
+
+
+__host__ void phpass_init(int gpuid)
+{
+	int count;
+	HANDLE_ERROR(cudaGetDeviceCount(&count));
+	if (gpuid < count)
+		cudaSetDevice(gpuid);
+	else {
+		printf("Invalid CUDA device id = %d\n", gpuid);
+		exit(1);
+	}
+}
+
+int cuda_count_log2;
+__host__ void gpu_phpass(void)
+{
+	dim3 dimGrid(BLOCKS);
+	dim3 dimBlock(THREADS);
+	kernel_phpass <<< dimGrid, dimBlock >>> (cuda_data, cuda_data_out,
+	    cuda_setting, cuda_count_log2);
+	HANDLE_ERROR(cudaThreadSynchronize());
+}
+
+__host__ void mem_init(unsigned char *p, uint32_t * h, char *setting,
+    char *itoa, int count_log2)
+{
+	cuda_count_log2 = count_log2;
+	host_data = p;
+	host_data_out = h;
+	HANDLE_ERROR(cudaMalloc(&cuda_setting, SETTING_SIZE));
+	HANDLE_ERROR(cudaMalloc(&cuda_data, DATA_IN_SIZE));
+	HANDLE_ERROR(cudaMalloc(&cuda_data_out, DATA_OUT_SIZE));
+	HANDLE_ERROR(cudaMemcpy(cuda_data, host_data, DATA_IN_SIZE,
+		cudaMemcpyHostToDevice));
+	HANDLE_ERROR(cudaMemcpy(cuda_setting, setting, SETTING_SIZE,
+		cudaMemcpyHostToDevice));
+}
+
+__host__ void mem_clear()
+{
+	HANDLE_ERROR(cudaMemcpy(host_data_out, cuda_data_out, DATA_OUT_SIZE,
+		cudaMemcpyDeviceToHost));
+	HANDLE_ERROR(cudaFree(cuda_data));
+	HANDLE_ERROR(cudaFree(cuda_data_out));
+	HANDLE_ERROR(cudaFree(cuda_setting));
+}
+
+__device__ void cuda_md5(char len, uint32_t * internal_ret, uint32_t * x)
+{
+	x[len / 4] |= (((uint32_t) 0x80) << ((len & 0x3) << 3));
+	uint32_t x14 = len << 3;
+
+	uint32_t a = 0x67452301;
+	uint32_t b = 0xefcdab89;
+	uint32_t c = 0x98badcfe;
+	uint32_t d = 0x10325476;
+
+	a = AC1 + x[0];
+	a = ROTATE_LEFT(a, S11);
+	a += b;			/* 1 */
+	d = (c ^ (a & MASK1)) + x[1] + AC2pCd;
+	d = ROTATE_LEFT(d, S12);
+	d += a;			/* 2 */
+	c = F(d, a, b) + x[2] + AC3pCc;
+	c = ROTATE_LEFT(c, S13);
+	c += d;			/* 3 */
+	b = F(c, d, a) + x[3] + AC4pCb;
+	b = ROTATE_LEFT(b, S14);
+	b += c;
+	FF(a, b, c, d, x[4], S11, 0xf57c0faf);
+	FF(d, a, b, c, x[5], S12, 0x4787c62a);
+	FF(c, d, a, b, x[6], S13, 0xa8304613);
+	FF(b, c, d, a, x[7], S14, 0xfd469501);
+	FF(a, b, c, d, 0, S11, 0x698098d8);
+	FF(d, a, b, c, 0, S12, 0x8b44f7af);
+	FF(c, d, a, b, 0, S13, 0xffff5bb1);
+	FF(b, c, d, a, 0, S14, 0x895cd7be);
+	FF(a, b, c, d, 0, S11, 0x6b901122);
+	FF(d, a, b, c, 0, S12, 0xfd987193);
+	FF(c, d, a, b, x14, S13, 0xa679438e);
+	FF(b, c, d, a, 0, S14, 0x49b40821);
+
+	GG(a, b, c, d, x[1], S21, 0xf61e2562);
+	GG(d, a, b, c, x[6], S22, 0xc040b340);
+	GG(c, d, a, b, 0, S23, 0x265e5a51);
+	GG(b, c, d, a, x[0], S24, 0xe9b6c7aa);
+	GG(a, b, c, d, x[5], S21, 0xd62f105d);
+	GG(d, a, b, c, 0, S22, 0x2441453);
+	GG(c, d, a, b, 0, S23, 0xd8a1e681);
+	GG(b, c, d, a, x[4], S24, 0xe7d3fbc8);
+	GG(a, b, c, d, 0, S21, 0x21e1cde6);
+	GG(d, a, b, c, x14, S22, 0xc33707d6);
+	GG(c, d, a, b, x[3], S23, 0xf4d50d87);
+	GG(b, c, d, a, 0, S24, 0x455a14ed);
+	GG(a, b, c, d, 0, S21, 0xa9e3e905);
+	GG(d, a, b, c, x[2], S22, 0xfcefa3f8);
+	GG(c, d, a, b, x[7], S23, 0x676f02d9);
+	GG(b, c, d, a, 0, S24, 0x8d2a4c8a);
+
+	HH(a, b, c, d, x[5], S31, 0xfffa3942);
+	HH(d, a, b, c, 0, S32, 0x8771f681);
+	HH(c, d, a, b, 0, S33, 0x6d9d6122);
+	HH(b, c, d, a, x14, S34, 0xfde5380c);
+	HH(a, b, c, d, x[1], S31, 0xa4beea44);
+	HH(d, a, b, c, x[4], S32, 0x4bdecfa9);
+	HH(c, d, a, b, x[7], S33, 0xf6bb4b60);
+	HH(b, c, d, a, 0, S34, 0xbebfbc70);
+	HH(a, b, c, d, 0, S31, 0x289b7ec6);
+	HH(d, a, b, c, x[0], S32, 0xeaa127fa);
+	HH(c, d, a, b, x[3], S33, 0xd4ef3085);
+	HH(b, c, d, a, x[6], S34, 0x4881d05);
+	HH(a, b, c, d, 0, S31, 0xd9d4d039);
+	HH(d, a, b, c, 0, S32, 0xe6db99e5);
+	HH(c, d, a, b, 0, S33, 0x1fa27cf8);
+	HH(b, c, d, a, x[2], S34, 0xc4ac5665);
+
+	II(a, b, c, d, x[0], S41, 0xf4292244);
+	II(d, a, b, c, x[7], S42, 0x432aff97);
+	II(c, d, a, b, x14, S43, 0xab9423a7);
+	II(b, c, d, a, x[5], S44, 0xfc93a039);
+	II(a, b, c, d, 0, S41, 0x655b59c3);
+	II(d, a, b, c, x[3], S42, 0x8f0ccc92);
+	II(c, d, a, b, 0, S43, 0xffeff47d);
+	II(b, c, d, a, x[1], S44, 0x85845dd1);
+	II(a, b, c, d, 0, S41, 0x6fa87e4f);
+	II(d, a, b, c, 0, S42, 0xfe2ce6e0);
+	II(c, d, a, b, x[6], S43, 0xa3014314);
+	II(b, c, d, a, 0, S44, 0x4e0811a1);
+	II(a, b, c, d, x[4], S41, 0xf7537e82);
+	II(d, a, b, c, 0, S42, 0xbd3af235);
+	II(c, d, a, b, x[2], S43, 0x2ad7d2bb);
+	II(b, c, d, a, 0, S44, 0xeb86d391);
+
+	internal_ret[0] = a + 0x67452301;
+	internal_ret[1] = b + 0xefcdab89;
+	internal_ret[2] = c + 0x98badcfe;
+	internal_ret[3] = d + 0x10325476;
+}
+
+__device__ void clear_ctx(uint32_t * x)
+{
+	int i;
+#pragma unroll 8
+	for (i = 0; i < 8; i++)
+		*x++ = 0;
+}
+
+__global__ void kernel_phpass(unsigned char *password, uint32_t * data_out,
+    char *setting, int count_log2)
+{
+	uint32_t x[8];
+	clear_ctx(x);
+
+	int length, count, i;
+	unsigned char *buff = (unsigned char *) x;
+	uint32_t idx = blockIdx.x * blockDim.x + threadIdx.x;
+
+	length = password[address(15, idx)];
+
+#pragma unroll 8
+	for (i = 0; i < 8; i++)
+		buff[i] = setting[i + 4];
+
+	for (i = 8; i < 8 + length; i++) {
+		buff[i] = password[address(i - 8, idx)];
+	}
+
+	cuda_md5(8 + length, x, x);
+	count = 1 << count_log2;
+	for (i = 16; i < 16 + length; i++)
+		buff[i] = password[address(i - 16, idx)];
+
+
+	uint32_t a, b, c, d, x0, x1, x2, x3, x4, x5, x6, x7;
+	uint32_t len = 16 + length;
+	uint32_t x14 = len << 3;
+
+	x[len / 4] |= ((0x80) << ((len & 0x3) << 3));
+	x0 = x[0];
+	x1 = x[1];
+	x2 = x[2];
+	x3 = x[3];
+	x4 = x[4];
+	x5 = x[5];
+	x6 = x[6];
+	x7 = x[7];
+	do {
+
+		b = 0xefcdab89;
+		c = 0x98badcfe;
+		d = 0x10325476;
+
+		a = AC1 + x0;
+		a = ROTATE_LEFT(a, S11);
+		a += b;		/* 1 */
+		d = (c ^ (a & MASK1)) + x1 + AC2pCd;
+		d = ROTATE_LEFT(d, S12);
+		d += a;		/* 2 */
+		c = F(d, a, b) + x2 + AC3pCc;
+		c = ROTATE_LEFT(c, S13);
+		c += d;		/* 3 */
+		b = F(c, d, a) + x3 + AC4pCb;
+		b = ROTATE_LEFT(b, S14);
+		b += c;
+		FF(a, b, c, d, x4, S11, 0xf57c0faf);
+		FF(d, a, b, c, x5, S12, 0x4787c62a);
+		FF(c, d, a, b, x6, S13, 0xa8304613);
+		FF(b, c, d, a, x7, S14, 0xfd469501);
+		FF(a, b, c, d, 0, S11, 0x698098d8);
+		FF(d, a, b, c, 0, S12, 0x8b44f7af);
+		FF(c, d, a, b, 0, S13, 0xffff5bb1);
+		FF(b, c, d, a, 0, S14, 0x895cd7be);
+		FF(a, b, c, d, 0, S11, 0x6b901122);
+		FF(d, a, b, c, 0, S12, 0xfd987193);
+		FF(c, d, a, b, x14, S13, 0xa679438e);
+		FF(b, c, d, a, 0, S14, 0x49b40821);
+
+		GG(a, b, c, d, x1, S21, 0xf61e2562);
+		GG(d, a, b, c, x6, S22, 0xc040b340);
+		GG(c, d, a, b, 0, S23, 0x265e5a51);
+		GG(b, c, d, a, x0, S24, 0xe9b6c7aa);
+		GG(a, b, c, d, x5, S21, 0xd62f105d);
+		GG(d, a, b, c, 0, S22, 0x2441453);
+		GG(c, d, a, b, 0, S23, 0xd8a1e681);
+		GG(b, c, d, a, x4, S24, 0xe7d3fbc8);
+		GG(a, b, c, d, 0, S21, 0x21e1cde6);
+		GG(d, a, b, c, x14, S22, 0xc33707d6);
+		GG(c, d, a, b, x3, S23, 0xf4d50d87);
+		GG(b, c, d, a, 0, S24, 0x455a14ed);
+		GG(a, b, c, d, 0, S21, 0xa9e3e905);
+		GG(d, a, b, c, x2, S22, 0xfcefa3f8);
+		GG(c, d, a, b, x7, S23, 0x676f02d9);
+		GG(b, c, d, a, 0, S24, 0x8d2a4c8a);
+
+		HH(a, b, c, d, x5, S31, 0xfffa3942);
+		HH(d, a, b, c, 0, S32, 0x8771f681);
+		HH(c, d, a, b, 0, S33, 0x6d9d6122);
+		HH(b, c, d, a, x14, S34, 0xfde5380c);
+		HH(a, b, c, d, x1, S31, 0xa4beea44);
+		HH(d, a, b, c, x4, S32, 0x4bdecfa9);
+		HH(c, d, a, b, x7, S33, 0xf6bb4b60);
+		HH(b, c, d, a, 0, S34, 0xbebfbc70);
+		HH(a, b, c, d, 0, S31, 0x289b7ec6);
+		HH(d, a, b, c, x0, S32, 0xeaa127fa);
+		HH(c, d, a, b, x3, S33, 0xd4ef3085);
+		HH(b, c, d, a, x6, S34, 0x4881d05);
+		HH(a, b, c, d, 0, S31, 0xd9d4d039);
+		HH(d, a, b, c, 0, S32, 0xe6db99e5);
+		HH(c, d, a, b, 0, S33, 0x1fa27cf8);
+		HH(b, c, d, a, x2, S34, 0xc4ac5665);
+
+		II(a, b, c, d, x0, S41, 0xf4292244);
+		II(d, a, b, c, x7, S42, 0x432aff97);
+		II(c, d, a, b, x14, S43, 0xab9423a7);
+		II(b, c, d, a, x5, S44, 0xfc93a039);
+		II(a, b, c, d, 0, S41, 0x655b59c3);
+		II(d, a, b, c, x3, S42, 0x8f0ccc92);
+		II(c, d, a, b, 0, S43, 0xffeff47d);
+		II(b, c, d, a, x1, S44, 0x85845dd1);
+		II(a, b, c, d, 0, S41, 0x6fa87e4f);
+		II(d, a, b, c, 0, S42, 0xfe2ce6e0);
+		II(c, d, a, b, x6, S43, 0xa3014314);
+		II(b, c, d, a, 0, S44, 0x4e0811a1);
+		II(a, b, c, d, x4, S41, 0xf7537e82);
+		II(d, a, b, c, 0, S42, 0xbd3af235);
+		II(c, d, a, b, x2, S43, 0x2ad7d2bb);
+		II(b, c, d, a, 0, S44, 0xeb86d391);
+
+		x0 = a + 0x67452301;
+		x1 = b + 0xefcdab89;
+		x2 = c + 0x98badcfe;
+		x3 = d + 0x10325476;
+
+	} while (--count);
+
+	data_out[address(0, idx)] = x0;
+	data_out[address(1, idx)] = x1;
+	data_out[address(2, idx)] = x2;
+	data_out[address(3, idx)] = x3;
+
+}
diff -urpN john-1.7.8.orig//src/john.c john-1.7.8-allcuda_1/src/john.c
--- john-1.7.8.orig//src/john.c	2011-02-27 12:31:36.000000000 +0000
+++ john-1.7.8-allcuda_1/src/john.c	2011-08-04 00:19:46.000000000 +0000
@@ -39,6 +39,9 @@ extern int CPU_detect(void);
 
 extern struct fmt_main fmt_DES, fmt_BSDI, fmt_MD5, fmt_BF;
 extern struct fmt_main fmt_AFS, fmt_LM;
+extern struct fmt_main fmt_PHPASSCUDA,fmt_CRYPTMD5CUDA;
+extern struct fmt_main fmt_CRYPTSHA256CUDA,fmt_CRYPTSHA512CUDA;
+
 #ifdef HAVE_CRYPT
 extern struct fmt_main fmt_crypt;
 #endif
@@ -71,6 +74,10 @@ static void john_register_all(void)
 	john_register_one(&fmt_BF);
 	john_register_one(&fmt_AFS);
 	john_register_one(&fmt_LM);
+	john_register_one(&fmt_PHPASSCUDA);
+	john_register_one(&fmt_CRYPTMD5CUDA);
+	john_register_one(&fmt_CRYPTSHA256CUDA);
+	john_register_one(&fmt_CRYPTSHA512CUDA);
 #ifdef HAVE_CRYPT
 	john_register_one(&fmt_crypt);
 #endif
diff -urpN john-1.7.8.orig//src/options.c john-1.7.8-allcuda_1/src/options.c
--- john-1.7.8.orig//src/options.c	2011-06-22 13:01:40.000000000 +0000
+++ john-1.7.8-allcuda_1/src/options.c	2011-08-04 00:19:26.000000000 +0000
@@ -100,7 +100,7 @@ static struct opt_entry opt_list[] = {
 "--salts=[-]COUNT           load salts with[out] at least COUNT passwords " \
 	"only\n" \
 "--format=NAME              force hash type NAME: " \
-	"DES/BSDI/MD5/BF/AFS/LM" MAYBE_CRYPT "\n" \
+	"DES/BSDI/MD5/BF/AFS/LM/PHPASSCUDA/CRYPTMD5CUDA/CRYPTSHA256CUDA/CRYPTSHA512CUDA" MAYBE_CRYPT "\n" \
 "--save-memory=LEVEL        enable memory saving, at LEVEL 1..3\n"
 
 void opt_init(char *name, int argc, char **argv)
diff -urpN john-1.7.8.orig//src/phpass.h john-1.7.8-allcuda_1/src/phpass.h
--- john-1.7.8.orig//src/phpass.h	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.8-allcuda_1/src/phpass.h	2011-08-22 22:15:26.000000000 +0000
@@ -0,0 +1,81 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba
+* <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#ifndef _PHPASS_H
+#define _PHPASS_H
+#include "common.h"
+#define uint8_t 			unsigned char
+#define uint32_t 			unsigned int
+
+#define ROTATE_LEFT(x, s) 		((x << s) | (x >> (32 - s)))
+#define BLOCKS 				126*3	//it must be always something*3
+#define THREADS 			256
+#define KEYS_PER_CRYPT 			BLOCKS*THREADS
+
+#define F(x, y, z)			((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z)			((y) ^ ((z) & ((x) ^ (y))))
+#define H(x, y, z)			((x) ^ (y) ^ (z))
+#define I(x, y, z)			((y) ^ ((x) | ~(z)))
+
+#define address(j,idx) 			(((j)*KEYS_PER_CRYPT)+(idx))
+
+#define FF(a, b, c, d, x, s, ac) \
+  {(a) += F ((b), (c), (d)) + (x) + (uint32_t)(ac); \
+   (a) = ROTATE_LEFT ((a), (s)); \
+   (a) += (b); \
+  }
+#define GG(a, b, c, d, x, s, ac) \
+  {(a) += G ((b), (c), (d)) + (x) + (uint32_t)(ac); \
+   (a) = ROTATE_LEFT ((a), (s)); \
+   (a) += (b); \
+  }
+#define HH(a, b, c, d, x, s, ac) \
+  {(a) += H ((b), (c), (d)) + (x) + (uint32_t)(ac); \
+   (a) = ROTATE_LEFT ((a), (s)); \
+   (a) += (b); \
+  }
+#define II(a, b, c, d, x, s, ac) \
+  {(a) += I ((b), (c), (d)) + (x) + (uint32_t)(ac); \
+   (a) = ROTATE_LEFT ((a), (s)); \
+   (a) += (b); \
+  }
+
+#define S11				7
+#define S12				12
+#define S13				17
+#define S14				22
+#define S21				5
+#define S22				9
+#define S23				14
+#define S24				20
+#define S31				4
+#define S32				11
+#define S33				16
+#define S34				23
+#define S41				6
+#define S42				10
+#define S43				15
+#define S44				21
+
+#define AC1				0xd76aa477
+#define AC2pCd				0xf8fa0bcc
+#define AC3pCc				0xbcdb4dd9
+#define AC4pCb				0xb18b7a77
+#define MASK1				0x77777777
+
+
+static char phpass_prefix[] = "$P$";
+
+typedef struct {
+	unsigned char v[15];
+	unsigned char length;
+} phpass_password;
+
+typedef struct {
+	uint32_t v[4];		///128bits for hash
+} phpass_hash;
+
+#endif
diff -urpN john-1.7.8.orig//src/phpasscuda_fmt.c john-1.7.8-allcuda_1/src/phpasscuda_fmt.c
--- john-1.7.8.orig//src/phpasscuda_fmt.c	1970-01-01 00:00:00.000000000 +0000
+++ john-1.7.8-allcuda_1/src/phpasscuda_fmt.c	2011-08-22 22:15:44.000000000 +0000
@@ -0,0 +1,295 @@
+/*
+* This software is Copyright (c) 2011 Lukas Odzioba
+* <lukas dot odzioba at gmail dot com> 
+* and it is hereby released to the general public under the following terms:
+* Redistribution and use in source and binary forms, with or without modification, are permitted.
+*/
+#include <string.h>
+#include "arch.h"
+#include "formats.h"
+#include "common.h"
+#include "misc.h"
+#include "phpass.h"
+
+#define FORMAT_LABEL		"phpasscuda"
+#define FORMAT_NAME		"PHPASSCUDA"
+
+#define PHPASS_TYPE		"PORTABLE-MD5"
+
+#define BENCHMARK_COMMENT	""
+#define BENCHMARK_LENGTH	-1
+
+#define PLAINTEXT_LENGTH	15
+#define CIPHERTEXT_LENGTH	34	/// header = 3 | loopcnt = 1 | salt = 8 | ciphertext = 22
+#define BINARY_SIZE		16
+#define MD5_DIGEST_LENGTH 	16
+
+
+#define SALT_SIZE		8
+
+#define MIN_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+#define MAX_KEYS_PER_CRYPT	KEYS_PER_CRYPT
+
+static unsigned char inbuffer[MAX_KEYS_PER_CRYPT * sizeof(phpass_password)];			/** plaintext ciphertexts **/
+static uint32_t outbuffer[MAX_KEYS_PER_CRYPT * 4];						/** calculated hashes **/
+
+static char currentsalt[SALT_SIZE];
+static char loopChar = '*';
+
+extern void mem_init(unsigned char *, uint32_t *, char *, char *, int);
+extern void mem_clear(void);
+extern void gpu_phpass(void);
+
+
+static struct fmt_tests tests[] = {
+/*	{"$P$900000000jPBDh/JWJIyrF0.DmP7kT.", "ala"},
+	{"$P$900000000a94rg7R/nUK0icmALICKj1", "john"},
+	{"$P$900000001ahWiA6cMRZxkgUxj4x/In0", "john"},
+	{"$P$900000000m6YEJzWtTmNBBL4jypbHv1", "openwall"},
+	{"$P$900000000zgzuX4Dc2091D8kak8RdR0", "h3ll00"},
+	{"$P$900000000qZTL5A0XQUX9hq0t8SoKE0", "1234567890"},
+	{"$P$900112200B9LMtPy2FSq910c1a6BrH0", "1234567890"},
+	{"$P$900000000a94rg7R/nUK0icmALICKj1", "john"},
+	{"$P$9sadli2.wzQIuzsR2nYVhUSlHNKgG/0", "john"},
+	{"$P$90000000000tbNYOc9TwXvLEI62rPt1", ""},
+*/
+      {"$P$9saltstriAcRMGl.91RgbAD6WSq64z.","a"},
+      {"$P$9saltstriMljTzvdluiefEfDeGGQEl/","ab"},
+      {"$P$9saltstrikCftjZCE7EY2Kg/pjbl8S.","abc"},
+      {"$P$9saltstriV/GXRIRi9UVeMLMph9BxF0","abcd"},
+      {"$P$9saltstri3JPgLni16rBZtI03oeqT.0","abcde"},
+      {"$P$9saltstri0D3A6JyITCuY72ZoXdejV.","abcdef"},
+      {"$P$9saltstriXeNc.xV8N.K9cTs/XEn13.","abcdefg"},
+      {"$P$9saltstrinwvfzVRP3u1gxG2gTLWqv.","abcdefgh"},
+      {"$P$9saltstriSUQTD.yC2WigjF8RU0Q.Z.","abcdefghi"},
+      {"$P$9saltstriWPpGLG.jwJkwGRwdKNEsg.","abcdefghij"},
+
+      {"$P$9saltstrizjDEWUMXTlQHQ3/jhpR4C.","abcdefghijk"},
+      {"$P$9saltstriGLUwnE6bl91BPJP6sxyka.","abcdefghijkl"},
+      {"$P$9saltstriq7s97e2m7dXnTEx2mtPzx.","abcdefghijklm"},
+      {"$P$9saltstriTWMzWKsEeiE7CKOVVU.rS0","abcdefghijklmn"},
+      {"$P$9saltstriXt7EDPKtkyRVOqcqEW5UU.", "abcdefghijklmno"},
+	{NULL}
+};
+
+static void init(void)
+{
+ phpass_init(0);
+}
+
+static int valid(char *ciphertext)
+{
+	uint32_t i, count_log2;
+
+	if (strlen(ciphertext) != CIPHERTEXT_LENGTH)
+		return 0;
+	if (strncmp(ciphertext, phpass_prefix, 3) != 0)
+		return 0;
+
+	for (i = 3; i < CIPHERTEXT_LENGTH; i++)
+		if (atoi64[ARCH_INDEX(ciphertext[i])] == 0x7F)
+			return 0;
+
+	count_log2 = atoi64[ARCH_INDEX(ciphertext[3])];
+	if (count_log2 < 7 || count_log2 > 31)
+		return 0;
+
+	return 1;
+};
+
+//code from historical JtR phpass patch
+static void *binary(char *ciphertext)
+{
+	static unsigned char b[BINARY_SIZE];
+	memset(b, 0, BINARY_SIZE);
+	int i, bidx = 0;
+	unsigned sixbits;
+	char *pos = &ciphertext[3 + 1 + 8];
+
+	for (i = 0; i < 5; i++) {
+		sixbits = atoi64[ARCH_INDEX(*pos++)];
+		b[bidx] = sixbits;
+		sixbits = atoi64[ARCH_INDEX(*pos++)];
+		b[bidx++] |= (sixbits << 6);
+		sixbits >>= 2;
+		b[bidx] = sixbits;
+		sixbits = atoi64[ARCH_INDEX(*pos++)];
+		b[bidx++] |= (sixbits << 4);
+		sixbits >>= 4;
+		b[bidx] = sixbits;
+		sixbits = atoi64[ARCH_INDEX(*pos++)];
+		b[bidx++] |= (sixbits << 2);
+	}
+	sixbits = atoi64[ARCH_INDEX(*pos++)];
+	b[bidx] = sixbits;
+	sixbits = atoi64[ARCH_INDEX(*pos++)];
+	b[bidx] |= (sixbits << 6);
+	return (void *) b;
+}
+
+static void *salt(char *ciphertext)
+{
+	static unsigned char salt[SALT_SIZE + 2];
+	memcpy(salt, &ciphertext[4], 8);
+	salt[8] = ciphertext[3];
+	salt[9] = 0;
+	return salt;
+}
+
+static int binary_hash_0(void *binary)
+{
+	return (((ARCH_WORD_32 *) binary)[0] & 0xf);
+}
+
+static int binary_hash_1(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xff;
+}
+
+static int binary_hash_2(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfff;
+}
+
+static int binary_hash_3(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xffff;
+}
+
+static int binary_hash_4(void *binary)
+{
+	return ((ARCH_WORD_32 *) binary)[0] & 0xfffff;
+}
+
+static void set_salt(void *salt)
+{
+	int i;
+	unsigned char *csalt = salt;
+	for (i = 0; i < 8; i++)
+		currentsalt[i] = csalt[i];
+	loopChar = csalt[8];
+}
+
+static void set_key(char *key, int index)
+{
+	int i, len = strlen(key);
+	inbuffer[address(15, index)] = len;
+	for (i = 0; i < len; i++)
+		inbuffer[address(i, index)] = key[i];
+}
+
+static char *get_key(int index)
+{
+	static char r[PLAINTEXT_LENGTH + 1];
+	int i;
+	for (i = 0; i < PLAINTEXT_LENGTH; i++)
+		r[i] = inbuffer[address(i, index)];
+	r[inbuffer[address(15,index)]] = '\0';
+	return r;
+}
+
+static void crypt_all(int count)
+{
+	char setting[40];
+	strcpy(setting, phpass_prefix);
+	setting[3] = loopChar;
+	int count_log2 = 0;
+	count_log2 = atoi64[ARCH_INDEX(setting[3])];
+	strcpy(setting + 4, currentsalt);
+	mem_init(inbuffer, outbuffer, setting, itoa64, count_log2);
+	gpu_phpass();
+	mem_clear();
+}
+
+static int get_hash_0(int index)
+{
+	return outbuffer[address(0, index)] & 0xf;
+}
+
+static int get_hash_1(int index)
+{
+	return outbuffer[address(0, index)] & 0xff;
+}
+
+static int get_hash_2(int index)
+{
+	return outbuffer[address(0, index)] & 0xfff;
+}
+
+static int get_hash_3(int index)
+{
+	return outbuffer[address(0, index)] & 0xffff;
+}
+
+static int get_hash_4(int index)
+{
+	return outbuffer[address(0, index)] & 0xfffff;
+}
+
+static int cmp_all(void *binary, int count)
+{
+	uint32_t i;
+	uint32_t b = ((uint32_t *) binary)[0];
+	for (i = 0; i < count; i++)
+		if (b == outbuffer[address(0, i)])
+			return 1;
+	return 0;
+}
+
+static int cmp_one(void *binary, int index)
+{
+	int i;
+	uint32_t *t = (uint32_t *) binary;
+	for (i = 0; i < 4; i++)
+		if (t[i] != outbuffer[address(i, index)])
+			return 0;
+	return 1;
+}
+
+static int cmp_exact(char *source, int count)
+{
+	return 1;
+}
+
+struct fmt_main fmt_PHPASSCUDA = {
+	{
+		    FORMAT_LABEL,
+		    FORMAT_NAME,
+		    PHPASS_TYPE,
+		    BENCHMARK_COMMENT,
+		    BENCHMARK_LENGTH,
+		    PLAINTEXT_LENGTH,
+		    BINARY_SIZE,
+		    SALT_SIZE + 1,
+		    MIN_KEYS_PER_CRYPT,
+		    MAX_KEYS_PER_CRYPT,
+		    FMT_CASE | FMT_8_BIT,
+	    tests},
+	{
+		    init,
+		    valid,
+		    fmt_default_split,
+		    binary,
+		    salt,
+		    {
+				binary_hash_0,
+				binary_hash_1,
+				binary_hash_2,
+				binary_hash_3,
+			binary_hash_4},
+		    fmt_default_salt_hash,
+		    set_salt,
+		    set_key,
+		    get_key,
+		    fmt_default_clear_keys,
+		    crypt_all,
+		    {
+				get_hash_0,
+				get_hash_1,
+				get_hash_2,
+				get_hash_3,
+			get_hash_4},
+		    cmp_all,
+		    cmp_one,
+	    cmp_exact}
+};
