From df8f52bcbc2a75ed032f3c6031dba35a51004019 Mon Sep 17 00:00:00 2001
From: magnum <magnum>
Date: Tue, 8 Nov 2011 02:24:07 +0100
Subject: [PATCH] j7: Documentation updates

---
 doc/ENCODINGS |   19 +++++++++----------
 doc/OPTIONS   |   45 ++++++++++++++++-----------------------------
 run/john.conf |   18 ++++++++++++------
 3 files changed, 37 insertions(+), 45 deletions(-)
diff --git a/doc/ENCODINGS b/doc/ENCODINGS
index faf3936..1765ee4 100644
--- a/doc/ENCODINGS
+++ b/doc/ENCODINGS
@@ -1,12 +1,13 @@
 This version of John is UTF-8 and codepage aware, using a new command line
 argument. In short, this means "my wordlists and input files are encoded in
 UTF-8" (or CP1252 etc). It does NOT mean you can feed John with eg. a UTF-8
-encoded wordlist when cracking LM. It DOES make John recognise vowels, lower
-or upper case characters, etc. when specifying a codepage.
+encoded wordlist when cracking LM. It DOES make John recognise national vowels,
+lower or upper case characters, etc. when specifying a codepage.
 
-Currently supported encodings: utf-8, iso-8859-1 (or ansi), iso-8859-15,
-koi8-r, cp437, cp737, cp850, cp858, cp866, cp1251 and cp1252. New encodings can
-be added with ease, using automated tools that rely on the Unicode Database.
+Currently supported encodings: utf-8, iso-8859-1 (or ansi), iso-8859-7,
+iso-8859-15, koi8-r, cp437, cp737, cp850, cp858, cp866, cp1251, cp1252 and
+cp1253. New encodings can be added with ease, using automated tools that rely
+on the Unicode Database (see Openwall wiki).
 
 
 Example usage:
@@ -33,8 +34,9 @@ $ iconv < greek.8859-7.dict -f iso-8859-7 -t cp737 | ./john -pipe -enc:cp737 ...
 
 The traditional behavior, and what is still happening if you don't specify an
 encoding, is that John will assume ISO-8859-1 when converting plaintexts or
-salts to UTF-16, and assume ASCII in most other cases (it will use 8-bit
-candidates as-is, but not upper/lower-case them or recognise letters etc).
+salts to UTF-16 (this also happens to be very fast), and assume ASCII in most
+other cases (it will use 8-bit candidates as-is, but not upper/lower-case them
+or recognise letters etc).
 
 Some new reject rules and character classes are implemented, see doc/RULES.
 Note that UTF-8 is not handled well in rules. Some wordlist rules may cut
@@ -48,12 +50,9 @@ it's plaintexts, like old MSSQL, will uppercase properly though.
 Caveats:
 Beware of UTF-8 BOM's. They will cripple the first word in your wordlist.
 
-
 --
 
 These contributions to John are hereby placed in the public domain. In case
 that is not applicable, they are Copyright 2009, 2010, 2011 by magnum and
 JimF and hereby released to the general public. Redistribution and use in
 source and binary forms, with or without modification, is permitted.
-
-magnum
diff --git a/doc/OPTIONS b/doc/OPTIONS
index d3b70f2..863428d 100644
--- a/doc/OPTIONS
+++ b/doc/OPTIONS
@@ -35,31 +35,10 @@ from [List.Rules:Single_2] section would be used.
 
 These are used to enable the wordlist mode.
 
---encoding=NAME		Input data in a character encoding other than the default 'raw'
-                    --encoding=LIST gives a list of all handled encodings in the
-                    local build of john.
-
-NAME can be:
-	iso-8859-1 (or ansi, or 8859-1)		to handle rules processing with 'high'
-										bit letters.
-  	koi8-r                              rules casing with this encoding.
-  	cp1251 (or cp-1251)                 rules casing with this encoding.
-  	cp866  (or cp-866)                  rules casing with this encoding.
-	utf-8  (or utf8)                    to enable UTF8 conversions.
-	or any other encoding listed in the --encoding=LIST response.
-
-John defaults to assuming ISO-8859-1 when converting plaintexts or salts
-to UTF-16. Using this flag will enable UTF-8 conversion instead. This affects
-many Microsoft formats like NT, mscash and mssql. Formats not affected will
-silently ignore this option flag.
-
-In rules processing, john defaults to 7 bit ASCII, for 'letters', and thus, does
-not perform conversions, or even consider a word to be 'pure letters', if it
-contains high bit values, even if they are characters, and will not properly
-perform case switching on these characters (such as A or a with a grave or
-accent).  However, in --encoding=iso-8859-1, john will perform these character
-conversions (also --encoding=koi8-r, --encoding=cp1251 and --encoding=cp866
-use those character encodings)
+--encoding=NAME
+
+Input data is in a character encoding other than the default 'raw'.  See also
+doc/ENCODINGS.  --encoding=LIST gives a list of all handled encodings.
 
 --rules[=SECTION]		enable word mangling rules for wordlist mode
 
@@ -132,7 +111,8 @@ file format way).  =left is just that literal string "=left".
 
 Tests all of the compiled in hashing algorithms for proper operation and
 benchmarks them.  The "--format" option can be used to restrict this to
-a specific algorithm.
+a specific algorithm.  Using --test=0 will do a very quick self-test but
+will not produce usable speed figures.
 
 --users=[-]LOGIN|UID[,..]	[do not] load this (these) user(s)
 
@@ -194,6 +174,11 @@ types unless you have other hash types (those supported by John
 natively) in the password file(s) as well (in which case another hash
 type may get detected unless you specify this option).
 
+When benchmarking "--format=crypt", John benchmarks DES by default.
+The "--subformat=TYPE" option can be added to benchmark other types,
+provided they are supported by the system. Currently supported TYPEs are
+MD5, BF, SHA-256 and SHA-512.
+
 "--format=crypt" is also a way to make John crack crypt(3) hashes of
 different types at the same time, but doing so results in poor
 performance and in unnecessarily poor results (in terms of passwords
@@ -201,7 +186,7 @@ cracked) for hashes of the "faster" types (as compared to the "slower"
 ones loaded for cracking at the same time).  So you are advised to use
 separate invocations of John, one per hash type.
 
---subformat=LIST displays all the built-in md5-gen formats, and exits
+--subformat=LIST displays all the built-in dynamic formats, and exits
 
 --save-memory=LEVEL		enable memory saving, at LEVEL 1..3
 
@@ -219,8 +204,10 @@ One of the significant performance improvements for some builds of
 john, is preloading the wordlist file into memory, instead of reading
 line by line. This is especially true when running with a large list
 of --rules.  The default max size file is 5 million bytes.  Using this
-option allows making this larger.   NOTE if --save-memory is used,
-then memory file processing is turned off.
+option allows making this larger.  A special value is --mem-file-size=0,
+which forces loading to memory regardless of file size.  NOTE: if
+--save-memory is used, then memory file processing is turned off.
+
 
 --field-separator-char=c	Use 'c' instead of the char ':'
 
diff --git a/run/john.conf b/run/john.conf
index b1980f3..96de55f 100644
--- a/run/john.conf
+++ b/run/john.conf
@@ -8,10 +8,6 @@
 [Options]
 # Wordlist file name, to be used in batch mode
 Wordlist = $JOHN/password.lst
-# Default Markov mode settings
-Statsfile = $JOHN/stats
-MkvLvl = 200
-MkvMaxLen = 12
 # Use idle cycles only
 Idle = Y
 # Crash recovery file saving delay in seconds
@@ -19,6 +15,11 @@ Save = 600
 # Beep when a password is found (who needs this anyway?)
 Beep = N
 
+# Default Markov mode settings
+Statsfile = $JOHN/stats
+MkvLvl = 200
+MkvMaxLen = 12
+
 # Automagically disable OMP if MPI is used (set to N if
 # you want to run one MPI process per multi-core host)
 MPIOMPmutex = Y
@@ -42,7 +43,10 @@ TimeFormat = %c
 # reported figures will be less accurate (default 0.05%)
 ETAthreshold = 0.05%
 
-# When printing status, show number of candidates tried
+# When printing status, show number of candidates tried (eg. 1/43210 for one
+# guess out of 43 thousand candidates). Note that the number is not equal to
+# "words tried" but rather "words x hash" combinations so if you are attacking
+# 1000 hashes, "43210" means you have tried about 43 words from your wordlist.
 StatusShowCandidates = N
 
 # Always report (to screen and log) cracked passwords as UTF-8, regardless of
@@ -56,7 +60,9 @@ AlwaysReportUTF8 = N
 UnicodeStoreUTF8 = N
 
 # Always report/store non-Unicode formats as UTF-8, regardless of input
-# encoding. There are pros and cons.
+# encoding. This is NOT recommended unless you REALLY understand the
+# implications. The actual codepage that was used is not stored anywhere
+# except in the log file.
 CPstoreUTF8 = N
 
 # Write cracked passwords to the log file (default is just the user name)
-- 
1.7.5.4