#!/bin/bash
#
# Copyright (c) 2010 by (GalaxyMaster) <galaxy at openwall.com>
# Licence: GPL v2 or later <http://www.gnu.org/licenses/gpl.txt>
#

# discover-zone
# -------------
#
# This script tries to guess DNS RRs (Resource Records) in the
# designated domain and uses several approaches to collect as
# much info as possible.  The primary purpose is to create a
# zone configuration file for a domain where zone transfer is
# prohibited (most modern name servers are configured to deny
# zone transfer requests).
#
# The usage of this script is quite simple, just provide it
# with the designated domain name and the script will output
# a valid BIND 9 zone file on its standard output.
#
# You may also pass additional host names, separated by spaces
# or commas, to extend the preconfigured list of names that are
# probed (e.g. '$ discover-zone domain.tld www2 secure,something'
# will try www2.domain.tld, secure.domain.tld, and
# something.domain.tld in addition to the preconfigured list
# of names to be guessed).
#

# Refuse to work under root.  The effective UID is checked directly:
# writability of '/' is only a proxy for it (it is false for root on a
# read-only root file system and true for any user who happens to be
# allowed to write there).
if [ "$EUID" -eq 0 ]; then
	echo "This script should not be executed under the root account!" >&2
	exit 1
fi

# Domain to inspect and the NOC e-mail for the SOA record; both are
# filled in by parse_arguments().
DOMAIN=''
EMAIL=''

# Host names that are always probed; guess names given on the command
# line are appended to this list.
COMMON_RRS='www ftp mail mx mx1 mx2 smtp pop pop3 imap imap4 webmail'

# NS      - the name server list
# RRS     - the records collected so far
# QUERIES - the cache of queries that have already been issued
declare -a NS RRS QUERIES

# A single newline character (the variable is called CR although it
# holds a line feed); it is used as a separator in substitutions.
CR=$'\n'

# The resulting zone is assembled in this variable.
ZONE=''

VERSION='0.0.1'
VERBOSE=''


# usage
#
# Prints the help text (including $VERSION) on stderr and terminates
# the script with exit status 1.  Never returns.
usage() {
	cat >&2 << EOF
discover-zone v$VERSION (a part of DNS scripts package)
===
Copyright (c) 2010 by (GalaxyMaster) <galaxy at openwall.com>
Licence: GPL v2 or later <http://www.gnu.org/licenses/gpl.txt>

This script tries to guess DNS RRs (Resource Records) in the
designated domain and uses several approaches to collect as
much info as possible.  The primary purpose is to create a
zone configuration file for a domain where zone transfer is
prohibited (most modern name servers are configured to deny
zone transfer requests).

Usage: discover-zone [options] [domain.tld] [guess names]

The following options are currently supported:

--email=email	- (-e) NOC email (to be used in the SOA record)
--help		- (-h) this help
--nameserver=ns - (-n) set the replacement NS (can be specified
			multiple times to include several NSes)
--verbose	- (-v) be verbose (prints on stderr)
--version	- (-V) displays version information

Example: discover-zone --verbose domain.tld www2,dev

EOF
	exit 1
}

# log MESSAGE
#
# Messages starting with 'ERROR:' are always written to stderr, as is
# and followed by a newline.  Everything else is written to stderr only
# when VERBOSE is '1'; in that case backslash escapes in MESSAGE are
# interpreted and no newline is appended.
log() {
	case "$1" in
		ERROR:*)
			echo "$1" >&2
			;;
		*)
			[ "$VERBOSE" == '1' ] && echo -ne "$1" >&2
			;;
	esac
}

# find_tool NAME
#
# Locates one of the supported helper tools (dig, date, sed) in PATH and
# stores its path in the matching global variable (DIG, DATE, SED).  If
# that variable already holds the path of an executable, the preset
# value is preferred over the one found in PATH.  The first line of the
# tool's version output is passed to log().
#
# Returns 0 when the tool has been located; 1 when it is not in PATH
# (logs 'NOT FOUND') or when NAME is not a supported tool (logs
# 'UNKNOWN').
find_tool() {
	local TOOL OUTPUT VAR FLAG
	TOOL=$(type -fP "$1")
	if [ -z "$TOOL" ]; then
		log 'NOT FOUND\n'
		return 1
	fi

	# map the tool to its global variable and its "show version" flag
	case "$1" in
		dig)	VAR=DIG FLAG=-v ;;
		date)	VAR=DATE FLAG=--version ;;
		sed)	VAR=SED FLAG=--version ;;
		*)	log 'UNKNOWN'
			return 1 ;;
	esac

	# a preset, executable path wins over the PATH lookup
	[ -n "${!VAR}" ] && [ -x "${!VAR}" ] && TOOL="${!VAR}"

	OUTPUT=$("$TOOL" "$FLAG" 2>&1)
	OUTPUT="${OUTPUT%%$CR*}"	# keep the first line only
	log "$TOOL ($OUTPUT)"
	printf -v "$VAR" '%s' "$TOOL"
	return 0
}

# parse_arguments ARG...
#
# Walks through the command line and fills in the globals:
#   EMAIL       from --email=VALUE or -e VALUE
#   NS          (appended) from --nameserver=VALUE or -n VALUE
#   VERBOSE     set to 1 by --verbose / -v
#   DOMAIN      the first non-option word
#   COMMON_RRS  extended by every further non-option word (commas are
#               treated as separators)
# --version / -V prints the version and exits with 0; --help / -h, an
# unknown option or an option that lacks its value end up in usage().
parse_arguments() {
	local pending=''

	for arg in "$@"; do
		# the previous word was an option that expects a value
		case "$pending" in
			'')
				;;
			-e)
				EMAIL="$arg"
				pending=''
				continue
				;;
			-n)
				NS[${#NS[*]}]="$arg"
				pending=''
				continue
				;;
			*)
				log "ERROR: unknown option '$pending' encountered, aborting!"
				usage
				pending=''
				continue
				;;
		esac

		case "$arg" in
			--email=*)
				EMAIL="${arg#--email=}"
				;;
			-h|--help)
				usage
				;;
			--nameserver=*)
				NS[${#NS[*]}]="${arg#--nameserver=}"
				;;
			-v|--verbose)
				VERBOSE=1
				;;
			-V|--version)
				echo "discover-zone v$VERSION"
				exit 0
				;;
			-e|-n)
				pending="$arg"
				;;
			-*)
				log "ERROR: unknown option '$arg' encountered, aborting!"
				usage
				;;
			*)
				if [ -n "$DOMAIN" ]; then
					# replacing commas is a no-op for a word without them
					COMMON_RRS="$COMMON_RRS${COMMON_RRS:+ }${arg//,/ }"
				else
					DOMAIN="$arg"
				fi
				;;
		esac
	done

	if [ -n "$pending" ]; then
		log "ERROR: dangling option '$pending' detected, should have a parameter, aborting!"
		usage
	fi
}

# query_seen TYPE NAME
#
# Returns 0 if the "TYPE NAME" pair is already present in the QUERIES
# cache and 1 otherwise.  Every element of the array is examined,
# including the very first one (index 0).
query_seen() {
	local KEY q
	KEY="$1 $2"
	for q in "${QUERIES[@]}"; do
		[ "$q" == "$KEY" ] && return 0
	done
	return 1
}

# query_cache TYPE NAME
#
# Remembers the "TYPE NAME" pair in the QUERIES cache.  A pair that is
# already cached is not stored again, so the number of elements in
# QUERIES equals the number of unique queries (the statistics printed
# at the end of the run rely on that).
query_cache() {
	local KEY q
	KEY="$1 $2"
	for q in "${QUERIES[@]}"; do
		[ "$q" == "$KEY" ] && return 0
	done
	QUERIES[${#QUERIES[*]}]="$KEY"
}

# query TYPE NAME
#
# Shows the query being made as a progress indicator (through log()) and
# prints dig's short answer for NAME/TYPE on stdout.  Nothing is printed
# and 0 is returned when query_seen() reports that the very same query
# has been made before.
#
# XXX: should be enhanced to detect errors and to try different NSes
query() {
	# ESC 8 restores the cursor position (presumably the one saved by
	# the caller before the discovery starts), ESC [ K erases to the end
	# of the line, so every query overwrites the previous indicator
	log "\0338\033[K$2 [$1] "
	query_seen "$1" "$2" && return
	#$DIG "@$NS" -t "$1" +short "$2"
	"$DIG" -t "$1" +short "$2"
}

# get_a DOMAIN [NAME]
#
# Queries the A records of NAME.DOMAIN (of DOMAIN itself when NAME is
# empty) and appends what has been found to RRS:
#   - an answer ending with a dot is treated as a CNAME target; if the
#     target lives inside DOMAIN it is resolved recursively,
#   - anything else is recorded as an address.
# Afterwards the MX and TXT records of the same name are checked and, if
# NAME consists of several labels, the name without its first label is
# processed too.
#
# The answer is processed line by line, so a status line from dig (one
# starting with ';') is skipped as a whole instead of being split into
# words that would end up as bogus A records.
get_a() {
	local R R2 r FQDN
	FQDN="$2${2:+.}$1"
	R=$(query a "$FQDN")
	query_cache a "$FQDN"
	while [ -n "$R" ]; do
		# detach the first line from the answer
		r="${R%%$CR*}"
		if [ "$r" != "$R" ]; then
			R="${R#*$CR}"
		else
			R=
		fi
		[ -n "$r" ] || continue
		[ "${r:0:1}" == ';' ] && continue # skip dig's status messages
		# a CNAME check
		if [ "${r%.}" != "$r" ]; then
			R2="${r%.$1.}"
			# XXX: should we bark here if $2 is undefined?
			RRS[${#RRS[*]}]="${2:-@} CNAME $R2"
			[ -n "$R2" -a "$R2" != "$r" ] && get_a "$1" "$R2"
		else
			RRS[${#RRS[*]}]="${2:-@} A $r"
		fi
	done

	# supplementary checks

	# check whether MX is defined for this name
	get_mx "$1" "$2"

	# check for TXT records
	get_txt "$1" "$2"

	# if the name is a compound, check the upper part
	if [ -n "$2" ] && [ "$2" != "${2#*.}" ]; then
		get_a "$1" "${2#*.}"
	fi
}

# get_txt DOMAIN [NAME]
#
# Queries the TXT records of NAME.DOMAIN (of DOMAIN itself when NAME is
# empty) and appends every record to RRS.  When sed is available, host
# names belonging to DOMAIN that are mentioned inside a record are
# extracted and handed over to get_a().
#
# The answer is consumed line by line.  The processed line is cut off at
# the first newline rather than by using the line as a pattern: a record
# containing pattern characters (e.g. the \" dig prints for an embedded
# quote) would never match itself and the loop would never end.
get_txt() {
	local R r DOMAIN_ESC t FQDN
	FQDN="$2${2:+.}$1"
	R=$(query txt "$FQDN")
	query_cache txt "$FQDN"
	[ -n "$R" ] || return
	while [ -n "$R" ]; do
		r="${R%%$CR*}"
		if [ "$r" != "$R" ]; then
			R="${R#*$CR}"
		else
			R=
		fi
		[ -n "$r" ] || continue
		[ "${r:0:1}" == ';' ] && continue # skip dig's status messages
		RRS[${#RRS[*]}]="${2:-@} TXT $r"
		if [ -n "$SED" -a -x "$SED" ]; then
			# check if the TXT record contains some host names: a line
			# break is inserted after every occurrence of the domain,
			# then sed keeps only the lines ending with a host name
			DOMAIN_ESC="${1//./\.}"
			for t in $(printf '%s\n' "${r//$DOMAIN_ESC/$1$CR}" | "$SED" 's/.*[^[:alnum:]_.-]\(\([[:alnum:]_-]\+\.\)\+'"$DOMAIN_ESC"'\)/\1/;t;d'); do
				get_a "$1" "${t%.$1}"
			done
		fi
	done
}

# get_mx DOMAIN [NAME]
#
# Queries the MX records of NAME.DOMAIN (of DOMAIN itself when NAME is
# empty).  Each answer line is turned into a single word by replacing
# its spaces with colons, then:
#   - a word without a colon that ends with a dot is recorded as a
#     CNAME, and its target is resolved if it lives inside DOMAIN,
#   - any other word is recorded as "MX priority exchanger", and the
#     exchanger is resolved if it lives inside DOMAIN.
get_mx() {
	local answer entry target prio exchanger fqdn owner
	fqdn="$2${2:+.}$1"
	owner="${2:-@}"
	answer=$(query mx "$fqdn")
	query_cache mx "$fqdn"
	[ -n "$answer" ] || return
	for entry in ${answer// /:}; do
		case "$entry" in
			\;*)	continue ;; # skip dig's status messages
		esac
		if [[ "$entry" != *:* && "$entry" == *. ]]; then
			# no priority in front of the name: treat it as a CNAME
			target="${entry%.$1.}"
			# XXX: should we bark here if $2 is undefined?
			RRS[${#RRS[*]}]="$owner CNAME $target"
			[ -n "$target" -a "$target" != "$entry" ] && get_a "$1" "$target"
		else
			prio="${entry%%:*}"
			exchanger="${entry#$prio:}"
			RRS[${#RRS[*]}]="$owner MX ${entry/:/ }"
			target="${exchanger%.$1.}"
			[ -n "$target" -a "$target" != "$exchanger" ] && get_a "$1" "$target" "-MX"
		fi
	done
}

print_soa() {
	local SOA sNS sEMAIL SERIAL REFRESH RETRY EXPIRE NCACHE
	SOA="$1"
	SOA="${SOA#@ SOA }"
	sNS="${SOA%% *}" ; SOA="${SOA#$sNS }"
	sEMAIL="${SOA%% *}" ; SOA="${SOA#$sEMAIL }"
	SERIAL="${SOA%% *}" ; SOA="${SOA#$SERIAL }"
	REFRESH="${SOA%% *}" ; SOA="${SOA#$REFRESH }"
	RETRY="${SOA%% *}" ; SOA="${SOA#$RETRY }"
	EXPIRE="${SOA%% *}" ; SOA="${SOA#$EXPIRE }"
	NCACHE="${SOA%% *}" ; SOA="${SOA#$NCACHE }"

	[ -n "${NS[0]}" ] && sNS="${NS[0]}"
	[ -n "$EMAIL" ] && sEMAIL="${EMAIL//@/.}"

	cat << EOF
@		IN SOA		$sNS $sEMAIL (
					$(($SERIAL+1))	; serial
					$REFRESH		; refresh
					$RETRY		; retry
					$EXPIRE		; expire
					$NCACHE		; ncache
				)
EOF
}

# print_rr NAME "TYPE[ PARAM]" VALUE
#
# Prints a single resource record.  The second argument may carry a
# parameter after the type, separated by a space (print_zone passes the
# MX priority this way).  The owner name is left out when it equals
# CURRENT_RR, i.e. the owner of the previously printed record;
# otherwise CURRENT_RR is updated.
print_rr() {
	local owner rrtype extra data
	owner="$1"
	data="$3"

	# separate the optional parameter from the type
	case "$2" in
		*' '*)
			rrtype="${2%% *}"
			extra="${2#* }"
			;;
		*)
			rrtype="$2"
			extra=
			;;
	esac

	# short type names get an extra tab to keep the columns aligned
	case "$rrtype" in
		A|NS|TXT)	rrtype="$rrtype	" ;;
	esac

	if [ "$owner" == "$CURRENT_RR" ]; then
		owner=
	else
		CURRENT_RR="$owner"
	fi

	echo "$owner		IN $rrtype${extra:+ }$extra	$data"
}

# print_zone ZONE
#
# Prints the complete zone file on stdout: the $TTL directive, the SOA
# record (taken from the global SOA), one NS record per element of the
# NS array and then every record of ZONE.  ZONE holds one record per
# line in the "name type [priority] value" form; the priority is present
# for MX records only.  Name servers inside DOMAIN are printed relative
# to the zone.
print_zone() {
	local ZONE LINE NAME TYPE PARAM
	ZONE="$1"
	echo '$TTL 6H'
	print_soa "$SOA"

	CURRENT_RR=@
	echo
	# every name server gets its own record
	for PARAM in "${NS[@]}"; do
		print_rr '@' 'NS' "${PARAM%.$DOMAIN.}"
	done
	echo

	while [ -n "$ZONE" ]; do
		# detach the first line from the zone
		LINE="${ZONE%%$CR*}"
		[ "$LINE" != "$ZONE" ] && ZONE="${ZONE#*$CR}" || ZONE=
		[ -n "$LINE" ] || continue
		NAME="${LINE%% *}" ; LINE="${LINE#"$NAME" }"
		PARAM=
		TYPE="${LINE%% *}" ; LINE="${LINE#"$TYPE" }"
		if [ "$TYPE" == 'MX' ]; then
			PARAM="${LINE%% *}" ; LINE="${LINE#"$PARAM" }"
		fi
		print_rr "$NAME" "$TYPE${PARAM:+ }$PARAM" "$LINE"
	done
}

# main() starts here :) ======================================================

parse_arguments "$@"

log "discover-zone v$VERSION (a part of DNS scripts package)\n"
log '===\n'
log 'Copyright (c) 2010 by (GalaxyMaster) <galaxy at openwall.com>\n'
log 'Licence: GPL v2 or later <http://www.gnu.org/licenses/gpl.txt>\n'
log '\n'

# locate the external tools; find_tool stores their paths in DIG, SED
# and DATE
log "\n* Verifying working environment:"
log '\n\t- dig ... '
find_tool dig
log '\n\t- sed ... '
find_tool sed
log '\n\t- date ... '
find_tool date
log '\n'
if [ -z "$DIG" ]; then
	# no trailing '\n' here: log prints ERROR messages as is and
	# terminates them with a newline on its own
	log "ERROR: this script could do nothing useful without the 'dig' binary, aborting!"
	exit 1
fi
[ -z "$SED" ] && log "WARNING: parsing of TXT RRs will be disabled due to the absence of 'sed'\n"
[ -z "$DATE" ] && log "\t\tNOTICE: some statistics will be incorrect since there is no 'date'\n"

# remains empty when there is no 'date' (nothing is executed then)
EXEC_TIME_START=${DATE:+$("$DATE" +%s 2>/dev/null)}

if [ -z "$DOMAIN" ]; then
	log "ERROR: no domain name has been specified, hence nothing to do, exiting."
	[ "$VERBOSE" != '1' ] && usage
	exit 1
fi

# protect DOMAIN
readonly DOMAIN

# protect EMAIL
[ -n "$EMAIL" ] && readonly EMAIL

# protect specified NSes (if any)
[ ${#NS[*]} -gt 0 ] && readonly -a NS

log "\n* Getting SOA information for '$DOMAIN' ... "
SOA=$("$DIG" -t soa +short "$DOMAIN")
if [ -z "$SOA" ]; then
	log 'FAILED\n'
	log "ERROR: dig returned empty output, perhaps '$DOMAIN' does not exist"
	exit 1
fi
# a line starting with ';' is one of dig's status messages, not a record
if [ "${SOA:0:1}" == ';' ]; then
	log 'FAILED\n'
	log "ERROR: dig failed to obtain the SOA record for '$DOMAIN': $SOA"
	exit 1
fi
log "OK\n\t$SOA\n"

# ESC 7 saves the cursor position; the progress indicator is drawn
# from that position
log '\n* Discovering DNS records ...\n\t\0337'
QUERYING_TIME_START=${DATE:+$("$DATE" +%s 2>/dev/null)}
# if we weren't supplied with replacement NSes, get original ones
if [ ${#NS[*]} -eq 0 ]; then
	NSES=$(query ns "$DOMAIN")
	if [ -z "$NSES" ]; then
		log 'FAILED\n'
		log "ERROR: dig returned empty output for NSes, perhaps '$DOMAIN' does not exist"
		exit 1
	fi
	query_cache ns "$DOMAIN"

	# the primary name server (the first field of SOA) goes first
	NS[0]="${SOA%% *}" # XXX: should we also ensure that there is a space in the SOA string?

	i=1
	for s in $NSES ; do
		if [ "$s" != "${NS[0]}" ]; then
			NS[$i]="$s"
			i=$(( $i + 1 ))
		fi
	done
	unset NSES i
fi

# is the @ record defined?
get_a "$DOMAIN"

# try the preconfigured names and the guess names from the command line
for r in $COMMON_RRS; do
	get_a "$DOMAIN" "$r"
done

# if there are any NSes inside this domain get their A RR
for r in "${NS[@]}"; do
	[ "${r%.$DOMAIN.}" != "$r" ] && get_a "$DOMAIN" "${r%.$DOMAIN.}"
done
# wipe the progress indicator: ESC [ G moves the cursor to the first
# column, ESC [ K erases the line from there
log '\033[G\033[K'
QUERYING_TIME_END=${DATE:+$("$DATE" +%s 2>/dev/null)}
QUERYING_TIME=$(( ${QUERYING_TIME_END:-0} - ${QUERYING_TIME_START:-0} ))
unset QUERYING_TIME_START QUERYING_TIME_END

# format_ratio DIVIDEND DIVISOR
#
# Prints DIVIDEND / DIVISOR with two decimal places using integer
# arithmetic only, or 'n/a' when DIVISOR is not a positive number (e.g.
# when all queries have completed in less than a second).
format_ratio() {
	if [ "$2" -gt 0 ] 2>/dev/null; then
		printf '%d.%02d' $(( $1 / $2 )) $(( ( $1 % $2 ) * 100 / $2 ))
	else
		printf 'n/a'
	fi
}

log '\n* Processing the results ... \n'
# the NS records and the SOA record are counted as discovered too
RRS_COLLECTED=$(( ${#RRS[*]} + ${#NS[*]} + 1 ))
# join the collected records, one record per line
saved_IFS="$IFS"
IFS="$CR"
ZONE="${RRS[*]}"
IFS="$saved_IFS"
unset RRS saved_IFS

# remove duplicate records (if any)
ZONE=$(printf '%s' "$ZONE" | LC_ALL=C sort -u)

log "\n* Displaying the resulting zone file:\n===\n"
SOA="@ SOA $SOA"
print_zone "$ZONE"
log '===\n'
log '\n* Statistics:\n'
log "\tunique queries: ${#QUERIES[*]}\n"
log "\t    query rate: $(format_ratio "${#QUERIES[*]}" "${QUERYING_TIME:-0}") per second\n"
log "\tdiscovered RRs: $RRS_COLLECTED\n"
log "\thit/miss ratio: $(format_ratio "${RRS_COLLECTED:-0}" "${#QUERIES[*]}")\n"
EXEC_TIME_END=${DATE:+$("$DATE" +%s 2>/dev/null)}
EXEC_TIME=$(( ${EXEC_TIME_END:-0} - ${EXEC_TIME_START:-0} ))
unset EXEC_TIME_START EXEC_TIME_END
log "\texecution time: $EXEC_TIME second(s)\n"
log '\nCompleted!\n\n'
