########### msgdata.tcl
# Routines (part of main program) for dealing with DATA.
#
# This file is part of SAUCE, a very picky anti-spam receiver-SMTP.
# SAUCE is Copyright (C) 1997-1999 Ian Jackson
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
#
# $Id: msgdata.tcl,v 1.33 1999/10/09 13:30:23 ian Exp $

# state variables used during message header processing:
# bypass            we have only bypass and/or admin addresses, forego many checks
# admin             we have only admin addresses, forego even more checks
# late_bounce       `late-bounce': we're going to refuse the message
#                    after the end of the body (and discard the body)
# header            accumulated header text
# currenthn         name of header field we're currently in
# currenthl         value of header field we're currently in, including name
# originators       list of dequoted originator addresses (checked and unchecked)
# originators_unchk list of dequoted as-yet-unchecked originator addresses
# mid               value of Message-ID header field
# resentmid         value of (last) Resent-Message-ID header field (unset => none)
# resentany         1 => we have had a Resent- field, unset => we haven't
# msgcomplete       we have received and copied whole message, just
#                   want to finish verifications
#
# While receiving the message data, we are either:
# * Receiving the header:
#     No DATA has been issued to the local MTA; the header info (such
#     as we need) is accumulating in our VM.  We may have address
#     verification threads outstanding.  In this state late_bounce
#     indicates whether we will want to accept or reject the message
#     when we've seen the headers.
#     Functions called in this state have names starting hdr_...
# * Receiving the body, thinking we will accept the message:
#     DATA has been issued to the local MTA and the header and some
#     of the body has been sent to it.
#     Functions called in this state have names starting body_...
# * Receiving the body, knowing we will reject the message:
#     The local MTA has either not been sent DATA (in which case it
#     will need an RSET), or was closed because we had sent it some
#     of the body or header and must now be reopened.
#     Functions called in this state have names starting bodyrej_...
# Functions callable in any of these states have names starting msg_...
# Functions callable in any state except `body' have names starting
# msgnoa_... (for `no accept').
#
# We finally accept a message (sending final-. to our MTA) when we've
# received and copied the whole message {[info exists
# state(msgcomplete)]}, and have no currently-unchecked originators or
# verifications-in-progress {![info exists state(avfid)] && ![llength
# $state(originators_unchk)]}.

thread_chainproc ic askfordata_done {} {
    set state(header) {}
    set state(currenthn) {}
    set state(currenthl) {}
    set orig {}
    if {[string length $state(mf_dm)]} {
	lappend orig $state(mf_lp)@$state(mf_dm)
    }
    set state(t_w) $orig
    set state(originators) $orig
    set state(originators_unchk) {}
    set state(bypass) [expr {
	([info exists state(a_bypass)] ||
	 [info exists state(a_admin)]) &&
	![info exists state(a_normal)]
    }]
    set state(admin) [expr {
	[info exists state(a_admin)] &&
	![info exists state(a_bypass)] &&
	![info exists state(a_normal)]
    }]
    chanset_hide $state(chan) 3 1
    threadio_gets ic $id $state(chan) hdr_read {}
}

# Header parsing ...

thread_chainproc ic hdr_read {data} {
    global max_header_size blacklist_message require_messageid
    regexp {^.*} $data data
    ic_msg_checkeof
    if {"$data" == "."} { set eom 1 } else { set eom 0 }
    debug 3 "hdr_read >$data<"
    if {[regexp {^[ \t]} $data]} {
	if {!$state(bypass) && ![string length $state(currenthn)]} {
	    ic_msgnoa_reject 0 "554 First line of header was header field continuation"
	    return
	}
	append state(currenthl) "\n$data"
	if {[string length $state(currenthl)] > $max_header_size} {
	    ic_msgnoa_reject 0 "554 header line too large (>$max_header_size bytes)"
	    return
	}
    } elseif {[regexp {^([\041-\071\073-\176\241-\376]+)[ \t]*:} $data all newfn]} {
	ic_hdr_process1 $state(currenthn) $state(currenthl)
	set state(currenthn) $newfn
	set state(currenthl) $data
    } elseif {$eom || ![string length $data] || $state(bypass)} {
	# End of headers (or end of parseable headers and we're bypassing).
	ic_hdr_process1 $state(currenthn) $state(currenthl)
	set state(currenthn) {}
	set state(currenthl) {}
	if {!$eom} { append state(header) "$data\n" }
	if {[info exists state(no)]} {
	    if {$state(admin)} {
		ic_msg_accept $eom
	    } else {
		set state(whyreject) $state(no)
		ic_msgnoa_reject $eom "550 $blacklist_message"
	    }
	} elseif {!$state(bypass) && ![llength $state(originators)]} {
	    ic_msgnoa_reject $eom "554 No originators in envelope or body"
	} elseif {!$state(bypass) && $require_messageid &&
	          ![info exists state(mid)]} {
	    ic_msgnoa_reject $eom "554 No Message-ID header"
	} elseif {!$state(bypass) && $require_messageid &&
		  [info exists state(resentany)] &&
		  ![info exists state(resentmid)]} {
	    ic_msgnoa_reject $eom \
		    "554 Resent- header(s), but no Resent-Message-ID"
	} elseif {[info exists state(late_bounce)]} {
	    ic_msgnoa_reject $eom $state(late_bounce)
	} else {
	    ic_msg_accept $eom
	}
	return
    } else {
	ic_msgnoa_reject 0 "554 Header data malformed"
	return
    }
    threadio_gets ic $id $state(chan) hdr_read {}
}

thread_subproc ic msg_checkeof {} {
    global canonical_hostname
    if {[eof $state(chan)]} {
	catch_close_cleardesc $state(mtachan)
	ic_commandfinalresponse major \
		"421 $canonical_hostname Connection dropped in message data"
	return -code return
    }
}

thread_subproc ic hdr_process1 {hn hl} {
    global blacklist_message max_header_size errorInfo
    if {![string length $hn]} {
	return
    }
    debug 3 "hdr_process1 >$hn|$hl<"
    append state(header) "$hl\n"
    set lowerhn [string tolower $hn]
    if {[string length $state(header)] > $max_header_size} {
	ic_msgnoa_reject 0 "554 header too large (>$max_header_size bytes)"
	return -code return
    }
    regsub {^[^:]+:[ \t]*} $hl {} hl
    if {[regexp -nocase {^resent-(from|reply-to|sender|message-id|to|cc|bcc|date)$} \
	    $hn]} {
	set state(resentany) 1
    }
    if {[regexp -nocase {^message-id$} $hn]} {
	if {!$state(bypass) && [info exists state(mid)]} {
	    ic_msgnoa_reject 0 "554 Message-ID header appears twice"
	    return -code return
	}
	regsub -nocase {^message-id:[ \t\n]*} $hl {} state(mid)
    } elseif {[regexp -nocase {^resent-message-id$} $hn]} {
	regsub -nocase "^resent-message-id:\[ \t\n\]*" $hl {} state(resentmid)
    }
    if {$state(bypass)} return
    if {[regexp -nocase {^(resent-)?(from|sender|reply-to)$} $hn]} {
	if {[catch {
	    ic_hdr_recipients $lowerhn $hl
	} emsg]} {
	    debug 3 "header error >$emsg|$errorInfo<"
	    ic_msgnoa_reject 0 "554 error in $hn header: $emsg"
	    return -code return
	}
    }
    if {!$state(admin) && [info exists headerblacklistres($lowerhn)]} {
	foreach re $headerblacklistres($lowerhn) {
	    if {[info exists state(add_bl)]} { break }
	    if {[catch {
		if {[regexp -- $re $hl]} {
		    set state(add_bl) "Blacklisted `$hn'"
		    msg_bl_late_bounce "Blacklisted `$hn'"
		}
	    } emsg]} {
		log error "blacklist regexp failure ($re): $emsg"
	    }
	}
    }
}

thread_subproc ic msg_bl_late_bounce {why} {
    # Do not call this if admin is 1.
    global blacklist_message
    if {![info exists state(late_bounce)]} {
	set state(whyreject) $why
	set state(late_bounce) "554 $blacklist_message"
    }
}

thread_subproc ic hdr_recipients {lowerhn tf} {
    debug 3 "hdr_recipients >$lowerhn|$tf<"
    set colev 0
    set uq {}
    while {[string length $tf]} {
	debug 3 "hdr_recipients >$lowerhn|$tf|$uq<"
	if {[regexp {^[ \n\t]+(.*)$} $tf all tf]} {
	} elseif {[regexp {^\((.*)$} $tf all tf]} {
	    incr colev
	} elseif {$colev} {
	    if {[regexp {^\)(.*)$} $tf all tf]} {
		incr colev -1
	    } elseif {[regexp {^[^\\\n()]+(.*)$} $tf all tf]} {
	    } elseif {[regexp {^\\.(.*)$} $tf all tf]} {
	    } elseif {[regexp {^\n(.*)$} $tf all tf]} {
	    } else {
		error "invalid text in comment"
	    }
	} elseif {[regexp \
 {^([\055\041\043-\047\051-\053\057-\071\075\077\101-\132\136-\176\200-\376]+)(.*)} \
                   $tf all xt tf]} {
	    binary scan $xt H* xt
            append uq $xt
	} elseif {[regexp {^([][()<>@,;:\.])(.*)} $tf all xt tf]} {
            append uq $xt
	} elseif {[regexp {^"(.*)$} $tf all tf]} {
	    while {[regexp {^([^"\\\n]+)(.*)$} $tf all qt tf] || \
		   [regexp {^\\(.)(.*)$} $tf all qt tf] || \
		   [regexp {^\\(\n)(.*)$} $tf all qt tf]} {
	        binary scan $qt H* qt
	        append uq $qt
	    }
	    if {![regexp {^\"(.*)$} $tf all tf]} {
		error "missing end of quoted string"
	    }
	} else {
	    error "invalid data"
	}
    }
    append uq ,
    while {[string length $uq]} {
	debug 3 "hdr_recipients >$uq<"
	if {[regsub {^[0-9a-f.]+:([][0-9a-f.@,<>]*);} $uq {\1,} uq]} {
	} elseif {[regexp {^[0-9a-f.]*<([][0-9a-f.@]+)>,(.*)} $uq all ras uq]} {
	    regsub {^(@[][0-9a-f.]:)*} $ras {} ras
	    ic_hdr_1recipient $ras
	} elseif {[regexp {^([][0-9a-f.@]+),(.*)} $uq all ras uq]} {
	    ic_hdr_1recipient $ras
	} elseif {[regexp {^,(.*)} $uq all uq]} {
	} else {
	    error "invalid syntax"
	}
    }
}

thread_subproc ic hdr_1recipient {ras} {
    debug 3 "hdr_1recipient >$ras<"
    if {![regexp {^([0-9a-f.]+)@([][0-9a-f.]+)$} $ras all lp dm]} {
	error "invalid address"
    }
    regsub -all {\.} $lp 2e lp
    regsub -all {\.} $dm 2e dm
    set lp [binary format H* $lp]
    set dm [binary format H* $dm]

    lappend state(originators) $lp@$dm
    lappend state(originators_unchk) $lp@$dm
    ic_msg_origverify
}

# Originator verification ...

thread_subproc ic msg_origverify {} {
    global blacklist_message
    # This function is called in approximately two states:
    # - during header processing, when it handles replies wrt one originator
    # and simply queues the next, if none have already been queued.  It does
    # not pass flow of control.
    # - after message body copy, when it ensures continuity of flow of control,
    # by either specifying a caller of itself as a callback or by calling
    # one of the message completion processing functions itself.
    debug 3 "originators_unchk >$state(originators_unchk)<"
    if {!$state(admin)} {
	foreach check $state(originators) {
	    if {"[ds_get addr-list $check]" == "black"} {
		ic_msg_bl_late_bounce "Blacklisted originator address `$check'"
	    }
	}
    }
    debug 3 "originators_unchk checking"
    while {[llength $state(originators_unchk)]} {
	set this [lindex $state(originators_unchk) 0]
	set neworgunchk [lreplace $state(originators_unchk) 0 0]
	if {$state(admin)} {
	    # Do not do any verification
	} elseif {![regexp {^(.*)@([^@]+)$} $this all lp dm]} {
	    set state(late_bounce) "554 Syntax error in `$hn' address $lp@$dm"
	} elseif {![info exists state(late_bounce)]} {
	    switch -exact -- [ds_get addr-list $this] {
		white - whitesoon {
		}
		black {
		    ic_msg_bl_late_bounce "Blacklisted address `$this' in headers"
		}
		default {
		    if {[info exists state(avfid)]} { return }
		    set state(avfid) [thread_start avf \
			    "$state(desc) / verify $lp@$dm" $lp $dm]
		    set state(originators_unchk) $neworgunchk
		    thread_join ic $id avf $state(avfid) \
			    msg_origverify_ok msg_origverify_err $lp@$dm
		    return
		}
	    }
	}
	set state(originators_unchk) $neworgunchk
    }
    debug 3 "originators >$state(originators)<"
    # originators verified.
    if {[info exists state(late_bounce)]} {
	if {[info exists state(avfid)]} {
	    thread_cancel avf $state(avfid)
	    unset state(avfid)
	}
    }
    if {![info exists state(msgcomplete)]} { return }
    if {[info exists state(avfid)]} { return }
    debug 2 "originators verified, all complete $state(originators)"
    # OK, we have whole message.  There is no pending timeout or verification
    # thread, and are no unchecked originators.  So, we can accept or reject it !
    if {[info exists state(late_bounce)]} {
	ic_body_reject 1 $state(late_bounce)
    } else {
	chanset_hide $state(mtachan) 1 1
	threadio_commandresponse ic $id $state(mtachan) "." {} body_finish_ok {}
    }
}

thread_chainproc ic msg_origverify_ok {addr ok message} {
    unset state(avfid)
    if {$ok} {
	lappend state(t_w) $addr
    } else {
	set state(late_bounce) "550 address $addr: $message"
    }
    ic_msg_origverify
}

thread_chainproc ic msg_origverify_err {addr message} {
    unset state(avfid)
    ic_msg_origverify
}

# Accepting the message body ...

thread_subproc ic msg_accept {eom} {
    threadio_commandresponse ic $id $state(mtachan) data {} body_data_ok {} $eom
}

thread_chainproc ic body_data_ok {eom data} {
    global add_received
    if {[regexp {^3[0-9][0-9]} $data]} {
	chanset_hide $state(mtachan) 1 3
	set hdrdata {}
	if {$add_received} {
	    regsub {^.Name\: } {$Name: debian_version_0_5_0 $} {} rcsinfo
	    if {![regexp {^[ $]*$} $rcsinfo]} {
		regsub {^debian_version_} $rcsinfo v rcsinfo
		regsub -all _ $rcsinfo . rcsinfo
	    } else {
		regsub {^.Revision\: } {$Revision: 1.33 $} r rcsinfo
	    }
	    regsub {[ $]*$} $rcsinfo {} rcsinfo
	    set now [clock seconds]
	    set date [clock format $now -gmt true -format {%d %b %Y %T +0000 (GMT)}]
	    if {[string length $state(ident)]} {
		set ident " ident $state(ident)"
	    } else {
		set ident ""
	    }
	    append hdrdata \
"Received: from $state(rh) (\[$state(ra)\])$ident
	  by $state(lh) (SAUCE $rcsinfo)
          with $state(smtpstyle) id sauce-id$id-rp$state(rp); $date\n"
	}
	append hdrdata "$state(header)"
	threadio_puts ic $id $state(mtachan) $hdrdata body_copy {} $eom
    } else {
	ic_body_reject $eom $data
    }
}

thread_chainproc ic body_copy {eom} {
    if {[info exists state(late_bounce)]} {
	ic_body_reject $eom $state(late_bounce)
    } elseif {!$eom} {
	threadio_gets ic $id $state(chan) body_read_ok {}
    } else {
	ic_body_eom
    }
}

thread_chainproc ic body_read_ok {data} {
    ic_msg_checkeof
    if {"$data" == "."} {
	ic_body_eom
    } else {
	threadio_puts ic $id $state(mtachan) "$data\n" body_copy {} 0
    }
}

thread_subproc ic body_eom {} {
    set state(msgcomplete) 1
    ic_msg_origverify
}

thread_subproc ic body_mid {} {
    if {[info exists state(resentmid)]} {
	return $state(resentmid)
    } elseif {[info exists state(mid)]} {
	return $state(mid)
    } else {
	return "(No Message-ID)"
    }
}

thread_subproc ic body_minfo {mid} {
    set sender "$state(mf_lp)@$state(mf_dm)"
    if {"$sender" == "@"} { set sender {<>} }
    return "$mid from $sender"
}    

thread_chainproc ic body_finish_ok {data} {
    global addr_whitelist_delay addr_whitelist_timeout addr_verified_timeout
    global site_whitelist_delay site_whitelist_timeout site_verified_timeout
    set mid [ic_body_mid]
    set minfo [ic_body_minfo $mid]
    if {![regexp {^2..} $data]} {
	ic_commandresponse major $data
	ic_msg_resetvars
	return
    }
    if {![info exists state(no)]} {
	# We don't make whitelist entries for things which
	# bypassed all the usual checks by being `admin', and
	# should have been rejected.
	foreach as {addr site} itlist [list $state(t_w) [list $state(ra)]] {
	    # Furthermore, if this bypassed the header checks then
	    # we set the addr state to `verified' and not `white',
	    # and don't set the site state at all.
	    if {!$state(bypass)} {
		set slu [list whitesoon [set ${as}_whitelist_delay] \
			white [set ${as}_whitelist_timeout]]
		set slw [list white [set ${as}_whitelist_timeout]]
	    } elseif {"$as" == "addr"} {
		set slu [list verified [set ${as}_verified_timeout]]
		set slw {}
	    } else {
		continue
	    }
	    foreach it $itlist {
		set st [ds_get $as-list $it]
		switch -exact -- $st {
		    unknown - verified { set sl $slu }
		    white { set sl $slw }
		    default { set sl {} }
		}
		if {![llength $sl]} continue
		if {[catch {
		    if {"[lindex $sl 0]" != "$st"} {
			eval [list setstate $as $it "$mid $data"] $sl
		    } else {
			eval [list ds_set $as-list $it] $sl
		    }
		} emsg]} {
		    log error \
			    "cannot create whitelist entry for $as $it: $emsg"
		}
	    }
	}
    }
    ic_commandresponse delivery $data
    manyset [ic_getsiteannoy 0] cannoy cannoydesc
    log notice \
 "accepted $minfo via $state(rh) [string tolower $cannoydesc]=${cannoy}ms $data"
    ic_msg_resetvars
}

thread_chainproc ic body_reject {eom why} {
    ic_body_reject $eom $why
}    

thread_subproc ic body_reject {eom why} {
    catch_close_cleardesc state(mtachan)
    ic_msgnoa_reject $eom $why
}

thread_subproc ic msgnoa_reject {eom why} {
    global blacklist_all_addresses always_blacklist_site
    global addr_blacklist_timeout site_blacklist_timeout
    if {[info exists state(add_bl)]} {
	global chan_desc
	manyset $chan_desc($state(chan)) cd cdhi cdho
	set add_bl $state(add_bl)
	set anyadded 0
	log notice "$cd blacklisting because $add_bl"
	foreach try $state(originators) {
	    set st [ds_get addr-list $try]
	    switch -exact -- $st {
		unknown - whitesoon - verified {
		    log notice "$cd ... originator $try ... blacklisting"
		    set thread [thread_start notifybl \
			    "$state(desc) / notify-bl $try" \
			    $try "Originator address `$try'" $state(add_bl)]
		    thread_join {} {} notifybl $thread addbl_done addbl_err \
			    $state(desc) $try "addr $try"
		    setstate addr $try $add_bl black $addr_blacklist_timeout
		    set anyadded 1
		    if {!$blacklist_all_addresses} { break }
		}
		black {
		    log notice "$cd ... originator $try ... refreshing blacklist"
		    setstate addr $try $add_bl black $addr_blacklist_timeout
		    set anyadded 1
		}
		white {
		    log notice "$cd ... originator $try ... is whitelisted"
		}
		default {
		    log error "$cd, originator $try, unknown state $st"
		}
	    }
	}
	if {!$anyadded} {
	    log notice "$cd ... unable to blacklist by originator ..."
	}
	if {!$anyadded || $always_blacklist_site} {
	    set sitename $state(rh)
	    set siteaddr $state(ra)
	    set st [ds_get site-list $siteaddr]
	    switch -exact -- $st {
		unknown - whitesoon - verified {
		    log notice "$cd ... site $siteaddr ... blacklisting"
		    set thread [thread_start notifybl \
			    "$state(desc) / notify-bl-site $siteaddr" \
			    "postmaster@$state(cmdomain)" "Calling address `$siteaddr'" \
			    $state(add_bl)]
		    thread_join {} {} notifybl $thread addbl_done addbl_err \
			    $state(desc) postmaster@$state(cmdomain) "site $siteaddr"
		    setstate site $siteaddr $add_bl black $site_blacklist_timeout
		    set anyadded 1
		}
		black {
		    log notice "$cd ... site $siteaddr ... refreshing blacklist"
		    setstate site $siteaddr $add_bl black $site_blacklist_timeout
		    set anyadded 1
		}
		white {
		    log notice "$cd ... site $siteaddr ... is whitelisted"
		}
		default {
		    log error "$cd, site $siteaddr, unknown state $st"
		}
	    }
	}
	if {!$anyadded} {
	    log notice "$cd ... unable to blacklist !"
	    debug 1 "ra $state(ra)\noriginators $state(originators)\n$state(header)"
	}
    }
    ic_msg_resethdrvars
    if {$eom} {
	ic_bodyrej_response $why
    } else {
	threadio_gets ic $id $state(chan) bodyrej_read {} $why
    }
}

thread_chainproc ic bodyrej_read {why data} {
    ic_msg_checkeof
    if {"$data" == "."} {
	ic_bodyrej_response $why
    } else {
	threadio_gets ic $id $state(chan) bodyrej_read {} $why
    }
}

thread_subproc ic bodyrej_response {what} {
    if {[info exists state(mtachan)]} {
	threadio_commandresponse ic $id $state(mtachan) rset {^2..} \
		msg_resetmta_ok {} $what
    } else {
	ic_mtachan_open
	threadio_commandresponse ic $id $state(mtachan) {} {^220} \
		msg_resetmta_greeting_ok {} $what
    }
}

thread_chainproc ic msg_resetmta_greeting_ok {what data} {
    threadio_commandresponse ic $id $state(mtachan) \
	    "$state(helocmd) $state(helostring)" {^2..} msg_resetmta_ok {} $what
}

thread_chainproc ic msg_resetmta_ok {what data} {
    ic_commandresponse major $what
    ic_msg_resetvars
}

proc addbl_done {desc what where okcode} {
    log notice "$desc: blacklist notification sent to $where: $what: $okcode"
}

proc addbl_err {desc what where message} {
    log notice "$desc: blacklist notification to $where failed: $what: $message"
}

# General cleanup functions ...

thread_subproc ic msg_resetvars {} {
    catch { thread_cancel avf $state(avfid) }
    ic_msg_resethdrvars
    foreach x {
	a_admin a_normal a_bypass add_bl no bypass admin late_bounce defer
	t_w avfid mid resentmid resentany mf_lp mf_dm mf_parms msgcomplete
    } {
	catch { unset state($x) }
    }
    set state(att_rcpts) {}
}

thread_subproc ic msg_resethdrvars {} {
    foreach x {header currenthn currenthl} { catch { unset state($x) } }
}
