; sp1.xm: first part of SPelling error detector/corrector Unicum
; /AJK 13.Sep.82, 26.Sep.82

;    _______
;   |      /
;   |     /
;   |    /    Copyright (c) 1982 by Knowlogy
;   |   //\                         PO Box 283
;   |  //  \                        Wilsonville, Oregon  97070
;   | //    \
;   |//______\


; The sp Unicum is made up of three files:
;   sp.cif -- global definitions
;   sp1.xm -- main program
;   sp2.xm -- correction dialog
;   sp3.xm -- support routines and data
; To re-create the executable program image SP.COM, do the following:
;   xm80 =sp1
;   xm80 =sp2
;   xm80 =sp3
;   l80 sp1,sp2,sp3,lib/s,sp/n/e

; History:
; V1  1.Sep.81: initial distribution of Unica
; V2  1.Oct.82: complete re-write, new dictionary, interactive correction

; Usage: sp file [ -d maindict ] [ -a auxdict ] [ -j jargondict ]
;	    [ -c ] (correct document, default is print mispelled words)
;	    [ -m xxx ] (use "xxx" to mark words, rather than "-->")
;	    [ -h ] [ -b begseq ] [ -e endseq ] (highlight mispelled words)
;	    [ -i nnn ] (ignore words of length nnn or less, default 2)
;	    [ -f format.pattern ] (ignore formats, no default)
;	    [ -o file ] (output to file, default is overwrite input)
;	    [ -t ] (talky)
;	    [ -l nnn ] (set max file line size, default 256)
;	    [ -r ] (ignore auto-replace, always require confirmation)
;	    [ -w ] (ignore WordStar/Runoff format cmnds, same as -f ^\..*$)

	uses LIB2800		; Z80 components
	uses LIB2801		; CP/M components
	uses SP			; sp definitions

	db	13,'SP V2: COPYRIGHT (C) 1982 BY KNOWLOGY',13,10,26,0

; Default highlight begin/end sequences.  These cause reverse video
; on a TRS-80 model II running Pickles&Trout CP/M.  If you have XM-80,
; you might want to change these to something appropriate for your
; terminal.  If you don't have XM-80, you can use DDT to patch these
; locations; enough room has been left in the string for sequences
; of up to eight bytes.  Be sure that the sequence ends in a null.
defbeg::db	'^N',0,0,0,0,0,0,0	; begins at location 012Dh
defend::db	'^O',0,0,0,0,0,0,0	; begins at location 013Bh


; TALK procedure: if -t was specified, do a TPUTF, else do nothing.
; Since the VALS parameter is "stk*", TALK cannot be "forward declared"
; (because of a translator bug), so it appears before its first invocation.
	proc TALK [FORMAT:stk,VALS:stk*]
	entry TALK
	begin
	push	af		; save A and condition codes
	ld	a,(tflg)	; see if -t was specified
	and	a
	jr	z,talk1		; branch if it wasn't
	pop	af		; restore A and condition codes
	jp	TPUTF##		; go do the TPUTF and return
talk1:				; here if -t wasn't specified
	pop	af		; restore A and condition codes
	end TALK		; just return


; Note that "spell" cannot be "sp", because the SP symbol has
; special meaning in MACRO-80.
	entry spell
spell:

; Initialize and begin command scan.
	HEAhea [hl=0100h]		; set up heap and stack
	USKini []			; initialize command scan
	USKflg [hl=flgtbl]		; interpret flags

; Open the input file.
	ld	hl,inch			; assume input from standard input
	ld	(hl),StdIn
	USKgna []->[(infnm)=hl]+C-a	; get input file name
	jr	c,sp1			; if no input file, use standard input
	IO.opn [stk=hl,stk=RO+Text+OldOnly]->[(inch)=a]+C
	jp	c,ioerr			; can't open named file, error
sp1:

; Make sure there are no other filenames.
	USKgna []->[hl]+C-a
	jp	c,sp2
	EPUTF [stk=usage]
	SHLexi [a=1]

usage:	db	'Usage: sp [-chrtw] [-a auxdict] [-b begseq] [-d maindict] '
	db	'[-e endseq]',13,10
	db	'          [-f format.pattern] [-i min.word.length] '
	db	'[-j jargondict]',13,10
	db	'          [-l max.line.length] [-m word.mark] '
	db	'[-o output.file] [file]',13,10,0

; If either -b or -e was set, set -h.
sp2:
	ld	a,(bflg)		; set -h = -b OR -e OR -h
	ld	hl,eflg
	or	(hl)
	ld	hl,hflg
	or	(hl)
	ld	(hl),a

; If -w was set, simulate -f ^\..*$
	ld	a,(wflg)		; set if -w
	and	a
	jr	z,sp4			; if not, skip this
	ld	hl,wsfmt		; use WordStar format
	ld	(fval),hl
	ld	hl,fflg			; see if -f
	ld	a,(hl)
	and	a
	jr	z,sp3			; branch if not
	EPUTF [stk="-f and -w are mutually exclusive^m^j"]
	SHLexi [a=1]
sp3:
	ld	(hl),1			; set -f
sp4:

; If there's a format pattern, compile it.
	ld	hl,0			; assume no format pattern
	ld	(fmtpat),hl
	ld	a,(fflg)		; see if -f or -w was specified
	and	a
	jr	z,sp5			; branch if not
	PATac [hl=(fval),a=1]->[(fmtpat)=hl,a]+C
	jr	nc,sp5			; branch if pattern is ok
	EPUTF [stk="Bad format/typeset command pattern^m^j"]
	SHLexi [a=1]
sp5:

; Open the dictionaries.
	DICTo [hl=(dval),de=128]->[(dch)=a]+C	; open main dictionary
	jp	c,ioerr			; branch on error
	ld	hl,(aval)		; open auxiliary dictionary
	IO.opn [stk=hl,stk=Text+RO+OldOnly]->[(ach)=a]+C
	jp	c,ioerr			; branch on error
	ld	a,(jflg)		; is there a jargon dictionary?
	and	a
	jr	z,sp7			; branch if not
	ld	hl,(jval)		; open jargon dictionary
	IO.opn [stk=hl,stk=Text+RO+OldOnly]->[(jch)=a]+C
	jr	nc,sp7			; branch if file existed
	EPUTF [stk="[Creating jargon dictionary %s]^m^j",stk=hl]
	IO.opn [stk=hl,stk=Text+RO+WO+NewOrOld]->[(jch)=a]+C
	jp	c,ioerr			; branch on error
sp7:

; Allocate the line buffer, work buffer for WRD2800k, and word buffer
	ld	hl,(lval)		; HL = max line length
	inc	hl			; add one for null terminator
	ld	(linlen),hl		; line length
	USKall [hl=(linlen)]->[(linbuf)=hl]
	USKall [hl=(linlen)]->[(wrkbuf)=hl]
	USKall [hl=WrdSiz]->[(wrdbuf)=hl]

; We can now grab the rest of heap space for the internal word list.
; Although we will be opening more files, there will be enough files
; closed first that enough space will be returned to the heap.
	HEAall [de=1024,hl=0FFFFh]->[de,hl]+C
	jp	c,nomem			; "not enough memory" error
	ex	de,hl
	add	hl,de			; HL -> just beyond top of chunk
	ld	(strtop),hl		; that's string space boundary
	ld	(strbot),hl
	ex	de,hl			; get back memory base pointer
	ld	(ptrbot),hl		; that's bottom of pointer list
	ld	de,NULL			; DE -> null string
	ld	(hl),e			; insert it into pointer list
	inc	hl
	ld	(hl),d
	inc	hl
	ld	de,RUBOUT		; DE -> rubout string
	ld	(hl),e			; append it to pointer list
	inc	hl
	ld	(hl),d
	inc	hl			; now HL -> top of pointer list
	ld	(ptrtop),hl		; save top pointer

; Top of loop to make the first pass through the input file.
; Read the next line from that file.
	TALK [stk="[Scanning document]^m^j"]
sp8:
	LIOgl [stk=(inch),stk=(linbuf),stk=(linlen)]->[]+C
	jr	c,sp10			; branch at file EOF
	WRDnew [ix=(linbuf),iy=(wrkbuf),hl=(fmtpat),de=0,bc=trtbl]

; Loop for each word in the line.
; Find its position in the string heap; if it's not already there, insert it.
sp9:
	WRDget [hl=(wrdbuf)]->[hl,de]+C-a
	jr	c,sp8			; branch after last word in line
	ld	hl,(ival)		; see if word is too short
	sbc	hl,de			; (carry is clear from WRDget)
	jr	nc,sp9			; branch if word is short, ignore it
	Lookup [hl=(wrdbuf)]->[de]+C-a	; look up the word in the list
	jr	nc,sp9			; if it's already there, forget it
	Insert [de,hl]			; insert the word into the list
	jr	sp9

; Here when we have a list of all words from the input file.
; Cruise through the main dictionary, marking those words which are not in it.
; Register usage:
;   IX -> next pointer in pointer list
sp10:
	Stat []				; announce memory occupancy stats
	TALK [stk="[Scanning main dictionary: "]
	ld	ix,(ptrbot)		; IX -> bottom pointer
	ld	b,0			; B gets the first letter of words

; Top of loop to read a word from the dictionary.
sp11:
	DICTr [hl=(wrdbuf)]->[]+C-a	; read next word
	jr	c,sp13			; branch when dictionary exhausted
	ld	a,(hl)			; A = first letter of new word
	cp	b			; see if it's unchanged
	jr	z,sp12			; branch if so
	ld	b,a			; it changed, announce new letter
	TALK [stk="%pc",stk=af]
sp12:
	ld	a,1 SHL STgood		; mark word "good"
	Sync [ix,hl,de,a]->[ix]+C-a	; synchronize words
	jr	nc,sp11			; loop if there are more words
sp13:
	TALK [stk="]^m^j"]
	DICTc []			; done with main dictionary

; We're done with the main dictionary.
; Cruise through the auxiliary dictionary.
; Each auxiliary dictionary word is one of the following:
;   goodword
;   badword!
;   badword!goodword
;   badword!!goodword
; In the first case, "goodword" is known to be spelled correctly.
; In the second case, "badword" is known to be spelled incorrectly,
;   even if it appears in the main dictionary.
; In the third case, "badword" is known to be spelled incorrectly,
;   and its correct spelling is "goodword".
; The fourth case is like the third case, but during spelling correction
;   every occurrence of "badword" is automatically changed to "goodword".

; In these two loops, "wrdbuf" is used to store the first word,
; and "linbuf" is used to store any replacement.
	TALK [stk="[Scanning auxiliary dictionary]^m^j"]
	ld	ix,(ptrbot)		; IX -> bottom of pointer list
sp14:					; top of loop to process words
	AuxWord [a=(ach),hl=(wrdbuf),de=(linbuf)]->[a]+C
	jr	c,sp15			; branch when auxiliary dictionary done
	Sync [ix,hl,de,a]->[ix]+C-a	; process the word
	jr	nc,sp14			; loop while words remain
sp15:

; Similarly, cruise through the jargon dictionary.
; The jargon dictionary has the same format as the auxiliary dictionary.
; Since it is examined last, it overrides the main and auxiliary dictionaries.
	ld	a,(jflg)		; was -j specified?
	and	a
	jr	z,sp17			; branch if not
	TALK [stk="[Scanning jargon dictionary]^m^j"]
	ld	ix,(ptrbot)		; IX -> bottom of pointer list
sp16:					; top of loop to process words
	AuxWord [a=(jch),hl=(wrdbuf),de=(linbuf)]->[a]+C
	jr	c,sp17			; branch when jargon dictionary done
	Sync [ix,hl,de,a]->[ix]+C-a	; process the word
	jr	nc,sp16			; loop while words remain
sp17:

; We've finished scanning the dictionaries.
; Now that we've closed the main dictionary, we can open the output file
; (if any).
	ld	hl,outch		; assume output to standard output
	ld	(hl),StdOut
	ld	a,(oflg)		; was -o specified?
	and	a
	jr	z,sp18			; if not, use standard output
	ld	hl,(oval)		; otherwise open requested file
	IO.opn [stk=hl,stk=WO+Text+Replace]->[(outch)=a]+C
	jp	c,ioerr			; can't open named file, error
sp18:

; If -c was specified, we now enter an interactive correction dialog,
; otherwise just write the mispelled words to a file.
	ld	a,(cflg)		; was -c specified?
	and	a
	jr	nz,sp22			; branch if so

; Write mispelled words to the output file.
	TALK [stk="[Writing mispelled word list]^m^j"]
	ld	ix,(ptrbot)		; IX -> bottom of pointer list
sp19:
	inc	ix			; step to next pointer
	inc	ix
	ld	l,(ix+0)
	ld	h,(ix+1)		; HL -> next string
	ld	a,(hl)			; check for terminator string
	cp	07Fh
	jr	z,sp21			; branch when strings exhausted
	ld	d,h			; set DE -> start of string
	ld	e,l
sp20:
	inc	hl			; look for terminator
	bit	7,(hl)
	jr	z,sp20
	bit	STgood,(hl)		; is word correctly spelled?
	jr	nz,sp19			; branch if so, skip this word
	and	a
	sbc	hl,de			; compute length of word
	IO.wri [stk=(outch),stk=de,stk=hl]->[a,hl]+C ; write the word
	jp	c,ioerr			; branch on I/O error
	IO.wri [stk=(outch),stk=CRLF,stk=2]->[a,hl]+C ; carriage return
	jp	c,ioerr
	jr	sp19
sp21:
	SHLexi [a=0]			; all done

; Here to correct the errors.
sp22:
	call	correct##
	SHLexi [a=0]


; Error handlers
ioerr::
	ERRMSG [a,b=1,c=1,hl]
nomem::
	EPUTF [stk="Not enough memory^m^j"]
	SHLexi [a=1]


	end spell
