; tr.xm: TRanslation Unicum
; /AJK 30.Aug.82, 8.Sep.82

;    _______
;   |      /
;   |     /
;   |    /    Copyright (c) 1982 by Knowlogy
;   |   //\                         PO Box 283
;   |  //  \                        Wilsonville, Oregon  97070
;   | //    \
;   |//______\

	uses LIB2800		; Z80 components
	uses LIB2801		; CP/M components

	db	13,'TR V1: COPYRIGHT (C) 1982 BY KNOWLOGY',13,10,26,0

BigBuf	equ	8192		; disk buffer size (to decrease head latency)
StdOut	equ	1		; standard output channel number
MaxRng	equ	129		; max number of characters in a range +1
CR	equ	13		; carriage return
LF	equ	10		; line feed (newline)


	entry tr
tr:

; Initialize.
	HEAhea [hl=0100h]		; initialize heap
	USKini []			; scan command
	USKflg [hl=flgtbl]		; interpret flags

; Interpret flags and strings.
; If -l, -u, or -z is specified, there are no strings;
; if -d is specified, there is one string;
; otherwise there are two strings.

; -l is equivalent to the two strings "A-Z" "a-z" (which the poor user
; would have to type as "\a-\z" "a-z", there being no way to put an
; upper case letter into a command).
	ld	a,(lflg)		; check for -l
	and	a
	jr	z,tr1			; branch if no -l
	ld	a,(cflg)		; ensure -c -d -s -u -z not given
	ld	hl,dflg
	or	(hl)
	ld	hl,sflg
	or	(hl)
	ld	hl,uflg
	or	(hl)
	ld	hl,zflg
	or	(hl)
	jp	nz,tr22			; bad command if clashing flags
	ld	hl,uctr			; -l = "A-Z" "a-z"
	ld	(strin1),hl
	ld	hl,lctr
	ld	(strin2),hl
	jp	tr5			; go build translation table
tr1:

; -u is equivalent to the two strings "a-z" "A-Z".
	ld	a,(uflg)		; check for -u
	and	a
	jr	z,tr2			; branch if no -u
	ld	a,(cflg)		; ensure -c -d -s -z not given
	ld	hl,dflg
	or	(hl)
	ld	hl,sflg
	or	(hl)
	ld	hl,zflg
	or	(hl)
	jp	nz,tr22			; bad command if clashing flags
	ld	hl,lctr			; -u = "a-z" "A-Z"
	ld	(strin1),hl
	ld	hl,uctr
	ld	(strin2),hl
	jr	tr5			; go build translation table
tr2:

; -z means do no translation, just clear parity bits.
	ld	a,(zflg)		; check for -z
	and	a
	jr	z,tr3			; branch if no -z
	ld	a,(cflg)		; ensure -c -d -p -s not given
	ld	hl,dflg
	or	(hl)
	ld	hl,pflg
	or	(hl)
	ld	hl,sflg
	or	(hl)
	jp	nz,tr22			; bad command if clashing flags
	jp	tr7			; no translation or deletion
tr3:

; -d means get one string and delete from it.
; Get the string, call RANGE to interpret quoted characters and expand
; ranges, then call XLT1 to produce a deletion table.  This table consists
; of 128 bytes; each byte contains 1 if the corresponding ASCII character
; should be deleted, otherwise it contains 0.  The address of the deletion
; table is stored at deltbl.
	ld	a,(dflg)		; check for -d
	and	a
	jr	z,tr4			; branch if no -d
	ld	a,(sflg)		; ensure -s not given
	and	a
	jp	nz,tr22			; bad command if clashing flags
	USKgna []->[(strin1)=hl]+C-a	; get the string
	jp	c,tr23			; syntax error if none
	USKall [hl=MaxRng]->[hl]	; allocate a string for range build
	RANGE [de=(strin1),hl,bc=MaxRng,a=0FFh]->[]+C-a ; translate range
	ex	de,hl
	USKall [hl=128]->[(deltbl)=hl]	; build a deletion table
	XLT1 [de,hl,a=(cflg),c=0FFh]
	ex	de,hl			; free the range
	HEAfre [hl]
	jr	tr7
tr4:

; Anything else means get two strings and translate from them.
	USKgna []->[(strin1)=hl]+C-a	; get first string
	jp	c,tr23			; syntax error if none
	USKgna []->[(strin2)=hl]+C-a	; get second string
	jp	c,tr23

; Here with translation strings in strin1 and strin2.
; Get the two strings, and call RANGE for both of them to interpret
; quoted characters and expand ranges.  Then call XLT2 to produce a
; translation table of 128 bytes, indexed by the "from" character from
; string1 and containing the "to" characters from string2.  The address
; of the translate table is stored at trtbl.  If -s (squeeze) is specified,
; build a squeeze table from string2.  The squeeze table is similar to the
; deletion table in that it contains 128 bytes, each of which is nonzero
; if the corresponding ASCII character is to be squeezed.  The address
; of the squeeze table is stored at sqtbl.
tr5:
	USKall [hl=MaxRng]->[(rng1)=hl]	; allocate first range string
	RANGE [de=(strin1),hl,bc=MaxRng,a=0FFh]->[]+C-a ; translate range
	USKall [hl=MaxRng]->[(rng2)=hl]	; allocate second range string
	RANGE [de=(strin2),hl,bc,a=0FFh]->[]+C-a ; translate range
	USKall [hl=128]->[(trtbl)=hl]	; allocate translate table
	XLT2 [ix=(rng1),iy=(rng2),hl,a=(cflg),c=0FFh] ; make translate table
	HEAfre [hl=(rng1)]		; free first range
	ld	a,(sflg)		; are we squeezing?
	and	a
	jr	z,tr6			; branch if not
	USKall [hl=128]->[(sqtbl)=hl]	; squeezing, allocate squeeze table
	XLT1 [de=(rng2),hl,a=0,c]	; compile squeeze table
tr6:
	HEAfre [hl=(rng2)]		; discard second range string
	ld	a,1			; set "trflg"
	ld	(trflg),a

; Here when flag translation and table building are done.
; If -p was specified, change pmask from 0 to 080h.
; The formula "(character AND (pmask)) OR new.character" is used to
; retain parity (if desired) in the new character.
tr7:
	ld	a,(pflg)		; see if -p was specified
	and	a
	jr	z,tr8			; branch if not
	ld	a,080h			; change pmask to 080h
	ld	(pmask),a
tr8:

; Supply a default argument of "-".
	USKdef [stk="-",stk=0]		; default command is "tr -"
	IO.sts [a=StdOut]->[a,b,hl]+C	; get status of standard output channel
	and	(1 SHL StDsk)		; see if channel 1 is a disk file
	ld	hl,128			; assume it isn't: small I/O buffer
	jr	z,tr9			; branch if non-disk device
	ld	hl,BigBuf		; disk device, use large I/O buffer
tr9:					; here with buffer size in HL
	ld	(bufsiz),hl		; remember buffer size
	USKall [hl]->[(bufadr)=hl]	; allocate the buffer

; Top of loop for each file.
; Open the file.
tr10:
	USKtnc [stk=(nflg),stk=(vflg)]->[hl=stk,bc=stk,de=stk,(inch)=a]+C
	jp	c,tr21			; branch when files exhausted

; Top of loop for each buffer-full.
tr11:
	IO.rea [stk=(inch),stk=(bufadr),stk=(bufsiz)]->[a,hl]+C
	jr	nc,tr12			; branch if no I/O error
	ERRMSG [a,b=1,c=1,hl]		; announce error and abort program
tr12:
	ld	a,h			; check for EOF
	or	l			; signaled by zero bytes read
	jr	z,tr10			; branch if EOF, go do next file

; Examine each character in the buffer, deleting, translating, and squeezing
; as indicated, and write characters to the output stream.
; Register use:
;   IX -> character being examined
;   BC = number of characters yet to examine, including current one
;   DE = character with parity cleared, range 0-127 (D is always 0)
	ld	ix,(bufadr)
	ld	b,h
	ld	c,l

; Top of per-byte loop.  Examine next character.
tr13:
	ld	a,(ix+0)		; A = next character
	and	07Fh			; clear parity for translating
	ld	e,a			; put character into DE
	ld	d,0			;   for table indexing

; If character is carriage return and -r was specified, skip it.
	cp	CR			; check for carriage return
	jr	nz,tr14			; branch if it isn't
	ld	a,(rflg)		; was -r specified?
	and	a
	jr	nz,tr20			; if so, skip this character
tr14:

; See if we're translating.  If so, xlt the character.
	ld	a,(trflg)
	and	a
	jr	z,tr15			; branch if not translating
	ld	hl,(trtbl)		; HL -> translate table
	add	hl,de
	ld	e,(hl)			; DE = translation
	jr	tr16			; possibly squeeze character, then
					; possibly restore parity, then
					; output character
tr15:

; Not translating, see if we're deleting.
; If so, see whether to output character.
	ld	a,(dflg)		; see if we're deleting
	and	a
	jr	z,tr17			; branch if not, just write character
	ld	hl,(deltbl)		; HL -> deletion table
	add	hl,de
	ld	a,(hl)			; A is nonzero if we should delete
	and	a			; should we delete?
	jr	nz,tr20			; branch if so, skip character
	jr	tr17			; otherwise go output character

; Here with a parity-less character in E and its parity at (ix+0).
; See if the character should be squeezed out.
tr16:
	ld	a,(prev)		; is new character same as previous?
	cp	e
	jr	nz,tr17			; branch if it's not the same
	ld	a,(sflg)		; it's the same, are we squeezing?
	and	a
	jr	z,tr17			; branch if not
	ld	hl,(sqtbl)		; see if we're squeezing this char
	add	hl,de
	ld	a,(hl)
	and	a
	jr	nz,tr20			; branch if so, don't output char

; We've decided to output the character.
; Remember the previous character.
tr17:
	ld	hl,prev			; store character at prev
	ld	(hl),e

; If character is newline and -r was specified, precede it by carriage return.
	ld	a,e
	cp	LF			; check for line feed
	jr	nz,tr19			; branch if it isn't
	ld	a,(rflg)		; see if -r was specified
	and	a
	jr	z,tr18			; branch if not, just put line feed
	LIOpc [stk=StdOut,a=CR]		; precede linefeed with carriage return
tr18:
	ld	a,LF			; put a line feed
tr19:

; If -p was specified, restore character's parity.
	ld	a,(pmask)		; A = parity mask (80 if -p else 0)
	and	(ix+0)			; A = parity bit if -p
	or	e			; A = character to write

; Write the character.
	LIOpc [stk=StdOut,a]

; Advance to the next character.
tr20:
	inc	ix			; advance buffer pointer
	dec	bc			; decrement buffer count
	ld	a,b			; loop for all characters
	or	c
	jr	nz,tr13

; Done with this buffer, do the next.
	jp	tr11

; Here when all done.
tr21:
	SHLexi [a=0]

; Here if a flag clash is detected.
tr22:
	EPUTF [stk="tr: conflicting flags^M^J"]
	SHLexi [a=1]

; Here if some other bad command was given.
tr23:
	EPUTF [stk="tr: usage: tr [-cdnlprsuvz] [string [string] ] [wild ...]^M^J"]
	SHLexi [a=1]


; Flag table.
; The following flag inconsistencies apply and are checked for.
; Flag  Clashing flags
;  -c : -l -u -z
;  -d : -l -s -u -z
;  -l : -c -d -s -u -z
;  -p : -z
;  -s : -d -l -u -z
;  -u : -c -d -l -s -z
;  -z : -c -d -l -p -s -u
flgtbl:
	db	'c',0,0,0,0,0,0,0,0,0,0,0,0
cflg:	dw	0
	db	'd',0,0,0,0,0,0,0,0,0,0,0,0
dflg:	dw	0
	db	'l',0,0,0,0,0,0,0,0,0,0,0,0
lflg:	dw	0
	db	'n',0,0,0,0,0,0,0,0,0,0,0,0
nflg:	dw	0
	db	'p',0,0,0,0,0,0,0,0,0,0,0,0
pflg:	dw	0
	db	'r',0,0,0,0,0,0,0,0,0,0,0,0
rflg:	dw	0
	db	's',0,0,0,0,0,0,0,0,0,0,0,0
sflg:	dw	0
	db	'u',0,0,0,0,0,0,0,0,0,0,0,0
uflg:	dw	0
	db	'v',0,0,0,0,0,0,0,0,0,0,0,0
vflg:	dw	0
	db	'z',0,0,0,0,0,0,0,0,0,0,0,0
zflg:	dw	0
	db	0

uctr:	db	'A-Z',0		; upper case conversion (eqv to '\a-\z')
lctr:	db	'a-z',0		; lower case conversion

trflg:	db	0		; non-zero means translation active
pmask:	db	0		; mask to AND against parity bit
prev:	db	0FFh		; previous output character (w/o parity)

inch:	ds	1		; input channel
bufadr:	ds	2		; location of buffer
bufsiz:	ds	2		; length of buffer
strin1:	ds	2		; -> first command string
strin2:	ds	2		; -> second command string
rng1:	ds	2		; -> range expansion of strin1
rng2:	ds	2		; -> range expansion of strin2
deltbl:	ds	2		; -> deletion table
trtbl:	ds	2		; -> translation table
sqtbl:	ds	2		; -> squeeze table

	end tr
