Wanted: A Find Subroutine

A General Discussion forum for TI calculators

Moderator: MaxCoderz Staff

User avatar
driesguldolf
Extreme Poster
Posts: 395
Joined: Thu 17 May, 2007 4:49 pm
Location: $4080
Contact:

Post by driesguldolf »

calc84maniac wrote:
driesguldolf wrote:
  • If someone can tell me how to recognize a 2 byte token I can support that too
He doesn't want two byte token support, because that's the major slowdown factor in the TI-OS routines.
It never hurts to try, my idea is to 'correct' the offset by rescanning the string and doing +1 but when theres a 2 byte token skip the next byte:

Code: Select all

; Assume hl->start of master string
; Assume bc->offset
	ld de, 0
rescan:
	ld a, (hl)           ; [7]
	inc hl               ; [6]
	; Assuming that all 2byte token identifiers start from a specific value
	cp some_value        ; [7]
	jp nc, onebytetoken  ; [10]
	inc hl               ; [6]
	dec bc               ; [6]
onebytetoken:
	inc de               ; [6]
	; I know this can be faster, but it's just an example
	dec bc               ; [6]
	ld a, b              ; [4]
	or c                 ; [4]
	jp nz, rescan        ; [10]
	                     ; One byte token: 60 tstates
	                     ; Two byte token: 72 tstates
;Plus if there exists such a thing as some_value I can also eleminate false/positive results by checking wether (pointer_to_start_of_substring-1) is a 2 byte token, if it is then it has to get back searching!
If he only uses one byte tokens the overhead will be (say a string with a length of 6144 characters) 60*6144=368,640 tstates, in seconds: 368,640/6,000,000=0.06144 s
Ok, I was expecting a value a little bit higher ^^. Man it gotta suck to have programmed TIOS :mrgreen:

Ouch:
Image
I made a string with 2048 times "ABC" and then I wanted to stresstest my program by letting it search for all instances of "ABC"...
The answer is 2048! NOT 2928!!!

EDIT: tried again and it gives 2920... Please say that it's TIOS's fault...

This is so wrong:
Image


Bug found!
But really theres nothing to celebrate:
driesguldolf wrote: ; Calculate the amount of characters to look for
; = length(parent)-length(child)+1 +1 (the last +1 is a hack, it may ruin everything)
Why does this sound familiar?...

EDIT: all links updated (it turns out I uploaded an empty .zip file when you couldn't download it :oops:)
http://www.mediafire.com/?anheticyxtn
_________________
Former CursedGHOST shall be known as Cryzz.
Ok ok ok... My previous avatar sucked...
To make mistakes is human, to forgive is divine.
Crap char limit...

Minesweeper!
Last edited by driesguldolf on Wed 17 Oct, 2007 6:37 pm, edited 1 time in total.
User avatar
driesguldolf
Extreme Poster
Posts: 395
Joined: Thu 17 May, 2007 4:49 pm
Location: $4080
Contact:

Post by driesguldolf »

Source:

Code: Select all

.NOLIST
#define   EQU   .equ
#define   equ   .equ
#define   END   .end
#define   end   .end
#include "ti83plus.inc"
.LIST

     .org 9D93h
     .db $BB,$6D


;--------------------------------
; Find all neccesary data and validate it

	ld hl, string1obj
	rst 20h                      ; rmov9toop1
	rst 10h                      ; rfindsym
	; If Str1 doesn't exist or it is archived, just quit
	jp c, error_undefined
	ld a, b
	or a
	jp nz, error_archived

	ex de, hl
	ld e, (hl)
	inc hl
	ld d, (hl)
	inc hl
	ld (parentsize), de
	push hl

	B_CALL(_ansname)
	rst 10h                      ; rfindsym
	; If Ans doesn't exist, isn't a string or it got archived somehow :P, quit too
	jp c, error_undefined
	and %00011111
	cp strngobj
	jp nz, error_datatype
	ld a, b
	or a
	jp nz, error_archived

	ex de, hl
	ld e, (hl)
	inc hl
	ld d, (hl)
	inc hl
	ld (childsize), de
	push hl

	; This will also do the error checking for me
	B_CALL(_rclx)

	; the romcall _convop1 fails for negative numbers, so I check it myself
	ld a, (op1)
	rlca
	jp c, amountofinstances

	B_CALL(_convop1)
	ld b, d
	ld c, e
	inc bc
	pop de
	pop hl

;--------------------------------
; Calculate offset

	push hl
calcoffset:

	push bc
	call searchstring
	pop bc
	jr c, err_offset

	dec bc
	ld a, b
	or c
	jr nz, calcoffset

	; searchstring returns the address where the substring was found but we need the offset
	pop de
	sbc hl, de

	jp finalize

err_offset:
	pop hl                       ; Clean the stack
	ld hl, 0
	jp finalize

;--------------------------------
; Find number of occurences

amountofinstances:
	pop de
	pop hl

	ld bc, -1
instloop:
	inc bc

	push bc
	call searchstring
	pop bc

	jr nc, instloop

	ld h, b
	ld l, c

;--------------------------------
; Makes Ans=hl and finishes

finalize:
	B_CALL(_setxxxxop2)
	ld hl, op2
	rst 20h                      ; rmov9toop1
	B_CALL(_stoans)
	ret



;; ==== searchstring ====
;;
;; Searches for the first next occurence of the string pointed at by de (= child string)
;; in the string pointed at by hl (= parent string)
;;
;; Author:
;;  Dries Guldolf (dguldolf@vub.ac.be)
;;
;; Pre:
;;  de = points at child string
;;  hl = points at parent string
;;
;; Post:
;;  c-flag = set if not found, reset otherwise
;;  hl = points at character after occurence (if found)
;;  Interrupts are disabled
;;  Consider af, bc, de, hl, af', bc', de' and hl' destroyed
;;
;; Warning:
;;  Don't include the length prefix in the pointers.
;;  This routine is designed to be able to continue searching where it left of last time.
;;  This is also a bit specific for this situation and may need to be modified for a more general version.
;;

searchstring:
	di
	exx

	; If the child string is larger than the parent string then we will never find it
	ld hl, (parentsize)
	ld de, (childsize)
	or a
	sbc hl, de
	ret c

	; Calculate the amount of characters to look for
	; = length(parent)-length(child)+1  +1 (this is a hack, it may ruin everything)
	inc hl
	push hl
	exx

	push de
	push hl

	; Set Parity/Overflow
	xor a
	ex af, af'

	; Find the first next occurance of the child string in the parent string
searchloop:
	pop hl
	pop de
	pop bc

	; If we run out of characters then we will never find it
	ex af, af'
	ret po

	ld a, (de)
	cpir

	; Save the sate of Parity/Overflow
	ex af, af'

	push bc
	push de
	push hl
	; We found a matching character, now to see if the rest matches too
	dec hl
	ld bc, (childsize)
verifyloop:
	ld a, (de)
	inc de
	cpi
	; If it was a false alarm, just continue from the next index we found the first character
	jp nz, searchloop
	jp pe, verifyloop

	; We found a match!
	; hl points at the next character
	pop hl
	pop de
	pop bc
	; Save the new size
	dec bc
	ld ix, (childsize)
	add ix, bc
	ld (parentsize), ix
	or a
	ret


;================================
; Error handling

; Not sure what to do with the returns, just to be sure... You never know with TI... :P
fatalerror:
error_archived:
	B_CALL(_errinvalid)
	ret
error_undefined:
	B_CALL(_errundefined)
	ret
error_datatype:
	B_CALL(_errdatatype)
	ret


;================================
; Data

string1obj:
	.db strngobj, tvarstrng, tstr1, 0


parentsize:
	.dw 0
childsize:
	.dw 0


.end
Post Reply