It's got too big to put it on a forum. Actually, I tried to do a tokenizer so it figures out all things including numbers and strings. But after figuring things out it outputs only things that relevant to this task (variables, subs/user functions, labels).
Uses keyword list etc from Rod's entry.
Tested on biggest program I've ever seen in LB - freeform404.bas ;)
'variable challenge'tsh73, Jan 2015global t.maxTokens, t.name, t.type, tp.line, tp.stmnt, tp.num
global curTokenNum, curTokNum, nLines, curStmntNum, contLine
[nonUsedLabel] dummy=1:dummy2=dummy 'non-used line with a (:)
unDimmed(3)=1'example of un-dimmed arraygosub[setLists]'fname$="vars01.bas"'fname$="vars02.bas"'fname$="test.bas"filedialog"Select file to process";chr$(0);"open","*.bas", fname$
if fname$=""thenprint"No file selected - bye":endprint"Processing "; fname$
t.maxTokens=100000
t.name=0
t.type =1dim token$(t.maxTokens,1)
tp.line=0
tp.stmnt=1
tp.num=2dim tokenPos(t.maxTokens,2)
curTokenNum=0'global
curTokNum=0'in a statement (that is between (:))
verbose =0'1open fname$ forinputas#1
nLines=1
isContinuation=0print"Reading parsing line by line..."
t0=time$("ms")whilenot(eof(#1))
nLines=nLines+1lineinput#1, aLine$
if verbose thenprint">"; aLine$
curStmntNum=1
curTokNum=0if isContinuation thenif contLine=0then contLine = nLines-1
isContinuation=0else
contLine=0endifwhile1scan'reading a line splitting it to tokens'1) skip all starting spaces
aLine$=remStartSpaces$(aLine$)'exit if nothing leftif aLine$=""thenexitwhile'print ":"; aLine$'2) check special sequences''commentifleft$(aLine$,1)="'"then'skip line as commentif verbose thenprint" comment skipped"exitwhileendif'3) recognize / read / cut tokenselectcase'3.1) check special sequences'[label]caseleft$(aLine$,1)="["
label$=upto$(aLine$,"]")+"]"
aLine$=after$(aLine$,"]")if verbose thenprint" label: ";label$
call storeToken label$,"lbl"'3.2) check special sequences'"string"caseleft$(aLine$,1)=qq$ 'string
aLine$=mid$(aLine$,2)'cut left (")
aString$=qq$+upto$(aLine$,qq$)+qq$
aLine$=after$(aLine$, qq$)if verbose thenprint" string: "; aString$
call storeToken aString$,"str"caseleft$(aLine$,1)="_"andinstr(varChars$,mid$(aLine$,2,1))<>0'winConst
token$=left$(aLine$,1)
aLine$=mid$(aLine$,2)whileinstr(varChars$;"_",left$(aLine$,1))<>0
token$=token$+left$(aLine$,1)
aLine$=mid$(aLine$,2)if aLine$=""thenexitwhilewendif verbose thenprint" Windows constant: ";token$
call storeToken token$,"winConst"caseleft$(aLine$,1)="_"
aLine$=mid$(aLine$,2)'cut tokenif verbose thenprint" continuation char"'call storeToken "_", "_" 'do not store?
isContinuation=1caseleft$(aLine$,1)=":"
aLine$=mid$(aLine$,2)'cut tokenif verbose thenprint" operator separator"call storeToken ":",":"
curStmntNum=curStmntNum+1
curTokNum=0caseleft$(aLine$,1)="|"
aLine$=mid$(aLine$,2)'cut tokenif verbose thenprint" menu separator"call storeToken "|","|"caseleft$(aLine$,1)=","
aLine$=mid$(aLine$,2)'cut tokenif verbose thenprint" parameter separator"call storeToken ",",","caseleft$(aLine$,1)=";"
aLine$=mid$(aLine$,2)'cut tokenif verbose thenprint" concatenation"call storeToken ";",";"caseleft$(aLine$,1)="="
aLine$=mid$(aLine$,2)'cut tokenif verbose thenprint" assignment or equal"call storeToken "=","="caseleft$(aLine$,1)="("
aLine$=mid$(aLine$,2)'cut tokenif verbose thenprint" opening ("call storeToken "(","("caseleft$(aLine$,1)=")"
aLine$=mid$(aLine$,2)'cut tokenif verbose thenprint" closing )"call storeToken ")",")"caseleft$(aLine$,1)="#"'handle
token$=left$(aLine$,1)
aLine$=mid$(aLine$,2)whileinstr(varChars$,left$(aLine$,1))<>0
token$=token$+left$(aLine$,1)
aLine$=mid$(aLine$,2)if aLine$=""thenexitwhilewendif verbose thenprint" handle: ";token$
call storeToken token$,"hndl"caseinstr(firstVarChars$,left$(aLine$,1))<>0'name (var, arr, sub, func) or keyword
token$=left$(aLine$,1)
aLine$=mid$(aLine$,2)whileinstr(varChars$,left$(aLine$,1))<>0 _
or(left$(aLine$,1)="_"andinstr(varChars$,mid$(aLine$,2,1))<>0)
token$=token$+left$(aLine$,1)
aLine$=mid$(aLine$,2)if aLine$=""thenexitwhilewendselectcasecaseinstr(comlist$;typlist$;opelist$," ";lower$(token$);" ")<>0if verbose thenprint" keyword: ";token$
call storeToken token$,"kwrd"'if REMiflower$(token$)="rem"then'commentif verbose thenprint" comment skipped"exitwhileendifcaseelseif verbose thenprint"name (var, array, sub, or func): ";token$
call storeToken token$,"name"endselectcaseinstr(firstNumChars$,left$(aLine$,1))<>0'number?
notANumber=0selectcasecaseinstr(digits$,left$(aLine$,1))<>0
token$=left$(aLine$,1)
aLine$=mid$(aLine$,2)caseinstr("-.",left$(aLine$,1))<>0and IsNumber(left$(aLine$,2))<>0
token$=left$(aLine$,2)
aLine$=mid$(aLine$,3)caseleft$(aLine$,2)="-."and IsNumber(left$(aLine$,3))<>0
token$=left$(aLine$,3)
aLine$=mid$(aLine$,4)caseelse
notANumber=1endselectif notANumber=0then'read rest of a numberwhile IsNumber(token$)<>0
token$=token$+left$(aLine$,1)
aLine$=mid$(aLine$,2)if aLine$=""thenexitwhilewendif IsNumber(token$)=0then'one char extra
aLine$=right$(token$,1)+aLine$
token$=left$(token$,len(token$)-1)endifif verbose thenprint"number: ";token$
call storeToken token$,"num"else'should be single "-" (or not compiles)
token$=left$(aLine$,1)
aLine$=mid$(aLine$,2)if verbose thenprint" operator ";token$
call storeToken token$,"op"endif'should be moved after "numbers" so "-1" does not process as "-" "1"caseinstr("<>+-*/^",left$(aLine$,1))<>0
token$=left$(aLine$,1)
aLine$=mid$(aLine$,2)'cut tokenif verbose thenprint" operator ";token$
call storeToken token$,"op"caseelse
token$=left$(aLine$,1)
aLine$=mid$(aLine$,2)if verbose thenprint"??char: ";token$
call storeToken token$,"???"endselectwendwendclose#1print"----------------"print"nLines=",nLines
print"numTokens=";curTokenNum
printtime$("ms")-t0;" ms"
t0=time$("ms")goto[SkipPrint]for i =1to curTokenNum
print tokenPos(i, tp.line);" ";_
tokenPos(i, tp.stmnt);" ";_
tokenPos(i, tp.num),_
token$(i, t.type),_
token$(i, t.name)next[SkipPrint]'second pass along token$ array, check for next "(" - to tell functions/arraysPrint"second pass along token$ array..."
i=0while i <= curTokenNum
i=i+1if token$(i, t.type)="name"thenif i+1<=curTokenNum thenif token$(i+1, t.type)="("and _
tokenPos(i, tp.line)= tokenPos(i+1, tp.line)and _
tokenPos(i, tp.stmnt)= tokenPos(i+1, tp.stmnt)thenifinstr(funlist$," ";lower$(token$(i, t.name));" ")<>0then
token$(i, t.type)="buildInFunc"else
token$(i, t.type)="UDForArray"endifendifendifendifwendprinttime$("ms")-t0;" ms"
t0=time$("ms")'third pass, check for functions/subs/arraysPrint"third pass along token$ array..."
i=0while i <= curTokenNum
i=i+1if tokenPos(i, tp.num)=1then'first token on a stmntselectcasecaselower$(token$(i, t.name))="dim"orlower$(token$(i, t.name))="redim"
curLine = tokenPos(i, tp.line)
curStmnt = tokenPos(i, tp.stmnt)while i+1<=curTokenNum
if curLine <> tokenPos(i+1, tp.line) _
or curStmnt <> tokenPos(i+1, tp.stmnt)thenexitwhile
i=i+1if token$(i, t.type)="UDForArray"then token$(i, t.type)="dimmedArray"'mark all other instancesfor j =1to curTokenNum
if token$(j, t.name)=token$(i, t.name)and _
token$(j, t.type)="UDForArray"then token$(j, t.type)="dimmedArray"nextwendcaselower$(token$(i, t.name))="sub"orlower$(token$(i, t.name))="call"
i=i+1
token$(i, t.type)="sub"caselower$(token$(i, t.name))="function"
i=i+1
token$(i, t.type)="UDF"'mark all other instancesfor j =1to curTokenNum
if token$(j, t.name)=token$(i, t.name)and _
token$(j, t.type)="UDForArray"then token$(j, t.type)="UDF"nextendselectendifwendprinttime$("ms")-t0;" ms"
t0=time$("ms")'all other "UDForArray" are undimmed arraysPrint"last pass along token$ array..."for j =1to curTokenNum
if token$(j, t.type)="UDForArray"then token$(j, t.type)="unDimmedArray"nextgoto[SkipPrint2]print"----------------"for i =1to curTokenNum
print tokenPos(i, tp.line);" ";_
tokenPos(i, tp.stmnt);" ";_
tokenPos(i, tp.num),_
token$(i, t.type),_
token$(i, t.name)nextprint"----------------"[SkipPrint2]printtime$("ms")-t0;" ms"
t0=time$("ms")'count variablesPrint"Counting ..."
un.typeName=0
un.count=1dim uniqueName$(curTokenNum,1)
aIndex$ =""'aIndex would be in a form ######|word|, where ###### index in a()'You can guess ###### restricts max len to 999999
aLen=0'POSSIBLE TYPES'( ) , : ; ??? = buildInFunc dimmedArray hndl kwrd lbl name num op str sub UDF unDimmedArray
types2Skip$="( ) , : ; = | num op str buildInFunc kwrd "for i =1to curTokenNum
w$=token$(i, t.type);":";token$(i, t.name)ifinstr(types2Skip$, token$(i, t.type))<>0then[skipToken]
toFind$="|"+w$+"|"
pos=instr(aIndex$, toFind$)if pos =0then'add it in array
aLen = aLen+1
uniqueName$(aLen, un.typeName)=w$
uniqueName$(aLen, un.count)="1"'first time
aIndex$=aIndex$+using("######",aLen)+toFind$
else'get index for a word. FAST.
j =val(mid$(aIndex$, pos-6,6))
cnt =val(uniqueName$(j, un.count))
uniqueName$(j, un.count)=str$(cnt+1)endif[skipToken]nextprinttime$("ms")-t0;" ms"
t0=time$("ms")Print"sorting ..."sort uniqueName$(),1,aLen,0printtime$("ms")-t0;" ms"print"nDiffWords", aLen
print"----------------"print"Counter","Type","Name"print"-------------------------------------------"for i =1to aLen
print uniqueName$(i, un.count),word$(uniqueName$(i, un.typeName),1,":"), _
word$(uniqueName$(i, un.typeName),2,":")nextprint"-over-----------"end'----------------------[setLists]
qq$ =chr$(34)'(")
digits$="1234567890"
letters$=""for i=asc("A")toasc("Z")
letters$=letters$+chr$(i)next
letters$=letters$+lower$(letters$)
firstVarChars$=letters$
varChars$=letters$+digits$+".$"
firstNumChars$=digits$+".-"'from Rod's solution'command list
comlist$=" xor while wend wait until unloadbmp trace to titlebar timer then texteditor "
comlist$=comlist$+"textbox sub stylebits struct stopmidi stop step statictext sort select seek "
comlist$=comlist$+"scan run return resume restore rem redim readjoystick read randomize "
comlist$=comlist$+"radiobutton put prompt printerdialog print popupmenu playwave playmidi "
comlist$=comlist$+"password out or open oncomerror notice nomainwin next name mod menu "
comlist$=comlist$+"maphandle mainwin lprint loop loadbmp listbox line let kill input if "
comlist$=comlist$+"groupbox graphicbox goto gosub global gettrim get function for fontdialog "
comlist$=comlist$+"files filedialog field exit error end else dump do dim data cursor confirm "
comlist$=comlist$+"combobox colordialog cls close checkbox case callfn calldll callback call "
comlist$=comlist$+"button bmpsave bmpbutton beep as and "'type operators
typlist$=" word void ushort ulong short ptr none long dword double boolean "'command operators
opelist$=" window text random output graphics dll dialog byref binary append "
opelist$=opelist$+"horizscrollbar vertscrollbar on off min max window_nf window_popup graphics_fs graphics_nsb graphics_fs_nsb graphics_nf_nsb "
opelist$=opelist$+"text_fs text_nsb text_nsb_ins dialog_modal dialog_nf dialog_nf_modal dialog_fs dialog_nf_fs dialog_popup "
opelist$=opelist$+"yellow brown red darkred pink darkpink blue darkblue green darkgreen "
opelist$=opelist$+"cyan darkcyan white black lightgray darkgray buttonface "'function list
funlist$=" word$ winstring val using upper$ txcount trim$ time$ tan tab str$ sqr "
funlist$=funlist$+"space$ sin rnd rmdir right$ not mkdir min midipos mid$ max lower$ log "
funlist$=funlist$+"lof loc len left$ int instr inputto$ input$ inp hwnd hexdec hbmp exp "
funlist$=funlist$+"eval eval$ eof dechex$ date$ cos chr$ atn asn asc acs abs "
funlist$=funlist$+"upto$ after$ afterlast$ endswith remchar$ "return'---------------------sub storeToken tkn$, type$
curTokenNum=curTokenNum+1
curTokNum=curTokNum+1
token$(curTokenNum, t.name)=tkn$
token$(curTokenNum, t.type)=type$
tokenPos(curTokenNum, tp.line)=iif(contLine<>0, contLine, nLines)
tokenPos(curTokenNum, tp.stmnt)=curStmntNum
tokenPos(curTokenNum, tp.num)= curTokNum
endsub'---------------------function remStartSpaces$(aLine$)
whiteSpaces$=chr$(9)+" "for i =1tolen(aLine$)
c$=mid$(aLine$,i,1)ifinstr(whiteSpaces$, c$)=0then
remStartSpaces$=mid$(aLine$,i)exitfunctionendifnextendfunction'-----------------------------------------------function IsNumber(input$)'checks input$ for being valid number. Returns 1 if yes, 0 otherwise.
IsNumber =0'check sign
ns = eatUp(input$,"+-")if ns>1thenexitfunction'with False'now, digits
n1 = eatUp(input$,"0123456789")'could be decimal point
nd = eatUp(input$,".")if nd>1thenexitfunction'then again, digits
n2 = eatUp(input$,"0123456789")if n1+n2<1thenexitfunction'now, exponent
ne = eatUp(input$,"e")if ne<>0then'we have exponentif ne>1thenexitfunction'check sign
ns = eatUp(input$,"+-")if ns>1thenexitfunction'now, digits
n1 = eatUp(input$,"0123456789")if n1<1thenexitfunctionendififinput$=""then IsNumber =1:exitfunction'else we have leftovers - over with Falseendfunctionfunction eatUp(byRefinput$, chars2eat$)'trims all leading chars from input$ that match chars in chars2eat$'return count of trimmed characters
count =0whilelen(input$)>0ifinstr( chars2eat$,left$(input$,1))<>0theninput$ =mid$(input$,2)
count = count +1elseexitwhileendifwend
eatUp = count
endfunctionfunction iif(test, valYes, valNo)
iif = valNo
if test then iif = valYes
endfunction
It's got too big to put it on a forum. Actually, I tried to do a tokenizer so it figures out all things including numbers and strings. But after figuring things out it outputs only things that relevant to this task (variables, subs/user functions, labels).
Uses keyword list etc from Rod's entry.
Tested on biggest program I've ever seen in LB - freeform404.bas ;)