2021-02-25 17:46:40 -08:00
|
|
|
package picol
|
|
|
|
|
|
|
|
import (
|
|
|
|
"unicode"
|
|
|
|
"unicode/utf8"
|
|
|
|
)
|
|
|
|
|
2023-03-21 07:34:10 -07:00
|
|
|
// Token types reported in parserStruct.Type after each call to GetToken.
const (
	// ParserTokenESC marks a word fragment produced by parseString (bare or
	// double-quoted text).
	ParserTokenESC = iota
	// ParserTokenSTR marks a brace-enclosed string, or a lone "$".
	ParserTokenSTR
	// ParserTokenCMD marks the contents of a [bracketed] command.
	ParserTokenCMD
	// ParserTokenVAR marks a variable name following "$".
	ParserTokenVAR
	// ParserTokenSEP marks a run of separating whitespace.
	ParserTokenSEP
	// ParserTokenEOL marks the end of a command.
	ParserTokenEOL
	// ParserTokenEOF marks the end of the input.
	ParserTokenEOF
)
|
|
|
|
|
2023-03-21 07:34:10 -07:00
|
|
|
// parserStruct holds the tokenizer state for one input text.
type parserStruct struct {
	// text is the complete input being tokenized.
	text string
	// p is the byte offset of the current position in text.
	p int
	// start is the byte offset where the current token begins.
	start int
	// end is the byte offset just past the current token.
	end int
	// ln is the number of input bytes not yet consumed.
	ln int
	// insidequote is non-zero while the parser is inside a double-quoted
	// string.
	insidequote int
	// Type is the type (one of the ParserToken* constants) of the most
	// recently parsed token.
	Type int
}
|
|
|
|
|
2023-03-21 07:34:10 -07:00
|
|
|
// initParser initializes a new parserStruct instance
|
2023-05-18 18:04:20 -07:00
|
|
|
func InitParser(text string) *parserStruct {
|
2023-04-03 19:43:35 -07:00
|
|
|
return &parserStruct{text: text, ln: len(text), Type: ParserTokenEOL}
|
2021-02-25 17:46:40 -08:00
|
|
|
}
|
|
|
|
|
2023-03-21 07:34:10 -07:00
|
|
|
// next advances the parser position by one rune
|
2021-06-02 05:44:21 -07:00
|
|
|
func (p *parserStruct) next() {
|
2021-02-25 17:46:40 -08:00
|
|
|
_, w := utf8.DecodeRuneInString(p.text[p.p:])
|
|
|
|
p.p += w
|
|
|
|
p.ln -= w
|
|
|
|
}
|
|
|
|
|
2023-03-21 07:34:10 -07:00
|
|
|
// current returns the current rune at the parser position
|
2021-06-02 05:44:21 -07:00
|
|
|
func (p *parserStruct) current() rune {
|
2021-02-25 17:46:40 -08:00
|
|
|
r, _ := utf8.DecodeRuneInString(p.text[p.p:])
|
|
|
|
return r
|
|
|
|
}
|
|
|
|
|
2023-03-21 07:34:10 -07:00
|
|
|
// token returns the current token text between start and end positions
|
2021-06-02 05:44:21 -07:00
|
|
|
func (p *parserStruct) token() (t string) {
|
2021-02-25 17:46:40 -08:00
|
|
|
defer recover()
|
|
|
|
return p.text[p.start:p.end]
|
|
|
|
}
|
|
|
|
|
2023-03-21 07:34:10 -07:00
|
|
|
// parseSep parses whitespace separators
|
2021-06-02 05:44:21 -07:00
|
|
|
func (p *parserStruct) parseSep() string {
|
2021-02-25 17:46:40 -08:00
|
|
|
p.start = p.p
|
|
|
|
for ; p.p < len(p.text); p.next() {
|
|
|
|
if !unicode.IsSpace(p.current()) {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
p.end = p.p
|
2023-04-03 19:43:35 -07:00
|
|
|
p.Type = ParserTokenSEP
|
2021-02-25 17:46:40 -08:00
|
|
|
return p.token()
|
|
|
|
}
|
|
|
|
|
2023-03-21 07:34:10 -07:00
|
|
|
// parseEol parses end of line and comments
|
2021-06-02 05:44:21 -07:00
|
|
|
func (p *parserStruct) parseEol() string {
|
2021-02-25 17:46:40 -08:00
|
|
|
p.start = p.p
|
|
|
|
|
|
|
|
for ; p.p < len(p.text); p.next() {
|
|
|
|
if p.current() == ';' || unicode.IsSpace(p.current()) {
|
|
|
|
// pass
|
|
|
|
} else {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
p.end = p.p
|
2023-04-03 19:43:35 -07:00
|
|
|
p.Type = ParserTokenEOL
|
2021-02-25 17:46:40 -08:00
|
|
|
return p.token()
|
|
|
|
}
|
|
|
|
|
2023-03-21 07:34:10 -07:00
|
|
|
// parseCommand parses a command within brackets
|
2021-06-02 05:44:21 -07:00
|
|
|
func (p *parserStruct) parseCommand() string {
|
2021-02-25 17:46:40 -08:00
|
|
|
level, blevel := 1, 0
|
|
|
|
p.next() // skip
|
|
|
|
p.start = p.p
|
|
|
|
Loop:
|
|
|
|
for {
|
|
|
|
switch {
|
|
|
|
case p.ln == 0:
|
|
|
|
break Loop
|
|
|
|
case p.current() == '[' && blevel == 0:
|
|
|
|
level++
|
|
|
|
case p.current() == ']' && blevel == 0:
|
|
|
|
level--
|
|
|
|
if level == 0 {
|
|
|
|
break Loop
|
|
|
|
}
|
|
|
|
case p.current() == '\\':
|
|
|
|
p.next()
|
|
|
|
case p.current() == '{':
|
|
|
|
blevel++
|
|
|
|
case p.current() == '}' && blevel != 0:
|
|
|
|
blevel--
|
|
|
|
}
|
|
|
|
p.next()
|
|
|
|
}
|
|
|
|
p.end = p.p
|
2023-04-03 19:43:35 -07:00
|
|
|
p.Type = ParserTokenCMD
|
2021-02-25 17:46:40 -08:00
|
|
|
if p.p < len(p.text) && p.current() == ']' {
|
|
|
|
p.next()
|
|
|
|
}
|
|
|
|
return p.token()
|
|
|
|
}
|
|
|
|
|
2023-03-21 07:34:10 -07:00
|
|
|
// parseVar parses a variable reference
|
2021-06-02 05:44:21 -07:00
|
|
|
func (p *parserStruct) parseVar() string {
|
2021-02-25 17:46:40 -08:00
|
|
|
p.next() // skip the $
|
|
|
|
p.start = p.p
|
|
|
|
|
|
|
|
if p.current() == '{' {
|
2023-04-03 19:43:35 -07:00
|
|
|
p.Type = ParserTokenVAR
|
2021-02-25 17:46:40 -08:00
|
|
|
return p.parseBrace()
|
|
|
|
}
|
|
|
|
|
|
|
|
for p.p < len(p.text) {
|
|
|
|
c := p.current()
|
|
|
|
if unicode.IsLetter(c) || ('0' <= c && c <= '9') || c == '_' {
|
|
|
|
p.next()
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
if p.start == p.p { // It's just a single char string "$"
|
|
|
|
p.start = p.p - 1
|
|
|
|
p.end = p.p
|
2023-04-03 19:43:35 -07:00
|
|
|
p.Type = ParserTokenSTR
|
2021-02-25 17:46:40 -08:00
|
|
|
} else {
|
|
|
|
p.end = p.p
|
2023-04-03 19:43:35 -07:00
|
|
|
p.Type = ParserTokenVAR
|
2021-02-25 17:46:40 -08:00
|
|
|
}
|
|
|
|
return p.token()
|
|
|
|
}
|
|
|
|
|
2023-03-21 07:34:10 -07:00
|
|
|
// parseBrace parses a brace-enclosed string
|
2021-06-02 05:44:21 -07:00
|
|
|
func (p *parserStruct) parseBrace() string {
|
2021-02-25 17:46:40 -08:00
|
|
|
level := 1
|
|
|
|
p.next() // skip
|
|
|
|
p.start = p.p
|
|
|
|
|
|
|
|
Loop:
|
|
|
|
for p.p < len(p.text) {
|
|
|
|
c := p.current()
|
|
|
|
switch {
|
|
|
|
case p.ln >= 2 && c == '\\':
|
|
|
|
p.next()
|
|
|
|
case p.ln == 0 || c == '}':
|
|
|
|
level--
|
|
|
|
if level == 0 || p.ln == 0 {
|
|
|
|
break Loop
|
|
|
|
}
|
|
|
|
case c == '{':
|
|
|
|
level++
|
|
|
|
}
|
|
|
|
p.next()
|
|
|
|
}
|
|
|
|
p.end = p.p
|
|
|
|
if p.ln != 0 { // Skip final closed brace
|
|
|
|
p.next()
|
|
|
|
}
|
|
|
|
return p.token()
|
|
|
|
}
|
|
|
|
|
2023-03-21 07:34:10 -07:00
|
|
|
// parseString parses a string with or without quotes
|
2021-06-02 05:44:21 -07:00
|
|
|
func (p *parserStruct) parseString() string {
|
2023-04-03 19:43:35 -07:00
|
|
|
newword := p.Type == ParserTokenSEP || p.Type == ParserTokenEOL || p.Type == ParserTokenSTR
|
2021-02-25 17:46:40 -08:00
|
|
|
|
|
|
|
if c := p.current(); newword && c == '{' {
|
2023-04-03 19:43:35 -07:00
|
|
|
p.Type = ParserTokenSTR
|
2021-02-25 17:46:40 -08:00
|
|
|
return p.parseBrace()
|
|
|
|
} else if newword && c == '"' {
|
|
|
|
p.insidequote = 1
|
|
|
|
p.next() // skip
|
|
|
|
}
|
|
|
|
|
|
|
|
p.start = p.p
|
|
|
|
|
|
|
|
Loop:
|
|
|
|
for ; p.ln != 0; p.next() {
|
|
|
|
switch p.current() {
|
|
|
|
case '\\':
|
|
|
|
if p.ln >= 2 {
|
|
|
|
p.next()
|
|
|
|
}
|
|
|
|
case '$', '[':
|
|
|
|
break Loop
|
|
|
|
case '"':
|
|
|
|
if p.insidequote != 0 {
|
|
|
|
p.end = p.p
|
2023-04-03 19:43:35 -07:00
|
|
|
p.Type = ParserTokenESC
|
2021-02-25 17:46:40 -08:00
|
|
|
p.next()
|
|
|
|
p.insidequote = 0
|
|
|
|
return p.token()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if p.current() == ';' || unicode.IsSpace(p.current()) {
|
|
|
|
if p.insidequote == 0 {
|
|
|
|
break Loop
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
p.end = p.p
|
2023-04-03 19:43:35 -07:00
|
|
|
p.Type = ParserTokenESC
|
2021-02-25 17:46:40 -08:00
|
|
|
return p.token()
|
|
|
|
}
|
|
|
|
|
2023-03-21 07:34:10 -07:00
|
|
|
// parseComment skips over comment text
|
2021-06-02 05:44:21 -07:00
|
|
|
func (p *parserStruct) parseComment() string {
|
2021-02-25 17:46:40 -08:00
|
|
|
for p.ln != 0 && p.current() != '\n' {
|
|
|
|
p.next()
|
|
|
|
}
|
|
|
|
return p.token()
|
|
|
|
}
|
|
|
|
|
2023-03-21 07:34:10 -07:00
|
|
|
// GetToken returns the next token from the parser
|
2021-06-02 05:44:21 -07:00
|
|
|
func (p *parserStruct) GetToken() string {
|
2021-02-25 17:46:40 -08:00
|
|
|
for {
|
|
|
|
if p.ln == 0 {
|
2023-04-03 19:43:35 -07:00
|
|
|
if p.Type != ParserTokenEOL && p.Type != ParserTokenEOF {
|
|
|
|
p.Type = ParserTokenEOL
|
2021-02-25 17:46:40 -08:00
|
|
|
} else {
|
2023-04-03 19:43:35 -07:00
|
|
|
p.Type = ParserTokenEOF
|
2021-02-25 17:46:40 -08:00
|
|
|
}
|
|
|
|
return p.token()
|
|
|
|
}
|
|
|
|
|
|
|
|
switch p.current() {
|
|
|
|
case ' ', '\t', '\r':
|
|
|
|
if p.insidequote != 0 {
|
|
|
|
return p.parseString()
|
|
|
|
}
|
|
|
|
return p.parseSep()
|
|
|
|
case '\n', ';':
|
|
|
|
if p.insidequote != 0 {
|
|
|
|
return p.parseString()
|
|
|
|
}
|
|
|
|
return p.parseEol()
|
|
|
|
case '[':
|
|
|
|
return p.parseCommand()
|
|
|
|
case '$':
|
|
|
|
return p.parseVar()
|
|
|
|
case '#':
|
2023-04-03 19:43:35 -07:00
|
|
|
if p.Type == ParserTokenEOL {
|
2021-02-25 17:46:40 -08:00
|
|
|
p.parseComment()
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
return p.parseString()
|
|
|
|
default:
|
|
|
|
return p.parseString()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|