gagent/pkg/picol/parser.go

package picol

import (
	"unicode"
	"unicode/utf8"
)

// Token types reported in parserStruct.Type after each GetToken call.
const (
	// ParserTokenESC is assigned by parseString to bare word text and to
	// double-quoted string segments.
	ParserTokenESC = iota
	// ParserTokenSTR is assigned to brace-quoted words and to a lone "$".
	ParserTokenSTR
	// ParserTokenCMD is assigned by parseCommand to the text between '[' and
	// its matching ']'.
	ParserTokenCMD
	// ParserTokenVAR is assigned by parseVar to a variable name ($name or
	// ${name}).
	ParserTokenVAR
	// ParserTokenSEP is assigned by parseSep to a run of whitespace.
	ParserTokenSEP
	// ParserTokenEOL is assigned by parseEol to a run of ';' and whitespace.
	// It is also the initial type set by InitParser, and GetToken reports it
	// once at end of input before reporting ParserTokenEOF.
	ParserTokenEOL
	// ParserTokenEOF is reported by GetToken once the input is exhausted and
	// the previous type was already EOL or EOF.
	ParserTokenEOF
)

// parserStruct holds the tokenizer state for one input string.
type parserStruct struct {
	// text is the complete input being tokenized.
	text string
	// p is the byte offset of the cursor within text.
	p int
	// start is the byte offset where the current token begins.
	start int
	// end is the byte offset just past the current token.
	end int
	// ln is the number of bytes of text not yet consumed.
	ln int
	// insidequote is non-zero while scanning inside a double-quoted string.
	insidequote int
	// Type is the ParserToken* type of the most recently returned token.
	Type int
}

// InitParser returns a parser positioned at the start of text. The remaining
// length is the full byte length of text, and the token type starts out as
// ParserTokenEOL.
func InitParser(text string) *parserStruct {
	parser := new(parserStruct)
	parser.text = text
	parser.ln = len(text)
	parser.Type = ParserTokenEOL
	return parser
}

// next moves the cursor forward by the encoded width of the rune under it and
// shrinks the remaining byte count by the same amount. At end of input the
// decoded width is zero, so the call leaves the state unchanged.
func (p *parserStruct) next() {
	_, size := utf8.DecodeRuneInString(p.text[p.p:])
	p.p, p.ln = p.p+size, p.ln-size
}

// current decodes and returns the rune under the cursor without advancing.
// At end of input (or on invalid UTF-8) the result is utf8.RuneError.
func (p *parserStruct) current() rune {
	rest := p.text[p.p:]
	ch, _ := utf8.DecodeRuneInString(rest)
	return ch
}

// token returns the text of the most recently scanned token, that is
// p.text[p.start:p.end].
//
// If start and end do not describe a valid range of p.text, the empty string
// is returned. The range is checked explicitly because a bare
// `defer recover()` does not stop a panic: recover only has an effect when it
// is called from inside a deferred function, not when it is the deferred call
// itself.
func (p *parserStruct) token() (t string) {
	if p.start < 0 || p.start > p.end || p.end > len(p.text) {
		return ""
	}
	return p.text[p.start:p.end]
}

// parseSep consumes a run of whitespace starting at the cursor and returns it
// as a ParserTokenSEP token. Every rune for which unicode.IsSpace reports
// true is consumed, newlines included.
func (p *parserStruct) parseSep() string {
	p.start = p.p
	for p.p < len(p.text) && unicode.IsSpace(p.current()) {
		p.next()
	}
	p.end = p.p
	p.Type = ParserTokenSEP
	return p.token()
}

// parseEol consumes a run of ';' and whitespace runes starting at the cursor
// and returns it as a ParserTokenEOL token.
func (p *parserStruct) parseEol() string {
	p.start = p.p
	for p.p < len(p.text) {
		// Stop at the first rune that is neither ';' nor whitespace.
		if ch := p.current(); ch != ';' && !unicode.IsSpace(ch) {
			break
		}
		p.next()
	}
	p.end = p.p
	p.Type = ParserTokenEOL
	return p.token()
}

// parseCommand scans a bracketed command substitution. The cursor must be on
// the opening '['. The returned ParserTokenCMD token is the text between that
// bracket and its matching ']', both excluded.
//
// Nested brackets are balanced, brackets inside braces are ignored, and a
// backslash hides the rune that follows it. If the input ends before the
// matching ']', the token runs to the end of the input.
func (p *parserStruct) parseCommand() string {
	p.next() // step over the opening '['
	p.start = p.p

	brackets, braces := 1, 0
scan:
	for p.ln != 0 {
		switch p.current() {
		case '[':
			if braces == 0 {
				brackets++
			}
		case ']':
			if braces == 0 {
				brackets--
				if brackets == 0 {
					break scan
				}
			}
		case '\\':
			// Skip the escaped rune; the p.next() below steps past it.
			p.next()
		case '{':
			braces++
		case '}':
			if braces != 0 {
				braces--
			}
		}
		p.next()
	}

	p.end = p.p
	p.Type = ParserTokenCMD
	// Consume the closing ']' so it is not part of the next token.
	if p.p < len(p.text) && p.current() == ']' {
		p.next()
	}
	return p.token()
}

// parseVar scans a variable reference. The cursor must be on the '$'.
//
// "${...}" is delegated to parseBrace and reported as ParserTokenVAR.
// Otherwise the name is the longest run of letters, ASCII digits and '_'
// after the '$'. If that run is empty, the "$" itself is returned as a
// ParserTokenSTR token.
func (p *parserStruct) parseVar() string {
	p.next() // consume the '$'
	p.start = p.p

	if p.current() == '{' {
		p.Type = ParserTokenVAR
		return p.parseBrace()
	}

	for p.p < len(p.text) {
		r := p.current()
		isNameRune := r == '_' || (r >= '0' && r <= '9') || unicode.IsLetter(r)
		if !isNameRune {
			break
		}
		p.next()
	}

	p.end = p.p
	if p.start != p.p {
		p.Type = ParserTokenVAR
		return p.token()
	}

	// No name followed the '$': back up one byte so the token is the "$".
	p.start = p.p - 1
	p.Type = ParserTokenSTR
	return p.token()
}

// parseBrace scans a brace-quoted word. The cursor must be on the opening
// '{'. The returned token is the text between that brace and its matching
// '}', both excluded. Nested braces are balanced, and a backslash hides the
// rune that follows it.
//
// The token type is not set here; callers assign p.Type before delegating.
// If the input ends before the matching '}', the token runs to the end of the
// input.
func (p *parserStruct) parseBrace() string {
	p.next() // step over the opening '{'
	p.start = p.p

	depth := 1
	for p.p < len(p.text) {
		ch := p.current()
		if ch == '\\' && p.ln >= 2 {
			// Skip the escaped rune; the p.next() below steps past it.
			p.next()
		} else if p.ln == 0 || ch == '}' {
			depth--
			if depth == 0 || p.ln == 0 {
				break
			}
		} else if ch == '{' {
			depth++
		}
		p.next()
	}

	p.end = p.p
	if p.ln != 0 {
		p.next() // consume the closing '}'
	}
	return p.token()
}

// parseString scans one segment of a word and returns its text.
//
// At the start of a word (the previous token was SEP, EOL or STR) a leading
// '{' is delegated to parseBrace and reported as ParserTokenSTR, and a leading
// '"' is consumed and opens a quoted string. In every other case the segment
// is reported as ParserTokenESC.
//
// Scanning stops, without consuming the stopping rune, at '$' or '[', at ';'
// or whitespace when not inside quotes, or at end of input. Inside quotes a
// '"' ends the segment, is consumed, and closes the quoted string.
//
// A backslash hides the rune that follows it. Each rune is classified once,
// before any escape skip, so an escaped separator (for example `\ ` or `\;`)
// stays part of the word in the same way an escaped '$', '[' or '"' does.
func (p *parserStruct) parseString() string {
	newword := p.Type == ParserTokenSEP || p.Type == ParserTokenEOL || p.Type == ParserTokenSTR

	if c := p.current(); newword && c == '{' {
		p.Type = ParserTokenSTR
		return p.parseBrace()
	} else if newword && c == '"' {
		p.insidequote = 1
		p.next() // skip the opening quote
	}

	p.start = p.p

Loop:
	for ; p.ln != 0; p.next() {
		c := p.current()
		switch {
		case c == '\\':
			// Step onto the escaped rune; the loop's p.next() then moves
			// past it without it ever being classified.
			if p.ln >= 2 {
				p.next()
			}
		case c == '$' || c == '[':
			break Loop
		case c == '"':
			if p.insidequote != 0 {
				p.end = p.p
				p.Type = ParserTokenESC
				p.next() // consume the closing quote
				p.insidequote = 0
				return p.token()
			}
		case c == ';' || unicode.IsSpace(c):
			if p.insidequote == 0 {
				break Loop
			}
		}
	}

	p.end = p.p
	p.Type = ParserTokenESC
	return p.token()
}

// parseComment advances the cursor to the next '\n' (which is not consumed)
// or to the end of the input. It does not update start, end or Type, so the
// returned string is whatever token range was already recorded.
func (p *parserStruct) parseComment() string {
	for {
		if p.ln == 0 || p.current() == '\n' {
			break
		}
		p.next()
	}
	return p.token()
}

// GetToken scans the next token, records its type in p.Type and returns its
// text.
//
// At end of input it reports ParserTokenEOL once (unless the previous token
// was already EOL or EOF) and ParserTokenEOF on every call after that; the
// returned text in that case is the previously recorded token range.
//
// Dispatch is on the rune under the cursor:
//   - '\n' or ';'      -> parseEol (parseString while inside quotes)
//   - other whitespace -> parseSep (parseString while inside quotes)
//   - '['              -> parseCommand
//   - '$'              -> parseVar
//   - '#' after an EOL -> comment, skipped entirely
//   - anything else    -> parseString
//
// Whitespace is recognised with unicode.IsSpace, the same predicate
// parseString uses to end a bare word. Every rune that ends a word is
// therefore consumed as a separator, and no call returns without advancing
// the cursor.
func (p *parserStruct) GetToken() string {
	for {
		if p.ln == 0 {
			if p.Type != ParserTokenEOL && p.Type != ParserTokenEOF {
				p.Type = ParserTokenEOL
			} else {
				p.Type = ParserTokenEOF
			}
			return p.token()
		}

		switch c := p.current(); {
		case c == '\n' || c == ';':
			if p.insidequote != 0 {
				return p.parseString()
			}
			return p.parseEol()
		case unicode.IsSpace(c):
			if p.insidequote != 0 {
				return p.parseString()
			}
			return p.parseSep()
		case c == '[':
			return p.parseCommand()
		case c == '$':
			return p.parseVar()
		case c == '#' && p.Type == ParserTokenEOL:
			// A '#' at the start of a command begins a comment: skip it and
			// scan again from the terminating newline.
			p.parseComment()
			continue
		default:
			return p.parseString()
		}
	}
}
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`package picol`

			`import (`
			`"unicode"`
			`"unicode/utf8"`
			`)`

refactor: converted int64 to int32 where appropriate. refactor: reduced usage of fmt in favor of logs and string concatication. refactor: minor re-ordering of the data structures to reduce storage space required. 2023-03-21 07:34:10 -07:00			`// Define parser token types`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`const (`
feat: added some more test harness. 2023-04-03 19:43:35 -07:00			`ParserTokenESC = iota`
			`ParserTokenSTR`
			`ParserTokenCMD`
			`ParserTokenVAR`
			`ParserTokenSEP`
			`ParserTokenEOL`
			`ParserTokenEOF`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`)`

refactor: converted int64 to int32 where appropriate. refactor: reduced usage of fmt in favor of logs and string concatication. refactor: minor re-ordering of the data structures to reduce storage space required. 2023-03-21 07:34:10 -07:00			`// parserStruct represents the parser state`
fix: [CI SKIP] Continuing cleanup based on the report card. 2021-06-02 05:44:21 -07:00			`type parserStruct struct {`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`text string`
			`p, start, end, ln int`
			`insidequote int`
			`Type int`
			`}`

refactor: converted int64 to int32 where appropriate. refactor: reduced usage of fmt in favor of logs and string concatication. refactor: minor re-ordering of the data structures to reduce storage space required. 2023-03-21 07:34:10 -07:00			`// initParser initializes a new parserStruct instance`
fix: modified to fix some of the test failures. 2023-05-18 18:04:20 -07:00			`func InitParser(text string) *parserStruct {`
feat: added some more test harness. 2023-04-03 19:43:35 -07:00			`return &parserStruct{text: text, ln: len(text), Type: ParserTokenEOL}`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`}`

refactor: converted int64 to int32 where appropriate. refactor: reduced usage of fmt in favor of logs and string concatication. refactor: minor re-ordering of the data structures to reduce storage space required. 2023-03-21 07:34:10 -07:00			`// next advances the parser position by one rune`
fix: [CI SKIP] Continuing cleanup based on the report card. 2021-06-02 05:44:21 -07:00			`func (p *parserStruct) next() {`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`_, w := utf8.DecodeRuneInString(p.text[p.p:])`
			`p.p += w`
			`p.ln -= w`
			`}`

refactor: converted int64 to int32 where appropriate. refactor: reduced usage of fmt in favor of logs and string concatication. refactor: minor re-ordering of the data structures to reduce storage space required. 2023-03-21 07:34:10 -07:00			`// current returns the current rune at the parser position`
fix: [CI SKIP] Continuing cleanup based on the report card. 2021-06-02 05:44:21 -07:00			`func (p *parserStruct) current() rune {`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`r, _ := utf8.DecodeRuneInString(p.text[p.p:])`
			`return r`
			`}`

refactor: converted int64 to int32 where appropriate. refactor: reduced usage of fmt in favor of logs and string concatication. refactor: minor re-ordering of the data structures to reduce storage space required. 2023-03-21 07:34:10 -07:00			`// token returns the current token text between start and end positions`
fix: [CI SKIP] Continuing cleanup based on the report card. 2021-06-02 05:44:21 -07:00			`func (p *parserStruct) token() (t string) {`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`defer recover()`
			`return p.text[p.start:p.end]`
			`}`

refactor: converted int64 to int32 where appropriate. refactor: reduced usage of fmt in favor of logs and string concatication. refactor: minor re-ordering of the data structures to reduce storage space required. 2023-03-21 07:34:10 -07:00			`// parseSep parses whitespace separators`
fix: [CI SKIP] Continuing cleanup based on the report card. 2021-06-02 05:44:21 -07:00			`func (p *parserStruct) parseSep() string {`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`p.start = p.p`
			`for ; p.p < len(p.text); p.next() {`
			`if !unicode.IsSpace(p.current()) {`
			`break`
			`}`
			`}`
			`p.end = p.p`
feat: added some more test harness. 2023-04-03 19:43:35 -07:00			`p.Type = ParserTokenSEP`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`return p.token()`
			`}`

refactor: converted int64 to int32 where appropriate. refactor: reduced usage of fmt in favor of logs and string concatication. refactor: minor re-ordering of the data structures to reduce storage space required. 2023-03-21 07:34:10 -07:00			`// parseEol parses end of line and comments`
fix: [CI SKIP] Continuing cleanup based on the report card. 2021-06-02 05:44:21 -07:00			`func (p *parserStruct) parseEol() string {`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`p.start = p.p`

			`for ; p.p < len(p.text); p.next() {`
			`if p.current() == ';' \|\| unicode.IsSpace(p.current()) {`
			`// pass`
			`} else {`
			`break`
			`}`
			`}`

			`p.end = p.p`
feat: added some more test harness. 2023-04-03 19:43:35 -07:00			`p.Type = ParserTokenEOL`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`return p.token()`
			`}`

refactor: converted int64 to int32 where appropriate. refactor: reduced usage of fmt in favor of logs and string concatication. refactor: minor re-ordering of the data structures to reduce storage space required. 2023-03-21 07:34:10 -07:00			`// parseCommand parses a command within brackets`
fix: [CI SKIP] Continuing cleanup based on the report card. 2021-06-02 05:44:21 -07:00			`func (p *parserStruct) parseCommand() string {`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`level, blevel := 1, 0`
			`p.next() // skip`
			`p.start = p.p`
			`Loop:`
			`for {`
			`switch {`
			`case p.ln == 0:`
			`break Loop`
			`case p.current() == '[' && blevel == 0:`
			`level++`
			`case p.current() == ']' && blevel == 0:`
			`level--`
			`if level == 0 {`
			`break Loop`
			`}`
			`case p.current() == '\\':`
			`p.next()`
			`case p.current() == '{':`
			`blevel++`
			`case p.current() == '}' && blevel != 0:`
			`blevel--`
			`}`
			`p.next()`
			`}`
			`p.end = p.p`
feat: added some more test harness. 2023-04-03 19:43:35 -07:00			`p.Type = ParserTokenCMD`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`if p.p < len(p.text) && p.current() == ']' {`
			`p.next()`
			`}`
			`return p.token()`
			`}`

refactor: converted int64 to int32 where appropriate. refactor: reduced usage of fmt in favor of logs and string concatication. refactor: minor re-ordering of the data structures to reduce storage space required. 2023-03-21 07:34:10 -07:00			`// parseVar parses a variable reference`
fix: [CI SKIP] Continuing cleanup based on the report card. 2021-06-02 05:44:21 -07:00			`func (p *parserStruct) parseVar() string {`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`p.next() // skip the $`
			`p.start = p.p`

			`if p.current() == '{' {`
feat: added some more test harness. 2023-04-03 19:43:35 -07:00			`p.Type = ParserTokenVAR`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`return p.parseBrace()`
			`}`

			`for p.p < len(p.text) {`
			`c := p.current()`
			`if unicode.IsLetter(c) \|\| ('0' <= c && c <= '9') \|\| c == '_' {`
			`p.next()`
			`continue`
			`}`
			`break`
			`}`

			`if p.start == p.p { // It's just a single char string "$"`
			`p.start = p.p - 1`
			`p.end = p.p`
feat: added some more test harness. 2023-04-03 19:43:35 -07:00			`p.Type = ParserTokenSTR`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`} else {`
			`p.end = p.p`
feat: added some more test harness. 2023-04-03 19:43:35 -07:00			`p.Type = ParserTokenVAR`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`}`
			`return p.token()`
			`}`

refactor: converted int64 to int32 where appropriate. refactor: reduced usage of fmt in favor of logs and string concatication. refactor: minor re-ordering of the data structures to reduce storage space required. 2023-03-21 07:34:10 -07:00			`// parseBrace parses a brace-enclosed string`
fix: [CI SKIP] Continuing cleanup based on the report card. 2021-06-02 05:44:21 -07:00			`func (p *parserStruct) parseBrace() string {`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`level := 1`
			`p.next() // skip`
			`p.start = p.p`

			`Loop:`
			`for p.p < len(p.text) {`
			`c := p.current()`
			`switch {`
			`case p.ln >= 2 && c == '\\':`
			`p.next()`
			`case p.ln == 0 \|\| c == '}':`
			`level--`
			`if level == 0 \|\| p.ln == 0 {`
			`break Loop`
			`}`
			`case c == '{':`
			`level++`
			`}`
			`p.next()`
			`}`
			`p.end = p.p`
			`if p.ln != 0 { // Skip final closed brace`
			`p.next()`
			`}`
			`return p.token()`
			`}`

refactor: converted int64 to int32 where appropriate. refactor: reduced usage of fmt in favor of logs and string concatication. refactor: minor re-ordering of the data structures to reduce storage space required. 2023-03-21 07:34:10 -07:00			`// parseString parses a string with or without quotes`
fix: [CI SKIP] Continuing cleanup based on the report card. 2021-06-02 05:44:21 -07:00			`func (p *parserStruct) parseString() string {`
feat: added some more test harness. 2023-04-03 19:43:35 -07:00			`newword := p.Type == ParserTokenSEP \|\| p.Type == ParserTokenEOL \|\| p.Type == ParserTokenSTR`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00
			`if c := p.current(); newword && c == '{' {`
feat: added some more test harness. 2023-04-03 19:43:35 -07:00			`p.Type = ParserTokenSTR`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`return p.parseBrace()`
			`} else if newword && c == '"' {`
			`p.insidequote = 1`
			`p.next() // skip`
			`}`

			`p.start = p.p`

			`Loop:`
			`for ; p.ln != 0; p.next() {`
			`switch p.current() {`
			`case '\\':`
			`if p.ln >= 2 {`
			`p.next()`
			`}`
			`case '$', '[':`
			`break Loop`
			`case '"':`
			`if p.insidequote != 0 {`
			`p.end = p.p`
feat: added some more test harness. 2023-04-03 19:43:35 -07:00			`p.Type = ParserTokenESC`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`p.next()`
			`p.insidequote = 0`
			`return p.token()`
			`}`
			`}`
			`if p.current() == ';' \|\| unicode.IsSpace(p.current()) {`
			`if p.insidequote == 0 {`
			`break Loop`
			`}`
			`}`
			`}`

			`p.end = p.p`
feat: added some more test harness. 2023-04-03 19:43:35 -07:00			`p.Type = ParserTokenESC`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`return p.token()`
			`}`

refactor: converted int64 to int32 where appropriate. refactor: reduced usage of fmt in favor of logs and string concatication. refactor: minor re-ordering of the data structures to reduce storage space required. 2023-03-21 07:34:10 -07:00			`// parseComment skips over comment text`
fix: [CI SKIP] Continuing cleanup based on the report card. 2021-06-02 05:44:21 -07:00			`func (p *parserStruct) parseComment() string {`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`for p.ln != 0 && p.current() != '\n' {`
			`p.next()`
			`}`
			`return p.token()`
			`}`

refactor: converted int64 to int32 where appropriate. refactor: reduced usage of fmt in favor of logs and string concatication. refactor: minor re-ordering of the data structures to reduce storage space required. 2023-03-21 07:34:10 -07:00			`// GetToken returns the next token from the parser`
fix: [CI SKIP] Continuing cleanup based on the report card. 2021-06-02 05:44:21 -07:00			`func (p *parserStruct) GetToken() string {`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`for {`
			`if p.ln == 0 {`
feat: added some more test harness. 2023-04-03 19:43:35 -07:00			`if p.Type != ParserTokenEOL && p.Type != ParserTokenEOF {`
			`p.Type = ParserTokenEOL`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`} else {`
feat: added some more test harness. 2023-04-03 19:43:35 -07:00			`p.Type = ParserTokenEOF`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`}`
			`return p.token()`
			`}`

			`switch p.current() {`
			`case ' ', '\t', '\r':`
			`if p.insidequote != 0 {`
			`return p.parseString()`
			`}`
			`return p.parseSep()`
			`case '\n', ';':`
			`if p.insidequote != 0 {`
			`return p.parseString()`
			`}`
			`return p.parseEol()`
			`case '[':`
			`return p.parseCommand()`
			`case '$':`
			`return p.parseVar()`
			`case '#':`
feat: added some more test harness. 2023-04-03 19:43:35 -07:00			`if p.Type == ParserTokenEOL {`
fix: Re-initializing after I destroyed the original repository. 2021-02-25 17:46:40 -08:00			`p.parseComment()`
			`continue`
			`}`
			`return p.parseString()`
			`default:`
			`return p.parseString()`
			`}`
			`}`
			`}`