Complete LinkSyncServer and LinkSyncExtension implementation
LinkSyncServer: - Fix app.py imports, add CORS middleware, lifespan events - Create api/routes.py router aggregator - Create config/settings.py for centralized configuration - Rewrite models/base.py with proper relationships and serialization - Rewrite all API endpoints with real DB integration (auth, links, collections, sync, queries, tags) - Add admin endpoints (user management, stats, audit log) - Complete query parser with recursive descent and proper precedence - Complete query executor with set operations and field filters - Set up Alembic migrations with initial schema - Create web interface (templates, CSS, JS) - Add 42 passing tests (auth, links, collections, queries) - Add deploy.ps1 and deploy.sh scripts - Update README with deployment workflow LinkSyncExtension: - Create utils/api.js (REST client with retries, auth, error handling) - Create utils/sync.js (3 sync modes + conflict detection) - Create utils/collection.js (collection management) - Create utils/query-engine.js (client-side query parser) - Rewrite background.js (sync loop, bookmark events, message routing) - Rewrite popup.js (tabs, settings modal, notifications, CRUD) - Update popup.html (tabbed interface, query builder, modal) - Update popup.css (full redesign) - Create content/content.js (page metadata extraction) - Create options.html/js (dedicated settings page) - Generate icons (48x48, 96x96) - Update manifest.json (host permissions, content scripts, options) - Create AGENTS.md
This commit is contained in:
@@ -2,219 +2,99 @@
|
||||
LinkSyncServer - Query Executor
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Any, Optional
|
||||
from sqlalchemy.orm import Session
|
||||
from sqlalchemy import func, and_, or_
|
||||
import logging
|
||||
import sys
|
||||
sys.path.insert(0, 'models')
|
||||
from base import Bookmark, User
|
||||
from typing import Any, Dict, List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def parse_query_expression(query_expression: dict, expressions: list = None) -> Optional[Dict[str, Any]]:
    """
    Normalise a query expression given in dict form.

    Example input:
        {
            "operation": "OR",
            "operands": [
                {"operation": "TERM", "value": "work"},
                {"operation": "TERM", "value": "company"}
            ]
        }

    Args:
        query_expression: Node with an ``operation`` key plus either a
            ``value`` (leaf) or a non-empty list of ``operands`` (compound).
        expressions: Unused; kept so existing call sites keep working.

    Returns:
        ``None`` for an empty expression or for a leaf that is not a TERM.
        For a TERM leaf: the result of ``parse_term`` when the value starts
        with ``url:``, ``tag:``, ``title:`` or ``description:``; an
        ``EQUALS`` node when it starts with ``id:``; otherwise an ``OR`` node
        searching both title and description.
        For a compound node: the same operation with every operand
        normalised recursively (operands that normalise to ``None`` are
        dropped).
    """
    if not query_expression:
        return None

    operation = query_expression.get('operation')
    operands = query_expression.get('operands') or []

    if operands:
        # Compound node: previously this fell through and returned None, so
        # the docstring's own example could never be parsed.
        parsed_children = (parse_query_expression(child) for child in operands)
        return {
            'operation': operation,
            'operands': [child for child in parsed_children if child is not None],
        }

    if operation != 'TERM':
        return None

    raw_value = query_expression.get('value')
    # An explicit None (or a non-string) must not crash the prefix checks.
    value = '' if raw_value is None else str(raw_value)

    field_prefixes = (
        ('url:', 'url'),
        ('tag:', 'tags'),
        ('title:', 'title'),
        ('description:', 'description'),
    )
    for prefix, field in field_prefixes:
        if value.startswith(prefix):
            return parse_term(value[len(prefix):], field)

    if value.startswith('id:'):
        return {'operation': 'EQUALS', 'value': value[len('id:'):]}

    # No recognised prefix: search title and description.
    return {'operation': 'OR', 'operands': [
        {'operation': 'TERM', 'value': value, 'field': 'title'},
        {'operation': 'TERM', 'value': value, 'field': 'description'},
    ]}
|
||||
|
||||
|
||||
def parse_term(term: str, field: str):
    """
    Build a substring filter on ``Bookmark`` for one ``field:value`` term.

    Args:
        term: Text to search for. For the ``tags`` field it may hold several
            comma-separated tags, all of which must match.
        field: One of ``tags``, ``title``, ``description``, ``url`` or
            ``path``. Any other value falls back to a single match against
            ``Bookmark.tags``.

    Returns:
        A dict ``{'field': field, 'value': term, 'clause': <filter>}`` where
        ``clause`` is the SQLAlchemy expression built with ``ilike``.
    """
    def contains(column_name: str, needle: str):
        # The column is resolved lazily so a missing attribute only fails for
        # the field that is actually requested.
        return getattr(Bookmark, column_name).ilike(f'%{needle}%')

    if field == 'tags':
        # Each tag gets its own pattern. The previous code interpolated the
        # whole comma-separated term for every tag, so multi-tag queries
        # could never match.
        filter_clause = and_(*[contains('tags', tag.strip()) for tag in term.split(',')])
    elif field in ('title', 'description', 'url', 'path'):
        filter_clause = contains(field, term)
    else:
        # Unknown field: match the raw term against tags.
        filter_clause = contains('tags', term)

    return {'field': field, 'value': term, 'clause': filter_clause}
|
||||
|
||||
|
||||
def parse_or_filter(operators: list, operands: list) -> Any:
    """
    Combine operands into a single SQLAlchemy boolean clause.

    Args:
        operators: Operator names; only the first entry is consulted.
            ``'OR'`` and ``'AND'`` are supported, and ``'AND'`` is assumed
            when the list is empty.
        operands: Operands in the form accepted by ``parse_and_filter``.

    Returns:
        ``False`` when there are no operands or none of them produced a
        clause; otherwise ``or_(...)`` / ``and_(...)`` over the clauses.

    Raises:
        ValueError: If the operator is anything other than ``OR``/``AND``
            (for example ``XOR``, which is not supported).
    """
    if not operands:
        return False

    # Default to AND for safety
    op_type = operators[0] if operators else 'AND'
    if op_type not in ('OR', 'AND'):
        raise ValueError(f"{op_type} not supported")

    # The previous code called parse_and_filter with two arguments (it takes
    # one) and skipped the first operand, so it could never succeed.
    clauses = parse_and_filter(operands)
    if not clauses:
        return False

    # Accept both bare clauses and parse_term-style dicts carrying 'clause'.
    clauses = [
        item['clause'] if isinstance(item, dict) and 'clause' in item else item
        for item in clauses
    ]
    return or_(*clauses) if op_type == 'OR' else and_(*clauses)
|
||||
|
||||
|
||||
def parse_and_filter(operands: list) -> Any:
    """
    Convert AND operands into a list of filter clauses.

    Args:
        operands: Items that are either strings or operation dicts.

    Returns:
        ``False`` when ``operands`` is empty or nothing produced a clause;
        otherwise a list with one entry per contributing operand:
        the ``clause`` built by ``parse_term`` for ``TERM`` dicts, and the
        operand itself for dicts with any other operation except ``EQUALS``.

    Raises:
        ValueError: If an operand is neither a string nor a dict.
    """
    if not operands:
        return False

    clauses = []
    for operand in operands:
        if isinstance(operand, str):
            # NOTE(review): bare strings have never contributed a clause (the
            # old code assigned a local and discarded it). Confirm the
            # intended meaning before turning them into filters.
            continue

        if not isinstance(operand, dict):
            raise ValueError(f"Unknown operand type: {type(operand)}")

        kind = operand.get('operation')
        if kind == 'EQUALS':
            # NOTE(review): as with strings, EQUALS operands were computed and
            # dropped; presumably they should become an id comparison.
            continue
        if kind == 'TERM':
            parsed = parse_term(operand.get('value', ''), operand.get('field', 'tags'))
            # parse_term returns a dict; only its 'clause' is usable by and_().
            clauses.append(parsed['clause'])
        else:
            clauses.append(operand)

    return clauses or False
|
||||
|
||||
|
||||
def execute_query(parsed: Dict[str, Any], bookmarks: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Run a parsed query against an in-memory list of bookmarks.

    This span previously interleaved two definitions: a legacy
    database-backed one (which created a ``Session()`` it never closed and
    swallowed every exception) and this in-memory one. Only the in-memory
    version is kept, because it is the one that matches ``_evaluate_node``.

    Args:
        parsed: Query tree in dict form (``operation`` plus ``value`` or
            ``operands``), as consumed by ``_evaluate_node``.
        bookmarks: Bookmark dicts; each must carry an ``"id"`` key.

    Returns:
        The bookmarks whose id was selected by the query, in their original
        order. An empty list when either argument is empty or ``None``.
    """
    if not parsed or not bookmarks:
        return []

    matching_ids = _evaluate_node(parsed, bookmarks)
    return [bookmark for bookmark in bookmarks if bookmark["id"] in matching_ids]
|
||||
|
||||
|
||||
def create_bookmarks_from_sync(sync_data: dict):
    """
    Extract bookmark dicts from a sync response payload.

    Args:
        sync_data: Response dict; bookmarks are read from its ``objects``
            list. Entries without a ``title`` key are ignored.

    Returns:
        A list of dicts with the keys ``url``, ``title``, ``description``,
        ``tags``, ``favicon_url``, ``path`` and ``visit_count``; missing keys
        get an empty string, an empty list (``tags``) or ``0``
        (``visit_count``). An empty list when ``sync_data`` is empty or has
        no objects. Nothing is persisted here.
    """
    if not sync_data:
        return []

    # 'objects' may be absent or explicitly None. The old code also built an
    # unused '_links' lookup that could raise on a non-dict value.
    objects = sync_data.get('objects') or []

    return [
        {
            'url': obj.get('url', ''),
            'title': obj.get('title', ''),
            'description': obj.get('description', ''),
            'tags': obj.get('tags', []),
            'favicon_url': obj.get('favicon_url', ''),
            'path': obj.get('path', ''),
            'visit_count': obj.get('visit_count', 0),
        }
        for obj in objects
        if isinstance(obj, dict) and 'title' in obj
    ]
|
||||
def _evaluate_node(node: Dict[str, Any], bookmarks: List[Dict[str, Any]]) -> set:
|
||||
operation = node.get("operation", "")
|
||||
|
||||
if operation == "OR":
|
||||
operands = node.get("operands", [])
|
||||
if not operands:
|
||||
return set()
|
||||
result = _evaluate_node(operands[0], bookmarks)
|
||||
for operand in operands[1:]:
|
||||
result |= _evaluate_node(operand, bookmarks)
|
||||
return result
|
||||
|
||||
if operation == "AND":
|
||||
operands = node.get("operands", [])
|
||||
if not operands:
|
||||
return set()
|
||||
result = _evaluate_node(operands[0], bookmarks)
|
||||
for operand in operands[1:]:
|
||||
result &= _evaluate_node(operand, bookmarks)
|
||||
return result
|
||||
|
||||
if operation == "XOR":
|
||||
operands = node.get("operands", [])
|
||||
if not operands:
|
||||
return set()
|
||||
result = _evaluate_node(operands[0], bookmarks)
|
||||
for operand in operands[1:]:
|
||||
result ^= _evaluate_node(operand, bookmarks)
|
||||
return result
|
||||
|
||||
if operation == "TERM":
|
||||
value = node.get("value", "").lower()
|
||||
return {
|
||||
b["id"]
|
||||
for b in bookmarks
|
||||
if value in b.get("title", "").lower()
|
||||
or value in b.get("description", "").lower()
|
||||
or value in b.get("url", "").lower()
|
||||
or value in b.get("notes", "").lower()
|
||||
}
|
||||
|
||||
if operation == "TERM_SET":
|
||||
terms = node.get("value", [])
|
||||
terms_lower = [t.lower() for t in terms]
|
||||
result = set()
|
||||
for b in bookmarks:
|
||||
text = (
|
||||
f"{b.get('title', '')} {b.get('description', '')} {b.get('url', '')} {b.get('notes', '')}"
|
||||
).lower()
|
||||
if any(term in text for term in terms_lower):
|
||||
result.add(b["id"])
|
||||
return result
|
||||
|
||||
if operation.startswith("FIELD:"):
|
||||
field = operation.split(":", 1)[1].upper()
|
||||
value = node.get("value", "").lower()
|
||||
return _evaluate_field(field, value, bookmarks)
|
||||
|
||||
logger.warning(f"Unknown operation: {operation}")
|
||||
return set()
|
||||
|
||||
|
||||
def _evaluate_field(field: str, value: str, bookmarks: List[Dict[str, Any]]) -> set:
|
||||
if field == "URL":
|
||||
return {b["id"] for b in bookmarks if value in b.get("url", "").lower()}
|
||||
if field == "TAG":
|
||||
return {
|
||||
b["id"]
|
||||
for b in bookmarks
|
||||
if any(value in t.lower() for t in (b.get("tags") or []))
|
||||
}
|
||||
if field == "TITLE":
|
||||
return {b["id"] for b in bookmarks if value in b.get("title", "").lower()}
|
||||
if field == "DESCRIPTION":
|
||||
return {b["id"] for b in bookmarks if value in b.get("description", "").lower()}
|
||||
if field == "PATH":
|
||||
return {b["id"] for b in bookmarks if value in (b.get("path") or "").lower()}
|
||||
if field == "ID":
|
||||
return {b["id"] for b in bookmarks if b.get("id") == value}
|
||||
|
||||
logger.warning(f"Unknown field: {field}")
|
||||
return set()
|
||||
|
||||
@@ -2,17 +2,18 @@
|
||||
LinkSyncServer - Query Expression Parser
|
||||
"""
|
||||
|
||||
import re
|
||||
from typing import Union, Dict, List, Any
|
||||
from enum import Enum
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
class TokenType(Enum):
    """Kinds of token used by the query lexer and parser.

    Each member's value is its own name as a string.
    """

    # Boolean keywords: AND, OR, XOR.
    OPERATOR = "OPERATOR"
    # A bare or quoted search word; also the value that follows a FIELD token.
    TERM = "TERM"
    # NOTE(review): no consumer of VALUE is visible in the parser in this
    # file - presumably kept for compatibility; confirm before removing.
    VALUE = "VALUE"
    # Upper-cased field name taken from a `field:value` word (e.g. TAG, URL).
    FIELD = "FIELD"
    LPAREN = "LPAREN"
    RPAREN = "RPAREN"
    # NOTE(review): as with VALUE, no consumer of COLON is visible here.
    COLON = "COLON"
    # Separator between the items of a comma-separated term list.
    COMMA = "COMMA"
|
||||
|
||||
|
||||
class Token:
|
||||
@@ -27,325 +28,232 @@ class Token:
|
||||
|
||||
|
||||
class QuerySyntaxError(Exception):
    """Syntax error in a query expression.

    Attributes:
        message: The bare error text, without position information.
        line: Line of the offending input, or ``None`` when unknown.
        column: Column of the offending input, or ``None`` when unknown.

    ``str(error)`` is ``"<message> at line <line>, column <column>"`` when
    both line and column are given, otherwise just the message.
    """

    def __init__(self, message: str, line: Optional[int] = None, column: Optional[int] = None):
        self.message = message
        self.line = line
        self.column = column
        # Initialise the base class exactly once (the interleaved original did
        # it twice). `is not None` states the "position known" check directly.
        if line is not None and column is not None:
            super().__init__(f"{message} at line {line}, column {column}")
        else:
            super().__init__(message)
|
||||
|
||||
|
||||
def lex(expression: str) -> List["Token"]:
    """
    Lexical analysis - convert a query string into a list of tokens.

    Token rules:
        * ``(`` ``)`` ``,`` become LPAREN / RPAREN / COMMA tokens.
        * A whole word equal to ``AND``, ``OR`` or ``XOR`` (upper case)
          becomes an OPERATOR token.
        * Text inside single or double quotes becomes one TERM token (quotes
          removed). A missing closing quote is tolerated: the rest of the
          input becomes the term.
        * A word of the form ``field:value`` whose field is one of ``url``,
          ``tag``, ``title``, ``description``, ``path`` or ``id`` becomes a
          FIELD token (field name upper-cased) followed by a TERM token.
        * Any other word becomes a TERM token.
        * Spaces, tabs and newlines separate tokens; any other unrecognised
          character is skipped.

    Every token is created as ``Token(type, value, line, column)`` with
    1-based line and column.

    Args:
        expression: Raw query text; ``None`` or an empty string yields no
            tokens.

    Returns:
        The tokens in input order.
    """
    tokens = []
    if not expression:
        return tokens

    operators = ("AND", "OR", "XOR")
    known_fields = ("url", "tag", "title", "description", "path", "id")
    word_chars = "-_.:/?&=%"

    length = len(expression)
    pos = 0
    line = 1
    column = 1

    while pos < length:
        ch = expression[pos]

        if ch in " \t":
            pos += 1
            column += 1
            continue

        if ch == "\n":
            line += 1
            column = 1
            pos += 1
            continue

        if ch == "(":
            tokens.append(Token(TokenType.LPAREN, "(", line, column))
            pos += 1
            column += 1
            continue

        if ch == ")":
            tokens.append(Token(TokenType.RPAREN, ")", line, column))
            pos += 1
            column += 1
            continue

        if ch == ",":
            tokens.append(Token(TokenType.COMMA, ",", line, column))
            pos += 1
            column += 1
            continue

        if ch in ("'", '"'):
            quote = ch
            pos += 1
            column += 1
            start = pos
            while pos < length and expression[pos] != quote:
                pos += 1
            value = expression[start:pos]
            tokens.append(Token(TokenType.TERM, value, line, column))
            pos += 1  # step over the closing quote (or past the end)
            column += len(value) + 1
            continue

        if ch.isalnum() or ch in "-_.":
            start = pos
            start_col = column
            while pos < length and (expression[pos].isalnum() or expression[pos] in word_chars):
                pos += 1
            value = expression[start:pos]
            column += pos - start

            # Operators are recognised only as whole words. Matching them as
            # prefixes split words such as "ANDROID" or "ORANGE" into an
            # operator plus a fragment.
            if value in operators:
                tokens.append(Token(TokenType.OPERATOR, value, line, start_col))
                continue

            field, separator, field_value = value.partition(":")
            if separator and field in known_fields:
                tokens.append(Token(TokenType.FIELD, field.upper(), line, start_col))
                tokens.append(Token(TokenType.TERM, field_value, line, start_col + len(field) + 1))
                continue

            tokens.append(Token(TokenType.TERM, value, line, start_col))
            continue

        # Unknown character: skip it.
        pos += 1
        column += 1

    return tokens
|
||||
|
||||
|
||||
class ASTNode:
    """Abstract Syntax Tree node for a parsed query.

    Attributes:
        node_type: Operation name, e.g. ``"OR"``, ``"TERM"``, ``"FIELD:TAG"``.
        value: Payload of a leaf node (``None`` when absent).
        children: Child nodes of a compound node (empty list for a leaf).
    """

    def __init__(self, node_type: str, value: Any = None, children: Optional[List["ASTNode"]] = None):
        self.node_type = node_type
        self.value = value
        self.children = children or []

    def to_dict(self) -> Dict[str, Any]:
        """Return the node as a plain dict.

        A node with children becomes ``{"operation", "operands"}``; a leaf
        with a value becomes ``{"operation", "value"}``; otherwise only
        ``{"operation"}`` is returned.
        """
        if self.children:
            return {
                "operation": self.node_type,
                "operands": [child.to_dict() for child in self.children],
            }
        if self.value is not None:
            return {"operation": self.node_type, "value": self.value}
        return {"operation": self.node_type}

    def __repr__(self):
        # The interleaved original first returned via `self.operator`, an
        # attribute this class does not set.
        return f"ASTNode({self.node_type}, {self.value!r}, {self.children})"


def parse_operator(token: "Token") -> str:
    """Convert an operator token to its lower-case operator string.

    Returns:
        ``'and'``, ``'or'`` or ``'xor'``.

    Raises:
        QuerySyntaxError: If the token is not an OPERATOR token or its value
            is not one of AND / OR / XOR.
    """
    if token.type != TokenType.OPERATOR:
        raise QuerySyntaxError(f"Expected operator, got {token.value}")

    names = {'AND': 'and', 'OR': 'or', 'XOR': 'xor'}
    if token.value not in names:
        raise QuerySyntaxError(f"Unknown operator: {token.value}")
    return names[token.value]
|
||||
|
||||
|
||||
class QueryParser:
    """Recursive-descent parser for query expressions.

    Precedence, loosest to tightest: ``OR``, ``AND``, ``XOR``, then primaries
    (parenthesised sub-expressions, ``field:value`` pairs, terms and
    comma-separated term lists). Operators of the same kind associate to the
    left.

    This span previously interleaved a legacy ``current_token``-based
    implementation with this one. The legacy methods referenced attributes
    and methods that do not exist, stored a boolean in ``self.error`` that
    shadowed the ``error`` method, and defined ``parse`` a second time. Only
    the ``_current`` / ``_advance`` based implementation is kept.
    """

    def __init__(self):
        self.tokens: List[Token] = []
        self.pos: int = 0

    def _current(self) -> Optional["Token"]:
        """Return the token at the cursor, or ``None`` at end of input."""
        if self.pos < len(self.tokens):
            return self.tokens[self.pos]
        return None

    def _advance(self) -> Optional["Token"]:
        """Return the current token (or ``None``) and move the cursor on."""
        token = self._current()
        self.pos += 1
        return token

    def _expect(self, token_type: "TokenType", value: str = None) -> "Token":
        """Consume and return the current token if it matches.

        Raises:
            QuerySyntaxError: At end of input, on a token of another type,
                or (when ``value`` is given) on a different token value.
        """
        token = self._current()
        if token is None:
            raise QuerySyntaxError(f"Expected {token_type.value}, got end of input")
        if token.type != token_type:
            raise QuerySyntaxError(f"Expected {token_type.value}, got {token.type.value}")
        if value is not None and token.value != value:
            raise QuerySyntaxError(f"Expected '{value}', got '{token.value}'")
        return self._advance()

    def _at_operator(self, name: str) -> bool:
        """Tell whether the current token is the operator ``name``."""
        token = self._current()
        return (
            token is not None
            and token.type == TokenType.OPERATOR
            and token.value == name
        )

    def parse(self, expression: str) -> Optional[Dict[str, Any]]:
        """Parse a complete expression into its dict form.

        Returns:
            The ``ASTNode.to_dict()`` form of the expression, or ``None``
            when the input is empty, blank or produces no tokens.

        Raises:
            QuerySyntaxError: On malformed input, including tokens left over
                after a complete expression.
        """
        if not expression or not expression.strip():
            return None

        self.tokens = lex(expression)
        self.pos = 0

        if not self.tokens:
            return None

        node = self._parse_or()

        leftover = self._current()
        if leftover is not None:
            raise QuerySyntaxError(f"Unexpected token: {leftover.value}")

        return node.to_dict() if node else None

    def _parse_or(self) -> "ASTNode":
        """Parse ``and_expr (OR and_expr)*``."""
        left = self._parse_and()
        while self._at_operator("OR"):
            self._advance()
            right = self._parse_and()
            left = ASTNode("OR", children=[left, right])
        return left

    def _parse_and(self) -> "ASTNode":
        """Parse ``xor_expr (AND xor_expr)*``."""
        left = self._parse_xor()
        while self._at_operator("AND"):
            self._advance()
            right = self._parse_xor()
            left = ASTNode("AND", children=[left, right])
        return left

    def _parse_xor(self) -> "ASTNode":
        """Parse ``primary (XOR primary)*``."""
        left = self._parse_primary()
        while self._at_operator("XOR"):
            self._advance()
            right = self._parse_primary()
            left = ASTNode("XOR", children=[left, right])
        return left

    def _parse_primary(self) -> "ASTNode":
        """Parse a parenthesised expression, a field filter or a term.

        Raises:
            QuerySyntaxError: At end of input, on a missing ``)`` or on any
                token that cannot start a primary.
        """
        token = self._current()
        if token is None:
            raise QuerySyntaxError("Unexpected end of input")

        if token.type == TokenType.LPAREN:
            self._advance()
            node = self._parse_or()
            self._expect(TokenType.RPAREN)
            return node

        if token.type == TokenType.FIELD:
            field_token = self._advance()
            value_token = self._current()
            if value_token is not None and value_token.type == TokenType.TERM:
                self._advance()
                return ASTNode(f"FIELD:{field_token.value}", value=value_token.value)
            # A field without a following term keeps an empty value.
            return ASTNode(f"FIELD:{field_token.value}", value="")

        if token.type == TokenType.TERM:
            self._advance()
            return self._parse_term(token)

        raise QuerySyntaxError(f"Unexpected token: {token.value}")

    def _parse_term(self, token: "Token") -> "ASTNode":
        """Build a TERM node, or a TERM_SET when commas follow the term.

        ``token`` has already been consumed by the caller. A comma that is
        not followed by a term is consumed without adding anything.
        """
        next_token = self._current()
        if next_token is None or next_token.type != TokenType.COMMA:
            return ASTNode("TERM", value=token.value)

        terms = [token.value]
        while self._current() is not None and self._current().type == TokenType.COMMA:
            self._advance()
            term_token = self._current()
            if term_token is not None and term_token.type == TokenType.TERM:
                terms.append(term_token.value)
                self._advance()
        return ASTNode("TERM_SET", value=terms)
|
||||
|
||||
Reference in New Issue
Block a user