diff options
| author | German M. Bravo <german.mb@deipi.com> | 2011-12-25 12:34:24 -0600 |
|---|---|---|
| committer | German M. Bravo <german.mb@deipi.com> | 2011-12-25 12:34:24 -0600 |
| commit | 05e44e1fd455de2402192edb61052f7dccac7faa (patch) | |
| tree | 58e84786296673fcc6c94a4985bee6ff980cb1e8 /scss/src | |
| parent | 759565792cda8da5c8d7657176dbc3745267e220 (diff) | |
| download | pyscss-05e44e1fd455de2402192edb61052f7dccac7faa.tar.gz | |
C extension modules for scanning and locating blocks finished (scanning seems slower atm)
Diffstat (limited to 'scss/src')
| -rw-r--r-- | scss/src/_scss.c | 902 | ||||
| -rw-r--r-- | scss/src/block_locator.c | 517 | ||||
| -rw-r--r-- | scss/src/block_locator.h | 50 | ||||
| -rwxr-xr-x | scss/src/block_locator.py | 559 | ||||
| -rwxr-xr-x | scss/src/build.sh | 2 | ||||
| -rw-r--r-- | scss/src/grammar/grammar.g | 13 | ||||
| -rw-r--r-- | scss/src/grammar/grammar.py | 80 | ||||
| -rw-r--r-- | scss/src/grammar/yapps2.py | 9 | ||||
| -rw-r--r-- | scss/src/grammar/yappsrt.py | 25 | ||||
| -rw-r--r-- | scss/src/scanner.c | 406 | ||||
| -rw-r--r-- | scss/src/scanner.h | 62 | ||||
| -rwxr-xr-x[-rw-r--r--] | scss/src/scanner.py | 698 |
12 files changed, 2282 insertions, 1041 deletions
diff --git a/scss/src/_scss.c b/scss/src/_scss.c index 85b42dd..038905b 100644 --- a/scss/src/_scss.c +++ b/scss/src/_scss.c @@ -9,11 +9,8 @@ * Copyright (c) 2011 German M. Bravo (Kronuz), All rights reserved. */ #include <Python.h> - -/* Counter type */ -staticforward PyTypeObject scss_BlockLocatorType; - -#undef DEBUG +#include "block_locator.h" +#include "scanner.h" void reprl(char *str, int len) { char c, @@ -43,547 +40,419 @@ void repr(char *str, char *str2) { reprl(str, (int)(str2 - str)); } -typedef struct { - int error; - int lineno; - char *selprop; - int selprop_sz; - char *codestr; - int codestr_sz; -} scss_Block; +/* BlockLocator */ +staticforward PyTypeObject scss_BlockLocatorType; typedef struct { PyObject_HEAD - char *exc; - char *_codestr; - char *codestr; - char *codestr_ptr; - int codestr_sz; - int lineno; - int par; - char instr; - int depth; - int skip; - char *thin; - char *init; - char *safe; - char *lose; - char *start; - char *end; - scss_Block block; + BlockLocator *locator; } scss_BlockLocator; - -int _strip(char *begin, char *end, int *lineno) { - // " 1\0 some, \n 2\0 aca " - int _cnt, - cnt = 0, - pass = 1, - addnl = 0; - char c, - *line = NULL, - *first = begin, - *last = begin, - *write = lineno ? 
begin : NULL; - while (begin < end) { - c = *begin; - if (c == '\0') { - if (line == NULL) { - line = first; - if (lineno) { - sscanf(line, "%d", lineno); - } - } - first = last = begin + 1; - pass = 1; - } else if (c == '\n') { - _cnt = (int)(last - first); - if (_cnt > 0) { - cnt += _cnt + addnl; - if (write != NULL) { - if (addnl) { - *write++ = '\n'; - } - while (first < last) { - *write++ = *first++; - } - *write = '\0'; - addnl = 1; - } - } - first = last = begin + 1; - pass = 1; - } else if (c == ' ' || c == '\t') { - if (pass) { - first = last = begin + 1; - } - } else { - last = begin + 1; - pass = 0; - } - begin++; - } - _cnt = (int)(last - first); - if (_cnt > 0) { - cnt += _cnt + addnl; - if (write != NULL) { - if (addnl) { - *write++ = '\n'; - } - while (first < last) { - *write++ = *first++; - } - *write = '\0'; - } +static int +scss_BlockLocator_init(scss_BlockLocator *self, PyObject *args, PyObject *kwds) +{ + char *codestr; + int codestr_sz; + if (!PyArg_ParseTuple(args, "s#", &codestr, &codestr_sz)) { + return -1; } - return cnt; -} - - -typedef void scss_Callback(scss_BlockLocator*); - + self->locator = BlockLocator_new(codestr, codestr_sz); -void _start_string(scss_BlockLocator *self) { #ifdef DEBUG - PySys_WriteStderr("_start_string\n"); + PySys_WriteStderr("Scss BlockLocator object initialized! 
(%lu)\n", sizeof(scss_BlockLocator)); #endif - // A string starts - self->instr = *(self->codestr_ptr); -} -void _end_string(scss_BlockLocator *self) { - #ifdef DEBUG - PySys_WriteStderr("_end_string\n"); - #endif - // A string ends (FIXME: needs to accept escaped characters) - self->instr = 0; + return 0; } -void _start_parenthesis(scss_BlockLocator *self) { - #ifdef DEBUG - PySys_WriteStderr("_start_parenthesis\n"); - #endif - // parenthesis begins: - self->par++; - self->thin = NULL; - self->safe = self->codestr_ptr + 1; -} +static void +scss_BlockLocator_dealloc(scss_BlockLocator *self) +{ + BlockLocator_del(self->locator); + + self->ob_type->tp_free((PyObject*)self); -void _end_parenthesis(scss_BlockLocator *self) { #ifdef DEBUG - PySys_WriteStderr("_end_parenthesis\n"); + PySys_WriteStderr("Scss BlockLocator object destroyed!\n"); #endif - self->par--; } -void _flush_properties(scss_BlockLocator *self) { - #ifdef DEBUG - PySys_WriteStderr("_flush_properties\n"); - #endif - // Flush properties - int len, lineno = -1; - if (self->lose <= self->init) { - len = _strip(self->lose, self->init, &lineno); - if (len) { - if (lineno != -1) { - self->lineno = lineno; - } +scss_BlockLocator* +scss_BlockLocator_iter(scss_BlockLocator *self) +{ + Py_INCREF(self); + return self; +} - self->block.selprop = self->lose; - self->block.selprop_sz = len; - self->block.codestr = NULL; - self->block.codestr_sz = 0; - self->block.lineno = self->lineno; - self->block.error = -1; - } - self->lose = self->init; +PyObject* +scss_BlockLocator_iternext(scss_BlockLocator *self) +{ + Block *block; + + block = BlockLocator_iternext(self->locator); + + if (block->error == -1) { + return Py_BuildValue( + "is#s#", + block->lineno, + block->selprop, + block->selprop_sz, + block->codestr, + block->codestr_sz + ); } -} -void _start_block1(scss_BlockLocator *self) { - #ifdef DEBUG - PySys_WriteStderr("_start_block1\n"); - #endif - // Start block: - if (self->codestr_ptr > self->codestr && 
*(self->codestr_ptr - 1) == '#') { - self->skip = 1; - } else { - self->start = self->codestr_ptr; - if (self->thin != NULL && _strip(self->thin, self->codestr_ptr, NULL)) { - self->init = self->thin; - } - _flush_properties(self); - self->thin = NULL; + if (self->locator->exc) { + PyErr_SetString(PyExc_Exception, self->locator->exc); + return NULL; } - self->depth++; -} -void _start_block(scss_BlockLocator *self) { - #ifdef DEBUG - PySys_WriteStderr("_start_block\n"); - #endif - // Start block: - self->depth++; + /* Raising of standard StopIteration exception with empty value. */ + PyErr_SetNone(PyExc_StopIteration); + return NULL; } -void _end_block1(scss_BlockLocator *self) { - #ifdef DEBUG - PySys_WriteStderr("_end_block1\n"); - #endif - // Block ends: - int len, lineno = -1; - self->depth--; - if (!self->skip) { - self->end = self->codestr_ptr; - len = _strip(self->init, self->start, &lineno); - if (lineno != -1) { - self->lineno = lineno; - } +/* Type definition */ - self->block.selprop = self->init; - self->block.selprop_sz = len; - self->block.codestr = (self->start + 1); - self->block.codestr_sz = (int)(self->end - (self->start + 1)); - self->block.lineno = self->lineno; - self->block.error = -1; +static PyTypeObject scss_BlockLocatorType = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "scss._BlockLocator", /* tp_name */ + sizeof(scss_BlockLocator), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor)scss_BlockLocator_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER, /* tp_flags */ + "Internal BlockLocator iterator object.", /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* 
tp_weaklistoffset */ + (getiterfunc)scss_BlockLocator_iter, /* tp_iter: __iter__() method */ + (iternextfunc)scss_BlockLocator_iternext, /* tp_iternext: next() method */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)scss_BlockLocator_init, /* tp_init */ +}; - self->init = self->safe = self->lose = self->end + 1; - self->thin = NULL; - } - self->skip = 0; -} -void _end_block(scss_BlockLocator *self) { - #ifdef DEBUG - PySys_WriteStderr("_end_block\n"); - #endif - // Block ends: - self->depth--; -} +/* Scanner */ +static PyObject *PyExc_scss_NoMoreTokens; -void _end_property(scss_BlockLocator *self) { - #ifdef DEBUG - PySys_WriteStderr("_end_property\n"); - #endif - // End of property (or block): - int len, lineno = -1; - self->init = self->codestr_ptr; - if (self->lose <= self->init) { - len = _strip(self->lose, self->init, &lineno); - if (len) { - if (lineno != -1) { - self->lineno = lineno; - } +staticforward PyTypeObject scss_ScannerType; - self->block.selprop = self->lose; - self->block.selprop_sz = len; - self->block.codestr = NULL; - self->block.codestr_sz = 0; - self->block.lineno = self->lineno; - self->block.error = -1; - } - self->init = self->safe = self->lose = self->codestr_ptr + 1; - } - self->thin = NULL; -} +typedef struct { + PyObject_HEAD + Scanner *scanner; +} scss_Scanner; -void _mark_safe(scss_BlockLocator *self) { - #ifdef DEBUG - PySys_WriteStderr("_mark_safe\n"); - #endif - // We are on a safe zone - if (self->thin != NULL && _strip(self->thin, self->codestr_ptr, NULL)) { - self->init = self->thin; - } - self->thin = NULL; - self->safe = self->codestr_ptr + 1; -} -void _mark_thin(scss_BlockLocator *self) { - #ifdef DEBUG - PySys_WriteStderr("_mark_thin\n"); - #endif - // Step on thin ice, if it breaks, it breaks here - if (self->thin != NULL && _strip(self->thin, self->codestr_ptr, NULL)) { - self->init 
= self->thin; - self->thin = self->codestr_ptr + 1; - } else if (self->thin == NULL && _strip(self->safe, self->codestr_ptr, NULL)) { - self->thin = self->codestr_ptr + 1; +static PyObject * +scss_Scanner_rewind(scss_Scanner *self, PyObject *args) +{ + int token_num; + if (PyArg_ParseTuple(args, "i", &token_num)) { + Scanner_rewind(self->scanner, token_num); } + return (PyObject *)Py_None; } -int scss_function_map_initialized = 0; -scss_Callback* scss_function_map[256 * 256 * 2 * 3]; // (c, instr, par, depth) -void init_function_map(void) { - int i; - - if (scss_function_map_initialized) - return; - scss_function_map_initialized = 1; - - for (i = 0; i < 256 * 256 * 2 * 3; i++) { - scss_function_map[i] = NULL; +static PyObject * +scss_Scanner_scan(scss_Scanner *self, PyObject *args) +{ + PyObject *item; + int i, is_tuple; + long size; + + Token *p_token; + + PyObject *restrictions; + Pattern _restrictions[100]; + int restrictions_sz = 0; + + if (PyArg_ParseTuple(args, "|O", &restrictions)) { + is_tuple = PyTuple_Check(restrictions); + if (is_tuple || PyList_Check(restrictions)) { + size = is_tuple ? PyTuple_Size(restrictions) : PyList_Size(restrictions); + for (i = 0; i < size; ++i) { + item = is_tuple ? 
PyTuple_GetItem(restrictions, i) : PyList_GetItem(restrictions, i); + if (PyString_Check(item)) { + _restrictions[restrictions_sz].tok = PyString_AsString(item); + _restrictions[restrictions_sz].expr = NULL; + restrictions_sz++; + } + } + } + p_token = Scanner_token(self->scanner, self->scanner->tokens_sz, _restrictions, restrictions_sz); + if (p_token == (Token *)SCANNER_EXC_BAD_TOKEN) { + PyErr_SetString(PyExc_SyntaxError, self->scanner->exc); + return NULL; + } + if (p_token == (Token *)SCANNER_EXC_RESTRICTED) { + PyErr_SetString(PyExc_SyntaxError, self->scanner->exc); + return NULL; + } + if (p_token == (Token *)SCANNER_EXC_UNIMPLEMENTED) { + PyErr_SetString(PyExc_NotImplementedError, self->scanner->exc); + return NULL; + } + if (p_token == (Token *)SCANNER_EXC_NO_MORE_TOKENS) { + PyErr_SetNone(PyExc_scss_NoMoreTokens); + return NULL; + } + if (p_token < 0) { + PyErr_SetNone(PyExc_Exception); + return NULL; + } + return Py_BuildValue( + "iiss#", + p_token->string - self->scanner->input, + p_token->string - self->scanner->input + p_token->string_sz, + p_token->regex->tok, + p_token->string, + p_token->string_sz + ); } - scss_function_map[(int)'\"' + 256*0 + 256*256*0 + 256*256*2*0] = _start_string; - scss_function_map[(int)'\'' + 256*0 + 256*256*0 + 256*256*2*0] = _start_string; - scss_function_map[(int)'\"' + 256*0 + 256*256*1 + 256*256*2*0] = _start_string; - scss_function_map[(int)'\'' + 256*0 + 256*256*1 + 256*256*2*0] = _start_string; - scss_function_map[(int)'\"' + 256*0 + 256*256*0 + 256*256*2*1] = _start_string; - scss_function_map[(int)'\'' + 256*0 + 256*256*0 + 256*256*2*1] = _start_string; - scss_function_map[(int)'\"' + 256*0 + 256*256*1 + 256*256*2*1] = _start_string; - scss_function_map[(int)'\'' + 256*0 + 256*256*1 + 256*256*2*1] = _start_string; - scss_function_map[(int)'\"' + 256*0 + 256*256*0 + 256*256*2*2] = _start_string; - scss_function_map[(int)'\'' + 256*0 + 256*256*0 + 256*256*2*2] = _start_string; - scss_function_map[(int)'\"' + 256*0 + 
256*256*1 + 256*256*2*2] = _start_string; - scss_function_map[(int)'\'' + 256*0 + 256*256*1 + 256*256*2*2] = _start_string; - - scss_function_map[(int)'\"' + 256*(int)'\"' + 256*256*0 + 256*256*2*0] = _end_string; - scss_function_map[(int)'\'' + 256*(int)'\'' + 256*256*0 + 256*256*2*0] = _end_string; - scss_function_map[(int)'\"' + 256*(int)'\"' + 256*256*1 + 256*256*2*0] = _end_string; - scss_function_map[(int)'\'' + 256*(int)'\'' + 256*256*1 + 256*256*2*0] = _end_string; - scss_function_map[(int)'\"' + 256*(int)'\"' + 256*256*0 + 256*256*2*1] = _end_string; - scss_function_map[(int)'\'' + 256*(int)'\'' + 256*256*0 + 256*256*2*1] = _end_string; - scss_function_map[(int)'\"' + 256*(int)'\"' + 256*256*1 + 256*256*2*1] = _end_string; - scss_function_map[(int)'\'' + 256*(int)'\'' + 256*256*1 + 256*256*2*1] = _end_string; - scss_function_map[(int)'\"' + 256*(int)'\"' + 256*256*0 + 256*256*2*2] = _end_string; - scss_function_map[(int)'\'' + 256*(int)'\'' + 256*256*0 + 256*256*2*2] = _end_string; - scss_function_map[(int)'\"' + 256*(int)'\"' + 256*256*1 + 256*256*2*2] = _end_string; - scss_function_map[(int)'\'' + 256*(int)'\'' + 256*256*1 + 256*256*2*2] = _end_string; - - scss_function_map[(int)'(' + 256*0 + 256*256*0 + 256*256*2*0] = _start_parenthesis; - scss_function_map[(int)'(' + 256*0 + 256*256*1 + 256*256*2*0] = _start_parenthesis; - scss_function_map[(int)'(' + 256*0 + 256*256*0 + 256*256*2*1] = _start_parenthesis; - scss_function_map[(int)'(' + 256*0 + 256*256*1 + 256*256*2*1] = _start_parenthesis; - scss_function_map[(int)'(' + 256*0 + 256*256*0 + 256*256*2*2] = _start_parenthesis; - scss_function_map[(int)'(' + 256*0 + 256*256*1 + 256*256*2*2] = _start_parenthesis; - - scss_function_map[(int)')' + 256*0 + 256*256*1 + 256*256*2*0] = _end_parenthesis; - scss_function_map[(int)')' + 256*0 + 256*256*1 + 256*256*2*1] = _end_parenthesis; - scss_function_map[(int)')' + 256*0 + 256*256*1 + 256*256*2*2] = _end_parenthesis; - - scss_function_map[(int)'{' + 256*0 + 
256*256*0 + 256*256*2*0] = _start_block1; - scss_function_map[(int)'{' + 256*0 + 256*256*0 + 256*256*2*1] = _start_block; - scss_function_map[(int)'{' + 256*0 + 256*256*0 + 256*256*2*2] = _start_block; - - scss_function_map[(int)'}' + 256*0 + 256*256*0 + 256*256*2*1] = _end_block1; - scss_function_map[(int)'}' + 256*0 + 256*256*0 + 256*256*2*2] = _end_block; - - scss_function_map[(int)';' + 256*0 + 256*256*0 + 256*256*2*0] = _end_property; - - scss_function_map[(int)',' + 256*0 + 256*256*0 + 256*256*2*0] = _mark_safe; - - scss_function_map[(int)'\n' + 256*0 + 256*256*0 + 256*256*2*0] = _mark_thin; - - scss_function_map[0 + 256*0 + 256*256*0 + 256*256*2*0] = _flush_properties; - scss_function_map[0 + 256*0 + 256*256*0 + 256*256*2*1] = _flush_properties; - scss_function_map[0 + 256*0 + 256*256*0 + 256*256*2*2] = _flush_properties; - #ifdef DEBUG - PySys_WriteStderr("Scss function maps initialized!\n"); - #endif + return (PyObject *)Py_None; } - -/* constructor */ - static PyObject * -scss_BlockLocator_new(char *codestr, int codestr_sz) +scss_Scanner_token(scss_Scanner *self, PyObject *args) { - scss_BlockLocator *self; - - init_function_map(); - - self = PyObject_New(scss_BlockLocator, &scss_BlockLocatorType);; - if (self) { - self->_codestr = (char *)malloc(codestr_sz); - memcpy(self->_codestr, codestr, codestr_sz); - self->codestr_sz = codestr_sz; - self->codestr = (char *)malloc(self->codestr_sz); - memcpy(self->codestr, self->_codestr, self->codestr_sz); - self->codestr_ptr = self->codestr; - self->lineno = 0; - self->par = 0; - self->instr = 0; - self->depth = 0; - self->skip = 0; - self->thin = self->codestr; - self->init = self->codestr; - self->safe = self->codestr; - self->lose = self->codestr; - self->start = NULL; - self->end = NULL; - self->exc = NULL; - - #ifdef DEBUG - PySys_WriteStderr("Scss BlockLocator object initialized! 
(%lu)\n", sizeof(scss_BlockLocator)); - #endif + PyObject *item; + int i, is_tuple; + long size; + + Token *p_token; + + int token_num; + PyObject *restrictions; + Pattern _restrictions[100]; + int restrictions_sz = 0; + + if (PyArg_ParseTuple(args, "i|O", &token_num, &restrictions)) { + is_tuple = PyTuple_Check(restrictions); + if (is_tuple || PyList_Check(restrictions)) { + size = is_tuple ? PyTuple_Size(restrictions) : PyList_Size(restrictions); + for (i = 0; i < size; ++i) { + item = is_tuple ? PyTuple_GetItem(restrictions, i) : PyList_GetItem(restrictions, i); + if (PyString_Check(item)) { + _restrictions[restrictions_sz].tok = PyString_AsString(item); + _restrictions[restrictions_sz].expr = NULL; + restrictions_sz++; + } + } + } + p_token = Scanner_token(self->scanner, token_num, _restrictions, restrictions_sz); + if (p_token == (Token *)SCANNER_EXC_BAD_TOKEN) { + PyErr_SetString(PyExc_SyntaxError, self->scanner->exc); + return NULL; + } + if (p_token == (Token *)SCANNER_EXC_RESTRICTED) { + PyErr_SetString(PyExc_SyntaxError, self->scanner->exc); + return NULL; + } + if (p_token == (Token *)SCANNER_EXC_UNIMPLEMENTED) { + PyErr_SetString(PyExc_NotImplementedError, self->scanner->exc); + return NULL; + } + if (p_token == (Token *)SCANNER_EXC_NO_MORE_TOKENS) { + PyErr_SetNone(PyExc_scss_NoMoreTokens); + return NULL; + } + if (p_token < 0) { + PyErr_SetNone(PyExc_Exception); + return NULL; + } + return Py_BuildValue( + "iiss#", + p_token->string - self->scanner->input, + p_token->string - self->scanner->input + p_token->string_sz, + p_token->regex->tok, + p_token->string, + p_token->string_sz + ); } - - return (PyObject *)self; + return (PyObject *)Py_None; } -static void -scss_BlockLocator_rewind(scss_BlockLocator *self) +static PyObject * +scss_Scanner_reset(scss_Scanner *self, PyObject *args, PyObject *kwds) { - free(self->codestr); - self->codestr = (char *)malloc(self->codestr_sz); - memcpy(self->codestr, self->_codestr, self->codestr_sz); - self->codestr_ptr 
= self->codestr; - self->lineno = 0; - self->par = 0; - self->instr = 0; - self->depth = 0; - self->skip = 0; - self->thin = self->codestr; - self->init = self->codestr; - self->safe = self->codestr; - self->lose = self->codestr; - self->start = NULL; - self->end = NULL; - self->exc = NULL; + char *input = NULL; + int input_sz = 0; - #ifdef DEBUG - PySys_WriteStderr("Scss BlockLocator object rewound!\n"); - #endif + if (PyArg_ParseTuple(args, "|s#", &input, &input_sz)) { + Scanner_reset(self->scanner, input, input_sz); + } + + return (PyObject *)Py_None; } -static void -scss_BlockLocator_dealloc(scss_BlockLocator *self) +static int +scss_Scanner_init(scss_Scanner *self, PyObject *args, PyObject *kwds) { - free(self->codestr); - free(self->_codestr); + PyObject *item, *item0, *item1; + int i, is_tuple, _is_tuple; + long size; + + PyObject *patterns, *ignore; + Pattern _patterns[100]; + int patterns_sz = 0; + Pattern _ignore[100]; + int ignore_sz = 0; + char *input = NULL; + int input_sz = 0; + + if (!PyArg_ParseTuple(args, "OO|s#", &patterns, &ignore, &input, &input_sz)) { + return -1; + } + if (!Scanner_initialized()) { + is_tuple = PyTuple_Check(patterns); + if (is_tuple || PyList_Check(patterns)) { + size = is_tuple ? PyTuple_Size(patterns) : PyList_Size(patterns); + for (i = 0; i < size; ++i) { + item = is_tuple ? PyTuple_GetItem(patterns, i) : PyList_GetItem(patterns, i); + _is_tuple = PyTuple_Check(item); + if (_is_tuple || PyList_Check(item)) { + item0 = _is_tuple ? PyTuple_GetItem(item, 0) : PyList_GetItem(item, 0); + item1 = _is_tuple ? PyTuple_GetItem(item, 1) : PyList_GetItem(item, 1); + if (PyString_Check(item0) && PyString_Check(item1)) { + _patterns[patterns_sz].tok = PyString_AsString(item0); + _patterns[patterns_sz].expr = PyString_AsString(item1); + patterns_sz++; + } + } + } + } + Scanner_initialize(_patterns, patterns_sz); + } + is_tuple = PyTuple_Check(ignore); + if (is_tuple || PyList_Check(ignore)) { + size = is_tuple ? 
PyTuple_Size(ignore) : PyList_Size(ignore); + for (i = 0; i < size; ++i) { + item = is_tuple ? PyTuple_GetItem(ignore, i) : PyList_GetItem(ignore, i); + if (PyString_Check(item)) { + _ignore[ignore_sz].tok = PyString_AsString(item); + _ignore[ignore_sz].expr = NULL; + ignore_sz++; + } + } + } - self->ob_type->tp_free((PyObject*)self); + self->scanner = Scanner_new(_patterns, patterns_sz, _ignore, ignore_sz, input, input_sz); #ifdef DEBUG - PySys_WriteStderr("Scss BlockLocator object destroyed!\n"); + PySys_WriteStderr("Scss Scanner object initialized! (%lu)\n", sizeof(scss_Scanner)); #endif -} - -/*****/ - -scss_BlockLocator* -scss_BlockLocator_iter(scss_BlockLocator *self) -{ - Py_INCREF(self); - return self; + return 0; } -PyObject* -scss_BlockLocator_iternext(scss_BlockLocator *self) +static PyObject * +scss_Scanner_repr(scss_Scanner *self) { - scss_Callback *fn; - char c = 0; - char *codestr_end = self->codestr + self->codestr_sz; - - memset(&self->block, 0, sizeof(scss_Block)); - - while (self->codestr_ptr < codestr_end) { - c = *(self->codestr_ptr); - if (!c) { - self->codestr_ptr++; - continue; - } - - repeat: - - fn = scss_function_map[ - (int)c + - 256 * self->instr + - 256 * 256 * (int)(self->par != 0) + - 256 * 256 * 2 * (int)(self->depth > 1 ? 
2 : self->depth) - ]; - - if (fn != NULL) { - fn(self); - } - - self->codestr_ptr++; - if (self->codestr_ptr > codestr_end) { - self->codestr_ptr = codestr_end; - } - - if (self->block.error) { - #ifdef DEBUG - if (self->block.error < 0) { - PySys_WriteStderr("Block found!\n"); - } else { - PySys_WriteStderr("Exception!\n"); - } - #endif - return Py_BuildValue( - "is#s#", - self->block.lineno, - self->block.selprop, - self->block.selprop_sz, - self->block.codestr, - self->block.codestr_sz - ); - } - } - if (self->par > 0) { - if (self->block.error <= 0) { - self->block.error = 1; - self->exc = "Missing closing parenthesis somewhere in block"; - #ifdef DEBUG - PySys_WriteStderr("%s\n", self->exc); - #endif + /* Print the last 10 tokens that have been scanned in */ + PyObject *repr, *tmp, *tmp2; + Token *p_token; + char *tok; + int i, start, first = 1, cur, max=0, pos; + + if (self->scanner->tokens_sz) { + start = self->scanner->tokens_sz - 10; + for (i = (start < 0) ? 0 : start; i < self->scanner->tokens_sz; i++) { + p_token = self->scanner->tokens[i]; + cur = strlen(p_token->regex->tok) * 2; + if (cur > max) max = cur; } - } else if (self->instr != 0) { - if (self->block.error <= 0) { - self->block.error = 2; - self->exc = "Missing closing string somewhere in block"; - #ifdef DEBUG - PySys_WriteStderr("%s\n", self->exc); - #endif + tok = malloc(max + 4); + repr = PyString_FromString(""); + for (i = (start < 0) ? 
0 : start; i < self->scanner->tokens_sz; i++) { + p_token = self->scanner->tokens[i]; + if (!first) PyString_ConcatAndDel(&repr, PyString_FromString("\n")); + + pos = (int)(p_token->string - self->scanner->input); + if (pos < 10) PyString_ConcatAndDel(&repr, PyString_FromString(" ")); + if (pos < 100) PyString_ConcatAndDel(&repr, PyString_FromString(" ")); + if (pos < 1000) PyString_ConcatAndDel(&repr, PyString_FromString(" ")); + PyString_ConcatAndDel(&repr, PyString_FromFormat("(@%d) ", + pos)); + + tmp = PyString_FromString(p_token->regex->tok); + tmp2 = PyObject_Repr(tmp); + memset(tok, ' ', max + 4); + tok[max + 3 - PyString_Size(tmp2)] = '\0'; + PyString_ConcatAndDel(&repr, PyString_FromString(tok)); + PyString_ConcatAndDel(&repr, tmp2); + Py_XDECREF(tmp); + + PyString_ConcatAndDel(&repr, PyString_FromString(" = ")); + tmp = PyString_FromString(p_token->string); + PyString_ConcatAndDel(&repr, PyObject_Repr(tmp)); + Py_XDECREF(tmp); + + first = 0; } - } else if (self->depth > 0) { - if (self->block.error <= 0) { - self->block.error = 3; - self->exc = "Missing closing string somewhere in block"; - #ifdef DEBUG - PySys_WriteStderr("%s\n", self->exc); - #endif - } - if (self->init < codestr_end) { - c = '}'; - goto repeat; - } - } - if (self->init < codestr_end) { - self->init = codestr_end; - c = 0; - goto repeat; + free(tok); + } else { + repr = PyString_FromString("None"); } - scss_BlockLocator_rewind(self); + return (PyObject *)repr; +} - if (self->exc) { - PyErr_SetString(PyExc_Exception, self->exc); - return NULL; - } +static void +scss_Scanner_dealloc(scss_Scanner *self) +{ + Scanner_del(self->scanner); - /* Raising of standard StopIteration exception with empty value. 
*/ - PyErr_SetNone(PyExc_StopIteration); - return NULL; -} + self->ob_type->tp_free((PyObject*)self); + #ifdef DEBUG + PySys_WriteStderr("Scss Scanner object destroyed!\n"); + #endif +} -/* Type definition */ +static PyMethodDef scss_Scanner_methods[] = { + {"scan", (PyCFunction)scss_Scanner_scan, METH_VARARGS, "Scan the next token"}, + {"reset", (PyCFunction)scss_Scanner_reset, METH_VARARGS, "Scan the next token"}, + {"token", (PyCFunction)scss_Scanner_token, METH_VARARGS, "Get the nth token"}, + {"rewind", (PyCFunction)scss_Scanner_rewind, METH_VARARGS, "Rewind scanner"}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; -static PyTypeObject scss_BlockLocatorType = { +static PyTypeObject scss_ScannerType = { PyObject_HEAD_INIT(NULL) 0, /* ob_size */ - "scss._BlockLocator", /* tp_name */ - sizeof(scss_BlockLocator), /* tp_basicsize */ + "scss.Scanner", /* tp_name */ + sizeof(scss_Scanner), /* tp_basicsize */ 0, /* tp_itemsize */ - (destructor)scss_BlockLocator_dealloc, /* tp_dealloc */ + (destructor)scss_Scanner_dealloc, /* tp_dealloc */ 0, /* tp_print */ 0, /* tp_getattr */ 0, /* tp_setattr */ 0, /* tp_compare */ - 0, /* tp_repr */ + (reprfunc)scss_Scanner_repr, /* tp_repr */ 0, /* tp_as_number */ 0, /* tp_as_sequence */ 0, /* tp_as_mapping */ @@ -593,39 +462,80 @@ static PyTypeObject scss_BlockLocatorType = { 0, /* tp_getattro */ 0, /* tp_setattro */ 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER, /* tp_flags */ - "Internal BlockLocator iterator object.", /* tp_doc */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ + "Scanner object.", /* tp_doc */ 0, /* tp_traverse */ 0, /* tp_clear */ 0, /* tp_richcompare */ 0, /* tp_weaklistoffset */ - (getiterfunc)scss_BlockLocator_iter, /* tp_iter: __iter__() method */ - (iternextfunc)scss_BlockLocator_iternext, /* tp_iternext: next() method */ + 0, /* tp_iter: __iter__() method */ + 0, /* tp_iternext: next() method */ + scss_Scanner_methods, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ 
+ 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + (initproc)scss_Scanner_init, /* tp_init */ }; - /* Python constructor */ static PyObject * scss_locate_blocks(PyObject *self, PyObject *args) { - PyObject *result = NULL; + scss_BlockLocator *result = NULL; - char *codestr; - int codestr_sz; - - if (PyArg_ParseTuple(args, "s#", &codestr, &codestr_sz)) { - result = scss_BlockLocator_new(codestr, codestr_sz); + result = PyObject_New(scss_BlockLocator, &scss_BlockLocatorType); + if (result) { + scss_BlockLocator_init(result, args, NULL); } - return result; + return (PyObject *)result; } +static PyObject * +scss_setup_patterns(PyObject *self, PyObject *args) +{ + PyObject *item, *item0, *item1; + int i, is_tuple, _is_tuple; + long size; + + PyObject *patterns; + Pattern _patterns[100]; + int patterns_sz = 0; + if (!Scanner_initialized()) { + if (PyArg_ParseTuple(args, "O", &patterns)) { + is_tuple = PyTuple_Check(patterns); + if (is_tuple || PyList_Check(patterns)) { + size = is_tuple ? PyTuple_Size(patterns) : PyList_Size(patterns); + for (i = 0; i < size; ++i) { + item = is_tuple ? PyTuple_GetItem(patterns, i) : PyList_GetItem(patterns, i); + _is_tuple = PyTuple_Check(item); + if (_is_tuple || PyList_Check(item)) { + item0 = _is_tuple ? PyTuple_GetItem(item, 0) : PyList_GetItem(item, 0); + item1 = _is_tuple ? 
PyTuple_GetItem(item, 1) : PyList_GetItem(item, 1); + if (PyString_Check(item0) && PyString_Check(item1)) { + _patterns[patterns_sz].tok = PyString_AsString(item0); + _patterns[patterns_sz].expr = PyString_AsString(item1); + patterns_sz++; + } + } + } + } + Scanner_initialize(_patterns, patterns_sz); + } + } + return (PyObject *)Py_None; +} /* Module functions */ -static PyMethodDef scssMethods[] = { - {"locate_blocks", scss_locate_blocks, METH_VARARGS, "Locate Scss blocks."}, +static PyMethodDef scss_methods[] = { + {"locate_blocks", (PyCFunction)scss_locate_blocks, METH_VARARGS, "Locate Scss blocks."}, + {"setup_patterns", (PyCFunction)scss_setup_patterns, METH_VARARGS, "Initialize patterns."}, {NULL, NULL, 0, NULL} /* Sentinel */ }; @@ -641,10 +551,22 @@ init_scss(void) if (PyType_Ready(&scss_BlockLocatorType) < 0) return; - m = Py_InitModule("_scss", scssMethods); + scss_ScannerType.tp_new = PyType_GenericNew; + if (PyType_Ready(&scss_ScannerType) < 0) + return; - init_function_map(); + BlockLocator_initialize(); + Scanner_initialize(NULL, 0); + + m = Py_InitModule("_scss", scss_methods); Py_INCREF(&scss_BlockLocatorType); PyModule_AddObject(m, "_BlockLocator", (PyObject *)&scss_BlockLocatorType); + + Py_INCREF(&scss_ScannerType); + PyModule_AddObject(m, "Scanner", (PyObject *)&scss_ScannerType); + + PyExc_scss_NoMoreTokens = PyErr_NewExceptionWithDoc("_scss.NoMoreTokens", "Another exception object, for when we run out of tokens", NULL, NULL); + Py_INCREF(PyExc_scss_NoMoreTokens); + PyModule_AddObject(m, "NoMoreTokens", (PyObject *)PyExc_scss_NoMoreTokens); } diff --git a/scss/src/block_locator.c b/scss/src/block_locator.c new file mode 100644 index 0000000..7ea086d --- /dev/null +++ b/scss/src/block_locator.c @@ -0,0 +1,517 @@ +/* +* pyScss, a Scss compiler for Python +* SCSS blocks scanner. +* +* German M. 
Bravo (Kronuz) <german.mb@gmail.com> +* https://github.com/Kronuz/pyScss +* +* MIT license (http://www.opensource.org/licenses/mit-license.php) +* Copyright (c) 2011 German M. Bravo (Kronuz), All rights reserved. +*/ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "block_locator.h" + +int _strip(char *begin, char *end, int *lineno) { + // " 1\0 some, \n 2\0 aca " + int _cnt, + cnt = 0, + pass = 1, + addnl = 0; + char c, + *line = NULL, + *first = begin, + *last = begin, + *write = lineno ? begin : NULL; + while (begin < end) { + c = *begin; + if (c == '\0') { + if (line == NULL) { + line = first; + if (lineno) { + sscanf(line, "%d", lineno); + } + } + first = last = begin + 1; + pass = 1; + } else if (c == '\n') { + _cnt = (int)(last - first); + if (_cnt > 0) { + cnt += _cnt + addnl; + if (write != NULL) { + if (addnl) { + *write++ = '\n'; + } + while (first < last) { + *write++ = *first++; + } + *write = '\0'; + addnl = 1; + } + } + first = last = begin + 1; + pass = 1; + } else if (c == ' ' || c == '\t') { + if (pass) { + first = last = begin + 1; + } + } else { + last = begin + 1; + pass = 0; + } + begin++; + } + _cnt = (int)(last - first); + if (_cnt > 0) { + cnt += _cnt + addnl; + if (write != NULL) { + if (addnl) { + *write++ = '\n'; + } + while (first < last) { + *write++ = *first++; + } + *write = '\0'; + } + } + + return cnt; +} + + +/* BlockLocator */ + +typedef void _BlockLocator_Callback(BlockLocator*); + +static void +_BlockLocator_start_string(BlockLocator *self) { + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + // A string starts + self->instr = *(self->codestr_ptr); +} + +static void +_BlockLocator_end_string(BlockLocator *self) { + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + // A string ends (FIXME: needs to accept escaped characters) + self->instr = 0; +} + +static void +_BlockLocator_start_parenthesis(BlockLocator *self) { + #ifdef DEBUG + fprintf(stderr, "%s\n", 
__PRETTY_FUNCTION__); + #endif + // parenthesis begins: + self->par++; + self->thin = NULL; + self->safe = self->codestr_ptr + 1; +} + +static void +_BlockLocator_end_parenthesis(BlockLocator *self) { + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + self->par--; +} + +static void +_BlockLocator_flush_properties(BlockLocator *self) { + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + // Flush properties + int len, lineno = -1; + if (self->lose <= self->init) { + len = _strip(self->lose, self->init, &lineno); + if (len) { + if (lineno != -1) { + self->lineno = lineno; + } + + self->block.selprop = self->lose; + self->block.selprop_sz = len; + self->block.codestr = NULL; + self->block.codestr_sz = 0; + self->block.lineno = self->lineno; + self->block.error = -1; + } + self->lose = self->init; + } +} + +static void +_BlockLocator_start_block1(BlockLocator *self) { + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + // Start block: + if (self->codestr_ptr > self->codestr && *(self->codestr_ptr - 1) == '#') { + self->skip = 1; + } else { + self->start = self->codestr_ptr; + if (self->thin != NULL && _strip(self->thin, self->codestr_ptr, NULL)) { + self->init = self->thin; + } + _BlockLocator_flush_properties(self); + self->thin = NULL; + } + self->depth++; +} + +static void +_BlockLocator_start_block(BlockLocator *self) { + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + // Start block: + self->depth++; +} + +static void +_BlockLocator_end_block1(BlockLocator *self) { + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + // Block ends: + int len, lineno = -1; + self->depth--; + if (!self->skip) { + self->end = self->codestr_ptr; + len = _strip(self->init, self->start, &lineno); + if (lineno != -1) { + self->lineno = lineno; + } + + self->block.selprop = self->init; + self->block.selprop_sz = len; + self->block.codestr = (self->start + 1); + 
self->block.codestr_sz = (int)(self->end - (self->start + 1)); + self->block.lineno = self->lineno; + self->block.error = -1; + + self->init = self->safe = self->lose = self->end + 1; + self->thin = NULL; + } + self->skip = 0; +} + +static void +_BlockLocator_end_block(BlockLocator *self) { + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + // Block ends: + self->depth--; +} + +static void +_BlockLocator_end_property(BlockLocator *self) { + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + // End of property (or block): + int len, lineno = -1; + self->init = self->codestr_ptr; + if (self->lose <= self->init) { + len = _strip(self->lose, self->init, &lineno); + if (len) { + if (lineno != -1) { + self->lineno = lineno; + } + + self->block.selprop = self->lose; + self->block.selprop_sz = len; + self->block.codestr = NULL; + self->block.codestr_sz = 0; + self->block.lineno = self->lineno; + self->block.error = -1; + } + self->init = self->safe = self->lose = self->codestr_ptr + 1; + } + self->thin = NULL; +} + +static void +_BlockLocator_mark_safe(BlockLocator *self) { + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + // We are on a safe zone + if (self->thin != NULL && _strip(self->thin, self->codestr_ptr, NULL)) { + self->init = self->thin; + } + self->thin = NULL; + self->safe = self->codestr_ptr + 1; +} + +static void +_BlockLocator_mark_thin(BlockLocator *self) { + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + // Step on thin ice, if it breaks, it breaks here + if (self->thin != NULL && _strip(self->thin, self->codestr_ptr, NULL)) { + self->init = self->thin; + self->thin = self->codestr_ptr + 1; + } else if (self->thin == NULL && _strip(self->safe, self->codestr_ptr, NULL)) { + self->thin = self->codestr_ptr + 1; + } +} + +int function_map_initialized = 0; +_BlockLocator_Callback* scss_function_map[256 * 256 * 2 * 3]; // (c, instr, par, depth) + +static void 
+init_function_map(void) { + int i; + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + if (function_map_initialized) + return; + function_map_initialized = 1; + + for (i = 0; i < 256 * 256 * 2 * 3; i++) { + scss_function_map[i] = NULL; + } + scss_function_map[(int)'\"' + 256*0 + 256*256*0 + 256*256*2*0] = _BlockLocator_start_string; + scss_function_map[(int)'\'' + 256*0 + 256*256*0 + 256*256*2*0] = _BlockLocator_start_string; + scss_function_map[(int)'\"' + 256*0 + 256*256*1 + 256*256*2*0] = _BlockLocator_start_string; + scss_function_map[(int)'\'' + 256*0 + 256*256*1 + 256*256*2*0] = _BlockLocator_start_string; + scss_function_map[(int)'\"' + 256*0 + 256*256*0 + 256*256*2*1] = _BlockLocator_start_string; + scss_function_map[(int)'\'' + 256*0 + 256*256*0 + 256*256*2*1] = _BlockLocator_start_string; + scss_function_map[(int)'\"' + 256*0 + 256*256*1 + 256*256*2*1] = _BlockLocator_start_string; + scss_function_map[(int)'\'' + 256*0 + 256*256*1 + 256*256*2*1] = _BlockLocator_start_string; + scss_function_map[(int)'\"' + 256*0 + 256*256*0 + 256*256*2*2] = _BlockLocator_start_string; + scss_function_map[(int)'\'' + 256*0 + 256*256*0 + 256*256*2*2] = _BlockLocator_start_string; + scss_function_map[(int)'\"' + 256*0 + 256*256*1 + 256*256*2*2] = _BlockLocator_start_string; + scss_function_map[(int)'\'' + 256*0 + 256*256*1 + 256*256*2*2] = _BlockLocator_start_string; + + scss_function_map[(int)'\"' + 256*(int)'\"' + 256*256*0 + 256*256*2*0] = _BlockLocator_end_string; + scss_function_map[(int)'\'' + 256*(int)'\'' + 256*256*0 + 256*256*2*0] = _BlockLocator_end_string; + scss_function_map[(int)'\"' + 256*(int)'\"' + 256*256*1 + 256*256*2*0] = _BlockLocator_end_string; + scss_function_map[(int)'\'' + 256*(int)'\'' + 256*256*1 + 256*256*2*0] = _BlockLocator_end_string; + scss_function_map[(int)'\"' + 256*(int)'\"' + 256*256*0 + 256*256*2*1] = _BlockLocator_end_string; + scss_function_map[(int)'\'' + 256*(int)'\'' + 256*256*0 + 256*256*2*1] = 
_BlockLocator_end_string; + scss_function_map[(int)'\"' + 256*(int)'\"' + 256*256*1 + 256*256*2*1] = _BlockLocator_end_string; + scss_function_map[(int)'\'' + 256*(int)'\'' + 256*256*1 + 256*256*2*1] = _BlockLocator_end_string; + scss_function_map[(int)'\"' + 256*(int)'\"' + 256*256*0 + 256*256*2*2] = _BlockLocator_end_string; + scss_function_map[(int)'\'' + 256*(int)'\'' + 256*256*0 + 256*256*2*2] = _BlockLocator_end_string; + scss_function_map[(int)'\"' + 256*(int)'\"' + 256*256*1 + 256*256*2*2] = _BlockLocator_end_string; + scss_function_map[(int)'\'' + 256*(int)'\'' + 256*256*1 + 256*256*2*2] = _BlockLocator_end_string; + + scss_function_map[(int)'(' + 256*0 + 256*256*0 + 256*256*2*0] = _BlockLocator_start_parenthesis; + scss_function_map[(int)'(' + 256*0 + 256*256*1 + 256*256*2*0] = _BlockLocator_start_parenthesis; + scss_function_map[(int)'(' + 256*0 + 256*256*0 + 256*256*2*1] = _BlockLocator_start_parenthesis; + scss_function_map[(int)'(' + 256*0 + 256*256*1 + 256*256*2*1] = _BlockLocator_start_parenthesis; + scss_function_map[(int)'(' + 256*0 + 256*256*0 + 256*256*2*2] = _BlockLocator_start_parenthesis; + scss_function_map[(int)'(' + 256*0 + 256*256*1 + 256*256*2*2] = _BlockLocator_start_parenthesis; + + scss_function_map[(int)')' + 256*0 + 256*256*1 + 256*256*2*0] = _BlockLocator_end_parenthesis; + scss_function_map[(int)')' + 256*0 + 256*256*1 + 256*256*2*1] = _BlockLocator_end_parenthesis; + scss_function_map[(int)')' + 256*0 + 256*256*1 + 256*256*2*2] = _BlockLocator_end_parenthesis; + + scss_function_map[(int)'{' + 256*0 + 256*256*0 + 256*256*2*0] = _BlockLocator_start_block1; + scss_function_map[(int)'{' + 256*0 + 256*256*0 + 256*256*2*1] = _BlockLocator_start_block; + scss_function_map[(int)'{' + 256*0 + 256*256*0 + 256*256*2*2] = _BlockLocator_start_block; + + scss_function_map[(int)'}' + 256*0 + 256*256*0 + 256*256*2*1] = _BlockLocator_end_block1; + scss_function_map[(int)'}' + 256*0 + 256*256*0 + 256*256*2*2] = _BlockLocator_end_block; + + 
scss_function_map[(int)';' + 256*0 + 256*256*0 + 256*256*2*0] = _BlockLocator_end_property; + + scss_function_map[(int)',' + 256*0 + 256*256*0 + 256*256*2*0] = _BlockLocator_mark_safe; + + scss_function_map[(int)'\n' + 256*0 + 256*256*0 + 256*256*2*0] = _BlockLocator_mark_thin; + + scss_function_map[0 + 256*0 + 256*256*0 + 256*256*2*0] = _BlockLocator_flush_properties; + scss_function_map[0 + 256*0 + 256*256*0 + 256*256*2*1] = _BlockLocator_flush_properties; + scss_function_map[0 + 256*0 + 256*256*0 + 256*256*2*2] = _BlockLocator_flush_properties; + #ifdef DEBUG + fprintf(stderr, "Scss function maps initialized!\n"); + #endif +} + + +/* BlockLocator public interface */ + +void +BlockLocator_initialize(void) +{ + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + init_function_map(); +} + +void +BlockLocator_finalize(void) +{ + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif +} + +BlockLocator * +BlockLocator_new(char *codestr, int codestr_sz) +{ + BlockLocator *self = (BlockLocator *)malloc(sizeof(BlockLocator)); + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + if (self) { + self->_codestr = (char *)malloc(codestr_sz); + memcpy(self->_codestr, codestr, codestr_sz); + self->codestr_sz = codestr_sz; + self->codestr = (char *)malloc(self->codestr_sz); + memcpy(self->codestr, self->_codestr, self->codestr_sz); + self->codestr_ptr = self->codestr; + self->lineno = 0; + self->par = 0; + self->instr = 0; + self->depth = 0; + self->skip = 0; + self->thin = self->codestr; + self->init = self->codestr; + self->safe = self->codestr; + self->lose = self->codestr; + self->start = NULL; + self->end = NULL; + self->exc = NULL; + } + return self; +} + +void +BlockLocator_del(BlockLocator *self) +{ + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + free(self->codestr); + free(self->_codestr); + free(self); +} + +void +BlockLocator_rewind(BlockLocator *self) +{ + #ifdef DEBUG + fprintf(stderr, 
"%s\n", __PRETTY_FUNCTION__); + #endif + free(self->codestr); + self->codestr = (char *)malloc(self->codestr_sz); + memcpy(self->codestr, self->_codestr, self->codestr_sz); + self->codestr_ptr = self->codestr; + self->lineno = 0; + self->par = 0; + self->instr = 0; + self->depth = 0; + self->skip = 0; + self->thin = self->codestr; + self->init = self->codestr; + self->safe = self->codestr; + self->lose = self->codestr; + self->start = NULL; + self->end = NULL; + self->exc = NULL; + + #ifdef DEBUG + fprintf(stderr, "Scss BlockLocator object rewound!\n"); + #endif +} + +Block* +BlockLocator_iternext(BlockLocator *self) +{ + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + _BlockLocator_Callback *fn; + char c = 0; + char *codestr_end = self->codestr + self->codestr_sz; + + memset(&self->block, 0, sizeof(Block)); + + while (self->codestr_ptr < codestr_end) { + c = *(self->codestr_ptr); + if (!c) { + self->codestr_ptr++; + continue; + } + + repeat: + + fn = scss_function_map[ + (int)c + + 256 * self->instr + + 256 * 256 * (int)(self->par != 0) + + 256 * 256 * 2 * (int)(self->depth > 1 ? 
2 : self->depth) + ]; + + if (fn != NULL) { + fn(self); + } + + self->codestr_ptr++; + if (self->codestr_ptr > codestr_end) { + self->codestr_ptr = codestr_end; + } + + if (self->block.error) { + #ifdef DEBUG + if (self->block.error < 0) { + fprintf(stderr, "Block found!\n"); + } else { + fprintf(stderr, "Exception!\n"); + } + #endif + return &self->block; + } + } + if (self->par > 0) { + if (self->block.error <= 0) { + self->block.error = 1; + self->exc = "Missing closing parenthesis somewhere in block"; + #ifdef DEBUG + fprintf(stderr, "%s\n", self->exc); + #endif + } + } else if (self->instr != 0) { + if (self->block.error <= 0) { + self->block.error = 2; + self->exc = "Missing closing string somewhere in block"; + #ifdef DEBUG + fprintf(stderr, "%s\n", self->exc); + #endif + } + } else if (self->depth > 0) { + if (self->block.error <= 0) { + self->block.error = 3; + self->exc = "Missing closing string somewhere in block"; + #ifdef DEBUG + fprintf(stderr, "%s\n", self->exc); + #endif + } + if (self->init < codestr_end) { + c = '}'; + goto repeat; + } + } + if (self->init < codestr_end) { + self->init = codestr_end; + c = 0; + goto repeat; + } + + BlockLocator_rewind(self); + + return &self->block; +} diff --git a/scss/src/block_locator.h b/scss/src/block_locator.h new file mode 100644 index 0000000..450a269 --- /dev/null +++ b/scss/src/block_locator.h @@ -0,0 +1,50 @@ +/* +* pyScss, a Scss compiler for Python +* SCSS blocks scanner. +* +* German M. Bravo (Kronuz) <german.mb@gmail.com> +* https://github.com/Kronuz/pyScss +* +* MIT license (http://www.opensource.org/licenses/mit-license.php) +* Copyright (c) 2011 German M. Bravo (Kronuz), All rights reserved. 
+*/ +#ifndef BLOCK_LOCATOR_H +#define BLOCK_LOCATOR_H + +typedef struct { + int error; + int lineno; + char *selprop; + int selprop_sz; + char *codestr; + int codestr_sz; +} Block; + +typedef struct { + char *exc; + char *_codestr; + char *codestr; + char *codestr_ptr; + int codestr_sz; + int lineno; + int par; + char instr; + int depth; + int skip; + char *thin; + char *init; + char *safe; + char *lose; + char *start; + char *end; + Block block; +} BlockLocator; + +void BlockLocator_initialize(void); +void BlockLocator_finalize(void); + +Block* BlockLocator_iternext(BlockLocator *self); +BlockLocator *BlockLocator_new(char *codestr, int codestr_sz); +void BlockLocator_del(BlockLocator *self); + +#endif diff --git a/scss/src/block_locator.py b/scss/src/block_locator.py new file mode 100755 index 0000000..e5a382b --- /dev/null +++ b/scss/src/block_locator.py @@ -0,0 +1,559 @@ +#!/usr/bin/env python + +## locate_blocks() needs heavy optimizations... is way too slow right now! +## Any suggestion from python wizards? 
:-) + +import re +import sys +from datetime import datetime + +import pstats +import cProfile +from cStringIO import StringIO +def profile(fn): + def wrapper(*args, **kwargs): + profiler = cProfile.Profile() + stream = StringIO() + profiler.enable() + try: + res = fn(*args, **kwargs) + finally: + profiler.disable() + stats = pstats.Stats(profiler, stream=stream) + stats.sort_stats('time') + print >> stream, "" + print >> stream, "=" * 100 + print >> stream, "Stats:" + stats.print_stats() + + print >> stream, "=" * 100 + print >> stream, "Callers:" + stats.print_callers() + + print >> stream, "=" * 100 + print >> stream, "Callees:" + stats.print_callees() + print stream.getvalue() + stream.close() + return res + return wrapper + +DEBUG = False +################################################################################ +# Helper functions + + +SEPARATOR = '\x00' +_nl_re = re.compile(r'\s*\n\s*', re.MULTILINE) +_nl_num_re = re.compile(r'\n.+' + SEPARATOR, re.MULTILINE) +_nl_num_nl_re = re.compile(r'\n.+' + SEPARATOR + r'\s*(?=\n)', re.MULTILINE) +_blocks_re = re.compile(r'[{},;()\'"\n]') + + +def load_string(codestr): + """ + Add line numbers to the string using SEPARATOR as the separation between + the line number and the line. 
+ """ + idx = {'line': 1} + + # Add line numbers: + def _cnt(m): + idx['line'] += 1 + return '\n' + str(idx['line']) + SEPARATOR + codestr = str(idx['line']) + SEPARATOR + _nl_re.sub(_cnt, codestr + '\n') + + # remove empty lines + codestr = _nl_num_nl_re.sub('', codestr) + return codestr + + +def _strip_selprop(selprop, lineno): + # Get the line number of the selector or property and strip all other + # line numbers that might still be there (from multiline selectors) + _lineno, _sep, selprop = selprop.partition(SEPARATOR) + if _sep == SEPARATOR: + _lineno = _lineno.strip() + lineno = int(_lineno) if _lineno else 0 + else: + selprop = _lineno + selprop = _nl_num_re.sub('\n', selprop) + selprop = selprop.strip() + return selprop, lineno + + +def _strip(selprop): + # Strip all line numbers, ignoring them in the way + selprop, _ = _strip_selprop(selprop, None) + return selprop + + +################################################################################ +# Algorithm implemented in C (much slower here): + +PAR = 0 +INSTR = 1 +DEPTH = 2 +SKIP = 3 +THIN = 4 +INIT = 5 +SAFE = 6 +LOSE = 7 +START = 8 +END = 9 +LINENO = 10 +SELPROP = 11 + + +def _start_string(codestr, ctx, i, c): + if DEBUG: print "_start_string" + # A string starts + ctx[INSTR] = c + return + yield + + +def _end_string(codestr, ctx, i, c): + if DEBUG: print "_end_string" + # A string ends (FIXME: needs to accept escaped characters) + ctx[INSTR] = None + return + yield + + +def _start_parenthesis(codestr, ctx, i, c): + if DEBUG: print "_start_parenthesis" + # parenthesis begins: + ctx[PAR] += 1 + ctx[THIN] = None + ctx[SAFE] = i + 1 + return + yield + + +def _end_parenthesis(codestr, ctx, i, c): + if DEBUG: print "_end_parenthesis" + ctx[PAR] -= 1 + return + yield + + +def _flush_properties(codestr, ctx, i, c): + if DEBUG: print "_flush_properties" + # Flush properties + if ctx[LOSE] <= ctx[INIT]: + _property, ctx[LINENO] = _strip_selprop(codestr[ctx[LOSE]:ctx[INIT]], ctx[LINENO]) + if _property: + 
yield ctx[LINENO], _property, None + ctx[SELPROP] = _property + ctx[LOSE] = ctx[INIT] + return + yield + + +def _start_block1(codestr, ctx, i, c): + if DEBUG: print "_start_block1" + # Start level-1 block + if i > 0 and codestr[i - 1] == '#': # Do not process #{...} as blocks! + ctx[SKIP] = True + else: + ctx[START] = i + if ctx[THIN] is not None and _strip(codestr[ctx[THIN]:i]): + ctx[INIT] = ctx[THIN] + for y in _flush_properties(codestr, ctx, i, c): + yield y + ctx[THIN] = None + ctx[DEPTH] += 1 + return + yield + + +def _start_block(codestr, ctx, i, c): + if DEBUG: print "_start_block" + # Start blocks: + ctx[DEPTH] += 1 + return + yield + + +def _end_block1(codestr, ctx, i, c): + if DEBUG: print "_end_block1" + # End level-1 block: + ctx[DEPTH] -= 1 + if not ctx[SKIP]: + ctx[END] = i + _selectors, ctx[LINENO] = _strip_selprop(codestr[ctx[INIT]:ctx[START]], ctx[LINENO]) + _codestr = codestr[ctx[START] + 1:ctx[END]] + if _selectors: + yield ctx[LINENO], _selectors, _codestr + ctx[SELPROP] = _selectors + ctx[INIT] = ctx[SAFE] = ctx[LOSE] = ctx[END] + 1 + ctx[THIN] = None + ctx[SKIP] = False + return + yield + + +def _end_block(codestr, ctx, i, c): + if DEBUG: print "_end_block" + # Block ends: + ctx[DEPTH] -= 1 + return + yield + + +def _end_property(codestr, ctx, i, c): + if DEBUG: print "_end_property" + # End of property (or block): + ctx[INIT] = i + if ctx[LOSE] <= ctx[INIT]: + _property, ctx[LINENO] = _strip_selprop(codestr[ctx[LOSE]:ctx[INIT]], ctx[LINENO]) + if _property: + yield ctx[LINENO], _property, None + ctx[SELPROP] = _property + ctx[INIT] = ctx[SAFE] = ctx[LOSE] = i + 1 + ctx[THIN] = None + return + yield + + +def _mark_safe(codestr, ctx, i, c): + if DEBUG: print "_mark_safe" + # We are on a safe zone + if ctx[THIN] is not None and _strip(codestr[ctx[THIN]:i]): + ctx[INIT] = ctx[THIN] + ctx[THIN] = None + ctx[SAFE] = i + 1 + return + yield + + +def _mark_thin(codestr, ctx, i, c): + if DEBUG: print "_mark_thin" + # Step on thin ice, if it breaks, it 
breaks here + if ctx[THIN] is not None and _strip(codestr[ctx[THIN]:i]): + ctx[INIT] = ctx[THIN] + ctx[THIN] = i + 1 + elif ctx[THIN] is None and _strip(codestr[ctx[SAFE]:i]): + ctx[THIN] = i + 1 + return + yield + + +scss_function_map = { + # (c, instr, par, depth) + ('"', None, False, 0): _start_string, + ("'", None, False, 0): _start_string, + ('"', None, True, 0): _start_string, + ("'", None, True, 0): _start_string, + ('"', None, False, 1): _start_string, + ("'", None, False, 1): _start_string, + ('"', None, True, 1): _start_string, + ("'", None, True, 1): _start_string, + ('"', None, False, 2): _start_string, + ("'", None, False, 2): _start_string, + ('"', None, True, 2): _start_string, + ("'", None, True, 2): _start_string, + + ('"', '"', False, 0): _end_string, + ("'", "'", False, 0): _end_string, + ('"', '"', True, 0): _end_string, + ("'", "'", True, 0): _end_string, + ('"', '"', False, 1): _end_string, + ("'", "'", False, 1): _end_string, + ('"', '"', True, 1): _end_string, + ("'", "'", True, 1): _end_string, + ('"', '"', False, 2): _end_string, + ("'", "'", False, 2): _end_string, + ('"', '"', True, 2): _end_string, + ("'", "'", True, 2): _end_string, + + ("(", None, False, 0): _start_parenthesis, + ("(", None, True, 0): _start_parenthesis, + ("(", None, False, 1): _start_parenthesis, + ("(", None, True, 1): _start_parenthesis, + ("(", None, False, 2): _start_parenthesis, + ("(", None, True, 2): _start_parenthesis, + + (")", None, True, 0): _end_parenthesis, + (")", None, True, 1): _end_parenthesis, + (")", None, True, 2): _end_parenthesis, + + ("{", None, False, 0): _start_block1, + ("{", None, False, 1): _start_block, + ("{", None, False, 2): _start_block, + + ("}", None, False, 1): _end_block1, + ("}", None, False, 2): _end_block, + + (";", None, False, 0): _end_property, + + (",", None, False, 0): _mark_safe, + + ("\n", None, False, 0): _mark_thin, + + (None, None, False, 0): _flush_properties, + (None, None, False, 1): _flush_properties, + (None, 
None, False, 2): _flush_properties, +} + + +def _locate_blocks_a(codestr): + """ + For processing CSS like strings. + + Either returns all selectors (that can be "smart" multi-lined, as + long as it's joined by `,`, or enclosed in `(` and `)`) with its code block + (the one between `{` and `}`, which can be nested), or the "lose" code + (properties) that doesn't have any blocks. + + threshold is the number of blank lines before selectors are broken into + pieces (properties). + """ + ctx = [0, None, 0, False, None, 0, 0, 0, None, None, 0, '??'] + + for m in _blocks_re.finditer(codestr): + c = m.group() + + fn = scss_function_map.get((c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH])) + if DEBUG: print fn and ' > ' or ' ', fn and fn.__name__, (c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH]) + if fn: + for y in fn(codestr, ctx, m.start(), c): + yield y + + codestr_end = len(codestr) + exc = None + if ctx[PAR]: + exc = exc or "Missing closing parenthesis somewhere in block: '%s'" % ctx[SELPROP] + elif ctx[INSTR]: + exc = exc or "Missing closing string somewhere in block: '%s'" % ctx[SELPROP] + elif ctx[DEPTH]: + exc = exc or "Block never closed: '%s'" % ctx[SELPROP] + while ctx[DEPTH] > 0 and ctx[INIT] < codestr_end: + c = '}' + fn = scss_function_map.get((c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH])) + if DEBUG: print fn and ' > ' or ' ! ', fn and fn.__name__, (c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH]) + if fn: + for y in fn(codestr, ctx, m.start(), c): + yield y + + if ctx[INIT] < codestr_end: + ctx[INIT] = codestr_end + c = None + fn = scss_function_map.get((c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH])) + if DEBUG: print fn and ' > ' or ' ! 
', fn and fn.__name__, (c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH]) + if fn: + for y in fn(codestr, ctx, m.start(), c): + yield y + + if exc: + raise Exception(exc) + + +################################################################################ +# Algorithm using Regexps in pure Python (fastest pure python): + + +def _locate_blocks_b(codestr): + """ + For processing CSS like strings. + + Either returns all selectors (that can be "smart" multi-lined, as + long as it's joined by `,`, or enclosed in `(` and `)`) with its code block + (the one between `{` and `}`, which can be nested), or the "lose" code + (properties) that doesn't have any blocks. + + threshold is the number of blank lines before selectors are broken into + pieces (properties). + """ + lineno = 0 + + par = 0 + instr = None + depth = 0 + skip = False + thin = None + i = init = safe = lose = 0 + start = end = None + + for m in _blocks_re.finditer(codestr): + i = m.start(0) + c = codestr[i] + if instr is not None: + if c == instr: + instr = None # A string ends (FIXME: needs to accept escaped characters) + elif c in ('"', "'"): + instr = c # A string starts + elif c == '(': # parenthesis begins: + par += 1 + thin = None + safe = i + 1 + elif c == ')': # parenthesis ends: + par -= 1 + elif not par and not instr: + if c == '{': # block begins: + if depth == 0: + if i > 0 and codestr[i - 1] == '#': # Do not process #{...} as blocks! 
+ skip = True + else: + start = i + if thin is not None and _strip(codestr[thin:i]): + init = thin + if lose <= init: + _property, lineno = _strip_selprop(codestr[lose:init], lineno) + if _property: + yield lineno, _property, None + lose = init + thin = None + depth += 1 + elif c == '}': # block ends: + if depth > 0: + depth -= 1 + if depth == 0: + if not skip: + end = i + _selectors, lineno = _strip_selprop(codestr[init:start], lineno) + _codestr = codestr[start + 1:end].strip() + if _selectors: + yield lineno, _selectors, _codestr + init = safe = lose = end + 1 + thin = None + skip = False + elif depth == 0: + if c == ';': # End of property (or block): + init = i + if lose <= init: + _property, lineno = _strip_selprop(codestr[lose:init], lineno) + if _property: + yield lineno, _property, None + init = safe = lose = i + 1 + thin = None + elif c == ',': + if thin is not None and _strip(codestr[thin:i]): + init = thin + thin = None + safe = i + 1 + elif c == '\n': + if thin is not None and _strip(codestr[thin:i]): + init = thin + thin = i + 1 + elif thin is None and _strip(codestr[safe:i]): + thin = i + 1 # Step on thin ice, if it breaks, it breaks here + if depth > 0: + if not skip: + _selectors, lineno = _strip_selprop(codestr[init:start], lineno) + _codestr = codestr[start + 1:].strip() + if _selectors: + yield lineno, _selectors, _codestr + if par: + raise Exception("Missing closing parenthesis somewhere in block: '%s'" % _selectors) + elif instr: + raise Exception("Missing closing string somewhere in block: '%s'" % _selectors) + else: + raise Exception("Block never closed: '%s'" % _selectors) + losestr = codestr[lose:] + for _property in losestr.split(';'): + _property, lineno = _strip_selprop(_property, lineno) + if _property: + yield lineno, _property, None + + +################################################################################ +# Algorithm implemented in C: + + +try: + from _scss import locate_blocks as _locate_blocks_c +except ImportError: + 
_locate_blocks_c = None + print >>sys.stderr, "Scanning acceleration disabled (_scss not found)!" + + +################################################################################ +# Algorithm implemented in C with CTypes: + + +try: + from _scss_c import locate_blocks as _locate_blocks_d +except ImportError: + _locate_blocks_d = None + print >>sys.stderr, "Scanning CTypes acceleration disabled (_scss_c not found)!" + + +################################################################################ + + +codestr = """ +simple { + block; +} +#{ignored}; +some, +selectors, +and multi-lined, +selectors +with more +{ + the block in here; + can have, nested, selectors { + and properties in nested blocks; + and stuff with #{ ignored blocks }; + } + properties-can: "have strings with stuff like this: }"; +} +and other, +selectors +can be turned into "lose" +properties +if no commas are found +however this is a selector ( + as well as these things, + which are parameters + and can expand + any number of + lines) { + and this is its block;; +} +""" +verify = '\t----------------------------------------------------------------------\n\t>[1] \'simple\'\n\t----------------------------------------------------------------------\n\t>\t[3] \'block\'\n\t----------------------------------------------------------------------\n\t>[5] \'#{ignored}\'\n\t----------------------------------------------------------------------\n\t>[6] \'some,\\nselectors,\\nand multi-lined,\\nselectors\'\n\t----------------------------------------------------------------------\n\t>[10] \'with more\'\n\t----------------------------------------------------------------------\n\t>\t[12] \'the block in here\'\n\t----------------------------------------------------------------------\n\t>\t[13] \'can have, nested, selectors\'\n\t----------------------------------------------------------------------\n\t>\t\t[14] \'and properties in nested 
blocks\'\n\t----------------------------------------------------------------------\n\t>\t\t[15] \'and stuff with #{ ignored blocks }\'\n\t----------------------------------------------------------------------\n\t>\t[17] \'properties-can: "have strings with stuff like this: }"\'\n\t----------------------------------------------------------------------\n\t>[19] \'and other,\\nselectors\\ncan be turned into "lose"\\nproperties\'\n\t----------------------------------------------------------------------\n\t>[23] \'if no commas are found\\nhowever this is a selector (\\nas well as these things,\\nwhich are parameters\\nand can expand\\nany number of\\nlines)\'\n\t----------------------------------------------------------------------\n\t>\t[30] \'and this is its block\'\n' + + +def process_block(locate_blocks, codestr, level=0, dump=False): + ret = '' if dump else None + for lineno, selprop, block in locate_blocks(codestr): + if dump: + ret += '\t%s\n\t>%s[%s] %s\n' % ('-' * 70, '\t' * level, lineno, repr(selprop)) + if block: + _ret = process_block(locate_blocks, block, level + 1, dump) + if dump: + ret += _ret + return ret + + +def process_blocks(locate_blocks, codestr): + for q in xrange(10000): + process_block(locate_blocks, codestr) +profiled_process_blocks = profile(process_blocks) + +if __name__ == "__main__": + codestr = load_string(codestr) + + for locate_blocks, desc in ( + (_locate_blocks_a, "Pure Python, Full algorithm (_locate_blocks_a)"), + (_locate_blocks_b, "Pure Python, Condensed algorithm (_locate_blocks_b)"), + (_locate_blocks_c, "Builtin C Function, Full algorithm (_locate_blocks_c)"), + (_locate_blocks_d, "CTypes C Function, Full algorithm (_locate_blocks_d)")): + if locate_blocks: + ret = process_block(locate_blocks, codestr, dump=True) + print "This is what %s returned:" % desc + print ret + # print repr(ret) + assert ret == verify, 'It should be:\n%s' % verify + + # start = datetime.now() + # print >>sys.stderr, "Timing: %s..." 
% desc, + # process_blocks(locate_blocks, codestr) + # elap = datetime.now() - start + + # elapms = elap.seconds * 1000.0 + elap.microseconds / 1000.0 + # print >>sys.stderr, "Done! took %06.3fms" % elapms diff --git a/scss/src/build.sh b/scss/src/build.sh new file mode 100755 index 0000000..76beb21 --- /dev/null +++ b/scss/src/build.sh @@ -0,0 +1,2 @@ +#!/bin/sh +cd .. && python setup.py build && cp build/lib.macosx-10.7-intel-2.7/_scss.so src/ && cd - diff --git a/scss/src/grammar/grammar.g b/scss/src/grammar/grammar.g index 21fcd55..913a632 100644 --- a/scss/src/grammar/grammar.g +++ b/scss/src/grammar/grammar.g @@ -3,14 +3,23 @@ _units = ['em', 'ex', 'px', 'cm', 'mm', 'in', 'pt', 'pc', 'deg', 'rad' 'grad', 'ms', 's', 'hz', 'khz', '%'] +_inv = lambda s: s ParserValue = lambda s: s NumberValue = lambda s: float(s) StringValue = lambda s: s QuotedStringValue = lambda s: s BooleanValue = lambda s: bool(s) ColorValue = lambda s: s -ListValue = lambda s: s -_inv = lambda s: s +class ListValue(): + def __init__(self, v): + if isinstance(v, self.__class__): + self.v = v + else: + self.v = {0: v} + def first(self): + return self.v[0] + def __len__(self): + return len(self.v) def _reorder_list(lst): diff --git a/scss/src/grammar/grammar.py b/scss/src/grammar/grammar.py index 128de1a..607300e 100644 --- a/scss/src/grammar/grammar.py +++ b/scss/src/grammar/grammar.py @@ -3,14 +3,23 @@ _units = ['em', 'ex', 'px', 'cm', 'mm', 'in', 'pt', 'pc', 'deg', 'rad' 'grad', 'ms', 's', 'hz', 'khz', '%'] +_inv = lambda s: s ParserValue = lambda s: s NumberValue = lambda s: float(s) StringValue = lambda s: s QuotedStringValue = lambda s: s BooleanValue = lambda s: bool(s) ColorValue = lambda s: s -ListValue = lambda s: s -_inv = lambda s: s +class ListValue(): + def __init__(self, v): + if isinstance(v, self.__class__): + self.v = v + else: + self.v = {0: v} + def first(self): + return self.v[0] + def __len__(self): + return len(self.v) def _reorder_list(lst): @@ -35,40 +44,45 @@ from 
yappsrt import * class CalculatorScanner(Scanner): - patterns = [ - ('":"', re.compile(':')), - ('[ \r\t\n]+', re.compile('[ \r\t\n]+')), - ('COMMA', re.compile(',')), - ('LPAR', re.compile('\\(|\\[')), - ('RPAR', re.compile('\\)|\\]')), - ('END', re.compile('$')), - ('MUL', re.compile('[*]')), - ('DIV', re.compile('/')), - ('ADD', re.compile('[+]')), - ('SUB', re.compile('-\\s')), - ('SIGN', re.compile('-(?![a-zA-Z_])')), - ('AND', re.compile('(?<![-\\w])and(?![-\\w])')), - ('OR', re.compile('(?<![-\\w])or(?![-\\w])')), - ('NOT', re.compile('(?<![-\\w])not(?![-\\w])')), - ('NE', re.compile('!=')), - ('INV', re.compile('!')), - ('EQ', re.compile('==')), - ('LE', re.compile('<=')), - ('GE', re.compile('>=')), - ('LT', re.compile('<')), - ('GT', re.compile('>')), - ('STR', re.compile("'[^']*'")), - ('QSTR', re.compile('"[^"]*"')), - ('UNITS', re.compile('(?<!\\s)(?:px|cm|mm|hz|%)(?![-\\w])')), - ('NUM', re.compile('(?:\\d+(?:\\.\\d*)?|\\.\\d+)')), - ('BOOL', re.compile('(?<![-\\w])(?:true|false)(?![-\\w])')), - ('COLOR', re.compile('#(?:[a-fA-F0-9]{6}|[a-fA-F0-9]{3})(?![a-fA-F0-9])')), - ('VAR', re.compile('\\$[-a-zA-Z0-9_]+')), - ('FNCT', re.compile('[-a-zA-Z_][-a-zA-Z0-9_]*(?=\\()')), - ('ID', re.compile('[-a-zA-Z_][-a-zA-Z0-9_]*')), + patterns = None + _patterns = [ + ('":"', ':'), + ('[ \r\t\n]+', '[ \r\t\n]+'), + ('COMMA', ','), + ('LPAR', '\\(|\\['), + ('RPAR', '\\)|\\]'), + ('END', '$'), + ('MUL', '[*]'), + ('DIV', '/'), + ('ADD', '[+]'), + ('SUB', '-\\s'), + ('SIGN', '-(?![a-zA-Z_])'), + ('AND', '(?<![-\\w])and(?![-\\w])'), + ('OR', '(?<![-\\w])or(?![-\\w])'), + ('NOT', '(?<![-\\w])not(?![-\\w])'), + ('NE', '!='), + ('INV', '!'), + ('EQ', '=='), + ('LE', '<='), + ('GE', '>='), + ('LT', '<'), + ('GT', '>'), + ('STR', "'[^']*'"), + ('QSTR', '"[^"]*"'), + ('UNITS', '(?<!\\s)(?:px|cm|mm|hz|%)(?![-\\w])'), + ('NUM', '(?:\\d+(?:\\.\\d*)?|\\.\\d+)'), + ('BOOL', '(?<![-\\w])(?:true|false)(?![-\\w])'), + ('COLOR', '#(?:[a-fA-F0-9]{6}|[a-fA-F0-9]{3})(?![a-fA-F0-9])'), 
+ ('VAR', '\\$[-a-zA-Z0-9_]+'), + ('FNCT', '[-a-zA-Z_][-a-zA-Z0-9_]*(?=\\()'), + ('ID', '[-a-zA-Z_][-a-zA-Z0-9_]*'), ] def __init__(self): + if self.patterns is None: + self.patterns = [] + for k, p in self._patterns: + self.patterns.append((k, re.compile(p))) Scanner.__init__(self, None, ['[ \r\t\n]+']) diff --git a/scss/src/grammar/yapps2.py b/scss/src/grammar/yapps2.py index a0ad8ca..dcbe70d 100644 --- a/scss/src/grammar/yapps2.py +++ b/scss/src/grammar/yapps2.py @@ -226,12 +226,17 @@ class Generator: self.write("from yappsrt import *\n") self.write("\n\n") self.write("class ", self.name, "Scanner(Scanner):\n") - self.write(" patterns = [\n") + self.write(" patterns = None\n") + self.write(" _patterns = [\n") for p in self.terminals: - self.write(" (%s, re.compile(%s)),\n" % ( + self.write(" (%s, %s),\n" % ( repr(p), repr(self.tokens[p]))) self.write(" ]\n\n") self.write(" def __init__(self):\n") + self.write(" if self.patterns is None:\n") + self.write(" self.patterns = []\n") + self.write(" for k, p in self._patterns:\n") + self.write(" self.patterns.append((k, re.compile(p)))\n") self.write(" Scanner.__init__(self, None, %s)\n" % repr(self.ignore)) self.write("\n\n") diff --git a/scss/src/grammar/yappsrt.py b/scss/src/grammar/yappsrt.py index 1b2c2ec..81f8ab5 100644 --- a/scss/src/grammar/yappsrt.py +++ b/scss/src/grammar/yappsrt.py @@ -9,22 +9,6 @@ import re # Parser -class SyntaxError(Exception): - """ - When we run into an unexpected token, this is the exception to use - """ - def __init__(self, pos=-1, msg="Bad Token"): - Exception.__init__(self) - self.pos = pos - self.msg = msg - - def __repr__(self): - if self.pos < 0: - return "#<syntax-error>" - else: - return "SyntaxError[@ char %s: %s]" % (repr(self.pos), self.msg) - - class NoMoreTokens(Exception): """ Another exception object, for when we run out of tokens @@ -108,9 +92,9 @@ class Scanner(object): except KeyError: token = None while True: + best_pat = None # Search the patterns for a match, with 
earlier # tokens in the list having preference - best_pat = None best_pat_len = 0 for p, regexp in self.patterns: # First check to see if we're restricting to this token @@ -128,11 +112,11 @@ class Scanner(object): msg = "Bad Token" if restrict: msg = "Trying to find one of " + ", ".join(restrict) - raise SyntaxError(self.pos, msg) + raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(self.pos), msg)) # If we found something that isn't to be ignored, return it if best_pat in self.ignore: - # This token should be ignored .. + # This token should be ignored... self.pos += best_pat_len else: end_pos = self.pos + best_pat_len @@ -145,7 +129,6 @@ class Scanner(object): ) break self.scanned[_k_] = token - if token is not None: self.pos = token[1] # Only add this token if it's not in the list @@ -180,7 +163,7 @@ class Parser(object): """ tok = self._scanner.token(self._pos, set([type])) if tok[2] != type: - raise SyntaxError(tok[0], "Trying to find " + type) + raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(tok[0]), "Trying to find " + type)) self._pos += 1 return tok[3] diff --git a/scss/src/scanner.c b/scss/src/scanner.c new file mode 100644 index 0000000..d9d6182 --- /dev/null +++ b/scss/src/scanner.c @@ -0,0 +1,406 @@ +/* +* pyScss, a Scss compiler for Python +* SCSS blocks scanner. +* +* German M. Bravo (Kronuz) <german.mb@gmail.com> +* https://github.com/Kronuz/pyScss +* +* MIT license (http://www.opensource.org/licenses/mit-license.php) +* Copyright (c) 2011 German M. Bravo (Kronuz), All rights reserved. 
+*/ +#include <stdio.h> +#include <string.h> +#include "scanner.h" + +Pattern *Pattern_patterns[MAX_PATTERNS]; +int Pattern_patterns_initialized = 0; + +Pattern* +Pattern_regex(char *tok, char *expr) { + Pattern *ret = NULL; + int j; + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + for (j = 0; j < MAX_PATTERNS; j++) { + if(Pattern_patterns[j] == NULL) { + if (expr) { + Pattern_patterns[j] = (Pattern *)calloc(1, sizeof(Pattern)); + Pattern_patterns[j]->tok = strdup(tok); + Pattern_patterns[j]->expr = strdup(expr); + ret = Pattern_patterns[j]; + } + break; + } else { + if (strcmp(Pattern_patterns[j]->tok, tok) == 0) { + ret = Pattern_patterns[j]; + break; + } + } + } + return ret; +} + +static int +Pattern_match(Pattern *regex, char *string, int string_sz, int start_at, Token *p_token) { + int options = PCRE_ANCHORED; + const char *errptr; + int ret, erroffset, ovector[3]; + pcre *p_pattern = regex->pattern; + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + if (p_pattern == NULL) { + p_pattern = regex->pattern = pcre_compile(regex->expr, options, &errptr, &erroffset, NULL); + } + ret = pcre_exec( + p_pattern, + NULL, /* no extra data */ + string, + string_sz, + start_at, + PCRE_ANCHORED, /* default options */ + ovector, /* output vector for substring information */ + 3 /* number of elements in the output vector */ + ); + if (ret >= 0) { + if (p_token) { + p_token->regex = regex; + p_token->string = string + ovector[0]; + p_token->string_sz = ovector[1] - ovector[0]; + } + return 1; + } + return 0; +} + +static void Pattern_initialize(Pattern [], int); +static void Pattern_setup(Pattern [], int); +static void Pattern_finalize(void); + + +static void +Pattern_initialize(Pattern patterns[], int patterns_sz) { + int j; + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + if (!Pattern_patterns_initialized) { + for (j = 0; j < MAX_PATTERNS; j++) { + Pattern_patterns[j] = NULL; + } + if (patterns_sz) { + 
Pattern_patterns_initialized = 1; + Pattern_setup(patterns, patterns_sz); + } + } +} + +static void +Pattern_setup(Pattern patterns[], int patterns_sz) { + int i; + Pattern *regex; + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + if (!Pattern_patterns_initialized) { + Pattern_initialize(patterns, patterns_sz); + } else { + for (i = 0; i < patterns_sz; i++) { + regex = Pattern_regex(patterns[i].tok, patterns[i].expr); + #ifdef DEBUG + if (regex) { + fprintf(stderr, "Added regex pattern '%s': '%s'\n", regex->tok, regex->expr); + } + #endif + } + } +} + +static void +Pattern_finalize(void) { + int j; + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + if (Pattern_patterns_initialized) { + for (j = 0; j < MAX_PATTERNS; j++) { + if (Pattern_patterns[j] != NULL) { + free(Pattern_patterns[j]->tok); + free(Pattern_patterns[j]->expr); + if (Pattern_patterns[j]->pattern != NULL) { + pcre_free(Pattern_patterns[j]->pattern); + } + free(Pattern_patterns[j]); + Pattern_patterns[j] = NULL; + } + } + Pattern_patterns_initialized = 0; + } +} + +/* Scanner */ + + +static int +_Scanner_scan(Scanner *self, Pattern restrictions[], int restrictions_sz) +{ + Token best_token, *p_token; + int j, k, max, skip; + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + while (1) { + best_token.regex = NULL; + /* Search the patterns for a match, with earlier + tokens in the list having preference */ + for (j = 0; j < MAX_PATTERNS; j++) { + Pattern *regex = Pattern_patterns[j]; + if (regex == NULL) { + break; + } + #ifdef DEBUG + if (regex) { + fprintf(stderr, "Trying '%s': '%s' at '%d' (\"%s\")\n", regex->tok, regex->expr, self->pos, self->input); + } + #endif + /* First check to see if we're restricting to this token */ + skip = restrictions_sz; + if (skip) { + max = (restrictions_sz > self->ignore_sz) ? 
restrictions_sz : self->ignore_sz; + for (k = 0; k < max; k++) { + if (k < restrictions_sz && regex == Pattern_regex(restrictions[k].tok, restrictions[k].expr)) { + skip = 0; + break; + } + if (k < self->ignore_sz && regex == self->ignore[k]) { + skip = 0; + break; + } + } + if (skip) { + continue; + #ifdef DEBUG + if (regex) { + fprintf(stderr, "Skipping!\n"); + } + #endif + } + } + if (Pattern_match( + regex, + self->input, + self->input_sz, + self->pos, + &best_token + )) { + #ifdef DEBUG + if (regex) { + fprintf(stderr, "Match OK! '%s': '%s' at '%d'\n", regex->tok, regex->expr, self->pos); + } + #endif + break; + } + } + /* If we didn't find anything, raise an error */ + if (best_token.regex == NULL) { + if (restrictions_sz) { + sprintf(self->exc, "SyntaxError[@ char %d: Trying to find one of the %d restricted tokens!]", self->pos, restrictions_sz); + return SCANNER_EXC_RESTRICTED; + } + sprintf(self->exc, "SyntaxError[@ char %d: Bad Token!]", self->pos); + return SCANNER_EXC_BAD_TOKEN; + } + /* If we found something that isn't to be ignored, return it */ + skip = 0; + for (k = 0; k < self->ignore_sz; k++) { + if (best_token.regex == self->ignore[k]) { + /* This token should be ignored... */ + self->pos += best_token.string_sz; + skip = 1; + break; + } + } + if (!skip) { + break; + } + } + if (best_token.regex) { + self->pos = (int)(best_token.string - self->input + best_token.string_sz); + /* Only add this token if it's not in the list (to prevent looping) */ + if (self->tokens_sz == 0 || + self->tokens[self->tokens_sz - 1]->regex != best_token.regex || + self->tokens[self->tokens_sz - 1]->string != best_token.string || + self->tokens[self->tokens_sz - 1]->string_sz != best_token.string_sz + ) { + p_token = (Token *)malloc(sizeof(Token)); + memcpy(p_token, &best_token, sizeof(Token)); + self->tokens[self->tokens_sz] = p_token; + for (j = 0; j < MAX_PATTERNS; j++) { + self->restrictions[self->tokens_sz][k] = (k < restrictions_sz) ? 
Pattern_regex(restrictions[k].tok, restrictions[k].expr) : NULL; + } + self->tokens_sz++; + return 1; + } + } + return 0; +} + + +/* Scanner public interface */ + +void +Scanner_reset(Scanner *self, char *input, int input_sz) { + int i, j; + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + if (input_sz) { + if (self->input) free(self->input); + self->input = strndup(input, input_sz + 1); + self->input[input_sz] = '\0'; + self->input_sz = input_sz; + } + self->tokens_sz = 0; + for (i = 0; i < MAX_TOKENS; i++) { + if (self->tokens[i]) { + free(self->tokens[i]); + } + self->tokens[i] = NULL; + for (j = 0; j < MAX_PATTERNS; j++) { + self->restrictions[i][j] = NULL; + } + self->restrictions_sz[i] = 0; + } + self->pos = 0; +} + +void +Scanner_del(Scanner *self) { + int i; + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + for (i = 0; i < MAX_TOKENS; i++) { + if (self->tokens[i]) { + free(self->tokens[i]); + } + self->tokens[i] = NULL; + } + + if (self->input != NULL) { + free(self->input); + self->input = NULL; + } + + free(self); +} + +Scanner* +Scanner_new(Pattern patterns[], int patterns_sz, Pattern ignore[], int ignore_sz, char *input, int input_sz) +{ + int i; + Scanner *self; + Pattern *regex; + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + self = (Scanner *)calloc(1, sizeof(Scanner)); + if (self) { + for (i = 0; i < patterns_sz; i++) { + regex = Pattern_regex(patterns[i].tok, patterns[i].expr); + #ifdef DEBUG + if (regex) { + fprintf(stderr, "Added regex pattern '%s': '%s'\n", regex->tok, regex->expr); + } + #endif + } + for (i = 0; i < ignore_sz; i++) { + regex = Pattern_regex(ignore[i].tok, ignore[i].expr); + if (regex) { + self->ignore[self->ignore_sz++] = regex; + #ifdef DEBUG + fprintf(stderr, "Ignoring token '%s'\n", regex->tok); + #endif + } + } + Scanner_reset(self, input, input_sz); + } + return self; +} + +int +Scanner_initialized(void) +{ + return Pattern_patterns_initialized; +} + 
+void +Scanner_initialize(Pattern patterns[], int patterns_sz) +{ + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + Pattern_initialize(patterns, patterns_sz); +} + +void +Scanner_finalize(void) +{ + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + Pattern_finalize(); +} + + +Token* +Scanner_token(Scanner *self, int i, Pattern restrictions[], int restrictions_sz) +{ + int j, k, found; + Pattern *regex; + int result; + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + if (i == self->tokens_sz) { + result = _Scanner_scan(self, restrictions, restrictions_sz); + if (result < 0) { + return (Token *)result; + } + } + if (i >= 0 && i < self->tokens_sz) { + if (self->restrictions_sz[i]) { + for (j = 0; j < restrictions_sz; j++) { + found = 0; + for (k = 0; k < self->restrictions_sz[i]; k++) { + regex = Pattern_regex(restrictions[j].tok, restrictions[j].expr); + if (regex == self->restrictions[i][k]) { + found = 1; + break; + } + } + if (!found) { + sprintf(self->exc, "Unimplemented: restriction set changed"); + return (Token *)SCANNER_EXC_UNIMPLEMENTED; + } + } + } + return self->tokens[i]; + } + return (Token *)SCANNER_EXC_NO_MORE_TOKENS; +} + +void +Scanner_rewind(Scanner *self, int i) +{ + Token *p_token; + #ifdef DEBUG + fprintf(stderr, "%s\n", __PRETTY_FUNCTION__); + #endif + if (i >= 0 && i < self->tokens_sz) { + self->tokens_sz = i; + p_token = self->tokens[i]; + self->pos = (int)(p_token->string - self->input); + } +} diff --git a/scss/src/scanner.h b/scss/src/scanner.h new file mode 100644 index 0000000..b7de600 --- /dev/null +++ b/scss/src/scanner.h @@ -0,0 +1,62 @@ +/* +* pyScss, a Scss compiler for Python +* SCSS blocks scanner. +* +* German M. Bravo (Kronuz) <german.mb@gmail.com> +* https://github.com/Kronuz/pyScss +* +* MIT license (http://www.opensource.org/licenses/mit-license.php) +* Copyright (c) 2011 German M. Bravo (Kronuz), All rights reserved. 
+*/ +#ifndef SCANNER_H +#define SCANNER_H + +#define PCRE_STATIC +#include <pcre.h> + +#define MAX_EXC_STRING 1024 +#define MAX_PATTERNS 1024 +#define MAX_TOKENS 1024 + +#define SCANNER_EXC_BAD_TOKEN -1 +#define SCANNER_EXC_RESTRICTED -2 +#define SCANNER_EXC_UNIMPLEMENTED -3 +#define SCANNER_EXC_NO_MORE_TOKENS -4 + +typedef struct { + char *tok; + char *expr; + pcre *pattern; +} Pattern; + +typedef struct { + Pattern *regex; + char *string; + int string_sz; +} Token; + +typedef struct { + char exc[MAX_EXC_STRING]; + int ignore_sz; + Pattern *ignore[MAX_PATTERNS]; + int tokens_sz; + Token *tokens[MAX_TOKENS]; + int restrictions_sz[MAX_TOKENS]; + Pattern *restrictions[MAX_TOKENS][MAX_PATTERNS]; + int input_sz; + char *input; + int pos; +} Scanner; + +int Scanner_initialized(void); +void Scanner_initialize(Pattern [], int); +void Scanner_finalize(void); + +void Scanner_reset(Scanner *self, char *input, int input_sz); +Scanner *Scanner_new(Pattern [], int, Pattern [], int, char *, int); +void Scanner_del(Scanner *); + +Token* Scanner_token(Scanner *, int, Pattern [], int); +void Scanner_rewind(Scanner *, int); + +#endif diff --git a/scss/src/scanner.py b/scss/src/scanner.py index fb2659a..da4d111 100644..100755 --- a/scss/src/scanner.py +++ b/scss/src/scanner.py @@ -1,4 +1,4 @@ -#!/bin/env python +#!/usr/bin/env python ## locate_blocks() needs heavy optimizations... is way too slow right now! ## Any suggestion from python wizards? 
DEBUG = False

################################################################################
# Helpers

# Unit suffixes accepted by the UNITS token. Every entry needs its own comma:
# adjacent string literals are concatenated, which used to turn 'rad' 'grad'
# into the single bogus unit 'radgrad'.
_units = ['em', 'ex', 'px', 'cm', 'mm', 'in', 'pt', 'pc', 'deg', 'rad',
          'grad', 'ms', 's', 'hz', 'khz', '%']

# (token name, regular expression) pairs; earlier entries have preference
# when more than one pattern matches at the same position.
PATTERNS = [
    ('":"', ':'),
    ('[ \r\t\n]+', '[ \r\t\n]+'),
    ('COMMA', ','),
    ('LPAR', '\\(|\\['),
    ('RPAR', '\\)|\\]'),
    ('END', '$'),
    ('MUL', '[*]'),
    ('DIV', '/'),
    ('ADD', '[+]'),
    ('SUB', '-\\s'),
    ('SIGN', '-(?![a-zA-Z_])'),
    ('AND', '(?<![-\\w])and(?![-\\w])'),
    ('OR', '(?<![-\\w])or(?![-\\w])'),
    ('NOT', '(?<![-\\w])not(?![-\\w])'),
    ('NE', '!='),
    ('INV', '!'),
    ('EQ', '=='),
    ('LE', '<='),
    ('GE', '>='),
    ('LT', '<'),
    ('GT', '>'),
    ('STR', "'[^']*'"),
    ('QSTR', '"[^"]*"'),
    ('UNITS', '(?<!\\s)(?:' + '|'.join(_units) + ')(?![-\\w])'),
    ('NUM', '(?:\\d+(?:\\.\\d*)?|\\.\\d+)'),
    ('BOOL', '(?<![-\\w])(?:true|false)(?![-\\w])'),
    ('COLOR', '#(?:[a-fA-F0-9]{6}|[a-fA-F0-9]{3})(?![a-fA-F0-9])'),
    ('VAR', '\\$[-a-zA-Z0-9_]+'),
    ('FNCT', '[-a-zA-Z_][-a-zA-Z0-9_]*(?=\\()'),
    ('ID', '[-a-zA-Z_][-a-zA-Z0-9_]*'),
]

- ctx[SKIP] = True - else: - ctx[START] = i - if ctx[THIN] is not None and _strip(codestr[ctx[THIN]:i]): - ctx[INIT] = ctx[THIN] - for y in _flush_properties(codestr, ctx, i, c): - yield y - ctx[THIN] = None - ctx[DEPTH] += 1 - return - yield - - -def _start_block(codestr, ctx, i, c): - if DEBUG: print "_start_block" - # Start blocks: - ctx[DEPTH] += 1 - return - yield - - -def _end_block1(codestr, ctx, i, c): - if DEBUG: print "_end_block1" - # End level-1 block: - ctx[DEPTH] -= 1 - if not ctx[SKIP]: - ctx[END] = i - _selectors, ctx[LINENO] = _strip_selprop(codestr[ctx[INIT]:ctx[START]], ctx[LINENO]) - _codestr = codestr[ctx[START] + 1:ctx[END]] - if _selectors: - yield ctx[LINENO], _selectors, _codestr - ctx[SELPROP] = _selectors - ctx[INIT] = ctx[SAFE] = ctx[LOSE] = ctx[END] + 1 - ctx[THIN] = None - ctx[SKIP] = False - return - yield - - -def _end_block(codestr, ctx, i, c): - if DEBUG: print "_end_block" - # Block ends: - ctx[DEPTH] -= 1 - return - yield - - -def _end_property(codestr, ctx, i, c): - if DEBUG: print "_end_property" - # End of property (or block): - ctx[INIT] = i - if ctx[LOSE] <= ctx[INIT]: - _property, ctx[LINENO] = _strip_selprop(codestr[ctx[LOSE]:ctx[INIT]], ctx[LINENO]) - if _property: - yield ctx[LINENO], _property, None - ctx[SELPROP] = _property - ctx[INIT] = ctx[SAFE] = ctx[LOSE] = i + 1 - ctx[THIN] = None - return - yield - - -def _mark_safe(codestr, ctx, i, c): - if DEBUG: print "_mark_safe" - # We are on a safe zone - if ctx[THIN] is not None and _strip(codestr[ctx[THIN]:i]): - ctx[INIT] = ctx[THIN] - ctx[THIN] = None - ctx[SAFE] = i + 1 - return - yield - - -def _mark_thin(codestr, ctx, i, c): - if DEBUG: print "_mark_thin" - # Step on thin ice, if it breaks, it breaks here - if ctx[THIN] is not None and _strip(codestr[ctx[THIN]:i]): - ctx[INIT] = ctx[THIN] - ctx[THIN] = i + 1 - elif ctx[THIN] is None and _strip(codestr[ctx[SAFE]:i]): - ctx[THIN] = i + 1 - return - yield - - -scss_function_map = { - # (c, instr, par, depth) - ('"', 
None, False, 0): _start_string, - ("'", None, False, 0): _start_string, - ('"', None, True, 0): _start_string, - ("'", None, True, 0): _start_string, - ('"', None, False, 1): _start_string, - ("'", None, False, 1): _start_string, - ('"', None, True, 1): _start_string, - ("'", None, True, 1): _start_string, - ('"', None, False, 2): _start_string, - ("'", None, False, 2): _start_string, - ('"', None, True, 2): _start_string, - ("'", None, True, 2): _start_string, - - ('"', '"', False, 0): _end_string, - ("'", "'", False, 0): _end_string, - ('"', '"', True, 0): _end_string, - ("'", "'", True, 0): _end_string, - ('"', '"', False, 1): _end_string, - ("'", "'", False, 1): _end_string, - ('"', '"', True, 1): _end_string, - ("'", "'", True, 1): _end_string, - ('"', '"', False, 2): _end_string, - ("'", "'", False, 2): _end_string, - ('"', '"', True, 2): _end_string, - ("'", "'", True, 2): _end_string, - - ("(", None, False, 0): _start_parenthesis, - ("(", None, True, 0): _start_parenthesis, - ("(", None, False, 1): _start_parenthesis, - ("(", None, True, 1): _start_parenthesis, - ("(", None, False, 2): _start_parenthesis, - ("(", None, True, 2): _start_parenthesis, - - (")", None, True, 0): _end_parenthesis, - (")", None, True, 1): _end_parenthesis, - (")", None, True, 2): _end_parenthesis, - - ("{", None, False, 0): _start_block1, - ("{", None, False, 1): _start_block, - ("{", None, False, 2): _start_block, - - ("}", None, False, 1): _end_block1, - ("}", None, False, 2): _end_block, - - (";", None, False, 0): _end_property, - - (",", None, False, 0): _mark_safe, - - ("\n", None, False, 0): _mark_thin, - - (None, None, False, 0): _flush_properties, - (None, None, False, 1): _flush_properties, - (None, None, False, 2): _flush_properties, -} - - -def _locate_blocks_a(codestr): - """ - For processing CSS like strings. 
- - Either returns all selectors (that can be "smart" multi-lined, as - long as it's joined by `,`, or enclosed in `(` and `)`) with its code block - (the one between `{` and `}`, which can be nested), or the "lose" code - (properties) that doesn't have any blocks. - threshold is the number of blank lines before selectors are broken into - pieces (properties). +class NoMoreTokens(Exception): """ - ctx = [0, None, 0, False, None, 0, 0, 0, None, None, 0, '??'] - - for m in _blocks_re.finditer(codestr): - c = m.group() - - fn = scss_function_map.get((c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH])) - if DEBUG: print fn and ' > ' or ' ', fn and fn.__name__, (c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH]) - if fn: - for y in fn(codestr, ctx, m.start(), c): - yield y - - codestr_end = len(codestr) - exc = None - if ctx[PAR]: - exc = exc or "Missing closing parenthesis somewhere in block: '%s'" % ctx[SELPROP] - elif ctx[INSTR]: - exc = exc or "Missing closing string somewhere in block: '%s'" % ctx[SELPROP] - elif ctx[DEPTH]: - exc = exc or "Block never closed: '%s'" % ctx[SELPROP] - while ctx[DEPTH] > 0 and ctx[INIT] < codestr_end: - c = '}' - fn = scss_function_map.get((c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH])) - if DEBUG: print fn and ' > ' or ' ! ', fn and fn.__name__, (c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH]) - if fn: - for y in fn(codestr, ctx, m.start(), c): - yield y - - if ctx[INIT] < codestr_end: - ctx[INIT] = codestr_end - c = None - fn = scss_function_map.get((c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH])) - if DEBUG: print fn and ' > ' or ' ! 
', fn and fn.__name__, (c, ctx[INSTR], ctx[PAR] != 0, 2 if ctx[DEPTH] > 1 else ctx[DEPTH]) - if fn: - for y in fn(codestr, ctx, m.start(), c): - yield y - - if exc: - raise Exception(exc) - - -################################################################################ -# Algorithm using Regexps in pure Python (fastest pure python): - - -def _locate_blocks_b(codestr): + Another exception object, for when we run out of tokens """ - For processing CSS like strings. - - Either returns all selectors (that can be "smart" multi-lined, as - long as it's joined by `,`, or enclosed in `(` and `)`) with its code block - (the one between `{` and `}`, which can be nested), or the "lose" code - (properties) that doesn't have any blocks. - - threshold is the number of blank lines before selectors are broken into - pieces (properties). - """ - lineno = 0 - - par = 0 - instr = None - depth = 0 - skip = False - thin = None - i = init = safe = lose = 0 - start = end = None - - for m in _blocks_re.finditer(codestr): - i = m.start(0) - c = codestr[i] - if instr is not None: - if c == instr: - instr = None # A string ends (FIXME: needs to accept escaped characters) - elif c in ('"', "'"): - instr = c # A string starts - elif c == '(': # parenthesis begins: - par += 1 - thin = None - safe = i + 1 - elif c == ')': # parenthesis ends: - par -= 1 - elif not par and not instr: - if c == '{': # block begins: - if depth == 0: - if i > 0 and codestr[i - 1] == '#': # Do not process #{...} as blocks! 
- skip = True - else: - start = i - if thin is not None and _strip(codestr[thin:i]): - init = thin - if lose <= init: - _property, lineno = _strip_selprop(codestr[lose:init], lineno) - if _property: - yield lineno, _property, None - lose = init - thin = None - depth += 1 - elif c == '}': # block ends: - if depth > 0: - depth -= 1 - if depth == 0: - if not skip: - end = i - _selectors, lineno = _strip_selprop(codestr[init:start], lineno) - _codestr = codestr[start + 1:end].strip() - if _selectors: - yield lineno, _selectors, _codestr - init = safe = lose = end + 1 - thin = None - skip = False - elif depth == 0: - if c == ';': # End of property (or block): - init = i - if lose <= init: - _property, lineno = _strip_selprop(codestr[lose:init], lineno) - if _property: - yield lineno, _property, None - init = safe = lose = i + 1 - thin = None - elif c == ',': - if thin is not None and _strip(codestr[thin:i]): - init = thin - thin = None - safe = i + 1 - elif c == '\n': - if thin is not None and _strip(codestr[thin:i]): - init = thin - thin = i + 1 - elif thin is None and _strip(codestr[safe:i]): - thin = i + 1 # Step on thin ice, if it breaks, it breaks here - if depth > 0: - if not skip: - _selectors, lineno = _strip_selprop(codestr[init:start], lineno) - _codestr = codestr[start + 1:].strip() - if _selectors: - yield lineno, _selectors, _codestr - if par: - raise Exception("Missing closing parenthesis somewhere in block: '%s'" % _selectors) - elif instr: - raise Exception("Missing closing string somewhere in block: '%s'" % _selectors) - else: - raise Exception("Block never closed: '%s'" % _selectors) - losestr = codestr[lose:] - for _property in losestr.split(';'): - _property, lineno = _strip_selprop(_property, lineno) - if _property: - yield lineno, _property, None - - -################################################################################ -# Algorithm implemented in C: - - -try: - from _scss import locate_blocks as _locate_blocks_c -except ImportError: - 
class NoMoreTokens(Exception):
    """
    Another exception object, for when we run out of tokens
    """
    pass


class Scanner(object):
    """
    Regex based tokenizer.

    Tokens are tuples ``(start, end, terminal, text)``. Scan results are
    memoized per input string in the class-level ``_cache_`` (shared by all
    instances); call ``cleanup()`` to drop it.
    """
    _cache_ = {}

    def __init__(self, patterns, ignore, input=None):
        """
        Patterns is [(terminal,regex)...]
        Ignore is [terminal,...];
        Input is a string
        """
        self.reset(input)
        self.ignore = ignore
        # The stored patterns are pairs (terminal, compiled regex). If the
        # patterns variable passed in to the constructor is None, we assume
        # that the class already has a proper .patterns list constructed
        if patterns is not None:
            self.patterns = []
            for k, r in patterns:
                self.patterns.append((k, re.compile(r)))

    @classmethod
    def cleanup(cls):
        """
        Forget every memoized scan result.
        """
        cls._cache_ = {}

    def reset(self, input):
        """
        Start scanning `input` from the beginning, dropping all tokens.
        """
        self.tokens = []
        self.restrictions = []
        self.input = input
        self.pos = 0
        self._cache_.setdefault(input, {})
        self.scanned = self._cache_[input]

    def __repr__(self):
        """
        Print the last 10 tokens that have been scanned in
        """
        output = ''
        for t in self.tokens[-10:]:
            output = "%s\n  (@%s)  %s  =  %s" % (output, t[0], t[2], repr(t[3]))
        return output

    def token(self, i, restrict=None):
        """
        Get the i'th token, and if i is one past the end, then scan
        for another token; restrict is a list of tokens that
        are allowed, or 0 for any token.
        """
        tokens_len = len(self.tokens)
        if i == tokens_len:  # We are at the end, get the next...
            tokens_len += self.scan(restrict)
        if i < tokens_len:
            if restrict and self.restrictions[i] and restrict > self.restrictions[i]:
                raise NotImplementedError("Unimplemented: restriction set changed")
            return self.tokens[i]
        raise NoMoreTokens()

    def rewind(self, i):
        """
        Forget token i and everything after it, and move the scan position
        back to where token i started. Indexes that do not name a stored
        token are ignored (i == len(self.tokens) used to raise IndexError).
        """
        tokens_len = len(self.tokens)
        if 0 <= i < tokens_len:
            token = self.tokens[i]
            self.tokens = self.tokens[:i]
            self.restrictions = self.restrictions[:i]
            self.pos = token[0]

    def scan(self, restrict):
        """
        Should scan another token and add it to the list, self.tokens,
        and add the restriction to self.restrictions

        Returns 1 when a token was appended and 0 otherwise; raises
        SyntaxError when nothing matches at the current position.
        """
        # Keep looking for a token, ignoring any in self.ignore
        _k_ = (self.pos, tuple(restrict) if restrict else None)
        try:
            token = self.scanned[_k_]
        except KeyError:
            token = None
            while True:
                best_pat = None
                # Search the patterns for a match, with earlier
                # tokens in the list having preference
                best_pat_len = 0
                for p, regexp in self.patterns:
                    # First check to see if we're restricting to this token
                    if restrict and p not in restrict and p not in self.ignore:
                        continue
                    m = regexp.match(self.input, self.pos)
                    if m:
                        # We got a match
                        best_pat = p
                        best_pat_len = len(m.group(0))
                        break

                # If we didn't find anything, raise an error
                if best_pat is None:
                    msg = "Bad Token"
                    if restrict:
                        msg = "Trying to find one of " + ", ".join(restrict)
                    raise SyntaxError("SyntaxError[@ char %s: %s]" % (repr(self.pos), msg))

                # If we found something that isn't to be ignored, return it
                if best_pat in self.ignore:
                    # This token should be ignored...
                    self.pos += best_pat_len
                else:
                    end_pos = self.pos + best_pat_len
                    # Create a token with this data
                    token = (
                        self.pos,
                        end_pos,
                        best_pat,
                        self.input[self.pos:end_pos]
                    )
                    break
            self.scanned[_k_] = token
        if token is not None:
            self.pos = token[1]
            # Only add this token if it's not in the list
            # (to prevent looping)
            if not self.tokens or token != self.tokens[-1]:
                self.tokens.append(token)
                self.restrictions.append(restrict)
                return 1
        return 0


class _Scanner_a(Scanner):
    """
    Pure Python scanner preloaded with the module level PATTERNS.
    """
    patterns = None

    def __init__(self):
        if self.patterns is None:
            self.patterns = []
            for k, p in PATTERNS:
                self.patterns.append((k, re.compile(p)))
        Scanner.__init__(self, None, ['[ \r\t\n]+'])


################################################################################

# Optional C implementation of the scanner.
try:
    import _scss
    _scss.setup_patterns(PATTERNS)
    _Scanner_b = _scss.Scanner
except ImportError:
    _Scanner_b = None

+ _Scanner_b = None -################################################################################ - - -codestr = """ -simple { - block; -} -#{ignored}; -some, -selectors, -and multi-lined, -selectors -with more -{ - the block in here; - can have, nested, selectors { - and properties in nested blocks; - and stuff with #{ ignored blocks }; - } - properties-can: "have strings with stuff like this: }"; -} -and other, -selectors -can be turned into "lose" -properties -if no commas are found -however this is a selector ( - as well as these things, - which are parameters - and can expand - any number of - lines) { - and this is its block;; -} -""" -verify = '\t----------------------------------------------------------------------\n\t>[1] \'simple\'\n\t----------------------------------------------------------------------\n\t>\t[3] \'block\'\n\t----------------------------------------------------------------------\n\t>[5] \'#{ignored}\'\n\t----------------------------------------------------------------------\n\t>[6] \'some,\\nselectors,\\nand multi-lined,\\nselectors\'\n\t----------------------------------------------------------------------\n\t>[10] \'with more\'\n\t----------------------------------------------------------------------\n\t>\t[12] \'the block in here\'\n\t----------------------------------------------------------------------\n\t>\t[13] \'can have, nested, selectors\'\n\t----------------------------------------------------------------------\n\t>\t\t[14] \'and properties in nested blocks\'\n\t----------------------------------------------------------------------\n\t>\t\t[15] \'and stuff with #{ ignored blocks }\'\n\t----------------------------------------------------------------------\n\t>\t[17] \'properties-can: "have strings with stuff like this: }"\'\n\t----------------------------------------------------------------------\n\t>[19] \'and other,\\nselectors\\ncan be turned into 
"lose"\\nproperties\'\n\t----------------------------------------------------------------------\n\t>[23] \'if no commas are found\\nhowever this is a selector (\\nas well as these things,\\nwhich are parameters\\nand can expand\\nany number of\\nlines)\'\n\t----------------------------------------------------------------------\n\t>\t[30] \'and this is its block\'\n' - - -def process_block(locate_blocks, codestr, level=0, dump=False): +def process_scan(Scanner, codestr, level=0, dump=False): ret = '' if dump else None - for lineno, selprop, block in locate_blocks(codestr): - if dump: - ret += '\t%s\n\t>%s[%s] %s\n' % ('-' * 70, '\t' * level, lineno, repr(selprop)) - if block: - _ret = process_block(locate_blocks, block, level + 1, dump) + s = Scanner([], ['COLOR', 'NUM'], '[(5px - 3) * (5px - 3)]') + while True: + try: + s.scan() if dump: - ret += _ret + ret += '%s\n%s\n' % ('-' * 70, repr(s)) + except: + break return ret -def process_blocks(locate_blocks, codestr): - for q in xrange(50000): - process_block(locate_blocks, codestr) -profiled_process_blocks = profile(process_blocks) +def process_scans(Scanner, codestr): + for q in xrange(10000): + process_scan(Scanner, codestr) +profiled_process_scans = profile(process_scans) if __name__ == "__main__": - codestr = load_string(codestr) - - for locate_blocks, desc in ( - (_locate_blocks_a, "Pure Python, Full algorithm (_locate_blocks_a))"), - (_locate_blocks_b, "Pure Python, Condensed algorithm (_locate_blocks_b))"), - (_locate_blocks_c, "Builtin C Function, Full algorithm (_locate_blocks_c))"), - (_locate_blocks_d, "CTypes C Function, Full algorithm (_locate_blocks_d))")): - if locate_blocks: - ret = process_block(locate_blocks, codestr, dump=True) - # print "This is what `%s()` returned:" % locate_blocks - # print ret + for scanner, desc in ( + (_Scanner_a, "Pure Python, Full algorithm (_Scanner_a)"), + (_Scanner_b, "Builtin C Function, Full algorithm (_Scanner_b)"), + ): + if scanner: + ret = process_scan(scanner, 
codestr, dump=True) + print "This is what %s returned:" % desc + print ret # print repr(ret) assert ret == verify, 'It should be:\n%s' % verify - start = datetime.now() - print >>sys.stderr, "Timing: %s..." % desc, - process_blocks(locate_blocks, codestr) - elap = datetime.now() - start + # start = datetime.now() + # print >>sys.stderr, "Timing: %s..." % desc, + # process_blocks(locate_blocks, codestr) + # elap = datetime.now() - start - elapms = elap.seconds * 1000.0 + elap.microseconds / 1000.0 - print >>sys.stderr, "Done! took %06.3fms" % elapms + # elapms = elap.seconds * 1000.0 + elap.microseconds / 1000.0 + # print >>sys.stderr, "Done! took %06.3fms" % elapms |
