diff options
author | SVN Migration <svn@php.net> | 2004-01-14 21:17:10 +0000 |
---|---|---|
committer | SVN Migration <svn@php.net> | 2004-01-14 21:17:10 +0000 |
commit | cd9a7fe2779af221b9e8616314878ec1c69441b0 (patch) | |
tree | 786edb6a9d5dcb5a62240d7fba5569d29d465402 | |
parent | a508453b1c8e1a4c745dea5c2024063b8f848c60 (diff) | |
download | php-git-cd9a7fe2779af221b9e8616314878ec1c69441b0.tar.gz |
This commit was manufactured by cvs2svn to create branch 'PECL_4_3'.
-rw-r--r-- | ext/sqlite/libsqlite/src/date.c | 845 | ||||
-rw-r--r-- | ext/sqlite/libsqlite/src/vdbeInt.h | 312 | ||||
-rw-r--r-- | ext/sqlite/libsqlite/src/vdbeaux.c | 1042 | ||||
-rw-r--r-- | ext/sqlite/tests/bug26911.phpt | 10 | ||||
-rw-r--r-- | ext/tidy/CREDITS | 2 | ||||
-rw-r--r-- | ext/tidy/README | 122 | ||||
-rw-r--r-- | ext/tidy/TODO | 3 | ||||
-rw-r--r-- | ext/tidy/config.m4 | 35 | ||||
-rw-r--r-- | ext/tidy/examples/cleanhtml.php | 38 | ||||
-rw-r--r-- | ext/tidy/examples/dumpit.php | 93 | ||||
-rw-r--r-- | ext/tidy/examples/urlgrab.php | 60 | ||||
-rw-r--r-- | ext/tidy/package.xml | 64 | ||||
-rw-r--r-- | ext/tidy/php_tidy.h | 238 | ||||
-rw-r--r-- | ext/tidy/tests/001.phpt | 24 | ||||
-rw-r--r-- | ext/tidy/tests/002.phpt | 22 | ||||
-rw-r--r-- | ext/tidy/tests/003.phpt | 25 | ||||
-rw-r--r-- | ext/tidy/tests/004.phpt | 21 | ||||
-rw-r--r-- | ext/tidy/tests/005.html | 1 | ||||
-rw-r--r-- | ext/tidy/tests/005.phpt | 23 | ||||
-rw-r--r-- | ext/tidy/tests/006.phpt | 21 | ||||
-rw-r--r-- | ext/tidy/tests/007.phpt | 36 | ||||
-rw-r--r-- | ext/tidy/tidy.c | 1689 |
22 files changed, 4726 insertions, 0 deletions
diff --git a/ext/sqlite/libsqlite/src/date.c b/ext/sqlite/libsqlite/src/date.c new file mode 100644 index 0000000000..64fc3bccdf --- /dev/null +++ b/ext/sqlite/libsqlite/src/date.c @@ -0,0 +1,845 @@ +/* +** 2003 October 31 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains the C functions that implement date and time +** functions for SQLite. +** +** There is only one exported symbol in this file - the function +** sqliteRegisterDateTimeFunctions() found at the bottom of the file. +** All other code has file scope. +** +** $Id$ +** +** NOTES: +** +** SQLite processes all times and dates as Julian Day numbers. The +** dates and times are stored as the number of days since noon +** in Greenwich on November 24, 4714 B.C. according to the Gregorian +** calendar system. +** +** 1970-01-01 00:00:00 is JD 2440587.5 +** 2000-01-01 00:00:00 is JD 2451544.5 +** +** This implementation requires years to be expressed as a 4-digit number +** which means that only dates between 0000-01-01 and 9999-12-31 can +** be represented, even though julian day numbers allow a much wider +** range of dates. +** +** The Gregorian calendar system is used for all dates and times, +** even those that predate the Gregorian calendar. Historians usually +** use the Julian calendar for dates prior to 1582-10-15 and for some +** dates afterwards, depending on locale. Beware of this difference. 
+** +** The conversion algorithms are implemented based on descriptions +** in the following text: +** +** Jean Meeus +** Astronomical Algorithms, 2nd Edition, 1998 +** ISBM 0-943396-61-1 +** Willmann-Bell, Inc +** Richmond, Virginia (USA) +*/ +#ifndef SQLITE_OMIT_DATETIME_FUNCS +#include "os.h" +#include "sqliteInt.h" +#include <ctype.h> +#include <stdlib.h> +#include <assert.h> +#include <time.h> + +/* +** A structure for holding a single date and time. +*/ +typedef struct DateTime DateTime; +struct DateTime { + double rJD; /* The julian day number */ + int Y, M, D; /* Year, month, and day */ + int h, m; /* Hour and minutes */ + int tz; /* Timezone offset in minutes */ + double s; /* Seconds */ + char validYMD; /* True if Y,M,D are valid */ + char validHMS; /* True if h,m,s are valid */ + char validJD; /* True if rJD is valid */ + char validTZ; /* True if tz is valid */ +}; + + +/* +** Convert N digits from zDate into an integer. Return +** -1 if zDate does not begin with N digits. +*/ +static int getDigits(const char *zDate, int N){ + int val = 0; + while( N-- ){ + if( !isdigit(*zDate) ) return -1; + val = val*10 + *zDate - '0'; + zDate++; + } + return val; +} + +/* +** Read text from z[] and convert into a floating point number. Return +** the number of digits converted. +*/ +static int getValue(const char *z, double *pR){ + double r = 0.0; + double rDivide = 1.0; + int isNeg = 0; + int nChar = 0; + if( *z=='+' ){ + z++; + nChar++; + }else if( *z=='-' ){ + z++; + isNeg = 1; + nChar++; + } + if( !isdigit(*z) ) return 0; + while( isdigit(*z) ){ + r = r*10.0 + *z - '0'; + nChar++; + z++; + } + if( *z=='.' && isdigit(z[1]) ){ + z++; + nChar++; + while( isdigit(*z) ){ + r = r*10.0 + *z - '0'; + rDivide *= 10.0; + nChar++; + z++; + } + r /= rDivide; + } + if( *z!=0 && !isspace(*z) ) return 0; + *pR = isNeg ? -r : r; + return nChar; +} + +/* +** Parse a timezone extension on the end of a date-time. 
+** The extension is of the form: +** +** (+/-)HH:MM +** +** If the parse is successful, write the number of minutes +** of change in *pnMin and return 0. If a parser error occurs, +** return 0. +** +** A missing specifier is not considered an error. +*/ +static int parseTimezone(const char *zDate, DateTime *p){ + int sgn = 0; + int nHr, nMn; + while( isspace(*zDate) ){ zDate++; } + p->tz = 0; + if( *zDate=='-' ){ + sgn = -1; + }else if( *zDate=='+' ){ + sgn = +1; + }else{ + return *zDate!=0; + } + zDate++; + nHr = getDigits(zDate, 2); + if( nHr<0 || nHr>14 ) return 1; + zDate += 2; + if( zDate[0]!=':' ) return 1; + zDate++; + nMn = getDigits(zDate, 2); + if( nMn<0 || nMn>59 ) return 1; + zDate += 2; + p->tz = sgn*(nMn + nHr*60); + while( isspace(*zDate) ){ zDate++; } + return *zDate!=0; +} + +/* +** Parse times of the form HH:MM or HH:MM:SS or HH:MM:SS.FFFF. +** The HH, MM, and SS must each be exactly 2 digits. The +** fractional seconds FFFF can be one or more digits. +** +** Return 1 if there is a parsing error and 0 on success. +*/ +static int parseHhMmSs(const char *zDate, DateTime *p){ + int h, m, s; + double ms = 0.0; + h = getDigits(zDate, 2); + if( h<0 || zDate[2]!=':' ) return 1; + zDate += 3; + m = getDigits(zDate, 2); + if( m<0 || m>59 ) return 1; + zDate += 2; + if( *zDate==':' ){ + s = getDigits(&zDate[1], 2); + if( s<0 || s>59 ) return 1; + zDate += 3; + if( *zDate=='.' && isdigit(zDate[1]) ){ + double rScale = 1.0; + zDate++; + while( isdigit(*zDate) ){ + ms = ms*10.0 + *zDate - '0'; + rScale *= 10.0; + zDate++; + } + ms /= rScale; + } + }else{ + s = 0; + } + p->validJD = 0; + p->validHMS = 1; + p->h = h; + p->m = m; + p->s = s + ms; + if( parseTimezone(zDate, p) ) return 1; + p->validTZ = p->tz!=0; + return 0; +} + +/* +** Convert from YYYY-MM-DD HH:MM:SS to julian day. We always assume +** that the YYYY-MM-DD is according to the Gregorian calendar. 
+** +** Reference: Meeus page 61 +*/ +static void computeJD(DateTime *p){ + int Y, M, D, A, B, X1, X2; + + if( p->validJD ) return; + if( p->validYMD ){ + Y = p->Y; + M = p->M; + D = p->D; + }else{ + Y = 2000; /* If no YMD specified, assume 2000-Jan-01 */ + M = 1; + D = 1; + } + if( M<=2 ){ + Y--; + M += 12; + } + A = Y/100; + B = 2 - A + (A/4); + X1 = 365.25*(Y+4716); + X2 = 30.6001*(M+1); + p->rJD = X1 + X2 + D + B - 1524.5; + p->validJD = 1; + p->validYMD = 0; + if( p->validHMS ){ + p->rJD += (p->h*3600.0 + p->m*60.0 + p->s)/86400.0; + if( p->validTZ ){ + p->rJD += p->tz*60/86400.0; + p->validHMS = 0; + p->validTZ = 0; + } + } +} + +/* +** Parse dates of the form +** +** YYYY-MM-DD HH:MM:SS.FFF +** YYYY-MM-DD HH:MM:SS +** YYYY-MM-DD HH:MM +** YYYY-MM-DD +** +** Write the result into the DateTime structure and return 0 +** on success and 1 if the input string is not a well-formed +** date. +*/ +static int parseYyyyMmDd(const char *zDate, DateTime *p){ + int Y, M, D; + + Y = getDigits(zDate, 4); + if( Y<0 || zDate[4]!='-' ) return 1; + zDate += 5; + M = getDigits(zDate, 2); + if( M<=0 || M>12 || zDate[2]!='-' ) return 1; + zDate += 3; + D = getDigits(zDate, 2); + if( D<=0 || D>31 ) return 1; + zDate += 2; + while( isspace(*zDate) ){ zDate++; } + if( isdigit(*zDate) ){ + if( parseHhMmSs(zDate, p) ) return 1; + }else if( *zDate==0 ){ + p->validHMS = 0; + }else{ + return 1; + } + p->validJD = 0; + p->validYMD = 1; + p->Y = Y; + p->M = M; + p->D = D; + if( p->validTZ ){ + computeJD(p); + } + return 0; +} + +/* +** Attempt to parse the given string into a Julian Day Number. Return +** the number of errors. +** +** The following are acceptable forms for the input string: +** +** YYYY-MM-DD HH:MM:SS.FFF +/-HH:MM +** DDDD.DD +** now +** +** In the first form, the +/-HH:MM is always optional. The fractional +** seconds extension (the ".FFF") is optional. The seconds portion +** (":SS.FFF") is option. The year and date can be omitted as long +** as there is a time string. 
The time string can be omitted as long +** as there is a year and date. +*/ +static int parseDateOrTime(const char *zDate, DateTime *p){ + int i; + memset(p, 0, sizeof(*p)); + for(i=0; isdigit(zDate[i]); i++){} + if( i==4 && zDate[i]=='-' ){ + return parseYyyyMmDd(zDate, p); + }else if( i==2 && zDate[i]==':' ){ + return parseHhMmSs(zDate, p); + return 0; + }else if( i==0 && sqliteStrICmp(zDate,"now")==0 ){ + double r; + if( sqliteOsCurrentTime(&r)==0 ){ + p->rJD = r; + p->validJD = 1; + return 0; + } + return 1; + }else if( sqliteIsNumber(zDate) ){ + p->rJD = sqliteAtoF(zDate); + p->validJD = 1; + return 0; + } + return 1; +} + +/* +** Compute the Year, Month, and Day from the julian day number. +*/ +static void computeYMD(DateTime *p){ + int Z, A, B, C, D, E, X1; + if( p->validYMD ) return; + Z = p->rJD + 0.5; + A = (Z - 1867216.25)/36524.25; + A = Z + 1 + A - (A/4); + B = A + 1524; + C = (B - 122.1)/365.25; + D = 365.25*C; + E = (B-D)/30.6001; + X1 = 30.6001*E; + p->D = B - D - X1; + p->M = E<14 ? E-1 : E-13; + p->Y = p->M>2 ? C - 4716 : C - 4715; + p->validYMD = 1; +} + +/* +** Compute the Hour, Minute, and Seconds from the julian day number. +*/ +static void computeHMS(DateTime *p){ + int Z, s; + if( p->validHMS ) return; + Z = p->rJD + 0.5; + s = (p->rJD + 0.5 - Z)*86400000.0 + 0.5; + p->s = 0.001*s; + s = p->s; + p->s -= s; + p->h = s/3600; + s -= p->h*3600; + p->m = s/60; + p->s += s - p->m*60; + p->validHMS = 1; +} + +/* +** Compute both YMD and HMS +*/ +static void computeYMD_HMS(DateTime *p){ + computeYMD(p); + computeHMS(p); +} + +/* +** Clear the YMD and HMS and the TZ +*/ +static void clearYMD_HMS_TZ(DateTime *p){ + p->validYMD = 0; + p->validHMS = 0; + p->validTZ = 0; +} + +/* +** Compute the difference (in days) between localtime and UTC (a.k.a. GMT) +** for the time value p where p is in UTC. 
+*/ +static double localtimeOffset(DateTime *p){ + DateTime x, y; + time_t t; + struct tm *pTm; + x = *p; + computeYMD_HMS(&x); + if( x.Y<1971 || x.Y>=2038 ){ + x.Y = 2000; + x.M = 1; + x.D = 1; + x.h = 0; + x.m = 0; + x.s = 0.0; + } else { + int s = x.s + 0.5; + x.s = s; + } + x.tz = 0; + x.validJD = 0; + computeJD(&x); + t = (x.rJD-2440587.5)*86400.0 + 0.5; + sqliteOsEnterMutex(); + pTm = localtime(&t); + y.Y = pTm->tm_year + 1900; + y.M = pTm->tm_mon + 1; + y.D = pTm->tm_mday; + y.h = pTm->tm_hour; + y.m = pTm->tm_min; + y.s = pTm->tm_sec; + sqliteOsLeaveMutex(); + y.validYMD = 1; + y.validHMS = 1; + y.validJD = 0; + y.validTZ = 0; + computeJD(&y); + return y.rJD - x.rJD; +} + +/* +** Process a modifier to a date-time stamp. The modifiers are +** as follows: +** +** NNN days +** NNN hours +** NNN minutes +** NNN.NNNN seconds +** NNN months +** NNN years +** start of month +** start of year +** start of week +** start of day +** weekday N +** unixepoch +** localtime +** utc +** +** Return 0 on success and 1 if there is any kind of error. +*/ +static int parseModifier(const char *zMod, DateTime *p){ + int rc = 1; + int n; + double r; + char z[30]; + for(n=0; n<sizeof(z)-1 && zMod[n]; n++){ + z[n] = tolower(zMod[n]); + } + z[n] = 0; + switch( z[0] ){ + case 'l': { + /* localtime + ** + ** Assuming the current time value is UTC (a.k.a. GMT), shift it to + ** show local time. + */ + if( strcmp(z, "localtime")==0 ){ + computeJD(p); + p->rJD += localtimeOffset(p); + clearYMD_HMS_TZ(p); + rc = 0; + } + break; + } + case 'u': { + /* + ** unixepoch + ** + ** Treat the current value of p->rJD as the number of + ** seconds since 1970. Convert to a real julian day number. 
+ */ + if( strcmp(z, "unixepoch")==0 && p->validJD ){ + p->rJD = p->rJD/86400.0 + 2440587.5; + clearYMD_HMS_TZ(p); + rc = 0; + }else if( strcmp(z, "utc")==0 ){ + double c1; + computeJD(p); + c1 = localtimeOffset(p); + p->rJD -= c1; + clearYMD_HMS_TZ(p); + p->rJD += c1 - localtimeOffset(p); + rc = 0; + } + break; + } + case 'w': { + /* + ** weekday N + ** + ** Move the date to the same time on the next occurrance of + ** weekday N where 0==Sunday, 1==Monday, and so forth. If the + ** date is already on the appropriate weekday, this is a no-op. + */ + if( strncmp(z, "weekday ", 8)==0 && getValue(&z[8],&r)>0 + && (n=r)==r && n>=0 && r<7 ){ + int Z; + computeYMD_HMS(p); + p->validTZ = 0; + p->validJD = 0; + computeJD(p); + Z = p->rJD + 1.5; + Z %= 7; + if( Z>n ) Z -= 7; + p->rJD += n - Z; + clearYMD_HMS_TZ(p); + rc = 0; + } + break; + } + case 's': { + /* + ** start of TTTTT + ** + ** Move the date backwards to the beginning of the current day, + ** or month or year. + */ + if( strncmp(z, "start of ", 9)!=0 ) break; + zMod = &z[9]; + computeYMD(p); + p->validHMS = 1; + p->h = p->m = 0; + p->s = 0.0; + p->validTZ = 0; + p->validJD = 0; + if( strcmp(zMod,"month")==0 ){ + p->D = 1; + rc = 0; + }else if( strcmp(zMod,"year")==0 ){ + computeYMD(p); + p->M = 1; + p->D = 1; + rc = 0; + }else if( strcmp(zMod,"day")==0 ){ + rc = 0; + } + break; + } + case '+': + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + n = getValue(z, &r); + if( n<=0 ) break; + zMod = &z[n]; + while( isspace(zMod[0]) ) zMod++; + n = strlen(zMod); + if( n>10 || n<3 ) break; + strcpy(z, zMod); + if( z[n-1]=='s' ){ z[n-1] = 0; n--; } + computeJD(p); + rc = 0; + if( n==3 && strcmp(z,"day")==0 ){ + p->rJD += r; + }else if( n==4 && strcmp(z,"hour")==0 ){ + p->rJD += r/24.0; + }else if( n==6 && strcmp(z,"minute")==0 ){ + p->rJD += r/(24.0*60.0); + }else if( n==6 && strcmp(z,"second")==0 ){ + p->rJD += r/(24.0*60.0*60.0); + 
}else if( n==5 && strcmp(z,"month")==0 ){ + int x, y; + computeYMD_HMS(p); + p->M += r; + x = p->M>0 ? (p->M-1)/12 : (p->M-12)/12; + p->Y += x; + p->M -= x*12; + p->validJD = 0; + computeJD(p); + y = r; + if( y!=r ){ + p->rJD += (r - y)*30.0; + } + }else if( n==4 && strcmp(z,"year")==0 ){ + computeYMD_HMS(p); + p->Y += r; + p->validJD = 0; + computeJD(p); + }else{ + rc = 1; + } + clearYMD_HMS_TZ(p); + break; + } + default: { + break; + } + } + return rc; +} + +/* +** Process time function arguments. argv[0] is a date-time stamp. +** argv[1] and following are modifiers. Parse them all and write +** the resulting time into the DateTime structure p. Return 0 +** on success and 1 if there are any errors. +*/ +static int isDate(int argc, const char **argv, DateTime *p){ + int i; + if( argc==0 ) return 1; + if( argv[0]==0 || parseDateOrTime(argv[0], p) ) return 1; + for(i=1; i<argc; i++){ + if( argv[i]==0 || parseModifier(argv[i], p) ) return 1; + } + return 0; +} + + +/* +** The following routines implement the various date and time functions +** of SQLite. +*/ + +/* +** julianday( TIMESTRING, MOD, MOD, ...) +** +** Return the julian day number of the date specified in the arguments +*/ +static void juliandayFunc(sqlite_func *context, int argc, const char **argv){ + DateTime x; + if( isDate(argc, argv, &x)==0 ){ + computeJD(&x); + sqlite_set_result_double(context, x.rJD); + } +} + +/* +** datetime( TIMESTRING, MOD, MOD, ...) +** +** Return YYYY-MM-DD HH:MM:SS +*/ +static void datetimeFunc(sqlite_func *context, int argc, const char **argv){ + DateTime x; + if( isDate(argc, argv, &x)==0 ){ + char zBuf[100]; + computeYMD_HMS(&x); + sprintf(zBuf, "%04d-%02d-%02d %02d:%02d:%02d",x.Y, x.M, x.D, x.h, x.m, + (int)(x.s)); + sqlite_set_result_string(context, zBuf, -1); + } +} + +/* +** time( TIMESTRING, MOD, MOD, ...) 
+** +** Return HH:MM:SS +*/ +static void timeFunc(sqlite_func *context, int argc, const char **argv){ + DateTime x; + if( isDate(argc, argv, &x)==0 ){ + char zBuf[100]; + computeHMS(&x); + sprintf(zBuf, "%02d:%02d:%02d", x.h, x.m, (int)x.s); + sqlite_set_result_string(context, zBuf, -1); + } +} + +/* +** date( TIMESTRING, MOD, MOD, ...) +** +** Return YYYY-MM-DD +*/ +static void dateFunc(sqlite_func *context, int argc, const char **argv){ + DateTime x; + if( isDate(argc, argv, &x)==0 ){ + char zBuf[100]; + computeYMD(&x); + sprintf(zBuf, "%04d-%02d-%02d", x.Y, x.M, x.D); + sqlite_set_result_string(context, zBuf, -1); + } +} + +/* +** strftime( FORMAT, TIMESTRING, MOD, MOD, ...) +** +** Return a string described by FORMAT. Conversions as follows: +** +** %d day of month +** %f ** fractional seconds SS.SSS +** %H hour 00-24 +** %j day of year 000-366 +** %J ** Julian day number +** %m month 01-12 +** %M minute 00-59 +** %s seconds since 1970-01-01 +** %S seconds 00-59 +** %w day of week 0-6 sunday==0 +** %W week of year 00-53 +** %Y year 0000-9999 +** %% % +*/ +static void strftimeFunc(sqlite_func *context, int argc, const char **argv){ + DateTime x; + int n, i, j; + char *z; + const char *zFmt = argv[0]; + char zBuf[100]; + if( argv[0]==0 || isDate(argc-1, argv+1, &x) ) return; + for(i=0, n=1; zFmt[i]; i++, n++){ + if( zFmt[i]=='%' ){ + switch( zFmt[i+1] ){ + case 'd': + case 'H': + case 'm': + case 'M': + case 'S': + case 'W': + n++; + /* fall thru */ + case 'w': + case '%': + break; + case 'f': + n += 8; + break; + case 'j': + n += 3; + break; + case 'Y': + n += 8; + break; + case 's': + case 'J': + n += 50; + break; + default: + return; /* ERROR. 
return a NULL */ + } + i++; + } + } + if( n<sizeof(zBuf) ){ + z = zBuf; + }else{ + z = sqliteMalloc( n ); + if( z==0 ) return; + } + computeJD(&x); + computeYMD_HMS(&x); + for(i=j=0; zFmt[i]; i++){ + if( zFmt[i]!='%' ){ + z[j++] = zFmt[i]; + }else{ + i++; + switch( zFmt[i] ){ + case 'd': sprintf(&z[j],"%02d",x.D); j+=2; break; + case 'f': { + int s = x.s; + int ms = (x.s - s)*1000.0; + sprintf(&z[j],"%02d.%03d",s,ms); + j += strlen(&z[j]); + break; + } + case 'H': sprintf(&z[j],"%02d",x.h); j+=2; break; + case 'W': /* Fall thru */ + case 'j': { + int n; + DateTime y = x; + y.validJD = 0; + y.M = 1; + y.D = 1; + computeJD(&y); + n = x.rJD - y.rJD + 1; + if( zFmt[i]=='W' ){ + sprintf(&z[j],"%02d",(n+6)/7); + j += 2; + }else{ + sprintf(&z[j],"%03d",n); + j += 3; + } + break; + } + case 'J': sprintf(&z[j],"%.16g",x.rJD); j+=strlen(&z[j]); break; + case 'm': sprintf(&z[j],"%02d",x.M); j+=2; break; + case 'M': sprintf(&z[j],"%02d",x.m); j+=2; break; + case 's': { + sprintf(&z[j],"%d",(int)((x.rJD-2440587.5)*86400.0 + 0.5)); + j += strlen(&z[j]); + break; + } + case 'S': sprintf(&z[j],"%02d",(int)(x.s+0.5)); j+=2; break; + case 'w': z[j++] = (((int)(x.rJD+1.5)) % 7) + '0'; break; + case 'Y': sprintf(&z[j],"%04d",x.Y); j+=strlen(&z[j]); break; + case '%': z[j++] = '%'; break; + } + } + } + z[j] = 0; + sqlite_set_result_string(context, z, -1); + if( z!=zBuf ){ + sqliteFree(z); + } +} + + +#endif /* !defined(SQLITE_OMIT_DATETIME_FUNCS) */ + +/* +** This function registered all of the above C functions as SQL +** functions. This should be the only routine in this file with +** external linkage. 
+*/ +void sqliteRegisterDateTimeFunctions(sqlite *db){ + static struct { + char *zName; + int nArg; + int dataType; + void (*xFunc)(sqlite_func*,int,const char**); + } aFuncs[] = { +#ifndef SQLITE_OMIT_DATETIME_FUNCS + { "julianday", -1, SQLITE_NUMERIC, juliandayFunc }, + { "date", -1, SQLITE_TEXT, dateFunc }, + { "time", -1, SQLITE_TEXT, timeFunc }, + { "datetime", -1, SQLITE_TEXT, datetimeFunc }, + { "strftime", -1, SQLITE_TEXT, strftimeFunc }, +#endif + }; + int i; + + for(i=0; i<sizeof(aFuncs)/sizeof(aFuncs[0]); i++){ + sqlite_create_function(db, aFuncs[i].zName, + aFuncs[i].nArg, aFuncs[i].xFunc, 0); + if( aFuncs[i].xFunc ){ + sqlite_function_type(db, aFuncs[i].zName, aFuncs[i].dataType); + } + } +} diff --git a/ext/sqlite/libsqlite/src/vdbeInt.h b/ext/sqlite/libsqlite/src/vdbeInt.h new file mode 100644 index 0000000000..d58523ad28 --- /dev/null +++ b/ext/sqlite/libsqlite/src/vdbeInt.h @@ -0,0 +1,312 @@ +/* +** 2003 September 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This is the header file for information that is private to the +** VDBE. This information used to all be at the top of the single +** source code file "vdbe.c". When that file became too big (over +** 6000 lines long) it was split up into several smaller files and +** this header information was factored out. +*/ + +/* +** When converting from the native format to the key format and back +** again, in addition to changing the byte order we invert the high-order +** bit of the most significant byte. This causes negative numbers to +** sort before positive numbers in the memcmp() function. 
+*/ +#define keyToInt(X) (sqliteVdbeByteSwap(X) ^ 0x80000000) +#define intToKey(X) (sqliteVdbeByteSwap((X) ^ 0x80000000)) + +/* +** The makefile scans this source file and creates the following +** array of string constants which are the names of all VDBE opcodes. +** This array is defined in a separate source code file named opcode.c +** which is automatically generated by the makefile. +*/ +extern char *sqliteOpcodeNames[]; + +/* +** SQL is translated into a sequence of instructions to be +** executed by a virtual machine. Each instruction is an instance +** of the following structure. +*/ +typedef struct VdbeOp Op; + +/* +** Boolean values +*/ +typedef unsigned char Bool; + +/* +** A cursor is a pointer into a single BTree within a database file. +** The cursor can seek to a BTree entry with a particular key, or +** loop over all entries of the Btree. You can also insert new BTree +** entries or retrieve the key or data from the entry that the cursor +** is currently pointing to. +** +** Every cursor that the virtual machine has open is represented by an +** instance of the following structure. +** +** If the Cursor.isTriggerRow flag is set it means that this cursor is +** really a single row that represents the NEW or OLD pseudo-table of +** a row trigger. The data for the row is stored in Cursor.pData and +** the rowid is in Cursor.iKey. 
+*/ +struct Cursor { + BtCursor *pCursor; /* The cursor structure of the backend */ + int lastRecno; /* Last recno from a Next or NextIdx operation */ + int nextRowid; /* Next rowid returned by OP_NewRowid */ + Bool recnoIsValid; /* True if lastRecno is valid */ + Bool keyAsData; /* The OP_Column command works on key instead of data */ + Bool atFirst; /* True if pointing to first entry */ + Bool useRandomRowid; /* Generate new record numbers semi-randomly */ + Bool nullRow; /* True if pointing to a row with no data */ + Bool nextRowidValid; /* True if the nextRowid field is valid */ + Bool pseudoTable; /* This is a NEW or OLD pseudo-tables of a trigger */ + Bool deferredMoveto; /* A call to sqliteBtreeMoveto() is needed */ + int movetoTarget; /* Argument to the deferred sqliteBtreeMoveto() */ + Btree *pBt; /* Separate file holding temporary table */ + int nData; /* Number of bytes in pData */ + char *pData; /* Data for a NEW or OLD pseudo-table */ + int iKey; /* Key for the NEW or OLD pseudo-table row */ +}; +typedef struct Cursor Cursor; + +/* +** A sorter builds a list of elements to be sorted. Each element of +** the list is an instance of the following structure. +*/ +typedef struct Sorter Sorter; +struct Sorter { + int nKey; /* Number of bytes in the key */ + char *zKey; /* The key by which we will sort */ + int nData; /* Number of bytes in the data */ + char *pData; /* The data associated with this key */ + Sorter *pNext; /* Next in the list */ +}; + +/* +** Number of buckets used for merge-sort. +*/ +#define NSORT 30 + +/* +** Number of bytes of string storage space available to each stack +** layer without having to malloc. NBFS is short for Number of Bytes +** For Strings. +*/ +#define NBFS 32 + +/* +** A single level of the stack is an instance of the following +** structure. Except, string values are stored on a separate +** list of of pointers to character. 
The reason for storing +** strings separately is so that they can be easily passed +** to the callback function. +*/ +struct Stack { + int i; /* Integer value */ + int n; /* Number of characters in string value, including '\0' */ + int flags; /* Some combination of STK_Null, STK_Str, STK_Dyn, etc. */ + double r; /* Real value */ + char z[NBFS]; /* Space for short strings */ +}; +typedef struct Stack Stack; + +/* +** Memory cells use the same structure as the stack except that space +** for an arbitrary string is added. +*/ +struct Mem { + Stack s; /* All values of the memory cell besides string */ + char *z; /* String value for this memory cell */ +}; +typedef struct Mem Mem; + +/* +** Allowed values for Stack.flags +*/ +#define STK_Null 0x0001 /* Value is NULL */ +#define STK_Str 0x0002 /* Value is a string */ +#define STK_Int 0x0004 /* Value is an integer */ +#define STK_Real 0x0008 /* Value is a real number */ +#define STK_Dyn 0x0010 /* Need to call sqliteFree() on zStack[] */ +#define STK_Static 0x0020 /* zStack[] points to a static string */ +#define STK_Ephem 0x0040 /* zStack[] points to an ephemeral string */ + +/* The following STK_ value appears only in AggElem.aMem.s.flag fields. +** It indicates that the corresponding AggElem.aMem.z points to a +** aggregate function context that needs to be finalized. +*/ +#define STK_AggCtx 0x0040 /* zStack[] points to an agg function context */ + +/* +** The "context" argument for a installable function. A pointer to an +** instance of this structure is the first argument to the routines used +** implement the SQL functions. +** +** There is a typedef for this structure in sqlite.h. So all routines, +** even the public interface to SQLite, can use a pointer to this structure. +** But this file is the only place where the internal details of this +** structure are known. +** +** This structure is defined inside of vdbe.c because it uses substructures +** (Stack) which are only defined there. 
+*/ +struct sqlite_func { + FuncDef *pFunc; /* Pointer to function information. MUST BE FIRST */ + Stack s; /* Small strings, ints, and double values go here */ + char *z; /* Space for holding dynamic string results */ + void *pAgg; /* Aggregate context */ + u8 isError; /* Set to true for an error */ + u8 isStep; /* Current in the step function */ + int cnt; /* Number of times that the step function has been called */ +}; + +/* +** An Agg structure describes an Aggregator. Each Agg consists of +** zero or more Aggregator elements (AggElem). Each AggElem contains +** a key and one or more values. The values are used in processing +** aggregate functions in a SELECT. The key is used to implement +** the GROUP BY clause of a select. +*/ +typedef struct Agg Agg; +typedef struct AggElem AggElem; +struct Agg { + int nMem; /* Number of values stored in each AggElem */ + AggElem *pCurrent; /* The AggElem currently in focus */ + HashElem *pSearch; /* The hash element for pCurrent */ + Hash hash; /* Hash table of all aggregate elements */ + FuncDef **apFunc; /* Information about aggregate functions */ +}; +struct AggElem { + char *zKey; /* The key to this AggElem */ + int nKey; /* Number of bytes in the key, including '\0' at end */ + Mem aMem[1]; /* The values for this AggElem */ +}; + +/* +** A Set structure is used for quick testing to see if a value +** is part of a small set. Sets are used to implement code like +** this: +** x.y IN ('hi','hoo','hum') +*/ +typedef struct Set Set; +struct Set { + Hash hash; /* A set is just a hash table */ + HashElem *prev; /* Previously accessed hash elemen */ +}; + +/* +** A Keylist is a bunch of keys into a table. The keylist can +** grow without bound. The keylist stores the ROWIDs of database +** records that need to be deleted or updated. 
+*/ +typedef struct Keylist Keylist; +struct Keylist { + int nKey; /* Number of slots in aKey[] */ + int nUsed; /* Next unwritten slot in aKey[] */ + int nRead; /* Next unread slot in aKey[] */ + Keylist *pNext; /* Next block of keys */ + int aKey[1]; /* One or more keys. Extra space allocated as needed */ +}; + +/* +** An instance of the virtual machine. This structure contains the complete +** state of the virtual machine. +** +** The "sqlite_vm" structure pointer that is returned by sqlite_compile() +** is really a pointer to an instance of this structure. +*/ +struct Vdbe { + sqlite *db; /* The whole database */ + Vdbe *pPrev,*pNext; /* Linked list of VDBEs with the same Vdbe.db */ + FILE *trace; /* Write an execution trace here, if not NULL */ + int nOp; /* Number of instructions in the program */ + int nOpAlloc; /* Number of slots allocated for aOp[] */ + Op *aOp; /* Space to hold the virtual machine's program */ + int nLabel; /* Number of labels used */ + int nLabelAlloc; /* Number of slots allocated in aLabel[] */ + int *aLabel; /* Space to hold the labels */ + int tos; /* Index of top of stack */ + Stack *aStack; /* The operand stack, except string values */ + char **zStack; /* Text or binary values of the stack */ + char **azColName; /* Becomes the 4th parameter to callbacks */ + int nCursor; /* Number of slots in aCsr[] */ + Cursor *aCsr; /* One element of this array for each open cursor */ + Sorter *pSort; /* A linked list of objects to be sorted */ + FILE *pFile; /* At most one open file handler */ + int nField; /* Number of file fields */ + char **azField; /* Data for each file field */ + int nVar; /* Number of entries in azVariable[] */ + char **azVar; /* Values for the OP_Variable opcode */ + int *anVar; /* Length of each value in azVariable[] */ + u8 *abVar; /* TRUE if azVariable[i] needs to be sqliteFree()ed */ + char *zLine; /* A single line from the input file */ + int nLineAlloc; /* Number of spaces allocated for zLine */ + int magic; /* Magic 
number for sanity checking */ + int nMem; /* Number of memory locations currently allocated */ + Mem *aMem; /* The memory locations */ + Agg agg; /* Aggregate information */ + int nSet; /* Number of sets allocated */ + Set *aSet; /* An array of sets */ + int nCallback; /* Number of callbacks invoked so far */ + Keylist *pList; /* A list of ROWIDs */ + int keylistStackDepth; /* The size of the "keylist" stack */ + Keylist **keylistStack; /* The stack used by opcodes ListPush & ListPop */ + int pc; /* The program counter */ + int rc; /* Value to return */ + unsigned uniqueCnt; /* Used by OP_MakeRecord when P2!=0 */ + int errorAction; /* Recovery action to do in case of an error */ + int undoTransOnError; /* If error, either ROLLBACK or COMMIT */ + int inTempTrans; /* True if temp database is transactioned */ + int returnStack[100]; /* Return address stack for OP_Gosub & OP_Return */ + int returnDepth; /* Next unused element in returnStack[] */ + int nResColumn; /* Number of columns in one row of the result set */ + char **azResColumn; /* Values for one row of result */ + int (*xCallback)(void*,int,char**,char**); /* Callback for SELECT results */ + void *pCbArg; /* First argument to xCallback() */ + int popStack; /* Pop the stack this much on entry to VdbeExec() */ + char *zErrMsg; /* Error message written here */ + u8 explain; /* True if EXPLAIN present on SQL command */ +}; + +/* +** The following are allowed values for Vdbe.magic +*/ +#define VDBE_MAGIC_INIT 0x26bceaa5 /* Building a VDBE program */ +#define VDBE_MAGIC_RUN 0xbdf20da3 /* VDBE is ready to execute */ +#define VDBE_MAGIC_HALT 0x519c2973 /* VDBE has completed execution */ +#define VDBE_MAGIC_DEAD 0xb606c3c8 /* The VDBE has been deallocated */ + +/* +** Here is a macro to handle the common case of popping the stack +** once. This macro only works from within the sqliteVdbeExec() +** function. 
+*/ +#define POPSTACK \ + assert(p->tos>=0); \ + if( aStack[p->tos].flags & STK_Dyn ) sqliteFree(zStack[p->tos]); \ + p->tos--; + +/* +** Function prototypes +*/ +void sqliteVdbeCleanupCursor(Cursor*); +void sqliteVdbeSorterReset(Vdbe*); +void sqliteVdbeAggReset(Agg*); +void sqliteVdbeKeylistFree(Keylist*); +void sqliteVdbePopStack(Vdbe*,int); +int sqliteVdbeCursorMoveto(Cursor*); +int sqliteVdbeByteSwap(int); +#if !defined(NDEBUG) || defined(VDBE_PROFILE) +void sqliteVdbePrintOp(FILE*, int, Op*); +#endif diff --git a/ext/sqlite/libsqlite/src/vdbeaux.c b/ext/sqlite/libsqlite/src/vdbeaux.c new file mode 100644 index 0000000000..6249a29f0b --- /dev/null +++ b/ext/sqlite/libsqlite/src/vdbeaux.c @@ -0,0 +1,1042 @@ +/* +** 2003 September 6 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** This file contains code used for creating, destroying, and populating +** a VDBE (or an "sqlite_vm" as it is known to the outside world.) Prior +** to version 2.8.7, all this code was combined into the vdbe.c source file. +** But that file was getting too big so this subroutines were split out. +*/ +#include "sqliteInt.h" +#include "os.h" +#include <ctype.h> +#include "vdbeInt.h" + + +/* +** When debugging the code generator in a symbolic debugger, one can +** set the sqlite_vdbe_addop_trace to 1 and all opcodes will be printed +** as they are added to the instruction stream. +*/ +#ifndef NDEBUG +int sqlite_vdbe_addop_trace = 0; +#endif + + +/* +** Create a new virtual database engine. 
+*/ +Vdbe *sqliteVdbeCreate(sqlite *db){ + Vdbe *p; + p = sqliteMalloc( sizeof(Vdbe) ); + if( p==0 ) return 0; + p->db = db; + if( db->pVdbe ){ + db->pVdbe->pPrev = p; + } + p->pNext = db->pVdbe; + p->pPrev = 0; + db->pVdbe = p; + p->magic = VDBE_MAGIC_INIT; + return p; +} + +/* +** Turn tracing on or off +*/ +void sqliteVdbeTrace(Vdbe *p, FILE *trace){ + p->trace = trace; +} + +/* +** Add a new instruction to the list of instructions current in the +** VDBE. Return the address of the new instruction. +** +** Parameters: +** +** p Pointer to the VDBE +** +** op The opcode for this instruction +** +** p1, p2 First two of the three possible operands. +** +** Use the sqliteVdbeResolveLabel() function to fix an address and +** the sqliteVdbeChangeP3() function to change the value of the P3 +** operand. +*/ +int sqliteVdbeAddOp(Vdbe *p, int op, int p1, int p2){ + int i; + + i = p->nOp; + p->nOp++; + assert( p->magic==VDBE_MAGIC_INIT ); + if( i>=p->nOpAlloc ){ + int oldSize = p->nOpAlloc; + Op *aNew; + p->nOpAlloc = p->nOpAlloc*2 + 100; + aNew = sqliteRealloc(p->aOp, p->nOpAlloc*sizeof(Op)); + if( aNew==0 ){ + p->nOpAlloc = oldSize; + return 0; + } + p->aOp = aNew; + memset(&p->aOp[oldSize], 0, (p->nOpAlloc-oldSize)*sizeof(Op)); + } + p->aOp[i].opcode = op; + p->aOp[i].p1 = p1; + if( p2<0 && (-1-p2)<p->nLabel && p->aLabel[-1-p2]>=0 ){ + p2 = p->aLabel[-1-p2]; + } + p->aOp[i].p2 = p2; + p->aOp[i].p3 = 0; + p->aOp[i].p3type = P3_NOTUSED; +#ifndef NDEBUG + if( sqlite_vdbe_addop_trace ) sqliteVdbePrintOp(0, i, &p->aOp[i]); +#endif + return i; +} + +/* +** Create a new symbolic label for an instruction that has yet to be +** coded. The symbolic label is really just a negative number. The +** label can be used as the P2 value of an operation. Later, when +** the label is resolved to a specific address, the VDBE will scan +** through its operation list and change all values of P2 which match +** the label into the resolved address. 
+** +** The VDBE knows that a P2 value is a label because labels are +** always negative and P2 values are suppose to be non-negative. +** Hence, a negative P2 value is a label that has yet to be resolved. +*/ +int sqliteVdbeMakeLabel(Vdbe *p){ + int i; + i = p->nLabel++; + assert( p->magic==VDBE_MAGIC_INIT ); + if( i>=p->nLabelAlloc ){ + int *aNew; + p->nLabelAlloc = p->nLabelAlloc*2 + 10; + aNew = sqliteRealloc( p->aLabel, p->nLabelAlloc*sizeof(p->aLabel[0])); + if( aNew==0 ){ + sqliteFree(p->aLabel); + } + p->aLabel = aNew; + } + if( p->aLabel==0 ){ + p->nLabel = 0; + p->nLabelAlloc = 0; + return 0; + } + p->aLabel[i] = -1; + return -1-i; +} + +/* +** Resolve label "x" to be the address of the next instruction to +** be inserted. The parameter "x" must have been obtained from +** a prior call to sqliteVdbeMakeLabel(). +*/ +void sqliteVdbeResolveLabel(Vdbe *p, int x){ + int j; + assert( p->magic==VDBE_MAGIC_INIT ); + if( x<0 && (-x)<=p->nLabel && p->aOp ){ + if( p->aLabel[-1-x]==p->nOp ) return; + assert( p->aLabel[-1-x]<0 ); + p->aLabel[-1-x] = p->nOp; + for(j=0; j<p->nOp; j++){ + if( p->aOp[j].p2==x ) p->aOp[j].p2 = p->nOp; + } + } +} + +/* +** Return the address of the next instruction to be inserted. +*/ +int sqliteVdbeCurrentAddr(Vdbe *p){ + assert( p->magic==VDBE_MAGIC_INIT ); + return p->nOp; +} + +/* +** Add a whole list of operations to the operation stack. Return the +** address of the first operation added. 
+*/ +int sqliteVdbeAddOpList(Vdbe *p, int nOp, VdbeOp const *aOp){ + int addr; + assert( p->magic==VDBE_MAGIC_INIT ); + if( p->nOp + nOp >= p->nOpAlloc ){ + int oldSize = p->nOpAlloc; + Op *aNew; + p->nOpAlloc = p->nOpAlloc*2 + nOp + 10; + aNew = sqliteRealloc(p->aOp, p->nOpAlloc*sizeof(Op)); + if( aNew==0 ){ + p->nOpAlloc = oldSize; + return 0; + } + p->aOp = aNew; + memset(&p->aOp[oldSize], 0, (p->nOpAlloc-oldSize)*sizeof(Op)); + } + addr = p->nOp; + if( nOp>0 ){ + int i; + for(i=0; i<nOp; i++){ + int p2 = aOp[i].p2; + p->aOp[i+addr] = aOp[i]; + if( p2<0 ) p->aOp[i+addr].p2 = addr + ADDR(p2); + p->aOp[i+addr].p3type = aOp[i].p3 ? P3_STATIC : P3_NOTUSED; +#ifndef NDEBUG + if( sqlite_vdbe_addop_trace ){ + sqliteVdbePrintOp(0, i+addr, &p->aOp[i+addr]); + } +#endif + } + p->nOp += nOp; + } + return addr; +} + +/* +** Change the value of the P1 operand for a specific instruction. +** This routine is useful when a large program is loaded from a +** static array using sqliteVdbeAddOpList but we want to make a +** few minor changes to the program. +*/ +void sqliteVdbeChangeP1(Vdbe *p, int addr, int val){ + assert( p->magic==VDBE_MAGIC_INIT ); + if( p && addr>=0 && p->nOp>addr && p->aOp ){ + p->aOp[addr].p1 = val; + } +} + +/* +** Change the value of the P2 operand for a specific instruction. +** This routine is useful for setting a jump destination. +*/ +void sqliteVdbeChangeP2(Vdbe *p, int addr, int val){ + assert( val>=0 ); + assert( p->magic==VDBE_MAGIC_INIT ); + if( p && addr>=0 && p->nOp>addr && p->aOp ){ + p->aOp[addr].p2 = val; + } +} + +/* +** Change the value of the P3 operand for a specific instruction. +** This routine is useful when a large program is loaded from a +** static array using sqliteVdbeAddOpList but we want to make a +** few minor changes to the program. +** +** If n>=0 then the P3 operand is dynamic, meaning that a copy of +** the string is made into memory obtained from sqliteMalloc(). 
+** A value of n==0 means copy bytes of zP3 up to and including the +** first null byte. If n>0 then copy n+1 bytes of zP3. +** +** If n==P3_STATIC it means that zP3 is a pointer to a constant static +** string and we can just copy the pointer. n==P3_POINTER means zP3 is +** a pointer to some object other than a string. +** +** If addr<0 then change P3 on the most recently inserted instruction. +*/ +void sqliteVdbeChangeP3(Vdbe *p, int addr, const char *zP3, int n){ + Op *pOp; + assert( p->magic==VDBE_MAGIC_INIT ); + if( p==0 || p->aOp==0 ) return; + if( addr<0 || addr>=p->nOp ){ + addr = p->nOp - 1; + if( addr<0 ) return; + } + pOp = &p->aOp[addr]; + if( pOp->p3 && pOp->p3type==P3_DYNAMIC ){ + sqliteFree(pOp->p3); + pOp->p3 = 0; + } + if( zP3==0 ){ + pOp->p3 = 0; + pOp->p3type = P3_NOTUSED; + }else if( n<0 ){ + pOp->p3 = (char*)zP3; + pOp->p3type = n; + }else{ + sqliteSetNString(&pOp->p3, zP3, n, 0); + pOp->p3type = P3_DYNAMIC; + } +} + +/* +** If the P3 operand to the specified instruction appears +** to be a quoted string token, then this procedure removes +** the quotes. +** +** The quoting operator can be either a grave ascent (ASCII 0x27) +** or a double quote character (ASCII 0x22). Two quotes in a row +** resolve to be a single actual quote character within the string. +*/ +void sqliteVdbeDequoteP3(Vdbe *p, int addr){ + Op *pOp; + assert( p->magic==VDBE_MAGIC_INIT ); + if( p->aOp==0 || addr<0 || addr>=p->nOp ) return; + pOp = &p->aOp[addr]; + if( pOp->p3==0 || pOp->p3[0]==0 ) return; + if( pOp->p3type==P3_POINTER ) return; + if( pOp->p3type!=P3_DYNAMIC ){ + pOp->p3 = sqliteStrDup(pOp->p3); + pOp->p3type = P3_DYNAMIC; + } + sqliteDequote(pOp->p3); +} + +/* +** On the P3 argument of the given instruction, change all +** strings of whitespace characters into a single space and +** delete leading and trailing whitespace. 
+*/ +void sqliteVdbeCompressSpace(Vdbe *p, int addr){ + unsigned char *z; + int i, j; + Op *pOp; + assert( p->magic==VDBE_MAGIC_INIT ); + if( p->aOp==0 || addr<0 || addr>=p->nOp ) return; + pOp = &p->aOp[addr]; + if( pOp->p3type==P3_POINTER ){ + return; + } + if( pOp->p3type!=P3_DYNAMIC ){ + pOp->p3 = sqliteStrDup(pOp->p3); + pOp->p3type = P3_DYNAMIC; + } + z = (unsigned char*)pOp->p3; + if( z==0 ) return; + i = j = 0; + while( isspace(z[i]) ){ i++; } + while( z[i] ){ + if( isspace(z[i]) ){ + z[j++] = ' '; + while( isspace(z[++i]) ){} + }else{ + z[j++] = z[i++]; + } + } + while( j>0 && isspace(z[j-1]) ){ j--; } + z[j] = 0; +} + +/* +** Search for the current program for the given opcode and P2 +** value. Return the address plus 1 if found and 0 if not found. +*/ +int sqliteVdbeFindOp(Vdbe *p, int op, int p2){ + int i; + assert( p->magic==VDBE_MAGIC_INIT ); + for(i=0; i<p->nOp; i++){ + if( p->aOp[i].opcode==op && p->aOp[i].p2==p2 ) return i+1; + } + return 0; +} + +/* +** Return the opcode for a given address. +*/ +VdbeOp *sqliteVdbeGetOp(Vdbe *p, int addr){ + assert( p->magic==VDBE_MAGIC_INIT ); + assert( addr>=0 && addr<p->nOp ); + return &p->aOp[addr]; +} + +/* +** The following group or routines are employed by installable functions +** to return their results. +** +** The sqlite_set_result_string() routine can be used to return a string +** value or to return a NULL. To return a NULL, pass in NULL for zResult. +** A copy is made of the string before this routine returns so it is safe +** to pass in an ephemeral string. +** +** sqlite_set_result_error() works like sqlite_set_result_string() except +** that it signals a fatal error. The string argument, if any, is the +** error message. If the argument is NULL a generic substitute error message +** is used. +** +** The sqlite_set_result_int() and sqlite_set_result_double() set the return +** value of the user function to an integer or a double. 
+** +** These routines are defined here in vdbe.c because they depend on knowing +** the internals of the sqlite_func structure which is only defined in +** this source file. +*/ +char *sqlite_set_result_string(sqlite_func *p, const char *zResult, int n){ + assert( !p->isStep ); + if( p->s.flags & STK_Dyn ){ + sqliteFree(p->z); + } + if( zResult==0 ){ + p->s.flags = STK_Null; + n = 0; + p->z = 0; + p->s.n = 0; + }else{ + if( n<0 ) n = strlen(zResult); + if( n<NBFS-1 ){ + memcpy(p->s.z, zResult, n); + p->s.z[n] = 0; + p->s.flags = STK_Str; + p->z = p->s.z; + }else{ + p->z = sqliteMallocRaw( n+1 ); + if( p->z ){ + memcpy(p->z, zResult, n); + p->z[n] = 0; + } + p->s.flags = STK_Str | STK_Dyn; + } + p->s.n = n+1; + } + return p->z; +} +void sqlite_set_result_int(sqlite_func *p, int iResult){ + assert( !p->isStep ); + if( p->s.flags & STK_Dyn ){ + sqliteFree(p->z); + } + p->s.i = iResult; + p->s.flags = STK_Int; +} +void sqlite_set_result_double(sqlite_func *p, double rResult){ + assert( !p->isStep ); + if( p->s.flags & STK_Dyn ){ + sqliteFree(p->z); + } + p->s.r = rResult; + p->s.flags = STK_Real; +} +void sqlite_set_result_error(sqlite_func *p, const char *zMsg, int n){ + assert( !p->isStep ); + sqlite_set_result_string(p, zMsg, n); + p->isError = 1; +} + +/* +** Extract the user data from a sqlite_func structure and return a +** pointer to it. +*/ +void *sqlite_user_data(sqlite_func *p){ + assert( p && p->pFunc ); + return p->pFunc->pUserData; +} + +/* +** Allocate or return the aggregate context for a user function. A new +** context is allocated on the first call. Subsequent calls return the +** same context that was returned on prior calls. +** +** This routine is defined here in vdbe.c because it depends on knowing +** the internals of the sqlite_func structure which is only defined in +** this source file. 
+*/ +void *sqlite_aggregate_context(sqlite_func *p, int nByte){ + assert( p && p->pFunc && p->pFunc->xStep ); + if( p->pAgg==0 ){ + if( nByte<=NBFS ){ + p->pAgg = (void*)p->z; + }else{ + p->pAgg = sqliteMalloc( nByte ); + } + } + return p->pAgg; +} + +/* +** Return the number of times the Step function of a aggregate has been +** called. +** +** This routine is defined here in vdbe.c because it depends on knowing +** the internals of the sqlite_func structure which is only defined in +** this source file. +*/ +int sqlite_aggregate_count(sqlite_func *p){ + assert( p && p->pFunc && p->pFunc->xStep ); + return p->cnt; +} + +#if !defined(NDEBUG) || defined(VDBE_PROFILE) +/* +** Print a single opcode. This routine is used for debugging only. +*/ +void sqliteVdbePrintOp(FILE *pOut, int pc, Op *pOp){ + char *zP3; + char zPtr[40]; + if( pOp->p3type==P3_POINTER ){ + sprintf(zPtr, "ptr(%#x)", (int)pOp->p3); + zP3 = zPtr; + }else{ + zP3 = pOp->p3; + } + if( pOut==0 ) pOut = stdout; + fprintf(pOut,"%4d %-12s %4d %4d %s\n", + pc, sqliteOpcodeNames[pOp->opcode], pOp->p1, pOp->p2, zP3 ? zP3 : ""); + fflush(pOut); +} +#endif + +/* +** Give a listing of the program in the virtual machine. +** +** The interface is the same as sqliteVdbeExec(). But instead of +** running the code, it invokes the callback once for each instruction. +** This feature is used to implement "EXPLAIN". 
+*/ +int sqliteVdbeList( + Vdbe *p /* The VDBE */ +){ + sqlite *db = p->db; + int i; + static char *azColumnNames[] = { + "addr", "opcode", "p1", "p2", "p3", + "int", "text", "int", "int", "text", + 0 + }; + + assert( p->popStack==0 ); + assert( p->explain ); + p->azColName = azColumnNames; + p->azResColumn = p->zStack; + for(i=0; i<5; i++) p->zStack[i] = p->aStack[i].z; + p->rc = SQLITE_OK; + for(i=p->pc; p->rc==SQLITE_OK && i<p->nOp; i++){ + if( db->flags & SQLITE_Interrupt ){ + db->flags &= ~SQLITE_Interrupt; + if( db->magic!=SQLITE_MAGIC_BUSY ){ + p->rc = SQLITE_MISUSE; + }else{ + p->rc = SQLITE_INTERRUPT; + } + sqliteSetString(&p->zErrMsg, sqlite_error_string(p->rc), (char*)0); + break; + } + sprintf(p->zStack[0],"%d",i); + sprintf(p->zStack[2],"%d", p->aOp[i].p1); + sprintf(p->zStack[3],"%d", p->aOp[i].p2); + if( p->aOp[i].p3type==P3_POINTER ){ + sprintf(p->aStack[4].z, "ptr(%#x)", (int)p->aOp[i].p3); + p->zStack[4] = p->aStack[4].z; + }else{ + p->zStack[4] = p->aOp[i].p3; + } + p->zStack[1] = sqliteOpcodeNames[p->aOp[i].opcode]; + if( p->xCallback==0 ){ + p->pc = i+1; + p->azResColumn = p->zStack; + p->nResColumn = 5; + return SQLITE_ROW; + } + if( sqliteSafetyOff(db) ){ + p->rc = SQLITE_MISUSE; + break; + } + if( p->xCallback(p->pCbArg, 5, p->zStack, p->azColName) ){ + p->rc = SQLITE_ABORT; + } + if( sqliteSafetyOn(db) ){ + p->rc = SQLITE_MISUSE; + } + } + return p->rc==SQLITE_OK ? SQLITE_DONE : SQLITE_ERROR; +} + +/* +** Prepare a virtual machine for execution. This involves things such +** as allocating stack space and initializing the program counter. +** After the VDBE has be prepped, it can be executed by one or more +** calls to sqliteVdbeExec(). +** +** The behavior of sqliteVdbeExec() is influenced by the parameters to +** this routine. If xCallback is NULL, then sqliteVdbeExec() will return +** with SQLITE_ROW whenever there is a row of the result set ready +** to be delivered. 
p->azResColumn will point to the row and +** p->nResColumn gives the number of columns in the row. If xCallback +** is not NULL, then the xCallback() routine is invoked to process each +** row in the result set. +*/ +void sqliteVdbeMakeReady( + Vdbe *p, /* The VDBE */ + int nVar, /* Number of '?' see in the SQL statement */ + sqlite_callback xCallback, /* Result callback */ + void *pCallbackArg, /* 1st argument to xCallback() */ + int isExplain /* True if the EXPLAIN keywords is present */ +){ + int n; + + assert( p!=0 ); + assert( p->magic==VDBE_MAGIC_INIT ); + + /* Add a HALT instruction to the very end of the program. + */ + if( p->nOp==0 || (p->aOp && p->aOp[p->nOp-1].opcode!=OP_Halt) ){ + sqliteVdbeAddOp(p, OP_Halt, 0, 0); + } + + /* No instruction ever pushes more than a single element onto the + ** stack. And the stack never grows on successive executions of the + ** same loop. So the total number of instructions is an upper bound + ** on the maximum stack depth required. + ** + ** Allocation all the stack space we will ever need. + */ + if( p->aStack==0 ){ + p->nVar = nVar; + assert( nVar>=0 ); + n = isExplain ? 
10 : p->nOp; + p->aStack = sqliteMalloc( + n*(sizeof(p->aStack[0]) + 2*sizeof(char*)) /* aStack and zStack */ + + p->nVar*(sizeof(char*)+sizeof(int)+1) /* azVar, anVar, abVar */ + ); + p->zStack = (char**)&p->aStack[n]; + p->azColName = (char**)&p->zStack[n]; + p->azVar = (char**)&p->azColName[n]; + p->anVar = (int*)&p->azVar[p->nVar]; + p->abVar = (u8*)&p->anVar[p->nVar]; + } + + sqliteHashInit(&p->agg.hash, SQLITE_HASH_BINARY, 0); + p->agg.pSearch = 0; +#ifdef MEMORY_DEBUG + if( sqliteOsFileExists("vdbe_trace") ){ + p->trace = stdout; + } +#endif + p->tos = -1; + p->pc = 0; + p->rc = SQLITE_OK; + p->uniqueCnt = 0; + p->returnDepth = 0; + p->errorAction = OE_Abort; + p->undoTransOnError = 0; + p->xCallback = xCallback; + p->pCbArg = pCallbackArg; + p->popStack = 0; + p->explain |= isExplain; + p->magic = VDBE_MAGIC_RUN; +#ifdef VDBE_PROFILE + { + int i; + for(i=0; i<p->nOp; i++){ + p->aOp[i].cnt = 0; + p->aOp[i].cycles = 0; + } + } +#endif +} + + +/* +** Remove any elements that remain on the sorter for the VDBE given. +*/ +void sqliteVdbeSorterReset(Vdbe *p){ + while( p->pSort ){ + Sorter *pSorter = p->pSort; + p->pSort = pSorter->pNext; + sqliteFree(pSorter->zKey); + sqliteFree(pSorter->pData); + sqliteFree(pSorter); + } +} + +/* +** Pop the stack N times. Free any memory associated with the +** popped stack elements. +*/ +void sqliteVdbePopStack(Vdbe *p, int N){ + assert( N>=0 ); + if( p->zStack==0 ) return; + assert( p->aStack || sqlite_malloc_failed ); + if( p->aStack==0 ) return; + while( N-- > 0 ){ + if( p->aStack[p->tos].flags & STK_Dyn ){ + sqliteFree(p->zStack[p->tos]); + } + p->aStack[p->tos].flags = 0; + p->zStack[p->tos] = 0; + p->tos--; + } +} + +/* +** Reset an Agg structure. Delete all its contents. +** +** For installable aggregate functions, if the step function has been +** called, make sure the finalizer function has also been called. The +** finalizer might need to free memory that was allocated as part of its +** private context. 
If the finalizer has not been called yet, call it +** now. +*/ +void sqliteVdbeAggReset(Agg *pAgg){ + int i; + HashElem *p; + for(p = sqliteHashFirst(&pAgg->hash); p; p = sqliteHashNext(p)){ + AggElem *pElem = sqliteHashData(p); + assert( pAgg->apFunc!=0 ); + for(i=0; i<pAgg->nMem; i++){ + Mem *pMem = &pElem->aMem[i]; + if( pAgg->apFunc[i] && (pMem->s.flags & STK_AggCtx)!=0 ){ + sqlite_func ctx; + ctx.pFunc = pAgg->apFunc[i]; + ctx.s.flags = STK_Null; + ctx.z = 0; + ctx.pAgg = pMem->z; + ctx.cnt = pMem->s.i; + ctx.isStep = 0; + ctx.isError = 0; + (*pAgg->apFunc[i]->xFinalize)(&ctx); + if( pMem->z!=0 && pMem->z!=pMem->s.z ){ + sqliteFree(pMem->z); + } + }else if( pMem->s.flags & STK_Dyn ){ + sqliteFree(pMem->z); + } + } + sqliteFree(pElem); + } + sqliteHashClear(&pAgg->hash); + sqliteFree(pAgg->apFunc); + pAgg->apFunc = 0; + pAgg->pCurrent = 0; + pAgg->pSearch = 0; + pAgg->nMem = 0; +} + +/* +** Delete a keylist +*/ +void sqliteVdbeKeylistFree(Keylist *p){ + while( p ){ + Keylist *pNext = p->pNext; + sqliteFree(p); + p = pNext; + } +} + +/* +** Close a cursor and release all the resources that cursor happens +** to hold. +*/ +void sqliteVdbeCleanupCursor(Cursor *pCx){ + if( pCx->pCursor ){ + sqliteBtreeCloseCursor(pCx->pCursor); + } + if( pCx->pBt ){ + sqliteBtreeClose(pCx->pBt); + } + sqliteFree(pCx->pData); + memset(pCx, 0, sizeof(Cursor)); +} + +/* +** Close all cursors +*/ +static void closeAllCursors(Vdbe *p){ + int i; + for(i=0; i<p->nCursor; i++){ + sqliteVdbeCleanupCursor(&p->aCsr[i]); + } + sqliteFree(p->aCsr); + p->aCsr = 0; + p->nCursor = 0; +} + +/* +** Clean up the VM after execution. +** +** This routine will automatically close any cursors, lists, and/or +** sorters that were left open. It also deletes the values of +** variables in the azVariable[] array. 
+*/ +static void Cleanup(Vdbe *p){ + int i; + sqliteVdbePopStack(p, p->tos+1); + closeAllCursors(p); + if( p->aMem ){ + for(i=0; i<p->nMem; i++){ + if( p->aMem[i].s.flags & STK_Dyn ){ + sqliteFree(p->aMem[i].z); + } + } + } + sqliteFree(p->aMem); + p->aMem = 0; + p->nMem = 0; + if( p->pList ){ + sqliteVdbeKeylistFree(p->pList); + p->pList = 0; + } + sqliteVdbeSorterReset(p); + if( p->pFile ){ + if( p->pFile!=stdin ) fclose(p->pFile); + p->pFile = 0; + } + if( p->azField ){ + sqliteFree(p->azField); + p->azField = 0; + } + p->nField = 0; + if( p->zLine ){ + sqliteFree(p->zLine); + p->zLine = 0; + } + p->nLineAlloc = 0; + sqliteVdbeAggReset(&p->agg); + if( p->aSet ){ + for(i=0; i<p->nSet; i++){ + sqliteHashClear(&p->aSet[i].hash); + } + } + sqliteFree(p->aSet); + p->aSet = 0; + p->nSet = 0; + if( p->keylistStack ){ + int ii; + for(ii = 0; ii < p->keylistStackDepth; ii++){ + sqliteVdbeKeylistFree(p->keylistStack[ii]); + } + sqliteFree(p->keylistStack); + p->keylistStackDepth = 0; + p->keylistStack = 0; + } + sqliteFree(p->zErrMsg); + p->zErrMsg = 0; +} + +/* +** Clean up a VDBE after execution but do not delete the VDBE just yet. +** Write any error messages into *pzErrMsg. Return the result code. +** +** After this routine is run, the VDBE should be ready to be executed +** again. 
+*/ +int sqliteVdbeReset(Vdbe *p, char **pzErrMsg){ + sqlite *db = p->db; + int i; + + if( p->magic!=VDBE_MAGIC_RUN && p->magic!=VDBE_MAGIC_HALT ){ + sqliteSetString(pzErrMsg, sqlite_error_string(SQLITE_MISUSE), (char*)0); + return SQLITE_MISUSE; + } + if( p->zErrMsg ){ + if( pzErrMsg && *pzErrMsg==0 ){ + *pzErrMsg = p->zErrMsg; + }else{ + sqliteFree(p->zErrMsg); + } + p->zErrMsg = 0; + } + Cleanup(p); + if( p->rc!=SQLITE_OK ){ + switch( p->errorAction ){ + case OE_Abort: { + if( !p->undoTransOnError ){ + for(i=0; i<db->nDb; i++){ + if( db->aDb[i].pBt ){ + sqliteBtreeRollbackCkpt(db->aDb[i].pBt); + } + } + break; + } + /* Fall through to ROLLBACK */ + } + case OE_Rollback: { + sqliteRollbackAll(db); + db->flags &= ~SQLITE_InTrans; + db->onError = OE_Default; + break; + } + default: { + if( p->undoTransOnError ){ + sqliteRollbackAll(db); + db->flags &= ~SQLITE_InTrans; + db->onError = OE_Default; + } + break; + } + } + sqliteRollbackInternalChanges(db); + } + for(i=0; i<db->nDb; i++){ + if( db->aDb[i].pBt && db->aDb[i].inTrans==2 ){ + sqliteBtreeCommitCkpt(db->aDb[i].pBt); + db->aDb[i].inTrans = 1; + } + } + assert( p->tos<p->pc || sqlite_malloc_failed==1 ); +#ifdef VDBE_PROFILE + { + FILE *out = fopen("vdbe_profile.out", "a"); + if( out ){ + int i; + fprintf(out, "---- "); + for(i=0; i<p->nOp; i++){ + fprintf(out, "%02x", p->aOp[i].opcode); + } + fprintf(out, "\n"); + for(i=0; i<p->nOp; i++){ + fprintf(out, "%6d %10lld %8lld ", + p->aOp[i].cnt, + p->aOp[i].cycles, + p->aOp[i].cnt>0 ? p->aOp[i].cycles/p->aOp[i].cnt : 0 + ); + sqliteVdbePrintOp(out, i, &p->aOp[i]); + } + fclose(out); + } + } +#endif + p->magic = VDBE_MAGIC_INIT; + return p->rc; +} + +/* +** Clean up and delete a VDBE after execution. Return an integer which is +** the result code. Write any error message text into *pzErrMsg. 
+*/ +int sqliteVdbeFinalize(Vdbe *p, char **pzErrMsg){ + int rc; + sqlite *db; + + if( p->magic!=VDBE_MAGIC_RUN && p->magic!=VDBE_MAGIC_HALT ){ + sqliteSetString(pzErrMsg, sqlite_error_string(SQLITE_MISUSE), (char*)0); + return SQLITE_MISUSE; + } + db = p->db; + rc = sqliteVdbeReset(p, pzErrMsg); + sqliteVdbeDelete(p); + if( db->want_to_close && db->pVdbe==0 ){ + sqlite_close(db); + } + return rc; +} + +/* +** Set the values of all variables. Variable $1 in the original SQL will +** be the string azValue[0]. $2 will have the value azValue[1]. And +** so forth. If a value is out of range (for example $3 when nValue==2) +** then its value will be NULL. +** +** This routine overrides any prior call. +*/ +int sqlite_bind(sqlite_vm *pVm, int i, const char *zVal, int len, int copy){ + Vdbe *p = (Vdbe*)pVm; + if( p->magic!=VDBE_MAGIC_RUN || p->pc!=0 ){ + return SQLITE_MISUSE; + } + if( i<1 || i>p->nVar ){ + return SQLITE_RANGE; + } + i--; + if( p->abVar[i] ){ + sqliteFree(p->azVar[i]); + } + if( zVal==0 ){ + copy = 0; + len = 0; + } + if( len<0 ){ + len = strlen(zVal)+1; + } + if( copy ){ + p->azVar[i] = sqliteMalloc( len ); + if( p->azVar[i] ) memcpy(p->azVar[i], zVal, len); + }else{ + p->azVar[i] = (char*)zVal; + } + p->abVar[i] = copy; + p->anVar[i] = len; + return SQLITE_OK; +} + + +/* +** Delete an entire VDBE. 
+*/ +void sqliteVdbeDelete(Vdbe *p){ + int i; + if( p==0 ) return; + Cleanup(p); + if( p->pPrev ){ + p->pPrev->pNext = p->pNext; + }else{ + assert( p->db->pVdbe==p ); + p->db->pVdbe = p->pNext; + } + if( p->pNext ){ + p->pNext->pPrev = p->pPrev; + } + p->pPrev = p->pNext = 0; + if( p->nOpAlloc==0 ){ + p->aOp = 0; + p->nOp = 0; + } + for(i=0; i<p->nOp; i++){ + if( p->aOp[i].p3type==P3_DYNAMIC ){ + sqliteFree(p->aOp[i].p3); + } + } + for(i=0; i<p->nVar; i++){ + if( p->abVar[i] ) sqliteFree(p->azVar[i]); + } + sqliteFree(p->aOp); + sqliteFree(p->aLabel); + sqliteFree(p->aStack); + p->magic = VDBE_MAGIC_DEAD; + sqliteFree(p); +} + +/* +** Convert an integer in between the native integer format and +** the bigEndian format used as the record number for tables. +** +** The bigEndian format (most significant byte first) is used for +** record numbers so that records will sort into the correct order +** even though memcmp() is used to compare the keys. On machines +** whose native integer format is little endian (ex: i486) the +** order of bytes is reversed. On native big-endian machines +** (ex: Alpha, Sparc, Motorola) the byte order is the same. +** +** This function is its own inverse. In other words +** +** X == byteSwap(byteSwap(X)) +*/ +int sqliteVdbeByteSwap(int x){ + union { + char zBuf[sizeof(int)]; + int i; + } ux; + ux.zBuf[3] = x&0xff; + ux.zBuf[2] = (x>>8)&0xff; + ux.zBuf[1] = (x>>16)&0xff; + ux.zBuf[0] = (x>>24)&0xff; + return ux.i; +} + +/* +** If a MoveTo operation is pending on the given cursor, then do that +** MoveTo now. Return an error code. If no MoveTo is pending, this +** routine does nothing and returns SQLITE_OK. 
+*/ +int sqliteVdbeCursorMoveto(Cursor *p){ + if( p->deferredMoveto ){ + int res; + extern int sqlite_search_count; + sqliteBtreeMoveto(p->pCursor, (char*)&p->movetoTarget, sizeof(int), &res); + p->lastRecno = keyToInt(p->movetoTarget); + p->recnoIsValid = res==0; + if( res<0 ){ + sqliteBtreeNext(p->pCursor, &res); + } + sqlite_search_count++; + p->deferredMoveto = 0; + } + return SQLITE_OK; +} diff --git a/ext/sqlite/tests/bug26911.phpt b/ext/sqlite/tests/bug26911.phpt new file mode 100644 index 0000000000..e81aeae763 --- /dev/null +++ b/ext/sqlite/tests/bug26911.phpt @@ -0,0 +1,10 @@ +--TEST-- +Bug #26911 (crash when fetching data from empty queries) +--FILE-- +<?php + $db = sqlite_open(":memory:"); + $a = sqlite_query($db, " "); + echo "I am ok\n"; +?> +--EXPECT-- +I am ok diff --git a/ext/tidy/CREDITS b/ext/tidy/CREDITS new file mode 100644 index 0000000000..1c77b2ff3b --- /dev/null +++ b/ext/tidy/CREDITS @@ -0,0 +1,2 @@ +tidy +John Coggeshall, Ilia Alshanetsky diff --git a/ext/tidy/README b/ext/tidy/README new file mode 100644 index 0000000000..2d4e015176 --- /dev/null +++ b/ext/tidy/README @@ -0,0 +1,122 @@ + +README FOR ext/tidy by John Coggeshall <john@php.net> + +Tidy Version: 0.7b + +Tidy is an extension based on Libtidy (http://tidy.sf.net/) and allows a PHP developer +to clean, repair, and traverse HTML, XHTML, and XML documents -- including ones with +embedded scripting languages such as PHP or ASP within them using OO constructs. + +--------------------------------------------------------------------------------------- +!! Important Note !! +--------------------------------------------------------------------------------------- +At this time libtidy has a small memory leak inside the ParseConfigFileEnc() function +used to load configuration from a file. If you intend to use this functionality apply +the "libtidy.txt" patch (cd tidy/src/; patch -p0 < libtidy.txt) to libtidy sources and +then recompile libtidy. 
+--------------------------------------------------------------------------------------- + +The Tidy extension has two separate APIs, one for general parsing, cleaning, and +repairing and another for document traversal. The general API is provided below: + + tidy_create() Reinitialize the tidy engine + tidy_parse_file($file) Parse the document stored in $file + tidy_parse_string($str) Parse the string stored in $str + + tidy_clean_repair() Clean and repair the document + tidy_diagnose() Diagnose a parsed document + + tidy_setopt($opt, $val) Set a configuration option $opt to $val + tidy_getopt($opt) Retrieve a configuration option + + ** note: $opt is a string representing the option. Although no formal + documentation yet exists for PHP, you can find a description of many + of them at http://www.w3.org/People/Raggett/tidy/ and a list of supported + options in the phpinfo(); output** + + tidy_get_output() Return the cleaned tidy HTML as a string + tidy_get_error_buffer() Return a log of the errors and warnings + returned by tidy + + tidy_get_release() Return the Libtidy release date + tidy_get_status() Return the status of the document + tidy_get_html_ver() Return the major HTML version detected for + the document; + + tidy_is_xhtml() Determines if the document is XHTML + tidy_is_xml() Determines if the document is a generic XML + + tidy_error_count() Returns the number of errors in the document + tidy_warning_count() Returns the number of warnings in the document + tidy_access_count() Returns the number of accessibility-related + warnings in the document. 
+ tidy_config_count() Returns the number of configuration errors found + + tidy_load_config($file) Loads the specified configuration file + tidy_load_config_enc($file, + $enc) Loads the specified config file using the specified + character encoding + tidy_set_encoding($enc) Sets the current character encoding for the document + tidy_save_config($file) Saves the current config to $file + + +Beyond these general-purpose API functions, Tidy also supports the following +functions which are used to retrieve an object for document traversal: + + tidy_get_root() Returns an object starting at the root of the + document + tidy_get_head() Returns an object starting at the <HEAD> tag + tidy_get_html() Returns an object starting at the <HTML> tag + tidy_get_body() Returns an object starting at the <BODY> tag + +All navigation of the specified document is done via the PHP5 object constructs. +There are two types of objects which Tidy can create. The first is TidyNode, which +represents HTML Tags, Text, and more (see the TidyNode_Type Constants). The second +is TidyAttr, which represents an attribute within an HTML tag (TidyNode). The +functionality of these objects is represented by the following schema: + +class TidyNode { + + public $name; // name of node (i.e. HEAD) + public $value; // value of node (everything between tags) + public $type; // type of node (text, php, asp, etc.) + public $id; // id of node (i.e. TIDY_TAG_HEAD) + + public function attributes(); // an array of attributes (see TidyAttr) + public function children(); // an array of child nodes + + function has_siblings(); // any sibling nodes? + function has_children(); // any child nodes? + + function is_comment(); // is node a comment? + function is_xhtml(); // is document XHTML? + function is_xml(); // is document generic XML (not HTML/XHTML) + function is_text(); // is node text? + function is_html(); // is node an HTML tag? + + function is_jste(); // is jste block?
+ function is_asp(); // is Microsoft ASP block? + function is_php(); // is PHP block? + + function next(); // returns next node + function prev(); // returns prev node + + /* Searches for a particular attribute in the current node based + on node ID. If found returns a TidyAttr object for it */ + function get_attr($attr_id); + + /* +} + +class TidyAttr { + + public $name; // attribute name i.e. HREF + public $value; // attribute value + public $id; // attribute id i.e. TIDY_ATTR_HREF + +} + +Examples of using these objects to navigate the tree can be found in the examples/ +directory (I suggest looking at urlgrab.php and dumpit.php) + +E-mail thoughts, suggestions, patches, etc. to <john@php.net> diff --git a/ext/tidy/TODO b/ext/tidy/TODO new file mode 100644 index 0000000000..699c207dcb --- /dev/null +++ b/ext/tidy/TODO @@ -0,0 +1,3 @@ +TODO + + - Implement get_nodes() method diff --git a/ext/tidy/config.m4 b/ext/tidy/config.m4 new file mode 100644 index 0000000000..069b3ee6b4 --- /dev/null +++ b/ext/tidy/config.m4 @@ -0,0 +1,35 @@ +dnl +dnl $Id$ +dnl + +PHP_ARG_WITH(tidy,for TIDY support, +[ --with-tidy[=DIR] Include TIDY support]) + +if test "$PHP_TIDY" != "no"; then + PHP_NEW_EXTENSION(tidy, tidy.c, $ext_shared) + if test "$PHP_TIDY" != "yes"; then + TIDY_SEARCH_DIRS=$PHP_TIDY + else + TIDY_SEARCH_DIRS="/usr/local /usr" + fi + for i in $TIDY_SEARCH_DIRS; do + if test -f $i/include/tidy/tidy.h; then + TIDY_DIR=$i + TIDY_INCDIR=$i/include/tidy + elif test -f $i/include/tidy.h; then + TIDY_DIR=$i + TIDY_INCDIR=$i/include + fi + done + + if test -z "$TIDY_DIR"; then + AC_MSG_ERROR(Cannot find libtidy) + fi + + TIDY_LIBDIR=$TIDY_DIR/lib + + AC_DEFINE(HAVE_TIDY,1,[ ]) + PHP_SUBST(TIDY_SHARED_LIBADD) + PHP_ADD_LIBRARY_WITH_PATH(tidy, $TIDY_LIBDIR, TIDY_SHARED_LIBADD) + PHP_ADD_INCLUDE($TIDY_INCDIR) +fi diff --git a/ext/tidy/examples/cleanhtml.php b/ext/tidy/examples/cleanhtml.php new file mode 100644 index 0000000000..9d054cda4f --- /dev/null +++ 
b/ext/tidy/examples/cleanhtml.php @@ -0,0 +1,38 @@ +<?php + + /* + * cleanhtml.php + * + * A simple script to clean and repair HTML,XHTML,PHP,ASP,etc. documents + * if no file is provided, it reads from standard input. + * + * By: John Coggeshall <john@php.net> + * + * Usage: php cleanhtml.php [filename] + * + */ + + if(!isset($_SERVER['argv'][1])) { + $data = file_get_contents("php://stdin"); + tidy_parse_string($data); + } else { + tidy_parse_file($_SERVER['argv'][1]); + } + + tidy_clean_repair(); + + if(tidy_warning_count() || + tidy_error_count()) { + + echo "\n\nThe following errors or warnings occured:\n"; + echo tidy_get_error_buffer(); + echo "\n"; + } + + echo tidy_get_output(); + +?> + + + +
\ No newline at end of file diff --git a/ext/tidy/examples/dumpit.php b/ext/tidy/examples/dumpit.php new file mode 100644 index 0000000000..e77b7b9323 --- /dev/null +++ b/ext/tidy/examples/dumpit.php @@ -0,0 +1,93 @@ +<?php + /* + * dumpit.php + * + * a command-line script which dumps the given HTML, PHP, ASP, XHTML, etc. + * file as it is represented in the document model. + * + * By: John Coggeshall <john@php.net> + * + * Usage; php dumpit.php <filename> + */ + + tidy_parse_file($_SERVER['argv'][1]); + + /* Optionally you can do this here if you want to fix up the document */ + + /* tidy_clean_repair(); */ + + $tree = tidy_get_root(); + dump_tree($tree); + echo "\n"; + + function node_type($type) { + + switch($type) { + + case TIDY_NODETYPE_ROOT: return "Root Node"; + case TIDY_NODETYPE_DOCTYPE: return "DocType Node"; + case TIDY_NODETYPE_COMMENT: return "Comment Node"; + case TIDY_NODETYPE_PROCINS: return "ProcIns Node"; + case TIDY_NODETYPE_TEXT: return "Text Node"; + case TIDY_NODETYPE_START: return "Start Node"; + case TIDY_NODETYPE_END: return "End Node"; + case TIDY_NODETYPE_STARTEND: return "Start/End Node"; + case TIDY_NODETYPE_CDATA: return "CDATA Node"; + case TIDY_NODETYPE_SECTION: return "Section Node"; + case TIDY_NODETYPE_ASP: return "ASP Source Code Node"; + case TIDY_NODETYPE_PHP: return "PHP Source Code Node"; + case TIDY_NODETYPE_JSTE: return "JSTE Source Code"; + case TIDY_NODETYPE_XMLDECL: return "XML Declaration Node"; + default: return "Unknown Node"; + } + } + + function do_leaf($string, $indent) { + for($i = 0; $i < $indent; $i++) { + echo " "; + } + echo $string; + } + + function dump_tree($node, $indent = 0) { + if($node) { + /* Put something there if the node name is empty */ + $nodename = trim(strtoupper($node->name)); + $nodename = (empty($nodename)) ? 
"[EMPTY]" : $nodename; + + /* Generate the Node, and a pretty name for it */ + do_leaf(" + $nodename (".node_type($node->type).")\n", $indent); + + /* Check to see if this node is a text node. Text nodes are + generated by start/end tags and contain the text in between. + i.e. <B>foo</B> will create a text node with $node->value + equal to 'foo' */ + if($node->type == TIDY_NODETYPE_TEXT) { + do_leaf(" |\n", $indent); + do_leaf(" +---- Value: '{$node->value}'\n", $indent); + } + + /* Any attributes on this node? */ + if(count($node->attributes())) { + do_leaf(" |\n", $indent); + do_leaf(" +---- Attributes\n", $indent); + + /* Cycle through the attributes and display them and their values. */ + foreach($node->attributes() as $attrib) { + do_leaf(" +--{$attrib->name}\n", $indent); + do_leaf(" | +-- Value: {$attrib->value}\n", $indent); + } + } + + /* Recurse along the children to generate the remaining nodes */ + if($node->has_children()) { + foreach($node->children() as $child) { + dump_tree($child, $indent + 3); + } + } + } + } + + echo tidy_get_output(); + +?>
\ No newline at end of file diff --git a/ext/tidy/examples/urlgrab.php b/ext/tidy/examples/urlgrab.php new file mode 100644 index 0000000000..7896792ea5 --- /dev/null +++ b/ext/tidy/examples/urlgrab.php @@ -0,0 +1,60 @@ +<?php + + /* + * urlgrab.php + * + * A simple command-line utility to extract all of the URLS contained + * within <A HREF> tags from a document. + * + * By: John Coggeshall <john@php.net> + * + * Usage: php urlgrab.php <file> + * + */ + + /* Parse the document */ + tidy_parse_file($_SERVER['argv'][1]); + + /* Fix up the document */ + tidy_clean_repair(); + + /* Get an object representing everything from the <HTML> tag in */ + $html = tidy_get_html(); + + /* Traverse the document tree */ + print_r(get_links($html)); + + function get_links($node) { + $urls = array(); + + /* Check to see if we are on an <A> tag or not */ + if($node->id == TIDY_TAG_A) { + /* If we are, find the HREF attribute */ + $attrib = $node->get_attr(TIDY_ATTR_HREF); + if($attrib) { + /* Add the value of the HREF attrib to $urls */ + $urls[] = $attrib->value; + } + + } + + /* Are there any children? */ + if($node->has_children()) { + + /* Traverse down each child recursively */ + foreach($node->children() as $child) { + + /* Append the results from recursion to $urls */ + foreach(get_links($child) as $url) { + + $urls[] = $url; + + } + + } + } + + return $urls; + } + +?>
\ No newline at end of file diff --git a/ext/tidy/package.xml b/ext/tidy/package.xml new file mode 100644 index 0000000000..266cc5d7c6 --- /dev/null +++ b/ext/tidy/package.xml @@ -0,0 +1,64 @@ +<?xml version="1.0" encoding="ISO-8859-1" ?> +<!DOCTYPE package SYSTEM "../pear/package.dtd"> +<package> + <name>tidy</name> + <summary>Tidy HTML Repairing and Parsing</summary> + <maintainers> + <maintainer> + <user>john</user> + <name>John Coggeshall</name> + <email>john@php.net</email> + <role>lead</role> + </maintainer> + <maintainer> + <user>iliaa</user> + <name>Ilia Alshanetsky</name> + <email>ilia@php.net</email> + <role>lead</role> + </maintainer> + </maintainers> + <description> +Tidy is a binding for the Tidy HTML clean and repair utility which +allows you to not only clean and otherwise manipulate HTML documents, +but also traverse the document tree using the Zend Engine 2 OO semantics. + </description> + <license>PHP</license> + <release> + <state>stable</state> + <version>1.0</version> + <date>2003-11-13</date> + <notes> + Fixed a few PHP5-specific bugs when working with node objects. 
+ </notes> + <configureoptions> + <configureoption name="with-tidy" default="autodetect" prompt="Tidy library installation dir?"/> + </configureoptions> + <filelist> + <file role="src" name="config.m4"/> + <file role="src" name="tidy.c"/> + <file role="src" name="php_tidy.h"/> + + <file role="doc" name="CREDITS"/> + <file role="doc" name="README"/> + <file role="doc" name="TODO"/> + <file role="doc" name="examples/cleanhtml.php"/> + <file role="doc" name="examples/dumpit.php"/> + <file role="doc" name="examples/urlgrab.php"/> + + <file role="test" name="tests/001.phpt"/> + <file role="test" name="tests/002.phpt"/> + <file role="test" name="tests/003.phpt"/> + <file role="test" name="tests/004.phpt"/> + <file role="test" name="tests/005.phpt"/> + <file role="test" name="tests/005.html"/> + <file role="test" name="tests/006.phpt"/> + <file role="test" name="tests/007.phpt"/> + </filelist> + <deps> + <dep type="php" rel="ge">4.3.0</dep> + </deps> + </release> +</package> +<!-- +vim:et:ts=1:sw=1 +--> diff --git a/ext/tidy/php_tidy.h b/ext/tidy/php_tidy.h new file mode 100644 index 0000000000..e170d37086 --- /dev/null +++ b/ext/tidy/php_tidy.h @@ -0,0 +1,238 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 4 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2003 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.0 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_0.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Author: John Coggeshall <john@php.net> | + | Ilia Alshanetsky <ilia@php.net> | + +----------------------------------------------------------------------+ +*/ + +/* $Id$ */ + +#ifndef PHP_TIDY_H +#define PHP_TIDY_H + +extern zend_module_entry tidy_module_entry; +#define phpext_tidy_ptr &tidy_module_entry + +#ifdef PHP_WIN32 +#define PHP_TIDY_API __declspec(dllexport) +#else +#define PHP_TIDY_API +#endif + +#ifdef ZTS +#include "TSRM.h" +#endif + +#include "tidyenum.h" +#include "tidy.h" +#include "buffio.h" + +#ifdef ZTS +#define TG(v) TSRMG(tidy_globals_id, zend_tidy_globals *, v) +#else +#define TG(v) (tidy_globals.v) +#endif + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif + +#define TIDY_RV_FALSE(__t) __t->type = IS_BOOL; __t->value.lval = FALSE +#define TIDY_RV_TRUE(__t) __t->type = IS_BOOL; __t->value.lval = TRUE + +#define REMOVE_NEWLINE(_z) _z->value.str.val[_z->value.str.len-1] = '\0'; _z->value.str.len--; + +#define TIDY_TAG_CONST(tag) REGISTER_LONG_CONSTANT("TIDY_TAG_" #tag, TidyTag_##tag, CONST_CS | CONST_PERSISTENT) +#define TIDY_ATTR_CONST(attr) REGISTER_LONG_CONSTANT("TIDY_ATTR_" #attr, TidyAttr_##attr, CONST_CS | CONST_PERSISTENT) +#define TIDY_NODE_CONST(name, type) REGISTER_LONG_CONSTANT("TIDY_NODETYPE_" #name, TidyNode_##type, CONST_CS | CONST_PERSISTENT) + +#define PHP_ME_MAPPING(name, func_name, arg_types) \ + ZEND_NAMED_FE(name, ZEND_FN(func_name), arg_types) + +#define PHP_NODE_METHOD(name) PHP_FUNCTION(tnm_ ##name) +#define PHP_ATTR_METHOD(name) PHP_FUNCTION(tam_ ##name) +#define PHP_NODE_ME(name, param) PHP_ME_MAPPING(name, tnm_ ##name, param) +#define PHP_ATTR_ME(name, param) PHP_ME_MAPPING(name, tam_ ##name, param) + + + +#define TIDY_REGISTER_OBJECT(_type, _object, _ptr) \ + { \ + tidy_object *obj; \ + obj = (tidy_object*)zend_object_store_get_object(_object TSRMLS_CC); \ + obj->type = is_ ## _type; \ + obj->u._type = _ptr; \ + } + +#define 
REGISTER_TIDY_CLASS(name, parent) \ + { \ + zend_class_entry ce; \ + INIT_CLASS_ENTRY(ce, "tidy_" # name, tidy_funcs_ ## name); \ + ce.create_object = tidy_object_new_ ## name; \ + tidy_ce_ ## name = zend_register_internal_class_ex(&ce, parent, NULL TSRMLS_CC); \ + tidy_ce_ ## name->ce_flags |= ZEND_ACC_FINAL_CLASS; \ + memcpy(&tidy_object_handlers_ ## name, zend_get_std_object_handlers(), sizeof(zend_object_handlers)); \ + tidy_object_handlers_ ## name.clone_obj = NULL; \ + } + +#define GET_THIS_CONTAINER() \ + PHPTidyObj *obj; \ + { \ + zval *object = getThis(); \ + obj = (PHPTidyObj *)zend_object_store_get_object(object TSRMLS_CC); \ + } + +#define INSTANCIATE_NODE(_zval, _container, _node) \ + tidy_instanciate(tidy_ce_node, _zval TSRMLS_CC); \ + _container = (PHPTidyObj *) zend_object_store_get_object(_zval TSRMLS_CC); \ + _container->node = _node; \ + _container->attr = NULL; \ + _container->type = is_node; \ + tidy_add_default_properities(_container, is_node TSRMLS_CC); + +#define INSTANCIATE_ATTR(_zval, _container, _attr) \ + tidy_instanciate(tidy_ce_attr, _zval TSRMLS_CC); \ + _container = (PHPTidyObj *) zend_object_store_get_object(_zval TSRMLS_CC); \ + _container->node = NULL; \ + _container->attr = _attr; \ + _container->type = is_attr; \ + tidy_add_default_properities(_container, is_attr TSRMLS_CC); + +#define PHP_NODE_METHOD_IS_TYPE(_type, _const) \ +PHP_NODE_METHOD(is_ ##_type) \ +{ \ + GET_THIS_CONTAINER(); \ + if(tidyNodeGetType(obj->node) == _const) {\ + RETURN_TRUE; \ + } else { \ + RETURN_FALSE; \ + } \ +} + +typedef enum { + is_node, + is_attr +} tidy_obj_type; + +struct _PHPTidyDoc { + + TidyDoc doc; + TidyBuffer *errbuf; + zend_bool parsed; +}; + +typedef struct _PHPTidyDoc PHPTidyDoc; +typedef struct _PHPTidyObj PHPTidyObj; + +struct _PHPTidyObj { + zend_object std; + TidyNode node; + TidyAttr attr; + tidy_obj_type type; +}; + + +PHP_MINIT_FUNCTION(tidy); +PHP_MSHUTDOWN_FUNCTION(tidy); +PHP_RINIT_FUNCTION(tidy); +PHP_RSHUTDOWN_FUNCTION(tidy); 
+PHP_MINFO_FUNCTION(tidy); + +PHP_FUNCTION(tidy_setopt); +PHP_FUNCTION(tidy_getopt); +PHP_FUNCTION(tidy_parse_string); +PHP_FUNCTION(tidy_parse_file); +PHP_FUNCTION(tidy_clean_repair); +PHP_FUNCTION(tidy_repair_string); +PHP_FUNCTION(tidy_repair_file); +PHP_FUNCTION(tidy_diagnose); +PHP_FUNCTION(tidy_get_output); +PHP_FUNCTION(tidy_get_error_buffer); +PHP_FUNCTION(tidy_get_release); +PHP_FUNCTION(tidy_reset_config); +PHP_FUNCTION(tidy_get_config); +PHP_FUNCTION(tidy_get_status); +PHP_FUNCTION(tidy_get_html_ver); +PHP_FUNCTION(tidy_is_xhtml); +PHP_FUNCTION(tidy_is_xml); +PHP_FUNCTION(tidy_error_count); +PHP_FUNCTION(tidy_warning_count); +PHP_FUNCTION(tidy_access_count); +PHP_FUNCTION(tidy_config_count); +PHP_FUNCTION(tidy_load_config); +PHP_FUNCTION(tidy_load_config_enc); +PHP_FUNCTION(tidy_set_encoding); +PHP_FUNCTION(tidy_save_config); + +PHP_FUNCTION(tidy_get_root); +PHP_FUNCTION(tidy_get_html); +PHP_FUNCTION(tidy_get_head); +PHP_FUNCTION(tidy_get_body); + +PHP_NODE_METHOD(__construct); +PHP_NODE_METHOD(attributes); +PHP_NODE_METHOD(children); + +PHP_NODE_METHOD(has_children); +PHP_NODE_METHOD(has_siblings); +PHP_NODE_METHOD(is_comment); +PHP_NODE_METHOD(is_html); +PHP_NODE_METHOD(is_xhtml); +PHP_NODE_METHOD(is_xml); +PHP_NODE_METHOD(is_text); +PHP_NODE_METHOD(is_jste); +PHP_NODE_METHOD(is_asp); +PHP_NODE_METHOD(is_php); + +PHP_NODE_METHOD(next); +PHP_NODE_METHOD(prev); +PHP_NODE_METHOD(get_attr); +PHP_NODE_METHOD(get_nodes); + +/* resource dtor */ +void dtor_TidyDoc(zend_rsrc_list_entry * TSRMLS_DC); + +/* constant register helpers */ +void _php_tidy_register_nodetypes(INIT_FUNC_ARGS); +void _php_tidy_register_tags(INIT_FUNC_ARGS); +void _php_tidy_register_attributes(INIT_FUNC_ARGS); + +ZEND_BEGIN_MODULE_GLOBALS(tidy) + PHPTidyDoc *tdoc; + zend_bool used; + char *default_config; +ZEND_END_MODULE_GLOBALS(tidy) + +#ifdef ZTS +#define TG(v) TSRMG(tidy_globals_id, zend_tidy_globals *, v) +#else +#define TG(v) (tidy_globals.v) +#endif + + + + +#endif + + +/* + * 
Local variables: + * tab-width: 4 + * c-basic-offset: 4 + * End: + * vim600: noet sw=4 ts=4 fdm=marker + * vim<600: noet sw=4 ts=4 + */ diff --git a/ext/tidy/tests/001.phpt b/ext/tidy/tests/001.phpt new file mode 100644 index 0000000000..17da6f9874 --- /dev/null +++ b/ext/tidy/tests/001.phpt @@ -0,0 +1,24 @@ +--TEST-- +Check for tidy presence +--SKIPIF-- +<?php if (!extension_loaded("tidy")) print "skip"; ?> +--POST-- +--GET-- +--INI-- +--FILE-- +<?php +echo "tidy extension is available"; +/* + you can add regression tests for your extension here + + the output of your test code has to be equal to the + text in the --EXPECT-- section below for the tests + to pass, differences between the output and the + expected text are interpreted as failure + + see php4/README.TESTING for further information on + writing regression tests +*/ +?> +--EXPECT-- +tidy extension is available diff --git a/ext/tidy/tests/002.phpt b/ext/tidy/tests/002.phpt new file mode 100644 index 0000000000..83456091f7 --- /dev/null +++ b/ext/tidy/tests/002.phpt @@ -0,0 +1,22 @@ +--TEST-- +tidy_parse_string() +--SKIPIF-- +<?php if (!extension_loaded("tidy")) print "skip"; ?> +--POST-- +--GET-- +--INI-- +--FILE-- +<?php + tidy_parse_string("<HTML></HTML>"); + + echo tidy_get_output(); + +?> +--EXPECT-- +<html> +<head> +<title></title> +</head> +<body> +</body> +</html>
\ No newline at end of file diff --git a/ext/tidy/tests/003.phpt b/ext/tidy/tests/003.phpt new file mode 100644 index 0000000000..b008acecdb --- /dev/null +++ b/ext/tidy/tests/003.phpt @@ -0,0 +1,25 @@ +--TEST-- +tidy_clean_repair() +--SKIPIF-- +<?php if (!extension_loaded("tidy")) print "skip"; ?> +--POST-- +--GET-- +--INI-- +--FILE-- +<?php + + tidy_parse_string("<HTML></HTML>"); + tidy_clean_repair(); + + echo tidy_get_output(); + +?> +--EXPECT-- +<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2//EN"> +<html> +<head> +<title></title> +</head> +<body> +</body> +</html> diff --git a/ext/tidy/tests/004.phpt b/ext/tidy/tests/004.phpt new file mode 100644 index 0000000000..ed60a39b27 --- /dev/null +++ b/ext/tidy/tests/004.phpt @@ -0,0 +1,21 @@ +--TEST-- +tidy_diagnose() +--SKIPIF-- +<?php if (!extension_loaded("tidy")) print "skip"; ?> +--POST-- +--GET-- +--INI-- +--FILE-- +<?php + tidy_parse_string("<HTML></HTML>"); + tidy_diagnose(); + echo tidy_get_error_buffer(); + +?> +--EXPECT-- + +line 1 column 1 - Warning: missing <!DOCTYPE> declaration +line 1 column 7 - Warning: discarding unexpected </html> +line 1 column 14 - Warning: inserting missing 'title' element +Info: Document content looks like HTML 3.2 +3 warnings, 0 errors were found!
\ No newline at end of file diff --git a/ext/tidy/tests/005.html b/ext/tidy/tests/005.html new file mode 100644 index 0000000000..8c17451f91 --- /dev/null +++ b/ext/tidy/tests/005.html @@ -0,0 +1 @@ +<HTML></HTML> diff --git a/ext/tidy/tests/005.phpt b/ext/tidy/tests/005.phpt new file mode 100644 index 0000000000..d69a726c8f --- /dev/null +++ b/ext/tidy/tests/005.phpt @@ -0,0 +1,23 @@ +--TEST-- +tidy_parse_file() +--SKIPIF-- +<?php if (!extension_loaded("tidy")) print "skip"; ?> +--POST-- +--GET-- +--INI-- +--FILE-- +<?php + + tidy_parse_file("ext/tidy/tests/005.html"); + + echo tidy_get_output(); + +?> +--EXPECT-- +<html> +<head> +<title></title> +</head> +<body> +</body> +</html>
\ No newline at end of file diff --git a/ext/tidy/tests/006.phpt b/ext/tidy/tests/006.phpt new file mode 100644 index 0000000000..7ea28e79c5 --- /dev/null +++ b/ext/tidy/tests/006.phpt @@ -0,0 +1,21 @@ +--TEST-- +Verbose tidy_get_error_buffer() +--SKIPIF-- +<?php if (!extension_loaded("tidy")) print "skip"; ?> +--POST-- +--GET-- +--INI-- +--FILE-- +<?php + + tidy_parse_string("<HTML><asd asdf></HTML>"); + + echo tidy_get_error_buffer(true); + +?> +--EXPECT-- +line 1 column 1 - Warning: missing <!DOCTYPE> declaration +line 1 column 7 - Error: <asd> is not recognized! +line 1 column 7 - Warning: discarding unexpected <asd> +line 1 column 17 - Warning: discarding unexpected </html> +line 1 column 7 - Warning: inserting missing 'title' element
\ No newline at end of file diff --git a/ext/tidy/tests/007.phpt b/ext/tidy/tests/007.phpt new file mode 100644 index 0000000000..9987677df6 --- /dev/null +++ b/ext/tidy/tests/007.phpt @@ -0,0 +1,36 @@ +--TEST-- +Verbose tidy_setopt() / tidy_getopt() +--SKIPIF-- +<?php if (!extension_loaded("tidy")) print "skip"; ?> +--POST-- +--GET-- +--INI-- +--FILE-- +<?php + + echo "Current Value of 'tidy-mark': "; + var_dump(tidy_getopt("tidy-mark")); + tidy_setopt($tidy, "tidy-mark", true); + echo "\nNew Value of 'tidy-mark': "; + var_dump(tidy_getopt("tidy-mark")); + echo "Current Value of 'error-file': "; + var_dump(tidy_getopt("error-file")); + tidy_setopt($tidy, "error-file", "foobar"); + echo "\nNew Value of 'error-file': "; + var_dump(tidy_getopt("error-file")); + echo "Current Value of 'tab-size': "; + var_dump(tidy_getopt("tab-size")); + tidy_setopt($tidy, "tab-size", 10); + echo "\nNew Value of 'tab-size': "; + var_dump(tidy_getopt("tab-size")); +?> +--EXPECT-- +Current Value of 'tidy-mark': bool(false) + +New Value of 'tidy-mark': bool(true) +Current Value of 'error-file': string(0) "" + +New Value of 'error-file': string(6) "foobar" +Current Value of 'tab-size': int(8) + +New Value of 'tab-size': int(10)
\ No newline at end of file diff --git a/ext/tidy/tidy.c b/ext/tidy/tidy.c new file mode 100644 index 0000000000..c67e29c271 --- /dev/null +++ b/ext/tidy/tidy.c @@ -0,0 +1,1689 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 4 | + +----------------------------------------------------------------------+ + | Copyright (c) 1997-2003 The PHP Group | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.0 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_0.txt. | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Author: John Coggeshall <john@php.net> | + | Ilia Alshanetsky <ilia@php.net> | + +----------------------------------------------------------------------+ +*/ + +/* $Id$ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "php.h" +#include "php_tidy.h" +#include "php_ini.h" +#include "ext/standard/info.h" +#include "Zend/zend_API.h" +#include "Zend/zend_hash.h" +#include "safe_mode.h" + +ZEND_DECLARE_MODULE_GLOBALS(tidy); + +#define TIDY_PARSED_CHECK() \ +if(!TG(tdoc)->parsed) { \ + php_error_docref(NULL TSRMLS_CC, E_WARNING, "A document must be parsed before executing this function."); \ + RETURN_FALSE; \ +} \ + +#define TIDY_SAFE_MODE_CHECK(filename) \ +if ((PG(safe_mode) && (!php_checkuid(filename, NULL, CHECKUID_CHECK_FILE_AND_DIR))) || php_check_open_basedir(filename TSRMLS_CC)) { \ + RETURN_FALSE; \ +} \ + +function_entry tidy_functions[] = { + PHP_FE(tidy_setopt, NULL) + PHP_FE(tidy_getopt, NULL) + PHP_FE(tidy_parse_string, NULL) + PHP_FE(tidy_parse_file, NULL) + 
PHP_FE(tidy_get_output, NULL) + PHP_FE(tidy_get_error_buffer, NULL) + PHP_FE(tidy_clean_repair, NULL) + PHP_FE(tidy_repair_string, NULL) + PHP_FE(tidy_repair_file, NULL) + PHP_FE(tidy_diagnose, NULL) + PHP_FE(tidy_get_release, NULL) + PHP_FE(tidy_get_config, NULL) + PHP_FE(tidy_get_status, NULL) + PHP_FE(tidy_get_html_ver, NULL) + PHP_FE(tidy_is_xhtml, NULL) + PHP_FE(tidy_is_xml, NULL) + PHP_FE(tidy_error_count, NULL) + PHP_FE(tidy_warning_count, NULL) + PHP_FE(tidy_access_count, NULL) + PHP_FE(tidy_config_count, NULL) + PHP_FE(tidy_load_config, NULL) + PHP_FE(tidy_load_config_enc, NULL) + PHP_FE(tidy_set_encoding, NULL) + PHP_FE(tidy_save_config, NULL) + +#ifdef ZEND_ENGINE_2 + PHP_FE(tidy_get_root, NULL) + PHP_FE(tidy_get_head, NULL) + PHP_FE(tidy_get_html, NULL) + PHP_FE(tidy_get_body, NULL) +#endif + + {NULL, NULL, NULL} +}; + +#ifdef ZEND_ENGINE_2 +#include "zend_default_classes.h" + +static void tidy_object_dtor(void *object, zend_object_handle handle TSRMLS_DC); +static void tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, zend_object_value *retval TSRMLS_DC); + +static zend_object_value tidy_object_new_node(zend_class_entry *class_type TSRMLS_DC); +static zend_object_value tidy_object_new_attr(zend_class_entry *class_type TSRMLS_DC); +static zend_object_value tidy_object_new_exception(zend_class_entry *class_type TSRMLS_DC); + +static zend_class_entry *tidy_get_ce_node(zval *object TSRMLS_DC); +static zend_class_entry *tidy_get_ce_attr(zval *object TSRMLS_DC); + +static zval * tidy_instanciate(zend_class_entry *pce, zval *object TSRMLS_DC); + +zend_class_entry *tidy_ce_node, *tidy_ce_attr, + *tidy_ce_exception; + +static zend_object_handlers tidy_object_handlers_node; +static zend_object_handlers tidy_object_handlers_attr; +static zend_object_handlers tidy_object_handlers_exception; + +function_entry tidy_funcs_node[] = { + + PHP_NODE_ME(__construct, NULL) + PHP_NODE_ME(attributes, NULL) + PHP_NODE_ME(children, NULL) + + 
PHP_NODE_ME(has_children, NULL) + PHP_NODE_ME(has_siblings, NULL) + PHP_NODE_ME(is_comment, NULL) + PHP_NODE_ME(is_html, NULL) + PHP_NODE_ME(is_text, NULL) + PHP_NODE_ME(is_jste, NULL) + PHP_NODE_ME(is_asp, NULL) + PHP_NODE_ME(is_php, NULL) + + PHP_NODE_ME(next, NULL) + PHP_NODE_ME(prev, NULL) + PHP_NODE_ME(get_attr, NULL) + /*PHP_NODE_ME(get_nodes, NULL) TODO */ + {NULL, NULL, NULL} +}; + +function_entry tidy_funcs_attr[] = { + {NULL, NULL, NULL} +}; + +function_entry tidy_funcs_exception[] = { + {NULL, NULL, NULL} +}; + +#endif + +zend_module_entry tidy_module_entry = { +#if ZEND_MODULE_API_NO >= 20010901 + STANDARD_MODULE_HEADER, +#endif + "tidy", + tidy_functions, + PHP_MINIT(tidy), + PHP_MSHUTDOWN(tidy), + PHP_RINIT(tidy), + NULL, + PHP_MINFO(tidy), +#if ZEND_MODULE_API_NO >= 20010901 + "1.0", +#endif + STANDARD_MODULE_PROPERTIES +}; + +#ifdef COMPILE_DL_TIDY +ZEND_GET_MODULE(tidy) +#endif + +/* {{{ PHP_INI + */ +PHP_INI_BEGIN() +STD_PHP_INI_ENTRY("tidy.default_config", "", PHP_INI_SYSTEM, OnUpdateString, default_config, zend_tidy_globals, tidy_globals) +PHP_INI_END() +/* }}} */ + +static void tidy_globals_ctor(zend_tidy_globals *g TSRMLS_DC) +{ + g->used = 0; + g->tdoc = pemalloc(sizeof(PHPTidyDoc), 1); + g->tdoc->doc = tidyCreate(); + g->tdoc->parsed = 0; + g->tdoc->errbuf = pemalloc(sizeof(TidyBuffer), 1); + tidyBufInit(g->tdoc->errbuf); + + if(tidySetErrorBuffer(g->tdoc->doc, g->tdoc->errbuf) != 0) { + zend_error(E_ERROR, "Could not set Tidy error buffer"); + } + + tidyOptSetBool(g->tdoc->doc, TidyForceOutput, yes); + tidyOptSetBool(g->tdoc->doc, TidyMark, no); + + /* remember settings so that we can restore them */ + tidyOptSnapshot(g->tdoc->doc); +} + +static void tidy_globals_dtor(zend_tidy_globals *g TSRMLS_DC) +{ + tidyBufFree(g->tdoc->errbuf); + pefree(g->tdoc->errbuf, 1); + tidyRelease(g->tdoc->doc); + pefree(g->tdoc, 1); + g->used = 0; +} + +static void *php_tidy_get_opt_val(TidyOption opt, TidyOptionType *type TSRMLS_DC) +{ + *type = 
tidyOptGetType(opt); + + switch (*type) { + case TidyString: { + char *val = (char *) tidyOptGetValue(TG(tdoc)->doc, tidyOptGetId(opt)); + if (val) { + return (void *) estrdup(val); + } else { + return (void *) estrdup(""); + } + } + break; + + case TidyInteger: + return (void *) tidyOptGetInt(TG(tdoc)->doc, tidyOptGetId(opt)); + break; + + case TidyBoolean: + return (void *) tidyOptGetBool(TG(tdoc)->doc, tidyOptGetId(opt)); + break; + } + + /* should not happen */ + return NULL; +} + +static char *php_tidy_file_to_mem(char *filename, zend_bool use_include_path TSRMLS_DC) +{ + php_stream *stream; + int len; + char *data = NULL; + + if (!(stream = php_stream_open_wrapper(filename, "rb", (use_include_path ? USE_PATH : 0) | ENFORCE_SAFE_MODE | REPORT_ERRORS, NULL))) { + return NULL; + } + if ((len = php_stream_copy_to_mem(stream, &data, PHP_STREAM_COPY_ALL, 0)) > 0) { + /* noop */ + } else if (len == 0) { + data = estrdup(""); + } + php_stream_close(stream); + + return data; +} + +static void php_tidy_quick_repair(INTERNAL_FUNCTION_PARAMETERS, zend_bool is_file) +{ + char *data=NULL, *cfg_file=NULL, *arg1; + int cfg_file_len, arg1_len; + zend_bool use_include_path = 0; + + if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sb", &arg1, &arg1_len, &cfg_file, &cfg_file_len, &use_include_path) == FAILURE) { + RETURN_FALSE; + } + + if (is_file) { + if (!(data = php_tidy_file_to_mem(arg1, use_include_path TSRMLS_CC))) { + RETURN_FALSE; + } + } else { + data = arg1; + } + + if (cfg_file && cfg_file[0]) { + TIDY_SAFE_MODE_CHECK(cfg_file); + if(tidyLoadConfig(TG(tdoc)->doc, cfg_file) < 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not load configuration file '%s'", cfg_file); + RETVAL_FALSE; + } + TG(used) = 1; + } + + if (data) { + if(tidyParseString(TG(tdoc)->doc, data) < 0) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "[Tidy error] %s", TG(tdoc)->errbuf->bp); + RETVAL_FALSE; + } else { + TG(tdoc)->parsed = TRUE; + if (tidyCleanAndRepair(TG(tdoc)->doc) >= 
0) { + TidyBuffer output = {0}; + + tidySaveBuffer (TG(tdoc)->doc, &output); + RETVAL_STRING(output.bp, 1); + tidyBufFree(&output); + } else { + RETVAL_FALSE; + } + } + } + + if (is_file) { + efree(data); + } +} + +PHP_MINIT_FUNCTION(tidy) +{ + REGISTER_INI_ENTRIES(); + +#ifdef ZEND_ENGINE_2 + REGISTER_TIDY_CLASS(node, NULL); + REGISTER_TIDY_CLASS(attr, NULL); + REGISTER_TIDY_CLASS(exception, zend_exception_get_default()); + + tidy_object_handlers_node.get_class_entry = tidy_get_ce_node; + tidy_object_handlers_attr.get_class_entry = tidy_get_ce_attr; +#endif + + _php_tidy_register_tags(INIT_FUNC_ARGS_PASSTHRU); + _php_tidy_register_attributes(INIT_FUNC_ARGS_PASSTHRU); + _php_tidy_register_nodetypes(INIT_FUNC_ARGS_PASSTHRU); + + ZEND_INIT_MODULE_GLOBALS(tidy, tidy_globals_ctor, tidy_globals_dtor); + + return SUCCESS; +} + +PHP_MSHUTDOWN_FUNCTION(tidy) +{ +#ifndef ZTS + tidy_globals_dtor(&tidy_globals TSRMLS_CC); +#endif + return SUCCESS; +} + +PHP_RINIT_FUNCTION(tidy) +{ + if (TG(used) && tidyOptDiffThanSnapshot((TG(tdoc))->doc)) { + tidyOptResetToSnapshot((TG(tdoc))->doc); + TG(used) = 0; + } + /* if a user provided a default configuration file, use it */ + if (TG(default_config) && TG(default_config)[0]) { + if (tidyLoadConfig((TG(tdoc))->doc, TG(default_config)) < 0) { + zend_error(E_ERROR, "Unable to load Tidy configuration file at '%s'.", TG(default_config)); + } + TG(used) = 1; + } + return SUCCESS; +} + +PHP_MINFO_FUNCTION(tidy) +{ + TidyIterator itOpt = tidyGetOptionList(TG(tdoc)->doc); + void *opt_value; + TidyOptionType optt; + char buf[255]; + + php_info_print_table_start(); + php_info_print_table_header(2, "Tidy support", "enabled"); + php_info_print_table_row(2, "libTidy Build Date", (char *)tidyReleaseDate()); + php_info_print_table_end(); + + DISPLAY_INI_ENTRIES(); + + php_info_print_table_start(); + php_info_print_table_header(2, "Tidy Configuration Directive", "Value"); + while (itOpt) { + TidyOption opt = tidyGetNextOption(TG(tdoc)->doc, &itOpt); + 
+		opt_value = php_tidy_get_opt_val(opt, &optt TSRMLS_CC);
+		switch (optt) {
+			case TidyString:
+				php_info_print_table_row(2, (char *)tidyOptGetName(opt), (char*)opt_value);
+				efree(opt_value);
+				break;
+
+			case TidyInteger:
+				sprintf(buf, "%d", (int)opt_value);
+				php_info_print_table_row(2, (char *)tidyOptGetName(opt), (char*)buf);
+				break;
+
+			case TidyBoolean:
+				php_info_print_table_row(2, (char *)tidyOptGetName(opt), (opt_value ? "TRUE" : "FALSE"));
+				break;
+		}
+	}
+	php_info_print_table_end();
+}
+
+/* {{{ proto bool tidy_parse_string(string input)
+   Parse a document stored in a string.
+   Parses into the shared document TG(tdoc)->doc. On success the document is
+   flagged as parsed and TRUE is returned; when tidy reports a negative status
+   a warning carrying the tidy error buffer is raised and FALSE is returned. */
+PHP_FUNCTION(tidy_parse_string)
+{
+	char *input;
+	int input_len;
+
+	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &input, &input_len) == FAILURE) {
+		RETURN_FALSE;
+	}
+
+	if(tidyParseString(TG(tdoc)->doc, input) < 0) {
+		php_error_docref(NULL TSRMLS_CC, E_WARNING, "[Tidy error] %s", TG(tdoc)->errbuf->bp);
+		RETURN_FALSE;
+	}
+
+	TG(tdoc)->parsed = TRUE;
+	RETURN_TRUE;
+}
+/* }}} */
+
+/* {{{ proto string tidy_get_error_buffer([boolean detailed])
+   Return warnings and errors which occurred parsing the specified document.
+   With detailed set, tidyErrorSummary() is run first so its output is part of
+   the returned text. The error buffer is cleared after it has been copied. */
+PHP_FUNCTION(tidy_get_error_buffer)
+{
+	zend_bool detailed = 0;
+
+	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|b", &detailed) == FAILURE) {
+		RETURN_FALSE;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	if (detailed) {
+		tidyErrorSummary(TG(tdoc)->doc);
+	}
+
+	if (TG(tdoc)->errbuf->bp) {
+		RETVAL_STRING(TG(tdoc)->errbuf->bp, 1);
+	} else {
+		/* bp may still be NULL when nothing was written to the buffer;
+		   RETVAL_STRING() would run strlen() on it, so return "" instead */
+		RETVAL_EMPTY_STRING();
+	}
+	tidyBufClear(TG(tdoc)->errbuf);
+}
+/* }}} */
+
+/* {{{ proto string tidy_get_output()
+   Return a string representing the parsed tidy markup.
+   The markup is saved into a temporary TidyBuffer, copied into the return
+   value and the temporary buffer is released again. */
+PHP_FUNCTION(tidy_get_output)
+{
+	TidyBuffer output = {0};
+
+	if (ZEND_NUM_ARGS()) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	tidySaveBuffer (TG(tdoc)->doc, &output);
+
+	if (output.bp) {
+		RETVAL_STRING(output.bp, 1);
+	} else {
+		/* the zero-initialised buffer was never filled: avoid strlen(NULL) */
+		RETVAL_EMPTY_STRING();
+	}
+
+	tidyBufFree(&output);
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_parse_file(string file [, bool use_include_path])
+   Parse markup in file or URI.
+   The file is read into memory with php_tidy_file_to_mem() and then parsed
+   into the shared document. Returns TRUE on success, FALSE when the file
+   cannot be read or tidy reports a negative status. */
+PHP_FUNCTION(tidy_parse_file)
+{
+	char *inputfile;
+	int input_len;
+	zend_bool use_include_path = 0;
+	char *contents;
+
+	/* "s|b": the optional second argument documented in the proto was never
+	   parsed before, so use_include_path could not be anything but 0 */
+	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|b", &inputfile, &input_len, &use_include_path) == FAILURE) {
+		RETURN_FALSE;
+	}
+
+	if (!(contents = php_tidy_file_to_mem(inputfile, use_include_path TSRMLS_CC))) {
+		RETURN_FALSE;
+	}
+
+	/* RETVAL_* (not RETURN_*) so that contents is freed on both paths */
+	if(tidyParseString(TG(tdoc)->doc, contents) < 0) {
+		php_error_docref(NULL TSRMLS_CC, E_WARNING, "[Tidy error] %s", TG(tdoc)->errbuf->bp);
+		RETVAL_FALSE;
+	} else {
+		TG(tdoc)->parsed = TRUE;
+		RETVAL_TRUE;
+	}
+
+	efree(contents);
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_clean_repair()
+   Execute configured cleanup and repair operations on parsed markup.
+   Returns TRUE when tidyCleanAndRepair() reports a non-negative status. */
+PHP_FUNCTION(tidy_clean_repair)
+{
+	if(ZEND_NUM_ARGS()) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	if (tidyCleanAndRepair(TG(tdoc)->doc) >= 0) {
+		RETURN_TRUE;
+	}
+
+	RETURN_FALSE;
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_repair_string(string data [, string config_file])
+   Repair a string using an optionally provided configuration file.
+   Thin wrapper: forwards to php_tidy_quick_repair() with its last argument 0. */
+PHP_FUNCTION(tidy_repair_string)
+{
+	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_repair_file(string filename [, string config_file [, bool use_include_path]])
+   Repair a file using an optionally provided configuration file.
+   Thin wrapper: forwards to php_tidy_quick_repair() with its last argument 1. */
+PHP_FUNCTION(tidy_repair_file)
+{
+	php_tidy_quick_repair(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_diagnose()
+   Run configured diagnostics on parsed and repaired markup. 
*/
+PHP_FUNCTION(tidy_diagnose)
+{
+	if (ZEND_NUM_ARGS() != 0) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	/* any non-negative tidy status counts as success */
+	RETURN_BOOL(tidyRunDiagnostics(TG(tdoc)->doc) >= 0);
+}
+/* }}} */
+
+/* {{{ proto string tidy_get_release()
+   Get release date (version) for Tidy library.
+   Returns a copy of the string reported by tidyReleaseDate(). */
+PHP_FUNCTION(tidy_get_release)
+{
+	char *release_date;
+
+	if (ZEND_NUM_ARGS() != 0) {
+		WRONG_PARAM_COUNT;
+	}
+
+	release_date = (char *)tidyReleaseDate();
+	RETURN_STRING(release_date, 1);
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_reset_config()
+   Restore Tidy configuration to default values.
+   Returns the result of tidyOptResetToSnapshot() as a boolean. */
+PHP_FUNCTION(tidy_reset_config)
+{
+	int restored;
+
+	if (ZEND_NUM_ARGS() != 0) {
+		WRONG_PARAM_COUNT;
+	}
+
+	restored = tidyOptResetToSnapshot(TG(tdoc)->doc);
+	RETURN_BOOL(restored);
+}
+/* }}} */
+
+/* {{{ proto array tidy_get_config()
+   Get current Tidy configuration.
+   Returns an associative array mapping every option name of the shared
+   document to its current value (string, integer or boolean). */
+PHP_FUNCTION(tidy_get_config)
+{
+	TidyIterator option_iter = tidyGetOptionList(TG(tdoc)->doc);
+	TidyOption option;
+	TidyOptionType option_type;
+	char *name;
+	void *value;
+
+	if (ZEND_NUM_ARGS() != 0) {
+		WRONG_PARAM_COUNT;
+	}
+
+	array_init(return_value);
+
+	/* tidyGetNextOption() advances option_iter; the walk ends once it is NULL */
+	while (option_iter) {
+		option = tidyGetNextOption(TG(tdoc)->doc, &option_iter);
+		name = (char *)tidyOptGetName(option);
+		value = php_tidy_get_opt_val(option, &option_type TSRMLS_CC);
+
+		if (option_type == TidyString) {
+			/* duplicate flag 0: the array stores the pointer it is given */
+			add_assoc_string(return_value, name, (char *)value, 0);
+		} else if (option_type == TidyInteger) {
+			add_assoc_long(return_value, name, (long)value);
+		} else if (option_type == TidyBoolean) {
+			add_assoc_bool(return_value, name, (long)value);
+		}
+	}
+}
+/* }}} */
+
+
+/* {{{ proto int tidy_get_status()
+   Get status of specified document.
+   Returns the value of tidyStatus() for the shared document. */
+PHP_FUNCTION(tidy_get_status)
+{
+	long status;
+
+	if (ZEND_NUM_ARGS() != 0) {
+		WRONG_PARAM_COUNT;
+	}
+
+	status = tidyStatus(TG(tdoc)->doc);
+	RETURN_LONG(status);
+}
+/* }}} */
+
+/* {{{ proto int tidy_get_html_ver()
+   Get the Detected HTML version for the specified document. 
*/
+PHP_FUNCTION(tidy_get_html_ver)
+{
+	long html_version;
+
+	if (ZEND_NUM_ARGS() != 0) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	html_version = tidyDetectedHtmlVersion(TG(tdoc)->doc);
+	RETURN_LONG(html_version);
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_is_xhtml()
+   Indicates if the document is a XHTML document.
+   Requires a parsed document; reports tidyDetectedXhtml() as a boolean. */
+PHP_FUNCTION(tidy_is_xhtml)
+{
+	int is_xhtml;
+
+	if (ZEND_NUM_ARGS() != 0) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	is_xhtml = tidyDetectedXhtml(TG(tdoc)->doc);
+	RETURN_BOOL(is_xhtml);
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_is_xml()
+   Indicates if the document is a generic (non HTML/XHTML) XML document.
+   Requires a parsed document; reports tidyDetectedGenericXml() as a boolean. */
+PHP_FUNCTION(tidy_is_xml)
+{
+	int is_generic_xml;
+
+	if (ZEND_NUM_ARGS() != 0) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	is_generic_xml = tidyDetectedGenericXml(TG(tdoc)->doc);
+	RETURN_BOOL(is_generic_xml);
+}
+/* }}} */
+
+/* {{{ proto int tidy_error_count()
+   Returns the Number of Tidy errors encountered for specified document.
+   Requires a parsed document; the value comes from tidyErrorCount(). */
+PHP_FUNCTION(tidy_error_count)
+{
+	long errors;
+
+	if (ZEND_NUM_ARGS() != 0) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	errors = tidyErrorCount(TG(tdoc)->doc);
+	RETURN_LONG(errors);
+}
+/* }}} */
+
+/* {{{ proto int tidy_warning_count()
+   Returns the Number of Tidy warnings encountered for specified document.
+   Requires a parsed document; the value comes from tidyWarningCount(). */
+PHP_FUNCTION(tidy_warning_count)
+{
+	long warnings;
+
+	if (ZEND_NUM_ARGS() != 0) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	warnings = tidyWarningCount(TG(tdoc)->doc);
+	RETURN_LONG(warnings);
+}
+/* }}} */
+
+/* {{{ proto int tidy_access_count()
+   Returns the Number of Tidy accessibility warnings encountered for specified document.
+   Requires a parsed document; the value comes from tidyAccessWarningCount(). */
+PHP_FUNCTION(tidy_access_count)
+{
+	long access_warnings;
+
+	if (ZEND_NUM_ARGS() != 0) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	access_warnings = tidyAccessWarningCount(TG(tdoc)->doc);
+	RETURN_LONG(access_warnings);
+}
+/* }}} */
+
+/* {{{ proto int tidy_config_count()
+   Returns the Number of Tidy configuration errors encountered for specified document. 
*/
+PHP_FUNCTION(tidy_config_count)
+{
+	long config_errors;
+
+	if (ZEND_NUM_ARGS() != 0) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	config_errors = tidyConfigErrorCount(TG(tdoc)->doc);
+	RETURN_LONG(config_errors);
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_load_config(string filename)
+   Load an ASCII Tidy configuration file.
+   Returns TRUE and marks the tidy globals as used when tidyLoadConfig()
+   reports a non-negative status; otherwise raises a warning and returns FALSE. */
+PHP_FUNCTION(tidy_load_config)
+{
+	char *filename;
+	int name_length;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &filename, &name_length) == FAILURE) {
+		RETURN_FALSE;
+	}
+
+	TIDY_SAFE_MODE_CHECK(filename);
+
+	if (tidyLoadConfig(TG(tdoc)->doc, filename) >= 0) {
+		TG(used) = 1;
+		RETURN_TRUE;
+	}
+
+	php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not load configuration file '%s'", filename);
+	RETURN_FALSE;
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_load_config_enc(string filename, string encoding)
+   Load an ASCII Tidy configuration file with the specified encoding.
+   Same contract as tidy_load_config(), but the file is read through
+   tidyLoadConfigEnc() using the given character encoding name. */
+PHP_FUNCTION(tidy_load_config_enc)
+{
+	char *filename;
+	char *encoding;
+	int name_length;
+	int encoding_length;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss", &filename, &name_length, &encoding, &encoding_length) == FAILURE) {
+		RETURN_FALSE;
+	}
+
+	TIDY_SAFE_MODE_CHECK(filename);
+
+	if (tidyLoadConfigEnc(TG(tdoc)->doc, filename, encoding) >= 0) {
+		TG(used) = 1;
+		RETURN_TRUE;
+	}
+
+	php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not load configuration file '%s' using encoding '%s'", filename, encoding);
+	RETURN_FALSE;
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_set_encoding(string encoding)
+   Set the input/output character encoding for parsing markup.
+   Values include: ascii, latin1, raw, utf8, iso2022, mac, win1252, utf16le,
+   utf16be, utf16, big5 and shiftjis. 
*/
+PHP_FUNCTION(tidy_set_encoding)
+{
+	char *encoding;
+	int encoding_length;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &encoding, &encoding_length) == FAILURE) {
+		RETURN_FALSE;
+	}
+
+	/* non-negative status: the encoding was accepted by tidy */
+	if (tidySetCharEncoding(TG(tdoc)->doc, encoding) >= 0) {
+		TG(used) = 1;
+		RETURN_TRUE;
+	}
+
+	php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not set encoding '%s'", encoding);
+	RETURN_FALSE;
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_save_config(string filename)
+   Save current settings to named file. Only non-default values are written.
+   Returns TRUE when tidyOptSaveFile() reports a non-negative status;
+   otherwise raises a warning and returns FALSE. */
+PHP_FUNCTION(tidy_save_config)
+{
+	char *filename;
+	int name_length;
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &filename, &name_length) == FAILURE) {
+		RETURN_FALSE;
+	}
+
+	TIDY_SAFE_MODE_CHECK(filename);
+
+	if (tidyOptSaveFile(TG(tdoc)->doc, filename) >= 0) {
+		RETURN_TRUE;
+	}
+
+	php_error_docref(NULL TSRMLS_CC, E_WARNING, "Could not write tidy configuration file '%s'", filename);
+	RETURN_FALSE;
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_setopt(string option, mixed newvalue)
+   Updates the configuration settings for the specified tidy document. 
*/
+PHP_FUNCTION(tidy_setopt)
+{
+	zval *value;
+	zval conv;
+	char *optname;
+	int optname_len;
+	int updated = 0;
+	TidyOption opt;
+
+	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sz", &optname, &optname_len, &value) == FAILURE) {
+		RETURN_FALSE;
+	}
+
+	opt = tidyGetOptionByName(TG(tdoc)->doc, optname);
+	if (!opt) {
+		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown Tidy Configuration Option '%s'", optname);
+		RETURN_FALSE;
+	}
+
+	/* Convert a private copy of the argument. The convert_to_*_ex() macros
+	   expect the zval ** slot that owns the value; handing them the address
+	   of this local pointer lets the separation step drop a reference the
+	   caller still holds and leaves the separated copy unreleased. */
+	conv = *value;
+	zval_copy_ctor(&conv);
+
+	switch(tidyOptGetType(opt)) {
+		case TidyString:
+			convert_to_string(&conv);
+			updated = tidyOptSetValue(TG(tdoc)->doc, tidyOptGetId(opt), Z_STRVAL(conv));
+			break;
+
+		case TidyInteger:
+			convert_to_long(&conv);
+			updated = tidyOptSetInt(TG(tdoc)->doc, tidyOptGetId(opt), Z_LVAL(conv));
+			break;
+
+		case TidyBoolean:
+			convert_to_long(&conv);
+			updated = tidyOptSetBool(TG(tdoc)->doc, tidyOptGetId(opt), Z_LVAL(conv));
+			break;
+
+		default:
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to determine type of Tidy configuration constant to set");
+			break;
+	}
+
+	/* single exit point so the temporary copy is always released */
+	zval_dtor(&conv);
+
+	if (updated) {
+		TG(used) = 1;
+		RETURN_TRUE;
+	}
+	RETURN_FALSE;
+}
+/* }}} */
+
+/* {{{ proto mixed tidy_getopt(string option)
+   Returns the value of the specified configuration option for the tidy document. 
*/
+PHP_FUNCTION(tidy_getopt)
+{
+	char *optname;
+	void *optval;
+	int optname_len;
+	TidyOption opt;
+	TidyOptionType optt;
+
+	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &optname, &optname_len) == FAILURE) {
+		RETURN_FALSE;
+	}
+
+	opt = tidyGetOptionByName(TG(tdoc)->doc, optname);
+	if (!opt) {
+		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown Tidy Configuration Option '%s'", optname);
+		RETURN_FALSE;
+	}
+
+	optval = php_tidy_get_opt_val(opt, &optt TSRMLS_CC);
+	switch (optt) {
+		case TidyString:
+			/* return right here: setting RETVAL and breaking out of the
+			   switch ran into the RETURN_FALSE below, which discarded the
+			   string. Duplicate flag 0 hands optval over without copying. */
+			RETURN_STRING((char *)optval, 0);
+			break;
+
+		case TidyInteger:
+			RETURN_LONG((long)optval);
+			break;
+
+		case TidyBoolean:
+			/* a disabled option is FALSE (as in tidy_get_config()), not NULL */
+			if (optval) {
+				RETURN_TRUE;
+			} else {
+				RETURN_FALSE;
+			}
+			break;
+
+		default:
+			php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to determine type of Tidy configuration constant to get");
+			break;
+	}
+
+	/* only reached for an option type that is not handled above */
+	RETURN_FALSE;
+}
+/* }}} */
+
+#ifdef ZEND_ENGINE_2
+/* Object-store destructor for PHPTidyObj: destroys and frees the property
+   table, then frees the object itself. */
+static void tidy_object_dtor(void *object, zend_object_handle handle TSRMLS_DC)
+{
+	PHPTidyObj *intern = (PHPTidyObj *)object;
+
+	zend_hash_destroy(intern->std.properties);
+	FREE_HASHTABLE(intern->std.properties);
+
+	efree(object);
+}
+
+/* Allocates a zeroed PHPTidyObj of class class_type, gives it a property
+   table copied from the class defaults, registers it in the object store
+   with tidy_object_dtor and fills *retval with the new handle and the
+   supplied handler table. */
+static void tidy_object_new(zend_class_entry *class_type, zend_object_handlers *handlers, zend_object_value *retval TSRMLS_DC)
+{
+	PHPTidyObj *intern;
+	zval *tmp;
+
+	intern = emalloc(sizeof(PHPTidyObj));
+	memset(intern, 0, sizeof(PHPTidyObj));
+	intern->std.ce = class_type;
+
+	ALLOC_HASHTABLE(intern->std.properties);
+	zend_hash_init(intern->std.properties, 0, NULL, ZVAL_PTR_DTOR, 0);
+	zend_hash_copy(intern->std.properties, &class_type->default_properties, (copy_ctor_func_t) zval_add_ref, (void *) &tmp, sizeof(zval *));
+
+	retval->handle = zend_objects_store_put(intern, tidy_object_dtor, NULL TSRMLS_CC);
+	retval->handlers = handlers;
+}
+
+/* Creates an object that uses the node handler table. */
+static zend_object_value tidy_object_new_node(zend_class_entry *class_type TSRMLS_DC)
+{
+	zend_object_value retval;
+	tidy_object_new(class_type, &tidy_object_handlers_node, &retval TSRMLS_CC);
+	return retval;
+}
+
+/* Creates an object that uses the attribute handler table. */
+static zend_object_value tidy_object_new_attr(zend_class_entry *class_type TSRMLS_DC)
+{
+	zend_object_value retval;
+	tidy_object_new(class_type, &tidy_object_handlers_attr, &retval TSRMLS_CC);
+	return retval;
+
+}
+
+/* Creates an object that uses the exception handler table. */
+static zend_object_value tidy_object_new_exception(zend_class_entry *class_type TSRMLS_DC)
+{
+	zend_object_value retval;
+	tidy_object_new(class_type, &tidy_object_handlers_exception, &retval TSRMLS_CC);
+	return retval;
+
+}
+
+/* Class-entry getter for node objects: always tidy_ce_node. */
+static zend_class_entry *tidy_get_ce_node(zval *object TSRMLS_DC)
+{
+	return tidy_ce_node;
+}
+
+/* Class-entry getter for attribute objects: always tidy_ce_attr. */
+static zend_class_entry *tidy_get_ce_attr(zval *object TSRMLS_DC)
+{
+	return tidy_ce_attr;
+}
+
+/* Turns object into an instance of pce, allocating the zval first when
+   object is NULL. The zval ends up with refcount 1 and is_ref 1 and is
+   returned to the caller. */
+static zval * tidy_instanciate(zend_class_entry *pce, zval *object TSRMLS_DC)
+{
+	if (!object) {
+		ALLOC_ZVAL(object);
+	}
+	Z_TYPE_P(object) = IS_OBJECT;
+	object_init_ex(object, pce);
+	object->refcount = 1;
+	object->is_ref = 1;
+	return object;
+}
+
+/* Stores _string under the literal key _key in hash table _table; a NULL
+   _string is stored as an empty string. */
+#define ADD_PROPERITY_STRING(_table, _key, _string) \
+	{ \
+		zval *tmp; \
+		MAKE_STD_ZVAL(tmp); \
+		if(_string) { \
+			ZVAL_STRING(tmp, (char *)_string, 1); \
+		} else { \
+			ZVAL_EMPTY_STRING(tmp); \
+		} \
+		zend_hash_update(_table, #_key, sizeof(#_key), (void *)&tmp, sizeof(zval *), NULL); \
+	}
+
+/* Stores the integer _long under the literal key _key in hash table _table. */
+#define ADD_PROPERITY_LONG(_table, _key, _long) \
+	{ \
+		zval *tmp; \
+		MAKE_STD_ZVAL(tmp); \
+		ZVAL_LONG(tmp, _long); \
+		zend_hash_update(_table, #_key, sizeof(#_key), (void *)&tmp, sizeof(zval *), NULL); \
+	}
+
+/* Fills the property table of obj from the tidy handle it wraps.
+   Nodes get value (node text), name and type, plus id for every node type
+   other than root, doctype, text and comment. Attributes get name, value
+   and id. */
+static void tidy_add_default_properities(PHPTidyObj *obj, tidy_obj_type type TSRMLS_DC) {
+
+	TidyBuffer buf;
+
+	switch(type) {
+
+		case is_node:
+			memset(&buf, 0, sizeof(buf));
+			tidyNodeGetText(TG(tdoc)->doc, obj->node, &buf);
+			ADD_PROPERITY_STRING(obj->std.properties, value, buf.bp);
+			tidyBufFree(&buf);
+
+			/* the leftover debug fprintf(stderr, "type: %d\n", ...) that used
+			   to sit here wrote to stderr for every node object created */
+			ADD_PROPERITY_STRING(obj->std.properties, name, tidyNodeGetName(obj->node));
+			ADD_PROPERITY_LONG(obj->std.properties, type, tidyNodeGetType(obj->node));
+
+			switch(tidyNodeGetType(obj->node)) {
+
+				case TidyNode_Root:
+				case TidyNode_DocType:
+				case TidyNode_Text:
+				case TidyNode_Comment:
+					break;
+				default:
+					ADD_PROPERITY_LONG(obj->std.properties, id, tidyNodeGetId(obj->node));
+			}
+
+			break;
+		case is_attr:
+			ADD_PROPERITY_STRING(obj->std.properties, name, tidyAttrName(obj->attr));
+			ADD_PROPERITY_STRING(obj->std.properties, value, tidyAttrValue(obj->attr));
+			ADD_PROPERITY_LONG(obj->std.properties, id, tidyAttrGetId(obj->attr));
+			break;
+	}
+
+}
+/* {{{ proto TidyNode tidy_get_root()
+   Returns a TidyNode Object representing the root of the tidy parse tree */
+PHP_FUNCTION(tidy_get_root)
+{
+	PHPTidyObj *obj;
+
+	if(ZEND_NUM_ARGS()) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	tidy_instanciate(tidy_ce_node, return_value TSRMLS_CC);
+	obj = (PHPTidyObj *) zend_object_store_get_object(return_value TSRMLS_CC);
+	obj->node = tidyGetRoot(TG(tdoc)->doc);
+	obj->attr = NULL;
+	obj->type = is_node;
+
+	tidy_add_default_properities(obj, is_node TSRMLS_CC);
+
+}
+/* }}} */
+
+/* {{{ proto TidyNode tidy_get_html()
+   Returns a TidyNode Object starting from the <HTML> tag of the tidy parse tree */
+PHP_FUNCTION(tidy_get_html)
+{
+	PHPTidyObj *obj;
+
+	if(ZEND_NUM_ARGS()) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	tidy_instanciate(tidy_ce_node, return_value TSRMLS_CC);
+	obj = (PHPTidyObj *) zend_object_store_get_object(return_value TSRMLS_CC);
+	obj->node = tidyGetHtml(TG(tdoc)->doc);
+	obj->attr = NULL;
+	obj->type = is_node;
+
+	tidy_add_default_properities(obj, is_node TSRMLS_CC);
+}
+/* }}} */
+
+/* {{{ proto TidyNode tidy_get_head()
+   Returns a TidyNode Object starting from the <HEAD> tag of the tidy parse tree */
+PHP_FUNCTION(tidy_get_head)
+{
+	PHPTidyObj *obj;
+
+	if(ZEND_NUM_ARGS()) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	tidy_instanciate(tidy_ce_node, return_value TSRMLS_CC);
+	obj = (PHPTidyObj *) zend_object_store_get_object(return_value TSRMLS_CC);
+	obj->node = tidyGetHead(TG(tdoc)->doc);
+	obj->attr = NULL;
+	obj->type = is_node;
+
+	tidy_add_default_properities(obj, is_node TSRMLS_CC);
+}
+/* }}} */
+
+/* {{{ proto TidyNode tidy_get_body()
+   Returns a TidyNode Object starting from the <BODY> tag of the tidy parse tree */
+PHP_FUNCTION(tidy_get_body)
+{
+	PHPTidyObj *obj;
+
+	if(ZEND_NUM_ARGS()) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	tidy_instanciate(tidy_ce_node, return_value TSRMLS_CC);
+	obj = (PHPTidyObj *) zend_object_store_get_object(return_value TSRMLS_CC);
+	obj->node = tidyGetBody(TG(tdoc)->doc);
+	obj->attr = NULL;
+	obj->type = is_node;
+
+	tidy_add_default_properities(obj, is_node TSRMLS_CC);
+}
+/* }}} */
+
+/* {{{ proto void tidy_node::tidy_node()
+   Constructor. Intentionally empty. */
+PHP_NODE_METHOD(__construct)
+{
+}
+/* }}} */
+
+/* {{{ proto tidy_attr tidy_node::attributes()
+   Returns an array of attribute objects for node.
+   The return value is left untouched when the node has no attributes. */
+PHP_NODE_METHOD(attributes)
+{
+	TidyAttr tempattr;
+	zval *object;
+	PHPTidyObj *objtemp;
+	GET_THIS_CONTAINER();
+
+	tempattr = tidyAttrFirst(obj->node);
+
+	if(tempattr) {
+		array_init(return_value);
+
+		do {
+
+			MAKE_STD_ZVAL(object);
+			INSTANCIATE_ATTR(object, objtemp, tempattr);
+			add_next_index_zval(return_value, object);
+
+		} while((tempattr = tidyAttrNext(tempattr)));
+	}
+}
+/* }}} */
+
+
+
+/* {{{ proto tidy_node tidy_node::children()
+   Returns an array of child nodes.
+   The return value is left untouched when the node has no children. */
+PHP_NODE_METHOD(children)
+{
+	TidyNode tempnode;
+	zval *object;
+	PHPTidyObj *objtemp;
+	GET_THIS_CONTAINER();
+
+	tempnode = tidyGetChild(obj->node);
+
+	if(tempnode) {
+		array_init(return_value);
+		do {
+
+			MAKE_STD_ZVAL(object);
+			INSTANCIATE_NODE(object, objtemp, tempnode);
+			add_next_index_zval(return_value, object);
+
+		} while((tempnode = tidyGetNext(tempnode)));
+	}
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_node::has_children()
+   Returns true if this node has children */
+PHP_NODE_METHOD(has_children)
+{
+	GET_THIS_CONTAINER();
+
+	if(tidyGetChild(obj->node)) {
+		RETURN_TRUE;
+	} else {
+		RETURN_FALSE;
+	}
+
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_node::has_siblings()
+   Returns true if this node has siblings.
+   Only the following sibling (tidyGetNext()) is consulted. */
+PHP_NODE_METHOD(has_siblings)
+{
+	GET_THIS_CONTAINER();
+
+	if(tidyGetNext(obj->node)) {
+		RETURN_TRUE;
+	} else {
+		RETURN_FALSE;
+	}
+
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_node::is_comment()
+   Returns true if this node represents a comment */
+PHP_NODE_METHOD(is_comment)
+{
+	GET_THIS_CONTAINER();
+	if(tidyNodeGetType(obj->node) == TidyNode_Comment) {
+		RETURN_TRUE;
+	} else {
+		RETURN_FALSE;
+	}
+
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_node::is_html()
+   Returns true if this node is part of a HTML document.
+   True for start, end and start-end tag nodes only. */
+PHP_NODE_METHOD(is_html)
+{
+	GET_THIS_CONTAINER();
+
+	/* TidyNodeType values are consecutive enumerators, not bit flags, so
+	   masking with (Start | End | StartEnd) matched unrelated node types;
+	   compare against each tag type instead. */
+	switch(tidyNodeGetType(obj->node)) {
+		case TidyNode_Start:
+		case TidyNode_End:
+		case TidyNode_StartEnd:
+			RETURN_TRUE;
+			break;
+		default:
+			break;
+	}
+	RETURN_FALSE;
+
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_node::is_xhtml()
+   Returns true if this node is part of a XHTML document.
+   The answer is taken from the shared document, not from the node itself. */
+PHP_NODE_METHOD(is_xhtml)
+{
+	GET_THIS_CONTAINER();
+	if(tidyDetectedXhtml(TG(tdoc)->doc)) {
+		RETURN_TRUE;
+	} else {
+		RETURN_FALSE;
+	}
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_node::is_xml()
+   Returns true if this node is part of a XML document.
+   The answer is taken from the shared document, not from the node itself. */
+PHP_NODE_METHOD(is_xml)
+{
+	GET_THIS_CONTAINER();
+	if(tidyDetectedGenericXml(TG(tdoc)->doc)) {
+		RETURN_TRUE;
+	} else {
+		RETURN_FALSE;
+	}
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_node::is_text()
+   Returns true if this node represents text (no markup) */
+PHP_NODE_METHOD(is_text)
+{
+	GET_THIS_CONTAINER();
+	if(tidyNodeGetType(obj->node) == TidyNode_Text) {
+		RETURN_TRUE;
+	} else {
+		RETURN_FALSE;
+	}
+
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_node::is_jste()
+   Returns true if this node is JSTE */
+PHP_NODE_METHOD(is_jste)
+{
+	GET_THIS_CONTAINER();
+	if(tidyNodeGetType(obj->node) == TidyNode_Jste) {
+		RETURN_TRUE;
+	} else {
+		RETURN_FALSE;
+	}
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_node::is_asp()
+   Returns true if this node is ASP */
+PHP_NODE_METHOD(is_asp)
+{
+	GET_THIS_CONTAINER();
+	if(tidyNodeGetType(obj->node) == TidyNode_Asp) {
+		RETURN_TRUE;
+	} else {
+		RETURN_FALSE;
+	}
+}
+/* }}} */
+
+/* {{{ proto boolean tidy_node::is_php()
+   Returns true if this node is PHP */
+PHP_NODE_METHOD(is_php)
+{
+	GET_THIS_CONTAINER();
+	if(tidyNodeGetType(obj->node) == TidyNode_Php) {
+		RETURN_TRUE;
+	} else {
+		RETURN_FALSE;
+	}
+}
+/* }}} */
+
+/* {{{ proto tidy_node tidy_node::next()
+   Returns the next sibling to this node.
+   The return value is left untouched (NULL) when there is no next sibling. */
+PHP_NODE_METHOD(next)
+{
+	TidyNode sibling;
+	GET_THIS_CONTAINER();
+
+	if(ZEND_NUM_ARGS()) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	/* Look the sibling up on $this before obj is re-pointed at the new
+	   object; the sibling used to be read from the freshly created object,
+	   whose node is still zeroed. */
+	sibling = tidyGetNext(obj->node);
+	if (!sibling) {
+		return;
+	}
+
+	tidy_instanciate(tidy_ce_node, return_value TSRMLS_CC);
+	obj = (PHPTidyObj *) zend_object_store_get_object(return_value TSRMLS_CC);
+	obj->node = sibling;
+	obj->attr = NULL;
+	obj->type = is_node;
+
+	tidy_add_default_properities(obj, is_node TSRMLS_CC);
+}
+/* }}} */
+
+/* {{{ proto tidy_node tidy_node::prev()
+   Returns the previous sibling to this node.
+   The return value is left untouched (NULL) when there is no previous sibling. */
+PHP_NODE_METHOD(prev)
+{
+	TidyNode sibling;
+	GET_THIS_CONTAINER();
+
+	if(ZEND_NUM_ARGS()) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	/* same ordering as in next(): resolve the sibling from $this first */
+	sibling = tidyGetPrev(obj->node);
+	if (!sibling) {
+		return;
+	}
+
+	tidy_instanciate(tidy_ce_node, return_value TSRMLS_CC);
+	obj = (PHPTidyObj *) zend_object_store_get_object(return_value TSRMLS_CC);
+	obj->node = sibling;
+	obj->attr = NULL;
+	obj->type = is_node;
+
+	tidy_add_default_properities(obj, is_node TSRMLS_CC);
+}
+/* }}} */
+
+/* {{{ proto tidy_attr tidy_node::get_attr(int attrib_id)
+   Return the attribute with the provided attribute id.
+   The first attribute of this node whose id equals attrib_id is returned as
+   an attribute object; the return value is left untouched when none matches. */
+PHP_NODE_METHOD(get_attr)
+{
+	TidyAttr tempattr;
+	long param;
+	GET_THIS_CONTAINER();
+
+	if(ZEND_NUM_ARGS() != 1) {
+		WRONG_PARAM_COUNT;
+	}
+
+	TIDY_PARSED_CHECK();
+
+	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &param) == FAILURE) {
+		RETURN_FALSE;
+	}
+
+	for(tempattr = tidyAttrFirst(obj->node); tempattr; tempattr = tidyAttrNext(tempattr)) {
+
+		if(tidyAttrGetId(tempattr) == param) {
+
+			/* the result wraps an attribute, so it must be created from
+			   the attribute class rather than the node class */
+			tidy_instanciate(tidy_ce_attr, return_value TSRMLS_CC);
+
+			obj = (PHPTidyObj *) zend_object_store_get_object(return_value TSRMLS_CC);
+			obj->node = NULL;
+			obj->attr = tempattr;
+			obj->type = is_attr;
+
+			tidy_add_default_properities(obj, is_attr TSRMLS_CC);
+
+			/* stop at the first match: obj now refers to the result object,
+			   and a later match would re-initialise return_value */
+			return;
+		}
+	}
+
+}
+/* }}} */
+
+/* {{{ proto tidy_node tidy_node::get_nodes(int node_id)
+   Return an array of nodes under this node with the specified id.
+   Not implemented yet: the body is empty. */
+PHP_NODE_METHOD(get_nodes)
+{
+	/* TODO */
+}
+/* }}} */
+
+#endif /* ZEND_ENGINE_2 */
+
+/* Registers one constant per tidy node type through TIDY_NODE_CONST; the
+   first argument is the upper-case name, the second the tidy spelling. */
+void _php_tidy_register_nodetypes(INIT_FUNC_ARGS)
+{
+
+	TIDY_NODE_CONST(ROOT, Root);
+	TIDY_NODE_CONST(DOCTYPE, DocType);
+	TIDY_NODE_CONST(COMMENT, Comment);
+	TIDY_NODE_CONST(PROCINS, ProcIns);
+	TIDY_NODE_CONST(TEXT, Text);
+	TIDY_NODE_CONST(START, Start);
+	TIDY_NODE_CONST(END, End);
+	TIDY_NODE_CONST(STARTEND, StartEnd);
+	TIDY_NODE_CONST(CDATA, CDATA);
+	TIDY_NODE_CONST(SECTION, Section);
+	TIDY_NODE_CONST(ASP, Asp);
+	TIDY_NODE_CONST(JSTE, Jste);
+	TIDY_NODE_CONST(PHP, Php);
+	TIDY_NODE_CONST(XMLDECL, XmlDecl);
+
+}
+
+/* Registers one constant per tag name through TIDY_TAG_CONST. */
+void _php_tidy_register_tags(INIT_FUNC_ARGS)
+{
+
+	TIDY_TAG_CONST(UNKNOWN);
+	TIDY_TAG_CONST(A);
+	TIDY_TAG_CONST(ABBR);
+	TIDY_TAG_CONST(ACRONYM);
+	TIDY_TAG_CONST(ADDRESS);
+	TIDY_TAG_CONST(ALIGN);
+	TIDY_TAG_CONST(APPLET);
+	TIDY_TAG_CONST(AREA);
+	TIDY_TAG_CONST(B);
+	TIDY_TAG_CONST(BASE);
+	TIDY_TAG_CONST(BASEFONT);
+	TIDY_TAG_CONST(BDO);
+	TIDY_TAG_CONST(BGSOUND);
+	TIDY_TAG_CONST(BIG);
+	TIDY_TAG_CONST(BLINK);
+	TIDY_TAG_CONST(BLOCKQUOTE);
+	TIDY_TAG_CONST(BODY);
+	TIDY_TAG_CONST(BR);
+	TIDY_TAG_CONST(BUTTON);
+	TIDY_TAG_CONST(CAPTION);
+	TIDY_TAG_CONST(CENTER);
+	TIDY_TAG_CONST(CITE);
+	TIDY_TAG_CONST(CODE);
+	TIDY_TAG_CONST(COL);
+	TIDY_TAG_CONST(COLGROUP);
+	TIDY_TAG_CONST(COMMENT);
+	TIDY_TAG_CONST(DD);
+	TIDY_TAG_CONST(DEL);
+	TIDY_TAG_CONST(DFN);
+	TIDY_TAG_CONST(DIR);
+	TIDY_TAG_CONST(DIV);
+	TIDY_TAG_CONST(DL);
+	TIDY_TAG_CONST(DT);
+	TIDY_TAG_CONST(EM);
+	TIDY_TAG_CONST(EMBED);
+	TIDY_TAG_CONST(FIELDSET);
+	TIDY_TAG_CONST(FONT);
+	TIDY_TAG_CONST(FORM);
+	TIDY_TAG_CONST(FRAME);
+	TIDY_TAG_CONST(FRAMESET);
+	TIDY_TAG_CONST(H1);
+	TIDY_TAG_CONST(H2);
+	TIDY_TAG_CONST(H3);
+	TIDY_TAG_CONST(H4);
+	TIDY_TAG_CONST(H5);
+	TIDY_TAG_CONST(H6);
+	TIDY_TAG_CONST(HEAD);
+	TIDY_TAG_CONST(HR);
+	TIDY_TAG_CONST(HTML);
+	TIDY_TAG_CONST(I);
+	TIDY_TAG_CONST(IFRAME);
+	TIDY_TAG_CONST(ILAYER);
+	TIDY_TAG_CONST(IMG);
+	TIDY_TAG_CONST(INPUT);
+	TIDY_TAG_CONST(INS);
+	TIDY_TAG_CONST(ISINDEX);
+	TIDY_TAG_CONST(KBD);
+	TIDY_TAG_CONST(KEYGEN);
+	TIDY_TAG_CONST(LABEL);
+	TIDY_TAG_CONST(LAYER);
+	TIDY_TAG_CONST(LEGEND);
+	TIDY_TAG_CONST(LI);
+	TIDY_TAG_CONST(LINK);
+	TIDY_TAG_CONST(LISTING);
+	TIDY_TAG_CONST(MAP);
+	TIDY_TAG_CONST(MARQUEE);
+	TIDY_TAG_CONST(MENU);
+	TIDY_TAG_CONST(META);
+	TIDY_TAG_CONST(MULTICOL);
+	TIDY_TAG_CONST(NOBR);
+	TIDY_TAG_CONST(NOEMBED);
+	TIDY_TAG_CONST(NOFRAMES);
+	TIDY_TAG_CONST(NOLAYER);
+	TIDY_TAG_CONST(NOSAVE);
+	TIDY_TAG_CONST(NOSCRIPT);
+	TIDY_TAG_CONST(OBJECT);
+	TIDY_TAG_CONST(OL);
+	TIDY_TAG_CONST(OPTGROUP);
+	TIDY_TAG_CONST(OPTION);
+	TIDY_TAG_CONST(P);
+	TIDY_TAG_CONST(PARAM);
+	TIDY_TAG_CONST(PLAINTEXT);
+	TIDY_TAG_CONST(PRE);
+	TIDY_TAG_CONST(Q);
+	TIDY_TAG_CONST(RB);
+	TIDY_TAG_CONST(RBC);
+	TIDY_TAG_CONST(RP);
+	TIDY_TAG_CONST(RT);
+	TIDY_TAG_CONST(RTC);
+	TIDY_TAG_CONST(RUBY);
+	TIDY_TAG_CONST(S);
+	TIDY_TAG_CONST(SAMP);
+	TIDY_TAG_CONST(SCRIPT);
+	TIDY_TAG_CONST(SELECT);
+	TIDY_TAG_CONST(SERVER);
+	TIDY_TAG_CONST(SERVLET);
+	TIDY_TAG_CONST(SMALL);
+	TIDY_TAG_CONST(SPACER);
+	TIDY_TAG_CONST(SPAN);
+	TIDY_TAG_CONST(STRIKE);
+	TIDY_TAG_CONST(STRONG);
+	TIDY_TAG_CONST(STYLE);
+	TIDY_TAG_CONST(SUB);
+	TIDY_TAG_CONST(SUP);
+	TIDY_TAG_CONST(TABLE);
+	TIDY_TAG_CONST(TBODY);
+	TIDY_TAG_CONST(TD);
+	TIDY_TAG_CONST(TEXTAREA);
+	TIDY_TAG_CONST(TFOOT);
+	TIDY_TAG_CONST(TH);
+	TIDY_TAG_CONST(THEAD);
+	TIDY_TAG_CONST(TITLE);
+	TIDY_TAG_CONST(TR);
+	TIDY_TAG_CONST(TT);
+	TIDY_TAG_CONST(U);
+	TIDY_TAG_CONST(UL);
+	TIDY_TAG_CONST(VAR);
+	TIDY_TAG_CONST(WBR);
+	TIDY_TAG_CONST(XMP);
+
+}
+
+/* Registers one constant per attribute name through TIDY_ATTR_CONST.
+   DATAFORMATSAS is deliberately left commented out. */
+void _php_tidy_register_attributes(INIT_FUNC_ARGS)
+{
+
+	TIDY_ATTR_CONST(UNKNOWN);
+	TIDY_ATTR_CONST(ABBR);
+	TIDY_ATTR_CONST(ACCEPT);
+	TIDY_ATTR_CONST(ACCEPT_CHARSET);
+	TIDY_ATTR_CONST(ACCESSKEY);
+	TIDY_ATTR_CONST(ACTION);
+	TIDY_ATTR_CONST(ADD_DATE);
+	TIDY_ATTR_CONST(ALIGN);
+	TIDY_ATTR_CONST(ALINK);
+	TIDY_ATTR_CONST(ALT);
+	TIDY_ATTR_CONST(ARCHIVE);
+	TIDY_ATTR_CONST(AXIS);
+	TIDY_ATTR_CONST(BACKGROUND);
+	TIDY_ATTR_CONST(BGCOLOR);
+	TIDY_ATTR_CONST(BGPROPERTIES);
+	TIDY_ATTR_CONST(BORDER);
+	TIDY_ATTR_CONST(BORDERCOLOR);
+	TIDY_ATTR_CONST(BOTTOMMARGIN);
+	TIDY_ATTR_CONST(CELLPADDING);
+	TIDY_ATTR_CONST(CELLSPACING);
+	TIDY_ATTR_CONST(CHAR);
+	TIDY_ATTR_CONST(CHAROFF);
+	TIDY_ATTR_CONST(CHARSET);
+	TIDY_ATTR_CONST(CHECKED);
+	TIDY_ATTR_CONST(CITE);
+	TIDY_ATTR_CONST(CLASS);
+	TIDY_ATTR_CONST(CLASSID);
+	TIDY_ATTR_CONST(CLEAR);
+	TIDY_ATTR_CONST(CODE);
+	TIDY_ATTR_CONST(CODEBASE);
+	TIDY_ATTR_CONST(CODETYPE);
+	TIDY_ATTR_CONST(COLOR);
+	TIDY_ATTR_CONST(COLS);
+	TIDY_ATTR_CONST(COLSPAN);
+	TIDY_ATTR_CONST(COMPACT);
+	TIDY_ATTR_CONST(CONTENT);
+	TIDY_ATTR_CONST(COORDS);
+	TIDY_ATTR_CONST(DATA);
+	TIDY_ATTR_CONST(DATAFLD);
+	/* TIDY_ATTR_CONST(DATAFORMATSAS); */
+	TIDY_ATTR_CONST(DATAPAGESIZE);
+	TIDY_ATTR_CONST(DATASRC);
+	TIDY_ATTR_CONST(DATETIME);
+	TIDY_ATTR_CONST(DECLARE);
+	TIDY_ATTR_CONST(DEFER);
+	TIDY_ATTR_CONST(DIR);
+	TIDY_ATTR_CONST(DISABLED);
+	TIDY_ATTR_CONST(ENCODING);
+	TIDY_ATTR_CONST(ENCTYPE);
+	TIDY_ATTR_CONST(FACE);
+	TIDY_ATTR_CONST(FOR);
+	TIDY_ATTR_CONST(FRAME);
+	TIDY_ATTR_CONST(FRAMEBORDER);
+	TIDY_ATTR_CONST(FRAMESPACING);
+	TIDY_ATTR_CONST(GRIDX);
+	TIDY_ATTR_CONST(GRIDY);
+	TIDY_ATTR_CONST(HEADERS);
+	TIDY_ATTR_CONST(HEIGHT);
+	TIDY_ATTR_CONST(HREF);
+	TIDY_ATTR_CONST(HREFLANG);
+	TIDY_ATTR_CONST(HSPACE);
+	TIDY_ATTR_CONST(HTTP_EQUIV);
+	TIDY_ATTR_CONST(ID);
+	TIDY_ATTR_CONST(ISMAP);
+	TIDY_ATTR_CONST(LABEL);
+	TIDY_ATTR_CONST(LANG);
+	TIDY_ATTR_CONST(LANGUAGE);
+	TIDY_ATTR_CONST(LAST_MODIFIED);
+	TIDY_ATTR_CONST(LAST_VISIT);
+	TIDY_ATTR_CONST(LEFTMARGIN);
+	TIDY_ATTR_CONST(LINK);
+	TIDY_ATTR_CONST(LONGDESC);
+	TIDY_ATTR_CONST(LOWSRC);
+	TIDY_ATTR_CONST(MARGINHEIGHT);
+	TIDY_ATTR_CONST(MARGINWIDTH);
+	TIDY_ATTR_CONST(MAXLENGTH);
+	TIDY_ATTR_CONST(MEDIA);
+	TIDY_ATTR_CONST(METHOD);
+	TIDY_ATTR_CONST(MULTIPLE);
+	TIDY_ATTR_CONST(NAME);
+	TIDY_ATTR_CONST(NOHREF);
+	TIDY_ATTR_CONST(NORESIZE);
+	TIDY_ATTR_CONST(NOSHADE);
+	TIDY_ATTR_CONST(NOWRAP);
+	TIDY_ATTR_CONST(OBJECT);
+	TIDY_ATTR_CONST(OnAFTERUPDATE);
+	TIDY_ATTR_CONST(OnBEFOREUNLOAD);
+	TIDY_ATTR_CONST(OnBEFOREUPDATE);
+	TIDY_ATTR_CONST(OnBLUR);
+	TIDY_ATTR_CONST(OnCHANGE);
+	TIDY_ATTR_CONST(OnCLICK);
+	TIDY_ATTR_CONST(OnDATAAVAILABLE);
+	TIDY_ATTR_CONST(OnDATASETCHANGED);
+	TIDY_ATTR_CONST(OnDATASETCOMPLETE);
+	TIDY_ATTR_CONST(OnDBLCLICK);
+	TIDY_ATTR_CONST(OnERRORUPDATE);
+	TIDY_ATTR_CONST(OnFOCUS);
+	TIDY_ATTR_CONST(OnKEYDOWN);
+	TIDY_ATTR_CONST(OnKEYPRESS);
+	TIDY_ATTR_CONST(OnKEYUP);
+	TIDY_ATTR_CONST(OnLOAD);
+	TIDY_ATTR_CONST(OnMOUSEDOWN);
+	TIDY_ATTR_CONST(OnMOUSEMOVE);
+	TIDY_ATTR_CONST(OnMOUSEOUT);
+	TIDY_ATTR_CONST(OnMOUSEOVER);
+	TIDY_ATTR_CONST(OnMOUSEUP);
+	TIDY_ATTR_CONST(OnRESET);
+	TIDY_ATTR_CONST(OnROWENTER);
+	TIDY_ATTR_CONST(OnROWEXIT);
+	TIDY_ATTR_CONST(OnSELECT);
+	TIDY_ATTR_CONST(OnSUBMIT);
+	TIDY_ATTR_CONST(OnUNLOAD);
+	TIDY_ATTR_CONST(PROFILE);
+	TIDY_ATTR_CONST(PROMPT);
+	TIDY_ATTR_CONST(RBSPAN);
+	TIDY_ATTR_CONST(READONLY);
+	TIDY_ATTR_CONST(REL);
+	TIDY_ATTR_CONST(REV);
+	TIDY_ATTR_CONST(RIGHTMARGIN);
+	TIDY_ATTR_CONST(ROWS);
+	TIDY_ATTR_CONST(ROWSPAN);
+	TIDY_ATTR_CONST(RULES);
+	TIDY_ATTR_CONST(SCHEME);
+	TIDY_ATTR_CONST(SCOPE);
+	TIDY_ATTR_CONST(SCROLLING);
+	TIDY_ATTR_CONST(SELECTED);
+	TIDY_ATTR_CONST(SHAPE);
+	TIDY_ATTR_CONST(SHOWGRID);
+	TIDY_ATTR_CONST(SHOWGRIDX);
+	TIDY_ATTR_CONST(SHOWGRIDY);
+	TIDY_ATTR_CONST(SIZE);
+	TIDY_ATTR_CONST(SPAN);
+	TIDY_ATTR_CONST(SRC);
+	TIDY_ATTR_CONST(STANDBY);
+	TIDY_ATTR_CONST(START);
+	TIDY_ATTR_CONST(STYLE);
+	TIDY_ATTR_CONST(SUMMARY);
+	TIDY_ATTR_CONST(TABINDEX);
+	TIDY_ATTR_CONST(TARGET);
+	TIDY_ATTR_CONST(TEXT);
+	TIDY_ATTR_CONST(TITLE);
+	TIDY_ATTR_CONST(TOPMARGIN);
+	TIDY_ATTR_CONST(TYPE);
+	TIDY_ATTR_CONST(USEMAP);
+	TIDY_ATTR_CONST(VALIGN);
+	TIDY_ATTR_CONST(VALUE);
+	TIDY_ATTR_CONST(VALUETYPE);
+	TIDY_ATTR_CONST(VERSION);
+	TIDY_ATTR_CONST(VLINK);
+	TIDY_ATTR_CONST(VSPACE);
+	TIDY_ATTR_CONST(WIDTH);
+	TIDY_ATTR_CONST(WRAP);
+	TIDY_ATTR_CONST(XML_LANG);
+	TIDY_ATTR_CONST(XML_SPACE);
+	TIDY_ATTR_CONST(XMLNS);
+
+} |