summaryrefslogtreecommitdiff
path: root/core/fs/pxe/http_readdir.c
diff options
context:
space:
mode:
Diffstat (limited to 'core/fs/pxe/http_readdir.c')
-rw-r--r--core/fs/pxe/http_readdir.c471
1 files changed, 471 insertions, 0 deletions
diff --git a/core/fs/pxe/http_readdir.c b/core/fs/pxe/http_readdir.c
new file mode 100644
index 00000000..b6e480e7
--- /dev/null
+++ b/core/fs/pxe/http_readdir.c
@@ -0,0 +1,471 @@
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 2011 Intel Corporation; author: H. Peter Anvin
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston MA 02110-1301, USA; either version 2 of the License, or
+ * (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+#include <inttypes.h>
+#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <dprintf.h>
+#include "pxe.h"
+
+/*
+ * States of the HTML scanner that looks for <a href=...> attributes.
+ * The numeric value of each state is its row index in statemachine[].
+ */
+enum http_readdir_state {
+ st_start, /* 0 Initial state */
+ st_open, /* 1 "<" */
+ st_a, /* 2 "<a" */
+ st_attribute, /* 3 "<a " */
+ st_h, /* 4 "<a h" */
+ st_hr, /* 5 "<a hr" */
+ st_hre, /* 6 "<a hre" */
+ st_href, /* 7 "<a href" */
+ st_hrefeq, /* 8 "<a href=", also an unquoted href value */
+ st_hrefqu, /* 9 "<a href=\"", inside a quoted href value */
+ st_badtag, /* 10 Inside a tag other than <a> */
+ st_badtagqu, /* 11 Inside a quoted string in such a tag */
+ st_badattr, /* 12 Inside an <a> attribute other than href */
+ st_badattrqu, /* 13 Inside a quoted string in such an attribute */
+};
+
+/*
+ * One row of the scanner's transition table.  The parser tests the input
+ * character in this order: lowercased input equal to xchar, then '<',
+ * then '>', then whitespace, then anything else; the first match selects
+ * the next state.
+ */
+struct machine {
+ char xchar; /* state-specific character, compared case-insensitively */
+ uint8_t st_xchar; /* next state when xchar matches */
+ uint8_t st_left; /* < */
+ uint8_t st_right; /* > */
+ uint8_t st_space; /* white */
+ uint8_t st_other; /* anything else */
+};
+
+/*
+ * Transition table, indexed by the current enum http_readdir_state; the
+ * rows must therefore stay in the same order as the enum.
+ *
+ * NOTE(review): in the st_a row a '>' leads to st_open rather than
+ * st_start, unlike every other non-quoted row -- confirm this is intended.
+ */
+static const struct machine statemachine[] = {
+ /* xchar st_xchar st_left st_right st_space st_other */
+ { 0, 0, st_open, st_start, st_start, st_start }, /* st_start */
+ { 'a', st_a, st_badtag, st_start, st_open, st_badtag }, /* st_open */
+ { 0, 0, st_open, st_open, st_attribute, st_badtag }, /* st_a */
+ { 'h', st_h, st_open, st_start, st_attribute, st_badattr }, /* st_attribute */
+ { 'r', st_hr, st_open, st_start, st_attribute, st_badattr }, /* st_h */
+ { 'e', st_hre, st_open, st_start, st_attribute, st_badattr }, /* st_hr */
+ { 'f', st_href, st_open, st_start, st_attribute, st_badattr }, /* st_hre */
+ { '=', st_hrefeq, st_open, st_start, st_attribute, st_badattr }, /* st_href */
+ { '\"', st_hrefqu, st_open, st_start, st_attribute, st_hrefeq }, /* st_hrefeq */
+ { '\"', st_attribute, st_hrefqu, st_hrefqu, st_hrefqu, st_hrefqu }, /* st_hrefqu */
+ { '\"', st_badtagqu, st_open, st_start, st_badtag, st_badtag }, /* st_badtag */
+ { '\"', st_badtag, st_badtagqu, st_badtagqu, st_badtagqu, st_badtagqu }, /* st_badtagqu */
+ { '\"', st_badattrqu, st_open, st_start, st_attribute, st_badattr }, /* st_badattr */
+ { '\"', st_attribute, st_badattrqu, st_badattrqu, st_badattrqu, st_badattrqu }, /* st_badattrqu */
+};
+
+/* Maps a named HTML character entity to its code point. */
+struct html_entity {
+ uint16_t ucs; /* code point; 0 marks the end of the table */
+ const char entity[9]; /* name without '&' and ';', NUL-terminated */
+};
+
+/*
+ * Named entities recognized by emit().  Only the four markup-significant
+ * entities are compiled in unless HTTP_ALL_ENTITIES is defined.  The
+ * table is searched linearly and ends with a { 0, "" } sentinel.
+ */
+static const struct html_entity entities[] = {
+ { 34, "quot" },
+ { 38, "amp" },
+ { 60, "lt" },
+ { 62, "gt" },
+#ifdef HTTP_ALL_ENTITIES
+ { 160, "nbsp" },
+ { 161, "iexcl" },
+ { 162, "cent" },
+ { 163, "pound" },
+ { 164, "curren" },
+ { 165, "yen" },
+ { 166, "brvbar" },
+ { 167, "sect" },
+ { 168, "uml" },
+ { 169, "copy" },
+ { 170, "ordf" },
+ { 171, "laquo" },
+ { 172, "not" },
+ { 173, "shy" },
+ { 174, "reg" },
+ { 175, "macr" },
+ { 176, "deg" },
+ { 177, "plusmn" },
+ { 178, "sup2" },
+ { 179, "sup3" },
+ { 180, "acute" },
+ { 181, "micro" },
+ { 182, "para" },
+ { 183, "middot" },
+ { 184, "cedil" },
+ { 185, "sup1" },
+ { 186, "ordm" },
+ { 187, "raquo" },
+ { 188, "frac14" },
+ { 189, "frac12" },
+ { 190, "frac34" },
+ { 191, "iquest" },
+ { 192, "Agrave" },
+ { 193, "Aacute" },
+ { 194, "Acirc" },
+ { 195, "Atilde" },
+ { 196, "Auml" },
+ { 197, "Aring" },
+ { 198, "AElig" },
+ { 199, "Ccedil" },
+ { 200, "Egrave" },
+ { 201, "Eacute" },
+ { 202, "Ecirc" },
+ { 203, "Euml" },
+ { 204, "Igrave" },
+ { 205, "Iacute" },
+ { 206, "Icirc" },
+ { 207, "Iuml" },
+ { 208, "ETH" },
+ { 209, "Ntilde" },
+ { 210, "Ograve" },
+ { 211, "Oacute" },
+ { 212, "Ocirc" },
+ { 213, "Otilde" },
+ { 214, "Ouml" },
+ { 215, "times" },
+ { 216, "Oslash" },
+ { 217, "Ugrave" },
+ { 218, "Uacute" },
+ { 219, "Ucirc" },
+ { 220, "Uuml" },
+ { 221, "Yacute" },
+ { 222, "THORN" },
+ { 223, "szlig" },
+ { 224, "agrave" },
+ { 225, "aacute" },
+ { 226, "acirc" },
+ { 227, "atilde" },
+ { 228, "auml" },
+ { 229, "aring" },
+ { 230, "aelig" },
+ { 231, "ccedil" },
+ { 232, "egrave" },
+ { 233, "eacute" },
+ { 234, "ecirc" },
+ { 235, "euml" },
+ { 236, "igrave" },
+ { 237, "iacute" },
+ { 238, "icirc" },
+ { 239, "iuml" },
+ { 240, "eth" },
+ { 241, "ntilde" },
+ { 242, "ograve" },
+ { 243, "oacute" },
+ { 244, "ocirc" },
+ { 245, "otilde" },
+ { 246, "ouml" },
+ { 247, "divide" },
+ { 248, "oslash" },
+ { 249, "ugrave" },
+ { 250, "uacute" },
+ { 251, "ucirc" },
+ { 252, "uuml" },
+ { 253, "yacute" },
+ { 254, "thorn" },
+ { 255, "yuml" },
+ { 338, "OElig" },
+ { 339, "oelig" },
+ { 352, "Scaron" },
+ { 353, "scaron" },
+ { 376, "Yuml" },
+ { 402, "fnof" },
+ { 710, "circ" },
+ { 732, "tilde" },
+ { 913, "Alpha" },
+ { 914, "Beta" },
+ { 915, "Gamma" },
+ { 916, "Delta" },
+ { 917, "Epsilon" },
+ { 918, "Zeta" },
+ { 919, "Eta" },
+ { 920, "Theta" },
+ { 921, "Iota" },
+ { 922, "Kappa" },
+ { 923, "Lambda" },
+ { 924, "Mu" },
+ { 925, "Nu" },
+ { 926, "Xi" },
+ { 927, "Omicron" },
+ { 928, "Pi" },
+ { 929, "Rho" },
+ { 931, "Sigma" },
+ { 932, "Tau" },
+ { 933, "Upsilon" },
+ { 934, "Phi" },
+ { 935, "Chi" },
+ { 936, "Psi" },
+ { 937, "Omega" },
+ { 945, "alpha" },
+ { 946, "beta" },
+ { 947, "gamma" },
+ { 948, "delta" },
+ { 949, "epsilon" },
+ { 950, "zeta" },
+ { 951, "eta" },
+ { 952, "theta" },
+ { 953, "iota" },
+ { 954, "kappa" },
+ { 955, "lambda" },
+ { 956, "mu" },
+ { 957, "nu" },
+ { 958, "xi" },
+ { 959, "omicron" },
+ { 960, "pi" },
+ { 961, "rho" },
+ { 962, "sigmaf" },
+ { 963, "sigma" },
+ { 964, "tau" },
+ { 965, "upsilon" },
+ { 966, "phi" },
+ { 967, "chi" },
+ { 968, "psi" },
+ { 969, "omega" },
+ { 977, "thetasym" },
+ { 978, "upsih" },
+ { 982, "piv" },
+ { 8194, "ensp" },
+ { 8195, "emsp" },
+ { 8201, "thinsp" },
+ { 8204, "zwnj" },
+ { 8205, "zwj" },
+ { 8206, "lrm" },
+ { 8207, "rlm" },
+ { 8211, "ndash" },
+ { 8212, "mdash" },
+ { 8216, "lsquo" },
+ { 8217, "rsquo" },
+ { 8218, "sbquo" },
+ { 8220, "ldquo" },
+ { 8221, "rdquo" },
+ { 8222, "bdquo" },
+ { 8224, "dagger" },
+ { 8225, "Dagger" },
+ { 8226, "bull" },
+ { 8230, "hellip" },
+ { 8240, "permil" },
+ { 8242, "prime" },
+ { 8243, "Prime" },
+ { 8249, "lsaquo" },
+ { 8250, "rsaquo" },
+ { 8254, "oline" },
+ { 8260, "frasl" },
+ { 8364, "euro" },
+ { 8465, "image" },
+ { 8472, "weierp" },
+ { 8476, "real" },
+ { 8482, "trade" },
+ { 8501, "alefsym" },
+ { 8592, "larr" },
+ { 8593, "uarr" },
+ { 8594, "rarr" },
+ { 8595, "darr" },
+ { 8596, "harr" },
+ { 8629, "crarr" },
+ { 8656, "lArr" },
+ { 8657, "uArr" },
+ { 8658, "rArr" },
+ { 8659, "dArr" },
+ { 8660, "hArr" },
+ { 8704, "forall" },
+ { 8706, "part" },
+ { 8707, "exist" },
+ { 8709, "empty" },
+ { 8711, "nabla" },
+ { 8712, "isin" },
+ { 8713, "notin" },
+ { 8715, "ni" },
+ { 8719, "prod" },
+ { 8721, "sum" },
+ { 8722, "minus" },
+ { 8727, "lowast" },
+ { 8730, "radic" },
+ { 8733, "prop" },
+ { 8734, "infin" },
+ { 8736, "ang" },
+ { 8743, "and" },
+ { 8744, "or" },
+ { 8745, "cap" },
+ { 8746, "cup" },
+ { 8747, "int" },
+ { 8756, "there4" },
+ { 8764, "sim" },
+ { 8773, "cong" },
+ { 8776, "asymp" },
+ { 8800, "ne" },
+ { 8801, "equiv" },
+ { 8804, "le" },
+ { 8805, "ge" },
+ { 8834, "sub" },
+ { 8835, "sup" },
+ { 8836, "nsub" },
+ { 8838, "sube" },
+ { 8839, "supe" },
+ { 8853, "oplus" },
+ { 8855, "otimes" },
+ { 8869, "perp" },
+ { 8901, "sdot" },
+ { 8968, "lceil" },
+ { 8969, "rceil" },
+ { 8970, "lfloor" },
+ { 8971, "rfloor" },
+ { 9001, "lang" },
+ { 9002, "rang" },
+ { 9674, "loz" },
+ { 9824, "spades" },
+ { 9827, "clubs" },
+ { 9829, "hearts" },
+ { 9830, "diams" },
+#endif /* HTTP_ALL_ENTITIES */
+ { 0, "" }
+};
+
+/* Decoder state carried between successive emit() calls. */
+struct entity_state {
+ char entity_buf[16]; /* characters collected between '&' and ';' */
+ char *ep; /* next free slot in entity_buf; NULL when not inside an entity */
+};
+
+/*
+ * Append one character of an attribute value to the output at p,
+ * decoding HTML character references ("&name;", "&#NNN;", "&#xHHH;")
+ * into UTF-8.
+ *
+ * Characters between '&' and ';' are collected in st->entity_buf and
+ * produce no output until the ';' arrives; anything that does not fit in
+ * the buffer is silently discarded.  Unknown names, control characters
+ * (< 32), surrogates and values beyond U+10FFFF produce no output.
+ *
+ * Returns the advanced output pointer.  At most four bytes are written
+ * per call and the output is not NUL-terminated here.
+ */
+static char *emit(char *p, int c, struct entity_state *st)
+{
+    const struct html_entity *ent;
+    unsigned long ucs;	/* same width as strtoul(): no silent truncation */
+
+    if (!st->ep) {
+        if (c == '&')
+            st->ep = st->entity_buf;	/* Entity open */
+        else
+            *p++ = c;
+        return p;
+    }
+
+    if (c != ';') {
+        /* Keep one byte free for the terminating NUL */
+        if (st->ep < st->entity_buf + sizeof st->entity_buf - 1)
+            *st->ep++ = c;
+        return p;
+    }
+
+    /*
+     * Entity close.  Terminate the collected name itself so that bytes
+     * left over from an earlier, longer entity cannot corrupt the lookup.
+     */
+    *st->ep = '\0';
+    st->ep = NULL;
+
+    if (st->entity_buf[0] == '#') {
+        if ((st->entity_buf[1] | 0x20) == 'x')
+            ucs = strtoul(st->entity_buf + 2, NULL, 16);
+        else
+            ucs = strtoul(st->entity_buf + 1, NULL, 10);
+    } else {
+        for (ent = entities; ent->ucs; ent++) {
+            if (!strcmp(st->entity_buf, ent->entity))
+                break;
+        }
+        ucs = ent->ucs;	/* 0 from the sentinel if the name is unknown */
+    }
+
+    /* U+10FFFF is the last valid code point; surrogates have no UTF-8 form */
+    if (ucs < 32 || ucs > 0x10ffff || (ucs >= 0xd800 && ucs <= 0xdfff))
+        return p;	/* Bogus */
+
+    if (ucs >= 0x10000) {
+        *p++ = 0xf0 + (ucs >> 18);
+        *p++ = 0x80 + ((ucs >> 12) & 0x3f);
+        *p++ = 0x80 + ((ucs >> 6) & 0x3f);
+        *p++ = 0x80 + (ucs & 0x3f);
+    } else if (ucs >= 0x800) {
+        *p++ = 0xe0 + (ucs >> 12);
+        *p++ = 0x80 + ((ucs >> 6) & 0x3f);
+        *p++ = 0x80 + (ucs & 0x3f);
+    } else if (ucs >= 0x80) {
+        *p++ = 0xc0 + (ucs >> 6);
+        *p++ = 0x80 + (ucs & 0x3f);
+    } else {
+        *p++ = ucs;
+    }
+    return p;
+}
+
+/*
+ * Scan the stream for the next <a href=...> attribute and copy its value,
+ * with character entities decoded, into buf.
+ *
+ * buf must hold at least FILENAME_MAX + 4 bytes: collection stops once
+ * FILENAME_MAX bytes are stored, but the final emit() call may add up to
+ * four bytes, followed by the terminating NUL.
+ *
+ * Returns buf, NUL-terminated, once the scanner is back in st_start after
+ * a non-empty href value was collected, or NULL when pxe_getc() returns -1.
+ */
+static const char *http_get_filename(struct inode *inode, char *buf)
+{
+    int c, lc;
+    char *p;
+    const struct machine *sm;
+    struct entity_state es;
+    enum http_readdir_state state = st_start;
+    enum http_readdir_state pstate = st_start;
+
+    memset(&es, 0, sizeof es);
+
+    p = buf;
+    for (;;) {
+        c = pxe_getc(inode);
+        if (c == -1)
+            return NULL;
+
+        lc = tolower(c);
+
+        sm = &statemachine[state];
+
+        if (lc == sm->xchar)
+            state = sm->st_xchar;
+        else if (c == '<')
+            state = sm->st_left;
+        else if (c == '>')
+            state = sm->st_right;
+        else if (isspace(c))
+            state = sm->st_space;
+        else
+            state = sm->st_other;
+
+        if (state == st_hrefeq || state == st_hrefqu) {
+            if (state != pstate) {
+                /*
+                 * Just entered an href value: the '=' or '"' that got us
+                 * here is not part of it.  Start over, and drop any entity
+                 * left unterminated by an earlier value so it cannot
+                 * swallow this one.
+                 */
+                p = buf;
+                es.ep = NULL;
+            } else if (p < buf + FILENAME_MAX) {
+                p = emit(p, c, &es);
+            }
+            pstate = state;
+        } else {
+            pstate = st_start;
+            if (p != buf && state == st_start) {
+                *p = '\0';
+                return buf;
+            }
+        }
+    }
+}
+
+/*
+ * Fill in dirent with the next usable link found in an HTML index page.
+ *
+ * Links containing '#' or '?' are skipped, as are links with a '/'
+ * anywhere but at the very end and names longer than NAME_MAX.  A
+ * trailing '/' is stripped from the name and marks the entry as a
+ * directory.
+ *
+ * Returns 0 when an entry was stored, -1 when no more links are available.
+ */
+int http_readdir(struct inode *inode, struct dirent *dirent)
+{
+    char buf[FILENAME_MAX + 6];
+    const char *name, *end;
+    size_t len;
+
+    while ((name = http_get_filename(inode, buf)) != NULL) {
+        /* Ignore entries with http special characters */
+        if (strchr(name, '#') || strchr(name, '?'))
+            continue;
+
+        /* A slash, if present, must be the last character but not the first */
+        end = strchr(name, '/');
+        if (!end) {
+            end = name + strlen(name);
+        } else if (end == name || end[1] != '\0') {
+            continue;
+        }
+
+        len = end - name;
+        if (len > NAME_MAX)
+            continue;
+
+        memcpy(dirent->d_name, name, len);
+        dirent->d_name[len] = '\0';
+        dirent->d_type = (*end == '/') ? DT_DIR : DT_REG;
+        dirent->d_reclen = offsetof(struct dirent, d_name) + len + 1;
+        dirent->d_off = 0;	/* Not applicable */
+        dirent->d_ino = 0;	/* Not applicable */
+        return 0;
+    }
+
+    return -1;		/* End of directory */
+}