deps/v8/test/mjsunit/whitespaces.js


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130

// Copyright 2014 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

var whitespaces = [
  // WhiteSpace defined in ECMA-262 5.1, 7.2
  0x0009,  // Tab                  TAB
  0x000B,  // Vertical Tab         VT
  0x000C,  // Form Feed            FF
  0x0020,  // Space                SP
  0x00A0,  // No-break space       NBSP
  0xFEFF,  // Byte Order Mark      BOM

  // LineTerminator defined in ECMA-262 5.1, 7.3
  0x000A,  // Line Feed            LF
  0x000D,  // Carriage Return      CR
  0x2028,  // Line Separator       LS
  0x2029,  // Paragraph Separator  PS

  // Unicode 6.3.0 whitespaces (category 'Zs')
  0x1680,  // Ogham Space Mark
  0x2000,  // EN QUAD
  0x2001,  // EM QUAD
  0x2002,  // EN SPACE
  0x2003,  // EM SPACE
  0x2004,  // THREE-PER-EM SPACE
  0x2005,  // FOUR-PER-EM SPACE
  0x2006,  // SIX-PER-EM SPACE
  0x2007,  // FIGURE SPACE
  0x2008,  // PUNCTUATION SPACE
  0x2009,  // THIN SPACE
  0x200A,  // HAIR SPACE
  0x2028,  // LINE SEPARATOR
  0x2029,  // PARAGRAPH SEPARATOR
  0x202F,  // NARROW NO-BREAK SPACE
  0x205F,  // MEDIUM MATHEMATICAL SPACE
  0x3000,  // IDEOGRAPHIC SPACE
];

// Add single twobyte char to force twobyte representation.
// Interestingly, snowman is not "white" space :)
var twobyte = "\u2603";
var onebyte = "\u007E";
var twobytespace = "\u2000";
var onebytespace = "\u0020";

function is_whitespace(c) {
  return whitespaces.indexOf(c.charCodeAt(0)) > -1;
}

function test_regexp(str) {
  var pos_match = str.match(/\s/);
  var neg_match = str.match(/\S/);
  var test_char = str[0];
  var postfix = str[1];
  if (is_whitespace(test_char)) {
    assertEquals(test_char, pos_match[0]);
    assertEquals(postfix, neg_match[0]);
  } else {
    assertEquals(test_char, neg_match[0]);
    assertNull(pos_match);
  }
}

function test_trim(c, infix) {
  var str = c + c + c + infix + c;
  if (is_whitespace(c)) {
    assertEquals(infix, str.trim());
  } else {
    assertEquals(str, str.trim());
  }
}

function test_parseInt(c, postfix) {
  // Skip if prefix is a digit.
  if (c >= "0" && c <= "9") return;
  var str = c + c + "123" + postfix;
  if (is_whitespace(c)) {
    assertEquals(123, parseInt(str));
  } else {
    assertEquals(NaN, parseInt(str));
  }
}

function test_eval(c, content) {
  if (!is_whitespace(c)) return;
  var str = c + c + "'" + content + "'" + c + c;
  assertEquals(content, eval(str));
}

function test_stringtonumber(c, postfix) {
  // Skip if prefix is a digit.
  if (c >= "0" && c <= "9") return;
  var result = 1 + Number(c + "123" + c + postfix);
  if (is_whitespace(c)) {
    assertEquals(124, result);
  } else {
    assertEquals(NaN, result);
  }
}

// Test is split into parts to increase parallelism.
const number_of_tests = 10;
const max_codepoint = 0x10000;

function firstCodePointOfRange(i) {
  return Math.floor(i * (max_codepoint / number_of_tests));
}

function testCodePointRange(i) {
  assertTrue(i >= 0 && i < number_of_tests);

  const from = firstCodePointOfRange(i);
  const to = (i == number_of_tests - 1)
      ? max_codepoint : firstCodePointOfRange(i + 1);

  for (let i = from; i < to; i++) {
    c = String.fromCharCode(i);
    test_regexp(c + onebyte);
    test_regexp(c + twobyte);
    test_trim(c, onebyte + "trim");
    test_trim(c, twobyte + "trim");
    test_parseInt(c, onebyte);
    test_parseInt(c, twobyte);
    test_eval(c, onebyte);
    test_eval(c, twobyte);
    test_stringtonumber(c, onebytespace);
    test_stringtonumber(c, twobytespace);
  }
}