mlir/lib/AsmParser/Token.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215

//===- Token.cpp - MLIR Token Implementation ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Token class for the MLIR textual form.
//
//===----------------------------------------------------------------------===//

#include "Token.h"
#include "llvm/ADT/StringExtras.h"
#include <optional>

using namespace mlir;

SMLoc Token::getLoc() const { return SMLoc::getFromPointer(spelling.data()); }

SMLoc Token::getEndLoc() const {
  return SMLoc::getFromPointer(spelling.data() + spelling.size());
}

SMRange Token::getLocRange() const { return SMRange(getLoc(), getEndLoc()); }

/// For an integer token, return its value as an unsigned.  If it doesn't fit,
/// return std::nullopt.
std::optional<unsigned> Token::getUnsignedIntegerValue() const {
  bool isHex = spelling.size() > 1 && spelling[1] == 'x';

  unsigned result = 0;
  if (spelling.getAsInteger(isHex ? 0 : 10, result))
    return std::nullopt;
  return result;
}

/// For an integer token, return its value as a uint64_t.  If it doesn't fit,
/// return std::nullopt.
std::optional<uint64_t> Token::getUInt64IntegerValue(StringRef spelling) {
  bool isHex = spelling.size() > 1 && spelling[1] == 'x';

  uint64_t result = 0;
  if (spelling.getAsInteger(isHex ? 0 : 10, result))
    return std::nullopt;
  return result;
}

/// For a floatliteral, return its value as a double. Return std::nullopt if the
/// value underflows or overflows.
std::optional<double> Token::getFloatingPointValue() const {
  double result = 0;
  if (spelling.getAsDouble(result))
    return std::nullopt;
  return result;
}

/// For an inttype token, return its bitwidth.
std::optional<unsigned> Token::getIntTypeBitwidth() const {
  assert(getKind() == inttype);
  unsigned bitwidthStart = (spelling[0] == 'i' ? 1 : 2);
  unsigned result = 0;
  if (spelling.drop_front(bitwidthStart).getAsInteger(10, result))
    return std::nullopt;
  return result;
}

std::optional<bool> Token::getIntTypeSignedness() const {
  assert(getKind() == inttype);
  if (spelling[0] == 'i')
    return std::nullopt;
  if (spelling[0] == 's')
    return true;
  assert(spelling[0] == 'u');
  return false;
}

/// Given a token containing a string literal, return its value, including
/// removing the quote characters and unescaping the contents of the string. The
/// lexer has already verified that this token is valid.
std::string Token::getStringValue() const {
  assert(getKind() == string || getKind() == code_complete ||
         (getKind() == at_identifier && getSpelling()[1] == '"'));
  // Start by dropping the quotes.
  StringRef bytes = getSpelling().drop_front();
  if (getKind() != Token::code_complete) {
    bytes = bytes.drop_back();
    if (getKind() == at_identifier)
      bytes = bytes.drop_front();
  }

  std::string result;
  result.reserve(bytes.size());
  for (unsigned i = 0, e = bytes.size(); i != e;) {
    auto c = bytes[i++];
    if (c != '\\') {
      result.push_back(c);
      continue;
    }

    assert(i + 1 <= e && "invalid string should be caught by lexer");
    auto c1 = bytes[i++];
    switch (c1) {
    case '"':
    case '\\':
      result.push_back(c1);
      continue;
    case 'n':
      result.push_back('\n');
      continue;
    case 't':
      result.push_back('\t');
      continue;
    default:
      break;
    }

    assert(i + 1 <= e && "invalid string should be caught by lexer");
    auto c2 = bytes[i++];

    assert(llvm::isHexDigit(c1) && llvm::isHexDigit(c2) && "invalid escape");
    result.push_back((llvm::hexDigitValue(c1) << 4) | llvm::hexDigitValue(c2));
  }

  return result;
}

/// Given a token containing a hex string literal, return its value or
/// std::nullopt if the token does not contain a valid hex string.
std::optional<std::string> Token::getHexStringValue() const {
  assert(getKind() == string);

  // Get the internal string data, without the quotes.
  StringRef bytes = getSpelling().drop_front().drop_back();

  // Try to extract the binary data from the hex string. We expect the hex
  // string to start with `0x` and have an even number of hex nibbles (nibbles
  // should come in pairs).
  std::string hex;
  if (!bytes.consume_front("0x") || (bytes.size() & 1) ||
      !llvm::tryGetFromHex(bytes, hex))
    return std::nullopt;
  return hex;
}

/// Given a token containing a symbol reference, return the unescaped string
/// value.
std::string Token::getSymbolReference() const {
  assert(is(Token::at_identifier) && "expected valid @-identifier");
  StringRef nameStr = getSpelling().drop_front();

  // Check to see if the reference is a string literal, or a bare identifier.
  if (nameStr.front() == '"')
    return getStringValue();
  return std::string(nameStr);
}

/// Given a hash_identifier token like #123, try to parse the number out of
/// the identifier, returning std::nullopt if it is a named identifier like #x
/// or if the integer doesn't fit.
std::optional<unsigned> Token::getHashIdentifierNumber() const {
  assert(getKind() == hash_identifier);
  unsigned result = 0;
  if (spelling.drop_front().getAsInteger(10, result))
    return std::nullopt;
  return result;
}

/// Given a punctuation or keyword token kind, return the spelling of the
/// token as a string.  Warning: This will abort on markers, identifiers and
/// literal tokens since they have no fixed spelling.
StringRef Token::getTokenSpelling(Kind kind) {
  switch (kind) {
  default:
    llvm_unreachable("This token kind has no fixed spelling");
#define TOK_PUNCTUATION(NAME, SPELLING)                                        \
  case NAME:                                                                   \
    return SPELLING;
#define TOK_KEYWORD(SPELLING)                                                  \
  case kw_##SPELLING:                                                          \
    return #SPELLING;
#include "TokenKinds.def"
  }
}

/// Return true if this is one of the keyword token kinds (e.g. kw_if).
bool Token::isKeyword() const {
  switch (kind) {
  default:
    return false;
#define TOK_KEYWORD(SPELLING)                                                  \
  case kw_##SPELLING:                                                          \
    return true;
#include "TokenKinds.def"
  }
}

bool Token::isCodeCompletionFor(Kind kind) const {
  if (!isCodeCompletion() || spelling.empty())
    return false;
  switch (kind) {
  case Kind::string:
    return spelling[0] == '"';
  case Kind::hash_identifier:
    return spelling[0] == '#';
  case Kind::percent_identifier:
    return spelling[0] == '%';
  case Kind::caret_identifier:
    return spelling[0] == '^';
  case Kind::exclamation_identifier:
    return spelling[0] == '!';
  default:
    return false;
  }
}