Prefer more inline functions in character.h

* src/buffer.h (fetch_char_advance, fetch_char_advance_no_check) (buf_next_char_len, next_char_len, buf_prev_char_len) (prev_char_len, inc_both, dec_both): New inline functions, replacing the old character.h macros FETCH_CHAR_ADVANCE, FETCH_CHAR_ADVANCE_NO_CHECK, BUF_INC_POS, INC_POS, BUF_DEC_POS, DEC_POS, INC_BOTH, DEC_BOTH respectively. All callers changed. These new functions all assume buffer primitives and so need to be here rather than in character.h. * src/casefiddle.c (make_char_unibyte): New static function, replacing the old MAKE_CHAR_UNIBYTE macro. All callers changed. (do_casify_unibyte_string): Use SINGLE_BYTE_CHAR_P instead of open-coding it. * src/ccl.c (GET_TRANSLATION_TABLE): New static function, replacing the old macro of the same name. * src/character.c (string_char): Omit 2nd arg. 3rd arg can no longer be NULL. All callers changed. * src/character.h (SINGLE_BYTE_CHAR_P): Move up. (MAKE_CHAR_UNIBYTE, MAKE_CHAR_MULTIBYTE, PREV_CHAR_BOUNDARY) (STRING_CHAR_AND_LENGTH, STRING_CHAR_ADVANCE) (FETCH_STRING_CHAR_ADVANCE) (FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE) (FETCH_STRING_CHAR_ADVANCE_NO_CHECK, FETCH_CHAR_ADVANCE) (FETCH_CHAR_ADVANCE_NO_CHECK, INC_POS, DEC_POS, INC_BOTH) (DEC_BOTH, BUF_INC_POS, BUF_DEC_POS): Remove. (make_char_multibyte): New static function, replacing the old macro MAKE_CHAR_MULTIBYTE. All callers changed. (CHAR_STRING_ADVANCE): Remove; all callers changed to use CHAR_STRING. (NEXT_CHAR_BOUNDARY): Remove; it was unused. (raw_prev_char_len): New inline function, replacing the old PREV_CHAR_BOUNDARY macro. All callers changed. (string_char_and_length): New inline function, replacing the old STRING_CHAR_AND_LENGTH macro. All callers changed. (STRING_CHAR): Rewrite in terms of string_char_and_length. (string_char_advance): New inline function, replacing the old STRING_CHAR_ADVANCE macro. All callers changed. (fetch_string_char_advance): New inline function, replacing the old FETCH_STRING_CHAR_ADVANCE macro. All callers changed. 
(fetch_string_char_as_multibyte_advance): New inline function, replacing the old FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE macro. All callers changed. (fetch_string_char_advance_no_check): New inline function, replacing the old FETCH_STRING_CHAR_ADVANCE_NO_CHECK macro. All callers changed. * src/regex-emacs.c (HEAD_ADDR_VSTRING): Remove; no longer used. * src/syntax.c (scan_lists): Use dec_bytepos instead of open-coding it. * src/xdisp.c (check_char_and_length): Rename from string_char_and_length to avoid name conflict with new function in character.h. All callers changed.
author: Paul Eggert <eggert@cs.ucla.edu> 2020-04-17 07:57:25 -0700
committer: Paul Eggert <eggert@cs.ucla.edu> 2020-04-17 09:17:35 -0700
commit: 27d101832ada36e431ae6cdecb5c82a180566377 (patch)
tree: 13e20d71f22cf4736bbfa02be54735b1484610bb /src/character.h
parent: 3e46a2315f1a999f5811f57a60a2a55f95d8fbb0 (diff)
download: emacs-27d101832ada36e431ae6cdecb5c82a180566377.tar.gz
1 files changed, 140 insertions, 309 deletions
diff --git a/src/character.h b/src/character.h
index d4bc718af72..81320dedd17 100644
--- a/src/character.h
+++ b/src/character.h
@@ -81,14 +81,20 @@ enum
 };
 
 extern int char_string (unsigned, unsigned char *);
-extern int string_char (const unsigned char *,
-                        const unsigned char **, int *);
+extern int string_char (const unsigned char *, int *);
 
 /* UTF-8 encodings.  Use \x escapes, so they are portable to pre-C11
    compilers and can be concatenated with ordinary string literals.  */
 #define uLSQM "\xE2\x80\x98" /* U+2018 LEFT SINGLE QUOTATION MARK */
 #define uRSQM "\xE2\x80\x99" /* U+2019 RIGHT SINGLE QUOTATION MARK */
 
+/* True iff C is a character of code less than 0x100.  */
+INLINE bool
+SINGLE_BYTE_CHAR_P (intmax_t c)
+{
+  return 0 <= c && c < 0x100;
+}
+
 /* True iff C is a character that corresponds to a raw 8-bit
    byte.  */
 INLINE bool
@@ -133,17 +139,13 @@ CHAR_BYTE8_HEAD_P (int byte)
   return byte == 0xC0 || byte == 0xC1;
 }
 
-/* If C is not ASCII, make it unibyte. */
-#define MAKE_CHAR_UNIBYTE(c)	\
-  do {				\
-    if (! ASCII_CHAR_P (c))	\
-      c = CHAR_TO_BYTE8 (c);	\
-  } while (false)
-
-
 /* If C is not ASCII, make it multibyte.  Assumes C < 256.  */
-#define MAKE_CHAR_MULTIBYTE(c) \
-  (eassert ((c) >= 0 && (c) < 256), (c) = UNIBYTE_TO_CHAR (c))
+INLINE int
+make_char_multibyte (int c)
+{
+  eassert (SINGLE_BYTE_CHAR_P (c));
+  return UNIBYTE_TO_CHAR (c);
+}
 
 /* This is the maximum byte length of multibyte form.  */
 enum { MAX_MULTIBYTE_LENGTH = 5 };
@@ -181,13 +183,6 @@ CHECK_CHARACTER_CDR (Lisp_Object x)
   CHECK_CHARACTER (XCDR (x));
 }
 
-/* True iff C is a character of code less than 0x100.  */
-INLINE bool
-SINGLE_BYTE_CHAR_P (intmax_t c)
-{
-  return 0 <= c && c < 0x100;
-}
-
 /* True if character C has a printable glyph.  */
 INLINE bool
 CHAR_PRINTABLE_P (int c)
@@ -264,29 +259,6 @@ BYTE8_STRING (int b, unsigned char *p)
 }
 
 
-/* Store multibyte form of the character C in P and advance P to the
-   end of the multibyte form.  The caller should allocate at least
-   MAX_MULTIBYTE_LENGTH bytes area at P in advance.  */
-
-#define CHAR_STRING_ADVANCE(c, p)		\
-  do {						\
-    if ((c) <= MAX_1_BYTE_CHAR)			\
-      *(p)++ = (c);				\
-    else if ((c) <= MAX_2_BYTE_CHAR)		\
-      *(p)++ = (0xC0 | ((c) >> 6)),		\
-	*(p)++ = (0x80 | ((c) & 0x3F));		\
-    else if ((c) <= MAX_3_BYTE_CHAR)		\
-      *(p)++ = (0xE0 | ((c) >> 12)),		\
-	*(p)++ = (0x80 | (((c) >> 6) & 0x3F)),	\
-	*(p)++ = (0x80 | ((c) & 0x3F));		\
-    else					\
-      {						\
-	verify (sizeof (c) <= sizeof (unsigned));	\
-	(p) += char_string (c, p);		\
-      }						\
-  } while (false)
-
-
 /* True iff BYTE starts a non-ASCII character in a multibyte form.  */
 INLINE bool
 LEADING_CODE_P (int byte)
@@ -365,281 +337,144 @@ MULTIBYTE_LENGTH_NO_CHECK (unsigned char const *p)
 	  : 0);
 }
 
-/* If P is before LIMIT, advance P to the next character boundary.
+
+/* Return number of bytes in the multibyte character just before P.
    Assumes that P is already at a character boundary of the same
-   multibyte form whose end address is LIMIT.  */
+   multibyte form, and is not at the start of that form.  */
 
-#define NEXT_CHAR_BOUNDARY(p, limit)	\
-  do {					\
-    if ((p) < (limit))			\
-      (p) += BYTES_BY_CHAR_HEAD (*(p));	\
-  } while (false)
+INLINE int
+raw_prev_char_len (unsigned char const *p)
+{
+  for (int len = 1; ; len++)
+    if (CHAR_HEAD_P (p[-len]))
+      return len;
+}
 
 
-/* If P is after LIMIT, advance P to the previous character boundary.
-   Assumes that P is already at a character boundary of the same
-   multibyte form whose beginning address is LIMIT.  */
-
-#define PREV_CHAR_BOUNDARY(p, limit)					\
-  do {									\
-    if ((p) > (limit))							\
-      {									\
-	const unsigned char *chp = (p);					\
-	do {								\
-	  chp--;							\
-	} while (chp >= limit && ! CHAR_HEAD_P (*chp));			\
-	(p) = (BYTES_BY_CHAR_HEAD (*chp) == (p) - chp) ? chp : (p) - 1;	\
-      }									\
-  } while (false)
+/* Return the character code of character whose multibyte form is at P,
+   and set *LENGTH to its length.  */
+
+INLINE int
+string_char_and_length (unsigned char const *p, int *length)
+{
+  int c, len;
+
+  if (! (p[0] & 0x80))
+    {
+      len = 1;
+      c = p[0];
+    }
+  else if (! (p[0] & 0x20))
+    {
+      len = 2;
+      c = ((((p[0] & 0x1F) << 6)
+	    | (p[1] & 0x3F))
+	   + (p[0] < 0xC2 ? 0x3FFF80 : 0));
+    }
+  else if (! (p[0] & 0x10))
+    {
+      len = 3;
+      c = (((p[0] & 0x0F) << 12)
+	   | ((p[1] & 0x3F) << 6)
+	   | (p[2] & 0x3F));
+    }
+  else
+    c = string_char (p, &len);
+
+  eassume (0 < len && len <= MAX_MULTIBYTE_LENGTH);
+  *length = len;
+  return c;
+}
 
 /* Return the character code of character whose multibyte form is at P.  */
 
 INLINE int
 STRING_CHAR (unsigned char const *p)
 {
-  return (!(p[0] & 0x80)
-	  ? p[0]
-	  : ! (p[0] & 0x20)
-	  ? ((((p[0] & 0x1F) << 6)
-	      | (p[1] & 0x3F))
-	     + (p[0] < 0xC2 ? 0x3FFF80 : 0))
-	  : ! (p[0] & 0x10)
-	  ? (((p[0] & 0x0F) << 12)
-	     | ((p[1] & 0x3F) << 6)
-	     | (p[2] & 0x3F))
-	  : string_char (p, NULL, NULL));
-}
-
-
-/* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
-   form.  */
-
-#define STRING_CHAR_AND_LENGTH(p, actual_len)			\
-  (!((p)[0] & 0x80)						\
-   ? ((actual_len) = 1, (p)[0])					\
-   : ! ((p)[0] & 0x20)						\
-   ? ((actual_len) = 2,						\
-      (((((p)[0] & 0x1F) << 6)					\
-	| ((p)[1] & 0x3F))					\
-       + (((unsigned char) (p)[0]) < 0xC2 ? 0x3FFF80 : 0)))	\
-   : ! ((p)[0] & 0x10)						\
-   ? ((actual_len) = 3,						\
-      ((((p)[0] & 0x0F) << 12)					\
-       | (((p)[1] & 0x3F) << 6)					\
-       | ((p)[2] & 0x3F)))					\
-   : string_char ((p), NULL, &actual_len))
-
-
-/* Like STRING_CHAR, but advance P to the end of multibyte form.  */
-
-#define STRING_CHAR_ADVANCE(p)					\
-  (!((p)[0] & 0x80)						\
-   ? *(p)++							\
-   : ! ((p)[0] & 0x20)						\
-   ? ((p) += 2,							\
-      ((((p)[-2] & 0x1F) << 6)					\
-       | ((p)[-1] & 0x3F)					\
-       | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))	\
-   : ! ((p)[0] & 0x10)						\
-   ? ((p) += 3,							\
-      ((((p)[-3] & 0x0F) << 12)					\
-       | (((p)[-2] & 0x3F) << 6)				\
-       | ((p)[-1] & 0x3F)))					\
-   : string_char ((p), &(p), NULL))
-
-
-/* Fetch the "next" character from Lisp string STRING at byte position
-   BYTEIDX, character position CHARIDX.  Store it into OUTPUT.
-
-   All the args must be side-effect-free.
-   BYTEIDX and CHARIDX must be lvalues;
-   we increment them past the character fetched.  */
-
-#define FETCH_STRING_CHAR_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX)	\
-  do                                                                    \
-    {									\
-      CHARIDX++;							\
-      if (STRING_MULTIBYTE (STRING))					\
-	{								\
-	  unsigned char *chp = &SDATA (STRING)[BYTEIDX];		\
-	  int chlen;							\
-									\
-	  OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);			\
-	  BYTEIDX += chlen;						\
-	}								\
-      else								\
-	{								\
-	  OUTPUT = SREF (STRING, BYTEIDX);				\
-	  BYTEIDX++;							\
-	}								\
-    }									\
-  while (false)
-
-/* Like FETCH_STRING_CHAR_ADVANCE, but return a multibyte character
+  int len;
+  return string_char_and_length (p, &len);
+}
+
+
+/* Like STRING_CHAR (*PP), but advance *PP to the end of multibyte form.  */
+
+INLINE int
+string_char_advance (unsigned char const **pp)
+{
+  unsigned char const *p = *pp;
+  int len, c = string_char_and_length (p, &len);
+  *pp = p + len;
+  return c;
+}
+
+
+/* Return the next character from Lisp string STRING at byte position
+   *BYTEIDX, character position *CHARIDX.  Update *BYTEIDX and
+   *CHARIDX past the character fetched.  */
+
+INLINE int
+fetch_string_char_advance (Lisp_Object string,
+			   ptrdiff_t *charidx, ptrdiff_t *byteidx)
+{
+  int output;
+  ptrdiff_t b = *byteidx;
+  unsigned char *chp = SDATA (string) + b;
+  if (STRING_MULTIBYTE (string))
+    {
+      int chlen;
+      output = string_char_and_length (chp, &chlen);
+      b += chlen;
+    }
+  else
+    {
+      output = *chp;
+      b++;
+    }
+  (*charidx)++;
+  *byteidx = b;
+  return output;
+}
+
+/* Like fetch_string_char_advance, but return a multibyte character
    even if STRING is unibyte.  */
 
-#define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
-  do                                                                          \
-    {									      \
-      CHARIDX++;							      \
-      if (STRING_MULTIBYTE (STRING))					      \
-	{								      \
-	  unsigned char *chp = &SDATA (STRING)[BYTEIDX];		      \
-	  int chlen;							      \
-									      \
-	  OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);			      \
-	  BYTEIDX += chlen;						      \
-	}								      \
-      else								      \
-	{								      \
-	  OUTPUT = SREF (STRING, BYTEIDX);				      \
-	  BYTEIDX++;							      \
-	  MAKE_CHAR_MULTIBYTE (OUTPUT);					      \
-	}								      \
-    }									      \
-  while (false)
-
-
-/* Like FETCH_STRING_CHAR_ADVANCE, but assumes STRING is multibyte.  */
-
-#define FETCH_STRING_CHAR_ADVANCE_NO_CHECK(OUTPUT, STRING, CHARIDX, BYTEIDX) \
-  do    								     \
-    {									     \
-      unsigned char *fetch_ptr = &SDATA (STRING)[BYTEIDX];		     \
-      int fetch_len;							     \
-									     \
-      OUTPUT = STRING_CHAR_AND_LENGTH (fetch_ptr, fetch_len);		     \
-      BYTEIDX += fetch_len;						     \
-      CHARIDX++;							     \
-    }									     \
-  while (false)
-
-
-/* Like FETCH_STRING_CHAR_ADVANCE, but fetch character from the current
-   buffer.  */
-
-#define FETCH_CHAR_ADVANCE(OUTPUT, CHARIDX, BYTEIDX)		\
-  do    							\
-    {								\
-      CHARIDX++;						\
-      if (!NILP (BVAR (current_buffer, enable_multibyte_characters)))	\
-	{							\
-	  unsigned char *chp = BYTE_POS_ADDR (BYTEIDX);		\
-	  int chlen;						\
-								\
-	  OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);		\
-	  BYTEIDX += chlen;					\
-	}							\
-      else							\
-	{							\
-	  OUTPUT = *(BYTE_POS_ADDR (BYTEIDX));			\
-	  BYTEIDX++;						\
-	}							\
-    }								\
-  while (false)
-
-
-/* Like FETCH_CHAR_ADVANCE, but assumes the current buffer is multibyte.  */
-
-#define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX)	\
-  do    							\
-    {								\
-      unsigned char *chp = BYTE_POS_ADDR (BYTEIDX);		\
-      int chlen;							\
-								\
-      OUTPUT = STRING_CHAR_AND_LENGTH (chp, chlen);		\
-      BYTEIDX += chlen;						\
-      CHARIDX++;						\
-    }								\
-  while (false)
-
-
-/* Increment the buffer byte position POS_BYTE of the current buffer to
-   the next character boundary.  No range checking of POS.  */
-
-#define INC_POS(pos_byte)				\
-  do {							\
-    unsigned char *chp = BYTE_POS_ADDR (pos_byte);	\
-    pos_byte += BYTES_BY_CHAR_HEAD (*chp);		\
-  } while (false)
-
-
-/* Decrement the buffer byte position POS_BYTE of the current buffer to
-   the previous character boundary.  No range checking of POS.  */
-
-#define DEC_POS(pos_byte)			\
-  do {						\
-    unsigned char *chp;				\
-    						\
-    pos_byte--;					\
-    if (pos_byte < GPT_BYTE)			\
-      chp = BEG_ADDR + pos_byte - BEG_BYTE;	\
-    else					\
-      chp = BEG_ADDR + GAP_SIZE + pos_byte - BEG_BYTE; \
-    while (!CHAR_HEAD_P (*chp))			\
-      {						\
-	chp--;					\
-	pos_byte--;				\
-      }						\
-  } while (false)
-
-/* Increment both CHARPOS and BYTEPOS, each in the appropriate way.  */
-
-#define INC_BOTH(charpos, bytepos)				\
-  do								\
-    {								\
-      (charpos)++;						\
-      if (NILP (BVAR (current_buffer, enable_multibyte_characters)))	\
-	(bytepos)++;						\
-      else							\
-	INC_POS ((bytepos));					\
-    }								\
-  while (false)
-
-
-/* Decrement both CHARPOS and BYTEPOS, each in the appropriate way.  */
-
-#define DEC_BOTH(charpos, bytepos)				\
-  do								\
-    {								\
-      (charpos)--;						\
-      if (NILP (BVAR (current_buffer, enable_multibyte_characters)))	\
-	(bytepos)--;						\
-      else							\
-	DEC_POS ((bytepos));					\
-    }								\
-  while (false)
-
-
-/* Increment the buffer byte position POS_BYTE of the current buffer to
-   the next character boundary.  This macro relies on the fact that
-   *GPT_ADDR and *Z_ADDR are always accessible and the values are
-   '\0'.  No range checking of POS_BYTE.  */
-
-#define BUF_INC_POS(buf, pos_byte)				\
-  do {								\
-    unsigned char *chp = BUF_BYTE_ADDRESS (buf, pos_byte);	\
-    pos_byte += BYTES_BY_CHAR_HEAD (*chp);			\
-  } while (false)
-
-
-/* Decrement the buffer byte position POS_BYTE of the current buffer to
-   the previous character boundary.  No range checking of POS_BYTE.  */
-
-#define BUF_DEC_POS(buf, pos_byte)					\
-  do {									\
-    unsigned char *chp;							\
-    pos_byte--;								\
-    if (pos_byte < BUF_GPT_BYTE (buf))					\
-      chp = BUF_BEG_ADDR (buf) + pos_byte - BEG_BYTE;			\
-    else								\
-      chp = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - BEG_BYTE;\
-    while (!CHAR_HEAD_P (*chp))						\
-      {									\
-	chp--;								\
-	pos_byte--;							\
-      }									\
-  } while (false)
+INLINE int
+fetch_string_char_as_multibyte_advance (Lisp_Object string,
+					ptrdiff_t *charidx, ptrdiff_t *byteidx)
+{
+  int output;
+  ptrdiff_t b = *byteidx;
+  unsigned char *chp = SDATA (string) + b;
+  if (STRING_MULTIBYTE (string))
+    {
+      int chlen;
+      output = string_char_and_length (chp, &chlen);
+      b += chlen;
+    }
+  else
+    {
+      output = make_char_multibyte (*chp);
+      b++;
+    }
+  (*charidx)++;
+  *byteidx = b;
+  return output;
+}
+
+
+/* Like fetch_string_char_advance, but assumes STRING is multibyte.  */
+
+INLINE int
+fetch_string_char_advance_no_check (Lisp_Object string,
+				    ptrdiff_t *charidx, ptrdiff_t *byteidx)
+{
+  ptrdiff_t b = *byteidx;
+  unsigned char *chp = SDATA (string) + b;
+  int chlen, output = string_char_and_length (chp, &chlen);
+  (*charidx)++;
+  *byteidx = b + chlen;
+  return output;
+}
 
 
 /* If C is a variation selector, return the index of the
@@ -728,10 +563,6 @@ extern bool graphicp (int);
 extern bool printablep (int);
 extern bool blankp (int);
 
-/* Return a translation table of id number ID.  */
-#define GET_TRANSLATION_TABLE(id) \
-  (XCDR (XVECTOR (Vtranslation_table_vector)->contents[(id)]))
-
 /* Look up the element in char table OBJ at index CH, and return it as
    an integer.  If the element is not a character, return CH itself.  */
author	Paul Eggert <eggert@cs.ucla.edu>	2020-04-17 07:57:25 -0700
committer	Paul Eggert <eggert@cs.ucla.edu>	2020-04-17 09:17:35 -0700
commit	27d101832ada36e431ae6cdecb5c82a180566377 (patch)
tree	13e20d71f22cf4736bbfa02be54735b1484610bb /src/character.h
parent	3e46a2315f1a999f5811f57a60a2a55f95d8fbb0 (diff)
download	emacs-27d101832ada36e431ae6cdecb5c82a180566377.tar.gz