summaryrefslogtreecommitdiff
path: root/Parser/tokenizer.c
diff options
context:
space:
mode:
Diffstat (limited to 'Parser/tokenizer.c')
-rw-r--r--Parser/tokenizer.c23
1 files changed, 22 insertions, 1 deletions
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 8f67e0e62a..0ccd02b58d 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -1601,7 +1601,28 @@ PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
}
#endif
-
+/* Get -*- encoding -*- from a Python file.
+
+   Runs the tokenizer over the start of fp (for as long as tok->lineno <= 2
+   and tok->done stays E_OK) so that a coding declaration can be picked up,
+   then rewinds fp.
+
+   Returns a copy of the detected encoding name allocated with
+   PyMem_MALLOC; the caller owns it and should release it with PyMem_FREE.
+
+   PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
+   the first or second line of the file, when the tokenizer state cannot be
+   created, or when the copy cannot be allocated. In this case the encoding
+   is PyUnicode_GetDefaultEncoding().
+*/
+char *
+PyTokenizer_FindEncoding(FILE *fp)
+{
+    struct tok_state *tok;
+    char *encoding = NULL;
+    char *p_start = NULL, *p_end = NULL;
+
+    tok = PyTokenizer_FromFile(fp, NULL, NULL, NULL);
+    if (tok == NULL) {
+        rewind(fp);
+        return NULL;
+    }
+
+    /* The tokens themselves are discarded; only the state the tokenizer
+       accumulates in tok (done, encoding) is of interest. */
+    while (tok->lineno <= 2 && tok->done == E_OK) {
+        (void)PyTokenizer_Get(tok, &p_start, &p_end);
+    }
+    rewind(fp);
+
+    /* The encoding string is reached through tok, so hand the caller a
+       private copy before the tokenizer state is released; returning
+       tok->encoding directly would force us to leak tok. */
+    if (tok->encoding != NULL) {
+        size_t size = strlen(tok->encoding) + 1;
+
+        encoding = (char *)PyMem_MALLOC(size);
+        if (encoding != NULL) {
+            memcpy(encoding, tok->encoding, size);
+        }
+    }
+    PyTokenizer_Free(tok);
+    return encoding;
+}
#ifdef Py_DEBUG