118 files changed, 10370 insertions, 2719 deletions
diff --git a/innobase/btr/btr0btr.c b/innobase/btr/btr0btr.c
index 71be6d81d7c..ee27a171fa5 100644
--- a/innobase/btr/btr0btr.c
+++ b/innobase/btr/btr0btr.c
@@ -430,7 +430,8 @@ btr_page_free_for_ibuf(
 	flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
 		       page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
 
-	ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr));
+	ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+									mtr));
 }
 
 /******************************************************************
@@ -603,8 +604,8 @@ btr_page_get_father_for_rec(
 "InnoDB: father ptr page no %lu, child page no %lu\n",
                     (UT_LIST_GET_FIRST(tree->tree_indexes))->table_name,
                     (UT_LIST_GET_FIRST(tree->tree_indexes))->name,
-                    btr_node_ptr_get_child_page_no(node_ptr),
-                    buf_frame_get_page_no(page));
+                    (unsigned long) btr_node_ptr_get_child_page_no(node_ptr),
+                    (unsigned long) buf_frame_get_page_no(page));
      		page_rec_print(page_rec_get_next(page_get_infimum_rec(page)));
      		page_rec_print(node_ptr);
 
@@ -885,7 +886,9 @@ btr_page_reorganize_low(
 "InnoDB: Error: page old data size %lu new data size %lu\n"
 "InnoDB: Error: page old max ins size %lu new max ins size %lu\n"
 "InnoDB: Make a detailed bug report and send it to mysql@lists.mysql.com\n",
-			data_size1, data_size2, max_ins_size1, max_ins_size2);
+			(unsigned long) data_size1, (unsigned long) data_size2,
+			(unsigned long) max_ins_size1,
+			(unsigned long) max_ins_size2);
 	}
 
 	buf_frame_free(new_page);
@@ -2225,7 +2228,8 @@ btr_print_recursive(
 	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
 							MTR_MEMO_PAGE_X_FIX));
 	printf("NODE ON LEVEL %lu page number %lu\n",
-		btr_page_get_level(page, mtr), buf_frame_get_page_no(page));
+	       (ulong) btr_page_get_level(page, mtr),
+	       (ulong) buf_frame_get_page_no(page));
 	
 	page_print(page, width, width);
 	
@@ -2366,8 +2370,10 @@ btr_index_rec_validate(
 "InnoDB: Record in index %s in table %s, page %lu, at offset %lu\n"
 "InnoDB: has %lu fields, should have %lu\n",
 			index->name, index->table_name,
-			buf_frame_get_page_no(page), (ulint)(rec - page),
-			rec_get_n_fields(rec), n);
+			(unsigned long) buf_frame_get_page_no(page),
+			(unsigned long) (rec - page),
+			(unsigned long) rec_get_n_fields(rec),
+			(unsigned long) n);
 
 		if (!dump_on_error) {
 
@@ -2400,9 +2406,11 @@ btr_index_rec_validate(
 "InnoDB: Record in index %s in table %s, page %lu, at offset %lu\n"
 "InnoDB: field %lu len is %lu, should be %lu\n",
 				index->name, index->table_name,
-				buf_frame_get_page_no(page),
-				(ulint)(rec - page),
-				i, len, dtype_get_fixed_size(type));
+				(unsigned long) buf_frame_get_page_no(page),
+				(unsigned long) (rec - page),
+				(unsigned long) i,
+				(unsigned long) len,
+				(unsigned long) dtype_get_fixed_size(type));
 
 			if (!dump_on_error) {
 	
@@ -2517,8 +2525,8 @@ loop:
 	if (!page_validate(page, index)) {
 		fprintf(stderr,
 "InnoDB: Error in page %lu in index %s table %s, index tree level %lu\n",
-			buf_frame_get_page_no(page), index->name,
-			index->table_name, level);
+			(ulong) buf_frame_get_page_no(page), index->name,
+			index->table_name, (ulong) level);
 
 		ret = FALSE;
 	} else if (level == 0) {
@@ -2550,8 +2558,8 @@ loop:
 
  			fprintf(stderr,
 		"InnoDB: Error on pages %lu and %lu in index %s table %s\n",
-				buf_frame_get_page_no(page),
-				right_page_no,
+				(ulong) buf_frame_get_page_no(page),
+				(ulong) right_page_no,
 				index->name, index->table_name);
 
 			fprintf(stderr,
@@ -2591,7 +2599,7 @@ loop:
 								&mtr)) {
  			fprintf(stderr,
 			"InnoDB: Error on page %lu in index %s table %s\n",
-				buf_frame_get_page_no(page),
+				(unsigned long) buf_frame_get_page_no(page),
 				index->name, index->table_name);
 
 			fprintf(stderr,
@@ -2606,7 +2614,7 @@ loop:
 
 			fprintf(stderr,
 				"InnoDB: node ptr child page n:o %lu\n",
-				btr_node_ptr_get_child_page_no(node_ptr));
+				(unsigned long) btr_node_ptr_get_child_page_no(node_ptr));
 
 			rec_sprintf(err_buf, 900,
 			 	btr_page_get_father_for_rec(tree, page,
@@ -2634,7 +2642,7 @@ loop:
 
 	 			fprintf(stderr,
 			"InnoDB: Error on page %lu in index %s table %s\n",
-					buf_frame_get_page_no(page),
+					(ulong) buf_frame_get_page_no(page),
 					index->name, index->table_name);
 
 				buf_page_print(father_page);
@@ -2689,7 +2697,7 @@ loop:
 
 	 				fprintf(stderr,
 			"InnoDB: Error on page %lu in index %s table %s\n",
-					buf_frame_get_page_no(page),
+					(unsigned long) buf_frame_get_page_no(page),
 					index->name, index->table_name);
 
 					buf_page_print(father_page);
@@ -2709,7 +2717,7 @@ loop:
 
 	 				fprintf(stderr,
 			"InnoDB: Error on page %lu in index %s table %s\n",
-					buf_frame_get_page_no(page),
+					(unsigned long) buf_frame_get_page_no(page),
 					index->name, index->table_name);
 
 					buf_page_print(father_page);
@@ -2727,7 +2735,7 @@ loop:
 
 	 				fprintf(stderr,
 			"InnoDB: Error on page %lu in index %s table %s\n",
-					buf_frame_get_page_no(page),
+					(unsigned long) buf_frame_get_page_no(page),
 					index->name, index->table_name);
 
 					buf_page_print(father_page);
diff --git a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c
index 6e1794c2ff7..af3a61041cb 100644
--- a/innobase/btr/btr0cur.c
+++ b/innobase/btr/btr0cur.c
@@ -291,6 +291,7 @@ btr_cur_search_to_nth_level(
 		&& latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
 		&& !estimate
 		&& mode != PAGE_CUR_LE_OR_EXTENDS
+		&& srv_use_adaptive_hash_indexes
 	        && btr_search_guess_on_hash(index, info, tuple, mode,
 						latch_mode, cursor,
 						has_search_latch, mtr)) {
@@ -495,9 +496,11 @@ retry_page_get:
 		cursor->up_bytes = up_bytes;
 
 #ifdef BTR_CUR_ADAPT		
-		btr_search_info_update(index, cursor);
-#endif
+		if (srv_use_adaptive_hash_indexes) {
 
+			btr_search_info_update(index, cursor);
+		}
+#endif
 		ut_ad(cursor->up_match != ULINT_UNDEFINED
 						|| mode != PAGE_CUR_GE);
 		ut_ad(cursor->up_match != ULINT_UNDEFINED
@@ -871,8 +874,8 @@ btr_cur_optimistic_insert(
 	if (btr_cur_print_record_ops && thr) {
 		printf(
 	"Trx with id %lu %lu going to insert to table %s index %s\n",
-		ut_dulint_get_high(thr_get_trx(thr)->id),
-		ut_dulint_get_low(thr_get_trx(thr)->id),
+		(unsigned long) ut_dulint_get_high(thr_get_trx(thr)->id),
+		(unsigned long) ut_dulint_get_low(thr_get_trx(thr)->id),
 		index->table_name, index->name);
 		dtuple_print(entry);
 	}
@@ -954,7 +957,7 @@ calculate_sizes_again:
 	/* Now, try the insert */
 
 	*rec = page_cur_insert_rec_low(page_cursor, entry, data_size,
-								NULL, mtr);	
+								NULL, mtr);
 	if (!(*rec)) {
 		/* If the record did not fit, reorganize */
 		btr_page_reorganize(page, mtr);
@@ -975,7 +978,8 @@ calculate_sizes_again:
 			fprintf(stderr,
 	"InnoDB: Error: cannot insert tuple %s to index %s of table %s\n"
 	"InnoDB: max insert size %lu\n",
-			err_buf, index->name, index->table->name, max_size);
+			err_buf, index->name, index->table->name,
+			(unsigned long) max_size);
 
 			mem_free(err_buf);
 		}
@@ -1045,6 +1049,7 @@ btr_cur_pessimistic_insert(
 	ibool		dummy_inh;
 	ibool		success;
 	ulint		n_extents	= 0;
+	ulint		n_reserved;
 	
 	ut_ad(dtuple_check_typed(entry));
 
@@ -1064,7 +1069,7 @@ btr_cur_pessimistic_insert(
 	cursor->flag = BTR_CUR_BINARY;
 
 	err = btr_cur_optimistic_insert(flags, cursor, entry, rec, big_rec,
-								thr, mtr);	
+								thr, mtr);
 	if (err != DB_FAIL) {
 
 		return(err);
@@ -1087,7 +1092,7 @@ btr_cur_pessimistic_insert(
 
 		n_extents = cursor->tree_height / 16 + 3;
 
-		success = fsp_reserve_free_extents(index->space,
+		success = fsp_reserve_free_extents(&n_reserved, index->space,
 						n_extents, FSP_NORMAL, mtr);
 		if (!success) {
 			err = DB_OUT_OF_FILE_SPACE;
@@ -1109,7 +1114,7 @@ btr_cur_pessimistic_insert(
 		
 			if (n_extents > 0) {
 			        fil_space_release_free_extents(index->space,
-								n_extents);
+								n_reserved);
 			}
 			return(DB_TOO_BIG_RECORD);
 		}
@@ -1137,7 +1142,7 @@ btr_cur_pessimistic_insert(
 	err = DB_SUCCESS;
 
 	if (n_extents > 0) {
-		fil_space_release_free_extents(index->space, n_extents);
+		fil_space_release_free_extents(index->space, n_reserved);
 	}
 
 	*big_rec = big_rec_vec;
@@ -1318,7 +1323,8 @@ btr_cur_parse_update_in_place(
 }
 
 /*****************************************************************
-Updates a record when the update causes no size changes in its fields. */
+Updates a record when the update causes no size changes in its fields.
+We assume here that the ordering fields of the record do not change. */
 
 ulint
 btr_cur_update_in_place(
@@ -1349,8 +1355,8 @@ btr_cur_update_in_place(
 	if (btr_cur_print_record_ops && thr) {
 		printf(
 	"Trx with id %lu %lu going to update table %s index %s\n",
-		ut_dulint_get_high(thr_get_trx(thr)->id),
-		ut_dulint_get_low(thr_get_trx(thr)->id),
+		(unsigned long) ut_dulint_get_high(thr_get_trx(thr)->id),
+		(unsigned long) ut_dulint_get_low(thr_get_trx(thr)->id),
 		index->table_name, index->name);
 		rec_print(rec);
 	}
@@ -1453,8 +1459,8 @@ btr_cur_optimistic_update(
 	if (btr_cur_print_record_ops && thr) {
 		printf(
 	"Trx with id %lu %lu going to update table %s index %s\n",
-		ut_dulint_get_high(thr_get_trx(thr)->id),
-		ut_dulint_get_low(thr_get_trx(thr)->id),
+		(unsigned long) ut_dulint_get_high(thr_get_trx(thr)->id),
+		(unsigned long) ut_dulint_get_low(thr_get_trx(thr)->id),
 		index->table_name, index->name);
 		rec_print(rec);
 	}
@@ -1666,6 +1672,7 @@ btr_cur_pessimistic_update(
 	ibool		was_first;
 	ibool		success;
 	ulint		n_extents	= 0;
+	ulint		n_reserved;
 	ulint*		ext_vect;
 	ulint		n_ext_vect;
 	ulint		reserve_flag;
@@ -1711,7 +1718,8 @@ btr_cur_pessimistic_update(
 			reserve_flag = FSP_NORMAL;
 		}
 		
-		success = fsp_reserve_free_extents(cursor->index->space,
+		success = fsp_reserve_free_extents(&n_reserved,
+						cursor->index->space,
 						n_extents, reserve_flag, mtr);
 		if (!success) {
 			err = DB_OUT_OF_FILE_SPACE;
@@ -1860,7 +1868,7 @@ return_after_reservations:
 
 	if (n_extents > 0) {
 		fil_space_release_free_extents(cursor->index->space,
-							n_extents);
+							n_reserved);
 	}
 
 	*big_rec = big_rec_vec;
@@ -2000,8 +2008,8 @@ btr_cur_del_mark_set_clust_rec(
 	if (btr_cur_print_record_ops && thr) {
 		printf(
 	"Trx with id %lu %lu going to del mark table %s index %s\n",
-		ut_dulint_get_high(thr_get_trx(thr)->id),
-		ut_dulint_get_low(thr_get_trx(thr)->id),
+		(unsigned long) ut_dulint_get_high(thr_get_trx(thr)->id),
+		(unsigned long) ut_dulint_get_low(thr_get_trx(thr)->id),
 		index->table_name, index->name);
 		rec_print(rec);
 	}
@@ -2140,8 +2148,8 @@ btr_cur_del_mark_set_sec_rec(
 	if (btr_cur_print_record_ops && thr) {
 		printf(
 	"Trx with id %lu %lu going to del mark table %s index %s\n",
-		ut_dulint_get_high(thr_get_trx(thr)->id),
-		ut_dulint_get_low(thr_get_trx(thr)->id),
+		(unsigned long) ut_dulint_get_high(thr_get_trx(thr)->id),
+		(unsigned long) ut_dulint_get_low(thr_get_trx(thr)->id),
 		cursor->index->table_name, cursor->index->name);
 		rec_print(rec);
 	}
@@ -2331,6 +2339,7 @@ btr_cur_pessimistic_delete(
 	rec_t*		rec;
 	dtuple_t*	node_ptr;
 	ulint		n_extents	= 0;
+	ulint		n_reserved;
 	ibool		success;
 	ibool		ret		= FALSE;
 	mem_heap_t*	heap;
@@ -2349,7 +2358,8 @@ btr_cur_pessimistic_delete(
 
 		n_extents = cursor->tree_height / 32 + 1;
 
-		success = fsp_reserve_free_extents(cursor->index->space,
+		success = fsp_reserve_free_extents(&n_reserved,
+						cursor->index->space,
 						n_extents, FSP_CLEANING, mtr);
 		if (!success) {
 			*err = DB_OUT_OF_FILE_SPACE;
@@ -2428,7 +2438,8 @@ return_after_reservations:
 	}
 
 	if (n_extents > 0) {
-		fil_space_release_free_extents(cursor->index->space, n_extents);
+		fil_space_release_free_extents(cursor->index->space,
+								n_reserved);
 	}
 
 	return(ret);
@@ -3101,7 +3112,7 @@ btr_store_big_rec_extern_fields(
 	ut_ad(mtr_memo_contains(local_mtr, dict_tree_get_lock(index->tree),
 							MTR_MEMO_X_LOCK));
 	ut_ad(mtr_memo_contains(local_mtr, buf_block_align(rec),
-							MTR_MEMO_PAGE_X_FIX));	
+							MTR_MEMO_PAGE_X_FIX));
 	ut_a(index->type & DICT_CLUSTERED);
 							
 	space_id = buf_frame_get_space_id(rec);
@@ -3269,7 +3280,7 @@ btr_free_externally_stored_field(
 	ut_ad(mtr_memo_contains(local_mtr, dict_tree_get_lock(index->tree),
 							MTR_MEMO_X_LOCK));
 	ut_ad(mtr_memo_contains(local_mtr, buf_block_align(data),
-							MTR_MEMO_PAGE_X_FIX));	
+							MTR_MEMO_PAGE_X_FIX));
 	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
 	local_len -= BTR_EXTERN_FIELD_REF_SIZE;
 	
diff --git a/innobase/btr/btr0pcur.c b/innobase/btr/btr0pcur.c
index 4725551d4d7..cf8a612ef28 100644
--- a/innobase/btr/btr0pcur.c
+++ b/innobase/btr/btr0pcur.c
@@ -95,7 +95,9 @@ btr_pcur_store_position(
 	ut_a(cursor->latch_mode != BTR_NO_LATCHES);
 
 	if (page_get_n_recs(page) == 0) {
-		/* It must be an empty index tree */
+		/* It must be an empty index tree; NOTE that in this case
+		we do not store the modify_clock, but always do a search
+		if we restore the cursor position */
 
 		ut_a(btr_page_get_next(page, mtr) == FIL_NULL
 		     && btr_page_get_prev(page, mtr) == FIL_NULL);
@@ -128,12 +130,13 @@ btr_pcur_store_position(
 	} else {
 		cursor->rel_pos = BTR_PCUR_ON;
 	}
-	
+
 	cursor->old_stored = BTR_PCUR_OLD_STORED;
 	cursor->old_rec = dict_tree_copy_rec_order_prefix(tree, rec,
 						&(cursor->old_rec_buf),
 						&(cursor->buf_size));
 									
+	cursor->block_when_stored = buf_block_align(page);	
 	cursor->modify_clock = buf_frame_get_modify_clock(page);
 }
 
@@ -205,6 +208,9 @@ btr_pcur_restore_position(
 	if (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
 	    || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
 
+	    	/* In these cases we do not try an optimistic restoration,
+	    	but always do a search */
+
 	    	if (cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
 	    		from_left = TRUE;
 	    	} else {
@@ -214,6 +220,10 @@ btr_pcur_restore_position(
 		btr_cur_open_at_index_side(from_left,
 			btr_pcur_get_btr_cur(cursor)->index, latch_mode,
 					btr_pcur_get_btr_cur(cursor), mtr);
+
+		cursor->block_when_stored =
+				buf_block_align(btr_pcur_get_page(cursor));
+
 		return(FALSE);
 	}
 	
@@ -224,8 +234,9 @@ btr_pcur_restore_position(
 	if (latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF) {
 		/* Try optimistic restoration */
 	    
-		if (buf_page_optimistic_get(latch_mode, page,
-						cursor->modify_clock, mtr)) {
+		if (buf_page_optimistic_get(latch_mode,
+					    cursor->block_when_stored, page,
+					    cursor->modify_clock, mtr)) {
 			cursor->pos_state = BTR_PCUR_IS_POSITIONED;
 #ifdef UNIV_SYNC_DEBUG
 			buf_page_dbg_add_level(page, SYNC_TREE_NODE);
@@ -270,8 +281,6 @@ btr_pcur_restore_position(
 
 	btr_pcur_open_with_no_init(btr_pcur_get_btr_cur(cursor)->index, tuple,
 					mode, latch_mode, cursor, 0, mtr);
-
-	cursor->old_stored = BTR_PCUR_OLD_STORED;
 	
 	/* Restore the old search mode */
 	cursor->search_mode = old_mode;
@@ -280,11 +289,18 @@ btr_pcur_restore_position(
 	    && btr_pcur_is_on_user_rec(cursor, mtr)
 	    && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor))) {
 
-	        /* We have to store the NEW value for the modify clock, since
-	        the cursor can now be on a different page! */
+		/* We have to store the NEW value for the modify clock, since
+		the cursor can now be on a different page! But we can retain
+		the value of old_rec */
+
+		cursor->modify_clock =
+			buf_frame_get_modify_clock(btr_pcur_get_page(cursor));
+
+		cursor->block_when_stored =
+			buf_block_align(btr_pcur_get_page(cursor));
+
+		cursor->old_stored = BTR_PCUR_OLD_STORED;
 
-	        cursor->modify_clock = buf_frame_get_modify_clock(
-				    buf_frame_align(btr_pcur_get_rec(cursor)));
 		mem_heap_free(heap);
 
 		return(TRUE);
@@ -292,9 +308,10 @@ btr_pcur_restore_position(
 
 	mem_heap_free(heap);
 
-	/* We have to store position information, modify clock value, etc.
-        because the cursor may now be on a different page */
-
+	/* We have to store new position information, modify_clock etc.,
+	to the cursor because it can now be on a different page, the record
+	under it may have been removed, etc. */
+	
 	btr_pcur_store_position(cursor, mtr);
 
 	return(FALSE);
diff --git a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c
index 9421ca48718..238f118e260 100644
--- a/innobase/btr/btr0sea.c
+++ b/innobase/btr/btr0sea.c
@@ -790,8 +790,8 @@ btr_search_guess_on_hash(
 		goto failure;
 	}
 
-	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
-	ut_ad(page_rec_is_user_rec(rec));	
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_a(page_rec_is_user_rec(rec));	
 
 	btr_cur_position(index, rec, cursor);
 
@@ -1040,12 +1040,14 @@ btr_search_drop_page_hash_when_freed(
 	
 	mtr_start(&mtr);
 
-	/* We assume that if the caller has a latch on the page,
-	then the caller has already dropped the hash index for the page,
-	and we never get here. Therefore we can acquire the s-latch to
-	the page without fearing a deadlock. */
+	/* We assume that if the caller has a latch on the page, then the
+	caller has already dropped the hash index for the page, and we never
+	get here. Therefore we can acquire the s-latch to the page without
+	having to fear a deadlock. */
 	
-	page = buf_page_get(space, page_no, RW_S_LATCH, &mtr);
+	page = buf_page_get_gen(space, page_no, RW_S_LATCH, NULL,
+				BUF_GET_IF_IN_POOL, IB__FILE__, __LINE__,
+				&mtr);
 
 #ifdef UNIV_SYNC_DEBUG
 	buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH);
@@ -1563,11 +1565,12 @@ btr_search_validate(void)
 				fprintf(stderr,
 "  InnoDB: Error in an adaptive hash index pointer to page %lu\n"
 "ptr mem address %lu index id %lu %lu, node fold %lu, rec fold %lu\n",
-				buf_frame_get_page_no(page),
-				(ulint)(node->data),
-			ut_dulint_get_high(btr_page_get_index_id(page)),
-			ut_dulint_get_low(btr_page_get_index_id(page)),
-			node->fold, rec_fold((rec_t*)(node->data),
+				(ulong) buf_frame_get_page_no(page),
+				(ulong)(node->data),
+			(ulong) ut_dulint_get_high(btr_page_get_index_id(page)),
+			(ulong) ut_dulint_get_low(btr_page_get_index_id(page)),
+			(ulong) node->fold,
+			(ulong) rec_fold((rec_t*)(node->data),
 					block->curr_n_fields,
 					block->curr_n_bytes,
 					btr_page_get_index_id(page)));
@@ -1581,8 +1584,9 @@ btr_search_validate(void)
 				fprintf(stderr,
 "Page mem address %lu, is hashed %lu, n fields %lu, n bytes %lu\n"
 "side %lu\n",
-			(ulint)page, block->is_hashed, block->curr_n_fields,
-			block->curr_n_bytes, block->curr_side);
+			(ulong) page, (ulong) block->is_hashed,
+			(ulong) block->curr_n_fields,
+			(ulong) block->curr_n_bytes, (ulong) block->curr_side);
 
 				if (n_page_dumps < 20) {	
 					buf_page_print(page);
diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c
index e2661725912..844880238fa 100644
--- a/innobase/buf/buf0buf.c
+++ b/innobase/buf/buf0buf.c
@@ -197,7 +197,29 @@ If a new page is referenced in the buf_pool, and several pages
 of its random access area (for instance, 32 consecutive pages
 in a tablespace) have recently been referenced, we may predict
 that the whole area may be needed in the near future, and issue
-the read requests for the whole area. */
+the read requests for the whole area.
+
+		AWE implementation
+		------------------
+
+By a 'block' we mean the buffer header of type buf_block_t. By a 'page'
+we mean the physical 16 kB memory area allocated from RAM for that block.
+By a 'frame' we mean a 16 kB area in the virtual address space of the
+process, in the frame_mem of buf_pool.
+
+We can map pages to the frames of the buffer pool.
+
+1) A buffer block allocated to use as a non-data page, e.g., to the lock
+table, is always mapped to a frame.
+2) A bufferfixed or io-fixed data page is always mapped to a frame.
+3) When we need to map a block to a frame, we look in the list
+awe_LRU_free_mapped and try to unmap its last block, but note that
+bufferfixed or io-fixed pages cannot be unmapped.
+4) For every frame in the buffer pool there is always a block whose page is
+mapped to it. When we create the buffer pool, we map the first elements
+in the free list to the frames.
+5) When we have AWE enabled, we disable adaptive hash indexes.
+*/
 
 buf_pool_t*	buf_pool = NULL; /* The buffer buf_pool of the database */
 
@@ -221,9 +243,10 @@ buf_calc_page_new_checksum(
 {
   	ulint checksum;
 
-	/* Since the fields FIL_PAGE_FILE_FLUSH_LSN and ..._ARCH_LOG_NO
-	are written outside the buffer pool to the first pages of data
-	files, we have to skip them in the page checksum calculation.
+        /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
+        ..._ARCH_LOG_NO, are written outside the buffer pool to the first
+        pages of data files, we have to skip them in the page checksum
+        calculation.
 	We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
 	checksum is stored, and also the last 8 bytes of page because
 	there we store the old formula checksum. */
@@ -233,7 +256,7 @@ buf_calc_page_new_checksum(
   		   + ut_fold_binary(page + FIL_PAGE_DATA, 
 				           UNIV_PAGE_SIZE - FIL_PAGE_DATA
 				           - FIL_PAGE_END_LSN_OLD_CHKSUM);
-  	checksum = checksum & 0xFFFFFFFF;
+  	checksum = checksum & 0xFFFFFFFFUL;
 
   	return(checksum);
 }
@@ -256,7 +279,7 @@ buf_calc_page_old_checksum(
   	
   	checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
 
-  	checksum = checksum & 0xFFFFFFFF;
+  	checksum = checksum & 0xFFFFFFFFUL;
 
   	return(checksum);
 }
@@ -274,8 +297,9 @@ buf_page_is_corrupted(
 	ulint	old_checksum;
 	ulint	checksum_field;
 	ulint	old_checksum_field;
+#ifndef UNIV_HOTBACKUP
 	dulint	current_lsn;
-
+#endif
 	if (mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
 	     != mach_read_from_4(read_buf + UNIV_PAGE_SIZE
 				- FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) {
@@ -297,13 +321,13 @@ buf_page_is_corrupted(
 "  InnoDB: Error: page %lu log sequence number %lu %lu\n"
 "InnoDB: is in the future! Current system log sequence number %lu %lu.\n"
 "InnoDB: Your database may be corrupt.\n",
-		        mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
-			ut_dulint_get_high(
+		        (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
+			(ulong) ut_dulint_get_high(
 				mach_read_from_8(read_buf + FIL_PAGE_LSN)),
-			ut_dulint_get_low(
+			(ulong) ut_dulint_get_low(
 				mach_read_from_8(read_buf + FIL_PAGE_LSN)),
-			ut_dulint_get_high(current_lsn),
-			ut_dulint_get_low(current_lsn));
+			(ulong) ut_dulint_get_high(current_lsn),
+			(ulong) ut_dulint_get_low(current_lsn));
 		}
 	}
 #endif
@@ -356,8 +380,8 @@ buf_page_print(
 
 	ut_print_timestamp(stderr);
 	fprintf(stderr,
-	"  InnoDB: Page dump in ascii and hex (%lu bytes):\n%s",
-					(ulint)UNIV_PAGE_SIZE, buf);
+"  InnoDB: Page dump in ascii and hex (%lu bytes):\n%s",
+					(ulong) UNIV_PAGE_SIZE, buf);
 	fprintf(stderr, "InnoDB: End of page dump\n");
 
 	mem_free(buf);
@@ -369,16 +393,21 @@ buf_page_print(
 	fprintf(stderr, 
 "  InnoDB: Page checksum %lu, prior-to-4.0.14-form checksum %lu\n"
 "InnoDB: stored checksum %lu, prior-to-4.0.14-form stored checksum %lu\n",
-			checksum, old_checksum,
-			mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
-			mach_read_from_4(read_buf + UNIV_PAGE_SIZE
+			(ulong) checksum, (ulong) old_checksum,
+			(ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
+			(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
 					- FIL_PAGE_END_LSN_OLD_CHKSUM));
 	fprintf(stderr,
-	"InnoDB: Page lsn %lu %lu, low 4 bytes of lsn at page end %lu\n",
-		mach_read_from_4(read_buf + FIL_PAGE_LSN),
-		mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
-		mach_read_from_4(read_buf + UNIV_PAGE_SIZE
-					- FIL_PAGE_END_LSN_OLD_CHKSUM + 4));
+"InnoDB: Page lsn %lu %lu, low 4 bytes of lsn at page end %lu\n"
+"InnoDB: Page number (if stored to page already) %lu,\n"
+"InnoDB: space id (if created with >= MySQL-4.1.1 and stored already) %lu\n",
+		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
+		(ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
+		(ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
+					- FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
+		(ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
+		(ulong) mach_read_from_4(read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
+
 	if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
 	    == TRX_UNDO_INSERT) {
 	    	fprintf(stderr,
@@ -392,12 +421,9 @@ buf_page_print(
 
 	if (fil_page_get_type(read_buf) == FIL_PAGE_INDEX) {
 	    	fprintf(stderr,
-			"InnoDB: Page may be an index page ");
-
-		fprintf(stderr,
-			"where index id is %lu %lu\n",
-			ut_dulint_get_high(btr_page_get_index_id(read_buf)),
-			ut_dulint_get_low(btr_page_get_index_id(read_buf)));
+"InnoDB: Page may be an index page where index id is %lu %lu\n",
+			(ulong) ut_dulint_get_high(btr_page_get_index_id(read_buf)),
+			(ulong) ut_dulint_get_low(btr_page_get_index_id(read_buf)));
 
 		/* If the code is in ibbackup, dict_sys may be uninitialized,
 		i.e., NULL */
@@ -413,7 +439,6 @@ buf_page_print(
 						index->name);
 			}
 		}
-	  
 	} else if (fil_page_get_type(read_buf) == FIL_PAGE_INODE) {
 		fprintf(stderr, "InnoDB: Page may be an 'inode' page\n");
 	} else if (fil_page_get_type(read_buf) == FIL_PAGE_IBUF_FREE_LIST) {
@@ -429,23 +454,29 @@ void
 buf_block_init(
 /*===========*/
 	buf_block_t*	block,	/* in: pointer to control block */
-	byte*		frame)	/* in: pointer to buffer frame */
+	byte*		frame)	/* in: pointer to buffer frame, or NULL if in
+				the case of AWE there is no frame */
 {
 	block->state = BUF_BLOCK_NOT_USED;
 	
 	block->frame = frame;
 
+	block->awe_info = NULL;
+
 	block->modify_clock = ut_dulint_zero;
 	
 	block->file_page_was_freed = FALSE;
 
 	block->check_index_page_at_flush = FALSE;
 
+	block->in_free_list = FALSE;
+	block->in_LRU_list = FALSE;
+
+	block->n_pointers = 0;
+
 	rw_lock_create(&(block->lock));
 	ut_ad(rw_lock_validate(&(block->lock)));
 
-	rw_lock_create(&(block->read_lock));
-	rw_lock_set_level(&(block->read_lock), SYNC_NO_ORDER_CHECK);
 #ifdef UNIV_SYNC_DEBUG
 	rw_lock_create(&(block->debug_latch));
 	rw_lock_set_level(&(block->debug_latch), SYNC_NO_ORDER_CHECK);
@@ -453,25 +484,40 @@ buf_block_init(
 }
 
 /************************************************************************
-Creates a buffer buf_pool object. */
-static
+Creates the buffer pool. */
+
 buf_pool_t*
-buf_pool_create(
-/*============*/
+buf_pool_init(
+/*==========*/
 				/* out, own: buf_pool object, NULL if not
-				enough memory */
+				enough memory or error */
 	ulint	max_size,	/* in: maximum size of the buf_pool in
 				blocks */
-	ulint	curr_size)	/* in: current size to use, must be <=
+	ulint	curr_size,	/* in: current size to use, must be <=
 				max_size, currently must be equal to
 				max_size */
+	ulint	n_frames)	/* in: number of frames; if AWE is used,
+				this is the size of the address space window
+				where physical memory pages are mapped; if
+				AWE is not used then this must be the same
+				as max_size */
 {
 	byte*		frame;
 	ulint		i;
 	buf_block_t*	block;
 	
 	ut_a(max_size == curr_size);
+	ut_a(srv_use_awe || n_frames == max_size);
 	
+	if (n_frames > curr_size) {
+	        fprintf(stderr,
+"InnoDB: AWE: Error: you must specify in my.cnf .._awe_mem_mb larger\n"
+"InnoDB: than .._buffer_pool_size. Now the former is %lu pages,\n"
+"InnoDB: the latter %lu pages.\n", (ulong) curr_size, (ulong) n_frames);
+
+		return(NULL);
+	}
+
 	buf_pool = mem_alloc(sizeof(buf_pool_t));
 
 	/* 1. Initialize general fields
@@ -480,8 +526,38 @@ buf_pool_create(
 	mutex_set_level(&(buf_pool->mutex), SYNC_BUF_POOL);
 
 	mutex_enter(&(buf_pool->mutex));
-	
-	buf_pool->frame_mem = ut_malloc(UNIV_PAGE_SIZE * (max_size + 1));
+
+	if (srv_use_awe) {
+		/*----------------------------------------*/
+		/* Allocate the virtual address space window, i.e., the
+		buffer pool frames */
+
+		buf_pool->frame_mem = os_awe_allocate_virtual_mem_window(
+					UNIV_PAGE_SIZE * (n_frames + 1));
+					
+		/* Allocate the physical memory for AWE and the AWE info array
+		for buf_pool */
+
+		if ((curr_size % ((1024 * 1024) / UNIV_PAGE_SIZE)) != 0) {
+
+		        fprintf(stderr,
+"InnoDB: AWE: Error: physical memory must be allocated in full megabytes.\n"
+"InnoDB: Trying to allocate %lu database pages.\n", 
+			  (ulong) curr_size);
+
+		        return(NULL);
+		}
+
+		if (!os_awe_allocate_physical_mem(&(buf_pool->awe_info),
+			curr_size / ((1024 * 1024) / UNIV_PAGE_SIZE))) {
+
+			return(NULL);
+		}
+		/*----------------------------------------*/
+	} else {
+		buf_pool->frame_mem = ut_malloc(
+					UNIV_PAGE_SIZE * (n_frames + 1));
+	}
 
 	if (buf_pool->frame_mem == NULL) {
 
@@ -498,21 +574,60 @@ buf_pool_create(
 	buf_pool->max_size = max_size;
 	buf_pool->curr_size = curr_size;
 
+	buf_pool->n_frames = n_frames;
+
 	/* Align pointer to the first frame */
 
 	frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE);
+
 	buf_pool->frame_zero = frame;
+	buf_pool->high_end = frame + UNIV_PAGE_SIZE * n_frames;
+
+	if (srv_use_awe) {
+		/*----------------------------------------*/
+		/* Map an initial part of the allocated physical memory to
+		the window */
+
+		os_awe_map_physical_mem_to_window(buf_pool->frame_zero,
+				n_frames *
+				(UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE),
+					buf_pool->awe_info);
+		/*----------------------------------------*/
+	}
 
-	buf_pool->high_end = frame + UNIV_PAGE_SIZE * curr_size;
+	buf_pool->blocks_of_frames = ut_malloc(sizeof(void*) * n_frames);
+	
+	if (buf_pool->blocks_of_frames == NULL) {
+
+		return(NULL);
+	}
+
+	/* Init block structs and assign frames for them; in the case of
+	AWE there are fewer frames than blocks. Then we assign the frames
+	to the first blocks (we already mapped the memory above). We also
+	init the awe_info for every block. */
 
-	/* Init block structs and assign frames for them */
 	for (i = 0; i < max_size; i++) {
 
 		block = buf_pool_get_nth_block(buf_pool, i);
+
+		if (i < n_frames) {
+			frame = buf_pool->frame_zero + i * UNIV_PAGE_SIZE;
+			*(buf_pool->blocks_of_frames + i) = block;
+		} else {
+			frame = NULL;
+		}
+		
 		buf_block_init(block, frame);
-		frame = frame + UNIV_PAGE_SIZE;
+
+		if (srv_use_awe) {
+			/*----------------------------------------*/
+			block->awe_info = buf_pool->awe_info
+				+ i * (UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE);
+			/*----------------------------------------*/
+		}
 	}
-	
+
 	buf_pool->page_hash = hash_create(2 * max_size);
 
 	buf_pool->n_pend_reads = 0;
@@ -522,12 +637,14 @@ buf_pool_create(
 	buf_pool->n_pages_read = 0;
 	buf_pool->n_pages_written = 0;
 	buf_pool->n_pages_created = 0;
-
+	buf_pool->n_pages_awe_remapped = 0;
+	
 	buf_pool->n_page_gets = 0;
 	buf_pool->n_page_gets_old = 0;
 	buf_pool->n_pages_read_old = 0;
 	buf_pool->n_pages_written_old = 0;
 	buf_pool->n_pages_created_old = 0;
+	buf_pool->n_pages_awe_remapped_old = 0;
 	
 	/* 2. Initialize flushing fields
 	   ---------------------------- */
@@ -550,37 +667,122 @@ buf_pool_create(
 
 	buf_pool->LRU_old = NULL;
 
+	UT_LIST_INIT(buf_pool->awe_LRU_free_mapped);
+
 	/* Add control blocks to the free list */
 	UT_LIST_INIT(buf_pool->free);
+
 	for (i = 0; i < curr_size; i++) {
 
 		block = buf_pool_get_nth_block(buf_pool, i);
 
-		UT_LIST_ADD_FIRST(free, buf_pool->free, block);
+		if (block->frame) {
+			/* Wipe contents of frame to eliminate a Purify
+			warning */
+
+#ifdef HAVE_purify
+			memset(block->frame, '\0', UNIV_PAGE_SIZE);
+#endif
+			if (srv_use_awe) {
+				/* Add to the list of blocks mapped to
+				frames */
+				
+				UT_LIST_ADD_LAST(awe_LRU_free_mapped,
+					buf_pool->awe_LRU_free_mapped, block);
+			}
+		}
+
+		UT_LIST_ADD_LAST(free, buf_pool->free, block);
+		block->in_free_list = TRUE;
 	}
 
 	mutex_exit(&(buf_pool->mutex));
 
-	btr_search_sys_create(curr_size * UNIV_PAGE_SIZE / sizeof(void*) / 64);
+	if (srv_use_adaptive_hash_indexes) {
+	  	btr_search_sys_create(
+			  curr_size * UNIV_PAGE_SIZE / sizeof(void*) / 64);
+	} else {
+	        /* Create only a small dummy system */
+	        btr_search_sys_create(1000);
+	}
 
 	return(buf_pool);
 }	
 
 /************************************************************************
-Initializes the buffer buf_pool of the database. */
+Maps the page of block to a frame, if not mapped yet. Unmaps some page
+from the end of the awe_LRU_free_mapped list. */
 
 void
-buf_pool_init(
-/*==========*/
-	ulint	max_size,	/* in: maximum size of the buf_pool in blocks */
-	ulint	curr_size)	/* in: current size to use, must be <=
-				max_size */
+buf_awe_map_page_to_frame(
+/*======================*/
+	buf_block_t*	block,		/* in: block whose page should be
+					mapped to a frame */
+	ibool		add_to_mapped_list) /* in: TRUE if, in the case
+					we need to map the page, we should
+					also add the block to the
+					awe_LRU_free_mapped list */
 {
-	ut_a(buf_pool == NULL);
+	buf_block_t*	bck;
 
-	buf_pool_create(max_size, curr_size);
+	ut_ad(mutex_own(&(buf_pool->mutex)));
+	ut_ad(block);
+
+	if (block->frame) {
+
+		return;
+	}
 
-	ut_ad(buf_validate());
+	/* Scan awe_LRU_free_mapped from the end and try to find a block
+	which is not buffer-fixed or io-fixed */
+
+	bck = UT_LIST_GET_LAST(buf_pool->awe_LRU_free_mapped);
+
+	while (bck) {	
+		if (bck->state == BUF_BLOCK_FILE_PAGE
+	    	    && (bck->buf_fix_count != 0 || bck->io_fix != 0)) {
+
+			/* We have to skip this */
+			bck = UT_LIST_GET_PREV(awe_LRU_free_mapped, bck);
+		} else {
+			/* We can map block to the frame of bck */
+
+			os_awe_map_physical_mem_to_window(
+				bck->frame,
+				UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE,
+				block->awe_info);
+
+			block->frame = bck->frame;
+
+			*(buf_pool->blocks_of_frames
+				+ (((ulint)(block->frame
+						- buf_pool->frame_zero))
+						>> UNIV_PAGE_SIZE_SHIFT))
+				= block;
+			
+			bck->frame = NULL;
+			UT_LIST_REMOVE(awe_LRU_free_mapped,
+					buf_pool->awe_LRU_free_mapped,
+					bck);
+
+			if (add_to_mapped_list) {
+				UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
+					buf_pool->awe_LRU_free_mapped,
+					block);
+			}
+
+			buf_pool->n_pages_awe_remapped++;
+			
+			return;
+		}
+	}
+
+	fprintf(stderr,
+"InnoDB: AWE: Fatal error: cannot find a page to unmap\n"
+"InnoDB: awe_LRU_free_mapped list length %lu\n",
+		(ulong) UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
+
+	ut_a(0);
 }
 
 /************************************************************************
@@ -589,7 +791,9 @@ UNIV_INLINE
 buf_block_t*
 buf_block_alloc(void)
 /*=================*/
-				/* out, own: the allocated block */
+				/* out, own: the allocated block; also if AWE
+				is used it is guaranteed that the page is
+				mapped to a frame */
 {
 	buf_block_t*	block;
 
@@ -633,7 +837,7 @@ buf_page_make_young(
 
 	block = buf_block_align(frame);
 
-	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 
 	buf_LRU_make_block_young(block);
 
@@ -648,7 +852,7 @@ buf_block_free(
 /*===========*/
 	buf_block_t*	block)	/* in, own: block to be freed */
 {
-	ut_ad(block->state != BUF_BLOCK_FILE_PAGE);
+	ut_a(block->state != BUF_BLOCK_FILE_PAGE);
 
 	mutex_enter(&(buf_pool->mutex));
 
@@ -912,6 +1116,8 @@ loop:
 		goto loop;
 	}
 
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
 	must_read = FALSE;
 	
 	if (block->io_fix == BUF_IO_READ) {
@@ -927,6 +1133,19 @@ loop:
 		}
 	}		
 
+	/* If AWE is enabled and the page is not mapped to a frame, then
+	map it */
+
+	if (block->frame == NULL) {
+		ut_a(srv_use_awe);
+
+		/* We set the second parameter to TRUE because the block is in
+		the LRU list and we must add it to the awe_LRU_free_mapped
+		list once it is mapped to a frame */
+		
+		buf_awe_map_page_to_frame(block, TRUE);
+	}
+	
 #ifdef UNIV_SYNC_DEBUG
 	buf_block_buf_fix_inc_debug(block, file, line);
 #else
@@ -981,8 +1200,26 @@ loop:
 	} else if (rw_latch == RW_NO_LATCH) {
 
 		if (must_read) {
-			rw_lock_x_lock(&(block->read_lock));
-			rw_lock_x_unlock(&(block->read_lock));
+		        /* Let us wait until the read operation
+			completes */
+
+		        for (;;) {
+			        mutex_enter(&(buf_pool->mutex));
+
+		                if (block->io_fix == BUF_IO_READ) {
+
+				        mutex_exit(&(buf_pool->mutex));
+				  
+				        /* Sleep 20 milliseconds */
+
+				        os_thread_sleep(20000);
+				} else {
+				  
+				       mutex_exit(&(buf_pool->mutex));
+
+				       break;
+				}
+			}
 		}
 
 		fix_type = MTR_MEMO_BUF_FIX;
@@ -1021,28 +1258,27 @@ buf_page_optimistic_get_func(
 /*=========================*/
 				/* out: TRUE if success */
 	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
-	buf_frame_t*	guess,	/* in: guessed frame */
+	buf_block_t*	block,	/* in: guessed buffer block */
+	buf_frame_t*	guess,	/* in: guessed frame; note that AWE may move
+				frames */
 	dulint		modify_clock,/* in: modify clock value if mode is
 				..._GUESS_ON_CLOCK */
 	char*		file,	/* in: file name */
 	ulint		line,	/* in: line where called */
 	mtr_t*		mtr)	/* in: mini-transaction */
 {
-	buf_block_t*	block;
 	ibool		accessed;
 	ibool		success;
 	ulint		fix_type;
 
-	ut_ad(mtr && guess);
+	ut_ad(mtr && block);
 	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
-	buf_pool->n_page_gets++;
-
-	block = buf_block_align(guess);
 	
 	mutex_enter(&(buf_pool->mutex));
 
-	if (block->state != BUF_BLOCK_FILE_PAGE) {
+	/* If AWE is used, block may have a different frame now, e.g., NULL */
+	
+	if (block->state != BUF_BLOCK_FILE_PAGE || block->frame != guess) {
 
 		mutex_exit(&(buf_pool->mutex));
 
@@ -1135,12 +1371,15 @@ buf_page_optimistic_get_func(
 #ifdef UNIV_IBUF_DEBUG
 	ut_a(ibuf_count_get(block->space, block->offset) == 0);
 #endif
+	buf_pool->n_page_gets++;
+
 	return(TRUE);
 }
 
 /************************************************************************
 This is used to get access to a known database page, when no waiting can be
-done. */
+done. For example, if a search in an adaptive hash index leads us to this
+frame. */
 
 ibool
 buf_page_get_known_nowait(
@@ -1159,13 +1398,11 @@ buf_page_get_known_nowait(
 
 	ut_ad(mtr);
 	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
-	buf_pool->n_page_gets++;
-
-	block = buf_block_align(guess);
 	
 	mutex_enter(&(buf_pool->mutex));
 
+	block = buf_block_align(guess);
+
 	if (block->state == BUF_BLOCK_REMOVE_HASH) {
 	        /* Another thread is just freeing the block from the LRU list
 	        of the buffer pool: do not try to access this page; this
@@ -1179,6 +1416,8 @@ buf_page_get_known_nowait(
 		return(FALSE);
 	}
 
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
 #ifdef UNIV_SYNC_DEBUG
 	buf_block_buf_fix_inc_debug(block, file, line);
 #else
@@ -1233,6 +1472,8 @@ buf_page_get_known_nowait(
 	ut_a((mode == BUF_KEEP_OLD)
 		|| (ibuf_count_get(block->space, block->offset) == 0));
 #endif
+	buf_pool->n_page_gets++;
+
 	return(TRUE);
 }
 
@@ -1289,7 +1530,7 @@ buf_page_init(
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&(buf_pool->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
-	ut_ad(block->state == BUF_BLOCK_READY_FOR_USE);
+	ut_a(block->state != BUF_BLOCK_FILE_PAGE);
 
 	/* Set the state of the block */
 	block->magic_n		= BUF_BLOCK_MAGIC_N;
@@ -1305,6 +1546,19 @@ buf_page_init(
 	
 	/* Insert into the hash table of file pages */
 
+        if (buf_page_hash_get(space, offset)) {
+                fprintf(stderr,
+"InnoDB: Error: page %lu %lu already found from the hash table\n",
+			(ulong) space,
+			(ulong) offset);
+                buf_print();
+                buf_LRU_print();
+                buf_validate();
+                buf_LRU_validate();
+
+                ut_a(0);
+        }
+
 	HASH_INSERT(buf_block_t, hash, buf_pool->page_hash,
 				buf_page_address_fold(space, offset), block);
 
@@ -1328,25 +1582,35 @@ buf_page_init(
 
 /************************************************************************
 Function which inits a page for read to the buffer buf_pool. If the page is
-already in buf_pool, does nothing. Sets the io_fix flag to BUF_IO_READ and
-sets a non-recursive exclusive lock on the buffer frame. The io-handler must
-take care that the flag is cleared and the lock released later. This is one
-of the functions which perform the state transition NOT_USED => FILE_PAGE to
-a block (the other is buf_page_create). */ 
+(1) already in buf_pool, or
+(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
+(3) if the space is deleted or being deleted,
+then this function does nothing.
+Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
+on the buffer frame. The io-handler must take care that the flag is cleared
+and the lock released later. This is one of the functions which perform the
+state transition NOT_USED => FILE_PAGE to a block (the other is
+buf_page_create). */ 
 
 buf_block_t*
 buf_page_init_for_read(
 /*===================*/
-			/* out: pointer to the block or NULL */
-	ulint	mode,	/* in: BUF_READ_IBUF_PAGES_ONLY, ... */
-	ulint	space,	/* in: space id */
-	ulint	offset)	/* in: page number */
+				/* out: pointer to the block or NULL */
+	ulint*		err,	/* out: DB_SUCCESS or DB_TABLESPACE_DELETED */
+	ulint		mode,	/* in: BUF_READ_IBUF_PAGES_ONLY, ... */
+	ulint		space,	/* in: space id */
+	ib_longlong	tablespace_version,/* in: prevents reading from a wrong
+				version of the tablespace in case we have done
+				DISCARD + IMPORT */
+	ulint		offset)	/* in: page number */
 {
 	buf_block_t*	block;
 	mtr_t		mtr;
-	
+
 	ut_ad(buf_pool);
 
+	*err = DB_SUCCESS;
+
 	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
 		/* It is a read-ahead within an ibuf routine */
 
@@ -1367,13 +1631,20 @@ buf_page_init_for_read(
 	
 	block = buf_block_alloc();
 
-	ut_ad(block);
+	ut_a(block);
 
 	mutex_enter(&(buf_pool->mutex));
-	
-	if (NULL != buf_page_hash_get(space, offset)) {
 
-		/* The page is already in buf_pool, return */
+	if (fil_tablespace_deleted_or_being_deleted_in_mem(space,
+							tablespace_version)) {
+		*err = DB_TABLESPACE_DELETED;
+	}
+
+	if (*err == DB_TABLESPACE_DELETED
+	    || NULL != buf_page_hash_get(space, offset)) {
+
+		/* The page belongs to a space which has been deleted or is
+		being deleted, or the page is already in buf_pool, return */
 
 		mutex_exit(&(buf_pool->mutex));
 		buf_block_free(block);
@@ -1405,8 +1676,6 @@ buf_page_init_for_read(
 	is completed. The x-lock is cleared by the io-handler thread. */
 	
 	rw_lock_x_lock_gen(&(block->lock), BUF_IO_READ);
-
-	rw_lock_x_lock_gen(&(block->read_lock), BUF_IO_READ);
 	
  	mutex_exit(&(buf_pool->mutex));
 
@@ -1464,8 +1733,9 @@ buf_page_create(
 	/* If we get here, the page was not in buf_pool: init it there */
 
 	if (buf_debug_prints) {
-		printf("Creating space %lu page %lu to buffer\n", space,
-								offset);
+		printf("Creating space %lu page %lu to buffer\n",
+		       (ulong) space,
+		       (ulong) offset);
 	}
 
 	block = free_block;
@@ -1491,7 +1761,7 @@ buf_page_create(
 	/* Delete possible entries for the page from the insert buffer:
 	such can exist if the page belonged to an index which was dropped */
 
-	ibuf_merge_or_delete_for_page(NULL, space, offset);	
+	ibuf_merge_or_delete_for_page(NULL, space, offset, TRUE);
 
 	/* Flush pages from the end of the LRU list if necessary */
 	buf_flush_free_margin();
@@ -1526,6 +1796,8 @@ buf_page_io_complete(
 	
 	ut_ad(block);
 
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
 	io_type = block->io_fix;
 
 	if (io_type == BUF_IO_READ) {
@@ -1541,7 +1813,7 @@ buf_page_io_complete(
 
 			fprintf(stderr,
 "InnoDB: Error: page n:o stored in the page read in is %lu, should be %lu!\n",
-				read_page_no, block->offset);
+				(ulong) read_page_no, (ulong) block->offset);
 		}
 		/* From version 3.23.38 up we store the page checksum
 		   to the 4 first bytes of the page end lsn field */
@@ -1549,7 +1821,7 @@ buf_page_io_complete(
 		if (buf_page_is_corrupted(block->frame)) {
 		  	fprintf(stderr,
 		"InnoDB: Database page corruption on disk or a failed\n"
-		"InnoDB: file read of page %lu.\n", block->offset);
+		"InnoDB: file read of page %lu.\n", (ulong) block->offset);
 			  
 		  	fprintf(stderr,
 		"InnoDB: You may have to recover from a backup.\n");
@@ -1558,7 +1830,7 @@ buf_page_io_complete(
 
 		  	fprintf(stderr,
 		"InnoDB: Database page corruption on disk or a failed\n"
-		"InnoDB: file read of page %lu.\n", block->offset);
+		"InnoDB: file read of page %lu.\n", (ulong) block->offset);
 		  	fprintf(stderr,
 		"InnoDB: You may have to recover from a backup.\n");
 			fprintf(stderr,
@@ -1589,7 +1861,7 @@ buf_page_io_complete(
 
 		if (!recv_no_ibuf_operations) {
 			ibuf_merge_or_delete_for_page(block->frame,
-						block->space, block->offset);
+					block->space, block->offset, TRUE);
 		}
 	}
 	
@@ -1614,9 +1886,7 @@ buf_page_io_complete(
 		buf_pool->n_pend_reads--;
 		buf_pool->n_pages_read++;
 
-
 		rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ);
-		rw_lock_x_unlock_gen(&(block->read_lock), BUF_IO_READ);
 
 		if (buf_debug_prints) {
 			printf("Has read ");
@@ -1641,8 +1911,8 @@ buf_page_io_complete(
 	mutex_exit(&(buf_pool->mutex));
 
 	if (buf_debug_prints) {
-		printf("page space %lu page no %lu", block->space,
-								block->offset);
+		printf("page space %lu page no %lu", (ulong) block->space,
+						     (ulong) block->offset);
 		id = btr_page_get_index_id(block->frame);
 
 		index = NULL;
@@ -1757,14 +2027,16 @@ buf_validate(void)
  	}
 
 	if (n_lru + n_free > buf_pool->curr_size) {
-		printf("n LRU %lu, n free %lu\n", n_lru, n_free);
+		printf("n LRU %lu, n free %lu\n", (ulong) n_lru,
+		       (ulong) n_free);
 		ut_error;
 	}
 
 	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
 	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
 		printf("Free list len %lu, free blocks %lu\n",
-		    UT_LIST_GET_LEN(buf_pool->free), n_free);
+		       (ulong) UT_LIST_GET_LEN(buf_pool->free),
+		       (ulong) n_free);
 		ut_error;
 	}
 	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
@@ -1800,29 +2072,30 @@ buf_print(void)
 	
 	ut_ad(buf_pool);
 
-	size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE;
+	size = buf_pool->curr_size;
 
 	index_ids = mem_alloc(sizeof(dulint) * size);
 	counts = mem_alloc(sizeof(ulint) * size);
 
 	mutex_enter(&(buf_pool->mutex));
 	
-	printf("buf_pool size %lu \n", size);
-	printf("database pages %lu \n", UT_LIST_GET_LEN(buf_pool->LRU));
-	printf("free pages %lu \n", UT_LIST_GET_LEN(buf_pool->free));
+	printf("buf_pool size %lu \n", (ulong) size);
+	printf("database pages %lu \n", (ulong) UT_LIST_GET_LEN(buf_pool->LRU));
+	printf("free pages %lu \n", (ulong) UT_LIST_GET_LEN(buf_pool->free));
 	printf("modified database pages %lu \n",
-				UT_LIST_GET_LEN(buf_pool->flush_list));
+				(ulong) UT_LIST_GET_LEN(buf_pool->flush_list));
 
-	printf("n pending reads %lu \n", buf_pool->n_pend_reads);
+	printf("n pending reads %lu \n", (ulong) buf_pool->n_pend_reads);
 
 	printf("n pending flush LRU %lu list %lu single page %lu\n",
-		buf_pool->n_flush[BUF_FLUSH_LRU],
-		buf_pool->n_flush[BUF_FLUSH_LIST],
-		buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
+	       (ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
+	       (ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
+	       (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
 
 	printf("pages read %lu, created %lu, written %lu\n",
-			buf_pool->n_pages_read, buf_pool->n_pages_created,
-						buf_pool->n_pages_written);
+	       (ulong) buf_pool->n_pages_read,
+	       (ulong) buf_pool->n_pages_created,
+	       (ulong) buf_pool->n_pages_written);
 
 	/* Count the number of blocks belonging to each index in the buffer */
 	
@@ -1866,7 +2139,8 @@ buf_print(void)
 		index = dict_index_get_if_in_cache(index_ids[i]);
 
 		printf("Block count for index %lu in buffer is about %lu",
-			ut_dulint_get_low(index_ids[i]), counts[i]);
+		       (ulong) ut_dulint_get_low(index_ids[i]),
+		       (ulong) counts[i]);
 
 		if (index) {
 			printf(" index name %s table %s", index->name,
@@ -1938,35 +2212,44 @@ buf_print_io(
 		return;
 	}
 
-	size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE;
+	size = buf_pool->curr_size;
 
 	mutex_enter(&(buf_pool->mutex));
 	
 	buf += sprintf(buf,
-		"Buffer pool size   %lu\n", size);
+		"Buffer pool size   %lu\n", (ulong) size);
 	buf += sprintf(buf,
-		"Free buffers       %lu\n", UT_LIST_GET_LEN(buf_pool->free));
+		"Free buffers       %lu\n", (ulong) UT_LIST_GET_LEN(buf_pool->free));
 	buf += sprintf(buf,
-		"Database pages     %lu\n", UT_LIST_GET_LEN(buf_pool->LRU));
+		"Database pages     %lu\n", (ulong) UT_LIST_GET_LEN(buf_pool->LRU));
 /*
 	buf += sprintf(buf,
-		"Lock heap buffers  %lu\n", buf_pool->n_lock_heap_pages);
+		"Lock heap buffers  %lu\n", (ulong) buf_pool->n_lock_heap_pages);
 	buf += sprintf(buf,
-		"Hash index buffers %lu\n", buf_pool->n_adaptive_hash_pages);
+		"Hash index buffers %lu\n", (ulong) buf_pool->n_adaptive_hash_pages);
 */
 	buf += sprintf(buf,
 		"Modified db pages  %lu\n",
-				UT_LIST_GET_LEN(buf_pool->flush_list));
+				(ulong) UT_LIST_GET_LEN(buf_pool->flush_list));
+	if (srv_use_awe) {
+		buf += sprintf(buf,
+		"AWE: Buffer pool memory frames                        %lu\n",
+				(ulong) buf_pool->n_frames);
+		
+		buf += sprintf(buf,
+		"AWE: Database pages and free buffers mapped in frames %lu\n",
+				(ulong) UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));
+	}
 
-	buf += sprintf(buf, "Pending reads %lu \n", buf_pool->n_pend_reads);
+	buf += sprintf(buf, "Pending reads %lu \n", (ulong) buf_pool->n_pend_reads);
 
 	buf += sprintf(buf,
 		"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
-		buf_pool->n_flush[BUF_FLUSH_LRU]
-				+ buf_pool->init_flush[BUF_FLUSH_LRU],
-		buf_pool->n_flush[BUF_FLUSH_LIST]
-				+ buf_pool->init_flush[BUF_FLUSH_LIST],
-		buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
+		(ulong) (buf_pool->n_flush[BUF_FLUSH_LRU]
+				+ buf_pool->init_flush[BUF_FLUSH_LRU]),
+		(ulong) (buf_pool->n_flush[BUF_FLUSH_LIST]
+				+ buf_pool->init_flush[BUF_FLUSH_LIST]),
+		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
 
 	current_time = time(NULL);
 	time_elapsed = 0.001 + difftime(current_time,
@@ -1974,8 +2257,9 @@ buf_print_io(
 	buf_pool->last_printout_time = current_time;
 
 	buf += sprintf(buf, "Pages read %lu, created %lu, written %lu\n",
-			buf_pool->n_pages_read, buf_pool->n_pages_created,
-						buf_pool->n_pages_written);
+		       (ulong) buf_pool->n_pages_read,
+		       (ulong) buf_pool->n_pages_created,
+		       (ulong) buf_pool->n_pages_written);
 	buf += sprintf(buf, "%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
 		(buf_pool->n_pages_read - buf_pool->n_pages_read_old)
 		/ time_elapsed,
@@ -1984,12 +2268,19 @@ buf_print_io(
 		(buf_pool->n_pages_written - buf_pool->n_pages_written_old)
 		/ time_elapsed);
 
+	if (srv_use_awe) {
+		buf += sprintf(buf, "AWE: %.2f page remaps/s\n",
+		(buf_pool->n_pages_awe_remapped
+				- buf_pool->n_pages_awe_remapped_old)
+			/ time_elapsed);
+	}
+		
 	if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) {
 		buf += sprintf(buf, "Buffer pool hit rate %lu / 1000\n",
-		1000
+       (ulong) (1000
 		- ((1000 *
 		    (buf_pool->n_pages_read - buf_pool->n_pages_read_old))
-		/ (buf_pool->n_page_gets - buf_pool->n_page_gets_old)));
+		/ (buf_pool->n_page_gets - buf_pool->n_page_gets_old))));
 	} else {
 		buf += sprintf(buf,
 			"No buffer pool page gets since the last printout\n");
@@ -1999,6 +2290,7 @@ buf_print_io(
 	buf_pool->n_pages_read_old = buf_pool->n_pages_read;
 	buf_pool->n_pages_created_old = buf_pool->n_pages_created;
 	buf_pool->n_pages_written_old = buf_pool->n_pages_written;
+	buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped;
 
 	mutex_exit(&(buf_pool->mutex));
 }
@@ -2015,6 +2307,7 @@ buf_refresh_io_stats(void)
 	buf_pool->n_pages_read_old = buf_pool->n_pages_read;
 	buf_pool->n_pages_created_old = buf_pool->n_pages_created;
 	buf_pool->n_pages_written_old = buf_pool->n_pages_written;
+	buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped; 
 }
 
 /*************************************************************************
@@ -2039,7 +2332,7 @@ buf_all_freed(void)
 
 			if (!buf_flush_ready_for_replace(block)) {
 
-			    	/* printf("Page %lu %lu still fixed or dirty\n",
+			     /* printf("Page %lu %lu still fixed or dirty\n",
 			    		block->space, block->offset); */
 			    	ut_error;
 			}
diff --git a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c
index 66c9bb605dc..c568d5925fa 100644
--- a/innobase/buf/buf0flu.c
+++ b/innobase/buf/buf0flu.c
@@ -24,6 +24,7 @@ Created 11/11/1995 Heikki Tuuri
 #include "log0log.h"
 #include "os0file.h"
 #include "trx0sys.h"
+#include "srv0srv.h"
 
 /* When flushed, dirty blocks are searched in neigborhoods of this size, and
 flushed along with the original page. */
@@ -51,6 +52,8 @@ buf_flush_insert_into_flush_list(
 	ut_ad(mutex_own(&(buf_pool->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
 
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
 	ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
 	      || (ut_dulint_cmp(
 			(UT_LIST_GET_FIRST(buf_pool->flush_list))
@@ -107,7 +110,7 @@ buf_flush_ready_for_replace(
 /*========================*/
 				/* out: TRUE if can replace immediately */
 	buf_block_t*	block)	/* in: buffer control block, must be in state
-				BUF_BLOCK_FILE_PAGE and in the LRU list*/
+				BUF_BLOCK_FILE_PAGE and in the LRU list */
 {
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&(buf_pool->mutex)));
@@ -138,11 +141,10 @@ buf_flush_ready_for_flush(
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&(buf_pool->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
-	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 
 	if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0)
 	    					&& (block->io_fix == 0)) {
-
 	    	if (flush_type != BUF_FLUSH_LRU) {
 
 			return(TRUE);
@@ -172,6 +174,8 @@ buf_flush_write_complete(
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&(buf_pool->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
 	block->oldest_modification = ut_dulint_zero;
 
 	UT_LIST_REMOVE(flush_list, buf_pool->flush_list, block);
@@ -251,7 +255,7 @@ buf_flush_buffered_writes(void)
 	"InnoDB: to be written to data file. We intentionally crash server\n"
 	"InnoDB: to prevent corrupt data from ending up in data\n"
 	"InnoDB: files.\n",
-			block->offset, block->space);
+			(ulong) block->offset, (ulong) block->space);
 
 			ut_error;
 		}
@@ -291,6 +295,8 @@ buf_flush_buffered_writes(void)
 	for (i = 0; i < trx_doublewrite->first_free; i++) {
 		block = trx_doublewrite->buf_block_arr[i];
 
+		ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
 		fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
 			FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
 		 			(void*)block->frame, (void*)block);
@@ -330,6 +336,8 @@ buf_flush_post_to_doublewrite_buf(
 try_again:
 	mutex_enter(&(trx_doublewrite->mutex));
 
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
 	if (trx_doublewrite->first_free
 				>= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
 		mutex_exit(&(trx_doublewrite->mutex));
@@ -370,16 +378,15 @@ buf_flush_init_for_writing(
 	ulint	space,		/* in: space id */
 	ulint	page_no)	/* in: page number */
 {	
-	UT_NOT_USED(space);
-
 	/* Write the newest modification lsn to the page header and trailer */
 	mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
 
 	mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
 								newest_lsn);
-	/* Write the page number */
+	/* Write the page number and the space id */
 
 	mach_write_to_4(page + FIL_PAGE_OFFSET, page_no);
+        mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space);
 
 	/* Store the new formula checksum */
 
@@ -405,6 +412,8 @@ buf_flush_write_block_low(
 /*======================*/
 	buf_block_t*	block)	/* in: buffer block to write */
 {
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
 #ifdef UNIV_IBUF_DEBUG
 	ut_a(ibuf_count_get(block->space, block->offset) == 0);
 #endif
@@ -453,12 +462,26 @@ buf_flush_try_page(
 
 	block = buf_page_hash_get(space, offset);
 
-	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
 
 	if (flush_type == BUF_FLUSH_LIST
 	    && block && buf_flush_ready_for_flush(block, flush_type)) {
 	
 		block->io_fix = BUF_IO_WRITE;
+
+		/* If AWE is enabled and the page is not mapped to a frame,
+		then map it */
+
+		if (block->frame == NULL) {
+			ut_a(srv_use_awe);
+
+			/* We set the second parameter to TRUE because the
+			block is in the LRU list and we must add it to the
+			awe_LRU_free_mapped list once mapped to a frame */
+		
+			buf_awe_map_page_to_frame(block, TRUE);
+		}
+
 		block->flush_type = flush_type;
 
 		if (buf_pool->n_flush[flush_type] == 0) {
@@ -490,7 +513,8 @@ buf_flush_try_page(
 
 		if (buf_debug_prints) {
 			printf("Flushing page space %lu, page no %lu \n",
-					block->space, block->offset);
+			       (ulong) block->space,
+			       (ulong) block->offset);
 		}
 
 		buf_flush_write_block_low(block);
@@ -509,6 +533,20 @@ buf_flush_try_page(
 		..._ready_for_flush). */
 
 		block->io_fix = BUF_IO_WRITE;
+
+		/* If AWE is enabled and the page is not mapped to a frame,
+		then map it */
+
+		if (block->frame == NULL) {
+			ut_a(srv_use_awe);
+
+			/* We set the second parameter to TRUE because the
+			block is in the LRU list and we must add it to the
+			awe_LRU_free_mapped list once mapped to a frame */
+		
+			buf_awe_map_page_to_frame(block, TRUE);
+		}
+
 		block->flush_type = flush_type;
 
 		if (buf_pool->n_flush[flush_type] == 0) {
@@ -534,6 +572,20 @@ buf_flush_try_page(
 			&& buf_flush_ready_for_flush(block, flush_type)) {
 	
 		block->io_fix = BUF_IO_WRITE;
+
+		/* If AWE is enabled and the page is not mapped to a frame,
+		then map it */
+
+		if (block->frame == NULL) {
+			ut_a(srv_use_awe);
+
+			/* We set the second parameter to TRUE because the
+			block is in the LRU list and we must add it to the
+			awe_LRU_free_mapped list once mapped to a frame */
+		
+			buf_awe_map_page_to_frame(block, TRUE);
+		}
+
 		block->flush_type = flush_type;
 
 		if (buf_pool->n_flush[block->flush_type] == 0) {
@@ -550,7 +602,8 @@ buf_flush_try_page(
 		if (buf_debug_prints) {
 			printf(
 			"Flushing single page space %lu, page no %lu \n",
-						block->space, block->offset);
+						(ulong) block->space,
+			                        (ulong) block->offset);
 		}
 
 		buf_flush_write_block_low(block);
@@ -603,6 +656,7 @@ buf_flush_try_neighbors(
 	for (i = low; i < high; i++) {
 
 		block = buf_page_hash_get(space, i);
+		ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE);
 
 		if (block && flush_type == BUF_FLUSH_LRU && i != offset
 		    && !block->old) {
@@ -671,10 +725,10 @@ buf_flush_batch(
 	ulint		offset;
 	ibool		found;
 	
-	ut_ad((flush_type == BUF_FLUSH_LRU) || (flush_type == BUF_FLUSH_LIST)); 
-	ut_ad((flush_type != BUF_FLUSH_LIST) ||
-					sync_thread_levels_empty_gen(TRUE));
-
+	ut_ad((flush_type == BUF_FLUSH_LRU)
+					|| (flush_type == BUF_FLUSH_LIST)); 
+	ut_ad((flush_type != BUF_FLUSH_LIST)
+					|| sync_thread_levels_empty_gen(TRUE));
 	mutex_enter(&(buf_pool->mutex));
 
 	if ((buf_pool->n_flush[flush_type] > 0)
@@ -705,7 +759,6 @@ buf_flush_batch(
 			ut_ad(flush_type == BUF_FLUSH_LIST);
 
 			block = UT_LIST_GET_LAST(buf_pool->flush_list);
-
 			if (!block
 			    || (ut_dulint_cmp(block->oldest_modification,
 			    				lsn_limit) >= 0)) {
@@ -724,6 +777,7 @@ buf_flush_batch(
 		function a pointer to a block in the list! */
 
 	    	while ((block != NULL) && !found) {
+			ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 
 			if (buf_flush_ready_for_flush(block, flush_type)) {
 
@@ -749,7 +803,6 @@ buf_flush_batch(
 			} else if (flush_type == BUF_FLUSH_LRU) {
 
 				block = UT_LIST_GET_PREV(LRU, block);
-
 			} else {
 				ut_ad(flush_type == BUF_FLUSH_LIST);
 
@@ -781,10 +834,10 @@ buf_flush_batch(
 	if (buf_debug_prints && page_count > 0) {
 		if (flush_type == BUF_FLUSH_LRU) {
 			printf("Flushed %lu pages in LRU flush\n",
-						page_count);
+						(ulong) page_count);
 		} else if (flush_type == BUF_FLUSH_LIST) {
 			printf("Flushed %lu pages in flush list flush\n",
-						page_count);
+						(ulong) page_count);
 		} else {
 			ut_error;
 		}
diff --git a/innobase/buf/buf0lru.c b/innobase/buf/buf0lru.c
index 0128ee87871..c5faec17890 100644
--- a/innobase/buf/buf0lru.c
+++ b/innobase/buf/buf0lru.c
@@ -62,6 +62,90 @@ buf_LRU_block_free_hashed_page(
 				be in a state where it can be freed */
 
 /**********************************************************************
+Invalidates all pages belonging to a given tablespace when we are deleting
+the data file(s) of that tablespace. */
+
+void
+buf_LRU_invalidate_tablespace(
+/*==========================*/
+	ulint	id)	/* in: space id */
+{
+	buf_block_t*	block;
+	ulint		page_no;
+	ibool		all_freed;
+
+scan_again:
+	mutex_enter(&(buf_pool->mutex));
+	
+	all_freed = TRUE;
+	
+	block = UT_LIST_GET_LAST(buf_pool->LRU);
+
+	while (block != NULL) {
+	        ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
+		if (block->space == id
+		    && (block->buf_fix_count > 0 || block->io_fix != 0)) {
+
+			/* We cannot remove this page during this scan yet;
+			maybe the system is currently reading it in, or
+			flushing the modifications to the file */
+			
+			all_freed = FALSE;
+
+			goto next_page;
+		}
+
+		if (block->space == id) {
+			if (buf_debug_prints) {
+				printf(
+				"Dropping space %lu page %lu\n",
+					(ulong) block->space,
+				        (ulong) block->offset);
+			}
+
+			if (block->is_hashed) {
+				page_no = block->offset;
+			
+				mutex_exit(&(buf_pool->mutex));
+
+				/* Note that the following call will acquire
+				an S-latch on the page */
+
+				btr_search_drop_page_hash_when_freed(id,
+								page_no);
+				goto scan_again;
+			}
+
+			if (0 != ut_dulint_cmp(block->oldest_modification,
+							ut_dulint_zero)) {
+
+				/* Remove from the flush list of modified
+				blocks */
+				block->oldest_modification = ut_dulint_zero;
+
+				UT_LIST_REMOVE(flush_list, 
+						buf_pool->flush_list, block);
+			}
+
+			/* Remove from the LRU list */
+			buf_LRU_block_remove_hashed_page(block);
+			buf_LRU_block_free_hashed_page(block);
+		}
+next_page:
+		block = UT_LIST_GET_PREV(LRU, block);
+	}
+
+	mutex_exit(&(buf_pool->mutex));
+	
+	if (!all_freed) {
+		os_thread_sleep(20000);
+
+	        goto scan_again;
+	}
+}
+
+/**********************************************************************
 Gets the minimum LRU_position field for the blocks in an initial segment
 (determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
 guaranteed to be precise, because the ulint_clock may wrap around. */
@@ -118,43 +202,43 @@ buf_LRU_search_and_free_block(
 	mutex_enter(&(buf_pool->mutex));
 	
 	freed = FALSE;
-	
 	block = UT_LIST_GET_LAST(buf_pool->LRU);
 
 	while (block != NULL) {
-
+	        ut_a(block->in_LRU_list);
 		if (buf_flush_ready_for_replace(block)) {
-
 			if (buf_debug_prints) {
 				printf(
 				"Putting space %lu page %lu to free list\n",
-					block->space, block->offset);
+					(ulong) block->space,
+				        (ulong) block->offset);
 			}
-			
 			buf_LRU_block_remove_hashed_page(block);
 
 			mutex_exit(&(buf_pool->mutex));
 
-			btr_search_drop_page_hash_index(block->frame);
-
+			/* Remove possible adaptive hash index built on the
+			page; in the case of AWE the block may not have a
+			frame at all */
+			
+			if (block->frame) {
+				btr_search_drop_page_hash_index(block->frame);
+			}
 			mutex_enter(&(buf_pool->mutex));
 
 			ut_a(block->buf_fix_count == 0);
 
 			buf_LRU_block_free_hashed_page(block);
-
 			freed = TRUE;
 
 			break;
 		}
-
 		block = UT_LIST_GET_PREV(LRU, block);
 		distance++;
 
 		if (!freed && n_iterations <= 10
 		    && distance > 100 + (n_iterations * buf_pool->curr_size)
 					/ 10) {
-
 			buf_pool->LRU_flush_ended = 0;
 
 			mutex_exit(&(buf_pool->mutex));
@@ -162,15 +246,12 @@ buf_LRU_search_and_free_block(
 			return(FALSE);
 		}
 	}
-
 	if (buf_pool->LRU_flush_ended > 0) {
 		buf_pool->LRU_flush_ended--;
 	}
- 
-	if (!freed) {
+ 	if (!freed) {
 		buf_pool->LRU_flush_ended = 0;
 	}
-
 	mutex_exit(&(buf_pool->mutex));
 	
 	return(freed);
@@ -211,7 +292,9 @@ list. */
 buf_block_t*
 buf_LRU_get_free_block(void)
 /*========================*/
-				/* out: the free control block */
+				/* out: the free control block; also if AWE is
+				used, it is guaranteed that the block has its
+				page mapped to a frame when we return */
 {
 	buf_block_t*	block		= NULL;
 	ibool		freed;
@@ -254,7 +337,7 @@ loop:
 "InnoDB: the buffer pool bigger?\n"
 "InnoDB: Starting the InnoDB Monitor to print diagnostics, including\n"
 "InnoDB: lock heap and hash index sizes.\n",
-		(ulong)(buf_pool->curr_size / (1024 * 1024 / UNIV_PAGE_SIZE)));
+			(ulong) (buf_pool->curr_size / (1024 * 1024 / UNIV_PAGE_SIZE)));
 
 			srv_print_innodb_monitor = TRUE;
 			os_event_set(srv_lock_timeout_thread_event);
@@ -273,7 +356,27 @@ loop:
 	if (UT_LIST_GET_LEN(buf_pool->free) > 0) {
 		
 		block = UT_LIST_GET_FIRST(buf_pool->free);
+		ut_a(block->in_free_list);
 		UT_LIST_REMOVE(free, buf_pool->free, block);
+		block->in_free_list = FALSE;
+		ut_a(block->state != BUF_BLOCK_FILE_PAGE);
+	        ut_a(!block->in_LRU_list);
+
+		if (srv_use_awe) {
+			if (block->frame) {
+				/* Remove from the list of mapped pages */
+		
+				UT_LIST_REMOVE(awe_LRU_free_mapped,
+					buf_pool->awe_LRU_free_mapped, block);
+			} else {
+				/* We map the page to a frame; second param
+				FALSE below because we do not want it to be
+				added to the awe_LRU_free_mapped list */
+
+				buf_awe_map_page_to_frame(block, FALSE);
+			}
+		}
+		
 		block->state = BUF_BLOCK_READY_FOR_USE;
 
 		mutex_exit(&(buf_pool->mutex));
@@ -302,7 +405,7 @@ loop:
 		"InnoDB: Warning: difficult to find free blocks from\n"
 		"InnoDB: the buffer pool (%lu search iterations)! Consider\n"
 		"InnoDB: increasing the buffer pool size.\n",
-						n_iterations);
+						(ulong) n_iterations);
 		fprintf(stderr,
 		"InnoDB: It is also possible that in your Unix version\n"
 		"InnoDB: fsync is very slow, or completely frozen inside\n"
@@ -312,11 +415,13 @@ loop:
 
 		fprintf(stderr,
 		"InnoDB: Pending flushes (fsync) log: %lu; buffer pool: %lu\n",
-	       			fil_n_pending_log_flushes,
-				fil_n_pending_tablespace_flushes);
+			(ulong) fil_n_pending_log_flushes,
+			(ulong) fil_n_pending_tablespace_flushes);
 		fprintf(stderr,
 	"InnoDB: %lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
-			os_n_file_reads, os_n_file_writes, os_n_fsyncs);
+			(ulong) os_n_file_reads,
+			(ulong) os_n_file_writes,
+			(ulong) os_n_fsyncs);
 
 		fprintf(stderr,
 		"InnoDB: Starting InnoDB Monitor to print further\n"
@@ -369,7 +474,7 @@ buf_LRU_old_adjust_len(void)
 	ulint	old_len;
 	ulint	new_len;
 
-	ut_ad(buf_pool->LRU_old);
+	ut_a(buf_pool->LRU_old);
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&(buf_pool->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
@@ -379,6 +484,8 @@ buf_LRU_old_adjust_len(void)
 		old_len = buf_pool->LRU_old_len;
 		new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
 
+		ut_a(buf_pool->LRU_old->in_LRU_list);
+
 		/* Update the LRU_old pointer if necessary */
 	
 		if (old_len < new_len - BUF_LRU_OLD_TOLERANCE) {
@@ -395,7 +502,7 @@ buf_LRU_old_adjust_len(void)
 							buf_pool->LRU_old);
 			buf_pool->LRU_old_len--;
 		} else {
-			ut_ad(buf_pool->LRU_old); /* Check that we did not
+			ut_a(buf_pool->LRU_old); /* Check that we did not
 						fall out of the LRU list */
 			return;
 		}
@@ -403,9 +510,8 @@ buf_LRU_old_adjust_len(void)
 }
 
 /***********************************************************************
-Initializes the old blocks pointer in the LRU list.
-This function should be called when the LRU list grows to
-BUF_LRU_OLD_MIN_LEN length. */
+Initializes the old blocks pointer in the LRU list. This function should be
+called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. */
 static
 void
 buf_LRU_old_init(void)
@@ -413,7 +519,7 @@ buf_LRU_old_init(void)
 {
 	buf_block_t*	block;
 
-	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
+	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
 
 	/* We first initialize all blocks in the LRU list as old and then use
 	the adjust function to move the LRU_old pointer to the right
@@ -422,6 +528,8 @@ buf_LRU_old_init(void)
 	block = UT_LIST_GET_FIRST(buf_pool->LRU);
 
 	while (block != NULL) {
+		ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+	        ut_a(block->in_LRU_list);
 		block->old = TRUE;
 		block = UT_LIST_GET_NEXT(LRU, block);
 	}
@@ -446,6 +554,9 @@ buf_LRU_remove_block(
 	ut_ad(mutex_own(&(buf_pool->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
 		
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_a(block->in_LRU_list);
+
 	/* If the LRU_old pointer is defined and points to just this block,
 	move it backward one step */
 
@@ -459,11 +570,19 @@ buf_LRU_remove_block(
 		(buf_pool->LRU_old)->old = TRUE;
 
 		buf_pool->LRU_old_len++;
-		ut_ad(buf_pool->LRU_old);
+		ut_a(buf_pool->LRU_old);
 	}
 
 	/* Remove the block from the LRU list */
 	UT_LIST_REMOVE(LRU, buf_pool->LRU, block);
+	block->in_LRU_list = FALSE;
+
+	if (srv_use_awe && block->frame) {
+		/* Remove from the list of mapped pages */
+		
+		UT_LIST_REMOVE(awe_LRU_free_mapped,
+					buf_pool->awe_LRU_free_mapped, block);
+	}	
 
 	/* If the LRU list is so short that LRU_old not defined, return */
 	if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
@@ -501,6 +620,8 @@ buf_LRU_add_block_to_end_low(
 	ut_ad(mutex_own(&(buf_pool->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
 
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
 	block->old = TRUE;
 
 	last_block = UT_LIST_GET_LAST(buf_pool->LRU);
@@ -511,8 +632,17 @@ buf_LRU_add_block_to_end_low(
 		block->LRU_position = buf_pool_clock_tic();
 	}			
 
+	ut_a(!block->in_LRU_list);
 	UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block);
+	block->in_LRU_list = TRUE;
 
+	if (srv_use_awe && block->frame) {
+		/* Add to the list of mapped pages */
+		
+		UT_LIST_ADD_LAST(awe_LRU_free_mapped,
+					buf_pool->awe_LRU_free_mapped, block);
+	}
+	
 	if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
 
 		buf_pool->LRU_old_len++;
@@ -555,9 +685,21 @@ buf_LRU_add_block_low(
 	ut_ad(mutex_own(&(buf_pool->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
 
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_a(!block->in_LRU_list);
+
 	block->old = old;
 	cl = buf_pool_clock_tic();
 
+	if (srv_use_awe && block->frame) {
+		/* Add to the list of mapped pages; for simplicity we always
+		add to the start, even if the user would have set 'old'
+		TRUE */
+		
+		UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
+					buf_pool->awe_LRU_free_mapped, block);
+	}
+
 	if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
 
 		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block);
@@ -575,6 +717,8 @@ buf_LRU_add_block_low(
 		block->LRU_position = (buf_pool->LRU_old)->LRU_position;
 	}
 
+	block->in_LRU_list = TRUE;
+
 	if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
 
 		ut_ad(buf_pool->LRU_old);
@@ -645,9 +789,12 @@ buf_LRU_block_free_non_file_page(
 #endif /* UNIV_SYNC_DEBUG */
 	ut_ad(block);
 	
-	ut_ad((block->state == BUF_BLOCK_MEMORY)
+	ut_a((block->state == BUF_BLOCK_MEMORY)
 	      || (block->state == BUF_BLOCK_READY_FOR_USE));
 
+	ut_a(block->n_pointers == 0);
+	ut_a(!block->in_free_list);
+
 	block->state = BUF_BLOCK_NOT_USED;
 
 #ifdef UNIV_DEBUG	
@@ -655,6 +802,14 @@ buf_LRU_block_free_non_file_page(
 	memset(block->frame, '\0', UNIV_PAGE_SIZE);
 #endif	
 	UT_LIST_ADD_FIRST(free, buf_pool->free, block);
+	block->in_free_list = TRUE;
+
+	if (srv_use_awe && block->frame) {
+		/* Add to the list of mapped pages */
+		
+		UT_LIST_ADD_FIRST(awe_LRU_free_mapped,
+					buf_pool->awe_LRU_free_mapped, block);
+	}
 }
 
 /**********************************************************************
@@ -673,8 +828,7 @@ buf_LRU_block_remove_hashed_page(
 #endif /* UNIV_SYNC_DEBUG */
 	ut_ad(block);
 	
-	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
-
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 	ut_a(block->io_fix == 0);
 	ut_a(block->buf_fix_count == 0);
 	ut_a(ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) == 0);
@@ -683,7 +837,31 @@ buf_LRU_block_remove_hashed_page(
 
 	buf_pool->freed_page_clock += 1;
 
- 	buf_frame_modify_clock_inc(block->frame);
+	/* Note that if AWE is enabled the block may not have a frame at all */
+	
+ 	buf_block_modify_clock_inc(block);
+		
+        if (block != buf_page_hash_get(block->space, block->offset)) {
+                fprintf(stderr,
+"InnoDB: Error: page %lu %lu not found from the hash table\n",
+			(ulong) block->space,
+			(ulong) block->offset);
+                if (buf_page_hash_get(block->space, block->offset)) {
+                        fprintf(stderr,
+"InnoDB: From hash table we find block %lx of %lu %lu which is not %lx\n",
+                (ulong) buf_page_hash_get(block->space, block->offset),
+                (ulong) buf_page_hash_get(block->space, block->offset)->space,
+                (ulong) buf_page_hash_get(block->space, block->offset)->offset,
+		(ulong) block);
+                }
+
+                buf_print();
+                buf_LRU_print();
+                buf_validate();
+                buf_LRU_validate();
+
+                ut_a(0);
+        }	
 
 	HASH_DELETE(buf_block_t, hash, buf_pool->page_hash,
 			buf_page_address_fold(block->space, block->offset),
@@ -704,7 +882,7 @@ buf_LRU_block_free_hashed_page(
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&(buf_pool->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
-	ut_ad(block->state == BUF_BLOCK_REMOVE_HASH);
+	ut_a(block->state == BUF_BLOCK_REMOVE_HASH);
 
 	block->state = BUF_BLOCK_MEMORY;
 
@@ -797,7 +975,7 @@ buf_LRU_print(void)
 	ut_ad(buf_pool);
 	mutex_enter(&(buf_pool->mutex));
 
-	printf("Pool ulint clock %lu\n", buf_pool->ulint_clock);
+	printf("Pool ulint clock %lu\n", (ulong) buf_pool->ulint_clock);
 
 	block = UT_LIST_GET_FIRST(buf_pool->LRU);
 
@@ -805,18 +983,18 @@ buf_LRU_print(void)
 
 	while (block != NULL) {
 
-		printf("BLOCK %lu ", block->offset);
+		printf("BLOCK %lu ", (ulong) block->offset);
 
 		if (block->old) {
 			printf("old ");
 		}
 
 		if (block->buf_fix_count) {
-			printf("buffix count %lu ", block->buf_fix_count);
+			printf("buffix count %lu ", (ulong) block->buf_fix_count);
 		}
 
 		if (block->io_fix) {
-			printf("io_fix %lu ", block->io_fix);
+			printf("io_fix %lu ", (ulong) block->io_fix);
 		}
 
 		if (ut_dulint_cmp(block->oldest_modification,
@@ -824,12 +1002,12 @@ buf_LRU_print(void)
 			printf("modif. ");
 		}
 
-		printf("LRU pos %lu ", block->LRU_position);
+		printf("LRU pos %lu ", (ulong) block->LRU_position);
 		
 		frame = buf_block_get_frame(block);
 
-		printf("type %lu ", fil_page_get_type(frame));
-		printf("index id %lu ", ut_dulint_get_low(
+		printf("type %lu ", (ulong) fil_page_get_type(frame));
+		printf("index id %lu ", (ulong) ut_dulint_get_low(
 					btr_page_get_index_id(frame)));
 
 		block = UT_LIST_GET_NEXT(LRU, block);
diff --git a/innobase/buf/buf0rea.c b/innobase/buf/buf0rea.c
index 475a5bd9cbd..5ba27b8fee8 100644
--- a/innobase/buf/buf0rea.c
+++ b/innobase/buf/buf0rea.c
@@ -49,19 +49,30 @@ ulint
 buf_read_page_low(
 /*==============*/
 			/* out: 1 if a read request was queued, 0 if the page
-			already resided in buf_pool or if the page is in
+			already resided in buf_pool, or if the page is in
 			the doublewrite buffer blocks in which case it is never
-			read into the pool */
+			read into the pool, or if the tablespace does not
+			exist or is being dropped */
+	ulint*	err,	/* out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are
+			trying to read from a non-existent tablespace, or a
+			tablespace which is just now being dropped */
 	ibool	sync,	/* in: TRUE if synchronous aio is desired */
 	ulint	mode,	/* in: BUF_READ_IBUF_PAGES_ONLY, ...,
 			ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
 			at read-ahead functions) */
 	ulint	space,	/* in: space id */
+	ib_longlong tablespace_version, /* in: if the space memory object has
+			this timestamp different from what we are giving here,
+			treat the tablespace as dropped; this is a timestamp we
+			use to stop dangling page reads from a tablespace
+			which we have DISCARDed + IMPORTed back */
 	ulint	offset)	/* in: page number */
 {
 	buf_block_t*	block;
 	ulint		wake_later;
 
+	*err = DB_SUCCESS;
+
 	wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
 	mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
 	
@@ -72,6 +83,11 @@ buf_read_page_low(
 		    || (offset >= trx_doublewrite->block2
 		        && offset < trx_doublewrite->block2
 		     		+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Warning: trying to read doublewrite buffer page %lu\n",
+			(ulong) offset);
+
 		return(0);
 	}
 
@@ -97,27 +113,39 @@ buf_read_page_low(
 		sync = TRUE;
 	}
 
-	block = buf_page_init_for_read(mode, space, offset);
-
-	if (block != NULL) {
-		if (buf_debug_prints) {
-			printf("Posting read request for page %lu, sync %lu\n",
-				offset, sync);
-		}
+	/* The following call will also check if the tablespace does not exist
+	or is being dropped; if we succeed in initing the page in the buffer
+	pool for read, then DISCARD cannot proceed until the read has
+	completed */
 
-		fil_io(OS_FILE_READ | wake_later,
-			sync, space, offset, 0, UNIV_PAGE_SIZE,
-					(void*)block->frame, (void*)block);
-		if (sync) {
-			/* The i/o is already completed when we arrive from
-			fil_read */
-			buf_page_io_complete(block);
-		}
+	block = buf_page_init_for_read(err, mode, space, tablespace_version,
+								offset);
+	if (block == NULL) {
 		
-		return(1);
+		return(0);
 	}
 
-	return(0);
+	if (buf_debug_prints) {
+		printf("Posting read request for page %lu, sync %lu\n",
+							   (ulong) offset,
+		       					   (ulong) sync);
+	}
+
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
+	*err = fil_io(OS_FILE_READ | wake_later,
+			sync, space,
+			offset, 0, UNIV_PAGE_SIZE,
+			(void*)block->frame, (void*)block);
+	ut_a(*err == DB_SUCCESS);
+
+	if (sync) {
+		/* The i/o is already completed when we arrive from
+		fil_read */
+		buf_page_io_complete(block);
+	}
+		
+	return(1);
 }	
 
 /************************************************************************
@@ -142,12 +170,14 @@ buf_read_ahead_random(
 	ulint	offset)	/* in: page number of a page which the current thread
 			wants to access */
 {
+	ib_longlong	tablespace_version;
 	buf_block_t*	block;
 	ulint		recent_blocks	= 0;
 	ulint		count;
 	ulint		LRU_recent_limit;
 	ulint		ibuf_mode;
 	ulint		low, high;
+	ulint		err;
 	ulint		i;
 
 	if (srv_startup_is_before_trx_rollback_phase) {
@@ -164,11 +194,16 @@ buf_read_ahead_random(
 		return(0);
 	}
 
+	/* Remember the tablespace version before we ask the tablespace size
+	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
+	do not try to read outside the bounds of the tablespace! */
+
+	tablespace_version = fil_space_get_version(space);
+
 	low  = (offset / BUF_READ_AHEAD_RANDOM_AREA)
 					* BUF_READ_AHEAD_RANDOM_AREA;
 	high = (offset / BUF_READ_AHEAD_RANDOM_AREA + 1)
 					* BUF_READ_AHEAD_RANDOM_AREA;
-
 	if (high > fil_space_get_size(space)) {
 
 		high = fil_space_get_size(space);
@@ -193,7 +228,6 @@ buf_read_ahead_random(
 	that is, reside near the start of the LRU list. */
 
 	for (i = low; i < high; i++) {
-
 		block = buf_page_hash_get(space, i);
 
 		if ((block)
@@ -227,10 +261,17 @@ buf_read_ahead_random(
 		mode: hence FALSE as the first parameter */
 
 		if (!ibuf_bitmap_page(i)) {
-			
-			count += buf_read_page_low(FALSE, ibuf_mode
+			count += buf_read_page_low(&err, FALSE, ibuf_mode
 					| OS_AIO_SIMULATED_WAKE_LATER,
-								space, i);
+				        space, tablespace_version, i);
+			if (err == DB_TABLESPACE_DELETED) {
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+"  InnoDB: Warning: in random readahead trying to access tablespace\n"
+"InnoDB: %lu page no. %lu,\n"
+"InnoDB: but the tablespace does not exist or is just being dropped.\n",
+					(ulong) space, (ulong) i);
+			}
 		}
 	}
 
@@ -243,7 +284,8 @@ buf_read_ahead_random(
 	if (buf_debug_prints && (count > 0)) {
 	
 		printf("Random read-ahead space %lu offset %lu pages %lu\n",
-						space, offset, count);
+						(ulong) space, (ulong) offset,
+		       				(ulong) count);
 	}
 
 	return(count);
@@ -264,15 +306,27 @@ buf_read_page(
 	ulint	space,	/* in: space id */
 	ulint	offset)	/* in: page number */
 {
-	ulint	count;
-	ulint	count2;
+	ib_longlong	tablespace_version;
+	ulint		count;
+	ulint		count2;
+	ulint		err;
+
+	tablespace_version = fil_space_get_version(space);
 
 	count = buf_read_ahead_random(space, offset);
 
 	/* We do the i/o in the synchronous aio mode to save thread
 	switches: hence TRUE */
 
-	count2 = buf_read_page_low(TRUE, BUF_READ_ANY_PAGE, space, offset);
+	count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+					tablespace_version, offset);
+	if (err == DB_TABLESPACE_DELETED) {
+	        ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: error: trying to access tablespace %lu page no. %lu,\n"
+"InnoDB: but the tablespace does not exist or is just being dropped.\n",
+				 (ulong) space, (ulong) offset);
+	}
 
 	/* Flush pages from the end of the LRU list if necessary */
 	buf_flush_free_margin();
@@ -312,6 +366,7 @@ buf_read_ahead_linear(
 	ulint	offset)	/* in: page number of a page; NOTE: the current thread
 			must want access to this page (see NOTE 3 above) */
 {
+	ib_longlong	tablespace_version;
 	buf_block_t*	block;
 	buf_frame_t*	frame;
 	buf_block_t*	pred_block	= NULL;
@@ -323,6 +378,7 @@ buf_read_ahead_linear(
 	ulint		fail_count;
 	ulint		ibuf_mode;
 	ulint		low, high;
+	ulint		err;
 	ulint		i;
 	
 	if (srv_startup_is_before_trx_rollback_phase) {
@@ -350,14 +406,21 @@ buf_read_ahead_linear(
 		return(0);
 	}
 
+	/* Remember the tablespace version before we ask the tablespace size
+	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
+	do not try to read outside the bounds of the tablespace! */
+
+	tablespace_version = fil_space_get_version(space);
+
+	mutex_enter(&(buf_pool->mutex));
+
 	if (high > fil_space_get_size(space)) {
+		mutex_exit(&(buf_pool->mutex));
 		/* The area is not whole, return */
 
 		return(0);
 	}
 
-	mutex_enter(&(buf_pool->mutex));
-
 	if (buf_pool->n_pend_reads >
 			buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
 		mutex_exit(&(buf_pool->mutex));
@@ -378,18 +441,15 @@ buf_read_ahead_linear(
 	fail_count = 0;
 
 	for (i = low; i < high; i++) {
-
 		block = buf_page_hash_get(space, i);
 		
 		if ((block == NULL) || !block->accessed) {
-
 			/* Not accessed */
 			fail_count++;
 
 		} else if (pred_block && (ut_ulint_cmp(block->LRU_position,
 				      		    pred_block->LRU_position)
 			       		  != asc_or_desc)) {
-
 			/* Accesses not in the right order */
 
 			fail_count++;
@@ -462,7 +522,7 @@ buf_read_ahead_linear(
 		return(0);
 	}
 
-	/* If we got this far, read-ahead can be sensible: do it */	    	
+	/* If we got this far, read-ahead can be sensible: do it */
 
 	if (ibuf_inside()) {
 		ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
@@ -483,9 +543,17 @@ buf_read_ahead_linear(
 		aio mode: hence FALSE as the first parameter */
 
 		if (!ibuf_bitmap_page(i)) {
-			count += buf_read_page_low(FALSE, ibuf_mode
+			count += buf_read_page_low(&err, FALSE, ibuf_mode
 					| OS_AIO_SIMULATED_WAKE_LATER,
-					space, i);
+					space, 	tablespace_version, i);
+			if (err == DB_TABLESPACE_DELETED) {
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+"  InnoDB: Warning: in linear readahead trying to access tablespace\n"
+"InnoDB: %lu page no. %lu,\n"
+"InnoDB: but the tablespace does not exist or is just being dropped.\n",
+				 (ulong) space, (ulong) i);
+			}
 		}
 	}
 
@@ -501,7 +569,7 @@ buf_read_ahead_linear(
 	if (buf_debug_prints && (count > 0)) {
 		printf(
 		"LINEAR read-ahead space %lu offset %lu pages %lu\n",
-		space, offset, count);
+		(ulong) space, (ulong) offset, (ulong) count);
 	}
 
 	return(count);
@@ -509,7 +577,7 @@ buf_read_ahead_linear(
 
 /************************************************************************
 Issues read requests for pages which the ibuf module wants to read in, in
-order to contract insert buffer trees. Technically, this function is like
+order to contract the insert buffer tree. Technically, this function is like
 a read-ahead function. */
 
 void
@@ -518,11 +586,17 @@ buf_read_ibuf_merge_pages(
 	ibool	sync,		/* in: TRUE if the caller wants this function
 				to wait for the highest address page to get
 				read in, before this function returns */
-	ulint	space,		/* in: space id */
+	ulint*	space_ids,	/* in: array of space ids */
+	ib_longlong* space_versions,/* in: the spaces must have this version
+				number (timestamp), otherwise we discard the
+				read; we use this to cancel reads if
+				DISCARD + IMPORT may have changed the
+				tablespace size */
 	ulint*	page_nos,	/* in: array of page numbers to read, with the
 				highest page number the last in the array */
 	ulint	n_stored)	/* in: number of page numbers in the array */
 {
+	ulint	err;
 	ulint	i;
 
 	ut_ad(!ibuf_inside());
@@ -536,11 +610,19 @@ buf_read_ibuf_merge_pages(
 
 	for (i = 0; i < n_stored; i++) {
 		if ((i + 1 == n_stored) && sync) {
-			buf_read_page_low(TRUE, BUF_READ_ANY_PAGE, space,
-								page_nos[i]);
+			buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE,
+				space_ids[i], space_versions[i], page_nos[i]);
 		} else {
-			buf_read_page_low(FALSE, BUF_READ_ANY_PAGE, space,
-								page_nos[i]);
+			buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE,
+				space_ids[i], space_versions[i], page_nos[i]);
+		}
+
+		if (err == DB_TABLESPACE_DELETED) {
+			/* We have deleted or are deleting the single-table
+			tablespace: remove the entries for that page */
+
+			ibuf_merge_or_delete_for_page(NULL, space_ids[i],
+							page_nos[i], FALSE);
 		}
 	}
 	
@@ -548,8 +630,7 @@ buf_read_ibuf_merge_pages(
 	buf_flush_free_margin();
 
 	if (buf_debug_prints) {
-		printf("Ibuf merge read-ahead space %lu pages %lu\n",
-							space, n_stored);
+		printf("Ibuf merge read-ahead pages %lu\n", (ulong) n_stored);
 	}
 }
 
@@ -567,8 +648,12 @@ buf_read_recv_pages(
 				highest page number the last in the array */
 	ulint	n_stored)	/* in: number of page numbers in the array */
 {
-	ulint	count;
-	ulint	i;
+	ib_longlong	tablespace_version;
+	ulint		count;
+	ulint		err;
+	ulint		i;
+
+	tablespace_version = fil_space_get_version(space);
 
 	for (i = 0; i < n_stored; i++) {
 
@@ -576,7 +661,7 @@ buf_read_recv_pages(
 
 		os_aio_print_debug = FALSE;
 
-		while (buf_pool->n_pend_reads >= RECV_POOL_N_FREE_BLOCKS / 2) {
+		while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
 
 			os_aio_simulated_wake_handler_threads();
 			os_thread_sleep(500000);
@@ -587,7 +672,7 @@ buf_read_recv_pages(
 				fprintf(stderr,
 "InnoDB: Error: InnoDB has waited for 50 seconds for pending\n"
 "InnoDB: reads to the buffer pool to be finished.\n"
-"InnoDB: Number of pending reads %lu\n", buf_pool->n_pend_reads);
+"InnoDB: Number of pending reads %lu\n", (ulong) buf_pool->n_pend_reads);
 
 				os_aio_print_debug = TRUE;
 			}
@@ -596,12 +681,12 @@ buf_read_recv_pages(
 		os_aio_print_debug = FALSE;
 
 		if ((i + 1 == n_stored) && sync) {
-			buf_read_page_low(TRUE, BUF_READ_ANY_PAGE, space,
-								page_nos[i]);
+			buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
+					tablespace_version, page_nos[i]);
 		} else {
-			buf_read_page_low(FALSE, BUF_READ_ANY_PAGE
+			buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE
 					| OS_AIO_SIMULATED_WAKE_LATER,
-					space, page_nos[i]);
+				       space, tablespace_version, page_nos[i]);
 		}
 	}
 	
@@ -611,6 +696,7 @@ buf_read_recv_pages(
 	buf_flush_free_margin();
 
 	if (buf_debug_prints) {
-		printf("Recovery applies read-ahead pages %lu\n", n_stored);
+		printf("Recovery applies read-ahead pages %lu\n",
+		       (ulong) n_stored);
 	}
 }
diff --git a/innobase/configure.in b/innobase/configure.in
index a94ade6dc8e..652291f1f38 100644
--- a/innobase/configure.in
+++ b/innobase/configure.in
@@ -34,9 +34,11 @@ CXXFLAGS="$CXXFLAGS "
 AC_PROG_CC
 AC_PROG_RANLIB
 AC_PROG_INSTALL
+AC_PROG_LIBTOOL
 AC_CHECK_HEADERS(aio.h sched.h)
 AC_CHECK_SIZEOF(int, 4)
 AC_CHECK_SIZEOF(long, 4)
+AC_CHECK_SIZEOF(void*, 4)
 AC_CHECK_FUNCS(sched_yield)
 AC_CHECK_FUNCS(fdatasync)
 #AC_CHECK_FUNCS(localtime_r)	# Already checked by MySQL
diff --git a/innobase/data/data0data.c b/innobase/data/data0data.c
index c3c2b135717..0ed0efeb160 100644
--- a/innobase/data/data0data.c
+++ b/innobase/data/data0data.c
@@ -196,7 +196,8 @@ dfield_check_typed_no_assert(
 
 		fprintf(stderr,
 "InnoDB: Error: data field type %lu, len %lu\n",
-			dfield_get_type(field)->mtype, dfield_get_len(field));
+			(ulong) dfield_get_type(field)->mtype,
+			(ulong) dfield_get_len(field));
 		return(FALSE);
 	}
 
@@ -219,7 +220,7 @@ dtuple_check_typed_no_assert(
 	if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) {
 		fprintf(stderr,
 "InnoDB: Error: index entry has %lu fields\n",
-			dtuple_get_n_fields(tuple));
+			(ulong) dtuple_get_n_fields(tuple));
 
 		dtuple_sprintf(err_buf, 900, tuple);
 		fprintf(stderr,
@@ -259,7 +260,8 @@ dfield_check_typed(
 
 		fprintf(stderr,
 "InnoDB: Error: data field type %lu, len %lu\n",
-			dfield_get_type(field)->mtype, dfield_get_len(field));
+			(ulong) dfield_get_type(field)->mtype,
+			(ulong) dfield_get_len(field));
 
 		ut_error;
 	}
@@ -433,7 +435,7 @@ dfield_print_also_hex(
 		data = dfield_get_data(dfield);
 		
 		for (i = 0; i < len; i++) {
-			printf("%02lx", (ulint)*data);
+			printf("%02lx", (ulong)*data);
 
 			data++;
 		}
@@ -459,10 +461,10 @@ dtuple_print(
 
 	n_fields = dtuple_get_n_fields(tuple);
 
-	printf("DATA TUPLE: %lu fields;\n", n_fields);
+	printf("DATA TUPLE: %lu fields;\n", (ulong) n_fields);
 
 	for (i = 0; i < n_fields; i++) {
-		printf(" %lu:", i);	
+		printf(" %lu:", (ulong) i);
 
 		field = dtuple_get_nth_field(tuple, i);
 		
@@ -506,7 +508,7 @@ dtuple_sprintf(
 			return(len);
 		}
 
-		len += sprintf(buf + len, " %lu:", i);	
+		len += sprintf(buf + len, " %lu:", (ulong) i);
 
 		field = dtuple_get_nth_field(tuple, i);
 		
@@ -567,7 +569,7 @@ dtuple_convert_big_rec(
 
 	if (size > 1000000000) {
 		fprintf(stderr,
-"InnoDB: Warning: tuple size very big: %lu\n", size);
+"InnoDB: Warning: tuple size very big: %lu\n", (ulong) size);
 		
 		dtuple_sprintf(err_buf, 900, entry);
 		fprintf(stderr,
diff --git a/innobase/data/data0type.c b/innobase/data/data0type.c
index df430f06bcb..71ce5ff3d58 100644
--- a/innobase/data/data0type.c
+++ b/innobase/data/data0type.c
@@ -12,10 +12,99 @@ Created 1/16/1996 Heikki Tuuri
 #include "data0type.ic"
 #endif
 
+/* At the database startup we store the default-charset collation number of
+this MySQL installation to this global variable. If we have < 4.1.2 format
+column definitions, or records in the insert buffer, we use this
+charset-collation code for them. */
+
+ulint	data_mysql_default_charset_coll		= 99999999;
+ulint	data_mysql_latin1_swedish_charset_coll	= 99999999;
+
 dtype_t		dtype_binary_val = {DATA_BINARY, 0, 0, 0};
 dtype_t* 	dtype_binary 	= &dtype_binary_val;
 
 /*************************************************************************
+Checks if a data main type is a string type. Also a BLOB is considered a
+string type. */
+
+ibool
+dtype_is_string_type(
+/*=================*/
+			/* out: TRUE if string type */
+	ulint	mtype)	/* in: InnoDB main data type code: DATA_CHAR, ... */
+{
+	/* All main type codes up to DATA_BLOB are string types */
+	if (mtype <= DATA_BLOB) {
+		return(TRUE);
+	}
+
+	/* Above that, only DATA_MYSQL and DATA_VARMYSQL are strings */
+	return((mtype == DATA_MYSQL || mtype == DATA_VARMYSQL)
+							? TRUE : FALSE);
+}
+
+/*************************************************************************
+Checks if a type is a binary string type. Note that for tables created with
+< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
+those DATA_BLOB columns this function currently returns FALSE. */
+
+ibool
+dtype_is_binary_string_type(
+/*========================*/
+			/* out: TRUE if binary string type */
+	ulint	mtype,	/* in: main data type */
+	ulint	prtype)	/* in: precise type */
+{
+	if (mtype == DATA_FIXBINARY || mtype == DATA_BINARY) {
+		return(TRUE);
+	}
+
+	/* A DATA_BLOB is binary only if the precise type has the
+	DATA_BINARY_TYPE flag set */
+	return((mtype == DATA_BLOB && (prtype & DATA_BINARY_TYPE))
+							? TRUE : FALSE);
+}
+
+/*************************************************************************
+Checks if a type is a non-binary string type. That is, dtype_is_string_type is
+TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
+with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
+For those DATA_BLOB columns this function currently returns TRUE. */
+
+ibool
+dtype_is_non_binary_string_type(
+/*============================*/
+			/* out: TRUE if non-binary string type */
+	ulint	mtype,	/* in: main data type */
+	ulint	prtype)	/* in: precise type */
+{
+	if (dtype_is_string_type(mtype) != TRUE) {
+		return(FALSE);		/* not a string type at all */
+	}
+	if (dtype_is_binary_string_type(mtype, prtype) != FALSE) {
+		return(FALSE);		/* a string, but a binary one */
+	}
+	return(TRUE);
+}
+
+/*************************************************************************
+Forms a precise type from the < 4.1.2 format precise type plus the
+charset-collation code. */
+
+ulint
+dtype_form_prtype(
+/*==============*/
+				/* out: the combined precise type */
+	ulint	old_prtype,	/* in: the MySQL type code and the flags
+				DATA_BINARY_TYPE etc. */
+	ulint	charset_coll)	/* in: MySQL charset-collation code */
+{
+	ut_a(old_prtype < 256 * 256);
+	ut_a(charset_coll < 256);
+	return((charset_coll << 16) + old_prtype);
+}
+
+/*************************************************************************
 Validates a data type structure. */
 
 ibool
@@ -63,7 +152,7 @@ dtype_print(
 	} else if (mtype == DATA_SYS) {
 		printf("DATA_SYS");
 	} else {
-		printf("type %lu", mtype);
+		printf("type %lu", (ulong) mtype);
 	}
 
 	len = type->len;
@@ -86,9 +175,9 @@ dtype_print(
 		} else if (prtype == DATA_ENGLISH) {
 			printf("DATA_ENGLISH");
 		} else {
+			printf("prtype %lu", (ulong) prtype);
+			printf("prtype %lu", (ulong) mtype);
 		}
 	}
 
-	printf(" len %lu prec %lu", len, type->prec);
+	printf(" len %lu prec %lu", (ulong) len, (ulong) type->prec);
 }
diff --git a/innobase/dict/dict0boot.c b/innobase/dict/dict0boot.c
index 3abb71a842d..46cf6c7788d 100644
--- a/innobase/dict/dict0boot.c
+++ b/innobase/dict/dict0boot.c
@@ -331,8 +331,12 @@ dict_boot(void)
 	dict_mem_table_add_col(table, (char *) "PAGE_NO", DATA_INT, 0, 4, 0);
 
 	/* The '+ 2' below comes from the 2 system fields */
-	ut_ad(DICT_SYS_INDEXES_PAGE_NO_FIELD == 6 + 2);
-	ut_ad(DICT_SYS_INDEXES_SPACE_NO_FIELD == 5 + 2); 
+#if DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2
+#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2"
+#endif
+#if DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2
+#error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2"
+#endif
 
 	table->id = DICT_INDEXES_ID;
 	dict_table_add_to_cache(table);
@@ -418,6 +422,4 @@ dict_create(void)
 	dict_boot();
 
 	dict_insert_initial_data();
-
-	sync_order_checks_on = TRUE;
 }
diff --git a/innobase/dict/dict0crea.c b/innobase/dict/dict0crea.c
index 48fcb9c1e79..6a951317d47 100644
--- a/innobase/dict/dict0crea.c
+++ b/innobase/dict/dict0crea.c
@@ -269,6 +269,8 @@ dict_build_table_def_step(
 	dict_table_t*	table;
 	dict_table_t*	cluster_table;
 	dtuple_t*	row;
+	ulint		error;
+	mtr_t		mtr;
 
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&(dict_sys->mutex)));
@@ -297,6 +299,32 @@ dict_build_table_def_step(
 		table->mix_id = dict_hdr_get_new_id(DICT_HDR_MIX_ID);
 	}
 
+	if (srv_file_per_table) {
+		/* We create a new single-table tablespace for the table.
+		We initially let it be 4 pages:
+		- page 0 is the fsp header and an extent descriptor page,
+		- page 1 is an ibuf bitmap page,
+		- page 2 is the first inode page,
+		- page 3 will contain the root of the clustered index of the
+		  table we create here. */
+	
+		table->space = 0;	/* reset to zero for the call below */
+
+		error = fil_create_new_single_table_tablespace(
+					&(table->space), table->name,
+					FIL_IBD_FILE_INITIAL_SIZE);
+		if (error != DB_SUCCESS) {
+
+			return(error);
+		}
+
+		mtr_start(&mtr);
+
+		fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);
+		
+		mtr_commit(&mtr);
+	}
+
 	row = dict_create_sys_tables_tuple(table, node->heap);
 
 	ins_node_set_new_row(node->tab_def, row);
@@ -488,8 +516,8 @@ dict_create_sys_fields_tuple(
 }	
 
 /*********************************************************************
-Creates the tuple with which the index entry is searched for
-writing the index tree root page number, if such a tree is created. */
+Creates the tuple with which the index entry is searched for writing the index
+tree root page number, if such a tree is created. */
 static
 dtuple_t*
 dict_create_search_tuple(
@@ -558,10 +586,10 @@ dict_build_index_def_step(
 	
 	index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID);
 
-	if (index->type & DICT_CLUSTERED) {
-		/* Inherit the space from the table */
-		index->space = table->space;
-	}
+	/* Inherit the space id from the table; we store all indexes of a
+	table in the same tablespace */
+
+	index->space = table->space;
 
 	index->page_no = FIL_NULL;
 	
@@ -647,6 +675,9 @@ dict_create_index_tree_step(
 
 	index->page_no = btr_create(index->type, index->space, index->id,
 									&mtr);
+	/* printf("Created a new index tree in space %lu root page %lu\n",
+					index->space, index->page_no); */
+
 	page_rec_write_index_page_no(btr_pcur_get_rec(&pcur),
 					DICT_SYS_INDEXES_PAGE_NO_FIELD,
 					index->page_no, &mtr);
@@ -697,7 +728,14 @@ dict_drop_index_tree(
 	ut_ad(len == 4);
 
 	space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
-	
+
+	if (!fil_tablespace_exists_in_mem(space)) {
+		/* It is a single table tablespace and the .ibd file is
+		missing: do nothing */
+
+		return;
+	}
+
 	/* We free all the pages but the root page first; this operation
 	may span several mini-transactions */
 
@@ -707,6 +745,8 @@ dict_drop_index_tree(
 	we write FIL_NULL to the appropriate field in the SYS_INDEXES
 	record: this mini-transaction marks the B-tree totally freed */
 	
+	/* printf("Dropping index tree in space %lu root page %lu\n", space,
+							 root_page_no); */
 	btr_free_root(space, root_page_no, mtr);
 
 	page_rec_write_index_page_no(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
@@ -1107,7 +1147,8 @@ dict_create_or_check_foreign_constraint_tables(void)
 	error = trx->error_state;
 
 	if (error != DB_SUCCESS) {
-		fprintf(stderr, "InnoDB: error %lu in creation\n", error);
+		fprintf(stderr, "InnoDB: error %lu in creation\n",
+			(ulong) error);
 		
 		ut_a(error == DB_OUT_OF_FILE_SPACE);
 
@@ -1206,7 +1247,7 @@ loop:
 		/* Generate a new constraint id */
 		foreign->id = mem_heap_alloc(foreign->heap, namelen + 20);
 		/* no overflow if number < 1e13 */
-		sprintf(foreign->id, "%s_ibfk_%lu", table->name, number);
+		sprintf(foreign->id, "%s_ibfk_%lu", table->name, (ulong) number);
 		number++;
 	}
 
@@ -1219,8 +1260,8 @@ loop:
 					foreign->id,
 					table->name,
 					foreign->referenced_table_name,
-					foreign->n_fields
-					+ (foreign->type << 24));
+					(ulong) (foreign->n_fields
+					+ (foreign->type << 24)));
 
 	for (i = 0; i < foreign->n_fields; i++) {
 		ut_a(len < (sizeof buf)
@@ -1230,7 +1271,7 @@ loop:
 		len += sprintf(buf + len,
 	"INSERT INTO SYS_FOREIGN_COLS VALUES('%s', %lu, '%s', '%s');\n",
 					foreign->id,
-					i,
+					(ulong) i,
 					foreign->foreign_col_names[i],
 					foreign->referenced_col_names[i]);
 	}
@@ -1274,7 +1315,7 @@ loop:
 	if (error != DB_SUCCESS) {
 	        fprintf(stderr,
 			"InnoDB: Foreign key constraint creation failed:\n"
-			"InnoDB: internal error number %lu\n", error);
+			"InnoDB: internal error number %lu\n", (ulong) error);
 
 		mutex_enter(&dict_foreign_err_mutex);
 		ut_sprintf_timestamp(ebuf);
diff --git a/innobase/dict/dict0dict.c b/innobase/dict/dict0dict.c
index dc19997de72..bf60567ceaa 100644
--- a/innobase/dict/dict0dict.c
+++ b/innobase/dict/dict0dict.c
@@ -139,7 +139,8 @@ dict_tree_find_index_low(
 /*=====================*/
 				/* out: index */
 	dict_tree_t*	tree,	/* in: index tree */
-	rec_t*		rec);	/* in: record for which to find correct index */
+	rec_t*		rec);	/* in: record for which to find correct
+				index */
 /**************************************************************************
 Removes a foreign constraint struct from the dictionet cache. */
 static
@@ -742,7 +743,7 @@ dict_table_get_and_increment_handle_count(
 	mutex_exit(&(dict_sys->mutex));
 
 	if (table != NULL) {
-	        if (!table->stat_initialized) {
+	        if (!table->stat_initialized && !table->ibd_file_missing) {
 			dict_update_statistics(table);
 		}
 	}
@@ -897,6 +898,7 @@ dict_table_rename_in_cache(
 	ulint		old_size;
 	char*		name_buf;
 	char*		old_name;
+	ibool		success;
 	ulint		i;
 	
 	ut_ad(table);
@@ -914,6 +916,21 @@ dict_table_rename_in_cache(
 		HASH_SEARCH(name_hash, dict_sys->table_hash, fold, table2,
 				(ut_strcmp(table2->name, new_name) == 0));
 		if (table2) {
+			fprintf(stderr,
+"InnoDB: Error: dictionary cache already contains a table of name %s\n",
+	 							     new_name);
+			return(FALSE);
+		}
+	}
+
+	/* If the table is stored in a single-table tablespace, rename the
+	.ibd file */
+
+	if (table->space != 0) {
+		success = fil_rename_tablespace(table->name, table->space,
+								new_name);
+		if (!success) {
+
 			return(FALSE);
 		}
 	}
@@ -942,7 +959,6 @@ dict_table_rename_in_cache(
 	/* Add table to hash table of tables */
 	HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold,
 								   table);
-	
 	dict_sys->size += (mem_heap_get_size(table->heap) - old_size);
 
 	/* Update the table_name field in indexes */
@@ -1088,6 +1104,31 @@ dict_table_rename_in_cache(
 }
 
 /**************************************************************************
+Changes the id of a table object in the dictionary cache by rehashing it in
+dict_sys->table_id_hash under new_id. This is used in DISCARD TABLESPACE. */
+
+void
+dict_table_change_id_in_cache(
+/*==========================*/
+	dict_table_t*	table,	/* in: table object already in cache */
+	dulint		new_id)	/* in: new id to set */
+{
+	ut_ad(table);
+	ut_ad(mutex_own(&(dict_sys->mutex)));
+	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
+
+	/* Remove the table from the hash table of id's: this must be done
+	before the assignment below, as the fold is computed from the old id */
+	HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash,
+					ut_fold_dulint(table->id), table);
+	table->id = new_id;
+
+	/* Add the table back to the hash table, now folded on the new id */
+	HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash,
+					ut_fold_dulint(table->id), table);
+}
+
+/**************************************************************************
 Removes a table object from the dictionary cache. */
 
 void
@@ -2735,7 +2776,7 @@ dict_create_foreign_constraints_low(
 		sprintf(buf + strlen(buf),
 " Error in foreign key constraint of table %.500s.\n"
 "Cannot find the table from the internal data dictionary of InnoDB.\n"
-"Create table statement:\n%.2000\n", name, sql_string);
+"Create table statement:\n%.2000s\n", name, sql_string);
 		ut_a(strlen(buf) < DICT_FOREIGN_ERR_BUF_LEN);
 		mutex_exit(&dict_foreign_err_mutex);
 
@@ -3594,8 +3635,8 @@ dict_tree_free(
 /*===========*/
 	dict_tree_t*	tree)	/* in, own: index tree */
 {
-	ut_ad(tree);
-	ut_ad(tree->magic_n == DICT_TREE_MAGIC_N);
+	ut_a(tree);
+	ut_a(tree->magic_n == DICT_TREE_MAGIC_N);
 
 	rw_lock_free(&(tree->lock));
 	mem_free(tree);
@@ -3609,7 +3650,8 @@ dict_tree_find_index_low(
 /*=====================*/
 				/* out: index */
 	dict_tree_t*	tree,	/* in: index tree */
-	rec_t*		rec)	/* in: record for which to find correct index */
+	rec_t*		rec)	/* in: record for which to find correct
+				index */
 {
 	dict_index_t*	index;
 	dict_table_t*	table;
@@ -3647,7 +3689,8 @@ dict_tree_find_index(
 /*=================*/
 				/* out: index */
 	dict_tree_t*	tree,	/* in: index tree */
-	rec_t*		rec)	/* in: record for which to find correct index */
+	rec_t*		rec)	/* in: record for which to find correct
+				index */
 {
 	dict_index_t*	index;
 	
@@ -3737,7 +3780,8 @@ dict_tree_build_node_ptr(
 /*=====================*/
 				/* out, own: node pointer */
 	dict_tree_t*	tree,	/* in: index tree */
-	rec_t*		rec,	/* in: record for which to build node pointer */
+	rec_t*		rec,	/* in: record for which to build node
+				pointer */
 	ulint		page_no,/* in: page number to put in node pointer */
 	mem_heap_t*	heap,	/* in: memory heap where pointer created */
 	ulint           level)  /* in: level of rec in tree: 0 means leaf
@@ -3899,6 +3943,16 @@ dict_update_statistics_low(
 	ulint		size;
 	ulint		sum_of_index_sizes	= 0;
 
+	if (table->ibd_file_missing) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: cannot calculate statistics for table %s\n"
+"InnoDB: because the .ibd file is missing. See section 15.1 of\n"
+"InnoDB: http://www.innodb.com/ibman.html for help\n", table->name);
+		/* Nothing to calculate: the missing file was reported above */
+		return;
+	}
+
 	/* If we have set a high innodb_force_recovery level, do not calculate
 	statistics, as a badly corrupted index can cause a crash in it. */
 
@@ -4053,10 +4107,11 @@ dict_table_print_low(
 	printf(
    "TABLE: name %s, id %lu %lu, columns %lu, indexes %lu, appr.rows %lu\n",
 			table->name,
-			ut_dulint_get_high(table->id),
-			ut_dulint_get_low(table->id),
-			table->n_cols, UT_LIST_GET_LEN(table->indexes),
-			(ulint)table->stat_n_rows);
+			(ulong) ut_dulint_get_high(table->id),
+			(ulong) ut_dulint_get_low(table->id),
+			(ulong) table->n_cols,
+		        (ulong) UT_LIST_GET_LEN(table->indexes),
+			(ulong) table->stat_n_rows);
 	printf("  COLUMNS: ");
 
 	for (i = 0; i < table->n_cols - 1; i++) {
@@ -4136,16 +4191,16 @@ dict_index_print_low(
 	printf(
 	"  INDEX: name %s, table name %s, id %lu %lu, fields %lu/%lu, type %lu\n",
 			index->name, index->table_name,
-			ut_dulint_get_high(tree->id),
-			ut_dulint_get_low(tree->id),
-			index->n_user_defined_cols,
-			index->n_fields, index->type);
+			(ulong) ut_dulint_get_high(tree->id),
+			(ulong) ut_dulint_get_low(tree->id),
+			(ulong) index->n_user_defined_cols,
+			(ulong) index->n_fields, (ulong) index->type);
 	printf(
       "   root page %lu, appr.key vals %lu, leaf pages %lu, size pages %lu\n",
-		tree->page,
-		(ulint)n_vals,
-		index->stat_n_leaf_pages,
-		index->stat_index_size);
+		(ulong) tree->page,
+		(ulong) n_vals,
+		(ulong) index->stat_n_leaf_pages,
+		(ulong) index->stat_index_size);
 			
 	printf("   FIELDS: ");
 
@@ -4175,7 +4230,7 @@ dict_field_print_low(
 	printf(" %s", field->name);
 
 	if (field->prefix_len != 0) {
-	        printf("(%lu)", field->prefix_len);
+	        printf("(%lu)", (ulong) field->prefix_len);
 	}
 }
 
diff --git a/innobase/dict/dict0load.c b/innobase/dict/dict0load.c
index 5a5830a2517..c6a8ebc4b55 100644
--- a/innobase/dict/dict0load.c
+++ b/innobase/dict/dict0load.c
@@ -19,6 +19,7 @@ Created 4/24/1996 Heikki Tuuri
 #include "mach0data.h"
 #include "dict0dict.h"
 #include "dict0boot.h"
+#include "srv0start.h"
 
 /************************************************************************
 Finds the first table name in the given database. */
@@ -122,8 +123,8 @@ dict_print(void)
 	rec_t*		rec;
 	byte*		field;
 	ulint		len;
-	char		table_name[10000];
 	mtr_t		mtr;
+	char		table_name[10000];
 	
 	mutex_enter(&(dict_sys->mutex));
 
@@ -188,6 +189,100 @@ loop:
 }
 
 /************************************************************************
+Scans the records in the first index of SYS_TABLES. In a crash recovery we
+already have all the tablespace objects created, and this function compares
+the space id information in the InnoDB data dictionary to what we already read
+with fil_load_single_table_tablespaces(). In all cases the biggest space id
+seen in the scan is passed to fil_set_max_space_id_if_bigger(). */
+
+void
+dict_check_tablespaces_or_store_max_id(
+/*===================================*/
+	ibool	in_crash_recovery)	/* in: are we doing a crash recovery */
+{
+	dict_table_t*	sys_tables;
+	dict_index_t*	sys_index;
+	btr_pcur_t	pcur;
+	rec_t*		rec;
+	byte*		field;
+	ulint		len;
+	ulint		space_id;
+	ulint		max_space_id	= 0;
+	mtr_t		mtr;
+	char		name[OS_FILE_MAX_PATH];
+	
+	mutex_enter(&(dict_sys->mutex));
+
+	mtr_start(&mtr);
+
+	sys_tables = dict_table_get_low((char *) "SYS_TABLES");
+	sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
+
+	btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur,
+								TRUE, &mtr);
+loop:
+	btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+
+	rec = btr_pcur_get_rec(&pcur);
+
+	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+		/* end of index: this is the only exit from the scan loop */
+
+		btr_pcur_close(&pcur);
+		mtr_commit(&mtr);
+		
+		/* We must make the tablespace cache aware of the biggest
+		known space id */
+
+		/* printf("Biggest space id in data dictionary %lu\n",
+							    max_space_id); */
+		fil_set_max_space_id_if_bigger(max_space_id);
+
+		mutex_exit(&(dict_sys->mutex));
+
+		return;
+	}	
+
+	field = rec_get_nth_field(rec, 0, &len);
+
+	if (!rec_get_deleted_flag(rec)) {
+		/* We found a record that is not delete-marked: make a
+		null-terminated copy of field 0, used below as the table name */
+
+		ut_a(len < OS_FILE_MAX_PATH - 10);
+		ut_memcpy(name, field, len);
+		name[len] = '\0';
+		/* Field 9 is read as the 4-byte space id of the table */
+		field = rec_get_nth_field(rec, 9, &len);
+		ut_a(len == 4);
+			
+		space_id = mach_read_from_4(field);
+		/* Remember the cursor position: the mtr is committed below */
+		btr_pcur_store_position(&pcur, &mtr);
+
+		mtr_commit(&mtr);
+		
+		if (space_id != 0 && in_crash_recovery) {
+			/* Check that the tablespace (the .ibd file) really
+			exists; print a warning to the .err log if not */
+			
+			fil_space_for_table_exists_in_mem(space_id, name,
+								TRUE, TRUE);
+		}
+		
+		if (space_id > max_space_id) {
+			max_space_id = space_id;
+		}
+		/* Continue the scan in a new mtr from the stored position */
+		mtr_start(&mtr);
+
+		btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
+	}
+
+	goto loop;
+}
+
+/************************************************************************
 Loads definitions for table columns. */
 static
 void
@@ -269,6 +364,15 @@ dict_load_columns(
 		field = rec_get_nth_field(rec, 6, &len);
 		prtype = mach_read_from_4(field);
 
+		if (dtype_is_non_binary_string_type(mtype, prtype)
+		    && dtype_get_charset_coll(prtype) == 0) {
+			/* This is a non-binary string type, and the table
+			was created with < 4.1.2. Use the default charset. */
+
+			prtype = dtype_form_prtype(prtype,
+					data_mysql_default_charset_coll);
+		}
+
 		field = rec_get_nth_field(rec, 7, &len);
 		col_len = mach_read_from_4(field);
 
@@ -365,13 +469,13 @@ dict_load_fields(
 
 		pos_and_prefix_len = mach_read_from_4(field);
 
-		ut_a((pos_and_prefix_len & 0xFFFF) == i
-		     || (pos_and_prefix_len & 0xFFFF0000) == (i << 16));
+		ut_a((pos_and_prefix_len & 0xFFFFUL) == i
+		     || (pos_and_prefix_len & 0xFFFF0000UL) == (i << 16));
 
 		if ((i == 0 && pos_and_prefix_len > 0)
-		    || (pos_and_prefix_len & 0xFFFF0000) > 0) {
+		    || (pos_and_prefix_len & 0xFFFF0000UL) > 0) {
 
-		        prefix_len = pos_and_prefix_len & 0xFFFF;
+		        prefix_len = pos_and_prefix_len & 0xFFFFUL;
 		} else {
 		        prefix_len = 0;
 		}
@@ -486,7 +590,7 @@ dict_load_indexes(
 		ut_ad(len == 8);
 		id = mach_read_from_8(field);
 
-		ut_a(0 == ut_strcmp((char*)"NAME",
+		ut_a(0 == ut_strcmp((char*) "NAME",
 			dict_field_get_col(
 			dict_index_get_nth_field(
 			dict_table_get_first_index(sys_indexes), 4))->name));
@@ -545,11 +649,11 @@ dict_load_indexes(
 		    && ((type & DICT_CLUSTERED)
 		        || ((table == dict_sys->sys_tables)
 		            && (name_len == ut_strlen("ID_IND"))
-			    && (0 == ut_memcmp(name_buf, (char*)"ID_IND",
+			    && (0 == ut_memcmp(name_buf, (char*) "ID_IND",
 							name_len))))) {
 
-			/* The index was created in memory already in
-			booting */
+			/* The index was created in memory already at booting
+			of the database server */
 		} else {
  			index = dict_mem_index_create(table->name, name_buf,
 						space, type, n_fields);
@@ -580,9 +684,14 @@ dictionary cache. */
 dict_table_t*
 dict_load_table(
 /*============*/
-			/* out: table, NULL if does not exist */
-	char*	name)	/* in: table name */
+			/* out: table, NULL if does not exist; if the table is
+			stored in an .ibd file, but the file does not exist,
+			then we set the ibd_file_missing flag TRUE in the table
+			object we return */
+	char*	name)	/* in: table name in the databasename/tablename
+			format */
 {
+	ibool		ibd_file_missing	= FALSE;
 	dict_table_t*	table;
 	dict_table_t*	sys_tables;
 	btr_pcur_t	pcur;
@@ -651,6 +760,23 @@ dict_load_table(
 	field = rec_get_nth_field(rec, 9, &len);
 	space = mach_read_from_4(field);
 
+	/* Check if the tablespace exists and has the right name */
+	if (space != 0) {
+		if (fil_space_for_table_exists_in_mem(space, name, FALSE,
+								   FALSE)) {
+			/* Ok; (if we did a crash recovery then the tablespace
+			can already be in the memory cache) */
+		} else {
+			/* Try to open the tablespace */
+			if (!fil_open_single_table_tablespace(space, name)) {
+				/* We failed to find a sensible tablespace
+				file */
+
+				ibd_file_missing = TRUE;
+			}
+		}
+	}
+
 	ut_a(0 == ut_strcmp((char *) "N_COLS",
 		dict_field_get_col(
 		dict_index_get_nth_field(
@@ -661,6 +787,8 @@ dict_load_table(
 
 	table = dict_mem_table_create(name, space, n_cols);
 
+	table->ibd_file_missing = ibd_file_missing;
+
 	ut_a(0 == ut_strcmp((char *) "ID",
 		dict_field_get_col(
 		dict_index_get_nth_field(
@@ -1021,7 +1149,7 @@ dict_load_foreign(
 	/* We store the type to the bits 24-31 of n_fields */
 	
 	foreign->type = foreign->n_fields >> 24;
-	foreign->n_fields = foreign->n_fields & 0xFFFFFF;
+	foreign->n_fields = foreign->n_fields & 0xFFFFFFUL;
 	
 	foreign->id = mem_heap_alloc(foreign->heap, ut_strlen(id) + 1);
 				
diff --git a/innobase/dict/dict0mem.c b/innobase/dict/dict0mem.c
index c9eb7a9d8bd..c49738a0960 100644
--- a/innobase/dict/dict0mem.c
+++ b/innobase/dict/dict0mem.c
@@ -56,6 +56,8 @@ dict_mem_table_create(
 	table->type = DICT_TABLE_ORDINARY;
 	table->name = str;
 	table->space = space;
+	table->ibd_file_missing = FALSE;
+	table->tablespace_discarded = FALSE;
 	table->n_def = 0;
 	table->n_cols = n_cols + DATA_N_SYS_COLS;
 	table->mem_fix = 0;
diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c
index 9f33013d2f9..768dda4eedc 100644
--- a/innobase/fil/fil0fil.c
+++ b/innobase/fil/fil0fil.c
@@ -1,5 +1,5 @@
 /******************************************************
-The low-level file system
+The tablespace memory cache
 
 (c) 1995 Innobase Oy
 
@@ -16,16 +16,22 @@ Created 10/25/1995 Heikki Tuuri
 #include "mach0data.h"
 #include "ibuf0ibuf.h"
 #include "buf0buf.h"
+#include "buf0flu.h"
+#include "buf0lru.h"
 #include "log0log.h"
 #include "log0recv.h"
 #include "fsp0fsp.h"
 #include "srv0srv.h"
+#include "srv0start.h"
+#include "mtr0mtr.h"
+#include "mtr0log.h"
 
+	 
 /*
-		IMPLEMENTATION OF THE LOW-LEVEL FILE SYSTEM
-		===========================================
+		IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
+		=============================================
 
-The file system is responsible for providing fast read/write access to
+The tablespace cache is responsible for providing fast read/write access to
 tablespaces and logs of the database. File creation and deletion is done
 in other modules which know more of the logic of the operation, however.
 
@@ -77,26 +83,42 @@ out of the LRU-list and keep a count of pending operations. When an operation
 completes, we decrement the count and return the file node to the LRU-list if
 the count drops to zero. */
 
+/* When mysqld is run, the default directory "." is the mysqld datadir,
+but in the MySQL Embedded Server Library and ibbackup it is not the default
+directory, and we must set the base file path explicitly */
+char*	fil_path_to_mysql_datadir	= (char*)".";
+
 ulint	fil_n_pending_log_flushes		= 0;
 ulint	fil_n_pending_tablespace_flushes	= 0;
 
 /* Null file address */
 fil_addr_t	fil_addr_null = {FIL_NULL, 0};
 
-/* File system file node data structure */
+/* File node of a tablespace or the log data space */
 typedef	struct fil_node_struct	fil_node_t;
 struct fil_node_struct {
-	char*		name;	/* the file name or path */
+	fil_space_t*	space;	/* backpointer to the space where this node
+				belongs */
+	char*		name;	/* path to the file */
 	ibool		open;	/* TRUE if file open */
 	os_file_t	handle;	/* OS handle to the file, if file open */
-	ulint		size;	/* size of the file in database pages
-				(where the possible last incomplete megabyte
-				is ignored) */
+	ibool		is_raw_disk;/* TRUE if the 'file' is actually a raw
+				device or a raw disk partition */
+	ulint		size;	/* size of the file in database pages, 0 if
+				not known yet; the possible last incomplete
+				megabyte is ignored if space == 0 */
 	ulint		n_pending;
-				/* count of pending i/o-ops on this file */
-	ibool		is_modified; /* this is set to TRUE when we write
-				to the file and FALSE when we call fil_flush
-				for this file space */
+				/* count of pending i/o's on this file;
+				closing of the file is not allowed if
+				this is > 0 */
+	ulint		n_pending_flushes;
+				/* count of pending flushes on this file;
+				closing of the file is not allowed if
+				this is > 0 */	
+	ib_longlong	modification_counter;/* when we write to the file we
+				increment this by one */
+	ib_longlong	flush_counter;/* up to what modification_counter value
+				we have flushed the modifications to disk */
 	UT_LIST_NODE_T(fil_node_t) chain;
 				/* link field for the file chain */
 	UT_LIST_NODE_T(fil_node_t) LRU;
@@ -106,19 +128,52 @@ struct fil_node_struct {
 
 #define	FIL_NODE_MAGIC_N	89389
 
-/* File system tablespace or log data structure: let us call them by a common
-name space */
+/* Tablespace or log data space: let us call them by a common name space */
 struct fil_space_struct {
-	char*		name;	/* space name */
+	char*		name;	/* space name = the path to the first file in
+				it */
 	ulint		id;	/* space id */
+	ib_longlong	tablespace_version;
+				/* in DISCARD/IMPORT this timestamp is used to
+				check if we should ignore an insert buffer
+				merge request for a page because it actually
+				was for the previous incarnation of the
+				space */
+	ibool		mark;	/* this is set to TRUE at database startup if
+				the space corresponds to a table in the InnoDB
+				data dictionary; so we can print a warning of
+				orphaned tablespaces */
+	ibool		stop_ios;/* TRUE if we want to rename the .ibd file of
+				tablespace and want to stop temporarily
+				posting of new i/o requests on the file */
+	ibool		stop_ibuf_merges;
+				/* we set this TRUE when we start deleting a
+				single-table tablespace */
+	ibool		is_being_deleted;
+				/* this is set to TRUE when we start
+				deleting a single-table tablespace and its
+				file; when this flag is set no further i/o
+				or flush requests can be placed on this space,
+				though there may be such requests still being
+				processed on this space */
 	ulint		purpose;/* FIL_TABLESPACE, FIL_LOG, or FIL_ARCH_LOG */
 	UT_LIST_BASE_NODE_T(fil_node_t) chain;
 				/* base node for the file chain */
-	ulint		size;	/* space size in pages */
+	ulint		size;	/* space size in pages; 0 if a single-table
+				tablespace whose size we do not know yet */
 	ulint		n_reserved_extents;
 				/* number of reserved free extents for
 				ongoing operations like B-tree page split */
+	ulint		n_pending_flushes; /* this is > 0 when flushing
+				the tablespace to disk; dropping of the
+				tablespace is forbidden if this is > 0 */
+	ulint		n_pending_ibuf_merges;/* this is > 0 when merging
+				insert buffer entries to a page so that we
+				may need to access the ibuf bitmap page in the
+				tablespace: dropping of the tablespace is
+				forbidden if this is > 0 */
 	hash_node_t	hash; 	/* hash chain node */
+	hash_node_t	name_hash;/* hash chain node for the name_hash table */
 	rw_lock_t	latch;	/* latch protecting the file space storage
 				allocation */
 	UT_LIST_NODE_T(fil_space_t) space_list;
@@ -130,80 +185,126 @@ struct fil_space_struct {
 
 #define	FIL_SPACE_MAGIC_N	89472
 
-/* The file system data structure */
+/* The tablespace memory cache; also the totality of logs = the log data space,
+is stored here; below we talk about tablespaces, but also the ib_logfiles
+form a 'space' and it is handled here */
 
 typedef	struct fil_system_struct	fil_system_t;
 struct fil_system_struct {
-	mutex_t		mutex;		/* The mutex protecting the system */
+	mutex_t		mutex;		/* The mutex protecting the cache */
 	hash_table_t*	spaces;		/* The hash table of spaces in the
-					system */	
+					system; they are hashed on the space
+					id */
+	hash_table_t*	name_hash;	/* hash table based on the space
+					name */
 	UT_LIST_BASE_NODE_T(fil_node_t) LRU;
 					/* base node for the LRU list of the
-					most recently used open files */
-	ulint		n_open_pending;	/* current number of open files with
-					pending i/o-ops on them */
-	ulint		max_n_open;	/* maximum allowed open files */
-	os_event_t	can_open;	/* this event is set to the signaled
-					state when the system is capable of
-					opening a new file, i.e.,
-					n_open_pending < max_n_open */
+					most recently used open files with no
+					pending i/o's; if we start an i/o on
+					the file, we first remove it from this
+					list, and return it to the start of
+					the list when the i/o ends;
+					log files and the system tablespace are
+					not put to this list: they are opened
+					after the startup, and kept open until
+					shutdown */
+	ulint		n_open;		/* number of files currently open */
+	ulint		max_n_open;	/* n_open is not allowed to exceed
+					this */
+	ib_longlong	modification_counter;/* when we write to a file we
+					increment this by one */
+	ulint		max_assigned_id;/* maximum space id in the existing
+					tables, or assigned during the time
+					mysqld has been up; at an InnoDB
+					startup we scan the data dictionary
+					and set here the maximum of the
+					space id's of the tables there */
+	ib_longlong	tablespace_version;
+					/* a counter which is incremented for
+					every space object memory creation;
+					every space mem object gets a
+					'timestamp' from this; in DISCARD/
+					IMPORT this is used to check if we
+					should ignore an insert buffer merge
+					request */
 	UT_LIST_BASE_NODE_T(fil_space_t) space_list;
 					/* list of all file spaces */
 };
 
-/* The file system. This variable is NULL before the module is initialized. */
+/* The tablespace memory cache. This variable is NULL before the module is
+initialized. */
 fil_system_t*	fil_system	= NULL;
 
-/* The file system hash table size */
-#define	FIL_SYSTEM_HASH_SIZE	500
+/* The tablespace memory cache hash table size */
+#define	FIL_SYSTEM_HASH_SIZE	50 /* TODO: make bigger! */
 
 
-/***********************************************************************
-Reserves a right to open a single file. The right must be released with
-fil_release_right_to_open. */
+/************************************************************************
+NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
 
+Prepares a file node for i/o. Opens the file if it is closed. Updates the
+pending i/o's field in the node and the system appropriately. Takes the node
+off the LRU list if it is in the LRU list. The caller must hold the fil_sys
+mutex. */
+static
 void
-fil_reserve_right_to_open(void)
-/*===========================*/
-{
-loop:
-	mutex_enter(&(fil_system->mutex));
-	
-	if (fil_system->n_open_pending == fil_system->max_n_open) {
-
-		/* It is not sure we can open the file if it is closed: wait */
-
-		os_event_reset(fil_system->can_open);
-
-		mutex_exit(&(fil_system->mutex));
+fil_node_prepare_for_io(
+/*====================*/
+	fil_node_t*	node,	/* in: file node */
+	fil_system_t*	system,	/* in: tablespace memory cache */
+	fil_space_t*	space);	/* in: space */
+/************************************************************************
+Updates the data structures when an i/o operation finishes. Updates the
+pending i/o's field in the node appropriately. */
+static
+void
+fil_node_complete_io(
+/*=================*/
+	fil_node_t*	node,	/* in: file node */
+	fil_system_t*	system,	/* in: tablespace memory cache */
+	ulint		type);	/* in: OS_FILE_WRITE or OS_FILE_READ; marks
+				the node as modified if
+				type == OS_FILE_WRITE */
+/***********************************************************************
+Checks if a single-table tablespace for a given table name exists in the
+tablespace memory cache. */
+static
+ulint
+fil_get_space_id_for_table(
+/*=======================*/
+				/* out: space id, ULINT_UNDEFINED if not
+				found */
+	char*	name);		/* in: table name in the standard
+				'databasename/tablename' format */
 
-		os_event_wait(fil_system->can_open);
 
-		goto loop;
-	}
+/***********************************************************************
+Returns the version number of a tablespace, -1 if not found. */
 
-	fil_system->max_n_open--;
+ib_longlong
+fil_space_get_version(
+/*==================*/
+			/* out: version number, -1 if the tablespace does not
+			exist in the memory cache */
+	ulint	id)	/* in: space id */
+{
+	fil_system_t*	system		= fil_system;
+	fil_space_t*	space;
+	ib_longlong	version		= -1;
 
-	mutex_exit(&(fil_system->mutex));
-}
+	ut_ad(system);
 
-/***********************************************************************
-Releases a right to open a single file. */
+	mutex_enter(&(system->mutex));
 
-void
-fil_release_right_to_open(void)
-/*===========================*/
-{
-	mutex_enter(&(fil_system->mutex));
-	
-	if (fil_system->n_open_pending == fil_system->max_n_open) {
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
 
-		os_event_set(fil_system->can_open);
+	if (space) {
+		version = space->tablespace_version;
 	}
 
-	fil_system->max_n_open++;
+	mutex_exit(&(system->mutex));
 
-	mutex_exit(&(fil_system->mutex));
+	return(version);
 }
 
 /***********************************************************************
@@ -215,8 +316,8 @@ fil_space_get_latch(
 			/* out: latch protecting storage allocation */
 	ulint	id)	/* in: space id */
 {
-	fil_space_t*	space;
 	fil_system_t*	system		= fil_system;
+	fil_space_t*	space;
 
 	ut_ad(system);
 
@@ -224,6 +325,8 @@ fil_space_get_latch(
 
 	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
 
+	ut_a(space);
+
 	mutex_exit(&(system->mutex));
 
 	return(&(space->latch));
@@ -238,8 +341,8 @@ fil_space_get_type(
 			/* out: FIL_TABLESPACE or FIL_LOG */
 	ulint	id)	/* in: space id */
 {
-	fil_space_t*	space;
 	fil_system_t*	system		= fil_system;
+	fil_space_t*	space;
 
 	ut_ad(system);
 
@@ -247,6 +350,8 @@ fil_space_get_type(
 
 	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
 
+	ut_a(space);
+
 	mutex_exit(&(system->mutex));
 
 	return(space->purpose);
@@ -261,17 +366,21 @@ fil_space_get_ibuf_data(
 			/* out: ibuf data for this space */
 	ulint	id)	/* in: space id */
 {
+	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
-	fil_system_t*	system	= fil_system;
 
 	ut_ad(system);
 
+	ut_a(id == 0);
+
 	mutex_enter(&(system->mutex));
 
 	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
 
 	mutex_exit(&(system->mutex));
 
+	ut_a(space);
+
 	return(space->ibuf_data);
 }
 
@@ -284,16 +393,16 @@ fil_node_create(
 	char*	name,	/* in: file name (file must be closed) */
 	ulint	size,	/* in: file size in database blocks, rounded downwards
 			to an integer */
-	ulint	id)	/* in: space id where to append */
+	ulint	id,	/* in: space id where to append */
+	ibool	is_raw)	/* in: TRUE if a raw device or a raw disk partition */
 {
+	fil_system_t*	system	= fil_system;
 	fil_node_t*	node;
 	fil_space_t*	space;
 	char*		name2;
-	fil_system_t*	system		= fil_system;
 
 	ut_a(system);
 	ut_a(name);
-	ut_a(size > 0);
 
 	mutex_enter(&(system->mutex));
 
@@ -305,29 +414,122 @@ fil_node_create(
 
 	node->name = name2;
 	node->open = FALSE;
+
+	ut_a(!is_raw || srv_start_raw_disk_in_use);
+
+	node->is_raw_disk = is_raw;
 	node->size = size;
 	node->magic_n = FIL_NODE_MAGIC_N;
 	node->n_pending = 0;
+	node->n_pending_flushes = 0;
 
-	node->is_modified = FALSE;
+	node->modification_counter = 0;
+	node->flush_counter = 0;
 	
 	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
 
+	if (!space) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Error: Could not find tablespace %lu for\n"
+"InnoDB: file %s from the tablespace memory cache.\n", (ulong) id, name);
+		mem_free(name2);
+
+		mem_free(node);
+
+		mutex_exit(&(system->mutex));
+
+		return;
+	}
+
 	space->size += size;
 
+	node->space = space;
+
 	UT_LIST_ADD_LAST(chain, space->chain, node);
 				
 	mutex_exit(&(system->mutex));
 }
 
+/************************************************************************
+Opens the file of a node of a tablespace; if node->size is 0, the size is read
+from the opened file. The caller must own the fil_system mutex. */
+static
+void
+fil_node_open_file(
+/*===============*/
+	fil_node_t*	node,	/* in: file node; must not be open */
+	fil_system_t*	system,	/* in: tablespace memory cache */
+	fil_space_t*	space)	/* in: space */
+{
+	ib_longlong	size_bytes;
+	ulint		size_low;
+	ulint		size_high;
+	ibool		ret;
+
+	ut_ad(mutex_own(&(system->mutex)));
+
+	ut_a(node->n_pending == 0);
+	ut_a(node->open == FALSE);
+
+	/* printf("Opening file %s\n", node->name); */
+
+	if (space->purpose == FIL_LOG) {	
+		node->handle = os_file_create(node->name, OS_FILE_OPEN,
+					OS_FILE_AIO, OS_LOG_FILE, &ret);
+	} else if (node->is_raw_disk) {
+		node->handle = os_file_create(node->name,
+				        OS_FILE_OPEN_RAW,
+					OS_FILE_AIO, OS_DATA_FILE, &ret);
+	} else {
+		node->handle = os_file_create(node->name, OS_FILE_OPEN,
+					OS_FILE_AIO, OS_DATA_FILE, &ret);
+	}
+		
+	ut_a(ret);
+		
+	node->open = TRUE;
+
+	system->n_open++;
+
+	if (node->size == 0) {
+		os_file_get_size(node->handle, &size_low, &size_high);
+
+		size_bytes = (((ib_longlong)size_high) << 32)
+				     		+ (ib_longlong)size_low;
+#ifdef UNIV_HOTBACKUP
+		node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
+
+#else
+		/* It must be a single-table tablespace whose file size we do
+		not know yet; from 1 extent up it is rounded down to whole MB */
+
+		ut_a(space->id != 0);
+
+		if (size_bytes >= FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) {
+			node->size = (ulint) ((size_bytes / (1024 * 1024))
+					   * ((1024 * 1024) / UNIV_PAGE_SIZE));
+		} else {
+			node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
+		}
+#endif
+		space->size += node->size;
+	}
+
+	if (space->purpose == FIL_TABLESPACE && space->id != 0) {
+		/* Only files of tablespaces other than 0 go to the LRU list */
+		UT_LIST_ADD_FIRST(LRU, system->LRU, node);
+	}
+}
+
 /**************************************************************************
 Closes a file. */
 static
 void
-fil_node_close(
-/*===========*/
+fil_node_close_file(
+/*================*/
 	fil_node_t*	node,	/* in: file node */
-	fil_system_t*	system)	/* in: file system */
+	fil_system_t*	system)	/* in: tablespace memory cache */
 {
 	ibool	ret;
 
@@ -337,24 +539,208 @@ fil_node_close(
 #endif /* UNIV_SYNC_DEBUG */
 	ut_a(node->open);
 	ut_a(node->n_pending == 0);
+	ut_a(node->n_pending_flushes == 0);
+	ut_a(node->modification_counter == node->flush_counter);
 
 	ret = os_file_close(node->handle);
 	ut_a(ret);
 
+	/* printf("Closing file %s\n", node->name); */
+
 	node->open = FALSE;
+	ut_a(system->n_open > 0);
+	system->n_open--;
+
+	if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) {
+		ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
+
+		/* The node is in the LRU list, remove it */
+		UT_LIST_REMOVE(LRU, system->LRU, node);
+	}
+}
+
+/************************************************************************
+Tries to close a file in the LRU list. The caller must hold the fil_sys
+mutex. */
+static
+ibool
+fil_try_to_close_file_in_LRU(
+/*=========================*/
+				/* out: TRUE if success, FALSE if should retry
+				later; since i/o's generally complete in < 
+				100 ms, and as InnoDB writes at most 128 pages
+				from the buffer pool in a batch, and then
+				immediately flushes the files, there is a good
+				chance that the next time we find a suitable
+				node from the LRU list */
+	ibool	print_info)	/* in: if TRUE, prints information why it
+				cannot close a file */
+{
+	fil_system_t*	system		= fil_system;
+	fil_node_t*	node;
+
+	ut_ad(mutex_own(&(system->mutex)));
+
+	node = UT_LIST_GET_LAST(system->LRU);
+
+	if (print_info) {
+		fprintf(stderr,
+"InnoDB: fil_sys open file LRU len %lu\n", (ulong) UT_LIST_GET_LEN(system->LRU));
+	}
+
+	while (node != NULL) {
+		if (node->modification_counter == node->flush_counter
+		    && node->n_pending_flushes == 0) {
+
+			fil_node_close_file(node, system);
+			
+			return(TRUE);
+		}
+		
+		if (print_info && node->n_pending_flushes > 0) {
+			fprintf(stderr,
+"InnoDB: cannot close file %s, because n_pending_flushes %lu\n", node->name,
+				       (ulong) node->n_pending_flushes);
+		}
+
+		if (print_info
+		    && node->modification_counter != node->flush_counter) {
+			fprintf(stderr,
+"InnoDB: cannot close file %s, because mod_count %lld != fl_count %lld\n",
+				node->name, node->modification_counter,
+				node->flush_counter);
+		}
+
+		node = UT_LIST_GET_PREV(LRU, node);
+	}
 
-	/* The node is in the LRU list, remove it */
-	UT_LIST_REMOVE(LRU, system->LRU, node);
+	return(FALSE);
 }
 
 /***********************************************************************
-Frees a file node object from a file system. */
+Reserves the fil_system mutex and tries to make sure we can open at least one
+file while holding it. This should be called before calling
+fil_node_prepare_for_io(), because that function may need to open a file. */
+static
+void
+fil_mutex_enter_and_prepare_for_io(
+/*===============================*/
+	ulint	space_id)	/* in: space id */
+{
+	fil_system_t*	system		= fil_system;
+	fil_space_t*	space;
+	ibool		success;
+	ibool		print_info	= FALSE;
+	ulint		count		= 0;
+	ulint		count2		= 0;
+
+	ut_ad(!mutex_own(&(system->mutex)));
+retry:
+	mutex_enter(&(system->mutex));
+
+	if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
+		/* We keep log files and system tablespace files always open;
+		this is important in preventing deadlocks in this module, as
+		a page read completion often performs another read from the
+		insert buffer. The insert buffer is in tablespace 0, and we
+		cannot end up waiting in this function. */
+
+		return;
+	}
+
+	if (system->n_open < system->max_n_open) {
+
+		return;
+	}
+
+	HASH_SEARCH(hash, system->spaces, space_id, space,
+							space->id == space_id);
+	if (space != NULL && space->stop_ios) {
+		/* We are going to do a rename file and want to stop new i/o's
+		for a while */
+
+		if (count2 > 20000) {
+			fprintf(stderr,
+"InnoDB: Warning: tablespace %s has i/o ops stopped for a long time %lu\n",
+						    space->name,
+						    (ulong) count2);
+		}
+
+		mutex_exit(&(system->mutex));
+
+		os_thread_sleep(20000);
+
+		count2++;
+
+		goto retry;
+	}
+
+	/* If the file is already open, no need to do anything; if the space
+	does not exist, we handle the situation in the function which called
+	this function */
+
+	if (!space || UT_LIST_GET_FIRST(space->chain)->open) {
+
+		return;
+	}
+
+	if (count > 1) {
+		print_info = TRUE;
+	}
+
+	/* Too many files are open, try to close some */
+close_more:
+	success = fil_try_to_close_file_in_LRU(print_info);
+
+	if (success && system->n_open >= system->max_n_open) {
+
+		goto close_more;
+	}
+
+	if (system->n_open < system->max_n_open) {
+		/* Ok */
+
+		return;
+	}
+
+	if (count >= 2) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Warning: too many (%lu) files stay open while the maximum\n"
+"InnoDB: allowed value would be %lu.\n"
+"InnoDB: You may need to raise the value of innodb_max_files_open in\n"
+"InnoDB: my.cnf.\n", (ulong) system->n_open, (ulong) system->max_n_open);
+
+		return;
+	}
+
+	mutex_exit(&(system->mutex));
+
+#ifndef UNIV_HOTBACKUP
+	/* Wake the i/o-handler threads to make sure pending i/o's are
+	performed */
+	os_aio_simulated_wake_handler_threads();
+
+	os_thread_sleep(20000);
+#endif
+	/* Flush tablespaces so that we can close modified files in the LRU
+	list */
+
+	fil_flush_file_spaces(FIL_TABLESPACE);		
+
+	count++;
+
+	goto retry;
+}
+
+/***********************************************************************
+Frees a file node object from a tablespace memory cache. */
 static
 void
 fil_node_free(
 /*==========*/
 	fil_node_t*	node,	/* in, own: file node */
-	fil_system_t*	system,	/* in: file system */
+	fil_system_t*	system,	/* in: tablespace memory cache */
 	fil_space_t*	space)	/* in: space where the file node is chained */
 {
 	ut_ad(node && system && space);
@@ -362,9 +748,15 @@ fil_node_free(
 	ut_ad(mutex_own(&(system->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
 	ut_a(node->magic_n == FIL_NODE_MAGIC_N);
+	ut_a(node->n_pending == 0);
 
 	if (node->open) {
-		fil_node_close(node, system);
+		/* We fool the assertion in fil_node_close_file() to think
+		there are no unflushed modifications in the file */
+
+		node->modification_counter = node->flush_counter;
+
+		fil_node_close_file(node, system);
 	}
 
 	space->size -= node->size;
@@ -387,9 +779,9 @@ fil_space_truncate_start(
 				if this does not equal to the combined size of
 				some initial files in the space */
 {
+	fil_system_t*	system		= fil_system;
 	fil_node_t*	node;
 	fil_space_t*	space;
-	fil_system_t*	system		= fil_system;
 
 	mutex_enter(&(system->mutex));
 
@@ -398,7 +790,6 @@ fil_space_truncate_start(
 	ut_a(space);
 	
 	while (trunc_len > 0) {
-
 		node = UT_LIST_GET_FIRST(space->chain);
 
 		ut_a(node->size * UNIV_PAGE_SIZE >= trunc_len);
@@ -409,17 +800,346 @@ fil_space_truncate_start(
 	}	
 				
 	mutex_exit(&(system->mutex));
-}				
+}
+
+/***********************************************************************
+Creates a space memory object and puts it to the tablespace memory cache. If
+there is an error, prints an error message to the .err log. */
+
+ibool
+fil_space_create(
+/*=============*/
+			/* out: TRUE if the space was added to the cache */
+	char*	name,	/* in: space name */
+	ulint	id,	/* in: space id */
+	ulint	purpose)/* in: FIL_TABLESPACE, or FIL_LOG if log */
+{
+	fil_system_t*	system		= fil_system;
+	fil_space_t*	space;	
+	char*		name2;
+	ulint		namesake_id;
+try_again:
+	/*printf(
+	"InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name,
+	  purpose);*/
+
+	ut_a(system);
+	ut_a(name);
+
+	mutex_enter(&(system->mutex));
+
+	HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(name), space,
+					0 == strcmp(name, space->name));
+	if (space != NULL) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Warning: trying to init to the tablespace memory cache\n"
+"InnoDB: a tablespace %lu of name %s,\n"
+"InnoDB: but a tablespace %lu of the same name %s\n"
+"InnoDB: already exists in the tablespace memory cache!\n",
+			       (ulong) id, name,
+			       (ulong) space->id, space->name);
+
+		if (id == 0 || purpose != FIL_TABLESPACE) {
+
+			mutex_exit(&(system->mutex));
+
+			return(FALSE);
+		}
+
+		fprintf(stderr,
+"InnoDB: We assume that InnoDB did a crash recovery, and you had\n"
+"InnoDB: an .ibd file for which the table did not exist in the\n"
+"InnoDB: InnoDB internal data dictionary in the ibdata files.\n"
+"InnoDB: We assume that you later removed the .ibd and .frm files,\n"
+"InnoDB: and are now trying to recreate the table. We now remove the\n"
+"InnoDB: conflicting tablespace object from the memory cache and try\n"
+"InnoDB: the init again.\n");
+
+		namesake_id = space->id;
+
+		mutex_exit(&(system->mutex));	/* fil_space_free() takes it */
+
+		fil_space_free(namesake_id);
+
+		goto try_again;
+	}
+
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+	if (space != NULL) {
+		fprintf(stderr,
+"InnoDB: Error: trying to add tablespace %lu of name %s\n"
+"InnoDB: to the tablespace memory cache, but tablespace\n"
+"InnoDB: %lu of name %s already exists in the tablespace\n"
+"InnoDB: memory cache!\n", (ulong) id, name, (ulong) space->id, space->name);
+
+		mutex_exit(&(system->mutex));
+
+		return(FALSE);
+	}
+
+	space = mem_alloc(sizeof(fil_space_t));
+
+	name2 = mem_alloc(ut_strlen(name) + 1);
+
+	ut_strcpy(name2, name);
+
+	space->name = name2;
+	space->id = id;
+
+	system->tablespace_version++;
+	space->tablespace_version = system->tablespace_version;
+	space->mark = FALSE;
+
+	if (purpose == FIL_TABLESPACE && id > system->max_assigned_id) {
+		system->max_assigned_id = id;
+	}
+
+	space->stop_ios = FALSE;
+	space->stop_ibuf_merges = FALSE;
+	space->is_being_deleted = FALSE;
+	space->purpose = purpose;
+	space->size = 0;
+
+	space->n_reserved_extents = 0;
+	
+	space->n_pending_flushes = 0;
+	space->n_pending_ibuf_merges = 0;
+
+	UT_LIST_INIT(space->chain);
+	space->magic_n = FIL_SPACE_MAGIC_N;
+
+	space->ibuf_data = NULL;
+	
+	rw_lock_create(&(space->latch));
+	rw_lock_set_level(&(space->latch), SYNC_FSP);
+	
+	HASH_INSERT(fil_space_t, hash, system->spaces, id, space);
+
+	HASH_INSERT(fil_space_t, name_hash, system->name_hash,
+						ut_fold_string(name), space);
+	UT_LIST_ADD_LAST(space_list, system->space_list, space);
+				
+	mutex_exit(&(system->mutex));
+
+	return(TRUE);
+}
+
+/***********************************************************************
+Assigns a new space id for a new single-table tablespace. This works simply by
+incrementing the global counter. If 4 billion id's are not enough, we may need
+to recycle id's. */
+static
+ulint
+fil_assign_new_space_id(void)
+/*=========================*/
+			/* out: new tablespace id; ULINT_UNDEFINED if could
+			not assign an id */
+{
+	fil_system_t*	system = fil_system;
+	ulint		id;
+
+	mutex_enter(&(system->mutex));
+
+	system->max_assigned_id++;
+
+	id = system->max_assigned_id;
+
+	if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
+	        ut_print_timestamp(stderr);
+	        fprintf(stderr,
+"InnoDB: Warning: you are running out of new single-table tablespace id's.\n"
+"InnoDB: Current counter is %lu and it must not exceed %lu!\n"
+"InnoDB: To reset the counter to zero you have to dump all your tables and\n"
+"InnoDB: recreate the whole InnoDB installation.\n", (ulong) id,
+						     (ulong) SRV_LOG_SPACE_FIRST_ID);
+	}
+
+	if (id >= SRV_LOG_SPACE_FIRST_ID) {
+	        ut_print_timestamp(stderr);
+	        fprintf(stderr,
+"InnoDB: You have run out of single-table tablespace id's!\n"
+"InnoDB: Current counter is %lu.\n"
+"InnoDB: To reset the counter to zero you have to dump all your tables and\n"
+"InnoDB: recreate the whole InnoDB installation.\n", (ulong) id);
+		system->max_assigned_id--;	/* undo the increment above */
+
+		id = ULINT_UNDEFINED;
+	}
+
+	mutex_exit(&(system->mutex));
+
+	return(id);
+}
+
+/***********************************************************************
+Frees a space object from the tablespace memory cache. Closes the files in
+the chain but does not delete them. There must not be any pending i/o's or
+flushes on the files. */
+
+ibool
+fil_space_free(
+/*===========*/
+			/* out: TRUE if success, FALSE if not in the cache */
+	ulint	id)	/* in: space id */
+{
+	fil_system_t*	system = fil_system;
+	fil_space_t*	space;
+	fil_space_t*	namespace;
+	fil_node_t*	fil_node;
+
+	mutex_enter(&(system->mutex));
+
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+	if (!space) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Error: trying to remove tablespace %lu from the cache but\n"
+"InnoDB: it is not there.\n", (ulong) id);
+
+		mutex_exit(&(system->mutex));
+		
+		return(FALSE);
+	}
+
+	HASH_DELETE(fil_space_t, hash, system->spaces, id, space);
+
+	HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(space->name),
+		    namespace, 0 == strcmp(space->name, namespace->name));
+	ut_a(namespace);
+	ut_a(space == namespace);
+
+	HASH_DELETE(fil_space_t, name_hash, system->name_hash,
+					   ut_fold_string(space->name), space);
+
+	UT_LIST_REMOVE(space_list, system->space_list, space);
+
+	ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
+	ut_a(0 == space->n_pending_flushes);
+
+	fil_node = UT_LIST_GET_FIRST(space->chain);
+
+	while (fil_node != NULL) {
+		fil_node_free(fil_node, system, space);
+
+		fil_node = UT_LIST_GET_FIRST(space->chain);
+	}	
+	
+	ut_a(0 == UT_LIST_GET_LEN(space->chain));
+
+	mutex_exit(&(system->mutex));
+
+	rw_lock_free(&(space->latch));
+
+	mem_free(space->name);
+	mem_free(space);
+
+	return(TRUE);
+}
+
+#ifdef UNIV_HOTBACKUP
+/***********************************************************************
+Returns the tablespace object for a given id, or NULL if not found in the
+tablespace memory cache. */
+static
+fil_space_t*
+fil_get_space_for_id_low(
+/*=====================*/
+			/* out: tablespace object or NULL; NOTE that you must
+			own &(fil_system->mutex) to call this function! */
+	ulint	id)	/* in: space id */
+{
+	fil_system_t*	system		= fil_system;
+	fil_space_t*	space;
+
+	ut_ad(system);
+
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+	return(space);
+}
+#endif
+
+/***********************************************************************
+Returns the size of the space in pages. If the size is not known yet, the file
+is opened first to find it out. Returns 0 for a space missing from the cache. */
+
+ulint
+fil_space_get_size(
+/*===============*/
+			/* out: space size, 0 if space not found */
+	ulint	id)	/* in: space id */
+{
+	fil_system_t*	system 		= fil_system;
+	fil_node_t*	node;
+	fil_space_t*	space;
+	ulint		size;
+
+	ut_ad(system);
+
+	fil_mutex_enter_and_prepare_for_io(id);
+
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+	if (space == NULL) {
+		mutex_exit(&(system->mutex));
+
+		return(0);
+	}
+
+	if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
+		ut_a(id != 0);
+
+		ut_a(1 == UT_LIST_GET_LEN(space->chain));
+
+		node = UT_LIST_GET_FIRST(space->chain);
+
+		/* It must be a single-table tablespace and we have not opened
+		the file yet; the following calls will open it and update the
+		size fields */
+
+		fil_node_prepare_for_io(node, system, space);
+		fil_node_complete_io(node, system, OS_FILE_READ);
+	}
+
+	size = space->size;
+	
+	mutex_exit(&(system->mutex));
+
+	return(size);
+}
+
+/***********************************************************************
+Checks if the pair space id, page_no refers to an existing page, i.e., page_no
+is below the size of the space. For a space not in the cache returns FALSE. */
+
+ibool
+fil_check_adress_in_tablespace(
+/*===========================*/
+			/* out: TRUE if page_no < size of the space */
+	ulint	id,	/* in: space id */
+	ulint	page_no)/* in: page number */
+{
+	if (fil_space_get_size(id) > page_no) {
+
+		return(TRUE);
+	}
+
+	return(FALSE);
+}		
 
 /********************************************************************
-Creates a file system object. */
+Creates the tablespace memory cache. */
 static
 fil_system_t*
 fil_system_create(
 /*==============*/
-				/* out, own: file system object */
+				/* out, own: tablespace memory cache */
 	ulint	hash_size,	/* in: hash table size */
-	ulint	max_n_open)	/* in: maximum number of open files */
+	ulint	max_n_open)	/* in: maximum number of open files; must be
+				> 10 */
 {
 	fil_system_t*	system;
 
@@ -433,12 +1153,17 @@ fil_system_create(
 	mutex_set_level(&(system->mutex), SYNC_ANY_LATCH);
 
 	system->spaces = hash_create(hash_size);
+	system->name_hash = hash_create(hash_size);
 
 	UT_LIST_INIT(system->LRU);
 
-	system->n_open_pending = 0;
+	system->n_open = 0;
 	system->max_n_open = max_n_open;
-	system->can_open = os_event_create(NULL);
+
+	system->modification_counter = 0;
+	system->max_assigned_id = 0;
+
+	system->tablespace_version = 0;
 
 	UT_LIST_INIT(system->space_list);
 
@@ -446,7 +1171,7 @@ fil_system_create(
 }
 
 /********************************************************************
-Initializes the file system of this module. */
+Initializes the tablespace memory cache. */
 
 void
 fil_init(
@@ -455,11 +1180,120 @@ fil_init(
 {
 	ut_a(fil_system == NULL);
 
+	/*printf("Initializing the tablespace cache with max %lu open files\n",
+							       max_n_open); */
 	fil_system = fil_system_create(FIL_SYSTEM_HASH_SIZE, max_n_open);
 }
 
+/***********************************************************************
+Opens all log files and system tablespace data files. They stay open until
+database server shutdown. This should be called at server startup after the
+space objects for the log and the system tablespace have been created, so that
+we never run out of file descriptors when reading from the insert buffer or
+writing to the log. Warns if fewer than 10 further files may be opened. */
+
+void
+fil_open_log_and_system_tablespace_files(void)
+/*==========================================*/
+{
+	fil_system_t*	system = fil_system;
+	fil_space_t*	space;
+	fil_node_t*	node;
+
+	mutex_enter(&(system->mutex));
+
+	space = UT_LIST_GET_FIRST(system->space_list);
+
+	while (space != NULL) {
+		if (space->purpose != FIL_TABLESPACE || space->id == 0) {
+			node = UT_LIST_GET_FIRST(space->chain);
+
+			while (node != NULL) {
+				if (!node->open) {
+					fil_node_open_file(node, system,
+									space);
+				}
+				if (system->max_n_open < 10 + system->n_open) {
+					fprintf(stderr,
+"InnoDB: Warning: you must raise the value of innodb_max_open_files in\n"
+"InnoDB: my.cnf! Remember that InnoDB keeps all log files and all system\n"
+"InnoDB: tablespace files open for the whole time mysqld is running, and\n"
+"InnoDB: needs to open also some .ibd files if the file-per-table storage\n"
+"InnoDB: model is used. Current open files %lu, max allowed open files %lu.\n",
+				     (ulong) system->n_open,
+				     (ulong) system->max_n_open);
+				}
+				node = UT_LIST_GET_NEXT(chain, node);
+			}
+		}
+		space = UT_LIST_GET_NEXT(space_list, space);
+	}
+
+	mutex_exit(&(system->mutex));
+}
+
+/***********************************************************************
+Closes all open files. There must not be any pending i/o's or unflushed
+modifications in the files. */
+
+void
+fil_close_all_files(void)
+/*=====================*/
+{
+	fil_system_t*	system = fil_system;
+	fil_space_t*	space;
+	fil_node_t*	node;
+
+	mutex_enter(&(system->mutex));
+
+	space = UT_LIST_GET_FIRST(system->space_list);
+
+	while (space != NULL) {
+		node = UT_LIST_GET_FIRST(space->chain);
+
+		while (node != NULL) {
+			if (node->open) {
+				fil_node_close_file(node, system);
+			}
+			node = UT_LIST_GET_NEXT(chain, node);
+		}
+		space = UT_LIST_GET_NEXT(space_list, space);
+	}
+
+	mutex_exit(&(system->mutex));
+}
+
+/***********************************************************************
+Sets the max tablespace id counter if the given number is bigger than the
+previous value. Asserts that max_id is below SRV_LOG_SPACE_FIRST_ID. */
+
+void
+fil_set_max_space_id_if_bigger(
+/*===========================*/
+	ulint	max_id)	/* in: maximum known id */
+{
+	fil_system_t*	system = fil_system;
+
+	if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
+		fprintf(stderr,
+"InnoDB: Fatal error: max tablespace id is too high, %lu\n", (ulong) max_id);
+		ut_a(0);
+	}
+
+	mutex_enter(&(system->mutex));
+
+	if (system->max_assigned_id < max_id) {
+
+		system->max_assigned_id = max_id;
+	}
+
+	mutex_exit(&(system->mutex));
+}
+
 /********************************************************************
-Writes the flushed lsn to the header of each file space. */
+Initializes the ibuf data structure for space 0 == the system tablespace.
+This can be called after the file space headers have been created and the
+dictionary system has been initialized. */
 
 void
 fil_ibuf_init_at_db_start(void)
@@ -468,39 +1302,37 @@ fil_ibuf_init_at_db_start(void)
 	fil_space_t*	space;
 
 	space = UT_LIST_GET_FIRST(fil_system->space_list);
-	
-	while (space) {
-		if (space->purpose == FIL_TABLESPACE) {
-			space->ibuf_data = ibuf_data_init_for_space(space->id);
-		}
 
-		space = UT_LIST_GET_NEXT(space_list, space);
-	}
+	ut_a(space);
+        ut_a(space->purpose == FIL_TABLESPACE);	
+
+	space->ibuf_data = ibuf_data_init_for_space(space->id);
 }
 
 /********************************************************************
-Writes the flushed lsn and the latest archived log number to the page
-header of the first page of a data file. */
+Writes the flushed lsn and the latest archived log number to the page header
+of the first page of a data file. */
 static
 ulint
 fil_write_lsn_and_arch_no_to_file(
 /*==============================*/
 	ulint	space_id,	/* in: space number */
-	ulint	sum_of_sizes,	/* in: combined size of previous files in space,
-				in database pages */
+	ulint	sum_of_sizes,	/* in: combined size of previous files in
+				space, in database pages */
 	dulint	lsn,		/* in: lsn to write */
 	ulint	arch_log_no)	/* in: archived log number to write */
 {
 	byte*	buf1;
 	byte*	buf;
 
+	UT_NOT_USED(arch_log_no);
+
 	buf1 = mem_alloc(2 * UNIV_PAGE_SIZE);
 	buf = ut_align(buf1, UNIV_PAGE_SIZE);
 
 	fil_read(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
 
 	mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
-	mach_write_to_4(buf + FIL_PAGE_ARCH_LOG_NO, arch_log_no);
 
 	fil_write(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL);
 
@@ -509,7 +1341,7 @@ fil_write_lsn_and_arch_no_to_file(
 
 /********************************************************************
 Writes the flushed lsn and the latest archived log number to the page
-header of the first page of each data file. */
+header of the first page of each data file in the system tablespace. */
 
 ulint
 fil_write_flushed_lsn_to_data_files(
@@ -528,18 +1360,22 @@ fil_write_flushed_lsn_to_data_files(
 	space = UT_LIST_GET_FIRST(fil_system->space_list);
 	
 	while (space) {
+		/* We only write the lsn to all existing data files which have
+		been open during the lifetime of the mysqld process; they are
+		represented by the space objects in the tablespace memory
+		cache. Note that all data files in the system tablespace 0 are
+		always open. */
+
 		if (space->purpose == FIL_TABLESPACE) {
 			sum_of_sizes = 0;
 
 			node = UT_LIST_GET_FIRST(space->chain);
-
 			while (node) {
 				mutex_exit(&(fil_system->mutex));
 
 				err = fil_write_lsn_and_arch_no_to_file(
-							space->id,
-							sum_of_sizes,
-							lsn, arch_log_no);
+						space->id, sum_of_sizes,
+						lsn, arch_log_no);
 				if (err != DB_SUCCESS) {
 
 					return(err);
@@ -548,11 +1384,9 @@ fil_write_flushed_lsn_to_data_files(
 				mutex_enter(&(fil_system->mutex));
 
 				sum_of_sizes += node->size;
-
 				node = UT_LIST_GET_NEXT(chain, node);
 			}
 		}
-
 		space = UT_LIST_GET_NEXT(space_list, space);
 	}
 
@@ -579,8 +1413,9 @@ fil_read_flushed_lsn_and_arch_log_no(
 	byte*	buf;
 	byte*	buf2;
 	dulint	flushed_lsn;
-	ulint	arch_log_no;
-
+        ulint   arch_log_no     = 0;    /* since InnoDB does not archive
+                                        its own logs under MySQL, this
+                                        parameter is not relevant */
 	buf2 = ut_malloc(2 * UNIV_PAGE_SIZE);
 	/* Align the memory for a possible read from a raw device */
 	buf = ut_align(buf2, UNIV_PAGE_SIZE);
@@ -588,7 +1423,6 @@ fil_read_flushed_lsn_and_arch_log_no(
 	os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE);
 
 	flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN);
-	arch_log_no = mach_read_from_4(buf + FIL_PAGE_ARCH_LOG_NO);
 
 	ut_free(buf2);
 
@@ -615,113 +1449,1442 @@ fil_read_flushed_lsn_and_arch_log_no(
 	}
 }
 
+/*================ SINGLE-TABLE TABLESPACES ==========================*/
+
 /***********************************************************************
-Creates a space object and puts it to the file system. */
+Increments the count of pending insert buffer page merges, if space is not
+being deleted. */
+
+ibool
+fil_inc_pending_ibuf_merges(
+/*========================*/
+			/* out: TRUE if the space was not found or is being
+			deleted, so that ibuf merges should be skipped */
+	ulint	id)	/* in: space id */
+{
+	fil_system_t*	system		= fil_system;
+	fil_space_t*	space;
+	
+	mutex_enter(&(system->mutex));
+
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+	if (space == NULL) {
+		fprintf(stderr,
+"InnoDB: Error: trying to do ibuf merge to a dropped tablespace %lu\n",
+			(ulong) id);
+	}
+
+	if (space == NULL || space->stop_ibuf_merges) {
+		mutex_exit(&(system->mutex));
+
+		return(TRUE);
+	}
+
+	space->n_pending_ibuf_merges++;
+
+	mutex_exit(&(system->mutex));
+
+	return(FALSE);
+}
+
+/***********************************************************************
+Decrements the count of pending insert buffer page merges. */
 
 void
-fil_space_create(
+fil_decr_pending_ibuf_merges(
+/*========================*/
+	ulint	id)	/* in: space id */
+{
+	fil_system_t*	system		= fil_system;
+	fil_space_t*	space;
+	
+	mutex_enter(&(system->mutex));
+
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+	if (space == NULL) {
+		fprintf(stderr,
+"InnoDB: Error: decrementing ibuf merge of a dropped tablespace %lu\n",
+			(ulong) id);
+	}
+
+	if (space != NULL) {
+		space->n_pending_ibuf_merges--;
+	}
+
+	mutex_exit(&(system->mutex));
+}
+
+/************************************************************
+Creates the database directory for a table if it does not exist yet. */
+static
+void
+fil_create_directory_for_tablename(
+/*===============================*/
+	char*	name)	/* in: name in the standard 'databasename/tablename'
+			format */
+{
+	char*	ptr;
+	char	path[OS_FILE_MAX_PATH];
+
+	sprintf(path, "%s/%s", fil_path_to_mysql_datadir, name);
+
+	ptr = path + ut_strlen(path);
+
+	while (*ptr != '/') {
+		ptr--;
+
+		ut_a(ptr >= path);
+	}
+
+	*ptr = '\0';
+
+	srv_normalize_path_for_win(path);
+
+	ut_a(os_file_create_directory(path, FALSE));
+}
+
+#ifndef UNIV_HOTBACKUP
+/************************************************************
+Writes a log record about an .ibd file create/rename/delete. */
+static
+void
+fil_op_write_log(
 /*=============*/
-	char*	name,	/* in: space name */
-	ulint	id,	/* in: space id */
-	ulint	purpose)/* in: FIL_TABLESPACE, or FIL_LOG if log */
+	ulint	type,		/* in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
+                        	MLOG_FILE_RENAME */
+	ulint	space_id,	/* in: space id */
+	char*	name,		/* in: table name in the familiar
+				'databasename/tablename' format, or the file
+				path in the case of MLOG_FILE_DELETE */ 
+	char*	new_name,	/* in: if type is MLOG_FILE_RENAME, the new
+				table name in the 'databasename/tablename'
+				format */
+	mtr_t*	mtr)		/* in: mini-transaction handle */
 {
-	fil_space_t*	space;	
-	char*		name2;
-	fil_system_t*	system = fil_system;
+	byte*	log_ptr;
+
+	log_ptr = mlog_open(mtr, 30);
 	
-	ut_a(system);
-	ut_a(name);
+	log_ptr = mlog_write_initial_log_record_for_file_op(type, space_id, 0,
+								log_ptr, mtr);
+	/* Let us store the strings as null-terminated for easier readability
+	and handling */
+
+	mach_write_to_2(log_ptr, ut_strlen(name) + 1);
+	log_ptr += 2;
+	
+	mlog_close(mtr, log_ptr);
+
+	mlog_catenate_string(mtr, (byte*) name, ut_strlen(name) + 1);
 
-#ifndef UNIV_BASIC_LOG_DEBUG
-	/* Spaces with an odd id number are reserved to replicate spaces
-	used in log debugging */
+	if (type == MLOG_FILE_RENAME) {
+		log_ptr = mlog_open(mtr, 30);
+		mach_write_to_2(log_ptr, ut_strlen(new_name) + 1);
+		log_ptr += 2;
 	
-	ut_a((purpose == FIL_LOG) || (id % 2 == 0));
+		mlog_close(mtr, log_ptr);
+
+		mlog_catenate_string(mtr, (byte*) new_name,
+						ut_strlen(new_name) + 1);
+	}
+}
 #endif
-	mutex_enter(&(system->mutex));
 
-	space = mem_alloc(sizeof(fil_space_t));
+/***********************************************************************
+Parses the body of a log record written about an .ibd file operation. That is,
+the log record part after the standard (type, space id, page no) header of the
+log record.
+
+If desired, also replays the delete or rename operation if the .ibd file
+exists and the space id in it matches. Replays the create operation if a file
+at that path does not exist yet. If the database directory for the file to be
+created does not exist, then we create the directory, too.
+
+Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
+datadir that we should use in replaying the file operations. */
+
+byte*
+fil_op_log_parse_or_replay(
+/*=======================*/
+                        	/* out: end of log record, or NULL if the
+				record was not completely contained between
+				ptr and end_ptr */
+        byte*   ptr,    	/* in: buffer containing the log record body,
+				or an initial segment of it, if the record does
+				not fit completely between ptr and end_ptr */
+        byte*   end_ptr,	/* in: buffer end */
+	ulint	type,		/* in: the type of this log record */
+	ibool	do_replay,	/* in: TRUE if we want to replay the
+				operation, and not just parse the log record */
+	ulint	space_id)	/* in: if do_replay is TRUE, the space id of
+				the tablespace in question; otherwise
+				ignored */
+{
+	ulint	name_len;
+	ulint	new_name_len;
+	char*	name;
+	char*	new_name	= NULL;
 
-	name2 = mem_alloc(ut_strlen(name) + 1);
+	if (end_ptr < ptr + 2) {
 
-	ut_strcpy(name2, name);
+		return(NULL);
+	}
 
-	space->name = name2;
-	space->id = id;
-	space->purpose = purpose;
-	space->size = 0;
+	name_len = mach_read_from_2(ptr);
 
-	space->n_reserved_extents = 0;
+	ptr += 2;
+
+	if (end_ptr < ptr + name_len) {
+		
+		return(NULL);
+	}
+
+	name = (char*) ptr;
+
+	ptr += name_len;
+
+	if (type == MLOG_FILE_RENAME) {
+		if (end_ptr < ptr + 2) {
+
+			return(NULL);
+		}
+
+		new_name_len = mach_read_from_2(ptr);
+		
+		ptr += 2;
+
+		if (end_ptr < ptr + new_name_len) {
+		
+			return(NULL);
+		}
+
+		new_name = (char*) ptr;
+
+		ptr += new_name_len;
+	}
+
+	/* We managed to parse a full log record body */
+/*
+	printf("Parsed log rec of type %lu space %lu\n"
+		"name %s\n", type, space_id, name);
+
+	if (type == MLOG_FILE_RENAME) {
+		printf("new name %s\n", new_name);
+	}
+*/
+	if (do_replay == FALSE) {
+
+		return(ptr);
+	}
+
+	/* Let us try to perform the file operation, if sensible. Note that
+	ibbackup has at this stage already read in all space id info to the
+	fil0fil.c data structures.
 	
-	UT_LIST_INIT(space->chain);
-	space->magic_n = FIL_SPACE_MAGIC_N;
+	NOTE that our algorithm is not guaranteed to work correctly if there
+	were renames of tables during the backup. See ibbackup code for more
+	on the problem. */
 
-	space->ibuf_data = NULL;
+	if (type == MLOG_FILE_DELETE) {
+		if (fil_tablespace_exists_in_mem(space_id)) {
+			ut_a(fil_delete_tablespace(space_id));
+		}
+	} else if (type == MLOG_FILE_RENAME) {
+		/* We do the rename based on space id, not old file name;
+		this should guarantee that after the log replay each .ibd file
+		has the correct name for the latest log sequence number; the
+		proof is left as an exercise :) */
+
+		if (fil_tablespace_exists_in_mem(space_id)) {
+			/* Create the database directory for the new name, if
+			it does not exist yet */
+			fil_create_directory_for_tablename(new_name);
 	
-	rw_lock_create(&(space->latch));
-	rw_lock_set_level(&(space->latch), SYNC_FSP);
+			/* Rename the table if there is not yet a tablespace
+			with the same name */
+
+			if (fil_get_space_id_for_table(new_name)
+			    == ULINT_UNDEFINED) {
+				/* We do not care about the old name, that is
+				why we pass NULL as the first argument */
+				ut_a(fil_rename_tablespace(NULL, space_id,
+								new_name));
+			}
+		}
+	} else {
+		ut_a(type == MLOG_FILE_CREATE);
+
+		if (fil_tablespace_exists_in_mem(space_id)) {
+			/* Do nothing */
+		} else if (fil_get_space_id_for_table(name) !=
+							ULINT_UNDEFINED) {
+			/* Do nothing */
+		} else {
+			/* Create the database directory for name, if it does
+			not exist yet */
+			fil_create_directory_for_tablename(name);
+
+			ut_a(space_id != 0);
+
+			ut_a(DB_SUCCESS == 
+				fil_create_new_single_table_tablespace(
+							&space_id, name,
+						FIL_IBD_FILE_INITIAL_SIZE));
+		}
+	}
+
+	return(ptr);
+}
+
+/***********************************************************************
+Deletes a single-table tablespace. The tablespace must be cached in the
+memory cache. */
+
+ibool
+fil_delete_tablespace(
+/*==================*/
+			/* out: TRUE if success */
+	ulint	id)	/* in: space id */
+{
+	fil_system_t*	system		= fil_system;
+	ibool		success;
+	fil_space_t*	space;
+	fil_node_t*	node;
+	ulint		count		= 0;
+	char		path[OS_FILE_MAX_PATH];
+
+	ut_a(id != 0);
+stop_ibuf_merges:
+	mutex_enter(&(system->mutex));
+
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+	if (space != NULL) {
+		space->stop_ibuf_merges = TRUE;
+
+		if (space->n_pending_ibuf_merges == 0) {
+			mutex_exit(&(system->mutex));
+
+			count = 0;
+
+			goto try_again;
+		} else {
+			if (count > 5000) {
+			   ut_print_timestamp(stderr);
+			   fprintf(stderr,
+"  InnoDB: Warning: trying to delete tablespace %s,\n"
+"InnoDB: but there are %lu pending ibuf merges on it.\n"
+"InnoDB: Loop %lu.\n", space->name, (ulong) space->n_pending_ibuf_merges,
+				   (ulong) count);
+			}
+
+			mutex_exit(&(system->mutex));
+
+			os_thread_sleep(20000);
+			count++;
+
+			goto stop_ibuf_merges;
+		}
+	}
+
+	mutex_exit(&(system->mutex));
+	count = 0;
+
+try_again:
+	mutex_enter(&(system->mutex));
+
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+	if (space == NULL) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Error: cannot delete tablespace %lu\n"
+"InnoDB: because it is not found in the tablespace memory cache.\n",
+			(ulong) id);
+
+		mutex_exit(&(system->mutex));
 	
-	HASH_INSERT(fil_space_t, hash, system->spaces, id, space);
+		return(FALSE);
+	}	
+
+	ut_a(space);
+	ut_a(strlen(space->name) < OS_FILE_MAX_PATH);
+	ut_a(space->n_pending_ibuf_merges == 0);
+
+	strcpy(path, space->name);
+
+	space->is_being_deleted = TRUE;
+
+	ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+	node = UT_LIST_GET_FIRST(space->chain);
+
+	if (space->n_pending_flushes > 0 || node->n_pending > 0) {
+		if (count > 1000) {
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+"  InnoDB: Warning: trying to delete tablespace %s,\n"
+"InnoDB: but there are %lu flushes and %lu pending i/o's on it\n"
+"InnoDB: Loop %lu.\n", space->name, (ulong) space->n_pending_flushes,
+				(ulong) node->n_pending,
+				(ulong) count);
+		}
+		mutex_exit(&(system->mutex));
+		os_thread_sleep(20000);
+
+		count++;
+
+		goto try_again;
+	}
 
-	UT_LIST_ADD_LAST(space_list, system->space_list, space);
-				
 	mutex_exit(&(system->mutex));
+#ifndef UNIV_HOTBACKUP
+	/* Invalidate in the buffer pool all pages belonging to the
+	tablespace. Since we have set space->is_being_deleted = TRUE, readahead
+	or ibuf merge can no longer read more pages of this tablespace to the
+	buffer pool. Thus we can clean the tablespace out of the buffer pool
+	completely and permanently. The flag is_being_deleted also prevents
+	fil_flush() from being applied to this tablespace. */
+
+	buf_LRU_invalidate_tablespace(id);
+#endif
+	/* printf("Deleting tablespace %s id %lu\n", space->name, id); */
+
+	success = fil_space_free(id);
+
+	if (success) {
+		success = os_file_delete(path);
+
+		if (success) {
+			/* Write a log record about the deletion of the .ibd
+			file, so that ibbackup can replay it in the
+			--apply-log phase. We use a dummy mtr and the familiar
+			log write mechanism. */
+#ifndef UNIV_HOTBACKUP
+			{
+			mtr_t		mtr;
+
+			/* When replaying the operation in ibbackup, do not try
+			to write any log record */
+			mtr_start(&mtr);
+
+			fil_op_write_log(MLOG_FILE_DELETE, id, path,
+								NULL, &mtr);
+			mtr_commit(&mtr);
+			}
+#endif
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
 }
 
 /***********************************************************************
-Frees a space object from a file system. Closes the files in the chain
-but does not delete them. */
+Discards a single-table tablespace. The tablespace must be cached in the
+memory cache. Discarding is like deleting a tablespace, but
+1) we do not drop the table from the data dictionary;
+2) we remove all insert buffer entries for the tablespace immediately; in DROP
+TABLE they are only removed gradually in the background;
+3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
+as it originally had. */
 
-void
-fil_space_free(
-/*===========*/
+ibool
+fil_discard_tablespace(
+/*===================*/
+			/* out: TRUE, also when the delete fails */
 	ulint	id)	/* in: space id */
 {
+	ibool	success;
+
+	success = fil_delete_tablespace(id);
+
+	if (!success) {
+		fprintf(stderr,
+"InnoDB: Warning: cannot delete tablespace %lu in DISCARD TABLESPACE.\n"
+"InnoDB: But let us remove the insert buffer entries for this tablespace.\n",
+			(ulong) id); 
+	}
+
+	/* Remove all insert buffer entries for the tablespace */
+
+	ibuf_delete_for_discarded_space(id);
+
+	return(TRUE);
+}
+
+/***********************************************************************
+Renames the memory cache structures of a single-table tablespace. */
+static
+ibool
+fil_rename_tablespace_in_mem(
+/*=========================*/
+				/* out: TRUE if success */
+	fil_space_t*	space,	/* in: tablespace memory object */
+	fil_node_t*	node,	/* in: file node of that tablespace */
+	char*		path)	/* in: new name */
+{
+	fil_system_t*	system		= fil_system;
+	fil_space_t*	space2;
+	char*		old_name	= space->name;
+	
+	HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(old_name),
+			       space2, 0 == strcmp(old_name, space2->name));
+	if (space != space2) {
+		fprintf(stderr,
+"InnoDB: Error: cannot find %s in tablespace memory cache\n", old_name);
+
+		return(FALSE);
+	}
+
+	HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(path),
+			       space2, 0 == strcmp(path, space2->name));
+	if (space2 != NULL) {
+		fprintf(stderr,
+"InnoDB: Error: %s is already in tablespace memory cache\n", path);
+		
+		return(FALSE);
+	}
+
+	HASH_DELETE(fil_space_t, name_hash, system->name_hash,
+					   ut_fold_string(space->name), space);
+	mem_free(space->name);
+	mem_free(node->name);
+
+	space->name = mem_alloc(strlen(path) + 1);
+	node->name = mem_alloc(strlen(path) + 1);
+
+	strcpy(space->name, path);
+	strcpy(node->name, path);
+
+	HASH_INSERT(fil_space_t, name_hash, system->name_hash,
+						ut_fold_string(path), space);
+	return(TRUE);
+}
+
+/***********************************************************************
+Renames a single-table tablespace. The tablespace must be cached in the
+tablespace memory cache. */
+
+ibool
+fil_rename_tablespace(
+/*==================*/
+				/* out: TRUE if success */
+	char*	old_name,	/* in: old table name in the standard
+				databasename/tablename format of InnoDB, or
+				NULL if we do the rename based on the space
+				id only */
+	ulint	id,		/* in: space id */
+	char*	new_name)	/* in: new table name in the standard
+				databasename/tablename format of InnoDB */
+{
+	fil_system_t*	system		= fil_system;
+	ibool		success;
 	fil_space_t*	space;
-	fil_node_t*	fil_node;
-	fil_system_t*	system 		= fil_system;
+	fil_node_t*	node;
+	ulint		count		= 0;
+	char*		path		= NULL;
+	ibool		old_name_was_specified 		= TRUE;
+	char		old_path[OS_FILE_MAX_PATH];
+
+	ut_a(id != 0);
 	
+	if (old_name == NULL) {
+		old_name = (char*)"(name not specified)";
+		old_name_was_specified = FALSE;
+	}
+retry:
+	count++;
+
+	if (count > 1000) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Warning: problems renaming %s to %s, %lu iterations\n",
+						old_name, new_name,
+					        (ulong) count);
+	}
+
 	mutex_enter(&(system->mutex));
 
 	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
 
-	HASH_DELETE(fil_space_t, hash, system->spaces, id, space);
+	if (space == NULL) {
+		fprintf(stderr,
+"InnoDB: Error: cannot find space id %lu from the tablespace memory cache\n"
+"InnoDB: though the table %s in a rename operation should have that id\n",
+							  (ulong) id, old_name);
+		mutex_exit(&(system->mutex));
 
-	UT_LIST_REMOVE(space_list, system->space_list, space);
+		return(FALSE);
+	}
 
-	ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
+	if (count > 25000) {
+		space->stop_ios = FALSE;
+		mutex_exit(&(system->mutex));
 
-	fil_node = UT_LIST_GET_FIRST(space->chain);
+		return(FALSE);
+	}
 
-	ut_d(UT_LIST_VALIDATE(chain, fil_node_t, space->chain));
+	/* We temporarily close the .ibd file because we do not trust that
+	operating systems can rename an open file. For the closing we have to
+	wait until there are no pending i/o's or flushes on the file. */
 
-	while (fil_node != NULL) {
-		fil_node_free(fil_node, system, space);
+	space->stop_ios = TRUE;
 
-		fil_node = UT_LIST_GET_FIRST(space->chain);
-	}	
+	ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+	node = UT_LIST_GET_FIRST(space->chain);
+
+	if (node->n_pending > 0 || node->n_pending_flushes > 0) {
+		/* There are pending i/o's or flushes, sleep for a while and
+		retry */
+
+		mutex_exit(&(system->mutex));
+
+		os_thread_sleep(20000);
+
+		goto retry;
+
+	} else if (node->modification_counter > node->flush_counter) {
+		/* Flush the space */
+
+		mutex_exit(&(system->mutex));
+
+		os_thread_sleep(20000);
+
+		fil_flush(id);
+
+		goto retry;
+
+	} else if (node->open) {
+		/* Close the file */
+
+		fil_node_close_file(node, system);
+	}
+
+	/* Check that the old name in the space is right */
+
+	if (old_name_was_specified) {
+		ut_a(strlen(old_name) + strlen(fil_path_to_mysql_datadir)
+						< OS_FILE_MAX_PATH - 10);
+		sprintf(old_path, "%s/%s.ibd", fil_path_to_mysql_datadir,
+								old_name);
+		srv_normalize_path_for_win(old_path);
+
+		ut_a(strcmp(space->name, old_path) == 0);
+		ut_a(strcmp(node->name, old_path) == 0);
+	} else {
+		sprintf(old_path, "%s", space->name);
+	}
+
+	/* Rename the tablespace and the node in the memory cache */
 	
-	ut_d(UT_LIST_VALIDATE(chain, fil_node_t, space->chain));
-	ut_ad(0 == UT_LIST_GET_LEN(space->chain));
+	ut_a(strlen(new_name) + strlen(fil_path_to_mysql_datadir)
+						< OS_FILE_MAX_PATH - 10);
+	path = mem_alloc(OS_FILE_MAX_PATH);
+
+	sprintf(path, "%s/%s.ibd", fil_path_to_mysql_datadir, new_name);
+
+	srv_normalize_path_for_win(path);
+
+	success = fil_rename_tablespace_in_mem(space, node, path);
+
+	if (!success) {
+
+		goto func_exit;	
+	}
+
+	success = os_file_rename(old_path, path);
+
+	if (!success) {
+		/* We have to revert the changes we made to the tablespace
+		memory cache */
+
+		ut_a(fil_rename_tablespace_in_mem(space, node, old_path));
+	}
+
+func_exit:
+	if (path) {
+		mem_free(path);
+	}
+	space->stop_ios = FALSE;
 
 	mutex_exit(&(system->mutex));
 
-	mem_free(space->name);
-	mem_free(space);
+#ifndef UNIV_HOTBACKUP	
+	if (success) {
+		mtr_t		mtr;
+
+		mtr_start(&mtr);
+
+		fil_op_write_log(MLOG_FILE_RENAME, id, old_name, new_name,
+								&mtr);
+		mtr_commit(&mtr);
+	}
+#endif
+	return(success);
 }
 
 /***********************************************************************
-Returns the size of the space in pages. */
+Creates a new single-table tablespace to a database directory of MySQL.
+Database directories are under the 'datadir' of MySQL. The datadir is the
+directory of a running mysqld program. We can refer to it by simply the
+path '.'. */
 
 ulint
-fil_space_get_size(
-/*===============*/
-			/* out: space size */
-	ulint	id)	/* in: space id */
+fil_create_new_single_table_tablespace(
+/*===================================*/
+				/* out: DB_SUCCESS or error code */
+	ulint*	space_id,	/* in/out: space id; if this is != 0, then
+				this is an input parameter, otherwise
+				output */
+	char*	tablename,	/* in: the table name in the usual
+				databasename/tablename format of InnoDB */
+	ulint	size)		/* in: the initial size of the tablespace file
+				in pages, must be >= FIL_IBD_FILE_INITIAL_SIZE
+				*/
+{
+	os_file_t       file;
+	ibool		ret;
+	ulint		err;
+	byte*		page;
+	ibool		success;
+	char		path[OS_FILE_MAX_PATH];
+
+	ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
+
+	ut_a(strlen(tablename) + strlen(fil_path_to_mysql_datadir)
+						< OS_FILE_MAX_PATH - 10);
+	sprintf(path, "%s/%s.ibd", fil_path_to_mysql_datadir, tablename);
+
+	srv_normalize_path_for_win(path);
+	
+	file = os_file_create(path, OS_FILE_CREATE, OS_FILE_NORMAL,
+						    OS_DATA_FILE, &ret);
+	if (ret == FALSE) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Error creating file %s.\n", path);
+
+		/* The following call will print an error message */
+		 
+		err = os_file_get_last_error(TRUE);
+		
+		if (err == OS_FILE_ALREADY_EXISTS) {
+		        fprintf(stderr,
+"InnoDB: The file already exists though the corresponding table did not\n"
+"InnoDB: exist in the InnoDB data dictionary. Have you moved InnoDB\n"
+"InnoDB: .ibd files around without using the SQL commands\n"
+"InnoDB: DISCARD TABLESPACE and IMPORT TABLESPACE, or did\n"
+"InnoDB: mysqld crash in the middle of CREATE TABLE? You can\n"
+"InnoDB: resolve the problem by removing the file %s\n"
+"InnoDB: under the 'datadir' of MySQL.\n", path);
+
+			return(DB_TABLESPACE_ALREADY_EXISTS);
+		}
+
+		if (err == OS_FILE_DISK_FULL) {
+
+			return(DB_OUT_OF_FILE_SPACE);
+		}
+
+		return(DB_ERROR);
+	}
+
+	page = ut_malloc(UNIV_PAGE_SIZE);
+
+	ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0);
+	
+	if (!ret) {
+		ut_free(page);
+		os_file_close(file);
+		os_file_delete(path);
+
+		return(DB_OUT_OF_FILE_SPACE);
+	}
+
+	if (*space_id == 0) {
+		*space_id = fil_assign_new_space_id();
+	}
+
+	/* printf("Creating tablespace %s id %lu\n", path, *space_id); */
+
+	if (*space_id == ULINT_UNDEFINED) {
+		ut_free(page);
+		os_file_close(file);
+		os_file_delete(path);
+
+		return(DB_ERROR);
+	}
+
+	/* We have to write the space id to the file immediately and flush the
+	file to disk. This is because in crash recovery we must be aware what
+	tablespaces exist and what are their space id's, so that we can apply
+	the log records to the right file. It may take quite a while until
+	buffer pool flush algorithms write anything to the file and flush it to
+	disk. If we did not write anything here, the file would be filled
+	with zeros from the call of os_file_set_size(), until a buffer pool
+	flush would write to it. */
+
+	memset(page, '\0', UNIV_PAGE_SIZE);
+
+	fsp_header_write_space_id(page, *space_id);		
+
+	buf_flush_init_for_writing(page, ut_dulint_zero, *space_id, 0);
+
+	ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE);
+
+	ut_free(page);
+
+	if (!ret) {
+		fprintf(stderr,
+"InnoDB: Error: could not write the first page to tablespace %s\n", path);
+
+		os_file_close(file);
+		os_file_delete(path);
+
+		return(DB_ERROR);
+	}
+
+	ret = os_file_flush(file);
+
+	if (!ret) {
+		fprintf(stderr,
+"InnoDB: Error: file flush of tablespace %s failed\n", path);
+
+		os_file_close(file);
+		os_file_delete(path);
+
+		return(DB_ERROR);
+	}
+
+	os_file_close(file);
+
+	if (*space_id == ULINT_UNDEFINED) {
+		os_file_delete(path);
+
+		return(DB_ERROR);
+	}
+
+	success = fil_space_create(path, *space_id, FIL_TABLESPACE);
+	
+	if (!success) {
+		os_file_delete(path);
+
+		return(DB_ERROR);
+	}	
+
+	fil_node_create(path, size, *space_id, FALSE);
+
+#ifndef UNIV_HOTBACKUP	
+	{
+	mtr_t		mtr;
+
+	mtr_start(&mtr);
+
+	fil_op_write_log(MLOG_FILE_CREATE, *space_id, tablename, NULL, &mtr);
+
+	mtr_commit(&mtr);
+	}
+#endif
+	return(DB_SUCCESS);
+}
+
+/************************************************************************
+It is possible, though very improbable, that the lsn's in the tablespace to be
+imported have risen above the current system lsn, if a lengthy purge, ibuf
+merge, or rollback was performed on a backup taken with ibbackup. If that is
+the case, reset page lsn's in the file. We assume that mysqld was shut down
+after it performed these cleanup operations on the .ibd file, so that it at
+the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
+first page of the .ibd file, and we can determine whether we need to reset the
+lsn's just by looking at that flush lsn. */
+
+ibool
+fil_reset_too_high_lsns(
+/*====================*/
+				/* out: TRUE if success */
+	char*	name,		/* in: table name in the databasename/tablename
+				format */
+	dulint	current_lsn)	/* in: reset lsn's if the lsn stamped to
+				FIL_PAGE_FILE_FLUSH_LSN in the first page is
+				too high */
+{
+	os_file_t	file;
+	char*		filepath;
+	byte*		page;
+	dulint		flush_lsn;
+	ulint		space_id;
+	ib_longlong	file_size;
+	ib_longlong	offset;
+	ulint		page_no;
+	ibool		success;
+
+	filepath = ut_malloc(OS_FILE_MAX_PATH);
+
+	ut_a(strlen(name) < OS_FILE_MAX_PATH - 10);
+
+	sprintf(filepath, "%s/%s.ibd", fil_path_to_mysql_datadir, name);
+					
+	srv_normalize_path_for_win(filepath);
+
+	file = os_file_create_simple_no_error_handling(filepath, OS_FILE_OPEN,
+						OS_FILE_READ_WRITE, &success);
+	if (!success) {
+		ut_free(filepath);
+
+		return(FALSE);
+	}
+
+	/* Read the first page of the tablespace */
+
+	page = ut_malloc(UNIV_PAGE_SIZE);
+
+	success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
+	if (!success) {
+
+		goto func_exit;
+	}
+
+	/* We have to read the file flush lsn from the header of the file */
+
+	flush_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
+
+	if (ut_dulint_cmp(current_lsn, flush_lsn) >= 0) {
+		/* Ok */
+		success = TRUE;
+
+		goto func_exit;
+	}
+
+	space_id = fsp_header_get_space_id(page);
+	
+	ut_print_timestamp(stderr);
+	fprintf(stderr,
+" InnoDB: Flush lsn in the tablespace file %lu to be imported\n"
+"InnoDB: is %lu %lu, which exceeds current system lsn %lu %lu.\n"
+"InnoDB: We reset the lsn's in the file %s.\n",
+			    (ulong) space_id,
+			    (ulong) ut_dulint_get_high(flush_lsn),
+			    (ulong) ut_dulint_get_low(flush_lsn),
+			    (ulong) ut_dulint_get_high(current_lsn),
+			    (ulong) ut_dulint_get_low(current_lsn), filepath);
+
+	/* Loop through all the pages in the tablespace and reset the lsn and
+	the page checksum if necessary */
+
+	file_size = os_file_get_size_as_iblonglong(file);
+
+	for (offset = 0; offset < file_size; offset += UNIV_PAGE_SIZE) {
+		success = os_file_read(file, page,
+				(ulint)(offset & 0xFFFFFFFFUL),
+				(ulint)(offset >> 32), UNIV_PAGE_SIZE);
+		if (!success) {
+
+			goto func_exit;
+		}
+		if (ut_dulint_cmp(mach_read_from_8(page + FIL_PAGE_LSN),
+				  current_lsn) > 0) {
+			/* We have to reset the lsn */
+			space_id = mach_read_from_4(page
+					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+			page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
+			
+			buf_flush_init_for_writing(page, current_lsn, space_id,
+								      page_no);
+			success = os_file_write(filepath, file, page,
+				(ulint)(offset & 0xFFFFFFFFUL),
+				(ulint)(offset >> 32), UNIV_PAGE_SIZE);
+			if (!success) {
+
+				goto func_exit;
+			}
+		}
+	}
+
+	success = os_file_flush(file);
+	if (!success) {
+
+		goto func_exit;
+	}
+
+	/* We now update the flush_lsn stamp at the start of the file */
+	success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
+	if (!success) {
+
+		goto func_exit;
+	}
+
+	mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn);
+
+	success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE);
+	if (!success) {
+
+		goto func_exit;
+	}
+	success = os_file_flush(file);
+func_exit:
+	os_file_close(file);
+	ut_free(page);
+	ut_free(filepath);
+
+	return(success);
+}
+
+/************************************************************************
+Tries to open a single-table tablespace and checks the space id is right in
+it. If that does not succeed, prints an error message to the .err log. This
+function is used to open the tablespace when we load a table definition
+to the dictionary cache. NOTE that we assume this operation is used under the
+protection of the dictionary mutex, so that two users cannot race here. This
+operation does not leave the file associated with the tablespace open, but
+closes it after we have looked at the space id in it. */
+
+ibool
+fil_open_single_table_tablespace(
+/*=============================*/
+			/* out: TRUE if success */
+	ulint	id,	/* in: space id */
+	char*	name)	/* in: table name in the databasename/tablename
+			format */
+{
+	os_file_t	file;
+	char*		filepath;
+	ibool		success;
+	byte*		page;
+	ulint		space_id;
+	ibool		ret		= TRUE;
+
+	filepath = ut_malloc(OS_FILE_MAX_PATH);
+
+	ut_a(strlen(name) < OS_FILE_MAX_PATH - 10);
+
+	sprintf(filepath, "%s/%s.ibd", fil_path_to_mysql_datadir, name);
+					
+	srv_normalize_path_for_win(filepath);
+
+	file = os_file_create_simple_no_error_handling(filepath, OS_FILE_OPEN,
+						OS_FILE_READ_ONLY, &success);
+	if (!success) {
+		/* The following call prints an error message */
+		os_file_get_last_error(TRUE);
+
+		ut_print_timestamp(stderr);
+
+	        fprintf(stderr,
+"  InnoDB: Error: trying to open a table, but could not\n"
+"InnoDB: open the tablespace file %s!\n", filepath);
+		fprintf(stderr,
+"InnoDB: have you moved InnoDB .ibd files around without using the\n"
+"InnoDB: commands DISCARD TABLESPACE and IMPORT TABLESPACE?\n"
+"InnoDB: You can look from section 15.1 of http://www.innodb.com/ibman.html\n"
+"InnoDB: how to resolve the issue.\n");
+
+		ut_free(filepath);
+
+		return(FALSE);
+	}
+
+	/* Read the first page of the tablespace */
+
+	page = ut_malloc(UNIV_PAGE_SIZE);
+
+	success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE);
+
+	/* We have to read the tablespace id from the file */
+
+	space_id = fsp_header_get_space_id(page);
+
+	if (space_id != id) {
+		ut_print_timestamp(stderr);
+
+	        fprintf(stderr,
+"  InnoDB: Error: tablespace id in file %s is %lu, but in the InnoDB\n"
+"InnoDB: data dictionary it is %lu.\n", filepath, (ulong) space_id, (ulong) id);
+		fprintf(stderr,
+"InnoDB: Have you moved InnoDB .ibd files around without using the\n"
+"InnoDB: commands DISCARD TABLESPACE and IMPORT TABLESPACE?\n"
+"InnoDB: You can look from section 15.1 of http://www.innodb.com/ibman.html\n"
+"InnoDB: how to resolve the issue.\n");
+
+		ret = FALSE;
+
+		goto func_exit;
+	}
+
+	success = fil_space_create(filepath, space_id, FIL_TABLESPACE);
+
+	if (!success) {
+		goto func_exit;
+	}
+
+	/* We do not measure the size of the file, that is why we pass the 0
+	below */
+
+	fil_node_create(filepath, 0, space_id, FALSE);
+func_exit:
+	os_file_close(file);
+	ut_free(page);
+	ut_free(filepath);
+
+	return(ret);
+}
+
+/************************************************************************
+Opens an .ibd file and adds the associated single-table tablespace to the
+InnoDB fil0fil.c data structures. */
+static
+void
+fil_load_single_table_tablespace(
+/*=============================*/
+	char*	dbname,		/* in: database name */
+	char*	filename)	/* in: file name (not a path), including the
+				.ibd extension */
 {
+	os_file_t	file;
+	char*		filepath;
+	ibool		success;
+	byte*		page;
+	ulint		space_id;
+	ulint		size_low;
+	ulint		size_high;
+	ib_longlong	size;
+#ifdef UNIV_HOTBACKUP
+	fil_space_t*	space;
+#endif
+	filepath = ut_malloc(OS_FILE_MAX_PATH);
+
+	ut_a(strlen(dbname) + strlen(filename) 
+	+ strlen(fil_path_to_mysql_datadir) < OS_FILE_MAX_PATH - 100);
+
+	sprintf(filepath, "%s/%s/%s", fil_path_to_mysql_datadir, dbname,
+								filename);
+	srv_normalize_path_for_win(filepath);
+
+	file = os_file_create_simple_no_error_handling(filepath, OS_FILE_OPEN,
+						OS_FILE_READ_ONLY, &success);
+	if (!success) {
+		/* The following call prints an error message */
+		os_file_get_last_error(TRUE);
+
+	        fprintf(stderr,
+"InnoDB: Error: could not open single-table tablespace file\n"
+"InnoDB: %s!\n", filepath);
+
+		ut_free(filepath);
+
+		return;
+	}
+
+	success = os_file_get_size(file, &size_low, &size_high);
+
+	if (!success) {
+		/* The following call prints an error message */
+		os_file_get_last_error(TRUE);
+
+	        fprintf(stderr,
+"InnoDB: Error: could not measure the size of single-table tablespace file\n"
+"InnoDB: %s!\n", filepath);
+
+		os_file_close(file);
+		ut_free(filepath);
+
+		return;
+	}
+
+	/* Every .ibd file is created >= 4 pages in size. Smaller files
+	cannot be ok. */
+
+	size = (((ib_longlong)size_high) << 32) + (ib_longlong)size_low;
+#ifndef UNIV_HOTBACKUP
+	if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
+	        fprintf(stderr,
+"InnoDB: Error: the size of single-table tablespace file %s\n"
+"InnoDB: is only %lu %lu, should be at least %lu!\n", filepath,
+			(ulong) size_high, (ulong) size_low,
+			(ulong) (FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE));
+		os_file_close(file);
+		ut_free(filepath);
+
+		return;
+	}
+#endif
+	/* Read the first page of the tablespace if the size big enough */
+
+	page = ut_malloc(UNIV_PAGE_SIZE);
+
+	space_id = ULINT_UNDEFINED;
+
+	if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE
+	    && os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE)) {
+		/* We have to read the tablespace id from the file; if the
+		read failed, the id stays undefined and is rejected below
+		instead of being parsed from a page that was never filled */
+		space_id = fsp_header_get_space_id(page);
+	}
+
+#ifndef UNIV_HOTBACKUP
+	if (space_id == ULINT_UNDEFINED || space_id == 0) {
+	        fprintf(stderr,
+"InnoDB: Error: tablespace id %lu in file %s is not sensible\n",
+			(ulong) space_id,
+			filepath);
+		goto func_exit;
+	}
+#else
+	if (space_id == ULINT_UNDEFINED || space_id == 0) {
+		char*	new_path;
+
+		fprintf(stderr,
+"InnoDB: Renaming tablespace %s of id %lu,\n"
+"InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
+"InnoDB: because its size %lld is too small (< 4 pages 16 kB each),\n"
+"InnoDB: or the space id in the file header is not sensible.\n"
+"InnoDB: This can happen in an ibbackup run, and is not dangerous.\n",
+				filepath, (ulong) space_id, filepath, size);
+		os_file_close(file);
+
+		new_path = ut_malloc(OS_FILE_MAX_PATH);
+
+		sprintf(new_path, "%s_ibbackup_old_vers_", filepath);
+		ut_sprintf_timestamp_without_extra_chars(
+					new_path + ut_strlen(new_path));
+		ut_a(os_file_rename(filepath, new_path));
+
+		ut_free(page);
+		ut_free(filepath);
+		ut_free(new_path);
+
+		return;
+	}
+
+	/* A backup may contain the same space several times, if the space got
+	renamed at a sensitive time. Since it is enough to have one version of
+	the space, we rename the file if a space with the same space id
+	already exists in the tablespace memory cache. We rather rename the
+	file than delete it, because if there is a bug, we do not want to
+	destroy valuable data. */
+
+	mutex_enter(&(fil_system->mutex));
+
+	space = fil_get_space_for_id_low(space_id);
+
+	if (space) {
+		char*	new_path;
+
+		fprintf(stderr,
+"InnoDB: Renaming tablespace %s of id %lu,\n"
+"InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
+"InnoDB: because space %s with the same id\n"
+"InnoDB: was scanned earlier. This can happen if you have renamed tables\n"
+"InnoDB: during an ibbackup run.\n", filepath, (ulong) space_id, filepath,
+								space->name);
+		os_file_close(file);
+
+		new_path = ut_malloc(OS_FILE_MAX_PATH);
+
+		sprintf(new_path, "%s_ibbackup_old_vers_", filepath);
+		ut_sprintf_timestamp_without_extra_chars(
+					new_path + ut_strlen(new_path));
+		mutex_exit(&(fil_system->mutex));
+
+		ut_a(os_file_rename(filepath, new_path));
+
+		ut_free(page);
+		ut_free(filepath);
+		ut_free(new_path);
+
+		return;
+	}
+	mutex_exit(&(fil_system->mutex));
+#endif
+	success = fil_space_create(filepath, space_id, FIL_TABLESPACE);
+
+	if (!success) {
+
+		goto func_exit;
+	}
+
+	/* We do not measure the size of the file, that is why we pass the 0
+	below */
+
+	fil_node_create(filepath, 0, space_id, FALSE);
+func_exit:
+	os_file_close(file);
+	ut_free(page);
+	ut_free(filepath);
+}
+
+/************************************************************************
+At the server startup, if we need crash recovery, scans the database
+directories under the MySQL datadir, looking for .ibd files. Those files are
+single-table tablespaces. We need to know the space id in each of them so that
+we know into which file we should look to check the contents of a page stored
+in the doublewrite buffer, also to know where to apply log records where the
+space id is != 0. */
+
+ulint
+fil_load_single_table_tablespaces(void)
+/*===================================*/
+			/* out: DB_SUCCESS or error number */
+{
+	int		ret;
+	char*		dbpath;
+	os_file_dir_t	dir;
+	os_file_dir_t	dbdir;
+	os_file_stat_t	dbinfo;
+	os_file_stat_t	fileinfo;
+
+	/* The datadir of MySQL is always the default directory of mysqld */
+
+	dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE);
+
+	if (dir == NULL) {
+
+		return(DB_ERROR);
+	}
+
+	dbpath = ut_malloc(OS_FILE_MAX_PATH);
+
+	/* Scan all directories under the datadir. They are the database
+	directories of MySQL. */
+
+	ret = os_file_readdir_next_file(fil_path_to_mysql_datadir, dir,
+								&dbinfo);
+	while (ret == 0) {
+		/* printf("Looking at %s in datadir\n", dbinfo.name); */
+
+		if (dbinfo.type == OS_FILE_TYPE_FILE
+		    || dbinfo.type == OS_FILE_TYPE_UNKNOWN) {
+
+		        goto next_datadir_item;
+		}
+
+		/* We found a symlink or a directory; try opening it to see
+		if a symlink is a directory */
+		ut_a(strlen(fil_path_to_mysql_datadir) + strlen(dbinfo.name)
+						< OS_FILE_MAX_PATH - 10);
+
+		sprintf(dbpath, "%s/%s", fil_path_to_mysql_datadir,
+								dbinfo.name);
+		srv_normalize_path_for_win(dbpath);
+
+		dbdir = os_file_opendir(dbpath, FALSE);
+
+		if (dbdir != NULL) {
+			/* printf("Opened dir %s\n", dbinfo.name); */
+
+			/* We found a database directory; loop through it,
+			looking for possible .ibd files in it */
+
+			ret = os_file_readdir_next_file(dbpath, dbdir,
+								&fileinfo);
+			while (ret == 0) {
+				/* printf(
+"     Looking at file %s\n", fileinfo.name); */
+
+			        if (fileinfo.type == OS_FILE_TYPE_DIR
+				    || fileinfo.type == OS_FILE_TYPE_UNKNOWN) {
+				        goto next_file_item;
+				}
+				/* Directories and entries of unknown type were
+				skipped above: we found a symlink or a file */
+				if (strlen(fileinfo.name) > 4
+				    && 0 == strcmp(fileinfo.name + 
+						strlen(fileinfo.name) - 4,
+						".ibd")) {
+				        /* The name ends in .ibd; try opening
+					the file */
+					fil_load_single_table_tablespace(
+						dbinfo.name, fileinfo.name);
+				}
+next_file_item:
+				ret = os_file_readdir_next_file(dbpath, dbdir,
+								&fileinfo);
+			}
+
+			if (0 != os_file_closedir(dbdir)) {
+				 fprintf(stderr,
+"InnoDB: Warning: could not close database directory %s\n", dbpath);
+			}
+		}
+		
+next_datadir_item:
+		ret = os_file_readdir_next_file(fil_path_to_mysql_datadir,
+								dir, &dbinfo);
+	}
+
+	ut_free(dbpath);
+
+	/* At the end of directory we should get 1 as the return value, -1
+	if there was an error */
+	if (ret != 1) {
+		fprintf(stderr,
+"InnoDB: Error: os_file_readdir_next_file returned %d in MySQL datadir\n",
+							       ret);
+		os_file_closedir(dir);
+
+		return(DB_ERROR);
+	}
+
+	if (0 != os_file_closedir(dir)) {
+		fprintf(stderr,
+"InnoDB: Error: could not close MySQL datadir\n");
+
+		return(DB_ERROR);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/************************************************************************
+If we need crash recovery, and we have called
+fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
+we can call this function to print an error message of orphaned .ibd files
+for which there is not a data dictionary entry with a matching table name
+and space id. */
+
+void
+fil_print_orphaned_tablespaces(void)
+/*================================*/
+{
+	fil_system_t*	sys	= fil_system;
+	fil_space_t*	cur;
+
+	mutex_enter(&(sys->mutex));
+
+	/* Warn about each tablespace of id != 0 whose mark flag is not set */
+	for (cur = UT_LIST_GET_FIRST(sys->space_list); cur != NULL;
+	     cur = UT_LIST_GET_NEXT(space_list, cur)) {
+		if (cur->purpose != FIL_TABLESPACE || cur->id == 0
+							|| cur->mark) {
+			continue;
+		}
+
+		fprintf(stderr,
+"InnoDB: Warning: tablespace %s of id %lu has no matching table in\n"
+"InnoDB: the InnoDB data dictionary.\n", cur->name, (ulong) cur->id);
+	}
+
+	mutex_exit(&(sys->mutex));
+}
+
+/***********************************************************************
+Returns TRUE if a single-table tablespace does not exist in the memory cache,
+or is being deleted there. */
+
+ibool
+fil_tablespace_deleted_or_being_deleted_in_mem(
+/*===========================================*/
+				/* out: TRUE if does not exist or is being
+				deleted */
+	ulint		id,	/* in: space id */
+	ib_longlong	version)/* in: tablespace_version should be this; if
+				you pass -1 as the value of this, then this
+				parameter is ignored */
+{
+	fil_system_t*	system	= fil_system;
 	fil_space_t*	space;
-	fil_system_t*	system		= fil_system;
-	ulint		size;
 
 	ut_ad(system);
 
@@ -729,29 +2892,36 @@ fil_space_get_size(
 
 	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
 
-	size = space->size;
-	
+	if (space == NULL || space->is_being_deleted) {
+		mutex_exit(&(system->mutex));
+
+		return(TRUE);
+	}
+
+	if (version != ((ib_longlong)-1)
+				&& space->tablespace_version != version) {
+		mutex_exit(&(system->mutex));
+
+		return(TRUE);
+	}
+
 	mutex_exit(&(system->mutex));
 
-	return(size);
+	return(FALSE);
 }
 
 /***********************************************************************
-Checks if the pair space, page_no refers to an existing page in a
-tablespace file space. */
+Returns TRUE if a single-table tablespace exists in the memory cache. */
 
 ibool
-fil_check_adress_in_tablespace(
-/*===========================*/
-			/* out: TRUE if the address is meaningful */
-	ulint	id,	/* in: space id */
-	ulint	page_no)/* in: page number */
+fil_tablespace_exists_in_mem(
+/*=========================*/
+			/* out: TRUE if exists */
+	ulint	id)	/* in: space id */
 {
-	fil_space_t*	space;
 	fil_system_t*	system		= fil_system;
-	ulint		size;
-	ibool		ret;
-	
+	fil_space_t*	space;
+
 	ut_ad(system);
 
 	mutex_enter(&(system->mutex));
@@ -759,23 +2929,356 @@ fil_check_adress_in_tablespace(
 	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
 
 	if (space == NULL) {
-		ret = FALSE;
-	} else {
-		size = space->size;
+		mutex_exit(&(system->mutex));
+
+		return(FALSE);
+	}
+
+	mutex_exit(&(system->mutex));
+
+	return(TRUE);
+}
+
+/***********************************************************************
+Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
+cache. Note that if we have not done a crash recovery at the database startup,
+there may be many tablespaces which are not yet in the memory cache. */
+
+ibool
+fil_space_for_table_exists_in_mem(
+/*==============================*/
+				/* out: TRUE if a matching tablespace exists
+				in the memory cache */
+	ulint	id,		/* in: space id */
+	char*	name,		/* in: table name in the standard
+				'databasename/tablename' format */
+	ibool	mark_space,	/* in: in crash recovery, at database startup
+				we mark all spaces which have an associated
+				table in the InnoDB data dictionary, so that
+				we can print a warning about orphaned
+				tablespaces */
+	ibool	print_error_if_does_not_exist)
+				/* in: print detailed error information to
+				the .err log if a matching tablespace is
+				not found from memory */
+{
+	fil_system_t*	system		= fil_system;
+	fil_space_t*	namespace;
+	fil_space_t*	space;
+	char		path[OS_FILE_MAX_PATH];
+
+	ut_ad(system);
+	/* The file path built below must fit in path[] */
+	ut_a(strlen(fil_path_to_mysql_datadir) + strlen(name)
+						< OS_FILE_MAX_PATH - 10);
+	mutex_enter(&(system->mutex));
+	sprintf(path, "%s/%s.ibd", fil_path_to_mysql_datadir, name);
+	srv_normalize_path_for_win(path);
+	/* Look if there is a space with the same id */
+
+	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
+
+	/* Look if there is a space with the same name; the name is the
+	directory path from the datadir to the file */
+
+	HASH_SEARCH(name_hash, system->name_hash,
+					ut_fold_string(path), namespace,
+					0 == strcmp(namespace->name, path));
+	if (space && space == namespace) {
+		/* Found */
+		
+		if (mark_space) {
+			space->mark = TRUE;
+		}
+
+		mutex_exit(&(system->mutex));
+
+		return(TRUE);
+	}
+
+	if (!print_error_if_does_not_exist) {
+		
+		mutex_exit(&(system->mutex));
+		
+		return(FALSE);
+	}
 
-		if (page_no > size) {
-			ret = FALSE;
-		} else if (space->purpose != FIL_TABLESPACE) {
-			ret = FALSE;
+	if (space == NULL) {
+		if (namespace == NULL) {
+		        ut_print_timestamp(stderr);
+			fprintf(stderr,
+"  InnoDB: Error: table %s\n"
+"InnoDB: in InnoDB data dictionary has tablespace id %lu,\n"
+"InnoDB: but tablespace with that id or name does not exist. Have\n"
+"InnoDB: you deleted or moved .ibd files?\n",
+			       name, (ulong) id);
 		} else {
-			ret = TRUE;
+		        ut_print_timestamp(stderr);
+			fprintf(stderr,
+"  InnoDB: Error: table %s\n"
+"InnoDB: in InnoDB data dictionary has tablespace id %lu,\n"
+"InnoDB: but tablespace with that id does not exist. There is\n"
+"InnoDB: a tablespace of name %s and id %lu, though. Have\n"
+"InnoDB: you deleted or moved .ibd files?\n",
+			        name, (ulong) id, namespace->name,
+				(ulong) namespace->id);
+		}
+		fprintf(stderr,
+"InnoDB: You can look from section 15.1 of http://www.innodb.com/ibman.html\n"
+"InnoDB: how to resolve the issue.\n");
+
+		mutex_exit(&(system->mutex));
+
+		return(FALSE);
+	}
+
+	if (0 != strcmp(space->name, path)) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Error: table %s\n"
+"InnoDB: in InnoDB data dictionary has tablespace id %lu,\n"
+"InnoDB: but tablespace with that id has name %s.\n"
+"InnoDB: Have you deleted or moved .ibd files?\n", name, (ulong) id, space->name);
+
+		if (namespace != NULL) {
+			fprintf(stderr,
+"InnoDB: There is a tablespace with the right name\n"
+"InnoDB: %s, but its id is %lu.\n", namespace->name, (ulong) namespace->id);
 		}
+
+		fprintf(stderr,
+"InnoDB: You can look from section 15.1 of http://www.innodb.com/ibman.html\n"
+"InnoDB: how to resolve the issue.\n");
+
+		mutex_exit(&(system->mutex));
+
+		return(FALSE);
+	}
+
+	mutex_exit(&(system->mutex));
+
+	return(FALSE);
+}
+
+/***********************************************************************
+Checks if a single-table tablespace for a given table name exists in the
+tablespace memory cache. */
+static
+ulint
+fil_get_space_id_for_table(
+/*=======================*/
+				/* out: space id, ULINT_UNDEFINED if not
+				found */
+	char*	name)		/* in: table name in the standard
+				'databasename/tablename' format */
+{
+	fil_system_t*	system		= fil_system;
+	fil_space_t*	namespace;
+	ulint		id		= ULINT_UNDEFINED;
+	char		path[OS_FILE_MAX_PATH];
+
+	ut_ad(system);
+	/* The file path built below must fit in path[] */
+	ut_a(strlen(fil_path_to_mysql_datadir) + strlen(name)
+						< OS_FILE_MAX_PATH - 10);
+	mutex_enter(&(system->mutex));
+	sprintf(path, "%s/%s.ibd", fil_path_to_mysql_datadir, name);
+	srv_normalize_path_for_win(path);
+	/* Look if there is a space with the same name; the name is the
+	directory path to the file */
+
+	HASH_SEARCH(name_hash, system->name_hash,
+					ut_fold_string(path), namespace,
+					0 == strcmp(namespace->name, path));
+	if (namespace) {
+		id = namespace->id;
+	}	
+
+	mutex_exit(&(system->mutex));
+
+	return(id);
+}
+
+/**************************************************************************
+Tries to extend a data file so that it would accommodate the number of pages
+given. The tablespace must be cached in the memory cache. If the space is big
+enough already, does nothing. */
+
+ibool
+fil_extend_space_to_desired_size(
+/*=============================*/
+				/* out: TRUE if success; FALSE if a write failed */
+	ulint*	actual_size,	/* out: size of the space after extension;
+				if we ran out of disk space this may be lower
+				than the desired size */
+	ulint	space_id,	/* in: space id, must be != 0 */
+	ulint	size_after_extend)/* in: desired size in pages after the
+				extension; if the current space size is bigger
+				than this already, the function does nothing */
+{
+	fil_system_t*	system		= fil_system;
+	fil_node_t*	node;
+	fil_space_t*	space;
+	byte*		buf2;
+	byte*		buf;
+	ulint		start_page_no;
+	ulint		file_start_page_no;
+	ulint		n_pages;
+	ulint		offset_high;
+	ulint		offset_low;
+	ibool		success		= TRUE;
+
+	fil_mutex_enter_and_prepare_for_io(space_id);
+
+	HASH_SEARCH(hash, system->spaces, space_id, space,
+						space->id == space_id);
+	ut_a(space);
+
+	if (space->size >= size_after_extend) {
+		/* Space already big enough */
+
+		*actual_size = space->size;
+
+		mutex_exit(&(system->mutex));	
+
+		return(TRUE);
 	}
 	
+	node = UT_LIST_GET_LAST(space->chain);
+
+	fil_node_prepare_for_io(node, system, space);
+
+	/* Extend at most 1 MB at a time, writing zero-filled pages */
+
+	buf2 = mem_alloc(1024 * 1024 + UNIV_PAGE_SIZE);
+	buf = ut_align(buf2, UNIV_PAGE_SIZE);
+
+	memset(buf, '\0', 1024 * 1024);
+
+	start_page_no = space->size;
+	file_start_page_no = space->size - node->size;
+
+	while (start_page_no < size_after_extend) {	
+		n_pages = size_after_extend - start_page_no;
+
+		if (n_pages > (1024 * 1024) / UNIV_PAGE_SIZE) {
+			n_pages = (1024 * 1024) / UNIV_PAGE_SIZE;
+		}
+		/* One unit of offset_high is 4096 MB == 4 GB of file offset */
+		offset_high = (start_page_no - file_start_page_no)
+				/ (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE));
+		offset_low  = ((start_page_no - file_start_page_no)
+				% (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE)))
+			      * UNIV_PAGE_SIZE;
+#ifdef UNIV_HOTBACKUP
+		success = os_file_write(node->name, node->handle, buf,
+					offset_low, offset_high,
+					UNIV_PAGE_SIZE * n_pages);
+#else
+		success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
+			node->name, node->handle, buf,
+			offset_low, offset_high,
+			UNIV_PAGE_SIZE * n_pages,
+			NULL, NULL);
+#endif
+		if (success) {
+			node->size += n_pages;
+			space->size += n_pages;
+
+			os_has_said_disk_full = FALSE;
+		} else {
+			/* Let us measure the size of the file to determine
+			how much we were able to extend it */
+			
+			n_pages = ((ulint)
+				(os_file_get_size_as_iblonglong(node->handle)
+				/ UNIV_PAGE_SIZE)) - node->size;
+
+			node->size += n_pages;
+			space->size += n_pages;
+
+			break;
+		}
+
+		start_page_no += n_pages;
+	}
+
+	mem_free(buf2);
+
+	fil_node_complete_io(node, system, OS_FILE_WRITE);
+
+	*actual_size = space->size;
+	/*
+        printf("Extended %s to %lu, actual size %lu pages\n", space->name,
+                                        size_after_extend, *actual_size); */
+	mutex_exit(&(system->mutex));	
+
+	fil_flush(space_id);
+
+	return(success);
+}
+
+#ifdef UNIV_HOTBACKUP
+/************************************************************************
+Extends all tablespaces to the size stored in the space header. During the
+ibbackup --apply-log phase we extended the spaces on-demand so that log records
+could be applied, but that may have left spaces still too small compared to
+the size stored in the space header. */
+
+void
+fil_extend_tablespaces_to_stored_len(void)
+/*======================================*/
+{
+	fil_system_t*	system 		= fil_system;
+	fil_space_t*	space;
+	byte*		buf;
+	ulint		actual_size;
+	ulint		size_in_header;
+	ulint		error;
+	ibool		success;
+
+	buf = mem_alloc(UNIV_PAGE_SIZE);
+
+	mutex_enter(&(system->mutex));
+
+	space = UT_LIST_GET_FIRST(system->space_list);
+
+	while (space) {
+	        ut_a(space->purpose == FIL_TABLESPACE);
+
+		mutex_exit(&(system->mutex)); /* no need to protect with a
+					      mutex, because this is a single-
+					      threaded operation */
+		error = fil_read(TRUE, space->id, 0, 0, UNIV_PAGE_SIZE, buf,
+									NULL);
+		ut_a(error == DB_SUCCESS);
+
+		size_in_header = fsp_get_size_low(buf);
+
+		success = fil_extend_space_to_desired_size(&actual_size,
+						space->id, size_in_header);
+		if (!success) {
+			fprintf(stderr,
+"InnoDB: Error: could not extend the tablespace of %s\n"
+"InnoDB: to the size stored in header, %lu pages;\n"
+"InnoDB: size after extension %lu pages\n"
+"InnoDB: Check that you have free disk space and retry!\n", space->name,
+				(ulong) size_in_header, (ulong) actual_size);
+			exit(1);				
+		}
+
+		mutex_enter(&(system->mutex));
+
+		space = UT_LIST_GET_NEXT(space_list, space);
+	}
+
 	mutex_exit(&(system->mutex));
 
-	return(ret);
+	mem_free(buf);
 }
+#endif
+
+/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
 
 /***********************************************************************
 Tries to reserve free extents in a file space. */
@@ -788,8 +3291,8 @@ fil_space_reserve_free_extents(
 	ulint	n_free_now,	/* in: number of free extents now */
 	ulint	n_to_reserve)	/* in: how many one wants to reserve */
 {
-	fil_space_t*	space;
 	fil_system_t*	system		= fil_system;
+	fil_space_t*	space;
 	ibool		success;
 
 	ut_ad(system);
@@ -798,6 +3301,8 @@ fil_space_reserve_free_extents(
 
 	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
 
+	ut_a(space);
+
 	if (space->n_reserved_extents + n_to_reserve > n_free_now) {
 		success = FALSE;
 	} else {
@@ -819,8 +3324,8 @@ fil_space_release_free_extents(
 	ulint	id,		/* in: space id */
 	ulint	n_reserved)	/* in: how many one reserved */
 {
-	fil_space_t*	space;
 	fil_system_t*	system		= fil_system;
+	fil_space_t*	space;
 
 	ut_ad(system);
 
@@ -828,6 +3333,7 @@ fil_space_release_free_extents(
 
 	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
 
+	ut_a(space);
 	ut_a(space->n_reserved_extents >= n_reserved);
 	
 	space->n_reserved_extents -= n_reserved;
@@ -844,8 +3350,8 @@ fil_space_get_n_reserved_extents(
 /*=============================*/
 	ulint	id)		/* in: space id */
 {
-	fil_space_t*	space;
 	fil_system_t*	system		= fil_system;
+	fil_space_t*	space;
 	ulint		n;
 
 	ut_ad(system);
@@ -863,208 +3369,99 @@ fil_space_get_n_reserved_extents(
 	return(n);
 }
 
+/*============================ FILE I/O ================================*/
+
 /************************************************************************
+NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
+
 Prepares a file node for i/o. Opens the file if it is closed. Updates the
 pending i/o's field in the node and the system appropriately. Takes the node
-off the LRU list if it is in the LRU list. */
+off the LRU list if it is in the LRU list. The caller must hold the fil_sys
+mutex. */
 static
 void
 fil_node_prepare_for_io(
 /*====================*/
 	fil_node_t*	node,	/* in: file node */
-	fil_system_t*	system,	/* in: file system */
+	fil_system_t*	system,	/* in: tablespace memory cache */
 	fil_space_t*	space)	/* in: space */
 {
-	ibool		ret;
-	fil_node_t*	last_node;
-
 	ut_ad(node && system && space);
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&(system->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
 	
+	if (system->n_open > system->max_n_open + 5) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Warning: open files %lu exceeds the limit %lu\n",
+			(ulong) system->n_open,
+			(ulong) system->max_n_open);
+	}
+
 	if (node->open == FALSE) {
-		/* File is closed */
+		/* File is closed: open it */
 		ut_a(node->n_pending == 0);
 
-		/* If too many files are open, close one */
-
-		if (system->n_open_pending + UT_LIST_GET_LEN(system->LRU)
-						== system->max_n_open) {
-
-		    	ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
-
-			last_node = UT_LIST_GET_LAST(system->LRU);
-
-			if (last_node == NULL) {
-				fprintf(stderr,
-	"InnoDB: Error: cannot close any file to open another for i/o\n"
-	"InnoDB: Pending i/o's on %lu files exist\n",
-					system->n_open_pending);
-
-				ut_error;
-			}
-
-			fil_node_close(last_node, system);
-		}
-
-		if (space->purpose == FIL_LOG) {	
-			node->handle = os_file_create(node->name, OS_FILE_OPEN,
-					OS_FILE_AIO, OS_LOG_FILE, &ret);
-		} else {
-			node->handle = os_file_create(node->name, OS_FILE_OPEN,
-					OS_FILE_AIO, OS_DATA_FILE, &ret);
-		}
-		
-		ut_a(ret);
-		
-		node->open = TRUE;
-
-		system->n_open_pending++;
-		node->n_pending = 1;
-
-		/* File was closed: the node was not in the LRU list */
-
-		return;
+		fil_node_open_file(node, system, space);
 	}
 
-	/* File is open */
-	if (node->n_pending == 0) {
+	if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE
+						      && space->id != 0) {
 		/* The node is in the LRU list, remove it */
 
-		UT_LIST_REMOVE(LRU, system->LRU, node);
-
-		system->n_open_pending++;
-		node->n_pending = 1;
-	} else {
-		/* There is already a pending i/o-op on the file: the node is
-		not in the LRU list */
+		ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
 
-		node->n_pending++;
+		UT_LIST_REMOVE(LRU, system->LRU, node);
 	}
+
+	node->n_pending++;
 }
 
 /************************************************************************
 Updates the data structures when an i/o operation finishes. Updates the
-pending i/os field in the node and the system appropriately. Puts the node
-in the LRU list if there are no other pending i/os. */
+pending i/o's field in the node appropriately. */
 static
 void
 fil_node_complete_io(
 /*=================*/
 	fil_node_t*	node,	/* in: file node */
-	fil_system_t*	system,	/* in: file system */
-	ulint		type)	/* in: OS_FILE_WRITE or ..._READ */
+	fil_system_t*	system,	/* in: tablespace memory cache */
+	ulint		type)	/* in: OS_FILE_WRITE or OS_FILE_READ; marks
+				the node as modified if
+				type == OS_FILE_WRITE */
 {
 	ut_ad(node);
 	ut_ad(system);
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&(system->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
+
 	ut_a(node->n_pending > 0);
 	
 	node->n_pending--;
 
-	if (type != OS_FILE_READ) {
-		node->is_modified = TRUE;
+	if (type == OS_FILE_WRITE) {
+		system->modification_counter++;
+		node->modification_counter = system->modification_counter;
 	}
 	
-	if (node->n_pending == 0) {
+	if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE
+					&& node->space->id != 0) {
 		/* The node must be put back to the LRU list */
 		UT_LIST_ADD_FIRST(LRU, system->LRU, node);
-
-		ut_a(system->n_open_pending > 0);
-
-		system->n_open_pending--;
-
-		if (system->n_open_pending == system->max_n_open - 1) {
-
-			os_event_set(system->can_open);
-		}
-	}
-}
-		
-/**************************************************************************
-Tries to extend a data file by the number of pages given. Any fractions of a
-megabyte are ignored. */
-
-ibool
-fil_extend_last_data_file(
-/*======================*/
-				/* out: TRUE if success, also if we run
-				out of disk space we may return TRUE */
-	ulint*	actual_increase,/* out: number of pages we were able to
-				extend, here the orginal size of the file and
-				the resulting size of the file are rounded
-				downwards to a full megabyte, and the
-				difference expressed in pages is returned */
-	ulint	size_increase)	/* in: try to extend this many pages */
-{
-	fil_node_t*	node;
-	fil_space_t*	space;
-	fil_system_t*	system		= fil_system;
-	byte*		buf2;
-	byte*		buf;
-	ibool		success;
-	ulint		i;
-
-	mutex_enter(&(system->mutex));
-
-	HASH_SEARCH(hash, system->spaces, 0, space, space->id == 0);
-
-	ut_a(space);
-	
-	node = UT_LIST_GET_LAST(space->chain);
-
-	fil_node_prepare_for_io(node, system, space);
-
-	buf2 = mem_alloc(1024 * 1024 + UNIV_PAGE_SIZE);
-	buf = ut_align(buf2, UNIV_PAGE_SIZE);
-
-	memset(buf, '\0', 1024 * 1024);
-
-	for (i = 0; i < size_increase / ((1024 * 1024) / UNIV_PAGE_SIZE); i++) {
-
-		/* If we use native Windows aio, then also this write is
-		done using it */
-
-		success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
-			node->name, node->handle, buf,
-			(node->size << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFF,
-			node->size >> (32 - UNIV_PAGE_SIZE_SHIFT),
-			1024 * 1024, NULL, NULL);
-
-		if (!success) {
-			break;
-		}
-
-		node->size += ((1024 * 1024) / UNIV_PAGE_SIZE);
-		space->size += ((1024 * 1024) / UNIV_PAGE_SIZE);
-
-		os_has_said_disk_full = FALSE;
 	}
-
-	mem_free(buf2);
-
-	fil_node_complete_io(node, system, OS_FILE_WRITE);
-
-	mutex_exit(&(system->mutex));	
-
-	*actual_increase = i * ((1024 * 1024) / UNIV_PAGE_SIZE);
-
-	fil_flush(0);
-
-	srv_data_file_sizes[srv_n_data_files - 1] += *actual_increase;
-
-	return(TRUE);
 }
 
 /************************************************************************
 Reads or writes data. This operation is asynchronous (aio). */
 
-void
+ulint
 fil_io(
 /*===*/
+				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+				if we are trying to do i/o on a tablespace
+				which does not exist */
 	ulint	type,		/* in: OS_FILE_READ or OS_FILE_WRITE,
 				ORed to OS_FILE_LOG, if a log i/o
 				and ORed to OS_AIO_SIMULATED_WAKE_LATER
@@ -1089,17 +3486,15 @@ fil_io(
 	void*	message)	/* in: message for aio handler if non-sync
 				aio used, else ignored */
 {
+	fil_system_t*	system		= fil_system;
 	ulint		mode;
 	fil_space_t*	space;
 	fil_node_t*	node;
 	ulint		offset_high;
 	ulint		offset_low;
-	fil_system_t*	system;
-	os_event_t	event;
 	ibool		ret;
 	ulint		is_log;
 	ulint		wake_later;
-	ulint		count;
 	
 	is_log = type & OS_FILE_LOG;
 	type = type & ~OS_FILE_LOG;
@@ -1110,7 +3505,7 @@ fil_io(
 	ut_ad(byte_offset < UNIV_PAGE_SIZE);
 	ut_ad(buf);
 	ut_ad(len > 0);
-	ut_ad((1 << UNIV_PAGE_SIZE_SHIFT) == UNIV_PAGE_SIZE);
+	ut_a((1 << UNIV_PAGE_SIZE_SHIFT) == UNIV_PAGE_SIZE);
 	ut_ad(fil_validate());
 #ifndef UNIV_LOG_DEBUG
 	/* ibuf bitmap pages must be read in the sync aio mode: */
@@ -1132,82 +3527,47 @@ fil_io(
 		mode = OS_AIO_NORMAL;
 	}
 
-	system = fil_system;
+	/* Reserve the fil_system mutex and make sure that we can open at
+	least one file while holding it, if the file is not already open */
 
-	count = 0;
-loop:
-	count++;
-	
-	/* NOTE that there is a possibility of a hang here:
-	if the read i/o-handler thread needs to complete
-	a read by reading from the insert buffer, it may need to
-	post another read. But if the maximum number of files
-	are already open, it cannot proceed from here! */
-	
-	mutex_enter(&(system->mutex));
+	fil_mutex_enter_and_prepare_for_io(space_id);
 	
-	if (count < 500 && !is_log && !ibuf_inside()
-	    && system->n_open_pending >= (3 * system->max_n_open) / 4) {
-
-	    	/* We are not doing an ibuf operation: leave a
-	    	safety margin of openable files for possible ibuf
-	    	merges needed in page read completion */
-
-		mutex_exit(&(system->mutex));
-
-		/* Wake the i/o-handler threads to make sure pending
-		i/o's are handled and eventually we can open the file */
-		
-		os_aio_simulated_wake_handler_threads();
-
-		os_thread_sleep(100000);
-
-		if (count > 50) {
-			fprintf(stderr,
-		"InnoDB: Warning: waiting for file closes to proceed\n"
-		"InnoDB: round %lu\n", count);
-		}
-
-		goto loop;
-	}
-
-	if (system->n_open_pending == system->max_n_open) {
-
-		/* It is not sure we can open the file if it is closed: wait */
-
-		event = system->can_open;
-		os_event_reset(event);
-
+	HASH_SEARCH(hash, system->spaces, space_id, space,
+							space->id == space_id);
+	if (!space) {
 		mutex_exit(&(system->mutex));
 
-		/* Wake the i/o-handler threads to make sure pending
-		i/o's are handled and eventually we can open the file */
-		
-		os_aio_simulated_wake_handler_threads();
-
+		ut_print_timestamp(stderr);
 		fprintf(stderr,
-		"InnoDB: Warning: max allowed number of files is open\n");
+"  InnoDB: Error: trying to do i/o to a tablespace which does not exist.\n"
+"InnoDB: i/o type %lu, space id %lu, page no. %lu, i/o length %lu bytes\n",
+			(ulong) type, (ulong) space_id, (ulong) block_offset,
+			(ulong) len);
 
-		os_event_wait(event);
-
-		goto loop;
-	}	 
-
-	HASH_SEARCH(hash, system->spaces, space_id, space,
-						space->id == space_id);
-	ut_a(space);
+		return(DB_TABLESPACE_DELETED);
+	}
 
 	ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));
 
 	node = UT_LIST_GET_FIRST(space->chain);
 
 	for (;;) {
+		if (space->id != 0 && node->size == 0) {
+			/* We do not know the size of a single-table tablespace
+			before we open the file */
+
+			break;
+		}
+
 		if (node == NULL) {
 			fprintf(stderr,
-	"InnoDB: Error: trying to access page number %lu in space %lu\n"
+	"InnoDB: Error: trying to access page number %lu in space %lu,\n"
+	"InnoDB: space name %s,\n"
 	"InnoDB: which is outside the tablespace bounds.\n"
 	"InnoDB: Byte offset %lu, len %lu, i/o type %lu\n", 
- 			block_offset, space_id, byte_offset, len, type);
+ 			(ulong) block_offset, (ulong) space_id,
+			space->name, (ulong) byte_offset, (ulong) len,
+			(ulong) type);
  			
 			ut_error;
 		}
@@ -1224,13 +3584,29 @@ loop:
 	/* Open file if closed */
 	fil_node_prepare_for_io(node, system, space);
 
+	/* Check that at least the start offset is within the bounds of a
+	single-table tablespace */
+	if (space->purpose == FIL_TABLESPACE && space->id != 0
+	    && node->size <= block_offset) {
+
+	        fprintf(stderr,
+	"InnoDB: Error: trying to access page number %lu in space %lu,\n"
+	"InnoDB: space name %s,\n"
+	"InnoDB: which is outside the tablespace bounds.\n"
+	"InnoDB: Byte offset %lu, len %lu, i/o type %lu\n", 
+ 			(ulong) block_offset, (ulong) space_id,
+			space->name, (ulong) byte_offset, (ulong) len,
+			(ulong) type);
+ 		ut_a(0);
+	}
+
 	/* Now we have made the changes in the data structures of system */
 	mutex_exit(&(system->mutex));
 
 	/* Calculate the low 32 bits and the high 32 bits of the file offset */
 
 	offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
-	offset_low  = ((block_offset << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFF)
+	offset_low  = ((block_offset << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL)
 			+ byte_offset;
 
 	ut_a(node->size - block_offset >=
@@ -1241,9 +3617,20 @@ loop:
 	ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
 	ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
 
+#ifdef UNIV_HOTBACKUP
+	/* In ibbackup do normal i/o, not aio */
+	if (type == OS_FILE_READ) {
+		ret = os_file_read(node->handle, buf, offset_low, offset_high,
+									len);
+	} else {
+		ret = os_file_write(node->name, node->handle, buf,
+					offset_low, offset_high, len);
+	}
+#else
 	/* Queue the aio request */
 	ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
 				offset_low, offset_high, len, node, message);
+#endif
 	ut_a(ret);
 
 	if (mode == OS_AIO_SYNC) {
@@ -1258,6 +3645,8 @@ loop:
 
 		ut_ad(fil_validate());
 	}
+
+	return(DB_SUCCESS);
 }
 
 /************************************************************************
@@ -1265,9 +3654,12 @@ Reads data from a space to a buffer. Remember that the possible incomplete
 blocks at the end of file are ignored: they are not taken into account when
 calculating the byte offset within a space. */
 
-void
+ulint
 fil_read(
 /*=====*/
+				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+				if we are trying to do i/o on a tablespace
+				which does not exist */
 	ibool	sync,		/* in: TRUE if synchronous aio is desired */
 	ulint	space_id,	/* in: space id */
 	ulint	block_offset,	/* in: offset in number of blocks */
@@ -1281,8 +3673,8 @@ fil_read(
 	void*	message)	/* in: message for aio handler if non-sync
 				aio used, else ignored */
 {
-	fil_io(OS_FILE_READ, sync, space_id, block_offset, byte_offset, len,
-								buf, message);
+	return(fil_io(OS_FILE_READ, sync, space_id, block_offset,
+					  byte_offset, len, buf, message));
 }
 
 /************************************************************************
@@ -1290,9 +3682,12 @@ Writes data to a space from a buffer. Remember that the possible incomplete
 blocks at the end of file are ignored: they are not taken into account when
 calculating the byte offset within a space. */
 
-void
+ulint
 fil_write(
 /*======*/
+				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+				if we are trying to do i/o on a tablespace
+				which does not exist */
 	ibool	sync,		/* in: TRUE if synchronous aio is desired */
 	ulint	space_id,	/* in: space id */
 	ulint	block_offset,	/* in: offset in number of blocks */
@@ -1306,8 +3701,8 @@ fil_write(
 	void*	message)	/* in: message for aio handler if non-sync
 				aio used, else ignored */
 {
-	fil_io(OS_FILE_WRITE, sync, space_id, block_offset, byte_offset, len,
-								buf, message);
+	return(fil_io(OS_FILE_WRITE, sync, space_id, block_offset,
+					   byte_offset, len, buf, message));
 }
 
 /**************************************************************************
@@ -1322,19 +3717,19 @@ fil_aio_wait(
 	ulint	segment)	/* in: the number of the segment in the aio
 				array to wait for */ 
 {
+	fil_system_t*	system		= fil_system;
 	ibool		ret;		
 	fil_node_t*	fil_node;
-	fil_system_t*	system		= fil_system;
 	void*		message;
 	ulint		type;
 	
 	ut_ad(fil_validate());
 
 	if (os_aio_use_native_aio) {
-		srv_io_thread_op_info[segment] = (char *) "native aio handle";
+		srv_io_thread_op_info[segment] = (char *) "handle native aio";
 #ifdef WIN_ASYNC_IO
-		ret = os_aio_windows_handle(segment, 0, &fil_node, &message,
-								&type);
+		ret = os_aio_windows_handle(segment, 0, (void**) &fil_node,
+					    &message, &type);
 #elif defined(POSIX_ASYNC_IO)
 		ret = os_aio_posix_handle(segment, &fil_node, &message);
 #else
@@ -1342,7 +3737,7 @@ fil_aio_wait(
 		ut_error;
 #endif
 	} else {
-		srv_io_thread_op_info[segment] =(char *)"simulated aio handle";
+		srv_io_thread_op_info[segment] =(char *)"handle simulated aio";
 
 		ret = os_aio_simulated_handle(segment, (void**) &fil_node,
 	                                               &message, &type);
@@ -1361,6 +3756,10 @@ fil_aio_wait(
 	ut_ad(fil_validate());
 
 	/* Do the i/o handling */
+	/* IMPORTANT: since i/o handling for reads will also read the insert
+	buffer in tablespace 0, you have to be very careful not to introduce
+	deadlocks in the i/o system. We keep tablespace 0 data files always
+	open, and use a special i/o thread to serve insert buffer requests. */
 
 	if (buf_pool_is_block(message)) {
 		srv_io_thread_op_info[segment] =
@@ -1373,7 +3772,8 @@ fil_aio_wait(
 }
 
 /**************************************************************************
-Flushes to disk possible writes cached by the OS. */
+Flushes to disk possible writes cached by the OS. If the space does not exist
+or is being dropped, does not do anything. */
 
 void
 fil_flush(
@@ -1385,41 +3785,79 @@ fil_flush(
 	fil_space_t*	space;
 	fil_node_t*	node;
 	os_file_t	file;
+	ib_longlong	old_mod_counter;
 
 	mutex_enter(&(system->mutex));
 	
 	HASH_SEARCH(hash, system->spaces, space_id, space,
-						space->id == space_id);
-	ut_a(space);
+							space->id == space_id);
+	if (!space || space->is_being_deleted) {
+		mutex_exit(&(system->mutex));
+
+		return;
+	}
 
+	space->n_pending_flushes++;	/* prevent dropping of the space while
+					we are flushing */
 	node = UT_LIST_GET_FIRST(space->chain);
 
 	while (node) {
-		if (node->open && node->is_modified) {
-			file = node->handle;
+		if (node->modification_counter > node->flush_counter) {
+			ut_a(node->open);
+
+			/* We want to flush the changes at least up to
+			old_mod_counter */
+			old_mod_counter = node->modification_counter;
 
-			node->is_modified = FALSE;
-			
 			if (space->purpose == FIL_TABLESPACE) {
 				fil_n_pending_tablespace_flushes++;
 			} else {
 				fil_n_pending_log_flushes++;
 			}
+#ifdef __WIN__
+			if (node->is_raw_disk) {
 
-			mutex_exit(&(system->mutex));
+				goto skip_flush;
+			}
+#endif
+retry:			
+			if (node->n_pending_flushes > 0) {
+				/* We want to avoid calling os_file_flush() on
+				the file twice at the same time, because we do
+				not know what bugs OS's may contain in file
+				i/o; sleep for a while */
+
+				mutex_exit(&(system->mutex));
+
+				os_thread_sleep(20000);
+
+				mutex_enter(&(system->mutex));
+
+				if (node->flush_counter >= old_mod_counter) {
+
+					goto skip_flush;
+				}
+
+				goto retry;
+			}
+
+			ut_a(node->open);
+			file = node->handle;
+			node->n_pending_flushes++;
 
-			/* Note that it is not certain, when we have
-			released the mutex above, that the file of the
-			handle is still open: we assume that the OS
-			will not crash or trap even if we pass a handle
-			to a closed file below in os_file_flush! */
+			mutex_exit(&(system->mutex));
 
 			/* printf("Flushing to file %s\n", node->name); */
-			
-			os_file_flush(file);
-			
+			os_file_flush(file);		
+
 			mutex_enter(&(system->mutex));
 
+			node->n_pending_flushes--;
+skip_flush:
+			if (node->flush_counter < old_mod_counter) {
+				node->flush_counter = old_mod_counter;
+			}
+
 			if (space->purpose == FIL_TABLESPACE) {
 				fil_n_pending_tablespace_flushes--;
 			} else {
@@ -1430,11 +3868,13 @@ fil_flush(
 		node = UT_LIST_GET_NEXT(chain, node);
 	}		
 
+	space->n_pending_flushes--;
+
 	mutex_exit(&(system->mutex));
 }
 
 /**************************************************************************
-Flushes to disk writes in file spaces of the given type possibly cached by
+Flushes to disk the writes in file spaces of the given type possibly cached by
 the OS. */
 
 void
@@ -1451,13 +3891,17 @@ fil_flush_file_spaces(
 
 	while (space) {
 		if (space->purpose == purpose) {
+			space->n_pending_flushes++; /* prevent dropping of the
+						    space while we are
+						    flushing */
 			mutex_exit(&(system->mutex));
 
 			fil_flush(space->id);
 
 			mutex_enter(&(system->mutex));
-		}
 
+			space->n_pending_flushes--;
+		}
 		space = UT_LIST_GET_NEXT(space_list, space);
 	}
 	
@@ -1465,20 +3909,18 @@ fil_flush_file_spaces(
 }
 
 /**********************************************************************
-Checks the consistency of the file system. */
+Checks the consistency of the tablespace cache. */
 
 ibool
 fil_validate(void)
 /*==============*/
 			/* out: TRUE if ok */
 {	
+	fil_system_t*	system		= fil_system;
 	fil_space_t*	space;
 	fil_node_t*	fil_node;
-	ulint		pending_count	= 0;
-	fil_system_t*	system;
+	ulint		n_open		= 0;
 	ulint		i;
-
-	system = fil_system;
 	
 	mutex_enter(&(system->mutex));
 
@@ -1489,36 +3931,35 @@ fil_validate(void)
 		space = HASH_GET_FIRST(system->spaces, i);
 	
 		while (space != NULL) {
-
 			UT_LIST_VALIDATE(chain, fil_node_t, space->chain); 
 
 			fil_node = UT_LIST_GET_FIRST(space->chain);
 
 			while (fil_node != NULL) {
-
 				if (fil_node->n_pending > 0) {
-
-					pending_count++;
 					ut_a(fil_node->open);
 				}
 
+				if (fil_node->open) {
+					n_open++;
+				}
 				fil_node = UT_LIST_GET_NEXT(chain, fil_node);
 			}
-
 			space = HASH_GET_NEXT(hash, space);
 		}
 	}
 
-	ut_a(pending_count == system->n_open_pending);
+	ut_a(system->n_open == n_open);
 
 	UT_LIST_VALIDATE(LRU, fil_node_t, system->LRU);
 
 	fil_node = UT_LIST_GET_FIRST(system->LRU);
 
 	while (fil_node != NULL) {
-
 		ut_a(fil_node->n_pending == 0);
 		ut_a(fil_node->open);
+		ut_a(fil_node->space->purpose == FIL_TABLESPACE);
+		ut_a(fil_node->space->id != 0);
 
 		fil_node = UT_LIST_GET_NEXT(LRU, fil_node);
 	}
@@ -1586,4 +4027,4 @@ fil_page_get_type(
 	ut_ad(page);
 
 	return(mach_read_from_2(page + FIL_PAGE_TYPE));
-}	
+}
diff --git a/innobase/fsp/fsp0fsp.c b/innobase/fsp/fsp0fsp.c
index 49885df07d7..9be6e1a6e50 100644
--- a/innobase/fsp/fsp0fsp.c
+++ b/innobase/fsp/fsp0fsp.c
@@ -27,6 +27,10 @@ Created 11/29/1995 Heikki Tuuri
 #include "dict0mem.h"
 #include "log0log.h"
 
+
+#define FSP_HEADER_OFFSET	FIL_PAGE_DATA	/* Offset of the space header
+						within a file page */
+
 /* The data structures in files are defined just as byte strings in C */
 typedef	byte	fsp_header_t;
 typedef	byte	xdes_t;		
@@ -38,10 +42,9 @@ File space header data structure: this data structure is contained in the
 first page of a space. The space for this header is reserved in every extent
 descriptor page, but used only in the first. */
 
-#define FSP_HEADER_OFFSET	FIL_PAGE_DATA	/* Offset of the space header
-						within a file page */
 /*-------------------------------------*/
-#define FSP_NOT_USED		0	/* this field contained a value up to
+#define FSP_SPACE_ID		0	/* space id */
+#define FSP_NOT_USED		4	/* this field contained a value up to
 					which we know that the modifications
 					in the database have been flushed to
 					the file space; not used now */
@@ -50,7 +53,13 @@ descriptor page, but used only in the first. */
 #define	FSP_FREE_LIMIT		12	/* Minimum page number for which the
 					free list has not been initialized:
 					the pages >= this limit are, by
-					definition, free */
+					definition, free; note that in a
+					single-table tablespace where size
+					< 64 pages, this number is 64, i.e.,
+					we have initialized the space
+					for the first extent, but have not
+					physically allocated those pages to the
+					file */
 #define	FSP_LOWEST_NO_WRITE	16	/* The lowest page offset for which
 					the page has not been written to disk
 					(if it has been written, we know that
@@ -83,7 +92,6 @@ descriptor page, but used only in the first. */
 #define	FSP_FREE_ADD		4	/* this many free extents are added
 					to the free list from above
 					FSP_FREE_LIMIT at a time */
-
 					
 /*			FILE SEGMENT INODE
 			==================
@@ -263,9 +271,14 @@ static
 void
 fsp_fill_free_list(
 /*===============*/
-	ulint		space,	/* in: space */
-	fsp_header_t*	header,	/* in: space header */
-	mtr_t*		mtr);	/* in: mtr */
+	ibool		init_space,	/* in: TRUE if this is a single-table
+					tablespace and we are only initing
+					the tablespace's first extent
+					descriptor page and ibuf bitmap page;
+					then we do not allocate more extents */
+	ulint		space,		/* in: space */
+	fsp_header_t*	header,		/* in: space header */
+	mtr_t*		mtr);		/* in: mtr */
 /**************************************************************************
 Allocates a single free page from a segment. This function implements
 the intelligent allocation strategy which tries to minimize file space
@@ -286,6 +299,19 @@ fseg_alloc_free_page_low(
 				FSP_UP, FSP_NO_DIR */
 	mtr_t*		mtr);	/* in: mtr handle */
 
+
+/**************************************************************************
+Reads the file space size (the FSP_SIZE field) stored in the header page. */
+
+ulint
+fsp_get_size_low(
+/*=============*/
+			/* out: size in pages, as stored in the space header */
+	page_t*	page)	/* in: header page (page 0 in the tablespace) */
+{
+	return(mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SIZE));
+}
+
 /**************************************************************************
 Gets a pointer to the space header and x-locks its page. */
 UNIV_INLINE
@@ -569,7 +595,7 @@ xdes_init(
 	ut_ad((XDES_SIZE - XDES_BITMAP) % 4 == 0);
 
 	for (i = XDES_BITMAP; i < XDES_SIZE; i += 4) {
-		mlog_write_ulint(descr + i, 0xFFFFFFFF, MLOG_4BYTES, mtr);
+		mlog_write_ulint(descr + i, 0xFFFFFFFFUL, MLOG_4BYTES, mtr);
 	}
 
 	xdes_set_state(descr, XDES_FREE, mtr);
@@ -630,8 +656,8 @@ xdes_get_descriptor_with_space_hdr(
 	page_t*	descr_page;
 
 	ut_ad(mtr);
-	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space), MTR_MEMO_X_LOCK));
-
+	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space),
+						MTR_MEMO_X_LOCK));
 	/* Read free limit and space size */
 	limit = mtr_read_ulint(sp_header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
 	size  = mtr_read_ulint(sp_header + FSP_SIZE, MLOG_4BYTES, mtr);
@@ -646,7 +672,7 @@ xdes_get_descriptor_with_space_hdr(
 	/* If offset is == limit, fill free list of the space. */
 
 	if (offset == limit) {
-		fsp_fill_free_list(space, sp_header, mtr);
+		fsp_fill_free_list(FALSE, space, sp_header, mtr);
 	}
 
 	descr_page_no = xdes_calc_descriptor_page(offset);
@@ -714,8 +740,8 @@ xdes_lst_get_descriptor(
 	xdes_t*	descr;
 
 	ut_ad(mtr);
-	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space), MTR_MEMO_X_LOCK));
-	
+	ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space),
+							MTR_MEMO_X_LOCK));
 	descr = fut_get_ptr(space, lst_node, RW_X_LATCH, mtr) - XDES_FLST_NODE;
 
 	return(descr);
@@ -825,8 +851,21 @@ fsp_init(void)
 }
 
 /**************************************************************************
+Writes the space id to a tablespace header. This function bypasses the buffer
+pool; fil0fil.c uses it when it creates a new single-table tablespace. */
+
+void
+fsp_header_write_space_id(
+/*======================*/
+	page_t*	page,		/* in: first page in the space (modified) */
+	ulint	space_id)	/* in: space id to store */
+{
+	mach_write_to_4(page + FSP_HEADER_OFFSET + FSP_SPACE_ID, space_id);
+}
+
+/**************************************************************************
 Initializes the space header of a new created space and creates also the
-insert buffer tree root. */
+insert buffer tree root if space == 0. */
 
 void
 fsp_header_init(
@@ -843,9 +882,6 @@ fsp_header_init(
 	mtr_x_lock(fil_space_get_latch(space), mtr);
 
 	page = buf_page_create(space, 0, mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
 	buf_page_get(space, 0, RW_X_LATCH, mtr);
 #ifdef UNIV_SYNC_DEBUG
 	buf_page_dbg_add_level(page, SYNC_FSP_PAGE);
@@ -857,6 +893,8 @@ fsp_header_init(
 
 	header = FSP_HEADER_OFFSET + page;
 
+	mlog_write_ulint(header + FSP_SPACE_ID, space, MLOG_4BYTES, mtr);
+
 	mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr); 
 	mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr); 
 	mlog_write_ulint(header + FSP_LOWEST_NO_WRITE, 0, MLOG_4BYTES, mtr); 
@@ -869,10 +907,40 @@ fsp_header_init(
 	flst_init(header + FSP_SEG_INODES_FREE, mtr);
 
 	mlog_write_dulint(header + FSP_SEG_ID, ut_dulint_create(0, 1), mtr); 
-	fsp_fill_free_list(space, header, mtr);
-
-	btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, space,
+	if (space == 0) {
+		fsp_fill_free_list(FALSE, space, header, mtr);
+		btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, space,
 				ut_dulint_add(DICT_IBUF_ID_MIN, space), mtr);
+	} else {
+		fsp_fill_free_list(TRUE, space, header, mtr);
+	}
+}
+
+/**************************************************************************
+Reads the space id from the first page of a tablespace and verifies it. */
+
+ulint
+fsp_header_get_space_id(
+/*====================*/
+			/* out: space id, ULINT_UNDEFINED if the ids differ */
+	page_t*	page)	/* in: first page of a tablespace */
+{
+	ulint	fsp_id;	/* space id stored in the fsp header */
+	ulint	id;	/* space id stored in the page header */
+
+	fsp_id = mach_read_from_4(FSP_HEADER_OFFSET + page + FSP_SPACE_ID);
+
+	id = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+
+	if (id != fsp_id) {
+	        fprintf(stderr,
+"InnoDB: Error: space id in fsp header %lu, but in the page header %lu\n",
+							    (ulong) fsp_id,
+							    (ulong) id);
+		return(ULINT_UNDEFINED);
+	}
+
+	return(id);
+}
 
 /**************************************************************************
@@ -896,7 +964,8 @@ fsp_header_inc_size(
 
 	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
 
-	mlog_write_ulint(header + FSP_SIZE, size + size_inc, MLOG_4BYTES, mtr); 
+	mlog_write_ulint(header + FSP_SIZE, size + size_inc, MLOG_4BYTES,
+									mtr); 
 }
 
 /**************************************************************************
@@ -909,7 +978,7 @@ ulint
 fsp_header_get_free_limit(
 /*======================*/
 			/* out: free limit in megabytes */
-	ulint	space)	/* in: space id */
+	ulint	space)	/* in: space id, must be 0 */
 {
 	fsp_header_t*	header;
 	ulint		limit;
@@ -943,7 +1012,7 @@ ulint
 fsp_header_get_tablespace_size(
 /*===========================*/
 			/* out: size in pages */
-	ulint	space)	/* in: space id */
+	ulint	space)	/* in: space id, must be 0 */
 {
 	fsp_header_t*	header;
 	ulint		size;
@@ -965,40 +1034,80 @@ fsp_header_get_tablespace_size(
 }
 
 /***************************************************************************
-Tries to extend the last data file file if it is defined as auto-extending. */
+Tries to extend a single-table tablespace so that page number page_no fits
+in the data file; FSP_SIZE in the header is set to the resulting size. */
+static
+ibool
+fsp_try_extend_data_file_with_pages(
+/*================================*/
+					/* out: TRUE if success */
+	ulint		space,		/* in: space id, must be != 0 */
+	ulint		page_no,	/* in: page no, >= current FSP_SIZE */
+	fsp_header_t*	header,		/* in: space header */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	ibool	success;
+	ulint	actual_size;
+	ulint	size;
+
+	ut_a(space != 0);
+
+	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+	
+	ut_a(page_no >= size);
+
+	success = fil_extend_space_to_desired_size(&actual_size, space,
+								page_no + 1);
+	/* actual_size now has the space size in pages; it may be less than
+	we wanted if we ran out of disk space */
+	
+	mlog_write_ulint(header + FSP_SIZE, actual_size, MLOG_4BYTES, mtr);
+
+	return(success);
+}
+
+/***************************************************************************
+Tries to extend the last data file of a tablespace if it is auto-extending. */
 static
 ibool
-fsp_try_extend_last_file(
+fsp_try_extend_data_file(
 /*=====================*/
 					/* out: FALSE if not auto-extending */
-	ulint*		actual_increase,/* out: actual increase in pages */
+	ulint*		actual_increase,/* out: actual increase in pages, where
+					we measure the tablespace size from
+					what the header field says; it may be
+					the actual file size rounded down to
+					a full megabyte */
 	ulint		space,		/* in: space */
 	fsp_header_t*	header,		/* in: space header */
 	mtr_t*		mtr)		/* in: mtr */
 {
 	ulint	size;
+	ulint	new_size;
+	ulint	old_size;
 	ulint	size_increase;
+	ulint	actual_size;
 	ibool	success;
 
-	ut_a(space == 0);
-
 	*actual_increase = 0;
 
-	if (!srv_auto_extend_last_data_file) {
+	if (space == 0 && !srv_auto_extend_last_data_file) {
 
 		return(FALSE);
 	}
 
 	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
 
-	if (srv_last_file_size_max != 0) {
+	old_size = size;
+
+	if (space == 0 && srv_last_file_size_max != 0) {
 		if (srv_last_file_size_max
 			 < srv_data_file_sizes[srv_n_data_files - 1]) {
 
 			fprintf(stderr,
 "InnoDB: Error: Last data file size is %lu, max size allowed %lu\n",
-				srv_data_file_sizes[srv_n_data_files - 1],
-				srv_last_file_size_max);
+				(ulong) srv_data_file_sizes[srv_n_data_files - 1],
+				(ulong) srv_last_file_size_max);
 		}
 
 		size_increase = srv_last_file_size_max
@@ -1007,24 +1116,58 @@ fsp_try_extend_last_file(
 			size_increase = SRV_AUTO_EXTEND_INCREMENT;
 		}
 	} else {
-		size_increase = SRV_AUTO_EXTEND_INCREMENT;
+	        if (space == 0) {
+			size_increase = SRV_AUTO_EXTEND_INCREMENT;
+		} else {
+		        /* We extend single-table tablespaces first one extent
+			at a time, but for bigger tablespaces more. It is not
+			enough to extend always by one extent, because some
+			extents are frag page extents. */
+
+			if (size < FSP_EXTENT_SIZE) {
+				/* Let us first extend the file to 64 pages */
+				success = fsp_try_extend_data_file_with_pages(
+					  space, FSP_EXTENT_SIZE - 1,
+					  header, mtr);
+				if (!success) {
+					new_size = mtr_read_ulint(
+					 header + FSP_SIZE, MLOG_4BYTES, mtr);
+
+					*actual_increase = new_size - old_size;
+
+				        return(FALSE);
+				}
+
+				size = FSP_EXTENT_SIZE;
+			}
+
+			if (size < 32 * FSP_EXTENT_SIZE) {
+			        size_increase = FSP_EXTENT_SIZE;
+			} else {
+				/* Below in fsp_fill_free_list() we assume
+				that we add at most FSP_FREE_ADD extents at
+				a time */
+				size_increase = FSP_FREE_ADD * FSP_EXTENT_SIZE;
+			}
+		}
 	}
 				
 	if (size_increase == 0) {
+
 		return(TRUE);
 	}
 	
-	/* Extend the data file. If we are not able to extend
-	the full requested length, the function tells us
-	the number of full megabytes (but the unit is pages!)
-	we were able to extend. */
-				
-	success = fil_extend_last_data_file(actual_increase, size_increase);
+	success = fil_extend_space_to_desired_size(&actual_size, space,
+							size + size_increase);
+	/* We ignore any fragments of a full megabyte when storing the size
+	to the space header */
 
-	if (success) {
-		mlog_write_ulint(header + FSP_SIZE, size + *actual_increase,
+	mlog_write_ulint(header + FSP_SIZE, 
+	   ut_calc_align_down(actual_size, (1024 * 1024) / UNIV_PAGE_SIZE),
 							MLOG_4BYTES, mtr);
-	}
+	new_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+
+	*actual_increase = new_size - old_size;
 
 	return(TRUE);
 }
@@ -1037,9 +1180,14 @@ static
 void
 fsp_fill_free_list(
 /*===============*/
-	ulint		space,	/* in: space */
-	fsp_header_t*	header,	/* in: space header */
-	mtr_t*		mtr)	/* in: mtr */
+	ibool		init_space,	/* in: TRUE if this is a single-table
+					tablespace and we are only initing
+					the tablespace's first extent
+					descriptor page and ibuf bitmap page;
+					then we do not allocate more extents */
+	ulint		space,		/* in: space */
+	fsp_header_t*	header,		/* in: space header */
+	mtr_t*		mtr)		/* in: mtr */
 {
 	ulint	limit;
 	ulint	size;
@@ -1058,27 +1206,37 @@ fsp_fill_free_list(
 	size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
 	limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
 
-	if (srv_auto_extend_last_data_file
+	if (space == 0 && srv_auto_extend_last_data_file
 			&& size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
 
 		/* Try to increase the last data file size */
-		fsp_try_extend_last_file(&actual_increase, space, header,
-									mtr);
+		fsp_try_extend_data_file(&actual_increase, space, header, mtr);
+		size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+	}
+
+	if (space != 0 && !init_space
+			&& size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
+
+		/* Try to increase the .ibd file size */
+		fsp_try_extend_data_file(&actual_increase, space, header, mtr);
 		size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
 	}
 
 	i = limit;
 		
-	while ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD)) {
+	while ((init_space && i < 1)
+	       || ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD))) {
 
 		mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE,
 							MLOG_4BYTES, mtr); 
 
 		/* Update the free limit info in the log system and make
 		a checkpoint */
-		log_fsp_current_free_limit_set_and_checkpoint(
+		if (space == 0) {
+		        log_fsp_current_free_limit_set_and_checkpoint(
 				(i + FSP_EXTENT_SIZE)
 				/ ((1024 * 1024) / UNIV_PAGE_SIZE));
+		}
 
 		if (0 == i % XDES_DESCRIBED_PER_PAGE) {
 
@@ -1088,10 +1246,6 @@ fsp_fill_free_list(
 
 			if (i > 0) {
 				descr_page = buf_page_create(space, i, mtr);
-#ifdef UNIV_SYNC_DEBUG
-				buf_page_dbg_add_level(descr_page,
-								SYNC_FSP_PAGE);
-#endif /* UNIV_SYNC_DEBUG */
 				buf_page_get(space, i, RW_X_LATCH, mtr);
 #ifdef UNIV_SYNC_DEBUG
 				buf_page_dbg_add_level(descr_page,
@@ -1100,7 +1254,7 @@ fsp_fill_free_list(
 				fsp_init_file_page(descr_page, mtr);
 			}
 
-			/* Initialize the ibuf page in a separate
+			/* Initialize the ibuf bitmap page in a separate
 			mini-transaction because it is low in the latching
 			order, and we must be able to release its latch
 			before returning from the fsp routine */
@@ -1109,9 +1263,6 @@ fsp_fill_free_list(
 
 			ibuf_page = buf_page_create(space,
 					i + FSP_IBUF_BITMAP_OFFSET, &ibuf_mtr);
-#ifdef UNIV_SYNC_DEBUG
-			buf_page_dbg_add_level(ibuf_page, SYNC_IBUF_BITMAP);
-#endif /* UNIV_SYNC_DEBUG */
 			buf_page_get(space, i + FSP_IBUF_BITMAP_OFFSET,
 							RW_X_LATCH, &ibuf_mtr);
 #ifdef UNIV_SYNC_DEBUG
@@ -1188,7 +1339,7 @@ fsp_alloc_free_extent(
 		first = flst_get_first(header + FSP_FREE, mtr);
 
 		if (fil_addr_is_null(first)) {
-			fsp_fill_free_list(space, header, mtr);
+			fsp_fill_free_list(FALSE, space, header, mtr);
 
 			first = flst_get_first(header + FSP_FREE, mtr);
 		}
@@ -1225,6 +1376,8 @@ fsp_alloc_free_page(
 	ulint		free;
 	ulint		frag_n_used;
 	ulint		page_no;
+	ulint		space_size;
+	ibool		success;
 	
 	ut_ad(mtr);
 
@@ -1278,6 +1431,30 @@ fsp_alloc_free_page(
 		ut_error;
 	}
 
+	page_no = xdes_get_offset(descr) + free;
+
+	space_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
+
+	if (space_size <= page_no) {
+	        /* It must be that we are extending a single-table tablespace
+		whose size is still < 64 pages */
+
+		ut_a(space != 0);
+		if (page_no >= FSP_EXTENT_SIZE) {
+		        fprintf(stderr,
+"InnoDB: Error: trying to extend a single-table tablespace %lu\n"
+"InnoDB: by single page(s) though the space size %lu. Page no %lu.\n",
+			   (ulong) space, (ulong) space_size, (ulong) page_no);
+			return(FIL_NULL);
+		}
+		success = fsp_try_extend_data_file_with_pages(space, page_no,
+							      header, mtr);
+		if (!success) {
+			/* No disk space left */
+		        return(FIL_NULL);
+		}
+	}
+
 	xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr);
 
 	/* Update the FRAG_N_USED field */
@@ -1299,8 +1476,6 @@ fsp_alloc_free_page(
 									mtr);
 	}
 
-	page_no = xdes_get_offset(descr) + free;
-
 	/* Initialize the allocated page to the buffer pool, so that it can
 	be obtained immediately with buf_page_get without need for a disk
 	read. */
@@ -1347,7 +1522,8 @@ fsp_free_page(
 	if (state != XDES_FREE_FRAG && state != XDES_FULL_FRAG) {
 		fprintf(stderr,
 "InnoDB: Error: File space extent descriptor of page %lu has state %lu\n",
-								page, state);
+								(ulong) page,
+								(ulong) state);
 		ut_sprintf_buf(buf, ((byte*)descr) - 50, 200);
 
 		fprintf(stderr, "InnoDB: Dump of descriptor: %s\n", buf);
@@ -1366,7 +1542,7 @@ fsp_free_page(
 								== TRUE) {
 		fprintf(stderr,
 "InnoDB: Error: File space extent descriptor of page %lu says it is free\n",
-									page);
+							       (ulong) page);
 		ut_sprintf_buf(buf, ((byte*)descr) - 50, 200);
 
 		fprintf(stderr, "InnoDB: Dump of descriptor: %s\n", buf);
@@ -1602,8 +1778,8 @@ fsp_alloc_seg_inode(
 
 	inode = fsp_seg_inode_page_get_nth_inode(page, n, mtr);
 
-	if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, n + 1, mtr)) {
-
+	if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, n + 1,
+								    mtr)) {
 		/* There are no other unused headers left on the page: move it
 		to another list */
 
@@ -1657,7 +1833,7 @@ fsp_free_seg_inode(
 		flst_remove(space_header + FSP_SEG_INODES_FREE,
 				page + FSEG_INODE_PAGE_NODE, mtr);
 
-		fsp_free_page(space, buf_frame_get_page_no(page), mtr);		
+		fsp_free_page(space, buf_frame_get_page_no(page), mtr);
 	}
 }
 
@@ -1821,12 +1997,12 @@ fseg_create_general(
 			will belong to the created segment */
 	ulint	byte_offset, /* in: byte offset of the created segment header
 			on the page */
-	ibool	has_done_reservation, /* in: TRUE if the caller has
-			already done the reservation for the pages
-			with fsp_reserve_free_extents (at least 2 extents:
-			one for the inode and, then there other for the
-			segment) is no need to do the check for this
-			individual operation */
+	ibool	has_done_reservation, /* in: TRUE if the caller has already
+			done the reservation for the pages with
+			fsp_reserve_free_extents (at least 2 extents: one for
+			the inode and the other for the segment); then there is
+			no need to do the check for this individual
+			operation */
 	mtr_t*	mtr)	/* in: mtr */
 {
 	fsp_header_t*	space_header;
@@ -1835,6 +2011,7 @@ fseg_create_general(
 	fseg_header_t*	header = 0; /* remove warning */
 	rw_lock_t*	latch;
 	ibool		success;
+	ulint		n_reserved;
 	page_t*		ret		= NULL;
 	ulint		i;
 
@@ -1858,12 +2035,14 @@ fseg_create_general(
 		/* This thread did not own the latch before this call: free
 		excess pages from the insert buffer free list */
 
-		ibuf_free_excess_pages(space);
+		if (space == 0) {
+			ibuf_free_excess_pages(space);
+		}
 	}
 
 	if (!has_done_reservation) { 
-		success = fsp_reserve_free_extents(space, 2, FSP_NORMAL, mtr);
-
+		success = fsp_reserve_free_extents(&n_reserved, space, 2,
+							FSP_NORMAL, mtr);
 		if (!success) {
 			return(NULL);
 		}
@@ -1926,7 +2105,7 @@ fseg_create_general(
 funct_exit:
 	if (!has_done_reservation) { 
 	
-		fil_space_release_free_extents(space, 2);
+		fil_space_release_free_extents(space, n_reserved);
 	}
 	
 	return(ret);
@@ -2144,6 +2323,8 @@ fseg_alloc_free_page_low(
 				FSP_UP, FSP_NO_DIR */
 	mtr_t*		mtr)	/* in: mtr handle */
 {
+	fsp_header_t*	space_header;
+	ulint		space_size;
 	dulint		seg_id;
 	ulint		used;
 	ulint		reserved;
@@ -2154,6 +2335,7 @@ fseg_alloc_free_page_low(
 	xdes_t*		ret_descr;	/* the extent of the allocated page */
 	page_t*		page;
 	ibool		frag_page_allocated = FALSE;
+	ibool		success;
 	ulint		n;
 					
 	ut_ad(mtr);
@@ -2166,8 +2348,10 @@ fseg_alloc_free_page_low(
 	
 	reserved = fseg_n_reserved_pages_low(seg_inode, &used, mtr);
 	
-	descr = xdes_get_descriptor(space, hint, mtr);
+	space_header = fsp_get_space_header(space, mtr);
 
+	descr = xdes_get_descriptor_with_space_hdr(space_header, space,
+								    hint, mtr);
 	if (descr == NULL) {
 		/* Hint outside space or too high above free limit: reset
 		hint */
@@ -2297,8 +2481,32 @@ fseg_alloc_free_page_low(
 		return(FIL_NULL);
 	}
 
-	if (!frag_page_allocated) {
+	if (space != 0) {
+		space_size = fil_space_get_size(space);
+
+		if (space_size <= ret_page) {
+		        /* It must be that we are extending a single-table
+			tablespace whose size is still < 64 pages */
+
+			if (ret_page >= FSP_EXTENT_SIZE) {
+			        fprintf(stderr,
+"InnoDB: Error (2): trying to extend a single-table tablespace %lu\n"
+"InnoDB: by single page(s) though the space size %lu. Page no %lu.\n",
+					(ulong) space, (ulong) space_size,
+					(ulong) ret_page);
+				return(FIL_NULL);
+			}
+			
+			success = fsp_try_extend_data_file_with_pages(space,
+						ret_page, space_header, mtr);
+			if (!success) {
+				/* No disk space left */
+				return(FIL_NULL);
+			}
+		}
+	}
 
+	if (!frag_page_allocated) {
 		/* Initialize the allocated page to buffer pool, so that it
 		can be obtained immediately with buf_page_get without need
 		for a disk read */
@@ -2359,6 +2567,7 @@ fseg_alloc_free_page_general(
 	rw_lock_t*	latch;
 	ibool		success;
 	ulint		page_no;
+	ulint		n_reserved;
 
 	space = buf_frame_get_space_id(seg_header);
 
@@ -2375,14 +2584,16 @@ fseg_alloc_free_page_general(
 		/* This thread did not own the latch before this call: free
 		excess pages from the insert buffer free list */
 
-		ibuf_free_excess_pages(space);
+		if (space == 0) {
+		        ibuf_free_excess_pages(space);
+		}
 	}
 
 	inode = fseg_inode_get(seg_header, mtr);
 
 	if (!has_done_reservation) {
-		success = fsp_reserve_free_extents(space, 2, FSP_NORMAL, mtr);
-
+		success = fsp_reserve_free_extents(&n_reserved, space, 2,
+							FSP_NORMAL, mtr);
 		if (!success) {
 			return(FIL_NULL);
 		}
@@ -2391,7 +2602,7 @@ fseg_alloc_free_page_general(
 	page_no = fseg_alloc_free_page_low(buf_frame_get_space_id(inode),
 					inode, hint, direction, mtr);
 	if (!has_done_reservation) {
-		fil_space_release_free_extents(space, 2);
+		fil_space_release_free_extents(space, n_reserved);
 	}
 
 	return(page_no);
@@ -2421,6 +2632,46 @@ fseg_alloc_free_page(
 }
 
 /**************************************************************************
+Checks that we have at least 2 frag pages free in the first extent of a
+single-table tablespace, and that they are also physically initialized in the
+data file. That is, we have already extended the data file so that those pages
+are inside the data file. If not, this function extends the tablespace with
+pages. */
+static
+ibool
+fsp_reserve_free_pages(
+/*===================*/
+					/* out: TRUE if there were >= 2 free
+					pages, or we were able to extend */
+        ulint		space,		/* in: space id, must be != 0 */
+	fsp_header_t*	space_header,	/* in: header of that space,
+					x-latched */
+	ulint		size,		/* in: size of the tablespace in pages,
+					must be < FSP_EXTENT_SIZE / 2 */
+	mtr_t*		mtr)		/* in: mtr */
+{
+	xdes_t*	descr;
+	ulint	n_used;
+
+	ut_a(space != 0);
+	ut_a(size < FSP_EXTENT_SIZE / 2);
+
+	descr = xdes_get_descriptor_with_space_hdr(space_header, space, 0,
+									mtr);
+	n_used = xdes_get_n_used(descr, mtr);
+
+	ut_a(n_used <= size);
+
+	if (size >= n_used + 2) {
+
+		return(TRUE);
+	}
+
+	return(fsp_try_extend_data_file_with_pages(space, n_used + 1,
+							  space_header, mtr));
+}
+
+/**************************************************************************
 Reserves free pages from a tablespace. All mini-transactions which may
 use several pages from the tablespace should call this function beforehand
 and reserve enough free extents so that they certainly will be able
@@ -2438,12 +2689,21 @@ two types of allocation: when space is scarce, FSP_NORMAL allocations
 will not succeed, but the latter two allocations will succeed, if possible.
 The purpose is to avoid dead end where the database is full but the
 user cannot free any space because these freeing operations temporarily
-reserve some space. */ 
+reserve some space.
+
+Single-table tablespaces whose size is < 32 pages are a special case. In this
+function we would liberally reserve several 64 page extents for every page
+split or merge in a B-tree. But we do not want to waste disk space if the table
+only occupies < 32 pages. That is why we apply different rules in that special
+case, just ensuring that there are 2 free pages available. */
 
 ibool
 fsp_reserve_free_extents(
 /*=====================*/
 			/* out: TRUE if we were able to make the reservation */
+	ulint*	n_reserved,/* out: number of extents actually reserved; if we
+			return TRUE and the space size is < FSP_EXTENT_SIZE / 2
+			pages, then this is 0, otherwise it is n_ext */
 	ulint	space,	/* in: space id */
 	ulint	n_ext,	/* in: number of extents to reserve */
 	ulint	alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
@@ -2466,6 +2726,8 @@ fsp_reserve_free_extents(
 	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
 							MTR_MEMO_X_LOCK));
 #endif /* UNIV_SYNC_DEBUG */
+	*n_reserved = n_ext;
+
 	latch = fil_space_get_latch(space);
 
 	mtr_x_lock(latch, mtr);
@@ -2474,6 +2736,12 @@ fsp_reserve_free_extents(
 try_again:
 	size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, mtr);
 	
+	if (size < FSP_EXTENT_SIZE / 2) {
+		/* Use different rules for small single-table tablespaces */
+		*n_reserved = 0;
+		return(fsp_reserve_free_pages(space, space_header, size, mtr));
+	}
+
 	n_free_list_ext = flst_get_len(space_header + FSP_FREE, mtr);
 	
 	free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT,
@@ -2523,7 +2791,7 @@ try_again:
 		return(TRUE);
 	}
 try_to_extend:
-	success = fsp_try_extend_last_file(&n_pages_added, space,
+	success = fsp_try_extend_data_file(&n_pages_added, space,
 							space_header, mtr);
 	if (success && n_pages_added > 0) {
 
@@ -2574,6 +2842,13 @@ fsp_get_available_space_in_free_extents(
 							MLOG_4BYTES, &mtr);
 	mtr_commit(&mtr);
 
+	if (size < FSP_EXTENT_SIZE) {
+	        ut_a(space != 0);   /* This must be a single-table
+				    tablespace */
+		return(0);	    /* TODO: count free frag pages and return
+				    a value based on that */
+	}
+	
 	/* Below we play safe when counting free extents above the free limit:
 	some of them will contain extent descriptor pages, and therefore
 	will not be free extents */
@@ -2671,14 +2946,10 @@ fseg_free_page_low(
 	xdes_t*	descr;
 	ulint	not_full_n_used;
 	ulint	state;
+	dulint	descr_id;
+	dulint	seg_id;
 	ulint	i;
-    char	errbuf[200];
-
-#ifdef __WIN__
-    dulint desm;
-    dulint segm;
-#endif
-
+	char	errbuf[200];
 	
 	ut_ad(seg_inode && mtr);
 	ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) ==
@@ -2703,7 +2974,7 @@ fseg_free_page_low(
 "InnoDB: though it is already marked as free in the tablespace!\n"
 "InnoDB: The tablespace free space info is corrupt.\n"
 "InnoDB: You may need to dump your InnoDB tables and recreate the whole\n"
-"InnoDB: database!\n", page);
+"InnoDB: database!\n", (ulong) page);
 
 		fprintf(stderr,
 "InnoDB: If the InnoDB recovery crashes here, see section 6.1\n"
@@ -2731,26 +3002,22 @@ fseg_free_page_low(
 		return;
 	}
 
+	/* If we get here, the page is in some extent of the segment */	
+
+	descr_id = mtr_read_dulint(descr + XDES_ID, MLOG_8BYTES, mtr);
+	seg_id = mtr_read_dulint(seg_inode + FSEG_ID, MLOG_8BYTES, mtr);
 /*
 	fprintf(stderr,
 "InnoDB: InnoDB is freeing space %lu page %lu,\n"
 "InnoDB: which belongs to descr seg %lu %lu\n"
 "InnoDB: segment %lu %lu.\n",
 		   space, page,
-		   ut_dulint_get_high(
-			mtr_read_dulint(descr + XDES_ID, mtr)),
-		   ut_dulint_get_low(
-			mtr_read_dulint(descr + XDES_ID, mtr)),
-		   ut_dulint_get_high(
-		     mtr_read_dulint(seg_inode + FSEG_ID, mtr)),
-		   ut_dulint_get_low(
-		     mtr_read_dulint(seg_inode + FSEG_ID, mtr)));
+		   ut_dulint_get_high(descr_id),
+		   ut_dulint_get_low(descr_id),
+		   ut_dulint_get_high(seg_id),
+		   ut_dulint_get_low(seg_id));
 */
-	/* If we get here, the page is in some extent of the segment */	
-	if (0 != ut_dulint_cmp(
-		mtr_read_dulint(descr + XDES_ID, mtr),
-		mtr_read_dulint(seg_inode + FSEG_ID, mtr))) {
-
+	if (0 != ut_dulint_cmp(descr_id, seg_id)) {
 		ut_sprintf_buf(errbuf, descr, 40);
 		fprintf(stderr,
 "InnoDB: Dump of the tablespace extent descriptor: %s\n", errbuf);
@@ -2758,42 +3025,15 @@ fseg_free_page_low(
 		fprintf(stderr,
 "InnoDB: Dump of the segment inode: %s\n", errbuf);
 
-
-#ifndef __WIN__
-
-			fprintf(stderr,
-"InnoDB: Serious error: InnoDB is trying to free space %lu page %lu,\n"
-"InnoDB: which does not belong to segment %lu %lu but belongs\n"
-"InnoDB: to segment %lu %lu.\n",
-		   space, page,
-		   ut_dulint_get_high(
-			mtr_read_dulint(descr + XDES_ID, mtr)),
-		   ut_dulint_get_low(
-			mtr_read_dulint(descr + XDES_ID, mtr)),
-		   ut_dulint_get_high(
-		     mtr_read_dulint(seg_inode + FSEG_ID, mtr)),
-		   ut_dulint_get_low(
-		     mtr_read_dulint(seg_inode + FSEG_ID, mtr)));
-
-#else
-
-/* More pedantic usage to avoid VC++ 6.0 compiler errors due to inline
-     function expansion issues */
-
-			desm = mtr_read_dulint(descr + XDES_ID, mtr);
-			segm = mtr_read_dulint(seg_inode + FSEG_ID, mtr);
-
-            fprintf(stderr,
+	        fprintf(stderr,
 "InnoDB: Serious error: InnoDB is trying to free space %lu page %lu,\n"
 "InnoDB: which does not belong to segment %lu %lu but belongs\n"
 "InnoDB: to segment %lu %lu.\n",
-		   space, page,
-		   ut_dulint_get_high(desm),
-		   ut_dulint_get_low(desm),
-		   ut_dulint_get_high(segm),
-		   ut_dulint_get_low(segm));
-
-#endif
+		   (ulong) space, (ulong) page,
+		   (ulong) ut_dulint_get_high(descr_id),
+		   (ulong) ut_dulint_get_low(descr_id),
+		   (ulong) ut_dulint_get_high(seg_id),
+		   (ulong) ut_dulint_get_low(seg_id));
 
 		fprintf(stderr,
 "InnoDB: If the InnoDB recovery crashes here, see section 6.1\n"
@@ -3313,11 +3553,13 @@ fseg_print_low(
 
 	printf(
     "SEGMENT id %lu %lu space %lu; page %lu; res %lu used %lu; full ext %lu\n",
-		seg_id_high, seg_id_low, space, page_no, reserved, used,
-		n_full);
+		(ulong) seg_id_high, (ulong) seg_id_low, (ulong) space,
+                (ulong) page_no, (ulong) reserved, (ulong) used,
+                (ulong) n_full);
 	printf(
     "fragm pages %lu; free extents %lu; not full extents %lu: pages %lu\n",
-		n_frag, n_free, n_not_full, n_used);
+		(ulong) n_frag, (ulong) n_free, (ulong) n_not_full,
+                (ulong) n_used);
 }
 
 /***********************************************************************
@@ -3388,7 +3630,7 @@ fsp_validate(
 	n_full_frag_pages = FSP_EXTENT_SIZE *
 				flst_get_len(header + FSP_FULL_FRAG, &mtr);
 					
-	ut_a(free_limit <= size);
+	ut_a(free_limit <= size || (space != 0 && size < FSP_EXTENT_SIZE));
 					
 	flst_validate(header + FSP_FREE, &mtr);
 	flst_validate(header + FSP_FREE_FRAG, &mtr);
@@ -3620,15 +3862,16 @@ fsp_print(
 	seg_id_low = ut_dulint_get_low(d_var);
 	seg_id_high = ut_dulint_get_high(d_var);
 
-	printf("FILE SPACE INFO: id %lu\n", space);
+	printf("FILE SPACE INFO: id %lu\n", (ulong) space);
 
 	printf("size %lu, free limit %lu, free extents %lu\n",
-						size, free_limit, n_free);
+	       (ulong) size, (ulong) free_limit, (ulong) n_free);
 	printf(
 	"not full frag extents %lu: used pages %lu, full frag extents %lu\n",
-					n_free_frag, frag_n_used, n_full_frag);
+		(ulong) n_free_frag, (ulong) frag_n_used, (ulong) n_full_frag);
 
-	printf("first seg id not used %lu %lu\n", seg_id_high, seg_id_low);
+	printf("first seg id not used %lu %lu\n", (ulong) seg_id_high,
+	       (ulong) seg_id_low);
 
 	mtr_commit(&mtr);	
 
@@ -3707,5 +3950,5 @@ fsp_print(
 	
 	mtr_commit(&mtr2);
 
-	printf("NUMBER of file segments: %lu\n", n_segs);	
+	printf("NUMBER of file segments: %lu\n", (ulong) n_segs);
 }
diff --git a/innobase/fut/fut0lst.c b/innobase/fut/fut0lst.c
index 4328fc97b33..79830c36eb5 100644
--- a/innobase/fut/fut0lst.c
+++ b/innobase/fut/fut0lst.c
@@ -511,6 +511,7 @@ flst_print(
 
 	printf("FILE-BASED LIST:\n");
 	printf("Base node in space %lu page %lu byte offset %lu; len %lu\n",
-		buf_frame_get_space_id(frame), buf_frame_get_page_no(frame),
-		(ulint) (base - frame), len);
+	       (ulong) buf_frame_get_space_id(frame),
+	       (ulong) buf_frame_get_page_no(frame),
+	       (ulong) (base - frame), (ulong) len);
 }
diff --git a/innobase/ha/ha0ha.c b/innobase/ha/ha0ha.c
index ad833312963..5e807406ce0 100644
--- a/innobase/ha/ha0ha.c
+++ b/innobase/ha/ha0ha.c
@@ -34,6 +34,12 @@ ha_create(
 
 	table = hash_create(n);
 
+	if (in_btr_search) {
+		table->adaptive = TRUE;
+	} else {
+		table->adaptive = FALSE;
+	}
+
 	if (n_mutexes == 0) {
 		if (in_btr_search) {
 			table->heap = mem_heap_create_in_btr_search(4096);
@@ -79,6 +85,7 @@ ha_insert_for_fold(
 	hash_cell_t*	cell;
 	ha_node_t*	node;
 	ha_node_t*	prev_node;
+	buf_block_t*	prev_block;
 	ulint		hash;
 
 	ut_ad(table && data);
@@ -93,6 +100,12 @@ ha_insert_for_fold(
 
 	while (prev_node != NULL) {
 		if (prev_node->fold == fold) {
+			if (table->adaptive) {
+				prev_block = buf_block_align(prev_node->data);
+				ut_a(prev_block->n_pointers > 0);
+				prev_block->n_pointers--;
+				buf_block_align(data)->n_pointers++;
+			}
 
 			prev_node->data = data;
 
@@ -116,6 +129,11 @@ ha_insert_for_fold(
 	}
 	
 	ha_node_set_data(node, data);
+
+	if (table->adaptive) {
+		buf_block_align(data)->n_pointers++;
+	}
+
 	node->fold = fold;
 
 	node->next = NULL;
@@ -148,6 +166,11 @@ ha_delete_hash_node(
 	hash_table_t*	table,		/* in: hash table */
 	ha_node_t*	del_node)	/* in: node to be deleted */
 {
+	if (table->adaptive) {
+		ut_a(buf_block_align(del_node->data)->n_pointers > 0);
+		buf_block_align(del_node->data)->n_pointers--;
+	}
+
 	HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node);
 }
 
@@ -174,6 +197,35 @@ ha_delete(
 	ha_delete_hash_node(table, node);
 }	
 
+/*************************************************************
+Looks for an element when we know the pointer to the data, and updates
+the pointer to data, if found. */
+
+void
+ha_search_and_update_if_found(
+/*==========================*/
+	hash_table_t*	table,	/* in: hash table */
+	ulint		fold,	/* in: folded value of the searched data */
+	void*		data,	/* in: pointer to the data */
+	void*		new_data)/* in: new pointer to the data */
+{
+	ha_node_t*	node;
+
+	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
+
+	node = ha_search_with_data(table, fold, data);
+
+	if (node) {
+		if (table->adaptive) {
+			ut_a(buf_block_align(node->data)->n_pointers > 0);
+			buf_block_align(node->data)->n_pointers--;
+			buf_block_align(new_data)->n_pointers++;
+		}
+
+		node->data = new_data;
+	}
+}
+
 /*********************************************************************
 Removes from the chain determined by fold all nodes whose data pointer
 points to the page given. */
@@ -205,10 +257,10 @@ ha_remove_all_nodes_to_page(
 
 			node = ha_chain_get_first(table, fold);
 		} else {
-			node = ha_chain_get_next(table, node);
+			node = ha_chain_get_next(node);
 		}
 	}
-
+#ifdef UNIV_DEBUG
 	/* Check that all nodes really got deleted */
 	
 	node = ha_chain_get_first(table, fold);
@@ -216,8 +268,9 @@ ha_remove_all_nodes_to_page(
 	while (node) {
 		ut_a(buf_frame_align(ha_node_get_data(node)) != page);
 
-		node = ha_chain_get_next(table, node);
+		node = ha_chain_get_next(node);
 	}
+#endif
 }
 
 /*****************************************************************
@@ -246,7 +299,7 @@ ha_validate(
 				fprintf(stderr,
 "InnoDB: Error: hash table node fold value %lu does not\n"
 "InnoDB: match with the cell number %lu.\n",
-					node->fold, i);
+					(ulong) node->fold, (ulong) i);
 
 				ok = FALSE;
 			}
@@ -269,12 +322,10 @@ ha_print_info(
 	hash_table_t*	table)	/* in: hash table */
 {
 	hash_cell_t*	cell;
-/*
-	ha_node_t*	node;
-	ulint		len	= 0;
-	ulint		max_len	= 0;
+/*	ha_node_t*	node;
 	ulint		nodes	= 0;
-*/
+	ulint		len	= 0;
+	ulint		max_len	= 0; */
 	ulint		cells	= 0;
 	ulint		n_bufs;
 	ulint		i;
@@ -315,7 +366,8 @@ ha_print_info(
 	}
 
 	buf += sprintf(buf,
-"Hash table size %lu, used cells %lu", hash_get_n_cells(table), cells);
+"Hash table size %lu, used cells %lu", (ulong) hash_get_n_cells(table),
+		       (ulong) cells);
 
 	if (table->heaps == NULL && table->heap != NULL) {
 
@@ -328,6 +380,6 @@ ha_print_info(
 			n_bufs++;
 		}
 				
-	        buf += sprintf(buf, ", node heap has %lu buffer(s)\n", n_bufs);
+	        buf += sprintf(buf, ", node heap has %lu buffer(s)\n", (ulong) n_bufs);
 	}
 }	
diff --git a/innobase/ha/hash0hash.c b/innobase/ha/hash0hash.c
index 808aa88da3d..372104e54b3 100644
--- a/innobase/ha/hash0hash.c
+++ b/innobase/ha/hash0hash.c
@@ -61,6 +61,7 @@ hash_create(
 
 	array = ut_malloc(sizeof(hash_cell_t) * prime);
 	
+	table->adaptive = FALSE;
 	table->array = array;
 	table->n_cells = prime;
 	table->n_mutexes = 0;
diff --git a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c
index f2c631d88cd..42ca34e7f10 100644
--- a/innobase/ibuf/ibuf0ibuf.c
+++ b/innobase/ibuf/ibuf0ibuf.c
@@ -29,6 +29,35 @@ Created 7/19/1997 Heikki Tuuri
 #include "log0recv.h"
 #include "que0que.h"
 
+/*      STRUCTURE OF AN INSERT BUFFER RECORD
+
+In versions < 4.1.x:
+
+1. The first field is the page number.
+2. The second field is an array which stores type info for each subsequent
+   field. We store the information which affects the ordering of records, and
+   also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it
+   is 10 bytes.
+3. Next we have the fields of the actual index record.
+
+In versions >= 4.1.x:
+
+Note that contrary to what we planned in the 1990's, there will only be one
+insert buffer tree, and that is in the system tablespace of InnoDB.
+
+1. The first field is the space id.
+2. The second field is a one-byte marker which differentiates records from
+   the < 4.1.x storage format.
+3. The third field is the page number.
+4. The fourth field contains the type info, where we have also added 2 bytes to
+   store the charset. In the compressed table format of 5.0.x we must add more
+   information here so that we can build a dummy 'index' struct which 5.0.x
+   can use in the binary search on the index page in the ibuf merge phase.
+5. The rest of the fields contain the fields of the actual index record.
+
+*/
+
+
 /*	PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
 
 If an OS thread performs any operation that brings in disk pages from
@@ -45,20 +74,20 @@ because they own x-latches to pages which are on a lower level than the
 insert buffer tree latch, its page latches, and the tablespace latch an
 insert buffer operation can reserve.
 
-The solution is the following: We put into each tablespace an insert buffer
-of its own. Let all the tree and page latches connected with the insert buffer
-be later in the latching order than the fsp latch and fsp page latches.
+The solution is the following: Let all the tree and page latches connected
+with the insert buffer be later in the latching order than the fsp latch and
+fsp page latches.
+
 Insert buffer pages must be such that the insert buffer is never invoked
 when these pages are accessed as this would result in a recursion violating
 the latching order. We let a special i/o-handler thread take care of i/o to
 the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
 pages and the first inode page, which contains the inode of the ibuf tree: let
-us call all these ibuf pages. If the OS does not support asynchronous i/o,
-then there is no special i/o thread, but to prevent deadlocks, we do not let a
-read-ahead access both non-ibuf and ibuf pages.
+us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead
+access both non-ibuf and ibuf pages.
 
-Then an i/o-handler for the insert buffer never needs to access the insert
-buffer tree and thus obeys the latching order. On the other hand, other
+Then an i/o-handler for the insert buffer never needs to recursively access the
+insert buffer tree and thus obeys the latching order. On the other hand, other
 i/o-handlers for other tablespaces may require access to the insert buffer,
 but because all kinds of latches they need to access there are later in the
 latching order, no violation of the latching order occurs in this case,
@@ -95,8 +124,8 @@ the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
 is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
 exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively
 level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
-it uses synchronous aio or the OS does not support aio, it can access any
-pages, as long as it obeys the access order rules. */
+it uses synchronous aio, it can access any pages, as long as it obeys the
+access order rules. */
 
 /* Buffer pool size per the maximum insert buffer size */
 #define IBUF_POOL_SIZE_PER_MAX_SIZE	2
@@ -109,8 +138,8 @@ ulint	ibuf_rnd = 986058871;
 ulint	ibuf_flush_count	= 0;
 
 /* Dimensions for the ibuf_count array */
-#define IBUF_COUNT_N_SPACES	10
-#define IBUF_COUNT_N_PAGES	10000
+#define IBUF_COUNT_N_SPACES	500
+#define IBUF_COUNT_N_PAGES	2000
 
 /* Buffered entry counts for file pages, used in debugging */
 ulint*	ibuf_counts[IBUF_COUNT_N_SPACES];
@@ -235,6 +264,8 @@ ibuf_header_page_get(
 {
 	page_t*	page;
 
+	ut_a(space == 0);
+
 	ut_ad(!ibuf_inside());
 
 	page = buf_page_get(space, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
@@ -259,6 +290,7 @@ ibuf_tree_root_get(
 {
 	page_t*	page;
 
+	ut_a(space == 0);
 	ut_ad(ibuf_inside());
 
 	mtr_x_lock(dict_tree_get_lock((data->index)->tree), mtr);
@@ -271,7 +303,7 @@ ibuf_tree_root_get(
 
 	return(page);
 }
-	
+
 /**********************************************************************
 Gets the ibuf count for a given page. */
 
@@ -294,9 +326,9 @@ ibuf_count_get(
 	return(*(ibuf_counts[space] + page_no));
 }
 
+#ifdef UNIV_IBUF_DEBUG
 /**********************************************************************
 Sets the ibuf count for a given page. */
-#ifdef UNIV_IBUF_DEBUG
 static
 void
 ibuf_count_set(
@@ -305,17 +337,17 @@ ibuf_count_set(
 	ulint	page_no,/* in: page number */
 	ulint	val)	/* in: value to set */
 {
-	ut_ad(space < IBUF_COUNT_N_SPACES);
-	ut_ad(page_no < IBUF_COUNT_N_PAGES);
-	ut_ad(val < UNIV_PAGE_SIZE);
+	ut_a(space < IBUF_COUNT_N_SPACES);
+	ut_a(page_no < IBUF_COUNT_N_PAGES);
+	ut_a(val < UNIV_PAGE_SIZE);
 
 	*(ibuf_counts[space] + page_no) = val;
 }
 #endif
 
 /**********************************************************************
-Creates the insert buffer data structure at a database startup and
-initializes the data structures for the insert buffer of each tablespace. */
+Creates the insert buffer data structure at a database startup and initializes
+the data structures for the insert buffer. */
 
 void
 ibuf_init_at_db_start(void)
@@ -407,19 +439,19 @@ ibuf_data_sizes_update(
 
 /*	printf("ibuf size %lu, space ibuf size %lu\n", ibuf->size,
 							data->size); */
-}	
+}
 
 /**********************************************************************
 Creates the insert buffer data struct for a single tablespace. Reads the
 root page of the insert buffer tree in the tablespace. This function can
 be called only after the dictionary system has been initialized, as this
-creates also the insert buffer table and index for this tablespace. */
+also creates the insert buffer table and index in this tablespace. */
 
 ibuf_data_t*
 ibuf_data_init_for_space(
 /*=====================*/
 			/* out, own: ibuf data struct, linked to the list
-			in ibuf control structure. */
+			in ibuf control structure */
 	ulint	space)	/* in: space id */
 {
 	ibuf_data_t*	data;
@@ -431,6 +463,8 @@ ibuf_data_init_for_space(
 	dict_index_t*	index;
 	ulint		n_used;
 	
+	ut_a(space == 0);
+
 #ifdef UNIV_LOG_DEBUG
 	if (space % 2 == 1) {
 
@@ -471,14 +505,22 @@ ibuf_data_init_for_space(
 	data->n_merged_recs = 0;
 	
 	ibuf_data_sizes_update(data, root, &mtr);
-
+/*
+	if (!data->empty) {
+		fprintf(stderr,
+"InnoDB: index entries found in the insert buffer\n");
+	} else {
+		fprintf(stderr,
+"InnoDB: insert buffer empty\n");
+	}
+*/
 	mutex_exit(&ibuf_mutex);
 
 	mtr_commit(&mtr);
 
 	ibuf_exit();
 
-	sprintf(buf, "SYS_IBUF_TABLE_%lu", space);
+	sprintf(buf, "SYS_IBUF_TABLE_%lu", (ulong) space);
 	
 	table = dict_mem_table_create(buf, space, 2);
 
@@ -684,7 +726,7 @@ ibuf_bitmap_get_map_page(
 	mtr_t*	mtr)	/* in: mtr */
 {
 	page_t*	page;
-	
+
 	page = buf_page_get(space, ibuf_bitmap_page_no_calc(page_no),
 							RW_X_LATCH, mtr);
 #ifdef UNIV_SYNC_DEBUG
@@ -897,7 +939,7 @@ UNIV_INLINE
 ibool
 ibuf_fixed_addr_page(
 /*=================*/
-			/* out: TRUE if a fixed address ibuf i/o page */	
+			/* out: TRUE if a fixed address ibuf i/o page */
 	ulint	page_no)/* in: page number */
 {
 	if ((ibuf_bitmap_page(page_no))
@@ -934,6 +976,12 @@ ibuf_page(
 		return(TRUE);
 	}
 
+	if (space != 0) {
+		/* Currently we only have an ibuf tree in space 0 */
+
+		return(FALSE);
+	}
+
 	ut_ad(fil_space_get_type(space) == FIL_TABLESPACE);
 
 	mtr_start(&mtr);
@@ -998,14 +1046,60 @@ ibuf_rec_get_page_no(
 	ut_ad(ibuf_inside());
 	ut_ad(rec_get_n_fields(rec) > 2);
 
-	field = rec_get_nth_field(rec, 0, &len);
+	field = rec_get_nth_field(rec, 1, &len);
 
-	ut_ad(len == 4);
+	if (len == 1) {
+		/* This is of the >= 4.1.x record format */
+		ut_a(trx_sys_multiple_tablespace_format);
+
+		field = rec_get_nth_field(rec, 2, &len);
+	} else {
+		ut_a(trx_doublewrite_must_reset_space_ids);
+		ut_a(!trx_sys_multiple_tablespace_format);
+
+	        field = rec_get_nth_field(rec, 0, &len);
+	}
+
+	ut_a(len == 4);
 
 	return(mach_read_from_4(field));
 }
 
 /************************************************************************
+Returns the space id field of an ibuf record. For < 4.1.x format records
+returns 0. */
+static
+ulint
+ibuf_rec_get_space(
+/*===============*/
+			/* out: space id */
+	rec_t*	rec)	/* in: ibuf record */
+{
+	byte*	field;
+	ulint	len;
+
+	ut_ad(ibuf_inside());
+	ut_ad(rec_get_n_fields(rec) > 2);
+
+	field = rec_get_nth_field(rec, 1, &len);
+
+	if (len == 1) {
+		/* This is of the >= 4.1.x record format */
+
+		ut_a(trx_sys_multiple_tablespace_format);
+		field = rec_get_nth_field(rec, 0, &len);
+		ut_a(len == 4);
+
+		return(mach_read_from_4(field));
+	}
+
+	ut_a(trx_doublewrite_must_reset_space_ids);
+	ut_a(!trx_sys_multiple_tablespace_format);
+
+	return(0);
+}
+
+/************************************************************************
 Returns the space taken by a stored non-clustered index entry if converted to
 an index record. */
 static
@@ -1017,6 +1111,7 @@ ibuf_rec_get_volume(
 	rec_t*	ibuf_rec)/* in: ibuf record */
 {
 	dtype_t	dtype;
+	ibool	new_format	= FALSE;
 	ulint	data_size	= 0;
 	ulint	n_fields;
 	byte*	types;
@@ -1027,17 +1122,42 @@ ibuf_rec_get_volume(
 	ut_ad(ibuf_inside());
 	ut_ad(rec_get_n_fields(ibuf_rec) > 2);
 	
-	n_fields = rec_get_n_fields(ibuf_rec) - 2;
+	data = rec_get_nth_field(ibuf_rec, 1, &len);
 
-	types = rec_get_nth_field(ibuf_rec, 1, &len);
+	if (len > 1) {
+	        /* < 4.1.x format record */
 
-	ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
+		ut_a(trx_doublewrite_must_reset_space_ids);
+		ut_a(!trx_sys_multiple_tablespace_format);
+
+		n_fields = rec_get_n_fields(ibuf_rec) - 2;
+
+		types = rec_get_nth_field(ibuf_rec, 1, &len);
+
+		ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
+	} else {
+	        /* >= 4.1.x format record */
+
+		ut_a(trx_sys_multiple_tablespace_format);
+		new_format = TRUE;
+
+		n_fields = rec_get_n_fields(ibuf_rec) - 4;
+
+		types = rec_get_nth_field(ibuf_rec, 3, &len);
+	}
 
 	for (i = 0; i < n_fields; i++) {
-		data = rec_get_nth_field(ibuf_rec, i + 2, &len);
+		if (new_format) {
+		        data = rec_get_nth_field(ibuf_rec, i + 4, &len);
+
+			dtype_new_read_for_order_and_null_size(&dtype,
+			       types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+		} else {
+		        data = rec_get_nth_field(ibuf_rec, i + 2, &len);
 
-		dtype_read_for_order_and_null_size(&dtype,
+			dtype_read_for_order_and_null_size(&dtype,
 				   types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
+		}
 
 		if (len == UNIV_SQL_NULL) {
 			data_size += dtype_get_sql_null_size(&dtype);
@@ -1062,6 +1182,7 @@ ibuf_entry_build(
 				must be kept because we copy pointers to its
 				fields */
 	dtuple_t*	entry,	/* in: entry for a non-clustered index */
+	ulint		space,	/* in: space id */
 	ulint		page_no,/* in: index page number where entry should
 				be inserted */
 	mem_heap_t*	heap)	/* in: heap into which to build */
@@ -1074,49 +1195,79 @@ ibuf_entry_build(
 	byte*		buf2;
 	ulint		i;
 	
-	/* We have to build a tuple whose first field is the page number,
-	the second field contains the original type information for entry,
-	and the rest of the fields are copied from entry. All fields
-	in the tuple are of the type binary. */
+	/* Starting from 4.1.x, we have to build a tuple where
+	(1) the first field is the space id,
+	(2) the second field is a single marker byte which tells that this
+	is a new format record,
+	(3) the third field contains the page number,
+	(4) the fourth field contains the relevant type information of each
+	data field, and
+	(5) the rest of the fields are copied from entry. All fields
+	in the tuple are ordered like the type binary in our insert buffer
+	tree. */
 
 	n_fields = dtuple_get_n_fields(entry);
 
-	tuple = dtuple_create(heap, n_fields + 2);
+	tuple = dtuple_create(heap, n_fields + 4);
 
-	/* Store the page number in tuple */
+	/* Store the space id in tuple */
 
 	field = dtuple_get_nth_field(tuple, 0);
 
 	buf = mem_heap_alloc(heap, 4);
 
-	mach_write_to_4(buf, page_no);
+	mach_write_to_4(buf, space);
 
 	dfield_set_data(field, buf, 4);
 
-	/* Store the type info in tuple */
+	/* Store the marker byte field in tuple */
 
-	buf2 = mem_heap_alloc(heap, n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
+	field = dtuple_get_nth_field(tuple, 1);
 
-	for (i = 0; i < n_fields; i++) {
+	buf = mem_heap_alloc(heap, 1);
 
-		field = dtuple_get_nth_field(tuple, i + 2);
+	/* We set the marker byte zero */
 
-		entry_field = dtuple_get_nth_field(entry, i);
+	mach_write_to_1(buf, 0);
+
+	dfield_set_data(field, buf, 1);
+
+	/* Store the page number in tuple */
+
+	field = dtuple_get_nth_field(tuple, 2);
 
+	buf = mem_heap_alloc(heap, 4);
+
+	mach_write_to_4(buf, page_no);
+
+	dfield_set_data(field, buf, 4);
+
+	/* Store the type info in buf2, and add the fields from entry to
+	tuple */
+	buf2 = mem_heap_alloc(heap, n_fields
+					* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+	for (i = 0; i < n_fields; i++) {
+		/* We add 4 below because we have the 4 extra fields at the
+		start of an ibuf record */
+
+		field = dtuple_get_nth_field(tuple, i + 4);
+		entry_field = dtuple_get_nth_field(entry, i);
 		dfield_copy(field, entry_field);
 
-		dtype_store_for_order_and_null_size(
-				buf2 + i * DATA_ORDER_NULL_TYPE_BUF_SIZE,
+		dtype_new_store_for_order_and_null_size(
+				buf2 + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
 				dfield_get_type(entry_field));
 	}
 
-	field = dtuple_get_nth_field(tuple, 1);
+	/* Store the type info in buf2 to field 3 of tuple */
 
-	dfield_set_data(field, buf2, n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
+	field = dtuple_get_nth_field(tuple, 3);
 
-	/* Set the types in the new tuple binary */
+	dfield_set_data(field, buf2, n_fields
+					* DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+	/* Set all the types in the new tuple binary */
 
-	dtuple_set_types_binary(tuple, n_fields + 2);
+	dtuple_set_types_binary(tuple, n_fields + 4);
 
 	return(tuple);
 }	
@@ -1145,35 +1296,73 @@ ibuf_build_entry_from_ibuf_rec(
 	ulint		len;
 	ulint		i;
 	
-	n_fields = rec_get_n_fields(ibuf_rec) - 2;
+	data = rec_get_nth_field(ibuf_rec, 1, &len);
+
+	if (len > 1) {
+	        /* This is a < 4.1.x format record */
+
+		ut_a(trx_doublewrite_must_reset_space_ids);
+		ut_a(!trx_sys_multiple_tablespace_format);
+
+		n_fields = rec_get_n_fields(ibuf_rec) - 2;
+		tuple = dtuple_create(heap, n_fields);
+		types = rec_get_nth_field(ibuf_rec, 1, &len);
+
+		ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
+
+		for (i = 0; i < n_fields; i++) {
+		        field = dtuple_get_nth_field(tuple, i);
+
+			data = rec_get_nth_field(ibuf_rec, i + 2, &len);
+
+			dfield_set_data(field, data, len);
+
+			dtype_read_for_order_and_null_size(
+				   dfield_get_type(field),
+				   types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
+		}
+
+		return(tuple);
+	}
+
+	/* This is a >= 4.1.x format record */
+
+	ut_a(trx_sys_multiple_tablespace_format);
+
+	ut_a(rec_get_n_fields(ibuf_rec) > 4);
+
+	n_fields = rec_get_n_fields(ibuf_rec) - 4;
 
 	tuple = dtuple_create(heap, n_fields);
 
-	types = rec_get_nth_field(ibuf_rec, 1, &len);
+	types = rec_get_nth_field(ibuf_rec, 3, &len);
 
-	ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
+	ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
 
 	for (i = 0; i < n_fields; i++) {
-		field = dtuple_get_nth_field(tuple, i);
+	        field = dtuple_get_nth_field(tuple, i);
 
-		data = rec_get_nth_field(ibuf_rec, i + 2, &len);
+		data = rec_get_nth_field(ibuf_rec, i + 4, &len);
 
 		dfield_set_data(field, data, len);
 
-		dtype_read_for_order_and_null_size(dfield_get_type(field),
-				   types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
+		dtype_new_read_for_order_and_null_size(
+			dfield_get_type(field),
+			types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
 	}
 
 	return(tuple);
 }
 
 /*************************************************************************
-Builds a search tuple used to search buffered inserts for an index page. */
+Builds a search tuple used to search buffered inserts for an index page.
+This is for < 4.1.x format records */
 static
 dtuple_t*
 ibuf_search_tuple_build(
 /*====================*/
 				/* out, own: search tuple */
+	ulint		space,	/* in: space id */
 	ulint		page_no,/* in: index page number */
 	mem_heap_t*	heap)	/* in: heap into which to build */
 {
@@ -1181,6 +1370,10 @@ ibuf_search_tuple_build(
 	dfield_t*	field;
 	byte*		buf;
 	
+	ut_a(space == 0);
+	ut_a(trx_doublewrite_must_reset_space_ids);
+	ut_a(!trx_sys_multiple_tablespace_format);
+
 	tuple = dtuple_create(heap, 1);
 
 	/* Store the page number in tuple */
@@ -1199,6 +1392,61 @@ ibuf_search_tuple_build(
 }
 
 /*************************************************************************
+Builds a 3-field search tuple (space id, marker byte, page number) used to
+search buffered inserts for an index page; for >= 4.1.x format records. */
+static
+dtuple_t*
+ibuf_new_search_tuple_build(
+/*========================*/
+				/* out, own: search tuple */
+	ulint		space,	/* in: space id */
+	ulint		page_no,/* in: index page number */
+	mem_heap_t*	heap)	/* in: heap into which to build */
+{
+	dtuple_t*	tuple;
+	dfield_t*	field;
+	byte*		buf;
+	/* This tuple layout is valid only in the multiple tablespace format */
+	ut_a(trx_sys_multiple_tablespace_format);
+
+	tuple = dtuple_create(heap, 3);
+
+	/* Store the space id in tuple */
+
+	field = dtuple_get_nth_field(tuple, 0);
+
+	buf = mem_heap_alloc(heap, 4);
+
+	mach_write_to_4(buf, space);
+
+	dfield_set_data(field, buf, 4);
+
+	/* Store the new format record marker byte: a 1-byte field set to 0 */
+
+	field = dtuple_get_nth_field(tuple, 1);
+
+	buf = mem_heap_alloc(heap, 1);
+
+	mach_write_to_1(buf, 0);
+
+	dfield_set_data(field, buf, 1);
+
+	/* Store the page number in tuple */
+
+	field = dtuple_get_nth_field(tuple, 2);
+
+	buf = mem_heap_alloc(heap, 4);
+
+	mach_write_to_4(buf, page_no);
+
+	dfield_set_data(field, buf, 4);
+
+	dtuple_set_types_binary(tuple, 3);
+
+	return(tuple);
+}
+
+/*************************************************************************
 Checks if there are enough pages in the free list of the ibuf tree that we
 dare to start a pessimistic insert to the insert buffer. */
 UNIV_INLINE
@@ -1267,6 +1515,8 @@ ibuf_add_free_page(
 	page_t*	root;
 	page_t*	bitmap_page;
 
+	ut_a(space == 0);
+
 	mtr_start(&mtr);
 
 	/* Acquire the fsp latch before the ibuf header, obeying the latching
@@ -1312,7 +1562,7 @@ ibuf_add_free_page(
 		      page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
 
 	fil_page_set_type(page, FIL_PAGE_IBUF_FREE_LIST);
-		      
+
 	ibuf_data->seg_size++;
 	ibuf_data->free_list_len++;
 
@@ -1323,7 +1573,6 @@ ibuf_add_free_page(
 
 	ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF,
 								TRUE, &mtr);
-
 	mtr_commit(&mtr);
 
 	mutex_exit(&ibuf_mutex);
@@ -1350,6 +1599,8 @@ ibuf_remove_free_page(
 	page_t*	root;
 	page_t*	bitmap_page;
 
+	ut_a(space == 0);
+
 	mtr_start(&mtr);
 
 	/* Acquire the fsp latch before the ibuf header, obeying the latching
@@ -1461,6 +1712,13 @@ ibuf_free_excess_pages(
 {
 	ibuf_data_t*	ibuf_data;
 	ulint		i;
+
+	if (space != 0) {
+	        fprintf(stderr,
+"InnoDB: Error: calling ibuf_free_excess_pages for space %lu\n", (ulong) space);
+		return;
+	}
+
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(fil_space_get_latch(space), RW_LOCK_EX));
 #endif /* UNIV_SYNC_DEBUG */
@@ -1515,8 +1773,12 @@ ibuf_get_merge_page_nos(
 				contract the tree, FALSE if this is called
 				when a single page becomes full and we look
 				if it pays to read also nearby pages */
-	rec_t*		first_rec,/* in: record from which we read down and
-				up in the chain of records */
+	rec_t*		first_rec,/* in: record from which we read up and down
+				in the chain of records */
+	ulint*		space_ids,/* in/out: space id's of the pages */
+	ib_longlong*	space_versions,/* in/out: tablespace version
+				timestamps; used to prevent reading in old
+				pages after DISCARD + IMPORT tablespace */
 	ulint*		page_nos,/* in/out: buffer for at least
 				IBUF_MAX_N_PAGES_MERGED many page numbers;
 				the page numbers are in an ascending order */
@@ -1524,8 +1786,11 @@ ibuf_get_merge_page_nos(
 				page_nos in this function */
 {
 	ulint	prev_page_no;
+	ulint	prev_space_id;
 	ulint	first_page_no;
+	ulint	first_space_id;
 	ulint	rec_page_no;
+	ulint	rec_space_id;
 	rec_t*	rec;
 	ulint	sum_volumes;
 	ulint	volume_for_page;
@@ -1557,49 +1822,70 @@ ibuf_get_merge_page_nos(
 
 	rec = first_rec;
 	first_page_no = ibuf_rec_get_page_no(first_rec);
+	first_space_id = ibuf_rec_get_space(first_rec);
 	n_pages = 0;
 	prev_page_no = 0;
+	prev_space_id = 0;
 	
+	/* Go backwards from the first_rec until we reach the border of the
+	'merge area', or the page start or the limit of storeable pages is
+	reached */
+
 	while ((rec != page_get_infimum_rec(page)) && (n_pages < limit)) {
 
 		rec_page_no = ibuf_rec_get_page_no(rec);
+		rec_space_id = ibuf_rec_get_space(rec);
 
-		ut_ad(rec_page_no != 0);
-
-		if (rec_page_no / IBUF_MERGE_AREA
-		    != first_page_no / IBUF_MERGE_AREA) {
+		if (rec_space_id != first_space_id
+		    || rec_page_no / IBUF_MERGE_AREA
+		       != first_page_no / IBUF_MERGE_AREA) {
 
 		    	break;
 		}
 		
-		if (rec_page_no != prev_page_no) {
+		if (rec_page_no != prev_page_no
+		    || rec_space_id != prev_space_id) {
 			n_pages++;
 		}
 
 		prev_page_no = rec_page_no;
+		prev_space_id = rec_space_id;
 
 		rec = page_rec_get_prev(rec);
 	}
 
 	rec = page_rec_get_next(rec);
 
+	/* At the loop start there is no prev page; we mark this with a pair
+	of space id, page no (0, 0) for which there can never be entries in
+	the insert buffer */
+
 	prev_page_no = 0;
+	prev_space_id = 0;
 	sum_volumes = 0;
 	volume_for_page = 0;
 	
 	while (*n_stored < limit) {
 		if (rec == page_get_supremum_rec(page)) {
+			/* When no more records available, mark this with
+			another 'impossible' pair of space id, page no */
 			rec_page_no = 1;
+			rec_space_id = 0;
 		} else {
 			rec_page_no = ibuf_rec_get_page_no(rec);
+			rec_space_id = ibuf_rec_get_space(rec);
 			ut_ad(rec_page_no > IBUF_TREE_ROOT_PAGE_NO);
 		}
 
 #ifdef UNIV_IBUF_DEBUG
 		ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED);
 #endif
-		if (rec_page_no != prev_page_no) {
-			if ((prev_page_no == first_page_no)
+		if ((rec_space_id != prev_space_id
+		     || rec_page_no != prev_page_no)
+                    && (prev_space_id != 0 || prev_page_no != 0)) {
+
+			if ((prev_page_no == first_page_no
+			     && prev_space_id == first_space_id)
 			    || contract
 			    || (volume_for_page >
 			     ((IBUF_MERGE_THRESHOLD - 1)
@@ -1607,6 +1893,10 @@ ibuf_get_merge_page_nos(
 				    / IBUF_PAGE_SIZE_PER_FREE_SPACE)
 			     / IBUF_MERGE_THRESHOLD)) {
 
+			        space_ids[*n_stored] = prev_space_id;
+				space_versions[*n_stored]
+						= fil_space_get_version(
+							prev_space_id);
 				page_nos[*n_stored] = prev_page_no;
 
 				(*n_stored)++;
@@ -1614,8 +1904,9 @@ ibuf_get_merge_page_nos(
 				sum_volumes += volume_for_page;
 			}
 
-			if (rec_page_no / IBUF_MERGE_AREA
-		    		!= first_page_no / IBUF_MERGE_AREA) {
+			if (rec_space_id != first_space_id
+			    || rec_page_no / IBUF_MERGE_AREA
+			       != first_page_no / IBUF_MERGE_AREA) {
 
 		    		break;
 			}
@@ -1623,7 +1914,7 @@ ibuf_get_merge_page_nos(
 			volume_for_page = 0;
 		}
 
-		if (rec_page_no == 1) {
+		if (rec_page_no == 1 && rec_space_id == 0) {
 			/* Supremum record */
 
 			break;
@@ -1634,6 +1925,7 @@ ibuf_get_merge_page_nos(
 		volume_for_page += rec_volume;
 		
 		prev_page_no = rec_page_no;
+		prev_space_id = rec_space_id;
 
 		rec = page_rec_get_next(rec);
 	}
@@ -1666,6 +1958,8 @@ ibuf_contract_ext(
 	ulint		space;
 	ibool		all_trees_empty;
 	ulint		page_nos[IBUF_MAX_N_PAGES_MERGED];
+	ulint		space_ids[IBUF_MAX_N_PAGES_MERGED];
+	ib_longlong	space_versions[IBUF_MAX_N_PAGES_MERGED];
 	ulint		n_stored;
 	ulint		sum_sizes;
 	mtr_t		mtr;
@@ -1678,7 +1972,8 @@ loop:
 
 	ut_ad(ibuf_validate_low());	
 
-	/* Choose an ibuf tree at random */
+	/* Choose an ibuf tree at random (though there really is only one tree
+	in the current implementation) */
 	ibuf_rnd += 865558671;
 
 	rnd_pos = ibuf_rnd % ibuf->size;
@@ -1714,8 +2009,10 @@ loop:
 
 	ut_ad(data);
 
-	space = (data->index)->space;
+	space = data->index->space;
 
+	ut_a(space == 0);	/* We currently only have an ibuf tree in
+				space 0 */
 	mtr_start(&mtr);
 
 	ibuf_enter();
@@ -1744,8 +2041,8 @@ loop:
 	mutex_exit(&ibuf_mutex);
 
 	sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur),
-							page_nos, &n_stored);
-
+					space_ids, space_versions, page_nos,
+					&n_stored);
 #ifdef UNIV_IBUF_DEBUG
 	/* printf("Ibuf contract sync %lu pages %lu volume %lu\n", sync,
 						n_stored, sum_sizes); */
@@ -1755,8 +2052,8 @@ loop:
 	mtr_commit(&mtr);
 	btr_pcur_close(&pcur);
 
-	buf_read_ibuf_merge_pages(sync, space, page_nos, n_stored);
-
+	buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos,
+								   n_stored);
 	*n_pages = n_stored;
 	
 	return(sum_sizes + 1);
@@ -1885,6 +2182,8 @@ ibuf_get_volume_buffered(
 	ulint	next_page_no;
 	page_t*	next_page;
 	
+	ut_a(trx_sys_multiple_tablespace_format);
+
 	ut_ad((pcur->latch_mode == BTR_MODIFY_PREV)
 				|| (pcur->latch_mode == BTR_MODIFY_TREE));
 
@@ -1907,7 +2206,8 @@ ibuf_get_volume_buffered(
 			break;
 		}
 		
-		if (page_no != ibuf_rec_get_page_no(rec)) {
+		if (page_no != ibuf_rec_get_page_no(rec)
+		    || space != ibuf_rec_get_space(rec)) {
 
 			goto count_later;
 		}
@@ -1926,7 +2226,7 @@ ibuf_get_volume_buffered(
 		goto count_later;
 	}
 
-	prev_page = buf_page_get(space, prev_page_no, RW_X_LATCH, mtr);
+	prev_page = buf_page_get(0, prev_page_no, RW_X_LATCH, mtr);
 
 #ifdef UNIV_SYNC_DEBUG
 	buf_page_dbg_add_level(prev_page, SYNC_TREE_NODE);
@@ -1945,7 +2245,8 @@ ibuf_get_volume_buffered(
 			return(UNIV_PAGE_SIZE);
 		}
 		
-		if (page_no != ibuf_rec_get_page_no(rec)) {
+		if (page_no != ibuf_rec_get_page_no(rec)
+		    || space != ibuf_rec_get_space(rec)) {
 
 			goto count_later;
 		}
@@ -1968,7 +2269,8 @@ count_later:
 			break;
 		}
 		
-		if (page_no != ibuf_rec_get_page_no(rec)) {
+		if (page_no != ibuf_rec_get_page_no(rec)
+		    || space != ibuf_rec_get_space(rec)) {
 
 			return(volume);
 		}
@@ -1987,7 +2289,7 @@ count_later:
 		return(volume);
 	}
 
-	next_page = buf_page_get(space, next_page_no, RW_X_LATCH, mtr);
+	next_page = buf_page_get(0, next_page_no, RW_X_LATCH, mtr);
 
 #ifdef UNIV_SYNC_DEBUG
 	buf_page_dbg_add_level(next_page, SYNC_TREE_NODE);
@@ -2004,7 +2306,8 @@ count_later:
 			return(UNIV_PAGE_SIZE);
 		}
 		
-		if (page_no != ibuf_rec_get_page_no(rec)) {
+		if (page_no != ibuf_rec_get_page_no(rec)
+		    || space != ibuf_rec_get_space(rec)) {
 
 			return(volume);
 		}
@@ -2016,6 +2319,57 @@ count_later:
 }
 
 /*************************************************************************
+Reads the biggest tablespace id from the high end of the insert buffer
+tree and updates the counter in fil_system. */
+
+void
+ibuf_update_max_tablespace_id(void)
+/*===============================*/
+{
+	ulint		max_space_id;
+	rec_t*		rec;
+	byte*		field;
+	ulint		len;
+	ibuf_data_t*	ibuf_data;
+	dict_index_t*	ibuf_index;
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
+
+	ibuf_data = fil_space_get_ibuf_data(0);
+
+	ibuf_index = ibuf_data->index;
+
+	ibuf_enter();
+
+	mtr_start(&mtr);
+	/* Open the cursor at the high end of the tree and step back one rec */
+	btr_pcur_open_at_index_side(FALSE, ibuf_index, BTR_SEARCH_LEAF,
+							&pcur, TRUE, &mtr);
+	btr_pcur_move_to_prev(&pcur, &mtr);
+
+	if (btr_pcur_is_before_first_on_page(&pcur, &mtr)) {
+		/* The tree is empty */
+
+		max_space_id = 0;
+	} else {
+		rec = btr_pcur_get_rec(&pcur);
+		/* Field 0 is read as a 4-byte space id */
+		field = rec_get_nth_field(rec, 0, &len);
+
+		ut_a(len == 4);
+		
+		max_space_id = mach_read_from_4(field);
+	}
+
+	mtr_commit(&mtr);
+	ibuf_exit();
+
+	/* printf("Maximum space id in insert buffer %lu\n", max_space_id); */
+
+	fil_set_max_space_id_if_bigger(max_space_id);
+}
+
+/*************************************************************************
 Makes an index insert to the insert buffer, instead of directly to the disk
 page, if this is possible. */
 static
@@ -2035,8 +2389,6 @@ ibuf_insert_low(
 	ulint		entry_size;
 	btr_pcur_t	pcur;
 	btr_cur_t*	cursor;
-	mtr_t		mtr;
-	mtr_t		bitmap_mtr;
 	dtuple_t*	ibuf_entry;
 	mem_heap_t*	heap;
 	ulint		buffered;
@@ -2048,16 +2400,25 @@ ibuf_insert_low(
 	page_t*		root;
 	ulint		err;
 	ibool		do_merge;
+	ulint		space_ids[IBUF_MAX_N_PAGES_MERGED];
+	ib_longlong	space_versions[IBUF_MAX_N_PAGES_MERGED];
 	ulint		page_nos[IBUF_MAX_N_PAGES_MERGED];
 	ulint		n_stored;
 	ulint		bits;
+	mtr_t		mtr;
+	mtr_t		bitmap_mtr;
 	
 	ut_a(!(index->type & DICT_CLUSTERED));
 	ut_ad(dtuple_check_typed(entry));
 
+	ut_a(trx_sys_multiple_tablespace_format);
+
 	do_merge = FALSE;
-	
-	ibuf_data = fil_space_get_ibuf_data(space);
+
+	/* Currently the insert buffer of space 0 takes care of inserts to all
+	tablespaces */
+
+	ibuf_data = fil_space_get_ibuf_data(0);
 
 	ibuf_index = ibuf_data->index;
 
@@ -2084,7 +2445,7 @@ ibuf_insert_low(
 		mutex_enter(&ibuf_pessimistic_insert_mutex);
 
 		ibuf_enter();
-	
+
 		mutex_enter(&ibuf_mutex);
 
 		while (!ibuf_data_enough_free_for_insert(ibuf_data)) {
@@ -2095,7 +2456,7 @@ ibuf_insert_low(
 			
 			mutex_exit(&ibuf_pessimistic_insert_mutex);
 
-			err = ibuf_add_free_page(space, ibuf_data);
+			err = ibuf_add_free_page(0, ibuf_data);
 
 			if (err == DB_STRONG_FAIL) {
 
@@ -2120,7 +2481,7 @@ ibuf_insert_low(
 	the first fields and the type information for other fields, and which
 	will be inserted to the insert buffer. */
 
-	ibuf_entry = ibuf_entry_build(entry, page_no, heap);
+	ibuf_entry = ibuf_entry_build(entry, space, page_no, heap);
 
 	/* Open a cursor to the insert buffer tree to calculate if we can add
 	the new entry to it without exceeding the free space limit for the
@@ -2145,7 +2506,6 @@ ibuf_insert_low(
 
 	if (buf_page_peek(space, page_no)
 			|| lock_rec_expl_exist_on_page(space, page_no)) {
-
 		err = DB_STRONG_FAIL;
 
 		mtr_commit(&bitmap_mtr);
@@ -2158,7 +2518,6 @@ ibuf_insert_low(
 
 	if (buffered + entry_size + page_dir_calc_reserved_space(1)
 				> ibuf_index_page_calc_free_from_bits(bits)) {
-
 		mtr_commit(&bitmap_mtr);
 
  		/* It may not fit */
@@ -2167,7 +2526,8 @@ ibuf_insert_low(
 		do_merge = TRUE; 
 
 		ibuf_get_merge_page_nos(FALSE, btr_pcur_get_rec(&pcur),
-							page_nos, &n_stored);
+					space_ids, space_versions, page_nos,
+					&n_stored);
 		goto function_exit;
  	}
 
@@ -2203,10 +2563,10 @@ ibuf_insert_low(
 		which would cause the x-latching of the root after that to
 		break the latching order. */
 		
-		root = ibuf_tree_root_get(ibuf_data, space, &mtr);
+		root = ibuf_tree_root_get(ibuf_data, 0, &mtr);
 
 		err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
-						| BTR_NO_UNDO_LOG_FLAG,
+						 | BTR_NO_UNDO_LOG_FLAG,
 						cursor,
 						ibuf_entry, &ins_rec,
 						&dummy_big_rec, thr,
@@ -2223,6 +2583,10 @@ ibuf_insert_low(
 function_exit:
 #ifdef UNIV_IBUF_DEBUG
 	if (err == DB_SUCCESS) {
+		printf(
+"Incrementing ibuf count of space %lu page %lu\n"
+"from %lu by 1\n", space, page_no, ibuf_count_get(space, page_no));
+
 		ibuf_count_set(space, page_no,
 					ibuf_count_get(space, page_no) + 1);
 	}
@@ -2257,7 +2621,8 @@ function_exit:
 #ifdef UNIV_IBUF_DEBUG
 		ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
 #endif
-		buf_read_ibuf_merge_pages(FALSE, space, page_nos, n_stored);
+		buf_read_ibuf_merge_pages(FALSE, space_ids, space_versions,
+							page_nos, n_stored);
 	}
 	
 	return(err);
@@ -2280,6 +2645,7 @@ ibuf_insert(
 {
 	ulint	err;
 
+	ut_a(trx_sys_multiple_tablespace_format);
 	ut_ad(dtuple_check_typed(entry));
 
 	ut_a(!(index->type & DICT_CLUSTERED));
@@ -2332,6 +2698,26 @@ ibuf_insert_to_index_page(
 	ut_ad(ibuf_inside());
 	ut_ad(dtuple_check_typed(entry));
 
+	if (rec_get_n_fields(page_rec_get_next(page_get_infimum_rec(page)))
+	    != dtuple_get_n_fields(entry)) {
+		/* Format the entry first: errbuf is printed just below */
+		dtuple_sprintf(errbuf, 900, entry);
+
+		fprintf(stderr,
+"InnoDB: Trying to insert a record from the insert buffer to an index page\n"
+"InnoDB: but the number of fields does not match!\n%s\n", errbuf);
+
+		buf_page_print(page);
+
+		fprintf(stderr,
+"InnoDB: The table where where this index record belongs\n"
+"InnoDB: is now probably corrupt. Please run CHECK TABLE on\n"
+"InnoDB: your tables.\n"
+"InnoDB: Send a detailed bug report to mysql@lists.mysql.com!\n");
+
+		return;
+	}
+
 	low_match = page_cur_search(page, entry, PAGE_CUR_LE, &page_cur);
 	
 	if (low_match == dtuple_get_n_fields(entry)) {
@@ -2355,39 +2741,34 @@ ibuf_insert_to_index_page(
 
 				fprintf(stderr,
 "InnoDB: Error: Insert buffer insert fails; page free %lu, dtuple size %lu\n",
-				page_get_max_insert_size(page, 1),
-				rec_get_converted_size(entry));
+				(ulong) page_get_max_insert_size(page, 1),
+				(ulong) rec_get_converted_size(entry));
 
 				dtuple_sprintf(errbuf, 900, entry);
 				
 				fprintf(stderr,
-"InnoDB: Cannot insert index record %s\n", errbuf);
-
-				fprintf(stderr,
+"InnoDB: Cannot insert index record %s\n"
 "InnoDB: The table where where this index record belongs\n"
 "InnoDB: is now probably corrupt. Please run CHECK TABLE on\n"
-"InnoDB: that table.\n");
-				
+"InnoDB: that table.\n", errbuf);
 				bitmap_page = ibuf_bitmap_get_map_page(
 						buf_frame_get_space_id(page),
 						buf_frame_get_page_no(page),
 						mtr);
-
 				old_bits = ibuf_bitmap_page_get_bits(
 						bitmap_page,
 						buf_frame_get_page_no(page),
 						IBUF_BITMAP_FREE, mtr);
 
-				fprintf(stderr, "Bitmap bits %lu\n", old_bits);
+				fprintf(stderr, "Bitmap bits %lu\n", (ulong) old_bits);
 
 				fprintf(stderr,
 "InnoDB: Send a detailed bug report to mysql@lists.mysql.com!\n");
-				
 			}	
 		}
 	}
 }
-	
+
 /*************************************************************************
 Deletes from ibuf the record on which pcur is positioned. If we have to
 resort to a pessimistic delete, this function commits mtr and closes
@@ -2411,13 +2792,16 @@ ibuf_delete_rec(
 	ibuf_data_t*	ibuf_data;
 	page_t*		root;
 	ulint		err;
-
+	
 	ut_ad(ibuf_inside());
 
 	success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
 
 	if (success) {
 #ifdef UNIV_IBUF_DEBUG
+		printf(
+"Decrementing ibuf count of space %lu page %lu\n"
+"from %lu by 1\n", space, page_no, ibuf_count_get(space, page_no));
 		ibuf_count_set(space, page_no,
 					ibuf_count_get(space, page_no) - 1);
 #endif
@@ -2429,7 +2813,10 @@ ibuf_delete_rec(
 
 	btr_pcur_commit_specify_mtr(pcur, mtr);
 
-	ibuf_data = fil_space_get_ibuf_data(space);
+	/* Currently the insert buffer of space 0 takes care of inserts to all
+	tablespaces */
+
+	ibuf_data = fil_space_get_ibuf_data(0);
 
 	mutex_enter(&ibuf_mutex);
 
@@ -2439,10 +2826,10 @@ ibuf_delete_rec(
 
 	if (!success) {
 		fprintf(stderr,
-		"InnoDB: ERROR: Send the output to heikki.tuuri@innodb.com\n");
-		fprintf(stderr, "InnoDB: ibuf cursor restoration fails!\n");
-		fprintf(stderr, "InnoDB: ibuf record inserted to page %lu\n",
-								page_no);
+"InnoDB: ERROR: Send the output to mysql@lists.mysql.com\n"
+"InnoDB: ibuf cursor restoration fails!\n"
+"InnoDB: ibuf record inserted to space %lu page %lu\n", (ulong) space,
+			(ulong) page_no);
 		fflush(stderr);
 
 		rec_print(btr_pcur_get_rec(pcur));
@@ -2452,18 +2839,23 @@ ibuf_delete_rec(
 		rec_print(page_rec_get_next(btr_pcur_get_rec(pcur)));
 		fflush(stdout);
 
-		mtr_commit(mtr);
+		btr_pcur_commit_specify_mtr(pcur, mtr);
 
-		fprintf(stderr, "InnoDB: Validating insert buffer tree:\n");
+		fprintf(stderr,
+		 "InnoDB: Validating insert buffer tree:\n");
 		ut_a(btr_validate_tree(ibuf_data->index->tree));
 
 		fprintf(stderr, "InnoDB: ibuf tree ok\n");
 		fflush(stderr);
+
+		btr_pcur_close(pcur);
+
+		mutex_exit(&ibuf_mutex);
+
+		return(TRUE);
 	}
-	
-	ut_a(success);
 
-	root = ibuf_tree_root_get(ibuf_data, space, mtr);
+	root = ibuf_tree_root_get(ibuf_data, 0, mtr);
 
 	btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur),
 								FALSE, mtr);
@@ -2499,7 +2891,11 @@ ibuf_merge_or_delete_for_page(
 	page_t*	page,	/* in: if page has been read from disk, pointer to
 			the page x-latched, else NULL */
 	ulint	space,	/* in: space id of the index page */
-	ulint	page_no)/* in: page number of the index page */
+	ulint	page_no,/* in: page number of the index page */
+	ibool	update_ibuf_bitmap)/* in: normally this is set to TRUE, but if
+			we have deleted or are deleting the tablespace, then we
+			naturally do not want to update a non-existent bitmap
+			page */
 {
 	mem_heap_t*	heap;
 	btr_pcur_t	pcur;
@@ -2516,6 +2912,7 @@ ibuf_merge_or_delete_for_page(
 	ulint		old_bits;
 	ulint		new_bits;
 	dulint		max_trx_id;
+	ibool		tablespace_being_deleted = FALSE;
 	ibool		corruption_noticed	= FALSE;
 	mtr_t		mtr;
 	char		err_buf[500];
@@ -2524,7 +2921,7 @@ ibuf_merge_or_delete_for_page(
 
 		return;
 	}
-	
+
 #ifdef UNIV_LOG_DEBUG
 	if (space % 2 != 0) {
 
@@ -2538,28 +2935,57 @@ ibuf_merge_or_delete_for_page(
 		return;
 	}
 
-	mtr_start(&mtr);
+	if (update_ibuf_bitmap) {
+		/* If the following returns FALSE, we get the counter
+		incremented, and must decrement it when we leave this
+		function. When the counter is > 0, that prevents tablespace
+		from being dropped. */
 
-	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
+		tablespace_being_deleted = fil_inc_pending_ibuf_merges(space);
+
+		if (tablespace_being_deleted) {
+			/* Do not try to read the bitmap page from space;
+			just delete the ibuf records for the page */
+
+			page = NULL;
+			update_ibuf_bitmap = FALSE;
+		}
+	}
+
+	if (update_ibuf_bitmap) {
+		mtr_start(&mtr);
+		bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
 
-	if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no,
+		if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no,
 						IBUF_BITMAP_BUFFERED, &mtr)) {
-		/* No inserts buffered for this page */
+			/* No inserts buffered for this page */
+			mtr_commit(&mtr);
 
-		mtr_commit(&mtr);
+			if (!tablespace_being_deleted) {
+				fil_decr_pending_ibuf_merges(space);
+			}
 
-		return;
+			return;
+		}
+		mtr_commit(&mtr);
 	}
 
-	mtr_commit(&mtr);
+	/* Currently the insert buffer of space 0 takes care of inserts to all
+	tablespaces */
 
-	ibuf_data = fil_space_get_ibuf_data(space);
+	ibuf_data = fil_space_get_ibuf_data(0);
 
 	ibuf_enter();
 
 	heap = mem_heap_create(512);
 
-	search_tuple = ibuf_search_tuple_build(page_no, heap);
+	if (!trx_sys_multiple_tablespace_format) {
+		ut_a(trx_doublewrite_must_reset_space_ids);
+	        search_tuple = ibuf_search_tuple_build(space, page_no, heap);
+	} else {
+	        search_tuple = ibuf_new_search_tuple_build(space, page_no,
+									heap);
+	}
 		
 	if (page) {
 		/* Move the ownership of the x-latch on the page to this OS
@@ -2600,7 +3026,8 @@ ibuf_merge_or_delete_for_page(
 "InnoDB: to determine if they are corrupt after this.\n\n"
 "InnoDB: Please make a detailed bug report and send it to\n"
 "InnoDB: mysql@lists.mysql.com\n\n",
-				page_no, fil_page_get_type(page));
+				(ulong) page_no,
+				(ulong) fil_page_get_type(page));
 		}
 	}
 
@@ -2624,7 +3051,6 @@ loop:
 	index page */
 	btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE,
 						BTR_MODIFY_LEAF, &pcur, &mtr);
-
 	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
 		ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
 
@@ -2637,29 +3063,18 @@ loop:
 		ibuf_rec = btr_pcur_get_rec(&pcur);
 
 		/* Check if the entry is for this index page */
-		if (ibuf_rec_get_page_no(ibuf_rec) != page_no) {
-
+		if (ibuf_rec_get_page_no(ibuf_rec) != page_no
+		    || ibuf_rec_get_space(ibuf_rec) != space) {
 			if (page) {
 				page_header_reset_last_insert(page, &mtr);
 			}
-
 			goto reset_bit;
 		}
 
-		/* Do NOT merge to the 4.1 code base! */
-		if (trx_sys_downgrading_from_4_1_1) {
-			fprintf(stderr,
-"InnoDB: Fatal error: you are downgrading from >= 4.1.1 to 4.0, but\n"
-"InnoDB: the insert buffer was not empty.\n");
-			ut_error;
-		}
-
 		if (corruption_noticed) {
 			rec_sprintf(err_buf, 450, ibuf_rec);
-
 			fprintf(stderr,
 "InnoDB: Discarding record\n %s\n from the insert buffer!\n\n", err_buf);
-	
 	   	} else if (page) {
 			/* Now we have at pcur a record which should be
 			inserted to the index page; NOTE that the call below
@@ -2669,14 +3084,12 @@ loop:
 
 			max_trx_id = page_get_max_trx_id(
 						buf_frame_align(ibuf_rec));
-	
 			page_update_max_trx_id(page, max_trx_id);
 			
 			entry = ibuf_build_entry_from_ibuf_rec(ibuf_rec, heap);
 #ifdef UNIV_IBUF_DEBUG
 			volume += rec_get_converted_size(entry)
  					+ page_dir_calc_reserved_space(1);
-	    
 			ut_a(volume <= 4 * UNIV_PAGE_SIZE
 					/ IBUF_PAGE_SIZE_PER_FREE_SPACE);
 #endif
@@ -2704,43 +3117,38 @@ loop:
 	}
 
 reset_bit:
-
 #ifdef UNIV_IBUF_DEBUG
 	if (ibuf_count_get(space, page_no) > 0) {
-
 		/* btr_print_tree(ibuf_data->index->tree, 100);
 		ibuf_print(); */
 	}
 #endif
-	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
-
-	ibuf_bitmap_page_set_bits(bitmap_page, page_no,
+	if (update_ibuf_bitmap) {
+		bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr);
+		ibuf_bitmap_page_set_bits(bitmap_page, page_no,
 					IBUF_BITMAP_BUFFERED, FALSE, &mtr);
-	if (page) {
-		old_bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no,
-						IBUF_BITMAP_FREE, &mtr);
-		new_bits = ibuf_index_page_calc_free(page);
-
+		if (page) {
+			old_bits = ibuf_bitmap_page_get_bits(bitmap_page,
+					page_no, IBUF_BITMAP_FREE, &mtr);
+			new_bits = ibuf_index_page_calc_free(page);
 #ifdef UNIV_IBUF_DEBUG
-		/* printf("Old bits %lu new bits %lu max size %lu\n", old_bits,
-			new_bits,
+			/* printf("Old bits %lu new bits %lu max size %lu\n",
+			old_bits, new_bits,
 			page_get_max_insert_size_after_reorganize(page, 1)); */
 #endif
-		if (old_bits != new_bits) {
-			
-			ibuf_bitmap_page_set_bits(bitmap_page, page_no,
+			if (old_bits != new_bits) {
+				ibuf_bitmap_page_set_bits(bitmap_page, page_no,
 							IBUF_BITMAP_FREE,
 							new_bits, &mtr);
+			}
 		}
 	}
-	
 #ifdef UNIV_IBUF_DEBUG
 	/* printf("Ibuf merge %lu records volume %lu to page no %lu\n",
 					n_inserts, volume, page_no); */
 #endif
 	mtr_commit(&mtr);
  	btr_pcur_close(&pcur);
- 	
 	mem_heap_free(heap);
 
 	/* Protect our statistics keeping from race conditions */
@@ -2751,12 +3159,123 @@ reset_bit:
 
 	mutex_exit(&ibuf_mutex);
 
+	if (update_ibuf_bitmap && !tablespace_being_deleted) {
+
+		fil_decr_pending_ibuf_merges(space);
+	}
+
 	ibuf_exit();
 #ifdef UNIV_IBUF_DEBUG
 	ut_a(ibuf_count_get(space, page_no) == 0);
 #endif
 }
 
+/*************************************************************************
+Deletes all entries in the insert buffer for a given space id. This is used
+in DISCARD TABLESPACE and IMPORT TABLESPACE.
+NOTE: this does not update the page free bitmaps in the space. The space will
+become CORRUPT when you call this function! */
+
+void
+ibuf_delete_for_discarded_space(
+/*============================*/
+	ulint	space)	/* in: space id */
+{
+	mem_heap_t*	heap;
+	btr_pcur_t	pcur;
+	dtuple_t*	search_tuple;
+	rec_t*		ibuf_rec;
+	ulint		page_no;
+	ibool		closed;
+	ibuf_data_t*	ibuf_data;
+	ulint		n_inserts;
+	mtr_t		mtr;
+
+	/* Currently the insert buffer of space 0 takes care of inserts to all
+	tablespaces */
+
+	ibuf_data = fil_space_get_ibuf_data(0);
+
+	heap = mem_heap_create(512);
+
+	/* Use page number 0 to build the search tuple so that we get the
+	cursor positioned at the first entry for this space id */
+
+	search_tuple = ibuf_new_search_tuple_build(space, 0, heap);
+	/* Count of deleted records; set once here, not reset on restarts */
+	n_inserts = 0;
+loop:
+	ibuf_enter();
+
+	mtr_start(&mtr);
+
+	/* Position pcur in the insert buffer at the first entry for the
+	space */
+	btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE,
+						BTR_MODIFY_LEAF, &pcur, &mtr);
+	if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) {
+		ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+		goto leave_loop;
+	}
+
+	for (;;) {
+		ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr));
+
+		ibuf_rec = btr_pcur_get_rec(&pcur);
+
+		/* Check if the entry is for this space */
+		if (ibuf_rec_get_space(ibuf_rec) != space) {
+
+			goto leave_loop;
+		}
+
+		page_no = ibuf_rec_get_page_no(ibuf_rec);
+
+		n_inserts++;
+		
+		/* Delete the record from ibuf */
+		closed = ibuf_delete_rec(space, page_no, &pcur, search_tuple,
+									&mtr);
+		if (closed) {
+			/* Deletion was pessimistic and mtr was committed:
+			we start from the beginning again */
+
+			ibuf_exit();
+
+			goto loop;
+		}
+		/* Past the last record of this page: commit and search again */
+		if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) {
+			mtr_commit(&mtr);
+ 			btr_pcur_close(&pcur);
+
+			ibuf_exit();
+
+			goto loop;
+		}
+	}
+
+leave_loop:
+	mtr_commit(&mtr);
+ 	btr_pcur_close(&pcur);
+
+	/* Protect our statistics keeping from race conditions */
+	mutex_enter(&ibuf_mutex);
+
+	ibuf_data->n_merges++;	
+	ibuf_data->n_merged_recs += n_inserts;
+
+	mutex_exit(&ibuf_mutex);
+
+	printf("Discarded %lu ibuf entries for space %lu\n", (ulong) n_inserts,
+	       (ulong) space);
+
+	ibuf_exit();
+
+	mem_heap_free(heap);
+}
+
 /**********************************************************************
 Validates the ibuf data structures when the caller owns ibuf_mutex. */
 
@@ -2788,6 +3307,56 @@ ibuf_validate_low(void)
 }
 
 /**********************************************************************
+Looks if the insert buffer is empty. */
+
+ibool
+ibuf_is_empty(void)
+/*===============*/
+			/* out: TRUE if empty */
+{
+	ibuf_data_t*	data;
+	ibool		is_empty;
+	page_t*		root;
+	mtr_t		mtr;
+
+	ibuf_enter();
+
+	mutex_enter(&ibuf_mutex);
+	/* Only the first ibuf data struct in the list is examined */
+	data = UT_LIST_GET_FIRST(ibuf->data_list);
+
+	mtr_start(&mtr);
+
+	root = ibuf_tree_root_get(data, 0, &mtr);
+
+	if (page_get_n_recs(root) == 0) {
+		/* The root page holds no records */
+		is_empty = TRUE;
+
+		if (data->empty == FALSE) {
+			fprintf(stderr,
+"InnoDB: Warning: insert buffer tree is empty but the data struct does not\n"
+"InnoDB: know it. This condition is legal if the master thread has not yet\n"
+"InnoDB: run to completion.\n");
+		}
+	} else {
+	        ut_a(data->empty == FALSE);
+
+		is_empty = FALSE;
+	}
+
+	mtr_commit(&mtr);
+
+	ut_a(data->space == 0);
+
+	mutex_exit(&ibuf_mutex);
+
+	ibuf_exit();
+
+	return(is_empty);
+}
+
+/**********************************************************************
 Prints info of ibuf. */
 
 void
@@ -2810,18 +3379,29 @@ ibuf_print(
 
 	while (data) {
 		buf += sprintf(buf,
-  	"Ibuf for space %lu: size %lu, free list len %lu, seg size %lu,\n",
-		data->space, data->size, data->free_list_len, data->seg_size);
+  	"Ibuf for space %lu: size %lu, free list len %lu, seg size %lu,",
+			       (ulong) data->space, (ulong) data->size,
+			       (ulong) data->free_list_len,
+			       (ulong) data->seg_size);
+
+		if (data->empty) {
+			buf += sprintf(buf, " is empty\n");
+		} else {
+			buf += sprintf(buf, " is not empty\n");
+		}
 
 		buf += sprintf(buf,
 			"%lu inserts, %lu merged recs, %lu merges\n",
-			data->n_inserts, data->n_merged_recs, data->n_merges);
+			       (ulong) data->n_inserts,
+			       (ulong) data->n_merged_recs,
+			       (ulong) data->n_merges);
 #ifdef UNIV_IBUF_DEBUG
 		for (i = 0; i < IBUF_COUNT_N_PAGES; i++) {
 			if (ibuf_count_get(data->space, i) > 0) {
 
 				printf("Ibuf count for page %lu is %lu\n",
-					i, ibuf_count_get(data->space, i));
+				       (ulong) i,
+				       (ulong) ibuf_count_get(data->space, i));
 			}
 		}
 #endif
diff --git a/innobase/include/btr0btr.ic b/innobase/include/btr0btr.ic
index fd66c7bf2a3..b0aa0756307 100644
--- a/innobase/include/btr0btr.ic
+++ b/innobase/include/btr0btr.ic
@@ -188,6 +188,7 @@ btr_node_ptr_get_child_page_no(
 	ulint	n_fields;
 	byte*	field;
 	ulint	len;
+	ulint	page_no;
 
 	n_fields = rec_get_n_fields(rec);
 
@@ -196,7 +197,16 @@ btr_node_ptr_get_child_page_no(
 
 	ut_ad(len == 4);
 	
-	return(mach_read_from_4(field));
+	page_no = mach_read_from_4(field);
+
+	if (page_no == 0) {
+		fprintf(stderr,
+"InnoDB: a nonsensical page number 0 in a node ptr record at offset %lu\n",
+		       (unsigned long)(rec - buf_frame_align(rec)));
+		buf_page_print(buf_frame_align(rec));
+	}
+
+	return(page_no);
 }
 
 /******************************************************************
diff --git a/innobase/include/btr0pcur.h b/innobase/include/btr0pcur.h
index 9d07dd0de18..81f19af4d40 100644
--- a/innobase/include/btr0pcur.h
+++ b/innobase/include/btr0pcur.h
@@ -466,6 +466,9 @@ struct btr_pcur_struct{
 					BTR_PCUR_AFTER, depending on whether
 					cursor was on, before, or after the
 					old_rec record */
+	buf_block_t*	block_when_stored;/* buffer block when the position was
+					stored; note that if AWE is on, frames
+					may move */
 	dulint		modify_clock;	/* the modify clock value of the
 					buffer block when the cursor position
 					was stored */
diff --git a/innobase/include/btr0pcur.ic b/innobase/include/btr0pcur.ic
index a1db2cc52dd..b553a569bda 100644
--- a/innobase/include/btr0pcur.ic
+++ b/innobase/include/btr0pcur.ic
@@ -564,7 +564,7 @@ btr_pcur_open_at_index_side(
 	}
 
 	btr_cur_open_at_index_side(from_left, index, latch_mode,
-					btr_pcur_get_btr_cur(pcur), mtr);	
+					btr_pcur_get_btr_cur(pcur), mtr);
 	pcur->pos_state = BTR_PCUR_IS_POSITIONED;
 
 	pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h
index 72cedafa7e1..3cab717546a 100644
--- a/innobase/include/buf0buf.h
+++ b/innobase/include/buf0buf.h
@@ -30,6 +30,7 @@ Created 11/5/1995 Heikki Tuuri
 #include "sync0rw.h"
 #include "hash0hash.h"
 #include "ut0byte.h"
+#include "os0proc.h"
 
 /* Flags for flush types */
 #define BUF_FLUSH_LRU		1
@@ -58,23 +59,34 @@ extern ibool		buf_debug_prints;/* If this is set TRUE, the program
 					occurs */
 
 /************************************************************************
-Initializes the buffer pool of the database. */
+Creates the buffer pool. */
 
-void
+buf_pool_t*
 buf_pool_init(
 /*==========*/
-	ulint	max_size,	/* in: maximum size of the pool in blocks */
-	ulint	curr_size);	/* in: current size to use, must be <=
+				/* out, own: buf_pool object, NULL if not
+				enough memory or error */
+	ulint	max_size,	/* in: maximum size of the buf_pool in
+				blocks */
+	ulint	curr_size,	/* in: current size to use, must be <=
+				max_size, currently must be equal to
 				max_size */
+	ulint	n_frames);	/* in: number of frames; if AWE is used,
+				this is the size of the address space window
+				where physical memory pages are mapped; if
+				AWE is not used then this must be the same
+				as max_size */
 /*************************************************************************
-Gets the current size of buffer pool in bytes. */
+Gets the current size of the buffer pool in bytes. In the case of AWE, this is
+the size of the AWE window (= the frames). */
 UNIV_INLINE
 ulint
 buf_pool_get_curr_size(void);
 /*========================*/
 			/* out: size in bytes */
 /*************************************************************************
-Gets the maximum size of buffer pool in bytes. */
+Gets the maximum size of the buffer pool in bytes. In the case of AWE, this is
+the size of the AWE window (= the frames). */
 UNIV_INLINE
 ulint
 buf_pool_get_max_size(void);
@@ -138,8 +150,8 @@ improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */
 NOTE! The following macros should be used instead of
 buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
 RW_X_LATCH are allowed as LA! */
-#define buf_page_optimistic_get(LA, G, MC, MTR) buf_page_optimistic_get_func(\
-				LA, G, MC, IB__FILE__, __LINE__, MTR)
+#define buf_page_optimistic_get(LA, BL, G, MC, MTR) buf_page_optimistic_get_func(\
+				LA, BL, G, MC, IB__FILE__, __LINE__, MTR)
 /************************************************************************
 This is the general function used to get optimistic access to a database
 page. */
@@ -149,7 +161,9 @@ buf_page_optimistic_get_func(
 /*=========================*/
 				/* out: TRUE if success */
 	ulint		rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */
-	buf_frame_t*	guess,	/* in: guessed frame */
+	buf_block_t*	block,	/* in: guessed block */
+	buf_frame_t*	guess,	/* in: guessed frame; note that AWE may move
+				frames */
 	dulint		modify_clock,/* in: modify clock value if mode is
 				..._GUESS_ON_CLOCK */
 	char*		file,	/* in: file name */
@@ -350,6 +364,16 @@ buf_frame_modify_clock_inc(
 				/* out: new value */
 	buf_frame_t*	frame);	/* in: pointer to a frame */
 /************************************************************************
+Increments the modify clock of a block by 1. The caller must either (1) own
+the buf_pool mutex while the block's bufferfix count is zero, or (2) own an
+x-lock on the block. */
+UNIV_INLINE
+dulint
+buf_block_modify_clock_inc(
+/*=======================*/
+				/* out: new value */
+	buf_block_t*	block);	/* in: block */
+/************************************************************************
 Returns the value of the modify clock. The caller must have an s-lock 
 or x-lock on the block. */
 UNIV_INLINE
@@ -441,7 +465,7 @@ UNIV_INLINE
 buf_frame_t*
 buf_frame_align(
 /*============*/
-			/* out: pointer to block */
+			/* out: pointer to frame */
 	byte*	ptr);	/* in: pointer to a frame */
 /***********************************************************************
 Checks if a pointer points to the block array of the buffer pool (blocks, not
@@ -525,6 +549,19 @@ buf_pool_invalidate(void);
 --------------------------- LOWER LEVEL ROUTINES -------------------------
 =========================================================================*/
 
+/************************************************************************
+Maps the page of a block to a frame, if it is not mapped yet. Unmaps some
+page from the end of the awe_LRU_free_mapped list. */
+
+void
+buf_awe_map_page_to_frame(
+/*======================*/
+	buf_block_t*	block,		/* in: block whose page should be
+					mapped to a frame */
+	ibool		add_to_mapped_list);/* in: TRUE if, in the case
+					we need to map the page, we should
+					also add the block to the
+					awe_LRU_free_mapped list */
 #ifdef UNIV_SYNC_DEBUG
 /*************************************************************************
 Adds latch level info for the rw-lock protecting the buffer frame. This
@@ -590,19 +627,27 @@ buf_pool_get_nth_block(
 	ulint		i);	/* in: index of the block */
 /************************************************************************
 Function which inits a page for read to the buffer buf_pool. If the page is
-already in buf_pool, does nothing. Sets the io_fix flag to BUF_IO_READ and
-sets a non-recursive exclusive lock on the buffer frame. The io-handler must
-take care that the flag is cleared and the lock released later. This is one
-of the functions which perform the state transition NOT_USED => FILE_PAGE to
-a block (the other is buf_page_create). */ 
+(1) already in buf_pool, or
+(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
+(3) if the space is deleted or being deleted,
+then this function does nothing.
+Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
+on the buffer frame. The io-handler must take care that the flag is cleared
+and the lock released later. This is one of the functions which perform the
+state transition NOT_USED => FILE_PAGE to a block (the other is
+buf_page_create). */ 
 
 buf_block_t*
 buf_page_init_for_read(
 /*===================*/
-			/* out: pointer to the block */
-	ulint	mode,	/* in: BUF_READ_IBUF_PAGES_ONLY, ... */
-	ulint	space,	/* in: space id */
-	ulint	offset);/* in: page number */
+				/* out: pointer to the block or NULL */
+	ulint*		err,	/* out: DB_SUCCESS or DB_TABLESPACE_DELETED */
+	ulint		mode,	/* in: BUF_READ_IBUF_PAGES_ONLY, ... */
+	ulint		space,	/* in: space id */
+	ib_longlong	tablespace_version,/* in: prevents reading from a wrong
+				version of the tablespace in case we have done
+				DISCARD + IMPORT */
+	ulint		offset);/* in: page number */
 /************************************************************************
 Completes an asynchronous read or write request of a file page to or from
 the buffer pool. */
@@ -659,7 +704,16 @@ struct buf_block_struct{
 	byte*		frame;		/* pointer to buffer frame which
 					is of size UNIV_PAGE_SIZE, and
 					aligned to an address divisible by
-					UNIV_PAGE_SIZE */
+					UNIV_PAGE_SIZE; if AWE is used, this
+					will be NULL for the pages which are
+					currently not mapped into the virtual
+					address space window of the buffer
+					pool */
+	os_awe_t*	awe_info;	/* if AWE is used, then an array of
+					awe page infos for
+					UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE
+					(normally = 4) physical memory
+					pages; otherwise NULL */
 	ulint		space;		/* space id of the page */
 	ulint		offset;		/* page number within the space */
 	ulint		lock_hash_val;	/* hashed value of the page address
@@ -668,14 +722,6 @@ struct buf_block_struct{
 					record lock hash table */
 	rw_lock_t	lock;		/* read-write lock of the buffer
 					frame */
-	rw_lock_t	read_lock;	/* rw-lock reserved when a page read
-					to the frame is requested; a thread
-					can wait for this rw-lock if it wants
-					to wait for the read to complete;
-					the usual way is to wait for lock,
-					but if the thread just wants a
-					bufferfix and no latch on the page,
-					then it can wait for this rw-lock */
 	buf_block_t*	hash;		/* node used in chaining to the page
 					hash table */
 	ibool		check_index_page_at_flush;
@@ -710,8 +756,16 @@ struct buf_block_struct{
 
 	UT_LIST_NODE_T(buf_block_t) free;
 					/* node of the free block list */
+	ibool		in_free_list;	/* TRUE if in the free list; used in
+					debugging */
 	UT_LIST_NODE_T(buf_block_t) LRU;
 					/* node of the LRU list */
+	UT_LIST_NODE_T(buf_block_t) awe_LRU_free_mapped;
+					/* in the AWE version node in the
+					list of free and LRU blocks which are
+					mapped to a frame */
+	ibool		in_LRU_list;	/* TRUE if the page is in the LRU list;
+					used in debugging */
 	ulint		LRU_position;	/* value which monotonically
 					decreases (or may stay constant if
 					the block is in the old blocks) toward
@@ -772,6 +826,9 @@ struct buf_block_struct{
 					complete, though: there may have been
 					hash collisions, record deletions,
 					etc. */
+	ulint		n_pointers;	/* used in debugging: the number of
+					pointers in the adaptive hash index
+					pointing to this frame */
 	ulint		curr_n_fields;	/* prefix length for hash indexing:
 					number of full fields */
 	ulint		curr_n_bytes;	/* number of bytes in hash indexing */
@@ -803,16 +860,36 @@ struct buf_pool_struct{
 					struct and control blocks, except the
 					read-write lock in them */
 	byte*		frame_mem;	/* pointer to the memory area which
-					was allocated for the frames */
+					was allocated for the frames; in AWE
+					this is the virtual address space
+					window where we map pages stored
+					in physical memory */
 	byte*		frame_zero;	/* pointer to the first buffer frame:
 					this may differ from frame_mem, because
 					this is aligned by the frame size */
-	byte*		high_end;	/* pointer to the end of the
-					buffer pool */
+	byte*		high_end;	/* pointer to the end of the buffer
+					frames */
+	ulint		n_frames;	/* number of frames */
 	buf_block_t*	blocks;		/* array of buffer control blocks */
+	buf_block_t**	blocks_of_frames;/* inverse mapping which can be used
+					to retrieve the buffer control block
+					of a frame; this is an array which
+					lists the blocks of frames in the
+					order frame_zero,
+					frame_zero + UNIV_PAGE_SIZE, ...
+					a control block is always assigned
+					for each frame, even if the frame does
+					not contain any data; note that in AWE
+					there are more control blocks than
+					buffer frames */
+	os_awe_t*	awe_info;	/* if AWE is used, AWE info for the
+					physical 4 kB memory pages associated
+					with buffer frames */
 	ulint		max_size;	/* number of control blocks ==
 					maximum pool size in pages */
-	ulint		curr_size;	/* current pool size in pages */
+	ulint		curr_size;	/* current pool size in pages;
+					currently always the same as
+					max_size */
 	hash_table_t*	page_hash;	/* hash table of the file pages */
 
 	ulint		n_pend_reads;	/* number of pending read operations */
@@ -829,6 +906,9 @@ struct buf_pool_struct{
 					counted as page gets; this field
 					is NOT protected by the buffer
 					pool mutex */
+	ulint		n_pages_awe_remapped; /* if AWE is enabled, the
+					number of remaps of blocks to
+					buffer frames */
 	ulint		n_page_gets_old;/* n_page_gets when buf_print was
 					last time called: used to calculate
 					hit rate */
@@ -837,6 +917,7 @@ struct buf_pool_struct{
 	ulint		n_pages_written_old;/* number write operations */
 	ulint		n_pages_created_old;/* number of pages created in
 					the pool with no read */
+	ulint		n_pages_awe_remapped_old;
 	/* 2. Page flushing algorithm fields */
 
 	UT_LIST_BASE_NODE_T(buf_block_t) flush_list;
@@ -869,7 +950,10 @@ struct buf_pool_struct{
 	/* 3. LRU replacement algorithm fields */
 
 	UT_LIST_BASE_NODE_T(buf_block_t) free;
-					/* base node of the free block list */
+					/* base node of the free block list;
+					in the case of AWE, at the start are
+					always free blocks for which the
+					physical memory is mapped to a frame */
 	UT_LIST_BASE_NODE_T(buf_block_t) LRU;
 					/* base node of the LRU list */
 	buf_block_t*	LRU_old; 	/* pointer to the about 3/8 oldest
@@ -881,6 +965,12 @@ struct buf_pool_struct{
 					see buf0lru.c for the restrictions
 					on this value; not defined if
 					LRU_old == NULL */
+	UT_LIST_BASE_NODE_T(buf_block_t) awe_LRU_free_mapped;
+					/* list of those blocks which are
+					in the LRU list or the free list, and
+					where the page is mapped to a frame;
+					thus, frames allocated, e.g., to the
+					lock table, are not in this list */
 };
 
 /* States of a control block */
diff --git a/innobase/include/buf0buf.ic b/innobase/include/buf0buf.ic
index 5a4c56b0c30..cb54785128f 100644
--- a/innobase/include/buf0buf.ic
+++ b/innobase/include/buf0buf.ic
@@ -28,7 +28,6 @@ buf_block_peek_if_too_old(
 {
 	if (buf_pool->freed_page_clock >= block->freed_page_clock 
 				+ 1 + (buf_pool->curr_size / 1024)) {
-		
 		return(TRUE);
 	}
 
@@ -36,25 +35,27 @@ buf_block_peek_if_too_old(
 }
 
 /*************************************************************************
-Gets the current size of buffer buf_pool in bytes. */
+Gets the current size of the buffer pool in bytes. In the case of AWE, this is
+the size of the AWE window (= the frames). */
 UNIV_INLINE
 ulint
 buf_pool_get_curr_size(void)
 /*========================*/
 			/* out: size in bytes */
 {
-	return((buf_pool->curr_size) * UNIV_PAGE_SIZE);
+	return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
 }	
 
 /*************************************************************************
-Gets the maximum size of buffer buf_pool in bytes. */
+Gets the maximum size of the buffer pool in bytes. In the case of AWE, this is
+the size of the AWE window (= the frames). */
 UNIV_INLINE
 ulint
 buf_pool_get_max_size(void)
 /*=======================*/
 			/* out: size in bytes */
 {
-	return((buf_pool->max_size) * UNIV_PAGE_SIZE);
+	return((buf_pool->n_frames) * UNIV_PAGE_SIZE);
 }	
 
 /***********************************************************************
@@ -169,7 +170,7 @@ buf_block_get_space(
 	ut_ad(block);
 	ut_ad(block >= buf_pool->blocks);
 	ut_ad(block < buf_pool->blocks + buf_pool->max_size);
-	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 	ut_ad(block->buf_fix_count > 0);
 	
 	return(block->space);
@@ -187,7 +188,7 @@ buf_block_get_page_no(
 	ut_ad(block);
 	ut_ad(block >= buf_pool->blocks);
 	ut_ad(block < buf_pool->blocks + buf_pool->max_size);
-	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
 	ut_ad(block->buf_fix_count > 0);
 	
 	return(block->offset);
@@ -209,54 +210,24 @@ buf_block_align(
 
 	frame_zero = buf_pool->frame_zero;
 
-	ut_ad((ulint)ptr >= (ulint)frame_zero);
-
-	block = buf_pool_get_nth_block(buf_pool, ((ulint)(ptr - frame_zero))
-						>> UNIV_PAGE_SIZE_SHIFT);
-	if (block < buf_pool->blocks
-	    || block >= buf_pool->blocks + buf_pool->max_size) {
+	if ((ulint)ptr < (ulint)frame_zero
+	    || (ulint)ptr > (ulint)(buf_pool->high_end)) {
 
+		ut_print_timestamp(stderr);	
 		fprintf(stderr,
-"InnoDB: Error: trying to access a stray pointer %lx\n"
-"InnoDB: buf pool start is at %lx, number of pages %lu\n", (ulint)ptr,
-			(ulint)frame_zero, buf_pool->max_size);
+"  InnoDB: Error: trying to access a stray pointer %lx\n"
+"InnoDB: buf pool start is at %lx, end at %lx\n"
+"InnoDB: Probable reason is database corruption or memory\n"
+"InnoDB: corruption. If this happens in an InnoDB database recovery,\n"
+"InnoDB: you can look from section 6.1 at http://www.innodb.com/ibman.html\n"
+"InnoDB: how to force recovery.\n",
+ 			(long)ptr, (long)frame_zero,
+					(long)(buf_pool->high_end));
 		ut_error;
 	}
-
-	return(block);
-}	
-
-/***********************************************************************
-Gets the block to whose frame the pointer is pointing to. Does not
-require a file page to be bufferfixed. */
-UNIV_INLINE
-buf_block_t*
-buf_block_align_low(
-/*================*/
-			/* out: pointer to block */
-	byte*	ptr)	/* in: pointer to a frame */
-{
-	buf_block_t*	block;
-	buf_frame_t*	frame_zero;
-
-	ut_ad(ptr);
-
-	frame_zero = buf_pool->frame_zero;
-
-	ut_ad((ulint)ptr >= (ulint)frame_zero);
-
-	block = buf_pool_get_nth_block(buf_pool, ((ulint)(ptr - frame_zero))
-						>> UNIV_PAGE_SIZE_SHIFT);
-	if (block < buf_pool->blocks
-	    || block >= buf_pool->blocks + buf_pool->max_size) {
-
-		fprintf(stderr,
-"InnoDB: Error: trying to access a stray pointer %lx\n"
-"InnoDB: buf pool start is at %lx, number of pages %lu\n", (ulint)ptr,
-			(ulint)frame_zero, buf_pool->max_size);
-		ut_error;
-	}
-
+	
+	block = *(buf_pool->blocks_of_frames + (((ulint)(ptr - frame_zero))
+						>> UNIV_PAGE_SIZE_SHIFT));
 	return(block);
 }	
 
@@ -266,7 +237,7 @@ UNIV_INLINE
 buf_frame_t*
 buf_frame_align(
 /*============*/
-			/* out: pointer to block */
+			/* out: pointer to frame */
 	byte*	ptr)	/* in: pointer to a frame */
 {
 	buf_frame_t*	frame;
@@ -275,14 +246,19 @@ buf_frame_align(
 
 	frame = ut_align_down(ptr, UNIV_PAGE_SIZE);
 
-	if (((ulint)frame
-		< (ulint)(buf_pool->frame_zero))
-	    || ((ulint)frame > (ulint)(buf_pool_get_nth_block(buf_pool,
-					buf_pool->max_size - 1)->frame))) {
+	if (((ulint)frame < (ulint)(buf_pool->frame_zero))
+	    || (ulint)frame >= (ulint)(buf_pool->high_end)) {
+
+		ut_print_timestamp(stderr);	
 		fprintf(stderr,
-"InnoDB: Error: trying to access a stray pointer %lx\n"
-"InnoDB: buf pool start is at %lx, number of pages %lu\n", (ulint)ptr,
-			(ulint)(buf_pool->frame_zero), buf_pool->max_size);
+"  InnoDB: Error: trying to access a stray pointer %lx\n"
+"InnoDB: buf pool start is at %lx, end at %lx\n"
+"InnoDB: Probable reason is database corruption or memory\n"
+"InnoDB: corruption. If this happens in an InnoDB database recovery,\n"
+"InnoDB: you can look from section 6.1 at http://www.innodb.com/ibman.html\n"
+"InnoDB: how to force recovery.\n",
+ 			(long)ptr, (long)(buf_pool->frame_zero),
+					(long)(buf_pool->high_end));
 		ut_error;
 	}
 
@@ -471,8 +447,29 @@ buf_frame_modify_clock_inc(
 
 	ut_ad(frame);
 
-	block = buf_block_align_low(frame);
+	block = buf_block_align(frame);
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
+	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
+#endif /*UNIV_SYNC_DEBUG */
+
+	UT_DULINT_INC(block->modify_clock);
 
+	return(block->modify_clock);
+}
+
+/************************************************************************
+Increments the modify clock of a block by 1. The caller must either (1) own
+the buf_pool mutex while the block's bufferfix count is zero, or (2) own an
+x-lock on the block. */
+UNIV_INLINE
+dulint
+buf_block_modify_clock_inc(
+/*=======================*/
+				/* out: new value */
+	buf_block_t*	block)	/* in: block */
+{
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0))
 	      || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
@@ -515,15 +512,16 @@ void
 buf_block_buf_fix_inc_debug(
 /*========================*/
 	buf_block_t*	block,	/* in: block to bufferfix */
-	char*		file,	/* in: file name */
-	ulint		line)	/* in: line */
+	char*		file __attribute__ ((unused)),	/* in: file name */
+	ulint		line __attribute__ ((unused)))	/* in: line */
 {
+#ifdef UNIV_SYNC_DEBUG	
 	ibool	ret;
-	
+
 	ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line);
 
 	ut_ad(ret == TRUE);
-
+#endif
 	block->buf_fix_count++;
 }
 #else /* UNIV_SYNC_DEBUG */
@@ -562,6 +560,8 @@ buf_page_hash_get(
 
 	HASH_SEARCH(hash, buf_pool->page_hash, fold, block,
 			(block->space == space) && (block->offset == offset));
+	ut_a(block == NULL || block->state == BUF_BLOCK_FILE_PAGE);
+	
 	return(block);
 }
 
@@ -629,8 +629,8 @@ buf_page_release(
 
 	mutex_enter_fast(&(buf_pool->mutex));
 
-	ut_ad(block->state == BUF_BLOCK_FILE_PAGE);
-	ut_ad(block->buf_fix_count > 0);
+	ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+	ut_a(block->buf_fix_count > 0);
 
 	if (rw_latch == RW_X_LATCH && mtr->modifications) {
 
diff --git a/innobase/include/buf0lru.h b/innobase/include/buf0lru.h
index eb9d43d3b93..69a376f8cab 100644
--- a/innobase/include/buf0lru.h
+++ b/innobase/include/buf0lru.h
@@ -37,6 +37,16 @@ These are low-level functions
 #define BUF_LRU_FREE_SEARCH_LEN		(5 + 2 * BUF_READ_AHEAD_AREA)
 
 /**********************************************************************
+Invalidates all pages belonging to a given tablespace when we are deleting
+the data file(s) of that tablespace. A PROBLEM: if readahead is being started,
+what guarantees that it will not try to read in pages after this operation has
+completed? */
+
+void
+buf_LRU_invalidate_tablespace(
+/*==========================*/
+	ulint	id);	/* in: space id */
+/**********************************************************************
 Gets the minimum LRU_position field for the blocks in an initial segment
 (determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not
 guaranteed to be precise, because the ulint_clock may wrap around. */
@@ -67,7 +77,9 @@ LRU list to the free list. */
 buf_block_t*
 buf_LRU_get_free_block(void);
 /*=========================*/
-				/* out: the free control block */
+				/* out: the free control block; also if AWE is
+				used, it is guaranteed that the block has its
+				page mapped to a frame when we return */
 /**********************************************************************
 Puts a block back to the free list. */
 
diff --git a/innobase/include/buf0rea.h b/innobase/include/buf0rea.h
index aed965a6b21..380a42f4b80 100644
--- a/innobase/include/buf0rea.h
+++ b/innobase/include/buf0rea.h
@@ -59,7 +59,7 @@ buf_read_ahead_linear(
 			must want access to this page (see NOTE 3 above) */
 /************************************************************************
 Issues read requests for pages which the ibuf module wants to read in, in
-order to contract insert buffer trees. Technically, this function is like
+order to contract the insert buffer tree. Technically, this function is like
 a read-ahead function. */
 
 void
@@ -68,9 +68,14 @@ buf_read_ibuf_merge_pages(
 	ibool	sync,		/* in: TRUE if the caller wants this function
 				to wait for the highest address page to get
 				read in, before this function returns */
-	ulint	space,		/* in: space id */
-	ulint*	page_nos,	/* in: array of page numbers to read, with
-				the highest page number last in the array */
+	ulint*	space_ids,	/* in: array of space ids */
+	ib_longlong* space_versions,/* in: the spaces must have this version
+				number (timestamp), otherwise we discard the
+				read; we use this to cancel reads if
+				DISCARD + IMPORT may have changed the
+				tablespace size */
+	ulint*	page_nos,	/* in: array of page numbers to read, with the
+				highest page number the last in the array */
 	ulint	n_stored);	/* in: number of page numbers in the array */
 /************************************************************************
 Issues read requests for pages which recovery wants to read in. */
diff --git a/innobase/include/data0type.h b/innobase/include/data0type.h
index 4da686bf2e1..2b27ead5fac 100644
--- a/innobase/include/data0type.h
+++ b/innobase/include/data0type.h
@@ -11,6 +11,9 @@ Created 1/16/1996 Heikki Tuuri
 
 #include "univ.i"
 
+extern ulint	data_mysql_default_charset_coll;
+extern ulint	data_mysql_latin1_swedish_charset_coll;
+
 /* SQL data type struct */
 typedef struct dtype_struct		dtype_t;
 
@@ -18,31 +21,79 @@ typedef struct dtype_struct		dtype_t;
 data type */
 extern dtype_t* 	dtype_binary;
 
-/* Data main types of SQL data */
-#define	DATA_VARCHAR	1	/* character varying */
-#define DATA_CHAR	2	/* fixed length character */
+/*-------------------------------------------*/
+/* The 'MAIN TYPE' of a column */
+#define	DATA_VARCHAR	1	/* character varying of the
+				latin1_swedish_ci charset-collation */
+#define DATA_CHAR	2	/* fixed length character of the
+				latin1_swedish_ci charset-collation */
 #define DATA_FIXBINARY	3	/* binary string of fixed length */
 #define DATA_BINARY	4	/* binary string */
-#define DATA_BLOB	5	/* binary large object, or a TEXT type; if
-				prtype & DATA_NONLATIN1 != 0 the data must
-				be compared by MySQL as a whole field; if
-				prtype & DATA_BINARY_TYPE == 0, then this is
-				actually a TEXT column */
+#define DATA_BLOB	5	/* binary large object, or a TEXT type;
+				if prtype & DATA_BINARY_TYPE == 0, then this is
+				actually a TEXT column (or a BLOB created
+				with < 4.0.14) */
 #define	DATA_INT	6	/* integer: can be any size 1 - 8 bytes */
 #define	DATA_SYS_CHILD	7	/* address of the child page in node pointer */
 #define	DATA_SYS	8	/* system column */
+
 /* Data types >= DATA_FLOAT must be compared using the whole field, not as
 binary strings */
+
 #define DATA_FLOAT	9
 #define DATA_DOUBLE	10
 #define DATA_DECIMAL	11	/* decimal number stored as an ASCII string */
-#define	DATA_VARMYSQL	12	/* non-latin1 varying length char */
-#define	DATA_MYSQL	13	/* non-latin1 fixed length char */
+#define	DATA_VARMYSQL	12	/* any charset varying length char */
+#define	DATA_MYSQL	13	/* any charset fixed length char */
+				/* NOTE that 4.1.1 used DATA_MYSQL and
+				DATA_VARMYSQL for all character sets, and the
+				charset-collation for tables created with it
+				can also be latin1_swedish_ci */
 #define DATA_MTYPE_MAX	63	/* dtype_store_for_order_and_null_size()
 				requires the values are <= 63 */
 /*-------------------------------------------*/
-/* In the lowest byte in the precise type we store the MySQL type code
-(not applicable for system columns). */
+/* The 'PRECISE TYPE' of a column */
+/*
+Tables created by a MySQL user have the following convention:
+
+- In the least significant byte in the precise type we store the MySQL type
+code (not applicable for system columns).
+
+- In the second least significant byte we OR flags DATA_NOT_NULL,
+DATA_UNSIGNED, DATA_BINARY_TYPE.
+
+- In the third least significant byte of the precise type of string types we
+store the MySQL charset-collation code. In DATA_BLOB columns created with
+< 4.0.14 we do not actually know if it is a BLOB or a TEXT column. Since there
+are no indexes on prefixes of BLOB or TEXT columns in < 4.0.14, this is no
+problem, though.
+
+Note that versions < 4.1.2 or < 5.0.1 did not store the charset code to the
+precise type, since the charset was always the default charset of the MySQL
+installation. If the stored charset code is 0 in the system table SYS_COLUMNS
+of InnoDB, that means that the default charset of this MySQL installation
+should be used.
+
+When loading a table definition from the system tables to the InnoDB data
+dictionary cache in main memory, InnoDB versions >= 4.1.2 and >= 5.0.1 check
+if the stored charset-collation is 0, and if that is the case and the type is
+a non-binary string, replace that 0 by the default charset-collation code of
+this MySQL installation. In short, in old tables, the charset-collation code
+in the system tables on disk can be 0, but in in-memory data structures
+(dtype_t), the charset-collation code is always != 0 for non-binary string
+types.
+
+In new tables, in binary string types, the charset-collation code is the
+MySQL code for the 'binary charset', that is, != 0.
+
+For binary string types and for DATA_CHAR, DATA_VARCHAR, and for those
+DATA_BLOB which are binary or have the charset-collation latin1_swedish_ci,
+InnoDB performs all comparisons internally, without resorting to the MySQL
+comparison functions. This is to save CPU time.
+
+InnoDB's own internal system tables have different precise types for their
+columns, and for them the precise type is usually not used at all.
+*/
 
 #define DATA_ENGLISH    4       /* English language character string: this
 				is a relic from pre-MySQL time and only used
@@ -69,7 +120,7 @@ be less than 256 */
 #define DATA_MIX_ID_LEN	9	/* maximum stored length for mix id (in a
 				compressed dulint form) */
 #define	DATA_N_SYS_COLS 4 	/* number of system columns defined above */
-/*-------------------------------------------*/
+
 /* Flags ORed to the precise data type */
 #define DATA_NOT_NULL	256	/* this is ORed to the precise type when
 				the column is declared as NOT NULL */
@@ -79,18 +130,53 @@ be less than 256 */
 				string, this is ORed to the precise type:
 				this only holds for tables created with
 				>= MySQL-4.0.14 */
-#define	DATA_NONLATIN1 2048	/* if the data type is a DATA_BLOB (actually
-				TEXT) of a non-latin1 type, this is ORed to
-				the precise type: this only holds for tables
-				created with >= MySQL-4.0.14 */
+/* #define	DATA_NONLATIN1	2048 This is a relic from < 4.1.2 and < 5.0.1.
+				In earlier versions this was set for some
+				BLOB columns.
+*/
 /*-------------------------------------------*/
 
 /* This many bytes we need to store the type information affecting the
 alphabetical order for a single field and decide the storage size of an
 SQL null*/
-#define DATA_ORDER_NULL_TYPE_BUF_SIZE	4
+#define DATA_ORDER_NULL_TYPE_BUF_SIZE		4
+/* In the >= 4.1.x storage format we add 2 bytes more so that we can also
+store the charset-collation number; one byte is left unused, though */
+#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE	6
 
 /*************************************************************************
+Checks if a data main type is a string type. Also a BLOB is considered a
+string type. */
+
+ibool
+dtype_is_string_type(
+/*=================*/
+			/* out: TRUE if string type */
+	ulint	mtype);	/* in: InnoDB main data type code: DATA_CHAR, ... */
+/*************************************************************************
+Checks if a type is a binary string type. Note that for tables created with
+< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
+those DATA_BLOB columns this function currently returns FALSE. */
+
+ibool
+dtype_is_binary_string_type(
+/*========================*/
+			/* out: TRUE if binary string type */
+	ulint	mtype,	/* in: main data type */
+	ulint	prtype);/* in: precise type */
+/*************************************************************************
+Checks if a type is a non-binary string type. That is, dtype_is_string_type is
+TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
+with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
+For those DATA_BLOB columns this function currently returns TRUE. */
+
+ibool
+dtype_is_non_binary_string_type(
+/*============================*/
+			/* out: TRUE if non-binary string type */
+	ulint	mtype,	/* in: main data type */
+	ulint	prtype);/* in: precise type */
+/*************************************************************************
 Sets a data type structure. */
 UNIV_INLINE
 void
@@ -124,6 +210,23 @@ dtype_get_prtype(
 /*=============*/
 	dtype_t*	type);
 /*************************************************************************
+Gets the MySQL charset-collation code for MySQL string types. */
+UNIV_INLINE
+ulint
+dtype_get_charset_coll(
+/*===================*/
+	ulint	prtype);/* in: precise data type */
+/*************************************************************************
+Forms a precise type from the < 4.1.2 format precise type plus the
+charset-collation code. */
+
+ulint
+dtype_form_prtype(
+/*==============*/
+	ulint	old_prtype,	/* in: the MySQL type code and the flags
+				DATA_BINARY_TYPE etc. */
+	ulint	charset_coll);	/* in: MySQL charset-collation code */
+/*************************************************************************
 Gets the type length. */
 UNIV_INLINE
 ulint
@@ -172,24 +275,36 @@ dtype_is_fixed_size(
 				/* out: TRUE if fixed size */
 	dtype_t*	type);	/* in: type */
 /**************************************************************************
+Reads to a type the stored information which determines its alphabetical
+ordering and the storage size of an SQL NULL value. */
+UNIV_INLINE
+void
+dtype_read_for_order_and_null_size(
+/*===============================*/
+	dtype_t*	type,	/* in: type struct */
+	byte*		buf);	/* in: buffer for the stored order info */
+/**************************************************************************
 Stores for a type the information which determines its alphabetical ordering
-and the storage size of an SQL NULL value. */
+and the storage size of an SQL NULL value. This is the >= 4.1.x storage
+format. */
 UNIV_INLINE
 void
-dtype_store_for_order_and_null_size(
-/*================================*/
-	byte*		buf,	/* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE
+dtype_new_store_for_order_and_null_size(
+/*====================================*/
+	byte*		buf,	/* in: buffer for
+				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
 				bytes where we store the info */
 	dtype_t*	type);	/* in: type struct */
 /**************************************************************************
 Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. */
+ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
+storage format. */
 UNIV_INLINE
 void
-dtype_read_for_order_and_null_size(
-/*===============================*/
+dtype_new_read_for_order_and_null_size(
+/*===================================*/
 	dtype_t*	type,	/* in: type struct */
-	byte*		buf);	/* in: buffer for the stored order info */
+	byte*		buf);	/* in: buffer for stored type order info */
 /*************************************************************************
 Validates a data type structure. */
 
@@ -212,7 +327,7 @@ struct dtype_struct{
 	ulint	mtype;		/* main data type */
 	ulint	prtype;		/* precise type; MySQL data type */
 
-	/* remaining two fields do not affect alphabetical ordering: */
+	/* the remaining two fields do not affect alphabetical ordering: */
 
 	ulint	len;		/* length */
 	ulint	prec;		/* precision */
diff --git a/innobase/include/data0type.ic b/innobase/include/data0type.ic
index dbc5b6615f6..946b646ffbf 100644
--- a/innobase/include/data0type.ic
+++ b/innobase/include/data0type.ic
@@ -72,6 +72,17 @@ dtype_get_prtype(
 }
 
 /*************************************************************************
+Gets the MySQL charset-collation code for MySQL string types. */
+UNIV_INLINE
+ulint
+dtype_get_charset_coll(
+/*===================*/
+	ulint	prtype)	/* in: precise data type */
+{
+	return((prtype >> 16) & 0xFFUL);
+}
+
+/*************************************************************************
 Gets the type length. */
 UNIV_INLINE
 ulint
@@ -127,35 +138,44 @@ dtype_get_pad_char(
 
 /**************************************************************************
 Stores for a type the information which determines its alphabetical ordering
-and the storage size of an SQL NULL value. */
+and the storage size of an SQL NULL value. This is the >= 4.1.x storage
+format. */
 UNIV_INLINE
 void
-dtype_store_for_order_and_null_size(
-/*================================*/
-	byte*		buf,	/* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE
+dtype_new_store_for_order_and_null_size(
+/*====================================*/
+	byte*		buf,	/* in: buffer for
+				DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
 				bytes where we store the info */
 	dtype_t*	type)	/* in: type struct */
 {
-	ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);
+	ut_ad(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
 	
-	buf[0] = (byte)(type->mtype & 0xFF);
+	buf[0] = (byte)(type->mtype & 0xFFUL);
 
 	if (type->prtype & DATA_BINARY_TYPE) {
 		buf[0] = buf[0] | 128;
 	}
 
-	if (type->prtype & DATA_NONLATIN1) {
-		buf[0] = buf[0] | 64;
-	}
+	/* In versions < 4.1.2 we had: 	if (type->prtype & DATA_NONLATIN1) {
+						buf[0] = buf[0] | 64;
+					}
+	*/
+
+	buf[1] = (byte)(type->prtype & 0xFFUL);
 
-	buf[1] = (byte)(type->prtype & 0xFF);
+	mach_write_to_2(buf + 2, type->len & 0xFFFFUL);
 
-	mach_write_to_2(buf + 2, type->len & 0xFFFF);
+	mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype));
+
+	/* Note that the second last byte is left unused, because the
+	charset-collation code is always < 256 */
 }
 
 /**************************************************************************
 Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. */
+ordering and the storage size of an SQL NULL value. This is the < 4.1.x
+storage format. */
 UNIV_INLINE
 void
 dtype_read_for_order_and_null_size(
@@ -172,12 +192,56 @@ dtype_read_for_order_and_null_size(
 	        type->prtype = type->prtype | DATA_BINARY_TYPE;
 	}
 
-	if (buf[0] & 64) {
-	        type->prtype = type->prtype | DATA_NONLATIN1;
+	type->len = mach_read_from_2(buf + 2);
+		
+	type->prtype = dtype_form_prtype(type->prtype,
+					data_mysql_default_charset_coll);
+}	
+
+/**************************************************************************
+Reads to a type the stored information which determines its alphabetical
+ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
+storage format. */
+UNIV_INLINE
+void
+dtype_new_read_for_order_and_null_size(
+/*===================================*/
+	dtype_t*	type,	/* in: type struct */
+	byte*		buf)	/* in: buffer for stored type order info */
+{
+	ulint	charset_coll;
+
+	ut_ad(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+
+	type->mtype = buf[0] & 63;
+	type->prtype = buf[1];
+
+	if (buf[0] & 128) {
+	        type->prtype = type->prtype | DATA_BINARY_TYPE;
 	}
 
 	type->len = mach_read_from_2(buf + 2);
-}	
+
+	/* The charset-collation code is stored in the last 2 bytes of the
+	DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE-byte buffer */
+	charset_coll = mach_read_from_2(buf + 4);
+
+	if (dtype_is_string_type(type->mtype)) {
+		ut_a(charset_coll < 256);
+
+		if (charset_coll == 0) {
+			/* This insert buffer record was inserted with MySQL
+			version < 4.1.2, and the charset-collation code was not
+			explicitly stored to dtype->prtype at that time. It
+			must be the default charset-collation of this MySQL
+			installation. */
+
+			charset_coll = data_mysql_default_charset_coll;
+		}
+
+		type->prtype = dtype_form_prtype(type->prtype, charset_coll);
+	}
+}
 
 /***************************************************************************
 Returns the size of a fixed size data type, 0 if not a fixed size type. */
diff --git a/innobase/include/db0err.h b/innobase/include/db0err.h
index 854b9794c00..be7667bfd0c 100644
--- a/innobase/include/db0err.h
+++ b/innobase/include/db0err.h
@@ -48,6 +48,11 @@ Created 5/24/1996 Heikki Tuuri
 					from a table failed */
 #define DB_NO_SAVEPOINT		42	/* no savepoint exists with the given
 					name */
+#define	DB_TABLESPACE_ALREADY_EXISTS 43 /* we cannot create a new single-table
+				        tablespace because a file of the same
+					name already exists */
+#define DB_TABLESPACE_DELETED	44	/* tablespace does not exist or is
+					being dropped right now */
 					
 /* The following are partial failure codes */
 #define DB_FAIL 		1000
diff --git a/innobase/include/dict0boot.h b/innobase/include/dict0boot.h
index cb631be7e35..35eff5af29a 100644
--- a/innobase/include/dict0boot.h
+++ b/innobase/include/dict0boot.h
@@ -93,7 +93,7 @@ dict_create(void);
 					indexes; ibuf tables and indexes are
 					assigned as the id the number
 					DICT_IBUF_ID_MIN plus the space id */
-#define DICT_IBUF_ID_MIN	ut_dulint_create(0xFFFFFFFF, 0)
+#define DICT_IBUF_ID_MIN	ut_dulint_create(0xFFFFFFFFUL, 0)
 					
 /* The offset of the dictionary header on the page */
 #define	DICT_HDR		FSEG_PAGE_DATA
diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h
index be5d3b5b465..534c9e380b8 100644
--- a/innobase/include/dict0dict.h
+++ b/innobase/include/dict0dict.h
@@ -59,6 +59,16 @@ Inits the data dictionary module. */
 void
 dict_init(void);
 /*===========*/
+/************************************************************************
+Gets the space id of every table of the data dictionary and makes a linear
+list and a hash table of them to the data dictionary cache. This function
+can be called at database startup if we did not need to do a crash recovery.
+In crash recovery we must scan the space id's from the .ibd files in MySQL
+database directories. */
+
+void
+dict_load_space_id_list(void);
+/*=========================*/
 /**************************************************************************
 Returns a stored procedure object and memoryfixes it. */
 UNIV_INLINE
@@ -195,6 +205,15 @@ dict_table_rename_in_cache(
 					to preserve the original table name
 					in constraints which reference it */
 /**************************************************************************
+Change the id of a table object in the dictionary cache. This is used in
+DISCARD TABLESPACE. */
+
+void
+dict_table_change_id_in_cache(
+/*==========================*/
+	dict_table_t*	table,	/* in: table object already in cache */
+	dulint		new_id);/* in: new id to set */
+/**************************************************************************
 Adds a foreign key constraint object to the dictionary cache. May free
 the object if there already is an object with the same identifier in.
 At least one of foreign table or referenced table must already be in
@@ -744,7 +763,8 @@ dict_tree_build_node_ptr(
 /*=====================*/
 				/* out, own: node pointer */
 	dict_tree_t*	tree,	/* in: index tree */
-	rec_t*		rec,	/* in: record for which to build node pointer */
+	rec_t*		rec,	/* in: record for which to build node
+				pointer */
 	ulint		page_no,/* in: page number to put in node pointer */
 	mem_heap_t*	heap,	/* in: memory heap where pointer created */
 	ulint           level);  /* in: level of rec in tree: 0 means leaf
@@ -912,7 +932,7 @@ struct dict_sys_struct{
 	dict_table_t*	sys_columns;	/* SYS_COLUMNS table */
 	dict_table_t*	sys_indexes;	/* SYS_INDEXES table */
 	dict_table_t*	sys_fields;	/* SYS_FIELDS table */
-};					
+};
 
 #ifndef UNIV_NONINL
 #include "dict0dict.ic"
diff --git a/innobase/include/dict0load.h b/innobase/include/dict0load.h
index b60996a8dab..f7168a0f45f 100644
--- a/innobase/include/dict0load.h
+++ b/innobase/include/dict0load.h
@@ -15,6 +15,17 @@ Created 4/24/1996 Heikki Tuuri
 #include "ut0byte.h"
 
 /************************************************************************
+In a crash recovery we already have all the tablespace objects created.
+This function compares the space id information in the InnoDB data dictionary
+to what we already read with fil_load_single_table_tablespaces().
+In a normal startup we just scan the biggest space id, and store it to
+fil_system. */
+
+void
+dict_check_tablespaces_or_store_max_id(
+/*===================================*/
+	ibool	in_crash_recovery);	/* in: are we doing a crash recovery */
+/************************************************************************
 Finds the first table name in the given database. */
 
 char*
@@ -32,7 +43,10 @@ a foreign key references columns in this table. */
 dict_table_t*
 dict_load_table(
 /*============*/
-			/* out: table, NULL if does not exist */
+			/* out: table, NULL if does not exist; if the table is
+			stored in an .ibd file, but the file does not exist,
+			then we set the ibd_file_missing flag TRUE in the table
+			object we return */
 	char*	name);	/* in: table name */
 /***************************************************************************
 Loads a table object based on the table id. */
diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h
index 1930825f601..23753df4079 100644
--- a/innobase/include/dict0mem.h
+++ b/innobase/include/dict0mem.h
@@ -310,6 +310,13 @@ struct dict_table_struct{
 	char*		name;	/* table name */
 	ulint		space;	/* space where the clustered index of the
 				table is placed */
+	ibool		ibd_file_missing;/* TRUE if this is in a single-table
+				tablespace and the .ibd file is missing; then
+				we must return in ha_innodb.cc an error if the
+				user tries to query such an orphaned table */
+	ibool		tablespace_discarded;/* this flag is set TRUE when the
+				user calls DISCARD TABLESPACE on this table,
+				and reset to FALSE in IMPORT TABLESPACE */
 	hash_node_t	name_hash; /* hash chain node */
 	hash_node_t	id_hash; /* hash chain node */
 	ulint		n_def;	/* number of columns defined so far */
diff --git a/innobase/include/dyn0dyn.ic b/innobase/include/dyn0dyn.ic
index 787615cae09..b6c4808398b 100644
--- a/innobase/include/dyn0dyn.ic
+++ b/innobase/include/dyn0dyn.ic
@@ -7,7 +7,7 @@ Created 2/5/1996 Heikki Tuuri
 *******************************************************/
 
 #define DYN_BLOCK_MAGIC_N	375767
-#define DYN_BLOCK_FULL_FLAG	0x1000000
+#define DYN_BLOCK_FULL_FLAG	0x1000000UL
 
 /****************************************************************
 Adds a new block to a dyn array. */
diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h
index ad3149f0b36..310336af38e 100644
--- a/innobase/include/fil0fil.h
+++ b/innobase/include/fil0fil.h
@@ -16,6 +16,14 @@ Created 10/25/1995 Heikki Tuuri
 #include "ut0byte.h"
 #include "os0file.h"
 
+/* When mysqld is run, the default directory "." is the mysqld datadir, but in
+ibbackup we must set it explicitly; the path must NOT contain the trailing
+'/' or '\' */
+extern char*	fil_path_to_mysql_datadir;
+
+/* Initial size of a single-table tablespace in pages */
+#define FIL_IBD_FILE_INITIAL_SIZE	4
+
 /* 'null' (undefined) page offset in the context of file spaces */
 #define	FIL_NULL	ULINT32_UNDEFINED
 
@@ -60,10 +68,8 @@ extern fil_addr_t	fil_addr_null;
 					first page in a data file: the file
 					has been flushed to disk at least up
 					to this lsn */
-#define FIL_PAGE_ARCH_LOG_NO	34	/* this is only defined for the
-					first page in a data file: the latest
-					archived log file number when the
-					flush lsn above was written */
+#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID  34 /* starting from 4.1.x this
+					contains the space id of the page */
 #define FIL_PAGE_DATA		38	/* start of the data on the page */
 
 /* File page trailer */
@@ -86,50 +92,51 @@ extern fil_addr_t	fil_addr_null;
 extern ulint	fil_n_pending_log_flushes;
 extern ulint	fil_n_pending_tablespace_flushes;
 
+
 /***********************************************************************
-Reserves a right to open a single file. The right must be released with
-fil_release_right_to_open. */
+Returns the version number of a tablespace, -1 if not found. */
 
-void
-fil_reserve_right_to_open(void);
-/*===========================*/
+ib_longlong
+fil_space_get_version(
+/*==================*/
+			/* out: version number, -1 if the tablespace does not
+			exist in the memory cache */
+	ulint	id);	/* in: space id */
 /***********************************************************************
-Releases a right to open a single file. */
+Returns the latch of a file space. */
 
-void
-fil_release_right_to_open(void);
-/*===========================*/
-/************************************************************************
-Returns TRUE if file address is undefined. */
-ibool
-fil_addr_is_null(
-/*=============*/
-				/* out: TRUE if undefined */
-	fil_addr_t	addr);	/* in: address */
-/********************************************************************
-Initializes the file system of this module. */
+rw_lock_t*
+fil_space_get_latch(
+/*================*/
+			/* out: latch protecting storage allocation */
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Returns the type of a file space. */
 
-void
-fil_init(
-/*=====*/
-	ulint	max_n_open);	/* in: max number of open files */
-/********************************************************************
-Initializes the ibuf indexes at a database start. This can be called
-after the file space headers have been created and the dictionary system
-has been initialized. */
+ulint
+fil_space_get_type(
+/*===============*/
+			/* out: FIL_TABLESPACE or FIL_LOG */
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Returns the ibuf data of a file space. */
 
-void
-fil_ibuf_init_at_db_start(void);
-/*===========================*/
+ibuf_data_t*
+fil_space_get_ibuf_data(
+/*====================*/
+			/* out: ibuf data for this space */
+	ulint	id);	/* in: space id */
 /***********************************************************************
-Creates a space object and puts it to the file system. */
+Appends a new file to the chain of files of a space. File must be closed. */
 
 void
-fil_space_create(
-/*=============*/
-	char*	name,	/* in: space name */
-	ulint	id,	/* in: space id */
-	ulint	purpose);/* in: FIL_TABLESPACE, or FIL_LOG if log */
+fil_node_create(
+/*============*/
+	char*	name,	/* in: file name (file must be closed) */
+	ulint	size,	/* in: file size in database blocks, rounded downwards
+			to an integer */
+	ulint	id,	/* in: space id where to append */
+	ibool	is_raw);/* in: TRUE if a raw device or a raw disk partition */
 /********************************************************************
 Drops files from the start of a file space, so that its size is cut by
 the amount given. */
@@ -141,48 +148,88 @@ fil_space_truncate_start(
 	ulint	trunc_len);	/* in: truncate by this much; it is an error
 				if this does not equal to the combined size of
 				some initial files in the space */
-/**************************************************************************
-Tries to extend a data file by the number of pages given. Any fractions of a
-megabyte are ignored. */
+/***********************************************************************
+Creates a space memory object and puts it to the 'fil system' hash table. If
+there is an error, prints an error message to the .err log. */
 
 ibool
-fil_extend_last_data_file(
-/*======================*/
-				/* out: TRUE if success, also if we run
-				out of disk space we may return TRUE */
-	ulint*	actual_increase,/* out: number of pages we were able to
-				extend, here the orginal size of the file and
-				the resulting size of the file are rounded
-				downwards to a full megabyte, and the
-				difference expressed in pages is returned */
-	ulint	size_increase);	/* in: try to extend this many pages */
+fil_space_create(
+/*=============*/
+			/* out: TRUE if success */
+	char*	name,	/* in: space name */
+	ulint	id,	/* in: space id */
+	ulint	purpose);/* in: FIL_TABLESPACE, or FIL_LOG if log */
 /***********************************************************************
-Frees a space object from a file system. Closes the files in the chain
-but does not delete them. */
+Frees a space object from the tablespace memory cache. Closes the files in
+the chain but does not delete them. */
 
-void
+ibool
 fil_space_free(
 /*===========*/
+			/* out: TRUE if success */
 	ulint	id);	/* in: space id */
 /***********************************************************************
-Returns the latch of a file space. */
-
-rw_lock_t*
-fil_space_get_latch(
-/*================*/
-			/* out: latch protecting storage allocation */
-	ulint	id);	/* in: space id */
-/***********************************************************************
-Returns the type of a file space. */
+Returns the size of the space in pages. The tablespace must be cached in the
+memory cache. */
 
 ulint
-fil_space_get_type(
+fil_space_get_size(
 /*===============*/
-			/* out: FIL_TABLESPACE or FIL_LOG */
+			/* out: space size, 0 if space not found */
 	ulint	id);	/* in: space id */
+/***********************************************************************
+Checks if the pair space, page_no refers to an existing page in a tablespace
+file space. The tablespace must be cached in the memory cache. */
+
+ibool
+fil_check_adress_in_tablespace(
+/*===========================*/
+			/* out: TRUE if the address is meaningful */
+	ulint	id,	/* in: space id */
+	ulint	page_no);/* in: page number */
+/********************************************************************
+Initializes the tablespace memory cache. */
+
+void
+fil_init(
+/*=====*/
+	ulint	max_n_open);	/* in: max number of open files */
+/***********************************************************************
+Opens all log files and system tablespace data files. They stay open until the
+database server shutdown. This should be called at a server startup after the
+space objects for the log and the system tablespace have been created. The
+purpose of this operation is to make sure we never run out of file descriptors
+if we need to read from the insert buffer or to write to the log. */
+
+void
+fil_open_log_and_system_tablespace_files(void);
+/*==========================================*/
+/***********************************************************************
+Closes all open files. There must not be any pending i/o's or not flushed
+modifications in the files. */
+
+void
+fil_close_all_files(void);
+/*=====================*/
+/***********************************************************************
+Sets the max tablespace id counter if the given number is bigger than the
+previous value. */
+
+void
+fil_set_max_space_id_if_bigger(
+/*===========================*/
+	ulint	max_id);/* in: maximum known id */
+/********************************************************************
+Initializes the ibuf data structure for space 0 == the system tablespace.
+This can be called after the file space headers have been created and the
+dictionary system has been initialized. */
+
+void
+fil_ibuf_init_at_db_start(void);
+/*===========================*/
 /********************************************************************
 Writes the flushed lsn and the latest archived log number to the page
-header of the first page of each data file. */
+header of the first page of each data file in the system tablespace. */
 
 ulint
 fil_write_flushed_lsn_to_data_files(
@@ -205,48 +252,266 @@ fil_read_flushed_lsn_and_arch_log_no(
 	dulint*	max_flushed_lsn,	/* in/out: */
 	ulint*	max_arch_log_no);	/* in/out: */
 /***********************************************************************
-Returns the ibuf data of a file space. */
+Increments the count of pending insert buffer page merges, if space is not
+being deleted. */
 
-ibuf_data_t*
-fil_space_get_ibuf_data(
-/*====================*/
-			/* out: ibuf data for this space */
+ibool
+fil_inc_pending_ibuf_merges(
+/*========================*/
+			/* out: TRUE if being deleted, and ibuf merges should
+			be skipped */
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Decrements the count of pending insert buffer page merges. */
+
+void
+fil_decr_pending_ibuf_merges(
+/*========================*/
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Parses the body of a log record written about an .ibd file operation. That is,
+the log record part after the standard (type, space id, page no) header of the
+log record.
+
+If desired, also replays the delete or rename operation if the .ibd file
+exists and the space id in it matches. Replays the create operation if a file
+at that path does not exist yet. If the database directory for the file to be
+created does not exist, then we create the directory, too.
+
+Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the
+datadir that we should use in replaying the file operations. */
+
+byte*
+fil_op_log_parse_or_replay(
+/*=======================*/
+                        	/* out: end of log record, or NULL if the
+				record was not completely contained between
+				ptr and end_ptr */
+        byte*   ptr,    	/* in: buffer containing the log record body,
+				or an initial segment of it, if the record does
+				not fit completely between ptr and end_ptr */
+        byte*   end_ptr,	/* in: buffer end */
+	ulint	type,		/* in: the type of this log record */
+	ibool	do_replay,	/* in: TRUE if we want to replay the
+				operation, and not just parse the log record */
+	ulint	space_id);	/* in: if do_replay is TRUE, the space id of
+				the tablespace in question; otherwise
+				ignored */
+/***********************************************************************
+Deletes a single-table tablespace. The tablespace must be cached in the
+memory cache. */
+
+ibool
+fil_delete_tablespace(
+/*==================*/
+			/* out: TRUE if success */
+	ulint	id);	/* in: space id */
+/***********************************************************************
+Discards a single-table tablespace. The tablespace must be cached in the
+memory cache. Discarding is like deleting a tablespace, but
+1) we do not drop the table from the data dictionary;
+2) we remove all insert buffer entries for the tablespace immediately; in DROP
+TABLE they are only removed gradually in the background;
+3) when the user does IMPORT TABLESPACE, the tablespace will have the same id
+as it originally had. */
+
+ibool
+fil_discard_tablespace(
+/*===================*/
+			/* out: TRUE if success */
 	ulint	id);	/* in: space id */
 /***********************************************************************
-Returns the size of the space in pages. */
+Renames a single-table tablespace. The tablespace must be cached in the
+tablespace memory cache. */
+
+ibool
+fil_rename_tablespace(
+/*==================*/
+				/* out: TRUE if success */
+	char*	old_name,	/* in: old table name in the standard
+				databasename/tablename format of InnoDB, or
+				NULL if we do the rename based on the space
+				id only */
+	ulint	id,		/* in: space id */
+	char*	new_name);	/* in: new table name in the standard
+				databasename/tablename format of InnoDB */
+/***********************************************************************
+Creates a new single-table tablespace to a database directory of MySQL.
+Database directories are under the 'datadir' of MySQL. The datadir is the
+directory of a running mysqld program. We can refer to it by simply the
+path '.'. */
 
 ulint
-fil_space_get_size(
-/*===============*/
-			/* out: space size */
+fil_create_new_single_table_tablespace(
+/*===================================*/
+				/* out: DB_SUCCESS or error code */
+	ulint*	space_id,	/* in/out: space id; if this is != 0, then
+				this is an input parameter, otherwise
+				output */
+	char*	tablename,	/* in: the table name in the usual
+				databasename/tablename format of InnoDB */
+	ulint	size);		/* in: the initial size of the tablespace file
+				in pages, must be > 0 */
+/************************************************************************
+Tries to open a single-table tablespace and checks the space id is right in
+it. If does not succeed, prints an error message to the .err log. This
+function is used to open the tablespace when we load a table definition
+to the dictionary cache. NOTE that we assume this operation is used under the
+protection of the dictionary mutex, so that two users cannot race here. */
+
+ibool
+fil_open_single_table_tablespace(
+/*=============================*/
+			/* out: TRUE if success */
+	ulint	id,	/* in: space id */
+	char*	name);	/* in: table name in the databasename/tablename
+			format */
+/************************************************************************
+It is possible, though very improbable, that the lsn's in the tablespace to be
+imported have risen above the current system lsn, if a lengthy purge, ibuf
+merge, or rollback was performed on a backup taken with ibbackup. If that is
+the case, reset page lsn's in the file. We assume that mysqld was shut down
+after it performed these cleanup operations on the .ibd file, so that it at
+the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the
+first page of the .ibd file, and we can determine whether we need to reset the
+lsn's just by looking at that flush lsn. */
+
+ibool
+fil_reset_too_high_lsns(
+/*====================*/
+				/* out: TRUE if success */
+	char*	name,		/* in: table name in the databasename/tablename
+				format */
+	dulint	current_lsn);	/* in: reset lsn's if the lsn stamped to
+				FIL_PAGE_FILE_FLUSH_LSN in the first page is
+				too high */
+/************************************************************************
+At the server startup, if we need crash recovery, scans the database
+directories under the MySQL datadir, looking for .ibd files. Those files are
+single-table tablespaces. We need to know the space id in each of them so that
+we know into which file we should look to check the contents of a page stored
+in the doublewrite buffer, also to know where to apply log records where the
+space id is != 0. */
+
+ulint
+fil_load_single_table_tablespaces(void);
+/*===================================*/
+			/* out: DB_SUCCESS or error number */
+/************************************************************************
+If we need crash recovery, and we have called
+fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(),
+we can call this function to print an error message of orphaned .ibd files
+for which there is not a data dictionary entry with a matching table name
+and space id. */
+
+void
+fil_print_orphaned_tablespaces(void);
+/*================================*/
+/***********************************************************************
+Returns TRUE if a single-table tablespace does not exist in the memory cache,
+or is being deleted there. */
+
+ibool
+fil_tablespace_deleted_or_being_deleted_in_mem(
+/*===========================================*/
+				/* out: TRUE if does not exist or is being
+				deleted */
+	ulint		id,	/* in: space id */
+	ib_longlong	version);/* in: tablespace_version should be this; if
+				you pass -1 as the value of this, then this
+				parameter is ignored */
+/***********************************************************************
+Returns TRUE if a single-table tablespace exists in the memory cache. */
+
+ibool
+fil_tablespace_exists_in_mem(
+/*=========================*/
+			/* out: TRUE if exists */
 	ulint	id);	/* in: space id */
 /***********************************************************************
-Checks if the pair space, page_no refers to an existing page in a
-tablespace file space. */
+Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
+cache. Note that if we have not done a crash recovery at the database startup,
+there may be many tablespaces which are not yet in the memory cache. */
 
 ibool
-fil_check_adress_in_tablespace(
+fil_space_for_table_exists_in_mem(
+/*==============================*/
+				/* out: TRUE if a matching tablespace
+				exists in the memory cache */
+	ulint	id,		/* in: space id */
+	char*	name,		/* in: table name in the standard
+				'databasename/tablename' format */
+	ibool	mark_space,	/* in: in crash recovery, at database startup
+				we mark all spaces which have an associated
+				table in the InnoDB data dictionary, so that
+				we can print a warning about orphaned
+				tablespaces */
+	ibool	print_error_if_does_not_exist);
+				/* in: print detailed error information to
+				the .err log if a matching tablespace is
+				not found from memory */
+/**************************************************************************
+Tries to extend a data file so that it would accommodate the number of pages
+given. The tablespace must be cached in the memory cache. If the space is big
+enough already, does nothing. */
+
+ibool
+fil_extend_space_to_desired_size(
+/*=============================*/
+				/* out: TRUE if success */
+	ulint*	actual_size,	/* out: size of the space after extension;
+				if we ran out of disk space this may be lower
+				than the desired size */
+	ulint	space_id,	/* in: space id, must be != 0 */
+	ulint	size_after_extend);/* in: desired size in pages after the
+				extension; if the current space size is bigger
+				than this already, the function does nothing */
+#ifdef UNIV_HOTBACKUP
+/************************************************************************
+Extends all tablespaces to the size stored in the space header. During the
+ibbackup --apply-log phase we extended the spaces on-demand so that log records
+could be applied, but that may have left spaces still too small compared to
+the size stored in the space header. */
+
+void
+fil_extend_tablespaces_to_stored_len(void);
+/*======================================*/
+#endif
+/***********************************************************************
+Tries to reserve free extents in a file space. */
+
+ibool
+fil_space_reserve_free_extents(
 /*===========================*/
-			/* out: TRUE if the address is meaningful */
-	ulint	id,	/* in: space id */
-	ulint	page_no);/* in: page number */
+				/* out: TRUE if succeed */
+	ulint	id,		/* in: space id */
+	ulint	n_free_now,	/* in: number of free extents now */
+	ulint	n_to_reserve);	/* in: how many one wants to reserve */
 /***********************************************************************
-Appends a new file to the chain of files of a space.
-File must be closed. */
+Releases free extents in a file space. */
 
 void
-fil_node_create(
-/*============*/
-	char*	name,	/* in: file name (file must be closed) */
-	ulint	size,	/* in: file size in database blocks, rounded downwards
-			to an integer */
-	ulint	id);	/* in: space id where to append */
+fil_space_release_free_extents(
+/*===========================*/
+	ulint	id,		/* in: space id */
+	ulint	n_reserved);	/* in: how many one reserved */
+/***********************************************************************
+Gets the number of reserved extents. If the database is silent, this number
+should be zero. */
+
+ulint
+fil_space_get_n_reserved_extents(
+/*=============================*/
+	ulint	id);		/* in: space id */
 /************************************************************************
 Reads or writes data. This operation is asynchronous (aio). */
 
-void
+ulint
 fil_io(
 /*===*/
+				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+				if we are trying to do i/o on a tablespace
+				which does not exist */
 	ulint	type,		/* in: OS_FILE_READ or OS_FILE_WRITE,
 				ORed to OS_FILE_LOG, if a log i/o
 				and ORed to OS_AIO_SIMULATED_WAKE_LATER
@@ -262,9 +527,9 @@ fil_io(
 	ulint	byte_offset,	/* in: remainder of offset in bytes; in
 				aio this must be divisible by the OS block
 				size */
-	ulint	len,		/* in: how many bytes to read; this must
-				not cross a file boundary; in aio this must
-				be a block size multiple */
+	ulint	len,		/* in: how many bytes to read or write; this
+				must not cross a file boundary; in aio this
+				must be a block size multiple */
 	void*	buf,		/* in/out: buffer where to store read data
 				or from where to write; in aio this must be
 				appropriately aligned */
@@ -272,12 +537,15 @@ fil_io(
 				aio used, else ignored */
 /************************************************************************
 Reads data from a space to a buffer. Remember that the possible incomplete
-blocks at the end of a file are ignored: they are not taken into account when
+blocks at the end of file are ignored: they are not taken into account when
 calculating the byte offset within a space. */
 
-void
+ulint
 fil_read(
 /*=====*/
+				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+				if we are trying to do i/o on a tablespace
+				which does not exist */
 	ibool	sync,		/* in: TRUE if synchronous aio is desired */
 	ulint	space_id,	/* in: space id */
 	ulint	block_offset,	/* in: offset in number of blocks */
@@ -292,12 +560,15 @@ fil_read(
 				aio used, else ignored */
 /************************************************************************
 Writes data to a space from a buffer. Remember that the possible incomplete
-blocks at the end of a file are ignored: they are not taken into account when
+blocks at the end of file are ignored: they are not taken into account when
 calculating the byte offset within a space. */
 
-void
+ulint
 fil_write(
 /*======*/
+				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
+				if we are trying to do i/o on a tablespace
+				which does not exist */
 	ibool	sync,		/* in: TRUE if synchronous aio is desired */
 	ulint	space_id,	/* in: space id */
 	ulint	block_offset,	/* in: offset in number of blocks */
@@ -322,7 +593,8 @@ fil_aio_wait(
 	ulint	segment);	/* in: the number of the segment in the aio
 				array to wait for */ 
 /**************************************************************************
-Flushes to disk possible writes cached by the OS. */
+Flushes to disk possible writes cached by the OS. If the space does not exist
+or is being dropped, does not do anything. */
 
 void
 fil_flush(
@@ -338,13 +610,21 @@ fil_flush_file_spaces(
 /*==================*/
 	ulint	purpose);	/* in: FIL_TABLESPACE, FIL_LOG */
 /**********************************************************************
-Checks the consistency of the file system. */
+Checks the consistency of the tablespace cache. */
 
 ibool
 fil_validate(void);
 /*==============*/
 			/* out: TRUE if ok */
 /************************************************************************
+Returns TRUE if file address is undefined. */
+
+ibool
+fil_addr_is_null(
+/*=============*/
+				/* out: TRUE if undefined */
+	fil_addr_t	addr);	/* in: address */
+/************************************************************************
 Accessor functions for a file page */
 
 ulint
@@ -368,32 +648,7 @@ fil_page_get_type(
 			/* out: type; NOTE that if the type has not been
 			written to page, the return value not defined */
 	byte* 	page);	/* in: file page */
-/***********************************************************************
-Tries to reserve free extents in a file space. */
 
-ibool
-fil_space_reserve_free_extents(
-/*===========================*/
-				/* out: TRUE if succeed */
-	ulint	id,		/* in: space id */
-	ulint	n_free_now,	/* in: number of free extents now */
-	ulint	n_to_reserve);	/* in: how many one wants to reserve */
-/***********************************************************************
-Releases free extents in a file space. */
-
-void
-fil_space_release_free_extents(
-/*===========================*/
-	ulint	id,		/* in: space id */
-	ulint	n_reserved);	/* in: how many one reserved */
-/***********************************************************************
-Gets the number of reserved extents. If the database is silent, this number
-should be zero. */
-
-ulint
-fil_space_get_n_reserved_extents(
-/*=============================*/
-	ulint	id);		/* in: space id */
 
 typedef	struct fil_space_struct	fil_space_t;
 
diff --git a/innobase/include/fsp0fsp.h b/innobase/include/fsp0fsp.h
index 3494f336b1e..2fcde882df7 100644
--- a/innobase/include/fsp0fsp.h
+++ b/innobase/include/fsp0fsp.h
@@ -55,7 +55,7 @@ ulint
 fsp_header_get_free_limit(
 /*======================*/
 			/* out: free limit in megabytes */
-	ulint	space);	/* in: space id */
+	ulint	space);	/* in: space id, must be 0 */
 /**************************************************************************
 Gets the size of the tablespace from the tablespace header. If we do not
 have an auto-extending data file, this should be equal to the size of the
@@ -65,9 +65,35 @@ ulint
 fsp_header_get_tablespace_size(
 /*===========================*/
 			/* out: size in pages */
-	ulint	space);	/* in: space id */
+	ulint	space);	/* in: space id, must be 0 */
 /**************************************************************************
-Initializes the space header of a new created space. */
+Reads the file space size stored in the header page. */
+
+ulint
+fsp_get_size_low(
+/*=============*/
+			/* out: tablespace size stored in the space header */
+	page_t*	page);	/* in: header page (page 0 in the tablespace) */
+/**************************************************************************
+Reads the space id from the first page of a tablespace. */
+
+ulint
+fsp_header_get_space_id(
+/*====================*/
+                        /* out: space id, ULINT_UNDEFINED if error */
+        page_t* page);   /* in: first page of a tablespace */
+/**************************************************************************
+Writes the space id to a tablespace header. This function is used past the
+buffer pool when we in fil0fil.c create a new single-table tablespace. */
+
+void
+fsp_header_write_space_id(
+/*======================*/
+	page_t*	page,		/* in: first page in the space */
+	ulint	space_id);	/* in: space id */
+/**************************************************************************
+Initializes the space header of a newly created space and also creates the
+insert buffer tree root if space == 0. */
 
 void
 fsp_header_init(
@@ -117,12 +143,12 @@ fseg_create_general(
 			will belong to the created segment */
 	ulint	byte_offset, /* in: byte offset of the created segment header
 			on the page */
-	ibool	has_done_reservation, /* in: TRUE if the caller has
-			already done the reservation for the pages
-			with fsp_reserve_free_extents (at least 2 extents:
-			one for the inode and, then there other for the
-			segment) is no need to do the check for this
-			individual operation */
+	ibool	has_done_reservation, /* in: TRUE if the caller has already
+			done the reservation for the pages with
+			fsp_reserve_free_extents (at least 2 extents: one for
+			the inode and the other for the segment) then there is
+			no need to do the check for this individual
+			operation */
 	mtr_t*	mtr);	/* in: mtr */
 /**************************************************************************
 Calculates the number of pages reserved by a segment, and how many pages are
@@ -194,12 +220,21 @@ two types of allocation: when space is scarce, FSP_NORMAL allocations
 will not succeed, but the latter two allocations will succeed, if possible.
 The purpose is to avoid dead end where the database is full but the
 user cannot free any space because these freeing operations temporarily
-reserve some space. */ 
+reserve some space.
+
+Single-table tablespaces whose size is < 32 pages are a special case. In this
+function we would liberally reserve several 64 page extents for every page
+split or merge in a B-tree. But we do not want to waste disk space if the table
+only occupies < 32 pages. That is why we apply different rules in that special
+case, just ensuring that there are 3 free pages available. */
 
 ibool
 fsp_reserve_free_extents(
 /*=====================*/
 			/* out: TRUE if we were able to make the reservation */
+        ulint*  n_reserved,/* out: number of extents actually reserved; if we
+                        return TRUE and the tablespace size is < 64 pages,
+                        then this can be 0, otherwise it is n_ext */
 	ulint	space,	/* in: space id */
 	ulint	n_ext,	/* in: number of extents to reserve */
 	ulint	alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
@@ -337,8 +372,8 @@ pages: */
 #define FSP_FIRST_INODE_PAGE_NO		2
 #define FSP_IBUF_HEADER_PAGE_NO		3
 #define FSP_IBUF_TREE_ROOT_PAGE_NO	4
-				/* The ibuf tree root page number in each
-				tablespace; its fseg inode is on the page
+				/* The ibuf tree root page number in
+				tablespace 0; its fseg inode is on the page
 				number FSP_FIRST_INODE_PAGE_NO */
 #define FSP_TRX_SYS_PAGE_NO		5
 #define	FSP_FIRST_RSEG_PAGE_NO		6
diff --git a/innobase/include/fut0lst.ic b/innobase/include/fut0lst.ic
index d2e79cf7640..c0d61833b48 100644
--- a/innobase/include/fut0lst.ic
+++ b/innobase/include/fut0lst.ic
@@ -23,7 +23,7 @@ Created 11/28/1995 Heikki Tuuri
 #define	FLST_FIRST	4	/* 6-byte address of the first element
 				of the list; undefined if empty list */
 #define	FLST_LAST	(4 + FIL_ADDR_SIZE) /* 6-byte address of the
-				first element of the list; undefined
+				last element of the list; undefined
 				if empty list */
 
 /************************************************************************
diff --git a/innobase/include/ha0ha.h b/innobase/include/ha0ha.h
index 0beac928b7e..c3fc04b47bb 100644
--- a/innobase/include/ha0ha.h
+++ b/innobase/include/ha0ha.h
@@ -28,7 +28,7 @@ ha_search_and_get_data(
 /*************************************************************
 Looks for an element when we know the pointer to the data and updates
 the pointer to data if found. */
-UNIV_INLINE
+
 void
 ha_search_and_update_if_found(
 /*==========================*/
diff --git a/innobase/include/ha0ha.ic b/innobase/include/ha0ha.ic
index f6faf84b9f5..5369ca7f273 100644
--- a/innobase/include/ha0ha.ic
+++ b/innobase/include/ha0ha.ic
@@ -49,11 +49,8 @@ ha_node_t*
 ha_chain_get_next(
 /*==============*/
 				/* out: next node, NULL if none */
-	hash_table_t*	table __attribute__((unused)),	/* in: hash table */
 	ha_node_t*	node)	/* in: hash chain node */
 {
-	ut_ad(table);
-
 	return(node->next);
 }
 
@@ -96,7 +93,7 @@ ha_search(
 			return(node);
 		}
 
-		node = ha_chain_get_next(table, node);
+		node = ha_chain_get_next(node);
 	}
 
 	return(NULL);
@@ -128,7 +125,7 @@ ha_search_and_get_data(
 			return(node->data);
 		}
 
-		node = ha_chain_get_next(table, node);
+		node = ha_chain_get_next(node);
 	}
 
 	return(NULL);
@@ -143,18 +140,13 @@ ha_next(
 				/* out: pointer to the next hash table node
 				in chain with the fold value, NULL if not
 				found */
-	hash_table_t*	table,	/* in: hash table */
 	ha_node_t*	node)	/* in: hash table node */
 {
 	ulint	fold;
 
 	fold = node->fold;
 
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-#endif /* UNIV_SYNC_DEBUG */
-
-	node = ha_chain_get_next(table, node);
+	node = ha_chain_get_next(node);
 
 	while (node) {
 		if (node->fold == fold) {
@@ -162,7 +154,7 @@ ha_next(
 			return(node);
 		}
 
-		node = ha_chain_get_next(table, node);
+		node = ha_chain_get_next(node);
 	}
 
 	return(NULL);
@@ -194,38 +186,13 @@ ha_search_with_data(
 			return(node);
 		}
 
-		node = ha_chain_get_next(table, node);
+		node = ha_chain_get_next(node);
 	}
 
 	return(NULL);
 }
 
 /*************************************************************
-Looks for an element when we know the pointer to the data, and updates
-the pointer to data, if found. */
-UNIV_INLINE
-void
-ha_search_and_update_if_found(
-/*==========================*/
-	hash_table_t*	table,	/* in: hash table */
-	ulint		fold,	/* in: folded value of the searched data */
-	void*		data,	/* in: pointer to the data */
-	void*		new_data)/* in: new pointer to the data */
-{
-	ha_node_t*	node;
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold)));
-#endif /* UNIV_SYNC_DEBUG */
-
-	node = ha_search_with_data(table, fold, data);
-
-	if (node) {
-		node->data = new_data;
-	}
-}
-
-/*************************************************************
 Looks for an element when we know the pointer to the data, and deletes
 it from the hash table, if found. */
 UNIV_INLINE
diff --git a/innobase/include/hash0hash.h b/innobase/include/hash0hash.h
index d325636f511..79efe016324 100644
--- a/innobase/include/hash0hash.h
+++ b/innobase/include/hash0hash.h
@@ -109,7 +109,7 @@ do {\
 \
 		while (struct3333->NAME != DATA) {\
 \
-			ut_ad(struct3333);\
+			ut_a(struct3333);\
 			struct3333 = struct3333->NAME;\
 		}\
 \
@@ -290,6 +290,8 @@ struct hash_cell_struct{
 
 /* The hash table structure */
 struct hash_table_struct {
+	ibool		adaptive;/* TRUE if this is the hash table of the
+				adaptive hash index */
 	ulint		n_cells;/* number of cells in the hash table */
 	hash_cell_t*	array;	/* pointer to cell array */
 	ulint		n_mutexes;/* if mutexes != NULL, then the number of
diff --git a/innobase/include/ibuf0ibuf.h b/innobase/include/ibuf0ibuf.h
index a64eb53bd19..8ef67df26f8 100644
--- a/innobase/include/ibuf0ibuf.h
+++ b/innobase/include/ibuf0ibuf.h
@@ -40,6 +40,13 @@ void
 ibuf_init_at_db_start(void);
 /*=======================*/
 /*************************************************************************
+Reads the biggest tablespace id from the high end of the insert buffer
+tree and updates the counter in fil_system. */
+
+void
+ibuf_update_max_tablespace_id(void);
+/*===============================*/
+/*************************************************************************
 Initializes an ibuf bitmap page. */
 
 void
@@ -198,8 +205,8 @@ When an index page is read from a disk to the buffer pool, this function
 inserts to the page the possible index entries buffered in the insert buffer.
 The entries are deleted from the insert buffer. If the page is not read, but
 created in the buffer pool, this function deletes its buffered entries from
-the insert buffer; note that there can exist entries if the page belonged to
-an index which was dropped. */
+the insert buffer; there can exist entries for such a page if the page
+belonged to an index which subsequently was dropped. */
 
 void
 ibuf_merge_or_delete_for_page(
@@ -207,7 +214,21 @@ ibuf_merge_or_delete_for_page(
 	page_t*	page,	/* in: if page has been read from disk, pointer to
 			the page x-latched, else NULL */
 	ulint	space,	/* in: space id of the index page */
-	ulint	page_no);/* in: page number of the index page */
+	ulint	page_no,/* in: page number of the index page */
+	ibool	update_ibuf_bitmap);/* in: normally this is set to TRUE, but if
+			we have deleted or are deleting the tablespace, then we
+			naturally do not want to update a non-existent bitmap
+			page */
+/*************************************************************************
+Deletes all entries in the insert buffer for a given space id. This is used
+in DISCARD TABLESPACE and IMPORT TABLESPACE.
+NOTE: this does not update the page free bitmaps in the space. The space will
+become CORRUPT when you call this function! */
+
+void
+ibuf_delete_for_discarded_space(
+/*============================*/
+	ulint	space);	/* in: space id */
 /*************************************************************************
 Contracts insert buffer trees by reading pages to the buffer pool. */
 
@@ -257,6 +278,13 @@ ibuf_count_get(
 	ulint	space,	/* in: space id */
 	ulint	page_no);/* in: page number */
 /**********************************************************************
+Looks if the insert buffer is empty. */
+
+ibool
+ibuf_is_empty(void);
+/*===============*/
+			/* out: TRUE if empty */
+/**********************************************************************
 Prints info of ibuf. */
 
 void
diff --git a/innobase/include/ibuf0ibuf.ic b/innobase/include/ibuf0ibuf.ic
index 0886c8c02cc..68f7ce9c1d0 100644
--- a/innobase/include/ibuf0ibuf.ic
+++ b/innobase/include/ibuf0ibuf.ic
@@ -218,7 +218,7 @@ ibuf_update_free_bits_if_full(
 	}
 
 	if (after == 0) {
-		/* We move the page to front of the buffer pool LRU list:
+		/* We move the page to the front of the buffer pool LRU list:
 		the purpose of this is to prevent those pages to which we
 		cannot make inserts using the insert buffer from slipping
 		out of the buffer pool */
diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h
index 0fd1696b882..103d28cd130 100644
--- a/innobase/include/lock0lock.h
+++ b/innobase/include/lock0lock.h
@@ -526,12 +526,12 @@ extern lock_sys_t*	lock_sys;
 #define	LOCK_X		5	/* exclusive */
 #define	LOCK_AUTO_INC	6	/* locks the auto-inc counter of a table
 				in an exclusive mode */
-#define LOCK_MODE_MASK	0xF	/* mask used to extract mode from the
+#define LOCK_MODE_MASK	0xFUL	/* mask used to extract mode from the
 				type_mode field in a lock */
 /* Lock types */
 #define LOCK_TABLE	16	/* these type values should be so high that */
 #define	LOCK_REC	32	/* they can be ORed to the lock mode */
-#define LOCK_TYPE_MASK	0xF0	/* mask used to extract lock type from the
+#define LOCK_TYPE_MASK	0xF0UL	/* mask used to extract lock type from the
 				type_mode field in a lock */
 /* Waiting lock flag */
 #define LOCK_WAIT	256	/* this wait bit should be so high that
diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h
index 24ec28a56e6..dc44429d636 100644
--- a/innobase/include/log0log.h
+++ b/innobase/include/log0log.h
@@ -519,9 +519,9 @@ Peeks the current lsn. */
 ibool
 log_peek_lsn(
 /*=========*/
-			/* out: TRUE if success, FALSE if could not get the
-			log system mutex */
-	dulint*	lsn);	/* out: if returns TRUE, current lsn is here */
+                       /* out: TRUE if success, FALSE if could not get the
+                       log system mutex */
+       dulint* lsn);   /* out: if returns TRUE, current lsn is here */
 /**************************************************************************
 Refreshes the statistics used to print per-second averages. */
 
@@ -549,7 +549,7 @@ extern log_t*	log_sys;
 					highest bit is set to 1 if this is the
 					first log block in a log flush write
 					segment */
-#define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000
+#define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000UL
 					/* mask used to get the highest bit in
 					the preceding field */
 #define	LOG_BLOCK_HDR_DATA_LEN	4	/* number of bytes of log written to
@@ -600,12 +600,18 @@ extern log_t*	log_sys;
 #define LOG_CHECKPOINT_CHECKSUM_1 	LOG_CHECKPOINT_ARRAY_END
 #define LOG_CHECKPOINT_CHECKSUM_2 	(4 + LOG_CHECKPOINT_ARRAY_END)
 #define LOG_CHECKPOINT_FSP_FREE_LIMIT	(8 + LOG_CHECKPOINT_ARRAY_END)
-					/* current fsp free limit in the
-					tablespace, in units of one megabyte */
+					/* current fsp free limit in
+					tablespace 0, in units of one
+					megabyte; this information is only used
+					by ibbackup to decide if it can
+					truncate unused ends of
+					non-auto-extending data files in space
+					0 */
 #define LOG_CHECKPOINT_FSP_MAGIC_N	(12 + LOG_CHECKPOINT_ARRAY_END)
 					/* this magic number tells if the
 					checkpoint contains the above field:
-					the field was added to InnoDB-3.23.50 */
+					the field was added to
+					InnoDB-3.23.50 */
 #define LOG_CHECKPOINT_SIZE		(16 + LOG_CHECKPOINT_ARRAY_END)
 
 #define LOG_CHECKPOINT_FSP_MAGIC_N_VAL	1441231243
@@ -794,11 +800,11 @@ struct log_struct{
 					called */
 
 	/* Fields involved in checkpoints */
-	ulint		log_group_capacity; /* capacity of the log group; if
-					the checkpoint age exceeds this, it is
-					a serious error because it is possible
-					we will then overwrite log and spoil
-					crash recovery */
+        ulint           log_group_capacity; /* capacity of the log group; if
+                                        the checkpoint age exceeds this, it is
+                                        a serious error because it is possible
+                                        we will then overwrite log and spoil
+                                        crash recovery */
 	ulint		max_modified_age_async;
 					/* when this recommended value for lsn
 					- buf_pool_get_oldest_modification()
@@ -840,7 +846,8 @@ struct log_struct{
 	/* Fields involved in archiving */
 	ulint		archiving_state;/* LOG_ARCH_ON, LOG_ARCH_STOPPING
 					LOG_ARCH_STOPPED, LOG_ARCH_OFF */
-	dulint		archived_lsn;	/* archiving has advanced to this lsn */
+	dulint		archived_lsn;	/* archiving has advanced to this
+					lsn */
 	ulint		max_archived_lsn_age_async;
 					/* recommended maximum age of
 					archived_lsn, before we start
diff --git a/innobase/include/log0log.ic b/innobase/include/log0log.ic
index 587291883f7..16423286f6d 100644
--- a/innobase/include/log0log.ic
+++ b/innobase/include/log0log.ic
@@ -182,9 +182,9 @@ log_block_convert_lsn_to_no(
 
 	no = ut_dulint_get_low(lsn) / OS_FILE_LOG_BLOCK_SIZE;
 	no += (ut_dulint_get_high(lsn) % OS_FILE_LOG_BLOCK_SIZE)
-		* 2 * (0x80000000 / OS_FILE_LOG_BLOCK_SIZE);
+		* 2 * (0x80000000UL / OS_FILE_LOG_BLOCK_SIZE);
 	
-	no = no & 0x3FFFFFFF;
+	no = no & 0x3FFFFFFFUL;
 
 	return(no + 1);
 }
@@ -206,7 +206,7 @@ log_block_calc_checksum(
 	sh = 0;
 	
 	for (i = 0; i < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; i++) {
-		sum = sum & 0x7FFFFFFF;
+		sum = sum & 0x7FFFFFFFUL;
 		sum += (((ulint)(*(block + i))) << sh) + (ulint)(*(block + i));
 		sh++;
 		if (sh > 24) {
@@ -350,7 +350,7 @@ log_reserve_and_write_fast(
 
 #ifdef UNIV_LOG_DEBUG
 	log_check_log_recs(log->buf + log->old_buf_free,
-			log->buf_free - log->old_buf_free, log->old_lsn);	
+			log->buf_free - log->old_buf_free, log->old_lsn);
 #endif
 	return(lsn);
 }
diff --git a/innobase/include/log0recv.h b/innobase/include/log0recv.h
index e5a5bc05563..c972c3ce977 100644
--- a/innobase/include/log0recv.h
+++ b/innobase/include/log0recv.h
@@ -15,6 +15,8 @@ Created 9/20/1997 Heikki Tuuri
 #include "hash0hash.h"
 #include "log0log.h"
 
+extern ibool	recv_replay_file_ops;
+
 /***********************************************************************
 Reads the checkpoint info needed in hot backup. */
 
@@ -25,8 +27,8 @@ recv_read_cp_info_for_backup(
 	byte*	hdr,	/* in: buffer containing the log group header */
 	dulint*	lsn,	/* out: checkpoint lsn */
 	ulint*	offset,	/* out: checkpoint offset in the log group */
-	ulint*	fsp_limit,/* out: fsp limit, 1000000000 if the database
-			is running with < version 3.23.50 of InnoDB */
+	ulint*	fsp_limit,/* out: fsp limit of space 0, 1000000000 if the
+			database is running with < version 3.23.50 of InnoDB */
 	dulint*	cp_no,	/* out: checkpoint number */
 	dulint*	first_header_lsn);
 			/* out: lsn of of the start of the first log file */
@@ -175,17 +177,14 @@ recv_apply_hashed_log_recs(
 				disk and invalidated in buffer pool: this
 				alternative means that no new log records
 				can be generated during the application */
+#ifdef UNIV_HOTBACKUP
 /***********************************************************************
 Applies log records in the hash table to a backup. */
 
 void
-recv_apply_log_recs_for_backup(
-/*===========================*/
-	ulint	n_data_files,	/* in: number of data files */
-	char**	data_files,	/* in: array containing the paths to the
-				data files */
-	ulint*	file_sizes);	/* in: sizes of the data files in database
-				pages */
+recv_apply_log_recs_for_backup(void);
+/*================================*/
+#endif
 /************************************************************
 Recovers from archived log files, and also from log files, if they exist. */
 
@@ -334,7 +333,6 @@ extern ibool		recv_no_ibuf_operations;
 extern ibool		recv_needed_recovery;
 
 extern ibool            recv_lsn_checks_on;
-
 extern ibool		recv_is_making_a_backup;
 extern ulint		recv_max_parsed_page_no;
 
@@ -357,12 +355,7 @@ in the debug version: spaces with an odd number as the id are replicate
 spaces */
 #define RECV_REPLICA_SPACE_ADD	1
 
-/* This many blocks must be left free in the buffer pool when we scan
-the log and store the scanned log records in the buffer pool: we will
-use these free blocks to read in pages when we start applying the
-log records to the database. */
-
-#define RECV_POOL_N_FREE_BLOCKS	 (ut_min(256, buf_pool_get_curr_size() / 8))
+extern ulint	recv_n_pool_free_frames;
 
 #ifndef UNIV_NONINL
 #include "log0recv.ic"
diff --git a/innobase/include/mach0data.ic b/innobase/include/mach0data.ic
index 65e5df2178e..3ccdcf1dc0a 100644
--- a/innobase/include/mach0data.ic
+++ b/innobase/include/mach0data.ic
@@ -17,7 +17,7 @@ mach_write_to_1(
 	ulint   n)      /* in: ulint integer to be stored, >= 0, < 256 */ 
 {
 	ut_ad(b);
-	ut_ad(n <= 0xFF);
+	ut_ad(n <= 0xFFUL);
 
 	b[0] = (byte)n;
 }
@@ -46,7 +46,7 @@ mach_write_to_2(
 	ulint	n)      /* in: ulint integer to be stored */ 
 {
 	ut_ad(b);
-	ut_ad(n <= 0xFFFF);
+	ut_ad(n <= 0xFFFFUL);
 
 	b[0] = (byte)(n >> 8);
 	b[1] = (byte)(n);
@@ -79,7 +79,7 @@ mach_write_to_3(
 	ulint	n)      /* in: ulint integer to be stored */ 
 {
 	ut_ad(b);
-	ut_ad(n <= 0xFFFFFF);
+	ut_ad(n <= 0xFFFFFFUL);
 
 	b[0] = (byte)(n >> 16);
 	b[1] = (byte)(n >> 8);
@@ -184,20 +184,20 @@ mach_write_compressed(
 {
 	ut_ad(b);
 
-	if (n < 0x80) {
+	if (n < 0x80UL) {
 		mach_write_to_1(b, n);
 		return(1);
-	} else if (n < 0x4000) {
-		mach_write_to_2(b, n | 0x8000);
+	} else if (n < 0x4000UL) {
+		mach_write_to_2(b, n | 0x8000UL);
 		return(2);
-	} else if (n < 0x200000) {
-		mach_write_to_3(b, n | 0xC00000);
+	} else if (n < 0x200000UL) {
+		mach_write_to_3(b, n | 0xC00000UL);
 		return(3);
-	} else if (n < 0x10000000) {
-		mach_write_to_4(b, n | 0xE0000000);
+	} else if (n < 0x10000000UL) {
+		mach_write_to_4(b, n | 0xE0000000UL);
 		return(4);
 	} else {
-		mach_write_to_1(b, 0xF0);
+		mach_write_to_1(b, 0xF0UL);
 		mach_write_to_4(b + 1, n);
 		return(5);
 	}
@@ -212,13 +212,13 @@ mach_get_compressed_size(
 			/* out: compressed size in bytes */
 	ulint   n)      /* in: ulint integer (< 2^32) to be stored */ 
 {
-	if (n < 0x80) {
+	if (n < 0x80UL) {
 		return(1);
-	} else if (n < 0x4000) {
+	} else if (n < 0x4000UL) {
 		return(2);
-	} else if (n < 0x200000) {
+	} else if (n < 0x200000UL) {
 		return(3);
-	} else if (n < 0x10000000) {
+	} else if (n < 0x10000000UL) {
 		return(4);
 	} else {
 		return(5);
@@ -240,16 +240,16 @@ mach_read_compressed(
 
 	flag = mach_read_from_1(b);
 
-	if (flag < 0x80) {
+	if (flag < 0x80UL) {
 		return(flag);
-	} else if (flag < 0xC0) {
-		return(mach_read_from_2(b) & 0x7FFF);
-	} else if (flag < 0xE0) {
-		return(mach_read_from_3(b) & 0x3FFFFF);
-	} else if (flag < 0xF0) {
-		return(mach_read_from_4(b) & 0x1FFFFFFF);
+	} else if (flag < 0xC0UL) {
+		return(mach_read_from_2(b) & 0x7FFFUL);
+	} else if (flag < 0xE0UL) {
+		return(mach_read_from_3(b) & 0x3FFFFFUL);
+	} else if (flag < 0xF0UL) {
+		return(mach_read_from_4(b) & 0x1FFFFFFFUL);
 	} else {
-		ut_ad(flag == 0xF0);
+		ut_ad(flag == 0xF0UL);
 		return(mach_read_from_4(b + 1));
 	}
 }
@@ -439,7 +439,7 @@ mach_dulint_write_much_compressed(
 		return(mach_write_compressed(b, ut_dulint_get_low(n)));
 	}
 	
-	*b = 0xFF;
+	*b = (byte)0xFF;
 	size = 1 + mach_write_compressed(b + 1, ut_dulint_get_high(n));
 
 	size += mach_write_compressed(b + size, ut_dulint_get_low(n));
@@ -479,7 +479,7 @@ mach_dulint_read_much_compressed(
 
 	ut_ad(b);
 
-	if (*b != 0xFF) {
+	if (*b != (byte)0xFF) {
 		high = 0;
 		size = 0;
 	} else {
@@ -679,11 +679,10 @@ mach_write_to_2_little_endian(
 {
 	ut_ad(n < 256 * 256);
 
-	*dest = (byte)(n & 0xFF);
+	*dest = (byte)(n & 0xFFUL);
 
 	n = n >> 8;
 	dest++;
 
-	*dest = (byte)(n & 0xFF);
+	*dest = (byte)(n & 0xFFUL);
 }
-
diff --git a/innobase/include/mem0pool.h b/innobase/include/mem0pool.h
index 43707bd5f61..51c53afe788 100644
--- a/innobase/include/mem0pool.h
+++ b/innobase/include/mem0pool.h
@@ -19,6 +19,8 @@ typedef struct mem_pool_struct	mem_pool_t;
 /* The common memory pool */
 extern mem_pool_t*	mem_comm_pool;
 
+extern ulint		mem_out_of_mem_err_msg_count;
+
 /* Memory area header */
 
 struct mem_area_struct{
diff --git a/innobase/include/mtr0log.h b/innobase/include/mtr0log.h
index 367c9a00651..f50c1dfcb6a 100644
--- a/innobase/include/mtr0log.h
+++ b/innobase/include/mtr0log.h
@@ -57,6 +57,19 @@ mlog_write_initial_log_record(
 	byte	type,	/* in: log item type: MLOG_1BYTE, ... */
 	mtr_t*	mtr);	/* in: mini-transaction handle */
 /************************************************************
+Writes a log record about an .ibd file create/delete/rename. */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_for_file_op(
+/*======================================*/
+			/* out: new value of log_ptr */
+	ulint	type,	/* in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
+			MLOG_FILE_RENAME */
+	ulint	space_id,/* in: space id, if applicable */
+	ulint	page_no,/* in: page number (not relevant currently) */
+	byte*	log_ptr,/* in: pointer to mtr log which has been opened */
+	mtr_t*	mtr);	/* in: mtr */
+/************************************************************
 Catenates 1 - 4 bytes to the mtr log. */
 UNIV_INLINE
 void
diff --git a/innobase/include/mtr0log.ic b/innobase/include/mtr0log.ic
index 54f15779078..60a5b390be9 100644
--- a/innobase/include/mtr0log.ic
+++ b/innobase/include/mtr0log.ic
@@ -163,13 +163,6 @@ mlog_write_initial_log_record_fast(
 	space = buf_block_get_space(block);
 	offset = buf_block_get_page_no(block);
 
-	if (space != 0 || offset > 0x8FFFFFFF) {
-	        fprintf(stderr,
-	"InnoDB: error: buffer page pointer %lx has nonsensical space id %lu\n"
-	"InnoDB: or page no %lu\n", (ulint)ptr, space, offset);
-	        ut_error;
-	}
-
 	mach_write_to_1(log_ptr, type);
 	log_ptr++;	
 	log_ptr += mach_write_compressed(log_ptr, space);
@@ -192,3 +185,31 @@ mlog_write_initial_log_record_fast(
 #endif
 	return(log_ptr);
 }	
+
+/************************************************************
+Writes a log record about an .ibd file create/delete/rename. */
+UNIV_INLINE
+byte*
+mlog_write_initial_log_record_for_file_op(
+/*======================================*/
+			/* out: new value of log_ptr */
+	ulint	type,	/* in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
+			MLOG_FILE_RENAME */
+	ulint	space_id,/* in: space id, if applicable */
+	ulint	page_no,/* in: page number (not relevant currently) */
+	byte*	log_ptr,/* in: pointer to mtr log which has been opened */
+	mtr_t*	mtr)	/* in: mtr */
+{
+	ut_ad(log_ptr);
+
+	mach_write_to_1(log_ptr, type);
+	log_ptr++;
+
+	/* Write the caller-supplied space id and page number (may be dummy) */
+	log_ptr += mach_write_compressed(log_ptr, space_id);
+	log_ptr += mach_write_compressed(log_ptr, page_no);
+
+	mtr->n_log_recs++;
+
+	return(log_ptr);
+}	
diff --git a/innobase/include/mtr0mtr.h b/innobase/include/mtr0mtr.h
index d999b7cc5b7..9cf592f71e1 100644
--- a/innobase/include/mtr0mtr.h
+++ b/innobase/include/mtr0mtr.h
@@ -96,7 +96,13 @@ flag value must give the length also! */
 						sequence of these records */
 #define MLOG_DUMMY_RECORD	((byte)32)	/* dummy log record used to
 						pad a log block full */
-#define MLOG_BIGGEST_TYPE	((byte)32) 	/* biggest value (used in
+#define MLOG_FILE_CREATE	((byte)33)	/* log record about an .ibd
+						file creation */
+#define MLOG_FILE_RENAME	((byte)34)	/* log record about an .ibd
+						file rename */
+#define MLOG_FILE_DELETE	((byte)35)	/* log record about an .ibd
+						file deletion */
+#define MLOG_BIGGEST_TYPE	((byte)35) 	/* biggest value (used in
 						asserts) */
 					
 /*******************************************************************
diff --git a/innobase/include/os0file.h b/innobase/include/os0file.h
index b221bf7aef9..cf2dbd68fb1 100644
--- a/innobase/include/os0file.h
+++ b/innobase/include/os0file.h
@@ -11,9 +11,11 @@ Created 10/21/1995 Heikki Tuuri
 
 #include "univ.i"
 
+#ifndef __WIN__
+#include <dirent.h>
+#include <sys/stat.h>
+#endif
 
-/* If the following is set to TRUE, we do not call os_file_flush in every
-os_file_write */
 extern ibool	os_do_not_call_flush_at_each_write;
 extern ibool	os_has_said_disk_full;
 extern ibool	os_aio_print_debug;
@@ -60,9 +62,11 @@ log. */
 #define	OS_FILE_OPEN			51
 #define	OS_FILE_CREATE			52
 #define OS_FILE_OVERWRITE		53
+#define OS_FILE_OPEN_RAW		54
 
 #define OS_FILE_READ_ONLY 		333
 #define	OS_FILE_READ_WRITE		444
+#define	OS_FILE_READ_ALLOW_DELETE	555	/* for ibbackup */
 
 /* Options for file_create */
 #define	OS_FILE_AIO			61
@@ -120,6 +124,36 @@ extern ulint	os_n_file_reads;
 extern ulint	os_n_file_writes;
 extern ulint	os_n_fsyncs;
 
+/* File types for directory entry data type */
+
+enum os_file_type_enum{
+    OS_FILE_TYPE_UNKNOWN = 0,
+    OS_FILE_TYPE_FILE,	 		/* regular file */
+    OS_FILE_TYPE_DIR,			/* directory */
+    OS_FILE_TYPE_LINK 			/* symbolic link */
+};
+typedef enum os_file_type_enum	  os_file_type_t;
+
+/* Maximum path string length in bytes when referring to tables in the
+'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers
+of this size from the thread stack; that is why this should not be made much
+bigger than 4000 bytes */
+#define OS_FILE_MAX_PATH	4000
+
+/* Struct used in fetching information of a file in a directory */
+typedef struct os_file_stat_struct	os_file_stat_t;
+struct os_file_stat_struct{
+       char		name[OS_FILE_MAX_PATH];	/* path to a file */
+       os_file_type_t	type;			/* file type */       
+       ib_longlong	size;			/* file size */
+};
+
+#ifdef __WIN__
+typedef HANDLE  os_file_dir_t;	/* directory stream */
+#else
+typedef DIR*	os_file_dir_t;	/* directory stream */
+#endif
+
 /***************************************************************************
 Gets the operating system version. Currently works only on Windows. */
 
@@ -133,6 +167,57 @@ Creates the seek mutexes used in positioned reads and writes. */
 void
 os_io_init_simple(void);
 /*===================*/
+/***************************************************************************
+The os_file_opendir() function opens a directory stream corresponding to the
+directory named by the dirname argument. The directory stream is positioned
+at the first entry. In both Unix and Windows we automatically skip the '.'
+and '..' items at the start of the directory listing. */
+
+os_file_dir_t
+os_file_opendir(
+/*============*/
+				/* out: directory stream, NULL if error */
+	char*	dirname,	/* in: directory name; it must not contain
+				a trailing '\' or '/' */
+	ibool	error_is_fatal);/* in: TRUE if we should treat an error as a
+				fatal error; if we try to open symlinks then
+				we do not wish a fatal error if it happens
+				not to be a directory */
+/***************************************************************************
+Closes a directory stream. */
+
+int
+os_file_closedir(
+/*=============*/
+				/* out: 0 if success, -1 if failure */
+	os_file_dir_t	dir);	/* in: directory stream */
+/***************************************************************************
+This function returns information of the next file in the directory. We jump
+over the '.' and '..' entries in the directory. */
+
+int
+os_file_readdir_next_file(
+/*======================*/
+				/* out: 0 if ok, -1 if error, 1 if at the end
+				of the directory */
+	char*		dirname,/* in: directory name or path */
+	os_file_dir_t	dir,	/* in: directory stream */
+	os_file_stat_t*	info);	/* in/out: buffer where the info is returned */
+/*********************************************************************
+This function attempts to create a directory named pathname. The new directory
+gets default permissions. On Unix, the permissions are (0770 & ~umask). If the
+directory exists already, nothing is done and the call succeeds, unless the
+fail_if_exists argument is true. */
+
+ibool
+os_file_create_directory(
+/*=====================*/
+				/* out: TRUE if call succeeds, FALSE on
+				error */
+	char*	pathname,	/* in: directory name as null-terminated
+				string */
+	ibool	fail_if_exists);/* in: if TRUE, pre-existing directory is
+				treated as an error. */
 /********************************************************************
 A simple function to open or create a file. */
 
@@ -140,7 +225,8 @@ os_file_t
 os_file_create_simple(
 /*==================*/
 			/* out, own: handle to the file, not defined if error,
-			error number can be retrieved with os_get_last_error */
+			error number can be retrieved with
+			os_file_get_last_error */
 	char*	name,	/* in: name of the file or path as a null-terminated
 			string */
 	ulint	create_mode,/* in: OS_FILE_OPEN if an existing file is opened
@@ -155,13 +241,16 @@ os_file_t
 os_file_create_simple_no_error_handling(
 /*====================================*/
 			/* out, own: handle to the file, not defined if error,
-			error number can be retrieved with os_get_last_error */
+			error number can be retrieved with
+			os_file_get_last_error */
 	char*	name,	/* in: name of the file or path as a null-terminated
 			string */
 	ulint	create_mode,/* in: OS_FILE_OPEN if an existing file is opened
 			(if does not exist, error), or OS_FILE_CREATE if a new
 			file is created (if exists, error) */
-	ulint	access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */
+	ulint	access_type,/* in: OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or
+			OS_FILE_READ_ALLOW_DELETE; the last option is used by
+			a backup program reading the file */
 	ibool*	success);/* out: TRUE if succeed, FALSE if error */
 /********************************************************************
 Opens an existing file or creates a new. */
@@ -170,13 +259,16 @@ os_file_t
 os_file_create(
 /*===========*/
 			/* out, own: handle to the file, not defined if error,
-			error number can be retrieved with os_get_last_error */
+			error number can be retrieved with
+			os_file_get_last_error */
 	char*	name,	/* in: name of the file or path as a null-terminated
 			string */
 	ulint	create_mode,/* in: OS_FILE_OPEN if an existing file is opened
 			(if does not exist, error), or OS_FILE_CREATE if a new
 			file is created (if exists, error), OS_FILE_OVERWRITE
-			if a new file is created or an old overwritten */
+			if a new file is created or an old overwritten;
+			OS_FILE_OPEN_RAW, if a raw device or disk partition
+			should be opened */
 	ulint	purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
 			is desired, OS_FILE_NORMAL, if any normal file;
 			NOTE that it also depends on type, os_aio_.. and srv_..
@@ -186,6 +278,34 @@ os_file_create(
 	ulint	type,	/* in: OS_DATA_FILE or OS_LOG_FILE */
 	ibool*	success);/* out: TRUE if succeed, FALSE if error */
 /***************************************************************************
+Deletes a file. The file has to be closed before calling this. */
+
+ibool
+os_file_delete(
+/*===========*/
+			/* out: TRUE if success */
+	char*	name);	/* in: file path as a null-terminated string */
+
+/***************************************************************************
+Deletes a file if it exists. The file has to be closed before calling this. */
+
+ibool
+os_file_delete_if_exists(
+/*=====================*/
+			/* out: TRUE if success */
+	char*	name);	/* in: file path as a null-terminated string */
+/***************************************************************************
+Renames a file (can also move it to another directory). It is safest that the
+file is closed before calling this function. */
+
+ibool
+os_file_rename(
+/*===========*/
+				/* out: TRUE if success */
+	char*	oldpath,	/* in: old file path as a null-terminated
+				string */
+	char*	newpath);	/* in: new file path */
+/***************************************************************************
 Closes a file handle. In case of error, error number can be retrieved with
 os_file_get_last_error. */
 
@@ -214,6 +334,14 @@ os_file_get_size(
 				size */
 	ulint*		size_high);/* out: most significant 32 bits of size */
 /***************************************************************************
+Gets file size as a 64-bit integer ib_longlong. */
+
+ib_longlong
+os_file_get_size_as_iblonglong(
+/*===========================*/
+				/* out: size in bytes, -1 if error */
+	os_file_t	file);	/* in: handle to a file */
+/***************************************************************************
 Sets a file size. This function can be used to extend or truncate a file. */
 
 ibool
@@ -241,9 +369,12 @@ overwrite the error number). If the number is not known to this program,
 the OS error number + 100 is returned. */
 
 ulint
-os_file_get_last_error(void);
-/*========================*/
-		/* out: error number, or OS error number + 100 */
+os_file_get_last_error(
+/*===================*/
+					/* out: error number, or OS error
+					number + 100 */
+	ibool	report_all_errors);	/* in: TRUE if we want an error message
+					printed for all errors */
 /***********************************************************************
 Requests a synchronous read operation. */
 
@@ -260,6 +391,23 @@ os_file_read(
 				offset */
 	ulint		n);	/* in: number of bytes to read */	
 /***********************************************************************
+Requests a synchronous positioned read operation. This function does not do
+any error handling. In case of error it returns FALSE. */
+
+ibool
+os_file_read_no_error_handling(
+/*===========================*/
+				/* out: TRUE if request was
+				successful, FALSE if fail */
+	os_file_t	file,	/* in: handle to a file */
+	void*		buf,	/* in: buffer where to read */
+	ulint		offset,	/* in: least significant 32 bits of file
+				offset where to read */
+	ulint		offset_high,/* in: most significant 32 bits of
+				offset */
+	ulint		n);	/* in: number of bytes to read */	
+
+/***********************************************************************
 Requests a synchronous write operation. */
 
 ibool
diff --git a/innobase/include/os0proc.h b/innobase/include/os0proc.h
index 7618032a11f..d0d3cf82e38 100644
--- a/innobase/include/os0proc.h
+++ b/innobase/include/os0proc.h
@@ -15,6 +15,76 @@ Created 9/30/1995 Heikki Tuuri
 typedef void*			os_process_t;
 typedef unsigned long int	os_process_id_t;
 
+/* The cell type in os_awe_allocate_mem page info */
+#if defined(__WIN2000__) && defined(ULONG_PTR)
+typedef ULONG_PTR	os_awe_t;
+#else
+typedef ulint		os_awe_t;
+#endif
+
+/* Physical page size when Windows AWE is used. This is the normal
+page size of an Intel x86 processor. We cannot use AWE with 2 MB or 4 MB
+pages. */
+#define	OS_AWE_X86_PAGE_SIZE	4096
+
+/********************************************************************
+Windows AWE support. Tries to enable the "lock pages in memory" privilege for
+the current process so that the current process can allocate memory-locked
+virtual address space to act as the window where AWE maps physical memory. */
+
+ibool
+os_awe_enable_lock_pages_in_mem(void);
+/*=================================*/
+				/* out: TRUE if success, FALSE if error;
+				prints error info to stderr if no success */
+/********************************************************************
+Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
+processor. */
+
+ibool
+os_awe_allocate_physical_mem(
+/*=========================*/
+				/* out: TRUE if success */
+	os_awe_t** page_info,	/* out, own: array of opaque data containing
+				the info for allocated physical memory pages;
+				each allocated 4 kB physical memory page has
+				one slot of type os_awe_t in the array */
+	ulint	  n_megabytes);	/* in: number of megabytes to allocate */
+/********************************************************************
+Allocates a window in the virtual address space where we can map the
+pages of physical memory. */
+
+byte*
+os_awe_allocate_virtual_mem_window(
+/*===============================*/
+			/* out, own: allocated memory, or NULL if did not
+			succeed */
+	ulint	size);	/* in: virtual memory allocation size in bytes, must
+			be < 2 GB */
+/********************************************************************
+With this function you can map parts of physical memory allocated with
+the ..._allocate_physical_mem to the virtual address space allocated with
+the previous function. Intel implements this so that the process page
+tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
+showed that this takes < 1 microsecond, much better than the estimated 80 us
+for copying a 16 kB page memory to memory. But, the operation will at least
+partially invalidate the translation lookaside buffer (TLB) of all
+processors. Under a real-world load the performance hit may be bigger. */
+
+ibool
+os_awe_map_physical_mem_to_window(
+/*==============================*/
+					/* out: TRUE if success; the function
+					calls exit(1) in case of an error */
+	byte*		ptr,		/* in: a page-aligned pointer to
+					somewhere in the virtual address
+					space window; we map the physical mem
+					pages here */
+	ulint		n_mem_pages,	/* in: number of 4 kB mem pages to
+					map */
+	os_awe_t*	page_info);	/* in: array of page infos for those
+					pages; each page has one slot in the
+					array */
 /********************************************************************
 Converts the current process id to a number. It is not guaranteed that the
 number is unique. In Linux returns the 'process number' of the current
diff --git a/innobase/include/page0page.h b/innobase/include/page0page.h
index 04f771c3abd..969313614e3 100644
--- a/innobase/include/page0page.h
+++ b/innobase/include/page0page.h
@@ -596,7 +596,8 @@ byte*
 page_parse_delete_rec_list(
 /*=======================*/
 			/* out: end of log record or NULL */
-	byte	type,	/* in: MLOG_LIST_END_DELETE or MLOG_LIST_START_DELETE */
+	byte	type,	/* in: MLOG_LIST_END_DELETE or
+			MLOG_LIST_START_DELETE */
 	byte*	ptr,	/* in: buffer */
 	byte*	end_ptr,/* in: buffer end */
 	page_t*	page,	/* in: page or NULL */	
diff --git a/innobase/include/que0types.h b/innobase/include/que0types.h
index c7ce09db40b..e59c2313a5a 100644
--- a/innobase/include/que0types.h
+++ b/innobase/include/que0types.h
@@ -36,7 +36,8 @@ struct que_common_struct{
 				if the buffer has been allocated dynamically:
 				if this field is != 0, and the node is a
 				symbol node or a function node, then we
-				have to free the data field in val explicitly */
+				have to free the data field in val
+				explicitly */
 };
 
 #endif
diff --git a/innobase/include/rem0rec.h b/innobase/include/rem0rec.h
index b28f39925c1..ebdd3c1ac81 100644
--- a/innobase/include/rem0rec.h
+++ b/innobase/include/rem0rec.h
@@ -21,7 +21,7 @@ Created 5/30/1994 Heikki Tuuri
 
 /* Flag denoting the predefined minimum record: this bit is ORed in the 4
 info bits of a record */
-#define REC_INFO_MIN_REC_FLAG	0x10
+#define REC_INFO_MIN_REC_FLAG	0x10UL
 
 /* Number of extra bytes in a record, in addition to the data and the
 offsets */
@@ -406,8 +406,8 @@ rec_sprintf(
 
 /* Maximum lengths for the data in a physical record if the offsets
 are given in one byte (resp. two byte) format. */
-#define REC_1BYTE_OFFS_LIMIT	0x7F
-#define REC_2BYTE_OFFS_LIMIT	0x7FFF
+#define REC_1BYTE_OFFS_LIMIT	0x7FUL
+#define REC_2BYTE_OFFS_LIMIT	0x7FFFUL
 
 /* The data size of record must be smaller than this because we reserve
 two upmost bits in a two byte offset for special purposes */
diff --git a/innobase/include/rem0rec.ic b/innobase/include/rem0rec.ic
index 9dfd4faeec8..f4acd8547db 100644
--- a/innobase/include/rem0rec.ic
+++ b/innobase/include/rem0rec.ic
@@ -29,41 +29,41 @@ significant bytes and bits are written below less significant.
 and the shift needed to obtain each bit-field of the record. */
 
 #define REC_NEXT		2
-#define REC_NEXT_MASK		0xFFFF
+#define REC_NEXT_MASK		0xFFFFUL
 #define REC_NEXT_SHIFT		0
 
 #define REC_SHORT		3	/* This is single byte bit-field */
-#define	REC_SHORT_MASK		0x1
+#define	REC_SHORT_MASK		0x1UL
 #define REC_SHORT_SHIFT		0
 
 #define	REC_N_FIELDS		4
-#define REC_N_FIELDS_MASK	0x7FE
+#define REC_N_FIELDS_MASK	0x7FEUL
 #define	REC_N_FIELDS_SHIFT	1
 
 #define	REC_HEAP_NO		5
-#define REC_HEAP_NO_MASK	0xFFF8
+#define REC_HEAP_NO_MASK	0xFFF8UL
 #define	REC_HEAP_NO_SHIFT	3
 
 #define REC_N_OWNED		6	/* This is single byte bit-field */
-#define	REC_N_OWNED_MASK	0xF
+#define	REC_N_OWNED_MASK	0xFUL
 #define REC_N_OWNED_SHIFT	0
 
-#define	REC_INFO_BITS_MASK	0xF0
+#define	REC_INFO_BITS_MASK	0xF0UL
 #define REC_INFO_BITS_SHIFT	0
 
 /* The deleted flag in info bits */
-#define REC_INFO_DELETED_FLAG 	0x20	/* when bit is set to 1, it means the
+#define REC_INFO_DELETED_FLAG 	0x20UL	/* when bit is set to 1, it means the
 					record has been delete marked */
 /* The following masks are used to filter the SQL null bit from
 one-byte and two-byte offsets */
 
-#define REC_1BYTE_SQL_NULL_MASK	0x80
-#define REC_2BYTE_SQL_NULL_MASK	0x8000
+#define REC_1BYTE_SQL_NULL_MASK	0x80UL
+#define REC_2BYTE_SQL_NULL_MASK	0x8000UL
 
 /* In a 2-byte offset the second most significant bit denotes
 a field stored to another page: */
 
-#define REC_2BYTE_EXTERN_MASK	0x4000
+#define REC_2BYTE_EXTERN_MASK	0x4000UL
 
 /****************************************************************
 Return field length or UNIV_SQL_NULL. */
@@ -133,7 +133,7 @@ rec_set_bit_field_1(
 	ut_ad(rec);
 	ut_ad(offs <= REC_N_EXTRA_BYTES);
 	ut_ad(mask);
-	ut_ad(mask <= 0xFF);
+	ut_ad(mask <= 0xFFUL);
 	ut_ad(((mask >> shift) << shift) == mask);
 	ut_ad(((val << shift) & mask) == (val << shift));
 	
@@ -172,8 +172,8 @@ rec_set_bit_field_2(
 {
 	ut_ad(rec);
 	ut_ad(offs <= REC_N_EXTRA_BYTES);
-	ut_ad(mask > 0xFF);
-	ut_ad(mask <= 0xFFFF);
+	ut_ad(mask > 0xFFUL);
+	ut_ad(mask <= 0xFFFFUL);
 	ut_ad((mask >> shift) & 1);
 	ut_ad(0 == ((mask >> shift) & ((mask >> shift) + 1)));
 	ut_ad(((mask >> shift) << shift) == mask);
@@ -188,8 +188,8 @@ rec_set_bit_field_2(
 			   + (REC_N_FIELDS_MASK << (8 * (REC_N_FIELDS - 4)))
 			   + (REC_HEAP_NO_MASK << (8 * (REC_HEAP_NO - 4)))
 			   + (REC_N_OWNED_MASK << (8 * (REC_N_OWNED - 3)))
-			   + (REC_INFO_BITS_MASK << (8 * (REC_INFO_BITS - 3))));
-	if (m != ut_dbg_zero + 0xFFFFFFFF) {
+			  + (REC_INFO_BITS_MASK << (8 * (REC_INFO_BITS - 3))));
+	if (m != ut_dbg_zero + 0xFFFFFFFFUL) {
 		printf("Sum of masks %lx\n", m);
 		ut_error;
 	}
diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h
index 940b4c61b2f..fade3709631 100644
--- a/innobase/include/row0mysql.h
+++ b/innobase/include/row0mysql.h
@@ -52,6 +52,14 @@ row_mysql_read_var_ref_noninline(
 	ulint*	len,	/* out: variable-length field length */
 	byte*	field);	/* in: field */
 /***********************************************************************
+Frees the blob heap in prebuilt when no longer needed. */
+
+void
+row_mysql_prebuilt_free_blob_heap(
+/*==============================*/
+	row_prebuilt_t*	prebuilt);	/* in: prebuilt struct of a
+					ha_innobase:: table handle */
+/***********************************************************************
 Stores a reference to a BLOB in the MySQL format. */
 
 void
@@ -331,6 +339,45 @@ row_drop_table_for_mysql(
 	char*	name,	/* in: table name */
 	trx_t*	trx);	/* in: transaction handle */
 /*************************************************************************
+Discards the tablespace of a table which is stored in an .ibd file. Discarding
+means that this function deletes the .ibd file and assigns a new table id for
+the table. Also the flag table->ibd_file_missing is set TRUE.
+
+How do we prevent crashes caused by ongoing operations on the table? Old
+operations could try to access non-existent pages.
+
+1) SQL queries, INSERT, SELECT, ...: we must get an exclusive MySQL table lock
+on the table before we can do DISCARD TABLESPACE. Then there are no running
+queries on the table.
+2) Purge and rollback: we assign a new table id for the table. Since purge and
+rollback look for the table based on the table id, they see the table as
+'dropped' and discard their operations.
+3) Insert buffer: we remove all entries for the tablespace in the insert
+buffer tree; as long as the tablespace mem object does not exist, ongoing
+insert buffer page merges are discarded in buf0rea.c. If we recreate the
+tablespace mem object with IMPORT TABLESPACE later, then the tablespace will
+have the same id, but the tablespace_version field in the mem object is
+different, and ongoing old insert buffer page merges get discarded.
+4) Linear readahead and random readahead: we use the same method as in 3) to
+discard ongoing operations. */
+
+int
+row_discard_tablespace_for_mysql(
+/*=============================*/
+			/* out: error code or DB_SUCCESS */
+	char*	name,	/* in: table name */
+	trx_t*	trx);	/* in: transaction handle */
+/*********************************************************************
+Imports a tablespace. The space id in the .ibd file must match the space id
+of the table in the data dictionary. */
+
+int
+row_import_tablespace_for_mysql(
+/*============================*/
+			/* out: error code or DB_SUCCESS */
+	char*	name,	/* in: table name */
+	trx_t*	trx);	/* in: transaction handle */
+/*************************************************************************
 Drops a database for MySQL. */
 
 int
diff --git a/innobase/include/row0sel.h b/innobase/include/row0sel.h
index 5ef7ff9399a..a35d588ad08 100644
--- a/innobase/include/row0sel.h
+++ b/innobase/include/row0sel.h
@@ -118,7 +118,8 @@ row_search_for_mysql(
 /*=================*/
 					/* out: DB_SUCCESS,
 					DB_RECORD_NOT_FOUND, 
-					DB_END_OF_INDEX, or DB_DEADLOCK */
+					DB_END_OF_INDEX, DB_DEADLOCK,
+					or DB_TOO_BIG_RECORD */
 	byte*		buf,		/* in/out: buffer for the fetched
 					row in the MySQL format */
 	ulint		mode,		/* in: search mode PAGE_CUR_L, ... */
diff --git a/innobase/include/row0sel.ic b/innobase/include/row0sel.ic
index 9005624b6ca..994638790c0 100644
--- a/innobase/include/row0sel.ic
+++ b/innobase/include/row0sel.ic
@@ -77,7 +77,7 @@ open_step(
 			
 	if (err != DB_SUCCESS) {
 		/* SQL error detected */
-		printf("SQL error %lu\n", err);
+		printf("SQL error %lu\n", (unsigned long) err);
 
 		ut_error;
 		que_thr_handle_error(thr, err, NULL, 0);
diff --git a/innobase/include/row0upd.ic b/innobase/include/row0upd.ic
index 3e00978be2f..6b9deeac5e3 100644
--- a/innobase/include/row0upd.ic
+++ b/innobase/include/row0upd.ic
@@ -86,8 +86,8 @@ upd_field_set_field_no(
 		fprintf(stderr,
 		"InnoDB: Error: trying to access field %lu in table %s\n"
 		"InnoDB: index %s, but index has only %lu fields\n",
-		field_no, index->table_name, index->name,
-		dict_index_get_n_fields(index));
+		(unsigned long) field_no, index->table_name, index->name,
+		(unsigned long) dict_index_get_n_fields(index));
 	}
 
 	dtype_copy(dfield_get_type(&(upd_field->new_val)),
diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h
index 769d55fb66c..8aac71de2a9 100644
--- a/innobase/include/srv0srv.h
+++ b/innobase/include/srv0srv.h
@@ -37,6 +37,8 @@ extern ibool	srv_lower_case_table_names;
 extern char*	srv_data_home;
 extern char*	srv_arch_dir;
 
+extern ibool	srv_file_per_table;
+
 extern ulint	srv_n_data_files;
 extern char**	srv_data_file_names;
 extern ulint*	srv_data_file_sizes;
@@ -62,6 +64,7 @@ extern ulint	srv_flush_log_at_trx_commit;
 extern byte	srv_latin1_ordering[256];/* The sort order table of the latin1
 					character set */
 extern ulint	srv_pool_size;
+extern ulint	srv_awe_window_size;
 extern ulint	srv_mem_pool_size;
 extern ulint	srv_lock_table_size;
 
@@ -76,12 +79,14 @@ extern char*    srv_file_flush_method_str;
 extern ulint    srv_unix_file_flush_method;
 extern ulint   	srv_win_file_flush_method;
 
+extern ulint	srv_max_n_open_files;
+
 extern ulint	srv_max_dirty_pages_pct;
 
 extern ulint	srv_force_recovery;
 extern ulint	srv_thread_concurrency;
 
-extern ulint    srv_max_n_threads;
+extern ulint	srv_max_n_threads;
 
 extern lint	srv_conc_n_threads;
 
@@ -92,6 +97,8 @@ extern ibool	srv_use_doublewrite_buf;
 extern ibool    srv_set_thread_priorities;
 extern int      srv_query_thread_priority;
 
+extern ibool	srv_use_awe;
+extern ibool	srv_use_adaptive_hash_indexes;
 /*-------------------------------------------*/
 
 extern ulint	srv_n_rows_inserted;
diff --git a/innobase/include/srv0start.h b/innobase/include/srv0start.h
index c4c8dac5d7a..0074de537c3 100644
--- a/innobase/include/srv0start.h
+++ b/innobase/include/srv0start.h
@@ -11,6 +11,7 @@ Created 10/10/1995 Heikki Tuuri
 #define srv0start_h
 
 #include "univ.i"
+#include "ut0byte.h"
 
 /*************************************************************************
 Normalizes a directory path for Windows: converts slashes to backslashes. */
@@ -69,12 +70,17 @@ innobase_shutdown_for_mysql(void);
 /*=============================*/
 				/* out: DB_SUCCESS or error code */
 
+extern	dulint	srv_shutdown_lsn;
+extern	dulint	srv_start_lsn;
+
 extern  ulint   srv_sizeof_trx_t_in_ha_innodb_cc;
 
 extern  ibool   srv_is_being_started;
 extern	ibool	srv_startup_is_before_trx_rollback_phase;
 extern	ibool	srv_is_being_shut_down;
 
+extern  ibool	srv_start_raw_disk_in_use;
+
 /* At a shutdown the value first climbs from 0 to SRV_SHUTDOWN_CLEANUP
 and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
 
@@ -84,4 +90,7 @@ extern  ulint   srv_shutdown_state;
 #define SRV_SHUTDOWN_LAST_PHASE	   2
 #define SRV_SHUTDOWN_EXIT_THREADS  3
 
+/* Log 'spaces' have id's >= this */
+#define SRV_LOG_SPACE_FIRST_ID		0xFFFFFFF0UL
+
 #endif
diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h
index 3acf3415889..3a7203bbb56 100644
--- a/innobase/include/sync0sync.h
+++ b/innobase/include/sync0sync.h
@@ -376,8 +376,8 @@ or row lock! */
 #define SYNC_IBUF_HEADER	914
 #define SYNC_IBUF_PESS_INSERT_MUTEX 912
 #define SYNC_IBUF_MUTEX		910	/* ibuf mutex is really below
-					SYNC_FSP_PAGE: we assign value this
-					high only to get the program to pass
+					SYNC_FSP_PAGE: we assign a value this
+					high only to make the program pass
 					the debug checks */
 /*-------------------------------*/
 #define	SYNC_INDEX_TREE		900
@@ -396,7 +396,7 @@ or row lock! */
 #define	SYNC_FSP_PAGE		395
 /*------------------------------------- Insert buffer headers */ 
 /*------------------------------------- ibuf_mutex */
-/*------------------------------------- Insert buffer trees */
+/*------------------------------------- Insert buffer tree */
 #define	SYNC_IBUF_BITMAP_MUTEX	351
 #define	SYNC_IBUF_BITMAP	350
 /*-------------------------------*/
diff --git a/innobase/include/trx0rseg.ic b/innobase/include/trx0rseg.ic
index 9a6137eb2e5..35e927f5e79 100644
--- a/innobase/include/trx0rseg.ic
+++ b/innobase/include/trx0rseg.ic
@@ -67,7 +67,7 @@ trx_rsegf_get_nth_undo(
 {
 	if (n >= TRX_RSEG_N_SLOTS) {
 		fprintf(stderr,
-		"InnoDB: Error: trying to get slot %lu of rseg\n", n);
+		"InnoDB: Error: trying to get slot %lu of rseg\n", (unsigned long) n);
 		ut_error;
 	}
 
@@ -88,7 +88,7 @@ trx_rsegf_set_nth_undo(
 {
 	if (n >= TRX_RSEG_N_SLOTS) {
 		fprintf(stderr,
-		"InnoDB: Error: trying to set slot %lu of rseg\n", n);
+		"InnoDB: Error: trying to set slot %lu of rseg\n", (unsigned long) n);
 		ut_error;
 	}
 
diff --git a/innobase/include/trx0sys.h b/innobase/include/trx0sys.h
index a8ed675a8a5..0005c4a1711 100644
--- a/innobase/include/trx0sys.h
+++ b/innobase/include/trx0sys.h
@@ -24,18 +24,6 @@ Created 3/26/1996 Heikki Tuuri
 #include "fsp0fsp.h"
 #include "read0types.h"
 
-/* Do NOT merge this to the 4.1 code base! */
-extern ibool		trx_sys_downgrading_from_4_1_1;
-
-/********************************************************************
-Do NOT merge this to the 4.1 code base!
-Marks the trx sys header when we have successfully downgraded from the >= 4.1.1
-multiple tablespace format back to the 4.0 format. */
-
-void
-trx_sys_mark_downgraded_from_4_1_1(void);
-/*====================================*/
-
 /* In a MySQL replication slave, in crash recovery we store the master log
 file name and position here. We have successfully got the updates to InnoDB
 up to this position. If .._pos is -1, it means no crash recovery was needed,
@@ -49,21 +37,35 @@ extern trx_sys_t*	trx_sys;
 
 /* Doublewrite system */
 extern trx_doublewrite_t*	trx_doublewrite;
+extern ibool			trx_doublewrite_must_reset_space_ids;
+extern ibool			trx_sys_multiple_tablespace_format;
 
 /********************************************************************
-Creates the doublewrite buffer at a database start. The header of the
+Creates the doublewrite buffer for a new InnoDB installation. The header of the
 doublewrite buffer is placed on the trx system header page. */
 
 void
 trx_sys_create_doublewrite_buf(void);
 /*================================*/
 /********************************************************************
-At a database startup uses a possible doublewrite buffer to restore
+At a database startup initializes the doublewrite buffer memory structure if
+we already have a doublewrite buffer created in the data files. If we are
+upgrading to an InnoDB version which supports multiple tablespaces, then this
+function performs the necessary update operations. If we are in a crash
+recovery, this function uses a possible doublewrite buffer to restore
 half-written pages in the data files. */
 
 void
-trx_sys_doublewrite_restore_corrupt_pages(void);
-/*===========================================*/
+trx_sys_doublewrite_init_or_restore_pages(
+/*======================================*/
+	ibool	restore_corrupt_pages);
+/********************************************************************
+Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
+multiple tablespace format. */
+
+void
+trx_sys_mark_upgraded_to_multiple_tablespaces(void);
+/*===============================================*/
 /********************************************************************
 Determines if a page number is located inside the doublewrite buffer. */
 
@@ -367,14 +369,17 @@ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
 						to disk, we still may be able
 						to recover the information */
 #define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED (24 + FSEG_HEADER_SIZE)
-						/* If this is set to
-						.._N, then we are
-						DOWNGRADING from >= 4.1.1 to
-						4.0 */
+						/* If this is not yet set to
+						.._N, we must reset the
+						doublewrite buffer, because
+						starting from 4.1.x the space
+						id of a data page is stored to
+					FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO */
 /*-------------------------------------------------------------*/
 #define TRX_SYS_DOUBLEWRITE_MAGIC_N	536853855
 #define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N 1783657386
 
+
 #define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE	FSP_EXTENT_SIZE	
 
 /* Doublewrite control struct */
diff --git a/innobase/include/univ.i b/innobase/include/univ.i
index 4854e5a7b78..cd471a89607 100644
--- a/innobase/include/univ.i
+++ b/innobase/include/univ.i
@@ -65,13 +65,7 @@ Microsoft Visual C++ */
 #define HAVE_PWRITE
 #endif
 
-/* Apparently in some old SCO Unixes the return type of sprintf is not
-an integer as it should be according to the modern Posix standard. Because
-of that we define sprintf inside InnoDB code as our own function ut_sprintf */
-#undef  sprintf
-#define sprintf    ut_sprintf
-
-#endif
+#endif /* #if (defined(WIN32) || ... */
 
 /*			DEBUG VERSION CONTROL
 			===================== */
@@ -88,10 +82,9 @@ memory is read outside the allocated blocks. */
 
 /*
 #define UNIV_DEBUG
-#define UNIV_SYNC_DEBUG
 #define UNIV_MEM_DEBUG
-
 #define UNIV_IBUF_DEBUG
+#define UNIV_SYNC_DEBUG
 #define UNIV_SEARCH_DEBUG
 #define UNIV_SYNC_PERF_STAT
 #define UNIV_SEARCH_PERF_STAT
@@ -182,27 +175,37 @@ management to ensure correct alignment for doubles etc. */
 */
 
 /* Note that inside MySQL 'byte' is defined as char on Linux! */
-#define byte	unsigned char
+#define byte			unsigned char
 
-/* Another basic type we use is unsigned long integer which is intended to be
-equal to the word size of the machine. */
+/* Another basic type we use is unsigned long integer which should be equal to
+the word size of the machine, that is on a 32-bit platform 32 bits, and on a
+64-bit platform 64 bits. We also give the printf format for the type as a
+macro ULINTPF. */
 
 #ifdef _WIN64
 typedef unsigned __int64	ulint;
+#define ULINTPF			"%I64u"
+typedef __int64			lint;
 #else
 typedef unsigned long int	ulint;
-#endif
-
+#define ULINTPF			"%lu"
 typedef long int		lint;
+#endif
 
 #ifdef __WIN__
-typedef __int64       ib_longlong;
+typedef __int64			ib_longlong;
 #else
-typedef longlong ib_longlong;
+typedef longlong		ib_longlong;
+#endif
+
+#ifndef __WIN__
+#if SIZEOF_LONG != SIZEOF_VOIDP
+#error "Error: InnoDB's ulint must be of the same size as void*"
+#endif
 #endif
 
 /* The following type should be at least a 64-bit floating point number */
-typedef double		utfloat;
+typedef double			utfloat;
 
 /* The 'undefined' value for a ulint */
 #define ULINT_UNDEFINED		((ulint)(-1))
@@ -215,7 +218,7 @@ typedef double		utfloat;
 
 /* This 'ibool' type is used within Innobase. Remember that different included
 headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
-#define ibool	ulint
+#define ibool			ulint
 
 #ifndef TRUE
 
diff --git a/innobase/include/ut0byte.h b/innobase/include/ut0byte.h
index 4fb45221899..4274956421e 100644
--- a/innobase/include/ut0byte.h
+++ b/innobase/include/ut0byte.h
@@ -152,7 +152,7 @@ ut_dulint_align_up(
 Increments a dulint variable by 1. */
 #define UT_DULINT_INC(D)\
 {\
-	if ((D).low == 0xFFFFFFFF) {\
+	if ((D).low == 0xFFFFFFFFUL) {\
 		(D).high = (D).high + 1;\
 		(D).low = 0;\
 	} else {\
diff --git a/innobase/include/ut0byte.ic b/innobase/include/ut0byte.ic
index f0df9cc35a3..5a70dcf12a8 100644
--- a/innobase/include/ut0byte.ic
+++ b/innobase/include/ut0byte.ic
@@ -152,13 +152,13 @@ ut_dulint_add(
 	dulint	a,	/* in: dulint */
 	ulint	b)	/* in: ulint */
 {
-	if (0xFFFFFFFF - b >= a.low) {
+	if (0xFFFFFFFFUL - b >= a.low) {
 		a.low += b;
 
 		return(a);
 	}
 
-	a.low = a.low - (0xFFFFFFFF - b) - 1;
+	a.low = a.low - (0xFFFFFFFFUL - b) - 1;
 
 	a.high++;
 
@@ -183,7 +183,7 @@ ut_dulint_subtract(
 
 	b -= a.low + 1;
 
-	a.low = 0xFFFFFFFF - b;
+	a.low = 0xFFFFFFFFUL - b;
 
 	ut_ad(a.high > 0);
 	
@@ -214,7 +214,7 @@ ut_dulint_minus(
 
 	ut_ad(a.high == b.high + 1);
 
-	diff = (ulint)(0xFFFFFFFF - b.low);
+	diff = (ulint)(0xFFFFFFFFUL - b.low);
 	diff += 1 + a.low;
 
 	ut_ad(diff > a.low);
diff --git a/innobase/include/ut0dbg.h b/innobase/include/ut0dbg.h
index bec9cdd42b5..085b4811a73 100644
--- a/innobase/include/ut0dbg.h
+++ b/innobase/include/ut0dbg.h
@@ -27,7 +27,7 @@ extern const char*	ut_dbg_msg_stop;
 	if (!((ulint)(EXPR) + ut_dbg_zero)) {\
                 ut_print_timestamp(stderr);\
 	   	fprintf(stderr, ut_dbg_msg_assert_fail,\
-		os_thread_pf(os_thread_get_curr_id()), __FILE__,\
+		os_thread_pf(os_thread_get_curr_id()), IB__FILE__,\
                 (ulint)__LINE__);\
 		fputs("InnoDB: Failing assertion: " #EXPR "\n", stderr);\
 		fputs(ut_dbg_msg_trap, stderr);\
@@ -36,7 +36,7 @@ extern const char*	ut_dbg_msg_stop;
 	}\
 	if (ut_dbg_stop_threads) {\
 	        fprintf(stderr, ut_dbg_msg_stop,\
-     os_thread_pf(os_thread_get_curr_id()), __FILE__, (ulint)__LINE__);\
+     os_thread_pf(os_thread_get_curr_id()), IB__FILE__, (ulint)__LINE__);\
 		os_thread_sleep(1000000000);\
 	}\
 } while (0)
@@ -44,21 +44,20 @@ extern const char*	ut_dbg_msg_stop;
 #define ut_error do {\
         ut_print_timestamp(stderr);\
 	fprintf(stderr, ut_dbg_msg_assert_fail,\
-	os_thread_pf(os_thread_get_curr_id()), __FILE__, (ulint)__LINE__);\
+	os_thread_pf(os_thread_get_curr_id()), IB__FILE__, (ulint)__LINE__);\
 	fprintf(stderr, ut_dbg_msg_trap);\
 	ut_dbg_stop_threads = TRUE;\
 	if (*(ut_dbg_null_ptr)) ut_dbg_null_ptr = NULL;\
 } while (0)
 
 #ifdef UNIV_DEBUG
-# define ut_ad(EXPR)  	ut_a(EXPR)
-# define ut_d(EXPR)	do {EXPR;} while (0)
+#define ut_ad(EXPR)  	ut_a(EXPR)
+#define ut_d(EXPR)	do {EXPR;} while (0)
 #else
-# define ut_ad(EXPR)
-# define ut_d(EXPR)
+#define ut_ad(EXPR)
+#define ut_d(EXPR)
 #endif
 
 #define UT_NOT_USED(A)	A = A
 
 #endif
-
diff --git a/innobase/include/ut0mem.h b/innobase/include/ut0mem.h
index fea6fc243d8..ce8aabeca41 100644
--- a/innobase/include/ut0mem.h
+++ b/innobase/include/ut0mem.h
@@ -50,6 +50,16 @@ ut_malloc(
 	                /* out, own: allocated memory */
         ulint   n);     /* in: number of bytes to allocate */
 /**************************************************************************
+Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs
+out. It cannot be used if we want to return an error message. Prints to
+stderr a message if fails. */
+
+ibool
+ut_test_malloc(
+/*===========*/
+			/* out: TRUE if succeeded */
+	ulint	n);	/* in: try to allocate this many bytes */
+/**************************************************************************
 Frees a memory bloock allocated with ut_malloc. */
 
 void
diff --git a/innobase/include/ut0ut.h b/innobase/include/ut0ut.h
index 8ec23b23dcd..4517b8f8d40 100644
--- a/innobase/include/ut0ut.h
+++ b/innobase/include/ut0ut.h
@@ -19,14 +19,47 @@ typedef time_t	ib_time_t;
 
 
 /************************************************************
-Uses vsprintf to emulate sprintf so that the function always returns
-the printed length. Apparently in some old SCO Unixes sprintf did not
-return the printed length but a pointer to the end of the printed string. */
+On the 64-bit Windows we substitute the format string
+%l -> %I64
+because we define ulint as unsigned __int64 and lint as __int64 on Windows,
+and both the Microsoft and Intel C compilers require the format string
+%I64 in that case instead of %l. */
 
-ulint
+int
+ut_printf(
+/*======*/
+			     /* out: the number of characters written, or
+			     negative in case of an error */
+        const char* format,  /* in: format of prints */
+        ...);                /* in: arguments to be printed */
+/************************************************************
+On the 64-bit Windows we substitute the format string
+%l -> %I64
+because we define ulint as unsigned __int64 and lint as __int64 on Windows,
+and both the Microsoft and Intel C compilers require the format string
+%I64 in that case instead of %l. */
+
+int
 ut_sprintf(
 /*=======*/
-        char*       buf,     /* in/out: buffer where to print */
+			     /* out: the number of characters written, or
+			     negative in case of an error */
+	char*	    buf,     /* in: buffer where to print */
+        const char* format,  /* in: format of prints */
+        ...);                /* in: arguments to be printed */
+/************************************************************
+On the 64-bit Windows we substitute the format string
+%l -> %I64
+because we define ulint as unsigned __int64 and lint as __int64 on Windows,
+and both the Microsoft and Intel C compilers require the format string
+%I64 in that case instead of %l. */
+
+int
+ut_fprintf(
+/*=======*/
+			     /* out: the number of characters written, or
+			     negative in case of an error */
+	FILE*	    stream,  /* in: stream where to print */
         const char* format,  /* in: format of prints */
         ...);                /* in: arguments to be printed */
 /************************************************************
@@ -139,7 +172,7 @@ void
 ut_ulint_sort(ulint* arr, ulint* aux_arr, ulint low, ulint high);
 /*============================================================*/
 /************************************************************
-The following function returns a clock time in milliseconds. */
+The following function returns elapsed CPU time in milliseconds. */
 
 ulint
 ut_clock(void);
@@ -174,6 +207,14 @@ ut_sprintf_timestamp(
 /*=================*/
 	char*	buf); /* in: buffer where to sprintf */
 /**************************************************************
+Sprintfs a timestamp to a buffer with no spaces and with ':' characters
+replaced by '_'. */
+
+void
+ut_sprintf_timestamp_without_extra_chars(
+/*=====================================*/
+	char*	buf); /* in: buffer where to sprintf */
+/**************************************************************
 Returns current year, month, day. */
 
 void
diff --git a/innobase/include/ut0ut.ic b/innobase/include/ut0ut.ic
index 9d7dd283f29..9a0ef1c0d5b 100644
--- a/innobase/include/ut0ut.ic
+++ b/innobase/include/ut0ut.ic
@@ -110,7 +110,7 @@ ut_2pow_remainder(
 	ulint	n,	/* in: number to be divided */
 	ulint	m)	/* in: divisor; power of 2 */
 {
-	ut_ad(0x80000000 % m == 0);
+	ut_ad(0x80000000UL % m == 0);
 
 	return(n & (m - 1));
 }
@@ -125,7 +125,7 @@ ut_2pow_round(
 	ulint	n,	/* in: number to be rounded */
 	ulint	m)	/* in: divisor; power of 2 */
 {
-	ut_ad(0x80000000 % m == 0);
+	ut_ad(0x80000000UL % m == 0);
 
 	return(n & ~(m - 1));
 }
diff --git a/innobase/lock/lock0lock.c b/innobase/lock/lock0lock.c
index c706ebceaec..2430380d65c 100644
--- a/innobase/lock/lock0lock.c
+++ b/innobase/lock/lock0lock.c
@@ -375,10 +375,10 @@ lock_check_trx_id_sanity(
 "InnoDB: is %lu %lu which is higher than the global trx id counter %lu %lu!\n"
 "InnoDB: The table is corrupt. You have to do dump + drop + reimport.\n",
 			       err_buf, index->table_name, index->name,
-			       ut_dulint_get_high(trx_id),
-			       ut_dulint_get_low(trx_id),
-			       ut_dulint_get_high(trx_sys->max_trx_id),
-			       ut_dulint_get_low(trx_sys->max_trx_id));
+			       (ulong) ut_dulint_get_high(trx_id),
+			       (ulong) ut_dulint_get_low(trx_id),
+			       (ulong) ut_dulint_get_high(trx_sys->max_trx_id),
+			       (ulong) ut_dulint_get_low(trx_sys->max_trx_id));
 
 		is_ok = FALSE;
 	}
@@ -1657,7 +1657,8 @@ index->table_name);
 
 	if (lock_print_waits) {
 		printf("Lock wait for trx %lu in index %s\n",
-				ut_dulint_get_low(trx->id), index->name);
+		       (ulong) ut_dulint_get_low(trx->id),
+		       index->name);
 	}
 	
 	return(DB_LOCK_WAIT);	
@@ -1996,7 +1997,7 @@ lock_grant(
 
 	if (lock_print_waits) {
 		printf("Lock wait for trx %lu ends\n",
-					ut_dulint_get_low(lock->trx->id));
+		       (ulong) ut_dulint_get_low(lock->trx->id));
 	}
 
 	/* If we are resolving a deadlock by choosing another transaction
@@ -3564,7 +3565,8 @@ lock_release_off_kernel(
 			ut_ad(lock_get_type(lock) == LOCK_TABLE);
 
 			if (lock_get_mode(lock) != LOCK_IS
-			    && (trx->insert_undo || trx->update_undo)) {
+			    && 0 != ut_dulint_cmp(trx->undo_no,
+						  ut_dulint_zero)) {
 
 				/* The trx may have modified the table.
 				We block the use of the MySQL query cache
@@ -3713,7 +3715,7 @@ lock_table_print(
 
 	buf += sprintf(buf, "TABLE LOCK table %s trx id %lu %lu",
 		lock->un_member.tab_lock.table->name,
-		(lock->trx)->id.high, (lock->trx)->id.low);
+		(ulong) (lock->trx)->id.high, (ulong) (lock->trx)->id.low);
 
 	if (lock_get_mode(lock) == LOCK_S) {
 		buf += sprintf(buf, " lock mode S");
@@ -3727,7 +3729,7 @@ lock_table_print(
 		buf += sprintf(buf, " lock_mode AUTO-INC");
 	} else {
 		buf += sprintf(buf,
-			" unknown lock_mode %lu", lock_get_mode(lock));
+			" unknown lock_mode %lu", (ulong) lock_get_mode(lock));
 	}
 
 	if (lock_get_wait(lock)) {
@@ -3764,11 +3766,13 @@ lock_rec_print(
  	page_no = lock->un_member.rec_lock.page_no;
 
 	buf += sprintf(buf, "RECORD LOCKS space id %lu page no %lu n bits %lu",
-		    space, page_no, lock_rec_get_n_bits(lock));
+		       (ulong) space, (ulong) page_no,
+		       (ulong) lock_rec_get_n_bits(lock));
 
 	buf += sprintf(buf, " table %s index %s trx id %lu %lu",
-		lock->index->table->name, lock->index->name,
-		(lock->trx)->id.high, (lock->trx)->id.low);
+		       lock->index->table->name, lock->index->name,
+		       (ulong) (lock->trx)->id.high,
+		       (ulong) (lock->trx)->id.low);
 
 	if (lock_get_mode(lock) == LOCK_S) {
 		buf += sprintf(buf, " lock mode S");
@@ -3838,7 +3842,8 @@ lock_rec_print(
 	
 		if (lock_rec_get_nth_bit(lock, i)) {
 
-			buf += sprintf(buf, "Record lock, heap no %lu ", i);
+			buf += sprintf(buf, "Record lock, heap no %lu ",
+				       (ulong) i);
 
 			if (page) {
 				buf += rec_sprintf(buf, 120,
@@ -3943,19 +3948,19 @@ lock_print_info(
 "------------\n");
 
 	buf += sprintf(buf, "Trx id counter %lu %lu\n", 
-		ut_dulint_get_high(trx_sys->max_trx_id),
-		ut_dulint_get_low(trx_sys->max_trx_id));
+		       (ulong) ut_dulint_get_high(trx_sys->max_trx_id),
+		       (ulong) ut_dulint_get_low(trx_sys->max_trx_id));
 
 	buf += sprintf(buf,
 	"Purge done for trx's n:o < %lu %lu undo n:o < %lu %lu\n",
-		ut_dulint_get_high(purge_sys->purge_trx_no),
-		ut_dulint_get_low(purge_sys->purge_trx_no),
-		ut_dulint_get_high(purge_sys->purge_undo_no),
-		ut_dulint_get_low(purge_sys->purge_undo_no));
+		(ulong) ut_dulint_get_high(purge_sys->purge_trx_no),
+		(ulong) ut_dulint_get_low(purge_sys->purge_trx_no),
+		(ulong) ut_dulint_get_high(purge_sys->purge_undo_no),
+		(ulong) ut_dulint_get_low(purge_sys->purge_undo_no));
 
 	buf += sprintf(buf,
 		"Total number of lock structs in row lock hash table %lu\n",
-						lock_get_n_rec_locks());
+					 (ulong) lock_get_n_rec_locks());
 
 	buf += sprintf(buf, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
 
@@ -4027,16 +4032,16 @@ loop:
 	        if (trx->read_view) {
 	  	        buf += sprintf(buf,
        "Trx read view will not see trx with id >= %lu %lu, sees < %lu %lu\n",
-		       	ut_dulint_get_high(trx->read_view->low_limit_id),
-       			ut_dulint_get_low(trx->read_view->low_limit_id),
-       			ut_dulint_get_high(trx->read_view->up_limit_id),
-       			ut_dulint_get_low(trx->read_view->up_limit_id));
+		      (ulong) ut_dulint_get_high(trx->read_view->low_limit_id),
+       		      (ulong) ut_dulint_get_low(trx->read_view->low_limit_id),
+       		      (ulong) ut_dulint_get_high(trx->read_view->up_limit_id),
+       		      (ulong) ut_dulint_get_low(trx->read_view->up_limit_id));
 	        }
 
 		if (trx->que_state == TRX_QUE_LOCK_WAIT) {
 			buf += sprintf(buf,
  "------- TRX HAS BEEN WAITING %lu SEC FOR THIS LOCK TO BE GRANTED:\n",
-		   (ulint)difftime(time(NULL), trx->wait_started));
+		   (ulong)difftime(time(NULL), trx->wait_started));
 
 			if (lock_get_type(trx->wait_lock) == LOCK_REC) {
 				lock_rec_print(buf, trx->wait_lock);
@@ -4333,7 +4338,8 @@ loop:
 			index = lock->index;
 			rec = page_find_rec_with_heap_no(page, i);
 
-			printf("Validating %lu %lu\n", space, page_no);
+			printf("Validating %lu %lu\n", (ulong) space,
+			       (ulong) page_no);
 
 			lock_mutex_exit_kernel();
 
diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c
index ec0db57564a..095d84f6527 100644
--- a/innobase/log/log0log.c
+++ b/innobase/log/log0log.c
@@ -24,7 +24,8 @@ Created 12/9/1995 Heikki Tuuri
 #include "trx0sys.h"
 #include "trx0trx.h"
 
-/* Current free limit; protected by the log sys mutex; 0 means uninitialized */
+/* Current free limit of space 0; protected by the log sys mutex; 0 means
+uninitialized */
 ulint	log_fsp_current_free_limit		= 0;
 
 /* Global log system variable */
@@ -94,14 +95,6 @@ static
 void
 log_io_complete_archive(void);
 /*=========================*/
-/********************************************************************
-Tries to establish a big enough margin of free space in the log groups, such
-that a new log entry can be catenated without an immediate need for a
-archiving. */
-static
-void
-log_archive_margin(void);
-/*====================*/
 
 /********************************************************************
 Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
@@ -197,11 +190,10 @@ loop:
 
 	if (log->archiving_state != LOG_ARCH_OFF) {
 	
-		archived_lsn_age = ut_dulint_minus(log->lsn, log->archived_lsn);
-	
+		archived_lsn_age = ut_dulint_minus(log->lsn,
+							log->archived_lsn);
 		if (archived_lsn_age + len_upper_limit
 						> log->max_archived_lsn_age) {
-	
 			/* Not enough free archived space in log groups: do a
 			synchronous archive write batch: */
 	
@@ -356,7 +348,8 @@ log_close(void)
 "InnoDB: If you are using big BLOB or TEXT rows, you must set the\n"
 "InnoDB: combined size of log files at least 10 times bigger than the\n"
 "InnoDB: largest such row.\n",
-			checkpoint_age, log->log_group_capacity);
+				(ulong) checkpoint_age,
+				(ulong) log->log_group_capacity);
 		}
 	}
 
@@ -478,7 +471,8 @@ ulint
 log_group_calc_lsn_offset(
 /*======================*/
 				/* out: offset within the log group */
-	dulint		lsn,	/* in: lsn, must be within 4 GB of group->lsn */
+	dulint		lsn,	/* in: lsn, must be within 4 GB of
+				group->lsn */
 	log_group_t*	group)	/* in: log group */
 {
         dulint	        gr_lsn;
@@ -771,7 +765,8 @@ log_init(void)
 	memset(log_sys->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE);
 	/*----------------------------*/
 
-	log_sys->archiving_state = LOG_ARCH_ON;
+	/* Under MySQL, log archiving is always off */
+	log_sys->archiving_state = LOG_ARCH_OFF;
 	log_sys->archived_lsn = log_sys->lsn;
 	log_sys->next_archived_lsn = ut_dulint_zero;
 
@@ -780,13 +775,15 @@ log_init(void)
 	rw_lock_create(&(log_sys->archive_lock));
 	rw_lock_set_level(&(log_sys->archive_lock), SYNC_NO_ORDER_CHECK);
 
-	log_sys->archive_buf = ut_align(
+	log_sys->archive_buf = NULL;
+
+			/* ut_align(
 				ut_malloc(LOG_ARCHIVE_BUF_SIZE
 					  + OS_FILE_LOG_BLOCK_SIZE),
-						OS_FILE_LOG_BLOCK_SIZE);
-	log_sys->archive_buf_size = LOG_ARCHIVE_BUF_SIZE;
+						OS_FILE_LOG_BLOCK_SIZE); */
+	log_sys->archive_buf_size = 0;
 
-	memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE);
+	/* memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); */
 
 	log_sys->archiving_on = os_event_create(NULL);
 
@@ -933,7 +930,8 @@ log_group_check_flush_completion(
 	if (!log_sys->one_flushed && group->n_pending_writes == 0) {
 
 		if (log_debug_writes) {
-			printf("Log flushed first to group %lu\n", group->id);
+			printf("Log flushed first to group %lu\n",
+			       (ulong) group->id);
 		}
 	
 		log_sys->written_to_some_lsn = log_sys->write_lsn;
@@ -944,7 +942,7 @@ log_group_check_flush_completion(
 
 	if (log_debug_writes && (group->n_pending_writes == 0)) {
 
-		printf("Log flushed to group %lu\n", group->id);
+		printf("Log flushed to group %lu\n", (ulong) group->id);
 	}
 
 	return(0);
@@ -1011,7 +1009,7 @@ log_io_complete(
 		return;
 	}
 
-	if ((ulint)group & 0x1) {
+	if ((ulint)group & 0x1UL) {
 		/* It was a checkpoint write */
 		group = (log_group_t*)((ulint)group - 1);
 
@@ -1088,8 +1086,8 @@ log_group_file_header_flush(
 
 	if (log_debug_writes) {
 		printf(
-		"Writing log file header to group %lu file %lu\n", group->id,
-								nth_file);
+		"Writing log file header to group %lu file %lu\n",
+		(ulong) group->id, (ulong) nth_file);
 	}
 
 	if (log_do_write) {
@@ -1169,7 +1167,8 @@ loop:
 
 	if ((next_offset % group->file_size) + len > group->file_size) {
 
-		write_len = group->file_size - (next_offset % group->file_size);
+		write_len = group->file_size
+					- (next_offset % group->file_size);
 	} else {
 		write_len = len;
 	}
@@ -1179,13 +1178,14 @@ loop:
 		printf(
 		"Writing log file segment to group %lu offset %lu len %lu\n"
 		"start lsn %lu %lu\n",
-			group->id, next_offset, write_len,
-			ut_dulint_get_high(start_lsn),
-			ut_dulint_get_low(start_lsn));
+			(ulong) group->id, (ulong) next_offset,
+		        (ulong) write_len,
+			(ulong) ut_dulint_get_high(start_lsn),
+			(ulong) ut_dulint_get_low(start_lsn));
 		printf(
 		"First block n:o %lu last block n:o %lu\n",
-			log_block_get_hdr_no(buf),
-			log_block_get_hdr_no(
+			(ulong) log_block_get_hdr_no(buf),
+			(ulong) log_block_get_hdr_no(
 				buf + write_len - OS_FILE_LOG_BLOCK_SIZE));
 		ut_a(log_block_get_hdr_no(buf)
 			== log_block_convert_lsn_to_no(start_lsn));
@@ -1326,10 +1326,10 @@ loop:
 
 	if (log_debug_writes) {
 		printf("Writing log from %lu %lu up to lsn %lu %lu\n",
-			ut_dulint_get_high(log_sys->written_to_all_lsn),
-			ut_dulint_get_low(log_sys->written_to_all_lsn),
-					ut_dulint_get_high(log_sys->lsn),
-					ut_dulint_get_low(log_sys->lsn));
+			(ulong) ut_dulint_get_high(log_sys->written_to_all_lsn),
+			(ulong) ut_dulint_get_low(log_sys->written_to_all_lsn),
+			(ulong) ut_dulint_get_high(log_sys->lsn),
+			(ulong)	ut_dulint_get_low(log_sys->lsn));
 	}
 
 	log_sys->n_pending_writes++;
@@ -1567,7 +1567,8 @@ log_io_complete_checkpoint(
 	log_sys->n_pending_checkpoint_writes--;
 
 	if (log_debug_writes) {
-		printf("Checkpoint info written to group %lu\n", group->id);
+		printf("Checkpoint info written to group %lu\n",
+		       (ulong) group->id);
 	}
 
 	if (log_sys->n_pending_checkpoint_writes == 0) {
@@ -1722,7 +1723,7 @@ log_group_checkpoint(
 				OS_FILE_LOG_BLOCK_SIZE,
 				buf, ((byte*)group + 1));
 
-		ut_ad(((ulint)group & 0x1) == 0);
+		ut_ad(((ulint)group & 0x1UL) == 0);
 	}
 }
 
@@ -1896,9 +1897,9 @@ log_checkpoint(
 
 	if (log_debug_writes) {
 		printf("Making checkpoint no %lu at lsn %lu %lu\n",
-			ut_dulint_get_low(log_sys->next_checkpoint_no),
-			ut_dulint_get_high(oldest_lsn),
-			ut_dulint_get_low(oldest_lsn));
+			(ulong) ut_dulint_get_low(log_sys->next_checkpoint_no),
+			(ulong) ut_dulint_get_high(oldest_lsn),
+			(ulong) ut_dulint_get_low(oldest_lsn));
 	}
 
 	log_groups_write_checkpoint_info();
@@ -2125,9 +2126,11 @@ log_archived_file_name_gen(
 	ulint	id,	/* in: group id */
 	ulint	file_no)/* in: file number */
 {
+	ut_a(0);
+
 	UT_NOT_USED(id);	/* Currently we only archive the first group */
 	
-	sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, file_no);
+	sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, (ulong) file_no);
 }
 
 /**********************************************************
@@ -2147,6 +2150,8 @@ log_group_archive_file_header_write(
 	ulint	dest_offset;
 
 #ifdef UNIV_SYNC_DEBUG
+	ut_a(0);
+
 	ut_ad(mutex_own(&(log_sys->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
 
@@ -2186,6 +2191,8 @@ log_group_archive_completed_header_write(
 	ulint	dest_offset;
 
 #ifdef UNIV_SYNC_DEBUG
+	ut_a(0);
+
 	ut_ad(mutex_own(&(log_sys->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
 	ut_a(nth_file < group->n_files);
@@ -2227,6 +2234,8 @@ log_group_archive(
 	ulint	open_mode;
 	
 #ifdef UNIV_SYNC_DEBUG
+	ut_a(0);
+
 	ut_ad(mutex_own(&(log_sys->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
 
@@ -2258,7 +2267,6 @@ loop:
 	
 		log_archived_file_name_gen(name, group->id,
 					group->archived_file_no + n_files);
-		fil_reserve_right_to_open();
 
 		file_handle = os_file_create(name, open_mode, OS_FILE_AIO,
 						OS_DATA_FILE, &ret);
@@ -2269,10 +2277,10 @@ loop:
 		}
 
 		if (!ret) {
-		  fprintf(stderr,
+			fprintf(stderr,
 		   "InnoDB: Cannot create or open archive log file %s.\n",
 			  name);
-		  fprintf(stderr, "InnoDB: Cannot continue operation.\n"
+			fprintf(stderr, "InnoDB: Cannot continue operation.\n"
        		  "InnoDB: Check that the log archive directory exists,\n"
 			  "InnoDB: you have access rights to it, and\n"
 			  "InnoDB: there is space available.\n");
@@ -2287,12 +2295,10 @@ loop:
 	
 		ut_a(ret);
 	
-		fil_release_right_to_open();
-	
 		/* Add the archive file as a node to the space */
 		
 		fil_node_create(name, group->file_size / UNIV_PAGE_SIZE,
-						group->archive_space_id);
+					group->archive_space_id, FALSE);
 
 		if (next_offset % group->file_size == 0) {
 			log_group_archive_file_header_write(group, n_files,
@@ -2313,9 +2319,9 @@ loop:
 	if (log_debug_writes) {
 		printf(
 		"Archiving starting at lsn %lu %lu, len %lu to group %lu\n",
-					ut_dulint_get_high(start_lsn),
-					ut_dulint_get_low(start_lsn),
-					len, group->id);
+					(ulong) ut_dulint_get_high(start_lsn),
+					(ulong) ut_dulint_get_low(start_lsn),
+					(ulong) len, (ulong) group->id);
 	}
 
 	log_sys->n_pending_archive_ios++;
@@ -2357,6 +2363,8 @@ log_archive_groups(void)
 	log_group_t*	group;
 
 #ifdef UNIV_SYNC_DEBUG
+	ut_a(0);
+
 	ut_ad(mutex_own(&(log_sys->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
 
@@ -2382,6 +2390,8 @@ log_archive_write_complete_groups(void)
 	ulint		i;
 
 #ifdef UNIV_SYNC_DEBUG
+	ut_a(0);
+
 	ut_ad(mutex_own(&(log_sys->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
 
@@ -2409,7 +2419,7 @@ log_archive_write_complete_groups(void)
 
 	if (log_debug_writes && trunc_files) {
 		printf("Complete file(s) archived to group %lu\n",
-								group->id);
+							  (ulong) group->id);
 	}
 
 	/* Calculate the archive file space start lsn */
@@ -2446,6 +2456,8 @@ log_archive_check_completion_low(void)
 /*==================================*/
 {
 #ifdef UNIV_SYNC_DEBUG
+	ut_a(0);
+
 	ut_ad(mutex_own(&(log_sys->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
 
@@ -2483,6 +2495,8 @@ log_io_complete_archive(void)
 {
 	log_group_t*	group;
 
+	ut_a(0);
+
 	mutex_enter(&(log_sys->mutex));
 
 	group = UT_LIST_GET_FIRST(log_sys->log_groups);
@@ -2518,6 +2532,8 @@ log_archive_do(
 	dulint	start_lsn;
 	dulint	limit_lsn;
 	
+	ut_a(0);
+
 	calc_new_limit = TRUE;
 loop:	
 	mutex_enter(&(log_sys->mutex));
@@ -2544,7 +2560,7 @@ loop:
 	start_lsn = log_sys->archived_lsn;
 	
 	if (calc_new_limit) {
-		ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
+		ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
 		limit_lsn = ut_dulint_add(start_lsn,
 						log_sys->archive_buf_size);
 
@@ -2600,10 +2616,10 @@ loop:
 
 	if (log_debug_writes) {
 		printf("Archiving from lsn %lu %lu to lsn %lu %lu\n",
-			ut_dulint_get_high(log_sys->archived_lsn),
-			ut_dulint_get_low(log_sys->archived_lsn),
-			ut_dulint_get_high(limit_lsn),
-			ut_dulint_get_low(limit_lsn));
+			(ulong) ut_dulint_get_high(log_sys->archived_lsn),
+			(ulong) ut_dulint_get_low(log_sys->archived_lsn),
+			(ulong) ut_dulint_get_high(limit_lsn),
+			(ulong) ut_dulint_get_low(limit_lsn));
 	}
 
 	/* Read the log segment to the archive buffer */
@@ -2643,6 +2659,8 @@ log_archive_all(void)
 		return;
 	}
 
+	ut_a(0);
+
 	present_lsn = log_sys->lsn;
 
 	mutex_exit(&(log_sys->mutex));
@@ -2682,11 +2700,17 @@ log_archive_close_groups(
 	ut_ad(mutex_own(&(log_sys->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
 
+	if (log_sys->archiving_state == LOG_ARCH_OFF) {
+
+		return;
+	}
+
+	ut_a(0);
+
 	group = UT_LIST_GET_FIRST(log_sys->log_groups);
 
 	trunc_len = UNIV_PAGE_SIZE
 			    * fil_space_get_size(group->archive_space_id);
-
 	if (trunc_len > 0) {
 		ut_a(trunc_len == group->file_size);
 			    
@@ -2706,7 +2730,8 @@ log_archive_close_groups(
 		if (log_debug_writes) {
 			printf(
 			"Incrementing arch file no to %lu in log group %lu\n",
-				group->archived_file_no + 2, group->id);
+				(ulong) group->archived_file_no + 2,
+			        (ulong) group->id);
 		}
 	}
 }
@@ -2714,17 +2739,18 @@ log_archive_close_groups(
 /********************************************************************
 Writes the log contents to the archive up to the lsn when this function was
 called, and stops the archiving. When archiving is started again, the archived
-log file numbers start from 2 higher, so that the archiving will
-not write again to the archived log files which exist when this function
-returns. */
+log file numbers start from 2 higher, so that the archiving will not write
+again to the archived log files which exist when this function returns. */
 
 ulint
 log_archive_stop(void)
 /*==================*/
-				/* out: DB_SUCCESS or DB_ERROR */
+			/* out: DB_SUCCESS or DB_ERROR */
 {
 	ibool	success;
 
+	ut_a(0);
+
 	mutex_enter(&(log_sys->mutex));
 
 	if (log_sys->archiving_state != LOG_ARCH_ON) {
@@ -2737,7 +2763,7 @@ log_archive_stop(void)
 	log_sys->archiving_state = LOG_ARCH_STOPPING;
 	
 	mutex_exit(&(log_sys->mutex));
-	
+
 	log_archive_all();
 
 	mutex_enter(&(log_sys->mutex));
@@ -2758,7 +2784,7 @@ log_archive_stop(void)
 	if appropriate */
 
 	log_archive_close_groups(TRUE);
-	
+
 	mutex_exit(&(log_sys->mutex));
 
 	/* Make a checkpoint, so that if recovery is needed, the file numbers
@@ -2787,6 +2813,8 @@ log_archive_start(void)
 /*===================*/
 			/* out: DB_SUCCESS or DB_ERROR */
 {
+	ut_a(0);
+
 	mutex_enter(&(log_sys->mutex));
 
 	if (log_sys->archiving_state != LOG_ARCH_STOPPED) {
@@ -2813,6 +2841,7 @@ log_archive_noarchivelog(void)
 /*==========================*/
 			/* out: DB_SUCCESS or DB_ERROR */
 {
+	ut_a(0);
 loop:
 	mutex_enter(&(log_sys->mutex));
 
@@ -2845,6 +2874,7 @@ log_archive_archivelog(void)
 /*========================*/
 			/* out: DB_SUCCESS or DB_ERROR */
 {
+	ut_a(0);
 	mutex_enter(&(log_sys->mutex));
 
 	if (log_sys->archiving_state == LOG_ARCH_OFF) {
@@ -2852,7 +2882,7 @@ log_archive_archivelog(void)
 		log_sys->archiving_state = LOG_ARCH_ON;
 
 		log_sys->archived_lsn = ut_dulint_align_down(log_sys->lsn,
-						OS_FILE_LOG_BLOCK_SIZE);	
+						OS_FILE_LOG_BLOCK_SIZE);
 		mutex_exit(&(log_sys->mutex));
 
 		return(DB_SUCCESS);
@@ -2863,6 +2893,7 @@ log_archive_archivelog(void)
 	return(DB_ERROR);	
 }
 
+#ifdef notdefined
 /********************************************************************
 Tries to establish a big enough margin of free space in the log groups, such
 that a new log entry can be catenated without an immediate need for
@@ -2916,6 +2947,7 @@ loop:
 		goto loop;
 	}
 }
+#endif
 
 /************************************************************************
 Checks that there is enough free space in the log to start a new query step.
@@ -2932,7 +2964,7 @@ loop:
 
 	log_checkpoint_margin();
 
-	log_archive_margin();
+	/* log_archive_margin(); */
 
 	mutex_enter(&(log_sys->mutex));
 	
@@ -3070,7 +3102,7 @@ loop:
 		goto loop;
 	}
 
-	log_archive_all();
+	/* log_archive_all(); */
 	log_make_checkpoint_at(ut_dulint_max, TRUE);
 
 	mutex_enter(&(log_sys->mutex));
@@ -3088,15 +3120,16 @@ loop:
 	    	goto loop;
 	}    
 
-	arch_log_no =
+	arch_log_no = 0;
+/*
 		UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no;
 		
 	if (0 == UT_LIST_GET_FIRST(log_sys->log_groups)->archived_offset) {
 	
 		arch_log_no--;
 	}
-	
-	log_archive_close_groups(TRUE);
+*/	
+	/* log_archive_close_groups(TRUE); */
 
 	mutex_exit(&(log_sys->mutex));
 
@@ -3145,10 +3178,24 @@ loop:
 	ut_a(buf_all_freed());
 	ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn));
 
+	if (ut_dulint_cmp(lsn, srv_start_lsn) < 0) {
+		fprintf(stderr,
+"InnoDB: Error: log sequence number at shutdown %lu %lu\n"
+"InnoDB: is lower than at startup %lu %lu!\n",
+			  (ulong) ut_dulint_get_high(lsn),
+			  (ulong) ut_dulint_get_low(lsn),
+			  (ulong) ut_dulint_get_high(srv_start_lsn),
+			  (ulong) ut_dulint_get_low(srv_start_lsn));
+	}
+
+	srv_shutdown_lsn = lsn;
+
 	fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);	
 
 	fil_flush_file_spaces(FIL_TABLESPACE);
 
+	fil_close_all_files();
+
 	/* Make some checks that the server really is quiet */
 	ut_a(srv_n_threads_active[SRV_MASTER] == 0);
 	ut_a(buf_all_freed());
@@ -3192,8 +3239,8 @@ log_check_log_recs(
 	ut_memcpy(scan_buf, start, end - start);
 	
 	recv_scan_log_recs(TRUE,
-				buf_pool_get_curr_size() -
-				RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE,	
+				(buf_pool->n_frames -
+				recv_n_pool_free_frames) * UNIV_PAGE_SIZE,	
 				FALSE, scan_buf, end - start,
 				ut_dulint_align_down(buf_start_lsn,
 						OS_FILE_LOG_BLOCK_SIZE),
@@ -3252,12 +3299,12 @@ log_print(
 	buf += sprintf(buf, "Log sequence number %lu %lu\n"
 	       "Log flushed up to   %lu %lu\n"
 	       "Last checkpoint at  %lu %lu\n",
-			ut_dulint_get_high(log_sys->lsn),
-			ut_dulint_get_low(log_sys->lsn),
-			ut_dulint_get_high(log_sys->flushed_to_disk_lsn),
-			ut_dulint_get_low(log_sys->flushed_to_disk_lsn),
-			ut_dulint_get_high(log_sys->last_checkpoint_lsn),
-			ut_dulint_get_low(log_sys->last_checkpoint_lsn));
+			(ulong) ut_dulint_get_high(log_sys->lsn),
+			(ulong) ut_dulint_get_low(log_sys->lsn),
+			(ulong) ut_dulint_get_high(log_sys->flushed_to_disk_lsn),
+			(ulong) ut_dulint_get_low(log_sys->flushed_to_disk_lsn),
+			(ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn),
+			(ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn));
 
 	current_time = time(NULL);
 			
@@ -3266,10 +3313,10 @@ log_print(
 	buf += sprintf(buf,
 	"%lu pending log writes, %lu pending chkp writes\n"
 	"%lu log i/o's done, %.2f log i/o's/second\n",
-	log_sys->n_pending_writes,
-	log_sys->n_pending_checkpoint_writes,
-	log_sys->n_log_ios,
-	(log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed);
+	(ulong) log_sys->n_pending_writes,
+	(ulong) log_sys->n_pending_checkpoint_writes,
+	(ulong) log_sys->n_log_ios,
+	((log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed));
 
 	log_sys->n_log_ios_old = log_sys->n_log_ios;
 	log_sys->last_printout_time = current_time;
diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c
index 323d6c63f71..f80181cc207 100644
--- a/innobase/log/log0recv.c
+++ b/innobase/log/log0recv.c
@@ -17,6 +17,7 @@ Created 9/20/1997 Heikki Tuuri
 #include "buf0flu.h"
 #include "buf0rea.h"
 #include "srv0srv.h"
+#include "srv0start.h"
 #include "mtr0mtr.h"
 #include "mtr0log.h"
 #include "page0page.h"
@@ -33,6 +34,11 @@ Created 9/20/1997 Heikki Tuuri
 #include "dict0boot.h"
 #include "fil0fil.h"
 
+/* This is set to FALSE if the backup was originally taken with the
+ibbackup --include regexp option: then we do not want to create tables in
+directories which were not included */
+ibool	recv_replay_file_ops	= TRUE;
+
 /* Log records are stored in the hash table in chunks at most of this size;
 this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
 #define RECV_DATA_BLOCK_SIZE	(MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
@@ -73,6 +79,13 @@ ulint	recv_previous_parsed_rec_is_multi = 0;
 
 ulint	recv_max_parsed_page_no		= 0;
 
+/* This many frames must be left free in the buffer pool when we scan
+the log and store the scanned log records in the buffer pool: we will
+use these free frames to read in pages when we start applying the
+log records to the database. */
+
+ulint  recv_n_pool_free_frames         = 256;
+
 /* The maximum lsn we see for a page during the recovery process. If this
 is bigger than the lsn we are able to scan up to, that is an indication that
 the recovery failed and the database may be corrupt. */
@@ -159,7 +172,8 @@ recv_sys_empty_hash(void)
 		fprintf(stderr,
 "InnoDB: Error: %lu pages with log records were left unprocessed!\n"
 "InnoDB: Maximum page number with log records on it %lu\n",
-			recv_sys->n_addrs, recv_max_parsed_page_no);
+			(ulong) recv_sys->n_addrs, 
+			(ulong) recv_max_parsed_page_no);
 		ut_error;
 	}
 	
@@ -300,7 +314,8 @@ recv_copy_group(
 /*============*/
 	log_group_t*	up_to_date_group,	/* in: the most up-to-date log
 						group */
-	log_group_t*	group,			/* in: copy to this log group */
+	log_group_t*	group,			/* in: copy to this log
+						group */
 	dulint		recovered_lsn)		/* in: recovery succeeded up
 						to this lsn */
 {
@@ -366,7 +381,8 @@ recv_synchronize_groups(
 	/* Read the last recovered log block to the recovery system buffer:
 	the block is always incomplete */
 
-	start_lsn = ut_dulint_align_down(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
+	start_lsn = ut_dulint_align_down(recovered_lsn,
+						OS_FILE_LOG_BLOCK_SIZE);
 	end_lsn = ut_dulint_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
 
 	ut_a(ut_dulint_cmp(start_lsn, end_lsn) != 0);
@@ -422,7 +438,7 @@ recv_check_cp_is_consistent(
 
 	fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
 
-	if ((fold & 0xFFFFFFFF) != mach_read_from_4(buf
+	if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(buf
 				+ LOG_CHECKPOINT_CHECKSUM_1)) {		
 		return(FALSE);
 	}
@@ -430,7 +446,7 @@ recv_check_cp_is_consistent(
 	fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
 			LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
 
-	if ((fold & 0xFFFFFFFF) != mach_read_from_4(buf
+	if ((fold & 0xFFFFFFFFUL) != mach_read_from_4(buf
 					+ LOG_CHECKPOINT_CHECKSUM_2)) {
 		return(FALSE);
 	}
@@ -474,8 +490,9 @@ recv_find_max_checkpoint(
 				if (log_debug_writes) {
 					fprintf(stderr, 
 	    "InnoDB: Checkpoint in group %lu at %lu invalid, %lu\n",
-						group->id, field,
-                                 mach_read_from_4(buf
+						(ulong) group->id,
+						(ulong) field,
+                                 (ulong) mach_read_from_4(buf
 					      + LOG_CHECKPOINT_CHECKSUM_1));
 
 				}
@@ -495,7 +512,8 @@ recv_find_max_checkpoint(
 			if (log_debug_writes) {
 				fprintf(stderr, 
 			"InnoDB: Checkpoint number %lu found in group %lu\n",
-				ut_dulint_get_low(checkpoint_no), group->id);
+				(ulong) ut_dulint_get_low(checkpoint_no),
+				(ulong) group->id);
 			}
 				
 			if (ut_dulint_cmp(checkpoint_no, max_no) >= 0) {
@@ -537,8 +555,8 @@ recv_read_cp_info_for_backup(
 	byte*	hdr,	/* in: buffer containing the log group header */
 	dulint*	lsn,	/* out: checkpoint lsn */
 	ulint*	offset,	/* out: checkpoint offset in the log group */
-	ulint*	fsp_limit,/* out: fsp limit, 1000000000 if the database
-			is running with < version 3.23.50 of InnoDB */
+	ulint*	fsp_limit,/* out: fsp limit of space 0, 1000000000 if the
+			database is running with < version 3.23.50 of InnoDB */
 	dulint*	cp_no,	/* out: checkpoint number */
 	dulint*	first_header_lsn)
 			/* out: lsn of of the start of the first log file */
@@ -683,7 +701,7 @@ recv_scan_log_seg_for_backup(
 						< *scanned_checkpoint_no
 		    && *scanned_checkpoint_no
 			- log_block_get_checkpoint_no(log_block)
-							> 0x80000000) {
+							> 0x80000000UL) {
 
 			/* Garbage from a log buffer flush which was made
 			before the most recent database recovery */
@@ -715,7 +733,7 @@ recv_scan_log_seg_for_backup(
 
 /***********************************************************************
 Tries to parse a single log record body and also applies it to a page if
-specified. */
+specified. File ops are parsed, but not applied in this function. */
 static
 byte*
 recv_parse_or_apply_log_rec_body(
@@ -792,8 +810,14 @@ recv_parse_or_apply_log_rec_body(
 	} else if (type == MLOG_INIT_FILE_PAGE) {
 		new_ptr = fsp_parse_init_file_page(ptr, end_ptr, page);
 
-	} else if (type <= MLOG_WRITE_STRING) {
+	} else if (type == MLOG_WRITE_STRING) {
 		new_ptr = mlog_parse_string(ptr, end_ptr, page);
+
+	} else if (type == MLOG_FILE_CREATE
+		   || type == MLOG_FILE_RENAME
+		   || type == MLOG_FILE_DELETE) {
+		new_ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE,
+							ULINT_UNDEFINED);
 	} else {
 		new_ptr = NULL;
 		 
@@ -880,9 +904,14 @@ recv_add_to_hash_table(
 	recv_data_t*	recv_data;
 	recv_data_t**	prev_field;
 	recv_addr_t*	recv_addr;
-
-	ut_a(space == 0); /* For debugging; TODO: remove this */
 	
+	if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
+		/* The tablespace does not exist any more: do not store the
+		log record */
+
+		return;
+	}
+
 	len = rec_end - body;
 
 	recv = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
@@ -905,6 +934,9 @@ recv_add_to_hash_table(
 		HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
 					recv_fold(space, page_no), recv_addr);
 		recv_sys->n_addrs++;
+
+		/* printf("Inserting log rec for space %lu, page %lu\n",
+					  space, page_no); */
 	}
 
 	UT_LIST_ADD_LAST(rec_list, recv_addr->rec_list, recv);
@@ -1021,6 +1053,8 @@ recv_recover_page(
 		return;
 	}
 
+	/* printf("Recovering space %lu, page %lu\n", space, page_no); */
+
 	recv_addr->state = RECV_BEING_PROCESSED;
 	
 	mutex_exit(&(recv_sys->mutex));
@@ -1116,8 +1150,9 @@ recv_recover_page(
 			if (log_debug_writes) {
 				fprintf(stderr, 
      "InnoDB: Applying log rec type %lu len %lu to space %lu page no %lu\n",
-			(ulint)recv->type, recv->len, recv_addr->space,
-				recv_addr->page_no);
+					(ulong) recv->type, (ulong) recv->len,
+					(ulong) recv_addr->space,
+					(ulong) recv_addr->page_no);
 			}
 					
 			recv_parse_or_apply_log_rec_body(recv->type, buf,
@@ -1308,8 +1343,7 @@ loop:
 		             / hash_get_n_cells(recv_sys->addr_hash)) {
 
 		        fprintf(stderr, "%lu ",
-			  (i * 100) / hash_get_n_cells(recv_sys->addr_hash));
-
+				(ulong) ((i * 100) / hash_get_n_cells(recv_sys->addr_hash)));
 		}
 	}
 
@@ -1363,130 +1397,132 @@ loop:
 }
 
 #ifdef UNIV_HOTBACKUP
+/* This page is allocated from the buffer pool and used in the function
+below */
+page_t* recv_backup_application_page	= NULL;
+
 /***********************************************************************
 Applies log records in the hash table to a backup. */
 
 void
-recv_apply_log_recs_for_backup(
-/*===========================*/
-	ulint	n_data_files,	/* in: number of data files */
-	char**	data_files,	/* in: array containing the paths to the
-				data files */
-	ulint*	file_sizes)	/* in: sizes of the data files in database
-				pages */
+recv_apply_log_recs_for_backup(void)
+/*================================*/
 {
 	recv_addr_t*	recv_addr;
-	os_file_t	data_file;
-	ulint		n_pages_total	= 0;
-	ulint		nth_file	= 0;
-	ulint		nth_page_in_file= 0;
+	ulint		n_hash_cells;
 	byte*		page;
+	ulint		actual_size;
 	ibool		success;
+	ulint		error;
 	ulint		i;
 
 	recv_sys->apply_log_recs = TRUE;
 	recv_sys->apply_batch_on = TRUE;
 
-	page = buf_pool->frame_zero;
-	
-	for (i = 0; i < n_data_files; i++) {
-		n_pages_total += file_sizes[i];
+	if (recv_backup_application_page == NULL) {
+		recv_backup_application_page = buf_frame_alloc();
 	}
 
-	if (recv_max_parsed_page_no >= n_pages_total) {
-		printf(
-"InnoDB: Error: tablespace size %lu pages, but a log record on page %lu!\n"
-"InnoDB: Are you sure you have specified all the ibdata files right in\n"
-"InnoDB: the my.cnf file you gave as the argument to ibbackup --restore?\n",
-			n_pages_total, recv_max_parsed_page_no);
-	}
+	page = recv_backup_application_page;
 
 	printf( 
 "InnoDB: Starting an apply batch of log records to the database...\n"
 "InnoDB: Progress in percents: ");
 	
-	for (i = 0; i < n_pages_total; i++) {
+	n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
 
-		if (i == 0 || nth_page_in_file == file_sizes[nth_file]) {
-			if (i != 0) {
-				nth_file++;
-				nth_page_in_file = 0;
-				os_file_flush(data_file);
-				os_file_close(data_file);
-			}
+	for (i = 0; i < n_hash_cells; i++) {
+	        /* The address hash table is externally chained */
+		recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node;
 
-			data_file = os_file_create_simple(data_files[nth_file],
-							OS_FILE_OPEN,
-							OS_FILE_READ_WRITE,
-							&success);
-			if (!success) {
+		while (recv_addr != NULL) {
+
+			if (!fil_tablespace_exists_in_mem(recv_addr->space)) {
+/*
 				printf(
-"InnoDB: Error: cannot open %lu'th data file\n", nth_file);
+"InnoDB: Warning: cannot apply log record to tablespace %lu page %lu,\n"
+"InnoDB: because tablespace with that id does not exist.\n",
+				      recv_addr->space, recv_addr->page_no);
+*/
+				recv_addr->state = RECV_PROCESSED;
 
-				exit(1);
+				ut_a(recv_sys->n_addrs);
+				recv_sys->n_addrs--;
+
+				goto skip_this_recv_addr;
 			}
-		}
-		
-		recv_addr = recv_get_fil_addr_struct(0, i);
-
-		if (recv_addr != NULL) {
-			success = os_file_read(data_file, page,
-			  (nth_page_in_file << UNIV_PAGE_SIZE_SHIFT)
-				& 0xFFFFFFFF,
-			  nth_page_in_file >> (32 - UNIV_PAGE_SIZE_SHIFT), 
-				UNIV_PAGE_SIZE);
+
+			/* We simulate a page read made by the buffer pool, to
+			make sure the recovery apparatus works ok, for
+			example, the buf_frame_align() function. We must init
+			the block corresponding to buf_pool->frame_zero
+			(== page). */
+
+			buf_page_init_for_backup_restore(recv_addr->space,
+						recv_addr->page_no,
+						buf_block_align(page));
+
+			/* Extend the tablespace's last file if the page_no
+			does not fall inside its bounds; we assume the last
+			file is auto-extending, and ibbackup copied the file
+			when it still was smaller */
+
+			success = fil_extend_space_to_desired_size(
+						&actual_size,
+						recv_addr->space,
+						recv_addr->page_no + 1);
 			if (!success) {
 				printf(
-"InnoDB: Error: cannot read page no %lu from %lu'th data file\n",
-				nth_page_in_file, nth_file);
+"InnoDB: Fatal error: cannot extend tablespace %lu to hold %lu pages\n",
+				     (ulong) recv_addr->space, (ulong) (recv_addr->page_no + 1));
+				     
+				exit(1);
+			}
 
+			/* Read the page from the tablespace file using the
+			fil0fil.c routines */
+
+			error = fil_io(OS_FILE_READ, TRUE, recv_addr->space,
+					recv_addr->page_no, 0, UNIV_PAGE_SIZE,
+					page, NULL);
+			if (error != DB_SUCCESS) {
+				printf(
+"InnoDB: Fatal error: cannot read from tablespace %lu page number %lu\n",
+				     (ulong) recv_addr->space, (ulong) recv_addr->page_no);
+				     
 				exit(1);
 			}
-				
-			/* We simulate a page read made by the buffer pool,
-			to make sure recovery works ok. We must init the
-			block corresponding to buf_pool->frame_zero
-			(== page) */
 
-			buf_page_init_for_backup_restore(0, i,
-						buf_block_align(page));
+			/* Apply the log records to this page */
+			recv_recover_page(TRUE, FALSE, page, recv_addr->space,
+						       recv_addr->page_no);
 
-			recv_recover_page(TRUE, FALSE, page, 0, i);
+			/* Write the page back to the tablespace file using the
+			fil0fil.c routines */
 
 			buf_flush_init_for_writing(page,
 				mach_read_from_8(page + FIL_PAGE_LSN),
-				0, i);
-
-			success = os_file_write(data_files[nth_file],
-			  data_file, page,
-			  (nth_page_in_file << UNIV_PAGE_SIZE_SHIFT)
-				& 0xFFFFFFFF,
-			  nth_page_in_file >> (32 - UNIV_PAGE_SIZE_SHIFT), 
-				UNIV_PAGE_SIZE);
-			if (!success) {
-				printf(
-"InnoDB: Error: cannot write page no %lu to %lu'th data file\n",
-				nth_page_in_file, nth_file);
+				recv_addr->space, recv_addr->page_no);
 
-				exit(1);
-			}
+			error = fil_io(OS_FILE_WRITE, TRUE, recv_addr->space,
+					recv_addr->page_no, 0, UNIV_PAGE_SIZE,
+					page, NULL);
+skip_this_recv_addr:
+			recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
 		}
 
-		if ((100 * i) / n_pages_total
-				!= (100 * (i + 1)) / n_pages_total) {
-			printf("%lu ", (100 * i) / n_pages_total);
+		if ((100 * i) / n_hash_cells
+				!= (100 * (i + 1)) / n_hash_cells) {
+			printf("%lu ", (ulong) ((100 * i) / n_hash_cells));
 			fflush(stdout);
 		}
-
-		nth_page_in_file++;
 	}
-	
-	os_file_flush(data_file);
-	os_file_close(data_file);
 
 	recv_sys_empty_hash();
 }
+#endif
 
+#ifdef notdefined
 /***********************************************************************
 In the debug version, updates the replica of a file page, based on a log
 record. */
@@ -1720,7 +1756,7 @@ recv_parse_log_rec(
 	if (*ptr == MLOG_DUMMY_RECORD) {
 		*type = *ptr;
 
-		*space = 1000; /* For debugging */
+		*space = ULINT_UNDEFINED - 1; /* For debugging */
 
 		return(1);
 	}
@@ -1732,9 +1768,9 @@ recv_parse_log_rec(
 	        return(0);
 	}
 
-	/* Check that space id and page_no are sensible */
+	/* Check that page_no is sensible */
 
-	if (*space != 0 || *page_no > 0x8FFFFFFF) {
+	if (*page_no > 0x8FFFFFFFUL) {
 
 		recv_sys->found_corrupt_log = TRUE;
 
@@ -1823,19 +1859,19 @@ recv_report_corrupt_log(
 "InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
 "InnoDB: Log record type %lu, space id %lu, page number %lu\n"
 "InnoDB: Log parsing proceeded successfully up to %lu %lu\n",
-	(ulint)type, space, page_no,
-	ut_dulint_get_high(recv_sys->recovered_lsn),
-	ut_dulint_get_low(recv_sys->recovered_lsn));
+	(ulong) type, (ulong) space, (ulong) page_no,
+	(ulong) ut_dulint_get_high(recv_sys->recovered_lsn),
+	(ulong) ut_dulint_get_low(recv_sys->recovered_lsn));
 
 	err_buf = ut_malloc(1000000);
 
 	fprintf(stderr,
 "InnoDB: Previous log record type %lu, is multi %lu\n"
 "InnoDB: Recv offset %lu, prev %lu\n",
-		recv_previous_parsed_rec_type,
-		recv_previous_parsed_rec_is_multi,
-		(ulint)(ptr - recv_sys->buf),
-		recv_previous_parsed_rec_offset);
+		(ulong) recv_previous_parsed_rec_type,
+		(ulong) recv_previous_parsed_rec_is_multi,
+		(ulong) (ptr - recv_sys->buf),
+		(ulong) recv_previous_parsed_rec_offset);
 
 	if ((ulint)(ptr - recv_sys->buf + 100)
 					> recv_previous_parsed_rec_offset
@@ -1910,12 +1946,16 @@ loop:
 	single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
 
 	if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
-		/* The mtr only modified a single page */
+		/* The mtr only modified a single page, or this is a file op */
 
 		old_lsn = recv_sys->recovered_lsn;
 
+		/* Try to parse a log record, fetching its type, space id,
+		page no, and a pointer to the body of the log record */
+
 		len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
 							&page_no, &body);
+
 		if (len == 0 || recv_sys->found_corrupt_log) {
 			if (recv_sys->found_corrupt_log) {
 
@@ -1947,12 +1987,36 @@ loop:
 		if (log_debug_writes) {
 			fprintf(stderr, 
 "InnoDB: Parsed a single log rec type %lu len %lu space %lu page no %lu\n",
-			(ulint)type, len, space, page_no);
+				(ulong) type, (ulong) len, (ulong) space,
+				(ulong) page_no);
 		}
 
 		if (type == MLOG_DUMMY_RECORD) {
 			/* Do nothing */
 		
+		} else if (store_to_hash && (type == MLOG_FILE_CREATE
+					     || type == MLOG_FILE_RENAME
+					     || type == MLOG_FILE_DELETE)) {
+#ifdef UNIV_HOTBACKUP
+			if (recv_replay_file_ops) {
+
+				/* In ibbackup --apply-log, replay an .ibd file
+				operation, if possible; note that
+				fil_path_to_mysql_datadir is set in ibbackup to
+				point to the datadir we should use there */
+			
+				if (NULL == fil_op_log_parse_or_replay(body,
+						end_ptr, type, TRUE, space)) {
+					fprintf(stderr,
+"InnoDB: Error: file op log record of type %lu space %lu not complete in\n"
+"InnoDB: the replay phase. Path %s\n", (ulong) type, (ulong) space, (char*)(body + 2));
+
+					ut_a(0);
+				}
+			}
+#endif
+			/* In normal mysqld crash recovery we do not try to
+			replay file operations */
 		} else if (store_to_hash) {
 			recv_add_to_hash_table(type, space, page_no, body,
 						ptr + len, old_lsn,
@@ -2010,7 +2074,8 @@ loop:
 			if (log_debug_writes) {
 				fprintf(stderr, 
 "InnoDB: Parsed a multi log rec type %lu len %lu space %lu page no %lu\n",
-				(ulint)type, len, space, page_no);
+				(ulong) type, (ulong) len, (ulong) space,
+				(ulong) page_no);
 			}
 		
 			total_len += len;
@@ -2240,10 +2305,11 @@ recv_scan_log_recs(
 				fprintf(stderr,
 "InnoDB: Log block no %lu at lsn %lu %lu has\n"
 "InnoDB: ok header, but checksum field contains %lu, should be %lu\n",
-				no, ut_dulint_get_high(scanned_lsn),
-				ut_dulint_get_low(scanned_lsn),
-				log_block_get_checksum(log_block),
-				log_block_calc_checksum(log_block));
+				(ulong) no,
+				(ulong) ut_dulint_get_high(scanned_lsn),
+				(ulong) ut_dulint_get_low(scanned_lsn),
+				(ulong) log_block_get_checksum(log_block),
+				(ulong) log_block_calc_checksum(log_block));
 			}
 
 			/* Garbage or an incompletely written log block */
@@ -2276,7 +2342,7 @@ recv_scan_log_recs(
 		       < recv_sys->scanned_checkpoint_no)
 		    && (recv_sys->scanned_checkpoint_no
 			- log_block_get_checkpoint_no(log_block)
-			> 0x80000000)) {
+			> 0x80000000UL)) {
 
 			/* Garbage from a log buffer flush which was made
 			before the most recent database recovery */
@@ -2309,7 +2375,8 @@ recv_scan_log_recs(
 		if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) {
 
 			/* We were able to find more log data: add it to the
-			parsing buffer if parse_start_lsn is already non-zero */
+			parsing buffer if parse_start_lsn is already
+			non-zero */
 
 			if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
 						>= RECV_PARSING_BUF_SIZE) {
@@ -2347,8 +2414,8 @@ recv_scan_log_recs(
 
 			fprintf(stderr, 
 "InnoDB: Doing recovery: scanned up to log sequence number %lu %lu\n",
-				ut_dulint_get_high(*group_scanned_lsn),
-				ut_dulint_get_low(*group_scanned_lsn));
+				(ulong) ut_dulint_get_high(*group_scanned_lsn),
+				(ulong) ut_dulint_get_low(*group_scanned_lsn));
 		}
 	}
 
@@ -2407,8 +2474,8 @@ recv_group_scan_log_recs(
 						group, start_lsn, end_lsn);
 
 		finished = recv_scan_log_recs(TRUE,
-				buf_pool_get_curr_size()
-				- RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE,
+                                (buf_pool->n_frames
+                                - recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
 				TRUE, log_sys->buf,
 				RECV_SCAN_SIZE, start_lsn,
 				contiguous_lsn, group_scanned_lsn);
@@ -2418,9 +2485,9 @@ recv_group_scan_log_recs(
 	if (log_debug_writes) {
 		fprintf(stderr,
 	"InnoDB: Scanned group %lu up to log sequence number %lu %lu\n",
-				group->id,
-				ut_dulint_get_high(*group_scanned_lsn),
-				ut_dulint_get_low(*group_scanned_lsn));
+				(ulong) group->id,
+				(ulong) ut_dulint_get_high(*group_scanned_lsn),
+				(ulong) ut_dulint_get_low(*group_scanned_lsn));
 	}
 }
 
@@ -2458,7 +2525,6 @@ recv_recovery_from_checkpoint_start(
 			|| (ut_dulint_cmp(limit_lsn, ut_dulint_max) == 0));
 	
 	if (type == LOG_CHECKPOINT) {
-
 		recv_sys_create();
 		recv_sys_init(FALSE, buf_pool_get_curr_size());
 	}
@@ -2472,8 +2538,6 @@ recv_recovery_from_checkpoint_start(
 		return(DB_SUCCESS);
 	}
 
-	sync_order_checks_on = TRUE;
-
 	recv_recovery_on = TRUE;
 
 	recv_sys->limit_lsn = limit_lsn;
@@ -2546,25 +2610,72 @@ recv_recovery_from_checkpoint_start(
 		recv_sys->scanned_checkpoint_no = 0;
 		recv_sys->recovered_lsn = checkpoint_lsn;
 
-		/* NOTE: we always do recovery at startup, but only if
+		srv_start_lsn = checkpoint_lsn;
+
+		/* NOTE: we always do a 'recovery' at startup, but only if
 		there is something wrong we will print a message to the
 		user about recovery: */
 		
 		if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0
 	    	   || ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) {
 
+			if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn)
+								< 0) {
+				fprintf(stderr,
+"InnoDB: ##########################################################\n"
+"InnoDB:                          WARNING!\n"
+"InnoDB: The log sequence number in ibdata files is higher\n"
+"InnoDB: than the log sequence number in the ib_logfiles! Are you sure\n"
+"InnoDB: you are using the right ib_logfiles to start up the database?\n"
+"InnoDB: Log sequence number in ib_logfiles is %lu %lu, log\n"
+"InnoDB: sequence numbers stamped to ibdata file headers are between\n"
+"InnoDB: %lu %lu and %lu %lu.\n"
+"InnoDB: ##########################################################\n",
+				(ulong) ut_dulint_get_high(checkpoint_lsn),
+				(ulong) ut_dulint_get_low(checkpoint_lsn),
+				(ulong) ut_dulint_get_high(min_flushed_lsn),
+				(ulong) ut_dulint_get_low(min_flushed_lsn),
+				(ulong) ut_dulint_get_high(max_flushed_lsn),
+				(ulong) ut_dulint_get_low(max_flushed_lsn));
+			}
+
 	    	   	recv_needed_recovery = TRUE;
 	    	   
 			ut_print_timestamp(stderr);
 
 	    		fprintf(stderr,
-			  "  InnoDB: Database was not shut down normally.\n"
-	    		  "InnoDB: Starting recovery from log files...\n");
+"  InnoDB: Database was not shut down normally!\n"
+"InnoDB: Starting crash recovery.\n");
+
+			fprintf(stderr,
+"InnoDB: Reading tablespace information from the .ibd files...\n");
+
+			fil_load_single_table_tablespaces();
+
+			/* If we are using the doublewrite method, we will
+			check if there are half-written pages in data files,
+			and restore them from the doublewrite buffer if
+			possible */
+		
+			if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
+		
+				fprintf(stderr,
+"InnoDB: Restoring possible half-written data pages from the doublewrite\n"
+"InnoDB: buffer...\n");
+				trx_sys_doublewrite_init_or_restore_pages(
+									TRUE);
+			}
+
+			ut_print_timestamp(stderr);
+
 			fprintf(stderr, 
-			  "InnoDB: Starting log scan based on checkpoint at\n"
-			  "InnoDB: log sequence number %lu %lu\n",
-		 			ut_dulint_get_high(checkpoint_lsn),
-					ut_dulint_get_low(checkpoint_lsn));
+"  InnoDB: Starting log scan based on checkpoint at\n"
+"InnoDB: log sequence number %lu %lu.\n",
+		 			(ulong) ut_dulint_get_high(checkpoint_lsn),
+					(ulong) ut_dulint_get_low(checkpoint_lsn));
+		} else {
+			/* Init the doublewrite buffer memory structure */
+			trx_sys_doublewrite_init_or_restore_pages(FALSE);
 		}
 	}
 
@@ -2645,10 +2756,10 @@ recv_recovery_from_checkpoint_start(
 "  InnoDB: ERROR: We were only able to scan the log up to\n"
 "InnoDB: %lu %lu, but a checkpoint was at %lu %lu.\n"
 "InnoDB: It is possible that the database is now corrupt!\n",
-			 ut_dulint_get_high(group_scanned_lsn),
-			 ut_dulint_get_low(group_scanned_lsn),
-			 ut_dulint_get_high(checkpoint_lsn),
-			 ut_dulint_get_low(checkpoint_lsn));
+			 (ulong) ut_dulint_get_high(group_scanned_lsn),
+			 (ulong) ut_dulint_get_low(group_scanned_lsn),
+			 (ulong) ut_dulint_get_high(checkpoint_lsn),
+			 (ulong) ut_dulint_get_low(checkpoint_lsn));
 	}
 
 	if (ut_dulint_cmp(group_scanned_lsn, recv_max_page_lsn) < 0) {
@@ -2657,10 +2768,10 @@ recv_recovery_from_checkpoint_start(
 "  InnoDB: ERROR: We were only able to scan the log up to %lu %lu\n"
 "InnoDB: but a database page a had an lsn %lu %lu. It is possible that the\n"
 "InnoDB: database is now corrupt!\n",
-			 ut_dulint_get_high(group_scanned_lsn),
-			 ut_dulint_get_low(group_scanned_lsn),
-			 ut_dulint_get_high(recv_max_page_lsn),
-			 ut_dulint_get_low(recv_max_page_lsn));
+			 (ulong) ut_dulint_get_high(group_scanned_lsn),
+			 (ulong) ut_dulint_get_low(group_scanned_lsn),
+			 (ulong) ut_dulint_get_high(recv_max_page_lsn),
+			 (ulong) ut_dulint_get_low(recv_max_page_lsn));
 	}
 
 	if (ut_dulint_cmp(recv_sys->recovered_lsn, checkpoint_lsn) < 0) {
@@ -2686,6 +2797,21 @@ recv_recovery_from_checkpoint_start(
 	log_sys->archived_lsn = archived_lsn;
 	
 	recv_synchronize_groups(up_to_date_group);
+
+	if (!recv_needed_recovery) {
+		if (ut_dulint_cmp(checkpoint_lsn, recv_sys->recovered_lsn)
+								!= 0) {
+			fprintf(stderr,
+"InnoDB: Warning: we did not need to do crash recovery, but log scan\n"
+"InnoDB: progressed past the checkpoint lsn %lu %lu up to lsn %lu %lu\n",
+			 (ulong) ut_dulint_get_high(checkpoint_lsn),
+			 (ulong) ut_dulint_get_low(checkpoint_lsn),
+			 (ulong) ut_dulint_get_high(recv_sys->recovered_lsn),
+			 (ulong) ut_dulint_get_low(recv_sys->recovered_lsn));
+		}
+	} else {
+		srv_start_lsn = recv_sys->recovered_lsn;
+	}
 	
 	log_sys->lsn = recv_sys->recovered_lsn;
 
@@ -2714,8 +2840,6 @@ recv_recovery_from_checkpoint_start(
 	
 	mutex_exit(&(log_sys->mutex));
 
-	sync_order_checks_on = FALSE;
-
 	recv_lsn_checks_on = TRUE;
 
 	/* The database is now ready to start almost normal processing of user
@@ -2857,9 +2981,11 @@ recv_reset_log_files_for_backup(
 	
 	buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 	
+	/* Zero the whole buffer; memset() takes (pointer, byte value, length) */
+	memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
 	for (i = 0; i < n_log_files; i++) {
 
-		sprintf(name, "%sib_logfile%lu", log_dir, i);
+		sprintf(name, "%sib_logfile%lu", log_dir, (ulong) i);
 
 		log_file = os_file_create_simple(name, OS_FILE_CREATE,
 						OS_FILE_READ_WRITE, &success);
@@ -2871,17 +2997,18 @@ recv_reset_log_files_for_backup(
 		}
 
 		printf(
-"Setting log file size to %lu %lu\n", ut_get_high32(log_file_size),
-						log_file_size & 0xFFFFFFFF);
+"Setting log file size to %lu %lu\n", (ulong) ut_get_high32(log_file_size),
+				      (ulong) (log_file_size & 0xFFFFFFFFUL));
 
 		success = os_file_set_size(name, log_file,
-					log_file_size & 0xFFFFFFFF,
+					log_file_size & 0xFFFFFFFFUL,
 					ut_get_high32(log_file_size));
 
 		if (!success) {
 			printf(
-"InnoDB: Cannot set %s size to %lu %lu\n", name, ut_get_high32(log_file_size),
-						log_file_size & 0xFFFFFFFF);
+"InnoDB: Cannot set %s size to %lu %lu\n", name,
+					  (ulong) ut_get_high32(log_file_size),
+					  (ulong) (log_file_size & 0xFFFFFFFFUL));
 			exit(1);
 		}
 
@@ -2896,7 +3023,7 @@ recv_reset_log_files_for_backup(
 	log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
 	log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
 							LOG_BLOCK_HDR_SIZE);
-	sprintf(name, "%sib_logfile%lu", log_dir, 0);
+	sprintf(name, "%sib_logfile%lu", log_dir, (ulong) 0);
 
 	log_file = os_file_create_simple(name, OS_FILE_OPEN,
 						OS_FILE_READ_WRITE, &success);
@@ -2938,6 +3065,8 @@ log_group_recover_from_archive_file(
 	int	input_char;
 	char	name[10000];
 
+	ut_a(0);
+
 try_open_again:	
 	buf = log_sys->buf;
 
@@ -2945,13 +3074,10 @@ try_open_again:
 	
 	log_archived_file_name_gen(name, group->id, group->archived_file_no);
 
-	fil_reserve_right_to_open();
-
 	file_handle = os_file_create(name, OS_FILE_OPEN,
 					OS_FILE_LOG, OS_FILE_AIO, &ret);
 
 	if (ret == FALSE) {
-		fil_release_right_to_open();
 ask_again:
 		fprintf(stderr, 
 	"InnoDB: Do you want to copy additional archived log files\n"
@@ -2992,12 +3118,10 @@ ask_again:
 
 	ut_a(ret);
 	
-	fil_release_right_to_open();
-	
 	/* Add the archive file as a node to the space */
 		
 	fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE,
-						group->archive_space_id);
+					    group->archive_space_id, FALSE);
 	ut_a(RECV_SCAN_SIZE >= LOG_FILE_HDR_SIZE);
 
 	/* Read the archive file header */
@@ -3063,9 +3187,9 @@ ask_again:
 		if (log_debug_writes) {
 			fprintf(stderr, 
 "InnoDB: Archive read starting at lsn %lu %lu, len %lu from file %s\n",
-					ut_dulint_get_high(start_lsn),
-					ut_dulint_get_low(start_lsn),
-					len, name);
+					(ulong) ut_dulint_get_high(start_lsn),
+					(ulong) ut_dulint_get_low(start_lsn),
+					(ulong) len, name);
 		}
 
 		fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE,
@@ -3073,8 +3197,8 @@ ask_again:
 			read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
 
 		ret = recv_scan_log_recs(TRUE,
-				buf_pool_get_curr_size() -
-				RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE,
+                                (buf_pool->n_frames -
+                                recv_n_pool_free_frames) * UNIV_PAGE_SIZE,
 				TRUE, buf, len, start_lsn,
 				&dummy_lsn, &scanned_lsn);
 
@@ -3120,10 +3244,10 @@ recv_recovery_from_archive_start(
 	ibool		ret;
 	ulint		err;
 	
+	ut_a(0);
+
 	recv_sys_create();
 	recv_sys_init(FALSE, buf_pool_get_curr_size());
-
-	sync_order_checks_on = TRUE;
 	
 	recv_recovery_on = TRUE;
 	recv_recovery_from_backup_on = TRUE;
@@ -3146,7 +3270,7 @@ recv_recovery_from_archive_start(
 	if (!group) {
 		fprintf(stderr,
 		"InnoDB: There is no log group defined with id %lu!\n",
-								group_id);
+							   (ulong) group_id);
 		return(DB_ERROR);
 	}
 
@@ -3210,8 +3334,6 @@ recv_recovery_from_archive_start(
 
 	mutex_exit(&(log_sys->mutex));
 
-	sync_order_checks_on = FALSE;
-
 	return(DB_SUCCESS);
 }
 
@@ -3222,6 +3344,8 @@ void
 recv_recovery_from_archive_finish(void)
 /*===================================*/
 {
+	ut_a(0);
+
 	recv_recovery_from_checkpoint_finish();
 
 	recv_recovery_from_backup_on = FALSE;
diff --git a/innobase/mach/mach0data.c b/innobase/mach/mach0data.c
index 336ce106a75..ff7265b34f4 100644
--- a/innobase/mach/mach0data.c
+++ b/innobase/mach/mach0data.c
@@ -36,37 +36,37 @@ mach_parse_compressed(
 
 	flag = mach_read_from_1(ptr);
 
-	if (flag < 0x80) {
+	if (flag < 0x80UL) {
 		*val = flag;
 		return(ptr + 1);
 		
-	} else if (flag < 0xC0) {
+	} else if (flag < 0xC0UL) {
 		if (end_ptr < ptr + 2) {
 			return(NULL);
 		}
 			
-		*val = mach_read_from_2(ptr) & 0x7FFF;
+		*val = mach_read_from_2(ptr) & 0x7FFFUL;
 
 		return(ptr + 2);
 		
-	} else if (flag < 0xE0) {
+	} else if (flag < 0xE0UL) {
 		if (end_ptr < ptr + 3) {
 			return(NULL);
 		}
 			
-		*val = mach_read_from_3(ptr) & 0x3FFFFF;
+		*val = mach_read_from_3(ptr) & 0x3FFFFFUL;
 
 		return(ptr + 3);
-	} else if (flag < 0xF0) {
+	} else if (flag < 0xF0UL) {
 		if (end_ptr < ptr + 4) {
 			return(NULL);
 		}
 			
-		*val = mach_read_from_4(ptr) & 0x1FFFFFFF;
+		*val = mach_read_from_4(ptr) & 0x1FFFFFFFUL;
 
 		return(ptr + 4);
 	} else {
-		ut_ad(flag == 0xF0);
+		ut_ad(flag == 0xF0UL);
 
 		if (end_ptr < ptr + 5) {
 			return(NULL);
diff --git a/innobase/mem/mem0dbg.c b/innobase/mem/mem0dbg.c
index 07f348ab82f..1007f8413b4 100644
--- a/innobase/mem/mem0dbg.c
+++ b/innobase/mem/mem0dbg.c
@@ -338,7 +338,7 @@ mem_hash_remove(
 	if (node == NULL) {
 		printf(
     	    "Memory heap or buffer freed in %s line %lu did not exist.\n",
-			file_name, line);
+			file_name, (ulong) line);
 		ut_error;
 	}
 
@@ -351,21 +351,23 @@ mem_hash_remove(
 	mem_heap_validate_or_print(node->heap, NULL, FALSE, &error, &size,
 								NULL, NULL);
 	if (error) {
-	   printf("Inconsistency in memory heap or buffer n:o %lu created\n",
-							node->nth_heap);
-	   printf("in %s line %lu and tried to free in %s line %lu.\n",
-	  			node->file_name, node->line, file_name, line);
+	        printf(
+"Inconsistency in memory heap or buffer n:o %lu created\n",
+							(ulong) node->nth_heap);
+		printf("in %s line %lu and tried to free in %s line %lu.\n",
+		       node->file_name, (ulong) node->line,
+		       file_name, (ulong) line);
 
-	   printf(
-	   "Hex dump of 400 bytes around memory heap first block start:\n");
+		printf(
+"Hex dump of 400 bytes around memory heap first block start:\n");
 
-	   ut_print_buf((byte*)(node->heap) - 200, 400);
+		ut_print_buf((byte*)(node->heap) - 200, 400);
 
-	   printf("\nDump of the mem heap:\n");
+		printf("\nDump of the mem heap:\n");
 
-	   mem_heap_validate_or_print(node->heap, NULL, TRUE, &error, &size,
-								NULL, NULL);
-	   ut_error;
+		mem_heap_validate_or_print(node->heap, NULL, TRUE, &error,
+							 &size, NULL, NULL);
+		ut_error;
 	}
 
 	/* Free the memory occupied by the node struct */
@@ -447,6 +449,9 @@ mem_heap_validate_or_print(
 		if ((block->type == MEM_HEAP_BUFFER)
 		    && (mem_block_get_len(block) > UNIV_PAGE_SIZE)) {
 
+			fprintf(stderr,
+"InnoDB: Error: mem block %lx length %lu > UNIV_PAGE_SIZE\n", (ulong) block,
+				(ulong) mem_block_get_len(block));
 		    	/* error */
 
 		    	return;
@@ -486,6 +491,12 @@ mem_heap_validate_or_print(
 			    mem_field_trailer_get_check(user_field)) {
 				/* error */
 
+				/* cast every argument to match %lx / %lu */
+				fprintf(stderr,
+"InnoDB: Error: block %lx mem field %lx len %lu\n"
+"InnoDB: header check field is %lx but trailer %lx\n", (ulong) block,
+				   (ulong) field, (ulong) len, (ulong) check_field,
+			(ulong) mem_field_trailer_get_check(user_field));
 			     	return;
 			}
 
@@ -505,6 +516,11 @@ mem_heap_validate_or_print(
 		if (field != (byte*)block + mem_block_get_free(block)) {
 			/* error */
 
+			/* cast the addresses to ulong to match the %lx conversions */
+			fprintf(stderr,
+"InnoDB: Error: block %lx end of mem fields %lx\n"
+"InnoDB: but block free at %lx\n", (ulong) block, (ulong) field,
+			(ulong) ((byte*)block + mem_block_get_free(block)));
 			return;
 		}
 
@@ -547,7 +563,8 @@ mem_heap_print(
 				&us_size, &phys_size, &n_blocks);
 	printf(
   "\nheap type: %lu; size: user size %lu; physical size %lu; blocks %lu.\n",
-			heap->type, us_size, phys_size, n_blocks);
+			(ulong) heap->type, (ulong) us_size,
+			(ulong) phys_size, (ulong) n_blocks);
 	ut_a(!error);
 }
 
@@ -583,6 +600,10 @@ mem_heap_validate(
 
 	mem_heap_validate_or_print(heap, NULL, FALSE, &error, &us_size,
 						&phys_size, &n_blocks);
+	if (error) {
+		mem_heap_print(heap);
+	}
+
 	ut_a(!error);
 
 	return(TRUE);
@@ -738,8 +759,8 @@ mem_analyze_corruption(
 			    if (*((ulint*)p) == MEM_BLOCK_MAGIC_N) {
 				fprintf(stderr,
 			"Mem block at - %lu, file %s, line %lu\n",
-				dist, p + sizeof(ulint),
-				*(ulint*)(p + 8 + sizeof(ulint)));
+				(ulong) dist, (p + sizeof(ulint)),
+				(ulong) (*(ulint*)(p + 8 + sizeof(ulint))));
 
 				break;
 			    }
@@ -747,8 +768,8 @@ mem_analyze_corruption(
 			    if (*((ulint*)p) == MEM_FREED_BLOCK_MAGIC_N) {
 				fprintf(stderr,
 			"Freed mem block at - %lu, file %s, line %lu\n",
-				dist, p + sizeof(ulint),
-				*(ulint*)(p + 8 + sizeof(ulint)));
+				(ulong) dist, (p + sizeof(ulint)),
+				(ulong) (*(ulint*)(p + 8 + sizeof(ulint))));
 
 				break;
 			    }
@@ -775,8 +796,8 @@ mem_analyze_corruption(
 			    if (*((ulint*)p) == MEM_BLOCK_MAGIC_N) {
 				fprintf(stderr,
 			"Mem block at + %lu, file %s, line %lu\n",
-				dist, p + sizeof(ulint),
-				*(ulint*)(p + 8 + sizeof(ulint)));
+				(ulong) dist, (p + sizeof(ulint)),
+				(ulong) (*(ulint*)(p + 8 + sizeof(ulint))));
 
 				break;
 			    }
@@ -784,8 +805,8 @@ mem_analyze_corruption(
 			    if (*((ulint*)p) == MEM_FREED_BLOCK_MAGIC_N) {
 				fprintf(stderr,
 			"Freed mem block at + %lu, file %s, line %lu\n",
-				dist, p + sizeof(ulint),
-				*(ulint*)(p + 8 + sizeof(ulint)));
+				(ulong) dist, (p + sizeof(ulint)),
+				(ulong) (*(ulint*)(p + 8 + sizeof(ulint))));
 
 				break;
 			    }
diff --git a/innobase/mem/mem0pool.c b/innobase/mem/mem0pool.c
index 9a5d16cd4a2..4f1ac2bcd7c 100644
--- a/innobase/mem/mem0pool.c
+++ b/innobase/mem/mem0pool.c
@@ -281,7 +281,8 @@ mem_pool_fill_free_list(
 	                fprintf(stderr,
 "  InnoDB: Error: mem pool free list %lu length is %lu\n"
 "InnoDB: though the list is empty!\n",
-			i + 1, UT_LIST_GET_LEN(pool->free_list[i + 1]));
+			(ulong) i + 1,
+			(ulong) UT_LIST_GET_LEN(pool->free_list[i + 1]));
 		}
 
 		ret = mem_pool_fill_free_list(i + 1, pool);
@@ -362,7 +363,7 @@ mem_area_alloc(
 	        fprintf(stderr,
 "InnoDB: Error: Removing element from mem pool free list %lu though the\n"
 "InnoDB: element is not marked free!\n",
-			n);
+			(ulong) n);
 
 		mem_analyze_corruption((byte*)area);
 
@@ -382,7 +383,7 @@ mem_area_alloc(
 	        fprintf(stderr,
 "InnoDB: Error: Removing element from mem pool free list %lu\n"
 "InnoDB: though the list length is 0!\n",
-			n);
+			(ulong) n);
 		mem_analyze_corruption((byte*)area);
 
 		ut_error;
@@ -506,7 +507,7 @@ mem_area_free(
 		        fprintf(stderr,
 "InnoDB: Error: Memory area size %lu, next area size %lu not a power of 2!\n"
 "InnoDB: Possibly a memory overrun of the buffer being freed here.\n",
-			  size, next_size);
+			  (ulong) size, (ulong) next_size);
 			mem_analyze_corruption((byte*)area);
 
 			ut_error;
@@ -605,8 +606,8 @@ mem_pool_validate(
 		}
 	}
 
-	ut_a(free + pool->reserved == pool->size
-					- (pool->size % MEM_AREA_MIN_SIZE));
+	ut_a(free + pool->reserved == pool->size);
+
 	mutex_exit(&(pool->mutex));
 
 	return(TRUE);
@@ -634,13 +635,13 @@ mem_pool_print_info(
 
 			fprintf(outfile,
 			  "Free list length %lu for blocks of size %lu\n",
-			  UT_LIST_GET_LEN(pool->free_list[i]),
-			  ut_2_exp(i));
+			  (ulong) UT_LIST_GET_LEN(pool->free_list[i]),
+			  (ulong) ut_2_exp(i));
 		}	
 	}
 
-	fprintf(outfile, "Pool size %lu, reserved %lu.\n", pool->size,
-							pool->reserved);
+	fprintf(outfile, "Pool size %lu, reserved %lu.\n", (ulong) pool->size,
+						       (ulong) pool->reserved);
 	mutex_exit(&(pool->mutex));
 }
 
diff --git a/innobase/mtr/mtr0log.c b/innobase/mtr/mtr0log.c
index 91ff588713d..5a4aaa2377d 100644
--- a/innobase/mtr/mtr0log.c
+++ b/innobase/mtr/mtr0log.c
@@ -58,7 +58,7 @@ mlog_write_initial_log_record(
 	if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) {
 		fprintf(stderr,
 	"InnoDB: Error: trying to write to a stray memory location %lx\n",
-			(ulint)ptr);
+			(ulong) ptr);
 		ut_error;
 	}
 
@@ -171,13 +171,13 @@ mlog_parse_nbytes(
 	}
 
 	if (type == MLOG_1BYTE) {
-		if (val > 0xFF) {
+		if (val > 0xFFUL) {
 			recv_sys->found_corrupt_log = TRUE;
 
 			return(NULL);
 		}
 	} else if (type == MLOG_2BYTES) {
-		if (val > 0xFFFF) {
+		if (val > 0xFFFFUL) {
 			recv_sys->found_corrupt_log = TRUE;
 
 			return(NULL);
@@ -221,7 +221,7 @@ mlog_write_ulint(
 	if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) {
 		fprintf(stderr,
 	"InnoDB: Error: trying to write to a stray memory location %lx\n",
-			(ulint)ptr);
+			(ulong) ptr);
 		ut_error;
 	}
 
@@ -268,7 +268,7 @@ mlog_write_dulint(
 	if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) {
 		fprintf(stderr,
 	"InnoDB: Error: trying to write to a stray memory location %lx\n",
-			(ulint)ptr);
+			(ulong) ptr);
 		ut_error;
 	}
 
@@ -312,7 +312,7 @@ mlog_write_string(
 	if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) {
 		fprintf(stderr,
 	"InnoDB: Error: trying to write to a stray memory location %lx\n",
-			(ulint)ptr);
+			(ulong) ptr);
 		ut_error;
 	}
 	ut_ad(ptr && mtr);
diff --git a/innobase/mtr/mtr0mtr.c b/innobase/mtr/mtr0mtr.c
index b2d8d022f8c..ac1a638063d 100644
--- a/innobase/mtr/mtr0mtr.c
+++ b/innobase/mtr/mtr0mtr.c
@@ -263,11 +263,11 @@ mtr_first_to_modify_page_after_backup(
 							backup_lsn) <= 0) {
 
 				printf("Page %lu newest %lu backup %lu\n",
-					block->offset,
-					ut_dulint_get_low(
+					(ulong) block->offset,
+					(ulong) ut_dulint_get_low(
 					buf_frame_get_newest_modification(
 							block->frame)),
-					ut_dulint_get_low(backup_lsn));
+					(ulong) ut_dulint_get_low(backup_lsn));
 					
 				ret = TRUE;
 			}
@@ -517,6 +517,6 @@ mtr_print(
 {
 	printf(
 	"Mini-transaction handle: memo size %lu bytes log size %lu bytes\n",
-		dyn_array_get_data_size(&(mtr->memo)),
-		dyn_array_get_data_size(&(mtr->log)));
+		(ulong) dyn_array_get_data_size(&(mtr->memo)),
+		(ulong) dyn_array_get_data_size(&(mtr->log)));
 }
diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c
index abcb2259e84..7c9272fa13f 100644
--- a/innobase/os/os0file.c
+++ b/innobase/os/os0file.c
@@ -11,6 +11,7 @@ Created 10/21/1995 Heikki Tuuri
 #include "os0thread.h"
 #include "ut0mem.h"
 #include "srv0srv.h"
+#include "srv0start.h"
 #include "fil0fil.h"
 #include "buf0buf.h"
 
@@ -33,7 +34,7 @@ ulint	os_innodb_umask		= 0;
 #endif
 
 /* If the following is set to TRUE, we do not call os_file_flush in every
-os_file_write. We can set this TRUE if the doublewrite buffer is used. */
+os_file_write. We can set this TRUE when the doublewrite buffer is used. */
 ibool	os_do_not_call_flush_at_each_write	= FALSE;
 
 /* We use these mutexes to protect lseek + file i/o operation, if the
@@ -154,7 +155,6 @@ os_mutex_t os_file_count_mutex;
 ulint	os_file_n_pending_preads  = 0;
 ulint	os_file_n_pending_pwrites = 0;
 
-
 /***************************************************************************
 Gets the operating system version. Currently works only on Windows. */
 
@@ -198,9 +198,12 @@ overwrite the error number). If the number is not known to this program,
 the OS error number + 100 is returned. */
 
 ulint
-os_file_get_last_error(void)
-/*========================*/
-		/* out: error number, or OS error number + 100 */
+os_file_get_last_error(
+/*===================*/
+					/* out: error number, or OS error
+					number + 100 */
+	ibool	report_all_errors)	/* in: TRUE if we want an error message
+					printed of all errors */
 {
 	ulint	err;
 
@@ -208,25 +211,29 @@ os_file_get_last_error(void)
 
 	err = (ulint) GetLastError();
 
-	if (err != ERROR_DISK_FULL && err != ERROR_FILE_EXISTS) {
+	if (report_all_errors
+	    || (err != ERROR_DISK_FULL && err != ERROR_FILE_EXISTS)) {
+
 		ut_print_timestamp(stderr);
 	     	fprintf(stderr,
-  "  InnoDB: Operating system error number %lu in a file operation.\n"
-  "InnoDB: See http://www.innodb.com/ibman.html for installation help.\n",
-		err);
+  "  InnoDB: Operating system error number %lu in a file operation.\n", (ulong) err);
 
 		if (err == ERROR_PATH_NOT_FOUND) {
-		         fprintf(stderr,
-  "InnoDB: The error means the system cannot find the path specified.\n"
-  "InnoDB: In installation you must create directories yourself, InnoDB\n"
-  "InnoDB: does not create them.\n");
+			fprintf(stderr,
+  "InnoDB: The error means the system cannot find the path specified.\n");
+
+			if (srv_is_being_started) {
+				fprintf(stderr,
+  "InnoDB: If you are installing InnoDB, remember that you must create\n"
+  "InnoDB: directories yourself, InnoDB does not create them.\n");
+			}
 		} else if (err == ERROR_ACCESS_DENIED) {
-		         fprintf(stderr,
+			fprintf(stderr,
   "InnoDB: The error means mysqld does not have the access rights to\n"
   "InnoDB: the directory. It may also be you have created a subdirectory\n"
   "InnoDB: of the same name as a data file.\n"); 
 		} else {
-			 fprintf(stderr,
+			fprintf(stderr,
   "InnoDB: See section 13.2 at http://www.innodb.com/ibman.html\n"
   "InnoDB: about operating system error numbers.\n");
 		}
@@ -246,30 +253,33 @@ os_file_get_last_error(void)
 #else
 	err = (ulint) errno;
 
-	if (err != ENOSPC && err != EEXIST) {
-		ut_print_timestamp(stderr);
+	if (report_all_errors
+	    || (err != ENOSPC && err != EEXIST)) {
 
+		ut_print_timestamp(stderr);
 	     	fprintf(stderr,
-  "  InnoDB: Operating system error number %lu in a file operation.\n"
-  "InnoDB: See http://www.innodb.com/ibman.html for installation help.\n",
-		err);
+  "  InnoDB: Operating system error number %lu in a file operation.\n", (ulong) err);
 
 		if (err == ENOENT) {
-		         fprintf(stderr,
-  "InnoDB: The error means the system cannot find the path specified.\n"
-  "InnoDB: In installation you must create directories yourself, InnoDB\n"
-  "InnoDB: does not create them.\n");
+			fprintf(stderr,
+  "InnoDB: The error means the system cannot find the path specified.\n");
+			
+			if (srv_is_being_started) {
+				fprintf(stderr,
+  "InnoDB: If you are installing InnoDB, remember that you must create\n"
+  "InnoDB: directories yourself, InnoDB does not create them.\n");
+			}
 		} else if (err == EACCES) {
-		         fprintf(stderr,
+			fprintf(stderr,
   "InnoDB: The error means mysqld does not have the access rights to\n"
   "InnoDB: the directory.\n");
 		} else {
-			 if (strerror((int)err) != NULL) {
+			if (strerror((int)err) != NULL) {
 				fprintf(stderr,
   "InnoDB: Error number %lu means '%s'.\n", err, strerror((int)err));
-			 }
+			}
 
-			 fprintf(stderr,
+			fprintf(stderr,
   "InnoDB: See also section 13.2 at http://www.innodb.com/ibman.html\n"
   "InnoDB: about operating system error numbers.\n");
 		}
@@ -309,7 +319,7 @@ os_file_handle_error(
 
 	UT_NOT_USED(file);
 
-	err = os_file_get_last_error();
+	err = os_file_get_last_error(FALSE);
 	
 	if (err == OS_FILE_DISK_FULL) {
 		/* We only print a warning about disk full once */
@@ -336,6 +346,7 @@ os_file_handle_error(
 		return(FALSE);
 
 	} else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
+
 		return(TRUE);
 
 	} else if (err == OS_FILE_ALREADY_EXISTS) {
@@ -359,6 +370,68 @@ os_file_handle_error(
 }
 
 /********************************************************************
+Does error handling when a file operation fails. */
+static
+ibool
+os_file_handle_error_no_exit(
+/*=========================*/
+				/* out: TRUE if the caller should retry
+				the operation, FALSE otherwise */
+	os_file_t	file,	/* in: file pointer; not used */
+	char*		name,	/* in: name of a file or NULL */
+	const char*	operation)/* in: name of the failed operation */
+{
+	ulint	last_error;
+
+	UT_NOT_USED(file);
+
+	last_error = os_file_get_last_error(FALSE);
+
+	if (last_error == OS_FILE_DISK_FULL) {
+		/* The disk full warning is printed only once */
+
+		if (!os_has_said_disk_full) {
+			if (name) {
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+	"  InnoDB: Encountered a problem with file %s\n", name);
+			}
+
+			ut_print_timestamp(stderr);
+			fprintf(stderr,
+	"  InnoDB: Disk is full. Try to clean the disk to free space.\n");
+
+			os_has_said_disk_full = TRUE;
+
+			fflush(stderr);
+		}
+
+		return(FALSE);
+	}
+
+	if (last_error == OS_FILE_AIO_RESOURCES_RESERVED) {
+		/* This is the only error after which a retry is requested */
+
+		return(TRUE);
+	}
+
+	if (last_error != OS_FILE_ALREADY_EXISTS) {
+		/* Any other error: print the file name, if known, and the
+		operation that failed */
+
+		if (name) {
+			fprintf(stderr, "InnoDB: File name %s\n", name);
+		}
+
+		fprintf(stderr, "InnoDB: File operation call: '%s'.\n",
+							       operation);
+	}
+
+	/* No retry is requested for any of the remaining errors */
+	return(FALSE);
+}
+
+/********************************************************************
 Creates the seek mutexes used in positioned reads and writes. */
 
 void
@@ -374,6 +447,262 @@ os_io_init_simple(void)
 	}
 }
 
+/***************************************************************************
+Opens a directory stream corresponding to the directory named by the dirname
+argument. The stream is positioned at the first entry. In both Unix and
+Windows the '.' and '..' items at the start of the directory listing are
+skipped automatically. */
+
+os_file_dir_t
+os_file_opendir(
+/*============*/
+				/* out: directory stream, NULL if error */
+	char*	dirname,	/* in: directory name; it must not contain
+				a trailing '\' or '/' */
+	ibool	error_is_fatal)	/* in: TRUE if we should treat an error as a
+				fatal error; if we try to open symlinks then
+				we do not wish a fatal error if it happens
+				not to be a directory */
+{
+	os_file_dir_t		dir;
+#ifdef __WIN__
+	LPWIN32_FIND_DATA	first_entry;
+	char			path[OS_FILE_MAX_PATH + 3];
+
+	ut_a(strlen(dirname) < OS_FILE_MAX_PATH);
+
+	/* The search pattern is the directory name followed by "\*" */
+
+	strcpy(path, dirname);
+	strcat(path, "\\*");
+
+	/* In Windows opening the stream also retrieves the first entry in the
+	directory. It is '.', which is skipped anyway, so it is thrown away. */
+
+	first_entry = ut_malloc(sizeof(WIN32_FIND_DATA));
+
+	dir = FindFirstFile(path, first_entry);
+
+	ut_free(first_entry);
+
+	if (dir != INVALID_HANDLE_VALUE) {
+		return(dir);
+	}
+
+	if (error_is_fatal) {
+		os_file_handle_error(NULL, dirname, "opendir");
+	}
+
+	return(NULL);
+#else
+	dir = opendir(dirname);
+
+	if (error_is_fatal && dir == NULL) {
+		os_file_handle_error(0, dirname, "opendir");
+	}
+
+	return(dir);
+#endif
+}
+
+/***************************************************************************
+Closes a directory stream. A failure is handed over to
+os_file_handle_error_no_exit(). */
+
+int
+os_file_closedir(
+/*=============*/
+				/* out: 0 if success, -1 if failure */
+	os_file_dir_t	dir)	/* in: directory stream */
+{
+#ifdef __WIN__
+	/* FindClose() returns a nonzero value if it succeeds */
+
+	if (FindClose(dir)) {
+
+		return(0);
+	}
+
+	os_file_handle_error_no_exit(NULL, NULL, "closedir");
+
+	return(-1);
+#else
+	int	rcode;
+
+	rcode = closedir(dir);
+
+	if (rcode != 0) {
+		os_file_handle_error_no_exit(0, NULL, "closedir");
+	}
+
+	return(rcode);
+#endif
+}
+
+/***************************************************************************
+This function returns information of the next file in the directory. We jump
+over the '.' and '..' entries in the directory. */
+
+int
+os_file_readdir_next_file(
+/*======================*/
+				/* out: 0 if ok, -1 if error, 1 if at the end
+				of the directory */
+	char*		dirname,/* in: directory name or path */
+	os_file_dir_t	dir,	/* in: directory stream */
+	os_file_stat_t*	info)	/* in/out: buffer where the info is returned */
+{
+#ifdef __WIN__
+	LPWIN32_FIND_DATA	lpFindFileData;
+	BOOL			ret;
+
+	lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA));
+next_file:
+	ret = FindNextFile(dir, lpFindFileData);
+
+	if (!ret) {
+		/* Inspect and report the error code BEFORE calling ut_free(),
+		so that the free cannot overwrite the thread's last error */
+		if (GetLastError() == ERROR_NO_MORE_FILES) {
+			ut_free(lpFindFileData);
+			return(1);
+		}
+
+		os_file_handle_error_no_exit(NULL, dirname,
+						"readdir_next_file");
+		ut_free(lpFindFileData);
+		return(-1);
+	}
+
+	ut_a(strlen(lpFindFileData->cFileName) < OS_FILE_MAX_PATH);
+
+	if (strcmp(lpFindFileData->cFileName, ".") == 0
+	    || strcmp(lpFindFileData->cFileName, "..") == 0) {
+		goto next_file;
+	}
+
+	strcpy(info->name, lpFindFileData->cFileName);
+
+	info->size = (ib_longlong)(lpFindFileData->nFileSizeLow)
+		     + (((ib_longlong)(lpFindFileData->nFileSizeHigh)) << 32);
+
+	if (lpFindFileData->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) {
+/* TODO: test Windows symlinks */
+/* TODO: MySQL has apparently its own symlink implementation in Windows,
+dbname.sym can redirect a database directory:
+http://www.mysql.com/doc/en/Windows_symbolic_links.html */
+		info->type = OS_FILE_TYPE_LINK;
+	} else if (lpFindFileData->dwFileAttributes
+						& FILE_ATTRIBUTE_DIRECTORY) {
+		info->type = OS_FILE_TYPE_DIR;
+	} else {
+		/* FILE_ATTRIBUTE_NORMAL is set only when a file has no other
+		attribute (an ordinary file often has the archive attribute),
+		so everything that is not a link or a directory is a file */
+		info->type = OS_FILE_TYPE_FILE;
+	}
+
+	ut_free(lpFindFileData);
+
+	return(0);
+#else
+	struct dirent*	ent;
+	char*		full_path;
+	int		ret;
+	struct stat	statinfo;
+next_file:
+	ent = readdir(dir);
+
+	if (ent == NULL) {
+		return(1);
+	}
+
+	ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);
+
+	if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
+		goto next_file;
+	}
+
+	strcpy(info->name, ent->d_name);
+
+	full_path = ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10);
+
+	sprintf(full_path, "%s/%s", dirname, ent->d_name);
+
+	/* NOTE(review): stat() follows symbolic links, so the S_ISLNK() test
+	below is presumably never true; lstat() would be needed to detect a
+	link itself - confirm what callers expect before changing this */
+	ret = stat(full_path, &statinfo);
+
+	if (ret) {
+		os_file_handle_error_no_exit(0, full_path, "stat");
+		ut_free(full_path);
+		return(-1);
+	}
+
+	info->size = (ib_longlong)statinfo.st_size;
+
+	if (S_ISDIR(statinfo.st_mode)) {
+		info->type = OS_FILE_TYPE_DIR;
+	} else if (S_ISLNK(statinfo.st_mode)) {
+		info->type = OS_FILE_TYPE_LINK;
+	} else if (S_ISREG(statinfo.st_mode)) {
+		info->type = OS_FILE_TYPE_FILE;
+	} else {
+		info->type = OS_FILE_TYPE_UNKNOWN;
+	}
+
+	ut_free(full_path);
+
+	return(0);
+#endif
+}
+
+/*********************************************************************
+This function attempts to create a directory named pathname. The new directory
+gets default permissions. On Unix the permissions are (0770 & ~umask). If the
+directory exists already, nothing is done and the call succeeds, unless the
+fail_if_exists argument is true. */
+
+ibool
+os_file_create_directory(
+/*=====================*/
+				/* out: TRUE if call succeeds, FALSE on
+				error */
+	char*	pathname,	/* in: directory name as null-terminated
+				string */
+	ibool	fail_if_exists)	/* in: if TRUE, pre-existing directory is
+				treated as an error. */
+{
+#ifdef __WIN__
+	BOOL	rcode;
+
+	rcode = CreateDirectory(pathname, NULL);
+	/* CreateDirectory() reports an existing directory with the error
+	code ERROR_ALREADY_EXISTS, not with ERROR_FILE_EXISTS */
+	if (!(rcode != 0 ||
+	      (GetLastError() == ERROR_ALREADY_EXISTS && !fail_if_exists))) {
+		/* failure */
+		os_file_handle_error(NULL, pathname, "CreateDirectory");
+		return(FALSE);
+	}
+
+	return(TRUE);
+#else
+	int	rcode;
+
+	rcode = mkdir(pathname, 0770);
+
+	if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
+		/* failure */
+		os_file_handle_error(0, pathname, "mkdir");
+		return(FALSE);
+	}
+
+	return(TRUE);
+#endif
+}
+
 /********************************************************************
 A simple function to open or create a file. */
 
@@ -381,7 +710,8 @@ os_file_t
 os_file_create_simple(
 /*==================*/
 			/* out, own: handle to the file, not defined if error,
-			error number can be retrieved with os_get_last_error */
+			error number can be retrieved with
+			os_file_get_last_error */
 	char*	name,	/* in: name of the file or path as a null-terminated
 			string */
 	ulint	create_mode,/* in: OS_FILE_OPEN if an existing file is opened
@@ -493,13 +823,16 @@ os_file_t
 os_file_create_simple_no_error_handling(
 /*====================================*/
 			/* out, own: handle to the file, not defined if error,
-			error number can be retrieved with os_get_last_error */
+			error number can be retrieved with
+			os_file_get_last_error */
 	char*	name,	/* in: name of the file or path as a null-terminated
 			string */
 	ulint	create_mode,/* in: OS_FILE_OPEN if an existing file is opened
 			(if does not exist, error), or OS_FILE_CREATE if a new
 			file is created (if exists, error) */
-	ulint	access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */
+	ulint	access_type,/* in: OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or
+			OS_FILE_READ_ALLOW_DELETE; the last option is used by
+			a backup program reading the file */
 	ibool*	success)/* out: TRUE if succeed, FALSE if error */
 {
 #ifdef __WIN__
@@ -507,6 +840,7 @@ os_file_create_simple_no_error_handling(
 	DWORD		create_flag;
 	DWORD		access;
 	DWORD		attributes	= 0;
+	DWORD		share_mode	= FILE_SHARE_READ;
 	
 	ut_a(name);
 
@@ -523,6 +857,13 @@ os_file_create_simple_no_error_handling(
 		access = GENERIC_READ;
 	} else if (access_type == OS_FILE_READ_WRITE) {
 		access = GENERIC_READ | GENERIC_WRITE;
+	} else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
+		access = GENERIC_READ;
+		share_mode = FILE_SHARE_DELETE | FILE_SHARE_READ
+			   | FILE_SHARE_WRITE;  /* A backup program has to give
+						mysqld the maximum freedom to
+						do what it likes with the
+						file */
 	} else {
 		access = 0;
 		ut_error;
@@ -530,8 +871,7 @@ os_file_create_simple_no_error_handling(
 
 	file = CreateFile(name,
 			access,
-			FILE_SHARE_READ,/* file can be read also by other
-					processes */
+			share_mode,
 			NULL,	/* default security attributes */
 			create_flag,
 			attributes,
@@ -587,13 +927,16 @@ os_file_t
 os_file_create(
 /*===========*/
 			/* out, own: handle to the file, not defined if error,
-			error number can be retrieved with os_get_last_error */
+			error number can be retrieved with
+			os_file_get_last_error */
 	char*	name,	/* in: name of the file or path as a null-terminated
 			string */
 	ulint	create_mode, /* in: OS_FILE_OPEN if an existing file is opened
 			(if does not exist, error), or OS_FILE_CREATE if a new
 			file is created (if exists, error), OS_FILE_OVERWRITE
-			if a new is created or an old overwritten */
+			if a new is created or an old overwritten,
+			OS_FILE_OPEN_RAW, if a raw device or disk partition
+			should be opened */
 	ulint	purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
 			is desired, OS_FILE_NORMAL, if any normal file;
 			NOTE that it also depends on type, os_aio_.. and srv_..
@@ -605,14 +948,17 @@ os_file_create(
 {
 #ifdef __WIN__
 	os_file_t	file;
+	DWORD		share_mode	= FILE_SHARE_READ;
 	DWORD		create_flag;
 	DWORD		attributes;
 	ibool		retry;
-	
 try_again:	
 	ut_a(name);
 
-	if (create_mode == OS_FILE_OPEN) {
+	if (create_mode == OS_FILE_OPEN_RAW) {
+		create_flag = OPEN_EXISTING;
+		share_mode = FILE_SHARE_WRITE;
+	} else if (create_mode == OS_FILE_OPEN) {
 		create_flag = OPEN_EXISTING;
 	} else if (create_mode == OS_FILE_CREATE) {
 		create_flag = CREATE_NEW;
@@ -662,14 +1008,17 @@ try_again:
 	file = CreateFile(name,
 			GENERIC_READ | GENERIC_WRITE, /* read and write
 							access */
-			FILE_SHARE_READ,/* File can be read also by other
+			share_mode,     /* File can be read also by other
 					processes; we must give the read
 					permission because of ibbackup. We do
 					not give the write permission to
 					others because if one would succeed to
 					start 2 instances of mysqld on the
 					SAME files, that could cause severe
-					database corruption! */
+					database corruption! When opening
+					raw disk partitions, Microsoft manuals
+					say that we must give also the write
+					permission. */
 			NULL,	/* default security attributes */
 			create_flag,
 			attributes,
@@ -679,8 +1028,8 @@ try_again:
 		*success = FALSE;
 
 		retry = os_file_handle_error(file, name,
-				create_mode == OS_FILE_OPEN ?
-				"open" : "create");
+				create_mode == OS_FILE_CREATE ?
+				"create" : "open");
 		if (retry) {
 			goto try_again;
 		}
@@ -700,17 +1049,14 @@ try_again:
 try_again:	
 	ut_a(name);
 
-	if (create_mode == OS_FILE_OPEN) {
+	if (create_mode == OS_FILE_OPEN || create_mode == OS_FILE_OPEN_RAW) {
 		mode_str = "OPEN";
-
 		create_flag = O_RDWR;
 	} else if (create_mode == OS_FILE_CREATE) {
 		mode_str = "CREATE";
-
 		create_flag = O_RDWR | O_CREAT | O_EXCL;
 	} else if (create_mode == OS_FILE_OVERWRITE) {
 		mode_str = "OVERWRITE";
-
 		create_flag = O_RDWR | O_CREAT | O_TRUNC;
 	} else {
 		create_flag = 0;
@@ -767,8 +1113,8 @@ try_again:
 		*success = FALSE;
 
 		retry = os_file_handle_error(file, name,
-				create_mode == OS_FILE_OPEN ?
-				"open" : "create");
+				create_mode == OS_FILE_CREATE ?
+				"create" : "open");
 		if (retry) {
 			goto try_again;
 		}
@@ -781,6 +1127,168 @@ try_again:
 }
 
 /***************************************************************************
+Deletes a file if it exists. The file has to be closed before calling this. */
+
+ibool
+os_file_delete_if_exists(
+/*=====================*/
+			/* out: TRUE if deleted or if it did not exist */
+	char*	name)	/* in: file path as a null-terminated string */
+{
+#ifdef __WIN__
+	BOOL	ret;
+	ulint	count	= 0;
+loop:
+	/* In Windows, deleting an .ibd file may fail if ibbackup is copying
+	it */
+
+	ret = DeleteFile((LPCTSTR)name);
+
+	if (ret) {
+		return(TRUE);
+	}
+
+	if (GetLastError() == ERROR_FILE_NOT_FOUND
+	    || GetLastError() == ERROR_PATH_NOT_FOUND) {
+		/* the file does not exist, this is not an error */
+		return(TRUE);
+	}
+
+	count++;
+
+	if (count > 100 && 0 == (count % 10)) {
+		fprintf(stderr,
+"InnoDB: Warning: cannot delete file %s\n"
+"InnoDB: Are you running ibbackup to back up the file?\n", name);
+
+		os_file_get_last_error(TRUE); /* print error information */
+	}
+
+	os_thread_sleep(1000000);	/* sleep for a second */
+
+	if (count > 2000) {
+		/* give up after more than 2000 failed attempts */
+		return(FALSE);
+	}
+
+	goto loop;
+#else
+	int	ret;
+
+	ret = unlink((const char*)name);
+
+	if (ret != 0 && errno != ENOENT) {
+		os_file_handle_error(0, name, "delete");
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+#endif
+}
+
+/***************************************************************************
+Deletes a file. The file has to be closed before calling this. */
+
+ibool
+os_file_delete(
+/*===========*/
+			/* out: TRUE if success */
+	char*	name)	/* in: file path as a null-terminated string */
+{
+#ifdef __WIN__
+	BOOL	ret;
+	ulint	count	= 0;
+loop:
+	/* In Windows, deleting an .ibd file may fail if ibbackup is copying
+	it */
+
+	ret = DeleteFile((LPCTSTR)name);
+
+	if (ret) {
+		return(TRUE);
+	}
+
+	if (GetLastError() == ERROR_FILE_NOT_FOUND
+	    || GetLastError() == ERROR_PATH_NOT_FOUND) {
+		/* If the file does not exist, we classify this as a 'mild'
+		error and return without retrying */
+		return(FALSE);
+	}
+
+	count++;
+
+	if (count > 100 && 0 == (count % 10)) {
+		fprintf(stderr,
+"InnoDB: Warning: cannot delete file %s\n"
+"InnoDB: Are you running ibbackup to back up the file?\n", name);
+
+		os_file_get_last_error(TRUE); /* print error information */
+	}
+
+	os_thread_sleep(1000000);	/* sleep for a second */
+
+	if (count > 2000) {
+		/* give up after more than 2000 failed attempts */
+		return(FALSE);
+	}
+
+	goto loop;
+#else
+	int	ret;
+
+	ret = unlink((const char*)name);
+
+	if (ret != 0) {
+		os_file_handle_error(0, name, "delete");
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+#endif
+}
+
+/***************************************************************************
+Renames a file (can also move it to another directory). It is safest that the
+file is closed before calling this function. */
+
+ibool
+os_file_rename(
+/*===========*/
+				/* out: TRUE if success */
+	char*	oldpath,	/* in: old file path as a null-terminated
+				string */
+	char*	newpath)	/* in: new file path as a null-terminated string */
+{
+#ifdef __WIN__
+	BOOL	ret;
+
+	ret = MoveFile((LPCTSTR)oldpath, (LPCTSTR)newpath);
+
+	if (ret) {
+		return(TRUE);
+	}
+	/* Report the failure under the same operation name as on Unix */
+	os_file_handle_error(NULL, oldpath, "rename");
+
+	return(FALSE);
+#else
+	int	ret;
+
+	ret = rename((const char*)oldpath, (const char*)newpath);
+
+	if (ret != 0) {
+		os_file_handle_error(0, oldpath, "rename");
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+#endif
+}
+
+/***************************************************************************
 Closes a file handle. In case of error, error number can be retrieved with
 os_file_get_last_error. */
 
@@ -802,6 +1310,7 @@ os_file_close(
 	}
 
 	os_file_handle_error(file, NULL, "close");
+
 	return(FALSE);
 #else
 	int	ret;
@@ -810,6 +1319,7 @@ os_file_close(
 
 	if (ret == -1) {
 		os_file_handle_error(file, NULL, "close");
+
 		return(FALSE);
 	}
 
@@ -889,7 +1399,7 @@ os_file_get_size(
 	}
 	
 	if (sizeof(off_t) > 4) {
-	        *size = (ulint)(offs & 0xFFFFFFFF);
+	        *size = (ulint)(offs & 0xFFFFFFFFUL);
 		*size_high = (ulint)(offs >> 32);
 	} else {
 		*size = (ulint) offs;
@@ -901,6 +1411,29 @@ os_file_get_size(
 }
 
 /***************************************************************************
+Gets file size as a 64-bit integer ib_longlong. */
+
+ib_longlong
+os_file_get_size_as_iblonglong(
+/*===========================*/
+				/* out: size in bytes, -1 if error */
+	os_file_t	file)	/* in: handle to a file */
+{
+	ulint	size;		/* low part returned by os_file_get_size */
+	ulint	size_high;	/* high part returned by os_file_get_size */
+	ibool	success;
+
+	success = os_file_get_size(file, &size, &size_high);
+
+	if (!success) {
+		/* os_file_get_size failed: report it with the -1 sentinel */
+		return(-1);
+	}
+	/* Combine the two parts: size_high supplies bits 32 and above */
+	return((((ib_longlong)size_high) << 32) + (ib_longlong)size);
+}
+
+/***************************************************************************
 Sets a file size. This function can be used to extend or truncate a file. */
 
 ibool
@@ -966,7 +1499,7 @@ os_file_set_size(
 		    != offset / (ib_longlong)(100 * 1024 * 1024)) {
 
 		        fprintf(stderr, " %lu00",
-				(ulint)((offset + n_bytes)
+				(ulong) ((offset + n_bytes)
 					/ (ib_longlong)(100 * 1024 * 1024)));
 		}
 		
@@ -1012,6 +1545,15 @@ os_file_flush(
 		return(TRUE);
 	}
 
+	/* Since Windows returns ERROR_INVALID_FUNCTION if the 'file' is
+	actually a raw device, we choose to ignore that error if we are using
+	raw disks */
+
+	if (srv_start_raw_disk_in_use && GetLastError()
+						== ERROR_INVALID_FUNCTION) {
+	        return(TRUE);
+	}
+
 	os_file_handle_error(file, NULL, "flush");
 
 	/* It is a fatal error if a file flush does not succeed, because then
@@ -1035,9 +1577,10 @@ os_file_flush(
 	}
 	
 	/* Since Linux returns EINVAL if the 'file' is actually a raw device,
-	we choose to ignore that error */
+	we choose to ignore that error if we are using raw disks */
+
+	if (srv_start_raw_disk_in_use && errno == EINVAL) {
 
-	if (errno == EINVAL) {
 	        return(TRUE);
 	}
 
@@ -1075,7 +1618,7 @@ os_file_pread(
         off_t	offs;
 	ssize_t	n_bytes;
 
-	ut_a((offset & 0xFFFFFFFF) == offset);
+	ut_a((offset & 0xFFFFFFFFUL) == offset);
         
         /* If off_t is > 4 bytes in size, then we assume we can pass a
 	64-bit address */
@@ -1151,7 +1694,7 @@ os_file_pwrite(
 	ssize_t	ret;
         off_t	offs;
 
-	ut_a((offset & 0xFFFFFFFF) == offset);
+	ut_a((offset & 0xFFFFFFFFUL) == offset);
 
         /* If off_t is > 4 bytes in size, then we assume we can pass a
 	64-bit address */
@@ -1255,7 +1798,7 @@ os_file_read(
 	ibool		retry;
 	ulint		i;
 	
-	ut_a((offset & 0xFFFFFFFF) == offset);
+	ut_a((offset & 0xFFFFFFFFUL) == offset);
 
 	os_n_file_reads++;
 	os_bytes_read_since_printout += n;
@@ -1315,9 +1858,9 @@ error_handling:
 	fprintf(stderr,
 "InnoDB: Fatal error: cannot read from file. OS error number %lu.\n",
 #ifdef __WIN__
-		(ulint)GetLastError()
+		(ulong) GetLastError()
 #else
-		(ulint)errno
+		(ulong) errno
 #endif
 		);
 	fflush(stderr);
@@ -1328,6 +1871,92 @@ error_handling:
 }
 
 /***********************************************************************
+Requests a synchronous positioned read operation. This function does not do
+any error handling. In case of error it returns FALSE. */
+
+ibool
+os_file_read_no_error_handling(
+/*===========================*/
+				/* out: TRUE if request was
+				successful, FALSE if fail */
+	os_file_t	file,	/* in: handle to a file */
+	void*		buf,	/* in: buffer where to read */
+	ulint		offset,	/* in: least significant 32 bits of file
+				offset where to read */
+	ulint		offset_high, /* in: most significant 32 bits of
+				offset */
+	ulint		n)	/* in: number of bytes to read */
+{
+#ifdef __WIN__
+	BOOL		ret;
+	DWORD		len;
+	DWORD		ret2;
+	DWORD		low;
+	DWORD		high;
+	ibool		retry;
+	ulint		i;
+	/* The low part of the offset must fit in 32 bits */
+	ut_a((offset & 0xFFFFFFFFUL) == offset);
+
+	os_n_file_reads++;
+	os_bytes_read_since_printout += n;
+
+try_again:
+	ut_ad(file);
+	ut_ad(buf);
+	ut_ad(n > 0);
+
+	low = offset;
+	high = offset_high;
+
+	/* Protect the seek / read operation with a mutex */
+	i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
+
+	os_mutex_enter(os_file_seek_mutexes[i]);
+
+	ret2 = SetFilePointer(file, low, &high, FILE_BEGIN);
+	/* A return of 0xFFFFFFFF is an error only if GetLastError says so */
+	if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) {
+		/* Seek failed: release the mutex before error handling */
+		os_mutex_exit(os_file_seek_mutexes[i]);
+
+		goto error_handling;
+	}
+
+	ret = ReadFile(file, buf, n, &len, NULL);
+
+	os_mutex_exit(os_file_seek_mutexes[i]);
+	/* Success only if all n requested bytes were read */
+	if (ret && len == n) {
+		return(TRUE);
+	}
+#else
+	ibool	retry;
+	ssize_t	ret;
+
+	os_bytes_read_since_printout += n;
+
+try_again:
+	ret = os_file_pread(file, buf, n, offset, offset_high);
+
+	if ((ulint)ret == n) {
+		/* Success only if all n requested bytes were read */
+		return(TRUE);
+	}
+#endif
+#ifdef __WIN__
+error_handling:
+#endif
+	retry = os_file_handle_error_no_exit(file, NULL, "read");
+	/* The handler's return value decides whether the read is retried */
+	if (retry) {
+		goto try_again;
+	}
+
+	return(FALSE);
+}
+
+/***********************************************************************
 Requests a synchronous write operation. */
 
 ibool
@@ -1384,8 +2013,8 @@ retry:
 "InnoDB: offset %lu %lu. Operating system error number %lu.\n"
 "InnoDB: Look from section 13.2 at http://www.innodb.com/ibman.html\n"
 "InnoDB: what the error number means.\n",
-			name, offset_high, offset,
-			(ulint)GetLastError());
+			name, (ulong) offset_high, (ulong) offset,
+			(ulong) GetLastError());
 
 		return(FALSE);
 	} 
@@ -1431,12 +2060,12 @@ retry:
 "InnoDB: Operating system error number %lu.\n"
 "InnoDB: Check that your OS and file system support files of this size.\n"
 "InnoDB: Check also that the disk is not full or a disk quota exceeded.\n",
-			name, offset_high, offset, n, (ulint)len,
-			err);
+			name, (ulong) offset_high, (ulong) offset,
+			(ulong) n, (ulong) len, (ulong) err);
 
 		if (strerror((int)err) != NULL) {
 			fprintf(stderr,
-"InnoDB: Error number %lu means '%s'.\n", err, strerror((int)err));
+"InnoDB: Error number %lu means '%s'.\n", (ulong) err, strerror((int)err));
 		}
 
 		fprintf(stderr,
@@ -2482,7 +3111,7 @@ os_aio_simulated_handle(
 	ulint		biggest_age;
 	ulint		age;
 	byte*		combined_buf;
-	byte*		combined_buf2= 0;	/* Remove warning */
+	byte*		combined_buf2;
 	ibool		ret;
 	ulint		n;
 	ulint		i;
@@ -2522,7 +3151,7 @@ restart:
 
 			if (os_aio_print_debug) {
 				fprintf(stderr,
-"InnoDB: i/o for slot %lu already done, returning\n", i);
+"InnoDB: i/o for slot %lu already done, returning\n", (ulong) i);
 			}
 
 			ret = TRUE;
@@ -2669,8 +3298,8 @@ consecutive_loop:
 	if (os_aio_print_debug) {
 		fprintf(stderr,
 "InnoDB: doing i/o of type %lu at offset %lu %lu, length %lu\n",
-			slot->type, slot->offset_high, slot->offset,
-			total_len);
+			(ulong) slot->type, (ulong) slot->offset_high,
+			(ulong) slot->offset, (ulong) total_len);
 	}
 
 	/* Do the i/o with ordinary, synchronous i/o functions: */
@@ -2680,8 +3309,9 @@ consecutive_loop:
 			    || (slot->offset % UNIV_PAGE_SIZE != 0)) {
 				fprintf(stderr,
 "InnoDB: Error: trying a displaced write to %s %lu %lu, len %lu\n",
-					slot->name, slot->offset_high,
-					slot->offset, total_len);
+					slot->name, (ulong) slot->offset_high,
+					(ulong) slot->offset,
+					(ulong) total_len);
 				ut_error;
 			}
 			  
@@ -2780,7 +3410,7 @@ recommended_sleep:
 	if (os_aio_print_debug) {
 		fprintf(stderr,
 "InnoDB: i/o handler thread for i/o segment %lu wakes up\n",
-			global_segment);
+			(ulong) global_segment);
 	}
 	
 	goto restart;
@@ -2862,7 +3492,8 @@ os_aio_print(
 	}
 
 	for (i = 0; i < srv_n_file_io_threads; i++) {
-		buf += sprintf(buf, "I/O thread %lu state: %s (%s)\n", i,
+		buf += sprintf(buf, "I/O thread %lu state: %s (%s)\n",
+			                (ulong) i,
 					srv_io_thread_op_info[i],
 					srv_io_thread_function[i]);
 	}
@@ -2894,7 +3525,7 @@ loop:
 
 	ut_a(array->n_reserved == n_reserved);
 
-	buf += sprintf(buf, " %lu", n_reserved);
+	buf += sprintf(buf, " %lu", (ulong) n_reserved);
 	
 	os_mutex_exit(array->mutex);
 
@@ -2934,15 +3565,18 @@ loop:
 
 	buf += sprintf(buf,
 		"Pending flushes (fsync) log: %lu; buffer pool: %lu\n",
-	       fil_n_pending_log_flushes, fil_n_pending_tablespace_flushes);
+	       (ulong) fil_n_pending_log_flushes,
+	       (ulong) fil_n_pending_tablespace_flushes);
 	buf += sprintf(buf,
 		"%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n",
-		os_n_file_reads, os_n_file_writes, os_n_fsyncs);
+		(ulong) os_n_file_reads, (ulong) os_n_file_writes,
+		(ulong) os_n_fsyncs);
 
 	if (os_file_n_pending_preads != 0 || os_file_n_pending_pwrites != 0) {
 	        buf += sprintf(buf,
 		    "%lu pending preads, %lu pending pwrites\n",
-		    os_file_n_pending_preads, os_file_n_pending_pwrites);
+		    (ulong) os_file_n_pending_preads,
+		    (ulong) os_file_n_pending_pwrites);
 	}
 
 	if (os_n_file_reads == os_n_file_reads_old) {
@@ -2956,7 +3590,7 @@ loop:
 "%.2f reads/s, %lu avg bytes/read, %.2f writes/s, %.2f fsyncs/s\n",
 		(os_n_file_reads - os_n_file_reads_old)
 		/ time_elapsed,
-		(ulint)avg_bytes_read,
+		(ulong)avg_bytes_read,
 		(os_n_file_writes - os_n_file_writes_old)
 		/ time_elapsed,
 		(os_n_fsyncs - os_n_fsyncs_old)
diff --git a/innobase/os/os0proc.c b/innobase/os/os0proc.c
index 2099d62e7fd..85791c55348 100644
--- a/innobase/os/os0proc.c
+++ b/innobase/os/os0proc.c
@@ -12,11 +12,469 @@ Created 9/30/1995 Heikki Tuuri
 #include "os0proc.ic"
 #endif
 
+#include "ut0mem.h"
+#include "ut0byte.h"
+
+
+/*
+How to get AWE to compile on Windows?
+-------------------------------------
+
+In the Visual C++ project settings of the innobase project,
+__WIN2000__ has to be defined.
+
+The Visual C++ has to be relatively recent and _WIN32_WINNT has to be
+defined to a value >= 0x0500 when windows.h is included.
+
+#define _WIN32_WINNT	0x0500
+
+Where does AWE work?
+-------------------
+
+See the error message in os_awe_allocate_physical_mem().
+
+How to assign privileges for mysqld to use AWE?
+-----------------------------------------------
+
+See the error message in os_awe_enable_lock_pages_in_mem().
+
+Use Windows AWE functions in this order
+---------------------------------------
+
+(1) os_awe_enable_lock_pages_in_mem();
+(2) os_awe_allocate_physical_mem();
+(3) os_awe_allocate_virtual_mem_window();
+(4) os_awe_map_physical_mem_to_window().
+
+To test 'AWE' in a computer which does not have the AWE API,
+you can compile with UNIV_SIMULATE_AWE defined in this file.
+*/
+
+#ifdef UNIV_SIMULATE_AWE
+/* If we simulate AWE, we allocate the 'physical memory' here */
+byte*		os_awe_simulate_mem;	/* 4096-aligned simulated memory */
+ulint		os_awe_simulate_mem_size; /* size of the above in bytes */
+os_awe_t*	os_awe_simulate_page_info; /* page info array handed out */
+byte*		os_awe_simulate_window;	/* 4096-aligned simulated window */
+ulint		os_awe_simulate_window_size; /* window size in bytes */
+/* In simulated AWE the following contains a NULL pointer or a pointer
+to a mapped 'physical page' for each 4 kB page in the AWE window */
+byte**		os_awe_simulate_map;
+#endif
+
+#ifdef __WIN2000__
+os_awe_t*	os_awe_page_info;	/* page info array of allocated pages */
+ulint		os_awe_n_pages;		/* number of pages requested */
+byte*		os_awe_window;		/* start of the virtual window */
+ulint		os_awe_window_size;	/* size of the window in bytes */
+#endif
+
+/********************************************************************
+Windows AWE support. Tries to enable the "lock pages in memory" privilege for
+the current process so that the current process can allocate memory-locked
+virtual address space to act as the window where AWE maps physical memory. */
+
+ibool
+os_awe_enable_lock_pages_in_mem(void)
+/*=================================*/
+				/* out: TRUE if success, FALSE if error;
+				prints error info to stderr if no success */
+{
+#ifdef UNIV_SIMULATE_AWE
+
+	return(TRUE);
+
+#elif defined(__WIN2000__)
+  	struct {
+    	DWORD 			Count;
+    	LUID_AND_ATTRIBUTES 	Privilege[1];
+  	} 	Info;
+	HANDLE	hProcess;
+  	HANDLE	Token;
+  	BOOL 	Result;
+
+	hProcess = GetCurrentProcess();
+
+  	/* Open the token of the current process */
+
+  	Result = OpenProcessToken(hProcess,
+                              TOKEN_ADJUST_PRIVILEGES,
+                              &Token);
+  	if (Result != TRUE) {
+    		fprintf(stderr,
+			"InnoDB: AWE: Cannot open process token, error %lu\n",
+			(ulint)GetLastError());
+    		return(FALSE);
+  	}
+
+  	Info.Count = 1;
+
+    	Info.Privilege[0].Attributes = SE_PRIVILEGE_ENABLED;
+
+  	/* Get the local unique identifier (LUID) of the SE_LOCK_MEMORY
+	privilege */
+
+  	Result = LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME,
+                                  &(Info.Privilege[0].Luid));
+  	if (Result != TRUE)  {
+    		fprintf(stderr,
+	"InnoDB: AWE: Cannot get local privilege value for %s, error %lu.\n",
+			SE_LOCK_MEMORY_NAME, (ulint)GetLastError());
+		CloseHandle(Token);	/* only after the error was read */
+    		return(FALSE);
+  	}
+
+  	/* Try to adjust the privilege */
+
+  	Result = AdjustTokenPrivileges(Token, FALSE,
+                                   (PTOKEN_PRIVILEGES)&Info,
+                                   0, NULL, NULL);
+  	/* The call can succeed without granting the privilege: check both */
+  	if (Result != TRUE)  {
+    		fprintf(stderr,
+		"InnoDB: AWE: Cannot adjust process token privileges, error %lu.\n",
+			(ulint)GetLastError());
+		CloseHandle(Token);	/* only after the error was read */
+    		return(FALSE);
+  	} else if (GetLastError() != ERROR_SUCCESS) {
+      		fprintf(stderr,
+"InnoDB: AWE: Cannot enable SE_LOCK_MEMORY privilege, error %lu.\n"
+"InnoDB: In Windows XP Home you cannot use AWE. In Windows 2000 and XP\n"
+"InnoDB: Professional you must go to the Control Panel, to\n"
+"InnoDB: Security Settings, to Local Policies, and enable\n"
+"InnoDB: the 'lock pages in memory' privilege for the user who runs\n"
+"InnoDB: the MySQL server.\n", (ulint)GetLastError());
+		CloseHandle(Token);	/* only after the error was read */
+		return(FALSE);
+	}
+
+	CloseHandle(Token);
+
+	return(TRUE);
+#else
 #ifdef __WIN__
-#include <windows.h>
+	fprintf(stderr,
+"InnoDB: AWE: Error: to use AWE you must use a ...-nt MySQL executable.\n");
+#endif
+	return(FALSE);
 #endif
+}
 
-#include "ut0mem.h"
+/********************************************************************
+Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
+processor. */
+
+ibool
+os_awe_allocate_physical_mem(
+/*=========================*/
+				/* out: TRUE if success, FALSE if error */
+	os_awe_t** page_info,	/* out, own: array of opaque data containing
+				the info for allocated physical memory pages;
+				each allocated 4 kB physical memory page has
+				one slot of type os_awe_t in the array */
+	ulint	  n_megabytes)	/* in: number of megabytes to allocate */
+{
+#ifdef UNIV_SIMULATE_AWE
+	os_awe_simulate_page_info = ut_malloc(sizeof(os_awe_t) *
+		n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE));
+	/* Over-allocate by 4096 bytes so the block can be aligned to 4096 */
+	os_awe_simulate_mem = ut_align(ut_malloc(
+					4096 + 1024 * 1024 * n_megabytes),
+					4096);
+	os_awe_simulate_mem_size = n_megabytes * 1024 * 1024;
+
+	*page_info = os_awe_simulate_page_info;
+
+	return(TRUE);
+
+#elif defined(__WIN2000__)
+	BOOL		bResult;
+  	os_awe_t 	NumberOfPages;		/* Question: why does Windows
+  						use the name ULONG_PTR for
+  						a scalar integer type? Maybe
+  						because we may also refer to
+  						&NumberOfPages? */
+  	os_awe_t 	NumberOfPagesInitial;
+  	SYSTEM_INFO 	sSysInfo;
+  	int 		PFNArraySize;
+
+	if (n_megabytes > 64 * 1024) {
+		/* 64 * 1024 MB is the 64 GB limit named in the message */
+		fprintf(stderr,
+"InnoDB: AWE: Error: tried to allocate %lu MB.\n"
+"InnoDB: AWE cannot allocate more than 64 GB in any computer.\n", n_megabytes);
+
+		return(FALSE);
+	}
+
+  	GetSystemInfo(&sSysInfo);  /* fill the system information structure */
+
+  	if ((ulint)OS_AWE_X86_PAGE_SIZE != (ulint)sSysInfo.dwPageSize) {
+		fprintf(stderr,
+"InnoDB: AWE: Error: this computer has a page size of %lu.\n"
+"InnoDB: Should be 4096 bytes for InnoDB AWE support to work.\n",
+			(ulint)sSysInfo.dwPageSize);
+
+		return(FALSE);
+	}
+
+  	/* Calculate the number of pages of memory to request */
+
+  	NumberOfPages = n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE);
+
+ 	/* Calculate the size of page_info for allocated physical pages */
+
+  	PFNArraySize = NumberOfPages * sizeof(os_awe_t);
+
+   	*page_info = (os_awe_t*)HeapAlloc(GetProcessHeap(), 0, PFNArraySize);
+
+	if (*page_info == NULL) {
+    		fprintf(stderr,
+"InnoDB: AWE: Failed to allocate page info array from process heap, error %lu\n",
+			(ulint)GetLastError());
+
+    		return(FALSE);
+  	}
+
+	ut_total_allocated_memory += PFNArraySize;
+
+  	/* Enable this process' privilege to lock pages to physical memory */
+
+	if (!os_awe_enable_lock_pages_in_mem()) {
+		/* NOTE(review): *page_info is not freed on this path */
+		return(FALSE);
+	}
+
+  	/* Allocate the physical memory */
+
+  	NumberOfPagesInitial = NumberOfPages;
+
+	os_awe_page_info = *page_info;
+	os_awe_n_pages = (ulint)NumberOfPages;
+
+	/* Compilation note: if the compiler complains the function is not
+	defined, see the note at the start of this file */
+
+ 	bResult = AllocateUserPhysicalPages(GetCurrentProcess(),
+                                       &NumberOfPages,
+                                       *page_info);
+  	if (bResult != TRUE) {
+    		fprintf(stderr,
+"InnoDB: AWE: Cannot allocate physical pages, error %lu.\n",
+			(ulint)GetLastError());
+
+    		return(FALSE);
+  	}
+	/* NumberOfPages was passed by address; compare it with the request */
+  	if (NumberOfPagesInitial != NumberOfPages) {
+    		fprintf(stderr,
+"InnoDB: AWE: Error: allocated only %lu pages of %lu requested.\n"
+"InnoDB: Check that you have enough free RAM.\n"
+"InnoDB: In Windows XP Professional and 2000 Professional\n"
+"InnoDB: Windows PAE size is max 4 GB. In 2000 and .NET\n"
+"InnoDB: Advanced Servers and 2000 Datacenter Server it is 32 GB,\n"
+"InnoDB: and in .NET Datacenter Server it is 64 GB.\n"
+"InnoDB: A Microsoft web page said that the processor must be an Intel\n"
+"InnoDB: processor.\n",
+			(ulint)NumberOfPages,
+			(ulint)NumberOfPagesInitial);
+
+    		return(FALSE);
+  	}
+
+	fprintf(stderr,
+"InnoDB: Using Address Windowing Extensions (AWE); allocated %lu MB\n",
+		n_megabytes);
+
+	return(TRUE);
+#else
+	return(FALSE);
+#endif
+}
+
+/********************************************************************
+Allocates a window in the virtual address space where we can then map
+pages of physical memory. */
+
+byte*
+os_awe_allocate_virtual_mem_window(
+/*===============================*/
+			/* out, own: allocated memory, or NULL if did not
+			succeed */
+	ulint	size)	/* in: virtual memory allocation size in bytes, must
+			be < 2 GB */
+{
+#ifdef UNIV_SIMULATE_AWE
+	ulint	i;
+	/* Over-allocate by 4096 bytes so the window can be aligned to 4096 */
+	os_awe_simulate_window = ut_align(ut_malloc(4096 + size), 4096);
+	os_awe_simulate_window_size = size;
+	/* One map slot per 4096-byte page of the window, all NULL at first */
+	os_awe_simulate_map = ut_malloc(sizeof(byte*) * (size / 4096));
+
+	for (i = 0; i < (size / 4096); i++) {
+		*(os_awe_simulate_map + i) = NULL;
+	}
+
+	return(os_awe_simulate_window);
+
+#elif defined(__WIN2000__)
+	byte*	ptr;
+	/* Reject sizes above 0x7FFFFFFF bytes: the window must be < 2 GB */
+	if (size > (ulint)0x7FFFFFFFUL) {
+		fprintf(stderr,
+"InnoDB: AWE: Cannot allocate %lu bytes of virtual memory\n", size);
+
+		return(NULL);
+	}
+
+	ptr = VirtualAlloc(NULL, (SIZE_T)size, MEM_RESERVE | MEM_PHYSICAL,
+							PAGE_READWRITE);
+	if (ptr == NULL) {
+		fprintf(stderr,
+"InnoDB: AWE: Cannot allocate %lu bytes of virtual memory, error %lu\n",
+		size, (ulint)GetLastError());
+
+		return(NULL);
+	}
+	/* Record the window start and size in the file-level variables */
+	os_awe_window = ptr;
+	os_awe_window_size = size;
+
+	ut_total_allocated_memory += size;
+
+	return(ptr);
+#else
+	return(NULL);
+#endif
+}
+
+/********************************************************************
+With this function you can map parts of physical memory allocated with
+the ..._allocate_physical_mem to the virtual address space allocated with
+the previous function. Intel implements this so that the process page
+tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
+showed that this takes < 1 microsecond, much better than the estimated 80 us
+for copying a 16 kB page memory to memory. But, the operation will at least
+partially invalidate the translation lookaside buffer (TLB) of all
+processors. Under a real-world load the performance hit may be bigger. */
+
+ibool
+os_awe_map_physical_mem_to_window(
+/*==============================*/
+					/* out: TRUE if success; the function
+					calls exit(1) in case of an error */
+	byte*		ptr,		/* in: a page-aligned pointer to
+					somewhere in the virtual address
+					space window; we map the physical mem
+					pages here */
+	ulint		n_mem_pages,	/* in: number of 4 kB mem pages to
+					map */
+	os_awe_t*	page_info)	/* in: array of page infos for those
+					pages; each page has one slot in the
+					array */
+{
+#ifdef UNIV_SIMULATE_AWE
+	ulint	i;
+	byte**	map;
+	byte*	page;
+	byte*	phys_page;
+	/* Both ptr and page_info must start inside the simulated areas */
+	ut_a(ptr >= os_awe_simulate_window);
+	ut_a(ptr < os_awe_simulate_window + os_awe_simulate_window_size);
+	ut_a(page_info >= os_awe_simulate_page_info);
+	ut_a(page_info < os_awe_simulate_page_info +
+			 		(os_awe_simulate_mem_size / 4096));
+
+	/* First look if some other 'physical pages' are mapped at ptr,
+	and copy them back to where they were if yes */
+
+	map = os_awe_simulate_map
+			+ ((ulint)(ptr - os_awe_simulate_window)) / 4096;
+	page = ptr;
+
+	for (i = 0; i < n_mem_pages; i++) {
+		if (*map != NULL) {
+			ut_memcpy(*map, page, 4096);
+		}
+		map++;
+		page += 4096;
+	}
+
+	/* Then copy to ptr the 'physical pages' determined by page_info; we
+	assume page_info is a segment of the array we created at the start */
+
+	phys_page = os_awe_simulate_mem
+			+ (ulint)(page_info - os_awe_simulate_page_info)
+			  * 4096;
+
+	ut_memcpy(ptr, phys_page, n_mem_pages * 4096);
+
+	/* Update the map */
+
+	map = os_awe_simulate_map
+			+ ((ulint)(ptr - os_awe_simulate_window)) / 4096;
+
+	for (i = 0; i < n_mem_pages; i++) {
+		*map = phys_page;
+
+		map++;
+		phys_page += 4096;
+	}
+
+	return(TRUE);
+
+#elif defined(__WIN2000__)
+	BOOL		bResult;
+	os_awe_t	n_pages;
+
+	n_pages = (os_awe_t)n_mem_pages;
+	/* Range-check ptr and page_info; ut_a(0) is reached on a violation */
+	if (!(ptr >= os_awe_window)) {
+		fprintf(stderr,
+"InnoDB: AWE: Error: trying to map to address %lx but AWE window start %lx\n",
+		(ulint)ptr, (ulint)os_awe_window);
+		ut_a(0);
+	}
+	/* NOTE(review): end check reserves UNIV_PAGE_SIZE, not n_mem_pages */
+	if (!(ptr <= os_awe_window + os_awe_window_size - UNIV_PAGE_SIZE)) {
+		fprintf(stderr,
+"InnoDB: AWE: Error: trying to map to address %lx but AWE window end %lx\n",
+		(ulint)ptr, (ulint)os_awe_window + os_awe_window_size);
+		ut_a(0);
+	}
+
+	if (!(page_info >= os_awe_page_info)) {
+		fprintf(stderr,
+"InnoDB: AWE: Error: trying to map page info at %lx but array start %lx\n",
+		(ulint)page_info, (ulint)os_awe_page_info);
+		ut_a(0);
+	}
+	/* NOTE(review): 4 is presumably UNIV_PAGE_SIZE / 4096 -- confirm */
+	if (!(page_info <= os_awe_page_info + (os_awe_n_pages - 4))) {
+		fprintf(stderr,
+"InnoDB: AWE: Error: trying to map page info at %lx but array end %lx\n",
+		(ulint)page_info, (ulint)(os_awe_page_info + os_awe_n_pages));
+		ut_a(0);
+	}
+
+	bResult = MapUserPhysicalPages((PVOID)ptr, n_pages, page_info);
+
+	if (bResult != TRUE) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: AWE: Mapping of %lu physical pages to address %lx failed,\n"
+"InnoDB: error %lu.\n"
+"InnoDB: Cannot continue operation.\n",
+			n_mem_pages, (ulint)ptr, (ulint)GetLastError());
+		exit(1);
+	}
+
+	return(TRUE);
+#else
+	return(FALSE);
+#endif
+}
 
 /********************************************************************
 Converts the current process id to a number. It is not guaranteed that the
diff --git a/innobase/os/os0sync.c b/innobase/os/os0sync.c
index 827d68501db..7cbaf1f5123 100644
--- a/innobase/os/os0sync.c
+++ b/innobase/os/os0sync.c
@@ -125,7 +125,7 @@ os_event_create(
 	if (!event->handle) {
 	        fprintf(stderr,
 "InnoDB: Could not create a Windows event semaphore; Windows error %lu\n",
-		  (ulint)GetLastError());
+		  (ulong) GetLastError());
 	}
 #else /* Unix */
 	os_event_t	event;
@@ -182,7 +182,7 @@ os_event_create_auto(
 	if (!event->handle) {
 	        fprintf(stderr,
 "InnoDB: Could not create a Windows auto event semaphore; Windows error %lu\n",
-		  (ulint)GetLastError());
+		  (ulong) GetLastError());
 	}
 
         /* Put to the list of events */
@@ -412,7 +412,7 @@ os_event_wait_multiple(
 					FALSE,	   /* Wait for any 1 event */
 					INFINITE); /* Infinite wait time
 						   limit */
-	ut_a(index >= WAIT_OBJECT_0);
+	ut_a(index >= WAIT_OBJECT_0);	/* NOTE: Pointless comparison */
 	ut_a(index < WAIT_OBJECT_0 + n);
 
 	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
diff --git a/innobase/page/page0cur.c b/innobase/page/page0cur.c
index b08efacf43a..eb2ba5b8bf8 100644
--- a/innobase/page/page0cur.c
+++ b/innobase/page/page0cur.c
@@ -629,7 +629,7 @@ page_cur_parse_insert_rec(
 		return(NULL);
 	}
 
-	extra_info_yes = end_seg_len & 0x1;
+	extra_info_yes = end_seg_len & 0x1UL;
 	end_seg_len = end_seg_len / 2;
 
 	if (end_seg_len >= UNIV_PAGE_SIZE) {
@@ -702,11 +702,14 @@ page_cur_parse_insert_rec(
 	/* Build the inserted record to buf */
 	
         if (mismatch_index >= UNIV_PAGE_SIZE) {
-               printf("Is short %lu, info_bits %lu, offset %lu, o_offset %lu\n"
+               printf(
+		"Is short %lu, info_bits %lu, offset %lu, o_offset %lu\n"
                     "mismatch index %lu, end_seg_len %lu\n"
                     "parsed len %lu\n",
-                    is_short, info_bits, offset, origin_offset,
-                    mismatch_index, end_seg_len, (ulint)(ptr - ptr2));
+		    (ulong) is_short, (ulong) info_bits, (ulong) offset,
+		    (ulong) origin_offset,
+		    (ulong) mismatch_index, (ulong) end_seg_len,
+		    (ulong) (ptr - ptr2));
 
 	       printf("Dump of 300 bytes of log:\n");
 	       ut_print_buf(ptr2, 300);
diff --git a/innobase/page/page0page.c b/innobase/page/page0page.c
index 21adcdea635..c64a7590b94 100644
--- a/innobase/page/page0page.c
+++ b/innobase/page/page0page.c
@@ -94,13 +94,13 @@ page_dir_find_owner_slot(
  		if (i == 0) {
 			fprintf(stderr,
 		"InnoDB: Probable data corruption on page %lu\n",
-			buf_frame_get_page_no(page));
+			(ulong) buf_frame_get_page_no(page));
 
 			rec_sprintf(err_buf, 900, original_rec);
 
 	  		fprintf(stderr,
 		"InnoDB: Original record %s\n"
-		"InnoDB: on that page. Steps %lu.\n", err_buf, steps);
+		"InnoDB: on that page. Steps %lu.\n", err_buf, (ulong) steps);
 
 			rec_sprintf(err_buf, 900, rec);
 
@@ -438,9 +438,9 @@ page_copy_rec_list_end_no_locks(
 
 			fprintf(stderr,
 "InnoDB: rec offset %lu, cur1 offset %lu, cur2 offset %lu\n",
-			      (ulint)(rec - page),
-			      (ulint)(page_cur_get_rec(&cur1) - page),
-			      (ulint)(page_cur_get_rec(&cur2) - new_page));
+			      (ulong)(rec - page),
+			      (ulong)(page_cur_get_rec(&cur1) - page),
+			      (ulong)(page_cur_get_rec(&cur2) - new_page));
 			ut_error;
 		}
 
@@ -554,7 +554,8 @@ byte*
 page_parse_delete_rec_list(
 /*=======================*/
 			/* out: end of log record or NULL */
-	byte	type,	/* in: MLOG_LIST_END_DELETE or MLOG_LIST_START_DELETE */
+	byte	type,	/* in: MLOG_LIST_END_DELETE or
+			MLOG_LIST_START_DELETE */
 	byte*	ptr,	/* in: buffer */
 	byte*	end_ptr,/* in: buffer end */
 	page_t*	page,	/* in: page or NULL */	
@@ -1123,9 +1124,9 @@ page_rec_print(
 	rec_print(rec);
 	printf(
      		"            n_owned: %lu; heap_no: %lu; next rec: %lu\n",
-		rec_get_n_owned(rec),
-		rec_get_heap_no(rec),
-		rec_get_next_offs(rec));
+		(ulong) rec_get_n_owned(rec),
+		(ulong) rec_get_heap_no(rec),
+		(ulong) rec_get_next_offs(rec));
 
 	page_rec_check(rec);
 	rec_validate(rec);
@@ -1149,9 +1150,9 @@ page_dir_print(
 	
 	printf("--------------------------------\n");
 	printf("PAGE DIRECTORY\n");
-	printf("Page address %lx\n", (ulint)page);
+	printf("Page address %lx\n", (ulong)page);
 	printf("Directory stack top at offs: %lu; number of slots: %lu\n", 
-		(ulint)(page_dir_get_nth_slot(page, n - 1) - page), n);
+		(ulong)(page_dir_get_nth_slot(page, n - 1) - page), (ulong) n);
 	for (i = 0; i < n; i++) {
 		slot = page_dir_get_nth_slot(page, i);
 		if ((i == pr_n) && (i < n - pr_n)) {
@@ -1160,11 +1161,11 @@ page_dir_print(
 	    	if ((i < pr_n) || (i >= n - pr_n)) {
 	   		printf(
 	   	   "Contents of slot: %lu: n_owned: %lu, rec offs: %lu\n",
-			i, page_dir_slot_get_n_owned(slot),
-			(ulint)(page_dir_slot_get_rec(slot) - page));
+			(ulong) i, (ulong) page_dir_slot_get_n_owned(slot),
+			(ulong)(page_dir_slot_get_rec(slot) - page));
 	    	}
 	}
-	printf("Total of %lu records\n", 2 + page_get_n_recs(page));	
+	printf("Total of %lu records\n", (ulong) (2 + page_get_n_recs(page)));
 	printf("--------------------------------\n");
 }	
 	
@@ -1185,7 +1186,7 @@ page_print_list(
 
 	printf("--------------------------------\n");
 	printf("PAGE RECORD LIST\n");
-	printf("Page address %lu\n", (ulint)page);
+	printf("Page address %lu\n", (ulong) page);
 
 	n_recs = page_get_n_recs(page);
 
@@ -1222,7 +1223,7 @@ page_print_list(
 		count++;	
 	}
 
-	printf("Total of %lu records \n", count + 1);	
+	printf("Total of %lu records \n", (ulong) (count + 1));
 	printf("--------------------------------\n");
 }	
 
@@ -1236,22 +1237,22 @@ page_header_print(
 {
 	printf("--------------------------------\n");
 	printf("PAGE HEADER INFO\n");
-	printf("Page address %lx, n records %lu\n", (ulint)page,
-		page_header_get_field(page, PAGE_N_RECS));
+	printf("Page address %lx, n records %lu\n", (ulong) page,
+	        (ulong) page_header_get_field(page, PAGE_N_RECS));
 
 	printf("n dir slots %lu, heap top %lu\n",
-		page_header_get_field(page, PAGE_N_DIR_SLOTS),
-		page_header_get_field(page, PAGE_HEAP_TOP));
+		(ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS),
+		(ulong) page_header_get_field(page, PAGE_HEAP_TOP));
 
 	printf("Page n heap %lu, free %lu, garbage %lu\n",
-		page_header_get_field(page, PAGE_N_HEAP),
-		page_header_get_field(page, PAGE_FREE),
-		page_header_get_field(page, PAGE_GARBAGE));
+		(ulong) page_header_get_field(page, PAGE_N_HEAP),
+		(ulong) page_header_get_field(page, PAGE_FREE),
+		(ulong) page_header_get_field(page, PAGE_GARBAGE));
 
 	printf("Page last insert %lu, direction %lu, n direction %lu\n",
-		page_header_get_field(page, PAGE_LAST_INSERT),
-		page_header_get_field(page, PAGE_DIRECTION),
-		page_header_get_field(page, PAGE_N_DIRECTION));
+		(ulong) page_header_get_field(page, PAGE_LAST_INSERT),
+		(ulong) page_header_get_field(page, PAGE_DIRECTION),
+		(ulong) page_header_get_field(page, PAGE_N_DIRECTION));
 }
 
 /*******************************************************************
@@ -1296,15 +1297,15 @@ page_rec_validate(
 	if (!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED)) {
 		fprintf(stderr,
 			"InnoDB: Dir slot of rec %lu, n owned too big %lu\n",
-				(ulint)(rec - page), n_owned);
+				(ulong)(rec - page), (ulong) n_owned);
 		return(FALSE);
 	}
 
 	if (!(heap_no < page_header_get_field(page, PAGE_N_HEAP))) {
 		fprintf(stderr,
 		"InnoDB: Heap no of rec %lu too big %lu %lu\n",
-				(ulint)(rec - page), heap_no,
-				page_header_get_field(page, PAGE_N_HEAP));
+				(ulong)(rec - page), (ulong) heap_no,
+				(ulong) page_header_get_field(page, PAGE_N_HEAP));
 		return(FALSE);
 	}
 	
@@ -1370,7 +1371,7 @@ page_simple_validate(
 
 	if (n_slots > UNIV_PAGE_SIZE / 4) {
 		fprintf(stderr,
-	"InnoDB: Nonsensical number %lu of page dir slots\n", n_slots);
+	"InnoDB: Nonsensical number %lu of page dir slots\n", (ulong) n_slots);
 
 		goto func_exit;
 	}
@@ -1381,8 +1382,8 @@ page_simple_validate(
 
 		fprintf(stderr,
     "InnoDB: Record heap and dir overlap on a page, heap top %lu, dir %lu\n",
-       		(ulint)(page_header_get_ptr(page, PAGE_HEAP_TOP) - page),
-       		(ulint)(page_dir_get_nth_slot(page, n_slots - 1) - page));
+       		(ulong)(page_header_get_ptr(page, PAGE_HEAP_TOP) - page),
+       		(ulong)(page_dir_get_nth_slot(page, n_slots - 1) - page));
 
        		goto func_exit;
        	}
@@ -1403,7 +1404,7 @@ page_simple_validate(
 		if (rec > rec_heap_top) {
 			fprintf(stderr,
 			"InnoDB: Record %lu is above rec heap top %lu\n",
-			(ulint)(rec - page), (ulint)(rec_heap_top - page));
+			(ulong)(rec - page), (ulong)(rec_heap_top - page));
 
 			goto func_exit;
 		}
@@ -1414,8 +1415,9 @@ page_simple_validate(
 
 				fprintf(stderr,
 		"InnoDB: Wrong owned count %lu, %lu, rec %lu\n",
-				rec_get_n_owned(rec), own_count,
-				(ulint)(rec - page));
+				(ulong) rec_get_n_owned(rec),
+				(ulong) own_count,
+				(ulong)(rec - page));
 
 				goto func_exit;
 			}
@@ -1423,7 +1425,7 @@ page_simple_validate(
 			if (page_dir_slot_get_rec(slot) != rec) {
 				fprintf(stderr,
 		"InnoDB: Dir slot does not point to right rec %lu\n",
-					(ulint)(rec - page));
+					(ulong)(rec - page));
 
 				goto func_exit;
 			}
@@ -1445,8 +1447,8 @@ page_simple_validate(
 				|| rec_get_next_offs(rec) >= UNIV_PAGE_SIZE) {
 			fprintf(stderr,
 		"InnoDB: Next record offset nonsensical %lu for rec %lu\n",
-			  rec_get_next_offs(rec),
-			  (ulint)(rec - page));
+			  (ulong) rec_get_next_offs(rec),
+			  (ulong)(rec - page));
 
 			goto func_exit;
 		}
@@ -1456,7 +1458,7 @@ page_simple_validate(
 		if (count > UNIV_PAGE_SIZE) {
 			fprintf(stderr,
 		"InnoDB: Page record list appears to be circular %lu\n",
-								count);
+								(ulong) count);
 			goto func_exit;
 		}
 		
@@ -1472,13 +1474,14 @@ page_simple_validate(
 		
 	if (slot_no != n_slots - 1) {
 		fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
-			slot_no, n_slots - 1);
+			(ulong) slot_no, (ulong) (n_slots - 1));
 		goto func_exit;
 	}		
 
 	if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) {
 		fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
-		page_header_get_field(page, PAGE_N_RECS) + 2,  count + 1);
+		(ulong) page_header_get_field(page, PAGE_N_RECS) + 2,
+		(ulong) (count + 1));
 
 		goto func_exit;
 	}
@@ -1491,7 +1494,7 @@ page_simple_validate(
 				|| rec >= page + UNIV_PAGE_SIZE) {
 			fprintf(stderr,
 		"InnoDB: Free list record has a nonsensical offset %lu\n",
-			(ulint)(rec - page));
+			(ulong)(rec - page));
 
 			goto func_exit;
 		}
@@ -1499,7 +1502,7 @@ page_simple_validate(
 		if (rec > rec_heap_top) {
 			fprintf(stderr,
 		"InnoDB: Free list record %lu is above rec heap top %lu\n",
-			(ulint)(rec - page), (ulint)(rec_heap_top - page));
+			(ulong)(rec - page), (ulong)(rec_heap_top - page));
 
 			goto func_exit;
 		}
@@ -1509,7 +1512,7 @@ page_simple_validate(
 		if (count > UNIV_PAGE_SIZE) {
 			fprintf(stderr,
 		"InnoDB: Page free list appears to be circular %lu\n",
-								count);
+							    (ulong) count);
 			goto func_exit;
 		}
 
@@ -1519,7 +1522,8 @@ page_simple_validate(
 	if (page_header_get_field(page, PAGE_N_HEAP) != count + 1) {
 
 		fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
-		page_header_get_field(page, PAGE_N_HEAP), count + 1);
+		(ulong) page_header_get_field(page, PAGE_N_HEAP),
+		(ulong) (count + 1));
 
 		goto func_exit;
 	}
@@ -1560,7 +1564,7 @@ page_validate(
 	if (!page_simple_validate(page)) {
 		fprintf(stderr,
 "InnoDB: Apparent corruption in page %lu in index %s in table %s\n",
-			buf_frame_get_page_no(page), index->name,
+			(ulong) buf_frame_get_page_no(page), index->name,
 			index->table_name);
 
 		buf_page_print(page);
@@ -1587,8 +1591,8 @@ page_validate(
 			page_dir_get_nth_slot(page, n_slots - 1))) {
 		fprintf(stderr,
 "InnoDB: Record heap and dir overlap on a page in index %s, %lu, %lu\n",
-       		index->name, (ulint)page_header_get_ptr(page, PAGE_HEAP_TOP),
-       		(ulint)page_dir_get_nth_slot(page, n_slots - 1));
+       		index->name, (ulong)page_header_get_ptr(page, PAGE_HEAP_TOP),
+       		(ulong)page_dir_get_nth_slot(page, n_slots - 1));
 
        		goto func_exit;
        	}
@@ -1615,7 +1619,7 @@ page_validate(
 			if (!(1 == cmp_rec_rec(rec, old_rec, index))) {
 				fprintf(stderr,
 "InnoDB: Records in wrong order on page %lu index %s table %s\n",
-					buf_frame_get_page_no(page),
+					(ulong) buf_frame_get_page_no(page),
 					index->name,
 					index->table_name);
 
@@ -1658,7 +1662,8 @@ page_validate(
 			if (rec_get_n_owned(rec) != own_count) {
 				fprintf(stderr,
 			"InnoDB: Wrong owned count %lu, %lu, in index %s\n",
-				rec_get_n_owned(rec), own_count,
+				(ulong) rec_get_n_owned(rec),
+				(ulong) own_count,
 				index->name);
 
 				goto func_exit;
@@ -1689,7 +1694,7 @@ page_validate(
 				|| rec_get_next_offs(rec) >= UNIV_PAGE_SIZE) {
 			fprintf(stderr,
 		"InnoDB: Next record offset wrong %lu in index %s\n",
-			  rec_get_next_offs(rec), index->name);
+			  (ulong) rec_get_next_offs(rec), index->name);
 
 			goto func_exit;
 		}
@@ -1709,13 +1714,14 @@ page_validate(
 		
 	if (slot_no != n_slots - 1) {
 		fprintf(stderr, "InnoDB: n slots wrong %lu %lu in index %s\n",
-			slot_no, n_slots - 1, index->name);
+			(ulong) slot_no, (ulong) (n_slots - 1), index->name);
 		goto func_exit;
 	}		
 
 	if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) {
 		fprintf(stderr, "InnoDB: n recs wrong %lu %lu in index %s\n",
-		page_header_get_field(page, PAGE_N_RECS) + 2,  count + 1,
+		(ulong) page_header_get_field(page, PAGE_N_RECS) + 2,
+		(ulong) (count + 1),
 		index->name);
 
 		goto func_exit;
@@ -1724,7 +1730,7 @@ page_validate(
 	if (data_size != page_get_data_size(page)) {
 		fprintf(stderr,
 		"InnoDB: Summed data size %lu, returned by func %lu\n",
-			data_size, page_get_data_size(page));
+			(ulong) data_size, (ulong) page_get_data_size(page));
 		goto func_exit;
 	}
 
@@ -1760,8 +1766,9 @@ page_validate(
 
 		fprintf(stderr,
 		"InnoDB: N heap is wrong %lu %lu in index %s\n",
-			page_header_get_field(page, PAGE_N_HEAP), count + 1,
-				index->name);
+			(ulong) page_header_get_field(page, PAGE_N_HEAP),
+			(ulong) count + 1,
+			index->name);
 		goto func_exit;
 	}
 
@@ -1773,7 +1780,7 @@ func_exit:
 	if (ret == FALSE) {
 		fprintf(stderr,
 "InnoDB: Apparent corruption in page %lu in index %s in table %s\n",
-			buf_frame_get_page_no(page), index->name,
+			(ulong) buf_frame_get_page_no(page), index->name,
 			index->table_name);
 
 		buf_page_print(page);
diff --git a/innobase/pars/lexyy.c b/innobase/pars/lexyy.c
index ab723cb635c..f014200b2a6 100644
--- a/innobase/pars/lexyy.c
+++ b/innobase/pars/lexyy.c
@@ -1,7 +1,7 @@
 /* A lexical scanner generated by flex */
 
 /* Scanner skeleton version:
- * $Header: /home/daffy/u0/vern/flex/RCS/flex.skl,v 2.91 96/09/10 16:58:48 vern Exp $
+ * $Header: /home/heikki/cvsroot/ib/pars/lexyy.c,v 1.2 2003/10/30 20:27:19 heikki Exp $
  */
 
 #define FLEX_SCANNER
diff --git a/innobase/pars/pars0opt.c b/innobase/pars/pars0opt.c
index 4faf83b47a3..9b0495a01cd 100644
--- a/innobase/pars/pars0opt.c
+++ b/innobase/pars/pars0opt.c
@@ -1235,7 +1235,8 @@ opt_print_query_plan(
 		printf(
 		"Table %s index %s; exact m. %lu, match %lu, end conds %lu\n",
 			plan->table->name, plan->index->name,
-			plan->n_exact_match, n_fields,
-			UT_LIST_GET_LEN(plan->end_conds));
+		        (unsigned long) plan->n_exact_match,
+		        (unsigned long) n_fields,
+			(unsigned long) UT_LIST_GET_LEN(plan->end_conds));
 	}
 }
diff --git a/innobase/que/que0que.c b/innobase/que/que0que.c
index 279f9fc21aa..127e7f84576 100644
--- a/innobase/que/que0que.c
+++ b/innobase/que/que0que.c
@@ -483,7 +483,7 @@ que_graph_free_recursive(
 		if (thr->magic_n != QUE_THR_MAGIC_N) {
 			fprintf(stderr,
 		"que_thr struct appears corrupt; magic n %lu\n",
-								thr->magic_n);
+				(unsigned long) thr->magic_n);
 			mem_analyze_corruption((byte*)thr);
 			ut_error;
 		}
@@ -595,7 +595,7 @@ que_graph_free_recursive(
 	default:
 		fprintf(stderr,
 		"que_node struct appears corrupt; type %lu\n",
-						que_node_get_type(node));
+			(unsigned long) que_node_get_type(node));
 		mem_analyze_corruption((byte*)node);
 		ut_error;
 	}
@@ -983,7 +983,8 @@ que_thr_move_to_run_state_for_mysql(
 {
 	if (thr->magic_n != QUE_THR_MAGIC_N) {
 		fprintf(stderr,
-	"que_thr struct appears corrupt; magic n %lu\n", thr->magic_n);
+	"que_thr struct appears corrupt; magic n %lu\n",
+			(unsigned long) thr->magic_n);
 
 		mem_analyze_corruption((byte*)thr);
 
@@ -1019,7 +1020,8 @@ que_thr_stop_for_mysql_no_error(
 		
 	if (thr->magic_n != QUE_THR_MAGIC_N) {
 		fprintf(stderr,
-	"que_thr struct appears corrupt; magic n %lu\n", thr->magic_n);
+	"que_thr struct appears corrupt; magic n %lu\n",
+			(unsigned long) thr->magic_n);
 
 		mem_analyze_corruption((byte*)thr);
 
@@ -1091,7 +1093,8 @@ que_node_print_info(
 		str = "UNKNOWN NODE TYPE";
 	}
 
-	fprintf(stderr, "Node type %lu: %s, address %p\n", type, str, node);
+	fprintf(stderr, "Node type %lu: %s, address %lx\n", (unsigned long) type, str,
+	       (unsigned long) addr);
 }
 
 /**************************************************************************
@@ -1250,10 +1253,6 @@ loop:
 		mutex_exit(&kernel_mutex);
 	}	
 */
-	/* TRUE below denotes that the thread is allowed to own the dictionary
-	mutex, though */
-	ut_ad(sync_thread_levels_empty_gen(TRUE));
-
 	loop_count++;
 
 	if (next_thr != thr) {
diff --git a/innobase/read/read0read.c b/innobase/read/read0read.c
index 64b6d87283d..889612deef4 100644
--- a/innobase/read/read0read.c
+++ b/innobase/read/read0read.c
@@ -236,16 +236,16 @@ read_view_print(
 	ulint	i;
 	
 	fprintf(stderr, "Read view low limit trx n:o %lu %lu\n",
-			ut_dulint_get_high(view->low_limit_no),
-			ut_dulint_get_low(view->low_limit_no));
+			(ulong) ut_dulint_get_high(view->low_limit_no),
+			(ulong) ut_dulint_get_low(view->low_limit_no));
 
 	fprintf(stderr, "Read view up limit trx id %lu %lu\n",
-			ut_dulint_get_high(view->up_limit_id),
-			ut_dulint_get_low(view->up_limit_id));		
+			(ulong) ut_dulint_get_high(view->up_limit_id),
+			(ulong) ut_dulint_get_low(view->up_limit_id));		
 
 	fprintf(stderr, "Read view low limit trx id %lu %lu\n",
-			ut_dulint_get_high(view->low_limit_id),
-			ut_dulint_get_low(view->low_limit_id));
+			(ulong) ut_dulint_get_high(view->low_limit_id),
+			(ulong) ut_dulint_get_low(view->low_limit_id));
 
 	fprintf(stderr, "Read view individually stored trx ids:\n");
 
@@ -253,7 +253,7 @@ read_view_print(
 
 	for (i = 0; i < n_ids; i++) {
 		fprintf(stderr, "Read view trx id %lu %lu\n",
-			ut_dulint_get_high(read_view_get_nth_trx_id(view, i)),
-			ut_dulint_get_low(read_view_get_nth_trx_id(view, i)));
+			(ulong) ut_dulint_get_high(read_view_get_nth_trx_id(view, i)),
+			(ulong) ut_dulint_get_low(read_view_get_nth_trx_id(view, i)));
 	}
 }
diff --git a/innobase/rem/rem0cmp.c b/innobase/rem/rem0cmp.c
index dea2621faf3..254ebeec8c9 100644
--- a/innobase/rem/rem0cmp.c
+++ b/innobase/rem/rem0cmp.c
@@ -61,10 +61,11 @@ must be a copy of the the one in ha_innobase.cc! */
 extern
 int
 innobase_mysql_cmp(
-/*===============*/	
+/*===============*/
 					/* out: 1, 0, -1, if a is greater,
 					equal, less than b, respectively */
-	int		mysql_type,	/* in: MySQL type */ 
+	int		mysql_type,	/* in: MySQL type */
+	uint		charset_number,	/* in: number of the charset */
 	unsigned char*	a,		/* in: data field */
 	unsigned int	a_length,	/* in: data field length,
 					not UNIV_SQL_NULL */
@@ -97,16 +98,28 @@ cmp_types_are_equal(
 	dtype_t*	type1,	/* in: type 1 */
 	dtype_t*	type2)	/* in: type 2 */
 {
-        if ((type1->mtype == DATA_VARCHAR && type2->mtype == DATA_CHAR)
-          || (type1->mtype == DATA_CHAR && type2->mtype == DATA_VARCHAR)
-          || (type1->mtype == DATA_FIXBINARY && type2->mtype == DATA_BINARY)
-          || (type1->mtype == DATA_BINARY && type2->mtype == DATA_FIXBINARY)
-          || (type1->mtype == DATA_MYSQL && type2->mtype == DATA_VARMYSQL)
-          || (type1->mtype == DATA_VARMYSQL && type2->mtype == DATA_MYSQL)) {
-
-                return(TRUE);
+	if (dtype_is_non_binary_string_type(type1->mtype, type1->prtype)
+	    && dtype_is_non_binary_string_type(type2->mtype, type2->prtype)) {
+
+		/* Both are non-binary string types: they can be compared if
+		and only if the charset-collation is the same */
+
+		if (dtype_get_charset_coll(type1->prtype)
+				== dtype_get_charset_coll(type2->prtype)) {
+			return(TRUE);
+		}
+
+		return(FALSE);
         }
 
+	if (dtype_is_binary_string_type(type1->mtype, type1->prtype)
+	    && dtype_is_binary_string_type(type2->mtype, type2->prtype)) {
+
+		/* Both are binary string types: they can be compared */
+
+		return(TRUE);
+	}
+	
         if (type1->mtype != type2->mtype) {
 
 		return(FALSE);
@@ -128,11 +141,6 @@ cmp_types_are_equal(
 		return(FALSE);
 	}
 
-	if (type1->mtype == DATA_BLOB && (type1->prtype & DATA_BINARY_TYPE)
-			           != (type2->prtype & DATA_BINARY_TYPE)) {
-	        return(FALSE);
-	} 
-
 	return(TRUE);
 }
 
@@ -269,10 +277,12 @@ cmp_whole_field(
 
 		return(innobase_mysql_cmp(
 				(int)(type->prtype & DATA_MYSQL_TYPE_MASK),
+				(uint)dtype_get_charset_coll(type->prtype),
 				a, a_length, b, b_length));
 	default:
 	        fprintf(stderr,
-			"InnoDB: unknown type number %lu\n", data_type);
+			"InnoDB: unknown type number %lu\n",
+			(ulong) data_type);
 	        ut_error;
 	}
 
@@ -321,7 +331,9 @@ cmp_data_data_slow(
 	
 	if (cur_type->mtype >= DATA_FLOAT
 	    || (cur_type->mtype == DATA_BLOB
-	        && (cur_type->prtype & DATA_NONLATIN1))) {
+	        && 0 == (cur_type->prtype & DATA_BINARY_TYPE)
+		&& dtype_get_charset_coll(cur_type->prtype) !=
+				data_mysql_latin1_swedish_charset_coll)) {
 
 		return(cmp_whole_field(cur_type, data1, len1, data2, len2));
 	}
@@ -522,8 +534,10 @@ cmp_dtuple_rec_with_match(
 		}
 
 		if (cur_type->mtype >= DATA_FLOAT
-		    || (cur_type->mtype == DATA_BLOB
-	                && (cur_type->prtype & DATA_NONLATIN1))) {
+	    	    || (cur_type->mtype == DATA_BLOB
+	        	&& 0 == (cur_type->prtype & DATA_BINARY_TYPE)
+			&& dtype_get_charset_coll(cur_type->prtype) !=
+				data_mysql_latin1_swedish_charset_coll)) {
 
 			ret = cmp_whole_field(cur_type,
 				dfield_get_data(dtuple_field), dtuple_f_len,
@@ -844,8 +858,10 @@ cmp_rec_rec_with_match(
 		}
 
 		if (cur_type->mtype >= DATA_FLOAT
-		    || (cur_type->mtype == DATA_BLOB
-	                && (cur_type->prtype & DATA_NONLATIN1))) {
+	    	    || (cur_type->mtype == DATA_BLOB
+	        	&& 0 == (cur_type->prtype & DATA_BINARY_TYPE)
+			&& dtype_get_charset_coll(cur_type->prtype) !=
+				data_mysql_latin1_swedish_charset_coll)) {
 
 			ret = cmp_whole_field(cur_type,
 						rec1_b_ptr, rec1_f_len,
diff --git a/innobase/rem/rem0rec.c b/innobase/rem/rem0rec.c
index fddc8eab761..3d0b997db85 100644
--- a/innobase/rem/rem0rec.c
+++ b/innobase/rem/rem0rec.c
@@ -107,7 +107,7 @@ rec_get_nth_field(
 
 	if (n > 1024) {
 		fprintf(stderr, "Error: trying to access field %lu in rec\n",
-									n);
+								(ulong) n);
 		ut_error;
 	}
 
@@ -474,7 +474,7 @@ rec_validate(
 
 	if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) {
 		fprintf(stderr, "InnoDB: Error: record has %lu fields\n",
-								n_fields);
+							(ulong) n_fields);
 		return(FALSE);
 	}
 	
@@ -483,8 +483,8 @@ rec_validate(
 		
 		if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) {
 			fprintf(stderr,
-			"InnoDB: Error: record field %lu len %lu\n", i,
-								len);
+			"InnoDB: Error: record field %lu len %lu\n", (ulong) i,
+							(ulong) len);
 			return(FALSE);
 		}	
 
@@ -502,7 +502,8 @@ rec_validate(
 	if (len_sum != (ulint)(rec_get_end(rec) - rec)) {
 		fprintf(stderr,
 		"InnoDB: Error: record len should be %lu, len %lu\n",
-				len_sum, (ulint)(rec_get_end(rec) - rec));
+				(ulong) len_sum,
+			        (ulong) (rec_get_end(rec) - rec));
 		return(FALSE);
 	}	
 
@@ -537,13 +538,13 @@ rec_print(
 
 	printf(
 	    "PHYSICAL RECORD: n_fields %lu; 1-byte offs %s; info bits %lu\n",
-		n, offs, rec_get_info_bits(rec));
+		(ulong) n, offs, (ulong) rec_get_info_bits(rec));
 	
 	for (i = 0; i < n; i++) {
 
 		data = rec_get_nth_field(rec, i, &len);
 
-		printf(" %lu:", i);	
+		printf(" %lu:", (ulong) i);
 	
 		if (len != UNIV_SQL_NULL) {
 			if (len <= 30) {
@@ -556,7 +557,7 @@ rec_print(
 			}
 		} else {
 			printf(" SQL NULL, size %lu ",
-					rec_get_nth_field_size(rec, i));
+				      (ulong) rec_get_nth_field_size(rec, i));
 						
 		}
 		printf(";");
@@ -594,7 +595,8 @@ rec_sprintf(
 		return(k);
 	}
 	
-	k += sprintf(buf + k, "RECORD: info bits %lu", rec_get_info_bits(rec));
+	k += sprintf(buf + k, "RECORD: info bits %lu",
+		     (ulong) rec_get_info_bits(rec));
 	
 	for (i = 0; i < n; i++) {
 
@@ -605,7 +607,7 @@ rec_sprintf(
 		
 		data = rec_get_nth_field(rec, i, &len);
 
-		k += sprintf(buf + k, " %lu:", i);
+		k += sprintf(buf + k, " %lu:", (ulong) i);
 	
 		if (len != UNIV_SQL_NULL) {
 			if (k + 30 + 5 * len > buf_len) {
diff --git a/innobase/row/row0ins.c b/innobase/row/row0ins.c
index 84968ea4e20..fc1f7a19d53 100644
--- a/innobase/row/row0ins.c
+++ b/innobase/row/row0ins.c
@@ -1683,6 +1683,7 @@ row_ins_index_entry_low(
 	ulint		modify = 0; /* remove warning */
 	rec_t*		insert_rec;
 	rec_t*		rec;
+	rec_t*		first_rec;
 	ulint		err;
 	ulint		n_unique;
 	big_rec_t*	big_rec			= NULL;
@@ -1715,6 +1716,14 @@ row_ins_index_entry_low(
 		goto function_exit;
 	}	
 					
+	first_rec = page_rec_get_next(page_get_infimum_rec(
+			buf_frame_align(btr_cur_get_rec(&cursor))));
+
+	if (!page_rec_is_supremum(first_rec)) {
+		ut_a((rec_get_n_fields(first_rec))
+					== dtuple_get_n_fields(entry));
+	}
+
 	n_unique = dict_index_get_n_unique(index);
 
 	if (index->type & DICT_UNIQUE && (cursor.up_match >= n_unique
diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c
index ab73dc2ad6d..efcca623a76 100644
--- a/innobase/row/row0mysql.c
+++ b/innobase/row/row0mysql.c
@@ -22,12 +22,15 @@ Created 9/17/2000 Heikki Tuuri
 #include "dict0dict.h"
 #include "dict0crea.h"
 #include "dict0load.h"
+#include "dict0boot.h"
 #include "trx0roll.h"
 #include "trx0purge.h"
 #include "lock0lock.h"
 #include "rem0cmp.h"
 #include "log0log.h"
 #include "btr0sea.h"
+#include "fil0fil.h"
+#include "ibuf0ibuf.h"
 
 /* A dummy variable used to fool the compiler */
 ibool	row_mysql_identically_false	= FALSE;
@@ -59,6 +62,19 @@ row_mysql_read_var_ref_noninline(
 }
 
 /***********************************************************************
+Frees the blob heap in prebuilt when no longer needed. */
+
+void
+row_mysql_prebuilt_free_blob_heap(
+/*==============================*/
+	row_prebuilt_t*	prebuilt)	/* in/out: prebuilt struct of a
+					ha_innobase:: table handle */
+{
+	mem_heap_free(prebuilt->blob_heap);	/* NOTE: not NULL-checked */
+	prebuilt->blob_heap = NULL;		/* mark the heap as freed */
+}
+
+/***********************************************************************
 Stores a reference to a BLOB in the MySQL format. */
 
 void
@@ -269,7 +285,8 @@ handle_new_error:
 	    "InnoDB: http://www.innodb.com/ibman.html for help.\n");
 
 	} else {
-		fprintf(stderr, "InnoDB: unknown error code %lu\n", err);
+		fprintf(stderr, "InnoDB: unknown error code %lu\n",
+			(ulong) err);
 		ut_error;
 	}		
 
@@ -379,7 +396,9 @@ row_prebuilt_free(
 		fprintf(stderr,
 "InnoDB: Error: trying to free a corrupt\n"
 "InnoDB: table handle. Magic n %lu, magic n2 %lu, table name %s\n",
-		prebuilt->magic_n, prebuilt->magic_n2, prebuilt->table->name);
+		(ulong) prebuilt->magic_n,
+		(ulong) prebuilt->magic_n2,
+		prebuilt->table->name);
 
 		mem_analyze_corruption((byte*)prebuilt);
 
@@ -459,7 +478,7 @@ row_update_prebuilt_trx(
 		fprintf(stderr,
 		"InnoDB: Error: trying to use a corrupt\n"
 		"InnoDB: trx handle. Magic n %lu\n",
-		trx->magic_n);
+		(ulong) trx->magic_n);
 
 		mem_analyze_corruption((byte*)trx);
 
@@ -470,7 +489,7 @@ row_update_prebuilt_trx(
 		fprintf(stderr,
 		"InnoDB: Error: trying to use a corrupt\n"
 		"InnoDB: table handle. Magic n %lu, table name %s\n",
-		prebuilt->magic_n, prebuilt->table->name);
+		(ulong) prebuilt->magic_n, prebuilt->table->name);
 
 		mem_analyze_corruption((byte*)prebuilt);
 
@@ -697,7 +716,7 @@ row_insert_for_mysql(
 		fprintf(stderr,
 		"InnoDB: Error: trying to free a corrupt\n"
 		"InnoDB: table handle. Magic n %lu, table name %s\n",
-		prebuilt->magic_n, prebuilt->table->name);
+		(ulong) prebuilt->magic_n, prebuilt->table->name);
 
 		mem_analyze_corruption((byte*)prebuilt);
 
@@ -913,7 +932,7 @@ row_update_for_mysql(
 		fprintf(stderr,
 		"InnoDB: Error: trying to free a corrupt\n"
 		"InnoDB: table handle. Magic n %lu, table name %s\n",
-		prebuilt->magic_n, prebuilt->table->name);
+		(ulong) prebuilt->magic_n, prebuilt->table->name);
 
 		mem_analyze_corruption((byte*)prebuilt);
 
@@ -1148,7 +1167,9 @@ row_mysql_recover_tmp_table(
 	trx_t*		trx)	/* in: transaction handle */
 {
 	char*	ptr;
-	char	old_name[1000];
+	char	old_name[OS_FILE_MAX_PATH];
+
+	ut_a(ut_strlen(table->name) + 10 < OS_FILE_MAX_PATH); 
 
 	ut_memcpy(old_name, table->name, ut_strlen(table->name) + 1);
 
@@ -1217,7 +1238,8 @@ row_mysql_lock_data_dictionary(
 /*===========================*/
 	trx_t*	trx)	/* in: transaction */
 {
-	ut_a(trx->dict_operation_lock_mode == 0);
+	ut_a(trx->dict_operation_lock_mode == 0
+	     || trx->dict_operation_lock_mode == RW_X_LATCH);
 	
 	/* Serialize data dictionary operations with dictionary mutex:
 	no deadlocks or lock waits can occur then in these operations */
@@ -1421,9 +1443,8 @@ row_create_table_for_mysql(
      "InnoDB: Warning: cannot create table %s because tablespace full\n",
 				 table->name);
 		     	row_drop_table_for_mysql(table->name, trx);
-		} else {
-		       	ut_a(err == DB_DUPLICATE_KEY);
 
+		} else if (err == DB_DUPLICATE_KEY) {
 	    		ut_print_timestamp(stderr);
 
 			fprintf(stderr, 
@@ -1439,9 +1460,12 @@ row_create_table_for_mysql(
      "InnoDB: database and moving the .frm file to the current database.\n"
      "InnoDB: Then MySQL thinks the table exists, and DROP TABLE will\n"
      "InnoDB: succeed.\n"
-     "InnoDB: You can look further help from section 15.1 of\n"
+     "InnoDB: You can look for further help from section 15.1 of\n"
      "InnoDB: http://www.innodb.com/ibman.html\n");
 		}
+		
+		/* We may also get err == DB_ERROR if the .ibd file for the
+		table already exists */
 
 		trx->error_state = DB_SUCCESS;
 	}
@@ -1482,7 +1506,7 @@ row_create_index_for_mysql(
 	trx->op_info = (char *) "creating index";
 
 	/* Check that the same column does not appear twice in the index.
-	Starting from 4.0.14 InnoDB should be able to cope with that, but
+	Starting from 4.0.14, InnoDB should be able to cope with that, but
 	safer not to allow them. */
 
 	for (i = 0; i < dict_index_get_n_fields(index); i++) {
@@ -1524,6 +1548,9 @@ row_create_index_for_mysql(
 
 	trx->dict_operation = TRUE;
 
+	/* Note that the space id where we store the index is inherited from
+	the table in dict_build_index_def_step() in dict0crea.c. */
+
 	node = ind_create_graph_create(index, heap);
 
 	thr = pars_complete_graph_for_exec(node, trx, heap);
@@ -1536,7 +1563,6 @@ row_create_index_for_mysql(
 	que_graph_free((que_t*) que_node_get_parent(thr));
 
 error_handling:
-
 	if (err != DB_SUCCESS) {
 		/* We have special error handling here */
 		
@@ -1801,6 +1827,261 @@ row_add_table_to_background_drop_list(
 }
 
 /*************************************************************************
+Discards the tablespace of a table which is stored in an .ibd file. Discarding
+means that this function deletes the .ibd file and assigns a new table id for
+the table. Also the flag table->ibd_file_missing is set TRUE.
+
+How do we prevent crashes caused by ongoing operations on the table? Old
+operations could try to access non-existent pages.
+
+1) SQL queries, INSERT, SELECT, ...: we must get an exclusive MySQL table lock
+on the table before we can do DISCARD TABLESPACE. Then there are no running
+queries on the table.
+2) Purge and rollback: we assign a new table id for the table. Since purge and
+rollback look for the table based on the table id, they see the table as
+'dropped' and discard their operations.
+3) Insert buffer: we remove all entries for the tablespace in the insert
+buffer tree; as long as the tablespace mem object does not exist, ongoing
+insert buffer page merges are discarded in buf0rea.c. If we recreate the
+tablespace mem object with IMPORT TABLESPACE later, then the tablespace will
+have the same id, but the tablespace_version field in the mem object is
+different, and ongoing old insert buffer page merges get discarded.
+4) Linear readahead and random readahead: we use the same method as in 3) to
+discard ongoing operations. */
+
+int
+row_discard_tablespace_for_mysql(
+/*=============================*/
+			/* out: error code or DB_SUCCESS */
+	char*	name,	/* in: table name */
+	trx_t*	trx)	/* in: transaction handle */
+{
+	dulint		new_id;
+	dict_table_t*	table;
+	que_thr_t*	thr;
+	que_t*		graph			= NULL;
+	ibool		success;
+	ulint		err;
+	char		buf[2 * OS_FILE_MAX_PATH];
+
+	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+
+	trx->op_info = (char *) "discarding tablespace";
+	trx_start_if_not_started(trx);
+
+	/* Serialize data dictionary operations with dictionary mutex:
+	no deadlocks can occur then in these operations */
+
+	row_mysql_lock_data_dictionary(trx);
+
+	table = dict_table_get_low(name);
+
+	if (!table) {
+		err = DB_TABLE_NOT_FOUND;
+
+		goto funct_exit;
+	}
+
+	if (table->space == 0) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Error: table %s\n"
+"InnoDB: is in the system tablespace 0 which cannot be discarded\n", name);
+		err = DB_ERROR;
+
+		goto funct_exit;
+	}
+
+	/* Check BEFORE formatting: the fixed SQL text is under 1000 bytes */
+	ut_a(ut_strlen(name) + 1000 < sizeof(buf));
+
+	new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID);
+	sprintf(buf,
+	"PROCEDURE DISCARD_TABLESPACE_PROC () IS\n"
+	"old_id CHAR;\n"
+	"new_id CHAR;\n"
+	"new_id_low INT;\n"
+	"new_id_high INT;\n"
+	"table_name CHAR;\n"
+	"BEGIN\n"
+	"table_name :='%s';\n"
+	"new_id_high := %lu;\n"
+	"new_id_low := %lu;\n"
+   "new_id := CONCAT(TO_BINARY(new_id_high, 4), TO_BINARY(new_id_low, 4));\n"
+	"SELECT ID INTO old_id\n"
+	"FROM SYS_TABLES\n"
+	"WHERE NAME = table_name;\n"
+	"IF (SQL %% NOTFOUND) THEN\n"
+	"	COMMIT WORK;\n"
+	"	RETURN;\n"
+	"END IF;\n"
+	"UPDATE SYS_TABLES SET ID = new_id\n"
+	"WHERE ID = old_id;\n"
+	"UPDATE SYS_COLUMNS SET TABLE_ID = new_id\n"
+	"WHERE TABLE_ID = old_id;\n"
+	"UPDATE SYS_INDEXES SET TABLE_ID = new_id\n"
+	"WHERE TABLE_ID = old_id;\n"
+	"COMMIT WORK;\n"
+	"END;\n", name, (ulong) ut_dulint_get_high(new_id),
+		(ulong) ut_dulint_get_low(new_id));
+
+	graph = pars_sql(buf);
+
+	ut_a(graph);
+
+	graph->trx = trx;
+	trx->graph = NULL;
+
+	graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
+
+	thr = que_fork_start_command(graph, SESS_COMM_EXECUTE, 0);
+	ut_a(thr);	/* keep the call itself outside the assertion */
+	que_run_threads(thr);
+
+	err = trx->error_state;
+
+	if (err != DB_SUCCESS) {
+		trx->error_state = DB_SUCCESS;	/* err already holds the code */
+		trx_general_rollback_for_mysql(trx, FALSE, NULL);
+		trx->error_state = DB_SUCCESS;
+	} else {
+		dict_table_change_id_in_cache(table, new_id);
+
+		success = fil_discard_tablespace(table->space);
+
+		if (!success) {
+			trx->error_state = DB_SUCCESS;
+			trx_general_rollback_for_mysql(trx, FALSE, NULL);
+			trx->error_state = DB_SUCCESS;
+
+			err = DB_ERROR;
+		} else {
+			/* Set the flag which tells that now it is legal to
+			IMPORT a tablespace for this table */
+			table->tablespace_discarded = TRUE;
+			table->ibd_file_missing = TRUE;
+		}
+	}
+funct_exit:
+	row_mysql_unlock_data_dictionary(trx);
+
+	if (graph) {
+		que_graph_free(graph);
+	}
+
+	trx_commit_for_mysql(trx);
+
+	trx->op_info = (char *) "";
+
+	return((int) err);
+}
+
+/*********************************************************************
+Imports a tablespace. The space id in the .ibd file must match the space id
+of the table in the data dictionary. */
+
+int
+row_import_tablespace_for_mysql(
+/*============================*/
+			/* out: error code or DB_SUCCESS */
+	char*	name,	/* in: table name */
+	trx_t*	trx)	/* in: transaction handle */
+{
+	dict_table_t*	table;
+	ibool		success;
+	dulint		current_lsn;
+	ulint		err		= DB_SUCCESS;
+
+	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
+
+	trx_start_if_not_started(trx);
+
+	trx->op_info = (char*) "importing tablespace";
+
+	current_lsn = log_get_lsn();
+	
+	/* It is possible, though very improbable, that the lsn's in the
+	tablespace to be imported have risen above the current system lsn, if
+	a lengthy purge, ibuf merge, or rollback was performed on a backup
+	taken with ibbackup. If that is the case, reset page lsn's in the
+	file. We assume that mysqld was shut down after it performed these
+	cleanup operations on the .ibd file, so that it stamped the latest lsn
+	to the FIL_PAGE_FILE_FLUSH_LSN in the first page of the .ibd file.
+
+	TODO: reset also the trx id's in clustered index records and write
+	a new space id to each data page. That would allow us to import clean
+	.ibd files from another MySQL installation. */
+
+	success = fil_reset_too_high_lsns(name, current_lsn);
+
+	if (!success) {
+		err = DB_ERROR;
+		/* funct_exit unlocks the dictionary, so lock it first */
+		row_mysql_lock_data_dictionary(trx);
+
+		goto funct_exit;
+	}
+
+	/* Serialize data dictionary operations with dictionary mutex:
+	no deadlocks can occur then in these operations */
+
+	row_mysql_lock_data_dictionary(trx);
+
+	table = dict_table_get_low(name);
+
+	if (!table) {
+		err = DB_TABLE_NOT_FOUND;
+
+		goto funct_exit;
+	}
+
+	if (table->space == 0) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Error: table %s\n"
+"InnoDB: is in the system tablespace 0 which cannot be imported\n", name);
+		err = DB_ERROR;
+
+		goto funct_exit;
+	}
+
+	if (!table->tablespace_discarded) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Error: you are trying to IMPORT a tablespace\n"
+"InnoDB: %s, though you have not called DISCARD on it yet\n"
+"InnoDB: during the lifetime of the mysqld process!\n", name);
+
+		err = DB_ERROR;
+
+		goto funct_exit;
+	}
+
+	/* Play safe and remove all insert buffer entries, though we should
+	have removed them already when DISCARD TABLESPACE was called */
+
+	ibuf_delete_for_discarded_space(table->space);
+
+	success = fil_open_single_table_tablespace(table->space, table->name);
+
+	if (success) {
+		table->ibd_file_missing = FALSE;
+		table->tablespace_discarded = FALSE;
+	} else {
+		err = DB_ERROR;
+	}
+
+funct_exit:	
+	row_mysql_unlock_data_dictionary(trx);
+	/* The transaction is committed on the error paths too */
+  	trx_commit_for_mysql(trx);
+
+	trx->op_info = (char *) "";
+
+	return((int) err);
+}
+
+/*************************************************************************
 Drops a table for MySQL. If the name of the dropped table ends to
 characters INNODB_MONITOR, then this also stops printing of monitor
 output by the master thread. */
@@ -1808,12 +2089,13 @@ output by the master thread. */
 int
 row_drop_table_for_mysql(
 /*=====================*/
-				/* out: error code or DB_SUCCESS */
-	char*	name,		/* in: table name */
-	trx_t*	trx)		/* in: transaction handle */
+			/* out: error code or DB_SUCCESS */
+	char*	name,	/* in: table name */
+	trx_t*	trx)	/* in: transaction handle */
 {
 	dict_foreign_t*	foreign;
 	dict_table_t*	table;
+	ulint		space_id;
 	que_thr_t*	thr;
 	que_t*		graph;
 	ulint		err;
@@ -1822,8 +2104,9 @@ row_drop_table_for_mysql(
 	ulint		len;
 	ulint		namelen;
 	ulint		keywordlen;
+	ibool		success;
 	ibool		locked_dictionary	= FALSE;
-	char		buf[10000];
+	char		buf[OS_FILE_MAX_PATH + 2000];
 
 	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
 	ut_a(name != NULL);
@@ -1964,6 +2247,8 @@ row_drop_table_for_mysql(
 
 	ut_memcpy(buf + len, str2, ut_strlen(str2) + 1);
 
+	ut_a(strlen(buf) < OS_FILE_MAX_PATH + 2000);
+
 	/* Serialize data dictionary operations with dictionary mutex:
 	no deadlocks can occur then in these operations */
 
@@ -1997,11 +2282,12 @@ row_drop_table_for_mysql(
 	    	ut_print_timestamp(stderr);
 
 		fprintf(stderr, 
-     	"  InnoDB: Error: table %s does not exist in the InnoDB internal\n"
+     	"  InnoDB: Error: table %s\n"
+	"InnoDB: does not exist in the InnoDB internal\n"
      	"InnoDB: data dictionary though MySQL is trying to drop it.\n"
      	"InnoDB: Have you copied the .frm file of the table to the\n"
 	"InnoDB: MySQL database directory from another database?\n"
-	"InnoDB: You can look further help from section 15.1 of\n"
+	"InnoDB: You can look for further help from section 15.1 of\n"
         "InnoDB: http://www.innodb.com/ibman.html\n",
 				 name);
 		goto funct_exit;
@@ -2093,13 +2379,37 @@ row_drop_table_for_mysql(
 
 		ut_error;
 	} else {
+		space_id = table->space;
 		dict_table_remove_from_cache(table);
 
 		if (dict_load_table(name) != NULL) {
 			ut_print_timestamp(stderr);
 			fprintf(stderr,
-"  InnoDB: Error: dropping of table %s failed!\n", name);
+"  InnoDB: Error: not able to remove table %s from the dictionary cache!\n",
+									name);
+			err = DB_ERROR;
+		}
+
+		/* Do not drop possible .ibd tablespace if something went
+		wrong: we do not want to delete valuable data of the user */
+
+		if (err == DB_SUCCESS && space_id > 0) {
+			if (!fil_space_for_table_exists_in_mem(space_id, name,
+								FALSE, TRUE)) {
+				err = DB_ERROR;
+
+				goto funct_exit;
+			}
+
+			success = fil_delete_tablespace(space_id);
 
+			if (!success) {
+				ut_print_timestamp(stderr);
+				fprintf(stderr,
+"  InnoDB: Error: not able to delete tablespace %lu of table %s!\n",
+					(ulong) space_id, name);
+				err = DB_ERROR;
+			}
 		}
 	}
 funct_exit:
@@ -2176,7 +2486,7 @@ loop:
 		if (err != DB_SUCCESS) {
 			fprintf(stderr,
 	"InnoDB: DROP DATABASE %s failed with error %lu for table %s\n",
-				name, (ulint)err, table_name);
+				name, (ulong) err, table_name);
 			break;
 		}
 	}
@@ -2233,13 +2543,14 @@ row_rename_table_for_mysql(
 	mem_heap_t*	heap			= NULL;
 	char**		constraints_to_drop	= NULL;
 	ulint		n_constraints_to_drop	= 0;
-        ibool           recovering_temp_table   = FALSE;
-        ulint           namelen;
-        ulint           keywordlen;
+	ibool		recovering_temp_table	= FALSE;
+	ulint		namelen;
+	ulint		keywordlen;
 	ulint		len;
 	ulint		i;
 	char*		db_name;
-	char		buf[10000];
+	ibool		success;
+	char		buf[2 * OS_FILE_MAX_PATH];
 
 	ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
 	ut_a(old_name != NULL);
@@ -2273,21 +2584,21 @@ row_rename_table_for_mysql(
 	trx->op_info = (char *) "renaming table";
 	trx_start_if_not_started(trx);
 
-        namelen = ut_strlen(new_name);
+	namelen = ut_strlen(new_name);
 
-        keywordlen = ut_strlen("_recover_innodb_tmp_table");
+	keywordlen = ut_strlen("_recover_innodb_tmp_table");
 
-        if (namelen >= keywordlen
-                    && 0 == ut_memcmp(new_name + namelen - keywordlen,
-                     (char*)"_recover_innodb_tmp_table", keywordlen)) {
+	if (namelen >= keywordlen
+		    && 0 == ut_memcmp(new_name + namelen - keywordlen,
+ 		     (char*)"_recover_innodb_tmp_table", keywordlen)) {
 
-                recovering_temp_table = TRUE;
-        }
+		recovering_temp_table = TRUE;
+	}
 
 	/* Serialize data dictionary operations with dictionary mutex:
 	no deadlocks can occur then in these operations */
 
-	if (!recovering_temp_table) {		
+	if (!recovering_temp_table) {
 		row_mysql_lock_data_dictionary(trx);
 	}
 
@@ -2295,7 +2606,30 @@ row_rename_table_for_mysql(
 
 	if (!table) {
 		err = DB_TABLE_NOT_FOUND;
+	    	ut_print_timestamp(stderr);
+
+		fprintf(stderr, 
+     	"  InnoDB: Error: table %s\n"
+	"InnoDB: does not exist in the InnoDB internal\n"
+     	"InnoDB: data dictionary though MySQL is trying to rename the table.\n"
+     	"InnoDB: Have you copied the .frm file of the table to the\n"
+	"InnoDB: MySQL database directory from another database?\n"
+	"InnoDB: You can look for further help from section 15.1 of\n"
+        "InnoDB: http://www.innodb.com/ibman.html\n",
+				 old_name);
+		goto funct_exit;
+	}
+
+	if (table->ibd_file_missing) {
+		err = DB_TABLE_NOT_FOUND;
+	    	ut_print_timestamp(stderr);
 
+		fprintf(stderr, 
+     	"  InnoDB: Error: table %s\n"
+	"InnoDB: does not have an .ibd file in the database directory.\n"
+	"InnoDB: You can look for further help from section 15.1 of\n"
+        "InnoDB: http://www.innodb.com/ibman.html\n",
+				 old_name);
 		goto funct_exit;
 	}
 
@@ -2449,6 +2783,8 @@ row_rename_table_for_mysql(
 
 	ut_memcpy(buf + len, str3, ut_strlen(str3) + 1);
 	
+	ut_a(strlen(buf) < 2 * OS_FILE_MAX_PATH);
+
 	graph = pars_sql(buf);
 
 	ut_a(graph);
@@ -2467,20 +2803,17 @@ row_rename_table_for_mysql(
 	if (err != DB_SUCCESS) {
 		if (err == DB_DUPLICATE_KEY) {
 	    		ut_print_timestamp(stderr);
-
 			fprintf(stderr,
      "  InnoDB: Error: table %s exists in the InnoDB internal data\n"
      "InnoDB: dictionary though MySQL is trying rename table %s to it.\n"
      "InnoDB: Have you deleted the .frm file and not used DROP TABLE?\n"
-     "InnoDB: You can look further help from section 15.1 of\n"
+     "InnoDB: You can look for further help from section 15.1 of\n"
      "InnoDB: http://www.innodb.com/ibman.html\n",
 			new_name, old_name);
-
 			fprintf(stderr,
      "InnoDB: If table %s is a temporary table #sql..., then it can be that\n"
      "InnoDB: there are still queries running on the table, and it will be\n"
      "InnoDB: dropped automatically when the queries end.\n", new_name);
-			
 			fprintf(stderr,
      "InnoDB: You can drop the orphaned table inside InnoDB by\n"
      "InnoDB: creating an InnoDB table with the same name in another\n"
@@ -2488,13 +2821,27 @@ row_rename_table_for_mysql(
      "InnoDB: Then MySQL thinks the table exists, and DROP TABLE will\n"
      "InnoDB: succeed.\n");
 		}
-
 		trx->error_state = DB_SUCCESS;
 		trx_general_rollback_for_mysql(trx, FALSE, NULL);
 		trx->error_state = DB_SUCCESS;
 	} else {
-		ut_a(dict_table_rename_in_cache(table, new_name,
-				!row_is_mysql_tmp_table_name(new_name)));
+		/* The following call will also rename the .ibd data file if
+		the table is stored in a single-table tablespace */
+
+		success = dict_table_rename_in_cache(table, new_name,
+				!row_is_mysql_tmp_table_name(new_name));
+		if (!success) {
+			trx->error_state = DB_SUCCESS;
+			trx_general_rollback_for_mysql(trx, FALSE, NULL);
+			trx->error_state = DB_SUCCESS;
+			ut_print_timestamp(stderr);
+				fprintf(stderr,
+" InnoDB: Error in table rename, cannot rename %s to %s\n", old_name,
+								new_name);
+			err = DB_ERROR;
+
+			goto funct_exit;
+		}
 
 		if (row_is_mysql_tmp_table_name(old_name)) {
 
@@ -2508,18 +2855,14 @@ row_rename_table_for_mysql(
 			err = dict_load_foreigns(new_name);
 
 			if (err != DB_SUCCESS) {
-
 	    			ut_print_timestamp(stderr);
-
 				fprintf(stderr,
      "  InnoDB: Error: in ALTER TABLE table %s\n"
      "InnoDB: has or is referenced in foreign key constraints\n"
      "InnoDB: which are not compatible with the new table definition.\n",
      new_name);
-     
 				ut_a(dict_table_rename_in_cache(table,
 							old_name, FALSE));
-						
 				trx->error_state = DB_SUCCESS;
 				trx_general_rollback_for_mysql(trx, FALSE,
 									NULL);
@@ -2527,8 +2870,8 @@ row_rename_table_for_mysql(
 			}
 		}
 	}
-funct_exit:
-	if (!recovering_temp_table) {		
+funct_exit:	
+	if (!recovering_temp_table) {
 		row_mysql_unlock_data_dictionary(trx);
 	}
 
@@ -2687,7 +3030,7 @@ row_check_table_for_mysql(
 	ulint		n_rows_in_table	= ULINT_UNDEFINED;
 	ulint		ret 		= DB_SUCCESS;
 	ulint		old_isolation_level;
-	
+
 	prebuilt->trx->op_info = (char *) "checking table";
 
 	old_isolation_level = prebuilt->trx->isolation_level;
@@ -2723,7 +3066,8 @@ row_check_table_for_mysql(
  
 				fprintf(stderr,
 		"Error: index %s contains %lu entries, should be %lu\n",
-					index->name, n_rows, n_rows_in_table);
+					index->name, (ulong) n_rows,
+					(ulong) n_rows_in_table);
 			}
 		}
 
diff --git a/innobase/row/row0purge.c b/innobase/row/row0purge.c
index bd3742ad589..a409b64f8e4 100644
--- a/innobase/row/row0purge.c
+++ b/innobase/row/row0purge.c
@@ -531,6 +531,16 @@ row_purge_parse_undo_rec(
 		return(FALSE);
 	}
 
+	if (node->table->ibd_file_missing) {
+		/* We skip purge of missing .ibd files */
+
+		node->table = NULL;
+
+		row_mysql_unfreeze_data_dictionary(trx);
+
+		return(FALSE);
+	}
+
 	clust_index = dict_table_get_first_index(node->table);
 
 	if (clust_index == NULL) {
diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c
index 6ae4f791205..4f70cea2058 100644
--- a/innobase/row/row0sel.c
+++ b/innobase/row/row0sel.c
@@ -1756,7 +1756,7 @@ row_sel_step(
 		return(NULL);
 	} else {
 		/* SQL error detected */
-		printf("SQL error %lu\n", err);
+		printf("SQL error %lu\n", (ulong) err);
 
 		que_thr_handle_error(thr, DB_ERROR, NULL, 0);
 
@@ -1806,7 +1806,7 @@ fetch_step(
 	
 	if (sel_node->state == SEL_NODE_CLOSED) {
 		/* SQL error detected */
-		printf("SQL error %lu\n", (ulint)DB_ERROR);
+		printf("SQL error %lu\n", (ulong) DB_ERROR);
 
 		que_thr_handle_error(thr, DB_ERROR, NULL, 0);
 
@@ -1903,6 +1903,7 @@ row_sel_convert_mysql_key_to_innobase(
 	ulint		key_len)	/* in: MySQL key value length */
 {
 	byte*		original_buf	= buf;
+	byte*		original_key_ptr = key_ptr;
 	dict_field_t*	field;
 	dfield_t*	dfield;
 	ulint		data_offset;
@@ -2026,7 +2027,16 @@ row_sel_convert_mysql_key_to_innobase(
 		        ut_print_timestamp(stderr);
 			
 			fprintf(stderr,
-  "  InnoDB: Warning: using a partial-field key prefix in search\n");
+  "  InnoDB: Warning: using a partial-field key prefix in search.\n"
+  "InnoDB: Table name %s, index name %s. Last data field length %lu bytes,\n"
+  "InnoDB: key ptr now exceeds key end by %lu bytes.\n"
+  "InnoDB: Key value in the MySQL format:\n", index->table_name, index->name,
+					  (ulong) data_field_len,
+					  (ulong) (key_ptr - key_end));
+			fflush(stderr);
+			ut_print_buf(original_key_ptr, key_len);
+			fflush(stdout);
+			fprintf(stderr, "\n");
 
 			if (!is_null) {
 			        dfield->len -= (ulint)(key_ptr - key_end);
@@ -2069,8 +2079,8 @@ row_sel_store_row_id_to_prebuilt(
 	        fprintf(stderr,
 "InnoDB: Error: Row id field is wrong length %lu in table %s index %s\n"
 "InnoDB: Field number %lu, record:\n%s\n",
-		      len, index->table_name, index->name,
-		      dict_index_get_sys_col_pos(index, DATA_ROW_ID),
+		      (ulong) len, index->table_name, index->name,
+		      (ulong) dict_index_get_sys_col_pos(index, DATA_ROW_ID),
 		      err_buf);
 		ut_error;
 	}
@@ -2150,9 +2160,13 @@ Note that the template in prebuilt may advise us to copy only a few
 columns to mysql_rec, other columns are left blank. All columns may not
 be needed in the query. */
 static
-void
+ibool
 row_sel_store_mysql_rec(
 /*====================*/
+					/* out: TRUE if success, FALSE if
+					could not allocate memory for a BLOB
+					(though we may also assert in that
+					case) */
 	byte*		mysql_rec,	/* out: row in the MySQL format */
 	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
 	rec_t*		rec)		/* in: Innobase record in the index
@@ -2164,6 +2178,7 @@ row_sel_store_mysql_rec(
 	byte*			data;
 	ulint			len;
 	byte*			blob_buf;
+	int			pad_char;
 	ulint			i;
 	
 	ut_ad(prebuilt->mysql_template);
@@ -2173,9 +2188,10 @@ row_sel_store_mysql_rec(
 		prebuilt->blob_heap = NULL;
 	}
 
-	/* Mark all columns as not SQL NULL */
+	/* MySQL assumes that all columns have the SQL NULL bit set unless it
+	is a nullable column with a non-NULL value */
 
-	memset(mysql_rec, '\0', prebuilt->null_bitmap_len);
+	memset(mysql_rec, 0xFF, prebuilt->null_bitmap_len);
 
 	for (i = 0; i < prebuilt->n_template; i++) {
 
@@ -2192,6 +2208,10 @@ row_sel_store_mysql_rec(
 
 			extern_field_heap = mem_heap_create(UNIV_PAGE_SIZE);
 
+			/* NOTE: if we are retrieving a big BLOB, we may
+			already run out of memory in the next call, which
+			causes an assert */
+
 			data = btr_rec_copy_externally_stored_field(rec,
 					templ->rec_field_no, &len,
 					extern_field_heap);
@@ -2203,9 +2223,29 @@ row_sel_store_mysql_rec(
 			if (templ->type == DATA_BLOB) {
 
 				ut_a(prebuilt->templ_contains_blob);
-				
-				/* Copy the BLOB data to the BLOB
-				heap of prebuilt */
+
+				/* A heuristic test that we can allocate the
+				memory for a big BLOB. We have a safety margin
+				of 1000000 bytes. Since the test takes some
+				CPU time, we do not use it for small BLOBs. */
+
+				if (len > 2000000
+				    && !ut_test_malloc(len + 1000000)) {
+
+					ut_print_timestamp(stderr);
+					fprintf(stderr,
+"  InnoDB: Warning: could not allocate %lu + 1000000 bytes to retrieve\n"
+"InnoDB: a big column. Table name %s\n", (ulong) len, prebuilt->table->name);
+
+					if (extern_field_heap) {
+						mem_heap_free(
+							extern_field_heap);
+					}
+					return(FALSE);
+				}
+
+				/* Copy the BLOB data to the BLOB heap of
+				prebuilt */
 
 				if (prebuilt->blob_heap == NULL) {
 					prebuilt->blob_heap =
@@ -2223,31 +2263,46 @@ row_sel_store_mysql_rec(
 				mysql_rec + templ->mysql_col_offset,
 				templ->mysql_col_len, data, len,
 				templ->type, templ->is_unsigned);
-
+				
+			/* Cleanup */
 			if (extern_field_heap) {
  				mem_heap_free(extern_field_heap);
 				extern_field_heap = NULL;
  			}
+			
+			if (templ->mysql_null_bit_mask) {
+				/* It is a nullable column with a non-NULL
+				value */
+				mysql_rec[templ->mysql_null_byte_offset] &=
+					~(byte) (templ->mysql_null_bit_mask);
+			}
 		} else {
 		        /* MySQL seems to assume the field for an SQL NULL
-		        value is set to zero. Not taking this into account
-		        caused seg faults with NULL BLOB fields, and
+		        value is set to zero or space. Not taking this into
+			account caused seg faults with NULL BLOB fields, and
 		        bug number 154 in the MySQL bug database: GROUP BY
 		        and DISTINCT could treat NULL values inequal. */
 
-		        memset(mysql_rec + templ->mysql_col_offset, '\0',
-			       templ->mysql_col_len);
-
-			if (!templ->mysql_null_bit_mask) {
-				fprintf(stderr,
-"InnoDB: Error: trying to return an SQL NULL field in a non-null\n"
-"innoDB: column! Table name %s\n", prebuilt->table->name);
+			if (templ->type == DATA_VARCHAR
+			    || templ->type == DATA_CHAR
+			    || templ->type == DATA_BINARY
+			    || templ->type == DATA_FIXBINARY
+			    || templ->type == DATA_MYSQL
+			    || templ->type == DATA_VARMYSQL) {
+			        /* MySQL pads all non-BLOB and non-TEXT
+				string types with space ' ' */
+			    
+				pad_char = ' ';
 			} else {
-				mysql_rec[templ->mysql_null_byte_offset] |=
-					(byte) (templ->mysql_null_bit_mask);
+				pad_char = '\0';
 			}
+
+			memset(mysql_rec + templ->mysql_col_offset, pad_char,
+							templ->mysql_col_len);
 		}
 	} 
+
+	return(TRUE);
 }
 
 /*************************************************************************
@@ -2572,9 +2627,9 @@ row_sel_push_cache_row_for_mysql(
 
 	ut_ad(prebuilt->fetch_cache_first == 0);
 
-	row_sel_store_mysql_rec(
+	ut_a(row_sel_store_mysql_rec(
 			prebuilt->fetch_cache[prebuilt->n_fetch_cached],
-			prebuilt, rec);
+			prebuilt, rec));
 
 	prebuilt->n_fetch_cached++;
 }
@@ -2656,7 +2711,8 @@ row_search_for_mysql(
 /*=================*/
 					/* out: DB_SUCCESS,
 					DB_RECORD_NOT_FOUND, 
-					DB_END_OF_INDEX, or DB_DEADLOCK */
+					DB_END_OF_INDEX, DB_DEADLOCK,
+					or DB_TOO_BIG_RECORD */
 	byte*		buf,		/* in/out: buffer for the fetched
 					row in the MySQL format */
 	ulint		mode,		/* in: search mode PAGE_CUR_L, ... */
@@ -2712,13 +2768,25 @@ row_search_for_mysql(
 		fprintf(stderr,
 		"InnoDB: Error: trying to free a corrupt\n"
 		"InnoDB: table handle. Magic n %lu, table name %s\n",
-		prebuilt->magic_n, prebuilt->table->name);
+		(ulong) prebuilt->magic_n, prebuilt->table->name);
 
 		mem_analyze_corruption((byte*)prebuilt);
 
 		ut_error;
 	}
 
+	if (trx->n_mysql_tables_in_use == 0) {
+		char	err_buf[1000];
+
+		trx_print(err_buf, trx);
+
+		fprintf(stderr,
+"InnoDB: Error: MySQL is trying to perform a SELECT\n"
+"InnoDB: but it has not locked any tables in ::external_lock()!\n%s\n",
+			err_buf);
+		ut_a(0);
+	}
+
 /*	printf("Match mode %lu\n search tuple ", match_mode);
 	dtuple_print(search_tuple);
 	
@@ -2836,10 +2904,10 @@ row_search_for_mysql(
 		retrieve also a second row if a primary key contains more than
 		1 column. Return immediately if this is not a HANDLER
 		command. */
-		
+
 		if (direction != 0 && !prebuilt->used_in_HANDLER) {
-		        
-			trx->op_info = (char*)"";
+        
+			trx->op_info = (char *) "";
 			return(DB_RECORD_NOT_FOUND);
 		}
 	}
@@ -2895,7 +2963,14 @@ row_search_for_mysql(
 #ifdef UNIV_SEARCH_DEBUG
 				ut_a(0 == cmp_dtuple_rec(search_tuple, rec));
 #endif 
-				row_sel_store_mysql_rec(buf, prebuilt, rec);
+				if (!row_sel_store_mysql_rec(buf, prebuilt,
+								rec)) {
+ 					err = DB_TOO_BIG_RECORD;
+
+					/* We let the main loop do the
+					error handling */
+ 					goto shortcut_fails_too_big_rec;
+				}
 	
  				mtr_commit(&mtr);
 
@@ -2942,7 +3017,7 @@ row_search_for_mysql(
 
 				return(DB_RECORD_NOT_FOUND);
 			}
-
+shortcut_fails_too_big_rec:
 			mtr_commit(&mtr);
 			mtr_start(&mtr);
 		}
@@ -3018,6 +3093,18 @@ row_search_for_mysql(
 	if (!prebuilt->sql_stat_start) {
 		/* No need to set an intention lock or assign a read view */
 
+		if (trx->read_view == NULL
+		    && prebuilt->select_lock_type == LOCK_NONE) {
+			char	err_buf[1000];
+
+			trx_print(err_buf, trx);
+
+			fprintf(stderr,
+"InnoDB: Error: MySQL is trying to perform a consistent read\n"
+"InnoDB: but the read view is not assigned!\n%s\n", err_buf);
+			
+			ut_a(0);
+		}
 	} else if (prebuilt->select_lock_type == LOCK_NONE) {
 		/* This is a consistent read */	
 		/* Assign a read view for the query */
@@ -3093,8 +3180,9 @@ rec_loop:
 "  InnoDB: Index corruption: rec offs %lu next offs %lu, page no %lu,\n"
 "InnoDB: index %s, table %s. Run CHECK TABLE to table. You may need to\n"
 "InnoDB: restore from a backup, or dump + drop + reimport the table.\n",
-			   (ulint)(rec - buf_frame_align(rec)), next_offs,
-			   buf_frame_get_page_no(rec), index->name,
+			   (ulong) (rec - buf_frame_align(rec)),
+			   (ulong) next_offs,
+			   (ulong) buf_frame_get_page_no(rec), index->name,
 			   index->table_name);
 		
 			err = DB_CORRUPTION;
@@ -3107,8 +3195,9 @@ rec_loop:
 			fprintf(stderr,
 "InnoDB: Index corruption: rec offs %lu next offs %lu, page no %lu,\n"
 "InnoDB: index %s, table %s. We try to skip the rest of the page.\n",
-			   (ulint)(rec - buf_frame_align(rec)), next_offs,
-			   buf_frame_get_page_no(rec), index->name,
+			   (ulong) (rec - buf_frame_align(rec)),
+			   (ulong) next_offs,
+			   (ulong) buf_frame_get_page_no(rec), index->name,
 			   index->table_name);
 
 			btr_pcur_move_to_last_on_page(pcur, &mtr);
@@ -3123,8 +3212,9 @@ rec_loop:
 			fprintf(stderr,
 "InnoDB: Index record corruption: rec offs %lu next offs %lu, page no %lu,\n"
 "InnoDB: index %s, table %s. We try to skip the record.\n",
-			   (ulint)(rec - buf_frame_align(rec)), next_offs,
-			   buf_frame_get_page_no(rec), index->name,
+			   (ulong) (rec - buf_frame_align(rec)),
+			   (ulong) next_offs,
+			   (ulong) buf_frame_get_page_no(rec), index->name,
 			   index->table_name);
 
 			goto next_rec;
@@ -3351,7 +3441,11 @@ rec_loop:
 						rec_get_size(rec));
 			mach_write_to_4(buf, rec_get_extra_size(rec) + 4);
 		} else {
-			row_sel_store_mysql_rec(buf, prebuilt, rec);
+			if (!row_sel_store_mysql_rec(buf, prebuilt, rec)) {
+				err = DB_TOO_BIG_RECORD;
+
+				goto lock_wait_or_error;
+			}
 		}
 
 		if (prebuilt->clust_index_was_generated) {
diff --git a/innobase/row/row0uins.c b/innobase/row/row0uins.c
index fff67dcd627..08f0e29c839 100644
--- a/innobase/row/row0uins.c
+++ b/innobase/row/row0uins.c
@@ -258,6 +258,13 @@ row_undo_ins_parse_undo_rec(
 		return;
 	}
 
+	if (node->table->ibd_file_missing) {
+		/* We skip undo operations to missing .ibd files */
+		node->table = NULL;
+
+		return;
+	}
+
 	clust_index = dict_table_get_first_index(node->table);
 	
 	ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
diff --git a/innobase/row/row0umod.c b/innobase/row/row0umod.c
index 34c3aaf9142..1bfd71f8c64 100644
--- a/innobase/row/row0umod.c
+++ b/innobase/row/row0umod.c
@@ -708,6 +708,13 @@ row_undo_mod_parse_undo_rec(
 	        return;
 	}
 
+	if (node->table->ibd_file_missing) {
+		/* We skip undo operations to missing .ibd files */
+		node->table = NULL;
+
+		return;
+	}
+
 	clust_index = dict_table_get_first_index(node->table);
 
 	ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
diff --git a/innobase/row/row0undo.c b/innobase/row/row0undo.c
index 78cfe70c260..613d0a3b890 100644
--- a/innobase/row/row0undo.c
+++ b/innobase/row/row0undo.c
@@ -323,7 +323,8 @@ row_undo_step(
 	if (err != DB_SUCCESS) {
 		/* SQL error detected */
 
-		fprintf(stderr, "InnoDB: Fatal error %lu in rollback.\n", err);
+		fprintf(stderr, "InnoDB: Fatal error %lu in rollback.\n",
+			(ulong) err);
 
 		if (err == DB_OUT_OF_FILE_SPACE) {
 			fprintf(stderr,
diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c
index ad6542845cb..f8739b65c2f 100644
--- a/innobase/row/row0upd.c
+++ b/innobase/row/row0upd.c
@@ -1605,7 +1605,8 @@ row_upd_clust_step(
 	then we have to free the file segments of the index tree associated
 	with the index */
 
-	if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {
+	if (node->is_delete
+	    && ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) {
 
 		dict_drop_index_tree(btr_pcur_get_rec(pcur), mtr);
 
diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c
index 0be0ab957af..838e63b3e25 100644
--- a/innobase/srv/srv0srv.c
+++ b/innobase/srv/srv0srv.c
@@ -72,6 +72,10 @@ names, where the file name itself may also contain a path */
 char*	srv_data_home 	= NULL;
 char*	srv_arch_dir 	= NULL;
 
+ibool	srv_file_per_table = FALSE;	/* store each table created by a
+					user in its own file; data dictionary
+					tables are in the system tablespace
+					0 */
 ulint	srv_n_data_files = 0;
 char**	srv_data_file_names = NULL;
 ulint*	srv_data_file_sizes = NULL;	/* size in database pages */ 
@@ -95,7 +99,7 @@ char**	srv_log_group_home_dirs = NULL;
 ulint	srv_n_log_groups	= ULINT_MAX;
 ulint	srv_n_log_files		= ULINT_MAX;
 ulint	srv_log_file_size	= ULINT_MAX;	/* size in database pages */ 
-ibool	srv_log_archive_on	= TRUE;
+ibool	srv_log_archive_on	= FALSE;
 ulint	srv_log_buffer_size	= ULINT_MAX;	/* size in database pages */ 
 ulint	srv_flush_log_at_trx_commit = 1;
 
@@ -137,9 +141,14 @@ byte	srv_latin1_ordering[256]	/* The sort order table of the latin1
 , 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
 };
 		
-ulint	srv_pool_size		= ULINT_MAX;	/* size in database pages;
-						MySQL originally sets this
-						value in bytes */ 
+ulint	srv_pool_size		= ULINT_MAX;	/* size in pages; MySQL inits
+						this to size in kilobytes but
+						we normalize this to pages in
+						srv_boot() */
+ulint	srv_awe_window_size 	= 0;		/* size in pages; MySQL inits
+						this to bytes, but we
+						normalize it to pages in
+						srv_boot() */
 ulint	srv_mem_pool_size	= ULINT_MAX;	/* size in bytes */ 
 ulint	srv_lock_table_size	= ULINT_MAX;
 
@@ -154,6 +163,8 @@ char*   srv_file_flush_method_str = NULL;
 ulint   srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
 ulint   srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
 
+ulint	srv_max_n_open_files	  = 300;
+
 /* The InnoDB main thread tries to keep the ratio of modified pages
 in the buffer pool to all database pages in the buffer pool smaller than
 the following number. But it is not guaranteed that the value stays below
@@ -211,7 +222,7 @@ struct srv_conc_slot_struct{
 
 UT_LIST_BASE_NODE_T(srv_conc_slot_t)	srv_conc_queue;	/* queue of threads
 							waiting to get in */
-srv_conc_slot_t*			srv_conc_slots;	/* array of wait
+srv_conc_slot_t* srv_conc_slots;			/* array of wait
 							slots */
 
 /* Number of times a thread is allowed to enter InnoDB within the same
@@ -228,6 +239,13 @@ ibool	srv_use_doublewrite_buf	= TRUE;
 
 ibool   srv_set_thread_priorities = TRUE;
 int     srv_query_thread_priority = 0;
+
+/* TRUE if the Address Windowing Extensions of Windows are used; then we must
+disable adaptive hash indexes */
+ibool	srv_use_awe			= FALSE;
+ibool	srv_use_adaptive_hash_indexes 	= TRUE;
+
+
 /*-------------------------------------------*/
 ulint	srv_n_spin_wait_rounds	= 20;
 ulint	srv_spin_wait_delay	= 5;
@@ -615,7 +633,8 @@ srv_suspend_thread(void)
 	if (srv_print_thread_releases) {
 	
 		printf("Suspending thread %lu to slot %lu meter %lu\n",
-		os_thread_get_curr_id(), slot_no, srv_meter[SRV_RECOVERY]);
+		(ulong) os_thread_get_curr_id(), (ulong) slot_no,
+		(ulong) srv_meter[SRV_RECOVERY]);
 	}
 
 	slot = srv_table_get_nth_slot(slot_no);
@@ -677,7 +696,8 @@ srv_release_threads(
 			if (srv_print_thread_releases) {
 				printf(
 		"Releasing thread %lu type %lu from slot %lu meter %lu\n",
-				slot->id, type, i, srv_meter[SRV_RECOVERY]);
+				(ulong) slot->id, (ulong) type, (ulong) i,
+		                (ulong) srv_meter[SRV_RECOVERY]);
 			}
 
 			count++;
@@ -784,7 +804,7 @@ srv_init(void)
 	UT_LIST_INIT(srv_conc_queue);
 
 	srv_conc_slots = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t));
-	
+
 	for (i = 0; i < OS_THREAD_MAX_N; i++) {
 		conc_slot = srv_conc_slots + i;
 		conc_slot->reserved = FALSE;
@@ -908,6 +928,7 @@ retry:
 		slot = srv_conc_slots + i;
 
 		if (!slot->reserved) {
+
 			break;
 		}
 	}
@@ -1102,9 +1123,19 @@ srv_normalize_init_values(void)
 
 	srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
 
-	srv_pool_size = srv_pool_size / UNIV_PAGE_SIZE;
+	srv_pool_size = srv_pool_size / (UNIV_PAGE_SIZE / 1024);
+
+	srv_awe_window_size = srv_awe_window_size / UNIV_PAGE_SIZE;
 	
-	srv_lock_table_size = 20 * srv_pool_size;
+	if (srv_use_awe) {
+	        /* If we are using AWE we must save memory in the 32-bit
+		address space of the process, and cannot bind the lock
+		table size to the real buffer pool size. */
+
+	        srv_lock_table_size = 20 * srv_awe_window_size;
+	} else {
+	        srv_lock_table_size = 5 * srv_pool_size;
+	}
 
 	return(DB_SUCCESS);
 }
@@ -1170,7 +1201,7 @@ srv_table_reserve_slot_for_mysql(void)
 "  InnoDB: There appear to be %lu MySQL threads currently waiting\n"
 "InnoDB: inside InnoDB, which is the upper limit. Cannot continue operation.\n"
 "InnoDB: We intentionally generate a seg fault to print a stack trace\n"
-"InnoDB: on Linux. But first we print a list of waiting threads.\n", i);
+"InnoDB: on Linux. But first we print a list of waiting threads.\n", (ulong) i);
 
 			for (i = 0; i < OS_THREAD_MAX_N; i++) {
 
@@ -1178,10 +1209,10 @@ srv_table_reserve_slot_for_mysql(void)
 
 			        fprintf(stderr,
 "Slot %lu: thread id %lu, type %lu, in use %lu, susp %lu, time %lu\n",
-				  i, os_thread_pf(slot->id),
-				  slot->type, slot->in_use,
-				  slot->suspended,
-			  (ulint)difftime(ut_time(), slot->suspend_time));
+				  (ulong) i, (ulong) os_thread_pf(slot->id),
+				  (ulong) slot->type, (ulong) slot->in_use,
+				  (ulong) slot->suspended,
+			  (ulong) difftime(ut_time(), slot->suspend_time));
 			}
 
 		        ut_error;
@@ -1400,7 +1431,7 @@ srv_sprintf_innodb_monitor(
 	char*	buf_end	= buf + len - 2000;
 	double	time_elapsed;
 	time_t	current_time;
-	ulint	n_reserved;
+	ulint   n_reserved;
 
 	mutex_enter(&srv_innodb_monitor_mutex);
 
@@ -1428,7 +1459,7 @@ srv_sprintf_innodb_monitor(
 
 	buf += sprintf(buf,
 "Per second averages calculated from the last %lu seconds\n",
-					(ulint)time_elapsed);
+					(ulong) time_elapsed);
 	       	       
 	buf += sprintf(buf, "----------\n"
 		       "SEMAPHORES\n"
@@ -1501,9 +1532,24 @@ srv_sprintf_innodb_monitor(
 		       "BUFFER POOL AND MEMORY\n"
 		       "----------------------\n");
 	buf += sprintf(buf,
-	"Total memory allocated %lu; in additional pool allocated %lu\n",
+	"Total memory allocated " ULINTPF
+	"; in additional pool allocated " ULINTPF "\n",
 				ut_total_allocated_memory,
 				mem_pool_get_reserved(mem_comm_pool));
+
+	if (mem_out_of_mem_err_msg_count > 0) {
+	        buf += sprintf(buf,
+	"Mem allocation has spilled out of additional mem pool " ULINTPF
+						" times\n",
+					mem_out_of_mem_err_msg_count);
+	}
+
+	if (srv_use_awe) {
+		buf += sprintf(buf,
+	"In addition to that %lu MB of AWE memory allocated\n",
+		(ulong) (srv_pool_size / ((1024 * 1024) / UNIV_PAGE_SIZE)));
+	}
+	
 	buf_print_io(buf, buf_end);
 	buf = buf + strlen(buf);
 	ut_a(buf < buf_end + 1500);
@@ -1512,30 +1558,32 @@ srv_sprintf_innodb_monitor(
 		       "ROW OPERATIONS\n"
 		       "--------------\n");
 	buf += sprintf(buf,
-	"%ld queries inside InnoDB, %lu queries in queue\n",
-			srv_conc_n_threads, srv_conc_n_waiting_threads);
+        "%ld queries inside InnoDB, %lu queries in queue\n",
+		       (long) srv_conc_n_threads,
+		       (ulong) srv_conc_n_waiting_threads);
 
-	n_reserved = fil_space_get_n_reserved_extents(0);
-	if (n_reserved > 0) {
-	        buf += sprintf(buf,
-	"%lu tablespace extents now reserved for B-tree split operations\n",
-						    n_reserved);
-	}
+        n_reserved = fil_space_get_n_reserved_extents(0);
+        if (n_reserved > 0) {
+                buf += sprintf(buf,
+        "%lu tablespace extents now reserved for B-tree split operations\n",
+                                                    (ulong) n_reserved);
+        }
 
 #ifdef UNIV_LINUX
 	buf += sprintf(buf,
 	"Main thread process no. %lu, id %lu, state: %.29s\n",
-			srv_main_thread_process_no,
-			srv_main_thread_id,
-			srv_main_thread_op_info);
+		       (ulong) srv_main_thread_process_no,
+		       (ulong) srv_main_thread_id,
+		       srv_main_thread_op_info);
 #else
 	buf += sprintf(buf,
 	"Main thread id %lu, state: %.29s\n",
-			srv_main_thread_id,
+			(ulong) srv_main_thread_id,
 			srv_main_thread_op_info);
 #endif
 	buf += sprintf(buf,
-	"Number of rows inserted %lu, updated %lu, deleted %lu, read %lu\n",
+	"Number of rows inserted " ULINTPF
+	", updated " ULINTPF ", deleted " ULINTPF ", read " ULINTPF "\n",
 			srv_n_rows_inserted, 
 			srv_n_rows_updated, 
 			srv_n_rows_deleted, 
@@ -1760,8 +1808,13 @@ srv_error_monitor_thread(
 			os_thread_create */
 {
 	ulint	cnt	= 0;
+	dulint	old_lsn;
+	dulint	new_lsn;
 
 	UT_NOT_USED(arg);
+
+	old_lsn = srv_start_lsn;
+
 #ifdef UNIV_DEBUG_THREAD_CREATION
 	printf("Error monitor thread starts, id %lu\n",
 			      os_thread_pf(os_thread_get_curr_id()));
@@ -1771,7 +1824,24 @@ loop:
 
 	cnt++;
 
-	os_thread_sleep(2000000);
+	/* Try to track a strange bug reported by Harald Fuchs and others,
+	where the lsn seems to decrease at times */
+
+	new_lsn = log_get_lsn();
+
+	if (ut_dulint_cmp(new_lsn, old_lsn) < 0) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+"  InnoDB: Error: old log sequence number %lu %lu was greater\n"
+"InnoDB: than the new log sequence number %lu %lu!\n"
+"InnoDB: Please send a bug report to mysql@lists.mysql.com\n",
+		(ulong) ut_dulint_get_high(old_lsn),
+		(ulong) ut_dulint_get_low(old_lsn),
+		(ulong) ut_dulint_get_high(new_lsn),
+		(ulong) ut_dulint_get_low(new_lsn));
+	}
+
+	old_lsn = new_lsn;
 
 	if (difftime(time(NULL), srv_last_monitor_time) > 60) {
 		/* We referesh InnoDB Monitor values so that averages are
@@ -1788,6 +1858,8 @@ loop:
 	fflush(stderr);
 	fflush(stdout);
 
+	os_thread_sleep(2000000);
+
 	if (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {
 
 		goto loop;
@@ -1955,6 +2027,9 @@ loop:
 		srv_main_thread_op_info = (char*)"flushing log";
 		log_buffer_flush_to_disk();
 
+		srv_main_thread_op_info = (char*)"making checkpoint";
+		log_free_check();
+
 		/* If there were less than 5 i/os during the
 		one second sleep, we assume that there is free
 		disk i/o capacity available, and it makes sense to
@@ -2218,11 +2293,13 @@ flush_loop:
 		goto loop;
 	}
 	mutex_exit(&kernel_mutex);
-
+/*
 	srv_main_thread_op_info =
 				(char*)"archiving log (if log archive is on)";
 	
 	log_archive_do(FALSE, &n_bytes_archived);
+*/
+	n_bytes_archived = 0;
 
 	/* Keep looping in the background loop if still work to do */
 
@@ -2287,7 +2364,7 @@ suspend_thread:
 	os_thread_exit(NULL);
 
 #ifndef __WIN__
-        return(NULL);
+        return(NULL);				/* Not reached */
 #else
 	return(0);
 #endif
diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c
index 0491aed29f5..7b50877709b 100644
--- a/innobase/srv/srv0start.c
+++ b/innobase/srv/srv0start.c
@@ -30,6 +30,7 @@ Created 2/16/1996 Heikki Tuuri
 #include "page0cur.h"
 #include "trx0trx.h"
 #include "dict0boot.h"
+#include "dict0load.h"
 #include "trx0sys.h"
 #include "dict0crea.h"
 #include "btr0btr.h"
@@ -55,6 +56,13 @@ Created 2/16/1996 Heikki Tuuri
 #include "srv0start.h"
 #include "que0que.h"
 
+/* Log sequence number immediately after startup */
+dulint		srv_start_lsn;
+/* Log sequence number at shutdown */
+dulint		srv_shutdown_lsn;
+
+ibool		srv_start_raw_disk_in_use  = FALSE;
+
 ibool           srv_start_has_been_called  = FALSE;
 
 ulint           srv_sizeof_trx_t_in_ha_innodb_cc;
@@ -86,13 +94,6 @@ ibool srv_os_test_mutex_is_locked = FALSE;
 #define SRV_N_PENDING_IOS_PER_THREAD 	OS_AIO_N_PENDING_IOS_PER_THREAD
 #define SRV_MAX_N_PENDING_SYNC_IOS	100
 
-/* The following limit may be too big in some old operating systems:
-we may get an assertion failure in os0file.c */
-
-#define SRV_MAX_N_OPEN_FILES		500
-
-#define SRV_LOG_SPACE_FIRST_ID		1000000000
-
 /*************************************************************************
 Reads the data files and their sizes from a character string given in
 the .cnf file. */
@@ -136,7 +137,8 @@ srv_parse_data_file_paths_and_sizes(
 
 		while ((*str != ':' && *str != '\0')
 		       || (*str == ':'
-			   && (*(str + 1) == '\\' || *(str + 1) == '/'))) {
+			   && (*(str + 1) == '\\' || *(str + 1) == '/'
+					     || *(str + 1) == ':'))) {
 			str++;
 		}
 
@@ -233,11 +235,15 @@ srv_parse_data_file_paths_and_sizes(
 	while (*str != '\0') {
 		path = str;
 
-		/* Note that we must ignore the ':' in a Windows path */
+		/* Note that we must step over the ':' in a Windows path;
+		a Windows path normally looks like C:\ibdata\ibdata1:1G, but
+		a Windows raw partition may have a specification like
+		\\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
 
 		while ((*str != ':' && *str != '\0')
 		       || (*str == ':'
-			   && (*(str + 1) == '\\' || *(str + 1) == '/'))) {
+			   && (*(str + 1) == '\\' || *(str + 1) == '/'
+			        || *(str + 1) == ':'))) {
 			str++;
 		}
 
@@ -433,7 +439,7 @@ io_handler_thread(
 	os_thread_exit(NULL);
 
 #ifndef __WIN__
-	return(NULL);
+	return(NULL);				/* Not reached */
 #else
 	return(0);
 #endif
@@ -451,7 +457,8 @@ Normalizes a directory path for Windows: converts slashes to backslashes. */
 void
 srv_normalize_path_for_win(
 /*=======================*/
-	char*	str __attribute__((unused)))	/* in/out: null-terminated character string */
+	char*	str __attribute__((unused)))	/* in/out: null-terminated
+							   character string */
 {
 #ifdef __WIN__
 	ulint	i;
@@ -504,7 +511,7 @@ srv_calc_low32(
 				expressed in bytes */
 	ulint	file_size)	/* in: file size in database pages */
 {
-	return(0xFFFFFFFF & (file_size << UNIV_PAGE_SIZE_SHIFT));
+	return(0xFFFFFFFFUL & (file_size << UNIV_PAGE_SIZE_SHIFT));
 }
 
 /*************************************************************************
@@ -539,7 +546,6 @@ open_or_create_log_file(
 	ulint	i)			/* in: log file number in group */
 {
 	ibool	ret;
-	ulint	arch_space_id;
 	ulint	size;
 	ulint	size_high;
 	char	name[10000];
@@ -552,12 +558,12 @@ open_or_create_log_file(
 	srv_log_group_home_dirs[k] = srv_add_path_separator_if_needed(
 						srv_log_group_home_dirs[k]);
 
-	sprintf(name, "%s%s%lu", srv_log_group_home_dirs[k], "ib_logfile", i);
+	sprintf(name, "%s%s%lu", srv_log_group_home_dirs[k], "ib_logfile", (ulong) i);
 
 	files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_NORMAL,
 						OS_LOG_FILE, &ret);
 	if (ret == FALSE) {
-		if (os_file_get_last_error() != OS_FILE_ALREADY_EXISTS) {
+		if (os_file_get_last_error(FALSE) != OS_FILE_ALREADY_EXISTS) {
 			fprintf(stderr,
 			"InnoDB: Error in creating or opening %s\n", name);
 				
@@ -582,9 +588,9 @@ open_or_create_log_file(
 			fprintf(stderr,
 "InnoDB: Error: log file %s is of different size %lu %lu bytes\n"
 "InnoDB: than specified in the .cnf file %lu %lu bytes!\n",
-				name, size_high, size,
-				srv_calc_high32(srv_log_file_size),
-				srv_calc_low32(srv_log_file_size));
+				name, (ulong) size_high, (ulong) size,
+				(ulong) srv_calc_high32(srv_log_file_size),
+				(ulong) srv_calc_low32(srv_log_file_size));
 				
 			return(DB_ERROR);
 		}					
@@ -602,7 +608,7 @@ open_or_create_log_file(
 		}
 
 		fprintf(stderr, "InnoDB: Setting log file %s size to %lu MB\n",
-			             name, srv_log_file_size
+			             name, (ulong) srv_log_file_size
 			>> (20 - UNIV_PAGE_SIZE_SHIFT));
 
 		fprintf(stderr,
@@ -634,24 +640,27 @@ open_or_create_log_file(
 	ut_a(fil_validate());
 
 	fil_node_create(name, srv_log_file_size,
-					2 * k + SRV_LOG_SPACE_FIRST_ID);
-
+				2 * k + SRV_LOG_SPACE_FIRST_ID, FALSE);
+#ifdef notdefined
 	/* If this is the first log group, create the file space object
-	for archived logs */
+	for archived logs.
+	Under MySQL, no archiving is ever done. */
 
 	if (k == 0 && i == 0) {
 		arch_space_id = 2 * k + 1 + SRV_LOG_SPACE_FIRST_ID;
 
-	    	fil_space_create((char*) "arch_log_space", arch_space_id, FIL_LOG);
+	    	fil_space_create((char*) "arch_log_space", arch_space_id,
+								FIL_LOG);
 	} else {
 		arch_space_id = ULINT_UNDEFINED;
 	}
-
+#endif
 	if (i == 0) {
 		log_group_init(k, srv_n_log_files,
 				srv_log_file_size * UNIV_PAGE_SIZE,
 				2 * k + SRV_LOG_SPACE_FIRST_ID,
-				arch_space_id);
+				SRV_LOG_SPACE_FIRST_ID + 1); /* dummy arch
+								space id */
 	}
 
 	return(DB_SUCCESS);
@@ -686,7 +695,7 @@ open_or_create_data_files(
 	if (srv_n_data_files >= 1000) {
 		fprintf(stderr, "InnoDB: can only have < 1000 data files\n"
 				"InnoDB: you have defined %lu\n",
-				srv_n_data_files);
+				(ulong) srv_n_data_files);
 		return(DB_ERROR);
 	}
 
@@ -702,18 +711,32 @@ open_or_create_data_files(
 
 		sprintf(name, "%s%s", srv_data_home, srv_data_file_names[i]);
 	
-		files[i] = os_file_create(name, OS_FILE_CREATE,
+		if (srv_data_file_is_raw_partition[i] == 0) {
+
+			/* First we try to create the file: if it already
+			exists, ret will get value FALSE */
+
+			files[i] = os_file_create(name, OS_FILE_CREATE,
 					OS_FILE_NORMAL, OS_DATA_FILE, &ret);
 
-		if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
+			if (ret == FALSE && os_file_get_last_error(FALSE) !=
+						OS_FILE_ALREADY_EXISTS) {
+				fprintf(stderr,
+				"InnoDB: Error in creating or opening %s\n",
+				name);
+
+				return(DB_ERROR);
+			}
+		} else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
 			/* The partition is opened, not created; then it is
 			written over */
 
+			srv_start_raw_disk_in_use = TRUE;
 			srv_created_new_raw = TRUE;
 
 			files[i] = os_file_create(
-				name, OS_FILE_OPEN, OS_FILE_NORMAL,
-						OS_DATA_FILE, &ret);
+				name, OS_FILE_OPEN_RAW, OS_FILE_NORMAL,
+							OS_DATA_FILE, &ret);
 			if (!ret) {
 				fprintf(stderr,
 				"InnoDB: Error in opening %s\n", name);
@@ -721,19 +744,15 @@ open_or_create_data_files(
 				return(DB_ERROR);
 			}
 		} else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
+			srv_start_raw_disk_in_use = TRUE;
+
 			ret = FALSE;
+		} else {
+			ut_a(0);
 		}
 
 		if (ret == FALSE) {
-			if (srv_data_file_is_raw_partition[i] != SRV_OLD_RAW
-			    && os_file_get_last_error() !=
-						OS_FILE_ALREADY_EXISTS) {
-				fprintf(stderr,
-				"InnoDB: Error in creating or opening %s\n",
-				name);
-
-				return(DB_ERROR);
-			}
+			/* We open the data file */
 
 			if (one_created) {
 				fprintf(stderr,
@@ -744,71 +763,81 @@ open_or_create_data_files(
 				return(DB_ERROR);
 			}
 				
-			files[i] = os_file_create(
-				name, OS_FILE_OPEN, OS_FILE_NORMAL,
-						OS_DATA_FILE, &ret);
+			if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
+				files[i] = os_file_create(
+					name, OS_FILE_OPEN_RAW, OS_FILE_NORMAL,
+							 OS_DATA_FILE, &ret);
+			} else {
+				files[i] = os_file_create(
+					name, OS_FILE_OPEN, OS_FILE_NORMAL,
+							 OS_DATA_FILE, &ret);
+			}
+
 			if (!ret) {
 				fprintf(stderr,
 				"InnoDB: Error in opening %s\n", name);
-				os_file_get_last_error();
+				os_file_get_last_error(TRUE);
 
 				return(DB_ERROR);
 			}
 
-			if (srv_data_file_is_raw_partition[i] != SRV_OLD_RAW) {
-			
-				ret = os_file_get_size(files[i], &size,
-								&size_high);
-				ut_a(ret);
-				/* Round size downward to megabytes */
+			if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
+
+				goto skip_size_check;
+			}
+
+			ret = os_file_get_size(files[i], &size, &size_high);
+			ut_a(ret);
+			/* Round size downward to megabytes */
 		
-				rounded_size_pages = (size / (1024 * 1024)
+			rounded_size_pages = (size / (1024 * 1024)
 							+ 4096 * size_high)
 					     << (20 - UNIV_PAGE_SIZE_SHIFT);
 
-				if (i == srv_n_data_files - 1
+			if (i == srv_n_data_files - 1
 				    && srv_auto_extend_last_data_file) {
 
-				    	if (srv_data_file_sizes[i] >
+				if (srv_data_file_sizes[i] >
 				    		rounded_size_pages
 				    	   || (srv_last_file_size_max > 0
 				    	      && srv_last_file_size_max <
 				    	       rounded_size_pages)) {
 				    	       	
-						fprintf(stderr,
+					fprintf(stderr,
 "InnoDB: Error: auto-extending data file %s is of a different size\n"
 "InnoDB: %lu pages (rounded down to MB) than specified in the .cnf file:\n"
 "InnoDB: initial %lu pages, max %lu (relevant if non-zero) pages!\n",
-		  name, rounded_size_pages,
-		  srv_data_file_sizes[i], srv_last_file_size_max);
+		  name, (ulong) rounded_size_pages,
+		  (ulong) srv_data_file_sizes[i],
+		  (ulong) srv_last_file_size_max);
 
-						return(DB_ERROR);
-					}
-				    	     
-				    	srv_data_file_sizes[i] =
-				    			rounded_size_pages;
+					return(DB_ERROR);
 				}
+				    	     
+				srv_data_file_sizes[i] = rounded_size_pages;
+			}
 				
-				if (rounded_size_pages
-						!= srv_data_file_sizes[i]) {
+			if (rounded_size_pages != srv_data_file_sizes[i]) {
 
-					fprintf(stderr,
+				fprintf(stderr,
 "InnoDB: Error: data file %s is of a different size\n"
 "InnoDB: %lu pages (rounded down to MB)\n"
 "InnoDB: than specified in the .cnf file %lu pages!\n", name,
-						rounded_size_pages,
-						srv_data_file_sizes[i]);
+					       (ulong) rounded_size_pages,
+					       (ulong) srv_data_file_sizes[i]);
 				
-					return(DB_ERROR);
-				}
+				return(DB_ERROR);
 			}
-
+skip_size_check:
 			fil_read_flushed_lsn_and_arch_log_no(files[i],
 					one_opened,
 					min_flushed_lsn, min_arch_log_no,
 					max_flushed_lsn, max_arch_log_no);
 			one_opened = TRUE;
 		} else {
+		        /* We created the data file and now write it full of
+			zeros */
+
 			one_created = TRUE;
 
 			if (i > 0) {
@@ -826,7 +855,7 @@ open_or_create_data_files(
 	    		ut_print_timestamp(stderr);
 			fprintf(stderr, 
 				"  InnoDB: Setting file %s size to %lu MB\n",
-			       name, (srv_data_file_sizes[i]
+			       name, (ulong) (srv_data_file_sizes[i]
 				      >> (20 - UNIV_PAGE_SIZE_SHIFT)));
 
 			fprintf(stderr,
@@ -856,7 +885,13 @@ open_or_create_data_files(
 
 		ut_a(fil_validate());
 
-		fil_node_create(name, srv_data_file_sizes[i], 0);
+		if (srv_data_file_is_raw_partition[i]) {
+
+		        fil_node_create(name, srv_data_file_sizes[i], 0, TRUE);
+		} else {
+		        fil_node_create(name, srv_data_file_sizes[i], 0,
+									FALSE);
+		}
 	}
 
 	ios = 0;
@@ -877,6 +912,7 @@ innobase_start_or_create_for_mysql(void)
 /*====================================*/
 				/* out: DB_SUCCESS or error code */
 {
+	buf_pool_t*	ret;
 	ibool	create_new_db;
 	ibool	log_file_created;
 	ibool	log_created	= FALSE;
@@ -885,15 +921,24 @@ innobase_start_or_create_for_mysql(void)
 	dulint	max_flushed_lsn;
 	ulint	min_arch_log_no;
 	ulint	max_arch_log_no;
-	ibool	start_archive;
 	ulint   sum_of_new_sizes;
 	ulint	sum_of_data_file_sizes;
 	ulint	tablespace_size_in_header;
 	ulint	err;
 	ulint	i;
-	ulint	k;
+	ibool	srv_file_per_table_original_value  = srv_file_per_table;
 	mtr_t   mtr;
 
+	if (sizeof(ulint) != sizeof(void*)) {
+		fprintf(stderr,
+"InnoDB: Error: size of InnoDB's ulint is %lu, but size of void* is %lu.\n"
+"InnoDB: The sizes should be the same so that on a 64-bit platform you can\n"
+"InnoDB: allocate more than 4 GB of memory.",
+			(ulong)sizeof(ulint), (ulong)sizeof(void*));
+	}
+
+	srv_file_per_table = FALSE; /* system tables are created in tablespace
+									0 */
 #ifdef UNIV_DEBUG
 	fprintf(stderr,
 "InnoDB: !!!!!!!!!!!!!! UNIV_DEBUG switched on !!!!!!!!!!!!!!!\n"); 
@@ -914,12 +959,17 @@ innobase_start_or_create_for_mysql(void)
 "InnoDB: !!!!!!!!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!!!!!!!\n"); 
 #endif
 
+#ifdef UNIV_SIMULATE_AWE
+	fprintf(stderr,
+"InnoDB: !!!!!!!!!!!!!! UNIV_SIMULATE_AWE switched on !!!!!!!!!!!!!!!!!\n");
+#endif
         if (srv_sizeof_trx_t_in_ha_innodb_cc != (ulint)sizeof(trx_t)) {
 	        fprintf(stderr,
   "InnoDB: Error: trx_t size is %lu in ha_innodb.cc but %lu in srv0start.c\n"
   "InnoDB: Check that pthread_mutex_t is defined in the same way in these\n"
   "InnoDB: compilation modules. Cannot continue.\n",
-		  srv_sizeof_trx_t_in_ha_innodb_cc, (ulint)sizeof(trx_t));
+		 (ulong)  srv_sizeof_trx_t_in_ha_innodb_cc,
+		 (ulong) sizeof(trx_t));
 		return(DB_ERROR);
 	}
 
@@ -944,6 +994,17 @@ innobase_start_or_create_for_mysql(void)
         srv_startup_is_before_trx_rollback_phase = TRUE;
 	os_aio_use_native_aio = FALSE;
 
+#if !defined(__WIN2000__) && !defined(UNIV_SIMULATE_AWE)
+	if (srv_use_awe) {
+
+	        fprintf(stderr,
+"InnoDB: Error: You have specified innodb_buffer_pool_awe_mem_mb\n"
+"InnoDB: in my.cnf, but AWE can only be used in Windows 2000 and later.\n");
+
+	        return(DB_ERROR);
+	}
+#endif
+
 #ifdef __WIN__
 	if (os_get_os_version() == OS_WIN95
 	    || os_get_os_version() == OS_WIN31
@@ -1006,6 +1067,31 @@ innobase_start_or_create_for_mysql(void)
           				srv_file_flush_method_str);
 	  	return(DB_ERROR);
 	}
+	
+	/* Set the maximum number of threads which can wait for a semaphore
+	inside InnoDB */
+#if defined(__WIN__) || defined(__NETWARE__)
+
+/* Create less event semaphores because Win 98/ME had difficulty creating
+40000 event semaphores.
+Comment from Novell, Inc.: also, these just take a lot of memory on
+NetWare. */
+	srv_max_n_threads = 1000;
+#else
+	if (srv_pool_size >= 8 * 1024) {
+			          /* Here we still have srv_pool_size counted
+				  in kilobytes, srv_boot converts the value to
+				  pages; if buffer pool is less than 8 MB,
+				  assume fewer threads. */
+		srv_max_n_threads = 10000;
+	} else {
+	        srv_max_n_threads = 1000;	/* saves several MB of memory,
+						especially in 64-bit
+						computers */
+	}
+#endif
+	/* Note that the call srv_boot() also changes the values of
+	srv_pool_size etc. to the units used by InnoDB internally */
 
         /* Set the maximum number of threads which can wait for a semaphore
         inside InnoDB */
@@ -1044,7 +1130,6 @@ NetWare. */
 
 	if (!os_aio_use_native_aio) {
  		/* In simulated aio we currently have use only for 4 threads */
-
 		srv_n_file_io_threads = 4;
 
 		os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
@@ -1058,9 +1143,28 @@ NetWare. */
 					SRV_MAX_N_PENDING_SYNC_IOS);
 	}
 	
-	fil_init(SRV_MAX_N_OPEN_FILES);
+	fil_init(srv_max_n_open_files);
 
-	buf_pool_init(srv_pool_size, srv_pool_size);
+	if (srv_use_awe) {
+		fprintf(stderr,
+"InnoDB: Using AWE: Memory window is %lu MB and AWE memory is %lu MB\n",
+		(ulong) (srv_awe_window_size / ((1024 * 1024) / UNIV_PAGE_SIZE)),
+		(ulong) (srv_pool_size / ((1024 * 1024) / UNIV_PAGE_SIZE)));
+
+		/* We must disable adaptive hash indexes because they do not
+		tolerate remapping of pages in AWE */
+		
+		srv_use_adaptive_hash_indexes = FALSE;
+		ret = buf_pool_init(srv_pool_size, srv_pool_size,
+							srv_awe_window_size);
+	} else {
+		ret = buf_pool_init(srv_pool_size, srv_pool_size,
+							srv_pool_size);
+	}
+
+	if (ret == NULL) {
+		return(DB_ERROR);
+	}
 
 	fsp_init();
 	log_init();
@@ -1071,7 +1175,6 @@ NetWare. */
 
 	for (i = 0; i < srv_n_file_io_threads; i++) {
 		n[i] = i;
-
 		os_thread_create(io_handler_thread, n + i, thread_ids + i);
     	}
 
@@ -1084,7 +1187,6 @@ NetWare. */
 	}
 
 	if (srv_n_log_files * srv_log_file_size >= 262144) {
-
 		fprintf(stderr,
 		"InnoDB: Error: combined size of log files must be < 4 GB\n");
 
@@ -1130,42 +1232,25 @@ NetWare. */
 		return((int) err);
 	}
 
-	if (!create_new_db) {
-		/* If we are using the doublewrite method, we will
-		check if there are half-written pages in data files,
-		and restore them from the doublewrite buffer if
-		possible */
-		
-		if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
-		
-			trx_sys_doublewrite_restore_corrupt_pages();
-		}
-	}
-
 	srv_normalize_path_for_win(srv_arch_dir);
 	srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir);
+		
+	for (i = 0; i < srv_n_log_files; i++) {
+		err = open_or_create_log_file(create_new_db, &log_file_created,
+							     log_opened, 0, i);
+		if (err != DB_SUCCESS) {
 
-	for (k = 0; k < srv_n_log_groups; k++) {
-
-		for (i = 0; i < srv_n_log_files; i++) {
-
-			err = open_or_create_log_file(create_new_db,
-						&log_file_created,
-						log_opened, k, i);
-			if (err != DB_SUCCESS) {
-
-				return((int) err);
-			}
-
-			if (log_file_created) {
-				log_created = TRUE;
-			} else {
-				log_opened = TRUE;
-			}
+			return((int) err);
+		}
 
-			if ((log_opened && create_new_db)
+		if (log_file_created) {
+			log_created = TRUE;
+		} else {
+			log_opened = TRUE;
+		}
+		if ((log_opened && create_new_db)
 			    		|| (log_opened && log_created)) {
-				fprintf(stderr, 
+			fprintf(stderr, 
 	"InnoDB: Error: all log files must be created at the same time.\n"
 	"InnoDB: All log files must be created also in database creation.\n"
 	"InnoDB: If you want bigger or smaller log files, shut down the\n"
@@ -1173,14 +1258,16 @@ NetWare. */
 	"InnoDB: Then delete the existing log files. Edit the .cnf file\n"
 	"InnoDB: and start the database again.\n");
 
-				return(DB_ERROR);
-			}
-			
+			return(DB_ERROR);
 		}
 	}
 
-	if (log_created && !create_new_db && !srv_archive_recovery) {
+	/* Open all log files and data files in the system tablespace: we
+	keep them open until database shutdown */
 
+	fil_open_log_and_system_tablespace_files();
+
+	if (log_created && !create_new_db && !srv_archive_recovery) {
 		if (ut_dulint_cmp(max_flushed_lsn, min_flushed_lsn) != 0
 				|| max_arch_log_no != min_arch_log_no) {
 			fprintf(stderr, 
@@ -1205,7 +1292,9 @@ NetWare. */
 
 		mutex_enter(&(log_sys->mutex));
 
-		recv_reset_logs(max_flushed_lsn, max_arch_log_no + 1, TRUE);
+		/* Do not + 1 arch_log_no because we do not use log
+		archiving */
+		recv_reset_logs(max_flushed_lsn, max_arch_log_no, TRUE);
 		
 		mutex_exit(&(log_sys->mutex));
 	}
@@ -1224,7 +1313,6 @@ NetWare. */
 	} else if (srv_archive_recovery) {
 		fprintf(stderr,
 	"InnoDB: Starting archive recovery from a backup...\n");
-	
 		err = recv_recovery_from_archive_start(
 					min_flushed_lsn,
 					srv_archive_recovery_limit_lsn,
@@ -1233,14 +1321,11 @@ NetWare. */
 
 			return(DB_ERROR);
 		}
-
 		/* Since ibuf init is in dict_boot, and ibuf is needed
 		in any disk i/o, first call dict_boot */
 
 		dict_boot();
-
 		trx_sys_init_at_db_start();
-		
                 srv_startup_is_before_trx_rollback_phase = FALSE;
 
 		/* Initialize the fsp free limit global variable in the log
@@ -1250,7 +1335,7 @@ NetWare. */
 		recv_recovery_from_archive_finish();
 	} else {
 		/* We always try to do a recovery, even if the database had
-		been shut down normally */
+		been shut down normally: this is the normal startup path */
 		
 		err = recv_recovery_from_checkpoint_start(LOG_CHECKPOINT,
 							ut_dulint_max,
@@ -1296,6 +1381,8 @@ NetWare. */
 
 	log_make_checkpoint_at(ut_dulint_max, TRUE);
 
+#ifdef notdefined
+	/* Archiving is always off under MySQL */
 	if (!srv_log_archive_on) {
 		ut_a(DB_SUCCESS == log_archive_noarchivelog());
 	} else {
@@ -1313,6 +1400,14 @@ NetWare. */
 			ut_a(DB_SUCCESS == log_archive_archivelog());
 		}
 	}
+#endif
+	if (!create_new_db && srv_force_recovery == 0) {
+		/* After a crash recovery we only check that the info in data
+		dictionary is consistent with what we already know about space
+		id's from the call of fil_load_single_table_tablespaces(). */
+
+		dict_check_tablespaces_or_store_max_id(recv_needed_recovery);
+	}
 
 	if (srv_measure_contention) {
 	  	/* os_thread_create(&test_measure_cont, NULL, thread_ids +
@@ -1326,17 +1421,28 @@ NetWare. */
 	and prints InnoDB monitor info */
 	
 	os_thread_create(&srv_lock_timeout_and_monitor_thread, NULL,
-					thread_ids + 2 + SRV_MAX_N_IO_THREADS);	
+				thread_ids + 2 + SRV_MAX_N_IO_THREADS);	
 
 	/* Create the thread which warns of long semaphore waits */
 	os_thread_create(&srv_error_monitor_thread, NULL,
-					thread_ids + 3 + SRV_MAX_N_IO_THREADS);	
+				thread_ids + 3 + SRV_MAX_N_IO_THREADS);	
 	srv_was_started = TRUE;
 	srv_is_being_started = FALSE;
 
+#ifdef UNIV_DEBUG
+        /* Wait a while so that the created threads have time to suspend
+	themselves before we switch sync debugging on; otherwise a thread may
+	execute mutex_enter() before the checks are on, and mutex_exit() after
+	the checks are on, which will cause an assertion failure in sync
+	debug. */
+
+        os_thread_sleep(3000000);
+#endif
 	sync_order_checks_on = TRUE;
 
-	if (srv_use_doublewrite_buf && trx_doublewrite == NULL) {
+        if (srv_use_doublewrite_buf && trx_doublewrite == NULL) {
+		/* Create the doublewrite buffer to a new tablespace */
+
 		trx_sys_create_doublewrite_buf();
 	}
 
@@ -1346,8 +1452,8 @@ NetWare. */
 		return((int)DB_ERROR);
 	}
 	
-	/* Create the master thread which monitors the database
-	server, and does purge and other utility operations */
+	/* Create the master thread which does purge and other utility
+	operations */
 
 	os_thread_create(&srv_master_thread, NULL, thread_ids + 1 +
 							SRV_MAX_N_IO_THREADS);
@@ -1367,7 +1473,8 @@ NetWare. */
 		fprintf(stderr,
 "InnoDB: Error: tablespace size stored in header is %lu pages, but\n"
 "InnoDB: the sum of data file sizes is %lu pages\n",
- 			tablespace_size_in_header, sum_of_data_file_sizes);
+ 			(ulong) tablespace_size_in_header,
+			(ulong) sum_of_data_file_sizes);
 	}
 
 	if (srv_auto_extend_last_data_file
@@ -1376,10 +1483,11 @@ NetWare. */
 		fprintf(stderr,
 "InnoDB: Error: tablespace size stored in header is %lu pages, but\n"
 "InnoDB: the sum of data file sizes is only %lu pages\n",
- 			tablespace_size_in_header, sum_of_data_file_sizes);
+ 			(ulong) tablespace_size_in_header,
+			(ulong) sum_of_data_file_sizes);
 	}
 
-	/* Check that os_fast_mutexes work as exptected */
+	/* Check that os_fast_mutexes work as expected */
 	os_fast_mutex_init(&srv_os_test_mutex);
 
 	if (0 != os_fast_mutex_trylock(&srv_os_test_mutex)) {
@@ -1397,43 +1505,73 @@ NetWare. */
 
 	os_fast_mutex_free(&srv_os_test_mutex);
 
-	/***********************************************************/
-	/* Do NOT merge to the 4.1 code base! */
-	if (trx_sys_downgrading_from_4_1_1) {
+	if (srv_print_verbose_log) {
+	  	ut_print_timestamp(stderr);
+	  	fprintf(stderr,
+"  InnoDB: Started; log sequence number %lu %lu\n",
+			(ulong) ut_dulint_get_high(srv_start_lsn),
+			(ulong) ut_dulint_get_low(srv_start_lsn));
+	}
+
+	if (srv_force_recovery > 0) {
 		fprintf(stderr,
-"InnoDB: You are downgrading from an InnoDB version which allows multiple\n"
+		"InnoDB: !!! innodb_force_recovery is set to %lu !!!\n",
+			(ulong) srv_force_recovery);
+	}
+
+	fflush(stderr);
+
+	if (trx_doublewrite_must_reset_space_ids) {
+		/* Actually, we did not change the undo log format between
+		4.0 and 4.1.1, and we would not need to run purge to
+		completion. Note also that the purge algorithm in 4.1.1
+		can process the history list again even after a full
+		purge, because our algorithm does not cut the end of the
+		history list in all cases so that it would become empty
+		after a full purge. That means that we may purge 4.0 type
+		undo log even after this phase.
+		
+		The insert buffer record format changed between 4.0 and
+		4.1.1. It is essential that the insert buffer is emptied
+		here! */
+
+		fprintf(stderr,
+"InnoDB: You are upgrading to an InnoDB version which allows multiple\n"
 "InnoDB: tablespaces. Wait that purge and insert buffer merge run to\n"
 "InnoDB: completion...\n");
 		for (;;) {
-			os_thread_sleep(10000000);
+			os_thread_sleep(1000000);
 
 			if (0 == strcmp(srv_main_thread_op_info,
 					"waiting for server activity")) {
+
+				ut_a(ibuf_is_empty());
+				
 				break;
 			}
 		}
 		fprintf(stderr,
 "InnoDB: Full purge and insert buffer merge completed.\n");
 
-	        trx_sys_mark_downgraded_from_4_1_1();
+	        trx_sys_mark_upgraded_to_multiple_tablespaces();
 
 		fprintf(stderr,
-"InnoDB: Downgraded from >= 4.1.1 to 4.0\n");
+"InnoDB: You have now successfully upgraded to the multiple tablespaces\n"
+"InnoDB: format. You should NOT DOWNGRADE again to an earlier version of\n"
+"InnoDB: InnoDB! But if you absolutely need to downgrade, see section 4.6 of\n"
+"InnoDB: http://www.innodb.com/ibman.php for instructions.\n");
 	}
-	/***********************************************************/
 
-	if (srv_print_verbose_log) {
-	  	ut_print_timestamp(stderr);
-	  	fprintf(stderr, "  InnoDB: Started\n");
-	}
+	if (srv_force_recovery == 0) {
+		/* In the insert buffer we may have even bigger tablespace
+		id's, because we may have dropped those tablespaces, but
+		insert buffer merge has not had time to clean the records from
+		the ibuf tree. */
 
-	if (srv_force_recovery > 0) {
-		fprintf(stderr,
-		"InnoDB: !!! innodb_force_recovery is set to %lu !!!\n",
-			srv_force_recovery);
+		ibuf_update_max_tablespace_id();
 	}
 
-	fflush(stderr);
+	srv_file_per_table = srv_file_per_table_original_value;
 
 	return((int) DB_SUCCESS);
 }
@@ -1452,17 +1590,16 @@ innobase_shutdown_for_mysql(void)
 	  	if (srv_is_being_started) {
 	    		ut_print_timestamp(stderr);
             		fprintf(stderr, 
-	"  InnoDB: Warning: shutting down a not properly started\n");
-            		fprintf(stderr, 
-	"                 InnoDB: or created database!\n");
+"  InnoDB: Warning: shutting down a not properly started\n"
+"                 InnoDB: or created database!\n");
 	  	}
 
 	  	return(DB_SUCCESS);
 	}
 
-	/* 1. Flush buffer pool to disk, write the current lsn to
+	/* 1. Flush the buffer pool to disk, write the current lsn to
 	the tablespace header(s), and copy all log data to archive.
-	The step 1 is the real InnoDB shutdown. The remaining steps
+	The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
 	just free data structures after the shutdown. */
 
 	logs_empty_and_mark_files_at_shutdown();
@@ -1486,16 +1623,16 @@ innobase_shutdown_for_mysql(void)
 	        /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
 	        HERE OR EARLIER */
 		
-		/* 1. Let the lock timeout thread exit */
+		/* a. Let the lock timeout thread exit */
 		os_event_set(srv_lock_timeout_thread_event);		
 
-		/* 2. srv error monitor thread exits automatically, no need
+		/* b. srv error monitor thread exits automatically, no need
 		to do anything here */
 
-		/* 3. We wake the master thread so that it exits */
+		/* c. We wake the master thread so that it exits */
 		srv_wake_master_thread();
 
-		/* 4. Exit the i/o threads */
+		/* d. Exit the i/o threads */
 
 		os_aio_wake_all_threads_at_shutdown();
 
@@ -1523,7 +1660,7 @@ innobase_shutdown_for_mysql(void)
 	if (i == 1000) {
 	        fprintf(stderr,
 "InnoDB: Warning: %lu threads created by InnoDB had not exited at shutdown!\n",
-		      os_thread_count);
+		      (ulong) os_thread_count);
 	}
 
 	/* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
@@ -1548,13 +1685,16 @@ innobase_shutdown_for_mysql(void)
 	        fprintf(stderr,
 "InnoDB: Warning: some resources were not cleaned up in shutdown:\n"
 "InnoDB: threads %lu, events %lu, os_mutexes %lu, os_fast_mutexes %lu\n",
-		      os_thread_count, os_event_count, os_mutex_count,
-		      os_fast_mutex_count);
+			(ulong) os_thread_count, (ulong) os_event_count,
+			(ulong) os_mutex_count, (ulong) os_fast_mutex_count);
 	}
 
 	if (srv_print_verbose_log) {
 	        ut_print_timestamp(stderr);
-	        fprintf(stderr, "  InnoDB: Shutdown completed\n");
+	        fprintf(stderr,
+"  InnoDB: Shutdown completed; log sequence number %lu %lu\n",
+			       (ulong) ut_dulint_get_high(srv_shutdown_lsn),
+			       (ulong) ut_dulint_get_low(srv_shutdown_lsn));
 	}
 
 	return((int) DB_SUCCESS);
diff --git a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c
index 67671299e3d..8082f598b0c 100644
--- a/innobase/sync/sync0arr.c
+++ b/innobase/sync/sync0arr.c
@@ -457,8 +457,9 @@ sync_array_cell_print(
 
 	buf += sprintf(buf,
 "--Thread %lu has waited at %s line %lu for %.2f seconds the semaphore:\n",
-			os_thread_pf(cell->thread), cell->file, cell->line,
-			difftime(time(NULL), cell->reservation_time));
+			(ulong) os_thread_pf(cell->thread), cell->file,
+		       (ulong) cell->line,
+		       difftime(time(NULL), cell->reservation_time));
 
 	if (type == SYNC_MUTEX) {
 		/* We use old_wait_mutex in case the cell has already
@@ -471,12 +472,12 @@ sync_array_cell_print(
 			"Last time reserved in file %s line %lu, "
 #endif /* UNIV_SYNC_DEBUG */
 			"waiters flag %lu\n",
-			mutex, mutex->cfile_name, mutex->cline,
-			mutex->lock_word,
+			mutex, mutex->cfile_name, (ulong) mutex->cline,
+			(ulong) mutex->lock_word,
 #ifdef UNIV_SYNC_DEBUG
-			mutex->file_name, mutex->line,
+			mutex->file_name, (ulong) mutex->line,
 #endif /* UNIV_SYNC_DEBUG */
-			mutex->waiters);
+			(ulong) mutex->waiters);
 
 	} else if (type == RW_LOCK_EX || type == RW_LOCK_SHARED) {
 
@@ -490,11 +491,12 @@ sync_array_cell_print(
 
 		buf += sprintf(buf,
 			" RW-latch at %lx created in file %s line %lu\n",
-			(ulint)rwlock, rwlock->cfile_name, rwlock->cline);
+			(ulong) rwlock, rwlock->cfile_name,
+			(ulong) rwlock->cline);
 		if (rwlock->writer != RW_LOCK_NOT_LOCKED) {
 			buf += sprintf(buf,
 			"a writer (thread id %lu) has reserved it in mode",
-				os_thread_pf(rwlock->writer_thread));
+				(ulong) os_thread_pf(rwlock->writer_thread));
 			if (rwlock->writer == RW_LOCK_EX) {
 				buf += sprintf(buf, " exclusive\n");
 			} else {
@@ -504,14 +506,16 @@ sync_array_cell_print(
 		
 		buf += sprintf(buf,
 				"number of readers %lu, waiters flag %lu\n",
-				rwlock->reader_count, rwlock->waiters);
+			       (ulong) rwlock->reader_count,
+			       (ulong) rwlock->waiters);
 	
 		buf += sprintf(buf,
 				"Last time read locked in file %s line %lu\n",
-			rwlock->last_s_file_name, rwlock->last_s_line);
+			rwlock->last_s_file_name,
+			(ulong) rwlock->last_s_line);
 		buf += sprintf(buf,
 			"Last time write locked in file %s line %lu\n",
-			rwlock->last_x_file_name, rwlock->last_x_line);
+			rwlock->last_x_file_name, (ulong) rwlock->last_x_line);
 	} else {
 		ut_error;
 	}
@@ -660,8 +664,8 @@ sync_array_detect_deadlock(
 				sync_array_cell_print(buf, cell);
 				printf(
 	"Mutex %lx owned by thread %lu file %s line %lu\n%s",
-			(ulint)mutex, os_thread_pf(mutex->thread_id),
-				mutex->file_name, mutex->line, buf);
+			(ulong) mutex, (ulong) os_thread_pf(mutex->thread_id),
+				mutex->file_name, (ulong) mutex->line, buf);
 
 				return(TRUE);
 			}
@@ -695,7 +699,7 @@ sync_array_detect_deadlock(
 							depth);
 			if (ret) {
 				sync_array_cell_print(buf, cell);
-				printf("rw-lock %lx %s ", (ulint) lock, buf);
+				printf("rw-lock %lx %s ", (ulong)  lock, buf);
 				rw_lock_debug_print(debug);
 				return(TRUE);
 			}
@@ -727,7 +731,7 @@ sync_array_detect_deadlock(
 							depth);
 			if (ret) {
 				sync_array_cell_print(buf, cell);
-				printf("rw-lock %lx %s ", (ulint) lock, buf);
+				printf("rw-lock %lx %s ", (ulong) lock, buf);
 				rw_lock_debug_print(debug);
 
 				return(TRUE);
@@ -991,7 +995,7 @@ sync_array_output_info(
 	
 	buf += sprintf(buf,
 		"OS WAIT ARRAY INFO: reservation count %ld, signal count %ld\n",
-						arr->res_count, arr->sg_count);
+						(long) arr->res_count, (long) arr->sg_count);
 	i = 0;
 	count = 0;
 
diff --git a/innobase/sync/sync0rw.c b/innobase/sync/sync0rw.c
index 86924c437c7..93fd9f14575 100644
--- a/innobase/sync/sync0rw.c
+++ b/innobase/sync/sync0rw.c
@@ -125,6 +125,11 @@ rw_lock_create_func(
 	lock->last_x_line = 0;
 
 	mutex_enter(&rw_lock_list_mutex);
+	
+	if (UT_LIST_GET_LEN(rw_lock_list) > 0) {
+		ut_a(UT_LIST_GET_FIRST(rw_lock_list)->magic_n
+							== RW_LOCK_MAGIC_N);
+	}
 
 	UT_LIST_ADD_FIRST(list, rw_lock_list, lock);
 
@@ -141,7 +146,7 @@ rw_lock_free(
 /*=========*/
 	rw_lock_t*	lock)	/* in: rw-lock */
 {
-	ut_ad(rw_lock_validate(lock));
+	ut_a(rw_lock_validate(lock));
 	ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
 	ut_a(rw_lock_get_waiters(lock) == 0);
 	ut_a(rw_lock_get_reader_count(lock) == 0);
@@ -152,6 +157,13 @@ rw_lock_free(
 
 	mutex_enter(&rw_lock_list_mutex);
 
+	if (UT_LIST_GET_PREV(list, lock)) {
+		ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
+	}
+	if (UT_LIST_GET_NEXT(list, lock)) {
+		ut_a(UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N);
+	}
+
 	UT_LIST_REMOVE(list, rw_lock_list, lock);
 
 	mutex_exit(&rw_lock_list_mutex);
@@ -227,8 +239,8 @@ lock_loop:
 	if (srv_print_latch_waits) {
 		printf(
 	"Thread %lu spin wait rw-s-lock at %lx cfile %s cline %lu rnds %lu\n",
-		os_thread_pf(os_thread_get_curr_id()), (ulint)lock,
-				lock->cfile_name, lock->cline, i);
+		(ulong) os_thread_pf(os_thread_get_curr_id()), (ulong) lock,
+		lock->cfile_name, (ulong) lock->cline, (ulong) i);
 	}
 
 	mutex_enter(rw_lock_get_mutex(lock));
@@ -257,8 +269,8 @@ lock_loop:
 		if (srv_print_latch_waits) {
 			printf(
 		"Thread %lu OS wait rw-s-lock at %lx cfile %s cline %lu\n",
-			os_thread_pf(os_thread_get_curr_id()), (ulint)lock,
-				lock->cfile_name, lock->cline);
+			(ulong) os_thread_pf(os_thread_get_curr_id()),
+		        (ulong) lock, lock->cfile_name, (ulong) lock->cline);
 		}
 
 		rw_s_system_call_count++;
@@ -476,8 +488,8 @@ lock_loop:
 	if (srv_print_latch_waits) {
 		printf(
 	"Thread %lu spin wait rw-x-lock at %lx cfile %s cline %lu rnds %lu\n",
-		os_thread_pf(os_thread_get_curr_id()), (ulint)lock,
-					lock->cfile_name, lock->cline, i);
+		(ulong) os_thread_pf(os_thread_get_curr_id()), (ulong) lock,
+		lock->cfile_name, (ulong) lock->cline, (ulong) i);
 	}
 
 	rw_x_spin_wait_count++;
@@ -509,8 +521,8 @@ lock_loop:
 	if (srv_print_latch_waits) {
 		printf(
 		"Thread %lu OS wait for rw-x-lock at %lx cfile %s cline %lu\n",
-		os_thread_pf(os_thread_get_curr_id()), (ulint)lock,
-				lock->cfile_name, lock->cline);
+		(ulong) os_thread_pf(os_thread_get_curr_id()), (ulong) lock,
+		lock->cfile_name, (ulong) lock->cline);
 	}
 
 	rw_x_system_call_count++;
@@ -836,7 +848,8 @@ rw_lock_debug_print(
 	rwt 	  = info->lock_type;	
 			
 	printf("Locked: thread %ld file %s line %ld  ",
-		os_thread_pf(info->thread_id), info->file_name, info->line);
+		(ulong) os_thread_pf(info->thread_id), info->file_name,
+	        (ulong) info->line);
 	if (rwt == RW_LOCK_SHARED) {
 		printf("S-LOCK");
 	} else if (rwt == RW_LOCK_EX) {
@@ -847,7 +860,7 @@ rw_lock_debug_print(
 		ut_error;
 	}
 	if (info->pass != 0) {
-		printf(" pass value %lu", info->pass);
+		printf(" pass value %lu", (ulong) info->pass);
 	}
 	printf("\n");
 }
diff --git a/innobase/sync/sync0sync.c b/innobase/sync/sync0sync.c
index 4f5d27bcc7c..6ad766d8bc8 100644
--- a/innobase/sync/sync0sync.c
+++ b/innobase/sync/sync0sync.c
@@ -208,6 +208,10 @@ mutex_create_func(
 	
 	mutex_enter(&mutex_list_mutex);
 
+        if (UT_LIST_GET_LEN(mutex_list) > 0) {
+                ut_a(UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N);
+        }
+
 	UT_LIST_ADD_FIRST(list, mutex_list, mutex);
 
 	mutex_exit(&mutex_list_mutex);
@@ -223,7 +227,7 @@ mutex_free(
 /*=======*/
 	mutex_t*	mutex)	/* in: mutex */
 {
-	ut_ad(mutex_validate(mutex));
+	ut_a(mutex_validate(mutex));
 	ut_a(mutex_get_lock_word(mutex) == 0);
 	ut_a(mutex_get_waiters(mutex) == 0);
 	
@@ -231,6 +235,15 @@ mutex_free(
 
 	        mutex_enter(&mutex_list_mutex);
 
+		if (UT_LIST_GET_PREV(list, mutex)) {
+			ut_a(UT_LIST_GET_PREV(list, mutex)->magic_n
+							== MUTEX_MAGIC_N);
+		}
+		if (UT_LIST_GET_NEXT(list, mutex)) {
+			ut_a(UT_LIST_GET_NEXT(list, mutex)->magic_n
+							== MUTEX_MAGIC_N);
+		}
+        
 	        UT_LIST_REMOVE(list, mutex_list, mutex);
 
 		mutex_exit(&mutex_list_mutex);
@@ -354,8 +367,8 @@ spin_loop:
 	if (srv_print_latch_waits) {
 		printf(
 	"Thread %lu spin wait mutex at %lx cfile %s cline %lu rnds %lu\n",
-		os_thread_pf(os_thread_get_curr_id()), (ulint)mutex,
-				mutex->cfile_name, mutex->cline, i);
+		(ulong) os_thread_pf(os_thread_get_curr_id()), (ulong) mutex,
+		mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
 	}
 
 	mutex_spin_round_count += i;
@@ -414,7 +427,8 @@ spin_loop:
 		if (srv_print_latch_waits) {
 			printf(
 			"Thread %lu spin wait succeeds at 2: mutex at %lx\n",
-			os_thread_pf(os_thread_get_curr_id()), (ulint)mutex);
+			(ulong) os_thread_pf(os_thread_get_curr_id()),
+			(ulong) mutex);
 		}
 		
                 return;
@@ -432,8 +446,8 @@ spin_loop:
 	if (srv_print_latch_waits) {
 		printf(
 	"Thread %lu OS wait mutex at %lx cfile %s cline %lu rnds %lu\n",
-		os_thread_pf(os_thread_get_curr_id()), (ulint)mutex,
-			mutex->cfile_name, mutex->cline, i);
+		(ulong) os_thread_pf(os_thread_get_curr_id()), (ulong) mutex,
+		mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
 	}
 	
 	mutex_system_call_count++;
@@ -735,11 +749,11 @@ sync_thread_levels_g(
 
 				printf(
 	"InnoDB error: sync levels should be > %lu but a level is %lu\n",
-				limit, slot->level);
+				(ulong) limit, (ulong) slot->level);
 
 				if (mutex->magic_n == MUTEX_MAGIC_N) {
 	printf("Mutex created at %s %lu\n", mutex->cfile_name,
-						mutex->cline);
+						(ulong) mutex->cline);
 
 					if (mutex_get_lock_word(mutex) != 0) {
 #ifdef UNIV_SYNC_DEBUG
@@ -752,7 +766,7 @@ sync_thread_levels_g(
 
 						fprintf(stderr,
-		"InnoDB: Locked mutex: addr %p thread %ld file %s line %ld\n",
-		mutex, os_thread_pf(thread_id), file_name, line);
+		"InnoDB: Locked mutex: addr %p thread %lu file %s line %lu\n",
+		mutex, (ulong) os_thread_pf(thread_id), file_name, (ulong) line);
 #else /* UNIV_SYNC_DEBUG */
 						fprintf(stderr,
 		"InnoDB: Locked mutex: addr %p\n", mutex);
@@ -938,7 +952,7 @@ sync_thread_add_level(
 	}
 
 	array = thread_slot->levels;
-			 
+	
 	/* NOTE that there is a problem with _NODE and _LEAF levels: if the
 	B-tree height changes, then a leaf can change to an internal node
 	or the other way around. We do not know at present if this can cause
@@ -1226,10 +1240,13 @@ sync_print_wait_info(
 	sprintf(buf,
 "Mutex spin waits %lu, rounds %lu, OS waits %lu\n"
 "RW-shared spins %lu, OS waits %lu; RW-excl spins %lu, OS waits %lu\n",
-			mutex_spin_wait_count, mutex_spin_round_count,
-			mutex_os_wait_count,
-			rw_s_spin_wait_count, rw_s_os_wait_count,
-			rw_x_spin_wait_count, rw_x_os_wait_count);
+			(ulong) mutex_spin_wait_count,
+		        (ulong) mutex_spin_round_count,
+			(ulong) mutex_os_wait_count,
+			(ulong) rw_s_spin_wait_count,
+		        (ulong) rw_s_os_wait_count,
+			(ulong) rw_x_spin_wait_count,
+		        (ulong) rw_x_os_wait_count);
 }
 
 /***********************************************************************
diff --git a/innobase/trx/trx0purge.c b/innobase/trx/trx0purge.c
index 6a509b163b3..558a0825fd7 100644
--- a/innobase/trx/trx0purge.c
+++ b/innobase/trx/trx0purge.c
@@ -277,7 +277,7 @@ trx_purge_add_update_undo_to_history(
 
 		if (undo->id >= TRX_RSEG_N_SLOTS) {
 			fprintf(stderr,
-			"InnoDB: Error: undo->id is %lu\n", undo->id);
+			"InnoDB: Error: undo->id is %lu\n", (ulong) undo->id);
 			ut_error;
 		}
 
@@ -920,7 +920,7 @@ trx_purge_fetch_next_rec(
 			if (srv_print_thread_releases) {
 				printf(
 	"Purge: No logs left in the history list; pages handled %lu\n",
-					purge_sys->n_pages_handled);
+					(ulong) purge_sys->n_pages_handled);
 			}
 
 			mutex_exit(&(purge_sys->mutex));
@@ -1072,7 +1072,8 @@ trx_purge(void)
 	if (srv_print_thread_releases) {
 
 		printf(
-		"Purge ends; pages handled %lu\n", purge_sys->n_pages_handled);
+		"Purge ends; pages handled %lu\n",
+		(ulong) purge_sys->n_pages_handled);
 	}
 
 	return(purge_sys->n_pages_handled - old_pages_handled);
@@ -1089,16 +1090,16 @@ trx_purge_sys_print(void)
 	read_view_print(purge_sys->view);
 
 	fprintf(stderr, "InnoDB: Purge trx n:o %lu %lu, undo n_o %lu %lu\n",
-			ut_dulint_get_high(purge_sys->purge_trx_no),
-			ut_dulint_get_low(purge_sys->purge_trx_no),
-			ut_dulint_get_high(purge_sys->purge_undo_no),
-			ut_dulint_get_low(purge_sys->purge_undo_no));
+			(ulong) ut_dulint_get_high(purge_sys->purge_trx_no),
+			(ulong) ut_dulint_get_low(purge_sys->purge_trx_no),
+			(ulong) ut_dulint_get_high(purge_sys->purge_undo_no),
+			(ulong) ut_dulint_get_low(purge_sys->purge_undo_no));
 	fprintf(stderr,
 	"InnoDB: Purge next stored %lu, page_no %lu, offset %lu,\n"
 	"InnoDB: Purge hdr_page_no %lu, hdr_offset %lu\n",
-		purge_sys->next_stored,
-		purge_sys->page_no,
-		purge_sys->offset,
-		purge_sys->hdr_page_no,
-		purge_sys->hdr_offset);
+		(ulong) purge_sys->next_stored,
+		(ulong) purge_sys->page_no,
+		(ulong) purge_sys->offset,
+		(ulong) purge_sys->hdr_page_no,
+		(ulong) purge_sys->hdr_offset);
 }
diff --git a/innobase/trx/trx0rec.c b/innobase/trx/trx0rec.c
index 9d944e16a1e..bd37a4b506b 100644
--- a/innobase/trx/trx0rec.c
+++ b/innobase/trx/trx0rec.c
@@ -825,15 +825,16 @@ trx_undo_update_rec_get_update(
 			fprintf(stderr,
    "InnoDB: Error: trying to access update undo rec field %lu in table %s\n"
    "InnoDB: index %s, but index has only %lu fields\n",
-			field_no, index->table_name, index->name,
-			dict_index_get_n_fields(index));
+			(ulong) field_no, index->table_name, index->name,
+			(ulong) dict_index_get_n_fields(index));
   			fprintf(stderr,
-  "InnoDB: Send a detailed bug report to mysql@lists.mysql.com");
+  "InnoDB: Send a detailed bug report to mysql@lists.mysql.com\n");
    
   			fprintf(stderr,
    "InnoDB: Run also CHECK TABLE on table %s\n", index->table_name);
   			fprintf(stderr,
-   "InnoDB: n_fields = %lu, i = %lu, ptr %lx\n", n_fields, i, (ulint)ptr);
+   "InnoDB: n_fields = %lu, i = %lu, ptr %lx\n", (ulong) n_fields, (ulong) i,
+				(ulong) ptr);
 			return(NULL);
 		}
 
@@ -1336,17 +1337,18 @@ trx_undo_prev_version_build(
 		fprintf(stderr,
 			"InnoDB: Table name %s, index name %s, n_uniq %lu\n",
 			index->table_name, index->name,
-			dict_index_get_n_unique(index));
+			(ulong) dict_index_get_n_unique(index));
 		
 		fprintf(stderr,
 		"InnoDB: undo rec address %lx, type %lu cmpl_info %lu\n",
-					(ulint)undo_rec, type, cmpl_info);
+					(ulong) undo_rec, (ulong) type,
+					(ulong) cmpl_info);
 		fprintf(stderr,
 		"InnoDB: undo rec table id %lu %lu, index table id %lu %lu\n",
-			ut_dulint_get_high(table_id),
-			ut_dulint_get_low(table_id),
-			ut_dulint_get_high(index->table->id),
-			ut_dulint_get_low(index->table->id));
+			(ulong) ut_dulint_get_high(table_id),
+			(ulong) ut_dulint_get_low(table_id),
+			(ulong) ut_dulint_get_high(index->table->id),
+			(ulong) ut_dulint_get_low(index->table->id));
 		
 		ut_sprintf_buf(err_buf, undo_rec, 150);
 
@@ -1360,17 +1362,17 @@ trx_undo_prev_version_build(
 
 		fprintf(stderr,
 	"InnoDB: Record trx id %lu %lu, update rec trx id %lu %lu\n",
-		 	ut_dulint_get_high(rec_trx_id),
-		 	ut_dulint_get_low(rec_trx_id),
-		 	ut_dulint_get_high(trx_id),
-		 	ut_dulint_get_low(trx_id));
+		 	(ulong) ut_dulint_get_high(rec_trx_id),
+		 	(ulong) ut_dulint_get_low(rec_trx_id),
+		 	(ulong) ut_dulint_get_high(trx_id),
+		 	(ulong) ut_dulint_get_low(trx_id));
 
 		fprintf(stderr,
 	"InnoDB: Roll ptr in rec %lu %lu, in update rec %lu %lu\n",
-		 	ut_dulint_get_high(old_roll_ptr),
-		 	ut_dulint_get_low(old_roll_ptr),
-		 	ut_dulint_get_high(roll_ptr),
-		 	ut_dulint_get_low(roll_ptr));
+		 	(ulong) ut_dulint_get_high(old_roll_ptr),
+		 	(ulong) ut_dulint_get_low(old_roll_ptr),
+		 	(ulong) ut_dulint_get_high(roll_ptr),
+		 	(ulong) ut_dulint_get_low(roll_ptr));
 		 
 		trx_purge_sys_print();
 		 
diff --git a/innobase/trx/trx0roll.c b/innobase/trx/trx0roll.c
index c00c6f0c862..6a25304c7ef 100644
--- a/innobase/trx/trx0roll.c
+++ b/innobase/trx/trx0roll.c
@@ -392,8 +392,8 @@ loop:
 	
 	if (trx->conc_state == TRX_COMMITTED_IN_MEMORY) {	
 		fprintf(stderr, "InnoDB: Cleaning up trx with id %lu %lu\n",
-					ut_dulint_get_high(trx->id),
-					ut_dulint_get_low(trx->id));
+					(ulong) ut_dulint_get_high(trx->id),
+					(ulong) ut_dulint_get_low(trx->id));
 
 		trx_cleanup_at_db_startup(trx);
 					
@@ -428,9 +428,9 @@ loop:
 
 	fprintf(stderr,
 "InnoDB: Rolling back trx with id %lu %lu, %lu%s rows to undo",
-					ut_dulint_get_high(trx->id),
-					ut_dulint_get_low(trx->id),
-					(ulint)rows_to_undo, unit);
+					(ulong) ut_dulint_get_high(trx->id),
+					(ulong) ut_dulint_get_low(trx->id),
+					(ulong) rows_to_undo, unit);
 	mutex_exit(&kernel_mutex);
 
 	if (trx->dict_operation) {
@@ -447,7 +447,7 @@ loop:
 
 		fprintf(stderr,
 		"InnoDB: Waiting for rollback of trx id %lu to end\n",
-						ut_dulint_get_low(trx->id));
+						(ulong) ut_dulint_get_low(trx->id));
 		os_thread_sleep(100000);
 
 		mutex_enter(&kernel_mutex);
@@ -461,8 +461,8 @@ loop:
 
 		fprintf(stderr,
 "InnoDB: Dropping table with id %lu %lu in recovery if it exists\n",
-			ut_dulint_get_high(trx->table_id),
-			ut_dulint_get_low(trx->table_id));
+			(ulong) ut_dulint_get_high(trx->table_id),
+			(ulong) ut_dulint_get_low(trx->table_id));
 
 		table = dict_table_get_on_id_low(trx->table_id, trx);
 
@@ -481,8 +481,8 @@ loop:
 	}
 
 	fprintf(stderr, "\nInnoDB: Rolling back of trx id %lu %lu completed\n",
-					ut_dulint_get_high(trx->id),
-					ut_dulint_get_low(trx->id));
+					(ulong) ut_dulint_get_high(trx->id),
+					(ulong) ut_dulint_get_low(trx->id));
 	mem_heap_free(heap);
 
 	goto loop;
@@ -855,10 +855,10 @@ try_again:
 		if (progress_pct != trx_roll_progress_printed_pct) {
 			if (trx_roll_progress_printed_pct == 0) {
 				fprintf(stderr,
-			"\nInnoDB: Progress in percents: %lu", progress_pct);
+			"\nInnoDB: Progress in percents: %lu", (ulong) progress_pct);
 			} else {
 				fprintf(stderr,
-				" %lu", progress_pct);
+				" %lu", (ulong) progress_pct);
 			}
 			fflush(stderr);
 			trx_roll_progress_printed_pct = progress_pct;
@@ -1142,7 +1142,7 @@ trx_finish_rollback_off_kernel(
 
 	if (lock_print_waits) {			
 		printf("Trx %lu rollback finished\n",
-						ut_dulint_get_low(trx->id));
+						(ulong) ut_dulint_get_low(trx->id));
 	}
 
 	trx_commit_off_kernel(trx);
diff --git a/innobase/trx/trx0sys.c b/innobase/trx/trx0sys.c
index d4c14a5509c..e7439935b7e 100644
--- a/innobase/trx/trx0sys.c
+++ b/innobase/trx/trx0sys.c
@@ -26,6 +26,17 @@ Created 3/26/1996 Heikki Tuuri
 trx_sys_t*		trx_sys 	= NULL;
 trx_doublewrite_t*	trx_doublewrite = NULL;
 
+/* The following is set to TRUE when we are upgrading from the old data file
+format to the new (>= 4.1.x) multiple tablespaces data file format */
+
+ibool			trx_doublewrite_must_reset_space_ids	= FALSE;
+
+/* The following is TRUE when we are using the database in the new format,
+i.e., we have successfully upgraded, or have created a new database
+installation */
+
+ibool			trx_sys_multiple_tablespace_format	= FALSE;
+
 /* In a MySQL replication slave, in crash recovery we store the master log
 file name and position here. We have successfully got the updates to InnoDB
 up to this position. If .._pos is -1, it means no crash recovery was needed,
@@ -34,45 +45,6 @@ or there was no master log position info inside InnoDB. */
 char 		trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
 ib_longlong	trx_sys_mysql_master_log_pos	= -1;
 
-/* Do NOT merge this to the 4.1 code base! */
-ibool		trx_sys_downgrading_from_4_1_1	= FALSE;
-
-/********************************************************************
-Do NOT merge this to the 4.1 code base!
-Marks the trx sys header when we have successfully downgraded from the >= 4.1.1
-multiple tablespace format back to the 4.0 format. */
-
-void
-trx_sys_mark_downgraded_from_4_1_1(void)
-/*====================================*/
-{
-	page_t*	page;
-	byte*	doublewrite;
-	mtr_t	mtr;
-
-	/* Let us mark to the trx_sys header that the downgrade has been
-	done. */
-
-	mtr_start(&mtr);
-
-	page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
-#ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-
-	doublewrite = page + TRX_SYS_DOUBLEWRITE;
-
-	mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
-				TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N + 1,
-				MLOG_4BYTES, &mtr);
-	mtr_commit(&mtr);
-		
-	/* Flush the modified pages to disk and make a checkpoint */
-	log_make_checkpoint_at(ut_dulint_max, TRUE);
-
-	trx_sys_downgrading_from_4_1_1 = FALSE;
-}
-
 /********************************************************************
 Determines if a page number is located inside the doublewrite buffer. */
 
@@ -114,11 +86,11 @@ trx_doublewrite_init(
 {
 	trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
 
-	/* When we have the doublewrite buffer in use, we do not need to
-	call os_file_flush (Unix fsync) after every write. */
-	
+	/* Since we now start to use the doublewrite buffer, no need to call
+	fsync() after every write to a data file */
+
 	os_do_not_call_flush_at_each_write = TRUE;
-	
+
 	mutex_create(&(trx_doublewrite->mutex));
 	mutex_set_level(&(trx_doublewrite->mutex), SYNC_DOUBLEWRITE);
 
@@ -144,7 +116,41 @@ trx_doublewrite_init(
 }
 
 /********************************************************************
-Creates the doublewrite buffer at a database start. The header of the
+Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
+multiple tablespace format. */
+
+void
+trx_sys_mark_upgraded_to_multiple_tablespaces(void)
+/*===============================================*/
+{
+	mtr_t	mini_trx;
+	page_t*	sys_page;
+	byte*	flag_field;
+
+	/* The space id fields of the doublewrite buffer pages were reset
+	during the upgrade to 4.1.x; record in the trx_sys header page that
+	the upgrade is complete. */
+
+	mtr_start(&mini_trx);
+
+	sys_page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH,
+								&mini_trx);
+	buf_page_dbg_add_level(sys_page, SYNC_NO_ORDER_CHECK);
+
+	flag_field = sys_page + TRX_SYS_DOUBLEWRITE
+				+ TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED;
+	mlog_write_ulint(flag_field, TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
+						MLOG_4BYTES, &mini_trx);
+	mtr_commit(&mini_trx);
+
+	/* Write the dirty pages out and take a checkpoint */
+	log_make_checkpoint_at(ut_dulint_max, TRUE);
+
+	trx_sys_multiple_tablespace_format = TRUE;
+}
+
+/********************************************************************
+Creates the doublewrite buffer for a new InnoDB installation. The header of the
 doublewrite buffer is placed on the trx system header page. */
 
 void
@@ -179,31 +185,6 @@ start_again:
 	
 	if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
 					== TRX_SYS_DOUBLEWRITE_MAGIC_N) {
-		/* Do NOT merge to the 4.1 code base! */
-        	if (mach_read_from_4(doublewrite
-				+ TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
-            		== TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
-			
-			fprintf(stderr,
-"InnoDB: You are downgrading from the multiple tablespace format of\n"
-"InnoDB: >= MySQL-4.1.1 back to the old format of MySQL-4.0.\n"
-"InnoDB:\n"
-"InnoDB: MAKE SURE that the mysqld server is idle, and purge and the insert\n"
-"InnoDB: buffer merge have run to completion under >= 4.1.1 before trying to\n"
-"InnoDB: downgrade! You can determine this by looking at SHOW INNODB STATUS:\n"
-"InnoDB: if the Main thread is 'waiting for server activity' and SHOW\n"
-"InnoDB: PROCESSLIST shows that you have ended all other connections\n"
-"InnoDB: to mysqld, then purge and the insert buffer merge have been\n"
-"InnoDB: completed.\n"
-"InnoDB: If you have already created tables in >= 4.1.1, then those\n"
-"InnoDB: tables cannot be used under 4.0.\n"
-"InnoDB: NOTE THAT this downgrade procedure has not been properly tested!\n"
-"InnoDB: The safe way to downgrade is to dump all InnoDB tables and recreate\n"
-"InnoDB: the whole tablespace.\n");
-
-			trx_sys_downgrading_from_4_1_1 = TRUE;
-		}
-
 		/* The doublewrite buffer has already been created:
 		just read in some numbers */
 
@@ -313,10 +294,15 @@ start_again:
 		}
 
 		mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
-				TRX_SYS_DOUBLEWRITE_MAGIC_N, MLOG_4BYTES, &mtr);
+			TRX_SYS_DOUBLEWRITE_MAGIC_N, MLOG_4BYTES, &mtr);
 		mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
 						+ TRX_SYS_DOUBLEWRITE_REPEAT,
-				TRX_SYS_DOUBLEWRITE_MAGIC_N, MLOG_4BYTES, &mtr);
+			TRX_SYS_DOUBLEWRITE_MAGIC_N, MLOG_4BYTES, &mtr);
+
+		mlog_write_ulint(doublewrite
+				+ TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
+				TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
+				MLOG_4BYTES, &mtr);
 		mtr_commit(&mtr);
 		
 		/* Flush the modified pages to disk and make a checkpoint */
@@ -324,23 +310,31 @@ start_again:
 
 		fprintf(stderr, "InnoDB: Doublewrite buffer created\n");
 
+		trx_sys_multiple_tablespace_format = TRUE;
+
 		goto start_again;
 	}
 }
 
 /********************************************************************
-At a database startup uses a possible doublewrite buffer to restore
+At a database startup initializes the doublewrite buffer memory structure if
+we already have a doublewrite buffer created in the data files. If we are
+upgrading to an InnoDB version which supports multiple tablespaces, then this
+function performs the necessary update operations. If we are in a crash
+recovery, this function uses a possible doublewrite buffer to restore
 half-written pages in the data files. */
 
 void
-trx_sys_doublewrite_restore_corrupt_pages(void)
-/*===========================================*/
+trx_sys_doublewrite_init_or_restore_pages(
+/*======================================*/
+	ibool	restore_corrupt_pages)
 {
 	byte*	buf;
 	byte*	read_buf;
 	byte*	unaligned_read_buf;
 	ulint	block1;
 	ulint	block2;
+	ulint	source_page_no;
 	byte*	page;
 	byte*	doublewrite;
 	ulint	space_id;
@@ -352,43 +346,17 @@ trx_sys_doublewrite_restore_corrupt_pages(void)
 	unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE);
 	read_buf = ut_align(unaligned_read_buf, UNIV_PAGE_SIZE);	
 
-	/* Read the trx sys header to check if we are using the
-	doublewrite buffer */
+	/* Read the trx sys header to check if we are using the doublewrite
+	buffer */
 
 	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, TRX_SYS_PAGE_NO, 0,
 					UNIV_PAGE_SIZE, read_buf, NULL);
-
 	doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
 
 	if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
 					== TRX_SYS_DOUBLEWRITE_MAGIC_N) {
 		/* The doublewrite buffer has been created */
 		
-		/* Do NOT merge to the 4.1 code base! */
-        	if (mach_read_from_4(doublewrite
-				+ TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
-            		== TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
-			
-			fprintf(stderr,
-"InnoDB: You are downgrading from the multiple tablespace format of\n"
-"InnoDB: >= MySQL-4.1.1 back to the old format of MySQL-4.0.\n"
-"InnoDB:\n"
-"InnoDB: MAKE SURE that the mysqld server is idle, and purge and the insert\n"
-"InnoDB: buffer merge have run to completion under >= 4.1.1 before trying to\n"
-"InnoDB: downgrade! You can determine this by looking at SHOW INNODB STATUS:\n"
-"InnoDB: if the Main thread is 'waiting for server activity' and SHOW\n"
-"InnoDB: PROCESSLIST shows that you have ended all other connections\n"
-"InnoDB: to mysqld, then purge and the insert buffer merge have been\n"
-"InnoDB: completed.\n"
-"InnoDB: If you have already created tables in >= 4.1.1, then those\n"
-"InnoDB: tables cannot be used under 4.0.\n"
-"InnoDB: NOTE THAT this downgrade procedure has not been properly tested!\n"
-"InnoDB: The safe way to downgrade is to dump all InnoDB tables and recreate\n"
-"InnoDB: the whole tablespace.\n");
-
-			trx_sys_downgrading_from_4_1_1 = TRUE;
-		}
-
 		trx_doublewrite_init(doublewrite);
 
 		block1 = trx_doublewrite->block1;
@@ -399,6 +367,23 @@ trx_sys_doublewrite_restore_corrupt_pages(void)
 		goto leave_func;
 	}
 
+	if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
+	    != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
+		        
+	        /* We are upgrading from a version < 4.1.x to a version where
+		multiple tablespaces are supported. We must reset the space id
+		field in the pages in the doublewrite buffer because starting
+		from this version the space id is stored to
+		FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
+
+		trx_doublewrite_must_reset_space_ids = TRUE;
+
+		fprintf(stderr,
+"InnoDB: Resetting space id's in the doublewrite buffer\n");
+	} else {
+		trx_sys_multiple_tablespace_format = TRUE;
+	}
+
 	/* Read the pages from the doublewrite buffer to memory */
 
 	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block1, 0,
@@ -416,13 +401,46 @@ trx_sys_doublewrite_restore_corrupt_pages(void)
 	for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
 		
 		page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
-		space_id = 0;
 
-		if (!fil_check_adress_in_tablespace(space_id, page_no)) {
+		if (trx_doublewrite_must_reset_space_ids) {
+
+		        space_id = 0;
+			mach_write_to_4(page
+					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0);
+			/* We do not need to calculate new checksums for the
+			pages because the field .._SPACE_ID does not affect
+			them. Write the page back to where we read it from. */
+
+			if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
+			        source_page_no = block1 + i;
+			} else {
+				source_page_no = block2
+					+ i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
+			}
+
+			fil_io(OS_FILE_WRITE, TRUE, 0, source_page_no, 0,
+					      UNIV_PAGE_SIZE, page, NULL);
+			/* printf("Resetting space id in page %lu\n",
+						   source_page_no); */
+		} else {
+		        space_id = mach_read_from_4(
+				page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+		}
+
+		if (!restore_corrupt_pages) {
+			/* The database was shut down gracefully: no need to
+			restore pages */
+
+		} else if (!fil_tablespace_exists_in_mem(space_id)) {
+			/* Maybe we have dropped the single-table tablespace
+			and this page once belonged to it: do nothing */
+
+		} else if (!fil_check_adress_in_tablespace(space_id,
+								page_no)) {
 		  	fprintf(stderr,
-	"InnoDB: Warning: an inconsistent page in the doublewrite buffer\n"
-	"InnoDB: space id %lu page number %lu, %lu'th page in dblwr buf.\n",
-				space_id, page_no, i);
+"InnoDB: Warning: a page in the doublewrite buffer is not within space\n"
+"InnoDB: bounds; space id %lu page number %lu, page %lu in doublewrite buf.\n",
+				(ulong) space_id, (ulong) page_no, (ulong) i);
 		
 		} else if (space_id == TRX_SYS_SPACE
 		    && (  (page_no >= block1
@@ -445,7 +463,7 @@ trx_sys_doublewrite_restore_corrupt_pages(void)
 
 		  		fprintf(stderr,
 		"InnoDB: Warning: database page corruption or a failed\n"
-		"InnoDB: file read of page %lu.\n", page_no);
+		"InnoDB: file read of page %lu.\n", (ulong) page_no);
 		  		fprintf(stderr,
 		"InnoDB: Trying to recover it from the doublewrite buffer.\n");
 				
@@ -579,8 +597,8 @@ trx_sys_update_mysql_binlog_offset(
 	if (0 != ut_memcmp(sys_header + field + TRX_SYS_MYSQL_LOG_NAME,
 			file_name, 1 + ut_strlen(file_name))) {
 
-		mlog_write_string((byte*) (sys_header + field
-					+ TRX_SYS_MYSQL_LOG_NAME),
+		mlog_write_string(sys_header + field
+					+ TRX_SYS_MYSQL_LOG_NAME,
 			(byte*) file_name, 1 + ut_strlen(file_name), mtr);
 	}
 
@@ -596,8 +614,8 @@ trx_sys_update_mysql_binlog_offset(
 
 	mlog_write_ulint(sys_header + field
 					+ TRX_SYS_MYSQL_LOG_OFFSET_LOW,
-				(ulint)(offset & 0xFFFFFFFF),
-				MLOG_4BYTES, mtr);				
+				(ulint)(offset & 0xFFFFFFFFUL),
+				MLOG_4BYTES, mtr);
 }
 
 /*********************************************************************
@@ -620,9 +638,9 @@ trx_sys_print_mysql_binlog_offset_from_page(
 
 		printf(
 	"ibbackup: Last MySQL binlog file position %lu %lu, file name %s\n",
-		mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
+		(ulong) mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
 					+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
-		mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
+		(ulong) mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
 					+ TRX_SYS_MYSQL_LOG_OFFSET_LOW),
 		sys_header + TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME);
 	}
@@ -654,9 +672,9 @@ trx_sys_print_mysql_binlog_offset(void)
 
 	fprintf(stderr,
 	"InnoDB: Last MySQL binlog file position %lu %lu, file name %s\n",
-		mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
+		(ulong) mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
 					+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
-		mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
+		(ulong) mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
 					+ TRX_SYS_MYSQL_LOG_OFFSET_LOW),
 		sys_header + TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME);
 
@@ -690,9 +708,9 @@ trx_sys_print_mysql_master_log_pos(void)
 	fprintf(stderr,
 "InnoDB: In a MySQL replication slave the last master binlog file\n"
 "InnoDB: position %lu %lu, file name %s\n",
-		mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+		(ulong) mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 					+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
-		mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
+		(ulong) mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 					+ TRX_SYS_MYSQL_LOG_OFFSET_LOW),
 		sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
 						+ TRX_SYS_MYSQL_LOG_NAME);
@@ -867,12 +885,12 @@ trx_sys_init_at_db_start(void)
 		fprintf(stderr,
 "InnoDB: %lu transaction(s) which must be rolled back or cleaned up\n"
 "InnoDB: in total %lu%s row operations to undo\n",
-				UT_LIST_GET_LEN(trx_sys->trx_list),
-				(ulint)rows_to_undo, unit);
+				(ulong) UT_LIST_GET_LEN(trx_sys->trx_list),
+				(ulong) rows_to_undo, unit);
 
 		fprintf(stderr, "InnoDB: Trx id counter is %lu %lu\n", 
-			ut_dulint_get_high(trx_sys->max_trx_id),
-			ut_dulint_get_low(trx_sys->max_trx_id));
+			(ulong) ut_dulint_get_high(trx_sys->max_trx_id),
+			(ulong) ut_dulint_get_low(trx_sys->max_trx_id));
 	}
 
 	UT_LIST_INIT(trx_sys->view_list);
diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c
index 9b6e6904537..b509d80e452 100644
--- a/innobase/trx/trx0trx.c
+++ b/innobase/trx/trx0trx.c
@@ -107,7 +107,7 @@ trx_create(
 
 	trx->mysql_log_file_name = NULL;
 	trx->mysql_log_offset = 0;
-	trx->mysql_master_log_file_name = (char*) "";
+	trx->mysql_master_log_file_name = (char*)"";
 	trx->mysql_master_log_pos = 0;
 	
 	mutex_create(&(trx->undo_mutex));
@@ -1573,26 +1573,26 @@ trx_print(
         char*   start_of_line;
 
         buf += sprintf(buf, "TRANSACTION %lu %lu",
-		ut_dulint_get_high(trx->id),
-		 ut_dulint_get_low(trx->id));
+		(ulong) ut_dulint_get_high(trx->id),
+		 (ulong) ut_dulint_get_low(trx->id));
 
   	switch (trx->conc_state) {
   		case TRX_NOT_STARTED:         buf += sprintf(buf,
 						", not started"); break;
   		case TRX_ACTIVE:              buf += sprintf(buf,
 						", ACTIVE %lu sec",
-			 (ulint)difftime(time(NULL), trx->start_time)); break;
+			 (ulong) difftime(time(NULL), trx->start_time)); break;
   		case TRX_COMMITTED_IN_MEMORY: buf += sprintf(buf,
 						", COMMITTED IN MEMORY");
 									break;
-  		default: buf += sprintf(buf, " state %lu", trx->conc_state);
+  		default: buf += sprintf(buf, " state %lu", (ulong) trx->conc_state);
   	}
 
 #ifdef UNIV_LINUX
         buf += sprintf(buf, ", process no %lu", trx->mysql_process_no);
 #endif
         buf += sprintf(buf, ", OS thread id %lu",
-		       os_thread_pf(trx->mysql_thread_id));
+		       (ulong) os_thread_pf(trx->mysql_thread_id));
 
 	if (ut_strlen(trx->op_info) > 0) {
 		buf += sprintf(buf, " %s", trx->op_info);
@@ -1604,18 +1604,18 @@ trx_print(
 
 	if (trx->declared_to_be_inside_innodb) {
 	        buf += sprintf(buf, ", thread declared inside InnoDB %lu",
-			       trx->n_tickets_to_enter_innodb);
+			       (ulong) trx->n_tickets_to_enter_innodb);
 	}
 
 	buf += sprintf(buf, "\n");
+  	
+        if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
 
-	if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
+                buf += sprintf(buf, "mysql tables in use %lu, locked %lu\n",
+                                    (ulong) trx->n_mysql_tables_in_use,
+                                    (ulong) trx->mysql_n_tables_locked);
+        }
 
-		buf += sprintf(buf, "mysql tables in use %lu, locked %lu\n",
-				    trx->n_mysql_tables_in_use,
-				    trx->mysql_n_tables_locked);
-	}
-  	
 	start_of_line = buf;
 
   	switch (trx->que_state) {
@@ -1626,7 +1626,7 @@ trx_print(
 						"ROLLING BACK "); break;
   		case TRX_QUE_COMMITTING:      buf += sprintf(buf,
 						"COMMITTING "); break;
-  		default: buf += sprintf(buf, "que state %lu", trx->que_state);
+  		default: buf += sprintf(buf, "que state %lu", (ulong) trx->que_state);
   	}
 
   	if (0 < UT_LIST_GET_LEN(trx->trx_locks) ||
@@ -1634,8 +1634,8 @@ trx_print(
 
   		buf += sprintf(buf,
 "%lu lock struct(s), heap size %lu",
-			       UT_LIST_GET_LEN(trx->trx_locks),
-			       mem_heap_get_size(trx->lock_heap));
+			       (ulong) UT_LIST_GET_LEN(trx->trx_locks),
+			       (ulong) mem_heap_get_size(trx->lock_heap));
 	}
 
   	if (trx->has_search_latch) {
@@ -1644,7 +1644,7 @@ trx_print(
 
 	if (ut_dulint_cmp(trx->undo_no, ut_dulint_zero) != 0) {
 		buf += sprintf(buf, ", undo log entries %lu",
-			ut_dulint_get_low(trx->undo_no));
+			(ulong) ut_dulint_get_low(trx->undo_no));
 	}
 	
 	if (buf != start_of_line) {
diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c
index de3da382e83..f2b7227d84a 100644
--- a/innobase/trx/trx0undo.c
+++ b/innobase/trx/trx0undo.c
@@ -387,6 +387,7 @@ trx_undo_seg_create(
 	page_t* 	undo_page;
 	trx_upagef_t*	page_hdr;
 	trx_usegf_t*	seg_hdr;
+	ulint		n_reserved;
 	ibool		success;
 	
 	ut_ad(mtr && id && rseg_hdr);
@@ -413,8 +414,8 @@ trx_undo_seg_create(
 
 	space = buf_frame_get_space_id(rseg_hdr);
 
-	success = fsp_reserve_free_extents(space, 2, FSP_UNDO, mtr);
-	
+	success = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO,
+									mtr);
 	if (!success) {
 
 		return(NULL);
@@ -424,7 +425,7 @@ trx_undo_seg_create(
 	undo_page = fseg_create_general(space, 0,
 			TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER, TRUE, mtr);
 
-	fil_space_release_free_extents(space, 2);
+	fil_space_release_free_extents(space, n_reserved);
 			
 	if (undo_page == NULL) {
 		/* No space left */
@@ -737,6 +738,7 @@ trx_undo_add_page(
 	page_t*		new_page;
 	trx_rseg_t*	rseg;
 	ulint		page_no;
+	ulint		n_reserved;
 	ibool		success;
 	
 #ifdef UNIV_SYNC_DEBUG
@@ -754,8 +756,8 @@ trx_undo_add_page(
 
 	header_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr);
 
-	success = fsp_reserve_free_extents(undo->space, 1, FSP_UNDO, mtr);
-
+	success = fsp_reserve_free_extents(&n_reserved, undo->space, 1,
+							FSP_UNDO, mtr);
 	if (!success) {
 
 		return(FIL_NULL);
@@ -766,7 +768,7 @@ trx_undo_add_page(
 					undo->top_page_no + 1, FSP_UP,
 					TRUE, mtr);
 
-	fil_space_release_free_extents(undo->space, 1);
+	fil_space_release_free_extents(undo->space, n_reserved);
 					
 	if (page_no == FIL_NULL) {
 
@@ -1127,7 +1129,7 @@ trx_undo_mem_create_at_db_start(
 	
 	if (id >= TRX_RSEG_N_SLOTS) {
 		fprintf(stderr,
-		"InnoDB: Error: undo->id is %lu\n", id);
+		"InnoDB: Error: undo->id is %lu\n", (ulong) id);
 		ut_error;
 	}
 
@@ -1285,7 +1287,7 @@ trx_undo_mem_create(
 
 	if (id >= TRX_RSEG_N_SLOTS) {
 		fprintf(stderr,
-		"InnoDB: Error: undo->id is %lu\n", id);
+		"InnoDB: Error: undo->id is %lu\n", (ulong) id);
 		ut_error;
 	}
 
@@ -1330,7 +1332,8 @@ trx_undo_mem_init_for_reuse(
 #endif /* UNIV_SYNC_DEBUG */
 	
  	if (undo->id >= TRX_RSEG_N_SLOTS) {
-		fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", undo->id);
+		fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
+			(ulong) undo->id);
 
 		mem_analyze_corruption((byte*)undo);
 		ut_error;
@@ -1356,7 +1359,7 @@ trx_undo_mem_free(
 {
 	if (undo->id >= TRX_RSEG_N_SLOTS) {
 		fprintf(stderr,
-		"InnoDB: Error: undo->id is %lu\n", undo->id);
+		"InnoDB: Error: undo->id is %lu\n", (ulong) undo->id);
 		ut_error;
 	}
 
@@ -1466,7 +1469,8 @@ trx_undo_reuse_cached(
 	ut_ad(undo->size == 1);
 
 	if (undo->id >= TRX_RSEG_N_SLOTS) {
-		fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", undo->id);
+		fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
+			(ulong) undo->id);
 		mem_analyze_corruption((byte*)undo);
 		ut_error;
 	}
@@ -1602,7 +1606,8 @@ trx_undo_set_state_at_finish(
 	ut_ad(trx && undo && mtr);
 
 	if (undo->id >= TRX_RSEG_N_SLOTS) {
-		fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", undo->id);
+		fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
+			(ulong) undo->id);
 		mem_analyze_corruption((byte*)undo);
 		ut_error;
 	}
diff --git a/innobase/ut/ut0byte.c b/innobase/ut/ut0byte.c
index 02bdf2065ee..74198419560 100644
--- a/innobase/ut/ut0byte.c
+++ b/innobase/ut/ut0byte.c
@@ -18,7 +18,7 @@ Created 5/11/1994 Heikki Tuuri
 dulint	ut_dulint_zero 		= {0, 0};
 
 /* Maximum value for a dulint */
-dulint	ut_dulint_max 		= {0xFFFFFFFF, 0xFFFFFFFF};
+dulint	ut_dulint_max 		= {0xFFFFFFFFUL, 0xFFFFFFFFUL};
 
 /****************************************************************
 Sort function for dulint arrays. */
diff --git a/innobase/ut/ut0mem.c b/innobase/ut/ut0mem.c
index eca738f0924..65229335a09 100644
--- a/innobase/ut/ut0mem.c
+++ b/innobase/ut/ut0mem.c
@@ -77,8 +77,9 @@ ut_malloc_low(
 	ret = malloc(n + sizeof(ut_mem_block_t));
 
 	if (ret == NULL) {
+		ut_print_timestamp(stderr);
 		fprintf(stderr,
-		"InnoDB: Fatal error: cannot allocate %lu bytes of\n"
+		"  InnoDB: Fatal error: cannot allocate %lu bytes of\n"
 		"InnoDB: memory with malloc! Total allocated memory\n"
 		"InnoDB: by InnoDB %lu bytes. Operating system errno: %lu\n"
 		"InnoDB: Cannot continue operation!\n"
@@ -88,11 +89,11 @@ ut_malloc_low(
 		"InnoDB: a big enough maximum process size.\n"
 		"InnoDB: We now intentionally generate a seg fault so that\n"
 		"InnoDB: on Linux we get a stack trace.\n",
-		                  n, ut_total_allocated_memory,
+		                  (ulong) n, (ulong) ut_total_allocated_memory,
 #ifdef __WIN__
-			(ulint)GetLastError()
+			(ulong) GetLastError()
 #else
-			(ulint)errno
+			(ulong) errno
 #endif
 			);
 
@@ -106,7 +107,7 @@ ut_malloc_low(
 
 		/* Make an intentional seg fault so that we get a stack
 		trace */
-		printf("%lu\n", *ut_mem_null_ptr);	
+		printf("%lu\n", (ulong) *ut_mem_null_ptr);
 	}		
 
 	if (set_to_zero) {
@@ -141,6 +142,42 @@ ut_malloc(
 }
 
 /**************************************************************************
+Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs
+out. It cannot be used if we want to return an error message. On failure
+prints a message to stderr; on success frees the test block again. */
+
+ibool
+ut_test_malloc(
+/*===========*/
+			/* out: TRUE if succeeded */
+	ulint	n)	/* in: try to allocate this many bytes */
+{
+	void*	ret;
+	int	err;
+
+	ret = malloc(n);
+
+	if (ret == NULL) {
+		err = errno;	/* save it: the calls below may change errno */
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+		"  InnoDB: Error: cannot allocate %lu bytes of memory for\n"
+		"InnoDB: a BLOB with malloc! Total allocated memory\n"
+		"InnoDB: by InnoDB %lu bytes. Operating system errno: %d\n"
+		"InnoDB: Check if you should increase the swap file or\n"
+		"InnoDB: ulimits of your operating system.\n"
+		"InnoDB: On FreeBSD check you have compiled the OS with\n"
+		"InnoDB: a big enough maximum process size.\n",
+		(ulong) n, (ulong) ut_total_allocated_memory, err);
+		return(FALSE);
+	}
+
+	free(ret);
+
+	return(TRUE);
+}
+
+/**************************************************************************
 Frees a memory block allocated with ut_malloc. */
 
 void
@@ -190,7 +227,7 @@ ut_free_all_mem(void)
 	if (ut_total_allocated_memory != 0) {
 		fprintf(stderr,
 "InnoDB: Warning: after shutdown total allocated memory is %lu\n",
-		  ut_total_allocated_memory);
+		  (ulong) ut_total_allocated_memory);
 	}
 }
 
diff --git a/innobase/ut/ut0rnd.c b/innobase/ut/ut0rnd.c
index 3335861384f..85d2e6094c3 100644
--- a/innobase/ut/ut0rnd.c
+++ b/innobase/ut/ut0rnd.c
@@ -71,9 +71,8 @@ ut_find_prime(
 		
 		/* Found a prime */
 		break;
-	next_n: ;
+next_n: 	;
 	}
 	
 	return(n);
 }
-
diff --git a/innobase/ut/ut0ut.c b/innobase/ut/ut0ut.c
index be311764261..77f7a997777 100644
--- a/innobase/ut/ut0ut.c
+++ b/innobase/ut/ut0ut.c
@@ -20,26 +20,224 @@ Created 5/11/1994 Heikki Tuuri
 ibool	ut_always_false	= FALSE;
 
 /************************************************************
-Uses vsprintf to emulate sprintf so that the function always returns
-the printed length. Apparently in some old SCO Unixes sprintf did not
-return the printed length but a pointer to the end of the printed string. */
+Prints like printf. On the 64-bit Windows we substitute the format string
+%l -> %I64
+because we define ulint as unsigned __int64 and lint as __int64 on Windows,
+and both the Microsoft and Intel C compilers require the format string
+%I64 in that case instead of %l. Elsewhere the format is used as is. */
+
+int
+ut_printf(
+/*======*/
+			     /* out: the number of characters written, or
+			     negative in case of an error */
+        const char* format,  /* in: format of prints */
+        ...)                 /* in: arguments to be printed */
+{
+        va_list	args;
+	ulint		len;
+	const char*	format_end;
+	char*		newformat;
+	const char*	ptr;
+	char*		newptr;
+	int		ret;
+	char		format_buf_in_stack[500];
+
+	len = strlen(format);
+
+	/* Worst case: every "%l" doubles in size, plus the terminating NUL */
+	if (2 * len + 1 > sizeof(format_buf_in_stack)) {
+		newformat = malloc(2 * len + 1);
+	} else {
+		newformat = format_buf_in_stack;
+	}
 
-ulint
+	if (newformat == NULL) {
+		return(-1);
+	}
+
+	format_end = format + len;
+	ptr = format;
+	newptr = newformat;
+
+#if defined(__WIN__) && (defined(WIN64) || defined(_WIN64))
+	/* Replace %l with %I64 if it is not preceded with '\' */
+	while (ptr < format_end) {
+		if (*ptr == '%' && *(ptr + 1) == 'l'
+		    && (ptr == format || *(ptr - 1) != '\\')) {
+			memcpy(newptr, "%I64", 4);
+			ptr += 2;
+			newptr += 4;
+		} else {
+			*newptr = *ptr;
+			ptr++;
+			newptr++;
+		}
+	}
+	*newptr = '\0';
+	ut_a(newptr <= newformat + 2 * len);
+#else
+	strcpy(newformat, format);
+#endif
+        va_start(args, format);
+
+        ret = vprintf((const char*)newformat, args);
+
+        va_end(args);
+
+	if (newformat != format_buf_in_stack) {
+		free(newformat);
+	}
+
+        return(ret);
+}
+
+/************************************************************
+Prints like sprintf. On the 64-bit Windows we substitute the format string
+%l -> %I64
+because we define ulint as unsigned __int64 and lint as __int64 on Windows,
+and both the Microsoft and Intel C compilers require the format string
+%I64 in that case instead of %l. Elsewhere the format is used as is. */
+
+int
 ut_sprintf(
 /*=======*/
-        char*       buf,     /* in/out: buffer where to print */
+			     /* out: the number of characters written, or
+			     negative in case of an error */
+	char*	    buf,     /* in: buffer where to print, not bounds-checked */
         const char* format,  /* in: format of prints */
         ...)                 /* in: arguments to be printed */
 {
-        va_list   args;
-  
+        va_list	args;
+	ulint		len;
+	const char*	format_end;
+	char*		newformat;
+	const char*	ptr;
+	char*		newptr;
+	int		ret;
+	char		format_buf_in_stack[500];
+
+	len = strlen(format);
+
+	/* Worst case: every "%l" doubles in size, plus the terminating NUL */
+	if (2 * len + 1 > sizeof(format_buf_in_stack)) {
+		newformat = malloc(2 * len + 1);
+	} else {
+		newformat = format_buf_in_stack;
+	}
+
+	if (newformat == NULL) {
+		return(-1);
+	}
+
+	format_end = format + len;
+	ptr = format;
+	newptr = newformat;
+
+#if defined(__WIN__) && (defined(WIN64) || defined(_WIN64))
+	/* Replace %l with %I64 if it is not preceded with '\' */
+	while (ptr < format_end) {
+		if (*ptr == '%' && *(ptr + 1) == 'l'
+		    && (ptr == format || *(ptr - 1) != '\\')) {
+			memcpy(newptr, "%I64", 4);
+			ptr += 2;
+			newptr += 4;
+		} else {
+			*newptr = *ptr;
+			ptr++;
+			newptr++;
+		}
+	}
+	*newptr = '\0';
+	ut_a(newptr <= newformat + 2 * len);
+#else
+	strcpy(newformat, format);
+#endif
         va_start(args, format);
 
-        vsprintf(buf, format, args);
+        ret = vsprintf(buf, (const char*)newformat, args);
 
         va_end(args);
 
-        return((ulint)strlen(buf));
+	if (newformat != format_buf_in_stack) {
+		free(newformat);
+	}
+
+        return(ret);
+}
+
+/************************************************************
+Prints like fprintf. On the 64-bit Windows we substitute the format string
+%l -> %I64
+because we define ulint as unsigned __int64 and lint as __int64 on Windows,
+and both the Microsoft and Intel C compilers require the format string
+%I64 in that case instead of %l. Elsewhere the format is used as is. */
+
+int
+ut_fprintf(
+/*=======*/
+			     /* out: the number of characters written, or
+			     negative in case of an error */
+	FILE*	    stream,  /* in: stream where to print */
+        const char* format,  /* in: format of prints */
+        ...)                 /* in: arguments to be printed */
+{
+        va_list	args;
+	ulint		len;
+	const char*	format_end;
+	char*		newformat;
+	const char*	ptr;
+	char*		newptr;
+	int		ret;
+	char		format_buf_in_stack[500];
+
+	len = strlen(format);
+
+	/* Worst case: every "%l" doubles in size, plus the terminating NUL */
+	if (2 * len + 1 > sizeof(format_buf_in_stack)) {
+		newformat = malloc(2 * len + 1);
+	} else {
+		newformat = format_buf_in_stack;
+	}
+
+	if (newformat == NULL) {
+		return(-1);
+	}
+
+	format_end = format + len;
+	ptr = format;
+	newptr = newformat;
+
+#if defined(__WIN__) && (defined(WIN64) || defined(_WIN64))
+	/* Replace %l with %I64 if it is not preceded with '\' */
+	while (ptr < format_end) {
+		if (*ptr == '%' && *(ptr + 1) == 'l'
+		    && (ptr == format || *(ptr - 1) != '\\')) {
+			memcpy(newptr, "%I64", 4);
+			ptr += 2;
+			newptr += 4;
+		} else {
+			*newptr = *ptr;
+			ptr++;
+			newptr++;
+		}
+	}
+	*newptr = '\0';
+	ut_a(newptr <= newformat + 2 * len);
+#else
+	strcpy(newformat, format);
+#endif
+        va_start(args, format);
+
+        ret = vfprintf(stream, (const char*)newformat, args);
+
+        va_end(args);
+
+	if (newformat != format_buf_in_stack) {
+		free(newformat);
+	}
+
+        return(ret);
 }
 
 /************************************************************
@@ -63,7 +261,7 @@ ut_get_high32(
 }
 
 /************************************************************
-The following function returns a clock time in milliseconds. */
+The following function returns elapsed CPU time in milliseconds. */
 
 ulint
 ut_clock(void)
@@ -182,6 +380,50 @@ ut_sprintf_timestamp(
 }
 
 /**************************************************************
+Sprintfs the current local time to a buffer as YYMMDD_HH_MM_SS: every field is
+zero-padded, so there are no spaces, and '_' is used where ':' would be. */
+
+void
+ut_sprintf_timestamp_without_extra_chars(
+/*=====================================*/
+	char*	buf) /* in: buffer where to sprintf, at least 16 bytes */
+{
+#ifdef __WIN__
+  	SYSTEMTIME cal_tm;
+
+  	GetLocalTime(&cal_tm);
+
+  	sprintf(buf, "%02d%02d%02d_%02d_%02d_%02d",
+	  (int)cal_tm.wYear % 100,
+	  (int)cal_tm.wMonth,
+	  (int)cal_tm.wDay,
+	  (int)cal_tm.wHour,
+	  (int)cal_tm.wMinute,
+	  (int)cal_tm.wSecond);
+#else
+	struct tm  cal_tm;
+  	struct tm* cal_tm_ptr;
+  	time_t     tm;
+
+  	time(&tm);
+
+#ifdef HAVE_LOCALTIME_R
+  	localtime_r(&tm, &cal_tm);
+  	cal_tm_ptr = &cal_tm;
+#else
+  	cal_tm_ptr = localtime(&tm);
+#endif
+  	sprintf(buf, "%02d%02d%02d_%02d_%02d_%02d",
+	  cal_tm_ptr->tm_year % 100,
+	  cal_tm_ptr->tm_mon + 1,
+	  cal_tm_ptr->tm_mday,
+	  cal_tm_ptr->tm_hour,
+	  cal_tm_ptr->tm_min,
+	  cal_tm_ptr->tm_sec);
+#endif
+}
+
+/**************************************************************
 Returns current year, month, day. */
 
 void
@@ -232,7 +474,7 @@ ut_delay(
 	}
 
 	if (ut_always_false) {
-		printf("%lu", j);
+		printf("%lu", (ulong) j);
 	}
 	
 	return(j);
@@ -250,12 +492,12 @@ ut_print_buf(
 	byte*	data;
 	ulint	i;
 
-	printf(" len %lu; hex ", len);
+	printf(" len %lu; hex ", (ulong) len);
 			
 	data = buf;
 
 	for (i = 0; i < len; i++) {
-		printf("%02lx", (ulint)*data);
+		printf("%02lx", (ulong) *data);
 		data++;
 	}
 
@@ -290,12 +532,12 @@ ut_sprintf_buf(
 
 	n = 0;
 	
-	n += sprintf(str + n, " len %lu; hex ", len);
+	n += sprintf(str + n, " len %lu; hex ", (ulong) len);
 			
 	data = buf;
 
 	for (i = 0; i < len; i++) {
-		n += sprintf(str + n, "%02lx", (ulint)*data);
+		n += sprintf(str + n, "%02lx", (ulong) *data);
 		data++;
 	}