57 files changed, 2894 insertions, 219 deletions
diff --git a/drm/nouveau/dispnv04/dac.c b/drm/nouveau/dispnv04/dac.c
index b48eec395..c02f8c864 100644
--- a/drm/nouveau/dispnv04/dac.c
+++ b/drm/nouveau/dispnv04/dac.c
@@ -549,8 +549,12 @@ nv04_dac_create(struct drm_connector *connector, struct dcb_output *entry)
 	else
 		helper = &nv04_dac_helper_funcs;
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
 	drm_encoder_init(dev, encoder, &nv04_dac_funcs, DRM_MODE_ENCODER_DAC,
 			 NULL);
+#else
+	drm_encoder_init(dev, encoder, &nv04_dac_funcs, DRM_MODE_ENCODER_DAC);
+#endif
 	drm_encoder_helper_add(encoder, helper);
 
 	encoder->possible_crtcs = entry->heads;
diff --git a/drm/nouveau/dispnv04/dfp.c b/drm/nouveau/dispnv04/dfp.c
index 05bfd151d..3f88afa4e 100644
--- a/drm/nouveau/dispnv04/dfp.c
+++ b/drm/nouveau/dispnv04/dfp.c
@@ -705,7 +705,11 @@ nv04_dfp_create(struct drm_connector *connector, struct dcb_output *entry)
 	nv_encoder->dcb = entry;
 	nv_encoder->or = ffs(entry->or) - 1;
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
 	drm_encoder_init(connector->dev, encoder, &nv04_dfp_funcs, type, NULL);
+#else
+	drm_encoder_init(connector->dev, encoder, &nv04_dfp_funcs, type);
+#endif
 	drm_encoder_helper_add(encoder, helper);
 
 	encoder->possible_crtcs = entry->heads;
diff --git a/drm/nouveau/dispnv04/tvnv04.c b/drm/nouveau/dispnv04/tvnv04.c
index 54e9fb9eb..fd6768f9e 100644
--- a/drm/nouveau/dispnv04/tvnv04.c
+++ b/drm/nouveau/dispnv04/tvnv04.c
@@ -223,8 +223,12 @@ nv04_tv_create(struct drm_connector *connector, struct dcb_output *entry)
 	/* Initialize the common members */
 	encoder = to_drm_encoder(nv_encoder);
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
 	drm_encoder_init(dev, encoder, &nv04_tv_funcs, DRM_MODE_ENCODER_TVDAC,
 			 NULL);
+#else
+	drm_encoder_init(dev, encoder, &nv04_tv_funcs, DRM_MODE_ENCODER_TVDAC);
+#endif
 	drm_encoder_helper_add(encoder, &nv04_tv_helper_funcs);
 
 	nv_encoder->enc_save = drm_i2c_encoder_save;
diff --git a/drm/nouveau/dispnv04/tvnv17.c b/drm/nouveau/dispnv04/tvnv17.c
index 163317d26..31678682b 100644
--- a/drm/nouveau/dispnv04/tvnv17.c
+++ b/drm/nouveau/dispnv04/tvnv17.c
@@ -24,6 +24,8 @@
  *
  */
 
+#include <linux/version.h>
+
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 #include "nouveau_drm.h"
@@ -656,7 +658,9 @@ static int nv17_tv_create_resources(struct drm_encoder *encoder,
 				nouveau_tv_norm);
 	}
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
 	drm_mode_create_tv_properties(dev, num_tv_norms, nv17_tv_norm_names);
+#endif
 
 	drm_object_attach_property(&connector->base,
 					conf->tv_select_subconnector_property,
@@ -814,10 +818,16 @@ nv17_tv_create(struct drm_connector *connector, struct dcb_output *entry)
 	tv_enc->base.dcb = entry;
 	tv_enc->base.or = ffs(entry->or) - 1;
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
 	drm_encoder_init(dev, encoder, &nv17_tv_funcs, DRM_MODE_ENCODER_TVDAC,
 			 NULL);
 	drm_encoder_helper_add(encoder, &nv17_tv_helper_funcs);
 	to_encoder_slave(encoder)->slave_funcs = &nv17_tv_slave_funcs;
+#else
+	drm_encoder_init(dev, encoder, &nv17_tv_funcs, DRM_MODE_ENCODER_TVDAC);
+	drm_encoder_helper_add(encoder, &nv17_tv_helper_funcs);
+	to_encoder_slave(encoder)->slave_funcs = (struct drm_encoder_slave_funcs *)&nv17_tv_slave_funcs;
+#endif
 
 	tv_enc->base.enc_save = nv17_tv_save;
 	tv_enc->base.enc_restore = nv17_tv_restore;
diff --git a/drm/nouveau/include/nvkm/core/device.h b/drm/nouveau/include/nvkm/core/device.h
index 913192c94..d154a75e9 100644
--- a/drm/nouveau/include/nvkm/core/device.h
+++ b/drm/nouveau/include/nvkm/core/device.h
@@ -24,6 +24,7 @@ enum nvkm_devidx {
 	NVKM_SUBDEV_VOLT,
 	NVKM_SUBDEV_THERM,
 	NVKM_SUBDEV_CLK,
+	NVKM_SUBDEV_SECBOOT,
 
 	NVKM_ENGINE_DMAOBJ,
 	NVKM_ENGINE_IFB,
@@ -119,6 +120,7 @@ struct nvkm_device {
 	struct nvkm_therm *therm;
 	struct nvkm_timer *timer;
 	struct nvkm_volt *volt;
+	struct nvkm_secboot *secboot;
 
 	struct nvkm_engine *bsp;
 	struct nvkm_engine *ce[3];
@@ -184,6 +186,7 @@ struct nvkm_device_chip {
 	int (*therm  )(struct nvkm_device *, int idx, struct nvkm_therm **);
 	int (*timer  )(struct nvkm_device *, int idx, struct nvkm_timer **);
 	int (*volt   )(struct nvkm_device *, int idx, struct nvkm_volt **);
+	int (*secboot)(struct nvkm_device *, int idx, struct nvkm_secboot **);
 
 	int (*bsp    )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*ce[3]  )(struct nvkm_device *, int idx, struct nvkm_engine **);
diff --git a/drm/nouveau/include/nvkm/core/firmware.h b/drm/nouveau/include/nvkm/core/firmware.h
new file mode 100644
index 000000000..a626ce378
--- /dev/null
+++ b/drm/nouveau/include/nvkm/core/firmware.h
@@ -0,0 +1,11 @@
+#ifndef __NVKM_FIRMWARE_H__
+#define __NVKM_FIRMWARE_H__
+
+#include <core/device.h>
+
+int nvkm_firmware_get(struct nvkm_device *device, const char *fwname,
+		      const struct firmware **fw);
+
+void nvkm_firmware_put(const struct firmware *fw);
+
+#endif
diff --git a/drm/nouveau/include/nvkm/core/gpuobj.h b/drm/nouveau/include/nvkm/core/gpuobj.h
index d4f56eafb..c23da4f05 100644
--- a/drm/nouveau/include/nvkm/core/gpuobj.h
+++ b/drm/nouveau/include/nvkm/core/gpuobj.h
@@ -37,4 +37,8 @@ int nvkm_gpuobj_wrap(struct nvkm_memory *, struct nvkm_gpuobj **);
 int nvkm_gpuobj_map(struct nvkm_gpuobj *, struct nvkm_vm *, u32 access,
 		    struct nvkm_vma *);
 void nvkm_gpuobj_unmap(struct nvkm_vma *);
+void nvkm_gpuobj_memcpy_to(struct nvkm_gpuobj *dst, u32 dstoffset, void *src,
+			   u32 length);
+void nvkm_gpuobj_memcpy_from(void *dst, struct nvkm_gpuobj *src, u32 srcoffset,
+			     u32 length);
 #endif
diff --git a/drm/nouveau/include/nvkm/subdev/secboot.h b/drm/nouveau/include/nvkm/subdev/secboot.h
new file mode 100644
index 000000000..e977857df
--- /dev/null
+++ b/drm/nouveau/include/nvkm/subdev/secboot.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NVKM_SECURE_BOOT_H__
+#define __NVKM_SECURE_BOOT_H__
+
+#include <core/subdev.h>
+
+enum nvkm_secboot_falcon {
+	NVKM_SECBOOT_FALCON_PMU	= 0,
+	NVKM_SECBOOT_FALCON_RESERVED = 1,
+	NVKM_SECBOOT_FALCON_FECS = 2,
+	NVKM_SECBOOT_FALCON_GPCCS = 3,
+	NVKM_SECBOOT_FALCON_END = 4,
+	NVKM_SECBOOT_FALCON_INVALID = 0xffffffff,
+};
+
+/**
+ * @falcon_id:		falcon that will perform secure boot
+ * @base:		base IO address of the falcon performing secure boot
+ * @irq_mask:		IRQ mask of the falcon performing secure boot
+ * @enable_mask:	enable mask of  the falcon performing secure boot
+*/
+struct nvkm_secboot {
+	const struct nvkm_secboot_func *func;
+	struct nvkm_subdev subdev;
+
+	u32 base;
+	u32 irq_mask;
+	u32 enable_mask;
+};
+#define nvkm_secboot(p) container_of((p), struct nvkm_secboot, subdev)
+
+bool nvkm_secboot_is_managed(struct nvkm_secboot *, enum nvkm_secboot_falcon);
+int nvkm_secboot_reset(struct nvkm_secboot *, u32 falcons_mask);
+
+int gm200_secboot_new(struct nvkm_device *, int, struct nvkm_secboot **);
+int gm20b_secboot_new(struct nvkm_device *, int, struct nvkm_secboot **);
+
+#endif
diff --git a/drm/nouveau/nouveau_bo.c b/drm/nouveau/nouveau_bo.c
index 78f520d05..b7671622c 100644
--- a/drm/nouveau/nouveau_bo.c
+++ b/drm/nouveau/nouveau_bo.c
@@ -173,6 +173,33 @@ nouveau_bo_fixup_align(struct nouveau_bo *nvbo, u32 flags,
 	*size = roundup(*size, PAGE_SIZE);
 }
 
+void
+nouveau_bo_update_tiling(struct nouveau_drm *drm, struct nouveau_bo *nvbo,
+			 struct nvkm_mem *mem)
+{
+	switch (drm->device.info.family) {
+	case NV_DEVICE_INFO_V0_TNT:
+	case NV_DEVICE_INFO_V0_CELSIUS:
+	case NV_DEVICE_INFO_V0_KELVIN:
+	case NV_DEVICE_INFO_V0_RANKINE:
+	case NV_DEVICE_INFO_V0_CURIE:
+		break;
+	case NV_DEVICE_INFO_V0_TESLA:
+		if (drm->device.info.chipset != 0x50)
+			mem->memtype = (nvbo->tile_flags & 0x7f00) >> 8;
+		break;
+	case NV_DEVICE_INFO_V0_FERMI:
+	case NV_DEVICE_INFO_V0_KEPLER:
+	case NV_DEVICE_INFO_V0_MAXWELL:
+		mem->memtype = (nvbo->tile_flags & 0xff00) >> 8;
+		break;
+	default:
+		NV_WARN(drm, "%s: unhandled family type %x\n", __func__,
+			drm->device.info.family);
+		break;
+	}
+}
+
 int
 nouveau_bo_new(struct drm_device *dev, int size, int align,
 	       uint32_t flags, uint32_t tile_mode, uint32_t tile_flags,
@@ -491,6 +518,40 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
 }
 
 int
+nouveau_bo_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
+		bool exclusive, bool intr)
+{
+	struct fence *fence;
+	struct reservation_object *resv = nvbo->bo.resv;
+	struct reservation_object_list *fobj;
+	int ret = 0, i;
+
+	if (!exclusive) {
+		ret = reservation_object_reserve_shared(resv);
+
+		if (ret)
+			return ret;
+	}
+
+	fobj = reservation_object_get_list(resv);
+	fence = reservation_object_get_excl(resv);
+
+	if (fence && (!exclusive || !fobj || !fobj->shared_count))
+		return nouveau_fence_sync(fence, chan, intr);
+
+	if (!exclusive || !fobj)
+		return ret;
+
+	for (i = 0; i < fobj->shared_count && !ret; ++i) {
+		fence = rcu_dereference_protected(fobj->shared[i],
+						reservation_object_held(resv));
+		ret |= nouveau_fence_sync(fence, chan, intr);
+	}
+
+	return ret;
+}
+
+int
 nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
 		    bool no_wait_gpu)
 {
@@ -1073,7 +1134,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 	}
 
 	mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
-	ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, intr);
+	ret = nouveau_bo_sync(nouveau_bo(bo), chan, true, intr);
 	if (ret == 0) {
 		ret = drm->ttm.move(chan, bo, &bo->mem, new_mem);
 		if (ret == 0) {
diff --git a/drm/nouveau/nouveau_bo.h b/drm/nouveau/nouveau_bo.h
index e42360983..7f4177faf 100644
--- a/drm/nouveau/nouveau_bo.h
+++ b/drm/nouveau/nouveau_bo.h
@@ -69,6 +69,8 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo)
 extern struct ttm_bo_driver nouveau_bo_driver;
 
 void nouveau_bo_move_init(struct nouveau_drm *);
+void nouveau_bo_update_tiling(struct nouveau_drm *, struct nouveau_bo *,
+			      struct nvkm_mem *);
 int  nouveau_bo_new(struct drm_device *, int size, int align, u32 flags,
 		    u32 tile_mode, u32 tile_flags, struct sg_table *sg,
 		    struct reservation_object *robj,
@@ -86,6 +88,8 @@ int  nouveau_bo_validate(struct nouveau_bo *, bool interruptible,
 			 bool no_wait_gpu);
 void nouveau_bo_sync_for_device(struct nouveau_bo *nvbo);
 void nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo);
+int  nouveau_bo_sync(struct nouveau_bo *, struct nouveau_channel *,
+		     bool exclusive, bool intr);
 
 struct nvkm_vma *
 nouveau_bo_vma_find(struct nouveau_bo *, struct nvkm_vm *);
diff --git a/drm/nouveau/nouveau_connector.c b/drm/nouveau/nouveau_connector.c
index fcebfae5d..f9a5030a0 100644
--- a/drm/nouveau/nouveau_connector.c
+++ b/drm/nouveau/nouveau_connector.c
@@ -27,6 +27,7 @@
 #include <acpi/button.h>
 
 #include <linux/pm_runtime.h>
+#include <linux/version.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_edid.h>
@@ -935,7 +936,11 @@ nouveau_connector_funcs_lvds = {
 	.force = nouveau_connector_force
 };
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
 static int
+#else
+static void
+#endif
 nouveau_connector_dp_dpms(struct drm_connector *connector, int mode)
 {
 	struct nouveau_encoder *nv_encoder = NULL;
@@ -954,7 +959,9 @@ nouveau_connector_dp_dpms(struct drm_connector *connector, int mode)
 		}
 	}
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
 	return drm_helper_connector_dpms(connector, mode);
+#endif
 }
 
 static const struct drm_connector_funcs
diff --git a/drm/nouveau/nouveau_display.c b/drm/nouveau/nouveau_display.c
index 24be27d3c..97b91ef03 100644
--- a/drm/nouveau/nouveau_display.c
+++ b/drm/nouveau/nouveau_display.c
@@ -42,6 +42,8 @@
 #include <nvif/cl0046.h>
 #include <nvif/event.h>
 
+#include <linux/version.h>
+
 static int
 nouveau_display_vblank_handler(struct nvif_notify *notify)
 {
@@ -154,6 +156,7 @@ int
 nouveau_display_vblstamp(struct drm_device *dev, unsigned int pipe,
 			 int *max_error, struct timeval *time, unsigned flags)
 {
+#ifndef CONFIG_ARCH_TEGRA
 	struct drm_crtc *crtc;
 
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -163,6 +166,7 @@ nouveau_display_vblstamp(struct drm_device *dev, unsigned int pipe,
 					&crtc->hwmode);
 		}
 	}
+#endif
 
 	return -EINVAL;
 }
@@ -254,7 +258,11 @@ nouveau_framebuffer_init(struct drm_device *dev,
 	struct drm_framebuffer *fb = &nv_fb->base;
 	int ret;
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
 	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
+#else
+	drm_helper_mode_fill_fb_struct(fb, (struct drm_mode_fb_cmd2 *)mode_cmd);
+#endif
 	nv_fb->nvbo = nvbo;
 
 	ret = drm_framebuffer_init(dev, fb, &nouveau_framebuffer_funcs);
@@ -273,7 +281,11 @@ nouveau_framebuffer_init(struct drm_device *dev,
 static struct drm_framebuffer *
 nouveau_user_framebuffer_create(struct drm_device *dev,
 				struct drm_file *file_priv,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
 				const struct drm_mode_fb_cmd2 *mode_cmd)
+#else
+				struct drm_mode_fb_cmd2 *mode_cmd)
+#endif
 {
 	struct nouveau_framebuffer *nouveau_fb;
 	struct drm_gem_object *gem;
@@ -680,7 +692,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan,
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 
 	/* Synchronize with the old framebuffer */
-	ret = nouveau_fence_sync(old_bo, chan, false, false);
+	ret = nouveau_bo_sync(old_bo, chan, false, false);
 	if (ret)
 		goto fail;
 
@@ -744,7 +756,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 		goto fail_unpin;
 
 	/* synchronise rendering channel with the kernel's channel */
-	ret = nouveau_fence_sync(new_bo, chan, false, true);
+	ret = nouveau_bo_sync(new_bo, chan, false, true);
 	if (ret) {
 		ttm_bo_unreserve(&new_bo->bo);
 		goto fail_unpin;
@@ -840,6 +852,7 @@ nouveau_finish_page_flip(struct nouveau_channel *chan,
 	}
 
 	s = list_first_entry(&fctx->flip, struct nouveau_page_flip_state, head);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
 	if (s->event) {
 		if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
 			drm_arm_vblank_event(dev, s->crtc, s->event);
@@ -854,6 +867,11 @@ nouveau_finish_page_flip(struct nouveau_channel *chan,
 		/* Give up ownership of vblank for page-flipped crtc */
 		drm_vblank_put(dev, s->crtc);
 	}
+#else
+	if (s->event)
+		drm_send_vblank_event(dev, s->crtc, s->event);
+	drm_vblank_put(dev, s->crtc);
+#endif
 
 	list_del(&s->head);
 	if (ps)
diff --git a/drm/nouveau/nouveau_display.h b/drm/nouveau/nouveau_display.h
index 5a57d8b47..81b7d3c09 100644
--- a/drm/nouveau/nouveau_display.h
+++ b/drm/nouveau/nouveau_display.h
@@ -5,6 +5,8 @@
 
 #include "nouveau_drm.h"
 
+#include <linux/version.h>
+
 struct nouveau_framebuffer {
 	struct drm_framebuffer base;
 	struct nouveau_bo *nvbo;
diff --git a/drm/nouveau/nouveau_dma.c b/drm/nouveau/nouveau_dma.c
index d168c6353..f0f5be7e5 100644
--- a/drm/nouveau/nouveau_dma.c
+++ b/drm/nouveau/nouveau_dma.c
@@ -79,23 +79,31 @@ READ_GET(struct nouveau_channel *chan, uint64_t *prev_get, int *timeout)
 }
 
 void
-nv50_dma_push(struct nouveau_channel *chan, struct nouveau_bo *bo,
-	      int delta, int length)
+nv50_dma_push_bo(struct nouveau_channel *chan, struct nouveau_bo *bo,
+		 int delta, int length)
 {
 	struct nouveau_cli *cli = (void *)chan->user.client;
-	struct nouveau_bo *pb = chan->push.buffer;
 	struct nvkm_vma *vma;
-	int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base;
 	u64 offset;
 
 	vma = nouveau_bo_vma_find(bo, cli->vm);
 	BUG_ON(!vma);
 	offset = vma->offset + delta;
 
+	nv50_dma_push(chan, lower_32_bits(offset),
+		      upper_32_bits(offset) | length << 8);
+}
+
+void
+nv50_dma_push(struct nouveau_channel *chan, uint32_t entry0, uint32_t entry1)
+{
+	struct nouveau_bo *pb = chan->push.buffer;
+	int ip = (chan->dma.ib_put * 2) + chan->dma.ib_base;
+
 	BUG_ON(chan->dma.ib_free < 1);
 
-	nouveau_bo_wr32(pb, ip++, lower_32_bits(offset));
-	nouveau_bo_wr32(pb, ip++, upper_32_bits(offset) | length << 8);
+	nouveau_bo_wr32(pb, ip++, entry0);
+	nouveau_bo_wr32(pb, ip++, entry1);
 
 	chan->dma.ib_put = (chan->dma.ib_put + 1) & chan->dma.ib_max;
 
diff --git a/drm/nouveau/nouveau_dma.h b/drm/nouveau/nouveau_dma.h
index aff3a9d0a..089ed8498 100644
--- a/drm/nouveau/nouveau_dma.h
+++ b/drm/nouveau/nouveau_dma.h
@@ -31,8 +31,10 @@
 #include "nouveau_chan.h"
 
 int nouveau_dma_wait(struct nouveau_channel *, int slots, int size);
-void nv50_dma_push(struct nouveau_channel *, struct nouveau_bo *,
-		   int delta, int length);
+void nv50_dma_push(struct nouveau_channel *chan, uint32_t entry0,
+		   uint32_t entry1);
+void nv50_dma_push_bo(struct nouveau_channel *, struct nouveau_bo *,
+		      int delta, int length);
 
 /*
  * There's a hw race condition where you can't jump to your PUT offset,
@@ -151,8 +153,8 @@ FIRE_RING(struct nouveau_channel *chan)
 	chan->accel_done = true;
 
 	if (chan->dma.ib_max) {
-		nv50_dma_push(chan, chan->push.buffer, chan->dma.put << 2,
-			      (chan->dma.cur - chan->dma.put) << 2);
+		nv50_dma_push_bo(chan, chan->push.buffer, chan->dma.put << 2,
+			         (chan->dma.cur - chan->dma.put) << 2);
 	} else {
 		WRITE_PUT(chan->dma.cur);
 	}
diff --git a/drm/nouveau/nouveau_drm.c b/drm/nouveau/nouveau_drm.c
index 2f2f252e3..c2d542105 100644
--- a/drm/nouveau/nouveau_drm.c
+++ b/drm/nouveau/nouveau_drm.c
@@ -28,6 +28,7 @@
 #include <linux/pci.h>
 #include <linux/pm_runtime.h>
 #include <linux/vga_switcheroo.h>
+#include <linux/version.h>
 
 #include "drmP.h"
 #include "drm_crtc_helper.h"
@@ -876,6 +877,8 @@ nouveau_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_FINI, nouveau_gem_ioctl_cpu_fini, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_INFO, nouveau_gem_ioctl_info, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF_2, nouveau_gem_ioctl_pushbuf_2, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_SET_INFO, nouveau_gem_ioctl_set_info, DRM_AUTH|DRM_RENDER_ALLOW),
 };
 
 long
@@ -918,6 +921,10 @@ nouveau_driver_fops = {
 	.llseek = noop_llseek,
 };
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0)
+#define DRIVER_KMS_LEGACY_CONTEXT 0
+#endif
+
 static struct drm_driver
 driver_stub = {
 	.driver_features =
@@ -936,10 +943,14 @@ driver_stub = {
 	.debugfs_cleanup = nouveau_drm_debugfs_cleanup,
 #endif
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
 	.get_vblank_counter = drm_vblank_no_hw_counter,
+#endif
 	.enable_vblank = nouveau_display_vblank_enable,
 	.disable_vblank = nouveau_display_vblank_disable,
+#ifndef CONFIG_ARCH_TEGRA
 	.get_scanout_position = nouveau_display_scanoutpos,
+#endif
 	.get_vblank_timestamp = nouveau_display_vblstamp,
 
 	.ioctls = nouveau_ioctls,
diff --git a/drm/nouveau/nouveau_fbcon.c b/drm/nouveau/nouveau_fbcon.c
index 59f27e774..40d198cca 100644
--- a/drm/nouveau/nouveau_fbcon.c
+++ b/drm/nouveau/nouveau_fbcon.c
@@ -37,6 +37,7 @@
 #include <linux/screen_info.h>
 #include <linux/vga_switcheroo.h>
 #include <linux/console.h>
+#include <linux/version.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_crtc.h>
@@ -84,7 +85,9 @@ nouveau_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 
 	if (ret != -ENODEV)
 		nouveau_fbcon_gpu_lockup(info);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
 	drm_fb_helper_cfb_fillrect(info, rect);
+#endif
 }
 
 static void
@@ -116,7 +119,9 @@ nouveau_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *image)
 
 	if (ret != -ENODEV)
 		nouveau_fbcon_gpu_lockup(info);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
 	drm_fb_helper_cfb_copyarea(info, image);
+#endif
 }
 
 static void
@@ -148,7 +153,9 @@ nouveau_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 
 	if (ret != -ENODEV)
 		nouveau_fbcon_gpu_lockup(info);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
 	drm_fb_helper_cfb_imageblit(info, image);
+#endif
 }
 
 static int
@@ -221,9 +228,11 @@ static struct fb_ops nouveau_fbcon_sw_ops = {
 	.fb_release = nouveau_fbcon_release,
 	.fb_check_var = drm_fb_helper_check_var,
 	.fb_set_par = drm_fb_helper_set_par,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
 	.fb_fillrect = drm_fb_helper_cfb_fillrect,
 	.fb_copyarea = drm_fb_helper_cfb_copyarea,
 	.fb_imageblit = drm_fb_helper_cfb_imageblit,
+#endif
 	.fb_pan_display = drm_fb_helper_pan_display,
 	.fb_blank = drm_fb_helper_blank,
 	.fb_setcmap = drm_fb_helper_setcmap,
@@ -388,7 +397,11 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
 
 	mutex_lock(&dev->struct_mutex);
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
 	info = drm_fb_helper_alloc_fbi(helper);
+#else
+	info = ERR_PTR(-EINVAL);
+#endif
 	if (IS_ERR(info)) {
 		ret = PTR_ERR(info);
 		goto out_unlock;
@@ -466,8 +479,10 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon)
 {
 	struct nouveau_framebuffer *nouveau_fb = &fbcon->nouveau_fb;
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
 	drm_fb_helper_unregister_fbi(&fbcon->helper);
 	drm_fb_helper_release_fbi(&fbcon->helper);
+#endif
 
 	if (nouveau_fb->nvbo) {
 		nouveau_bo_unmap(nouveau_fb->nvbo);
@@ -505,7 +520,9 @@ nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
 		console_lock();
 		if (state == FBINFO_STATE_RUNNING)
 			nouveau_fbcon_accel_restore(dev);
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0)
 		drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
+#endif
 		if (state != FBINFO_STATE_RUNNING)
 			nouveau_fbcon_accel_save_disable(dev);
 		console_unlock();
diff --git a/drm/nouveau/nouveau_fence.c b/drm/nouveau/nouveau_fence.c
index 9a8c5b727..771a32b4e 100644
--- a/drm/nouveau/nouveau_fence.c
+++ b/drm/nouveau/nouveau_fence.c
@@ -38,6 +38,10 @@
 #include "nouveau_dma.h"
 #include "nouveau_fence.h"
 
+#ifdef CONFIG_SYNC
+#include "../drivers/staging/android/sync.h"
+#endif
+
 static const struct fence_ops nouveau_fence_ops_uevent;
 static const struct fence_ops nouveau_fence_ops_legacy;
 
@@ -388,66 +392,25 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
 }
 
 int
-nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive, bool intr)
+nouveau_fence_sync(struct fence *fence, struct nouveau_channel *chan, bool intr)
 {
 	struct nouveau_fence_chan *fctx = chan->fence;
-	struct fence *fence;
-	struct reservation_object *resv = nvbo->bo.resv;
-	struct reservation_object_list *fobj;
+	struct nouveau_channel *prev = NULL;
 	struct nouveau_fence *f;
-	int ret = 0, i;
-
-	if (!exclusive) {
-		ret = reservation_object_reserve_shared(resv);
-
-		if (ret)
-			return ret;
-	}
-
-	fobj = reservation_object_get_list(resv);
-	fence = reservation_object_get_excl(resv);
-
-	if (fence && (!exclusive || !fobj || !fobj->shared_count)) {
-		struct nouveau_channel *prev = NULL;
-		bool must_wait = true;
-
-		f = nouveau_local_fence(fence, chan->drm);
-		if (f) {
-			rcu_read_lock();
-			prev = rcu_dereference(f->channel);
-			if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0))
-				must_wait = false;
-			rcu_read_unlock();
-		}
-
-		if (must_wait)
-			ret = fence_wait(fence, intr);
+	bool must_wait = true;
+	int ret = 0;
 
-		return ret;
+	f = nouveau_local_fence(fence, chan->drm);
+	if (f) {
+		rcu_read_lock();
+		prev = rcu_dereference(f->channel);
+		if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0))
+			must_wait = false;
+		rcu_read_unlock();
 	}
 
-	if (!exclusive || !fobj)
-		return ret;
-
-	for (i = 0; i < fobj->shared_count && !ret; ++i) {
-		struct nouveau_channel *prev = NULL;
-		bool must_wait = true;
-
-		fence = rcu_dereference_protected(fobj->shared[i],
-						reservation_object_held(resv));
-
-		f = nouveau_local_fence(fence, chan->drm);
-		if (f) {
-			rcu_read_lock();
-			prev = rcu_dereference(f->channel);
-			if (prev && (prev == chan || fctx->sync(f, prev, chan) == 0))
-				must_wait = false;
-			rcu_read_unlock();
-		}
-
-		if (must_wait)
-			ret = fence_wait(fence, intr);
-	}
+	if (must_wait)
+		ret = fence_wait(fence, intr);
 
 	return ret;
 }
@@ -580,11 +543,80 @@ static bool nouveau_fence_enable_signaling(struct fence *f)
 	return ret;
 }
 
+static void nouveau_fence_timeline_value_str(struct fence *fence, char *str,
+					     int size)
+{
+	struct nouveau_fence *f = from_fence(fence);
+	struct nouveau_fence_chan *fctx = nouveau_fctx(f);
+	u32 cur;
+
+	cur = f->channel ? fctx->read(f->channel) : 0;
+	snprintf(str, size, "%d", cur);
+}
+
+static void
+nouveau_fence_value_str(struct fence *fence, char *str, int size)
+{
+	snprintf(str, size, "%d", fence->seqno);
+}
+
 static const struct fence_ops nouveau_fence_ops_uevent = {
 	.get_driver_name = nouveau_fence_get_get_driver_name,
 	.get_timeline_name = nouveau_fence_get_timeline_name,
 	.enable_signaling = nouveau_fence_enable_signaling,
 	.signaled = nouveau_fence_is_signaled,
 	.wait = fence_default_wait,
-	.release = NULL
+	.release = NULL,
+	.fence_value_str = nouveau_fence_value_str,
+	.timeline_value_str = nouveau_fence_timeline_value_str,
 };
+
+int
+nouveau_fence_install(struct fence *fence, const char *name, int *fd_out)
+{
+#ifdef CONFIG_SYNC
+	struct sync_fence *f;
+	int fd;
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0)
+		return fd;
+
+	f = sync_fence_create(name, fence);
+	if (!f) {
+		put_unused_fd(fd);
+		return -ENOMEM;
+	}
+
+	sync_fence_install(f, fd);
+	*fd_out = fd;
+	return 0;
+#else
+	return -ENODEV;
+#endif
+}
+
+int
+nouveau_fence_sync_fd(int fence_fd, struct nouveau_channel *chan, bool intr)
+{
+#ifdef CONFIG_SYNC
+	int i, ret = 0;
+	struct sync_fence *fence;
+
+	fence = sync_fence_fdget(fence_fd);
+	if (!fence)
+		return -EINVAL;
+
+	for (i = 0; i < fence->num_fences; ++i) {
+		struct fence *pt = fence->cbs[i].sync_pt;
+
+		ret |= nouveau_fence_sync(pt, chan, intr);
+	}
+
+	sync_fence_put(fence);
+
+	return ret;
+#else
+	return -ENODEV;
+#endif
+}
diff --git a/drm/nouveau/nouveau_fence.h b/drm/nouveau/nouveau_fence.h
index 2e3a62d38..cc97c9a9e 100644
--- a/drm/nouveau/nouveau_fence.h
+++ b/drm/nouveau/nouveau_fence.h
@@ -26,7 +26,9 @@ int  nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *);
 bool nouveau_fence_done(struct nouveau_fence *);
 void nouveau_fence_work(struct fence *, void (*)(void *), void *);
 int  nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr);
-int  nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr);
+int  nouveau_fence_sync(struct fence *, struct nouveau_channel *, bool intr);
+int  nouveau_fence_sync_fd(int, struct nouveau_channel *, bool intr);
+int  nouveau_fence_install(struct fence *, const char *name, int *);
 
 struct nouveau_fence_chan {
 	spinlock_t lock;
diff --git a/drm/nouveau/nouveau_gem.c b/drm/nouveau/nouveau_gem.c
index a0865c49e..35c8a28bd 100644
--- a/drm/nouveau/nouveau_gem.c
+++ b/drm/nouveau/nouveau_gem.c
@@ -490,7 +490,7 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli,
 			return ret;
 		}
 
-		ret = nouveau_fence_sync(nvbo, chan, !!b->write_domains, true);
+		ret = nouveau_bo_sync(nvbo, chan, !!b->write_domains, true);
 		if (unlikely(ret)) {
 			if (ret != -ERESTARTSYS)
 				NV_PRINTK(err, cli, "fail post-validate sync\n");
@@ -551,7 +551,12 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
 		validate_fini(op, NULL, NULL);
 		return ret;
 	}
-	*apply_relocs = ret;
+
+	if (apply_relocs)
+		*apply_relocs = ret;
+	else
+		BUG_ON(ret > 0);
+
 	return 0;
 }
 
@@ -665,6 +670,126 @@ nouveau_gem_pushbuf_reloc_apply(struct nouveau_cli *cli,
 }
 
 int
+nouveau_gem_ioctl_pushbuf_2(struct drm_device *dev, void *data,
+                            struct drm_file *file_priv)
+{
+	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
+	struct nouveau_cli *cli = nouveau_cli(file_priv);
+	struct nouveau_abi16_chan *temp;
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct drm_nouveau_gem_pushbuf_2 *req = data;
+	struct drm_nouveau_gem_pushbuf_bo *bo = NULL;
+	struct nouveau_channel *chan = NULL;
+	struct validate_op op;
+	struct nouveau_fence *fence = NULL;
+	uint32_t *push = NULL;
+	int i, ret = 0;
+
+	if (unlikely(!abi16))
+		return -ENOMEM;
+
+	list_for_each_entry(temp, &abi16->channels, head) {
+		if (temp->chan->chid == req->channel) {
+			chan = temp->chan;
+			break;
+		}
+	}
+
+	if (!chan)
+		return nouveau_abi16_put(abi16, -ENOENT);
+
+	if (!chan->dma.ib_max)
+		return nouveau_abi16_put(abi16, -ENODEV);
+
+	req->vram_available = drm->gem.vram_available;
+	req->gart_available = drm->gem.gart_available;
+
+	if (unlikely(req->nr_push > NOUVEAU_GEM_MAX_PUSH)) {
+		NV_PRINTK(err, cli, "pushbuf push count exceeds limit: %d max %d\n",
+		           req->nr_push, NOUVEAU_GEM_MAX_PUSH);
+		return nouveau_abi16_put(abi16, -EINVAL);
+	}
+
+	if (unlikely(req->nr_buffers > NOUVEAU_GEM_MAX_BUFFERS)) {
+		NV_PRINTK(err, cli, "pushbuf bo count exceeds limit: %d max %d\n",
+		           req->nr_buffers, NOUVEAU_GEM_MAX_BUFFERS);
+		return nouveau_abi16_put(abi16, -EINVAL);
+	}
+
+	if (req->nr_push) {
+		push = u_memcpya(req->push, req->nr_push, 8);
+		if (IS_ERR(push))
+			return nouveau_abi16_put(abi16, PTR_ERR(push));
+	}
+
+	if (req->nr_buffers) {
+		bo = u_memcpya(req->buffers, req->nr_buffers, sizeof(*bo));
+		if (IS_ERR(bo)) {
+			u_free(push);
+			return nouveau_abi16_put(abi16, PTR_ERR(bo));
+		}
+	}
+
+	/* Validate buffer list */
+	ret = nouveau_gem_pushbuf_validate(chan, file_priv, bo, req->buffers,
+					   req->nr_buffers, &op, NULL);
+	if (ret) {
+		if (ret != -ERESTARTSYS)
+			NV_PRINTK(err, cli, "validate: %d\n", ret);
+
+		goto out_prevalid;
+	}
+
+	if (req->flags & NOUVEAU_GEM_PUSHBUF_2_FENCE_WAIT) {
+		ret = nouveau_fence_sync_fd(req->fence, chan, true);
+		if (ret) {
+			NV_PRINTK(err, cli, "fence wait: %d\n", ret);
+			goto out;
+		}
+	}
+
+	ret = nouveau_dma_wait(chan, req->nr_push + 1, 16);
+	if (ret) {
+		NV_PRINTK(err, cli, "nv50cal_space: %d\n", ret);
+		goto out;
+	}
+
+	for (i = 0; i < req->nr_push * 2; i += 2)
+		nv50_dma_push(chan, push[i], push[i + 1]);
+
+	ret = nouveau_fence_new(chan, false, &fence);
+	if (ret) {
+		NV_PRINTK(err, cli, "error fencing pushbuf: %d\n", ret);
+		WIND_RING(chan);
+		goto out;
+	}
+
+	if (req->flags & NOUVEAU_GEM_PUSHBUF_2_FENCE_EMIT) {
+		struct fence *f = fence_get(&fence->base);
+		ret = nouveau_fence_install(f, "nv-pushbuf", &req->fence);
+
+		if (ret) {
+			fence_put(f);
+			NV_PRINTK(err, cli, "fence install: %d\n", ret);
+			WIND_RING(chan);
+			goto out;
+		}
+	}
+
+out:
+	if (req->nr_buffers)
+		validate_fini(&op, fence, bo);
+
+	nouveau_fence_unref(&fence);
+
+out_prevalid:
+	u_free(bo);
+	u_free(push);
+
+	return nouveau_abi16_put(abi16, ret);
+}
+
+int
 nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 			  struct drm_file *file_priv)
 {
@@ -764,8 +889,8 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 			struct nouveau_bo *nvbo = (void *)(unsigned long)
 				bo[push[i].bo_index].user_priv;
 
-			nv50_dma_push(chan, nvbo, push[i].offset,
-				      push[i].length);
+			nv50_dma_push_bo(chan, nvbo, push[i].offset,
+				         push[i].length);
 		}
 	} else
 	if (drm->device.info.chipset >= 0x25) {
@@ -923,3 +1048,55 @@ nouveau_gem_ioctl_info(struct drm_device *dev, void *data,
 	return ret;
 }
 
+int
+nouveau_gem_ioctl_set_info(struct drm_device *dev, void *data,
+			   struct drm_file *file_priv)
+{
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nouveau_cli *cli = nouveau_cli(file_priv);
+	struct nvkm_fb *pfb = nvxx_fb(&drm->device);
+	struct drm_nouveau_gem_info *req = data;
+	struct drm_gem_object *gem;
+	struct nouveau_bo *nvbo;
+	struct nvkm_vma *vma;
+	int ret = 0;
+
+	if (!nvkm_fb_memtype_valid(pfb, req->tile_flags)) {
+		NV_PRINTK(err, cli, "bad page flags: 0x%08x\n", req->tile_flags);
+		return -EINVAL;
+	}
+
+	gem = drm_gem_object_lookup(dev, file_priv, req->handle);
+	if (!gem)
+		return -ENOENT;
+	nvbo = nouveau_gem_object(gem);
+
+	/* We can only change info of PRIME-imported buffers */
+	if (nvbo->bo.type != ttm_bo_type_sg) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = ttm_bo_reserve(&nvbo->bo, false, false, false, NULL);
+	if (ret)
+		goto out;
+
+	if (nvbo->tile_mode != req->tile_mode ||
+	    nvbo->tile_flags != req->tile_flags) {
+		nvbo->tile_mode = req->tile_mode;
+		nvbo->tile_flags = req->tile_flags;
+
+		nouveau_bo_update_tiling(drm, nvbo, nvbo->bo.mem.mm_node);
+
+		/* remap over existing mapping with new tile parameters */
+		vma = nouveau_bo_vma_find(nvbo, cli->vm);
+		if (vma)
+			nvkm_vm_map(vma, nvbo->bo.mem.mm_node);
+	}
+
+	ttm_bo_unreserve(&nvbo->bo);
+
+out:
+	drm_gem_object_unreference_unlocked(gem);
+	return ret;
+}
diff --git a/drm/nouveau/nouveau_gem.h b/drm/nouveau/nouveau_gem.h
index e4049faca..201302c3c 100644
--- a/drm/nouveau/nouveau_gem.h
+++ b/drm/nouveau/nouveau_gem.h
@@ -27,12 +27,16 @@ extern int nouveau_gem_ioctl_new(struct drm_device *, void *,
 				 struct drm_file *);
 extern int nouveau_gem_ioctl_pushbuf(struct drm_device *, void *,
 				     struct drm_file *);
+extern int nouveau_gem_ioctl_pushbuf_2(struct drm_device *, void *,
+				       struct drm_file *);
 extern int nouveau_gem_ioctl_cpu_prep(struct drm_device *, void *,
 				      struct drm_file *);
 extern int nouveau_gem_ioctl_cpu_fini(struct drm_device *, void *,
 				      struct drm_file *);
 extern int nouveau_gem_ioctl_info(struct drm_device *, void *,
 				  struct drm_file *);
+extern int nouveau_gem_ioctl_set_info(struct drm_device *, void *,
+				      struct drm_file *);
 
 extern int nouveau_gem_prime_pin(struct drm_gem_object *);
 struct reservation_object *nouveau_gem_prime_res_obj(struct drm_gem_object *);
diff --git a/drm/nouveau/nouveau_platform.c b/drm/nouveau/nouveau_platform.c
index 8a70cec59..0140198c5 100644
--- a/drm/nouveau/nouveau_platform.c
+++ b/drm/nouveau/nouveau_platform.c
@@ -20,6 +20,7 @@
  * DEALINGS IN THE SOFTWARE.
  */
 #include "nouveau_platform.h"
+#include <linux/version.h>
 
 static int nouveau_platform_probe(struct platform_device *pdev)
 {
@@ -28,7 +29,13 @@ static int nouveau_platform_probe(struct platform_device *pdev)
 	struct drm_device *drm;
 	int ret;
 
+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,19,0)
+	const struct of_device_id *match;
+	match = of_match_device(pdev->dev.driver->of_match_table, &pdev->dev);
+	func = match->data;
+#else
 	func = of_device_get_match_data(&pdev->dev);
+#endif
 
 	drm = nouveau_platform_device_create(func, pdev, &device);
 	if (IS_ERR(drm))
diff --git a/drm/nouveau/nouveau_ttm.c b/drm/nouveau/nouveau_ttm.c
index d2e7d209f..31277e57a 100644
--- a/drm/nouveau/nouveau_ttm.c
+++ b/drm/nouveau/nouveau_ttm.c
@@ -150,27 +150,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 
 	node->page_shift = 12;
 
-	switch (drm->device.info.family) {
-	case NV_DEVICE_INFO_V0_TNT:
-	case NV_DEVICE_INFO_V0_CELSIUS:
-	case NV_DEVICE_INFO_V0_KELVIN:
-	case NV_DEVICE_INFO_V0_RANKINE:
-	case NV_DEVICE_INFO_V0_CURIE:
-		break;
-	case NV_DEVICE_INFO_V0_TESLA:
-		if (drm->device.info.chipset != 0x50)
-			node->memtype = (nvbo->tile_flags & 0x7f00) >> 8;
-		break;
-	case NV_DEVICE_INFO_V0_FERMI:
-	case NV_DEVICE_INFO_V0_KEPLER:
-	case NV_DEVICE_INFO_V0_MAXWELL:
-		node->memtype = (nvbo->tile_flags & 0xff00) >> 8;
-		break;
-	default:
-		NV_WARN(drm, "%s: unhandled family type %x\n", __func__,
-			drm->device.info.family);
-		break;
-	}
+	nouveau_bo_update_tiling(drm, nvbo, node);
 
 	mem->mm_node = node;
 	mem->start   = 0;
diff --git a/drm/nouveau/nv50_display.c b/drm/nouveau/nv50_display.c
index ea3921652..327532073 100644
--- a/drm/nouveau/nv50_display.c
+++ b/drm/nouveau/nv50_display.c
@@ -1724,7 +1724,11 @@ nv50_dac_create(struct drm_connector *connector, struct dcb_output *dcbe)
 	encoder = to_drm_encoder(nv_encoder);
 	encoder->possible_crtcs = dcbe->heads;
 	encoder->possible_clones = 0;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
 	drm_encoder_init(connector->dev, encoder, &nv50_dac_func, type, NULL);
+#else
+	drm_encoder_init(connector->dev, encoder, &nv50_dac_func, type);
+#endif
 	drm_encoder_helper_add(encoder, &nv50_dac_hfunc);
 
 	drm_mode_connector_attach_encoder(connector, encoder);
@@ -2139,7 +2143,11 @@ nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe)
 	encoder = to_drm_encoder(nv_encoder);
 	encoder->possible_crtcs = dcbe->heads;
 	encoder->possible_clones = 0;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
 	drm_encoder_init(connector->dev, encoder, &nv50_sor_func, type, NULL);
+#else
+	drm_encoder_init(connector->dev, encoder, &nv50_sor_func, type);
+#endif
 	drm_encoder_helper_add(encoder, &nv50_sor_hfunc);
 
 	drm_mode_connector_attach_encoder(connector, encoder);
@@ -2319,7 +2327,11 @@ nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe)
 	encoder = to_drm_encoder(nv_encoder);
 	encoder->possible_crtcs = dcbe->heads;
 	encoder->possible_clones = 0;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
 	drm_encoder_init(connector->dev, encoder, &nv50_pior_func, type, NULL);
+#else
+	drm_encoder_init(connector->dev, encoder, &nv50_pior_func, type);
+#endif
 	drm_encoder_helper_add(encoder, &nv50_pior_hfunc);
 
 	drm_mode_connector_attach_encoder(connector, encoder);
diff --git a/drm/nouveau/nvkm/core/Kbuild b/drm/nouveau/nvkm/core/Kbuild
index 7f66963f3..86a31a8e1 100644
--- a/drm/nouveau/nvkm/core/Kbuild
+++ b/drm/nouveau/nvkm/core/Kbuild
@@ -2,6 +2,7 @@ nvkm-y := nvkm/core/client.o
 nvkm-y += nvkm/core/engine.o
 nvkm-y += nvkm/core/enum.o
 nvkm-y += nvkm/core/event.o
+nvkm-y += nvkm/core/firmware.o
 nvkm-y += nvkm/core/gpuobj.o
 nvkm-y += nvkm/core/ioctl.o
 nvkm-y += nvkm/core/memory.o
diff --git a/drm/nouveau/nvkm/core/firmware.c b/drm/nouveau/nvkm/core/firmware.c
new file mode 100644
index 000000000..4a4b4a5d5
--- /dev/null
+++ b/drm/nouveau/nvkm/core/firmware.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <core/device.h>
+
+#include <linux/firmware.h>
+
+/**
+ * nvkm_firmware_get - load firmware from the official nvidia/chip/ directory
+ * @device	device that will use that firmware
+ * @fwname	name of firmware file to load
+ * @fw		firmware structure to load to
+ *
+ * Use this function to load firmware files in the form nvidia/chip/fwname.bin.
+ * Firmware files released by NVIDIA will always follow this format.
+ */
+int
+nvkm_firmware_get(struct nvkm_device *device, const char *fwname,
+		  const struct firmware **fw)
+{
+	char f[64];
+	char cname[16];
+	int i;
+
+	/* Convert device name to lowercase */
+	strncpy(cname, device->chip->name, sizeof(cname));
+	cname[sizeof(cname) - 1] = '\0';
+	i = strlen(cname);
+	while (i) {
+		--i;
+		cname[i] = tolower(cname[i]);
+	}
+
+	snprintf(f, sizeof(f), "nvidia/%s/%s.bin", cname, fwname);
+	return request_firmware(fw, f, device->dev);
+}
+
+/**
+ * nvkm_firmware_put - release firmware loaded with nvkm_firmware_get
+ */
+void
+nvkm_firmware_put(const struct firmware *fw)
+{
+	release_firmware(fw);
+}
diff --git a/drm/nouveau/nvkm/core/gpuobj.c b/drm/nouveau/nvkm/core/gpuobj.c
index c3a790eb8..a7bd22706 100644
--- a/drm/nouveau/nvkm/core/gpuobj.c
+++ b/drm/nouveau/nvkm/core/gpuobj.c
@@ -253,3 +253,23 @@ nvkm_gpuobj_wrap(struct nvkm_memory *memory, struct nvkm_gpuobj **pgpuobj)
 	(*pgpuobj)->size = nvkm_memory_size(memory);
 	return 0;
 }
+
+void
+nvkm_gpuobj_memcpy_to(struct nvkm_gpuobj *dst, u32 dstoffset, void *src,
+		      u32 length)
+{
+	int i;
+
+	for (i = 0; i < length; i += 4)
+		nvkm_wo32(dst, dstoffset + i, *(u32 *)(src + i));
+}
+
+void
+nvkm_gpuobj_memcpy_from(void *dst, struct nvkm_gpuobj *src, u32 srcoffset,
+			u32 length)
+{
+	int i;
+
+	for (i = 0; i < length; i += 4)
+		((u32 *)src)[i / 4] = nvkm_ro32(src, srcoffset + i);
+}
diff --git a/drm/nouveau/nvkm/core/subdev.c b/drm/nouveau/nvkm/core/subdev.c
index 7de98470a..d8b3d2a36 100644
--- a/drm/nouveau/nvkm/core/subdev.c
+++ b/drm/nouveau/nvkm/core/subdev.c
@@ -49,6 +49,7 @@ nvkm_subdev_name[NVKM_SUBDEV_NR] = {
 	[NVKM_SUBDEV_THERM  ] = "therm",
 	[NVKM_SUBDEV_TIMER  ] = "tmr",
 	[NVKM_SUBDEV_VOLT   ] = "volt",
+	[NVKM_SUBDEV_SECBOOT] = "secboot",
 	[NVKM_ENGINE_BSP    ] = "bsp",
 	[NVKM_ENGINE_CE0    ] = "ce0",
 	[NVKM_ENGINE_CE1    ] = "ce1",
diff --git a/drm/nouveau/nvkm/engine/device/base.c b/drm/nouveau/nvkm/engine/device/base.c
index 8d77bf879..513a54fbf 100644
--- a/drm/nouveau/nvkm/engine/device/base.c
+++ b/drm/nouveau/nvkm/engine/device/base.c
@@ -1991,6 +1991,7 @@ nv124_chipset = {
 	.fifo = gm204_fifo_new,
 	.gr = gm204_gr_new,
 	.sw = gf100_sw_new,
+	.secboot = gm200_secboot_new,
 };
 
 static const struct nvkm_device_chip
@@ -2022,6 +2023,7 @@ nv126_chipset = {
 	.fifo = gm204_fifo_new,
 	.gr = gm206_gr_new,
 	.sw = gf100_sw_new,
+	.secboot = gm200_secboot_new,
 };
 
 static const struct nvkm_device_chip
@@ -2044,6 +2046,7 @@ nv12b_chipset = {
 	.fifo = gm20b_fifo_new,
 	.gr = gm20b_gr_new,
 	.sw = gf100_sw_new,
+	.secboot = gm20b_secboot_new,
 };
 
 static int
@@ -2094,6 +2097,7 @@ nvkm_device_subdev(struct nvkm_device *device, int index)
 	_(THERM  , device->therm  , &device->therm->subdev);
 	_(TIMER  , device->timer  , &device->timer->subdev);
 	_(VOLT   , device->volt   , &device->volt->subdev);
+	_(SECBOOT, device->secboot, &device->secboot->subdev);
 #undef _
 	default:
 		engine = nvkm_device_engine(device, index);
@@ -2540,6 +2544,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		_(NVKM_SUBDEV_THERM  ,   therm);
 		_(NVKM_SUBDEV_TIMER  ,   timer);
 		_(NVKM_SUBDEV_VOLT   ,    volt);
+		_(NVKM_SUBDEV_SECBOOT, secboot);
 		_(NVKM_ENGINE_BSP    ,     bsp);
 		_(NVKM_ENGINE_CE0    ,   ce[0]);
 		_(NVKM_ENGINE_CE1    ,   ce[1]);
diff --git a/drm/nouveau/nvkm/engine/device/priv.h b/drm/nouveau/nvkm/engine/device/priv.h
index ed3ad2c30..03fe0afbf 100644
--- a/drm/nouveau/nvkm/engine/device/priv.h
+++ b/drm/nouveau/nvkm/engine/device/priv.h
@@ -22,6 +22,7 @@
 #include <subdev/therm.h>
 #include <subdev/timer.h>
 #include <subdev/volt.h>
+#include <subdev/secboot.h>
 
 #include <engine/bsp.h>
 #include <engine/ce.h>
diff --git a/drm/nouveau/nvkm/engine/falcon.c b/drm/nouveau/nvkm/engine/falcon.c
index 74000602f..028113c6f 100644
--- a/drm/nouveau/nvkm/engine/falcon.c
+++ b/drm/nouveau/nvkm/engine/falcon.c
@@ -22,6 +22,7 @@
 #include <engine/falcon.h>
 
 #include <core/gpuobj.h>
+#include <core/firmware.h>
 #include <subdev/timer.h>
 #include <engine/fifo.h>
 
@@ -187,16 +188,15 @@ nvkm_falcon_init(struct nvkm_engine *engine)
 	 * locate a "self-bootstrapping" firmware image for the engine
 	 */
 	if (!falcon->code.data) {
-		snprintf(name, sizeof(name), "nouveau/nv%02x_fuc%03x",
-			 device->chipset, falcon->addr >> 12);
+		snprintf(name, sizeof(name), "fuc%03x", falcon->addr >> 12);
 
-		ret = request_firmware(&fw, name, device->dev);
+		ret = nvkm_firmware_get(device, name, &fw);
 		if (ret == 0) {
 			falcon->code.data = vmemdup(fw->data, fw->size);
 			falcon->code.size = fw->size;
 			falcon->data.data = NULL;
 			falcon->data.size = 0;
-			release_firmware(fw);
+			nvkm_firmware_put(fw);
 		}
 
 		falcon->external = true;
@@ -206,10 +206,9 @@ nvkm_falcon_init(struct nvkm_engine *engine)
 	 * images for the engine
 	 */
 	if (!falcon->code.data) {
-		snprintf(name, sizeof(name), "nouveau/nv%02x_fuc%03xd",
-			 device->chipset, falcon->addr >> 12);
+		snprintf(name, sizeof(name), "fuc%03xd", falcon->addr >> 12);
 
-		ret = request_firmware(&fw, name, device->dev);
+		ret = nvkm_firmware_get(device, name, &fw);
 		if (ret) {
 			nvkm_error(subdev, "unable to load firmware data\n");
 			return -ENODEV;
@@ -217,14 +216,13 @@ nvkm_falcon_init(struct nvkm_engine *engine)
 
 		falcon->data.data = vmemdup(fw->data, fw->size);
 		falcon->data.size = fw->size;
-		release_firmware(fw);
+		nvkm_firmware_put(fw);
 		if (!falcon->data.data)
 			return -ENOMEM;
 
-		snprintf(name, sizeof(name), "nouveau/nv%02x_fuc%03xc",
-			 device->chipset, falcon->addr >> 12);
+		snprintf(name, sizeof(name), "fuc%03xc", falcon->addr >> 12);
 
-		ret = request_firmware(&fw, name, device->dev);
+		ret = nvkm_firmware_get(device, name, &fw);
 		if (ret) {
 			nvkm_error(subdev, "unable to load firmware code\n");
 			return -ENODEV;
@@ -232,7 +230,7 @@ nvkm_falcon_init(struct nvkm_engine *engine)
 
 		falcon->code.data = vmemdup(fw->data, fw->size);
 		falcon->code.size = fw->size;
-		release_firmware(fw);
+		nvkm_firmware_put(fw);
 		if (!falcon->code.data)
 			return -ENOMEM;
 	}
diff --git a/drm/nouveau/nvkm/engine/fifo/Kbuild b/drm/nouveau/nvkm/engine/fifo/Kbuild
index 74993c144..ca30ea61f 100644
--- a/drm/nouveau/nvkm/engine/fifo/Kbuild
+++ b/drm/nouveau/nvkm/engine/fifo/Kbuild
@@ -28,3 +28,4 @@ nvkm-y += nvkm/engine/fifo/gpfifog84.o
 nvkm-y += nvkm/engine/fifo/gpfifogf100.o
 nvkm-y += nvkm/engine/fifo/gpfifogk104.o
 nvkm-y += nvkm/engine/fifo/gpfifogm204.o
+nvkm-y += nvkm/engine/fifo/gpfifogm20b.o
diff --git a/drm/nouveau/nvkm/engine/fifo/changk104.h b/drm/nouveau/nvkm/engine/fifo/changk104.h
index 97bdddb76..9e6ea7bb9 100644
--- a/drm/nouveau/nvkm/engine/fifo/changk104.h
+++ b/drm/nouveau/nvkm/engine/fifo/changk104.h
@@ -21,9 +21,26 @@ struct gk104_fifo_chan {
 	} engn[NVKM_SUBDEV_NR];
 };
 
+int gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *);
+void *gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *);
+void gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *);
+int gk104_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *, struct nvkm_engine *,
+				  struct nvkm_object *);
+void gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *,
+				   struct nvkm_engine *);
+int gk104_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *,
+				  struct nvkm_engine *);
+int gk104_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *, struct nvkm_engine *,
+				  bool);
+
+int __gk104_fifo_gpfifo_new(struct nvkm_fifo *, const struct nvkm_oclass *,
+			    const struct nvkm_fifo_chan_func *, void *, u32,
+			    struct nvkm_object **);
+
 int gk104_fifo_gpfifo_new(struct nvkm_fifo *, const struct nvkm_oclass *,
 			  void *data, u32 size, struct nvkm_object **);
 
 extern const struct nvkm_fifo_chan_oclass gk104_fifo_gpfifo_oclass;
 extern const struct nvkm_fifo_chan_oclass gm204_fifo_gpfifo_oclass;
+extern const struct nvkm_fifo_chan_oclass gm20b_fifo_gpfifo_oclass;
 #endif
diff --git a/drm/nouveau/nvkm/engine/fifo/gk104.c b/drm/nouveau/nvkm/engine/fifo/gk104.c
index 4fcd147d4..d6a88cf67 100644
--- a/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -198,11 +198,11 @@ gk104_fifo_intr_sched_ctxsw(struct gk104_fifo *fifo)
 	for (engn = 0; engn < ARRAY_SIZE(fifo->engine); engn++) {
 		u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08));
 		u32 busy = (stat & 0x80000000);
-		u32 next = (stat & 0x07ff0000) >> 16;
+		u32 next = (stat & 0x0fff0000) >> 16;
 		u32 chsw = (stat & 0x00008000);
 		u32 save = (stat & 0x00004000);
 		u32 load = (stat & 0x00002000);
-		u32 prev = (stat & 0x000007ff);
+		u32 prev = (stat & 0x00000fff);
 		u32 chid = load ? next : prev;
 		(void)save;
 
diff --git a/drm/nouveau/nvkm/engine/fifo/gm20b.c b/drm/nouveau/nvkm/engine/fifo/gm20b.c
index ae6375d97..059faf82c 100644
--- a/drm/nouveau/nvkm/engine/fifo/gm20b.c
+++ b/drm/nouveau/nvkm/engine/fifo/gm20b.c
@@ -32,7 +32,7 @@ gm20b_fifo = {
 	.uevent_init = gk104_fifo_uevent_init,
 	.uevent_fini = gk104_fifo_uevent_fini,
 	.chan = {
-		&gm204_fifo_gpfifo_oclass,
+		&gm20b_fifo_gpfifo_oclass,
 		NULL
 	},
 };
diff --git a/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
index 2e1df01bd..c1e2ec373 100644
--- a/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
+++ b/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
@@ -33,7 +33,7 @@
 #include <nvif/cla06f.h>
 #include <nvif/unpack.h>
 
-static int
+int
 gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan)
 {
 	struct gk104_fifo *fifo = chan->fifo;
@@ -72,7 +72,7 @@ gk104_fifo_gpfifo_engine_addr(struct nvkm_engine *engine)
 	}
 }
 
-static int
+int
 gk104_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base,
 			      struct nvkm_engine *engine, bool suspend)
 {
@@ -95,7 +95,7 @@ gk104_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base,
 	return ret;
 }
 
-static int
+int
 gk104_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base,
 			      struct nvkm_engine *engine)
 {
@@ -114,7 +114,7 @@ gk104_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base,
 	return 0;
 }
 
-static void
+void
 gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *base,
 			      struct nvkm_engine *engine)
 {
@@ -123,7 +123,7 @@ gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *base,
 	nvkm_gpuobj_del(&chan->engn[engine->subdev.index].inst);
 }
 
-static int
+int
 gk104_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *base,
 			      struct nvkm_engine *engine,
 			      struct nvkm_object *object)
@@ -160,7 +160,7 @@ gk104_fifo_gpfifo_fini(struct nvkm_fifo_chan *base)
 	nvkm_wr32(device, 0x800000 + coff, 0x00000000);
 }
 
-static void
+void
 gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *base)
 {
 	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
@@ -180,7 +180,7 @@ gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *base)
 	}
 }
 
-static void *
+void *
 gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *base)
 {
 	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
@@ -189,21 +189,10 @@ gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *base)
 	return chan;
 }
 
-static const struct nvkm_fifo_chan_func
-gk104_fifo_gpfifo_func = {
-	.dtor = gk104_fifo_gpfifo_dtor,
-	.init = gk104_fifo_gpfifo_init,
-	.fini = gk104_fifo_gpfifo_fini,
-	.ntfy = g84_fifo_chan_ntfy,
-	.engine_ctor = gk104_fifo_gpfifo_engine_ctor,
-	.engine_dtor = gk104_fifo_gpfifo_engine_dtor,
-	.engine_init = gk104_fifo_gpfifo_engine_init,
-	.engine_fini = gk104_fifo_gpfifo_engine_fini,
-};
-
 int
-gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
-		      void *data, u32 size, struct nvkm_object **pobject)
+__gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
+		        const struct nvkm_fifo_chan_func *func, void *data,
+			u32 size, struct nvkm_object **pobject)
 {
 	union {
 		struct kepler_channel_gpfifo_a_v0 v0;
@@ -257,8 +246,8 @@ gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
 	chan->engine = __ffs(args->v0.engine);
 	INIT_LIST_HEAD(&chan->head);
 
-	ret = nvkm_fifo_chan_ctor(&gk104_fifo_gpfifo_func, &fifo->base,
-				  0x1000, 0x1000, true, args->v0.vm, 0,
+	ret = nvkm_fifo_chan_ctor(func, &fifo->base, 0x1000, 0x1000, true,
+				  args->v0.vm, 0,
 				  gk104_fifo_engine_subdev(chan->engine),
 				  1, fifo->user.bar.offset, 0x200,
 				  oclass, &chan->base);
@@ -315,6 +304,26 @@ gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
 	return 0;
 }
 
+static const struct nvkm_fifo_chan_func
+gk104_fifo_gpfifo_func = {
+	.dtor = gk104_fifo_gpfifo_dtor,
+	.init = gk104_fifo_gpfifo_init,
+	.fini = gk104_fifo_gpfifo_fini,
+	.ntfy = g84_fifo_chan_ntfy,
+	.engine_ctor = gk104_fifo_gpfifo_engine_ctor,
+	.engine_dtor = gk104_fifo_gpfifo_engine_dtor,
+	.engine_init = gk104_fifo_gpfifo_engine_init,
+	.engine_fini = gk104_fifo_gpfifo_engine_fini,
+};
+
+int
+gk104_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
+		      void *data, u32 size, struct nvkm_object **pobject)
+{
+	return __gk104_fifo_gpfifo_new(base, oclass, &gk104_fifo_gpfifo_func,
+				       data, size, pobject);
+}
+
 const struct nvkm_fifo_chan_oclass
 gk104_fifo_gpfifo_oclass = {
 	.base.oclass = KEPLER_CHANNEL_GPFIFO_A,
diff --git a/drm/nouveau/nvkm/engine/fifo/gpfifogm20b.c b/drm/nouveau/nvkm/engine/fifo/gpfifogm20b.c
new file mode 100644
index 000000000..0f78fe797
--- /dev/null
+++ b/drm/nouveau/nvkm/engine/fifo/gpfifogm20b.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "changk104.h"
+
+#include <nvif/class.h>
+
+static void
+gm20b_fifo_gpfifo_fini(struct nvkm_fifo_chan *base)
+{
+	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
+	struct gk104_fifo *fifo = chan->fifo;
+	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	u32 coff = chan->base.chid * 8;
+
+	if (!list_empty(&chan->head)) {
+		list_del_init(&chan->head);
+		nvkm_mask(device, 0x800004 + coff, 0x00000800, 0x00000800);
+		gk104_fifo_runlist_commit(fifo, chan->engine);
+	}
+
+	gk104_fifo_gpfifo_kick(chan);
+	nvkm_wr32(device, 0x800000 + coff, 0x00000000);
+}
+
+static const struct nvkm_fifo_chan_func
+gm20b_fifo_gpfifo_func = {
+	.dtor = gk104_fifo_gpfifo_dtor,
+	.init = gk104_fifo_gpfifo_init,
+	.fini = gm20b_fifo_gpfifo_fini,
+	.ntfy = g84_fifo_chan_ntfy,
+	.engine_ctor = gk104_fifo_gpfifo_engine_ctor,
+	.engine_dtor = gk104_fifo_gpfifo_engine_dtor,
+	.engine_init = gk104_fifo_gpfifo_engine_init,
+	.engine_fini = gk104_fifo_gpfifo_engine_fini,
+};
+
+int
+gm20b_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
+		      void *data, u32 size, struct nvkm_object **pobject)
+{
+	return __gk104_fifo_gpfifo_new(base, oclass, &gm20b_fifo_gpfifo_func,
+				       data, size, pobject);
+}
+
+const struct nvkm_fifo_chan_oclass
+gm20b_fifo_gpfifo_oclass = {
+	.base.oclass = MAXWELL_CHANNEL_GPFIFO_A,
+	.base.minver = 0,
+	.base.maxver = 0,
+	.ctor = gm20b_fifo_gpfifo_new,
+};
diff --git a/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drm/nouveau/nvkm/engine/gr/ctxgf100.c
index 56f392d3d..5f05f96f0 100644
--- a/drm/nouveau/nvkm/engine/gr/ctxgf100.c
+++ b/drm/nouveau/nvkm/engine/gr/ctxgf100.c
@@ -24,6 +24,7 @@
 #include "ctxgf100.h"
 
 #include <subdev/fb.h>
+#include <subdev/ltc.h>
 #include <subdev/mc.h>
 #include <subdev/timer.h>
 
@@ -1272,6 +1273,7 @@ gf100_grctx_generate(struct gf100_gr *gr)
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
+	struct nvkm_ltc *ltc = device->ltc;
 	struct nvkm_memory *chan;
 	struct gf100_grctx info;
 	int ret, i;
@@ -1367,6 +1369,8 @@ gf100_grctx_generate(struct gf100_gr *gr)
 		goto done;
 	}
 
+	nvkm_ltc_flush(ltc);
+
 	gr->data = kmalloc(gr->size, GFP_KERNEL);
 	if (gr->data) {
 		nvkm_kmap(chan);
diff --git a/drm/nouveau/nvkm/engine/gr/ctxgk20a.c b/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
index ad0a6cfe7..27be14cfa 100644
--- a/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
+++ b/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
@@ -34,7 +34,7 @@ gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 
 	gf100_gr_mmio(gr, gr->fuc_sw_ctx);
 
-	gf100_gr_wait_idle(gr);
+	gk104_gr_wait_idle(gr);
 
 	idle_timeout_save = nvkm_rd32(device, 0x404154);
 	nvkm_wr32(device, 0x404154, 0x00000000);
@@ -57,13 +57,13 @@ gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 
 	nvkm_mask(device, 0x5044b0, 0x08000000, 0x08000000);
 
-	gf100_gr_wait_idle(gr);
+	gk104_gr_wait_idle(gr);
 
 	nvkm_wr32(device, 0x404154, idle_timeout_save);
-	gf100_gr_wait_idle(gr);
+	gk104_gr_wait_idle(gr);
 
 	gf100_gr_mthd(gr, gr->fuc_method);
-	gf100_gr_wait_idle(gr);
+	gk104_gr_wait_idle(gr);
 
 	gf100_gr_icmd(gr, gr->fuc_bundle);
 	grctx->pagepool(info);
diff --git a/drm/nouveau/nvkm/engine/gr/ctxgm20b.c b/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
index 670260402..c6062cadf 100644
--- a/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
+++ b/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
@@ -45,7 +45,7 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 
 	gf100_gr_mmio(gr, gr->fuc_sw_ctx);
 
-	gf100_gr_wait_idle(gr);
+	gk104_gr_wait_idle(gr);
 
 	idle_timeout_save = nvkm_rd32(device, 0x404154);
 	nvkm_wr32(device, 0x404154, 0x00000000);
@@ -72,13 +72,13 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 
 	gm204_grctx_generate_405b60(gr);
 
-	gf100_gr_wait_idle(gr);
+	gk104_gr_wait_idle(gr);
 
 	nvkm_wr32(device, 0x404154, idle_timeout_save);
-	gf100_gr_wait_idle(gr);
+	gk104_gr_wait_idle(gr);
 
 	gf100_gr_mthd(gr, gr->fuc_method);
-	gf100_gr_wait_idle(gr);
+	gk104_gr_wait_idle(gr);
 
 	gf100_gr_icmd(gr, gr->fuc_bundle);
 	grctx->pagepool(info);
diff --git a/drm/nouveau/nvkm/engine/gr/gf100.c b/drm/nouveau/nvkm/engine/gr/gf100.c
index 1f81069ed..299d6a4f8 100644
--- a/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -27,6 +27,8 @@
 
 #include <core/client.h>
 #include <core/option.h>
+#include <core/firmware.h>
+#include <subdev/secboot.h>
 #include <subdev/fb.h>
 #include <subdev/mc.h>
 #include <subdev/pmu.h>
@@ -729,40 +731,6 @@ gf100_gr_zbc_init(struct gf100_gr *gr)
 		gf100_gr_zbc_clear_depth(gr, index);
 }
 
-/**
- * Wait until GR goes idle. GR is considered idle if it is disabled by the
- * MC (0x200) register, or GR is not busy and a context switch is not in
- * progress.
- */
-int
-gf100_gr_wait_idle(struct gf100_gr *gr)
-{
-	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000);
-	bool gr_enabled, ctxsw_active, gr_busy;
-
-	do {
-		/*
-		 * required to make sure FIFO_ENGINE_STATUS (0x2640) is
-		 * up-to-date
-		 */
-		nvkm_rd32(device, 0x400700);
-
-		gr_enabled = nvkm_rd32(device, 0x200) & 0x1000;
-		ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000;
-		gr_busy = nvkm_rd32(device, 0x40060c) & 0x1;
-
-		if (!gr_enabled || (!gr_busy && !ctxsw_active))
-			return 0;
-	} while (time_before(jiffies, end_jiffies));
-
-	nvkm_error(subdev,
-		   "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n",
-		   gr_enabled, ctxsw_active, gr_busy);
-	return -EAGAIN;
-}
-
 void
 gf100_gr_mmio(struct gf100_gr *gr, const struct gf100_gr_pack *p)
 {
@@ -806,7 +774,7 @@ gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
 			 * GO_IDLE bundle
 			 */
 			if ((addr & 0xffff) == 0xe100)
-				gf100_gr_wait_idle(gr);
+				gk104_gr_wait_idle(gr);
 			nvkm_msec(device, 2000,
 				if (!(nvkm_rd32(device, 0x400700) & 0x00000004))
 					break;
@@ -1427,21 +1395,48 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
+	u32 sec_flcns = 0;
+	int ret = 0;
 	int i;
 
 	if (gr->firmware) {
 		/* load fuc microcode */
 		nvkm_mc_unk260(device->mc, 0);
-		gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c, &gr->fuc409d);
-		gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac, &gr->fuc41ad);
+
+		/* securely-managed falcons must be reset using secure boot */
+		if (nvkm_secboot_is_managed(device->secboot,
+					    NVKM_SECBOOT_FALCON_FECS))
+			sec_flcns |= BIT(NVKM_SECBOOT_FALCON_FECS);
+		else
+			gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c,
+					 &gr->fuc409d);
+		if (nvkm_secboot_is_managed(device->secboot,
+					    NVKM_SECBOOT_FALCON_GPCCS))
+			sec_flcns |= BIT(NVKM_SECBOOT_FALCON_GPCCS);
+		else
+			gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac,
+					 &gr->fuc41ad);
+		if (sec_flcns)
+			ret = nvkm_secboot_reset(device->secboot, sec_flcns);
+		if (ret)
+			return ret;
+
 		nvkm_mc_unk260(device->mc, 1);
 
 		/* start both of them running */
 		nvkm_wr32(device, 0x409840, 0xffffffff);
 		nvkm_wr32(device, 0x41a10c, 0x00000000);
 		nvkm_wr32(device, 0x40910c, 0x00000000);
-		nvkm_wr32(device, 0x41a100, 0x00000002);
-		nvkm_wr32(device, 0x409100, 0x00000002);
+
+		/* Use FALCON_CPUCTL_ALIAS if falcon is in secure mode */
+		if (nvkm_rd32(device, 0x41a100) & 0x40)
+			nvkm_wr32(device, 0x41a130, 0x00000002);
+		else
+			nvkm_wr32(device, 0x41a100, 0x00000002);
+		if (nvkm_rd32(device, 0x409100) & 0x40)
+			nvkm_wr32(device, 0x409130, 0x00000002);
+		else
+			nvkm_wr32(device, 0x409100, 0x00000002);
 		if (nvkm_msec(device, 2000,
 			if (nvkm_rd32(device, 0x409800) & 0x00000001)
 				break;
@@ -1720,22 +1715,9 @@ gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname,
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	const struct firmware *fw;
-	char f[64];
-	char cname[16];
 	int ret;
-	int i;
-
-	/* Convert device name to lowercase */
-	strncpy(cname, device->chip->name, sizeof(cname));
-	cname[sizeof(cname) - 1] = '\0';
-	i = strlen(cname);
-	while (i) {
-		--i;
-		cname[i] = tolower(cname[i]);
-	}
 
-	snprintf(f, sizeof(f), "nvidia/%s/%s.bin", cname, fwname);
-	ret = request_firmware(&fw, f, device->dev);
+	ret = nvkm_firmware_get(device, fwname, &fw);
 	if (ret) {
 		nvkm_error(subdev, "failed to load %s\n", fwname);
 		return ret;
@@ -1743,7 +1725,7 @@ gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname,
 
 	fuc->size = fw->size;
 	fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL);
-	release_firmware(fw);
+	nvkm_firmware_put(fw);
 	return (fuc->data != NULL) ? 0 : -ENOMEM;
 }
 
@@ -1765,10 +1747,16 @@ gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
 
 	if (gr->firmware) {
 		nvkm_info(&gr->base.engine.subdev, "using external firmware\n");
-		if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) ||
-		    gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d) ||
-		    gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) ||
-		    gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad))
+		if (!nvkm_secboot_is_managed(device->secboot,
+					     NVKM_SECBOOT_FALCON_FECS)
+		     && (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) ||
+			 gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d)))
+			return -ENODEV;
+
+		if (!nvkm_secboot_is_managed(device->secboot,
+					     NVKM_SECBOOT_FALCON_GPCCS)
+		     && (gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) ||
+			 gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad)))
 			return -ENODEV;
 	}
 
diff --git a/drm/nouveau/nvkm/engine/gr/gf100.h b/drm/nouveau/nvkm/engine/gr/gf100.h
index 02e78b8d9..f42ac4650 100644
--- a/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -198,12 +198,13 @@ extern struct gf100_gr_ucode gf100_gr_gpccs_ucode;
 extern struct gf100_gr_ucode gk110_gr_fecs_ucode;
 extern struct gf100_gr_ucode gk110_gr_gpccs_ucode;
 
-int  gf100_gr_wait_idle(struct gf100_gr *);
 void gf100_gr_mmio(struct gf100_gr *, const struct gf100_gr_pack *);
 void gf100_gr_icmd(struct gf100_gr *, const struct gf100_gr_pack *);
 void gf100_gr_mthd(struct gf100_gr *, const struct gf100_gr_pack *);
 int  gf100_gr_init_ctxctl(struct gf100_gr *);
 
+int  gk104_gr_wait_idle(struct gf100_gr *);
+
 /* register init value lists */
 
 extern const struct gf100_gr_init gf100_gr_init_main_0[];
diff --git a/drm/nouveau/nvkm/engine/gr/gk104.c b/drm/nouveau/nvkm/engine/gr/gk104.c
index abf54928a..56f8d53e6 100644
--- a/drm/nouveau/nvkm/engine/gr/gk104.c
+++ b/drm/nouveau/nvkm/engine/gr/gk104.c
@@ -177,6 +177,40 @@ gk104_gr_pack_mmio[] = {
  * PGRAPH engine/subdev functions
  ******************************************************************************/
 
+/**
+ * Wait until GR goes idle. GR is considered idle if it is disabled by the
+ * MC (0x200) register, or GR is not busy and a context switch is not in
+ * progress.
+ */
+int
+gk104_gr_wait_idle(struct gf100_gr *gr)
+{
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000);
+	bool gr_enabled, ctxsw_active, gr_busy;
+
+	do {
+		/*
+		 * required to make sure FIFO_ENGINE_STATUS (0x2640) is
+		 * up-to-date
+		 */
+		nvkm_rd32(device, 0x400700);
+
+		gr_enabled = nvkm_rd32(device, 0x200) & 0x1000;
+		ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000;
+		gr_busy = nvkm_rd32(device, 0x40060c) & 0x1;
+
+		if (!gr_enabled || (!gr_busy && !ctxsw_active))
+			return 0;
+	} while (time_before(jiffies, end_jiffies));
+
+	nvkm_error(subdev,
+		   "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n",
+		   gr_enabled, ctxsw_active, gr_busy);
+	return -EAGAIN;
+}
+
 int
 gk104_gr_init(struct gf100_gr *gr)
 {
diff --git a/drm/nouveau/nvkm/engine/gr/gk20a.c b/drm/nouveau/nvkm/engine/gr/gk20a.c
index b8758d3b8..91a4d948e 100644
--- a/drm/nouveau/nvkm/engine/gr/gk20a.c
+++ b/drm/nouveau/nvkm/engine/gr/gk20a.c
@@ -198,7 +198,7 @@ gk20a_gr_init(struct gf100_gr *gr)
 	if (ret)
 		return ret;
 
-	ret = gf100_gr_wait_idle(gr);
+	ret = gk104_gr_wait_idle(gr);
 	if (ret)
 		return ret;
 
diff --git a/drm/nouveau/nvkm/engine/gr/gm204.c b/drm/nouveau/nvkm/engine/gr/gm204.c
index 90381dde4..69a3218bf 100644
--- a/drm/nouveau/nvkm/engine/gr/gm204.c
+++ b/drm/nouveau/nvkm/engine/gr/gm204.c
@@ -229,12 +229,6 @@ gm204_gr_data[] = {
  * PGRAPH engine/subdev functions
  ******************************************************************************/
 
-static int
-gm204_gr_init_ctxctl(struct gf100_gr *gr)
-{
-	return 0;
-}
-
 int
 gm204_gr_init(struct gf100_gr *gr)
 {
@@ -348,7 +342,7 @@ gm204_gr_init(struct gf100_gr *gr)
 
 	gf100_gr_zbc_init(gr);
 
-	return gm204_gr_init_ctxctl(gr);
+	return gf100_gr_init_ctxctl(gr);
 }
 
 static const struct gf100_gr_func
diff --git a/drm/nouveau/nvkm/engine/gr/gm20b.c b/drm/nouveau/nvkm/engine/gr/gm20b.c
index 65b6e3d1e..9f804c7ae 100644
--- a/drm/nouveau/nvkm/engine/gr/gm20b.c
+++ b/drm/nouveau/nvkm/engine/gr/gm20b.c
@@ -32,12 +32,15 @@ gm20b_gr_init_gpc_mmu(struct gf100_gr *gr)
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	u32 val;
 
-	/* TODO this needs to be removed once secure boot works */
-	if (1) {
+	/* Bypass MMU check for non-secure boot */
+	if (!device->secboot) {
 		nvkm_wr32(device, 0x100ce4, 0xffffffff);
+
+		if (nvkm_rd32(device, 0x100ce4) != 0xffffffff)
+			nvdev_warn(device,
+			  "cannot bypass secure boot - expect failure soon!\n");
 	}
 
-	/* TODO update once secure boot works */
 	val = nvkm_rd32(device, 0x100c80);
 	val &= 0xf000087f;
 	nvkm_wr32(device, 0x418880, val);
diff --git a/drm/nouveau/nvkm/engine/xtensa.c b/drm/nouveau/nvkm/engine/xtensa.c
index a3d4f5bce..608741a52 100644
--- a/drm/nouveau/nvkm/engine/xtensa.c
+++ b/drm/nouveau/nvkm/engine/xtensa.c
@@ -22,6 +22,7 @@
 #include <engine/xtensa.h>
 
 #include <core/gpuobj.h>
+#include <core/firmware.h>
 #include <engine/fifo.h>
 
 static int
@@ -104,10 +105,9 @@ nvkm_xtensa_init(struct nvkm_engine *engine)
 	u32 tmp;
 
 	if (!xtensa->gpu_fw) {
-		snprintf(name, sizeof(name), "nouveau/nv84_xuc%03x",
-			 xtensa->addr >> 12);
+		snprintf(name, sizeof(name), "xuc%03x", xtensa->addr >> 12);
 
-		ret = request_firmware(&fw, name, device->dev);
+		ret = nvkm_firmware_get(device, name, &fw);
 		if (ret) {
 			nvkm_warn(subdev, "unable to load firmware %s\n", name);
 			return ret;
@@ -115,7 +115,7 @@ nvkm_xtensa_init(struct nvkm_engine *engine)
 
 		if (fw->size > 0x40000) {
 			nvkm_warn(subdev, "firmware %s too large\n", name);
-			release_firmware(fw);
+			nvkm_firmware_put(fw);
 			return -EINVAL;
 		}
 
@@ -123,7 +123,7 @@ nvkm_xtensa_init(struct nvkm_engine *engine)
 				      0x40000, 0x1000, false,
 				      &xtensa->gpu_fw);
 		if (ret) {
-			release_firmware(fw);
+			nvkm_firmware_put(fw);
 			return ret;
 		}
 
@@ -131,7 +131,7 @@ nvkm_xtensa_init(struct nvkm_engine *engine)
 		for (i = 0; i < fw->size / 4; i++)
 			nvkm_wo32(xtensa->gpu_fw, i * 4, *((u32 *)fw->data + i));
 		nvkm_done(xtensa->gpu_fw);
-		release_firmware(fw);
+		nvkm_firmware_put(fw);
 	}
 
 	addr = nvkm_memory_addr(xtensa->gpu_fw);
diff --git a/drm/nouveau/nvkm/subdev/Kbuild b/drm/nouveau/nvkm/subdev/Kbuild
index ee2c38f50..ec8c5eb30 100644
--- a/drm/nouveau/nvkm/subdev/Kbuild
+++ b/drm/nouveau/nvkm/subdev/Kbuild
@@ -15,6 +15,7 @@ include $(src)/nvkm/subdev/mmu/Kbuild
 include $(src)/nvkm/subdev/mxm/Kbuild
 include $(src)/nvkm/subdev/pci/Kbuild
 include $(src)/nvkm/subdev/pmu/Kbuild
+include $(src)/nvkm/subdev/secboot/Kbuild
 include $(src)/nvkm/subdev/therm/Kbuild
 include $(src)/nvkm/subdev/timer/Kbuild
 include $(src)/nvkm/subdev/volt/Kbuild
diff --git a/drm/nouveau/nvkm/subdev/bios/shadow.c b/drm/nouveau/nvkm/subdev/bios/shadow.c
index b2557e87a..20814fc4d 100644
--- a/drm/nouveau/nvkm/subdev/bios/shadow.c
+++ b/drm/nouveau/nvkm/subdev/bios/shadow.c
@@ -24,6 +24,7 @@
 #include "priv.h"
 
 #include <core/option.h>
+#include <core/firmware.h>
 #include <subdev/bios.h>
 #include <subdev/bios/image.h>
 
@@ -143,9 +144,9 @@ shadow_fw_read(void *data, u32 offset, u32 length, struct nvkm_bios *bios)
 static void *
 shadow_fw_init(struct nvkm_bios *bios, const char *name)
 {
-	struct device *dev = bios->subdev.device->dev;
+	struct nvkm_device *device = bios->subdev.device;
 	const struct firmware *fw;
-	int ret = request_firmware(&fw, name, dev);
+	int ret = nvkm_firmware_get(device, name, &fw);
 	if (ret)
 		return ERR_PTR(-ENOENT);
 	return (void *)fw;
@@ -155,7 +156,7 @@ static const struct nvbios_source
 shadow_fw = {
 	.name = "firmware",
 	.init = shadow_fw_init,
-	.fini = (void(*)(void *))release_firmware,
+	.fini = (void(*)(void *))nvkm_firmware_put,
 	.read = shadow_fw_read,
 	.rw = false,
 };
diff --git a/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index 4c20fec64..6b8f2a19b 100644
--- a/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -228,6 +228,8 @@ gk20a_instobj_release_dma(struct nvkm_memory *memory)
 	struct gk20a_instmem *imem = node->imem;
 	struct nvkm_ltc *ltc = imem->base.subdev.device->ltc;
 
+	/* in case we got a write-combined mapping */
+	wmb();
 	nvkm_ltc_invalidate(ltc);
 }
 
diff --git a/drm/nouveau/nvkm/subdev/mmu/base.c b/drm/nouveau/nvkm/subdev/mmu/base.c
index e04a2296e..21f7df4f8 100644
--- a/drm/nouveau/nvkm/subdev/mmu/base.c
+++ b/drm/nouveau/nvkm/subdev/mmu/base.c
@@ -240,6 +240,8 @@ nvkm_vm_unmap_pgt(struct nvkm_vm *vm, int big, u32 fpde, u32 lpde)
 			mmu->func->map_pgt(vpgd->obj, pde, vpgt->mem);
 		}
 
+		mmu->func->flush(vm);
+
 		nvkm_memory_del(&pgt);
 	}
 }
@@ -266,6 +268,8 @@ nvkm_vm_map_pgt(struct nvkm_vm *vm, u32 pde, u32 type)
 		mmu->func->map_pgt(vpgd->obj, pde, vpgt->mem);
 	}
 
+	mmu->func->flush(vm);
+
 	vpgt->refcount[big]++;
 	return 0;
 }
diff --git a/drm/nouveau/nvkm/subdev/secboot/Kbuild b/drm/nouveau/nvkm/subdev/secboot/Kbuild
new file mode 100644
index 000000000..b02b868a6
--- /dev/null
+++ b/drm/nouveau/nvkm/subdev/secboot/Kbuild
@@ -0,0 +1,3 @@
+nvkm-y += nvkm/subdev/secboot/base.o
+nvkm-y += nvkm/subdev/secboot/gm200.o
+nvkm-y += nvkm/subdev/secboot/gm20b.o
diff --git a/drm/nouveau/nvkm/subdev/secboot/base.c b/drm/nouveau/nvkm/subdev/secboot/base.c
new file mode 100644
index 000000000..bd5f6a6a4
--- /dev/null
+++ b/drm/nouveau/nvkm/subdev/secboot/base.c
@@ -0,0 +1,283 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "priv.h"
+#include <subdev/timer.h>
+
+static const char *
+managed_falcons_names[] = {
+	[NVKM_SECBOOT_FALCON_PMU] = "PMU",
+	[NVKM_SECBOOT_FALCON_RESERVED] = "<invalid>",
+	[NVKM_SECBOOT_FALCON_FECS] = "FECS",
+	[NVKM_SECBOOT_FALCON_GPCCS] = "GPCCS",
+	[NVKM_SECBOOT_FALCON_END] = "<invalid>",
+};
+
+/*
+ * Helper falcon functions
+ */
+
+static int
+falcon_clear_halt_interrupt(struct nvkm_device *device, u32 base)
+{
+	int ret;
+
+	/* clear halt interrupt */
+	nvkm_mask(device, base + 0x004, 0x10, 0x10);
+	/* wait until halt interrupt is cleared */
+	ret = nvkm_wait_msec(device, 10, base + 0x008, 0x10, 0x0);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int
+falcon_wait_idle(struct nvkm_device *device, u32 base)
+{
+	int ret;
+
+	ret = nvkm_wait_msec(device, 10, base + 0x04c, 0xffff, 0x0);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int
+nvkm_secboot_falcon_enable(struct nvkm_secboot *sb)
+{
+	struct nvkm_device *device = sb->subdev.device;
+	int ret;
+
+	/* enable engine */
+	nvkm_mask(device, 0x200, sb->enable_mask, sb->enable_mask);
+	nvkm_rd32(device, 0x200);
+	ret = nvkm_wait_msec(device, 10, sb->base + 0x10c, 0x6, 0x0);
+	if (ret < 0) {
+		nvkm_mask(device, 0x200, sb->enable_mask, 0x0);
+		nvkm_error(&sb->subdev, "Falcon mem scrubbing timeout\n");
+		return ret;
+	}
+
+	ret = falcon_wait_idle(device, sb->base);
+	if (ret)
+		return ret;
+
+	/* enable IRQs */
+	nvkm_wr32(device, sb->base + 0x010, 0xff);
+	nvkm_mask(device, 0x640, sb->irq_mask, 0x1000000);
+	nvkm_mask(device, 0x644, sb->irq_mask, 0x1000000);
+
+	return 0;
+}
+
+static int
+nvkm_secboot_falcon_disable(struct nvkm_secboot *sb)
+{
+	struct nvkm_device *device = sb->subdev.device;
+	int ret;
+
+	ret = falcon_clear_halt_interrupt(device, sb->base);
+	if (ret)
+		return ret;
+
+	ret = falcon_wait_idle(device, sb->base);
+	if (ret)
+		return ret;
+
+	if ((nvkm_rd32(device, 0x200) & sb->enable_mask) != 0) {
+		/* disable IRQs */
+		nvkm_mask(device, 0x644, sb->irq_mask, 0x0);
+		nvkm_mask(device, 0x640, sb->irq_mask, 0x0);
+		nvkm_wr32(device, sb->base + 0x014, 0xff);
+		/* disable engine */
+		nvkm_mask(device, 0x200, sb->enable_mask, 0x0);
+	}
+
+	return 0;
+}
+
+int
+nvkm_secboot_falcon_reset(struct nvkm_secboot *sb)
+{
+	int ret;
+
+	ret = nvkm_secboot_falcon_disable(sb);
+	if (ret)
+		return ret;
+
+	ret = nvkm_secboot_falcon_enable(sb);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+/**
+ * nvkm_secboot_falcon_run - run the Falcon that will perform secure boot
+ *
+ * This function is to be called after all chip-specific preparations have
+ * been completed. It will start the falcon to perform secure boot, wait for
+ * it to halt, and report if an error occurred.
+ */
+int
+nvkm_secboot_falcon_run(struct nvkm_secboot *sb)
+{
+	struct nvkm_device *device = sb->subdev.device;
+	int ret;
+
+	/* Start falcon */
+	nvkm_wr32(device, sb->base + 0x100, 0x2);
+
+	/* Wait for falcon halt */
+	ret = nvkm_wait_msec(device, 100, sb->base + 0x100, 0x10, 0x10);
+	if (ret < 0)
+		return ret;
+
+	/* If mailbox register contains an error code, then ACR has failed */
+	ret = nvkm_rd32(device, sb->base + 0x040);
+	if (ret) {
+		nvkm_error(&sb->subdev, "ACR boot failed, ret %x", ret);
+		return -EINVAL;
+	}
+
+	ret = falcon_clear_halt_interrupt(device, sb->base);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+
+/**
+ * nvkm_secboot_reset() - reset specified falcons, perform secure boot if needed
+ *
+ * Calling this function ensures that the falcons specified in the falcons_mask
+ * bitmask are in a ready-to-run state in low-secure mode. The first time it is
+ * called, it may perform secure boot to initialize all the managed falcons ;
+ * subsequent calls may reset the falcon using a method of the managing falcon,
+ * or may perform secure boot again.
+ *
+ */
+int
+nvkm_secboot_reset(struct nvkm_secboot *sb, u32 falcons_mask)
+{
+	int ret = 0;
+
+	/* More falcons than we can manage? */
+	if ((falcons_mask & sb->func->managed_falcons) != falcons_mask) {
+		nvkm_error(&sb->subdev, "cannot reset unmanaged falcon!\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Dummy GM200 implementation: perform secure boot each time we are
+	 * called on FECS. Since only FECS and GPCCS are managed, this ought
+	 * to be safe.
+	 *
+	 * Once we have proper PMU firmware and support, this will be changed
+	 * to a proper call to the PMU method.
+	 */
+	if (falcons_mask & BIT(NVKM_SECBOOT_FALCON_FECS))
+		ret = sb->func->run(sb);
+
+	return ret;
+}
+
+/**
+ * nvkm_is_secure() - check whether a given falcon is securely-managed
+ */
+bool
+nvkm_secboot_is_managed(struct nvkm_secboot *secboot,
+			enum nvkm_secboot_falcon fid)
+{
+	if (!secboot)
+		return false;
+
+	return secboot->func->managed_falcons & BIT(fid);
+}
+
+static int
+nvkm_secboot_oneinit(struct nvkm_subdev *subdev)
+{
+	struct nvkm_secboot *sb = nvkm_secboot(subdev);
+	int ret;
+
+	/* Call chip-specific init function */
+	ret = sb->func->init(sb);
+	if (ret) {
+		nvkm_error(subdev, "Secure Boot initialization failed: %d\n",
+			   ret);
+		return ret;
+	}
+
+	/*
+	 * Prepare all blobs - the same blobs can be used to perform secure boot
+	 * multiple times
+	 */
+	return sb->func->prepare_blobs(sb);
+}
+
+static void *
+nvkm_secboot_dtor(struct nvkm_subdev *subdev)
+{
+	struct nvkm_secboot *sb = nvkm_secboot(subdev);
+
+	return sb->func->dtor(sb);
+}
+
+static const struct nvkm_subdev_func
+nvkm_secboot = {
+	.oneinit = nvkm_secboot_oneinit,
+	.dtor = nvkm_secboot_dtor,
+};
+
+int
+nvkm_secboot_ctor(const struct nvkm_secboot_func *func,
+		  struct nvkm_device *device, int index,
+		  struct nvkm_secboot *sb)
+{
+	unsigned long fid;
+
+	nvkm_subdev_ctor(&nvkm_secboot, device, index, 0, &sb->subdev);
+	sb->func = func;
+
+	/* setup the performing falcon's base address and masks */
+	switch (func->boot_falcon) {
+	case NVKM_SECBOOT_FALCON_PMU:
+		sb->base = 0x10a000;
+		sb->irq_mask = 0x1000000;
+		sb->enable_mask = 0x2000;
+		break;
+	default:
+		nvkm_error(&sb->subdev, "invalid secure boot falcon\n");
+		return -EINVAL;
+	};
+
+	nvkm_info(&sb->subdev, "securely managed falcons:\n");
+	for_each_set_bit(fid, &sb->func->managed_falcons,
+			 NVKM_SECBOOT_FALCON_END)
+		nvkm_info(&sb->subdev, "- %s\n", managed_falcons_names[fid]);
+
+	return 0;
+}
diff --git a/drm/nouveau/nvkm/subdev/secboot/gm200.c b/drm/nouveau/nvkm/subdev/secboot/gm200.c
new file mode 100644
index 000000000..5c90eaa61
--- /dev/null
+++ b/drm/nouveau/nvkm/subdev/secboot/gm200.c
@@ -0,0 +1,1309 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Secure boot is the process by which NVIDIA-signed firmware is loaded into
+ * some of the falcons of a GPU. For production devices this is the only way
+ * for the firmware to access useful (but sensitive) registers.
+ *
+ * A Falcon microprocessor supporting advanced security modes can run in one of
+ * three modes:
+ *
+ * - Non-secure (NS). In this mode, functionality is similar to Falcon
+ *   architectures before security modes were introduced (pre-Maxwell), but
+ *   capability is restricted. In particular, certain registers may be
+ *   inaccessible for reads and/or writes, and physical memory access may be
+ *   disabled (on certain Falcon instances). This is the only possible mode that
+ *   can be used if you don't have microcode cryptographically signed by NVIDIA.
+ *
+ * - Heavy Secure (HS). In this mode, the microprocessor is a black box - it's
+ *   not possible to read or write any Falcon internal state or Falcon registers
+ *   from outside the Falcon (for example, from the host system). The only way
+ *   to enable this mode is by loading microcode that has been signed by NVIDIA.
+ *   (The loading process involves tagging the IMEM block as secure, writing the
+ *   signature into a Falcon register, and starting execution. The hardware will
+ *   validate the signature, and if valid, grant HS privileges.)
+ *
+ * - Light Secure (LS). In this mode, the microprocessor has more privileges
+ *   than NS but fewer than HS. Some of the microprocessor state is visible to
+ *   host software to ease debugging. The only way to enable this mode is by HS
+ *   microcode enabling LS mode. Some privileges available to HS mode are not
+ *   available here. LS mode is introduced in GM20x.
+ *
+ * Secure boot consists in temporarily switchin a HS-capable falcon (typically
+ * PMU) into HS mode in order to validate the LS firmware of managed falcons,
+ * load it, and switch managed falcons into LS mode. Once secure boot completes,
+ * no falcon remains in HS mode.
+ *
+ * Secure boot requires a write-protected memory region (WPR) which can only be
+ * written by the secure falcon. On dGPU, the driver sets up the WPR region in
+ * video memory. On Tegra, it is set up by the bootloader and its location and
+ * size written into memory controller registers.
+ *
+ * The secure boot process takes place as follows:
+ *
+ * 1) A LS blob is constructed that contains all the LS firmwares we want to
+ *    load, along with their signatures and bootloaders.
+ *
+ * 2) A HS blob (also called ACR) is created that contains the signed HS
+ *    firmware in charge of loading the LS firmwares into their respective
+ *    falcons.
+ *
+ * 3) The HS blob is loaded (via its own bootloader) and executed on the
+ *    HS-capable falcon. It authenticates itself, switches the secure falcon to
+ *    HS mode and setup the WPR region around the LS blob (dGPU) or copies the
+ *    LS blob into the WPR region (Tegra).
+ *
+ * 4) The LS blob is now secure from all external tampering. The HS falcon
+ *    checks the signatures of the LS firmwares and, if valid, switches the
+ *    managed falcons to LS mode and makes them ready to run the LS firmware.
+ *
+ * 5) The managed falcons remain in LS mode and can be started.
+ *
+ */
+
+#include "priv.h"
+
+#include <core/gpuobj.h>
+#include <core/firmware.h>
+#include <subdev/fb.h>
+
+enum {
+	FALCON_DMAIDX_UCODE		= 0,
+	FALCON_DMAIDX_VIRT		= 1,
+	FALCON_DMAIDX_PHYS_VID		= 2,
+	FALCON_DMAIDX_PHYS_SYS_COH	= 3,
+	FALCON_DMAIDX_PHYS_SYS_NCOH	= 4,
+};
+
+/*
+ *
+ * LS blob structures
+ *
+ */
+
+/**
+ * struct lsf_ucode_desc - LS falcon signatures
+ * @prd_keys:		signature to use when the GPU is in production mode
+ * @dgb_keys:		signature to use when the GPU is in debug mode
+ * @b_prd_present:	whether the production key is present
+ * @b_dgb_present:	whether the debug key is present
+ * @falcon_id:		ID of the falcon the ucode applies to
+ *
+ * Directly loaded from a signature file.
+ */
+struct lsf_ucode_desc {
+	u8  prd_keys[2][16];
+	u8  dbg_keys[2][16];
+	u32 b_prd_present;
+	u32 b_dbg_present;
+	u32 falcon_id;
+};
+
+/**
+ * struct lsf_lsb_header - LS firmware header
+ * @signature:		signature to verify the firmware against
+ * @ucode_off:		offset of the ucode blob in the WPR region. The ucode
+ *                      blob contains the bootloader, code and data of the
+ *                      LS falcon
+ * @ucode_size:		size of the ucode blob, including bootloader
+ * @data_size:		size of the ucode blob data
+ * @bl_code_size:	size of the bootloader code
+ * @bl_imem_off:	offset in imem of the bootloader
+ * @bl_data_off:	offset of the bootloader data in WPR region
+ * @bl_data_size:	size of the bootloader data
+ * @app_code_off:	offset of the app code relative to ucode_off
+ * @app_code_size:	size of the app code
+ * @app_data_off:	offset of the app data relative to ucode_off
+ * @app_data_size:	size of the app data
+ * @flags:		flags for the secure bootloader
+ *
+ * This structure is written into the WPR region for each managed falcon. Each
+ * instance is referenced by the lsb_offset member of the corresponding
+ * lsf_wpr_header.
+ */
+struct lsf_lsb_header {
+	struct lsf_ucode_desc signature;
+	u32 ucode_off;
+	u32 ucode_size;
+	u32 data_size;
+	u32 bl_code_size;
+	u32 bl_imem_off;
+	u32 bl_data_off;
+	u32 bl_data_size;
+	u32 app_code_off;
+	u32 app_code_size;
+	u32 app_data_off;
+	u32 app_data_size;
+	u32 flags;
+#define LSF_FLAG_LOAD_CODE_AT_0		1
+#define LSF_FLAG_DMACTL_REQ_CTX		4
+#define LSF_FLAG_FORCE_PRIV_LOAD	8
+};
+
+/**
+ * struct lsf_wpr_header - LS blob WPR Header
+ * @falcon_id:		LS falcon ID
+ * @lsb_offset:		offset of the lsb_lsf_header in the WPR region
+ * @bootstrap_owner:	secure falcon reponsible for bootstrapping the LS falcon
+ * @lazy_bootstrap:	skip bootstrapping by ACR
+ * @status:		bootstrapping status
+ *
+ * An array of these is written at the beginning of the WPR region, one for
+ * each managed falcon. The array is terminated by an instance which falcon_id
+ * is LSF_FALCON_ID_INVALID.
+ */
+struct lsf_wpr_header {
+	u32  falcon_id;
+	u32  lsb_offset;
+	u32  bootstrap_owner;
+	u32  lazy_bootstrap;
+	u32  status;
+#define LSF_IMAGE_STATUS_NONE				0
+#define LSF_IMAGE_STATUS_COPY				1
+#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED		2
+#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED		3
+#define LSF_IMAGE_STATUS_VALIDATION_DONE		4
+#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED		5
+#define LSF_IMAGE_STATUS_BOOTSTRAP_READY		6
+};
+
+
+/**
+ * struct ls_ucode_desc - descriptor of firmware image
+ * @descriptor_size:		size of this descriptor
+ * @image_size:			size of the whole image
+ * @bootloader_start_offset:	start offset of the bootloader in ucode image
+ * @bootloader_size:		size of the bootloader
+ * @bootloader_imem_offset:	start off set of the bootloader in IMEM
+ * @bootloader_entry_point:	entry point of the bootloader in IMEM
+ * @app_start_offset:		start offset of the LS firmware
+ * @app_size:			size of the LS firmware's code and data
+ * @app_imem_offset:		offset of the app in IMEM
+ * @app_imem_entry:		entry point of the app in IMEM
+ * @app_dmem_offset:		offset of the data in DMEM
+ * @app_resident_code_offset:	offset of app code from app_start_offset
+ * @app_resident_code_size:	size of the code
+ * @app_resident_data_offset:	offset of data from app_start_offset
+ * @app_resident_data_size:	size of data
+ *
+ * A firmware image contains the code, data, and bootloader of a given LS
+ * falcon in a single blob. This structure describes where everything is.
+ *
+ * This can be generated from a (bootloader, code, data) set if they have
+ * been loaded separately, or come directly from a file. For the later case,
+ * we need to keep the fields that are unused by the code.
+ */
+struct ls_ucode_desc {
+	u32 descriptor_size;
+	u32 image_size;
+	u32 tools_version;
+	u32 app_version;
+	char date[64];
+	u32 bootloader_start_offset;
+	u32 bootloader_size;
+	u32 bootloader_imem_offset;
+	u32 bootloader_entry_point;
+	u32 app_start_offset;
+	u32 app_size;
+	u32 app_imem_offset;
+	u32 app_imem_entry;
+	u32 app_dmem_offset;
+	u32 app_resident_code_offset;
+	u32 app_resident_code_size;
+	u32 app_resident_data_offset;
+	u32 app_resident_data_size;
+	u32 nb_overlays;
+	struct {u32 start; u32 size; } load_ovl[32];
+	u32 compressed;
+};
+
+/**
+ * struct ls_ucode_img - temporary storage for loaded LS firmwares
+ * @node:		to link within lsf_ucode_mgr
+ * @falcon_id:		ID of the falcon this LS firmware is for
+ * @ucode_desc:		loaded or generated map of ucode_data
+ * @ucode_header:	header of the firmware
+ * @ucode_data:		firmware payload (code and data)
+ * @ucode_size:		size in bytes of data in ucode_data
+ * @wpr_header:		WPR header to be written to the LS blob
+ * @lsb_header:		LSB header to be written to the LS blob
+ *
+ * Preparing the WPR LS blob requires information about all the LS firmwares
+ * (size, etc) to be known. This structure contains all the data of one LS
+ * firmware.
+ */
+struct ls_ucode_img {
+	struct list_head node;
+	enum nvkm_secboot_falcon falcon_id;
+
+	struct ls_ucode_desc ucode_desc;
+	u32 *ucode_header;
+	u8 *ucode_data;
+	u32 ucode_size;
+
+	struct lsf_wpr_header wpr_header;
+	struct lsf_lsb_header lsb_header;
+};
+
+/**
+ * struct lsf_ucode_mgr - manager for all LS falcon firmwares
+ * @count:	number of managed LS falcons
+ * @wpr_size:	size of the required WPR region in bytes
+ * @img_list:	linked list of lsf_ucode_img
+ */
+struct ls_ucode_mgr {
+	u16 count;
+	u32 wpr_size;
+	struct list_head img_list;
+};
+
+/*
+ *
+ * HS blob structures
+ *
+ */
+
+/**
+ * struct hs_bin_hdr - header of HS firmware and bootloader files
+ * @bin_magic:		always 0x10de
+ * @bin_ver:		version of the bin format
+ * @bin_size:		entire image size including this header
+ * @header_offset:	offset of the firmware/bootloader header in the file
+ * @data_offset:	offset of the firmware/bootloader payload in the file
+ * @data_size:		size of the payload
+ *
+ * This header is located at the beginning of the HS firmware and HS bootloader
+ * files, to describe where the headers and data can be found.
+ */
+struct hsf_bin_hdr {
+	u32 bin_magic;
+	u32 bin_ver;
+	u32 bin_size;
+	u32 header_offset;
+	u32 data_offset;
+	u32 data_size;
+};
+
+/**
+ * struct hsf_bl_desc - HS firmware bootloader descriptor
+ * @bl_start_tag:		starting tag of bootloader
+ * @bl_desc_dmem_load_off:	DMEM offset of flcn_bl_dmem_desc
+ * @bl_code_off:		offset of code section
+ * @bl_code_size:		size of code section
+ * @bl_data_off:		offset of data section
+ * @bl_data_size:		size of data section
+ *
+ * This structure is embedded in the HS bootloader firmware file at
+ * hs_bin_hdr.header_offset to describe the IMEM and DMEM layout expected by the
+ * HS bootloader.
+ */
+struct hsf_bl_desc {
+	u32 bl_start_tag;
+	u32 bl_desc_dmem_load_off;
+	u32 bl_code_off;
+	u32 bl_code_size;
+	u32 bl_data_off;
+	u32 bl_data_size;
+};
+
+/**
+ * struct hsf_fw_header - HS firmware descriptor
+ * @sig_dbg_offset:	offset of the debug signature
+ * @sig_dbg_size:	size of the debug signature
+ * @sig_prod_offset:	offset of the production signature
+ * @sig_prod_size:	size of the production signature
+ * @patch_loc:		offset of the offset (sic) of where the signature is
+ * @patch_sig:		offset of the offset (sic) to add to sig_*_offset
+ * @hdr_offset:		offset of the load header (see struct hs_load_header)
+ * @hdr_size:		size of above header
+ *
+ * This structure is embedded in the HS firmware image at
+ * hs_bin_hdr.header_offset.
+ */
+struct hsf_fw_header {
+	u32 sig_dbg_offset;
+	u32 sig_dbg_size;
+	u32 sig_prod_offset;
+	u32 sig_prod_size;
+	u32 patch_loc;
+	u32 patch_sig;
+	u32 hdr_offset;
+	u32 hdr_size;
+};
+
+/**
+ * struct hsf_load_header - HS firmware loading header
+ *
+ * Data to be copied as-is into the struct flcn_bl_dmem_desc for the HS firmware
+ */
+struct hsf_load_header {
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 data_dma_base;
+	u32 data_size;
+	u32 reserved;
+	u32 sec_code_off;
+	u32 sec_code_size;
+};
+
+/**
+ * Convenience function to duplicate a firmware file in memory and check that
+ * it has the required minimum size.
+ */
+static void *
+gm200_secboot_load_firmware(struct nvkm_device *device, const char *name,
+		    size_t min_size)
+{
+	const struct firmware *fw;
+	void *blob;
+	int ret;
+
+	ret = nvkm_firmware_get(device, name, &fw);
+	if (ret)
+		return ERR_PTR(ret);
+	if (fw->size < min_size) {
+		nvkm_firmware_put(fw);
+		return ERR_PTR(-EINVAL);
+	}
+	blob = kmemdup(fw->data, fw->size, GFP_KERNEL);
+	nvkm_firmware_put(fw);
+	if (!blob)
+		return ERR_PTR(-ENOMEM);
+
+	return blob;
+}
+
+
+/*
+ * Low-secure blob creation
+ */
+
+#define BL_DESC_BLK_SIZE 256
+/**
+ * Build a ucode image and descriptor from provided bootloader, code and data.
+ *
+ * @bl:		bootloader image, including 16-bytes descriptor
+ * @code:	LS firmware code segment
+ * @data:	LS firmware data segment
+ * @desc:	ucode descriptor to be written
+ *
+ * Return: allocated ucode image with corresponding descriptor information. desc
+ *         is also updated to contain the right offsets within returned image.
+ */
+static void *
+ls_ucode_img_build(const struct firmware *bl, const struct firmware *code,
+		   const struct firmware *data, struct ls_ucode_desc *desc)
+{
+	struct {
+		u32 start_offset;
+		u32 size;
+		u32 imem_offset;
+		u32 entry_point;
+	} *bl_desc;
+	u32 *bl_image;
+	u32 pos = 0;
+	u8 *image;
+
+	bl_desc = (void *)bl->data;
+	bl_image = (void *)(bl_desc + 1);
+
+	desc->bootloader_start_offset = pos;
+	desc->bootloader_size = ALIGN(bl_desc->size, sizeof(u32));
+	desc->bootloader_imem_offset = bl_desc->imem_offset;
+	desc->bootloader_entry_point = bl_desc->entry_point;
+
+	pos = ALIGN(pos + desc->bootloader_size, BL_DESC_BLK_SIZE);
+	desc->app_start_offset = pos;
+	desc->app_size = ALIGN(code->size, BL_DESC_BLK_SIZE) +
+			 ALIGN(data->size, BL_DESC_BLK_SIZE);
+	desc->app_imem_offset = 0;
+	desc->app_imem_entry = 0;
+	desc->app_dmem_offset = 0;
+	desc->app_resident_code_offset = 0;
+	desc->app_resident_code_size = ALIGN(code->size, BL_DESC_BLK_SIZE);
+
+	pos = ALIGN(pos + desc->app_resident_code_size, BL_DESC_BLK_SIZE);
+	desc->app_resident_data_offset = pos - desc->app_start_offset;
+	desc->app_resident_data_size = ALIGN(data->size, BL_DESC_BLK_SIZE);
+
+	desc->image_size = ALIGN(bl_desc->size, BL_DESC_BLK_SIZE) +
+			   desc->app_size;
+
+	image = kzalloc(desc->image_size, GFP_KERNEL);
+	if (!image)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(image + desc->bootloader_start_offset, bl_image, bl_desc->size);
+	memcpy(image + desc->app_start_offset, code->data, code->size);
+	memcpy(image + desc->app_start_offset + desc->app_resident_data_offset,
+	       data->data, data->size);
+
+	return image;
+}
+
+/**
+ * ls_ucode_img_load_generic() - load and prepare a LS ucode image
+ *
+ * Load the LS microcode, bootloader and signature and pack them into a single
+ * blob. Also generate the corresponding ucode descriptor.
+ */
+static int
+ls_ucode_img_load_generic(struct nvkm_device *device,
+			  struct ls_ucode_img *img, const char *falcon_name,
+			  const u32 falcon_id)
+{
+	const struct firmware *bl, *code, *data;
+	struct lsf_ucode_desc *lsf_desc;
+	char f[64];
+	int ret;
+
+	img->ucode_header = NULL;
+
+	snprintf(f, sizeof(f), "%s_bl", falcon_name);
+	ret = nvkm_firmware_get(device, f, &bl);
+	if (ret)
+		goto error;
+
+	snprintf(f, sizeof(f), "%s_inst", falcon_name);
+	ret = nvkm_firmware_get(device, f, &code);
+	if (ret)
+		goto free_bl;
+
+	snprintf(f, sizeof(f), "%s_data", falcon_name);
+	ret = nvkm_firmware_get(device, f, &data);
+	if (ret)
+		goto free_inst;
+
+	img->ucode_data = ls_ucode_img_build(bl, code, data,
+					     &img->ucode_desc);
+	if (IS_ERR(img->ucode_data)) {
+		ret = PTR_ERR(img->ucode_data);
+		goto free_data;
+	}
+	img->ucode_size = img->ucode_desc.image_size;
+
+	snprintf(f, sizeof(f), "%s_sig", falcon_name);
+	lsf_desc = gm200_secboot_load_firmware(device, f, sizeof(*lsf_desc));
+	if (IS_ERR(lsf_desc)) {
+		ret = PTR_ERR(lsf_desc);
+		goto free_image;
+	}
+	/* not needed? the signature should already have the right value */
+	lsf_desc->falcon_id = falcon_id;
+	memcpy(&img->lsb_header.signature, lsf_desc, sizeof(*lsf_desc));
+	img->falcon_id = lsf_desc->falcon_id;
+	kfree(lsf_desc);
+
+	/* success path - only free requested firmware files */
+	goto free_data;
+
+free_image:
+	kfree(img->ucode_data);
+free_data:
+	nvkm_firmware_put(data);
+free_inst:
+	nvkm_firmware_put(code);
+free_bl:
+	nvkm_firmware_put(bl);
+error:
+	return ret;
+}
+
+static int
+ls_ucode_img_load_fecs(struct nvkm_device *device, struct ls_ucode_img *img)
+{
+	return ls_ucode_img_load_generic(device, img, "fecs",
+					 NVKM_SECBOOT_FALCON_FECS);
+}
+
+static int
+ls_ucode_img_load_gpccs(struct nvkm_device *device, struct ls_ucode_img *img)
+{
+	return ls_ucode_img_load_generic(device, img, "gpccs",
+					 NVKM_SECBOOT_FALCON_GPCCS);
+}
+
+/**
+ * ls_ucode_img_populate_bl_desc() - populate a DMEM BL descriptor for LS image
+ * @img:	ucode image to generate against
+ * @desc:	descriptor to populate
+ * @sb:		secure boot state to use for base addresses
+ *
+ * Populate the DMEM BL descriptor with the information contained in a
+ * ls_ucode_desc.
+ *
+ */
+static void
+ls_ucode_img_populate_bl_desc(struct ls_ucode_img *img, u64 wpr_addr,
+			      struct gm200_flcn_bl_desc *desc)
+{
+	struct ls_ucode_desc *pdesc = &img->ucode_desc;
+	u64 addr_base;
+
+	addr_base = wpr_addr + img->lsb_header.ucode_off +
+		    pdesc->app_start_offset;
+
+	memset(desc, 0, sizeof(*desc));
+	desc->ctx_dma = FALCON_DMAIDX_UCODE;
+	desc->code_dma_base.lo = lower_32_bits(
+		(addr_base + pdesc->app_resident_code_offset));
+	desc->code_dma_base.hi = upper_32_bits(
+		(addr_base + pdesc->app_resident_code_offset));
+	desc->non_sec_code_size = pdesc->app_resident_code_size;
+	desc->data_dma_base.lo = lower_32_bits(
+		(addr_base + pdesc->app_resident_data_offset));
+	desc->data_dma_base.hi = upper_32_bits(
+		(addr_base + pdesc->app_resident_data_offset));
+	desc->data_size = pdesc->app_resident_data_size;
+	desc->code_entry_point = pdesc->app_imem_entry;
+}
+
+typedef int (*lsf_load_func)(struct nvkm_device *, struct ls_ucode_img *);
+
+/**
+ * ls_ucode_img_load() - create a lsf_ucode_img and load it
+ */
+static struct ls_ucode_img *
+ls_ucode_img_load(struct nvkm_device *device, lsf_load_func load_func)
+{
+	struct ls_ucode_img *img;
+	int ret;
+
+	img = kzalloc(sizeof(*img), GFP_KERNEL);
+	if (!img)
+		return ERR_PTR(-ENOMEM);
+
+	ret = load_func(device, img);
+	if (ret) {
+		kfree(img);
+		return ERR_PTR(ret);
+	}
+
+	return img;
+}
+
+static const lsf_load_func lsf_load_funcs[] = {
+	[NVKM_SECBOOT_FALCON_END] = NULL, /* reserve enough space */
+	[NVKM_SECBOOT_FALCON_FECS] = ls_ucode_img_load_fecs,
+	[NVKM_SECBOOT_FALCON_GPCCS] = ls_ucode_img_load_gpccs,
+};
+
+#define LSF_LSB_HEADER_ALIGN 256
+#define LSF_BL_DATA_ALIGN 256
+#define LSF_BL_DATA_SIZE_ALIGN 256
+#define LSF_BL_CODE_SIZE_ALIGN 256
+#define LSF_UCODE_DATA_ALIGN 4096
+
+/**
+ * ls_ucode_img_fill_headers - fill the WPR and LSB headers of an image
+ * @gsb:	secure boot device used
+ * @img:	image to generate for
+ * @offset:	offset in the WPR region where this image starts
+ *
+ * Allocate space in the WPR area from offset and write the WPR and LSB headers
+ * accordingly.
+ *
+ * Return: offset at the end of this image.
+ */
+static u32
+ls_ucode_img_fill_headers(struct gm200_secboot *gsb, struct ls_ucode_img *img,
+			  u32 offset)
+{
+	struct lsf_wpr_header *whdr = &img->wpr_header;
+	struct lsf_lsb_header *lhdr = &img->lsb_header;
+	struct ls_ucode_desc *desc = &img->ucode_desc;
+
+	if (img->ucode_header) {
+		nvkm_fatal(&gsb->base.subdev,
+			    "images withough loader are not supported yet!\n");
+		return offset;
+	}
+
+	/* Fill WPR header */
+	whdr->falcon_id = img->falcon_id;
+	whdr->bootstrap_owner = gsb->base.func->boot_falcon;
+	whdr->status = LSF_IMAGE_STATUS_COPY;
+
+	/* Align, save off, and include an LSB header size */
+	offset = ALIGN(offset, LSF_LSB_HEADER_ALIGN);
+	whdr->lsb_offset = offset;
+	offset += sizeof(struct lsf_lsb_header);
+
+	/*
+	 * Align, save off, and include the original (static) ucode
+	 * image size
+	 */
+	offset = ALIGN(offset, LSF_UCODE_DATA_ALIGN);
+	lhdr->ucode_off = offset;
+	offset += img->ucode_size;
+
+	/*
+	 * For falcons that use a boot loader (BL), we append a loader
+	 * desc structure on the end of the ucode image and consider
+	 * this the boot loader data. The host will then copy the loader
+	 * desc args to this space within the WPR region (before locking
+	 * down) and the HS bin will then copy them to DMEM 0 for the
+	 * loader.
+	 */
+	lhdr->bl_code_size = ALIGN(desc->bootloader_size,
+				   LSF_BL_CODE_SIZE_ALIGN);
+	lhdr->ucode_size = ALIGN(desc->app_resident_data_offset,
+				 LSF_BL_CODE_SIZE_ALIGN) + lhdr->bl_code_size;
+	lhdr->data_size = ALIGN(desc->app_size, LSF_BL_CODE_SIZE_ALIGN) +
+				lhdr->bl_code_size - lhdr->ucode_size;
+	/*
+	 * Though the BL is located at 0th offset of the image, the VA
+	 * is different to make sure that it doesn't collide the actual
+	 * OS VA range
+	 */
+	lhdr->bl_imem_off = desc->bootloader_imem_offset;
+	lhdr->app_code_off = desc->app_start_offset +
+			     desc->app_resident_code_offset;
+	lhdr->app_code_size = desc->app_resident_code_size;
+	lhdr->app_data_off = desc->app_start_offset +
+			     desc->app_resident_data_offset;
+	lhdr->app_data_size = desc->app_resident_data_size;
+
+	lhdr->flags = 0;
+	if (img->falcon_id == gsb->base.func->boot_falcon)
+		lhdr->flags = LSF_FLAG_DMACTL_REQ_CTX;
+
+	/* GPCCS will be loaded using PRI */
+	if (img->falcon_id == NVKM_SECBOOT_FALCON_GPCCS)
+		lhdr->flags |= LSF_FLAG_FORCE_PRIV_LOAD;
+
+	/* Align (size bloat) and save off BL descriptor size */
+	lhdr->bl_data_size = ALIGN(sizeof(struct gm200_flcn_bl_desc),
+				   LSF_BL_DATA_SIZE_ALIGN);
+	/*
+	 * Align, save off, and include the additional BL data
+	 */
+	offset = ALIGN(offset, LSF_BL_DATA_ALIGN);
+	lhdr->bl_data_off = offset;
+	offset += lhdr->bl_data_size;
+
+	return offset;
+}
+
+static void
+ls_ucode_mgr_init(struct ls_ucode_mgr *mgr)
+{
+	memset(mgr, 0, sizeof(*mgr));
+	INIT_LIST_HEAD(&mgr->img_list);
+}
+
+static void
+ls_ucode_mgr_cleanup(struct ls_ucode_mgr *mgr)
+{
+	struct ls_ucode_img *img, *t;
+
+	list_for_each_entry_safe(img, t, &mgr->img_list, node) {
+		kfree(img->ucode_data);
+		kfree(img->ucode_header);
+		kfree(img);
+	}
+}
+
+static void
+ls_ucode_mgr_add_img(struct ls_ucode_mgr *mgr, struct ls_ucode_img *img)
+{
+	mgr->count++;
+	list_add_tail(&img->node, &mgr->img_list);
+}
+
+/**
+ * ls_ucode_mgr_fill_headers - fill WPR and LSB headers of all managed images
+ */
+static void
+ls_ucode_mgr_fill_headers(struct gm200_secboot *gsb, struct ls_ucode_mgr *mgr)
+{
+	struct ls_ucode_img *img;
+	u32 offset;
+
+	/*
+	 * Start with an array of WPR headers at the base of the WPR.
+	 * The expectation here is that the secure falcon will do a single DMA
+	 * read of this array and cache it internally so it's ok to pack these.
+	 * Also, we add 1 to the falcon count to indicate the end of the array.
+	 */
+	offset = sizeof(struct lsf_wpr_header) * (mgr->count + 1);
+
+	/*
+	 * Walk the managed falcons, accounting for the LSB structs
+	 * as well as the ucode images.
+	 */
+	list_for_each_entry(img, &mgr->img_list, node) {
+		offset = ls_ucode_img_fill_headers(gsb, img, offset);
+	}
+
+	mgr->wpr_size = offset;
+}
+
+/**
+ * ls_ucode_mgr_write_wpr - write the WPR blob contents
+ */
+static int
+ls_ucode_mgr_write_wpr(struct gm200_secboot *gsb, struct ls_ucode_mgr *mgr,
+		       struct nvkm_gpuobj *wpr_blob)
+{
+	struct ls_ucode_img *img;
+	u32 pos = 0;
+
+	nvkm_kmap(wpr_blob);
+
+	list_for_each_entry(img, &mgr->img_list, node) {
+		nvkm_gpuobj_memcpy_to(wpr_blob, pos, &img->wpr_header,
+				      sizeof(img->wpr_header));
+
+		nvkm_gpuobj_memcpy_to(wpr_blob, img->wpr_header.lsb_offset,
+				     &img->lsb_header, sizeof(img->lsb_header));
+
+		/* Generate and write BL descriptor */
+		if (!img->ucode_header) {
+			u8 desc[gsb->bl_desc_size];
+			struct gm200_flcn_bl_desc gdesc;
+
+			ls_ucode_img_populate_bl_desc(img, gsb->wpr_addr,
+						      &gdesc);
+			gsb->fixup_bl_desc(&gdesc, &desc);
+			nvkm_gpuobj_memcpy_to(wpr_blob,
+					      img->lsb_header.bl_data_off,
+					      &desc, gsb->bl_desc_size);
+		}
+
+		/* Copy ucode */
+		nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.ucode_off,
+				      img->ucode_data, img->ucode_size);
+
+		pos += sizeof(img->wpr_header);
+	}
+
+	nvkm_wo32(wpr_blob, pos, NVKM_SECBOOT_FALCON_INVALID);
+
+	nvkm_done(wpr_blob);
+
+	return 0;
+}
+
+/* Both size and address of WPR need to be 128K-aligned */
+#define WPR_ALIGNMENT	0x20000
+/**
+ * gm200_secboot_prepare_ls_blob() - prepare the LS blob
+ *
+ * For each securely managed falcon, load the FW, signatures and bootloaders and
+ * prepare a ucode blob. Then, compute the offsets in the WPR region for each
+ * blob, and finally write the headers and ucode blobs into a GPU object that
+ * will be copied into the WPR region by the HS firmware.
+ */
+static int
+gm200_secboot_prepare_ls_blob(struct gm200_secboot *gsb)
+{
+	struct nvkm_secboot *sb = &gsb->base;
+	struct nvkm_device *device = sb->subdev.device;
+	struct ls_ucode_mgr mgr;
+	int falcon_id;
+	int ret;
+
+	ls_ucode_mgr_init(&mgr);
+
+	/* Load all LS blobs */
+	for_each_set_bit(falcon_id, &gsb->base.func->managed_falcons,
+			 NVKM_SECBOOT_FALCON_END) {
+		struct ls_ucode_img *img;
+
+		img = ls_ucode_img_load(device, lsf_load_funcs[falcon_id]);
+
+		if (IS_ERR(img)) {
+			ret = PTR_ERR(img);
+			goto cleanup;
+		}
+		ls_ucode_mgr_add_img(&mgr, img);
+	}
+
+	/*
+	 * Fill the WPR and LSF headers with the right offsets and compute
+	 * required WPR size
+	 */
+	ls_ucode_mgr_fill_headers(gsb, &mgr);
+	mgr.wpr_size = ALIGN(mgr.wpr_size, WPR_ALIGNMENT);
+
+	/* Allocate GPU object that will contain the WPR region */
+	ret = nvkm_gpuobj_new(device, mgr.wpr_size, WPR_ALIGNMENT, false, NULL,
+			      &gsb->ls_blob);
+	if (ret)
+		goto cleanup;
+
+	nvkm_debug(&sb->subdev, "%d managed LS falcons, WPR size is %d bytes\n",
+		    mgr.count, mgr.wpr_size);
+
+	/* If WPR address and size are not fixed, set them to fit the LS blob */
+	if (!gsb->wpr_size) {
+		gsb->wpr_addr = gsb->ls_blob->addr;
+		gsb->wpr_size = gsb->ls_blob->size;
+	}
+
+	/* Write LS blob */
+	ret = ls_ucode_mgr_write_wpr(gsb, &mgr, gsb->ls_blob);
+
+cleanup:
+	ls_ucode_mgr_cleanup(&mgr);
+
+	return ret;
+}
+
+/*
+ * High-secure blob creation
+ */
+
+/**
+ * gm200_secboot_hsf_patch_signature() - patch HS blob with correct signature
+ */
+static void
+gm200_secboot_hsf_patch_signature(struct gm200_secboot *gsb, void *acr_image)
+{
+	struct nvkm_secboot *sb = &gsb->base;
+	struct hsf_bin_hdr *hsbin_hdr = acr_image;
+	struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset;
+	void *hs_data = acr_image + hsbin_hdr->data_offset;
+	u32 patch_loc;
+	u32 patch_sig;
+	void *sig;
+	u32 sig_size;
+
+	patch_loc = *(u32 *)(acr_image + fw_hdr->patch_loc);
+	patch_sig = *(u32 *)(acr_image + fw_hdr->patch_sig);
+
+	/* Falcon in debug or production mode? */
+	if ((nvkm_rd32(sb->subdev.device, sb->base + 0xc08) >> 20) & 0x1) {
+		sig = acr_image + fw_hdr->sig_dbg_offset;
+		sig_size = fw_hdr->sig_dbg_size;
+	} else {
+		sig = acr_image + fw_hdr->sig_prod_offset;
+		sig_size = fw_hdr->sig_prod_size;
+	}
+
+	/* Patch signature */
+	memcpy(hs_data + patch_loc, sig + patch_sig, sig_size);
+}
+
+static void
+gm200_secboot_fixup_hs_desc(struct gm200_secboot *gsb,
+			    struct hsflcn_acr_desc *desc)
+{
+	desc->ucode_blob_base = gsb->ls_blob->addr;
+	desc->ucode_blob_size = gsb->ls_blob->size;
+
+	desc->wpr_offset = 0;
+
+	/* WPR region information for the HS binary to set up */
+	desc->wpr_region_id = 1;
+	desc->regions.no_regions = 1;
+	desc->regions.region_props[0].region_id = 1;
+	desc->regions.region_props[0].start_addr = gsb->wpr_addr >> 8;
+	desc->regions.region_props[0].end_addr =
+		(gsb->wpr_addr + gsb->wpr_size) >> 8;
+}
+
+/**
+ * gm200_secboot_populate_hsf_bl_desc() - populate BL descriptor for HS image
+ */
+static void
+gm200_secboot_populate_hsf_bl_desc(void *acr_image,
+				   struct gm200_flcn_bl_desc *bl_desc)
+{
+	struct hsf_bin_hdr *hsbin_hdr = acr_image;
+	struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset;
+	struct hsf_load_header *load_hdr = acr_image + fw_hdr->hdr_offset;
+
+	/*
+	 * Descriptor for the bootloader that will load the ACR image into
+	 * IMEM/DMEM memory.
+	 */
+	fw_hdr = acr_image + hsbin_hdr->header_offset;
+	load_hdr = acr_image + fw_hdr->hdr_offset;
+	memset(bl_desc, 0, sizeof(*bl_desc));
+	bl_desc->ctx_dma = FALCON_DMAIDX_VIRT;
+	bl_desc->non_sec_code_off = load_hdr->non_sec_code_off;
+	bl_desc->non_sec_code_size = load_hdr->non_sec_code_size;
+	bl_desc->sec_code_off = load_hdr->sec_code_off;
+	bl_desc->sec_code_size = load_hdr->sec_code_size;
+	bl_desc->code_entry_point = 0;
+	/*
+	 * We need to set code_dma_base to the virtual address of the acr_blob,
+	 * and add this address to data_dma_base before writing it into DMEM
+	 */
+	bl_desc->code_dma_base.lo = 0;
+	bl_desc->data_dma_base.lo = load_hdr->data_dma_base;
+	bl_desc->data_size = load_hdr->data_size;
+}
+
+static int
+gm200_secboot_prepare_hs_blob(struct gm200_secboot *gsb)
+{
+	struct nvkm_device *device = gsb->base.subdev.device;
+	void *acr_image;
+	struct hsf_bin_hdr *hsbin_hdr;
+	struct hsf_fw_header *fw_hdr;
+	struct hsf_load_header *load_hdr;
+	struct hsflcn_acr_desc *desc;
+	u32 img_size;
+	int ret;
+
+	acr_image = gm200_secboot_load_firmware(device, "acr_ucode_load", 0);
+	if (IS_ERR(acr_image))
+		return PTR_ERR(acr_image);
+	hsbin_hdr = acr_image;
+
+	/* Patch signature */
+	gm200_secboot_hsf_patch_signature(gsb, acr_image);
+
+	/* Patch descriptor */
+	fw_hdr = acr_image + hsbin_hdr->header_offset;
+	load_hdr = acr_image + fw_hdr->hdr_offset;
+	desc = acr_image + hsbin_hdr->data_offset + load_hdr->data_dma_base;
+	gsb->fixup_hs_desc(gsb, desc);
+
+	/* Generate HS BL descriptor */
+	gm200_secboot_populate_hsf_bl_desc(acr_image, &gsb->acr_bl_desc);
+
+	/* Create ACR blob and copy HS data to it */
+	img_size = ALIGN(hsbin_hdr->data_size, 256);
+	ret = nvkm_gpuobj_new(device, img_size, 0x1000, false, NULL,
+			      &gsb->acr_blob);
+	if (ret)
+		goto cleanup;
+
+	nvkm_kmap(gsb->acr_blob);
+	nvkm_gpuobj_memcpy_to(gsb->acr_blob, 0,
+			      acr_image + hsbin_hdr->data_offset, img_size);
+	nvkm_done(gsb->acr_blob);
+
+cleanup:
+	kfree(acr_image);
+
+	return ret;
+}
+
+/*
+ * High-secure bootloader blob creation
+ */
+
+static int
+gm200_secboot_prepare_hsbl_blob(struct gm200_secboot *gsb)
+{
+	struct nvkm_device *device = gsb->base.subdev.device;
+
+	gsb->hsbl_blob = gm200_secboot_load_firmware(device, "acr_bl", 0);
+	if (IS_ERR(gsb->hsbl_blob)) {
+		int ret = PTR_ERR(gsb->hsbl_blob);
+
+		gsb->hsbl_blob = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+int
+gm200_secboot_prepare_blobs(struct nvkm_secboot *sb)
+{
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+	int ret;
+
+	/* Load and prepare the managed falcon's firmwares */
+	ret = gm200_secboot_prepare_ls_blob(gsb);
+	if (ret)
+		return ret;
+
+	/* Load the HS firmware for the performing falcon */
+	ret = gm200_secboot_prepare_hs_blob(gsb);
+	if (ret)
+		return ret;
+
+	/* Load the HS firmware bootloader */
+	ret = gm200_secboot_prepare_hsbl_blob(gsb);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+
+
+/*
+ * Secure Boot Execution
+ */
+
+/**
+ * gm200_secboot_load_hs_bl() - load HS bootloader into DMEM and IMEM
+ */
+static void
+gm200_secboot_load_hs_bl(struct nvkm_secboot *sb, void *data, u32 data_size)
+{
+	struct nvkm_device *device = sb->subdev.device;
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+	struct hsf_bin_hdr *hdr = gsb->hsbl_blob;
+	struct hsf_bl_desc *hsbl_desc = gsb->hsbl_blob + hdr->header_offset;
+	void *hsbl_code = gsb->hsbl_blob + hdr->data_offset;
+	u32 code_size = ALIGN(hsbl_desc->bl_code_size, 256);
+	u32 blk;
+	u32 tag;
+	int i;
+
+	/*
+	 * Copy HS bootloader interface structure where the HS descriptor
+	 * expects it to be
+	 */
+	nvkm_wr32(device, sb->base + 0x1c0,
+		  (hsbl_desc->bl_desc_dmem_load_off | (0x1 << 24)));
+	for (i = 0; i < data_size / 4; i++)
+		nvkm_wr32(device, sb->base + 0x1c4, ((u32 *)data)[i]);
+
+	/* Copy HS bootloader code to end of IMEM */
+	blk = (nvkm_rd32(device, sb->base + 0x108) & 0x1ff) - (code_size >> 8);
+	tag = hsbl_desc->bl_start_tag;
+	nvkm_wr32(device, sb->base + 0x180, ((blk & 0xff) << 8) | (0x1 << 24));
+	for (i = 0; i < code_size / 4; i++) {
+		/* write new tag every 256B */
+		if ((i & 0x3f) == 0) {
+			nvkm_wr32(device, sb->base + 0x188, tag & 0xffff);
+			tag++;
+		}
+		nvkm_wr32(device, sb->base + 0x184, ((u32 *)hsbl_code)[i]);
+	}
+	nvkm_wr32(device, sb->base + 0x188, 0);
+}
+
+/**
+ * gm200_secboot_setup_falcon() - set up the secure falcon for secure boot
+ */
+static int
+gm200_secboot_setup_falcon(struct nvkm_secboot *sb)
+{
+	struct nvkm_device *device = sb->subdev.device;
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+	struct hsf_bin_hdr *hdr = gsb->hsbl_blob;
+	struct hsf_bl_desc *hsbl_desc = gsb->hsbl_blob + hdr->header_offset;
+	/* virtual start address for boot vector */
+	u32 virt_addr = hsbl_desc->bl_start_tag << 8;
+	const u32 reg_base = sb->base + 0xe00;
+	u32 inst_loc;
+	int ret;
+
+	ret = nvkm_secboot_falcon_reset(sb);
+	if (ret)
+		return ret;
+
+	/* setup apertures - virtual */
+	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_UCODE), 0x4);
+	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_VIRT), 0x0);
+	/* setup apertures - physical */
+	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_VID), 0x4);
+	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_SYS_COH),
+		  0x4 | 0x1);
+	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_SYS_NCOH),
+		  0x4 | 0x2);
+
+	/* Set context */
+	if (device->fb->ram)
+		inst_loc = 0x0; /* FB */
+	else
+		inst_loc = 0x3; /* Non-coherent sysmem */
+
+	nvkm_mask(device, sb->base + 0x048, 0x1, 0x1);
+	nvkm_wr32(device, sb->base + 0x480,
+		  ((gsb->inst->addr >> 12) & 0xfffffff) |
+		  (inst_loc << 28) | (1 << 30));
+
+	/* Set boot vector to code's starting virtual address */
+	nvkm_wr32(device, sb->base + 0x104, virt_addr);
+
+	return 0;
+}
+
+/*
+ * gm200_secboot_run() - execute secure boot from the prepared state
+ *
+ * Load the HS bootloader and ask the falcon to run it. This will in turn
+ * load the HS firmware and run it, so once the falcon stops all the managed
+ * falcons should have their LS firmware loaded and be ready to run.
+ */
+int
+gm200_secboot_run(struct nvkm_secboot *sb)
+{
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+	struct gm200_flcn_bl_desc *desc = &gsb->acr_bl_desc;
+	struct nvkm_vma *vma = &gsb->acr_blob_vma;
+	u64 vma_addr;
+	u8 bl_desc[gsb->bl_desc_size];
+	int ret;
+
+	/* Map the HS firmware so the HS bootloader can see it */
+	ret = nvkm_gpuobj_map(gsb->acr_blob, gsb->vm, NV_MEM_ACCESS_RW,
+			      &gsb->acr_blob_vma);
+	if (ret)
+		return ret;
+
+	/* Add the mapping address to the DMA bases */
+	vma_addr = flcn64_to_u64(desc->code_dma_base) + vma->offset;
+	desc->code_dma_base.lo = lower_32_bits(vma_addr);
+	desc->code_dma_base.hi = upper_32_bits(vma_addr);
+	vma_addr = flcn64_to_u64(desc->data_dma_base) + vma->offset;
+	desc->data_dma_base.lo = lower_32_bits(vma_addr);
+	desc->data_dma_base.hi = upper_32_bits(vma_addr);
+
+	/* Fixup the BL header */
+	gsb->fixup_bl_desc(&gsb->acr_bl_desc, &bl_desc);
+
+	/* Reset the falcon and make it ready to run the HS bootloader */
+	ret = gm200_secboot_setup_falcon(sb);
+	if (ret)
+		goto done;
+
+	/* Load the HS bootloader into the falcon's IMEM/DMEM */
+	gm200_secboot_load_hs_bl(sb, &bl_desc, gsb->bl_desc_size);
+
+	/* Start the HS bootloader */
+	ret = nvkm_secboot_falcon_run(sb);
+	if (ret)
+		goto done;
+
+done:
+	/* Restore the original DMA addresses */
+	vma_addr = flcn64_to_u64(desc->code_dma_base) - vma->offset;
+	desc->code_dma_base.lo = lower_32_bits(vma_addr);
+	desc->code_dma_base.hi = upper_32_bits(vma_addr);
+	vma_addr = flcn64_to_u64(desc->data_dma_base) - vma->offset;
+	desc->data_dma_base.lo = lower_32_bits(vma_addr);
+	desc->data_dma_base.hi = upper_32_bits(vma_addr);
+
+	/* We don't need the ACR firmware anymore */
+	nvkm_gpuobj_unmap(&gsb->acr_blob_vma);
+
+	return ret;
+}
+
+
+
+int
+gm200_secboot_init(struct nvkm_secboot *sb)
+{
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+	struct nvkm_device *device = sb->subdev.device;
+	struct nvkm_vm *vm;
+	const u64 vm_area_len = 600 * 1024;
+	int ret;
+
+	/* Allocate instance block and VM */
+	ret = nvkm_gpuobj_new(device, 0x1000, 0, true, NULL, &gsb->inst);
+	if (ret)
+		return ret;
+
+	ret = nvkm_gpuobj_new(device, 0x8000, 0, true, NULL, &gsb->pgd);
+	if (ret)
+		return ret;
+
+	ret = nvkm_vm_new(device, 0, vm_area_len, 0, NULL, &vm);
+	if (ret)
+		return ret;
+
+	atomic_inc(&vm->engref[NVKM_SUBDEV_PMU]);
+
+	ret = nvkm_vm_ref(vm, &gsb->vm, gsb->pgd);
+	nvkm_vm_ref(NULL, &vm, NULL);
+	if (ret)
+		return ret;
+
+	nvkm_kmap(gsb->inst);
+	nvkm_wo32(gsb->inst, 0x200, lower_32_bits(gsb->pgd->addr));
+	nvkm_wo32(gsb->inst, 0x204, upper_32_bits(gsb->pgd->addr));
+	nvkm_wo32(gsb->inst, 0x208, lower_32_bits(vm_area_len - 1));
+	nvkm_wo32(gsb->inst, 0x20c, upper_32_bits(vm_area_len - 1));
+	nvkm_done(gsb->inst);
+
+	return 0;
+}
+
+void *
+gm200_secboot_dtor(struct nvkm_secboot *sb)
+{
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+
+	kfree(gsb->hsbl_blob);
+	nvkm_gpuobj_del(&gsb->acr_blob);
+	nvkm_gpuobj_del(&gsb->ls_blob);
+
+	nvkm_vm_ref(NULL, &gsb->vm, gsb->pgd);
+	nvkm_gpuobj_del(&gsb->pgd);
+	nvkm_gpuobj_del(&gsb->inst);
+
+	return gsb;
+}
+
+
+static const struct nvkm_secboot_func
+gm200_secboot = {
+	.dtor = gm200_secboot_dtor,
+	.init = gm200_secboot_init,
+	.prepare_blobs = gm200_secboot_prepare_blobs,
+	.run = gm200_secboot_run,
+	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS) |
+			   BIT(NVKM_SECBOOT_FALCON_GPCCS),
+	.boot_falcon = NVKM_SECBOOT_FALCON_PMU,
+};
+
+/**
+ * gm200_fixup_bl_desc - just copy the BL descriptor
+ *
+ * Use the GM200 descriptor format by default.
+ */
+static void
+gm200_fixup_bl_desc(const struct gm200_flcn_bl_desc *desc, void *ret)
+{
+	memcpy(ret, desc, sizeof(*desc));
+}
+
+int
+gm200_secboot_new(struct nvkm_device *device, int index,
+		  struct nvkm_secboot **psb)
+{
+	int ret;
+	struct gm200_secboot *gsb;
+
+	gsb = kzalloc(sizeof(*gsb), GFP_KERNEL);
+	if (!gsb) {
+		psb = NULL;
+		return -ENOMEM;
+	}
+	*psb = &gsb->base;
+
+	ret = nvkm_secboot_ctor(&gm200_secboot, device, index, &gsb->base);
+	if (ret)
+		return ret;
+
+	gsb->bl_desc_size = sizeof(struct gm200_flcn_bl_desc);
+	gsb->fixup_bl_desc = gm200_fixup_bl_desc;
+	gsb->fixup_hs_desc = gm200_secboot_fixup_hs_desc;
+
+	return 0;
+}
diff --git a/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drm/nouveau/nvkm/subdev/secboot/gm20b.c
new file mode 100644
index 000000000..d34a53491
--- /dev/null
+++ b/drm/nouveau/nvkm/subdev/secboot/gm20b.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "priv.h"
+
+#include <core/gpuobj.h>
+
+/*
+ * The BL header format used by GM20B's firmware is slightly different
+ * from the one of GM200. Fix the differences here.
+ */
+struct gm20b_flcn_bl_desc {
+	u32 reserved[4];
+	u32 signature[4];
+	u32 ctx_dma;
+	u32 code_dma_base;
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 sec_code_off;
+	u32 sec_code_size;
+	u32 code_entry_point;
+	u32 data_dma_base;
+	u32 data_size;
+};
+
+/**
+ * gm20b_secboot_fixup_bl_desc - adapt BL descriptor to format used by GM20B FW
+ *
+ * There is only a slight format difference (DMA addresses being 32-bits and
+ * 256B-aligned) to address.
+ */
+static void
+gm20b_secboot_fixup_bl_desc(const struct gm200_flcn_bl_desc *desc, void *ret)
+{
+	struct gm20b_flcn_bl_desc *gdesc = ret;
+	u64 addr;
+
+	memcpy(gdesc->reserved, desc->reserved, sizeof(gdesc->reserved));
+	memcpy(gdesc->signature, desc->signature, sizeof(gdesc->signature));
+	gdesc->ctx_dma = desc->ctx_dma;
+	addr = desc->code_dma_base.hi;
+	addr <<= 32;
+	addr |= desc->code_dma_base.lo;
+	gdesc->code_dma_base = lower_32_bits(addr >> 8);
+	gdesc->non_sec_code_off = desc->non_sec_code_off;
+	gdesc->non_sec_code_size = desc->non_sec_code_size;
+	gdesc->sec_code_off = desc->sec_code_off;
+	gdesc->sec_code_size = desc->sec_code_size;
+	gdesc->code_entry_point = desc->code_entry_point;
+	addr = desc->data_dma_base.hi;
+	addr <<= 32;
+	addr |= desc->data_dma_base.lo;
+	gdesc->data_dma_base = lower_32_bits(addr >> 8);
+	gdesc->data_size = desc->data_size;
+}
+
+static void
+gm20b_secboot_fixup_hs_desc(struct gm200_secboot *gsb,
+			    struct hsflcn_acr_desc *desc)
+{
+	desc->ucode_blob_base = gsb->ls_blob->addr;
+	desc->ucode_blob_size = gsb->ls_blob->size;
+
+	desc->wpr_offset = 0;
+}
+
+#ifdef CONFIG_ARCH_TEGRA
+/* TODO Should this be handled by the Tegra MC driver? */
+#define TEGRA_MC_BASE				0x70019000
+#define MC_SECURITY_CARVEOUT2_CFG0		0xc58
+#define MC_SECURITY_CARVEOUT2_BOM_0		0xc5c
+#define MC_SECURITY_CARVEOUT2_BOM_HI_0		0xc60
+#define MC_SECURITY_CARVEOUT2_SIZE_128K		0xc64
+#define TEGRA_MC_SECURITY_CARVEOUT_CFG_LOCKED	(1 << 1)
+/**
+ * sb_tegra_read_wpr() - read the WPR registers on Tegra
+ *
+ * On dGPU, we can manage the WPR region ourselves, but on Tegra the WPR region
+ * is reserved from system memory by the bootloader and irreversibly locked.
+ * This function reads the address and size of the pre-configured WPR region.
+ */
+static int
+gm20b_tegra_read_wpr(struct gm200_secboot *gsb)
+{
+	struct nvkm_secboot *sb = &gsb->base;
+	void __iomem *mc;
+	u32 cfg;
+
+	mc = ioremap(TEGRA_MC_BASE, 0xd00);
+	if (!mc) {
+		nvkm_error(&sb->subdev, "Cannot map Tegra MC registers\n");
+		return PTR_ERR(mc);
+	}
+	gsb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) |
+	      ((u64)ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_HI_0) << 32);
+	gsb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K)
+		<< 17;
+	cfg = ioread32_native(mc + MC_SECURITY_CARVEOUT2_CFG0);
+	iounmap(mc);
+
+	/* Check that WPR settings are valid */
+	if (gsb->wpr_size == 0) {
+		nvkm_error(&sb->subdev, "WPR region is empty\n");
+		return -EINVAL;
+	}
+
+	if (!(cfg & TEGRA_MC_SECURITY_CARVEOUT_CFG_LOCKED)) {
+		nvkm_error(&sb->subdev, "WPR region not locked\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+#else
+static int
+gm20b_tegra_read_wpr(struct gm200_secboot *gsb)
+{
+	nvkm_error(&gsb->base.subdev, "Tegra support not compiled in\n");
+	return -EINVAL;
+}
+#endif
+
+static int
+gm20b_secboot_prepare_blobs(struct nvkm_secboot *sb)
+{
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+	int ret;
+
+	ret = gm200_secboot_prepare_blobs(sb);
+	if (ret)
+		return ret;
+
+	/*
+	 * On Tegra the WPR region is set by the bootloader. It is illegal for
+	 * the LS blob to be larger than this region.
+	 */
+	if (gsb->acr_blob->size > gsb->wpr_size) {
+		nvkm_error(&sb->subdev, "WPR region too small for FW blob!\n");
+		nvkm_error(&sb->subdev, "required: %dB\n", gsb->acr_blob->size);
+		nvkm_error(&sb->subdev, "WPR size: %dB\n", gsb->wpr_size);
+		return -ENOSPC;
+	}
+
+	return 0;
+}
+
+static int
+gm20b_secboot_init(struct nvkm_secboot *sb)
+{
+	struct gm200_secboot *gsb = gm200_secboot(sb);
+	int ret;
+
+	ret = gm20b_tegra_read_wpr(gsb);
+	if (ret)
+		return ret;
+
+	return gm200_secboot_init(sb);
+}
+
+static const struct nvkm_secboot_func
+gm20b_secboot = {
+	.dtor = gm200_secboot_dtor,
+	.init = gm20b_secboot_init,
+	.prepare_blobs = gm20b_secboot_prepare_blobs,
+	.run = gm200_secboot_run,
+	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS),
+	.boot_falcon = NVKM_SECBOOT_FALCON_PMU,
+};
+
+int
+gm20b_secboot_new(struct nvkm_device *device, int index,
+		  struct nvkm_secboot **psb)
+{
+	int ret;
+	struct gm200_secboot *gsb;
+
+	gsb = kzalloc(sizeof(*gsb), GFP_KERNEL);
+	if (!gsb) {
+		psb = NULL;
+		return -ENOMEM;
+	}
+	*psb = &gsb->base;
+
+	ret = nvkm_secboot_ctor(&gm20b_secboot, device, index, &gsb->base);
+	if (ret)
+		return ret;
+
+	gsb->bl_desc_size = sizeof(struct gm20b_flcn_bl_desc);
+	gsb->fixup_bl_desc = gm20b_secboot_fixup_bl_desc;
+	gsb->fixup_hs_desc = gm20b_secboot_fixup_hs_desc;
+
+	return 0;
+}
diff --git a/drm/nouveau/nvkm/subdev/secboot/priv.h b/drm/nouveau/nvkm/subdev/secboot/priv.h
new file mode 100644
index 000000000..72d79aee7
--- /dev/null
+++ b/drm/nouveau/nvkm/subdev/secboot/priv.h
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NVKM_SECBOOT_PRIV_H__
+#define __NVKM_SECBOOT_PRIV_H__
+
+#include <subdev/secboot.h>
+#include <subdev/mmu.h>
+
+struct nvkm_secboot_func {
+	int (*init)(struct nvkm_secboot *);
+	void *(*dtor)(struct nvkm_secboot *);
+	int (*prepare_blobs)(struct nvkm_secboot *);
+	int (*run)(struct nvkm_secboot *);
+
+	/* ID of the falcon that will perform secure boot */
+	enum nvkm_secboot_falcon boot_falcon;
+	/* Bit-mask of IDs of managed falcons */
+	unsigned long managed_falcons;
+};
+
+int nvkm_secboot_ctor(const struct nvkm_secboot_func *, struct nvkm_device *,
+		      int index, struct nvkm_secboot *);
+int nvkm_secboot_falcon_reset(struct nvkm_secboot *);
+int nvkm_secboot_falcon_run(struct nvkm_secboot *);
+
+struct flcn_u64 {
+	u32 lo;
+	u32 hi;
+};
+static inline u64 flcn64_to_u64(const struct flcn_u64 f)
+{
+	return ((u64)f.hi) << 32 | f.lo;
+}
+
+/**
+ * struct gm200_flcn_bl_desc - DMEM bootloader descriptor
+ * @signature:		16B signature for secure code. 0s if no secure code
+ * @ctx_dma:		DMA context to be used by BL while loading code/data
+ * @code_dma_base:	256B-aligned Physical FB Address where code is located
+ *			(falcon's $xcbase register)
+ * @non_sec_code_off:	offset from code_dma_base where the non-secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @non_sec_code_size:	the size of the nonSecure code part.
+ * @sec_code_off:	offset from code_dma_base where the secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @sec_code_size:	offset from code_dma_base where the secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @code_entry_point:	code entry point which will be invoked by BL after
+ *                      code is loaded.
+ * @data_dma_base:	256B aligned Physical FB Address where data is located.
+ *			(falcon's $xdbase register)
+ * @data_size:		size of data block. Should be multiple of 256B
+ *
+ * Structure used by the bootloader to load the rest of the code. This has
+ * to be filled by host and copied into DMEM at offset provided in the
+ * hsflcn_bl_desc.bl_desc_dmem_load_off.
+ */
+struct gm200_flcn_bl_desc {
+	u32 reserved[4];
+	u32 signature[4];
+	u32 ctx_dma;
+	struct flcn_u64 code_dma_base;
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 sec_code_off;
+	u32 sec_code_size;
+	u32 code_entry_point;
+	struct flcn_u64 data_dma_base;
+	u32 data_size;
+};
+
+/**
+ * struct hsflcn_acr_desc - data section of the HS firmware
+ *
+ * This header is to be copied at the beginning of DMEM by the HS bootloader.
+ *
+ * @signature:		signature of ACR ucode
+ * @wpr_region_id:	region ID holding the WPR header and its details
+ * @wpr_offset:		offset from the WPR region holding the wpr header
+ * @regions:		region descriptors
+ * @nonwpr_ucode_blob_size:	size of LS blob
+ * @nonwpr_ucode_blob_start:	FB location of LS blob is
+ */
+struct hsflcn_acr_desc {
+	union {
+		u8 reserved_dmem[0x200];
+		u32 signatures[4];
+	} ucode_reserved_space;
+	u32 wpr_region_id;
+	u32 wpr_offset;
+	u32 mmu_mem_range;
+#define FLCN_ACR_MAX_REGIONS 2
+	struct {
+		u32 no_regions;
+		struct {
+			u32 start_addr;
+			u32 end_addr;
+			u32 region_id;
+			u32 read_mask;
+			u32 write_mask;
+			u32 client_mask;
+		} region_props[FLCN_ACR_MAX_REGIONS];
+	} regions;
+	u32 ucode_blob_size;
+	u64 ucode_blob_base __aligned(8);
+	struct {
+		u32 vpr_enabled;
+		u32 vpr_start;
+		u32 vpr_end;
+		u32 hdcp_policies;
+	} vpr_desc;
+};
+
+/**
+ * Contains the whole secure boot state, allowing it to be performed as needed
+ * @wpr_addr:		physical address of the WPR region
+ * @wpr_size:		size in bytes of the WPR region
+ * @ls_blob:		LS blob of all the LS firmwares, signatures, bootloaders
+ * @ls_blob_size:	size of the LS blob
+ * @ls_blob_nb_regions:	number of LS firmwares that will be loaded
+ * @acr_blob:		HS blob
+ * @acr_blob_vma:	mapping of the HS blob into the secure falcon's VM
+ * @acr_bl_desc:	bootloader descriptor of the HS blob
+ * @hsbl_blob:		HS blob bootloader
+ * @inst:		instance block for HS falcon
+ * @pgd:		page directory for the HS falcon
+ * @vm:			address space used by the HS falcon
+ * @bl_desc_size:	size of the BL descriptor used by this chip.
+ * @fixup_bl_desc:	hook that generates the proper BL descriptor format from
+ *			the generic GM200 format into a data array of size
+ *			bl_desc_size
+ */
+struct gm200_secboot {
+	struct nvkm_secboot base;
+
+	u64 wpr_addr;
+	u32 wpr_size;
+	struct nvkm_vma acr_blob_vma;
+
+	/* LS FWs, to be loaded by the HS ACR */
+	struct nvkm_gpuobj *ls_blob;
+
+	/* HS FW */
+	struct nvkm_gpuobj *acr_blob;
+
+	/* HS bootloader */
+	void *hsbl_blob;
+
+	struct gm200_flcn_bl_desc acr_bl_desc;
+
+	/* Instance block & address space */
+	struct nvkm_gpuobj *inst;
+	struct nvkm_gpuobj *pgd;
+	struct nvkm_vm *vm;
+
+	u32 bl_desc_size;
+	void (*fixup_bl_desc)(const struct gm200_flcn_bl_desc *, void *);
+	void (*fixup_hs_desc)(struct gm200_secboot *, struct hsflcn_acr_desc *);
+};
+#define gm200_secboot(sb) container_of(sb, struct gm200_secboot, base)
+
+int gm200_secboot_init(struct nvkm_secboot *);
+void *gm200_secboot_dtor(struct nvkm_secboot *);
+int gm200_secboot_prepare_blobs(struct nvkm_secboot *);
+int gm200_secboot_run(struct nvkm_secboot *);
+
+#endif
diff --git a/drm/nouveau/uapi/drm/nouveau_drm.h b/drm/nouveau/uapi/drm/nouveau_drm.h
index 500d82aec..e82eab478 100644
--- a/drm/nouveau/uapi/drm/nouveau_drm.h
+++ b/drm/nouveau/uapi/drm/nouveau_drm.h
@@ -110,6 +110,21 @@ struct drm_nouveau_gem_pushbuf {
 	__u64 gart_available;
 };
 
+#define NOUVEAU_GEM_PUSHBUF_2_FENCE_WAIT                             0x00000001
+#define NOUVEAU_GEM_PUSHBUF_2_FENCE_EMIT                             0x00000002
+struct drm_nouveau_gem_pushbuf_2 {
+	uint32_t channel;
+	uint32_t flags;
+	uint32_t nr_push;
+	uint32_t nr_buffers;
+	int32_t  fence; /* in/out, depends on flags */
+	uint32_t pad;
+	uint64_t push; /* in raw hw format */
+	uint64_t buffers; /* ptr to drm_nouveau_gem_pushbuf_bo */
+	uint64_t vram_available;
+	uint64_t gart_available;
+};
+
 #define NOUVEAU_GEM_CPU_PREP_NOWAIT                                  0x00000001
 #define NOUVEAU_GEM_CPU_PREP_WRITE                                   0x00000004
 struct drm_nouveau_gem_cpu_prep {
@@ -134,11 +149,15 @@ struct drm_nouveau_gem_cpu_fini {
 #define DRM_NOUVEAU_GEM_CPU_PREP       0x42
 #define DRM_NOUVEAU_GEM_CPU_FINI       0x43
 #define DRM_NOUVEAU_GEM_INFO           0x44
+#define DRM_NOUVEAU_GEM_PUSHBUF_2      0x51
+#define DRM_NOUVEAU_GEM_SET_INFO       0x52
 
 #define DRM_IOCTL_NOUVEAU_GEM_NEW            DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new)
 #define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF        DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, struct drm_nouveau_gem_pushbuf)
 #define DRM_IOCTL_NOUVEAU_GEM_CPU_PREP       DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_PREP, struct drm_nouveau_gem_cpu_prep)
 #define DRM_IOCTL_NOUVEAU_GEM_CPU_FINI       DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_FINI, struct drm_nouveau_gem_cpu_fini)
 #define DRM_IOCTL_NOUVEAU_GEM_INFO           DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_INFO, struct drm_nouveau_gem_info)
+#define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF_2      DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF_2, struct drm_nouveau_gem_pushbuf_2)
+#define DRM_IOCTL_NOUVEAU_GEM_SET_INFO       DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_SET_INFO, struct drm_nouveau_gem_info)
 
 #endif /* __NOUVEAU_DRM_H__ */