diff --git a/TODO b/TODO
index cf7778b7c0d0b8b9640b698190dd0d6c17058cb7..6521c60c179adb9eb0ec95b563d7cd4a83cb0d0f 100644
--- a/TODO
+++ b/TODO
@@ -125,9 +125,6 @@ Once the above are completed:
 
 - general layer cleanup: @par
   https://lists.berlios.de/pipermail/openocd-development/2009-May/006590.html
-- regression: xscale does not place debug_handler.bin into the right spot. workaround:
-  use -s option on command line to place xscale/debug_handler.bin in search path @par
-  https://lists.berlios.de/pipermail/openocd-development/2009-July/009338.html
 - bug: either USBprog is broken with new tms sequence or there is a general
   problem with XScale and the new tms sequence. Workaround: use "tms_sequence long"
   @par
diff --git a/configure.in b/configure.in
index 8e2881cf43beb049931f3f71c82084095c4d2457..84574bec6e36dc0b9f22648c561fa499466150a5 100644
--- a/configure.in
+++ b/configure.in
@@ -16,6 +16,7 @@ AC_LANG_C
 AC_PROG_CC
 AC_PROG_CC_C99
 AM_PROG_CC_C_O
+AM_PROG_AS
 AC_PROG_RANLIB
 
 dnl disable checks for C++, Fortran and GNU Java Compiler
diff --git a/src/target/Makefile.am b/src/target/Makefile.am
index 9eee2f95cdf220634f993bcc6728316484f11de1..86716dca353a3f473ffbbfa2f464dbcf4d4eb94e 100644
--- a/src/target/Makefile.am
+++ b/src/target/Makefile.am
@@ -35,6 +35,7 @@ libtarget_la_SOURCES = \
 	feroceon.c \
 	etb.c \
 	xscale.c \
+	xscale_debug.S \
 	arm_simulator.c \
 	image.c \
 	armv7m.c \
@@ -98,7 +99,6 @@ noinst_HEADERS = \
 	avrt.h
 
 nobase_dist_pkglib_DATA =
-nobase_dist_pkglib_DATA += xscale/debug_handler.bin
 nobase_dist_pkglib_DATA += ecos/at91eb40a.elf
 
 MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
diff --git a/src/target/xscale.c b/src/target/xscale.c
index fca578d320785f5a06c8ee8b3064c75fc1001373..82a2c571e843930403832c44eb16a5a5fa0d79a2 100644
--- a/src/target/xscale.c
+++ b/src/target/xscale.c
@@ -1559,15 +1559,6 @@ static int xscale_deassert_reset(target_t *target)
 {
 	armv4_5_common_t *armv4_5 = target->arch_info;
 	xscale_common_t *xscale = armv4_5->arch_info;
-
-	fileio_t debug_handler;
-	uint32_t address;
-	uint32_t binary_size;
-
-	uint32_t buf_cnt;
-	uint32_t i;
-	int retval;
-
 	breakpoint_t *breakpoint = target->breakpoints;
 
 	LOG_DEBUG("-");
@@ -1592,6 +1583,11 @@ static int xscale_deassert_reset(target_t *target)
 
 	if (!xscale->handler_installed)
 	{
+		uint32_t address;
+		unsigned buf_cnt;
+		const uint8_t *buffer = xscale_debug_handler;
+		int retval;
+
 		/* release SRST */
 		jtag_add_reset(0, 0);
 
@@ -1606,36 +1602,26 @@ static int xscale_deassert_reset(target_t *target)
 		buf_set_u32(xscale->reg_cache->reg_list[XSCALE_DCSR].value, 16, 1, 0x1);
 		xscale_write_dcsr(target, 1, 0);
 
-		/* Load debug handler */
-		if (fileio_open(&debug_handler, "xscale/debug_handler.bin", FILEIO_READ, FILEIO_BINARY) != ERROR_OK)
-		{
-			return ERROR_OK;
-		}
-
-		if ((binary_size = debug_handler.size) % 4)
-		{
-			LOG_ERROR("debug_handler.bin: size not a multiple of 4");
-			exit(-1);
-		}
-
-		if (binary_size > 0x800)
-		{
-			LOG_ERROR("debug_handler.bin: larger than 2kb");
-			exit(-1);
-		}
-
-		binary_size = CEIL(binary_size, 32) * 32;
-
+		/* Load the debug handler into the mini-icache.  Since
+		 * it's using halt mode (not monitor mode), it runs in
+		 * "Special Debug State" for access to registers, memory,
+		 * coprocessors, trace data, etc.
+		 *
+		 * REVISIT:  *assumes* we've had a SRST+TRST reset so the
+		 * mini-icache contents have been invalidated.  Safest to
+		 * force that, so writing new contents is reliable...
+		 */
 		address = xscale->handler_address;
-		while (binary_size > 0)
+		for (unsigned binary_size = xscale_debug_handler_size;
+				binary_size > 0;
+				binary_size -= buf_cnt, buffer += buf_cnt)
 		{
 			uint32_t cache_line[8];
-			uint8_t buffer[32];
-
-			if ((retval = fileio_read(&debug_handler, 32, buffer, &buf_cnt)) != ERROR_OK)
-			{
+			unsigned i;
 
-			}
+			buf_cnt = binary_size;
+			if (buf_cnt > 32)
+				buf_cnt = 32;
 
 			for (i = 0; i < buf_cnt; i += 4)
 			{
@@ -1651,15 +1637,23 @@ static int xscale_deassert_reset(target_t *target)
 			/* only load addresses other than the reset vectors */
 			if ((address % 0x400) != 0x0)
 			{
-				xscale_load_ic(target, address, cache_line);
+				retval = xscale_load_ic(target, address,
+						cache_line);
+				if (retval != ERROR_OK)
+					return retval;
 			}
 
 			address += buf_cnt;
-			binary_size -= buf_cnt;
 		};
 
-		xscale_load_ic(target, 0x0, xscale->low_vectors);
-		xscale_load_ic(target, 0xffff0000, xscale->high_vectors);
+		retval = xscale_load_ic(target, 0x0,
+					xscale->low_vectors);
+		if (retval != ERROR_OK)
+			return retval;
+		retval = xscale_load_ic(target, 0xffff0000,
+					xscale->high_vectors);
+		if (retval != ERROR_OK)
+			return retval;
 
 		jtag_add_runtest(30, jtag_set_end_state(TAP_IDLE));
 
@@ -1685,8 +1679,6 @@ static int xscale_deassert_reset(target_t *target)
 			/* resume the target */
 			xscale_resume(target, 1, 0x0, 1, 0);
 		}
-
-		fileio_close(&debug_handler);
 	}
 	else
 	{
@@ -3056,6 +3048,11 @@ static int xscale_target_create(struct target_s *target, Jim_Interp *interp)
 {
 	xscale_common_t *xscale;
 
+	if (xscale_debug_handler_size > 0x800) {
+		LOG_ERROR("debug_handler.bin: larger than 2kb");
+		return ERROR_FAIL;
+	}
+
 	xscale = calloc(1, sizeof(*xscale));
 	if (!xscale)
 		return ERROR_FAIL;
diff --git a/src/target/xscale.h b/src/target/xscale.h
index a5d83ee675c7a4aafe3385ac3ff1d08d1497e8d9..9d92550a6c7b84a5a5bb1dd71de6900a6941c5c7 100644
--- a/src/target/xscale.h
+++ b/src/target/xscale.h
@@ -170,4 +170,10 @@ enum
 
 #define ERROR_XSCALE_NO_TRACE_DATA	(-1500)
 
+/* This XScale "debug handler" is loaded into the processor's
+ * mini-ICache, which is 2K of code writable only via JTAG.
+ */
+extern const uint8_t xscale_debug_handler[];
+extern const uint32_t xscale_debug_handler_size;
+
 #endif /* XSCALE_H */
diff --git a/src/target/xscale_debug.S b/src/target/xscale_debug.S
new file mode 100644
index 0000000000000000000000000000000000000000..0a7b87d2771555d05c6564d86ea924e769792912
--- /dev/null
+++ b/src/target/xscale_debug.S
@@ -0,0 +1,13 @@
+	.section	.rodata
+	/* XScale debug handler image, embedded for xscale.c (declared in xscale.h). */
+	.balign		4	/* byte count; ".align 4" means 16 bytes on some targets */
+	.global		 xscale_debug_handler
+xscale_debug_handler:
+	.incbin		"xscale/debug_handler.bin"
+	.size		xscale_debug_handler, . - xscale_debug_handler
+	/* Size is measured after the padding below, so it is a multiple of 4. */
+	.balign		4
+	.global		 xscale_debug_handler_size
+xscale_debug_handler_size:
+	.4byte		. - xscale_debug_handler	/* uint32_t in C; ".word" can be 16 bits */
+	.size		xscale_debug_handler_size, 4