diff --git a/src/jtag/jtag.c b/src/jtag/jtag.c
index d876d8fbdace92e1a0d2f81ce5f527f671b028ca..73acb6a09ecbb47a7658e77fedb5e0d7c3788ab2 100644
--- a/src/jtag/jtag.c
+++ b/src/jtag/jtag.c
@@ -33,10 +33,6 @@
 #include "string.h"
 #include <unistd.h>
 
-#ifndef MINIDRIVER
-/* this allows JTAG devices to implement the entire jtag_xxx() layer in hw/sw */
-#define MINIDRIVER(a) a
-#endif
 
 
 /* note that this is not marked as static as it must be available from outside jtag.c for those 
@@ -411,7 +407,7 @@ int jtag_add_ir_scan(int num_fields, scan_field_t *fields, enum tap_state state)
 	
 	cmd_queue_cur_state = cmd_queue_end_state;
 	
-	int retval=interface_jtag_add_ir_scan(num_fields, fields, state);
+	int retval=interface_jtag_add_ir_scan(num_fields, fields, cmd_queue_end_state);
 	if (retval!=ERROR_OK)
 		jtag_error=retval;
 	return retval;
@@ -513,7 +509,7 @@ int jtag_add_plain_ir_scan(int num_fields, scan_field_t *fields, enum tap_state
 		
 	cmd_queue_cur_state = cmd_queue_end_state;
 	
-	return interface_jtag_add_plain_ir_scan(num_fields, fields, state);
+	return interface_jtag_add_plain_ir_scan(num_fields, fields, cmd_queue_end_state);
 }
 
 int MINIDRIVER(interface_jtag_add_plain_ir_scan)(int num_fields, scan_field_t *fields, enum tap_state state)
@@ -572,7 +568,7 @@ int jtag_add_dr_scan(int num_fields, scan_field_t *fields, enum tap_state state)
 			
 	cmd_queue_cur_state = cmd_queue_end_state;
 
-	return interface_jtag_add_dr_scan(num_fields, fields, state);
+	return interface_jtag_add_dr_scan(num_fields, fields, cmd_queue_end_state);
 }
 
 int MINIDRIVER(interface_jtag_add_dr_scan)(int num_fields, scan_field_t *fields, enum tap_state state)
@@ -658,6 +654,94 @@ int MINIDRIVER(interface_jtag_add_dr_scan)(int num_fields, scan_field_t *fields,
 	return ERROR_OK;
 }
 
+/* Queue a write-only DR scan: num_fields values for device_num, plus a 1 bit
+ * field for every other device in the chain (those must be in BYPASS).
+ * Each num_bits[j] is expected to be <= 32: value[j] is staged in 4 bytes. */
+void MINIDRIVER(interface_jtag_add_dr_out)(int device_num,
+		int num_fields,
+		int *num_bits,
+		u32 *value,
+		enum tap_state end_state)
+{
+	int i;
+	int field_count = 0;
+	int scan_size;
+	int bypass_devices = 0;
+
+	jtag_command_t **last_cmd = jtag_get_last_command_p();
+	jtag_device_t *device = jtag_devices;
+	/* count devices in bypass */
+	while (device)
+	{
+		if (device->bypass)
+			bypass_devices++;
+		device = device->next;
+	}
+
+	/* allocate memory for a new list member */
+	*last_cmd = cmd_queue_alloc(sizeof(jtag_command_t));
+	last_comand_pointer = &((*last_cmd)->next);
+	(*last_cmd)->next = NULL;
+	(*last_cmd)->type = JTAG_SCAN;
+
+	/* allocate memory for dr scan command */
+	(*last_cmd)->cmd.scan = cmd_queue_alloc(sizeof(scan_command_t));
+	(*last_cmd)->cmd.scan->ir_scan = 0;
+	(*last_cmd)->cmd.scan->num_fields = num_fields + bypass_devices;
+	(*last_cmd)->cmd.scan->fields = cmd_queue_alloc((num_fields + bypass_devices) * sizeof(scan_field_t));
+	(*last_cmd)->cmd.scan->end_state = end_state;
+
+	for (i = 0; i < jtag_num_devices; i++)
+	{
+		(*last_cmd)->cmd.scan->fields[field_count].device = i;
+
+		if (i == device_num)
+		{
+			int j;
+#ifdef _DEBUG_JTAG_IO_
+			/* if a device is listed, the BYPASS register must not be selected */
+			if (jtag_get_device(i)->bypass)
+			{
+				ERROR("scan data for a device in BYPASS");
+				exit(-1);
+			}
+#endif
+			for (j = 0; j < num_fields; j++)
+			{
+				u8 out_value[4]; /* u8, matching the buf_set_u32()/buf_cpy() buffers used elsewhere */
+				scan_size = num_bits[j];
+				buf_set_u32(out_value, 0, scan_size, value[j]);
+				(*last_cmd)->cmd.scan->fields[field_count].num_bits = scan_size;
+				(*last_cmd)->cmd.scan->fields[field_count].out_value = buf_cpy(out_value, cmd_queue_alloc(CEIL(scan_size, 8)), scan_size);
+				(*last_cmd)->cmd.scan->fields[field_count].out_mask = NULL;
+				(*last_cmd)->cmd.scan->fields[field_count].in_value = NULL;
+				(*last_cmd)->cmd.scan->fields[field_count].in_check_value = NULL;
+				(*last_cmd)->cmd.scan->fields[field_count].in_check_mask = NULL;
+				(*last_cmd)->cmd.scan->fields[field_count].in_handler = NULL;
+				(*last_cmd)->cmd.scan->fields[field_count++].in_handler_priv = NULL;
+			}
+		} else
+		{
+#ifdef _DEBUG_JTAG_IO_
+			/* if a device isn't listed, the BYPASS register should be selected */
+			if (!jtag_get_device(i)->bypass)
+			{
+				ERROR("BUG: no scan data for a device not in BYPASS");
+				exit(-1);
+			}
+#endif
+			/* program the scan field to 1 bit length, and ignore its value */
+			(*last_cmd)->cmd.scan->fields[field_count].num_bits = 1;
+			(*last_cmd)->cmd.scan->fields[field_count].out_value = NULL;
+			(*last_cmd)->cmd.scan->fields[field_count].out_mask = NULL;
+			(*last_cmd)->cmd.scan->fields[field_count].in_value = NULL;
+			(*last_cmd)->cmd.scan->fields[field_count].in_check_value = NULL;
+			(*last_cmd)->cmd.scan->fields[field_count].in_check_mask = NULL;
+			(*last_cmd)->cmd.scan->fields[field_count].in_handler = NULL;
+			(*last_cmd)->cmd.scan->fields[field_count++].in_handler_priv = NULL;
+		}
+	}
+}
 
 int jtag_add_plain_dr_scan(int num_fields, scan_field_t *fields, enum tap_state state)
 {
@@ -678,7 +762,7 @@ int jtag_add_plain_dr_scan(int num_fields, scan_field_t *fields, enum tap_state
 			
 	cmd_queue_cur_state = cmd_queue_end_state;
 
-	return interface_jtag_add_plain_dr_scan(num_fields, fields, state);
+	return interface_jtag_add_plain_dr_scan(num_fields, fields, cmd_queue_end_state);
 }
 
 int MINIDRIVER(interface_jtag_add_plain_dr_scan)(int num_fields, scan_field_t *fields, enum tap_state state)
@@ -735,7 +819,7 @@ int jtag_add_statemove(enum tap_state state)
 			
 	cmd_queue_cur_state = cmd_queue_end_state;
 
-	return interface_jtag_add_statemove(state);
+	return interface_jtag_add_statemove(cmd_queue_end_state);
 }
 
 int MINIDRIVER(interface_jtag_add_statemove)(enum tap_state state)
@@ -841,7 +925,7 @@ int jtag_add_runtest(int num_cycles, enum tap_state state)
 	cmd_queue_cur_state = cmd_queue_end_state;
 	
 	/* executed by sw or hw fifo */
-	return interface_jtag_add_runtest(num_cycles, state);
+	return interface_jtag_add_runtest(num_cycles, cmd_queue_end_state);
 }
 
 int jtag_add_reset(int req_trst, int req_srst)
@@ -968,9 +1052,9 @@ int MINIDRIVER(interface_jtag_add_end_state)(enum tap_state state)
 
 int jtag_add_end_state(enum tap_state state)
 {
-	int retval = interface_jtag_add_end_state(state);
 	if (state != -1)
 		cmd_queue_end_state = state;
+	int retval = interface_jtag_add_end_state(cmd_queue_end_state); /* forward the stored end state, so state == -1 passes on the previous one */
 	return retval;
 }
 
@@ -1197,7 +1281,7 @@ int jtag_execute_queue(void)
 	int retval=interface_jtag_execute_queue();
 	if (retval==ERROR_OK)
 	{
-		retval=jtag_error;
+	retval=jtag_error;
 	}
 	jtag_error=ERROR_OK;
 	return retval;
@@ -1892,43 +1976,6 @@ int handle_drscan_command(struct command_context_s *cmd_ctx, char *cmd, char **a
 		free(fields[i].out_value);
 
 	free(fields);
-
-	return ERROR_OK;
-}
-
-
-
-int MINIDRIVER(interface_jtag_add_shift)(const enum tap_state shift_state, const enum tap_state end_state, int num_bits, u32 value)
-{
-	u8 out_buf[4];
-	buf_set_u32(out_buf, 0, 32, value);
-
-	/* allocate memory for a new list member */
-	jtag_command_t **last_cmd;
-	last_cmd = jtag_get_last_command_p();
-	*last_cmd = cmd_queue_alloc(sizeof(jtag_command_t));
-	last_comand_pointer = &((*last_cmd)->next);
-	(*last_cmd)->next = NULL;
-	(*last_cmd)->type = JTAG_SCAN;
-
-	/* allocate memory for scan command */
-	(*last_cmd)->cmd.scan = cmd_queue_alloc(sizeof(scan_command_t));
-	(*last_cmd)->cmd.scan->ir_scan = (shift_state==TAP_SI);
-	(*last_cmd)->cmd.scan->num_fields = 1;
-	(*last_cmd)->cmd.scan->fields = cmd_queue_alloc(1 * sizeof(scan_field_t));
-	(*last_cmd)->cmd.scan->end_state = end_state;
-		
-	int num_bytes = CEIL(num_bits, 8);
-	int i=0;
-	(*last_cmd)->cmd.scan->fields[i].device = 0; /* not used by any drivers */
-	(*last_cmd)->cmd.scan->fields[i].num_bits = num_bits;
-	(*last_cmd)->cmd.scan->fields[i].out_value = buf_cpy(out_buf, cmd_queue_alloc(num_bytes), num_bits);
-	(*last_cmd)->cmd.scan->fields[i].out_mask = NULL;
-	(*last_cmd)->cmd.scan->fields[i].in_value = NULL;
-	(*last_cmd)->cmd.scan->fields[i].in_check_value = NULL;
-	(*last_cmd)->cmd.scan->fields[i].in_check_mask = NULL;
-	(*last_cmd)->cmd.scan->fields[i].in_handler = NULL;
-	(*last_cmd)->cmd.scan->fields[i].in_handler_priv = NULL;
 	
 	return ERROR_OK;
 }
diff --git a/src/jtag/jtag.h b/src/jtag/jtag.h
index 31ed51e4bf162defcc5697d08241a34e4b51221b..ada3b49dc874b7f1e30ac4f1a3bbd62715191f6c 100644
--- a/src/jtag/jtag.h
+++ b/src/jtag/jtag.h
@@ -344,28 +344,51 @@ extern int jtag_verify_capture_ir;
 #define ERROR_JTAG_DEVICE_ERROR			(-107)
 
 
-/* Here a #define MINIDRIVER() and an inline version of hw fifo interface_jtag_add_shift can be defined */
 
-#ifndef MINIDRIVER 
-extern int interface_jtag_add_shift(const enum tap_state shift_state, const enum tap_state end_state, int bits, u32 value);
-#endif
-
-/* Enter the shift_state and cycle "bits" times out of that state.
+/* this allows JTAG devices to implement the entire jtag_xxx() layer in hw/sw */
+#ifdef HAVE_JTAG_MINIDRIVER_H
+/* Here a #define MINIDRIVER() and an inline version of hw fifo interface_jtag_add_dr_out can be defined */
+#include "jtag_minidriver.h"
+#define MINIDRIVER(a) notused ## a 
+#else
+#define MINIDRIVER(a) a
+/* jtag_add_dr_out() is a faster version of jtag_add_dr_scan() 
  * 
- * So if the end_state!=shift_state, then the transition from shift_state to 
- * end_state counts as a transition out of shift_state.
+ * Neither the current state nor end_state may be TAP_TLR. An end_state of -1 keeps the previously set end state.
  * 
- * Legal shift states TAP_SD and TAP_SI
+ * num_bits[i] is the number of bits to clock out from value[i] LSB first.
  * 
- * Legal end state does not include TAP_TLR
+ * If the device is in bypass, then that is an error condition in
+ * the caller code that is not detected by this fn, whereas jtag_add_dr_scan()
+ * does detect it. Similarly if the device is not in bypass, data must
+ * be passed to it. 
  * 
- * Bits are clocked out from value LSB first.
+ * If anything fails, then jtag_error will be set and jtag_execute_queue() will
+ * return an error. There is no way to determine if there was a failure
+ * during this function call.
+ * 
+ * Note that interface_jtag_add_dr_out() can instead be defined as an inline function in jtag_minidriver.h.
  */
-static __inline int jtag_add_shift(const enum tap_state shift_state, const enum tap_state end_state, int bits, u32 value)
+extern void interface_jtag_add_dr_out(int device, 
+		int num_fields,
+		int *num_bits,
+		u32 *value,
+		enum tap_state end_state);
+#endif
+
+
+
+
+static __inline__ void jtag_add_dr_out(int device, 
+		int num_fields,
+		int *num_bits,
+		u32 *value,
+		enum tap_state end_state) /* -1 is accepted and means "no new end state" */
 {
-	int retval;
-	retval=interface_jtag_add_shift(shift_state, end_state, bits, value);
-	return retval;
+	if (end_state != -1) /* only an explicit state replaces the stored end state */
+		cmd_queue_end_state=end_state;
+	cmd_queue_cur_state=cmd_queue_end_state; /* the tracked current state becomes the end state */
+	interface_jtag_add_dr_out(device, num_fields, num_bits, value, cmd_queue_end_state); /* the driver gets the resolved end state, not the raw argument */
 }
 
 
diff --git a/src/target/embeddedice.c b/src/target/embeddedice.c
index 333c1c46ca679802aaf0dae7ea8ab7933e6032c1..30258270d1c983ffa4ef41391d2363ebf5751af8 100644
--- a/src/target/embeddedice.c
+++ b/src/target/embeddedice.c
@@ -374,11 +374,6 @@ int embeddedice_set_reg_w_exec(reg_t *reg, u8 *buf)
 int embeddedice_write_reg(reg_t *reg, u32 value)
 {
 	embeddedice_reg_t *ice_reg = reg->arch_info;
-	u8 reg_addr = ice_reg->addr & 0x1f;
-	scan_field_t fields[3];
-	u8 field0_out[4];
-	u8 field1_out[1];
-	u8 field2_out[1];
 
 	DEBUG("%i: 0x%8.8x", ice_reg->addr, value);
 	
@@ -386,41 +381,8 @@ int embeddedice_write_reg(reg_t *reg, u32 value)
 	arm_jtag_scann(ice_reg->jtag_info, 0x2);
 	
 	arm_jtag_set_instr(ice_reg->jtag_info, ice_reg->jtag_info->intest_instr, NULL);
-	
-	fields[0].device = ice_reg->jtag_info->chain_pos;
-	fields[0].num_bits = 32;
-	fields[0].out_value = field0_out;
-	buf_set_u32(fields[0].out_value, 0, 32, value);
-	fields[0].out_mask = NULL;
-	fields[0].in_value = NULL;
-	fields[0].in_check_value = NULL;
-	fields[0].in_check_mask = NULL;
-	fields[0].in_handler = NULL;
-	fields[0].in_handler_priv = NULL;
-	
-	fields[1].device = ice_reg->jtag_info->chain_pos;
-	fields[1].num_bits = 5;
-	fields[1].out_value = field1_out;
-	buf_set_u32(fields[1].out_value, 0, 5, reg_addr);
-	fields[1].out_mask = NULL;
-	fields[1].in_value = NULL;
-	fields[1].in_check_value = NULL;
-	fields[1].in_check_mask = NULL;
-	fields[1].in_handler = NULL;
-	fields[1].in_handler_priv = NULL;
 
-	fields[2].device = ice_reg->jtag_info->chain_pos;
-	fields[2].num_bits = 1;
-	fields[2].out_value = field2_out;
-	buf_set_u32(fields[2].out_value, 0, 1, 1);
-	fields[2].out_mask = NULL;
-	fields[2].in_value = NULL;
-	fields[2].in_check_value = NULL;
-	fields[2].in_check_mask = NULL;
-	fields[2].in_handler = NULL;
-	fields[2].in_handler_priv = NULL;
-	
-	jtag_add_dr_scan(3, fields, -1);
+	embeddedice_write_reg_inner(reg, value);
 	
 	return ERROR_OK;
 }
diff --git a/src/target/embeddedice.h b/src/target/embeddedice.h
index 62c5b78b728568e89f0470760e608d5b84b1b1b9..dde37f65f372d2bd1e293516b34e75e5a807bde4 100644
--- a/src/target/embeddedice.h
+++ b/src/target/embeddedice.h
@@ -104,13 +104,68 @@ extern int embeddedice_handshake(arm_jtag_t *jtag_info, int hsbit, u32 timeout);
 /* If many embeddedice_write_reg() follow eachother, then the >1 invocations can be this faster version of 
  * embeddedice_write_reg
  */
-static __inline void embeddedice_write_reg_inner(reg_t *reg, u32 value)
+static __inline__ void embeddedice_write_reg_inner(reg_t *reg, u32 value)
 {
 	embeddedice_reg_t *ice_reg = reg->arch_info;
 	u8 reg_addr = ice_reg->addr & 0x1f;
-	jtag_add_shift(TAP_SD, TAP_PD, 32, value);
-	jtag_add_shift(TAP_SD, TAP_PD, 5, reg_addr);
-	jtag_add_shift(TAP_SD, TAP_RTI, 1, 1);
+#if 1 /* active path: hand the three fields to jtag_add_dr_out() as plain value/length arrays */
+	u32 values[3];
+	int num_bits[3];
+	
+	values[0]=value; /* field 0: the 32 bit register value */
+	num_bits[0]=32;
+	values[1]=reg_addr; /* field 1: the register address, masked to 5 bits above */
+	num_bits[1]=5;
+	values[2]=1; /* field 2: a single bit set to 1; presumably the write flag - TODO confirm */
+	num_bits[2]=1;
+	
+	jtag_add_dr_out(ice_reg->jtag_info->chain_pos, 
+			3,
+			num_bits,
+			values,
+			-1); /* -1: keep the end state that is already set */
+#else /* disabled path: the same three fields queued through jtag_add_dr_scan() */
+	scan_field_t fields[3];
+	u8 field0_out[4];
+	u8 field1_out[1];
+	u8 field2_out[1];
+
+	fields[0].device = ice_reg->jtag_info->chain_pos;
+	fields[0].num_bits = 32;
+	fields[0].out_value = field0_out;
+	buf_set_u32(fields[0].out_value, 0, 32, value);
+	fields[0].out_mask = NULL;
+	fields[0].in_value = NULL;
+	fields[0].in_check_value = NULL;
+	fields[0].in_check_mask = NULL;
+	fields[0].in_handler = NULL;
+	fields[0].in_handler_priv = NULL;
+	
+	fields[1].device = ice_reg->jtag_info->chain_pos;
+	fields[1].num_bits = 5;
+	fields[1].out_value = field1_out;
+	buf_set_u32(fields[1].out_value, 0, 5, reg_addr);
+	fields[1].out_mask = NULL;
+	fields[1].in_value = NULL;
+	fields[1].in_check_value = NULL;
+	fields[1].in_check_mask = NULL;
+	fields[1].in_handler = NULL;
+	fields[1].in_handler_priv = NULL;
+
+	fields[2].device = ice_reg->jtag_info->chain_pos;
+	fields[2].num_bits = 1;
+	fields[2].out_value = field2_out;
+	buf_set_u32(fields[2].out_value, 0, 1, 1);
+	fields[2].out_mask = NULL;
+	fields[2].in_value = NULL;
+	fields[2].in_check_value = NULL;
+	fields[2].in_check_mask = NULL;
+	fields[2].in_handler = NULL;
+	fields[2].in_handler_priv = NULL;
+	
+	jtag_add_dr_scan(3, fields, -1);
+	
+#endif /* end of the two alternative implementations */
 }