ec13a7
From 519786ec38bb5123547071021e4bd7678d7673ad Mon Sep 17 00:00:00 2001
ec13a7
From: Andreas Krebbel <krebbel@linux.ibm.com>
ec13a7
Date: Mon, 23 Mar 2020 09:08:27 +0100
ec13a7
Subject: [PATCH 3/4] Optimize s390x instructions
ec13a7
ec13a7
---
ec13a7
 src/asm/jump_s390x_sysv_elf_gas.S  | 84 ++++++++++--------------------
ec13a7
 src/asm/make_s390x_sysv_elf_gas.S  | 27 ++++++----
ec13a7
 src/asm/ontop_s390x_sysv_elf_gas.S | 81 +++++++++-------------------
ec13a7
 3 files changed, 70 insertions(+), 122 deletions(-)
ec13a7
ec13a7
diff --git a/src/asm/jump_s390x_sysv_elf_gas.S b/src/asm/jump_s390x_sysv_elf_gas.S
ec13a7
index c011d53..b2163cc 100644
ec13a7
--- a/libs/context/src/asm/jump_s390x_sysv_elf_gas.S
ec13a7
+++ b/libs/context/src/asm/jump_s390x_sysv_elf_gas.S
ec13a7
@@ -36,47 +36,34 @@
ec13a7
 .global jump_fcontext
ec13a7
 .type   jump_fcontext, @function
ec13a7
 
ec13a7
+#define GR_OFFSET	0
ec13a7
+#define LR_OFFSET	64
ec13a7
+#define SP_OFFSET	72
ec13a7
+#define FP_OFFSET	80
ec13a7
+#define PC_OFFSET	112
ec13a7
+#define L_CTX		120
ec13a7
+#define L_STACKFRAME	120
ec13a7
+
ec13a7
 jump_fcontext:
ec13a7
     
ec13a7
     # Reserved the space for stack to store the data of current context
ec13a7
     # before we jump to the new context.
ec13a7
-    lay 15,-120(15)
ec13a7
+    aghi %r15,-L_STACKFRAME
ec13a7
 
ec13a7
     # save the registers to the stack
ec13a7
-    stg 6,  0(15)       # save R6     
ec13a7
-    stg 7,  8(15)       # save R7     
ec13a7
-    stg 8,  16(15)      # save R8
ec13a7
-    stg 9,  24(15)      # save R9
ec13a7
-    stg 10, 32(15)      # save R10
ec13a7
-    stg 11, 40(15)      # save R11
ec13a7
-    stg 12, 48(15)      # save R12
ec13a7
-    stg 13, 56(15)      # save R13
ec13a7
-    stg 14, 64(15)      # save R14
ec13a7
-    stg 15, 72(15)      # save R15
ec13a7
+    stmg %r6, %r15, GR_OFFSET(%r15)
ec13a7
 
ec13a7
     # save the floating point registers
ec13a7
-    # Load the FPR into R0 then save it to the stack
ec13a7
-    # Load F1 into R0
ec13a7
-    lgdr 0,1 
ec13a7
-    stg  0,80(15)       # save F1
ec13a7
-
ec13a7
-    # Load F3 into R0
ec13a7
-    lgdr 0,3
ec13a7
-    stg  0,88(15)       # save F3
ec13a7
-
ec13a7
-    # Load F5 into R0
ec13a7
-    lgdr 0,5
ec13a7
-    stg  0,96(15)       # save F5
ec13a7
-
ec13a7
-    # Load F7 into R0
ec13a7
-    lgdr 0,7
ec13a7
-    stg  0,104(15)      # save F7
ec13a7
+    std  %f1,FP_OFFSET(%r15)       # save F1 (slot reloaded into %f1 on resume)
ec13a7
+    std  %f3,FP_OFFSET+8(%r15)     # save F3
ec13a7
+    std  %f5,FP_OFFSET+16(%r15)    # save F5
ec13a7
+    std  %f7,FP_OFFSET+24(%r15)    # save F7
ec13a7
 
ec13a7
     # Save LR as PC
ec13a7
-    stg 14,112(15)
ec13a7
+    stg  %r14,PC_OFFSET(%r15)
ec13a7
 
ec13a7
     # Store the SP pointing to the old context-data into R0
ec13a7
-    lgr 0,15
ec13a7
+    lgr	 %r0,%r15
ec13a7
 
ec13a7
     # Get the SP pointing to the new context-data
ec13a7
     # Note: Since the return type of the jump_fcontext is struct whose
ec13a7
@@ -88,46 +75,31 @@ jump_fcontext:
ec13a7
     # R2 --> Address of the return transfer_t struct
ec13a7
     # R3 --> Context we want to switch to
ec13a7
     # R4 --> Data
ec13a7
-    lgr 15,3
ec13a7
+    lgr	%r15,%r3
ec13a7
 
ec13a7
     # Load the registers with the data present in context-data of the
ec13a7
     # context we are going to switch to
ec13a7
-    lg 6,  0(15)       # restore R6     
ec13a7
-    lg 7,  8(15)       # restore R7     
ec13a7
-    lg 8,  16(15)      # restore R8
ec13a7
-    lg 9,  24(15)      # restore R9
ec13a7
-    lg 10, 32(15)      # restore R10
ec13a7
-    lg 11, 40(15)      # restore R11
ec13a7
-    lg 12, 48(15)      # restore R12
ec13a7
-    lg 13, 56(15)      # restore R13
ec13a7
-    lg 14, 64(15)      # restore R14
ec13a7
+    lmg	%r6, %r14, GR_OFFSET(%r15)
ec13a7
 
ec13a7
     # Restore Floating point registers
ec13a7
-    lg   1,80(15)
ec13a7
-    ldgr 1,1            # restore F1
ec13a7
-
ec13a7
-    lg   1,88(15)
ec13a7
-    ldgr 1,3            # restore F3
ec13a7
-
ec13a7
-    lg   1,96(15)
ec13a7
-    ldgr 1,5            # restore F5
ec13a7
-
ec13a7
-    lg   1,104(15)
ec13a7
-    ldgr 1,7            # restore F7
ec13a7
+    ld	 %f1,FP_OFFSET(%r15)
ec13a7
+    ld	 %f3,FP_OFFSET+8(%r15)
ec13a7
+    ld	 %f5,FP_OFFSET+16(%r15)
ec13a7
+    ld	 %f7,FP_OFFSET+24(%r15)
ec13a7
 
ec13a7
     # Load PC
ec13a7
-    lg  1,112(15)
ec13a7
+    lg   %r1,PC_OFFSET(%r15)
ec13a7
 
ec13a7
-    # Adjust the stack
ec13a7
-    lay 15, 120(15)
ec13a7
+    # Adjust the stack: step over the L_STACKFRAME-byte context-data area
ec13a7
+    aghi %r15,L_STACKFRAME
ec13a7
 
ec13a7
     # R2 --> Address where the return transfer_t is stored
ec13a7
     # R0 --> FCTX
ec13a7
     # R4 --> DATA
ec13a7
 
ec13a7
     # Store the elements to return transfer_t
ec13a7
-    stg 15, 0(2)
ec13a7
-    stg 4, 8(2)
ec13a7
+    stg %r15, 0(%r2)
ec13a7
+    stg %r4, 8(%r2)
ec13a7
 
ec13a7
     # Note: The address in R2 points to the place where the return
ec13a7
     # transfer_t is stored. Since context_function take transfer_t
ec13a7
@@ -135,7 +107,7 @@ jump_fcontext:
ec13a7
     # first parameter value.
ec13a7
 
ec13a7
     #jump to context
ec13a7
-    br 1
ec13a7
+    br  %r1
ec13a7
 
ec13a7
 .size   jump_fcontext,.-jump_fcontext
ec13a7
 # Mark that we don't need executable stack.
ec13a7
diff --git a/src/asm/make_s390x_sysv_elf_gas.S b/src/asm/make_s390x_sysv_elf_gas.S
ec13a7
index f566533..d02856c 100644
ec13a7
--- a/libs/context/src/asm/make_s390x_sysv_elf_gas.S
ec13a7
+++ b/libs/context/src/asm/make_s390x_sysv_elf_gas.S
ec13a7
@@ -36,6 +36,14 @@
ec13a7
 .global make_fcontext
ec13a7
 .type 	 make_fcontext, @function
ec13a7
 
ec13a7
+#define GR_OFFSET	0
ec13a7
+#define LR_OFFSET	64
ec13a7
+#define SP_OFFSET	72
ec13a7
+#define FP_OFFSET	80
ec13a7
+#define PC_OFFSET	112
ec13a7
+#define L_CTX		120
ec13a7
+#define L_STACKFRAME	120
ec13a7
+
ec13a7
 make_fcontext:
ec13a7
 
ec13a7
 		# make_fcontext takes in 3 arguments
ec13a7
@@ -56,40 +64,39 @@ make_fcontext:
ec13a7
 		# address is zero or not. If not AND it with `-8`. 
ec13a7
 
ec13a7
 		# Here we AND the lower 16 bits of the memory address present in the 
ec13a7
-		# R2 with the bits 1111 1111 1111 1000 which when converted into
ec13a7
-		# decimal is 65528
ec13a7
-		nill    2,65528
ec13a7
+		# R2 with the bits 1111 1111 1111 0000 (16-byte aligned, hence also 8-byte aligned)
ec13a7
+		nill    %r2,0xfff0
ec13a7
 
ec13a7
 		# Reserve space for context-data on context-stack.
ec13a7
 		# This is done by shifting the SP/address by 112 bytes.
ec13a7
-		lay 2,-120(2)
ec13a7
+		aghi	%r2,-L_CTX
ec13a7
 
ec13a7
 		# third arg of make_fcontext() == address of the context-function
ec13a7
 		# Store the address as a PC to jump in, whenever we call the 
ec13a7
 		# make_fcontext.
ec13a7
-		stg 4,112(2)
ec13a7
+		stg 	%r4,PC_OFFSET(%r2)
ec13a7
 
ec13a7
 		# Save the address of finish as return-address for context-function
ec13a7
 		# This will be entered after context-function return
ec13a7
 		# The address of finish will be saved in Link register, this register
ec13a7
 		# specifies where we need to jump after the function executes
ec13a7
 		# completely.
ec13a7
-		larl 1,finish
ec13a7
-		stg  1,64(2)
ec13a7
+		larl 	%r1,finish
ec13a7
+		stg  	%r1,LR_OFFSET(%r2)
ec13a7
 
ec13a7
 		# Return pointer to context data
ec13a7
 		# R14 acts as the link register
ec13a7
 		# R2 holds the address of the context stack. When we return from the
ec13a7
 		# make_fcontext, R2 is passed back.
ec13a7
-		br 14 
ec13a7
+		br 	%r14 
ec13a7
 
ec13a7
 	finish:
ec13a7
 
ec13a7
 		# In finish tasks, you load the exit code and exit the make_fcontext
ec13a7
 		# This is called when the context-function is entirely executed
ec13a7
 
ec13a7
-		lghi 2,0
ec13a7
-		brasl 14,_exit
ec13a7
+		lghi 	%r2,0
ec13a7
+		brasl 	%r14,_exit@PLT
ec13a7
 
ec13a7
 .size   make_fcontext,.-make_fcontext
ec13a7
 # Mark that we don't need executable stack.
ec13a7
diff --git a/src/asm/ontop_s390x_sysv_elf_gas.S b/src/asm/ontop_s390x_sysv_elf_gas.S
ec13a7
index 7ab2cf5..4488654 100644
ec13a7
--- a/libs/context/src/asm/ontop_s390x_sysv_elf_gas.S
ec13a7
+++ b/libs/context/src/asm/ontop_s390x_sysv_elf_gas.S
ec13a7
@@ -36,47 +36,32 @@
ec13a7
 .global ontop_fcontext
ec13a7
 .type   ontop_fcontext, @function
ec13a7
 
ec13a7
+#define GR_OFFSET	0
ec13a7
+#define LR_OFFSET	64
ec13a7
+#define SP_OFFSET	72
ec13a7
+#define FP_OFFSET	80
ec13a7
+#define PC_OFFSET	112
ec13a7
+#define L_CTX		120
ec13a7
+
ec13a7
 ontop_fcontext:
ec13a7
     
ec13a7
     # Reserved the space for stack to store the data of current context
ec13a7
     # before we jump to the new context.
ec13a7
-    lay 15,-120(15)
ec13a7
+    aghi %r15,-L_CTX
ec13a7
 
ec13a7
     # save the registers to the stack
ec13a7
-    stg 6,  0(15)       # save R6     
ec13a7
-    stg 7,  8(15)       # save R7     
ec13a7
-    stg 8,  16(15)      # save R8
ec13a7
-    stg 9,  24(15)      # save R9
ec13a7
-    stg 10, 32(15)      # save R10
ec13a7
-    stg 11, 40(15)      # save R11
ec13a7
-    stg 12, 48(15)      # save R12
ec13a7
-    stg 13, 56(15)      # save R13
ec13a7
-    stg 14, 64(15)      # save R14
ec13a7
-    stg 15, 72(15)      # save R15
ec13a7
+    stmg %r6, %r15, GR_OFFSET(%r15)
ec13a7
 
ec13a7
     # save the floating point registers
ec13a7
-    # Load the FPR into R0 then save it to the stack
ec13a7
-    # Load F1 into R0
ec13a7
-    lgdr 0,1 
ec13a7
-    stg  0,80(15)       # save F1
ec13a7
-
ec13a7
-    # Load F3 into R0
ec13a7
-    lgdr 0,3
ec13a7
-    stg  0,88(15)       # save F3
ec13a7
-
ec13a7
-    # Load F5 into R0
ec13a7
-    lgdr 0,5
ec13a7
-    stg  0,96(15)       # save F5
ec13a7
-
ec13a7
-    # Load F7 into R0
ec13a7
-    lgdr 0,7
ec13a7
-    stg  0,104(15)      # save F7
ec13a7
-
ec13a7
+    std  %f1,FP_OFFSET(%r15)       # save F1 (slot reloaded into %f1 on resume)
ec13a7
+    std  %f3,FP_OFFSET+8(%r15)     # save F3
ec13a7
+    std  %f5,FP_OFFSET+16(%r15)    # save F5
ec13a7
+    std  %f7,FP_OFFSET+24(%r15)    # save F7
ec13a7
     # Save LR as PC
ec13a7
-    stg 14,112(15)
ec13a7
+    stg  %r14,PC_OFFSET(%r15)
ec13a7
 
ec13a7
     # Store the SP pointing to the old context-data into R0
ec13a7
-    lgr 0,15
ec13a7
+    lgr  %r0,%r15
ec13a7
 
ec13a7
     # Get the SP pointing to the new context-data
ec13a7
     # Note: Since the return type of the jump_fcontext is struct whose
ec13a7
@@ -88,38 +73,22 @@ ontop_fcontext:
ec13a7
     # R2 --> Address of the return transfer_t struct
ec13a7
     # R3 --> Context we want to switch to
ec13a7
     # R4 --> Data
ec13a7
-    lgr 15,3
ec13a7
+    lgr  %r15,%r3
ec13a7
 
ec13a7
     # Load the registers with the data present in context-data of the
ec13a7
     # context we are going to switch to
ec13a7
-    lg 6,  0(15)       # restore R6     
ec13a7
-    lg 7,  8(15)       # restore R7     
ec13a7
-    lg 8,  16(15)      # restore R8
ec13a7
-    lg 9,  24(15)      # restore R9
ec13a7
-    lg 10, 32(15)      # restore R10
ec13a7
-    lg 11, 40(15)      # restore R11
ec13a7
-    lg 12, 48(15)      # restore R12
ec13a7
-    lg 13, 56(15)      # restore R13
ec13a7
-    lg 14, 64(15)      # restore R14
ec13a7
-    lg 15, 72(15)      # restore R15
ec13a7
+    lmg  %r6,%r15,GR_OFFSET(%r15)
ec13a7
 
ec13a7
     # Restore Floating point registers
ec13a7
-    lg   1,80(15)
ec13a7
-    ldgr 1,1            # restore F1
ec13a7
-
ec13a7
-    lg   1,88(15)
ec13a7
-    ldgr 1,3            # restore F3
ec13a7
-
ec13a7
-    lg   1,96(15)
ec13a7
-    ldgr 1,5            # restore F5
ec13a7
-
ec13a7
-    lg   1,104(15)
ec13a7
-    ldgr 1,7            # restore F7
ec13a7
+    ld	 %f1,FP_OFFSET(%r15)
ec13a7
+    ld	 %f3,FP_OFFSET+8(%r15)
ec13a7
+    ld	 %f5,FP_OFFSET+16(%r15)
ec13a7
+    ld	 %f7,FP_OFFSET+24(%r15)
ec13a7
 
ec13a7
     # Skip PC
ec13a7
 
ec13a7
     # Adjust the stack
ec13a7
-    lay 15, 120(15)
ec13a7
+    aghi %r15,L_CTX
ec13a7
 
ec13a7
     # R2 --> Address where the return transfer_t is stored
ec13a7
     # R0 --> FCTX
ec13a7
@@ -127,8 +96,8 @@ ontop_fcontext:
ec13a7
     # R5 --> Context function
ec13a7
 
ec13a7
     # Store the elements to return transfer_t
ec13a7
-    stg 15, 0(2)
ec13a7
-    stg 4, 8(2)
ec13a7
+    stg  %r15, 0(%r2)
ec13a7
+    stg  %r4, 8(%r2)
ec13a7
 
ec13a7
     # Note: The address in R2 points to the place where the return
ec13a7
     # transfer_t is stored. Since context_function take transfer_t
ec13a7
@@ -136,7 +105,7 @@ ontop_fcontext:
ec13a7
     # first parameter value.
ec13a7
 
ec13a7
     #jump to context function
ec13a7
-    br 5
ec13a7
+    br 	%r5
ec13a7
 
ec13a7
 .size   ontop_fcontext,.-ontop_fcontext
ec13a7
 # Mark that we don't need executable stack.
ec13a7
-- 
ec13a7
2.18.1
ec13a7