Rust assembly generation: Mapping a bool vector to string slice vector

We have already looked at assembly code generated for a vector iteration. We will build on that knowledge to understand the assembly code generated when mapping a Rust vector to a string slice vector.

Example code: Map a vector of bools to a vector of string slices with static lifetime

The following code shows two functions:

/// Convert a vector of type `A` into a vector of type `B`.
///
/// Consumes `v`, applies the user-provided closure `f` to every element in
/// order, and collects the results into a new vector of the same length.
/// The closure is accepted as `FnMut`, the same bound `Iterator::map` uses,
/// so it may carry mutable state; every closure that satisfies `Fn` is also
/// accepted.
///
/// This is a generic function and does not generate any assembly code on
/// its own.
pub fn convert<A, B>(v: Vec<A>, f: impl FnMut(A) -> B) -> Vec<B> {
    v.into_iter().map(f).collect()
}

/// Map every `bool` in `v` to its name as a string slice with a static lifetime.
///
/// `true` becomes `"true"` and `false` becomes `"false"`. The element-wise
/// mapping is delegated to the generic `convert` function.
/// This is a concrete function and generates assembly code.
pub fn convert_bool_vec_to_static_str_vec(v: Vec<bool>) -> Vec<&'static str> {
    // Name the mapping so the call below reads as "convert v using bool_name".
    let bool_name = |flag: bool| -> &'static str {
        if flag {
            "true"
        } else {
            "false"
        }
    };
    convert(v, bool_name)
}

Visualizing the input and output vectors

Let's understand the input and output vectors of the convert_bool_vec_to_static_str_vec function. This will aid in understanding the assembly code generated.

The input vector passed to the convert_bool_vec_to_static_str_vec function is a vector of bools. As discussed in the vector iteration article, a vector is a three-field struct: a pointer to the heap-allocated data array, the capacity, and the length. Each bool occupies one byte of the data array. The memory organization of this vector is shown below:

Bool vector

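The output vector of the convert_bool_vec_to_static_str_vec function is a vector of string slices. Each entry of its data array is 16 bytes: an 8-byte pointer to the static string followed by an 8-byte length. The memory organization of this vector is shown below.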

String slice vector

String slice vector generation overview

The following figure gives an overview of the generated assembly code for the convert_bool_vec_to_static_str_vec function. A few key points to note here are:

Overflow and memory allocation failure handling

Length based optimization

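The generated code makes the following decisions based on the length of the input vector:

- Length 0: no memory is allocated for the output data array and the conversion loop is skipped.
- Odd length: one entry is converted first, leaving an even number of entries.
- Remaining entries: the main loop converts two entries per iteration.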

Preparing the string slice

Cleaning up the input vector on exit from the function

The compiler generates a call to __rust_dealloc to free the input vector. Note that the function owns the input vector, so it is responsible for freeing the memory allocated for the input vector's data array just before it returns. The call is skipped when the capacity of the input vector is 0.

Flow chart describing the generated assembly code

String slice vector generation

Annotated assembly code for the convert_bool_vec_to_static_str_vec function

The generated assembly code has been annotated to help understand the mapping from Rust code.

; Annotated x86-64 assembly (Intel syntax) for the bool to string slice conversion.
; rsi points to the input vector v (fields read below: data pointer at +0, capacity at +8, length at +16)
; rdi points to the output vector (fields written below: data pointer at +0, capacity at +8, length at +16)
; Note: The caller is responsible for reserving the space for the output vector struct.
;       The heap allocation for the data array is done by the called function.
; NOTE(review): the label below says "i64" although the Rust function is named
;       convert_bool_vec_to_static_str_vec and the drop_in_place symbol near the end
;       refers to IntoIter<bool> - presumably a leftover name from the Compiler Explorer session.
example::convert_i64_vec_to_static_str_vec:
        push    rbp                     ; Save rbp on stack
        push    r15                     ; Save r15 on stack
        push    r14                     ; Save r14 on stack
        push    r13                     ; Save r13 on stack
        push    r12                     ; Save r12 on stack
        push    rbx                     ; Save rbx on stack
        sub     rsp, 40                 ; Allocate 40 bytes of stack: [rsp] holds the allocation size, [rsp + 8] to [rsp + 39] hold the input iterator state
        mov     r12, qword ptr [rsi]        ; Load the address of the input vector v's data array into r12
        mov     r8, qword ptr [rsi + 8]     ; Load the capacity of the input vector v into r8
        mov     r13, qword ptr [rsi + 16]   ; Load the length of the input vector v into r13
        lea     r15, [r12 + r13]            ; Load the address that points after the last element (each bool is one byte)
        mov     ecx, 16                 ; Set ecx to 16 (the size of each element in the output vector: pointer + length)
        xor     ebx, ebx                ; Set ebx to 0
        mov     rax, r13                ; Set rax to the length of vector v
        mul     rcx                     ; Set rax to rax * rcx (total bytes to allocate for the output vector data array)
        setno   cl                      ; Set cl to 1 if no overflow occurred
        ; The next four stores build the block at [rsp + 8] that the cleanup code at the end
        ; passes to drop_in_place for the Map<IntoIter<bool>, closure> iterator.
        mov     qword ptr [rsp + 8], r12    ; Store the address of vector v's data array into the stack frame
        mov     qword ptr [rsp + 16], r8    ; Store the capacity of vector v into the stack frame
        mov     qword ptr [rsp + 24], r12   ; Store the address of the first element of vector v (the data array start again, not the last element) into the stack frame
        mov     qword ptr [rsp + 32], r15   ; Store the address that points after the last element of vector v into the stack frame
        jo      .LBB1_1                 ; Jump if the multiplication overflowed (the mov/setno instructions above leave the flags untouched)
        mov     r14, rdi                ; Set r14 to the address of the output vector
        mov     bl, cl                  ; Set bl to the value of cl (1, as no overflow occurred)
        shl     rbx, 3                  ; Set rbx to rbx * 8 (The register contains 8, the alignment of the output data array)
        test    rax, rax                ; Test if the allocation size is 0 (i.e. the length of vector v is 0)
        mov     qword ptr [rsp], rax    ; Store the allocation size into the stack frame (reloaded at .LBB1_7 for the error handler)
        je      .LBB1_4                 ; Jump if length of vector v is 0
        mov     rdi, rax                ; Copy the size of the output vector data array into rdi
        mov     rsi, rbx                ; Set the alignment of the output vector data array to 8
        mov     rbp, r14                ; Save the address of the output vector in rbp across the call
        mov     r14, r8                 ; Save the capacity of vector v in r14 across the call
        call    qword ptr [rip + __rust_alloc@GOTPCREL] ; Call __rust_alloc to allocate space for the output vector data array
        mov     r8, r14         ; Restore r8 to the capacity of vector v
        mov     r14, rbp        ; Restore r14 to the address of the output vector
        test    rax, rax        ; Check if the allocation failed
        je      .LBB1_7         ; Jump if __rust_alloc returned 0 (out of memory)
.LBB1_8:
        mov     qword ptr [r14], rax        ; Store the address of the output vector data array into the output vector
        mov     qword ptr [r14 + 8], r13    ; Set the capacity of the output vector to the length of vector v
        mov     qword ptr [r14 + 16], 0     ; Setting the length of the output vector to 0
        test    r13, r13                    ; Test if the length of vector v (r13) is 0
        je      .LBB1_15                    ; Jump if the length of vector v is 0 (nothing to convert)
        test    r13b, 1                     ; Check if the length of vector v is odd
        jne     .LBB1_11                    ; Jump if the length of vector v is odd

        mov     rcx, r12                    ; Set rcx to the address of vector v's data array

        ; The following check against 1 seems redundant as r13b must be even
        ; at this point. The complete r13 register cannot match 1.
        cmp     r13, 1                  ; Test if the length of the input vector v is 1
        jne     .LBB1_13                ; Jump if the length of the input vector v is not 1
        jmp     .LBB1_15                ; Otherwise skip the loop (the length is 1)
.LBB1_4:
        ; Zero-length case: nothing is allocated. The alignment value in rbx (8) is
        ; used as the non-null data pointer stored by .LBB1_8.
        mov     rax, rbx                ; Set rax to rbx (8)
        test    rax, rax                ; Test if rax is 0
        jne     .LBB1_8                 ; Jump to .LBB1_8 if rax is not 0 (rbx is 8 on this path)
.LBB1_7:
        mov     rdi, qword ptr [rsp]    ; Set rdi to the requested allocation size (stored at [rsp] above)
        mov     rsi, rbx                ; Set rsi to rbx (the alignment, 8)
        call    qword ptr [rip + alloc::alloc::handle_alloc_error@GOTPCREL] ; Call error handler
        jmp     .LBB1_2                 ; Jump to .LBB1_2

.LBB1_11:
        ; The length of vector v is odd so one entry is converted here
        ; before entering the loop that converts two entries per iteration.

        movzx   ecx, byte ptr [r12]     ; Set ecx to the value of the first element of input vector v
        and     ecx, 1                  ; Mask off the upper bits of ecx to get the bool value

        ; The length and pointer need to be initialized in the output vector entry
        ; The compiler uses an XOR trick to determine the length of the slice.
        ; If 1 is XORed with 5, the result is 4 (length of "true").
        ; If 0 is XORed with 5, the result is 5 (length of "false").

        mov     rdx, rcx                ; Set rdx to rcx (the bool value)
        xor     rdx, 5                  ; Set rdx to rdx ^ 5
        test    rcx, rcx                ; Set the condition code bits based on the content of bool value
        lea     rcx, [rip + .L__unnamed_1]    ; Set rcx to the address of the string "false"
        lea     rsi, [rip + .L__unnamed_2]    ; Set rsi to the address of the string "true"

        ; rsi starts out pointing to "true". The following conditional move makes rsi
        ; point to "false" if the bool value is 0 (zero flag set by the test above).
        cmove   rsi, rcx                   ; Set rsi to rcx ("false") if the bool value is 0

        lea     rcx, [r12 + 1]             ; Set rcx to the address of the next element of vector v
        mov     qword ptr [rax], rsi       ; Store the string slice pointer into the output vector's first entry
        mov     qword ptr [rax + 8], rdx   ; Store the string slice length in the output vector's first entry
        add     rax, 16                    ; Move past the current string slice.
        cmp     r13, 1                     ; Test if the input vector length is 1
        je      .LBB1_15                   ; We are done as we have copied one entry already.
.LBB1_13:
        lea     r9, [rip + .L__unnamed_1]   ; Set r9 to the address of the string "false"
        lea     rdx, [rip + .L__unnamed_2]  ; Set rdx to the address of the string "true"

.LBB1_14:
        ; Begin loop that converts two entries per iteration
        movzx   ebp, byte ptr [rcx]     ; Set ebp to the value of the next element of vector v
        and     ebp, 1                  ; Mask off the upper bits of ebp to get the bool value
        mov     rbx, rbp                ; Set rbx to rbp (the bool value)
        xor     rbx, 5                  ; xor rbx with 5 to get the string slice length
        test    rbp, rbp                ; Test if ebp is 0
        mov     rsi, rdx                ; rsi is set to the address of the string "true"
        cmove   rsi, r9                 ; Set rsi to the address of "false" if ebp is 0
        mov     qword ptr [rax], rsi            ; Store pointer to the string slice into the output vector entry
        mov     qword ptr [rax + 8], rbx        ; Store length of the string slice into the output vector entry

        movzx   esi, byte ptr [rcx + 1]     ; Set esi to the value of the element after it
        and     esi, 1          ; Mask off the upper bits of esi to get the bool value
        mov     rbp, rsi        ; Set rbp to rsi (the bool value)
        xor     rbp, 5          ; xor rbp with 5 to get the string slice length
        test    rsi, rsi        ; Test if esi is 0
        mov     rsi, rdx        ; rsi is set to the address of the string "true"
        cmove   rsi, r9         ; Set rsi to the address of "false" if the bool value is 0
        mov     qword ptr [rax + 16], rsi   ; Store pointer to the string slice into the output vector entry
        mov     qword ptr [rax + 24], rbp   ; Store length of the string slice into the output vector entry
        add     rcx, 2             ; Move in the input vector past the two entries we have just processed.
        add     rax, 32            ; Move the output vector pointer past the two entries we have just processed.
        cmp     rcx, r15           ; Check if we have processed all the entries of the input vector
        jne     .LBB1_14           ; Jump to .LBB1_14 as more entries need to be processed.
        ; End loop that converts two entries per iteration

.LBB1_15:
        mov     qword ptr [r14 + 16], r13   ; Store the length into the output vector
        test    r8, r8                      ; Test if the capacity of vector v is 0
        je      .LBB1_17                    ; Skip input buffer de-allocation if capacity is 0

        ; Free the input vector v's buffer as the function owned the vector and
        ; vector v is going out of scope.
        mov     edx, 1          ; Set edx to 1 (third argument of __rust_dealloc: the alignment of the bool data array)
        mov     rdi, r12        ; Set rdi to the address of vector v's data array
        mov     rsi, r8         ; Set rsi to the capacity of vector v (also its size in bytes, as each bool is one byte)
        call    qword ptr [rip + __rust_dealloc@GOTPCREL]   ; Call __rust_dealloc to deallocate vector v's data array
.LBB1_17:
        mov     rax, r14    ; Set rax to the address of the output vector
        add     rsp, 40     ; Free the space on the stack used to store the local variables
        pop     rbx         ; Restore rbx
        pop     r12         ; Restore r12
        pop     r13         ; Restore r13
        pop     r14         ; Restore r14
        pop     r15         ; Restore r15
        pop     rbp         ; Restore rbp
        ret                 ; Return

.LBB1_1:
        call    qword ptr [rip + alloc::raw_vec::capacity_overflow@GOTPCREL] ; Call capacity_overflow to handle the error
.LBB1_2:
        ud2                 ; Invalid instruction to cause an exception (presumably the error handlers above never return)
        ; NOTE(review): the code below is not reached by fall-through; presumably it is the
        ; unwind cleanup path entered by the exception handling runtime - confirm.
        mov     rbx, rax    ; Save rax in rbx across the drop_in_place call (it is passed to _Unwind_Resume below)
        lea     rdi, [rsp + 8]    ; Set rdi to the address of the input iterator state saved in the stack frame
        call    core::ptr::drop_in_place<core::iter::adapters::map::Map<alloc::vec::into_iter::IntoIter<bool>,example::convert_i64_vec_to_static_str_vec::{{closure}}>> ; Call drop_in_place to drop the input iterator (Map over IntoIter<bool>), not the output vector
        mov     rdi, rbx    ; Set rdi to rbx (the value saved from rax above)
        call    _Unwind_Resume@PLT      ; Call _Unwind_Resume to resume unwinding
        ud2               ; Invalid instruction

; Static data referenced by the function above.
; The strings carry no terminator; their lengths (4 and 5) are computed in the
; code and stored next to the pointer in each string slice.
; Note that .L__unnamed_2 is "true" and .L__unnamed_1 is "false".
.L__unnamed_2:
        .ascii  "true"    ; 'static "true" (4 bytes)

.L__unnamed_1:
        .ascii  "false"   ; 'static "false" (5 bytes)

; 8-byte reference to the rust_eh_personality symbol
; (presumably used by the exception handling tables - not shown in this listing).
DW.ref.rust_eh_personality:
        .quad   rust_eh_personality

View in the Compiler Explorer