Bit find first set() and bit find last set()

From Software Engineers Wiki
Jump to: navigation, search

Write a function that finds first and last bit set in 32-bit value; bit_find_first_set() and bit_find_last_set()

Answer #1

bit_find_last_set() search for a bit value 1 from MSB to LSB (this is similar to finding the number of leading zeros). bit_find_first_set() does it in reverse order; from LSB to MSB. Following code uses bitmask and searches for a bit set by doing AND operation with the bitmask.

int bit_find_last_set(unsigned int data)
{
        int bit_index;
        unsigned int mask;
               
        for (bit_index = 31, mask = 0x80000000; mask != 0; --bit_index, mask >>= 1)
                if (data & mask)
                        return bit_index;       
                  
        return -1;  
}

int bit_find_first_set(unsigned int data)
{
        int bit_index;
        unsigned int mask;
                  
        for (bit_index = 0, mask = 0x00000001; mask != 0; ++bit_index, mask <<= 1) 
                if (data & mask)  
                        return bit_index;          
                     
        return -1;     
}

For ARM 32-bit architecture, bit_find_last_set() is compiled to,

00000000 <_Z17bit_find_last_setj>:
   0:   e3a02102        mov     r2, #-2147483648        ; 0x80000000
   4:   e3a0301f        mov     r3, #31
   8:   ea000003        b       1c <_Z17bit_find_last_setj+0x1c>
   c:   e2433001        sub     r3, r3, #1
  10:   e3730001        cmn     r3, #1
  14:   e1a020a2        lsr     r2, r2, #1
  18:   0a000001        beq     24 <_Z17bit_find_last_setj+0x24>
  1c:   e1120000        tst     r2, r0
  20:   0afffff9        beq     c <_Z17bit_find_last_setj+0xc>
  24:   e1a00003        mov     r0, r3
  28:   e12fff1e        bx      lr

Answer #2

First implementation is pretty simple, and it runs less iterations if a bit set close to the MSB. But in worst case, when the data is 1, it iterates 32 times. Following codes has similar average and worst case execution time.

int bit_find_last_set(unsigned int data)
{
        int bit_index;

        if (data == 0)
                return -1;

        bit_index = 32;

        /* check bits[31:16] */
        if ((data & 0xffff0000) == 0)
                bit_index -= 16;
        else
                data >>= 16;
        /* check bits[15:8] */
        if ((data & 0x0000ff00) == 0)
                bit_index -= 8;
        else
                data >>= 8;
        /* check bits[7:4] */
        if ((data & 0x000000f0) == 0)
                bit_index -= 4;
        else
                data >>= 4;
        /* check bits[3:2] */
        if ((data & 0x0000000c) == 0)
                bit_index -= 2;
        else
                data >>= 2;
        /* check bits[1:1] */
        if ((data & 0x00000002) == 0)
                bit_index -= 1;

        return bit_index;
}

On 32-bit ARM processor,

00000000 <_Z17bit_find_last_setj>:
   0:   e2503000        subs    r3, r0, #0
   4:   0a000011        beq     50 <_Z17bit_find_last_setj+0x50>
   8:   e1a02823        lsr     r2, r3, #16
   c:   e1a01802        lsl     r1, r2, #16
  10:   e3510000        cmp     r1, #0
  14:   11a03002        movne   r3, r2
  18:   03a0000f        moveq   r0, #15
  1c:   13a0001f        movne   r0, #31
  20:   e3130cff        tst     r3, #65280      ; 0xff00
  24:   11a03423        lsrne   r3, r3, #8
  28:   02400008        subeq   r0, r0, #8
  2c:   e31300f0        tst     r3, #240        ; 0xf0
  30:   11a03223        lsrne   r3, r3, #4
  34:   02400004        subeq   r0, r0, #4
  38:   e313000c        tst     r3, #12
  3c:   11a03123        lsrne   r3, r3, #2
  40:   02400002        subeq   r0, r0, #2
  44:   e3130002        tst     r3, #2
  48:   02400001        subeq   r0, r0, #1
  4c:   e12fff1e        bx      lr
  50:   e3e00000        mvn     r0, #0
  54:   e12fff1e        bx      lr

On 32-bit x86,

00000000 <_Z17bit_find_last_setj>:
   0:   8b 54 24 04             mov    0x4(%esp),%edx
   4:   b8 ff ff ff ff          mov    $0xffffffff,%eax
   9:   85 d2                   test   %edx,%edx
   b:   74 2e                   je     3b <_Z17bit_find_last_setj+0x3b>
   d:   f7 c2 00 00 ff ff       test   $0xffff0000,%edx
  13:   b8 0f 00 00 00          mov    $0xf,%eax
  18:   75 3e                   jne    58 <_Z17bit_find_last_setj+0x58>
  1a:   f6 c6 ff                test   $0xff,%dh
  1d:   74 21                   je     40 <_Z17bit_find_last_setj+0x40>
  1f:   c1 ea 08                shr    $0x8,%edx
  22:   f6 c2 f0                test   $0xf0,%dl
  25:   75 21                   jne    48 <_Z17bit_find_last_setj+0x48>
  27:   83 e8 04                sub    $0x4,%eax
  2a:   f6 c2 0c                test   $0xc,%dl
  2d:   75 21                   jne    50 <_Z17bit_find_last_setj+0x50>
  2f:   83 e8 02                sub    $0x2,%eax
  32:   83 e2 02                and    $0x2,%edx
  35:   83 fa 01                cmp    $0x1,%edx
  38:   83 d8 00                sbb    $0x0,%eax
  3b:   f3 c3                   repz ret
  3d:   8d 76 00                lea    0x0(%esi),%esi
  40:   83 e8 08                sub    $0x8,%eax
  43:   f6 c2 f0                test   $0xf0,%dl
  46:   74 df                   je     27 <_Z17bit_find_last_setj+0x27>
  48:   c1 ea 04                shr    $0x4,%edx
  4b:   f6 c2 0c                test   $0xc,%dl
  4e:   74 df                   je     2f <_Z17bit_find_last_setj+0x2f>
  50:   c1 ea 02                shr    $0x2,%edx
  53:   eb dd                   jmp    32 <_Z17bit_find_last_setj+0x32>
  55:   8d 76 00                lea    0x0(%esi),%esi
  58:   c1 ea 10                shr    $0x10,%edx
  5b:   b0 1f                   mov    $0x1f,%al
  5d:   f6 c6 ff                test   $0xff,%dh
  60:   75 bd                   jne    1f <_Z17bit_find_last_setj+0x1f>
  62:   eb dc                   jmp    40 <_Z17bit_find_last_setj+0x40>

Following code is from same idea, but with less branching.

int bit_find_last_set(unsigned int data)
{
        int bit_index;

        if (data == 0)
                return -1;

        bit_index = 0;

        /* check bits[31:16] */
        if ((data & 0xffff0000) != 0) {
                bit_index += 16;
                data >>= 16;
        }
        /* check bits[15:8] */
        if ((data & 0x0000ff00) != 0) {
                bit_index += 8;
                data >>= 8;
        }
        /* check bits[7:4] */
        if ((data & 0x000000f0) != 0) {
                bit_index += 4;
                data >>= 4;
        }
        /* check bits[3:2] */
        if ((data & 0x0000000c) != 0) {
                bit_index += 2;
                data >>= 2;
        }
        /* check bits[1:1] */
        if ((data & 0x00000002) != 0)
                bit_index += 1;

        return bit_index;
}

On some architecture without conditional instructions, like 32-bit x86, this will result in more optimized code.

00000000 <_Z17bit_find_last_setj>:
   0:   8b 54 24 04             mov    0x4(%esp),%edx
   4:   b8 ff ff ff ff          mov    $0xffffffff,%eax
   9:   85 d2                   test   %edx,%edx
   b:   74 39                   je     46 <_Z17bit_find_last_setj+0x46>
   d:   31 c0                   xor    %eax,%eax
   f:   f7 c2 00 00 ff ff       test   $0xffff0000,%edx
  15:   74 05                   je     1c <_Z17bit_find_last_setj+0x1c>
  17:   c1 ea 10                shr    $0x10,%edx
  1a:   b0 10                   mov    $0x10,%al
  1c:   f6 c6 ff                test   $0xff,%dh
  1f:   74 06                   je     27 <_Z17bit_find_last_setj+0x27>
  21:   83 c0 08                add    $0x8,%eax
  24:   c1 ea 08                shr    $0x8,%edx
  27:   f6 c2 f0                test   $0xf0,%dl
  2a:   74 06                   je     32 <_Z17bit_find_last_setj+0x32>
  2c:   83 c0 04                add    $0x4,%eax
  2f:   c1 ea 04                shr    $0x4,%edx
  32:   f6 c2 0c                test   $0xc,%dl
  35:   74 06                   je     3d <_Z17bit_find_last_setj+0x3d>
  37:   83 c0 02                add    $0x2,%eax
  3a:   c1 ea 02                shr    $0x2,%edx
  3d:   83 e2 02                and    $0x2,%edx
  40:   83 fa 01                cmp    $0x1,%edx
  43:   83 d8 ff                sbb    $0xffffffff,%eax
  46:   f3 c3                   repz ret

Reference

Personal tools
Namespaces

Variants
Actions
Navigation
Toolbox