I modified the hello_world example to run the same HMAC test I ran on the PC:

/* Length in bytes of the inner digest that hmac_md5_vector() feeds into the
 * outer hash.
 */

#define MD5_MAC_LEN 16

/* Defined outside this file.  As used below: it is handed num_elem fragments
 * described by addr[i]/len[i] plus an output buffer mac, and a non-zero
 * return value is treated as failure.  Presumably it computes the MD5 digest
 * of the concatenated fragments -- confirm against its implementation.
 */

extern int md5_vector(size_t num_elem, const uint8_t *addr[], const size_t *len, uint8_t *mac);

/* Overwrite a buffer that held key material.  The stores go through a
 * volatile-qualified pointer so the compiler cannot discard them as dead
 * writes to a local that is about to go out of scope.
 */

static void hmac_md5_wipe(void *buf, size_t n)
{
  volatile uint8_t *p = buf;

  while (n-- > 0)
    {
      *p++ = 0;
    }
}

/* HMAC-MD5 over a list of data fragments.
 *
 *   key, key_len - the secret key; keys longer than 64 bytes are first
 *                  replaced by their digest
 *   num_elem     - number of entries in addr[]/len[] (at most 5)
 *   addr, len    - fragment pointers and fragment lengths, processed in order
 *   mac          - output buffer for the result (MD5_MAC_LEN bytes are read
 *                  back from it for the outer hash)
 *
 * Returns -1 if num_elem is too large or if the key/inner hash fails,
 * otherwise the return value of the outer md5_vector() call.
 *
 * The transform is:
 *
 *   MD5(K XOR opad, MD5(K XOR ipad, text))
 *
 * where ipad is the byte 0x36 repeated 64 times and opad is the byte 0x5c
 * repeated 64 times.
 */

int hmac_md5_vector(const uint8_t *key, size_t key_len, size_t num_elem,
                    const uint8_t *addr[], const size_t *len, uint8_t *mac)
{
  uint8_t k_pad[64]; /* key zero-padded to one block, XORed with ipad/opad */
  uint8_t tk[16];    /* digest of an over-long key */
  const uint8_t *_addr[6];
  size_t _len[6];
  size_t i;
  int res = -1;

  /* Fixed limit on the number of fragments to avoid having to allocate
   * memory (which could fail).  Slot 0 is reserved for the padded key.
   */

  if (num_elem > 5)
    {
      return -1;
    }

  /* If the key is longer than one block, replace it with its digest */

  if (key_len > sizeof(k_pad))
    {
      if (md5_vector(1, &key, &key_len, tk))
        {
          goto out;
        }

      key = tk;
      key_len = sizeof(tk);
    }

  /* Build K XOR ipad.  The copy is skipped for an empty key so that memcpy
   * is never handed a possibly NULL source pointer.
   */

  memset(k_pad, 0, sizeof(k_pad));
  if (key_len > 0)
    {
      memcpy(k_pad, key, key_len);
    }

  for (i = 0; i < sizeof(k_pad); i++)
    {
      k_pad[i] ^= 0x36;
    }

  /* Inner hash: (K XOR ipad) followed by the caller's fragments */

  _addr[0] = k_pad;
  _len[0] = sizeof(k_pad);

  for (i = 0; i < num_elem; i++)
    {
      _addr[i + 1] = addr[i];
      _len[i + 1] = len[i];
    }

  if (md5_vector(1 + num_elem, _addr, _len, mac))
    {
      goto out;
    }

  /* Turn K XOR ipad into K XOR opad in place: XORing with (0x36 ^ 0x5c)
   * cancels the ipad byte and applies the opad byte.
   */

  for (i = 0; i < sizeof(k_pad); i++)
    {
      k_pad[i] ^= 0x36 ^ 0x5c;
    }

  /* Outer hash: (K XOR opad) followed by the inner digest */

  _addr[0] = k_pad;
  _len[0] = sizeof(k_pad);
  _addr[1] = mac;
  _len[1] = MD5_MAC_LEN;
  res = md5_vector(2, _addr, _len, mac);

out:

  /* Scrub key-derived scratch data on every exit path, including errors */

  hmac_md5_wipe(k_pad, sizeof(k_pad));
  hmac_md5_wipe(tk, sizeof(tk));

  return res;
}

/* HMAC-MD5 over a single contiguous buffer.
 *
 * Wraps the buffer as a one-entry fragment list and forwards everything to
 * hmac_md5_vector(); the return value is passed through unchanged.
 */

int hmac_md5(const uint8_t *key, size_t key_len, const uint8_t *data, size_t data_len,
             uint8_t *mac)
{
  const uint8_t *frag[1];
  size_t frag_len[1];

  frag[0] = data;
  frag_len[0] = data_len;

  return hmac_md5_vector(key, key_len, 1, frag, frag_len, mac);
}

/* Test driver: computes the HMAC-MD5 of a fixed 16-byte message under a
 * fixed 8-byte key and prints the first 16 bytes of the output buffer, one
 * per line.
 *
 * Returns 0 on success, 1 if hmac_md5() reports a failure.
 */

int main(int argc, FAR char *argv[])
{
  int i;
  int ret;
  const uint8_t key[8] = {'S', '3', 'c', 'r', 'e', 't', 's', '!'};
  const uint8_t msg[16] = {'M', 'y', ' ', 'M', 'e', 's', 's', 'a', 'g', 'e', ' ', 'p', 'l', 'a', 'i', 'n'};
  uint8_t hmac[128];

  memset(hmac, 0x00, sizeof(hmac));

  /* Do not print the zero-filled buffer as if it were a digest when the
   * computation failed.
   */

  ret = hmac_md5(key, sizeof(key), msg, sizeof(msg), hmac);
  if (ret != 0)
    {
      fprintf(stderr, "hmac_md5 failed: %d\n", ret);
      return 1;
    }

  for (i = 0; i < 16; i++)
    {
      printf("hmac[%d] = %02X\n", i, hmac[i]);
    }

  return 0;
}

The result was the same:

nsh> hello
hmac[0] = 3D
hmac[1] = 32
hmac[2] = 76
hmac[3] = FD
hmac[4] = E3
hmac[5] = FB
hmac[6] = 42
hmac[7] = D8
hmac[8] = F0
hmac[9] = 05
hmac[10] = 53
hmac[11] = CC
hmac[12] = 12
hmac[13] = E7
hmac[14] = C9
hmac[15] = C3
nsh>